diff --git a/apps/api/sharedLibs/html-transformer/src/lib.rs b/apps/api/sharedLibs/html-transformer/src/lib.rs
index f5eb86e5..d4a29934 100644
--- a/apps/api/sharedLibs/html-transformer/src/lib.rs
+++ b/apps/api/sharedLibs/html-transformer/src/lib.rs
@@ -17,7 +17,8 @@ pub unsafe extern "C" fn extract_links(html: *const libc::c_char) -> *mut i8 {
let mut out: Vec = Vec::new();
- for anchor in document.select("a[href]").unwrap() {
+ let anchors: Vec<_> = document.select("a[href]").unwrap().collect();
+ for anchor in anchors {
let mut href = anchor.attributes.borrow().get("href").unwrap().to_string();
if href.starts_with("http:/") && !href.starts_with("http://") {
diff --git a/apps/api/src/scraper/WebScraper/crawler.ts b/apps/api/src/scraper/WebScraper/crawler.ts
index ceee94ef..ea606f44 100644
--- a/apps/api/src/scraper/WebScraper/crawler.ts
+++ b/apps/api/src/scraper/WebScraper/crawler.ts
@@ -401,7 +401,13 @@ export class WebCrawler {
public async extractLinksFromHTML(html: string, url: string) {
try {
- return await this.extractLinksFromHTMLRust(html, url);
+ return (await this.extractLinksFromHTMLRust(html, url)).map(x => {
+ try {
+ return new URL(x, url).href
+ } catch (e) {
+ return null;
+ }
+ }).filter(x => x !== null) as string[];
} catch (error) {
this.logger.error("Failed to call html-transformer! Falling back to cheerio...", {
error,