Merge pull request #449 from mendableai/bugfix/malformed-url-sitemap

Added regex for links in sitemap
This commit is contained in:
Nicolas
2024-07-24 20:37:35 -04:00
committed by GitHub
+1 -1
View File
@@ -64,7 +64,7 @@ export class WebCrawler {
private filterLinks(sitemapLinks: string[], limit: number, maxDepth: number): string[] { private filterLinks(sitemapLinks: string[], limit: number, maxDepth: number): string[] {
return sitemapLinks return sitemapLinks
.filter((link) => { .filter((link) => {
const url = new URL(link); const url = new URL(link.trim(), this.baseUrl);
const path = url.pathname; const path = url.pathname;
const depth = getURLDepth(url.toString()); const depth = getURLDepth(url.toString());