Merge pull request #485 from mendableai/bugfix/issue-435

[Bug] Issue with crawl going beyond Limit
This commit is contained in:
Nicolas
2024-07-31 18:10:29 -04:00
committed by GitHub
+4 -3
View File
@@ -164,7 +164,7 @@ export class WebCrawler {
 concurrencyLimit,
 inProgress
 );
 if (
 urls.length === 0 &&
 this.filterLinks([this.initialUrl], limit, this.maxCrawledDepth).length > 0
@@ -420,9 +420,10 @@ export class WebCrawler {
 ".woff",
 ".ttf",
 ".woff2",
-".webp"
+".webp",
+".inc"
 ];
-return fileExtensions.some((ext) => url.endsWith(ext));
+return fileExtensions.some((ext) => url.toLowerCase().endsWith(ext));
 }
 private isSocialMediaOrEmail(url: string): boolean {