Nick: fixed bugs associated with absolute path replacements

This commit is contained in:
Nicolas
2024-06-11 12:43:16 -07:00
parent 788abdce6e
commit 520739c9f4
4 changed files with 24 additions and 18 deletions
+6 -5
View File
@@ -302,9 +302,10 @@ export class WebScraperDataProvider {
}
private applyPathReplacements(documents: Document[]): Document[] {
return this.replaceAllPathsWithAbsolutePaths
? replacePathsWithAbsolutePaths(documents)
: replaceImgPathsWithAbsolutePaths(documents);
if (this.replaceAllPathsWithAbsolutePaths) {
documents = replacePathsWithAbsolutePaths(documents);
}
return replaceImgPathsWithAbsolutePaths(documents);
}
private async applyImgAltText(documents: Document[]): Promise<Document[]> {
@@ -473,9 +474,9 @@ export class WebScraperDataProvider {
this.limit = options.crawlerOptions?.limit ?? 10000;
this.generateImgAltText =
options.crawlerOptions?.generateImgAltText ?? false;
this.pageOptions = options.pageOptions ?? { onlyMainContent: false, includeHtml: false };
this.pageOptions = options.pageOptions ?? { onlyMainContent: false, includeHtml: false, replaceAllPathsWithAbsolutePaths: false };
this.extractorOptions = options.extractorOptions ?? {mode: "markdown"}
this.replaceAllPathsWithAbsolutePaths = options.crawlerOptions?.replaceAllPathsWithAbsolutePaths ?? false;
this.replaceAllPathsWithAbsolutePaths = options.crawlerOptions?.replaceAllPathsWithAbsolutePaths ?? options.pageOptions?.replaceAllPathsWithAbsolutePaths ?? false;
//! @nicolas, for some reason this was being injected and breaking everything. Don't have time to find source of the issue so adding this check
this.excludes = this.excludes.filter((item) => item !== "");
this.crawlerMode = options.crawlerOptions?.mode ?? "default";