From 5cbf0dcaf5b558094b23884ac7a0c62f2c781fbf Mon Sep 17 00:00:00 2001 From: rafaelsideguide <150964962+rafaelsideguide@users.noreply.github.com> Date: Wed, 28 Aug 2024 14:07:28 -0300 Subject: [PATCH] fix(v1): includeTags --- apps/api/src/scraper/WebScraper/index.ts | 3 ++- apps/api/src/scraper/WebScraper/single_url.ts | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/apps/api/src/scraper/WebScraper/index.ts b/apps/api/src/scraper/WebScraper/index.ts index 24b71235..44a90b85 100644 --- a/apps/api/src/scraper/WebScraper/index.ts +++ b/apps/api/src/scraper/WebScraper/index.ts @@ -582,8 +582,9 @@ export class WebScraperDataProvider { this.pageOptions = { onlyMainContent: options.pageOptions?.onlyMainContent ?? false, includeHtml: options.pageOptions?.includeHtml ?? false, - replaceAllPathsWithAbsolutePaths: options.pageOptions?.replaceAllPathsWithAbsolutePaths ?? false, + replaceAllPathsWithAbsolutePaths: options.pageOptions?.replaceAllPathsWithAbsolutePaths ?? true, parsePDF: options.pageOptions?.parsePDF ?? true, + onlyIncludeTags: options.pageOptions?.onlyIncludeTags ?? [], removeTags: options.pageOptions?.removeTags ?? [], includeMarkdown: options.pageOptions?.includeMarkdown ?? true, includeRawHtml: options.pageOptions?.includeRawHtml ?? false, diff --git a/apps/api/src/scraper/WebScraper/single_url.ts b/apps/api/src/scraper/WebScraper/single_url.ts index 59afde16..781d7026 100644 --- a/apps/api/src/scraper/WebScraper/single_url.ts +++ b/apps/api/src/scraper/WebScraper/single_url.ts @@ -138,9 +138,10 @@ export async function scrapSingleUrl( fullPageScreenshot: pageOptions.fullPageScreenshot ?? false, headers: pageOptions.headers ?? undefined, includeLinks: pageOptions.includeLinks ?? true, - replaceAllPathsWithAbsolutePaths: pageOptions.replaceAllPathsWithAbsolutePaths ?? false, + replaceAllPathsWithAbsolutePaths: pageOptions.replaceAllPathsWithAbsolutePaths ?? true, parsePDF: pageOptions.parsePDF ?? true, removeTags: pageOptions.removeTags ?? [], + onlyIncludeTags: pageOptions.onlyIncludeTags ?? [], } if (extractorOptions) {