nested includeHtml inside pageOptions

This commit is contained in:
rafaelsideguide
2024-05-07 13:40:24 -03:00
parent 509250c4ef
commit e1f52c538f
11 changed files with 19 additions and 47 deletions
@@ -103,8 +103,7 @@ export async function scrapWithPlaywright(url: string): Promise<string> {
export async function scrapSingleUrl(
urlToScrap: string,
pageOptions: PageOptions = { onlyMainContent: true },
includeHtml: boolean = false
pageOptions: PageOptions = { onlyMainContent: true, includeHtml: false },
): Promise<Document> {
urlToScrap = urlToScrap.trim();
@@ -193,7 +192,7 @@ export async function scrapSingleUrl(
url: urlToScrap,
content: text,
markdown: text,
html: includeHtml ? html : undefined,
html: pageOptions.includeHtml ? html : undefined,
metadata: { ...metadata, sourceURL: urlToScrap },
} as Document;
}
@@ -217,7 +216,7 @@ export async function scrapSingleUrl(
return {
content: text,
markdown: text,
html: includeHtml ? html : undefined,
html: pageOptions.includeHtml ? html : undefined,
metadata: { ...metadata, sourceURL: urlToScrap },
} as Document;
} catch (error) {