Added metadata.pageStatusCode and metadata.pageError properties to the responses
This commit is contained in:
@@ -241,7 +241,7 @@ export class WebScraperDataProvider {
|
||||
content: "",
|
||||
html: this.pageOptions?.includeHtml ? "" : undefined,
|
||||
markdown: "",
|
||||
metadata: { sourceURL: url },
|
||||
metadata: { sourceURL: url, pageStatusCode: 200 },
|
||||
}));
|
||||
}
|
||||
|
||||
@@ -280,10 +280,10 @@ export class WebScraperDataProvider {
|
||||
private async fetchPdfDocuments(pdfLinks: string[]): Promise<Document[]> {
|
||||
return Promise.all(
|
||||
pdfLinks.map(async (pdfLink) => {
|
||||
const pdfContent = await fetchAndProcessPdf(pdfLink);
|
||||
const { content, pageStatusCode, pageError } = await fetchAndProcessPdf(pdfLink);
|
||||
return {
|
||||
content: pdfContent,
|
||||
metadata: { sourceURL: pdfLink },
|
||||
content: content,
|
||||
metadata: { sourceURL: pdfLink, pageStatusCode, pageError },
|
||||
provider: "web-scraper",
|
||||
};
|
||||
})
|
||||
@@ -292,10 +292,10 @@ export class WebScraperDataProvider {
|
||||
private async fetchDocxDocuments(docxLinks: string[]): Promise<Document[]> {
|
||||
return Promise.all(
|
||||
docxLinks.map(async (p) => {
|
||||
const docXDocument = await fetchAndProcessDocx(p);
|
||||
const { content, pageStatusCode, pageError } = await fetchAndProcessDocx(p);
|
||||
return {
|
||||
content: docXDocument,
|
||||
metadata: { sourceURL: p },
|
||||
content,
|
||||
metadata: { sourceURL: p, pageStatusCode, pageError },
|
||||
provider: "web-scraper",
|
||||
};
|
||||
})
|
||||
|
||||
Reference in New Issue
Block a user