Revert error tallying
This commit is contained in:
@@ -283,13 +283,6 @@ export async function scrapeController(req: Request, res: Response) {
|
|||||||
|
|
||||||
return res.status(result.returnCode).json(result);
|
return res.status(result.returnCode).json(result);
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
if (typeof error === "string" && error.startsWith("{\"type\":\"all\",")) {
|
|
||||||
return res.status(500).json({
|
|
||||||
success: false,
|
|
||||||
error: "All scraping methods failed for URL: " + req.body.url,
|
|
||||||
details: JSON.parse(error).errors as string[],
|
|
||||||
});
|
|
||||||
} else {
|
|
||||||
Sentry.captureException(error);
|
Sentry.captureException(error);
|
||||||
Logger.error(error);
|
Logger.error(error);
|
||||||
return res.status(500).json({
|
return res.status(500).json({
|
||||||
@@ -299,5 +292,4 @@ export async function scrapeController(req: Request, res: Response) {
|
|||||||
: error?.message ?? "Internal Server Error",
|
: error?.message ?? "Internal Server Error",
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -196,9 +196,7 @@ export async function searchController(req: Request, res: Response) {
|
|||||||
return res.status(408).json({ error: "Request timed out" });
|
return res.status(408).json({ error: "Request timed out" });
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!(error instanceof Error && error.message.startsWith('{"type":"all",'))) {
|
|
||||||
Sentry.captureException(error);
|
Sentry.captureException(error);
|
||||||
}
|
|
||||||
Logger.error(error);
|
Logger.error(error);
|
||||||
return res.status(500).json({ error: error.message });
|
return res.status(500).json({ error: error.message });
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -64,21 +64,22 @@ export async function scrapeController(
|
|||||||
success: false,
|
success: false,
|
||||||
error: "Request timed out",
|
error: "Request timed out",
|
||||||
});
|
});
|
||||||
} else if (typeof e === "string" && e.startsWith("{\"type\":\"all\",")) {
|
} else {
|
||||||
return res.status(500).json({
|
return res.status(500).json({
|
||||||
success: false,
|
success: false,
|
||||||
error: "All scraping methods failed for URL: " + req.body.url,
|
error: `(Internal server error) - ${e && e?.message ? e.message : e} ${
|
||||||
details: JSON.parse(e).errors as string[],
|
extractorOptions && extractorOptions.mode !== "markdown"
|
||||||
|
? " - Could be due to LLM parsing issues"
|
||||||
|
: ""
|
||||||
|
}`,
|
||||||
});
|
});
|
||||||
} else {
|
|
||||||
throw e;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
await job.remove();
|
await job.remove();
|
||||||
|
|
||||||
if (!doc) {
|
if (!doc) {
|
||||||
// console.error("!!! PANIC DOC IS", doc, job);
|
console.error("!!! PANIC DOC IS", doc, job);
|
||||||
return res.status(200).json({
|
return res.status(200).json({
|
||||||
success: true,
|
success: true,
|
||||||
warning: "No page found",
|
warning: "No page found",
|
||||||
|
|||||||
@@ -357,9 +357,6 @@ export async function scrapSingleUrl(
|
|||||||
pageStatusCode: 200,
|
pageStatusCode: 200,
|
||||||
pageError: undefined,
|
pageError: undefined,
|
||||||
};
|
};
|
||||||
|
|
||||||
const errors: Record<string, string> = {};
|
|
||||||
|
|
||||||
try {
|
try {
|
||||||
let urlKey = urlToScrap;
|
let urlKey = urlToScrap;
|
||||||
try {
|
try {
|
||||||
@@ -401,12 +398,6 @@ export async function scrapSingleUrl(
|
|||||||
pageError = undefined;
|
pageError = undefined;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (attempt.pageError) {
|
|
||||||
errors[scraper] = attempt.pageError;
|
|
||||||
} else {
|
|
||||||
errors[scraper] = null;
|
|
||||||
}
|
|
||||||
|
|
||||||
if ((text && text.trim().length >= 100) || (typeof screenshot === "string" && screenshot.length > 0)) {
|
if ((text && text.trim().length >= 100) || (typeof screenshot === "string" && screenshot.length > 0)) {
|
||||||
Logger.debug(`⛏️ ${scraper}: Successfully scraped ${urlToScrap} with text length >= 100 or screenshot, breaking`);
|
Logger.debug(`⛏️ ${scraper}: Successfully scraped ${urlToScrap} with text length >= 100 or screenshot, breaking`);
|
||||||
break;
|
break;
|
||||||
@@ -421,9 +412,7 @@ export async function scrapSingleUrl(
|
|||||||
// }
|
// }
|
||||||
}
|
}
|
||||||
|
|
||||||
// NOTE: This exception for status codes may only work with fire-engine. In lieu of better error management,
|
if (!text) {
|
||||||
// it's the best we can do. - mogery
|
|
||||||
if (!text && !Object.values(errors).some(x => x.startsWith("Request failed with status code ") || x === "NOT FOUND")) {
|
|
||||||
throw new Error(`All scraping methods failed for URL: ${urlToScrap}`);
|
throw new Error(`All scraping methods failed for URL: ${urlToScrap}`);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -460,17 +449,12 @@ export async function scrapSingleUrl(
|
|||||||
|
|
||||||
return document;
|
return document;
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
Logger.error(`⛏️ Error: ${error.message} - Failed to fetch URL: ${urlToScrap}`);
|
Logger.debug(`⛏️ Error: ${error.message} - Failed to fetch URL: ${urlToScrap}`);
|
||||||
ScrapeEvents.insert(jobId, {
|
ScrapeEvents.insert(jobId, {
|
||||||
type: "error",
|
type: "error",
|
||||||
message: typeof error === "string" ? error : typeof error.message === "string" ? error.message : JSON.stringify(error),
|
message: typeof error === "string" ? error : typeof error.message === "string" ? error.message : JSON.stringify(error),
|
||||||
stack: error.stack,
|
stack: error.stack,
|
||||||
});
|
});
|
||||||
|
|
||||||
if (error instanceof Error && error.message.startsWith("All scraping methods failed")) {
|
|
||||||
throw new Error(JSON.stringify({"type": "all", "errors": Object.values(errors)}));
|
|
||||||
}
|
|
||||||
|
|
||||||
return {
|
return {
|
||||||
content: "",
|
content: "",
|
||||||
markdown: pageOptions.includeMarkdown || pageOptions.includeExtract ? "" : undefined,
|
markdown: pageOptions.includeMarkdown || pageOptions.includeExtract ? "" : undefined,
|
||||||
|
|||||||
@@ -448,7 +448,7 @@ async function processJob(job: Job, token: string) {
|
|||||||
} catch (error) {
|
} catch (error) {
|
||||||
Logger.error(`🐂 Job errored ${job.id} - ${error}`);
|
Logger.error(`🐂 Job errored ${job.id} - ${error}`);
|
||||||
|
|
||||||
if (!(error instanceof Error && (error.message.includes("JSON parsing error(s): ") || error.message.startsWith('{"type":"all",')))) {
|
if (!(error instanceof Error && error.message.includes("JSON parsing error(s): "))) {
|
||||||
Sentry.captureException(error, {
|
Sentry.captureException(error, {
|
||||||
data: {
|
data: {
|
||||||
job: job.id,
|
job: job.id,
|
||||||
|
|||||||
Reference in New Issue
Block a user