Revert error tallying

This commit is contained in:
Gergő Móricz
2024-09-24 10:27:49 +02:00
parent a4b128e8b7
commit a59b5836d5
5 changed files with 16 additions and 41 deletions
+5 -13
View File
@@ -283,21 +283,13 @@ export async function scrapeController(req: Request, res: Response) {
return res.status(result.returnCode).json(result); return res.status(result.returnCode).json(result);
} catch (error) { } catch (error) {
if (typeof error === "string" && error.startsWith("{\"type\":\"all\",")) { Sentry.captureException(error);
return res.status(500).json({ Logger.error(error);
success: false, return res.status(500).json({
error: "All scraping methods failed for URL: " + req.body.url, error:
details: JSON.parse(error).errors as string[],
});
} else {
Sentry.captureException(error);
Logger.error(error);
return res.status(500).json({
error:
typeof error === "string" typeof error === "string"
? error ? error
: error?.message ?? "Internal Server Error", : error?.message ?? "Internal Server Error",
}); });
}
} }
} }
+1 -3
View File
@@ -196,9 +196,7 @@ export async function searchController(req: Request, res: Response) {
return res.status(408).json({ error: "Request timed out" }); return res.status(408).json({ error: "Request timed out" });
} }
if (!(error instanceof Error && error.message.startsWith('{"type":"all",'))) { Sentry.captureException(error);
Sentry.captureException(error);
}
Logger.error(error); Logger.error(error);
return res.status(500).json({ error: error.message }); return res.status(500).json({ error: error.message });
} }
+7 -6
View File
@@ -64,21 +64,22 @@ export async function scrapeController(
success: false, success: false,
error: "Request timed out", error: "Request timed out",
}); });
} else if (typeof e === "string" && e.startsWith("{\"type\":\"all\",")) { } else {
return res.status(500).json({ return res.status(500).json({
success: false, success: false,
error: "All scraping methods failed for URL: " + req.body.url, error: `(Internal server error) - ${e && e?.message ? e.message : e} ${
details: JSON.parse(e).errors as string[], extractorOptions && extractorOptions.mode !== "markdown"
? " - Could be due to LLM parsing issues"
: ""
}`,
}); });
} else {
throw e;
} }
} }
await job.remove(); await job.remove();
if (!doc) { if (!doc) {
// console.error("!!! PANIC DOC IS", doc, job); console.error("!!! PANIC DOC IS", doc, job);
return res.status(200).json({ return res.status(200).json({
success: true, success: true,
warning: "No page found", warning: "No page found",
+2 -18
View File
@@ -357,9 +357,6 @@ export async function scrapSingleUrl(
pageStatusCode: 200, pageStatusCode: 200,
pageError: undefined, pageError: undefined,
}; };
const errors: Record<string, string> = {};
try { try {
let urlKey = urlToScrap; let urlKey = urlToScrap;
try { try {
@@ -401,12 +398,6 @@ export async function scrapSingleUrl(
pageError = undefined; pageError = undefined;
} }
if (attempt.pageError) {
errors[scraper] = attempt.pageError;
} else {
errors[scraper] = null;
}
if ((text && text.trim().length >= 100) || (typeof screenshot === "string" && screenshot.length > 0)) { if ((text && text.trim().length >= 100) || (typeof screenshot === "string" && screenshot.length > 0)) {
Logger.debug(`⛏️ ${scraper}: Successfully scraped ${urlToScrap} with text length >= 100 or screenshot, breaking`); Logger.debug(`⛏️ ${scraper}: Successfully scraped ${urlToScrap} with text length >= 100 or screenshot, breaking`);
break; break;
@@ -421,9 +412,7 @@ export async function scrapSingleUrl(
// } // }
} }
// NOTE: This exception for status codes may only work with fire-engine. In lieu of better error management, if (!text) {
// it's the best we can do. - mogery
if (!text && !Object.values(errors).some(x => x.startsWith("Request failed with status code ") || x === "NOT FOUND")) {
throw new Error(`All scraping methods failed for URL: ${urlToScrap}`); throw new Error(`All scraping methods failed for URL: ${urlToScrap}`);
} }
@@ -460,17 +449,12 @@ export async function scrapSingleUrl(
return document; return document;
} catch (error) { } catch (error) {
Logger.error(`⛏️ Error: ${error.message} - Failed to fetch URL: ${urlToScrap}`); Logger.debug(`⛏️ Error: ${error.message} - Failed to fetch URL: ${urlToScrap}`);
ScrapeEvents.insert(jobId, { ScrapeEvents.insert(jobId, {
type: "error", type: "error",
message: typeof error === "string" ? error : typeof error.message === "string" ? error.message : JSON.stringify(error), message: typeof error === "string" ? error : typeof error.message === "string" ? error.message : JSON.stringify(error),
stack: error.stack, stack: error.stack,
}); });
if (error instanceof Error && error.message.startsWith("All scraping methods failed")) {
throw new Error(JSON.stringify({"type": "all", "errors": Object.values(errors)}));
}
return { return {
content: "", content: "",
markdown: pageOptions.includeMarkdown || pageOptions.includeExtract ? "" : undefined, markdown: pageOptions.includeMarkdown || pageOptions.includeExtract ? "" : undefined,
+1 -1
View File
@@ -448,7 +448,7 @@ async function processJob(job: Job, token: string) {
} catch (error) { } catch (error) {
Logger.error(`🐂 Job errored ${job.id} - ${error}`); Logger.error(`🐂 Job errored ${job.id} - ${error}`);
if (!(error instanceof Error && (error.message.includes("JSON parsing error(s): ") || error.message.startsWith('{"type":"all",')))) { if (!(error instanceof Error && error.message.includes("JSON parsing error(s): "))) {
Sentry.captureException(error, { Sentry.captureException(error, {
data: { data: {
job: job.id, job: job.id,