diff --git a/apps/api/src/controllers/v1/crawl-status.ts b/apps/api/src/controllers/v1/crawl-status.ts index ce3831f2..f5fcf6b6 100644 --- a/apps/api/src/controllers/v1/crawl-status.ts +++ b/apps/api/src/controllers/v1/crawl-status.ts @@ -196,6 +196,7 @@ export async function crawlStatusController( nextURL.searchParams.set("limit", req.query.limit); } + // Deprecated: this is now done on the queue-worker side. If you see this after January 8, 2025, remove this block. if (data.length > 0) { if (!doneJobs[0].data.scrapeOptions.formats.includes("rawHtml")) { for (let ii = 0; ii < doneJobs.length; ii++) { @@ -205,6 +206,7 @@ export async function crawlStatusController( } } } + // End of deprecated block: remove up to here. res.status(200).json({ success: true, diff --git a/apps/api/src/services/queue-worker.ts b/apps/api/src/services/queue-worker.ts index a48c798b..90c96cf6 100644 --- a/apps/api/src/services/queue-worker.ts +++ b/apps/api/src/services/queue-worker.ts @@ -674,6 +674,10 @@ async function processJob(job: Job & { id: string }, token: string) { const rawHtml = doc.rawHtml ?? ""; + if (!job.data.scrapeOptions.formats.includes("rawHtml")) { + delete doc.rawHtml; + } + const data = { success: true, result: {