fix(crawl): don't push rawhtml to db unless requested
This commit is contained in:
@@ -196,6 +196,7 @@ export async function crawlStatusController(
|
|||||||
nextURL.searchParams.set("limit", req.query.limit);
|
nextURL.searchParams.set("limit", req.query.limit);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// deprecated: this is done on queue-worker side now. if you see this after january 8, 2025, remove this
|
||||||
if (data.length > 0) {
|
if (data.length > 0) {
|
||||||
if (!doneJobs[0].data.scrapeOptions.formats.includes("rawHtml")) {
|
if (!doneJobs[0].data.scrapeOptions.formats.includes("rawHtml")) {
|
||||||
for (let ii = 0; ii < doneJobs.length; ii++) {
|
for (let ii = 0; ii < doneJobs.length; ii++) {
|
||||||
@@ -205,6 +206,7 @@ export async function crawlStatusController(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
// remove until here
|
||||||
|
|
||||||
res.status(200).json({
|
res.status(200).json({
|
||||||
success: true,
|
success: true,
|
||||||
|
|||||||
@@ -674,6 +674,10 @@ async function processJob(job: Job & { id: string }, token: string) {
|
|||||||
|
|
||||||
const rawHtml = doc.rawHtml ?? "";
|
const rawHtml = doc.rawHtml ?? "";
|
||||||
|
|
||||||
|
if (!job.data.scrapeOptions.formats.includes("rawHtml")) {
|
||||||
|
delete doc.rawHtml;
|
||||||
|
}
|
||||||
|
|
||||||
const data = {
|
const data = {
|
||||||
success: true,
|
success: true,
|
||||||
result: {
|
result: {
|
||||||
|
|||||||
Reference in New Issue
Block a user