From b92a4eb79b04d090ccb8322db1af9a95b838b819 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Gerg=C5=91=20M=C3=B3ricz?= Date: Sat, 4 Jan 2025 16:59:35 +0100 Subject: [PATCH] fix(queue-worker): only do redirect handling logic on crawls, not batch scrape --- apps/api/src/controllers/v1/batch-scrape.ts | 2 +- apps/api/src/services/queue-worker.ts | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/apps/api/src/controllers/v1/batch-scrape.ts b/apps/api/src/controllers/v1/batch-scrape.ts index 19ce3ba0..21c9745c 100644 --- a/apps/api/src/controllers/v1/batch-scrape.ts +++ b/apps/api/src/controllers/v1/batch-scrape.ts @@ -61,7 +61,7 @@ export async function batchScrapeController( } logger.debug("Batch scrape " + id + " starting", { - urlsLength: urls, + urlsLength: urls.length, appendToId: req.body.appendToId, account: req.account, }); diff --git a/apps/api/src/services/queue-worker.ts b/apps/api/src/services/queue-worker.ts index 9e6f3d24..a48c798b 100644 --- a/apps/api/src/services/queue-worker.ts +++ b/apps/api/src/services/queue-worker.ts @@ -710,7 +710,8 @@ async function processJob(job: Job & { id: string }, token: string) { doc.metadata.url !== undefined && doc.metadata.sourceURL !== undefined && normalizeURL(doc.metadata.url, sc) !== - normalizeURL(doc.metadata.sourceURL, sc) + normalizeURL(doc.metadata.sourceURL, sc) && + job.data.crawlerOptions !== null // only on crawls, don't care on batch scrape ) { const crawler = crawlToCrawler(job.data.crawl_id, sc); if (