fix(queue-worker): run redirect-handling logic only for crawls, not for batch scrapes
This commit is contained in:
@@ -61,7 +61,7 @@ export async function batchScrapeController(
|
|||||||
}
|
}
|
||||||
|
|
||||||
logger.debug("Batch scrape " + id + " starting", {
|
logger.debug("Batch scrape " + id + " starting", {
|
||||||
urlsLength: urls,
|
urlsLength: urls.length,
|
||||||
appendToId: req.body.appendToId,
|
appendToId: req.body.appendToId,
|
||||||
account: req.account,
|
account: req.account,
|
||||||
});
|
});
|
||||||
|
|||||||
@@ -710,7 +710,8 @@ async function processJob(job: Job & { id: string }, token: string) {
|
|||||||
doc.metadata.url !== undefined &&
|
doc.metadata.url !== undefined &&
|
||||||
doc.metadata.sourceURL !== undefined &&
|
doc.metadata.sourceURL !== undefined &&
|
||||||
normalizeURL(doc.metadata.url, sc) !==
|
normalizeURL(doc.metadata.url, sc) !==
|
||||||
normalizeURL(doc.metadata.sourceURL, sc)
|
normalizeURL(doc.metadata.sourceURL, sc) &&
|
||||||
|
job.data.crawlerOptions !== null // only on crawls, don't care on batch scrape
|
||||||
) {
|
) {
|
||||||
const crawler = crawlToCrawler(job.data.crawl_id, sc);
|
const crawler = crawlToCrawler(job.data.crawl_id, sc);
|
||||||
if (
|
if (
|
||||||
|
|||||||
Reference in New Issue
Block a user