From 49e584f8e19488178b1e06b246784d46ba82b13e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?M=C3=B3ricz=20Gerg=C5=91?=
Date: Thu, 9 Jan 2025 09:51:16 +0100
Subject: [PATCH] fix(queue-worker/crawl): use SCARD to generate num_docs field

---
 apps/api/src/lib/crawl-redis.ts       | 4 ++++
 apps/api/src/services/queue-worker.ts | 5 +++--
 2 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/apps/api/src/lib/crawl-redis.ts b/apps/api/src/lib/crawl-redis.ts
index 80720bc6..a200b9b5 100644
--- a/apps/api/src/lib/crawl-redis.ts
+++ b/apps/api/src/lib/crawl-redis.ts
@@ -160,6 +160,10 @@ export async function getCrawlJobs(id: string): Promise<string[]> {
   return await redisConnection.smembers("crawl:" + id + ":jobs");
 }
 
+export async function getCrawlJobCount(id: string): Promise<number> {
+  return await redisConnection.scard("crawl:" + id + ":jobs");
+}
+
 export async function getThrottledJobs(teamId: string): Promise<string[]> {
   return await redisConnection.zrangebyscore(
     "concurrency-limiter:" + teamId + ":throttled",
diff --git a/apps/api/src/services/queue-worker.ts b/apps/api/src/services/queue-worker.ts
index f28f3f35..d8d154cc 100644
--- a/apps/api/src/services/queue-worker.ts
+++ b/apps/api/src/services/queue-worker.ts
@@ -25,6 +25,7 @@ import {
   finishCrawl,
   generateURLPermutations,
   getCrawl,
+  getCrawlJobCount,
   getCrawlJobs,
   lockURL,
   lockURLs,
@@ -212,7 +213,7 @@ async function finishCrawlIfNeeded(job: Job & { id: string }, sc: StoredCrawl) {
         );
       }
     } else {
-      const jobIDs = await getCrawlJobs(job.data.crawl_id);
+      const num_docs = await getCrawlJobCount(job.data.crawl_id);
       const jobStatus = sc.cancelled ? "failed" : "completed";
 
       await logJob(
@@ -220,7 +221,7 @@ async function finishCrawlIfNeeded(job: Job & { id: string }, sc: StoredCrawl) {
           job_id: job.data.crawl_id,
           success: jobStatus === "completed",
           message: sc.cancelled ? "Cancelled" : undefined,
-          num_docs: jobIDs.length,
+          num_docs,
           docs: [],
           time_taken: (Date.now() - sc.createdAt) / 1000,
           team_id: job.data.team_id,