fix(queue-worker/crawl): use SCARD to generate num_docs field
This commit is contained in:
@@ -160,6 +160,10 @@ export async function getCrawlJobs(id: string): Promise<string[]> {
|
|||||||
return await redisConnection.smembers("crawl:" + id + ":jobs");
|
return await redisConnection.smembers("crawl:" + id + ":jobs");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Returns the number of jobs recorded for a crawl.
 *
 * Issues a Redis SCARD against the set stored at `"crawl:" + id + ":jobs"`,
 * the same key that `getCrawlJobs` reads with SMEMBERS. SCARD reports the
 * set's cardinality, so a caller that only needs the count (for example the
 * `num_docs` value logged when a crawl finishes) does not have to fetch
 * every job ID.
 *
 * @param id - Crawl identifier used to build the Redis key.
 * @returns The value reported by `redisConnection.scard` for that key.
 */
export async function getCrawlJobCount(id: string): Promise<number> {
|
||||||
|
return await redisConnection.scard("crawl:" + id + ":jobs");
|
||||||
|
}
|
||||||
|
|
||||||
export async function getThrottledJobs(teamId: string): Promise<string[]> {
|
export async function getThrottledJobs(teamId: string): Promise<string[]> {
|
||||||
return await redisConnection.zrangebyscore(
|
return await redisConnection.zrangebyscore(
|
||||||
"concurrency-limiter:" + teamId + ":throttled",
|
"concurrency-limiter:" + teamId + ":throttled",
|
||||||
|
|||||||
@@ -25,6 +25,7 @@ import {
|
|||||||
finishCrawl,
|
finishCrawl,
|
||||||
generateURLPermutations,
|
generateURLPermutations,
|
||||||
getCrawl,
|
getCrawl,
|
||||||
|
getCrawlJobCount,
|
||||||
getCrawlJobs,
|
getCrawlJobs,
|
||||||
lockURL,
|
lockURL,
|
||||||
lockURLs,
|
lockURLs,
|
||||||
@@ -212,7 +213,7 @@ async function finishCrawlIfNeeded(job: Job & { id: string }, sc: StoredCrawl) {
|
|||||||
);
|
);
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
const jobIDs = await getCrawlJobs(job.data.crawl_id);
|
const num_docs = await getCrawlJobCount(job.data.crawl_id);
|
||||||
const jobStatus = sc.cancelled ? "failed" : "completed";
|
const jobStatus = sc.cancelled ? "failed" : "completed";
|
||||||
|
|
||||||
await logJob(
|
await logJob(
|
||||||
@@ -220,7 +221,7 @@ async function finishCrawlIfNeeded(job: Job & { id: string }, sc: StoredCrawl) {
|
|||||||
job_id: job.data.crawl_id,
|
job_id: job.data.crawl_id,
|
||||||
success: jobStatus === "completed",
|
success: jobStatus === "completed",
|
||||||
message: sc.cancelled ? "Cancelled" : undefined,
|
message: sc.cancelled ? "Cancelled" : undefined,
|
||||||
num_docs: jobIDs.length,
|
num_docs,
|
||||||
docs: [],
|
docs: [],
|
||||||
time_taken: (Date.now() - sc.createdAt) / 1000,
|
time_taken: (Date.now() - sc.createdAt) / 1000,
|
||||||
team_id: job.data.team_id,
|
team_id: job.data.team_id,
|
||||||
|
|||||||
Reference in New Issue
Block a user