From 37f58efe457dd985e3c01aabeed23d48cf3bc99d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Gerg=C5=91=20M=C3=B3ricz?= Date: Sun, 15 Dec 2024 21:01:31 +0100 Subject: [PATCH] fix(crawl-redis/lockURL): only add to visited_unique if lock succeeds --- apps/api/src/lib/crawl-redis.ts | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/apps/api/src/lib/crawl-redis.ts b/apps/api/src/lib/crawl-redis.ts index 0c9e0ff0..602d13b3 100644 --- a/apps/api/src/lib/crawl-redis.ts +++ b/apps/api/src/lib/crawl-redis.ts @@ -233,13 +233,6 @@ export async function lockURL( url = normalizeURL(url, sc); logger = logger.child({ url }); - await redisConnection.sadd("crawl:" + id + ":visited_unique", url); - await redisConnection.expire( - "crawl:" + id + ":visited_unique", - 24 * 60 * 60, - "NX", - ); - let res: boolean; if (!sc.crawlerOptions?.deduplicateSimilarURLs) { res = (await redisConnection.sadd("crawl:" + id + ":visited", url)) !== 0; @@ -255,6 +248,15 @@ export async function lockURL( await redisConnection.expire("crawl:" + id + ":visited", 24 * 60 * 60, "NX"); + if (res) { + await redisConnection.sadd("crawl:" + id + ":visited_unique", url); + await redisConnection.expire( + "crawl:" + id + ":visited_unique", + 24 * 60 * 60, + "NX", + ); + } + logger.debug("Locking URL " + JSON.stringify(url) + "... result: " + res, { res, });