feat(batch/scrape): restructure logs, add webhooks

This commit is contained in:
Gergő Móricz
2024-10-23 21:55:21 +02:00
parent e3cb00990a
commit 1da6360b77
2 changed files with 13 additions and 11 deletions
+12 -10
View File
@@ -329,7 +329,8 @@ async function processJob(job: Job, token: string) {
job.id as string, job.id as string,
data, data,
job.data.webhook, job.data.webhook,
job.data.v1 job.data.v1,
job.data.crawlerOptions !== null ? "crawl.page" : "batch_scrape.page",
); );
} }
if (job.data.webhook && job.data.mode !== "crawl" && job.data.v1) { if (job.data.webhook && job.data.mode !== "crawl" && job.data.v1) {
@@ -339,7 +340,7 @@ async function processJob(job: Job, token: string) {
data, data,
job.data.webhook, job.data.webhook,
job.data.v1, job.data.v1,
"crawl.page", job.data.crawlerOptions !== null ? "crawl.page" : "batch_scrape.page",
true true
); );
} }
@@ -353,7 +354,7 @@ async function processJob(job: Job, token: string) {
docs: docs, docs: docs,
time_taken: timeTakenInSeconds, time_taken: timeTakenInSeconds,
team_id: job.data.team_id, team_id: job.data.team_id,
mode: job.data.crawlerOptions === null ? "batch_scrape" : job.data.mode, mode: job.data.mode,
url: job.data.url, url: job.data.url,
crawlerOptions: job.data.crawlerOptions, crawlerOptions: job.data.crawlerOptions,
pageOptions: job.data.pageOptions, pageOptions: job.data.pageOptions,
@@ -414,7 +415,7 @@ async function processJob(job: Job, token: string) {
} }
} }
if (await finishCrawl(job.data.crawl_id) && job.data.crawlerOptions !== null) { if (await finishCrawl(job.data.crawl_id)) {
if (!job.data.v1) { if (!job.data.v1) {
const jobIDs = await getCrawlJobs(job.data.crawl_id); const jobIDs = await getCrawlJobs(job.data.crawl_id);
@@ -437,7 +438,7 @@ async function processJob(job: Job, token: string) {
docs: [], docs: [],
time_taken: (Date.now() - sc.createdAt) / 1000, time_taken: (Date.now() - sc.createdAt) / 1000,
team_id: job.data.team_id, team_id: job.data.team_id,
mode: "crawl", mode: job.data.crawlerOptions !== null ? "crawl" : "batch_scrape",
url: sc.originUrl, url: sc.originUrl,
crawlerOptions: sc.crawlerOptions, crawlerOptions: sc.crawlerOptions,
pageOptions: sc.pageOptions, pageOptions: sc.pageOptions,
@@ -467,7 +468,7 @@ async function processJob(job: Job, token: string) {
data, data,
job.data.webhook, job.data.webhook,
job.data.v1, job.data.v1,
"crawl.completed" job.data.crawlerOptions !== null ? "crawl.completed" : "batch_scrape.completed"
); );
} }
} else { } else {
@@ -485,7 +486,7 @@ async function processJob(job: Job, token: string) {
[], [],
job.data.webhook, job.data.webhook,
job.data.v1, job.data.v1,
"crawl.completed" job.data.crawlerOptions !== null ? "crawl.completed" : "batch_scrape.completed"
); );
} }
@@ -497,7 +498,7 @@ async function processJob(job: Job, token: string) {
docs: [], docs: [],
time_taken: (Date.now() - sc.createdAt) / 1000, time_taken: (Date.now() - sc.createdAt) / 1000,
team_id: job.data.team_id, team_id: job.data.team_id,
mode: "crawl", mode: job.data.crawlerOptions !== null ? "crawl" : "batch_scrape",
url: sc.originUrl, url: sc.originUrl,
crawlerOptions: sc.crawlerOptions, crawlerOptions: sc.crawlerOptions,
pageOptions: sc.pageOptions, pageOptions: sc.pageOptions,
@@ -554,7 +555,8 @@ async function processJob(job: Job, token: string) {
job.data.crawl_id ?? (job.id as string), job.data.crawl_id ?? (job.id as string),
data, data,
job.data.webhook, job.data.webhook,
job.data.v1 job.data.v1,
job.data.crawlerOptions !== null ? "crawl.page" : "batch_scrape.page",
); );
} }
// if (job.data.v1) { // if (job.data.v1) {
@@ -603,7 +605,7 @@ async function processJob(job: Job, token: string) {
docs: [], docs: [],
time_taken: 0, time_taken: 0,
team_id: job.data.team_id, team_id: job.data.team_id,
mode: "crawl", mode: job.data.crawlerOptions !== null ? "crawl" : "batch_scrape",
url: sc ? sc.originUrl : job.data.url, url: sc ? sc.originUrl : job.data.url,
crawlerOptions: sc ? sc.crawlerOptions : job.data.crawlerOptions, crawlerOptions: sc ? sc.crawlerOptions : job.data.crawlerOptions,
pageOptions: sc ? sc.pageOptions : job.data.pageOptions, pageOptions: sc ? sc.pageOptions : job.data.pageOptions,
+1 -1
View File
@@ -159,4 +159,4 @@ export type PlanType =
| ""; | "";
export type WebhookEventType = "crawl.page" | "crawl.started" | "crawl.completed" | "crawl.failed"; export type WebhookEventType = "crawl.page" | "batch_scrape.page" | "crawl.started" | "crawl.completed" | "batch_scrape.completed" | "crawl.failed";