Merge branch 'feat/save-docs-on-supabase' of https://github.com/mendableai/firecrawl into feat/save-docs-on-supabase

This commit is contained in:
rafaelsideguide
2024-07-09 18:48:29 -03:00
3 changed files with 32 additions and 7 deletions
+29 -7
View File
@@ -10,6 +10,7 @@ import cluster from "cluster";
import os from "os"; import os from "os";
import { Job } from "bull"; import { Job } from "bull";
import { supabase_service } from "./services/supabase"; import { supabase_service } from "./services/supabase";
import { logJob } from "./services/logging/log_job";
const { createBullBoard } = require("@bull-board/api"); const { createBullBoard } = require("@bull-board/api");
const { BullAdapter } = require("@bull-board/api/bullAdapter"); const { BullAdapter } = require("@bull-board/api/bullAdapter");
@@ -44,6 +45,13 @@ if (cluster.isMaster) {
jobId: x.job_id, jobId: x.job_id,
} }
}))) })))
if (data.length > 0) {
await supabase_service
.from("firecrawl_jobs")
.delete()
.in("id", data.map(x => x.id));
}
} }
})(); })();
@@ -79,17 +87,31 @@ if (cluster.isMaster) {
}))).flat(1); }))).flat(1);
for (const job of activeJobs) { for (const job of activeJobs) {
console.log(job.id);
try { try {
const { error } = await supabase_service await logJob({
.from("firecrawl_jobs") job_id: job.id as string,
.update({ docs: job.data.docs, partial_docs: job.data.partialDocs, retry: true }) success: false,
.eq("job_id", job.id); message: "Interrupted, retrying",
num_docs: 0,
if (error) throw new Error(error.message); docs: [],
time_taken: 0,
team_id: job.data.team_id,
mode: "crawl",
url: job.data.url,
crawlerOptions: job.data.crawlerOptions,
pageOptions: job.data.pageOptions,
origin: job.data.origin,
retry: true,
});
await wsq.client.del(await job.lockKey());
await job.takeLock();
await job.moveToFailed({ message: "interrupted" });
await job.remove();
} catch (error) { } catch (error) {
console.error("Failed to update job status:", error); console.error("Failed to update job status:", error);
} }
await wsq.removeJobs(job.id.toString());
} }
} }
+2
View File
@@ -38,6 +38,7 @@ export async function logJob(job: FirecrawlJob) {
origin: job.origin, origin: job.origin,
extractor_options: job.extractor_options, extractor_options: job.extractor_options,
num_tokens: job.num_tokens, num_tokens: job.num_tokens,
retry: !!job.retry,
}, },
]); ]);
@@ -61,6 +62,7 @@ export async function logJob(job: FirecrawlJob) {
origin: job.origin, origin: job.origin,
extractor_options: job.extractor_options, extractor_options: job.extractor_options,
num_tokens: job.num_tokens, num_tokens: job.num_tokens,
retry: job.retry,
}, },
}; };
posthog.capture(phLog); posthog.capture(phLog);
+1
View File
@@ -62,6 +62,7 @@ export interface FirecrawlJob {
origin: string; origin: string;
extractor_options?: ExtractorOptions, extractor_options?: ExtractorOptions,
num_tokens?: number, num_tokens?: number,
retry?: boolean,
} }
export interface FirecrawlScrapeResponse { export interface FirecrawlScrapeResponse {