Files
firecrawl/apps/api/src/controllers/v1/extract-status.ts
T

77 lines
2.5 KiB
TypeScript
Raw Normal View History

2025-01-03 20:44:27 -03:00
import { Response } from "express";
import { RequestWithAuth } from "./types";
2025-01-07 16:16:01 -03:00
import { getExtract, getExtractExpiry } from "../../lib/extract/extract-redis";
2025-02-20 00:41:22 +01:00
import { DBJob, PseudoJob } from "./crawl-status";
import { getExtractQueue } from "../../services/queue-service";
import { ExtractResult } from "../../lib/extract/extraction-service";
import { supabaseGetJobById } from "../../lib/supabase-jobs";
/**
 * Looks up an extract job by id and merges what is known about it into a
 * single `PseudoJob`.
 *
 * The job is fetched from the extract queue and, when
 * `USE_DB_AUTHENTICATION` is `"true"`, also from the database via
 * `supabaseGetJobById`. Both lookups run concurrently.
 *
 * @param id - Identifier of the extract job.
 * @returns The merged job view, or `null` when neither source knows the id.
 */
export async function getExtractJob(id: string): Promise<PseudoJob<ExtractResult> | null> {
  const [bullJob, dbJob] = await Promise.all([
    getExtractQueue().getJob(id),
    (process.env.USE_DB_AUTHENTICATION === "true" ? supabaseGetJobById(id) : null) as Promise<DBJob | null>,
  ]);

  if (!bullJob && !dbJob) return null;

  // The database record wins when it carries docs; otherwise fall back to the
  // queue job's return value.
  const data = dbJob?.docs ?? bullJob?.returnvalue?.data;

  const job: PseudoJob<any> = {
    id,
    // Bind the method to the queue job: copying it unbound onto this object
    // would make `job.getState()` run with the pseudo-job as `this`.
    getState: bullJob
      ? bullJob.getState.bind(bullJob)
      : (() => dbJob!.success ? "completed" : "failed"),
    returnvalue: data,
    data: {
      scrapeOptions: bullJob ? bullJob.data.scrapeOptions : dbJob!.page_options,
    },
    timestamp: bullJob ? bullJob.timestamp : new Date(dbJob!.date_added).valueOf(),
    // Normalise empty / null failure messages to `undefined`.
    failedReason: (bullJob ? bullJob.failedReason : dbJob!.message) || undefined,
  };

  return job;
}
2025-01-03 20:44:27 -03:00
/**
 * Express handler that reports the status of an extract job.
 *
 * Responds with 404 when `getExtract` has no record for `req.params.jobId`,
 * or when the record is marked completed but `getExtractJob` finds no job.
 * Otherwise responds with 200 and the current status, the result data (only
 * once available), any recorded error, the expiry timestamp, and — when the
 * corresponding `show*` flags are set on the record — steps, LLM usage and
 * sources.
 *
 * @param req - Authenticated request carrying `jobId` in its route params.
 * @param res - Express response used to send the JSON payload.
 */
export async function extractStatusController(
  req: RequestWithAuth<{ jobId: string }, any, any>,
  res: Response,
) {
  const extract = await getExtract(req.params.jobId);

  if (!extract) {
    return res.status(404).json({
      success: false,
      error: "Extract job not found",
    });
  }

  let data: ExtractResult | [] = [];

  if (extract.status === "completed") {
    const jobData = await getExtractJob(req.params.jobId);
    if (!jobData) {
      return res.status(404).json({
        success: false,
        error: "Job not found",
      });
    }

    if (!jobData.returnvalue) {
      // if we got in the split-second where the redis is updated but the bull isn't
      // just pretend it's still processing - MG
      extract.status = "processing";
    } else {
      data = jobData.returnvalue;
    }
  }

  return res.status(200).json({
    success: extract.status !== "failed",
    data,
    status: extract.status,
    // Map a null error to undefined so the key is omitted from the JSON body.
    error: extract.error ?? undefined,
    expiresAt: (await getExtractExpiry(req.params.jobId)).toISOString(),
    steps: extract.showSteps ? extract.steps : undefined,
    llmUsage: extract.showLLMUsage ? extract.llmUsage : undefined,
    sources: extract.showSources ? extract.sources : undefined,
  });
}