Files
firecrawl/apps/api/src/controllers/v1/extract.ts
T

78 lines
2.0 KiB
TypeScript
Raw Normal View History

2024-10-28 16:02:07 -03:00
import { Request, Response } from "express";
import {
RequestWithAuth,
ExtractRequest,
extractRequestSchema,
ExtractResponse,
} from "./types";
2025-01-03 20:44:27 -03:00
import { getExtractQueue } from "../../services/queue-service";
import * as Sentry from "@sentry/node";
import { v4 as uuidv4 } from "uuid";
2024-10-28 16:02:07 -03:00
2024-11-20 13:16:36 -08:00
/**
 * Validates an extract request and enqueues it as a job on the extract queue.
 * Currently in beta.
 *
 * No extraction work happens in this handler: it only publishes the job and
 * acknowledges it, returning the generated id to the caller.
 *
 * @param req - The authenticated request; its body is replaced by the result
 *   of `extractRequestSchema.parse`.
 * @param res - The response used to report the outcome.
 * @returns The response object, after sending either a 400 (the request has
 *   no plan) or a 202 carrying the id of the enqueued job.
 */
export async function extractController(
  req: RequestWithAuth<{}, ExtractResponse, ExtractRequest>,
  res: Response<ExtractResponse>,
) {
  // NOTE(review): presumably `parse` throws on an invalid body and an upstream
  // error handler turns that into a response — confirm; not visible here.
  req.body = extractRequestSchema.parse(req.body);

  if (!req.auth.plan) {
    return res.status(400).json({
      success: false,
      error: "No plan specified",
      urlTrace: [],
    });
  }

  const extractId = crypto.randomUUID();
  const jobData = {
    request: req.body,
    teamId: req.auth.team_id,
    plan: req.auth.plan,
    subId: req.acuc?.sub_id,
    extractId,
  };

  // Both publish paths must pin the job id to `extractId`, because that is the
  // id handed back to the client in the 202 response below.
  const jobOptions = { jobId: extractId };
  const queue = getExtractQueue();

  if (Sentry.isInitialized()) {
    // Size of the payload as serialized JSON, reported on the span and
    // forwarded with the job.
    const size = JSON.stringify(jobData).length;

    await Sentry.startSpan(
      {
        name: "Add extract job",
        op: "queue.publish",
        attributes: {
          "messaging.message.id": extractId,
          "messaging.destination.name": queue.name,
          "messaging.message.body.size": size,
        },
      },
      async (span) => {
        await queue.add(
          extractId,
          {
            ...jobData,
            // Trace headers travel with the job payload.
            sentry: {
              trace: Sentry.spanToTraceHeader(span),
              baggage: Sentry.spanToBaggageHeader(span),
              size,
            },
          },
          jobOptions,
        );
      },
    );
  } else {
    await queue.add(extractId, jobData, jobOptions);
  }

  return res.status(202).json({
    success: true,
    id: extractId,
    urlTrace: [],
  });
}