Files
firecrawl/apps/api/src/lib/extract/completions/batchExtract.ts
T

58 lines
1.7 KiB
TypeScript
Raw Normal View History

import { logger } from "../../../lib/logger";
2025-02-20 18:48:58 -03:00
import { generateCompletions } from "../../../scraper/scrapeURL/transformers/llmExtract";
import { buildDocument } from "../build-document";
import { ExtractResponse, TokenUsage } from "../../../controllers/v1/types";
import { Document } from "../../../controllers/v1/types";
import {
buildBatchExtractPrompt,
buildBatchExtractSystemPrompt,
} from "../build-prompts";
/**
 * Runs a single LLM extraction for one document against a multi-entity schema.
 *
 * The system prompt is built by `buildBatchExtractSystemPrompt` from the
 * caller's system prompt, the schema and the list of links; the user prompt is
 * built by `buildBatchExtractPrompt`. The document is converted to the
 * completion's `markdown` input with `buildDocument`.
 *
 * @param multiEntitySchema - Schema passed to the completion and embedded in the system prompt
 * @param links - URLs passed to the system prompt builder
 * @param prompt - User prompt for the extraction
 * @param systemPrompt - Base system prompt for the extraction
 * @param doc - The document to extract information from
 * @returns The extracted data, the token accounting reported by the completion,
 *   any warning reported by the completion, and a one-element `sources` array
 *   holding the document URL (`metadata.url`, else `metadata.sourceURL`, else `""`)
 */
export async function batchExtractPromise(
  multiEntitySchema: any,
  links: string[],
  prompt: string,
  systemPrompt: string,
  doc: Document,
): Promise<{
  extract: any;
  numTokens: number;
  totalUsage: TokenUsage;
  warning?: string;
  sources: string[];
}> {
  const completion = await generateCompletions({
    logger: logger.child({
      method: "extractService/generateCompletions",
    }),
    options: {
      mode: "llm",
      systemPrompt: buildBatchExtractSystemPrompt(
        systemPrompt,
        multiEntitySchema,
        links,
      ),
      prompt: buildBatchExtractPrompt(prompt),
      schema: multiEntitySchema,
    },
    markdown: buildDocument(doc),
    isExtractEndpoint: true,
  });

  return {
    extract: completion.extract,
    numTokens: completion.numTokens,
    totalUsage: completion.totalUsage,
    // The return type declares `warning`, so pass the completion's warning
    // through instead of silently dropping it.
    // NOTE(review): assumes the generateCompletions result exposes `warning` — confirm.
    warning: completion.warning,
    // `||` (not `??`) so an empty-string url falls through to sourceURL.
    sources: [doc.metadata.url || doc.metadata.sourceURL || ""],
  };
}