Files
firecrawl/apps/api/src/lib/LLM-extraction/index.ts
T

59 lines
2.1 KiB
TypeScript
Raw Normal View History

2024-04-30 12:19:43 -07:00
import OpenAI from "openai";
import Ajv from "ajv";
2024-04-29 12:12:55 -07:00
const ajv = new Ajv(); // Module-level AJV instance; used below to compile the extraction JSON schema and validate LLM output against it
2024-04-28 17:38:20 -07:00
2024-04-30 12:19:43 -07:00
import { generateOpenAICompletions } from "./models";
import { Document, ExtractorOptions } from "../entities";
2024-07-25 09:48:06 -03:00
import { Logger } from "../logger";
2024-04-28 15:52:09 -07:00
2024-04-30 12:19:43 -07:00
// Generate completion using OpenAI
2024-04-28 17:38:20 -07:00
/**
 * Compiles the extraction schema into an AJV validator.
 * Logs and rethrows on failure, matching the error handling used for the
 * completion call itself.
 */
function compileExtractionValidator(
  schema: ExtractorOptions["extractionSchema"]
) {
  try {
    return ajv.compile(schema);
  } catch (error) {
    Logger.error(`Error generating completions: ${error}`);
    throw error;
  }
}

/**
 * Runs LLM extraction over every document and validates each result against
 * the caller-supplied JSON schema.
 *
 * @param documents - Documents to run extraction on; all are processed
 *   concurrently via `Promise.all`.
 * @param extractionOptions - Supplies `extractionSchema` (JSON schema the
 *   output must satisfy) and `extractionPrompt` (forwarded to the model call).
 * @param mode - Forwarded unchanged to `generateOpenAICompletions`.
 * @returns The values returned by `generateOpenAICompletions`, one per input
 *   document, in input order.
 * @throws Error if the schema cannot be compiled, if the completion call
 *   fails, or if a result's `llm_extraction` does not satisfy the schema.
 */
export async function generateCompletions(
  documents: Document[],
  extractionOptions: ExtractorOptions,
  mode: "markdown" | "raw-html"
): Promise<Document[]> {
  // Nothing to extract: return early so an empty batch never touches the
  // schema compiler or the model client.
  if (documents.length === 0) {
    return [];
  }

  // const schema = zodToJsonSchema(options.schema)
  const schema = extractionOptions.extractionSchema;
  const prompt = extractionOptions.extractionPrompt;
  const switchVariable = "openAI"; // Placeholder, want to think more about how we abstract the model provider

  // Compile once, up front: the schema is the same for every document, and an
  // invalid schema should fail before any completion request is issued.
  const validate = compileExtractionValidator(schema);

  const completions = await Promise.all(
    documents.map(async (document: Document) => {
      switch (switchVariable) {
        case "openAI": {
          // Braces keep `llm` scoped to this case rather than the whole switch.
          const llm = new OpenAI();
          try {
            const completionResult = await generateOpenAICompletions({
              client: llm,
              document: document,
              schema: schema,
              prompt: prompt,
              mode: mode,
            });
            // Validate the JSON output against the schema using AJV
            if (!validate(completionResult.llm_extraction)) {
              // Fall back to an empty list so the message never contains "undefined".
              const details = (validate.errors ?? [])
                .map((err) => err.message)
                .join(", ");
              //TODO: add Custom Error handling middleware that bubbles this up with proper Error code, etc.
              throw new Error(
                `JSON parsing error(s): ${details}\n\nLLM extraction did not match the extraction schema you provided. This could be because of a model hallucination, or an Error on our side. Try adjusting your prompt, and if it doesn't work reach out to support.`
              );
            }
            return completionResult;
          } catch (error) {
            Logger.error(`Error generating completions: ${error}`);
            throw error;
          }
        }
        default:
          throw new Error("Invalid client");
      }
    })
  );
  return completions;
}