Files
firecrawl/apps/api/src/lib/LLM-extraction/index.ts
T

54 lines
2.1 KiB
TypeScript
Raw Normal View History

2024-04-28 15:52:09 -07:00
import Turndown from 'turndown'
import OpenAI from 'openai'
// import { LlamaModel } from 'node-llama-cpp'
import { z } from 'zod'
import { zodToJsonSchema } from 'zod-to-json-schema'
2024-04-29 12:12:55 -07:00
import Ajv from 'ajv';
// Module-level AJV instance, created once at import time. generateCompletions
// uses it to compile the caller-supplied extraction schema into a validator.
const ajv = new Ajv(); // Initialize AJV for JSON schema validation
2024-04-28 17:38:20 -07:00
2024-04-28 15:52:09 -07:00
import {
ScraperCompletionResult,
generateOpenAICompletions,
2024-04-28 17:38:20 -07:00
} from './models'
import { Document, ExtractorOptions } from '../entities'
2024-04-28 15:52:09 -07:00
// Generate completion using OpenAI
2024-04-28 17:38:20 -07:00
/**
 * Runs LLM extraction over every document and validates each result against
 * the caller-supplied extraction schema.
 *
 * All documents are processed concurrently via `Promise.all`, so the returned
 * array is in the same order as `documents`, and the first failure rejects the
 * whole call.
 *
 * @param documents - Documents to run extraction on.
 * @param extractionOptions - Supplies `extractionSchema` (the JSON schema the
 *   output is validated against) and `extractionPrompt` (forwarded to the model).
 * @returns The completion result produced for each input document.
 * @throws Error if a result's `llm_extraction` does not satisfy the schema, or
 *   if the model provider is not recognised. Errors thrown by `ajv.compile` or
 *   by the completion call propagate unchanged.
 */
export async function generateCompletions(
  documents: Document[],
  extractionOptions: ExtractorOptions
): Promise<Document[]> {
  // const schema = zodToJsonSchema(options.schema)
  const schema = extractionOptions.extractionSchema;
  const prompt = extractionOptions.extractionPrompt;

  const switchVariable = "openAI" // Placeholder, want to think more about how we abstract the model provider

  // Nothing to extract: resolve to an empty list without compiling the schema
  // (same result Promise.all would give for an empty array).
  if (documents.length === 0) {
    return [];
  }

  // Compile the validator once, before any model call: the schema is the same
  // for every document, and a schema AJV rejects should fail fast rather than
  // after a completion request has already been made for each document.
  const validate = ajv.compile(schema);

  const completions = await Promise.all(documents.map(async (document: Document) => {
    switch (switchVariable) {
      // Braces give the case its own lexical scope for the `const` bindings.
      case "openAI": {
        const llm = new OpenAI();
        const completionResult = await generateOpenAICompletions({
          client: llm,
          document: document,
          schema: schema,
          prompt: prompt
        });

        // Validate the JSON output against the schema using AJV.
        // `validate.errors` is read synchronously right after the call, so
        // sharing one validator across concurrent documents is safe.
        if (!validate(completionResult.llm_extraction)) {
          //TODO: add Custom Error handling middleware that bubbles this up with proper Error code, etc.
          throw new Error(`LLM extraction did not match the extraction schema you provided. This could be because of a model hallucination, or an Error on our side. Try adjusting your prompt, and if it doesn't work reach out to support. AJV error: ${validate.errors?.map(err => err.message).join(', ')}`);
        }

        return completionResult;
      }
      default:
        throw new Error('Invalid client');
    }
  }));

  return completions;
}