Files
firecrawl/apps/api/src/lib/LLM-extraction/index.ts
T
2024-04-29 12:12:55 -07:00

53 lines
1.9 KiB
TypeScript

import Turndown from 'turndown'
import OpenAI from 'openai'
// import { LlamaModel } from 'node-llama-cpp'
import { z } from 'zod'
import { zodToJsonSchema } from 'zod-to-json-schema'
import Ajv from 'ajv';
// Module-level Ajv instance, created once when this module is loaded.
// generateCompletions uses it to compile the caller's extraction schema and
// validate the LLM output against it.
const ajv = new Ajv(); // Initialize AJV for JSON schema validation
import {
ScraperCompletionResult,
generateOpenAICompletions,
} from './models'
import { Document, ExtractorOptions } from '../entities'
/**
 * Runs LLM extraction over every document and validates each extraction
 * against the caller-supplied JSON schema.
 *
 * Documents are processed concurrently through `Promise.all`, so the first
 * failure (model call or schema mismatch) rejects the whole call.
 *
 * @param documents - Documents to run the extraction on. An empty list
 *   resolves to an empty list without creating a client or compiling a schema.
 * @param extractionOptions - Supplies `extractionSchema` (the JSON schema the
 *   model output must satisfy) and `extractionPrompt` (forwarded to the model).
 * @returns The values returned by `generateOpenAICompletions`, one per input
 *   document, in input order.
 * @throws Whatever `ajv.compile` throws for a schema it cannot compile, whatever
 *   `new OpenAI()` or `generateOpenAICompletions` throws, or an `Error` when a
 *   result's `llm_extraction` does not validate against the schema.
 */
export async function generateCompletions(
  documents: Document[],
  extractionOptions: ExtractorOptions
): Promise<Document[]> {
  // const schema = zodToJsonSchema(options.schema)
  const schema = extractionOptions.extractionSchema;
  const prompt = extractionOptions.extractionPrompt;
  const switchVariable = "openAI"; // Placeholder, want to think more about how we abstract the model provider

  // Nothing to do: keep the "empty in, empty out" behaviour without touching
  // the client or the schema compiler.
  if (documents.length === 0) {
    return [];
  }

  // Compile the schema once, up front. This is loop-invariant work, and doing
  // it before any model call means an uncompilable schema fails fast instead
  // of after a completion has been requested for every document.
  const validate = ajv.compile(schema);

  // The provider choice does not depend on the document, so decide it once
  // here rather than inside the per-document callback.
  switch (switchVariable) {
    case "openAI": {
      // A single client is shared by all concurrent requests.
      const llm = new OpenAI();
      const completions = await Promise.all(
        documents.map(async (document: Document) => {
          const completionResult = await generateOpenAICompletions({
            client: llm,
            document: document,
            schema: schema,
            prompt: prompt
          });
          // Validate the JSON output against the schema using AJV.
          // `validate.errors` is overwritten on every call; it is read here
          // synchronously, right after the call, so concurrent documents
          // cannot observe each other's errors.
          if (!validate(completionResult.llm_extraction)) {
            throw new Error(`LLM extraction did not match the extraction schema you provided. This could be because of a model hallucination, or an Error on our side. Try adjusting your prompt, and if it doesn't work reach out to support. AJV error: ${validate.errors?.map(err => err.message).join(', ')}`);
          }
          return completionResult;
        })
      );
      return completions;
    }
    default:
      throw new Error('Invalid client');
  }
}