2024-04-28 15:52:09 -07:00
import Turndown from 'turndown'
import OpenAI from 'openai'
// import { LlamaModel } from 'node-llama-cpp'
import { z } from 'zod'
import { zodToJsonSchema } from 'zod-to-json-schema'
2024-04-29 12:12:55 -07:00
import Ajv from 'ajv' ;
// Module-level AJV instance; generateCompletions compiles the extraction schema with it and validates LLM output.
const ajv = new Ajv ( ) ; // Initialize AJV for JSON schema validation
2024-04-28 17:38:20 -07:00
2024-04-28 15:52:09 -07:00
import {
ScraperCompletionResult ,
generateOpenAICompletions ,
2024-04-28 17:38:20 -07:00
} from './models'
import { Document , ExtractorOptions } from '../entities'
2024-04-28 15:52:09 -07:00
/**
 * Generate a completion for every document and validate each result against
 * the caller-supplied extraction schema.
 *
 * The schema is compiled once, before any provider request is issued, so an
 * invalid schema rejects immediately instead of after every document has
 * already triggered a model call.
 *
 * @param documents - Documents to run extraction on. One provider call is
 *   started per document and all calls are awaited together with Promise.all.
 * @param extractionOptions - Supplies `extractionSchema` (compiled with AJV)
 *   and `extractionPrompt`; both are forwarded to the provider call.
 * @returns The values returned by `generateOpenAICompletions`, in the same
 *   order as `documents`. An empty input yields an empty array.
 * @throws Error when a result's `llm_extraction` does not satisfy the schema.
 *   Errors from `ajv.compile` and from the provider call propagate unchanged;
 *   the returned promise rejects with the first failure.
 */
export async function generateCompletions(
  documents: Document[],
  extractionOptions: ExtractorOptions
): Promise<Document[]> {
  // const schema = zodToJsonSchema(options.schema)
  const schema = extractionOptions.extractionSchema;
  const prompt = extractionOptions.extractionPrompt;

  // Placeholder, want to think more about how we abstract the model provider
  const switchVariable = "openAI";

  // Nothing to extract: return before touching the schema, matching the
  // previous behaviour where an empty input never compiled it.
  if (documents.length === 0) {
    return [];
  }

  // Compile once up front. The validator is loop-invariant, and compiling
  // here surfaces a bad schema before any provider request is made.
  const validate = ajv.compile(schema);

  const completions = await Promise.all(
    documents.map(async (document: Document) => {
      switch (switchVariable) {
        case "openAI": {
          const llm = new OpenAI();
          const completionResult = await generateOpenAICompletions({
            client: llm,
            document: document,
            schema: schema,
            prompt: prompt,
          });

          // Validate the JSON output against the schema using AJV.
          // `validate` is synchronous and `validate.errors` is read right
          // after the call, so sharing one validator across the concurrent
          // callbacks is safe.
          if (!validate(completionResult.llm_extraction)) {
            throw new Error(` LLM extraction did not match the extraction schema you provided. This could be because of a model hallucination, or an Error on our side. Try adjusting your prompt, and if it doesn't work reach out to support. AJV error: ${validate.errors?.map((err) => err.message).join(', ')} `);
          }

          return completionResult;
        }
        default:
          throw new Error('Invalid client');
      }
    })
  );

  return completions;
}