2024-04-30 12:19:43 -07:00
import OpenAI from "openai" ;
import Ajv from "ajv" ;
2024-04-29 12:12:55 -07:00
// Module-level AJV instance; generateCompletions uses it to compile the
// extraction schema and validate the LLM output against it.
const ajv = new Ajv ( ) ;
2024-04-28 17:38:20 -07:00
2024-04-30 12:19:43 -07:00
import { generateOpenAICompletions } from "./models" ;
import { Document , ExtractorOptions } from "../entities" ;
2024-07-25 09:48:06 -03:00
import { Logger } from "../logger" ;
2024-04-28 15:52:09 -07:00
2024-04-30 12:19:43 -07:00
// Generate completion using OpenAI
2024-04-28 17:38:20 -07:00
/**
 * Runs LLM extraction over every document and validates each result against
 * the extraction schema supplied in `extractionOptions`.
 *
 * All documents are processed concurrently via `Promise.all`, so the returned
 * array is in the same order as `documents`, and the first failure rejects the
 * whole call.
 *
 * @param documents - Documents to run extraction on. An empty array resolves
 *   to an empty array without creating a client or compiling the schema.
 * @param extractionOptions - Supplies `extractionSchema` (JSON schema the LLM
 *   output must satisfy) and `extractionPrompt` (forwarded to the model call).
 * @param mode - Forwarded unchanged to `generateOpenAICompletions`.
 * @returns The values resolved by `generateOpenAICompletions`, one per input
 *   document.
 * @throws Error when a document's `llm_extraction` does not validate against
 *   the schema, when the schema itself cannot be compiled, or when the model
 *   call fails. Failures are logged through `Logger.error` and rethrown.
 */
export async function generateCompletions(
  documents: Document[],
  extractionOptions: ExtractorOptions,
  mode: "markdown" | "raw-html"
): Promise<Document[]> {
  // Nothing to extract: avoid constructing a client or compiling the schema.
  if (documents.length === 0) {
    return [];
  }

  // const schema = zodToJsonSchema(options.schema)
  const schema = extractionOptions.extractionSchema;
  const prompt = extractionOptions.extractionPrompt;
  const switchVariable = "openAI"; // Placeholder, want to think more about how we abstract the model provider

  switch (switchVariable) {
    case "openAI": {
      // One client is shared by every document instead of one per document.
      const llm = new OpenAI();

      // Compile the schema once, up front: the validator is identical for all
      // documents, and an invalid schema now fails before any LLM request is
      // made rather than after each one.
      let validate: ReturnType<typeof ajv.compile>;
      try {
        validate = ajv.compile(schema);
      } catch (error) {
        Logger.error(`Error generating completions: ${error}`);
        throw error;
      }

      return Promise.all(
        documents.map(async (document: Document) => {
          try {
            const completionResult = await generateOpenAICompletions({
              client: llm,
              document: document,
              schema: schema,
              prompt: prompt,
              mode: mode,
            });

            // Validate the JSON output against the schema using AJV.
            // `validate.errors` is read synchronously right after the call, so
            // sharing the validator across concurrent callbacks is safe.
            if (!validate(completionResult.llm_extraction)) {
              //TODO: add Custom Error handling middleware that bubbles this up with proper Error code, etc.
              throw new Error(
                `JSON parsing error(s): ${validate.errors
                  ?.map((err) => err.message)
                  .join(", ")}\n\nLLM extraction did not match the extraction schema you provided. This could be because of a model hallucination, or an Error on our side. Try adjusting your prompt, and if it doesn't work reach out to support.`
              );
            }

            return completionResult;
          } catch (error) {
            Logger.error(`Error generating completions: ${error}`);
            throw error;
          }
        })
      );
    }
    default:
      throw new Error("Invalid client");
  }
}