2024-04-30 12:19:43 -07:00
import OpenAI from "openai" ;
import Ajv from "ajv" ;
2024-04-29 12:12:55 -07:00
const ajv = new Ajv ( ) ; // Module-level AJV instance, used below to compile extraction schemas and validate LLM output against them
2024-04-28 17:38:20 -07:00
2024-04-30 12:19:43 -07:00
import { generateOpenAICompletions } from "./models" ;
import { Document , ExtractorOptions } from "../entities" ;
2024-11-07 20:57:33 +01:00
import { logger } from "../logger" ;
2024-04-28 15:52:09 -07:00
2024-04-30 12:19:43 -07:00
/**
 * Runs LLM extraction over every document and returns the per-document results.
 *
 * All documents are processed concurrently via `Promise.all`, so the first
 * failure rejects the whole call. When an extraction schema is supplied, the
 * `llm_extraction` field of each result is validated against it with AJV.
 *
 * @param documents - Documents to run the extraction on. An empty array
 *   resolves to an empty array without touching the schema or the provider.
 * @param extractionOptions - Optional extraction settings; its
 *   `extractionSchema`, `extractionPrompt` (used as the system prompt) and
 *   `userPrompt` are forwarded to the completion call.
 * @param mode - Forwarded unchanged to `generateOpenAICompletions`
 *   (presumably selects which document representation is sent — confirm in
 *   `./models`).
 * @returns The values resolved by `generateOpenAICompletions`, in the same
 *   order as `documents`.
 * @throws Error when a result's `llm_extraction` does not match the schema.
 *   Errors from schema compilation or from the completion call are logged
 *   and rethrown unchanged.
 */
export async function generateCompletions(
  documents: Document[],
  extractionOptions: ExtractorOptions | undefined,
  mode: "markdown" | "raw-html"
): Promise<Document[]> {
  // Nothing to extract: return early so an unused schema is never compiled.
  if (documents.length === 0) {
    return [];
  }

  const schema = extractionOptions?.extractionSchema;
  const systemPrompt = extractionOptions?.extractionPrompt;
  const prompt = extractionOptions?.userPrompt;

  // Compile the schema once, up front. It is identical for every document,
  // and compiling before the fan-out means an invalid schema fails fast
  // instead of after every LLM request has already been issued.
  let validate: ReturnType<typeof ajv.compile> | undefined;
  if (schema) {
    try {
      validate = ajv.compile(schema);
    } catch (error) {
      logger.error(`Error generating completions: ${error}`);
      throw error;
    }
  }

  const switchVariable = "openAI"; // Placeholder, want to think more about how we abstract the model provider
  const completions = await Promise.all(
    documents.map(async (document: Document) => {
      switch (switchVariable) {
        case "openAI": {
          // Block scope keeps the provider-specific client local to this case.
          const llm = new OpenAI();
          try {
            const completionResult = await generateOpenAICompletions({
              client: llm,
              document,
              schema,
              prompt,
              systemPrompt,
              mode,
            });

            // Validate the JSON output against the schema using AJV.
            // `validate.errors` is overwritten on every call, but it is read
            // synchronously right after the call (no `await` in between), so
            // sharing one validator across concurrent documents is safe.
            if (validate && !validate(completionResult.llm_extraction)) {
              //TODO: add Custom Error handling middleware that bubbles this up with proper Error code, etc.
              throw new Error(
                `JSON parsing error(s): ${validate.errors
                  ?.map((err) => err.message)
                  .join(
                    ", "
                  )}\n\nLLM extraction did not match the extraction schema you provided. This could be because of a model hallucination, or an Error on our side. Try adjusting your prompt, and if it doesn't work reach out to support.`
              );
            }

            return completionResult;
          } catch (error) {
            logger.error(`Error generating completions: ${error}`);
            throw error;
          }
        }
        default:
          throw new Error("Invalid client");
      }
    })
  );
  return completions;
}