apps/api/src/lib/LLM-extraction/index.ts

import OpenAI from "openai";
import Ajv from "ajv";
// Module-level AJV instance; generateCompletions uses it to compile the
// caller-supplied extraction schema and validate LLM output against it.
const ajv = new Ajv();

import { generateOpenAICompletions } from "./models";
import { Document, ExtractorOptions } from "../entities";
import { Logger } from "../logger";

/**
 * Runs LLM-based structured extraction over every document and validates each
 * result against the caller-supplied JSON schema.
 *
 * The OpenAI client and the AJV validator are created once per call and shared
 * by all documents. The schema is compiled before any completion request is
 * issued, so a schema AJV cannot compile fails fast instead of after the model
 * has already been called.
 *
 * @param documents - Documents to run extraction on. An empty list resolves to
 *   an empty list without constructing a client or compiling the schema.
 * @param extractionOptions - Supplies `extractionSchema` (the JSON schema each
 *   extraction must satisfy) and `extractionPrompt` (forwarded to the model
 *   helper).
 * @param mode - Forwarded unchanged to `generateOpenAICompletions`.
 * @returns The values resolved by `generateOpenAICompletions`, in input order.
 * @throws Error when the schema cannot be compiled, when the completion helper
 *   rejects, or when an extraction does not satisfy the schema. Schema and
 *   per-document failures are logged through `Logger.error` and rethrown.
 */
export async function generateCompletions(
  documents: Document[],
  extractionOptions: ExtractorOptions,
  mode: "markdown" | "raw-html"
): Promise<Document[]> {
  const schema = extractionOptions.extractionSchema;
  const prompt = extractionOptions.extractionPrompt;

  // Nothing to extract: skip client construction and schema compilation.
  if (documents.length === 0) {
    return [];
  }

  // Placeholder, want to think more about how we abstract the model provider
  const switchVariable = "openAI";

  switch (switchVariable) {
    case "openAI": {
      // One client for the whole batch instead of one per document.
      const llm = new OpenAI();

      // Compile the schema once, up front, so an unusable schema is reported
      // before any completion request is sent.
      let validate: ReturnType<typeof ajv.compile>;
      try {
        validate = ajv.compile(schema);
      } catch (error) {
        Logger.error(`Error generating completions: ${error}`);
        throw error;
      }

      const completions = await Promise.all(
        documents.map(async (document: Document) => {
          try {
            const completionResult = await generateOpenAICompletions({
              client: llm,
              document: document,
              schema: schema,
              prompt: prompt,
              mode: mode,
            });

            // Validate the JSON output against the schema using AJV.
            // `validate.errors` lives on the shared validator function; it is
            // read synchronously right after `validate(...)` with no `await`
            // in between, so concurrent documents cannot overwrite it first.
            if (!validate(completionResult.llm_extraction)) {
              //TODO: add Custom Error handling middleware that bubbles this up with proper Error code, etc.
              throw new Error(
                `JSON parsing error(s): ${validate.errors
                  ?.map((err) => err.message)
                  .join(", ")}\n\nLLM extraction did not match the extraction schema you provided. This could be because of a model hallucination, or an Error on our side. Try adjusting your prompt, and if it doesn't work reach out to support.`
              );
            }

            return completionResult;
          } catch (error) {
            Logger.error(`Error generating completions: ${error}`);
            // Rethrow the original error object so its stack trace is kept.
            throw error;
          }
        })
      );

      return completions;
    }
    default:
      throw new Error("Invalid client");
  }
}
Nick: cleanup 2024-04-30 12:19:43 -07:00			`import OpenAI from "openai";`
			`import Ajv from "ajv";`
Caleb: added ajv json schema validation. 2024-04-29 12:12:55 -07:00			`const ajv = new Ajv(); // Initialize AJV for JSON schema validation`
Caleb: first test passing 2024-04-28 17:38:20 -07:00
Nick: cleanup 2024-04-30 12:19:43 -07:00			`import { generateOpenAICompletions } from "./models";`
			`import { Document, ExtractorOptions } from "../entities";`
updated logs 2024-07-25 09:48:06 -03:00			`import { Logger } from "../logger";`
Caleb: got it to a testable state I believe 2024-04-28 15:52:09 -07:00
Nick: cleanup 2024-04-30 12:19:43 -07:00			`// Generate completion using OpenAI`
Caleb: first test passing 2024-04-28 17:38:20 -07:00			`export async function generateCompletions(`
Nick: cleanup 2024-04-30 12:19:43 -07:00			`documents: Document[],`
init 2024-06-28 16:39:09 -04:00			`extractionOptions: ExtractorOptions,`
			`mode: "markdown" \| "raw-html"`
Caleb: first test passing 2024-04-28 17:38:20 -07:00			`): Promise<Document[]> {`
Nick: cleanup 2024-04-30 12:19:43 -07:00			`// const schema = zodToJsonSchema(options.schema)`

			`const schema = extractionOptions.extractionSchema;`
			`const prompt = extractionOptions.extractionPrompt;`

			`const switchVariable = "openAI"; // Placholder, want to think more about how we abstract the model provider`

			`const completions = await Promise.all(`
			`documents.map(async (document: Document) => {`
			`switch (switchVariable) {`
			`case "openAI":`
			`const llm = new OpenAI();`
Update index.ts 2024-04-30 18:19:55 -07:00			`try{`
Nick: cleanup 2024-04-30 12:19:43 -07:00			`const completionResult = await generateOpenAICompletions({`
			`client: llm,`
			`document: document,`
			`schema: schema,`
			`prompt: prompt,`
init 2024-06-28 16:39:09 -04:00			`mode: mode,`
Nick: cleanup 2024-04-30 12:19:43 -07:00			`});`
			`// Validate the JSON output against the schema using AJV`
			`const validate = ajv.compile(schema);`
			`if (!validate(completionResult.llm_extraction)) {`
			`//TODO: add Custom Error handling middleware that bubbles this up with proper Error code, etc.`
			`throw new Error(`
Nick: improvements 2024-04-30 16:19:32 -07:00			`JSON parsing error(s): ${validate.errors
Nick: cleanup 2024-04-30 12:19:43 -07:00			`?.map((err) => err.message)`
Nick: improvements 2024-04-30 16:19:32 -07:00			.join(", ")}\n\nLLM extraction did not match the extraction schema you provided. This could be because of a model hallucination, or an Error on our side. Try adjusting your prompt, and if it doesn't work reach out to support.`
Nick: cleanup 2024-04-30 12:19:43 -07:00			`);`
			`}`

			`return completionResult;`
Update index.ts 2024-04-30 18:19:55 -07:00			`} catch (error) {`
updated logs 2024-07-25 09:48:06 -03:00			Logger.error(`Error generating completions: ${error}`);
fix(llm-extract): pass stacktrace properly 2024-08-22 14:37:09 +02:00			`throw error;`
Update index.ts 2024-04-30 18:19:55 -07:00			`}`
Nick: cleanup 2024-04-30 12:19:43 -07:00			`default:`
			`throw new Error("Invalid client");`
			`}`
			`})`
			`);`

			`return completions;`
Caleb: got it to a testable state I believe 2024-04-28 15:52:09 -07:00			`}`