2024-04-28 15:52:09 -07:00
|
|
|
import Turndown from 'turndown'
|
|
|
|
|
import OpenAI from 'openai'
|
|
|
|
|
// import { LlamaModel } from 'node-llama-cpp'
|
|
|
|
|
import { z } from 'zod'
|
|
|
|
|
import { zodToJsonSchema } from 'zod-to-json-schema'
|
2024-04-28 17:38:20 -07:00
|
|
|
|
2024-04-28 15:52:09 -07:00
|
|
|
import {
|
|
|
|
|
ScraperCompletionResult,
|
|
|
|
|
generateOpenAICompletions,
|
2024-04-28 17:38:20 -07:00
|
|
|
} from './models'
|
|
|
|
|
import { Document, ExtractorOptions } from '../entities'
|
2024-04-28 15:52:09 -07:00
|
|
|
|
|
|
|
|
// Generate completion using OpenAI
|
2024-04-28 17:38:20 -07:00
|
|
|
/**
 * Runs LLM-based extraction over every supplied document concurrently.
 *
 * Each document is handed to `generateOpenAICompletions` together with the
 * schema and prompt read from `extractionOptions`. Results are gathered with
 * `Promise.all`, so the returned array follows the order of `documents` and a
 * failure for any single document rejects the whole call.
 *
 * @param documents - Documents to run the extraction on.
 * @param extractionOptions - Supplies `extractionSchema` and `extractionPrompt`.
 * @returns The per-document results produced by the model provider.
 * @throws Error with message `Invalid client` when the provider selector does
 *   not match a known provider.
 */
export async function generateCompletions(
  documents: Document[],
  extractionOptions: ExtractorOptions
): Promise<Document[]> {
  // The schema is forwarded exactly as supplied; converting it with
  // zodToJsonSchema is currently disabled.
  const schema = extractionOptions.extractionSchema;
  const prompt = extractionOptions.extractionPrompt;

  const switchVariable = "openAI"; // Placeholder, want to think more about how we abstract the model provider

  // Create the OpenAI client lazily and share it across all documents instead
  // of constructing a fresh one per document. Laziness keeps the original
  // behavior of never instantiating a client when `documents` is empty.
  let openAIClient: OpenAI | undefined;
  const getOpenAIClient = (): OpenAI => {
    if (openAIClient === undefined) {
      openAIClient = new OpenAI();
    }
    return openAIClient;
  };

  const completions = await Promise.all(
    documents.map(async (document: Document) => {
      switch (switchVariable) {
        // Braces give the case its own block scope.
        case "openAI": {
          return await generateOpenAICompletions({
            client: getOpenAIClient(),
            document: document,
            schema: schema,
            prompt: prompt,
          });
        }
        default:
          throw new Error('Invalid client');
      }
    })
  );

  return completions;
}
|