119 lines
3.3 KiB
TypeScript
119 lines
3.3 KiB
TypeScript
import OpenAI from "openai";
|
|
import { encoding_for_model } from "@dqbd/tiktoken";
|
|
import { TiktokenModel } from "@dqbd/tiktoken";
|
|
import { ExtractOptions } from "../../controllers/v1/types";
|
|
import { Document } from "../entities";
|
|
import { z } from "zod";
|
|
|
|
// Largest token count of page content accepted for extraction; content whose
// token count exceeds this is trimmed before being sent to the model.
const maxTokens = 32000;
|
|
// Multiplied by maxTokens to get the character limit used when trimming
// (content is sliced to maxTokens * modifier characters).
// NOTE(review): presumably an approximate characters-per-token ratio — confirm.
const modifier = 4;
|
|
|
|
/**
 * Error thrown when the model declines to produce an extraction.
 *
 * The message is a fixed, user-facing string; the model's own refusal text is
 * kept on {@link LLMRefusalError.refusal} so callers can log or inspect it.
 */
export class LLMRefusalError extends Error {
  /**
   * @param refusal - The refusal text returned by the model.
   */
  constructor(public readonly refusal: string) {
    super("LLM refused to extract the website's content");
    this.name = "LLMRefusalError";
  }
}
|
|
|
|
/** Inputs accepted by `generateFinalExtraction`. */
interface GenerateCompletionsParams {
  /** Page text to extract from; sent to the model as a user message. */
  pagesContent: string;
  /** Optional user request appended to the "transform into JSON" instruction. */
  prompt?: string;
  /** Content of the system message; an empty string is sent when omitted. */
  systemPrompt?: string;
  /** Optional schema describing the desired output; when absent, a generic JSON object is requested. */
  schema?: any;
}
|
|
|
|
/**
 * Sends `prompt` as a single user message to the chat completions endpoint
 * and resolves with the content of the first returned choice.
 *
 * The model is taken from the `MODEL_NAME` environment variable, defaulting
 * to "gpt-4o-mini" when it is not set.
 *
 * @param prompt - Text sent as the only (user) message.
 * @returns The first choice's message content as returned by the API.
 */
export async function generateBasicCompletion(prompt: string) {
  const client = new OpenAI();
  const modelName: TiktokenModel =
    (process.env.MODEL_NAME as TiktokenModel) ?? "gpt-4o-mini";

  const response = await client.chat.completions.create({
    model: modelName,
    messages: [{ role: "user", content: prompt }],
  });

  const [firstChoice] = response.choices;
  return firstChoice.message.content;
}
|
|
|
|
/**
 * Counts the tokens in `content` for `model` and trims the text when it is
 * over budget or when the count cannot be derived.
 *
 * @returns The (possibly trimmed) content, the token count of the untrimmed
 * input (0 when counting failed), and a warning that is empty unless the
 * content was trimmed.
 */
function fitContentToTokenBudget(
  content: string,
  model: TiktokenModel,
): { content: string; numTokens: number; warning: string } {
  // Trimming works on characters, using maxTokens * modifier as the limit.
  const maxChars = maxTokens * modifier;
  let encoder: ReturnType<typeof encoding_for_model> | undefined;
  let numTokens = 0;

  try {
    // Created inside the try on purpose: the model name comes unchecked from
    // the environment, so building the encoder may itself throw. That case
    // must take the same trimming fallback as a failed encode.
    encoder = encoding_for_model(model);
    numTokens = encoder.encode(content).length;
  } catch {
    return {
      content: content.slice(0, maxChars),
      numTokens: 0,
      warning: `Failed to derive number of LLM tokens the extraction might use -- the input has been automatically trimmed to the maximum number of tokens (${maxTokens}) we support.`,
    };
  } finally {
    // Release the encoder if one was created.
    encoder?.free();
  }

  if (numTokens > maxTokens) {
    return {
      content: content.slice(0, maxChars),
      numTokens,
      warning: `The extraction content would have used more tokens (${numTokens}) than the maximum we allow (${maxTokens}). -- the input has been automatically trimmed.`,
    };
  }

  return { content, numTokens, warning: "" };
}

/**
 * Asks the model to turn `pagesContent` into structured JSON.
 *
 * The content is first checked against the token budget and trimmed if
 * needed. It is then sent together with the optional system prompt and user
 * request. When a schema is supplied the response is requested in strict
 * `json_schema` format; otherwise a generic JSON object is requested.
 *
 * @param pagesContent - Page text to extract from.
 * @param systemPrompt - Optional system message; an empty string is sent when omitted.
 * @param prompt - Optional user request appended to the transform instruction.
 * @param schema - Optional output schema. An array schema (`type === "array"`
 *   or `_type === "ZodArray"`) is wrapped in an object with a single required
 *   `items` property.
 * @returns The parsed extraction (or "" when nothing was parsed) plus the
 *   token count and any trimming warning.
 * @throws LLMRefusalError when the model's message carries a refusal.
 */
export async function generateFinalExtraction({
  pagesContent,
  systemPrompt,
  prompt,
  schema,
}: GenerateCompletionsParams): Promise<{
  content: string;
  metadata: { numTokens: number; warning: string };
}> {
  const openai = new OpenAI();
  const model: TiktokenModel =
    (process.env.MODEL_NAME as TiktokenModel) ?? "gpt-4o-mini";

  const {
    content: extractionContent,
    numTokens,
    warning,
  } = fitContentToTokenBudget(pagesContent, model);

  // Wrap array schemas in an object with a single `items` property. A local
  // is used instead of reassigning the destructured parameter.
  const isArraySchema =
    schema && (schema.type === "array" || schema._type === "ZodArray");
  const effectiveSchema = isArraySchema
    ? {
        type: "object",
        properties: {
          items: schema,
        },
        required: ["items"],
        additionalProperties: false,
      }
    : schema;

  const jsonCompletion = await openai.beta.chat.completions.parse({
    model,
    messages: [
      { role: "system", content: systemPrompt ?? "" },
      { role: "user", content: [{ type: "text", text: extractionContent }] },
      {
        role: "user",
        content: prompt
          ? `Transform the above content into structured JSON output based on the following user request: ${prompt}`
          : "Transform the above content into structured JSON output.",
      },
    ],
    response_format: effectiveSchema
      ? {
          type: "json_schema",
          json_schema: {
            name: "websiteContent",
            // Plain JSON Schema objects (including the array wrapper built
            // above) have no `.shape`; fall back to the schema itself so a
            // schema is always sent.
            schema: effectiveSchema.shape ?? effectiveSchema,
            strict: true,
          },
        }
      : { type: "json_object" },
  });

  const message = jsonCompletion.choices[0].message;

  // `!= null` rather than `!== null`: a response that omits the refusal
  // field altogether (undefined) is not a refusal.
  if (message.refusal != null) {
    throw new LLMRefusalError(message.refusal);
  }

  return {
    content: message.parsed ?? "",
    metadata: {
      numTokens,
      warning,
    },
  };
}
|