Files
firecrawl/apps/api/src/lib/LLM-extraction/helpers.ts
T

23 lines
679 B
TypeScript
Raw Normal View History

2024-04-30 09:20:15 -07:00
import { encoding_for_model } from "@dqbd/tiktoken";
import { TiktokenModel } from "@dqbd/tiktoken";
/**
 * Counts how many tokens `message` occupies under the tokenizer that
 * tiktoken associates with `model`.
 *
 * If the first encoding attempt throws and the text contains the literal
 * marker `<|endoftext|>`, every occurrence of that marker is stripped and
 * the text is encoded once more. The marker is presumably rejected by the
 * encoder as a disallowed special token — confirm against the tiktoken docs.
 *
 * @param message - Text whose token count is wanted.
 * @param model - Model name; cast to `TiktokenModel` without validation, so
 *   an unsupported name is left for `encoding_for_model` to reject.
 * @returns The number of tokens produced for `message`.
 * @throws Whatever `encoding_for_model` or `encode` throws when the text
 *   cannot be encoded even after the marker has been removed.
 */
export function numTokensFromString(message: string, model: string): number {
  const endOfText = "<|endoftext|>";
  const encoder = encoding_for_model(model as TiktokenModel);

  try {
    let tokens: Uint32Array;
    try {
      tokens = encoder.encode(message);
    } catch (error) {
      // Nothing to strip: a retry would fail the same way, so surface the
      // original error instead of encoding the same text twice.
      if (!message.includes(endOfText)) {
        throw error;
      }
      // Remove ALL occurrences; String.prototype.replace with a string
      // pattern would only drop the first one and the retry would throw.
      tokens = encoder.encode(message.split(endOfText).join(""));
    }
    return tokens.length;
  } finally {
    // Always release the encoder's resources, including when encoding throws.
    encoder.free();
  }
}