2024-04-30 09:20:15 -07:00
|
|
|
import { encoding_for_model } from "@dqbd/tiktoken";
|
|
|
|
|
import { TiktokenModel } from "@dqbd/tiktoken";
|
|
|
|
|
|
|
|
|
|
/**
 * Counts how many tokens `message` produces under the encoding selected for `model`.
 *
 * If the first encode attempt throws, every literal `<|endoftext|>` marker is removed
 * from the text and encoding is attempted once more. An error from that second attempt
 * propagates to the caller. The encoder is released on every path.
 *
 * @param message - Text to tokenize.
 * @param model - Model name used to pick the encoding; it is passed to
 *   `encoding_for_model` as a `TiktokenModel` without runtime validation.
 * @returns The number of tokens the encoder produced.
 * @throws Whatever `encoding_for_model` throws, or the encoder throws on the fallback attempt.
 */
export function numTokensFromString(message: string, model: string): number {
  const encoder = encoding_for_model(model as TiktokenModel);

  try {
    let tokens: Uint32Array;
    try {
      tokens = encoder.encode(message);
    } catch {
      // Presumably the failure is the encoder rejecting the special-token marker in plain
      // text. Remove ALL occurrences: `String.prototype.replace` with a string pattern
      // would only drop the first one, leaving the retry to fail on the next.
      const sanitized = message.split("<|endoftext|>").join("");
      tokens = encoder.encode(sanitized);
    }
    return tokens.length;
  } finally {
    // Release the encoder's resources even when encoding throws.
    encoder.free();
  }
}
|