apps/api/src/lib/LLM-extraction/helpers.ts

import { encoding_for_model } from "@dqbd/tiktoken";
import { TiktokenModel } from "@dqbd/tiktoken";

/**
 * Counts how many tokens `message` encodes to under the tokenizer that
 * `encoding_for_model` selects for `model`.
 *
 * If the first encode attempt throws, every literal `<|endoftext|>` marker is
 * stripped from the text and the encode is retried once.
 *
 * @param message - Text to tokenize. It is not modified.
 * @param model - Model name used to look up the encoder; cast to `TiktokenModel`.
 * @returns The length of the token array produced by the encoder.
 * @throws Propagates any error from `encoding_for_model`, and any error from
 *   the retry encode after the markers have been stripped.
 */
export function numTokensFromString(message: string, model: string): number {
  const encoder = encoding_for_model(model as TiktokenModel);

  try {
    try {
      return encoder.encode(message).length;
    } catch {
      // NOTE(review): presumably the encoder rejects the special-token text
      // "<|endoftext|>" when it appears in plain input — confirm against the
      // tiktoken docs. A global regex is required here: String.replace with a
      // string pattern removes only the first occurrence, so a message with
      // several markers would fail again on the retry.
      const sanitized = message.replace(/<\|endoftext\|>/g, "");
      return encoder.encode(sanitized).length;
    }
  } finally {
    // Free the encoder resources on every exit path, including when the
    // retry itself throws.
    encoder.free();
  }
}
Caleb: trying to get loggin workng 2024-04-30 09:20:15 -07:00			`import { encoding_for_model } from "@dqbd/tiktoken";`
			`import { TiktokenModel } from "@dqbd/tiktoken";`

			`// This function calculates the number of tokens in a text string using GPT-3.5-turbo model`
			`export function numTokensFromString(message: string, model: string): number {`
Nick: cleanup 2024-04-30 12:19:43 -07:00			`const encoder = encoding_for_model(model as TiktokenModel);`
Caleb: trying to get loggin workng 2024-04-30 09:20:15 -07:00
Nick: cleanup 2024-04-30 12:19:43 -07:00			`// Encode the message into tokens`
fix encoding if error 2024-10-18 11:50:58 -03:00			`let tokens: Uint32Array;`
			`try {`
			`tokens = encoder.encode(message);`
			`} catch (error) {`
			`message = message.replace("<\|endoftext\|>", "");`
			`tokens = encoder.encode(message);`
			`}`
Caleb: trying to get loggin workng 2024-04-30 09:20:15 -07:00
Nick: cleanup 2024-04-30 12:19:43 -07:00			`// Free the encoder resources after use`
			`encoder.free();`
Caleb: trying to get loggin workng 2024-04-30 09:20:15 -07:00
Nick: cleanup 2024-04-30 12:19:43 -07:00			`// Return the number of tokens`
			`return tokens.length;`
			`}`