// firecrawl/apps/api/src/lib/ranker.ts
import axios from 'axios';
import { configDotenv } from 'dotenv';
import OpenAI from "openai";
// Run dotenv configuration first: the client constructed below reads
// process.env.OPENAI_API_KEY at module load time, so this call must precede it.
configDotenv();
// Module-level OpenAI client, authenticated with the OPENAI_API_KEY
// environment variable.
const openai = new OpenAI({
apiKey: process.env.OPENAI_API_KEY,
});
/**
 * Requests an embedding for `text` from the module-level OpenAI client using
 * the "text-embedding-ada-002" model and returns the vector of the first
 * entry in the response.
 *
 * @param text - The string to embed.
 * @returns The `embedding` field of the first item in the response's `data`.
 */
async function getEmbedding(text: string) {
  const { data } = await openai.embeddings.create({
    model: "text-embedding-ada-002",
    input: text,
    encoding_format: "float",
  });
  // Only one input is sent, so only the first result is read.
  return data[0].embedding;
}
/**
 * Computes the cosine similarity between two numeric vectors.
 *
 * Vectors of different lengths are compared as if the shorter one were padded
 * with zeros, so a length mismatch never produces NaN from reading past the
 * end of an array.
 *
 * @param vec1 - First vector.
 * @param vec2 - Second vector.
 * @returns The dot product divided by the product of the magnitudes, or 0 when
 *          either vector has zero magnitude (including empty vectors).
 */
const cosineSimilarity = (vec1: number[], vec2: number[]): number => {
  const length = Math.max(vec1.length, vec2.length);
  let dotProduct = 0;
  let sumSquares1 = 0;
  let sumSquares2 = 0;

  // Single pass: accumulate the dot product and both squared norms together.
  for (let i = 0; i < length; i++) {
    // Missing components (index past the end of the shorter vector) count as 0.
    const a = vec1[i] ?? 0;
    const b = vec2[i] ?? 0;
    dotProduct += a * b;
    sumSquares1 += a * a;
    sumSquares2 += b * b;
  }

  const magnitude1 = Math.sqrt(sumSquares1);
  const magnitude2 = Math.sqrt(sumSquares2);
  // Avoid dividing by zero for all-zero or empty vectors.
  if (magnitude1 === 0 || magnitude2 === 0) return 0;
  return dotProduct / (magnitude1 * magnitude2);
};
/**
 * Converts `text` into a term-frequency vector over the words of `searchQuery`.
 *
 * The query is lower-cased and split on runs of non-word characters; empty
 * tokens (produced by leading/trailing punctuation or an empty query) are
 * discarded. Each remaining word contributes one component: the number of
 * non-overlapping, case-insensitive occurrences of that word in `text`,
 * divided by `text.length`.
 *
 * @param searchQuery - Query whose words define the vector's dimensions.
 * @param text - Text in which occurrences are counted.
 * @returns One frequency per non-empty query word; every component is 0 when
 *          `text` is empty.
 */
const textToVector = (searchQuery: string, text: string): number[] => {
  // Drop empty tokens: an empty pattern would match at every position in the text.
  const words = searchQuery
    .toLowerCase()
    .split(/\W+/)
    .filter((word) => word.length > 0);
  // Lower-case the text once rather than once per query word.
  const lowerText = text.toLowerCase();

  return words.map((word) => {
    // Empty text contains no occurrences; also avoids dividing by zero.
    if (text.length === 0) return 0;
    // Splitting on the word yields (occurrences + 1) pieces.
    const count = lowerText.split(word).length - 1;
    return count / text.length;
  });
};
/**
 * Ranks links by semantic similarity to a search query.
 *
 * An embedding is requested for the query and for each link string (the link
 * text itself is what gets embedded), and every link is scored by the cosine
 * similarity between the two embeddings.
 *
 * @param links - Link strings to rank.
 * @param searchQuery - Query the links are compared against.
 * @returns `{ link, score }` pairs sorted by descending score. Returns an empty
 *          array when `links` is empty or when any embedding request fails
 *          (the error is logged rather than rethrown).
 */
async function performRanking(
  links: string[],
  searchQuery: string
): Promise<{ link: string; score: number }[]> {
  // Nothing to rank: skip the embedding request for the query entirely.
  if (links.length === 0) return [];

  try {
    // Generate the embedding for the search query first, so a failure here
    // happens before any per-link requests are issued.
    const queryEmbedding = await getEmbedding(searchQuery);

    // Embed every link concurrently and score it against the query.
    const linksAndScores = await Promise.all(
      links.map(async (link) => {
        const linkEmbedding = await getEmbedding(link);
        const score = cosineSimilarity(queryEmbedding, linkEmbedding);
        return { link, score };
      })
    );

    // Highest similarity first.
    linksAndScores.sort((a, b) => b.score - a.score);
    return linksAndScores;
  } catch (error) {
    // Best-effort: callers receive an empty ranking instead of an exception.
    console.error(`Error performing semantic search: ${error}`);
    return [];
  }
}
export { performRanking };