Files
firecrawl/apps/api/src/lib/map-cosine.ts
T

47 lines
1.6 KiB
TypeScript
Raw Normal View History

2024-11-07 20:57:33 +01:00
import { logger } from "./logger";
2024-08-28 15:59:20 -03:00
/**
 * Ranks links by how closely their text matches a search query.
 *
 * The query is lowercased and split on non-word characters into terms. Each
 * link, and the query itself, is turned into a vector holding, for every term,
 * the number of occurrences of that term divided by the text length. Links are
 * then ordered by the cosine similarity between their vector and the query's
 * vector, highest first.
 *
 * @param links - Candidate links to rank. The array is not mutated.
 * @param searchQuery - Free-text query the links are ranked against.
 * @returns A new array containing the same links, best match first. When the
 * query contains no terms, a copy of `links` in its original order is
 * returned. If an error is thrown while ranking, it is logged and the input
 * array is returned as-is.
 */
export function performCosineSimilarity(
  links: string[],
  searchQuery: string
): string[] {
  try {
    // Splitting on \W+ produces empty strings for an empty query or one that
    // starts/ends with punctuation. An empty pattern matches at every
    // position, which would swamp the real term counts, so drop those tokens.
    const queryTerms = searchQuery
      .toLowerCase()
      .split(/\W+/)
      .filter((term) => term.length > 0);

    // Nothing to rank against: keep the caller's order.
    if (queryTerms.length === 0) return [...links];

    // Terms consist solely of \w characters, so they need no regex escaping.
    // Built once here instead of once per link per term.
    const termPatterns = queryTerms.map((term) => new RegExp(term, "g"));

    // Cosine of the angle between two vectors; 0 when either has no magnitude.
    const cosineSimilarity = (vec1: number[], vec2: number[]): number => {
      const dotProduct = vec1.reduce(
        (sum, val, i) => sum + val * (vec2[i] ?? 0),
        0
      );
      const magnitude1 = Math.sqrt(
        vec1.reduce((sum, val) => sum + val * val, 0)
      );
      const magnitude2 = Math.sqrt(
        vec2.reduce((sum, val) => sum + val * val, 0)
      );
      if (magnitude1 === 0 || magnitude2 === 0) return 0;
      return dotProduct / (magnitude1 * magnitude2);
    };

    // Per-term occurrence count normalised by text length.
    const textToVector = (text: string): number[] => {
      // Avoid 0 / 0 = NaN for empty text; NaN scores would make the sort
      // comparator inconsistent.
      if (text.length === 0) return termPatterns.map(() => 0);
      const lowered = text.toLowerCase();
      return termPatterns.map(
        (pattern) => (lowered.match(pattern) || []).length / text.length
      );
    };

    // The query vector is identical for every link, so compute it once.
    const searchVector = textToVector(searchQuery);

    return links
      .map((link) => ({
        link,
        score: cosineSimilarity(textToVector(link), searchVector),
      }))
      .sort((a, b) => b.score - a.score)
      .map((item) => item.link);
  } catch (error) {
    logger.error(`Error performing cosine similarity: ${error}`);
    return links;
  }
}