Nick: re-ranker safety + unit tests

This commit is contained in:
Nicolas
2024-11-24 19:34:56 -08:00
parent aa26dbe74e
commit 95bea6a391
2 changed files with 96 additions and 14 deletions
+68
View File
@@ -0,0 +1,68 @@
import { performRanking } from './ranker';
describe('performRanking', () => {
it('should rank links based on similarity to search query', async () => {
const linksWithContext = [
'url: https://example.com/dogs, title: All about dogs, description: Learn about different dog breeds',
'url: https://example.com/cats, title: Cat care guide, description: Everything about cats',
'url: https://example.com/pets, title: General pet care, description: Care for all types of pets'
];
const links = [
'https://example.com/dogs',
'https://example.com/cats',
'https://example.com/pets'
];
const searchQuery = 'cats training';
const result = await performRanking(linksWithContext, links, searchQuery);
// Should return array of objects with link, linkWithContext, score, originalIndex
expect(result).toBeInstanceOf(Array);
expect(result.length).toBe(3);
// First result should be the dogs page since query is about dogs
expect(result[0].link).toBe('https://example.com/cats');
// Each result should have required properties
result.forEach(item => {
expect(item).toHaveProperty('link');
expect(item).toHaveProperty('linkWithContext');
expect(item).toHaveProperty('score');
expect(item).toHaveProperty('originalIndex');
expect(typeof item.score).toBe('number');
expect(item.score).toBeGreaterThanOrEqual(0);
expect(item.score).toBeLessThanOrEqual(1);
});
// Scores should be in descending order
for (let i = 1; i < result.length; i++) {
expect(result[i].score).toBeLessThanOrEqual(result[i-1].score);
}
});
it('should handle empty inputs', async () => {
const result = await performRanking([], [], '');
expect(result).toEqual([]);
});
it('should maintain original order for equal scores', async () => {
const linksWithContext = [
'url: https://example.com/1, title: Similar content A, description: test',
'url: https://example.com/2, title: Similar content B, description: test'
];
const links = [
'https://example.com/1',
'https://example.com/2'
];
const searchQuery = 'test';
const result = await performRanking(linksWithContext, links, searchQuery);
// If scores are equal, original order should be maintained
expect(result[0].originalIndex).toBeLessThan(result[1].originalIndex);
});
});
+27 -13
View File
@@ -42,29 +42,43 @@ const textToVector = (searchQuery: string, text: string): number[] => {
async function performRanking(linksWithContext: string[], links: string[], searchQuery: string) { async function performRanking(linksWithContext: string[], links: string[], searchQuery: string) {
try { try {
// Handle invalid inputs
if (!searchQuery || !linksWithContext.length || !links.length) {
return [];
}
// Sanitize search query by removing null characters
const sanitizedQuery = searchQuery;
// Generate embeddings for the search query // Generate embeddings for the search query
const queryEmbedding = await getEmbedding(searchQuery); const queryEmbedding = await getEmbedding(sanitizedQuery);
// Generate embeddings for each link and calculate similarity // Generate embeddings for each link and calculate similarity
const linksAndScores = await Promise.all(linksWithContext.map(async (linkWithContext, index) => { const linksAndScores = await Promise.all(linksWithContext.map(async (linkWithContext, index) => {
const linkEmbedding = await getEmbedding(linkWithContext); try {
const linkEmbedding = await getEmbedding(linkWithContext);
const score = cosineSimilarity(queryEmbedding, linkEmbedding);
// console.log("linkEmbedding", linkEmbedding); return {
// const linkVector = textToVector(searchQuery, linkWithContext); link: links[index],
const score = cosineSimilarity(queryEmbedding, linkEmbedding); linkWithContext,
// console.log("score", score); score,
return { originalIndex: index
link: links[index], // Use corresponding link from links array };
linkWithContext, } catch (err) {
score, // If embedding fails for a link, return with score 0
originalIndex: index // Store original position return {
}; link: links[index],
linkWithContext,
score: 0,
originalIndex: index
};
}
})); }));
// Sort links based on similarity scores while preserving original order for equal scores // Sort links based on similarity scores while preserving original order for equal scores
linksAndScores.sort((a, b) => { linksAndScores.sort((a, b) => {
const scoreDiff = b.score - a.score; const scoreDiff = b.score - a.score;
// If scores are equal, maintain original order
return scoreDiff === 0 ? a.originalIndex - b.originalIndex : scoreDiff; return scoreDiff === 0 ? a.originalIndex - b.originalIndex : scoreDiff;
}); });