Nick: re-ranker safety + unit tests
This commit is contained in:
@@ -0,0 +1,68 @@
|
|||||||
|
import { performRanking } from './ranker';
|
||||||
|
|
||||||
|
describe('performRanking', () => {
|
||||||
|
it('should rank links based on similarity to search query', async () => {
|
||||||
|
const linksWithContext = [
|
||||||
|
'url: https://example.com/dogs, title: All about dogs, description: Learn about different dog breeds',
|
||||||
|
'url: https://example.com/cats, title: Cat care guide, description: Everything about cats',
|
||||||
|
'url: https://example.com/pets, title: General pet care, description: Care for all types of pets'
|
||||||
|
];
|
||||||
|
|
||||||
|
const links = [
|
||||||
|
'https://example.com/dogs',
|
||||||
|
'https://example.com/cats',
|
||||||
|
'https://example.com/pets'
|
||||||
|
];
|
||||||
|
|
||||||
|
const searchQuery = 'cats training';
|
||||||
|
|
||||||
|
const result = await performRanking(linksWithContext, links, searchQuery);
|
||||||
|
|
||||||
|
// Should return array of objects with link, linkWithContext, score, originalIndex
|
||||||
|
expect(result).toBeInstanceOf(Array);
|
||||||
|
expect(result.length).toBe(3);
|
||||||
|
|
||||||
|
// First result should be the dogs page since query is about dogs
|
||||||
|
expect(result[0].link).toBe('https://example.com/cats');
|
||||||
|
|
||||||
|
// Each result should have required properties
|
||||||
|
result.forEach(item => {
|
||||||
|
expect(item).toHaveProperty('link');
|
||||||
|
expect(item).toHaveProperty('linkWithContext');
|
||||||
|
expect(item).toHaveProperty('score');
|
||||||
|
expect(item).toHaveProperty('originalIndex');
|
||||||
|
expect(typeof item.score).toBe('number');
|
||||||
|
expect(item.score).toBeGreaterThanOrEqual(0);
|
||||||
|
expect(item.score).toBeLessThanOrEqual(1);
|
||||||
|
});
|
||||||
|
|
||||||
|
// Scores should be in descending order
|
||||||
|
for (let i = 1; i < result.length; i++) {
|
||||||
|
expect(result[i].score).toBeLessThanOrEqual(result[i-1].score);
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
it('should handle empty inputs', async () => {
|
||||||
|
const result = await performRanking([], [], '');
|
||||||
|
expect(result).toEqual([]);
|
||||||
|
});
|
||||||
|
|
||||||
|
it('should maintain original order for equal scores', async () => {
|
||||||
|
const linksWithContext = [
|
||||||
|
'url: https://example.com/1, title: Similar content A, description: test',
|
||||||
|
'url: https://example.com/2, title: Similar content B, description: test'
|
||||||
|
];
|
||||||
|
|
||||||
|
const links = [
|
||||||
|
'https://example.com/1',
|
||||||
|
'https://example.com/2'
|
||||||
|
];
|
||||||
|
|
||||||
|
const searchQuery = 'test';
|
||||||
|
|
||||||
|
const result = await performRanking(linksWithContext, links, searchQuery);
|
||||||
|
|
||||||
|
// If scores are equal, original order should be maintained
|
||||||
|
expect(result[0].originalIndex).toBeLessThan(result[1].originalIndex);
|
||||||
|
});
|
||||||
|
});
|
||||||
+27
-13
@@ -42,29 +42,43 @@ const textToVector = (searchQuery: string, text: string): number[] => {
|
|||||||
|
|
||||||
async function performRanking(linksWithContext: string[], links: string[], searchQuery: string) {
|
async function performRanking(linksWithContext: string[], links: string[], searchQuery: string) {
|
||||||
try {
|
try {
|
||||||
|
// Handle invalid inputs
|
||||||
|
if (!searchQuery || !linksWithContext.length || !links.length) {
|
||||||
|
return [];
|
||||||
|
}
|
||||||
|
|
||||||
|
// Sanitize search query by removing null characters
|
||||||
|
const sanitizedQuery = searchQuery;
|
||||||
|
|
||||||
// Generate embeddings for the search query
|
// Generate embeddings for the search query
|
||||||
const queryEmbedding = await getEmbedding(searchQuery);
|
const queryEmbedding = await getEmbedding(sanitizedQuery);
|
||||||
|
|
||||||
// Generate embeddings for each link and calculate similarity
|
// Generate embeddings for each link and calculate similarity
|
||||||
const linksAndScores = await Promise.all(linksWithContext.map(async (linkWithContext, index) => {
|
const linksAndScores = await Promise.all(linksWithContext.map(async (linkWithContext, index) => {
|
||||||
const linkEmbedding = await getEmbedding(linkWithContext);
|
try {
|
||||||
|
const linkEmbedding = await getEmbedding(linkWithContext);
|
||||||
|
const score = cosineSimilarity(queryEmbedding, linkEmbedding);
|
||||||
|
|
||||||
// console.log("linkEmbedding", linkEmbedding);
|
return {
|
||||||
// const linkVector = textToVector(searchQuery, linkWithContext);
|
link: links[index],
|
||||||
const score = cosineSimilarity(queryEmbedding, linkEmbedding);
|
linkWithContext,
|
||||||
// console.log("score", score);
|
score,
|
||||||
return {
|
originalIndex: index
|
||||||
link: links[index], // Use corresponding link from links array
|
};
|
||||||
linkWithContext,
|
} catch (err) {
|
||||||
score,
|
// If embedding fails for a link, return with score 0
|
||||||
originalIndex: index // Store original position
|
return {
|
||||||
};
|
link: links[index],
|
||||||
|
linkWithContext,
|
||||||
|
score: 0,
|
||||||
|
originalIndex: index
|
||||||
|
};
|
||||||
|
}
|
||||||
}));
|
}));
|
||||||
|
|
||||||
// Sort links based on similarity scores while preserving original order for equal scores
|
// Sort links based on similarity scores while preserving original order for equal scores
|
||||||
linksAndScores.sort((a, b) => {
|
linksAndScores.sort((a, b) => {
|
||||||
const scoreDiff = b.score - a.score;
|
const scoreDiff = b.score - a.score;
|
||||||
// If scores are equal, maintain original order
|
|
||||||
return scoreDiff === 0 ? a.originalIndex - b.originalIndex : scoreDiff;
|
return scoreDiff === 0 ? a.originalIndex - b.originalIndex : scoreDiff;
|
||||||
});
|
});
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user