56 lines
1.6 KiB
JavaScript
56 lines
1.6 KiB
JavaScript
|
|
const logger = require('../utils/logger');
|
||
|
|
|
||
|
|
// Factory exported by @xenova/transformers; stays undefined when the package
// could not be loaded so the rest of the application can still start.
let pipeline;
try {
  // Optional dependency: this require runs once at module load, and a failure
  // is tolerated rather than treated as fatal.
  ({ pipeline } = require('@xenova/transformers'));
} catch (e) {
  // Surface the underlying reason. The load can fail for causes other than a
  // missing install (for example an ES-module-only build rejecting require()),
  // and the bare hint alone would send the operator in the wrong direction.
  const reason = e?.message ?? String(e);
  logger.warn(`Embedding pipeline not available. Did you install @xenova/transformers? (${reason})`);
}
|
||
|
|
|
||
|
|
/**
 * Wraps a feature-extraction pipeline to turn text into embedding vectors and
 * provides a cosine-similarity helper for comparing them.
 *
 * The model is loaded on first use and the loaded extractor is reused for all
 * subsequent calls on the same instance.
 */
class EmbeddingService {
  constructor() {
    /** True once the extractor has been created successfully. */
    this.initialized = false;
    /** Callable returned by the pipeline factory; null until initialized. */
    this.extractor = null;
    /** Model identifier; overridable through the EMBEDDING_MODEL env variable. */
    this.modelName = process.env.EMBEDDING_MODEL || 'Xenova/all-MiniLM-L6-v2';
    /** In-flight initialization shared by concurrent callers; null when idle. */
    this.initPromise = null;
  }

  /**
   * Ensures the extractor is loaded exactly once.
   *
   * Concurrent callers share a single in-flight load instead of each creating
   * their own pipeline. A failed load is not cached, so a later call retries.
   *
   * @returns {Promise<void>}
   * @throws {Error} 'Transformers pipeline not available' when the pipeline
   *   factory cannot be obtained; any error raised while creating the pipeline
   *   is propagated unchanged.
   */
  async initIfNeeded() {
    if (this.initialized) return;
    if (!this.initPromise) {
      this.initPromise = this.loadExtractor().catch((err) => {
        // Forget the failed attempt so the next call can try again.
        this.initPromise = null;
        throw err;
      });
    }
    await this.initPromise;
  }

  /**
   * Resolves the pipeline factory and builds the extractor.
   * Internal helper for initIfNeeded(); not intended to be called directly.
   *
   * @private
   * @returns {Promise<void>}
   */
  async loadExtractor() {
    let createPipeline = pipeline;
    if (!createPipeline) {
      // The module-level require() may have failed (e.g. the package being
      // ESM-only); a dynamic import() can still load it from CommonJS.
      try {
        ({ pipeline: createPipeline } = await import('@xenova/transformers'));
      } catch (err) {
        throw new Error('Transformers pipeline not available', { cause: err });
      }
    }
    if (!createPipeline) {
      throw new Error('Transformers pipeline not available');
    }
    this.extractor = await createPipeline('feature-extraction', this.modelName);
    this.initialized = true;
    logger.info(`Embedding model loaded: ${this.modelName}`);
  }

  /**
   * Computes an embedding for the given text.
   *
   * @param {string} text - Text to embed.
   * @returns {Promise<number[]>} Flat embedding vector, or [] when the text is
   *   empty, whitespace-only, or otherwise falsy (the model is not loaded then).
   * @throws {Error} When the model cannot be initialized or the extractor
   *   returns a value that cannot be converted to an array.
   */
  async embedText(text) {
    if (!text || !text.trim()) return [];
    await this.initIfNeeded();
    const output = await this.extractor(text, { pooling: 'mean', normalize: true });

    // Depending on the library version the result exposes its values either
    // as a `.data` buffer or through `.tolist()`; normalize to a plain array.
    if (Array.isArray(output)) return output;
    if (output?.data) return Array.from(output.data);
    if (typeof output?.tolist === 'function') {
      // tolist() may keep the batch dimension as nesting; flatten so this path
      // yields the same flat shape as the `.data` path.
      return output.tolist().flat(Infinity);
    }
    throw new Error('Unsupported embedding output: expected an array, .data or .tolist()');
  }

  /**
   * Cosine similarity of two equal-length numeric vectors.
   *
   * @param {ArrayLike<number>} a
   * @param {ArrayLike<number>} b
   * @returns {number} Similarity value, or 0 when either vector is missing or
   *   empty, the lengths differ, or either vector has zero magnitude.
   */
  cosineSimilarity(a, b) {
    if (!a || !b || a.length !== b.length || a.length === 0) return 0;
    let dot = 0;
    let normA = 0;
    let normB = 0;
    for (let i = 0; i < a.length; i++) {
      // Missing / NaN / null components count as 0.
      const va = a[i] || 0;
      const vb = b[i] || 0;
      dot += va * vb;
      normA += va * va;
      normB += vb * vb;
    }
    const denom = Math.sqrt(normA) * Math.sqrt(normB);
    // Guard against division by zero (and NaN) for zero-magnitude vectors.
    return denom ? dot / denom : 0;
  }
}
|
||
|
|
|
||
|
|
// Export one EmbeddingService instance (not the class), created when this
// module is first loaded.
module.exports = new EmbeddingService();
|