const logger = require('../utils/logger');

const PIPELINE_UNAVAILABLE = 'Transformers pipeline not available';

// `pipeline` is the factory exported by @xenova/transformers. It stays
// undefined until either the synchronous require below or the dynamic
// import in resolvePipeline() succeeds.
let pipeline;
try {
  // Optional dependency: this module must still load when the package is absent.
  ({ pipeline } = require('@xenova/transformers'));
} catch (e) {
  // @xenova/transformers ships as an ES module, so require() can fail with
  // ERR_REQUIRE_ESM even though the package is installed. That case is
  // recovered later via dynamic import(), so only warn for other failures.
  if (!e || e.code !== 'ERR_REQUIRE_ESM') {
    logger.warn('Embedding pipeline not available. Did you install @xenova/transformers?');
  }
}

/**
 * Resolve the transformers `pipeline` factory, falling back to a dynamic
 * `import()` when the synchronous `require` at module load did not provide it.
 *
 * @returns {Promise<Function>} The `pipeline` factory.
 * @throws {Error} 'Transformers pipeline not available' when the package
 *   cannot be loaded; the underlying failure is attached as `cause`.
 */
async function resolvePipeline() {
  if (pipeline) return pipeline;

  let mod;
  try {
    mod = await import('@xenova/transformers');
  } catch (err) {
    throw new Error(PIPELINE_UNAVAILABLE, { cause: err });
  }

  // Named export for native ESM; `default` covers interop-wrapped builds.
  pipeline = mod.pipeline || (mod.default && mod.default.pipeline);
  if (!pipeline) {
    throw new Error(PIPELINE_UNAVAILABLE);
  }
  return pipeline;
}

/**
 * Lazily-initialised text embedding service backed by a
 * `feature-extraction` pipeline from @xenova/transformers.
 *
 * The model name comes from the EMBEDDING_MODEL environment variable and
 * defaults to 'Xenova/all-MiniLM-L6-v2'.
 */
class EmbeddingService {
  constructor() {
    this.initialized = false;
    this.extractor = null;
    // In-flight initialisation shared by concurrent callers; null when idle.
    this.initPromise = null;
    this.modelName = process.env.EMBEDDING_MODEL || 'Xenova/all-MiniLM-L6-v2';
  }

  /**
   * Load the extractor on first use. Concurrent callers share a single
   * load instead of each constructing their own pipeline.
   *
   * @returns {Promise<void>}
   * @throws {Error} When the transformers package or the model cannot be loaded.
   */
  async initIfNeeded() {
    if (this.initialized) return;
    if (!this.initPromise) {
      this.initPromise = this.loadExtractor();
    }
    await this.initPromise;
  }

  /**
   * Build the extractor and mark the service as initialised.
   * Internal helper for initIfNeeded(); not intended to be called directly.
   *
   * @returns {Promise<void>}
   */
  async loadExtractor() {
    try {
      const createPipeline = await resolvePipeline();
      this.extractor = await createPipeline('feature-extraction', this.modelName);
      this.initialized = true;
      logger.info(`Embedding model loaded: ${this.modelName}`);
    } finally {
      // Clear the shared promise once settled so a failed load can be
      // retried by the next call; after success `initialized` short-circuits.
      this.initPromise = null;
    }
  }

  /**
   * Embed a piece of text using mean pooling and normalisation.
   *
   * @param {string} text - Text to embed.
   * @returns {Promise<number[]>} Flat embedding vector, or `[]` when `text`
   *   is empty or whitespace-only.
   * @throws {Error} When the extractor cannot be initialised.
   * @throws {TypeError} When the extractor output has no recognised shape.
   */
  async embedText(text) {
    if (!text || !text.trim()) return [];
    await this.initIfNeeded();

    const output = await this.extractor(text, { pooling: 'mean', normalize: true });

    // The extractor result is converted to a plain JS array. Which accessor
    // exists depends on the library version: `.data` or `.tolist()`.
    if (Array.isArray(output)) return output;
    if (output?.data) return Array.from(output.data);
    if (typeof output?.tolist === 'function') {
      // tolist() may return nested arrays (e.g. a leading batch dimension);
      // flatten so the result matches the flat vector of the `.data` branch.
      return output.tolist().flat(Infinity);
    }
    throw new TypeError('Unexpected embedding output: expected an array or a tensor');
  }

  /**
   * Cosine similarity between two equal-length numeric vectors.
   *
   * @param {ArrayLike<number>} a - First vector.
   * @param {ArrayLike<number>} b - Second vector.
   * @returns {number} `dot(a, b) / (|a| * |b|)`, or 0 when either vector is
   *   missing, empty, of a different length, or has zero magnitude.
   */
  cosineSimilarity(a, b) {
    if (!a || !b || a.length !== b.length || a.length === 0) return 0;

    let dot = 0;
    let normA = 0;
    let normB = 0;
    for (let i = 0; i < a.length; i++) {
      // Falsy entries (undefined, NaN, null) are treated as 0.
      const va = a[i] || 0;
      const vb = b[i] || 0;
      dot += va * vb;
      normA += va * va;
      normB += vb * vb;
    }

    const denom = Math.sqrt(normA) * Math.sqrt(normB);
    return denom ? dot / denom : 0;
  }
}

module.exports = new EmbeddingService();