Files
reason-flow/server/services/embeddingService.js
T
2025-11-06 11:08:59 +01:00

56 lines
1.6 KiB
JavaScript

// Project logger used for diagnostics in this module.
const logger = require('../utils/logger');
// Factory function from @xenova/transformers. Stays undefined when the package
// cannot be loaded; initIfNeeded() checks for that and throws.
let pipeline;
try {
// The package is required eagerly, at module load time, but is treated as an
// optional dependency: a failure here only logs a warning. The model itself is
// not loaded until initIfNeeded() runs.
({ pipeline } = require('@xenova/transformers'));
} catch (e) {
// NOTE(review): every failure is reported as "not installed" and the caught
// error is discarded. Some releases of @xenova/transformers appear to ship as
// ES modules only, in which case require() would fail even though the package
// is installed - confirm against the installed version.
logger.warn('Embedding pipeline not available. Did you install @xenova/transformers?');
}
/**
 * Text-embedding helper built on a `feature-extraction` pipeline.
 *
 * The model named by `EMBEDDING_MODEL` (default `Xenova/all-MiniLM-L6-v2`) is
 * loaded on first use, not at construction time.
 */
class EmbeddingService {
  constructor() {
    this.initialized = false;
    this.extractor = null;
    this.modelName = process.env.EMBEDDING_MODEL || 'Xenova/all-MiniLM-L6-v2';
    // In-flight model load, shared by callers that arrive while it is pending.
    this.initPromise = null;
  }

  /**
   * Loads the extraction pipeline once.
   *
   * Callers that arrive while a load is in progress wait on the same promise
   * instead of starting another load. A failed load is not cached, so a later
   * call tries again.
   *
   * @returns {Promise<void>}
   * @throws {Error} If the transformers `pipeline` factory is unavailable, or
   *   if loading the model fails.
   */
  async initIfNeeded() {
    if (this.initialized) return;
    if (!pipeline) {
      throw new Error('Transformers pipeline not available');
    }
    if (!this.initPromise) {
      this.initPromise = (async () => {
        try {
          this.extractor = await pipeline('feature-extraction', this.modelName);
          this.initialized = true;
          logger.info(`Embedding model loaded: ${this.modelName}`);
        } finally {
          // Clear on both outcomes: success is tracked by `initialized`, and a
          // failure must not leave a rejected promise behind for later callers.
          this.initPromise = null;
        }
      })();
    }
    await this.initPromise;
  }

  /**
   * Embeds a piece of text using mean pooling and normalization.
   *
   * @param {string} text - Text to embed.
   * @returns {Promise<number[]>} The embedding as a plain array, or `[]` when
   *   `text` is not a string or is empty/whitespace-only.
   * @throws {Error} If the model cannot be loaded or the extractor returns a
   *   value in an unrecognised format.
   */
  async embedText(text) {
    // Non-strings are treated like missing text instead of crashing on .trim().
    if (typeof text !== 'string' || !text.trim()) return [];
    await this.initIfNeeded();
    const output = await this.extractor(text, { pooling: 'mean', normalize: true });

    // The result type depends on the library version: a plain array, a
    // tensor-like object exposing a flat `.data` buffer, or one exposing
    // `.tolist()`.
    if (Array.isArray(output)) return output;
    if (output?.data) return Array.from(output.data);
    if (typeof output?.tolist === 'function') {
      // tolist() mirrors the tensor's dimensions (presumably [1, dim] for a
      // single string - confirm), so flatten to get a flat vector.
      return output.tolist().flat(Infinity);
    }
    throw new Error('Unexpected embedding output format');
  }

  /**
   * Cosine similarity of two equal-length numeric vectors.
   *
   * @param {ArrayLike<number>} a
   * @param {ArrayLike<number>} b
   * @returns {number} `dot(a, b) / (|a| * |b|)`, or `0` when either input is
   *   missing, the lengths differ, the vectors are empty, or either has zero
   *   magnitude.
   */
  cosineSimilarity(a, b) {
    if (!a || !b || a.length !== b.length || a.length === 0) return 0;
    let dot = 0;
    let normA = 0;
    let normB = 0;
    for (let i = 0; i < a.length; i++) {
      // Falsy components (undefined, null, NaN) count as 0.
      const va = a[i] || 0;
      const vb = b[i] || 0;
      dot += va * vb;
      normA += va * va;
      normB += vb * vb;
    }
    const denom = Math.sqrt(normA) * Math.sqrt(normB);
    // Guard against division by zero for all-zero vectors.
    return denom ? dot / denom : 0;
  }
}
// Export one service instance, created at module load, rather than the class.
module.exports = new EmbeddingService();