first commit
This commit is contained in:
@@ -0,0 +1,55 @@
|
||||
const logger = require('../utils/logger');
|
||||
|
||||
// Optional dependency: the require below runs once, when this module is first
// loaded (it is NOT deferred until the first embedding call). A failure is
// tolerated so the application can still start without embedding support;
// `pipeline` then stays undefined.
// NOTE(review): @xenova/transformers is distributed as an ES module; on Node
// versions where require() cannot load ESM this branch would always fail and a
// dynamic import() would be needed instead — confirm against the target runtime.
let pipeline;
try {
  ({ pipeline } = require('@xenova/transformers'));
} catch (e) {
  // Include the underlying reason: a missing install is only one possible
  // cause, and dropping the error makes every other cause undiagnosable.
  logger.warn(
    `Embedding pipeline not available. Did you install @xenova/transformers? (${e?.message ?? e})`
  );
}
|
||||
|
||||
/**
 * Wraps a feature-extraction pipeline to turn text into embedding vectors and
 * to compare vectors by cosine similarity.
 *
 * The model named by the EMBEDDING_MODEL environment variable (default
 * 'Xenova/all-MiniLM-L6-v2') is loaded on the first call that needs it.
 */
class EmbeddingService {
  constructor() {
    this.initialized = false;
    this.extractor = null;
    // Promise for a model load currently in progress; null when no load is running.
    this.initPromise = null;
    this.modelName = process.env.EMBEDDING_MODEL || 'Xenova/all-MiniLM-L6-v2';
  }

  /**
   * Loads the extractor once. Callers that arrive while a load is in progress
   * wait on that same load instead of starting another one.
   *
   * @returns {Promise<void>}
   * @throws {Error} If the module-level `pipeline` factory is unavailable, or
   *   whatever error the pipeline factory itself rejects with.
   */
  async initIfNeeded() {
    if (this.initialized) return;
    if (!pipeline) {
      throw new Error('Transformers pipeline not available');
    }
    if (!this.initPromise) {
      const load = async () => {
        this.extractor = await pipeline('feature-extraction', this.modelName);
        this.initialized = true;
        logger.info(`Embedding model loaded: ${this.modelName}`);
      };
      // Clear the in-flight marker once the load settles so that a failed load
      // can be retried by a later call. The reset is attached outside `load`
      // so it can never run before the assignment below.
      this.initPromise = load().finally(() => {
        this.initPromise = null;
      });
    }
    await this.initPromise;
  }

  /**
   * Embeds a piece of text.
   *
   * @param {string} text - Text to embed.
   * @returns {Promise<number[]>} Flat embedding vector; an empty array when
   *   `text` is falsy or contains only whitespace.
   * @throws {TypeError} If `text` is a truthy non-string, or the extractor
   *   returns a value that cannot be converted to an array.
   */
  async embedText(text) {
    if (!text) return [];
    if (typeof text !== 'string') {
      throw new TypeError('embedText expects a string');
    }
    if (!text.trim()) return [];

    await this.initIfNeeded();
    const output = await this.extractor(text, { pooling: 'mean', normalize: true });

    // The extractor result is converted to a plain JS array. Depending on the
    // library version the values are exposed via `.data` or `.tolist()`.
    if (Array.isArray(output)) return output;
    if (output?.data) return Array.from(output.data);
    if (typeof output?.tolist === 'function') {
      // tolist() may nest the values by dimension; flatten so this path yields
      // the same flat shape as the `.data` path.
      return output.tolist().flat(Infinity);
    }
    throw new TypeError('Unexpected embedding output format');
  }

  /**
   * Cosine similarity of two equal-length numeric vectors.
   *
   * @param {ArrayLike<number>} a
   * @param {ArrayLike<number>} b
   * @returns {number} dot(a, b) / (|a| * |b|), or 0 when either vector is
   *   missing or empty, the lengths differ, or either norm is zero.
   */
  cosineSimilarity(a, b) {
    if (!a || !b || a.length !== b.length || a.length === 0) return 0;
    let dot = 0;
    let normA = 0;
    let normB = 0;
    for (let i = 0; i < a.length; i++) {
      // Missing or NaN components are treated as 0.
      const va = a[i] || 0;
      const vb = b[i] || 0;
      dot += va * vb;
      normA += va * va;
      normB += vb * vb;
    }
    const denom = Math.sqrt(normA) * Math.sqrt(normB);
    return denom ? dot / denom : 0;
  }
}
|
||||
|
||||
// Export an already-constructed EmbeddingService instance rather than the
// class itself, so code requiring this module works with that instance.
module.exports = new EmbeddingService();
|
||||
Reference in New Issue
Block a user