const { Document } = require('../models');
const embeddingService = require('./embeddingService');
const logger = require('../utils/logger');

// Defaults used when the matching GRAPH_RAG_* environment variable is unset or invalid.
const DEFAULT_SIMILARITY_THRESHOLD = 0.2;
const DEFAULT_MAX_NEIGHBORS = 10;
const DEFAULT_MAX_RESULTS = 10;

// Each shared tag adds TAG_BONUS_WEIGHT to an edge score, for at most TAG_BONUS_CAP tags.
const TAG_BONUS_CAP = 3;
const TAG_BONUS_WEIGHT = 0.05;

// Traversal limits used by graphSearch.
const SEED_COUNT = 5; // number of top query matches used as traversal roots
const MAX_HOPS = 2; // nodes further than this from a seed are not expanded
const MAX_SCORED_NODES = 200; // stop expanding once this many nodes have a score
const MAX_REASONS = 3; // explanation entries kept per result
const SNIPPET_LENGTH = 400; // characters of extracted text returned per result

/**
 * Reads a floating-point setting from the environment.
 *
 * @param {string} name - Environment variable name.
 * @param {number} fallback - Value used when the variable is unset or not a finite number.
 * @returns {number} The parsed value, or `fallback`.
 */
function readFloatEnv(name, fallback) {
  const parsed = Number.parseFloat(process.env[name]);
  return Number.isFinite(parsed) ? parsed : fallback;
}

/**
 * Reads a non-negative integer setting from the environment.
 *
 * A NaN or negative limit would otherwise flow into `Array.prototype.slice`
 * and silently truncate neighbour lists or results, so such values are rejected.
 *
 * @param {string} name - Environment variable name.
 * @param {number} fallback - Value used when the variable is unset or invalid.
 * @returns {number} The parsed value, or `fallback`.
 */
function readCountEnv(name, fallback) {
  const parsed = Number.parseInt(process.env[name], 10);
  return Number.isInteger(parsed) && parsed >= 0 ? parsed : fallback;
}

/**
 * Graph-based retrieval over indexed documents: documents are linked by
 * embedding similarity plus a small tag-overlap bonus, and a search walks
 * that graph outwards from the documents most similar to the query.
 */
class GraphRagService {
  constructor() {
    this.similarityThreshold = readFloatEnv('GRAPH_RAG_SIM_THRESHOLD', DEFAULT_SIMILARITY_THRESHOLD);
    this.maxNeighbors = readCountEnv('GRAPH_RAG_MAX_NEIGHBORS', DEFAULT_MAX_NEIGHBORS);
    this.maxResults = readCountEnv('GRAPH_RAG_MAX_RESULTS', DEFAULT_MAX_RESULTS);
  }

  /**
   * Similarity between two embeddings; delegates to `embeddingService.cosineSimilarity`.
   *
   * @param {*} a - First embedding.
   * @param {*} b - Second embedding.
   * @returns {*} Whatever `embeddingService.cosineSimilarity` returns (used as a number here).
   */
  scoreSimilarity(a, b) {
    return embeddingService.cosineSimilarity(a, b);
  }

  /**
   * Counts the distinct tags shared by two tag lists, ignoring case.
   * Falsy entries are normalised to the empty string before comparison.
   *
   * @param {Array<string>} [tagsA=[]] - First tag list (null is treated as empty).
   * @param {Array<string>} [tagsB=[]] - Second tag list (null is treated as empty).
   * @returns {number} Number of distinct lower-cased tags present in both lists.
   */
  tagOverlap(tagsA = [], tagsB = []) {
    const normalize = (tags) => new Set((tags || []).map((t) => (t || '').toLowerCase()));
    const setA = normalize(tagsA);
    const setB = normalize(tagsB);

    let overlap = 0;
    for (const tag of setA) {
      if (setB.has(tag)) overlap += 1;
    }
    return overlap;
  }

  /**
   * Builds an undirected similarity graph over the given nodes.
   *
   * Every pair of nodes is compared; a pair is linked when its hybrid score
   * (embedding similarity plus the capped tag bonus) reaches
   * `this.similarityThreshold`. Each adjacency list is then sorted by
   * descending score and cut to `this.maxNeighbors`, so after trimming an
   * edge may survive in only one direction.
   *
   * @param {Array<{id: *, embedding: *, tags: Array<string>}>} nodes - Graph nodes.
   * @returns {Map<*, Array<{id: *, score: number, reason: {sim: number, tagScore: number}}>>}
   *   Adjacency lists keyed by node id; nodes without any edge have no entry.
   */
  buildGraph(nodes) {
    const edges = new Map();
    const adjacencyOf = (id) => {
      if (!edges.has(id)) edges.set(id, []);
      return edges.get(id);
    };

    for (let i = 0; i < nodes.length; i++) {
      const ni = nodes[i];
      for (let j = i + 1; j < nodes.length; j++) {
        const nj = nodes[j];
        const sim = this.scoreSimilarity(ni.embedding, nj.embedding);
        const tagScore = this.tagOverlap(ni.tags, nj.tags);
        const hybrid = sim + Math.min(tagScore, TAG_BONUS_CAP) * TAG_BONUS_WEIGHT; // light tag bonus

        if (hybrid >= this.similarityThreshold) {
          const fromI = adjacencyOf(ni.id);
          const fromJ = adjacencyOf(nj.id);
          fromI.push({ id: nj.id, score: hybrid, reason: { sim, tagScore } });
          fromJ.push({ id: ni.id, score: hybrid, reason: { sim, tagScore } });
        }
      }
    }

    // Keep only the strongest neighbours of each node.
    for (const [id, neighbors] of edges) {
      neighbors.sort((a, b) => b.score - a.score);
      edges.set(id, neighbors.slice(0, this.maxNeighbors));
    }
    return edges;
  }

  /**
   * Searches indexed documents by walking the similarity graph from the
   * documents most similar to the query.
   *
   * NOTE(review): the graph is rebuilt from all candidate documents on every
   * call, which is quadratic in the number of candidates.
   *
   * @param {{query: string, category: (string|undefined)}} params - Search text and an
   *   optional category filter.
   * @returns {Promise<{results: Array<Object>}>} Up to `this.maxResults` entries sorted by
   *   descending score, each with `id`, `original_filename`, `snippet`, `category`,
   *   `created_at`, `score`, `hops` and `reasons`.
   */
  async graphSearch({ query, category }) {
    const queryEmbedding = await embeddingService.embedText(query);

    // Load candidate docs (presumably a Sequelize model; verify against ../models).
    const where = { is_indexed: true };
    if (category) where.category = category;
    const docs = await Document.findAll({
      where,
      attributes: ['id', 'original_filename', 'extracted_text', 'embeddings', 'tags', 'category', 'created_at'],
    });

    const nodes = docs
      .filter((d) => Array.isArray(d.embeddings) && d.embeddings.length > 0)
      .map((d) => ({ id: d.id, embedding: d.embeddings, tags: d.tags || [], ref: d }));

    if (nodes.length === 0) {
      return { results: [] };
    }

    // Score every node against the query once, and index nodes by id so the
    // traversal and the result formatting avoid repeated linear scans.
    const nodeById = new Map();
    const querySimById = new Map();
    const seedScores = [];
    for (const node of nodes) {
      const score = this.scoreSimilarity(queryEmbedding, node.embedding);
      seedScores.push({ id: node.id, score });
      if (!nodeById.has(node.id)) {
        // First occurrence wins, matching a front-to-back search of `nodes`.
        nodeById.set(node.id, node);
        querySimById.set(node.id, score);
      }
    }

    // Log similarity scores for debugging
    logger.info('Similarity scores:', seedScores.map(s => ({ id: s.id, score: s.score.toFixed(4) })));

    seedScores.sort((a, b) => b.score - a.score);
    const seeds = seedScores.slice(0, Math.min(SEED_COUNT, seedScores.length)).map((s) => s.id);

    const graph = this.buildGraph(nodes);

    // Expand neighborhoods from seeds
    const visited = new Set();
    const scored = new Map();

    // Keeps the best score and the smallest hop count seen for a node, plus
    // the first few reasons that contributed to it.
    const pushScore = (id, add, meta) => {
      const prev = scored.get(id) || { score: 0, hops: Infinity, reasons: [] };
      scored.set(id, {
        score: Math.max(prev.score, add),
        hops: Math.min(prev.hops, meta.hops),
        reasons: prev.reasons.length < MAX_REASONS ? [...prev.reasons, meta] : prev.reasons,
      });
    };

    // Breadth-first traversal; a head index replaces Array#shift so that
    // dequeuing stays constant-time.
    const queue = seeds.map((id) => ({ id, hops: 0 }));
    let head = 0;
    // The size cap is only checked between expansions, so `scored` may end up
    // slightly larger than MAX_SCORED_NODES.
    while (head < queue.length && scored.size < MAX_SCORED_NODES) {
      const { id, hops } = queue[head];
      head += 1;
      if (visited.has(id) || hops > MAX_HOPS) continue;
      visited.add(id);

      // Base score: similarity to query. The 'seed' label is applied to every
      // expanded node, not only the initial seeds; it is part of the output.
      const base = querySimById.get(id);
      pushScore(id, base, { type: 'seed', hops });

      // Neighbours of a node at MAX_HOPS still receive a path score here, but
      // are skipped by the hop check above when they are dequeued.
      const neighbors = graph.get(id) || [];
      for (const nbr of neighbors) {
        const pathScore = (base + nbr.score) / 2;
        pushScore(nbr.id, pathScore, { type: 'edge', hops: hops + 1, via: id, edgeScore: nbr.score });
        if (!visited.has(nbr.id)) {
          queue.push({ id: nbr.id, hops: hops + 1 });
        }
      }
    }

    // Format results
    const ranked = Array.from(scored.entries())
      .map(([id, info]) => {
        // Every scored id originates from `nodes`, so the lookup always succeeds.
        const { ref } = nodeById.get(id);
        return {
          id,
          original_filename: ref.original_filename,
          snippet: (ref.extracted_text || '').slice(0, SNIPPET_LENGTH),
          category: ref.category,
          created_at: ref.created_at,
          score: Number(info.score.toFixed(4)),
          hops: info.hops,
          reasons: info.reasons,
        };
      })
      .sort((a, b) => b.score - a.score)
      .slice(0, this.maxResults);

    return { results: ranked };
  }
}

module.exports = new GraphRagService();