Files
reason-flow/server/controllers/documentController.js
T

300 lines
8.1 KiB
JavaScript
Raw Normal View History

2025-11-06 11:08:59 +01:00
const multer = require('multer');
const path = require('path');
const fs = require('fs');
const pdf = require('pdf-parse');
const { Document, sequelize } = require('../models');
const { Op } = require('sequelize');
const logger = require('../utils/logger');
const embeddingService = require('../services/embeddingService');
const graphRagService = require('../services/graphRagService');
// Configure multer disk storage for file uploads.
// Uploaded files are written to the `uploads` directory two levels above this
// file, under a generated name that keeps the original file extension.
const storage = multer.diskStorage({
  /**
   * Resolve the upload directory, creating it on first use.
   *
   * @param {object} req - Incoming request (unused).
   * @param {object} file - File descriptor supplied by multer (unused).
   * @param {Function} cb - Node-style callback: `cb(error)` or `cb(null, dir)`.
   */
  destination: (req, file, cb) => {
    const uploadPath = path.join(__dirname, '../../uploads');
    try {
      // `recursive: true` is a no-op when the directory already exists, so no
      // separate existence check (and no check-then-create race) is needed.
      fs.mkdirSync(uploadPath, { recursive: true });
    } catch (error) {
      // Report the failure through the callback; a synchronous throw here
      // would bypass the callback-based error reporting entirely.
      cb(error);
      return;
    }
    cb(null, uploadPath);
  },
  /**
   * Build the stored filename: `<fieldname>-<timestamp>-<random><ext>`.
   *
   * @param {object} req - Incoming request (unused).
   * @param {object} file - File descriptor; `fieldname` and `originalname` are read.
   * @param {Function} cb - Node-style callback receiving the generated name.
   */
  filename: (req, file, cb) => {
    // Timestamp plus a random integer keeps names distinct between uploads;
    // this is not a security token.
    const uniqueSuffix = `${Date.now()}-${Math.round(Math.random() * 1E9)}`;
    cb(null, `${file.fieldname}-${uniqueSuffix}${path.extname(file.originalname)}`);
  }
});
// File extensions accepted by the upload filter (compared in lower case).
const ALLOWED_UPLOAD_EXTENSIONS = ['.pdf', '.txt', '.doc', '.docx'];

// Byte size shared by the file-size and field-size limits below.
const UPLOAD_SIZE_LIMIT_BYTES = 10 * 1024 * 1024;

/**
 * Accept a file only when the extension of its original name is allowed.
 * The decision is based on the extension alone; the MIME type is not inspected.
 *
 * @param {object} req - Incoming request (unused).
 * @param {object} file - File descriptor; `originalname` is read.
 * @param {Function} cb - `cb(null, true)` to accept, `cb(error)` to reject.
 */
const filterUploadByExtension = (req, file, cb) => {
  const extension = path.extname(file.originalname).toLowerCase();
  if (!ALLOWED_UPLOAD_EXTENSIONS.includes(extension)) {
    cb(new Error('Invalid file type. Only PDF, TXT, DOC, DOCX files are allowed.'));
    return;
  }
  cb(null, true);
};

// Multer instance wired to the disk storage above, with per-request limits.
const upload = multer({
  storage,
  limits: {
    fileSize: UPLOAD_SIZE_LIMIT_BYTES, // 10MB for testing
    fieldSize: UPLOAD_SIZE_LIMIT_BYTES, // 10MB for field values
    fieldNameSize: 100, // 100 bytes for field names
    files: 1 // Only 1 file at a time
  },
  fileFilter: filterUploadByExtension
});
/**
 * Normalize the `tags` form field into an array of non-empty, trimmed tags.
 * Accepts a comma-separated string or an array of such strings (a repeated
 * form field arrives as an array).
 *
 * @param {string|string[]|undefined} tags - Raw value from the request body.
 * @returns {string[]} Cleaned tag list; empty when no tags were supplied.
 */
const parseUploadTags = (tags) => {
  if (!tags) {
    return [];
  }
  const rawValues = Array.isArray(tags) ? tags : [tags];
  return rawValues
    .flatMap((value) => String(value).split(','))
    .map((tag) => tag.trim())
    .filter((tag) => tag.length > 0);
};

/**
 * Handle a single-file upload: extract text (PDF or plain text), create the
 * Document record, then generate and store an embedding for the text.
 *
 * Responses:
 *  - 400 when no file is attached to the request.
 *  - 201 with `{ success: true, data: { document } }` once the record exists
 *    (indexing may still have ended as 'failed').
 *  - 500 on any unexpected error.
 *
 * @param {object} req - Request; reads `req.file` and `req.body.{category,tags}`.
 * @param {object} res - Response; uses `status()` and `json()`.
 * @returns {Promise<*>}
 */
const uploadDocument = async (req, res) => {
  // Stays null until the DB row exists, so the error path can tell whether the
  // file on disk is already referenced by a record.
  let document = null;
  try {
    if (!req.file) {
      return res.status(400).json({
        success: false,
        error: 'No file uploaded'
      });
    }
    const { category, tags } = req.body || {};
    let extractedText = '';
    let extractionFailed = false;
    // Extract text from PDF
    if (req.file.mimetype === 'application/pdf') {
      try {
        const dataBuffer = await fs.promises.readFile(req.file.path);
        const pdfData = await pdf(dataBuffer);
        extractedText = pdfData.text;
      } catch (error) {
        logger.error('PDF extraction error:', error);
        // The placeholder is still stored as content (unchanged behavior), but
        // it must never be embedded or reported as successfully indexed.
        extractedText = 'Error extracting text from PDF';
        extractionFailed = true;
      }
    } else if (req.file.mimetype === 'text/plain') {
      extractedText = await fs.promises.readFile(req.file.path, 'utf8');
    }
    // Create document record
    document = await Document.create({
      filename: req.file.filename,
      original_filename: req.file.originalname,
      file_path: req.file.path,
      file_type: req.file.mimetype,
      file_size: req.file.size,
      content: extractedText,
      extracted_text: extractedText,
      category: category || 'general',
      tags: parseUploadTags(tags),
      indexing_status: 'processing'
    });
    // Generate and store embeddings (only for genuinely extracted text)
    const hasIndexableText =
      !extractionFailed && Boolean(extractedText) && extractedText.trim().length > 0;
    if (hasIndexableText) {
      try {
        // Only the first 15000 characters are sent to the embedding service.
        const embedding = await embeddingService.embedText(extractedText.slice(0, 15000));
        await document.update({ embeddings: embedding, is_indexed: true, indexing_status: 'completed' });
      } catch (e) {
        logger.error('Embedding generation failed:', e);
        await document.update({ is_indexed: false, indexing_status: 'failed' });
      }
    } else {
      await document.update({ is_indexed: false, indexing_status: 'failed' });
    }
    logger.info(`Document uploaded: ${document.id}`);
    res.status(201).json({
      success: true,
      data: { document }
    });
  } catch (error) {
    logger.error('Upload document error:', error);
    // Clean up the uploaded file only if no record was created; once a row
    // exists it references this path and the file must stay on disk.
    if (!document && req.file && req.file.path) {
      try {
        await fs.promises.unlink(req.file.path);
      } catch (cleanupError) {
        // A missing file is fine; anything else is logged but must not
        // prevent the 500 response below from being sent.
        if (cleanupError.code !== 'ENOENT') {
          logger.error('Upload cleanup error:', cleanupError);
        }
      }
    }
    res.status(500).json({
      success: false,
      error: 'Internal server error'
    });
  }
};
/**
 * Parse a pagination query parameter as a positive base-10 integer.
 *
 * @param {*} value - Raw query-string value (may be undefined or malformed).
 * @param {number} fallback - Value used when `value` is missing, non-numeric, or < 1.
 * @returns {number} A positive integer.
 */
const parsePaginationParam = (value, fallback) => {
  const parsed = Number.parseInt(value, 10);
  return Number.isInteger(parsed) && parsed > 0 ? parsed : fallback;
};

/**
 * Escape the LIKE metacharacters (`\`, `%`, `_`) so user input is matched
 * literally inside a LIKE/ILIKE pattern.
 *
 * @param {*} value - User-supplied search text.
 * @returns {string} Text with each metacharacter prefixed by a backslash.
 */
const escapeLikeWildcards = (value) => String(value).replace(/[\\%_]/g, '\\$&');

/**
 * List documents, newest first, with optional filters and pagination.
 *
 * Query parameters:
 *  - `page` (default 1) and `limit` (default 10): invalid or non-positive
 *    values fall back to the defaults instead of producing a NaN/negative
 *    offset in the query.
 *  - `category`: exact match on the category column.
 *  - `isIndexed`: the string 'true' filters for indexed documents; any other
 *    supplied value filters for non-indexed ones.
 *  - `search`: substring match (iLike) on original filename or extracted text.
 *
 * Responds with `{ success, data: { documents, pagination } }`, or 500 on error.
 *
 * @param {object} req - Request; reads `req.query`.
 * @param {object} res - Response; uses `status()` and `json()`.
 * @returns {Promise<void>}
 */
const getDocuments = async (req, res) => {
  try {
    const { page, limit, category, search, isIndexed } = req.query;
    const pageNumber = parsePaginationParam(page, 1);
    const pageSize = parsePaginationParam(limit, 10);
    const whereClause = {};
    if (category) whereClause.category = category;
    if (isIndexed !== undefined) whereClause.is_indexed = isIndexed === 'true';
    if (search) {
      // Wildcards typed by the user are escaped so they match literally.
      const pattern = `%${escapeLikeWildcards(search)}%`;
      whereClause[Op.or] = [
        { original_filename: { [Op.iLike]: pattern } },
        { extracted_text: { [Op.iLike]: pattern } }
      ];
    }
    const { rows, count } = await Document.findAndCountAll({
      where: whereClause,
      order: [['created_at', 'DESC']],
      limit: pageSize,
      offset: (pageNumber - 1) * pageSize
    });
    res.json({
      success: true,
      data: {
        documents: rows,
        pagination: {
          page: pageNumber,
          limit: pageSize,
          total: count,
          pages: Math.ceil(count / pageSize)
        }
      }
    });
  } catch (error) {
    logger.error('Get documents error:', error);
    res.status(500).json({
      success: false,
      error: 'Internal server error'
    });
  }
};
/**
 * Fetch a single document by primary key.
 *
 * Responds with `{ success: true, data: { document } }` when found, 404 when
 * no record matches `req.params.documentId`, and 500 on unexpected errors.
 *
 * @param {object} req - Request; reads `req.params.documentId`.
 * @param {object} res - Response; uses `status()` and `json()`.
 * @returns {Promise<*>}
 */
const getDocument = async (req, res) => {
  try {
    const record = await Document.findByPk(req.params.documentId);
    if (record) {
      res.json({ success: true, data: { document: record } });
      return undefined;
    }
    const notFoundBody = { success: false, error: 'Document not found' };
    return res.status(404).json(notFoundBody);
  } catch (error) {
    logger.error('Get document error:', error);
    const failureBody = { success: false, error: 'Internal server error' };
    res.status(500).json(failureBody);
  }
};
/**
 * Parse the `limit` query parameter as a positive base-10 integer.
 *
 * @param {*} value - Raw query-string value (may be undefined or malformed).
 * @param {number} fallback - Used when `value` is missing, non-numeric, or < 1.
 * @returns {number} A positive integer.
 */
const parseSearchResultLimit = (value, fallback) => {
  const parsed = Number.parseInt(value, 10);
  return Number.isInteger(parsed) && parsed > 0 ? parsed : fallback;
};

/**
 * Semantic search over indexed documents.
 *
 * Embeds the query text, scores every indexed document (optionally filtered
 * by category) by cosine similarity against its stored embedding, and returns
 * the highest-scoring results with a 300-character text snippet each.
 *
 * Query parameters:
 *  - `query` (required): search text; 400 when missing.
 *  - `category` (optional): exact-match filter.
 *  - `limit` (default 10): maximum number of results; invalid or non-positive
 *    values fall back to the default rather than yielding an empty/truncated list.
 *
 * NOTE: all matching candidates are loaded and scored in-process, so cost
 * grows with the number of indexed documents.
 *
 * @param {object} req - Request; reads `req.query`.
 * @param {object} res - Response; uses `status()` and `json()`.
 * @returns {Promise<*>}
 */
const searchDocuments = async (req, res) => {
  try {
    const { query, category, limit } = req.query;
    if (!query) {
      return res.status(400).json({
        success: false,
        error: 'Search query is required'
      });
    }
    const maxResults = parseSearchResultLimit(limit, 10);
    const whereClause = {
      is_indexed: true,
      ...(category ? { category } : {})
    };
    // Embed query and compute cosine similarity in JS for now
    const queryEmbedding = await embeddingService.embedText(query);
    const candidates = await Document.findAll({
      where: whereClause,
      attributes: ['id', 'original_filename', 'extracted_text', 'embeddings', 'category', 'created_at']
    });
    const scored = candidates.map((doc) => {
      const rawScore = embeddingService.cosineSimilarity(queryEmbedding, doc.embeddings || []);
      return {
        id: doc.id,
        original_filename: doc.original_filename,
        snippet: (doc.extracted_text || '').slice(0, 300),
        category: doc.category,
        created_at: doc.created_at,
        // A non-finite score (e.g. from a missing embedding) is treated as
        // "no similarity" so the sort comparator below stays well-defined.
        score: Number.isFinite(rawScore) ? rawScore : 0
      };
    });
    scored.sort((a, b) => b.score - a.score);
    const top = scored.slice(0, maxResults);
    res.json({ success: true, data: { results: top } });
  } catch (error) {
    logger.error('Search documents error:', error);
    res.status(500).json({
      success: false,
      error: 'Internal server error'
    });
  }
};
/**
 * Graph-based document search.
 *
 * Forwards `query` and `category` from the query string to
 * `graphRagService.graphSearch` and returns whatever it resolves to under
 * `data`. Responds 400 when `query` is missing and 500 on unexpected errors.
 *
 * @param {object} req - Request; reads `req.query.query` and `req.query.category`.
 * @param {object} res - Response; uses `status()` and `json()`.
 * @returns {Promise<*>}
 */
const graphSearchDocuments = async (req, res) => {
  try {
    const searchText = req.query.query;
    const categoryFilter = req.query.category;
    if (!searchText) {
      const badRequest = { success: false, error: 'Search query is required' };
      return res.status(400).json(badRequest);
    }
    const searchArgs = { query: searchText, category: categoryFilter };
    const result = await graphRagService.graphSearch(searchArgs);
    res.json({ success: true, data: result });
  } catch (error) {
    logger.error('Graph search error:', error);
    const failure = { success: false, error: 'Internal server error' };
    res.status(500).json(failure);
  }
};
/**
 * Delete a document: remove its database record, then its file on disk.
 *
 * The record is destroyed first so that a database failure leaves both the
 * row and the file intact. Removing the file afterwards is best-effort: a
 * missing file is ignored and any other failure is logged without failing
 * the request, since the record is already gone.
 *
 * Responds 404 when no record matches `req.params.documentId`, 500 when the
 * lookup or the record deletion fails, and a success message otherwise.
 *
 * @param {object} req - Request; reads `req.params.documentId`.
 * @param {object} res - Response; uses `status()` and `json()`.
 * @returns {Promise<*>}
 */
const deleteDocument = async (req, res) => {
  try {
    const { documentId } = req.params;
    const document = await Document.findByPk(documentId);
    if (!document) {
      return res.status(404).json({
        success: false,
        error: 'Document not found'
      });
    }
    // Capture the path before the record is destroyed.
    const filePath = document.file_path;
    // Delete database record
    await document.destroy();
    // Delete physical file (best-effort, non-blocking)
    if (filePath) {
      try {
        await fs.promises.unlink(filePath);
      } catch (unlinkError) {
        if (unlinkError.code !== 'ENOENT') {
          logger.error(`Failed to remove file for document ${documentId}:`, unlinkError);
        }
      }
    }
    logger.info(`Document deleted: ${documentId}`);
    res.json({
      success: true,
      message: 'Document deleted successfully'
    });
  } catch (error) {
    logger.error('Delete document error:', error);
    res.status(500).json({
      success: false,
      error: 'Internal server error'
    });
  }
};
// Public API of this controller: the six request handlers defined above plus
// the configured multer instance used for receiving uploaded files.
module.exports = {
uploadDocument,
getDocuments,
getDocument,
searchDocuments,
graphSearchDocuments,
deleteDocument,
upload // Configured multer instance (storage, size limits, extension filter)
};