first commit
This commit is contained in:
@@ -0,0 +1,299 @@
|
||||
const multer = require('multer');
|
||||
const path = require('path');
|
||||
const fs = require('fs');
|
||||
const pdf = require('pdf-parse');
|
||||
const { Document, sequelize } = require('../models');
|
||||
const { Op } = require('sequelize');
|
||||
const logger = require('../utils/logger');
|
||||
const embeddingService = require('../services/embeddingService');
|
||||
const graphRagService = require('../services/graphRagService');
|
||||
|
||||
// Configure multer for file uploads
|
||||
// Disk storage engine for multer: decides where an upload is written and what it is called.
const storage = multer.diskStorage({
  // Resolve the uploads directory relative to this file, creating it when absent.
  destination(req, file, cb) {
    const uploadsDir = path.join(__dirname, '../../uploads');
    const dirMissing = !fs.existsSync(uploadsDir);
    if (dirMissing) {
      fs.mkdirSync(uploadsDir, { recursive: true });
    }
    cb(null, uploadsDir);
  },
  // Stored name: "<fieldname>-<timestamp>-<random number><original extension>".
  filename(req, file, cb) {
    const stamp = Date.now();
    const nonce = Math.round(Math.random() * 1E9);
    const extension = path.extname(file.originalname);
    cb(null, `${file.fieldname}-${stamp}-${nonce}${extension}`);
  }
});
|
||||
|
||||
// Multer middleware: single-file uploads written through `storage`, restricted by size and extension.
const upload = multer({
  storage,
  limits: {
    fileSize: 10 * 1024 * 1024, // 10MB for testing
    fieldSize: 10 * 1024 * 1024, // 10MB for field values
    fieldNameSize: 100, // 100 bytes for field names
    files: 1 // Only 1 file at a time
  },
  // Accept a file only when the (lower-cased) extension of its original name is allowed.
  fileFilter(req, file, cb) {
    const extension = path.extname(file.originalname).toLowerCase();
    const isAllowed = ['.pdf', '.txt', '.doc', '.docx'].includes(extension);
    if (!isAllowed) {
      cb(new Error('Invalid file type. Only PDF, TXT, DOC, DOCX files are allowed.'));
      return;
    }
    cb(null, true);
  }
});
|
||||
|
||||
/**
 * Normalise the `tags` form field into an array of trimmed, non-empty strings.
 * Accepts a comma-separated string or an array of such strings (the shape a
 * repeated form field takes), so a repeated field no longer throws on `.split`.
 */
const parseTags = (tags) => {
  if (!tags) return [];
  const rawValues = Array.isArray(tags) ? tags : [tags];
  return rawValues
    .flatMap((value) => String(value).split(','))
    .map((tag) => tag.trim())
    .filter((tag) => tag.length > 0);
};

/**
 * Read the text content of an uploaded file based on its MIME type.
 * PDF extraction failures are logged and yield an empty string so the caller
 * marks indexing as failed instead of storing an error message as content.
 * Read failures for plain-text files propagate to the caller.
 */
const extractUploadedText = async (file) => {
  if (file.mimetype === 'application/pdf') {
    try {
      const dataBuffer = await fs.promises.readFile(file.path);
      const pdfData = await pdf(dataBuffer);
      return pdfData.text || '';
    } catch (error) {
      logger.error('PDF extraction error:', error);
      return '';
    }
  }
  if (file.mimetype === 'text/plain') {
    return fs.promises.readFile(file.path, 'utf8');
  }
  return '';
};

/**
 * Handle a single-file upload: extract text, create the Document record and
 * attempt to generate an embedding for it.
 *
 * Responds 400 when no file is attached, 201 with `{ success, data: { document } }`
 * on success, and 500 on unexpected errors.
 *
 * @param {object} req - request carrying `file` (from multer) and `body.category` / `body.tags`
 * @param {object} res - response object
 */
const uploadDocument = async (req, res) => {
  // Tracks whether a DB record exists, so cleanup never orphans a record's file.
  let document = null;

  try {
    if (!req.file) {
      return res.status(400).json({
        success: false,
        error: 'No file uploaded'
      });
    }

    const { category, tags } = req.body || {};
    const extractedText = await extractUploadedText(req.file);

    // Create document record
    document = await Document.create({
      filename: req.file.filename,
      original_filename: req.file.originalname,
      file_path: req.file.path,
      file_type: req.file.mimetype,
      file_size: req.file.size,
      content: extractedText,
      extracted_text: extractedText,
      category: category || 'general',
      tags: parseTags(tags),
      indexing_status: 'processing'
    });

    // Generate and store embeddings (if text available)
    if (extractedText.trim().length > 0) {
      try {
        // Only the first 15000 characters are embedded.
        const embedding = await embeddingService.embedText(extractedText.slice(0, 15000));
        await document.update({ embeddings: embedding, is_indexed: true, indexing_status: 'completed' });
      } catch (e) {
        logger.error('Embedding generation failed:', e);
        await document.update({ is_indexed: false, indexing_status: 'failed' });
      }
    } else {
      await document.update({ is_indexed: false, indexing_status: 'failed' });
    }

    logger.info(`Document uploaded: ${document.id}`);

    res.status(201).json({
      success: true,
      data: { document }
    });
  } catch (error) {
    logger.error('Upload document error:', error);

    // Remove the uploaded file only when no record was created; once a record
    // exists it references this path and deleting the file would leave it dangling.
    if (!document && req.file && req.file.path) {
      try {
        await fs.promises.unlink(req.file.path);
      } catch (cleanupError) {
        // A file that is already gone is fine; anything else is worth logging
        // but must not prevent the error response below.
        if (cleanupError.code !== 'ENOENT') {
          logger.error('Upload cleanup error:', cleanupError);
        }
      }
    }

    res.status(500).json({
      success: false,
      error: 'Internal server error'
    });
  }
};
|
||||
|
||||
/**
 * List documents, newest first, with optional filters and pagination.
 *
 * Query parameters:
 *   page      - 1-based page number (default 1; invalid values fall back to 1)
 *   limit     - page size (default 10; invalid values fall back to 10)
 *   category  - exact category match
 *   search    - case-insensitive substring match on original filename or extracted text
 *   isIndexed - 'true' filters indexed documents; any other supplied value filters non-indexed
 *
 * Responds with `{ success, data: { documents, pagination } }`, or 500 on error.
 */
const getDocuments = async (req, res) => {
  // Parse a query value as a positive integer; missing, non-numeric, zero or
  // negative values use the fallback so they never reach the query as NaN or
  // a negative offset.
  const toPositiveInt = (value, fallback) => {
    const parsed = Number.parseInt(value, 10);
    return Number.isInteger(parsed) && parsed > 0 ? parsed : fallback;
  };

  try {
    const { page, limit, category, search, isIndexed } = req.query;
    const pageNumber = toPositiveInt(page, 1);
    const pageSize = toPositiveInt(limit, 10);

    const whereClause = {};
    if (category) whereClause.category = category;
    if (isIndexed !== undefined) whereClause.is_indexed = isIndexed === 'true';
    if (search) {
      whereClause[Op.or] = [
        { original_filename: { [Op.iLike]: `%${search}%` } },
        { extracted_text: { [Op.iLike]: `%${search}%` } }
      ];
    }

    const documents = await Document.findAndCountAll({
      where: whereClause,
      order: [['created_at', 'DESC']],
      limit: pageSize,
      offset: (pageNumber - 1) * pageSize
    });

    res.json({
      success: true,
      data: {
        documents: documents.rows,
        pagination: {
          page: pageNumber,
          limit: pageSize,
          total: documents.count,
          pages: Math.ceil(documents.count / pageSize)
        }
      }
    });
  } catch (error) {
    logger.error('Get documents error:', error);
    res.status(500).json({
      success: false,
      error: 'Internal server error'
    });
  }
};
|
||||
|
||||
/**
 * Fetch one document by the `documentId` route parameter.
 * Responds 404 when no such record exists and 500 on unexpected errors.
 */
const getDocument = async (req, res) => {
  try {
    const { documentId: id } = req.params;
    const record = await Document.findByPk(id);

    if (record) {
      res.json({
        success: true,
        data: { document: record }
      });
      return;
    }

    return res.status(404).json({
      success: false,
      error: 'Document not found'
    });
  } catch (err) {
    logger.error('Get document error:', err);
    res.status(500).json({
      success: false,
      error: 'Internal server error'
    });
  }
};
|
||||
|
||||
/**
 * Semantic search over indexed documents.
 *
 * Embeds the `query` string, scores every indexed document (optionally
 * restricted to `category`) by cosine similarity against its stored
 * embedding, and returns the `limit` best matches (default 10) with a
 * 300-character snippet of each document's extracted text.
 *
 * Responds 400 when `query` is missing and 500 on unexpected errors.
 */
const searchDocuments = async (req, res) => {
  // Parse a query value as a positive integer; missing or invalid values use
  // the fallback so a bad `limit` cannot silently empty or truncate the results.
  const toPositiveInt = (value, fallback) => {
    const parsed = Number.parseInt(value, 10);
    return Number.isInteger(parsed) && parsed > 0 ? parsed : fallback;
  };

  // Order by score descending; non-finite scores (e.g. NaN) sort last so the
  // comparator stays consistent.
  const byScoreDesc = (a, b) => {
    const aFinite = Number.isFinite(a.score);
    const bFinite = Number.isFinite(b.score);
    if (aFinite && bFinite) return b.score - a.score;
    if (aFinite) return -1;
    if (bFinite) return 1;
    return 0;
  };

  try {
    const { query, category, limit } = req.query;

    if (!query) {
      return res.status(400).json({
        success: false,
        error: 'Search query is required'
      });
    }

    const maxResults = toPositiveInt(limit, 10);

    const whereClause = {
      is_indexed: true,
      ...(category ? { category } : {})
    };

    // Embed query and compute cosine similarity in JS for now
    const queryEmbedding = await embeddingService.embedText(query);

    const candidates = await Document.findAll({
      where: whereClause,
      attributes: ['id', 'original_filename', 'extracted_text', 'embeddings', 'category', 'created_at']
    });

    const scored = candidates.map((doc) => ({
      id: doc.id,
      original_filename: doc.original_filename,
      snippet: (doc.extracted_text || '').slice(0, 300),
      category: doc.category,
      created_at: doc.created_at,
      score: embeddingService.cosineSimilarity(queryEmbedding, doc.embeddings || [])
    }));

    scored.sort(byScoreDesc);
    const top = scored.slice(0, maxResults);

    res.json({ success: true, data: { results: top } });
  } catch (error) {
    logger.error('Search documents error:', error);
    res.status(500).json({
      success: false,
      error: 'Internal server error'
    });
  }
};
|
||||
|
||||
/**
 * Run a graph-based search through graphRagService for the given `query`,
 * optionally scoped by `category`.
 * Responds 400 when `query` is missing and 500 on unexpected errors.
 */
const graphSearchDocuments = async (req, res) => {
  try {
    const params = req.query;
    const searchText = params.query;
    const searchCategory = params.category;

    if (!searchText) {
      return res.status(400).json({ success: false, error: 'Search query is required' });
    }

    const payload = await graphRagService.graphSearch({ query: searchText, category: searchCategory });
    res.json({ success: true, data: payload });
  } catch (err) {
    logger.error('Graph search error:', err);
    res.status(500).json({ success: false, error: 'Internal server error' });
  }
};
|
||||
|
||||
/**
 * Delete a document record and its file on disk.
 *
 * The database record is removed first, so a failed delete never leaves a
 * record pointing at a file that is already gone. The file is then removed
 * on a best-effort basis: a missing file is ignored and other unlink errors
 * are logged without failing the request.
 *
 * Responds 404 when the document does not exist and 500 on unexpected errors.
 */
const deleteDocument = async (req, res) => {
  try {
    const { documentId } = req.params;

    const document = await Document.findByPk(documentId);

    if (!document) {
      return res.status(404).json({
        success: false,
        error: 'Document not found'
      });
    }

    // Capture the path before the record is destroyed.
    const filePath = document.file_path;

    // Delete database record
    await document.destroy();

    // Delete physical file
    if (filePath) {
      try {
        await fs.promises.unlink(filePath);
      } catch (unlinkError) {
        // ENOENT means the file was already absent, which is the desired end state.
        if (unlinkError.code !== 'ENOENT') {
          logger.error('Delete document file error:', unlinkError);
        }
      }
    }

    logger.info(`Document deleted: ${documentId}`);

    res.json({
      success: true,
      message: 'Document deleted successfully'
    });
  } catch (error) {
    logger.error('Delete document error:', error);
    res.status(500).json({
      success: false,
      error: 'Internal server error'
    });
  }
};
|
||||
|
||||
module.exports = {
|
||||
uploadDocument,
|
||||
getDocuments,
|
||||
getDocument,
|
||||
searchDocuments,
|
||||
graphSearchDocuments,
|
||||
deleteDocument,
|
||||
upload // Export multer middleware
|
||||
};
|
||||
Reference in New Issue
Block a user