257 lines
8.9 KiB
Python
257 lines
8.9 KiB
Python
# Document API routes
|
|
from fastapi import APIRouter, UploadFile, File, HTTPException, BackgroundTasks, Query
|
|
from fastapi.responses import JSONResponse, HTMLResponse
|
|
from typing import List, Optional, Dict
|
|
import uuid
|
|
from loguru import logger
|
|
|
|
from app.core.models import (
|
|
DocumentUploadResponse,
|
|
DocumentAnalysisResponse,
|
|
DocumentStatus
|
|
)
|
|
from app.services.document import DocumentService
|
|
from app.services.embedding import EmbeddingService
|
|
from app.services.reasoning import ReasoningService
|
|
from app.services.standards import StandardsService
|
|
from app.utils.helpers import generate_html_report
|
|
|
|
# Instantiate the services that back the document endpoints
embedding_service = EmbeddingService()
reasoning_service = ReasoningService()
standards_service = StandardsService()

# Record the StandardsService instance id and how many standards it holds at
# import time; the id is logged so the singleton pattern can be verified
logger.info(f"Document API - Using StandardsService instance: {id(standards_service)}")
logger.info(f"Document API - Initial standards count: {len(standards_service.standards)}")

# The document service is wired with the three service instances created above
document_service = DocumentService(
    embedding_service=embedding_service,
    reasoning_service=reasoning_service,
    standards_service=standards_service,
)

# Router that groups every endpoint below under the /documents prefix
router = APIRouter(prefix="/documents", tags=["documents"])
|
|
|
|
@router.post("/upload", response_model=DocumentUploadResponse)
async def upload_document(file: UploadFile = File(...)):
    """
    Upload and process a document.

    Args:
        file: The document file to upload

    Returns:
        DocumentUploadResponse with document ID

    Raises:
        HTTPException: 400 if the upload has no filename or the document
            service raises ValueError; 500 for any other failure.
    """
    try:
        # Only the presence of a filename is checked here
        if not file.filename:
            raise HTTPException(status_code=400, detail="Filename is required")

        # Process document
        document = await document_service.upload_document(file.file, file.filename)

        return DocumentUploadResponse(
            document_id=document.id,
            filename=document.metadata.filename,
            status=document.status,
            message="Document uploaded successfully and is being processed."
        )
    except HTTPException:
        # Let deliberate HTTP errors (e.g. the 400 above) through unchanged;
        # without this clause the generic handler below rewrites them as 500.
        raise
    except ValueError as e:
        raise HTTPException(status_code=400, detail=str(e))
    except Exception as e:
        logger.error(f"Error processing document: {str(e)}")
        raise HTTPException(status_code=500, detail=f"Error processing document: {str(e)}")
|
|
|
|
def _build_html_report_data(document, report):
    """Assemble the dictionary handed to generate_html_report for one report."""
    payload = {
        "document_name": document.metadata.filename,
        "timestamp": report.timestamp.strftime("%Y-%m-%d %H:%M:%S"),
        "compliance_score": report.compliance_score,
        "summary": report.summary,
        "applied_standards": report.applied_standards,
    }
    # Issues are flattened to plain dicts; the level is emitted via its .value
    payload["issues"] = [
        {
            "section": issue.section,
            "description": issue.description,
            "level": issue.level.value,
            "reasoning": issue.reasoning,
            "standard_references": issue.standard_references,
            "recommendation": issue.recommendation,
        }
        for issue in report.issues
    ]
    return payload


@router.get("/{doc_id}/analysis", response_model=DocumentAnalysisResponse)
async def get_document_analysis(doc_id: str, format: Optional[str] = Query(None, description="Response format (json or html)")):
    """
    Return the most recent analysis report of a document.

    Args:
        doc_id: The document ID
        format: Response format (json or html); only the exact value
            "html" switches to an HTML response, anything else yields JSON

    Returns:
        DocumentAnalysisResponse carrying the report, or a status-only
        DocumentAnalysisResponse while the document is not COMPLETED, or an
        HTMLResponse when format is "html"

    Raises:
        HTTPException: 404 if the document, its report list, or the latest
            report is missing; 500 for any other failure.
    """
    try:
        document = await document_service.get_document(doc_id)
        if not document:
            raise HTTPException(status_code=404, detail=f"Document with ID {doc_id} not found")

        # A document that has not finished processing has no report to show yet
        if document.status != DocumentStatus.COMPLETED:
            return DocumentAnalysisResponse(
                document_id=doc_id,
                status=document.status,
                message=f"Document is in {document.status} state. Please try again later."
            )

        if not document.reports:
            raise HTTPException(status_code=404, detail=f"No analysis reports found for document {doc_id}")

        # The last entry of document.reports is treated as the latest report
        latest_report_id = document.reports[-1]
        report = await document_service.get_report(latest_report_id)
        if not report:
            raise HTTPException(status_code=404, detail=f"Report {latest_report_id} not found")

        # JSON is the default representation
        if format != "html":
            return DocumentAnalysisResponse(
                document_id=doc_id,
                status=document.status,
                report=report,
                message="Analysis completed successfully."
            )

        # HTML was requested: render the report through the HTML helper
        rendered = generate_html_report(_build_html_report_data(document, report))
        return HTMLResponse(content=rendered)

    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"Error retrieving document analysis: {str(e)}")
        raise HTTPException(status_code=500, detail=f"Error retrieving document analysis: {str(e)}")
|
|
|
|
@router.post("/{doc_id}/resubmit", response_model=DocumentUploadResponse)
async def resubmit_document(
    doc_id: str,
    file: UploadFile = File(...),
    background_tasks: BackgroundTasks = None
):
    """
    Resubmit a document with changes.

    Args:
        doc_id: The document ID to resubmit
        file: The updated document file
        background_tasks: Background tasks handler (not used by this handler)

    Returns:
        DocumentUploadResponse with document ID

    Raises:
        HTTPException: 404 if no document with doc_id exists; 400 if the
            document service raises ValueError; 500 for any other failure.
    """
    try:
        # Check if document exists
        document = await document_service.get_document(doc_id)
        if not document:
            raise HTTPException(status_code=404, detail=f"Document with ID {doc_id} not found")

        # Process resubmitted document
        updated_document = await document_service.resubmit_document(doc_id, file.file)

        return DocumentUploadResponse(
            document_id=updated_document.id,
            filename=updated_document.metadata.filename,
            status=updated_document.status,
            message=f"Document (version {updated_document.version}) resubmitted successfully and is being processed."
        )
    except HTTPException:
        # Propagate the 404 above as-is; without this clause the generic
        # handler below would report a missing document as a 500.
        raise
    except ValueError as e:
        raise HTTPException(status_code=400, detail=str(e))
    except Exception as e:
        logger.error(f"Error resubmitting document: {str(e)}")
        raise HTTPException(status_code=500, detail=f"Error resubmitting document: {str(e)}")
|
|
|
|
@router.get("/{doc_id}", response_model=Dict)
async def get_document_info(doc_id: str):
    """
    Return the stored metadata and state of a single document.

    Args:
        doc_id: The document ID

    Returns:
        Dict with the document id, file metadata, status, version and
        report list

    Raises:
        HTTPException: 404 if the document is not found; 500 for any other
            failure.
    """
    try:
        document = await document_service.get_document(doc_id)

        if document:
            # Flatten the document and its metadata into a plain dict
            return {
                "document_id": document.id,
                "filename": document.metadata.filename,
                "file_type": document.metadata.file_type,
                "file_size": document.metadata.file_size,
                "upload_timestamp": document.metadata.upload_timestamp,
                "last_modified": document.metadata.last_modified,
                "status": document.status,
                "version": document.version,
                "reports": document.reports,
            }

        raise HTTPException(status_code=404, detail=f"Document with ID {doc_id} not found")
    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"Error retrieving document info: {str(e)}")
        raise HTTPException(status_code=500, detail=f"Error retrieving document info: {str(e)}")
|
|
|
|
@router.get("/", response_model=List[Dict])
async def list_documents():
    """
    Return a short summary of every document held by the document service.

    Returns:
        List of dicts, one per entry in document_service.documents, each with
        the document id, filename, status, version and upload timestamp

    Raises:
        HTTPException: 500 if building the list fails.
    """
    try:
        return [
            {
                "document_id": doc_id,
                "filename": document.metadata.filename,
                "status": document.status,
                "version": document.version,
                "upload_timestamp": document.metadata.upload_timestamp,
            }
            for doc_id, document in document_service.documents.items()
        ]
    except Exception as e:
        logger.error(f"Error listing documents: {str(e)}")
        raise HTTPException(status_code=500, detail=f"Error listing documents: {str(e)}")
|
|
|
|
@router.get("/{doc_id}/stats", response_model=Dict)
async def get_document_stats(doc_id: str):
    """
    Return the statistics the document service reports for a document.

    Args:
        doc_id: The document ID

    Returns:
        Whatever document_service.get_document_stats returns for doc_id

    Raises:
        HTTPException: 404 if the service raises ValueError; 500 for any
            other failure.
    """
    try:
        return await document_service.get_document_stats(doc_id)
    except ValueError as e:
        # A ValueError from the service is reported to the client as 404
        raise HTTPException(status_code=404, detail=str(e))
    except Exception as e:
        logger.error(f"Error retrieving document stats: {str(e)}")
        raise HTTPException(status_code=500, detail=f"Error retrieving document stats: {str(e)}")
|