# Document API routes
from typing import List, Optional, Dict
import uuid

from fastapi import APIRouter, UploadFile, File, HTTPException, BackgroundTasks, Query
from fastapi.responses import JSONResponse, HTMLResponse
from loguru import logger

from app.core.models import (
    DocumentUploadResponse,
    DocumentAnalysisResponse,
    DocumentStatus
)
from app.services.document import DocumentService
from app.services.embedding import EmbeddingService
from app.services.reasoning import ReasoningService
from app.services.standards import StandardsService
from app.utils.helpers import generate_html_report

# Create services
embedding_service = EmbeddingService()
reasoning_service = ReasoningService()
standards_service = StandardsService()

# Log the standards service instance id and its initial standards count at
# import time (presumably to check that the same instance is shared across
# modules -- confirm against StandardsService).
logger.info(f"Document API - Using StandardsService instance: {id(standards_service)}")
logger.info(f"Document API - Initial standards count: {len(standards_service.standards)}")

document_service = DocumentService(
    embedding_service=embedding_service,
    reasoning_service=reasoning_service,
    standards_service=standards_service
)

# Create router
router = APIRouter(prefix="/documents", tags=["documents"])


async def _get_document_or_404(doc_id: str):
    """Fetch a document through the document service or raise a 404 HTTPException."""
    document = await document_service.get_document(doc_id)
    if not document:
        raise HTTPException(status_code=404, detail=f"Document with ID {doc_id} not found")
    return document


def _build_report_data(document, report) -> Dict:
    """Flatten a document/report pair into the dict passed to generate_html_report."""
    return {
        "document_name": document.metadata.filename,
        "timestamp": report.timestamp.strftime("%Y-%m-%d %H:%M:%S"),
        "compliance_score": report.compliance_score,
        "summary": report.summary,
        "applied_standards": report.applied_standards,
        "issues": [
            {
                "section": issue.section,
                "description": issue.description,
                "level": issue.level.value,
                "reasoning": issue.reasoning,
                "standard_references": issue.standard_references,
                "recommendation": issue.recommendation
            }
            for issue in report.issues
        ]
    }


@router.post("/upload", response_model=DocumentUploadResponse)
async def upload_document(file: UploadFile = File(...)):
    """
    Upload and process a document.

    Args:
        file: The document file to upload

    Returns:
        DocumentUploadResponse with document ID

    Raises:
        HTTPException: 400 when the upload has no filename or the document
            service raises ValueError; 500 for any other failure.
    """
    try:
        # Reject uploads that carry no filename
        if not file.filename:
            raise HTTPException(status_code=400, detail="Filename is required")

        # Process document
        document = await document_service.upload_document(file.file, file.filename)

        return DocumentUploadResponse(
            document_id=document.id,
            filename=document.metadata.filename,
            status=document.status,
            message="Document uploaded successfully and is being processed."
        )
    except HTTPException:
        # Let deliberate HTTP errors (e.g. the 400 above) through unchanged;
        # without this clause the generic handler below turns them into 500s.
        raise
    except ValueError as e:
        raise HTTPException(status_code=400, detail=str(e)) from e
    except Exception as e:
        logger.error(f"Error processing document: {str(e)}")
        raise HTTPException(status_code=500, detail=f"Error processing document: {str(e)}") from e


@router.get("/{doc_id}/analysis", response_model=DocumentAnalysisResponse)
async def get_document_analysis(
    doc_id: str,
    format: Optional[str] = Query(None, description="Response format (json or html)")
):
    """
    Get analysis results for a document.

    Args:
        doc_id: The document ID
        format: Response format (json or html)

    Returns:
        DocumentAnalysisResponse with analysis results, or an HTMLResponse
        when format is "html". If the document is not yet in the COMPLETED
        state, a DocumentAnalysisResponse without a report is returned.

    Raises:
        HTTPException: 404 when the document, its report list, or the latest
            report is missing; 500 for any other failure.
    """
    try:
        # Retrieve document
        document = await _get_document_or_404(doc_id)

        # Check if document has been processed
        if document.status != DocumentStatus.COMPLETED:
            return DocumentAnalysisResponse(
                document_id=doc_id,
                status=document.status,
                message=f"Document is in {document.status} state. Please try again later."
            )

        # Get the latest report (the last entry in the document's report id list)
        if not document.reports:
            raise HTTPException(status_code=404, detail=f"No analysis reports found for document {doc_id}")

        latest_report_id = document.reports[-1]
        report = await document_service.get_report(latest_report_id)
        if not report:
            raise HTTPException(status_code=404, detail=f"Report {latest_report_id} not found")

        # Check if HTML format is requested
        if format == "html":
            html_content = generate_html_report(_build_report_data(document, report))
            return HTMLResponse(content=html_content)

        # Return JSON response
        return DocumentAnalysisResponse(
            document_id=doc_id,
            status=document.status,
            report=report,
            message="Analysis completed successfully."
        )
    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"Error retrieving document analysis: {str(e)}")
        raise HTTPException(status_code=500, detail=f"Error retrieving document analysis: {str(e)}") from e


@router.post("/{doc_id}/resubmit", response_model=DocumentUploadResponse)
async def resubmit_document(
    doc_id: str,
    file: UploadFile = File(...),
    background_tasks: BackgroundTasks = None
):
    """
    Resubmit a document with changes.

    Args:
        doc_id: The document ID to resubmit
        file: The updated document file
        background_tasks: Background tasks handler (currently unused by this handler)

    Returns:
        DocumentUploadResponse with document ID

    Raises:
        HTTPException: 404 when the document does not exist; 400 when the
            document service raises ValueError; 500 for any other failure.
    """
    try:
        # Check if document exists
        await _get_document_or_404(doc_id)

        # Process resubmitted document
        updated_document = await document_service.resubmit_document(doc_id, file.file)

        return DocumentUploadResponse(
            document_id=updated_document.id,
            filename=updated_document.metadata.filename,
            status=updated_document.status,
            message=f"Document (version {updated_document.version}) resubmitted successfully and is being processed."
        )
    except HTTPException:
        # Keep the 404 above a 404; the generic handler below would
        # otherwise rewrap it as a 500.
        raise
    except ValueError as e:
        raise HTTPException(status_code=400, detail=str(e)) from e
    except Exception as e:
        logger.error(f"Error resubmitting document: {str(e)}")
        raise HTTPException(status_code=500, detail=f"Error resubmitting document: {str(e)}") from e


@router.get("/{doc_id}", response_model=Dict)
async def get_document_info(doc_id: str):
    """
    Get document information.

    Args:
        doc_id: The document ID

    Returns:
        Document information

    Raises:
        HTTPException: 404 when the document does not exist; 500 for any
            other failure.
    """
    try:
        # Retrieve document
        document = await _get_document_or_404(doc_id)
        metadata = document.metadata

        # Convert to dict for response
        return {
            "document_id": document.id,
            "filename": metadata.filename,
            "file_type": metadata.file_type,
            "file_size": metadata.file_size,
            "upload_timestamp": metadata.upload_timestamp,
            "last_modified": metadata.last_modified,
            "status": document.status,
            "version": document.version,
            "reports": document.reports
        }
    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"Error retrieving document info: {str(e)}")
        raise HTTPException(status_code=500, detail=f"Error retrieving document info: {str(e)}") from e


@router.get("/", response_model=List[Dict])
async def list_documents():
    """
    List all documents.

    Returns:
        List of document summaries

    Raises:
        HTTPException: 500 when building the listing fails.
    """
    try:
        return [
            {
                "document_id": doc_id,
                "filename": document.metadata.filename,
                "status": document.status,
                "version": document.version,
                "upload_timestamp": document.metadata.upload_timestamp
            }
            for doc_id, document in document_service.documents.items()
        ]
    except Exception as e:
        logger.error(f"Error listing documents: {str(e)}")
        raise HTTPException(status_code=500, detail=f"Error listing documents: {str(e)}") from e


@router.get("/{doc_id}/stats", response_model=Dict)
async def get_document_stats(doc_id: str):
    """
    Get statistics for a document.

    Args:
        doc_id: The document ID

    Returns:
        Document statistics

    Raises:
        HTTPException: 404 when the document service raises ValueError;
            500 for any other failure.
    """
    try:
        return await document_service.get_document_stats(doc_id)
    except ValueError as e:
        raise HTTPException(status_code=404, detail=str(e)) from e
    except Exception as e:
        logger.error(f"Error retrieving document stats: {str(e)}")
        raise HTTPException(status_code=500, detail=f"Error retrieving document stats: {str(e)}") from e