Files
ds_scp_task_solution/app/api/document_routes.py
T
Aherobo Ovie Victor 0e3e22e8cb Initial commit
2025-07-17 22:20:25 +01:00

257 lines
8.9 KiB
Python

# Document API routes
from fastapi import APIRouter, UploadFile, File, HTTPException, BackgroundTasks, Query
from fastapi.responses import JSONResponse, HTMLResponse
from typing import List, Optional, Dict
import uuid
from loguru import logger
from app.core.models import (
DocumentUploadResponse,
DocumentAnalysisResponse,
DocumentStatus
)
from app.services.document import DocumentService
from app.services.embedding import EmbeddingService
from app.services.reasoning import ReasoningService
from app.services.standards import StandardsService
from app.utils.helpers import generate_html_report
# Module-level service instances, created once at import time and shared by
# every route handler in this module.
embedding_service = EmbeddingService()
reasoning_service = ReasoningService()
standards_service = StandardsService()
# Log the identity of the StandardsService instance and how many standards it
# holds at import time. The original author used this to check that the
# service behaves as a singleton (NOTE(review): the singleton behavior itself
# is implemented elsewhere and cannot be confirmed from this file).
logger.info(f"Document API - Using StandardsService instance: {id(standards_service)}")
logger.info(f"Document API - Initial standards count: {len(standards_service.standards)}")
# The document service is handed the three services created above.
document_service = DocumentService(
embedding_service=embedding_service,
reasoning_service=reasoning_service,
standards_service=standards_service
)
# Router for all endpoints in this module; every path is prefixed with
# "/documents" and tagged "documents".
router = APIRouter(prefix="/documents", tags=["documents"])
@router.post("/upload", response_model=DocumentUploadResponse)
async def upload_document(file: UploadFile = File(...)):
    """
    Upload and process a document.

    Args:
        file: The document file to upload

    Returns:
        DocumentUploadResponse with the document ID, filename and status

    Raises:
        HTTPException: 400 when the upload has no filename or the document
            service raises ValueError; 500 for any other failure.
    """
    # Validate outside the try block: previously this 400 was raised inside
    # the try and swallowed by the generic ``except Exception`` handler, so
    # clients received a 500 instead of the intended 400.
    if not file.filename:
        raise HTTPException(status_code=400, detail="Filename is required")

    try:
        # Process document
        document = await document_service.upload_document(file.file, file.filename)
        return DocumentUploadResponse(
            document_id=document.id,
            filename=document.metadata.filename,
            status=document.status,
            message="Document uploaded successfully and is being processed."
        )
    except HTTPException:
        # Pass deliberate HTTP errors through unchanged, matching the other
        # handlers in this module.
        raise
    except ValueError as e:
        raise HTTPException(status_code=400, detail=str(e)) from e
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Error processing document: {str(e)}") from e
@router.get("/{doc_id}/analysis", response_model=DocumentAnalysisResponse)
async def get_document_analysis(doc_id: str, format: Optional[str] = Query(None, description="Response format (json or html)")):
    """
    Return the most recent analysis report for a document.

    Args:
        doc_id: The document ID
        format: Response format; "html" yields an HTML page, anything else
            (including omission) yields the JSON model

    Returns:
        DocumentAnalysisResponse with the latest report, a
        DocumentAnalysisResponse carrying only status and message when the
        document is not yet COMPLETED, or an HTMLResponse when HTML was
        requested

    Raises:
        HTTPException: 404 when the document, its report list, or the latest
            report cannot be found; 500 for any other failure.
    """
    try:
        document = await document_service.get_document(doc_id)
        if not document:
            raise HTTPException(status_code=404, detail=f"Document with ID {doc_id} not found")

        # Documents that are not finished get a status-only answer.
        if document.status != DocumentStatus.COMPLETED:
            pending_message = f"Document is in {document.status} state. Please try again later."
            return DocumentAnalysisResponse(
                document_id=doc_id,
                status=document.status,
                message=pending_message
            )

        if not document.reports:
            raise HTTPException(status_code=404, detail=f"No analysis reports found for document {doc_id}")

        # The last entry in the report list is treated as the latest one.
        newest_report_id = document.reports[-1]
        report = await document_service.get_report(newest_report_id)
        if not report:
            raise HTTPException(status_code=404, detail=f"Report {newest_report_id} not found")

        # JSON is the default representation; handle it first.
        if format != "html":
            return DocumentAnalysisResponse(
                document_id=doc_id,
                status=document.status,
                report=report,
                message="Analysis completed successfully."
            )

        # HTML was requested: flatten the report into the plain dict that
        # generate_html_report receives.
        payload = {
            "document_name": document.metadata.filename,
            "timestamp": report.timestamp.strftime("%Y-%m-%d %H:%M:%S"),
            "compliance_score": report.compliance_score,
            "summary": report.summary,
            "applied_standards": report.applied_standards,
        }
        issue_rows = []
        for issue in report.issues:
            issue_rows.append({
                "section": issue.section,
                "description": issue.description,
                "level": issue.level.value,
                "reasoning": issue.reasoning,
                "standard_references": issue.standard_references,
                "recommendation": issue.recommendation
            })
        payload["issues"] = issue_rows

        return HTMLResponse(content=generate_html_report(payload))
    except HTTPException:
        # Deliberate HTTP errors pass through untouched.
        raise
    except Exception as e:
        logger.error(f"Error retrieving document analysis: {str(e)}")
        raise HTTPException(status_code=500, detail=f"Error retrieving document analysis: {str(e)}")
@router.post("/{doc_id}/resubmit", response_model=DocumentUploadResponse)
async def resubmit_document(
    doc_id: str,
    file: UploadFile = File(...),
    background_tasks: BackgroundTasks = None
):
    """
    Resubmit a document with changes.

    Args:
        doc_id: The document ID to resubmit
        file: The updated document file
        background_tasks: Background tasks handler (accepted for interface
            compatibility; not used by this handler)

    Returns:
        DocumentUploadResponse with the document ID, filename and status

    Raises:
        HTTPException: 404 when the document does not exist; 400 when the
            document service raises ValueError; 500 for any other failure.
    """
    try:
        # Check if document exists
        document = await document_service.get_document(doc_id)
        if not document:
            raise HTTPException(status_code=404, detail=f"Document with ID {doc_id} not found")

        # Process resubmitted document
        updated_document = await document_service.resubmit_document(doc_id, file.file)
        return DocumentUploadResponse(
            document_id=updated_document.id,
            filename=updated_document.metadata.filename,
            status=updated_document.status,
            message=f"Document (version {updated_document.version}) resubmitted successfully and is being processed."
        )
    except HTTPException:
        # Without this clause the 404 above was caught by the generic
        # handler below and reported to the client as a 500.
        raise
    except ValueError as e:
        raise HTTPException(status_code=400, detail=str(e)) from e
    except Exception as e:
        logger.error(f"Error resubmitting document: {str(e)}")
        raise HTTPException(status_code=500, detail=f"Error resubmitting document: {str(e)}") from e
@router.get("/{doc_id}", response_model=Dict)
async def get_document_info(doc_id: str):
    """
    Return the stored metadata and state of a single document.

    Args:
        doc_id: The document ID

    Returns:
        Dict with the document's ID, file metadata, status, version and
        report list

    Raises:
        HTTPException: 404 when the document does not exist; 500 for any
            other failure.
    """
    try:
        document = await document_service.get_document(doc_id)
        if not document:
            raise HTTPException(status_code=404, detail=f"Document with ID {doc_id} not found")

        # Assemble the response field by field, in the original key order.
        info = {"document_id": document.id}
        info["filename"] = document.metadata.filename
        info["file_type"] = document.metadata.file_type
        info["file_size"] = document.metadata.file_size
        info["upload_timestamp"] = document.metadata.upload_timestamp
        info["last_modified"] = document.metadata.last_modified
        info["status"] = document.status
        info["version"] = document.version
        info["reports"] = document.reports
        return info
    except HTTPException:
        # Deliberate HTTP errors pass through untouched.
        raise
    except Exception as e:
        logger.error(f"Error retrieving document info: {str(e)}")
        raise HTTPException(status_code=500, detail=f"Error retrieving document info: {str(e)}")
@router.get("/", response_model=List[Dict])
async def list_documents():
    """
    Return a summary entry for every document held by the document service.

    Returns:
        List of dicts, each with the document ID, filename, status, version
        and upload timestamp

    Raises:
        HTTPException: 500 when building the list fails.
    """
    try:
        # One summary dict per (id, document) pair in the service's mapping.
        return [
            {
                "document_id": key,
                "filename": doc.metadata.filename,
                "status": doc.status,
                "version": doc.version,
                "upload_timestamp": doc.metadata.upload_timestamp
            }
            for key, doc in document_service.documents.items()
        ]
    except Exception as e:
        logger.error(f"Error listing documents: {str(e)}")
        raise HTTPException(status_code=500, detail=f"Error listing documents: {str(e)}")
@router.get("/{doc_id}/stats", response_model=Dict)
async def get_document_stats(doc_id: str):
    """
    Return the statistics the document service reports for a document.

    Args:
        doc_id: The document ID

    Returns:
        Whatever document_service.get_document_stats returns for the ID

    Raises:
        HTTPException: 404 when the service raises ValueError; 500 for any
            other failure.
    """
    try:
        return await document_service.get_document_stats(doc_id)
    except ValueError as e:
        # The service's ValueError is mapped to "not found".
        raise HTTPException(status_code=404, detail=str(e))
    except Exception as e:
        failure_text = str(e)
        logger.error(f"Error retrieving document stats: {failure_text}")
        raise HTTPException(status_code=500, detail=f"Error retrieving document stats: {failure_text}")