# ds_scp_task_solution/app/api/document_routes.py

# Document API routes
from fastapi import APIRouter, UploadFile, File, HTTPException, BackgroundTasks, Query
from fastapi.responses import JSONResponse, HTMLResponse
from typing import List, Optional, Dict
import uuid
from loguru import logger

from app.core.models import (
    DocumentUploadResponse,
    DocumentAnalysisResponse,
    DocumentStatus
)
from app.services.document import DocumentService
from app.services.embedding import EmbeddingService
from app.services.reasoning import ReasoningService
from app.services.standards import StandardsService
from app.utils.helpers import generate_html_report

# Create the service instances shared by every route handler in this module.
# They are constructed once, when the module is imported.
embedding_service = EmbeddingService()
reasoning_service = ReasoningService()
standards_service = StandardsService()

# Log the standards service instance ID and its initial standards count so the
# instance used by this module can be identified in the logs
# (NOTE(review): intended to verify StandardsService behaves as a singleton -- confirm).
logger.info(f"Document API - Using StandardsService instance: {id(standards_service)}")
logger.info(f"Document API - Initial standards count: {len(standards_service.standards)}")

# The document service is handed the three services created above.
document_service = DocumentService(
    embedding_service=embedding_service,
    reasoning_service=reasoning_service,
    standards_service=standards_service
)

# Create router; every route declared below is registered under the
# "/documents" prefix and tagged "documents".
router = APIRouter(prefix="/documents", tags=["documents"])

@router.post("/upload", response_model=DocumentUploadResponse)
async def upload_document(file: UploadFile = File(...)):
    """
    Upload and process a document.

    Args:
        file: The document file to upload

    Returns:
        DocumentUploadResponse with document ID

    Raises:
        HTTPException: 400 when the upload has no filename or the document
            service raises ValueError; 500 for any other failure
    """
    try:
        # Reject uploads that arrive without a filename
        if not file.filename:
            raise HTTPException(status_code=400, detail="Filename is required")

        # Process document
        document = await document_service.upload_document(file.file, file.filename)

        return DocumentUploadResponse(
            document_id=document.id,
            filename=document.metadata.filename,
            status=document.status,
            message="Document uploaded successfully and is being processed."
        )
    except HTTPException:
        # HTTPException derives from Exception; re-raise it untouched so the
        # 400 raised above is not rewrapped as a 500 by the generic handler.
        raise
    except ValueError as e:
        raise HTTPException(status_code=400, detail=str(e))
    except Exception as e:
        logger.error(f"Error processing document: {str(e)}")
        raise HTTPException(status_code=500, detail=f"Error processing document: {str(e)}")

@router.get("/{doc_id}/analysis", response_model=DocumentAnalysisResponse)
async def get_document_analysis(doc_id: str, format: Optional[str] = Query(None, description="Response format (json or html)")):
    """
    Return the most recent analysis report of a document.

    Args:
        doc_id: The document ID
        format: Response format; "html" renders the report as an HTML page,
            any other value produces the JSON response

    Returns:
        DocumentAnalysisResponse with the latest report, or with only a status
        message while the document is not in the COMPLETED state; an
        HTMLResponse when format is "html"
    """
    try:
        doc = await document_service.get_document(doc_id)
        if not doc:
            raise HTTPException(status_code=404, detail=f"Document with ID {doc_id} not found")

        # Documents that are not finished yet only get a status message
        if document_not_ready := doc.status != DocumentStatus.COMPLETED:
            return DocumentAnalysisResponse(
                document_id=doc_id,
                status=doc.status,
                message=f"Document is in {doc.status} state. Please try again later."
            )

        if not doc.reports:
            raise HTTPException(status_code=404, detail=f"No analysis reports found for document {doc_id}")

        # The last entry of the reports list is treated as the latest report
        newest_report_id = doc.reports[-1]
        latest = await document_service.get_report(newest_report_id)
        if not latest:
            raise HTTPException(status_code=404, detail=f"Report {newest_report_id} not found")

        # Anything other than an explicit "html" request gets the JSON model
        if format != "html":
            return DocumentAnalysisResponse(
                document_id=doc_id,
                status=doc.status,
                report=latest,
                message="Analysis completed successfully."
            )

        # Flatten the report into plain values for the HTML renderer
        payload = {
            "document_name": doc.metadata.filename,
            "timestamp": latest.timestamp.strftime("%Y-%m-%d %H:%M:%S"),
            "compliance_score": latest.compliance_score,
            "summary": latest.summary,
            "applied_standards": latest.applied_standards,
        }
        issue_rows = []
        for item in latest.issues:
            issue_rows.append({
                "section": item.section,
                "description": item.description,
                "level": item.level.value,
                "reasoning": item.reasoning,
                "standard_references": item.standard_references,
                "recommendation": item.recommendation
            })
        payload["issues"] = issue_rows

        return HTMLResponse(content=generate_html_report(payload))

    except HTTPException:
        # Let deliberate HTTP errors (the 404s above) pass through unchanged
        raise
    except Exception as exc:
        logger.error(f"Error retrieving document analysis: {str(exc)}")
        raise HTTPException(status_code=500, detail=f"Error retrieving document analysis: {str(exc)}")

@router.post("/{doc_id}/resubmit", response_model=DocumentUploadResponse)
async def resubmit_document(
    doc_id: str,
    file: UploadFile = File(...),
    background_tasks: BackgroundTasks = None
):
    """
    Resubmit a document with changes.

    Args:
        doc_id: The document ID to resubmit
        file: The updated document file
        background_tasks: Background tasks handler (not used by this handler)

    Returns:
        DocumentUploadResponse with document ID

    Raises:
        HTTPException: 404 when no document with doc_id exists; 400 when the
            document service raises ValueError; 500 for any other failure
    """
    try:
        # Check if document exists
        document = await document_service.get_document(doc_id)
        if not document:
            raise HTTPException(status_code=404, detail=f"Document with ID {doc_id} not found")

        # Process resubmitted document
        updated_document = await document_service.resubmit_document(doc_id, file.file)

        return DocumentUploadResponse(
            document_id=updated_document.id,
            filename=updated_document.metadata.filename,
            status=updated_document.status,
            message=f"Document (version {updated_document.version}) resubmitted successfully and is being processed."
        )
    except HTTPException:
        # HTTPException derives from Exception; re-raise it untouched so the
        # 404 raised above is not rewrapped as a 500 by the generic handler.
        raise
    except ValueError as e:
        raise HTTPException(status_code=400, detail=str(e))
    except Exception as e:
        logger.error(f"Error resubmitting document: {str(e)}")
        raise HTTPException(status_code=500, detail=f"Error resubmitting document: {str(e)}")

@router.get("/{doc_id}", response_model=Dict)
async def get_document_info(doc_id: str):
    """
    Return the stored metadata and state of a single document.

    Args:
        doc_id: The document ID

    Returns:
        Dict with the document's ID, file metadata, status, version and
        report list
    """
    try:
        doc = await document_service.get_document(doc_id)
        if not doc:
            raise HTTPException(status_code=404, detail=f"Document with ID {doc_id} not found")

        # Assemble the plain-dict payload field by field
        info = dict(
            document_id=doc.id,
            filename=doc.metadata.filename,
            file_type=doc.metadata.file_type,
            file_size=doc.metadata.file_size,
            upload_timestamp=doc.metadata.upload_timestamp,
            last_modified=doc.metadata.last_modified,
            status=doc.status,
            version=doc.version,
            reports=doc.reports,
        )
        return info
    except HTTPException:
        # Let the 404 above pass through unchanged
        raise
    except Exception as exc:
        logger.error(f"Error retrieving document info: {str(exc)}")
        raise HTTPException(status_code=500, detail=f"Error retrieving document info: {str(exc)}")

@router.get("/", response_model=List[Dict])
async def list_documents():
    """
    Return a summary entry for every document held by the document service.

    Returns:
        List of dicts, each with the document's ID, filename, status, version
        and upload timestamp
    """
    try:
        # One summary dict per (id, document) pair in the service's store
        return [
            {
                "document_id": key,
                "filename": entry.metadata.filename,
                "status": entry.status,
                "version": entry.version,
                "upload_timestamp": entry.metadata.upload_timestamp
            }
            for key, entry in document_service.documents.items()
        ]
    except Exception as exc:
        logger.error(f"Error listing documents: {str(exc)}")
        raise HTTPException(status_code=500, detail=f"Error listing documents: {str(exc)}")

@router.get("/{doc_id}/stats", response_model=Dict)
async def get_document_stats(doc_id: str):
    """
    Return the statistics the document service reports for a document.

    Args:
        doc_id: The document ID

    Returns:
        Whatever the document service's get_document_stats returns for doc_id
    """
    try:
        # Awaited inside the try so service errors reach the handlers below
        return await document_service.get_document_stats(doc_id)
    except ValueError as exc:
        # A ValueError from the service is reported to the client as a 404
        raise HTTPException(status_code=404, detail=str(exc))
    except Exception as exc:
        logger.error(f"Error retrieving document stats: {str(exc)}")
        raise HTTPException(status_code=500, detail=f"Error retrieving document stats: {str(exc)}")