Initial commit
This commit is contained in:
@@ -0,0 +1,8 @@
|
||||
"""
|
||||
Mini SpecsComply Pro (SCP)
|
||||
--------------------------
|
||||
A lightweight document compliance and validation tool designed to analyze
|
||||
and verify technical documents against predefined standards and project-specific requirements.
|
||||
"""
|
||||
|
||||
__version__ = "0.1.0"
|
||||
@@ -0,0 +1 @@
|
||||
"""API routes for the Mini SpecsComply Pro application."""
|
||||
@@ -0,0 +1,256 @@
|
||||
# Document API routes
|
||||
from fastapi import APIRouter, UploadFile, File, HTTPException, BackgroundTasks, Query
|
||||
from fastapi.responses import JSONResponse, HTMLResponse
|
||||
from typing import List, Optional, Dict
|
||||
import uuid
|
||||
from loguru import logger
|
||||
|
||||
from app.core.models import (
|
||||
DocumentUploadResponse,
|
||||
DocumentAnalysisResponse,
|
||||
DocumentStatus
|
||||
)
|
||||
from app.services.document import DocumentService
|
||||
from app.services.embedding import EmbeddingService
|
||||
from app.services.reasoning import ReasoningService
|
||||
from app.services.standards import StandardsService
|
||||
from app.utils.helpers import generate_html_report
|
||||
|
||||
# Module-level service instances shared by every document route.
embedding_service = EmbeddingService()
reasoning_service = ReasoningService()
standards_service = StandardsService()

# Record which StandardsService object this module holds (and how many
# standards it starts with) so the logs can confirm the singleton pattern.
logger.info(f"Document API - Using StandardsService instance: {id(standards_service)}")
logger.info(f"Document API - Initial standards count: {len(standards_service.standards)}")

# The document service is wired to the three services created above.
document_service = DocumentService(
    embedding_service=embedding_service,
    reasoning_service=reasoning_service,
    standards_service=standards_service,
)

# Router for all /documents endpoints.
router = APIRouter(prefix="/documents", tags=["documents"])
|
||||
|
||||
@router.post("/upload", response_model=DocumentUploadResponse)
async def upload_document(file: UploadFile = File(...)):
    """
    Upload and process a document.

    Args:
        file: The document file to upload

    Returns:
        DocumentUploadResponse with document ID

    Raises:
        HTTPException: 400 when the filename is missing or the document
            service raises ValueError; 500 for any other failure.
    """
    try:
        # A filename is required; the service validates the file type from it.
        if not file.filename:
            raise HTTPException(status_code=400, detail="Filename is required")

        # Process document
        document = await document_service.upload_document(file.file, file.filename)

        return DocumentUploadResponse(
            document_id=document.id,
            filename=document.metadata.filename,
            status=document.status,
            message="Document uploaded successfully and is being processed."
        )
    except HTTPException:
        # Deliberate HTTP errors (such as the 400 above) must pass through
        # unchanged; otherwise the generic handler below reports them as 500.
        raise
    except ValueError as e:
        raise HTTPException(status_code=400, detail=str(e))
    except Exception as e:
        logger.error(f"Error processing document: {str(e)}")
        raise HTTPException(status_code=500, detail=f"Error processing document: {str(e)}")
|
||||
|
||||
@router.get("/{doc_id}/analysis", response_model=DocumentAnalysisResponse)
async def get_document_analysis(doc_id: str, format: Optional[str] = Query(None, description="Response format (json or html)")):
    """
    Return the most recent analysis report of a document.

    Args:
        doc_id: The document ID
        format: Response format (json or html); anything other than "html"
            yields the JSON response

    Returns:
        DocumentAnalysisResponse with analysis results, or an HTMLResponse
        rendered by generate_html_report when format is "html"

    Raises:
        HTTPException: 404 when the document, its report list, or the latest
            report is missing; 500 for any other failure.
    """
    try:
        document = await document_service.get_document(doc_id)
        if not document:
            raise HTTPException(status_code=404, detail=f"Document with ID {doc_id} not found")

        # Anything short of COMPLETED gets a status-only answer.
        if document.status != DocumentStatus.COMPLETED:
            return DocumentAnalysisResponse(
                document_id=doc_id,
                status=document.status,
                message=f"Document is in {document.status} state. Please try again later."
            )

        if not document.reports:
            raise HTTPException(status_code=404, detail=f"No analysis reports found for document {doc_id}")

        # The last entry of the report-ID list is treated as the latest one.
        latest_report_id = document.reports[-1]
        report = await document_service.get_report(latest_report_id)
        if not report:
            raise HTTPException(status_code=404, detail=f"Report {latest_report_id} not found")

        if format != "html":
            return DocumentAnalysisResponse(
                document_id=doc_id,
                status=document.status,
                report=report,
                message="Analysis completed successfully."
            )

        # HTML requested: flatten the report into the plain dict the
        # renderer receives.
        report_data = {
            "document_name": document.metadata.filename,
            "timestamp": report.timestamp.strftime("%Y-%m-%d %H:%M:%S"),
            "compliance_score": report.compliance_score,
            "summary": report.summary,
            "applied_standards": report.applied_standards,
        }
        issue_rows = []
        for issue in report.issues:
            issue_rows.append({
                "section": issue.section,
                "description": issue.description,
                "level": issue.level.value,
                "reasoning": issue.reasoning,
                "standard_references": issue.standard_references,
                "recommendation": issue.recommendation
            })
        report_data["issues"] = issue_rows

        return HTMLResponse(content=generate_html_report(report_data))

    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"Error retrieving document analysis: {str(e)}")
        raise HTTPException(status_code=500, detail=f"Error retrieving document analysis: {str(e)}")
|
||||
|
||||
@router.post("/{doc_id}/resubmit", response_model=DocumentUploadResponse)
async def resubmit_document(
    doc_id: str,
    file: UploadFile = File(...),
    background_tasks: BackgroundTasks = None
):
    """
    Resubmit a document with changes.

    Args:
        doc_id: The document ID to resubmit
        file: The updated document file
        background_tasks: Background tasks handler (not used by this handler)

    Returns:
        DocumentUploadResponse with document ID

    Raises:
        HTTPException: 404 when the document does not exist; 400 when the
            document service raises ValueError; 500 for any other failure.
    """
    try:
        # Check if document exists
        document = await document_service.get_document(doc_id)
        if not document:
            raise HTTPException(status_code=404, detail=f"Document with ID {doc_id} not found")

        # Process resubmitted document
        updated_document = await document_service.resubmit_document(doc_id, file.file)

        return DocumentUploadResponse(
            document_id=updated_document.id,
            filename=updated_document.metadata.filename,
            status=updated_document.status,
            message=f"Document (version {updated_document.version}) resubmitted successfully and is being processed."
        )
    except HTTPException:
        # Keep the 404 above a 404; without this clause the generic handler
        # below would re-wrap it as a 500.
        raise
    except ValueError as e:
        raise HTTPException(status_code=400, detail=str(e))
    except Exception as e:
        logger.error(f"Error resubmitting document: {str(e)}")
        raise HTTPException(status_code=500, detail=f"Error resubmitting document: {str(e)}")
|
||||
|
||||
@router.get("/{doc_id}", response_model=Dict)
async def get_document_info(doc_id: str):
    """
    Return the stored metadata and state of one document.

    Args:
        doc_id: The document ID

    Returns:
        Dict with the document's ID, file metadata, status, version and
        report IDs

    Raises:
        HTTPException: 404 when the document is unknown; 500 on any other
            failure.
    """
    try:
        document = await document_service.get_document(doc_id)
        if not document:
            raise HTTPException(status_code=404, detail=f"Document with ID {doc_id} not found")

        # Flatten the model into a plain dict for the response.
        meta = document.metadata
        info = {
            "document_id": document.id,
            "filename": meta.filename,
            "file_type": meta.file_type,
            "file_size": meta.file_size,
            "upload_timestamp": meta.upload_timestamp,
            "last_modified": meta.last_modified,
            "status": document.status,
            "version": document.version,
            "reports": document.reports
        }
        return info
    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"Error retrieving document info: {str(e)}")
        raise HTTPException(status_code=500, detail=f"Error retrieving document info: {str(e)}")
|
||||
|
||||
@router.get("/", response_model=List[Dict])
async def list_documents():
    """
    Summarise every document held by the document service.

    Returns:
        List of dicts, one per document, with ID, filename, status, version
        and upload timestamp

    Raises:
        HTTPException: 500 when building the list fails.
    """
    try:
        # Reads the service's document mapping directly.
        return [
            {
                "document_id": doc_id,
                "filename": document.metadata.filename,
                "status": document.status,
                "version": document.version,
                "upload_timestamp": document.metadata.upload_timestamp
            }
            for doc_id, document in document_service.documents.items()
        ]
    except Exception as e:
        logger.error(f"Error listing documents: {str(e)}")
        raise HTTPException(status_code=500, detail=f"Error listing documents: {str(e)}")
|
||||
|
||||
@router.get("/{doc_id}/stats", response_model=Dict)
async def get_document_stats(doc_id: str):
    """
    Return the statistics the document service computes for a document.

    Args:
        doc_id: The document ID

    Returns:
        Document statistics as produced by the document service

    Raises:
        HTTPException: 404 when the service raises ValueError; 500 for any
            other failure.
    """
    try:
        return await document_service.get_document_stats(doc_id)
    except ValueError as e:
        raise HTTPException(status_code=404, detail=str(e))
    except Exception as e:
        logger.error(f"Error retrieving document stats: {str(e)}")
        raise HTTPException(status_code=500, detail=f"Error retrieving document stats: {str(e)}")
|
||||
@@ -0,0 +1,15 @@
|
||||
# API routes - Main router
|
||||
from fastapi import APIRouter
|
||||
|
||||
# Import sub-routers
|
||||
from app.api.document_routes import router as document_router
|
||||
from app.api.standards_routes import router as standards_router
|
||||
|
||||
# Top-level API router: it carries no prefix of its own and simply
# aggregates the feature routers, documents first, then standards.
router = APIRouter()
for _sub_router in (document_router, standards_router):
    router.include_router(_sub_router)
|
||||
|
||||
# Add any additional routes that don't fit in the other routers here
|
||||
@@ -0,0 +1,113 @@
|
||||
# Standards API routes
|
||||
from fastapi import APIRouter, UploadFile, File, HTTPException, Query
|
||||
from typing import List, Optional
|
||||
from loguru import logger
|
||||
|
||||
from app.core.models import Standard, StandardUploadResponse
|
||||
from app.services.standards import StandardsService
|
||||
|
||||
# Service instance used by every standards route in this module.
standards_service = StandardsService()

# Router for all /standards endpoints.
router = APIRouter(prefix="/standards", tags=["standards"])
|
||||
|
||||
@router.get("/", response_model=List[Standard])
async def get_all_standards():
    """
    Return every compliance standard known to the standards service.

    Returns:
        List of all standards

    Raises:
        HTTPException: 500 when the service call fails.
    """
    try:
        return await standards_service.get_all_standards()
    except Exception as e:
        logger.error(f"Error retrieving standards: {str(e)}")
        raise HTTPException(status_code=500, detail=f"Error retrieving standards: {str(e)}")
|
||||
|
||||
@router.get("/{standard_id}", response_model=Standard)
async def get_standard(standard_id: str):
    """
    Look up a single standard by its ID.

    Args:
        standard_id: The standard ID

    Returns:
        Standard details

    Raises:
        HTTPException: 404 when no standard has that ID; 500 for any other
            failure.
    """
    try:
        found = await standards_service.get_standard(standard_id)
        if found:
            return found
        raise HTTPException(status_code=404, detail=f"Standard with ID {standard_id} not found")
    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"Error retrieving standard: {str(e)}")
        raise HTTPException(status_code=500, detail=f"Error retrieving standard: {str(e)}")
|
||||
|
||||
@router.post("/upload", response_model=StandardUploadResponse)
async def upload_standard(file: UploadFile = File(...)):
    """
    Upload a new compliance standard definition.

    Args:
        file: JSON file containing standard definition

    Returns:
        StandardUploadResponse with standard ID

    Raises:
        HTTPException: 400 when the filename is missing, does not end in
            ".json", or the standards service raises ValueError; 500 for any
            other failure.
    """
    try:
        # Check file extension
        if not file.filename:
            raise HTTPException(status_code=400, detail="Filename is required")

        if not file.filename.lower().endswith('.json'):
            raise HTTPException(status_code=400, detail="Standard definition must be a JSON file")

        # Log the standards service instance ID to verify singleton pattern
        logger.info(f"Standards API - Using StandardsService instance: {id(standards_service)}")
        logger.info(f"Standards API - Standards count before upload: {len(standards_service.standards)}")

        # Process standard
        standard = await standards_service.upload_standard(file.file, file.filename)

        # Log the updated standards count
        logger.info(f"Standards API - Standards count after upload: {len(standards_service.standards)}")
        logger.info(f"Standards API - Uploaded standard: {standard.name} (ID: {standard.id})")

        return StandardUploadResponse(
            standard_id=standard.id,
            name=standard.name,
            requirement_count=len(standard.requirements),
            message="Standard uploaded successfully."
        )
    except HTTPException:
        # The two validation errors above are intentional 400s; re-raise them
        # untouched so the generic handler below does not turn them into 500s.
        raise
    except ValueError as e:
        raise HTTPException(status_code=400, detail=str(e))
    except Exception as e:
        logger.error(f"Error processing standard: {str(e)}")
        raise HTTPException(status_code=500, detail=f"Error processing standard: {str(e)}")
|
||||
|
||||
@router.get("/search/", response_model=List[Standard])
async def search_standards(name: Optional[str] = Query(None, description="Standard name to search for")):
    """
    Find standards by name, or list them all when no name is given.

    Args:
        name: Standard name to search for (optional)

    Returns:
        List holding the single matching standard, an empty list when the
        name matches nothing, or every standard when name is omitted

    Raises:
        HTTPException: 500 when the service call fails.
    """
    try:
        if not name:
            return await standards_service.get_all_standards()

        match = await standards_service.get_standard_by_name(name)
        return [match] if match else []
    except Exception as e:
        logger.error(f"Error searching standards: {str(e)}")
        raise HTTPException(status_code=500, detail=f"Error searching standards: {str(e)}")
|
||||
@@ -0,0 +1 @@
|
||||
"""Core functionality for the Mini SpecsComply Pro application."""
|
||||
@@ -0,0 +1,44 @@
|
||||
import os
|
||||
from typing import Optional
|
||||
from pydantic import BaseModel
|
||||
from dotenv import load_dotenv
|
||||
|
||||
# Load environment variables from .env file
|
||||
load_dotenv()
|
||||
|
||||
class Settings(BaseModel):
    """Application settings whose defaults come from environment variables.

    Each ``os.getenv`` call below runs once, while this class body executes,
    so the environment must already be populated when the module is imported.
    """

    # --- Application information ---
    APP_NAME: str = os.getenv("APP_NAME", "Mini SpecsComply Pro")
    APP_VERSION: str = os.getenv("APP_VERSION", "0.1.0")
    # Truthy spellings accepted for DEBUG: "true", "1", "t" (case-insensitive).
    DEBUG: bool = os.getenv("DEBUG", "False").lower() in ("true", "1", "t")

    # --- API keys (None when the variable is unset) ---
    GROQ_API_KEY: Optional[str] = os.getenv("GROQ_API_KEY")
    COHERE_API_KEY: Optional[str] = os.getenv("COHERE_API_KEY")

    # --- Vector database settings: Pinecone ---
    PINECONE_API_KEY: Optional[str] = os.getenv("PINECONE_API_KEY")
    PINECONE_INDEX_NAME: str = os.getenv("PINECONE_INDEX_NAME", "specscomply_documents")

    # --- Vector database settings: Weaviate ---
    WEAVIATE_URL: Optional[str] = os.getenv("WEAVIATE_URL")
    WEAVIATE_API_KEY: Optional[str] = os.getenv("WEAVIATE_API_KEY")

    # --- Model names (fixed defaults, not read from the environment) ---
    EMBEDDING_MODEL: str = "embed-english-v3.0"  # Default embedding model
    RERANKER_MODEL: str = "rerank-english-v2.0"  # Default reranker model
    REASONING_MODEL: str = "llama-3.3-70b-versatile"  # Default reasoning model
    PROCESSING_MODEL: str = "llama-3.3-70b-versatile"  # Default quick processing model

    # Vector database selector (pinecone or weaviate), lower-cased.
    VECTOR_DB: str = os.getenv("VECTOR_DB", "pinecone").lower()

    class Config:
        # NOTE(review): env_file / case_sensitive look like BaseSettings
        # options; confirm they have any effect on a plain BaseModel.
        env_file = ".env"
        case_sensitive = True


# Global settings instance shared by the rest of the application.
settings = Settings()
|
||||
@@ -0,0 +1,127 @@
|
||||
# Data models
|
||||
from pydantic import BaseModel, Field
|
||||
from typing import List, Dict, Optional, Any, Union
|
||||
from datetime import datetime
|
||||
from enum import Enum
|
||||
import uuid
|
||||
|
||||
class DocumentStatus(str, Enum):
    """Processing states a document can be in.

    Members are also ``str`` instances, so each one compares equal to its
    lowercase value.
    """

    PENDING = "pending"
    PROCESSING = "processing"
    COMPLETED = "completed"
    FAILED = "failed"
|
||||
|
||||
class ComplianceLevel(str, Enum):
    """Severity levels assigned to compliance issues.

    Members are also ``str`` instances, so each one compares equal to its
    lowercase value.
    """

    CRITICAL = "critical"
    MAJOR = "major"
    MINOR = "minor"
    INFO = "info"
|
||||
|
||||
class ComplianceIssue(BaseModel):
    """A single compliance problem found in a document."""

    # Fresh UUID4 string per instance unless an id is supplied.
    id: str = Field(default_factory=lambda: str(uuid.uuid4()))
    section: str
    description: str
    level: ComplianceLevel
    line_number: Optional[int] = None
    # Detailed explanation of why this is an issue.
    reasoning: str = ""
    # References to specific standards or requirements.
    standard_references: List[str] = []
    recommendation: str
|
||||
|
||||
class DocumentMetadata(BaseModel):
    """File-level metadata recorded for an uploaded document."""

    filename: str
    file_type: str
    # In bytes
    file_size: int
    # Defaults to datetime.now() at instance creation.
    upload_timestamp: datetime = Field(default_factory=datetime.now)
    last_modified: Optional[datetime] = None
|
||||
|
||||
class DocumentEmbedding(BaseModel):
    """Reference to the embeddings stored for a document."""

    embedding_id: str
    embedding_model: str
    vector_db: str
    # Section name to section ID in vector DB
    sections: Dict[str, str]
|
||||
|
||||
class ComplianceReport(BaseModel):
    """Compliance report generated for one document.

    The ``*_issues_count`` properties are computed from ``issues`` on every
    access; they are not stored fields.
    """

    # Fresh UUID4 string per instance unless an id is supplied.
    report_id: str = Field(default_factory=lambda: str(uuid.uuid4()))
    document_id: str
    # Defaults to datetime.now() at instance creation.
    timestamp: datetime = Field(default_factory=datetime.now)
    # 0.0 to 1.0
    compliance_score: float
    summary: str
    issues: List[ComplianceIssue] = []
    # Standards used for analysis
    applied_standards: List[str] = []

    @property
    def critical_issues_count(self) -> int:
        """Number of issues whose level is CRITICAL."""
        return len([i for i in self.issues if i.level == ComplianceLevel.CRITICAL])

    @property
    def major_issues_count(self) -> int:
        """Number of issues whose level is MAJOR."""
        return len([i for i in self.issues if i.level == ComplianceLevel.MAJOR])

    @property
    def minor_issues_count(self) -> int:
        """Number of issues whose level is MINOR."""
        return len([i for i in self.issues if i.level == ComplianceLevel.MINOR])

    @property
    def info_issues_count(self) -> int:
        """Number of issues whose level is INFO."""
        return len([i for i in self.issues if i.level == ComplianceLevel.INFO])
|
||||
|
||||
class Document(BaseModel):
    """Tracking record for an uploaded document."""

    # Fresh UUID4 string per instance unless an id is supplied.
    id: str = Field(default_factory=lambda: str(uuid.uuid4()))
    metadata: DocumentMetadata
    embedding: Optional[DocumentEmbedding] = None
    status: DocumentStatus = DocumentStatus.PENDING
    # Incremented on resubmissions
    version: int = 1
    # List of report IDs
    reports: List[str] = []
|
||||
|
||||
class DocumentUploadResponse(BaseModel):
    """Payload returned by the document upload and resubmit endpoints."""

    document_id: str
    filename: str
    status: DocumentStatus
    message: str
|
||||
|
||||
class DocumentAnalysisResponse(BaseModel):
    """Payload returned when a document's analysis is requested."""

    document_id: str
    status: DocumentStatus
    # Optional; defaults to None when no report is supplied.
    report: Optional[ComplianceReport] = None
    message: str
|
||||
|
||||
|
||||
class RequirementSeverity(str, Enum):
    """Severity levels a requirement can carry.

    Members are also ``str`` instances, so each one compares equal to its
    lowercase value.
    """

    CRITICAL = "critical"
    MAJOR = "major"
    MINOR = "minor"
    INFO = "info"
|
||||
|
||||
|
||||
class Requirement(BaseModel):
    """One requirement belonging to a compliance standard."""

    # Fresh UUID4 string per instance unless an id is supplied.
    id: str = Field(default_factory=lambda: str(uuid.uuid4()))
    description: str
    severity: RequirementSeverity
    details: Optional[str] = None
|
||||
|
||||
|
||||
class Standard(BaseModel):
    """A compliance standard and the requirements it contains."""

    # Fresh UUID4 string per instance unless an id is supplied.
    id: str = Field(default_factory=lambda: str(uuid.uuid4()))
    name: str
    description: str
    requirements: List[Requirement] = []
|
||||
|
||||
|
||||
class StandardUploadResponse(BaseModel):
    """Payload returned by the standard upload endpoint."""

    standard_id: str
    name: str
    requirement_count: int
    message: str
|
||||
+123
@@ -0,0 +1,123 @@
|
||||
from fastapi import FastAPI, Request, HTTPException
|
||||
from fastapi.middleware.cors import CORSMiddleware
|
||||
from fastapi.responses import JSONResponse, HTMLResponse
|
||||
from fastapi.staticfiles import StaticFiles
|
||||
from fastapi.templating import Jinja2Templates
|
||||
import os
|
||||
import uvicorn
|
||||
from loguru import logger
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
from app.core.config import settings
|
||||
from app.api.routes import router as api_router
|
||||
|
||||
# Configure logging
|
||||
# Replace loguru's default sink with a stdout sink whose level follows DEBUG.
logger.remove()
logger.add(
    sys.stdout,
    format="{time:YYYY-MM-DD HH:mm:ss} | {level} | {message}",
    level="DEBUG" if settings.DEBUG else "INFO",
)

# Create FastAPI app
app = FastAPI(
    title=settings.APP_NAME,
    version=settings.APP_VERSION,
    description="A lightweight document compliance and validation tool",
    docs_url="/docs",
    redoc_url="/redoc",
    openapi_url="/openapi.json",
)

# CORS is wide open (any origin, method and header, credentials allowed).
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],  # In production, this should be restricted
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Directory that contains this module; templates/ and static/ live beside it.
_base_dir = os.path.dirname(os.path.abspath(__file__))

# Templates directory is created on import if it does not exist yet.
templates_dir = os.path.join(_base_dir, "templates")
os.makedirs(templates_dir, exist_ok=True)
templates = Jinja2Templates(directory=templates_dir)

# Static files directory, created on import if missing.
static_dir = os.path.join(_base_dir, "static")
if not os.path.exists(static_dir):
    os.makedirs(static_dir, exist_ok=True)

# Ensure CSS and JS directories exist
css_dir = os.path.join(static_dir, "css")
js_dir = os.path.join(static_dir, "js")
os.makedirs(css_dir, exist_ok=True)
os.makedirs(js_dir, exist_ok=True)

# Serve the static directory under /static.
app.mount("/static", StaticFiles(directory=static_dir), name="static")

# All API routes live under /api.
app.include_router(api_router, prefix="/api")
|
||||
|
||||
# Root endpoint - serve the frontend
|
||||
@app.get("/", response_class=HTMLResponse)
async def root(request: Request):
    """
    Serve the frontend entry page.

    Returns static/index.html when that file exists; otherwise a small
    built-in landing page that links to the API documentation.

    Args:
        request: The incoming request (not used by this handler)

    Returns:
        HTMLResponse with the page content
    """
    # Check if index.html exists in static directory
    index_path = os.path.join(static_dir, "index.html")

    if os.path.exists(index_path):
        # Decode explicitly as UTF-8: without an encoding argument open()
        # falls back to the platform locale, which can garble or reject
        # non-ASCII HTML on some systems.
        with open(index_path, "r", encoding="utf-8") as f:
            return HTMLResponse(content=f.read())

    # If not found, return a simple HTML response
    return HTMLResponse(
        content=f"""
        <!DOCTYPE html>
        <html>
        <head>
            <title>{settings.APP_NAME}</title>
            <style>
                body {{ font-family: Arial, sans-serif; max-width: 800px; margin: 0 auto; padding: 20px; }}
                h1 {{ color: #3498db; }}
                .api-link {{ margin-top: 20px; }}
                .api-link a {{ display: inline-block; padding: 10px 20px; background-color: #3498db; color: white;
                              text-decoration: none; border-radius: 4px; }}
                .api-link a:hover {{ background-color: #2980b9; }}
            </style>
        </head>
        <body>
            <h1>{settings.APP_NAME}</h1>
            <p>Welcome to {settings.APP_NAME}, a lightweight document compliance and validation tool.</p>
            <p>This application is currently running in API-only mode.</p>
            <div class="api-link">
                <a href="/docs">View API Documentation</a>
            </div>
        </body>
        </html>
        """
    )
|
||||
|
||||
# Health check endpoint
|
||||
@app.get("/health")
async def health():
    """Report liveness with a constant payload."""
    payload = {"status": "healthy"}
    return payload
|
||||
|
||||
# Global exception handler
|
||||
@app.exception_handler(Exception)
async def global_exception_handler(request: Request, exc: Exception):
    """
    Turn any otherwise unhandled exception into a JSON 500 response.

    Args:
        request: The request being handled (not used)
        exc: The exception that escaped the route

    Returns:
        JSONResponse with a generic message plus the exception text
    """
    logger.error(f"Unhandled exception: {str(exc)}")
    body = {"message": "An unexpected error occurred", "detail": str(exc)}
    return JSONResponse(status_code=500, content=body)
|
||||
|
||||
if __name__ == "__main__":
    # Direct execution: start uvicorn on all interfaces, port 8000.
    # Auto-reload and log verbosity both follow settings.DEBUG.
    uvicorn.run(
        "app.main:app",
        host="0.0.0.0",
        port=8000,
        reload=settings.DEBUG,
        log_level=("debug" if settings.DEBUG else "info"),
    )
|
||||
@@ -0,0 +1 @@
|
||||
"""Services for the Mini SpecsComply Pro application."""
|
||||
@@ -0,0 +1,461 @@
|
||||
# Document processing
|
||||
import os
|
||||
import uuid
|
||||
from datetime import datetime, timedelta
|
||||
from typing import Dict, List, Optional, BinaryIO, Tuple
|
||||
import re
|
||||
from loguru import logger
|
||||
|
||||
from app.core.models import (
|
||||
Document,
|
||||
DocumentMetadata,
|
||||
DocumentStatus,
|
||||
ComplianceReport,
|
||||
ComplianceIssue,
|
||||
ComplianceLevel,
|
||||
DocumentEmbedding
|
||||
)
|
||||
from app.services.embedding import EmbeddingService
|
||||
from app.services.reasoning import ReasoningService
|
||||
from app.services.standards import StandardsService
|
||||
from app.utils.token_counter import count_tokens, truncate_by_tokens
|
||||
|
||||
class DocumentService:
|
||||
"""Service for handling document processing and storage."""
|
||||
|
||||
def __init__(self, embedding_service: EmbeddingService, reasoning_service: ReasoningService, standards_service: Optional[StandardsService] = None):
    """
    Store the collaborating services and create the in-memory stores.

    Args:
        embedding_service: Service used to embed document sections
        reasoning_service: Service used for compliance analysis
        standards_service: Standards service; a new StandardsService is
            created when this is omitted or falsy
    """
    self.embedding_service = embedding_service
    self.reasoning_service = reasoning_service
    if standards_service:
        self.standards_service = standards_service
    else:
        self.standards_service = StandardsService()

    # In-memory storage (replace with DB in production).
    self.documents = {}
    self.reports = {}
|
||||
|
||||
async def upload_document(self, file: BinaryIO, filename: str) -> Document:
    """
    Register an uploaded file as a new document and process it.

    Args:
        file: The document file
        filename: Name of the uploaded file

    Returns:
        Document object with metadata

    Raises:
        ValueError: When the file extension is not supported.
        Exception: Any processing error is re-raised after the document has
            been marked FAILED.
    """
    if not self._validate_file_type(filename):
        raise ValueError("Unsupported file type. Supported types: .txt, .md, .rst, .doc, .docx, .pdf")

    content = await self._read_file_content(file)

    # NOTE(review): len(content) counts decoded characters, whereas
    # DocumentMetadata.file_size is commented as bytes — confirm intent.
    metadata = DocumentMetadata(
        filename=filename,
        file_type=self._get_file_type(filename),
        file_size=len(content),
        upload_timestamp=datetime.now(),
        last_modified=datetime.now()
    )

    document_id = str(uuid.uuid4())
    document = Document(
        id=document_id,
        metadata=metadata,
        status=DocumentStatus.PENDING,
        version=1
    )

    # Store before processing so the document is retrievable even if the
    # processing step below fails.
    self.documents[document_id] = document

    try:
        await self._process_document(document_id, content)
    except Exception as e:
        logger.error(f"Error processing document {document_id}: {str(e)}")
        document.status = DocumentStatus.FAILED
        raise

    return document
|
||||
|
||||
async def get_document(self, document_id: str) -> Optional[Document]:
    """
    Look up a document in the in-memory store.

    Args:
        document_id: The ID of the document to retrieve

    Returns:
        Document object if found, None otherwise
    """
    found = self.documents.get(document_id)
    return found
|
||||
|
||||
async def get_report(self, report_id: str) -> Optional[ComplianceReport]:
    """
    Look up a compliance report in the in-memory store.

    Args:
        report_id: The ID of the report to retrieve

    Returns:
        ComplianceReport object if found, None otherwise
    """
    found = self.reports.get(report_id)
    return found
|
||||
|
||||
async def resubmit_document(self, document_id: str, file: BinaryIO) -> Document:
    """
    Replace a document's content with a new upload and reprocess it.

    Args:
        document_id: The ID of the document to resubmit
        file: The updated document file

    Returns:
        Updated Document object; its status is FAILED when reprocessing
        raised (the error is logged, not re-raised)

    Raises:
        ValueError: When no document has the given ID.
    """
    document = await self.get_document(document_id)
    if not document:
        raise ValueError(f"Document with ID {document_id} not found")

    content = await self._read_file_content(file)

    # Bump the version and reset the state before reprocessing.
    meta = document.metadata
    meta.file_size = len(content)
    meta.last_modified = datetime.now()
    document.version += 1
    document.status = DocumentStatus.PENDING

    try:
        await self._process_document(document_id, content)
    except Exception as e:
        logger.error(f"Error processing resubmitted document {document_id}: {str(e)}")
        document.status = DocumentStatus.FAILED

    return document
|
||||
|
||||
async def process_document(self, document_id: str, content: str) -> ComplianceReport:
    """
    Embed a document, pick its standards and produce a compliance report.

    Args:
        document_id: The ID of the document
        content: Document content

    Returns:
        ComplianceReport object, also stored in self.reports under its
        report_id

    Raises:
        ValueError: When the document ID is unknown.
        Exception: Any other failure is logged and re-raised.
    """
    try:
        document = self.documents.get(document_id)
        if not document:
            raise ValueError(f"Document {document_id} not found")

        # Split into sections, then embed those sections.
        sections = self._split_into_sections(content)
        document.embedding = await self.embedding_service.embed_document(document_id, sections)

        if not self.standards_service:
            # No standards service: fall back to a fixed list of standards.
            logger.warning(f"No StandardsService available for document {document_id}")
            standard_names = ["ISO-9001", "IEEE-829", "RFC-2119"]
        else:
            # Log the standards service instance ID to verify singleton pattern
            logger.info(f"Using StandardsService instance: {id(self.standards_service)}")
            logger.info(f"Standards count before matching: {len(self.standards_service.standards)}")

            standard_names = await self.standards_service.get_standard_names_for_document(content)
            logger.info(f"Identified standards for document {document_id}: {standard_names}")

        # Compliance analysis is delegated to the reasoning service.
        report = await self.reasoning_service.analyze_document(document_id, sections, standard_names)
        self.reports[report.report_id] = report
        return report

    except Exception as e:
        logger.error(f"Error in document processing: {str(e)}")
        raise
|
||||
|
||||
async def _read_file_content(self, file: BinaryIO) -> str:
    """
    Read and decode file content.

    UTF-8 is tried first; if the bytes are not valid UTF-8 they are decoded
    as Latin-1 instead.

    Args:
        file: The file to read

    Returns:
        File content as string

    Raises:
        ValueError: If the content cannot be decoded by either codec.
    """
    raw_bytes = file.read()

    try:
        return raw_bytes.decode('utf-8')
    except UnicodeDecodeError:
        # Not valid UTF-8: fall through to the Latin-1 attempt below.
        pass

    try:
        return raw_bytes.decode('latin-1')
    except Exception as exc:
        # `except Exception` (not a bare except) so KeyboardInterrupt and
        # SystemExit are never converted into a ValueError.
        raise ValueError(
            "Unable to decode file content. Please ensure file is text-based."
        ) from exc
|
||||
|
||||
def _get_file_type(self, filename: str) -> str:
    """
    Return the lower-cased extension of a filename, without the dot.

    Args:
        filename: The name of the file

    Returns:
        File type (extension); an empty string when there is no extension
    """
    suffix = os.path.splitext(filename)[1]
    bare_suffix = suffix.lstrip('.')
    return bare_suffix.lower()
|
||||
|
||||
def _validate_file_type(self, filename: str) -> bool:
    """
    Tell whether a filename carries one of the accepted extensions.

    The comparison is case-insensitive and only looks at the final
    extension of the name.

    Args:
        filename: Name of the file to validate

    Returns:
        bool: True if file type is supported, False otherwise
    """
    extension = os.path.splitext(filename)[1].lower()
    return extension in {'.txt', '.md', '.rst', '.doc', '.docx', '.pdf'}
|
||||
|
||||
def _split_into_sections(self, content: str) -> Dict[str, str]:
    """
    Split document content into sections.

    The whole text is always available under ``"full_document"``. If the
    text contains markdown headings of level 1-3, one entry per heading is
    added, keyed ``h<level>_<title>`` and holding the text up to the next
    heading. Repeated headings get a numeric suffix (``_2``, ``_3``, ...)
    instead of overwriting the earlier section. Without headings, every
    blank-line-separated paragraph longer than 100 characters is added as
    ``paragraph_<position>``.

    Args:
        content: The document content

    Returns:
        Dictionary mapping section names to section content
    """
    # This is a simple implementation - in production, you would use more
    # advanced techniques like heading detection, markdown parsing, etc.
    sections: Dict[str, str] = {"full_document": content}

    # `[ \t]+` rather than `\s+`: the separator must stay on the heading
    # line, otherwise a bare "#" line would swallow the following newline(s)
    # and turn the next text line into a heading title.
    heading_pattern = re.compile(r'^(#{1,3})[ \t]+(.+)$', re.MULTILINE)
    headings = list(heading_pattern.finditer(content))

    if not headings:
        # No headings found, fall back to blank-line-separated paragraphs.
        paragraphs = [p for p in content.split('\n\n') if p.strip()]
        for position, paragraph in enumerate(paragraphs, start=1):
            if len(paragraph) > 100:  # Only include substantial paragraphs
                sections[f"paragraph_{position}"] = paragraph
        return sections

    # Each section runs from the end of its heading line to the start of the
    # next heading (or to the end of the document for the last one).
    section_ends = [m.start() for m in headings[1:]] + [len(content)]
    for heading, end_pos in zip(headings, section_ends):
        base_key = f"h{len(heading.group(1))}_{heading.group(2).strip()}"

        # Disambiguate repeated headings so no section text is lost.
        section_key = base_key
        occurrence = 1
        while section_key in sections:
            occurrence += 1
            section_key = f"{base_key}_{occurrence}"

        sections[section_key] = content[heading.end():end_pos].strip()

    return sections
|
||||
|
||||
async def _generate_mock_report(self, document_id: str, sections: Dict[str, str]) -> ComplianceReport:
    """
    Build a placeholder compliance report for development/testing.

    Three keyword-based checks run against the ``"full_document"`` section
    (when present); each failed check becomes one issue, and every issue
    lowers the score by 0.1 (never below 0.0).

    Args:
        document_id: The ID of the document
        sections: Dictionary of document sections

    Returns:
        ComplianceReport object
    """
    # In production, this would use the reasoning service.
    findings = []

    if "full_document" in sections:
        body = sections["full_document"]
        lowered = body.lower()

        # (failed?, section, description, level, recommendation) per mock check
        mock_checks = (
            (
                "introduction" not in lowered,
                "Document Structure",
                "Missing introduction section",
                ComplianceLevel.MAJOR,
                "Add an introduction section to provide context for the document",
            ),
            (
                body.count('#') < 3,
                "Formatting",
                "Insufficient section headings",
                ComplianceLevel.MINOR,
                "Use markdown headings to better structure the document",
            ),
            (
                "compliance" in lowered and "standard" not in lowered,
                "Technical Content",
                "Mentions compliance but doesn't reference specific standards",
                ComplianceLevel.CRITICAL,
                "Specify which standards or regulations the document complies with",
            ),
        )
        for failed, section, description, level, recommendation in mock_checks:
            if failed:
                findings.append(ComplianceIssue(
                    section=section,
                    description=description,
                    level=level,
                    recommendation=recommendation
                ))

    # Mock score: 0.1 off per issue, floored at zero.
    compliance_score = max(0.0, 1.0 - (len(findings) * 0.1)) if findings else 1.0

    if findings:
        def tally(level):
            return sum(1 for finding in findings if finding.level == level)

        labelled_counts = (
            (tally(ComplianceLevel.CRITICAL), "critical"),
            (tally(ComplianceLevel.MAJOR), "major"),
            (tally(ComplianceLevel.MINOR), "minor"),
        )
        breakdown = ", ".join(f"{count} {label}" for count, label in labelled_counts if count)
        summary = (f"The document has {len(findings)} compliance issues: " + breakdown).rstrip(", ") + "."
        summary += " See detailed report for recommendations."
    else:
        summary = "The document meets all compliance requirements. No issues found."

    return ComplianceReport(
        document_id=document_id,
        compliance_score=compliance_score,
        summary=summary,
        issues=findings
    )
|
||||
|
||||
async def _process_document(self, document_id: str, content: str) -> None:
    """
    Internal method to process a document and update its status.

    The status moves to PROCESSING, then to COMPLETED once the report ID
    has been appended to the document, or to FAILED if anything raises.

    Args:
        document_id: The ID of the document to process
        content: The document content

    Raises:
        ValueError: If no document is stored under ``document_id``.
        Exception: Any processing error is re-raised after the status has
            been set to FAILED.
    """
    # Look the document up before entering the try block so the except
    # handler can never reference an unbound `document`.
    document = self.documents.get(document_id)
    if not document:
        raise ValueError(f"Document {document_id} not found")

    try:
        document.status = DocumentStatus.PROCESSING

        # Generate compliance report
        report = await self.process_document(document_id, content)

        # Store report ID in document
        document.reports.append(report.report_id)
        document.status = DocumentStatus.COMPLETED

    except Exception:
        document.status = DocumentStatus.FAILED
        raise
|
||||
|
||||
async def get_document_stats(self, document_id: str) -> Dict[str, object]:
    """
    Get statistics for a document.

    Combines the document's own fields with figures from its most recent
    report (the last entry of ``document.reports``), when there is one.

    Args:
        document_id: The ID of the document

    Returns:
        Dictionary containing document statistics. Without a report the
        compliance score is None and the issue counts are 0.

    Raises:
        ValueError: If the document does not exist.
    """
    document = await self.get_document(document_id)
    if not document:
        raise ValueError(f"Document {document_id} not found")

    latest_report = None
    if document.reports:
        latest_report = await self.get_report(document.reports[-1])

    return {
        "document_id": document_id,
        "version": document.version,
        "status": document.status,
        "file_size": document.metadata.file_size,
        "upload_date": document.metadata.upload_timestamp,
        "last_modified": document.metadata.last_modified,
        "num_reports": len(document.reports),
        "latest_compliance_score": latest_report.compliance_score if latest_report else None,
        "critical_issues": latest_report.critical_issues_count if latest_report else 0,
        "major_issues": latest_report.major_issues_count if latest_report else 0,
        "minor_issues": latest_report.minor_issues_count if latest_report else 0
    }
|
||||
|
||||
async def cleanup_old_documents(self, days: int = 30) -> List[str]:
    """
    Remove documents older than specified days.

    A document's reports are removed from ``self.reports`` together with
    the document itself.

    Args:
        days: Number of days after which documents should be removed

    Returns:
        List of removed document IDs
    """
    max_age = timedelta(days=days)
    removed_ids: List[str] = []

    # Iterate over a snapshot because entries are removed during the loop.
    for doc_id, document in list(self.documents.items()):
        uploaded_at = document.metadata.upload_timestamp

        # "Now" is built with the timestamp's own tzinfo so that naive and
        # timezone-aware upload timestamps can both be compared safely
        # (tz=None yields the same naive local time as before).
        cutoff_date = datetime.now(tz=uploaded_at.tzinfo) - max_age
        if uploaded_at >= cutoff_date:
            continue

        # Remove associated reports
        for report_id in document.reports:
            self.reports.pop(report_id, None)

        # Remove document
        self.documents.pop(doc_id)
        removed_ids.append(doc_id)

    return removed_ids
|
||||
|
||||
|
||||
@@ -0,0 +1,254 @@
|
||||
import cohere
|
||||
from typing import List, Dict, Any, Optional
|
||||
import uuid
|
||||
from pinecone import Pinecone
|
||||
import weaviate
|
||||
from loguru import logger
|
||||
|
||||
from app.core.config import settings
|
||||
from app.core.models import DocumentEmbedding
|
||||
|
||||
class EmbeddingService:
    """Service for document embedding and vector database operations."""

    def __init__(self):
        """Initialize the embedding service with the Cohere client and vector DB."""
        self.cohere_client = cohere.Client(settings.COHERE_API_KEY)

        # Backend is picked from settings; see _init_vector_db for the fallback.
        self.vector_db_client = self._init_vector_db()
        self.embedding_model = settings.EMBEDDING_MODEL

    def _init_vector_db(self) -> Any:
        """
        Initialize the vector database client based on settings.

        Returns:
            A Pinecone index, a Weaviate client, or a MockVectorDB when
            neither backend is fully configured.
        """
        if settings.VECTOR_DB == "pinecone" and settings.PINECONE_API_KEY:
            pc = Pinecone(api_key=settings.PINECONE_API_KEY)

            # Create the index on first use.
            existing_indexes = [idx["name"] for idx in pc.list_indexes()]
            if settings.PINECONE_INDEX_NAME not in existing_indexes:
                # NOTE(review): recent Pinecone clients appear to require a
                # `spec=` argument for create_index - confirm against the
                # installed client version.
                pc.create_index(
                    name=settings.PINECONE_INDEX_NAME,
                    dimension=1024,  # Cohere embed-english-v3.0 dimension
                    metric="cosine"
                )

            return pc.Index(settings.PINECONE_INDEX_NAME)

        elif settings.VECTOR_DB == "weaviate" and settings.WEAVIATE_URL:
            auth_config = (
                weaviate.auth.AuthApiKey(api_key=settings.WEAVIATE_API_KEY)
                if settings.WEAVIATE_API_KEY else None
            )
            client = weaviate.Client(
                url=settings.WEAVIATE_URL,
                auth_client_secret=auth_config
            )

            # schema.get() returns the schema dict (schema.contains() only
            # returns a bool). Create the class only when "Document" itself
            # is missing, not merely when the schema is empty.
            schema = client.schema.get() or {}
            known_classes = {cls.get("class") for cls in schema.get("classes", [])}
            if "Document" not in known_classes:
                client.schema.create_class({
                    "class": "Document",
                    "vectorizer": "none",  # We'll provide our own vectors
                    "properties": [
                        {
                            "name": "content",
                            "dataType": ["text"]
                        },
                        {
                            "name": "document_id",
                            "dataType": ["string"]
                        },
                        {
                            "name": "section_name",
                            "dataType": ["string"]
                        }
                    ]
                })
            return client

        else:
            logger.warning("No valid vector database configuration found. Using mock implementation.")
            return MockVectorDB()

    async def embed_document(self, document_id: str, sections: Dict[str, str]) -> DocumentEmbedding:
        """
        Embed document sections and store in vector database.

        Each section is embedded with its own Cohere call and written to the
        backend named by ``settings.VECTOR_DB``; for any other value the
        vector is computed but not stored.

        Args:
            document_id: Unique identifier for the document
            sections: Dictionary mapping section names to section content

        Returns:
            DocumentEmbedding object with embedding metadata; ``sections``
            maps each section name to the ID it was stored under.

        Raises:
            Exception: Any embedding or storage error is logged and re-raised.
        """
        section_ids = {}

        for section_name, content in sections.items():
            try:
                # NOTE: the Cohere client call is synchronous and blocks the
                # event loop while it runs.
                embedding_response = self.cohere_client.embed(
                    texts=[content],
                    model=self.embedding_model,
                    input_type="search_document"
                )
                embedding_vector = embedding_response.embeddings[0]

                # Readable composite ID; the random suffix keeps re-embeds distinct.
                section_id = f"{document_id}_{section_name}_{str(uuid.uuid4())[:8]}"

                if settings.VECTOR_DB == "pinecone":
                    self.vector_db_client.upsert(
                        vectors=[{
                            "id": section_id,
                            "values": embedding_vector,
                            "metadata": {
                                "document_id": document_id,
                                "section_name": section_name,
                                "content": content[:1000]  # Store truncated content for context
                            }
                        }],
                        namespace=document_id
                    )

                elif settings.VECTOR_DB == "weaviate":
                    # Weaviate only accepts real UUIDs as object IDs, so the
                    # composite ID above cannot be used here.
                    section_id = str(uuid.uuid4())
                    self.vector_db_client.data_object.create(
                        class_name="Document",
                        data_object={
                            "content": content,
                            "document_id": document_id,
                            "section_name": section_name
                        },
                        uuid=section_id,
                        vector=embedding_vector
                    )

                section_ids[section_name] = section_id
                logger.info(f"Successfully embedded section '{section_name}' for document {document_id}")

            except Exception as e:
                logger.error(f"Error embedding section '{section_name}': {str(e)}")
                raise

        return DocumentEmbedding(
            embedding_id=str(uuid.uuid4()),
            embedding_model=self.embedding_model,
            vector_db=settings.VECTOR_DB,
            sections=section_ids
        )

    async def retrieve_similar_sections(self, query: str, document_id: Optional[str] = None, top_k: int = 5) -> List[Dict[str, Any]]:
        """
        Retrieve similar document sections for a query.

        Args:
            query: The query text to find similar sections for
            document_id: Optional document ID to restrict search
            top_k: Number of results to return

        Returns:
            List of similar sections with metadata. ``score`` is the match
            score for Pinecone and the vector distance for Weaviate.
            Any other backend setting yields an empty list.
        """
        query_embedding = self.cohere_client.embed(
            texts=[query],
            model=self.embedding_model,
            input_type="search_query"
        ).embeddings[0]

        if settings.VECTOR_DB == "pinecone":
            results = self.vector_db_client.query(
                vector=query_embedding,
                top_k=top_k,
                namespace=document_id if document_id else None,
                include_metadata=True
            )
            return [
                {
                    "section_id": match.id,
                    "document_id": match.metadata["document_id"],
                    "section_name": match.metadata["section_name"],
                    "content": match.metadata.get("content", ""),
                    "score": match.score
                }
                for match in results.matches
            ]

        if settings.VECTOR_DB == "weaviate":
            # `_additional` fields are only returned when explicitly
            # requested; without this the id and distance are always None.
            query_builder = self.vector_db_client.query.get(
                "Document", ["content", "document_id", "section_name"]
            ).with_near_vector({
                "vector": query_embedding
            }).with_additional(["id", "distance"]).with_limit(top_k)

            if document_id:
                query_builder = query_builder.with_where({
                    "path": ["document_id"],
                    "operator": "Equal",
                    "valueString": document_id
                })

            results = query_builder.do()
            items = results.get("data", {}).get("Get", {}).get("Document", []) or []
            return [
                {
                    "section_id": item.get("_additional", {}).get("id"),
                    "document_id": item.get("document_id"),
                    "section_name": item.get("section_name"),
                    "content": item.get("content", ""),
                    "score": item.get("_additional", {}).get("distance")
                }
                for item in items
            ]

        # Mock implementation
        return []
|
||||
|
||||
|
||||
class MockVectorDB:
    """Mock vector database for development without actual vector DB.

    Only the metadata of upserted vectors is kept, grouped by namespace;
    the vector values themselves are discarded and queries do not rank.
    """

    def __init__(self):
        """Start with no stored vectors and warn that this is a mock."""
        self.vectors = {}
        logger.warning("Using mock vector database. Not suitable for production.")

    def upsert(self, vectors, namespace=None):
        """
        Mock upsert method.

        Args:
            vectors: Iterable of dicts with an ``id`` and optional ``metadata``.
            namespace: Namespace to store under; ``"default"`` when omitted.
        """
        bucket = self.vectors.setdefault(namespace or "default", {})
        for vector in vectors:
            # Metadata is optional on real upserts, so default to an empty dict.
            bucket[vector['id']] = vector.get('metadata', {})

    def query(self, vector, top_k=5, namespace=None, include_metadata=True):
        """
        Mock query method.

        Returns the first ``top_k`` stored entries of the namespace, each
        with a fixed score of 0.8; ``vector`` and ``include_metadata`` are
        ignored.

        Returns:
            An object with a ``matches`` list (empty for an unknown
            namespace), each match exposing ``id``, ``score``, ``metadata``.
        """
        from collections import namedtuple

        Match = namedtuple('Match', ['id', 'score', 'metadata'])
        Results = namedtuple('Results', ['matches'])

        # Always return a Results object: callers read `.matches`, so a bare
        # list for an unknown namespace would raise AttributeError.
        stored = self.vectors.get(namespace or "default", {})
        matches = [
            Match(id=vector_id, score=0.8, metadata=metadata)
            for vector_id, metadata in list(stored.items())[:top_k]
        ]
        return Results(matches=matches)
|
||||
@@ -0,0 +1,136 @@
|
||||
# Reranking services
|
||||
import cohere
|
||||
from typing import List, Dict, Any
|
||||
from loguru import logger
|
||||
from tenacity import retry, stop_after_attempt, wait_exponential
|
||||
|
||||
from app.core.config import settings
|
||||
from app.core.models import ComplianceIssue, ComplianceReport, ComplianceLevel
|
||||
|
||||
class RankingService:
    """Service for ranking and prioritizing compliance issues using Cohere Reranker."""

    def __init__(self):
        """Initialize the ranking service with the Cohere client."""
        self.cohere_client = cohere.Client(settings.COHERE_API_KEY)
        self.reranker_model = settings.RERANKER_MODEL

    async def prioritize_issues(self, report: ComplianceReport, max_issues: int = 10) -> ComplianceReport:
        """
        Prioritize and rank compliance issues in a report.

        Issues are ordered by severity first (critical > major > minor >
        info) and by reranker relevance second, then cut to ``max_issues``.

        Args:
            report: The compliance report with issues to prioritize
            max_issues: Maximum number of issues to include in the final report

        Returns:
            Updated compliance report with prioritized issues. The original
            report is returned unchanged when it has fewer than two issues
            or when ranking fails.
        """
        if not report.issues or len(report.issues) <= 1:
            # No need to rank if there's only 0 or 1 issues
            return report

        try:
            issue_texts = [
                f"Section: {issue.section}. "
                f"Level: {issue.level.value}. "
                f"Description: {issue.description}. "
                f"Recommendation: {issue.recommendation}"
                for issue in report.issues
            ]

            # Query object representing what we're looking for
            query = "critical compliance issues that require immediate attention"
            reranked_issues = await self._rerank_issues(query, issue_texts)

            # One pass to index the scores instead of a scan per issue;
            # setdefault keeps the first score seen for an index.
            relevance_by_index = {}
            for item in reranked_issues:
                relevance_by_index.setdefault(item["index"], item["relevance_score"])

            level_scores = {
                ComplianceLevel.CRITICAL: 4,
                ComplianceLevel.MAJOR: 3,
                ComplianceLevel.MINOR: 2,
                ComplianceLevel.INFO: 1
            }

            def combined_score(indexed_issue):
                position, issue = indexed_issue
                # level_score * 100 + rerank_score: the level is always the
                # primary sorting factor, relevance only breaks ties.
                return (level_scores.get(issue.level, 0) * 100) + relevance_by_index.get(position, 0.0)

            ranked = sorted(enumerate(report.issues), key=combined_score, reverse=True)
            top_issues = [issue for _, issue in ranked[:max_issues]]

            # Carry the applied standards over when the report has them, so
            # prioritizing does not drop that information.
            carried_over = {}
            if hasattr(report, "applied_standards"):
                carried_over["applied_standards"] = report.applied_standards

            return ComplianceReport(
                report_id=report.report_id,
                document_id=report.document_id,
                timestamp=report.timestamp,
                compliance_score=report.compliance_score,
                summary=report.summary,
                issues=top_issues,
                **carried_over
            )

        except Exception as e:
            logger.error(f"Error prioritizing issues: {str(e)}")
            # If ranking fails, return the original report
            return report

    @retry(stop=stop_after_attempt(3), wait=wait_exponential(multiplier=1, min=2, max=10), reraise=True)
    async def _call_reranker(self, query: str, issue_texts: List[str]) -> List[Dict[str, Any]]:
        """Call the Cohere Rerank endpoint once; errors propagate so the retry policy can act."""
        response = self.cohere_client.rerank(
            model=self.reranker_model,
            query=query,
            documents=issue_texts,
            top_n=len(issue_texts)
        )
        return [
            {
                "index": result.index,  # Original index in the issues list
                "relevance_score": result.relevance_score
            }
            for result in response.results
        ]

    async def _rerank_issues(self, query: str, issue_texts: List[str]) -> List[Dict[str, Any]]:
        """
        Rerank issues using Cohere Reranker.

        The API call is retried (see ``_call_reranker``); the fallback is
        applied only after the retries are exhausted. Previously the retry
        decorator sat on this method, whose body swallowed every exception,
        so no retry could ever happen.

        Args:
            query: The search query to compare issues against
            issue_texts: List of issue descriptions to rank

        Returns:
            List of dictionaries with ``index`` and ``relevance_score``. On
            failure, a basic ranking that keeps the input order.
        """
        try:
            return await self._call_reranker(query, issue_texts)
        except Exception as e:
            logger.error(f"Error calling Cohere Reranker: {str(e)}")

            # Return basic ranking if reranking fails
            return [{"index": i, "relevance_score": 1.0 - (i * 0.1)}
                    for i in range(len(issue_texts))]
|
||||
@@ -0,0 +1,168 @@
|
||||
# Reasoning with LLMs using Groq
|
||||
import json
|
||||
from typing import Dict, List
|
||||
from loguru import logger
|
||||
from tenacity import retry, stop_after_attempt, wait_exponential
|
||||
|
||||
from app.core.config import settings
|
||||
from app.core.models import ComplianceIssue, ComplianceLevel, ComplianceReport
|
||||
from app.utils.token_counter import count_tokens, truncate_by_tokens
|
||||
from groq import Groq # Assuming groq Python SDK is installed
|
||||
|
||||
class ReasoningService:
    """Service for performing deep reasoning on documents using Groq."""

    def __init__(self):
        """Initialize the reasoning service with the Groq client."""
        self.client = Groq(api_key=settings.GROQ_API_KEY)
        self.model = settings.REASONING_MODEL  # e.g., "mixtral-8x7b-32768"

    @retry(stop=stop_after_attempt(3), wait=wait_exponential(multiplier=1, min=2, max=10))
    async def analyze_document(self, document_id: str, sections: Dict[str, str], standards: List[str]) -> ComplianceReport:
        """
        Analyze a document against the given standards.

        The sections are joined into one text, truncated to the token budget
        if needed, sent to the model, and the reply is parsed into a report.
        The retry policy on this method is the only one in the pipeline
        (``_query_groq`` is deliberately not retried on its own), so a
        failing call is attempted at most three times in total.

        Args:
            document_id: The ID of the document
            sections: Dictionary mapping section names to section content
            standards: Names of the standards to check against

        Returns:
            ComplianceReport object

        Raises:
            Exception: Query or parsing errors are logged and re-raised,
                which triggers the retry policy.
        """
        document_content = "\n\n".join(f"# {name}\n{content}" for name, content in sections.items())

        # Use token-based truncation instead of character-based
        max_tokens = 30000  # Adjust based on model context window
        token_count = count_tokens(document_content)

        logger.info(f"Document {document_id} has {token_count} tokens before truncation")

        if token_count > max_tokens:
            document_content = truncate_by_tokens(document_content, max_tokens)
            logger.info(f"Document {document_id} truncated to {max_tokens} tokens")

        prompt = self._create_analysis_prompt(document_content, standards)

        try:
            response = await self._query_groq(prompt)
            return self._parse_compliance_response(document_id, response, standards)
        except Exception as e:
            logger.error(f"Error analyzing document with Groq: {str(e)}")
            raise

    def _create_analysis_prompt(self, document_content: str, standards: List[str]) -> str:
        """Build the analysis prompt: document, standards list, instructions and the JSON reply format."""
        standards_text = "\n".join(f"- {standard}" for standard in standards)
        return f"""<document>
{document_content}
</document>

<standards>
{standards_text}
</standards>

You are an expert in document compliance and technical specifications. Please analyze the document above against the listed standards.

Your job is to identify compliance issues and provide detailed reasoning and recommendations. Focus on:
1. Technical accuracy and completeness
2. Compliance with the specified standards
3. Document structure and organization
4. Clarity and specificity of language
5. Consistency and coherence

For each compliance issue you find, please provide:
- The section where the issue appears
- A detailed description of the issue
- The severity level (critical, major, minor, or info)
- A thorough explanation of why this is an issue and how it impacts compliance
- Specific, actionable recommendations to fix the issue
- References to specific standards or best practices that apply

Respond in the following JSON format:
{{
"summary": "Comprehensive overall assessment of the document",
"compliance_score": 0.0 to 1.0,
"issues": [
{{
"section": "Section name",
"description": "Detailed issue description",
"level": "critical/major/minor/info",
"reasoning": "Thorough explanation of why this is an issue",
"standard_references": ["Specific standards or requirements that are violated"],
"recommendation": "Detailed, actionable recommendation to fix the issue"
}}
]
}}"""

    async def _query_groq(self, prompt: str) -> str:
        """
        Send the prompt to the Groq chat completion endpoint.

        No retry decorator here: the caller (``analyze_document``) already
        retries, and stacking both multiplied the attempts to nine.

        Args:
            prompt: The user prompt to send

        Returns:
            The text content of the first completion choice

        Raises:
            Exception: Any client error is logged and re-raised.
        """
        try:
            # NOTE: this client call is synchronous and blocks the event
            # loop while the request is in flight.
            response = self.client.chat.completions.create(
                model=self.model,
                messages=[
                    {"role": "system", "content": "You are an AI assistant specialized in document compliance analysis."},
                    {"role": "user", "content": prompt}
                ],
                max_tokens=4000,
                temperature=0.2,
                top_p=1.0
            )
            return response.choices[0].message.content
        except Exception as e:
            logger.error(f"Error querying Groq: {str(e)}")
            raise

    def _parse_compliance_response(self, document_id: str, response: str, standards: List[str]) -> ComplianceReport:
        """
        Turn the model's reply into a ComplianceReport.

        The JSON object is taken from the first ``{`` to the last ``}`` of
        the reply. Null or missing optional values fall back to defaults,
        and unknown severity strings are treated as minor.

        Args:
            document_id: The ID of the analyzed document
            response: Raw text returned by the model
            standards: Standards that were applied

        Returns:
            The parsed report, or a zero-score report with one critical
            "System" issue when the JSON cannot be decoded.

        Raises:
            ValueError: If the reply contains no JSON object.
            Exception: Any other parsing error is logged and re-raised.
        """
        level_by_name = {
            "critical": ComplianceLevel.CRITICAL,
            "major": ComplianceLevel.MAJOR,
            "info": ComplianceLevel.INFO,
        }

        try:
            json_start = response.find('{')
            json_end = response.rfind('}') + 1

            if json_start == -1 or json_end == 0:
                raise ValueError("Could not find JSON in response")

            data = json.loads(response[json_start:json_end])

            summary = data.get("summary") or "No summary provided"
            raw_score = data.get("compliance_score")
            compliance_score = float(0.5 if raw_score is None else raw_score)

            issues = []
            # `or []` / `or "minor"`: models sometimes emit explicit nulls.
            for issue_data in data.get("issues") or []:
                level_name = str(issue_data.get("level") or "minor").strip().lower()
                level = level_by_name.get(level_name, ComplianceLevel.MINOR)

                # A single reference given as a bare string becomes a
                # one-element list.
                references = issue_data.get("standard_references") or []
                if isinstance(references, str):
                    references = [references]

                issues.append(ComplianceIssue(
                    section=issue_data.get("section", "Unknown"),
                    description=issue_data.get("description", "No description provided"),
                    level=level,
                    reasoning=issue_data.get("reasoning", "No detailed reasoning provided"),
                    standard_references=references,
                    recommendation=issue_data.get("recommendation", "No recommendation provided")
                ))

            return ComplianceReport(
                document_id=document_id,
                compliance_score=compliance_score,
                summary=summary,
                issues=issues,
                applied_standards=standards
            )
        except json.JSONDecodeError:
            logger.error("Failed to parse JSON from response")
            return ComplianceReport(
                document_id=document_id,
                compliance_score=0.0,
                summary="Failed to analyze document due to parsing error.",
                issues=[
                    ComplianceIssue(
                        section="System",
                        description="Failed to parse compliance analysis results.",
                        level=ComplianceLevel.CRITICAL,
                        reasoning="The system encountered an error while parsing the compliance analysis results.",
                        standard_references=[],
                        recommendation="Please try resubmitting the document or contact support."
                    )
                ],
                applied_standards=[]
            )
        except Exception as e:
            logger.error(f"Error parsing compliance response: {str(e)}")
            raise
|
||||
@@ -0,0 +1,250 @@
|
||||
# Standards management
|
||||
import json
|
||||
import os
|
||||
from typing import Dict, List, Optional, BinaryIO, Tuple
|
||||
import uuid
|
||||
from loguru import logger
|
||||
|
||||
from app.core.models import Standard, Requirement, RequirementSeverity
|
||||
from app.utils.helpers import load_standards_from_file
|
||||
from app.services.standards_matcher import StandardsMatcher
|
||||
|
||||
# Singleton instance to ensure all parts of the application use the same standards
|
||||
_standards_service_instance = None
|
||||
|
||||
class StandardsService:
|
||||
"""Service for managing compliance standards."""
|
||||
|
||||
def __new__(cls):
    """
    Implement singleton pattern to ensure all parts of the app use the same standards.

    The first call creates the instance, gives it an empty ``standards``
    dict and a ``StandardsMatcher``, and loads the default standards; every
    later call returns that same instance.

    Raises:
        Exception: Any error raised during first-time setup. The partially
            built instance is discarded so the next call starts afresh.
    """
    global _standards_service_instance
    if _standards_service_instance is None:
        instance = super(StandardsService, cls).__new__(cls)
        _standards_service_instance = instance
        try:
            instance.standards = {}  # In-memory storage for standards
            instance.matcher = StandardsMatcher()  # Advanced standards matching logic
            instance._load_default_standards()
        except BaseException:
            # Do not leave a half-initialised singleton behind: later
            # callers would otherwise get an object lacking its attributes.
            _standards_service_instance = None
            raise
    return _standards_service_instance
|
||||
|
||||
def __init__(self):
    """No-op initializer for the standards service."""
    # Intentionally empty: for the singleton pattern, all initialization
    # is done in __new__.
|
||||
|
||||
def _load_default_standards(self):
    """
    Load default standards from the standards directory.

    The directory is ``standard`` three levels above this file. Every
    ``*.json`` file in it is read; each entry under its ``"standards"`` key
    is stored in ``self.standards`` by ID. A file that fails to load is
    logged and skipped. Files are processed in sorted name order, so when
    two files define the same ID the outcome is the same on every run.
    """
    standards_dir = os.path.join(os.path.dirname(os.path.dirname(os.path.dirname(__file__))), "standard")

    if not os.path.exists(standards_dir):
        logger.warning(f"Standards directory not found: {standards_dir}")
        return

    # os.listdir gives no ordering guarantee; sort for deterministic loading.
    for filename in sorted(os.listdir(standards_dir)):
        if not filename.endswith(".json"):
            continue
        try:
            file_path = os.path.join(standards_dir, filename)
            standards_data = load_standards_from_file(file_path)

            if "standards" in standards_data:
                for std_data in standards_data["standards"]:
                    standard = self._create_standard_from_data(std_data)
                    self.standards[standard.id] = standard
                    logger.info(f"Loaded standard: {standard.name} ({standard.id})")
        except Exception as e:
            # Best effort: one bad file must not prevent the others from loading.
            logger.error(f"Error loading standard from {filename}: {str(e)}")
|
||||
|
||||
def _create_standard_from_data(self, data: Dict) -> Standard:
|
||||
"""
|
||||
Create a Standard object from dictionary data.
|
||||
|
||||
Args:
|
||||
data: Dictionary containing standard data
|
||||
|
||||
Returns:
|
||||
Standard object
|
||||
"""
|
||||
requirements = []
|
||||
|
||||
if "requirements" in data:
|
||||
for req_data in data["requirements"]:
|
||||
# Map severity string to RequirementSeverity enum
|
||||
severity_str = req_data.get("severity", "minor").lower()
|
||||
if severity_str == "critical":
|
||||
severity = RequirementSeverity.CRITICAL
|
||||
elif severity_str == "major":
|
||||
severity = RequirementSeverity.MAJOR
|
||||
elif severity_str == "info":
|
||||
severity = RequirementSeverity.INFO
|
||||
else:
|
||||
severity = RequirementSeverity.MINOR
|
||||
|
||||
requirement = Requirement(
|
||||
id=req_data.get("id", str(uuid.uuid4())),
|
||||
description=req_data.get("description", ""),
|
||||
severity=severity,
|
||||
details=req_data.get("details", None)
|
||||
)
|
||||
requirements.append(requirement)
|
||||
|
||||
return Standard(
|
||||
id=data.get("id", str(uuid.uuid4())),
|
||||
name=data.get("name", "Unnamed Standard"),
|
||||
description=data.get("description", ""),
|
||||
requirements=requirements
|
||||
)
|
||||
|
||||
async def get_all_standards(self) -> List[Standard]:
|
||||
"""
|
||||
Get all available standards.
|
||||
|
||||
Returns:
|
||||
List of Standard objects
|
||||
"""
|
||||
return list(self.standards.values())
|
||||
|
||||
async def get_standard(self, standard_id: str) -> Optional[Standard]:
|
||||
"""
|
||||
Get a standard by ID.
|
||||
|
||||
Args:
|
||||
standard_id: ID of the standard to retrieve
|
||||
|
||||
Returns:
|
||||
Standard object if found, None otherwise
|
||||
"""
|
||||
return self.standards.get(standard_id)
|
||||
|
||||
async def get_standard_by_name(self, name: str) -> Optional[Standard]:
|
||||
"""
|
||||
Get a standard by name (case-insensitive).
|
||||
|
||||
Args:
|
||||
name: Name of the standard to retrieve
|
||||
|
||||
Returns:
|
||||
Standard object if found, None otherwise
|
||||
"""
|
||||
name_lower = name.lower()
|
||||
for standard in self.standards.values():
|
||||
if standard.name.lower() == name_lower:
|
||||
return standard
|
||||
return None
|
||||
|
||||
async def upload_standard(self, file: BinaryIO, filename: str) -> Standard:
|
||||
"""
|
||||
Upload and process a standard definition file.
|
||||
|
||||
Args:
|
||||
file: The standard definition file (JSON)
|
||||
filename: Name of the uploaded file
|
||||
|
||||
Returns:
|
||||
Standard object
|
||||
"""
|
||||
try:
|
||||
# Read file content
|
||||
content = await self._read_file_content(file)
|
||||
|
||||
# Parse JSON
|
||||
data = json.loads(content)
|
||||
|
||||
if "standards" in data and isinstance(data["standards"], list):
|
||||
# Multiple standards in file
|
||||
standards = []
|
||||
for std_data in data["standards"]:
|
||||
standard = self._create_standard_from_data(std_data)
|
||||
self.standards[standard.id] = standard
|
||||
standards.append(standard)
|
||||
logger.info(f"Uploaded standard: {standard.name} (ID: {standard.id}) with {len(standard.requirements)} requirements")
|
||||
|
||||
# Log the current standards count after upload
|
||||
logger.info(f"Total standards in system after upload: {len(self.standards)}")
|
||||
|
||||
# Return the first standard for simplicity
|
||||
return standards[0] if standards else None
|
||||
else:
|
||||
# Single standard in file
|
||||
standard = self._create_standard_from_data(data)
|
||||
self.standards[standard.id] = standard
|
||||
logger.info(f"Uploaded standard: {standard.name} (ID: {standard.id}) with {len(standard.requirements)} requirements")
|
||||
|
||||
# Log the current standards count after upload
|
||||
logger.info(f"Total standards in system after upload: {len(self.standards)}")
|
||||
|
||||
return standard
|
||||
|
||||
except json.JSONDecodeError:
|
||||
raise ValueError("Invalid JSON format in standard definition file")
|
||||
except Exception as e:
|
||||
logger.error(f"Error processing standard file: {str(e)}")
|
||||
raise
|
||||
|
||||
async def _read_file_content(self, file: BinaryIO) -> str:
|
||||
"""
|
||||
Read and decode file content.
|
||||
|
||||
Args:
|
||||
file: The file to read
|
||||
|
||||
Returns:
|
||||
File content as string
|
||||
"""
|
||||
file_content = file.read()
|
||||
|
||||
# Try to decode as UTF-8
|
||||
try:
|
||||
return file_content.decode('utf-8')
|
||||
except UnicodeDecodeError:
|
||||
# Try other encodings if UTF-8 fails
|
||||
try:
|
||||
return file_content.decode('latin-1')
|
||||
except:
|
||||
raise ValueError("Unable to decode file content. Please ensure file is text-based.")
|
||||
|
||||
async def get_standard_names_for_document(self, document_content: str) -> List[str]:
|
||||
"""
|
||||
Identify which standards might be relevant for a document based on content.
|
||||
Uses advanced matching logic to find the most relevant standards.
|
||||
|
||||
Args:
|
||||
document_content: The document content
|
||||
|
||||
Returns:
|
||||
List of standard names that might be relevant
|
||||
"""
|
||||
# Default standards to use if no matches are found
|
||||
DEFAULT_STANDARDS = ["ISO-9001", "IEEE-829", "RFC-2119"]
|
||||
|
||||
# Log available standards for debugging
|
||||
logger.info(f"Available standards in the system: {len(self.standards)}")
|
||||
for std_id, std in self.standards.items():
|
||||
logger.info(f" - {std.name} (ID: {std_id})")
|
||||
|
||||
# If no standards are available, return defaults
|
||||
if not self.standards:
|
||||
logger.warning("No standards available in the system. Using default standards.")
|
||||
return DEFAULT_STANDARDS
|
||||
|
||||
# Use the standards matcher to find relevant standards
|
||||
standard_scores = self.matcher.find_relevant_standards(
|
||||
document_content=document_content,
|
||||
standards=list(self.standards.values()),
|
||||
threshold=0.1, # Minimum relevance threshold
|
||||
max_standards=5 # Maximum number of standards to return
|
||||
)
|
||||
|
||||
# Log the matching results
|
||||
if standard_scores:
|
||||
logger.info(f"Found {len(standard_scores)} relevant standards:")
|
||||
for name, score in standard_scores:
|
||||
logger.info(f" - {name}: relevance score {score:.2f}")
|
||||
else:
|
||||
logger.info("No relevant standards found based on document content.")
|
||||
|
||||
# Extract standard names from the results
|
||||
relevant_standards = [std[0] for std in standard_scores]
|
||||
|
||||
# If no relevant standards found, use defaults
|
||||
if not relevant_standards:
|
||||
logger.info(f"Using default standards: {DEFAULT_STANDARDS}")
|
||||
return DEFAULT_STANDARDS
|
||||
|
||||
return relevant_standards
|
||||
@@ -0,0 +1,304 @@
|
||||
# Standards matching logic
|
||||
import re
|
||||
from typing import Dict, List, Set, Tuple, Optional
|
||||
from loguru import logger
|
||||
|
||||
from app.core.models import Standard, Requirement
|
||||
|
||||
|
||||
class StandardsMatcher:
    """
    Advanced matching logic to identify relevant standards for documents.

    Relevance is a heuristic score built from name matches, keyword overlap,
    section placement, extracted key terms and requirement wording.
    """

    # Markdown headings of level 1-3 ("# Title" .. "### Title").
    _HEADING_PATTERN = re.compile(r'^(#{1,3})\s+(.+)$', re.MULTILINE)
    # Sentence boundary: terminal punctuation followed by whitespace.
    _SENTENCE_SPLIT_PATTERN = re.compile(r'[.!?]\s+')
    # All-caps tokens of two or more characters (often defined terms/acronyms).
    _CAPITALIZED_TERM_PATTERN = re.compile(r'\b[A-Z][A-Z0-9]+\b')

    def __init__(self):
        """Initialize the standards matcher."""
        # Common stopwords to filter out when extracting keywords
        self.stopwords = {
            "the", "a", "an", "and", "or", "in", "on", "at", "to", "for", "with",
            "by", "of", "is", "are", "was", "were", "be", "been", "being", "have",
            "has", "had", "do", "does", "did", "but", "if", "then", "else", "when",
            "where", "why", "how", "all", "any", "both", "each", "few", "more",
            "most", "other", "some", "such", "no", "nor", "not", "only", "own",
            "same", "so", "than", "too", "very", "can", "will", "just", "should",
            "now", "this", "that", "these", "those"
        }

        # Technical terms that indicate compliance requirements
        self.technical_indicators = [
            "shall", "must", "required", "should", "recommended", "may", "optional",
            "compliant", "compliance", "conform", "standard", "specification", "requirement",
            "procedure", "process", "method", "test", "verify", "validate", "certification",
            "certified", "approved", "regulation", "regulatory", "guideline", "protocol"
        ]

        # Common standard prefixes and abbreviations
        self.standard_prefixes = [
            "iso", "ieee", "astm", "ansi", "iec", "din", "bs", "en", "jis",
            "gb", "api", "asme", "nfpa", "ul", "mil", "std", "rfc", "itu"
        ]

    def _non_stopword_pairs(self, words: List[str]) -> List[str]:
        """Return lower-cased adjacent word pairs in which neither word is a stopword."""
        return [
            f"{first} {second}".lower()
            for first, second in zip(words, words[1:])
            if first.lower() not in self.stopwords and second.lower() not in self.stopwords
        ]

    def extract_document_sections(self, document_content: str) -> Dict[str, str]:
        """
        Extract sections from a document to improve matching.

        Three sources are combined into one mapping:
        ``"full_document"`` (the whole text), one entry per markdown heading
        (keyed by the heading text as written), and one entry per common
        section name found at the start of a line (keyed in lower case).

        Args:
            document_content: The document content

        Returns:
            Dictionary of section name to section content
        """
        sections = {"full_document": document_content}

        # Try to identify document sections using markdown headings
        headings = list(self._HEADING_PATTERN.finditer(document_content))
        for index, heading in enumerate(headings):
            # Section content runs from this heading to the next, or to the end
            is_last = index == len(headings) - 1
            end_pos = len(document_content) if is_last else headings[index + 1].start()
            sections[heading.group(2).strip()] = document_content[heading.end():end_pos].strip()

        # Look for common document sections by name
        common_sections = [
            "introduction", "scope", "purpose", "references", "definitions",
            "requirements", "compliance", "standards", "conclusion", "summary",
            "appendix", "annex"
        ]

        for section in common_sections:
            # Only the section name is case-insensitive. With a global (?i) the
            # [A-Z] lookahead would match any letter and cut the section off at
            # the first line break instead of at the next capitalised line.
            pattern = re.compile(
                rf'(?:^|\n)(?i:{re.escape(section)})[\s:]+(.*?)(?=\n\s*\n|\n\s*[A-Z]|\Z)',
                re.DOTALL,
            )
            match = pattern.search(document_content)
            if match:
                sections[section] = match.group(1).strip()

        return sections

    def extract_key_terms(self, document_content: str) -> List[str]:
        """
        Extract key technical terms from document content.

        Collects, all lower-cased: adjacent non-stopword word pairs from
        sentences containing a technical indicator, all-caps tokens, and
        standard references such as "ISO-9001" or "IEEE 829".

        Args:
            document_content: The document content

        Returns:
            Sorted list of unique key terms
        """
        key_terms = set()

        for sentence in self._SENTENCE_SPLIT_PATTERN.split(document_content):
            sentence_lower = sentence.lower()
            # Only sentences that talk about requirements contribute phrases
            if any(indicator in sentence_lower for indicator in self.technical_indicators):
                # Extract noun phrases (simplified approach)
                key_terms.update(self._non_stopword_pairs(sentence.split()))

        # Look for capitalized terms (often defined terms)
        key_terms.update(
            term.lower() for term in self._CAPITALIZED_TERM_PATTERN.findall(document_content)
        )

        # Look for standard references (e.g., ISO-9001, IEEE 829).
        # One alternation replaces a regex compile + scan per prefix; the guard
        # keeps an empty prefix list from matching bare numbers.
        if self.standard_prefixes:
            alternation = "|".join(re.escape(prefix) for prefix in self.standard_prefixes)
            reference_pattern = re.compile(rf'\b(?:{alternation})[-\s]?\d+\b', re.IGNORECASE)
            key_terms.update(ref.lower() for ref in reference_pattern.findall(document_content))

        # Sorted so the result is deterministic across runs
        return sorted(key_terms)

    def extract_standard_keywords(self, standard: Standard) -> List[str]:
        """
        Extract keywords from a standard that can be used for matching.

        Args:
            standard: The standard to extract keywords from

        Returns:
            Sorted list of unique, non-empty, lower-cased keywords
        """
        keywords = set()

        # Add standard name and variations (hyphen removed / replaced by space)
        name_lower = (standard.name or "").lower()
        keywords.update((name_lower, name_lower.replace("-", ""), name_lower.replace("-", " ")))

        # Add standard description words (excluding common words)
        if standard.description:
            keywords.update(
                word.lower() for word in standard.description.split()
                if word.lower() not in self.stopwords
            )

        # Add requirement keywords
        for req in standard.requirements:
            # Add requirement ID
            if req.id:
                keywords.add(str(req.id).lower())

            # Add key phrases from requirement description
            if req.description:
                words = req.description.split()
                # Extract noun phrases and technical terms (simplified approach)
                keywords.update(self._non_stopword_pairs(words))
                # Add individual technical terms
                keywords.update(
                    word.lower() for word in words
                    if word.lower() in self.technical_indicators
                )

        # An empty keyword is a substring of every document; never emit one
        keywords.discard("")
        return sorted(keywords)

    def calculate_standard_relevance(self, standard: Standard, document_content: str,
                                     sections: Dict[str, str], key_terms: List[str]) -> float:
        """
        Calculate a relevance score for a standard based on multiple factors.

        Components: name match (up to 0.5), keyword coverage (up to 0.3),
        section placement (0.1 for an important section body, 0.2 for a
        section title), key-term overlap (up to 0.2) and requirement wording
        (up to 0.2).

        Args:
            standard: The standard to evaluate
            document_content: The document content
            sections: Document sections
            key_terms: Key terms extracted from the document

        Returns:
            Relevance score (0.0 to 1.0)
        """
        document_content_lower = document_content.lower()
        name_lower = (standard.name or "").lower()

        # Extract keywords for this standard
        standard_keywords = self.extract_standard_keywords(standard)

        # 1. Check for standard name matches (highest weight).
        # An empty name would match every document, so it scores nothing.
        name_match_score = 0.0
        if name_lower:
            if name_lower in document_content_lower:
                name_match_score = 0.5
            elif (name_lower.replace("-", "") in document_content_lower
                  or name_lower.replace("-", " ") in document_content_lower):
                name_match_score = 0.4

        # 2. Check for keyword matches
        keyword_match_score = 0.0
        if standard_keywords:
            matched_keywords = sum(
                1 for keyword in standard_keywords if keyword in document_content_lower
            )
            keyword_match_score = matched_keywords / len(standard_keywords) * 0.3

        # 3. Check for section-specific matches
        section_match_score = 0.0
        important_sections = {"introduction", "scope", "purpose", "references",
                              "standards", "compliance", "requirements"}

        if name_lower:
            # Section titles are compared case-insensitively so that markdown
            # headings such as "## References" count as important sections too.
            for section_name, section_content in sections.items():
                if (section_name.strip().lower() in important_sections
                        and name_lower in section_content.lower()):
                    section_match_score += 0.1
                    break

            # Check for standard name in section titles
            if any(name_lower in section_name.lower() for section_name in sections):
                section_match_score += 0.2

        # 4. Check for key term matches
        matching_terms = sum(
            1 for term in key_terms
            if any(kw in term or term in kw for kw in standard_keywords)
        )
        term_match_score = min(0.2, 0.01 * matching_terms)

        # 5. Check for requirement-specific matches
        requirement_hits = 0
        for req in standard.requirements:
            # A requirement without a description contributes nothing
            req_desc_lower = (req.description or "").lower()
            requirement_hits += sum(
                1 for word in req_desc_lower.split()
                if word not in self.stopwords and len(word) > 3
                and word in document_content_lower
            )
        requirement_match_score = min(0.2, 0.01 * requirement_hits)

        # Calculate final score (weighted sum of all components)
        final_score = (
            name_match_score +
            keyword_match_score +
            section_match_score +
            term_match_score +
            requirement_match_score
        )

        # Cap at 1.0
        return min(final_score, 1.0)

    def find_relevant_standards(self, document_content: str, standards: List[Standard],
                                threshold: float = 0.1, max_standards: int = 5) -> List[Tuple[str, float]]:
        """
        Find standards relevant to a document with relevance scores.

        Args:
            document_content: The document content
            standards: List of available standards
            threshold: Minimum relevance score threshold
            max_standards: Maximum number of standards to return

        Returns:
            List of tuples (standard_name, relevance_score) sorted by relevance
        """
        if not standards:
            return []

        # Extract document sections and key terms once; they are shared by all standards
        sections = self.extract_document_sections(document_content)
        key_terms = self.extract_key_terms(document_content)

        # Calculate relevance scores for each standard
        standard_scores = []
        for standard in standards:
            score = self.calculate_standard_relevance(
                standard, document_content, sections, key_terms
            )

            if score >= threshold:
                standard_scores.append((standard.name, score))
                logger.debug(f"Standard {standard.name} relevance score: {score:.2f}")

        # Sort by relevance score (highest first)
        standard_scores.sort(key=lambda item: item[1], reverse=True)

        # Limit to max_standards
        return standard_scores[:max_standards]
|
||||
@@ -0,0 +1,713 @@
|
||||
/* Base styles: theme variables, global reset and page-level defaults */
:root {
    /* Brand and status colours shared by every component below */
    --primary-color: #3498db;
    --secondary-color: #2980b9;
    --accent-color: #f39c12;
    --success-color: #2ecc71;
    --warning-color: #f1c40f;
    --danger-color: #e74c3c;
    --info-color: #3498db;
    --light-color: #f8f9fa;
    --dark-color: #343a40;
    --gray-color: #6c757d;
    --border-color: #dee2e6;
    --font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
}

/* Reset default spacing and size every element by its border box */
* {
    margin: 0;
    padding: 0;
    box-sizing: border-box;
}

body {
    font-family: var(--font-family);
    line-height: 1.6;
    color: var(--dark-color);
    background-color: #f4f7fa;
}

/* Centred page wrapper */
.container {
    max-width: 1200px;
    margin: 0 auto;
    padding: 20px;
}

h1, h2, h3 {
    margin-bottom: 1rem;
    color: var(--dark-color);
}
|
||||
|
||||
/* Header styles: white title card at the top of the page */
header {
    text-align: center;
    margin-bottom: 2rem;
    padding: 1.5rem;
    background-color: white;
    border-radius: 8px;
    box-shadow: 0 2px 10px rgba(0, 0, 0, 0.05);
}

header h1 {
    color: var(--primary-color);
    margin-bottom: 0.5rem;
}

/* Tagline under the title */
header p {
    color: var(--gray-color);
}
|
||||
|
||||
/* Main content sections: two-column grid of white cards */
main {
    display: block;
    width: 100%;
}

.main-grid {
    display: grid;
    grid-template-columns: 1fr 1fr;
    gap: 2rem;
}

.documents-section {
    grid-column: 1 / -1; /* Span all columns */
}

/* Every <section> is rendered as a card */
section {
    background-color: white;
    border-radius: 8px;
    padding: 1.5rem;
    box-shadow: 0 2px 10px rgba(0, 0, 0, 0.05);
}

/* Card title with a divider line underneath */
section h2 {
    margin-bottom: 1.5rem;
    padding-bottom: 0.5rem;
    border-bottom: 1px solid var(--border-color);
}
|
||||
|
||||
/* Upload area styles: drag-and-drop zone and selected-file summary */
.upload-container {
    margin-bottom: 1.5rem;
}

/* Dashed drop zone */
.upload-area {
    border: 2px dashed var(--primary-color);
    border-radius: 8px;
    padding: 3rem 1.5rem;
    text-align: center;
    transition: background-color 0.3s ease;
}

/* Faint primary-colour tint on hover */
.upload-area:hover {
    background-color: rgba(52, 152, 219, 0.05);
}

/* Large icon inside the drop zone */
.upload-area i {
    font-size: 3rem;
    color: var(--primary-color);
    margin-bottom: 1rem;
}

.upload-area p {
    margin-bottom: 0.5rem;
    color: var(--gray-color);
}

/* Box describing the file chosen for upload */
.file-info {
    margin-top: 1.5rem;
    padding: 1rem;
    border: 1px solid var(--border-color);
    border-radius: 8px;
}

.file-details {
    display: flex;
    align-items: center;
    margin-bottom: 1rem;
}

.file-icon {
    font-size: 2rem;
    color: var(--primary-color);
    margin-right: 1rem;
}

.file-name {
    font-weight: bold;
    margin-bottom: 0.25rem;
}

.file-size {
    color: var(--gray-color);
    font-size: 0.9rem;
}
|
||||
|
||||
/* Buttons: shared base style plus upload/cancel colour variants */
.button {
    display: inline-block;
    padding: 0.5rem 1rem;
    background-color: var(--primary-color);
    color: white;
    border: none;
    border-radius: 4px;
    cursor: pointer;
    font-size: 0.9rem;
    transition: background-color 0.3s ease;
    text-decoration: none;
}

.button:hover {
    background-color: var(--secondary-color);
}

/* Confirm-upload variant (green) */
.upload-button {
    background-color: var(--success-color);
}

.upload-button:hover {
    background-color: #27ae60;
}

/* Cancel variant (gray), spaced from the button before it */
.cancel-button {
    background-color: var(--gray-color);
    margin-left: 0.5rem;
}

.cancel-button:hover {
    background-color: #5a6268;
}
|
||||
|
||||
/* Documents list: one bordered row per uploaded document with action buttons.
   Hover colours reference the theme variables from :root instead of repeating
   their hex values, so a theme change reaches these rules too. */
.documents-list {
    list-style: none;
}

.document-item {
    display: flex;
    justify-content: space-between;
    align-items: center;
    padding: 1rem;
    border: 1px solid var(--border-color);
    border-radius: 4px;
    margin-bottom: 0.5rem;
    transition: background-color 0.3s ease;
}

.document-item:hover {
    background-color: var(--light-color);
}

/* Left side of a row: icon plus name/date */
.document-info {
    display: flex;
    align-items: center;
}

.document-icon {
    font-size: 1.5rem;
    color: var(--primary-color);
    margin-right: 1rem;
}

.document-name {
    font-weight: bold;
    margin-bottom: 0.25rem;
}

.document-date {
    color: var(--gray-color);
    font-size: 0.9rem;
}

/* Right side of a row: action buttons */
.document-actions {
    display: flex;
    gap: 0.5rem;
}

/* Compact button size used inside list rows */
.action-button {
    font-size: 1rem;
    padding: 0.25rem 0.5rem;
}

.view-button {
    background-color: var(--info-color);
}

.view-button:hover {
    background-color: var(--secondary-color);
}

/* Yellow background needs dark text for contrast */
.resubmit-button {
    background-color: var(--warning-color);
    color: var(--dark-color);
}

.resubmit-button:hover {
    background-color: var(--accent-color);
}
|
||||
|
||||
/* Report section: header row and bordered container for the analysis report */
.report-header {
    display: flex;
    justify-content: space-between;
    align-items: center;
    margin-bottom: 1.5rem;
}

.report-container {
    border: 1px solid var(--border-color);
    border-radius: 8px;
    padding: 1.5rem;
}
|
||||
|
||||
/* Summary section in report: green summary panel and applied-standards box */
.summary {
    background-color: #e9f7ef;
    padding: 1.5rem;
    border-radius: 8px;
    margin-bottom: 1.5rem;
    border-left: 4px solid var(--success-color);
}

/* Box listing the standards the document was checked against */
.applied-standards {
    margin-top: 1rem;
    padding: 0.75rem 1rem;
    background-color: var(--light-color); /* theme variable instead of a repeated hex value */
    border-radius: 5px;
    border-left: 3px solid var(--info-color);
}

.applied-standards h4 {
    margin-top: 0;
    margin-bottom: 0.5rem;
    font-size: 1rem;
    color: #495057;
}

/* NOTE(review): .standards-list is declared a second time further down this
   stylesheet (standards section); both rules apply to every element with this
   class. Confirm against the markup whether the two lists should share a class. */
.standards-list {
    margin: 0 0 0 1.5rem;
    padding: 0;
}

.standards-list li {
    margin-bottom: 0.25rem;
}
|
||||
|
||||
/* Issues section in report: one card per finding, coloured by severity.
   Neutral backgrounds use --light-color instead of repeating its hex value. */
.issues-container {
    margin-top: 1.5rem;
}

/* Default card; the severity modifiers below override the colours */
.issue {
    margin-bottom: 1rem;
    padding: 1rem;
    border-radius: 8px;
    background-color: var(--light-color);
    border-left: 4px solid var(--gray-color);
}

.issue.critical {
    background-color: #fdedec;
    border-left-color: var(--danger-color);
}

.issue.major {
    background-color: #fef9e7;
    border-left-color: var(--warning-color);
}

.issue.minor {
    background-color: #eafaf1;
    border-left-color: var(--success-color);
}

.issue.info {
    background-color: #ebf5fb;
    border-left-color: var(--info-color);
}

.issue-header {
    display: flex;
    justify-content: space-between;
    align-items: center;
    margin-bottom: 0.5rem;
}

.issue-section {
    font-weight: bold;
}

/* Severity badge */
.badge {
    display: inline-block;
    padding: 0.25rem 0.5rem;
    border-radius: 4px;
    font-size: 0.8rem;
    font-weight: bold;
    text-transform: uppercase;
    color: white;
}

.badge.critical {
    background-color: var(--danger-color);
}

/* Yellow background needs dark text for contrast */
.badge.major {
    background-color: var(--warning-color);
    color: var(--dark-color);
}

.badge.minor {
    background-color: var(--success-color);
}

.badge.info {
    background-color: var(--info-color);
}

.issue-description {
    margin-bottom: 0.5rem;
}

.issue-recommendation {
    background-color: var(--light-color);
    padding: 0.75rem;
    border-radius: 4px;
    font-style: italic;
}
|
||||
|
||||
/* Loading overlay: full-screen dimmer with a spinner and status text */
.loading-overlay {
    position: fixed;
    top: 0;
    left: 0;
    width: 100%;
    height: 100%;
    background-color: rgba(0, 0, 0, 0.5);
    display: flex;
    flex-direction: column;
    justify-content: center;
    align-items: center;
    z-index: 1000;
}

/* Ring with one coloured edge, rotated by the spin animation */
.loading-spinner {
    width: 50px;
    height: 50px;
    border: 5px solid #f3f3f3;
    border-top: 5px solid var(--primary-color);
    border-radius: 50%;
    animation: spin 1s linear infinite;
    margin-bottom: 1rem;
}

.loading-overlay p {
    color: white;
    font-size: 1.2rem;
}

/* One full clockwise turn */
@keyframes spin {
    0% { transform: rotate(0deg); }
    100% { transform: rotate(360deg); }
}
|
||||
|
||||
/* Stats display: row of equal-width counter boxes that wraps when narrow */
.stats-container {
    display: flex;
    justify-content: space-between;
    margin-bottom: 1.5rem;
    flex-wrap: wrap;
    gap: 1rem;
}

.stat-box {
    flex: 1;
    min-width: 100px;
    background-color: var(--light-color); /* theme variable instead of a repeated hex value */
    padding: 1rem;
    border-radius: 8px;
    text-align: center;
}

.stat-value {
    font-size: 1.5rem;
    font-weight: bold;
    margin-bottom: 0.25rem;
}

.stat-label {
    color: var(--gray-color);
    font-size: 0.9rem;
}
|
||||
|
||||
/* Score display: circular score badge with a label beside it */
.score-container {
    display: flex;
    justify-content: center;
    align-items: center;
    margin-bottom: 1.5rem;
}

/* Fixed-size circle with the score centred inside */
.score-circle {
    width: 100px;
    height: 100px;
    border-radius: 50%;
    display: flex;
    justify-content: center;
    align-items: center;
    font-size: 2rem;
    font-weight: bold;
    color: white;
    background-color: var(--success-color);
    margin-right: 1rem;
}

.score-label {
    font-size: 1.2rem;
    font-weight: bold;
}
|
||||
|
||||
/* Footer: small centred gray text below the main content */
footer {
    text-align: center;
    margin-top: 2rem;
    padding: 1rem;
    color: var(--gray-color);
    font-size: 0.9rem;
}
|
||||
|
||||
/* Standards section: upload box and scrollable list of loaded standards */
.standards-section {
    background-color: white;
    border-radius: 8px;
    padding: 1.5rem;
    box-shadow: 0 2px 10px rgba(0, 0, 0, 0.05);
}

.standards-header {
    display: flex;
    justify-content: space-between;
    align-items: center;
    margin-bottom: 1.5rem;
}

.standards-header p {
    color: var(--gray-color);
    margin: 0;
}

/* Dashed box around the standards upload control */
.standards-upload {
    margin-bottom: 1.5rem;
    border: 1px dashed var(--border-color);
    border-radius: 8px;
    padding: 1rem;
}

/* The list scrolls inside a fixed maximum height */
.standards-list-container {
    max-height: 300px;
    overflow-y: auto;
}

/* NOTE(review): .standards-list is also declared in the report summary rules
   earlier in this stylesheet; both rules apply to every element with this
   class. Confirm against the markup whether the two lists should share a class. */
.standards-list {
    list-style: none;
}

.standard-item {
    display: flex;
    justify-content: space-between;
    align-items: center;
    padding: 1rem;
    border-bottom: 1px solid var(--border-color);
}

/* Left side of a row: icon plus name/description */
.standard-info {
    display: flex;
    align-items: center;
}

.standard-icon {
    font-size: 1.5rem;
    color: var(--primary-color);
    margin-right: 1rem;
}

.standard-name {
    font-weight: bold;
    margin-bottom: 0.25rem;
}

.standard-description {
    color: var(--gray-color);
    font-size: 0.9rem;
}

.standard-requirements {
    color: var(--gray-color);
    font-size: 0.9rem;
    margin-top: 0.25rem;
}

/* Right side of a row: action buttons */
.standard-actions {
    display: flex;
    gap: 0.5rem;
}

/* Placeholder text shown when the list is empty */
.no-standards {
    color: var(--gray-color);
    text-align: center;
    padding: 2rem 0;
}
|
||||
|
||||
/* Modal styles: standard-detail dialog layered above the loading overlay */
.modal {
    position: fixed;
    top: 0;
    left: 0;
    width: 100%;
    height: 100%;
    background-color: rgba(0, 0, 0, 0.5);
    display: flex;
    justify-content: center;
    align-items: center;
    z-index: 1001; /* one above .loading-overlay (1000) */
}

/* Dialog panel; scrolls internally when content is taller than 80vh */
.modal-content {
    background-color: white;
    border-radius: 8px;
    width: 80%;
    max-width: 800px;
    max-height: 80vh;
    overflow-y: auto;
    box-shadow: 0 4px 20px rgba(0, 0, 0, 0.2);
}

.modal-header {
    display: flex;
    justify-content: space-between;
    align-items: center;
    padding: 1rem 1.5rem;
    border-bottom: 1px solid var(--border-color);
}

.modal-header h2 {
    margin: 0;
}

/* Borderless close control in the dialog header */
.close-button {
    background: none;
    border: none;
    font-size: 1.5rem;
    cursor: pointer;
    color: var(--gray-color);
}

.modal-body {
    padding: 1.5rem;
}

.standard-detail-header {
    margin-bottom: 1.5rem;
}

.standard-detail-header h3 {
    margin-bottom: 0.5rem;
}

.standard-requirements-list {
    margin-top: 1.5rem;
}

.standard-requirements-list h4 {
    margin-bottom: 1rem;
    padding-bottom: 0.5rem;
    border-bottom: 1px solid var(--border-color);
}

/* Requirement card; the severity modifiers below add a coloured left edge */
.requirement-item {
    margin-bottom: 1rem;
    padding: 1rem;
    border-radius: 4px;
    background-color: var(--light-color); /* theme variable instead of a repeated hex value */
}

.requirement-item.critical {
    border-left: 4px solid var(--danger-color);
}

.requirement-item.major {
    border-left: 4px solid var(--warning-color);
}

.requirement-item.minor {
    border-left: 4px solid var(--success-color);
}

.requirement-item.info {
    border-left: 4px solid var(--info-color);
}

.requirement-header {
    display: flex;
    justify-content: space-between;
    align-items: center;
    margin-bottom: 0.5rem;
}

.requirement-id {
    font-weight: bold;
    color: var(--gray-color);
}

.requirement-description {
    margin-bottom: 0.5rem;
}

.requirement-details {
    font-size: 0.9rem;
    color: var(--gray-color);
    padding: 0.5rem;
    background-color: #f1f1f1;
    border-radius: 4px;
}
|
||||
|
||||
/* Responsive design */

/* Up to 992px wide: single-column grid and a larger modal */
@media (max-width: 992px) {
    .main-grid {
        grid-template-columns: 1fr;
    }

    .modal-content {
        width: 95%;
        max-height: 90vh;
    }
}

/* Up to 768px wide: stack list rows and stat boxes vertically */
@media (max-width: 768px) {
    .document-item {
        flex-direction: column;
        align-items: flex-start;
    }

    .document-actions {
        margin-top: 1rem;
        align-self: flex-end;
    }

    .stats-container {
        flex-direction: column;
    }

    .stat-box {
        margin-bottom: 0.5rem;
    }

    .standard-item {
        flex-direction: column;
        align-items: flex-start;
    }

    .standard-actions {
        margin-top: 1rem;
        align-self: flex-end;
    }
}
|
||||
@@ -0,0 +1,115 @@
|
||||
<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>Mini SpecsComply Pro</title>
    <link rel="stylesheet" href="static/css/styles.css">
    <link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.4.0/css/all.min.css">
</head>
<body>
    <!--
        Single-page UI. Every id below is looked up by static/js/script.js,
        so ids and class names must stay in sync with that script and the stylesheet.
        Icon-font <i> elements are decorative and therefore hidden from assistive tech.
    -->
    <div class="container">
        <header>
            <h1><i class="fas fa-clipboard-check" aria-hidden="true"></i> Mini SpecsComply Pro</h1>
            <p>Document Compliance and Validation Tool</p>
        </header>

        <main>
            <div class="main-grid">
                <!-- Document upload: drop zone first, file-info panel after a file is chosen -->
                <section class="upload-section">
                    <h2>Upload Document</h2>
                    <div class="upload-container" id="upload-container">
                        <div class="upload-area" id="upload-area">
                            <i class="fas fa-cloud-upload-alt" aria-hidden="true"></i>
                            <p>Drag and drop your document here</p>
                            <p>or</p>
                            <label for="file-input" class="button">Browse Files</label>
                            <input type="file" id="file-input" accept=".md,.txt,.json,.yaml,.html,.doc,.docx,.pdf" hidden>
                        </div>
                        <div class="file-info" id="file-info" style="display: none;">
                            <div class="file-details">
                                <i class="fas fa-file-alt file-icon" aria-hidden="true"></i>
                                <div>
                                    <p class="file-name" id="file-name">document.md</p>
                                    <p class="file-size" id="file-size">0 KB</p>
                                </div>
                            </div>
                            <button type="button" class="button upload-button" id="upload-button">Upload for Analysis</button>
                            <button type="button" class="button cancel-button" id="cancel-button">Cancel</button>
                        </div>
                    </div>
                </section>

                <!-- Compliance standards: upload panel (hidden until requested) and list -->
                <section class="standards-section">
                    <h2>Compliance Standards</h2>
                    <div class="standards-container" id="standards-container">
                        <div class="standards-header">
                            <p>Manage the compliance standards used for document analysis</p>
                            <button type="button" class="button" id="upload-standard-button">
                                <i class="fas fa-plus" aria-hidden="true"></i> Add Standard
                            </button>
                        </div>
                        <div class="standards-upload" id="standards-upload" style="display: none;">
                            <div class="upload-area" id="standard-upload-area">
                                <i class="fas fa-cloud-upload-alt" aria-hidden="true"></i>
                                <p>Drag and drop your standard JSON file here</p>
                                <p>or</p>
                                <label for="standard-file-input" class="button">Browse Files</label>
                                <input type="file" id="standard-file-input" accept=".json" hidden>
                            </div>
                            <div class="file-info" id="standard-file-info" style="display: none;">
                                <div class="file-details">
                                    <i class="fas fa-file-code file-icon" aria-hidden="true"></i>
                                    <div>
                                        <p class="file-name" id="standard-file-name">standard.json</p>
                                        <p class="file-size" id="standard-file-size">0 KB</p>
                                    </div>
                                </div>
                                <button type="button" class="button upload-button" id="standard-upload-button">Upload Standard</button>
                                <button type="button" class="button cancel-button" id="standard-cancel-button">Cancel</button>
                            </div>
                        </div>
                        <div class="standards-list-container">
                            <p class="no-standards" id="no-standards">No custom standards have been added yet.</p>
                            <ul class="standards-list" id="standards-list">
                                <!-- Standards will be added here dynamically -->
                            </ul>
                        </div>
                    </div>
                </section>

                <!-- Recently analysed documents -->
                <section class="documents-section">
                    <h2>Recent Documents</h2>
                    <div class="documents-container" id="documents-container">
                        <p class="no-documents" id="no-documents">No documents have been analyzed yet.</p>
                        <ul class="documents-list" id="documents-list">
                            <!-- Document items will be added here dynamically -->
                        </ul>
                    </div>
                </section>
            </div>

            <!-- Compliance report, hidden until a report is rendered -->
            <section class="report-section" id="report-section" style="display: none;">
                <div class="report-header">
                    <h2>Compliance Report</h2>
                    <button type="button" class="button" id="close-report-button">Close Report</button>
                </div>
                <div class="report-container" id="report-container">
                    <!-- Report content will be loaded here -->
                </div>
            </section>
        </main>

        <!-- Busy indicator; role/aria-live let screen readers announce it when shown -->
        <div class="loading-overlay" id="loading-overlay" role="status" aria-live="polite" style="display: none;">
            <div class="loading-spinner" aria-hidden="true"></div>
            <p>Analyzing document...</p>
        </div>

        <footer>
            <p>© 2025 Mini SpecsComply Pro</p>
        </footer>
    </div>

    <script src="static/js/script.js"></script>
</body>
</html>
|
||||
@@ -0,0 +1,825 @@
|
||||
document.addEventListener('DOMContentLoaded', function() {
|
||||
// Shorthand for the id lookups below
const byId = (id) => document.getElementById(id);

// Document upload, listing and report elements
const uploadArea = byId('upload-area');
const fileInput = byId('file-input');
const fileInfo = byId('file-info');
const fileName = byId('file-name');
const fileSize = byId('file-size');
const uploadButton = byId('upload-button');
const cancelButton = byId('cancel-button');
const noDocuments = byId('no-documents');
const documentsList = byId('documents-list');
const reportSection = byId('report-section');
const reportContainer = byId('report-container');
const closeReportButton = byId('close-report-button');
const loadingOverlay = byId('loading-overlay');

// Standards upload and listing elements
const uploadStandardButton = byId('upload-standard-button');
const standardsUpload = byId('standards-upload');
const standardUploadArea = byId('standard-upload-area');
const standardFileInput = byId('standard-file-input');
const standardFileInfo = byId('standard-file-info');
const standardFileName = byId('standard-file-name');
const standardFileSize = byId('standard-file-size');
const standardUploadButton = byId('standard-upload-button');
const standardCancelButton = byId('standard-cancel-button');
const noStandards = byId('no-standards');
const standardsList = byId('standards-list');

// Prefix shared by every API request made from this page
const API_BASE_URL = '/api';

// localStorage keys for the cached document and standard lists
const DOCUMENTS_STORAGE_KEY = 'specscomply_documents';
const STANDARDS_STORAGE_KEY = 'specscomply_standards';
|
||||
|
||||
// Drag and drop functionality
uploadArea.addEventListener('dragover', function(e) {
    // preventDefault is required for the element to accept a drop
    e.preventDefault();
    uploadArea.classList.add('dragover');
});

uploadArea.addEventListener('dragleave', function() {
    uploadArea.classList.remove('dragover');
});

uploadArea.addEventListener('drop', function(e) {
    e.preventDefault();
    uploadArea.classList.remove('dragover');

    if (e.dataTransfer.files.length) {
        // The Upload button reads fileInput.files, so a dropped file must be
        // mirrored into the input; otherwise clicking Upload silently does nothing.
        try {
            fileInput.files = e.dataTransfer.files;
        } catch (err) {
            console.warn('Could not attach dropped file to the file input:', err);
        }
        handleFileSelection(e.dataTransfer.files[0]);
    }
});

// File input change
fileInput.addEventListener('change', function() {
    if (fileInput.files.length) {
        handleFileSelection(fileInput.files[0]);
    }
});

// Upload button click
uploadButton.addEventListener('click', function() {
    if (fileInput.files.length) {
        uploadDocument(fileInput.files[0]);
    }
});

// Cancel button click
cancelButton.addEventListener('click', function() {
    resetFileInput();
});

// Close report button click
closeReportButton.addEventListener('click', function() {
    reportSection.style.display = 'none';
});
|
||||
|
||||
// Load stored documents and standards on page load
loadDocuments();
loadStandards();

// Standards upload button click: reveal the upload panel
uploadStandardButton.addEventListener('click', function() {
    standardsUpload.style.display = 'block';
});

// Standard drag and drop functionality
standardUploadArea.addEventListener('dragover', function(e) {
    // preventDefault is required for the element to accept a drop
    e.preventDefault();
    standardUploadArea.classList.add('dragover');
});

standardUploadArea.addEventListener('dragleave', function() {
    standardUploadArea.classList.remove('dragover');
});

standardUploadArea.addEventListener('drop', function(e) {
    e.preventDefault();
    standardUploadArea.classList.remove('dragover');

    if (e.dataTransfer.files.length) {
        const droppedFile = e.dataTransfer.files[0];

        // The Upload Standard button reads standardFileInput.files, so mirror an
        // acceptable dropped file into the input. Files that the selection handler
        // will reject (non-.json) are deliberately not attached.
        if (droppedFile.name.toLowerCase().endsWith('.json')) {
            try {
                standardFileInput.files = e.dataTransfer.files;
            } catch (err) {
                console.warn('Could not attach dropped file to the standard file input:', err);
            }
        }
        handleStandardFileSelection(droppedFile);
    }
});

// Standard file input change
standardFileInput.addEventListener('change', function() {
    if (standardFileInput.files.length) {
        handleStandardFileSelection(standardFileInput.files[0]);
    }
});

// Standard upload button click
standardUploadButton.addEventListener('click', function() {
    if (standardFileInput.files.length) {
        uploadStandard(standardFileInput.files[0]);
    }
});

// Standard cancel button click
standardCancelButton.addEventListener('click', function() {
    resetStandardFileInput();
});
|
||||
|
||||
// Handle file selection
|
||||
/**
 * Show the chosen file's name and formatted size, then swap the drop zone
 * for the file-info panel.
 *
 * @param {File} file - The file picked or dropped by the user.
 */
function handleFileSelection(file) {
    const { name, size } = file;

    fileName.textContent = name;
    fileSize.textContent = formatFileSize(size);

    // Reveal the details panel in place of the upload area
    fileInfo.style.display = 'block';
    uploadArea.style.display = 'none';
}
|
||||
|
||||
// Reset file input
|
||||
/**
 * Return the document upload widget to its initial state: hide the
 * file-info panel, show the drop zone again and clear the file input.
 */
function resetFileInput() {
    fileInfo.style.display = 'none';
    uploadArea.style.display = 'block';
    fileInput.value = '';
}
|
||||
|
||||
// Format file size
|
||||
/**
 * Format a byte count as a short human-readable string (e.g. "1.5 KB").
 *
 * Values are scaled by 1024 per unit and rounded to at most two decimals.
 * Anything of 1 TB or more is expressed in TB rather than running off the
 * end of the unit table, and missing, non-finite or non-positive input is
 * reported as "0 Bytes".
 *
 * @param {number} bytes - Size in bytes.
 * @returns {string} The formatted size followed by its unit.
 */
function formatFileSize(bytes) {
    const units = ['Bytes', 'KB', 'MB', 'GB', 'TB'];
    const step = 1024;

    let value = Number(bytes);
    if (!Number.isFinite(value) || value <= 0) return '0 Bytes';

    // Repeated division is exact for powers of 1024 and cannot pick an
    // out-of-range unit, unlike a floating-point log ratio.
    let unitIndex = 0;
    while (value >= step && unitIndex < units.length - 1) {
        value /= step;
        unitIndex += 1;
    }

    // parseFloat drops trailing zeros left by toFixed ("1.50" -> 1.5)
    return parseFloat(value.toFixed(2)) + ' ' + units[unitIndex];
}
|
||||
|
||||
// Upload document
|
||||
/**
 * POST a file to the document upload endpoint, cache the returned record
 * in local storage, refresh the list and start polling for the analysis.
 *
 * The loading overlay is shown for the duration of the upload request.
 * Failures are logged and reported to the user with an alert.
 *
 * @param {File} file - The document to upload.
 */
async function uploadDocument(file) {
    try {
        loadingOverlay.style.display = 'flex';

        const payload = new FormData();
        payload.append('file', file);

        const response = await fetch(`${API_BASE_URL}/documents/upload`, {
            method: 'POST',
            body: payload
        });
        if (!response.ok) {
            throw new Error(`Error uploading document: ${response.statusText}`);
        }

        const result = await response.json();
        console.log('Upload response:', result);

        // Record kept in local storage for the "Recent Documents" list
        saveDocument({
            id: result.document_id,
            name: file.name,
            status: result.status,
            date: new Date().toISOString(),
            size: file.size
        });

        resetFileInput();
        loadDocuments();

        // Polling is started but not awaited, so the overlay closes right away
        checkDocumentStatus(result.document_id);
    } catch (error) {
        console.error('Error uploading document:', error);
        alert('Failed to upload document. Please try again.');
    } finally {
        loadingOverlay.style.display = 'none';
    }
}
|
||||
|
||||
// Save document to local storage
|
||||
/**
 * Insert or update a document record in local storage.
 *
 * A record with a matching id is merged with the new fields; otherwise the
 * record is appended. The list is then ordered newest first by date and
 * trimmed to the ten most recent entries before being written back.
 *
 * @param {Object} document - Record with at least `id` and `date`.
 */
function saveDocument(document) {
    const stored = JSON.parse(localStorage.getItem(DOCUMENTS_STORAGE_KEY) || '[]');

    const position = stored.findIndex(entry => entry.id === document.id);
    if (position === -1) {
        stored.push(document);
    } else {
        // New fields win over the ones already stored
        stored[position] = Object.assign({}, stored[position], document);
    }

    // Newest first
    stored.sort((first, second) => new Date(second.date) - new Date(first.date));

    // Cap the history at ten entries
    const recent = stored.length > 10 ? stored.slice(0, 10) : stored;

    localStorage.setItem(DOCUMENTS_STORAGE_KEY, JSON.stringify(recent));
}
|
||||
|
||||
// Load documents from local storage
|
||||
/**
 * Rebuild the "Recent Documents" list from local storage.
 *
 * Shows the empty-state message when nothing is stored. Stored data that
 * is unparseable or not an array is treated as empty: this function runs
 * during page initialisation, so an exception here would abort the rest
 * of the page setup.
 */
function loadDocuments() {
    let documents = [];
    try {
        const parsed = JSON.parse(localStorage.getItem(DOCUMENTS_STORAGE_KEY) || '[]');
        if (Array.isArray(parsed)) {
            documents = parsed;
        }
    } catch (error) {
        console.warn('Ignoring unreadable stored documents:', error);
    }

    // Clear documents list
    documentsList.innerHTML = '';

    if (documents.length === 0) {
        noDocuments.style.display = 'block';
        return;
    }

    noDocuments.style.display = 'none';

    // Add documents to list
    documents.forEach(doc => {
        documentsList.appendChild(createDocumentListItem(doc));
    });
}
|
||||
|
||||
// Create document list item
|
||||
/**
 * Build the <li> for one stored document, with "View Report" and
 * "Resubmit" buttons wired to their handlers.
 *
 * The document name, date and id are written with textContent /
 * setAttribute rather than interpolated into markup, because the name
 * comes from a user-chosen file and must not be parsed as HTML.
 *
 * @param {Object} doc - Stored record (`id`, `name`, `date`).
 * @returns {HTMLLIElement} The list item, or a plain error item if
 *     building it failed.
 */
function createDocumentListItem(doc) {
    try {
        const li = window.document.createElement('li');
        li.className = 'document-item';

        // An unparseable date yields an Invalid Date object instead of
        // throwing, so it has to be detected explicitly.
        const parsedDate = new Date(doc.date);
        let dateStr = 'Unknown date';
        if (!Number.isNaN(parsedDate.getTime())) {
            dateStr = parsedDate.toLocaleDateString('en-US', {
                year: 'numeric',
                month: 'short',
                day: 'numeric',
                hour: '2-digit',
                minute: '2-digit'
            });
        }

        // Static skeleton only; all dynamic values are filled in below
        li.innerHTML = `
            <div class="document-info">
                <i class="fas fa-file-alt document-icon"></i>
                <div>
                    <p class="document-name"></p>
                    <p class="document-date"></p>
                </div>
            </div>
            <div class="document-actions">
                <button class="button action-button view-button">
                    <i class="fas fa-eye"></i> View Report
                </button>
                <button class="button action-button resubmit-button">
                    <i class="fas fa-redo"></i> Resubmit
                </button>
            </div>
        `;

        li.querySelector('.document-name').textContent = doc.name || 'Unnamed document';
        li.querySelector('.document-date').textContent = dateStr;

        const viewButton = li.querySelector('.view-button');
        const resubmitButton = li.querySelector('.resubmit-button');
        viewButton.setAttribute('data-id', doc.id);
        resubmitButton.setAttribute('data-id', doc.id);

        viewButton.addEventListener('click', function() {
            viewDocumentReport(this.getAttribute('data-id'));
        });

        resubmitButton.addEventListener('click', function() {
            resubmitDocument(this.getAttribute('data-id'));
        });

        return li;
    } catch (error) {
        console.error('Error creating document list item:', error);
        const li = window.document.createElement('li');
        li.className = 'document-item';
        li.textContent = 'Error displaying document';
        return li;
    }
}
|
||||
|
||||
// View document report
|
||||
/**
 * Fetch the analysis for a document and display its report.
 *
 * If the analysis is still pending/processing, has failed, or carries no
 * report, the user is told via an alert and nothing is rendered. Otherwise
 * the report is rendered, the report section is shown and scrolled into
 * view. The loading overlay is visible while the request is in flight.
 *
 * @param {string} documentId - Id of the document whose report to show.
 */
async function viewDocumentReport(documentId) {
    try {
        loadingOverlay.style.display = 'flex';

        const response = await fetch(`${API_BASE_URL}/documents/${documentId}/analysis`);
        if (!response.ok) {
            throw new Error(`Error fetching document analysis: ${response.statusText}`);
        }

        const analysis = await response.json();

        // Work out whether something prevents showing a report
        let blockingMessage = null;
        switch (analysis.status) {
            case 'pending':
            case 'processing':
                blockingMessage = 'Document analysis is still in progress. Please try again later.';
                break;
            case 'failed':
                blockingMessage = 'Document analysis failed. Please try resubmitting the document.';
                break;
            default:
                if (!analysis.report) {
                    blockingMessage = 'No analysis report available for this document.';
                }
        }

        if (blockingMessage !== null) {
            alert(blockingMessage);
            return;
        }

        renderReport(analysis.report);
        reportSection.style.display = 'block';
        reportSection.scrollIntoView({ behavior: 'smooth' });
    } catch (error) {
        console.error('Error viewing document report:', error);
        alert('Failed to load document report. Please try again.');
    } finally {
        // Runs on every exit path, including the early returns above
        loadingOverlay.style.display = 'none';
    }
}
|
||||
|
||||
// Handle standard file selection
|
||||
/**
 * Accept a chosen standards file if its name ends in ".json"
 * (case-insensitive) and show its name and size; otherwise alert the user
 * and leave the UI unchanged.
 *
 * @param {File} file - The file picked or dropped by the user.
 */
function handleStandardFileSelection(file) {
    const looksLikeJson = file.name.toLowerCase().endsWith('.json');

    if (looksLikeJson) {
        standardFileName.textContent = file.name;
        standardFileSize.textContent = formatFileSize(file.size);

        // Swap the drop zone for the file-info panel
        standardUploadArea.style.display = 'none';
        standardFileInfo.style.display = 'block';
    } else {
        alert('Please select a JSON file for standards');
    }
}
|
||||
|
||||
// Reset standard file input
|
||||
/**
 * Return the standards upload widget to its initial state: collapse the
 * whole upload panel, hide the file-info panel, restore the drop zone and
 * clear the file input.
 */
function resetStandardFileInput() {
    standardsUpload.style.display = 'none';
    standardFileInfo.style.display = 'none';
    standardUploadArea.style.display = 'block';
    standardFileInput.value = '';
}
|
||||
|
||||
// Upload standard
|
||||
/**
 * POST a standards file to the upload endpoint, cache the returned summary
 * in local storage, refresh the standards list and confirm to the user.
 *
 * The loading overlay is shown for the duration of the request. Failures
 * are logged and reported with an alert.
 *
 * @param {File} file - The standards JSON file to upload.
 */
async function uploadStandard(file) {
    try {
        loadingOverlay.style.display = 'flex';

        const payload = new FormData();
        payload.append('file', file);

        const response = await fetch(`${API_BASE_URL}/standards/upload`, {
            method: 'POST',
            body: payload
        });
        if (!response.ok) {
            throw new Error(`Error uploading standard: ${response.statusText}`);
        }

        const result = await response.json();
        console.log('Standard upload response:', result);

        const { standard_id, name, requirement_count } = result;

        // Summary kept in local storage for the standards list
        saveStandard({
            id: standard_id,
            name: name,
            requirement_count: requirement_count,
            date: new Date().toISOString()
        });

        resetStandardFileInput();
        loadStandards();

        alert(`Standard "${name}" uploaded successfully with ${requirement_count} requirements.`);
    } catch (error) {
        console.error('Error uploading standard:', error);
        alert('Failed to upload standard. Please try again.');
    } finally {
        loadingOverlay.style.display = 'none';
    }
}
|
||||
|
||||
// Save standard to local storage
|
||||
/**
 * Insert or update a standard summary in local storage.
 *
 * A summary with a matching id is merged with the new fields; otherwise it
 * is appended. The list is ordered newest first by date before being
 * written back.
 *
 * @param {Object} standard - Summary with at least `id` and `date`.
 */
function saveStandard(standard) {
    const stored = JSON.parse(localStorage.getItem(STANDARDS_STORAGE_KEY) || '[]');

    const position = stored.findIndex(entry => entry.id === standard.id);
    if (position === -1) {
        stored.push(standard);
    } else {
        // New fields win over the ones already stored
        stored[position] = Object.assign({}, stored[position], standard);
    }

    // Newest first
    stored.sort((first, second) => new Date(second.date) - new Date(first.date));

    localStorage.setItem(STANDARDS_STORAGE_KEY, JSON.stringify(stored));
}
|
||||
|
||||
// Load standards from local storage
|
||||
/**
 * Rebuild the standards list from local storage.
 *
 * Shows the empty-state message when nothing is stored. Stored data that
 * is unparseable or not an array is treated as empty: this function runs
 * during page initialisation, so an exception here would abort the rest
 * of the page setup.
 */
function loadStandards() {
    let standards = [];
    try {
        const parsed = JSON.parse(localStorage.getItem(STANDARDS_STORAGE_KEY) || '[]');
        if (Array.isArray(parsed)) {
            standards = parsed;
        }
    } catch (error) {
        console.warn('Ignoring unreadable stored standards:', error);
    }

    // Clear standards list
    standardsList.innerHTML = '';

    if (standards.length === 0) {
        noStandards.style.display = 'block';
        return;
    }

    noStandards.style.display = 'none';

    // Add standards to list
    standards.forEach(standard => {
        standardsList.appendChild(createStandardListItem(standard));
    });
}
|
||||
|
||||
// Create standard list item
|
||||
/**
 * Build the <li> for one stored standard, with a "View" button wired to
 * viewStandard.
 *
 * The name, date, requirement count and id are written with textContent /
 * setAttribute rather than interpolated into markup, since they originate
 * from an uploaded file and must not be parsed as HTML.
 *
 * @param {Object} standard - Stored summary (`id`, `name`,
 *     `requirement_count`, `date`).
 * @returns {HTMLLIElement} The populated list item.
 */
function createStandardListItem(standard) {
    const li = document.createElement('li');
    li.className = 'standard-item';

    // An unparseable date yields an Invalid Date object instead of
    // throwing, so it has to be detected explicitly.
    const parsedDate = new Date(standard.date);
    let dateStr = 'Unknown date';
    if (!Number.isNaN(parsedDate.getTime())) {
        dateStr = parsedDate.toLocaleDateString('en-US', {
            year: 'numeric',
            month: 'short',
            day: 'numeric'
        });
    }

    // Static skeleton only; all dynamic values are filled in below
    li.innerHTML = `
        <div class="standard-info">
            <i class="fas fa-book standard-icon"></i>
            <div>
                <p class="standard-name"></p>
                <p class="standard-description"></p>
                <p class="standard-requirements"></p>
            </div>
        </div>
        <div class="standard-actions">
            <button class="button action-button view-standard-button">
                <i class="fas fa-eye"></i> View
            </button>
        </div>
    `;

    li.querySelector('.standard-name').textContent = standard.name || 'Unnamed standard';
    li.querySelector('.standard-description').textContent = `Added on ${dateStr}`;
    li.querySelector('.standard-requirements').textContent = `${standard.requirement_count} requirements`;

    const viewButton = li.querySelector('.view-standard-button');
    viewButton.setAttribute('data-id', standard.id);

    viewButton.addEventListener('click', function() {
        viewStandard(this.getAttribute('data-id'));
    });

    return li;
}
|
||||
|
||||
// View standard details
|
||||
/**
 * Fetch one standard from the API and show its details in a modal.
 *
 * All values taken from the response are HTML-escaped before being placed
 * in markup, because standards are uploaded by users. A response without a
 * `requirements` array is shown as having no requirements. The modal is
 * closed by its close button or by clicking the backdrop. The loading
 * overlay is visible while the request is in flight; failures are logged
 * and reported with an alert.
 *
 * @param {string} standardId - Id of the standard to display.
 */
async function viewStandard(standardId) {
    // Neutralise characters that are significant in HTML text and attributes
    const escapeHtml = (value) => String(value).replace(/[&<>"']/g, (ch) => ({
        '&': '&amp;',
        '<': '&lt;',
        '>': '&gt;',
        '"': '&quot;',
        "'": '&#39;'
    })[ch]);

    try {
        loadingOverlay.style.display = 'flex';

        const response = await fetch(`${API_BASE_URL}/standards/${encodeURIComponent(standardId)}`);
        if (!response.ok) {
            throw new Error(`Error fetching standard: ${response.statusText}`);
        }

        const standard = await response.json();
        const requirements = Array.isArray(standard.requirements) ? standard.requirements : [];

        const requirementItems = requirements.map((req) => {
            const severity = escapeHtml(req.severity);
            const details = req.details
                ? `<div class="requirement-details">${escapeHtml(req.details)}</div>`
                : '';
            return `
                <li class="requirement-item ${severity}">
                    <div class="requirement-header">
                        <span class="requirement-id">${escapeHtml(req.id)}</span>
                        <span class="badge ${severity}">${severity}</span>
                    </div>
                    <div class="requirement-description">${escapeHtml(req.description)}</div>
                    ${details}
                </li>
            `;
        }).join('');

        const modalContent = `
            <div class="standard-detail-header">
                <h3>${escapeHtml(standard.name)}</h3>
                <p>${escapeHtml(standard.description || 'No description available')}</p>
            </div>
            <div class="standard-requirements-list">
                <h4>Requirements (${requirements.length})</h4>
                ${requirements.length === 0 ? '<p>No requirements defined</p>' : ''}
                <ul>
                    ${requirementItems}
                </ul>
            </div>
        `;

        const modal = document.createElement('div');
        modal.className = 'modal';
        modal.innerHTML = `
            <div class="modal-content">
                <div class="modal-header">
                    <h2>Standard Details</h2>
                    <button type="button" class="close-button" aria-label="Close">×</button>
                </div>
                <div class="modal-body">
                    ${modalContent}
                </div>
            </div>
        `;

        document.body.appendChild(modal);

        // remove() is a no-op when the modal is already detached, so both
        // close paths can fire without throwing
        const closeModal = () => modal.remove();

        modal.querySelector('.close-button').addEventListener('click', closeModal);

        // Close modal when clicking the backdrop, not its content
        modal.addEventListener('click', function(e) {
            if (e.target === modal) {
                closeModal();
            }
        });
    } catch (error) {
        console.error('Error viewing standard:', error);
        alert('Failed to load standard details. Please try again.');
    } finally {
        loadingOverlay.style.display = 'none';
    }
}
|
||||
|
||||
// Render report
|
||||
/**
 * Render a compliance report into the report container.
 *
 * Output consists of the score circle, per-level issue counts, the
 * summary with any applied standards, and the issues ordered critical,
 * major, minor, info (unrecognised levels last).
 *
 * Every value taken from the report is HTML-escaped before being placed in
 * markup. A report without an `issues` array is rendered as having no
 * issues, and a missing or non-numeric `compliance_score` is shown as 0.
 *
 * @param {Object} report - Report with `compliance_score`
 *     (multiplied by 100 for display), `summary`, `issues` and
 *     optionally `applied_standards`.
 */
function renderReport(report) {
    // Neutralise characters that are significant in HTML text and attributes
    const escapeHtml = (value) => String(value).replace(/[&<>"']/g, (ch) => ({
        '&': '&amp;',
        '<': '&lt;',
        '>': '&gt;',
        '"': '&quot;',
        "'": '&#39;'
    })[ch]);

    const levelOrder = { critical: 0, major: 1, minor: 2, info: 3 };
    const isKnownLevel = (level) => Object.prototype.hasOwnProperty.call(levelOrder, level);
    // Unknown levels sort after every known one instead of yielding NaN
    const rankOf = (level) => (isKnownLevel(level) ? levelOrder[level] : 4);

    const issues = Array.isArray(report.issues) ? report.issues : [];

    // Count issues per level in a single pass
    const counts = { critical: 0, major: 0, minor: 0, info: 0 };
    issues.forEach((issue) => {
        if (isKnownLevel(issue.level)) {
            counts[issue.level] += 1;
        }
    });

    // Format score as percentage; thresholds use the displayed (rounded) value
    const rawScore = Number(report.compliance_score);
    const scorePercentage = (Number.isFinite(rawScore) ? rawScore * 100 : 0).toFixed(1);
    const roundedScore = Number(scorePercentage);

    let scoreColor = '#2ecc71'; // Default green
    if (roundedScore < 50) {
        scoreColor = '#e74c3c'; // Red for low score
    } else if (roundedScore < 80) {
        scoreColor = '#f39c12'; // Orange for medium score
    }

    const appliedStandards = report.applied_standards && report.applied_standards.length > 0
        ? `
            <div class="applied-standards">
                <h4>Applied Standards</h4>
                <ul class="standards-list">
                    ${report.applied_standards.map(std => `<li>${escapeHtml(std)}</li>`).join('')}
                </ul>
            </div>
        `
        : '';

    let html = `
        <div class="score-container">
            <div class="score-circle" style="background-color: ${scoreColor}">
                ${scorePercentage}%
            </div>
            <div class="score-label">Compliance Score</div>
        </div>

        <div class="stats-container">
            <div class="stat-box">
                <div class="stat-value" style="color: #e74c3c">${counts.critical}</div>
                <div class="stat-label">Critical Issues</div>
            </div>
            <div class="stat-box">
                <div class="stat-value" style="color: #f39c12">${counts.major}</div>
                <div class="stat-label">Major Issues</div>
            </div>
            <div class="stat-box">
                <div class="stat-value" style="color: #2ecc71">${counts.minor}</div>
                <div class="stat-label">Minor Issues</div>
            </div>
            <div class="stat-box">
                <div class="stat-value" style="color: #3498db">${counts.info}</div>
                <div class="stat-label">Info Issues</div>
            </div>
        </div>

        <div class="summary">
            <h3>Summary</h3>
            <p>${escapeHtml(report.summary)}</p>
            ${appliedStandards}
        </div>

        <div class="issues-container">
            <h3>Compliance Issues</h3>
    `;

    if (issues.length === 0) {
        html += '<p>No compliance issues found. Great job!</p>';
    } else {
        // Sort a copy so the caller's array is left untouched
        const sortedIssues = [...issues].sort((a, b) => rankOf(a.level) - rankOf(b.level));

        sortedIssues.forEach((issue) => {
            const level = escapeHtml(issue.level);

            const reasoning = issue.reasoning
                ? `
                    <div class="issue-reasoning">
                        <strong>Reasoning:</strong> ${escapeHtml(issue.reasoning)}
                    </div>`
                : '';

            const references = issue.standard_references && issue.standard_references.length > 0
                ? `
                    <div class="issue-references">
                        <strong>Standard References:</strong>
                        <ul class="reference-list">
                            ${issue.standard_references.map(ref => `<li>${escapeHtml(ref)}</li>`).join('')}
                        </ul>
                    </div>`
                : '';

            html += `
                <div class="issue ${level}">
                    <div class="issue-header">
                        <div class="issue-section">${escapeHtml(issue.section)}</div>
                        <span class="badge ${level}">${level}</span>
                    </div>
                    <div class="issue-description">
                        ${escapeHtml(issue.description)}
                    </div>
                    ${reasoning}
                    ${references}
                    <div class="issue-recommendation">
                        <strong>Recommendation:</strong> ${escapeHtml(issue.recommendation)}
                    </div>
                </div>
            `;
        });
    }

    // Close the issues container opened in the template above
    html += '</div>';

    reportContainer.innerHTML = html;
}
|
||||
|
||||
// Check document status
|
||||
/**
 * Poll the API for a document's processing status.
 *
 * Makes up to ten requests, waiting 2 s before the first and 1.5x longer
 * before each subsequent one (capped at 10 s). After every response the
 * stored record's status is updated. Polling stops once the status is
 * 'completed' or 'failed'; a completed document that has reports is opened
 * straight away. The document list is refreshed when polling ends
 * normally. Errors are only logged.
 *
 * @param {string} documentId - Id of the document to poll.
 */
async function checkDocumentStatus(documentId) {
    const MAX_ATTEMPTS = 10;
    const MAX_WAIT_MS = 10000;
    const pause = (ms) => new Promise(resolve => setTimeout(resolve, ms));

    try {
        let waitMs = 2000;
        let attemptsLeft = MAX_ATTEMPTS;

        while (attemptsLeft-- > 0) {
            await pause(waitMs);

            const response = await fetch(`${API_BASE_URL}/documents/${documentId}`);
            if (!response.ok) {
                throw new Error(`Error checking document status: ${response.statusText}`);
            }
            const data = await response.json();

            // Mirror the latest status into the locally stored record, if present
            const stored = JSON.parse(localStorage.getItem(DOCUMENTS_STORAGE_KEY) || '[]');
            const record = stored.find(doc => doc.id === documentId);
            if (record) {
                record.status = data.status;
                localStorage.setItem(DOCUMENTS_STORAGE_KEY, JSON.stringify(stored));
            }

            const finished = data.status === 'completed' || data.status === 'failed';
            if (finished) {
                const hasReports = data.reports && data.reports.length > 0;
                if (data.status === 'completed' && hasReports) {
                    viewDocumentReport(documentId);
                }
                break;
            }

            // Back off before the next attempt
            waitMs = Math.min(waitMs * 1.5, MAX_WAIT_MS);
        }

        loadDocuments();
    } catch (error) {
        console.error('Error checking document status:', error);
    }
}
|
||||
|
||||
// Resubmit document
|
||||
/**
 * Ask the user for a replacement file and resubmit it for a document.
 *
 * A throwaway file input is used instead of the page's shared upload
 * input. Reusing the shared input would also run its normal selection
 * handler, and a cancelled dialog would leave a pending listener behind
 * that turns the next ordinary file pick into a resubmission.
 *
 * @param {string} documentId - Id of the document being resubmitted.
 */
function resubmitDocument(documentId) {
    // Drop any picker left over from a dialog whose dismissal was not reported
    document.querySelectorAll('input.resubmit-picker').forEach(stale => stale.remove());

    const picker = document.createElement('input');
    picker.type = 'file';
    picker.className = 'resubmit-picker';
    picker.accept = fileInput.accept; // same file types as a normal upload
    picker.hidden = true;

    picker.addEventListener('change', async function() {
        const chosenFile = picker.files[0];
        picker.remove();

        if (chosenFile) {
            await handleDocumentResubmission(documentId, chosenFile);
        }
    }, { once: true });

    // Fired by browsers that report a dismissed file dialog
    picker.addEventListener('cancel', function() {
        picker.remove();
    }, { once: true });

    // Attached to the document before click() so the dialog opens reliably
    document.body.appendChild(picker);
    picker.click();
}
|
||||
|
||||
// Handle document resubmission
|
||||
/**
 * POST a replacement file to a document's resubmit endpoint, store the
 * returned record, refresh the list, start status polling and confirm to
 * the user.
 *
 * The loading overlay is shown for the duration of the request. Failures
 * are logged and reported with an alert.
 *
 * @param {string} documentId - Id of the document being resubmitted.
 * @param {File} file - The replacement file.
 */
async function handleDocumentResubmission(documentId, file) {
    try {
        loadingOverlay.style.display = 'flex';

        const payload = new FormData();
        payload.append('file', file);

        const response = await fetch(`${API_BASE_URL}/documents/${documentId}/resubmit`, {
            method: 'POST',
            body: payload
        });
        if (!response.ok) {
            throw new Error(`Error resubmitting document: ${response.statusText}`);
        }

        const result = await response.json();

        // Stored under the id returned by the server
        saveDocument({
            id: result.document_id,
            name: file.name,
            status: result.status,
            date: new Date().toISOString(),
            size: file.size
        });

        loadDocuments();

        // Polling is started but not awaited; the alert below appears immediately
        checkDocumentStatus(result.document_id);

        alert('Document resubmitted successfully! The analysis is in progress.');
    } catch (error) {
        console.error('Error resubmitting document:', error);
        alert('Failed to resubmit document. Please try again.');
    } finally {
        loadingOverlay.style.display = 'none';
    }
}
|
||||
});
|
||||
@@ -0,0 +1,295 @@
|
||||
<!DOCTYPE html>
|
||||
<html lang="en">
|
||||
<head>
|
||||
<meta charset="UTF-8">
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||
<title>Document Compliance Report</title>
|
||||
<style>
|
||||
body {
|
||||
font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
|
||||
line-height: 1.6;
|
||||
color: #333;
|
||||
max-width: 1200px;
|
||||
margin: 0 auto;
|
||||
padding: 20px;
|
||||
}
|
||||
.header {
|
||||
background-color: #f8f9fa;
|
||||
padding: 20px;
|
||||
border-radius: 5px;
|
||||
margin-bottom: 20px;
|
||||
border-left: 5px solid #007bff;
|
||||
}
|
||||
.document-info {
|
||||
display: flex;
|
||||
justify-content: space-between;
|
||||
align-items: center;
|
||||
margin-bottom: 20px;
|
||||
}
|
||||
.document-info-left {
|
||||
flex: 3;
|
||||
}
|
||||
.document-info-right {
|
||||
flex: 1;
|
||||
text-align: right;
|
||||
}
|
||||
.score-container {
|
||||
font-size: 24px;
|
||||
font-weight: bold;
|
||||
color: #28a745;
|
||||
}
|
||||
.summary {
|
||||
background-color: #e9f7ef;
|
||||
padding: 20px;
|
||||
border-radius: 5px;
|
||||
margin-bottom: 30px;
|
||||
border-left: 5px solid #27ae60;
|
||||
}
|
||||
|
||||
.applied-standards {
|
||||
margin-top: 1.5rem;
|
||||
padding: 1rem;
|
||||
background-color: #f8f9fa;
|
||||
border-radius: 5px;
|
||||
border-left: 3px solid #3498db;
|
||||
}
|
||||
|
||||
.applied-standards h3 {
|
||||
margin-top: 0;
|
||||
font-size: 1.2rem;
|
||||
color: #495057;
|
||||
}
|
||||
|
||||
.standards-list {
|
||||
margin: 0.5rem 0 0 1.5rem;
|
||||
padding: 0;
|
||||
}
|
||||
|
||||
.standards-list li {
|
||||
margin-bottom: 0.25rem;
|
||||
}
|
||||
.issues-container {
|
||||
margin-bottom: 30px;
|
||||
}
|
||||
.issue {
|
||||
margin-bottom: 15px;
|
||||
padding: 15px;
|
||||
border-radius: 5px;
|
||||
background-color: #f8f9fa;
|
||||
border-left: 4px solid #6c757d;
|
||||
}
|
||||
.issue.critical {
|
||||
background-color: #fdedec;
|
||||
border-left-color: #e74c3c;
|
||||
}
|
||||
.issue.major {
|
||||
background-color: #fef9e7;
|
||||
border-left-color: #f39c12;
|
||||
}
|
||||
.issue.minor {
|
||||
background-color: #eafaf1;
|
||||
border-left-color: #2ecc71;
|
||||
}
|
||||
.issue.info {
|
||||
background-color: #ebf5fb;
|
||||
border-left-color: #3498db;
|
||||
}
|
||||
.issue-header {
|
||||
display: flex;
|
||||
justify-content: space-between;
|
||||
align-items: center;
|
||||
margin-bottom: 10px;
|
||||
}
|
||||
.issue-section {
|
||||
font-weight: bold;
|
||||
font-size: 18px;
|
||||
}
|
||||
.badge {
|
||||
display: inline-block;
|
||||
padding: 5px 10px;
|
||||
border-radius: 3px;
|
||||
font-size: 12px;
|
||||
font-weight: bold;
|
||||
text-transform: uppercase;
|
||||
color: white;
|
||||
}
|
||||
.badge.critical {
|
||||
background-color: #e74c3c;
|
||||
}
|
||||
.badge.major {
|
||||
background-color: #f39c12;
|
||||
}
|
||||
.badge.minor {
|
||||
background-color: #2ecc71;
|
||||
}
|
||||
.badge.info {
|
||||
background-color: #3498db;
|
||||
}
|
||||
.issue-description {
|
||||
margin-bottom: 10px;
|
||||
}
|
||||
.issue-recommendation {
|
||||
background-color: #f8f9fa;
|
||||
padding: 10px;
|
||||
border-radius: 3px;
|
||||
margin-top: 10px;
|
||||
}
|
||||
|
||||
.issue-reasoning {
|
||||
margin-top: 10px;
|
||||
padding: 10px;
|
||||
background-color: #ebf5fb;
|
||||
border-radius: 3px;
|
||||
}
|
||||
|
||||
.issue-references {
|
||||
margin-top: 10px;
|
||||
}
|
||||
|
||||
.reference-list {
|
||||
margin-top: 5px;
|
||||
margin-left: 20px;
|
||||
font-size: 0.9rem;
|
||||
}
|
||||
.stats {
|
||||
display: flex;
|
||||
justify-content: space-between;
|
||||
margin-bottom: 20px;
|
||||
flex-wrap: wrap;
|
||||
}
|
||||
.stat-box {
|
||||
flex: 1;
|
||||
min-width: 200px;
|
||||
background-color: #f8f9fa;
|
||||
padding: 15px;
|
||||
border-radius: 5px;
|
||||
margin: 5px;
|
||||
text-align: center;
|
||||
}
|
||||
.stat-value {
|
||||
font-size: 24px;
|
||||
font-weight: bold;
|
||||
margin-bottom: 5px;
|
||||
}
|
||||
.stat-label {
|
||||
color: #6c757d;
|
||||
font-size: 14px;
|
||||
}
|
||||
.footer {
|
||||
margin-top: 30px;
|
||||
text-align: center;
|
||||
color: #6c757d;
|
||||
font-size: 14px;
|
||||
padding-top: 20px;
|
||||
border-top: 1px solid #dee2e6;
|
||||
}
|
||||
@media print {
|
||||
body {
|
||||
padding: 0;
|
||||
font-size: 12px;
|
||||
}
|
||||
.issue {
|
||||
break-inside: avoid;
|
||||
}
|
||||
.header, .summary {
|
||||
break-inside: avoid;
|
||||
}
|
||||
}
|
||||
</style>
|
||||
</head>
|
||||
<body>
|
||||
<div class="header">
|
||||
<h1>Document Compliance Report</h1>
|
||||
<div class="document-info">
|
||||
<div class="document-info-left">
|
||||
<p><strong>Document:</strong> {{ document_name }}</p>
|
||||
<p><strong>Generated:</strong> {{ timestamp }}</p>
|
||||
</div>
|
||||
<div class="document-info-right">
|
||||
<div class="score-container">
|
||||
{# Parenthesised on purpose: a filter binds tighter than `*`, so without the parentheses only the literal 100 would be rounded. #}
{{ (compliance_score * 100) | round(1) }}%
|
||||
</div>
|
||||
<div>Compliance Score</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="summary">
|
||||
<h2>Summary</h2>
|
||||
<p>{{ summary }}</p>
|
||||
|
||||
{% if applied_standards and applied_standards|length > 0 %}
|
||||
<div class="applied-standards">
|
||||
<h3>Applied Standards</h3>
|
||||
<ul class="standards-list">
|
||||
{% for standard in applied_standards %}
|
||||
<li>{{ standard }}</li>
|
||||
{% endfor %}
|
||||
</ul>
|
||||
</div>
|
||||
{% endif %}
|
||||
</div>
|
||||
|
||||
<div class="stats">
|
||||
<div class="stat-box">
|
||||
<div class="stat-value" style="color: #e74c3c;">{{ critical_count }}</div>
|
||||
<div class="stat-label">Critical Issues</div>
|
||||
</div>
|
||||
<div class="stat-box">
|
||||
<div class="stat-value" style="color: #f39c12;">{{ major_count }}</div>
|
||||
<div class="stat-label">Major Issues</div>
|
||||
</div>
|
||||
<div class="stat-box">
|
||||
<div class="stat-value" style="color: #2ecc71;">{{ minor_count }}</div>
|
||||
<div class="stat-label">Minor Issues</div>
|
||||
</div>
|
||||
<div class="stat-box">
|
||||
<div class="stat-value" style="color: #3498db;">{{ info_count }}</div>
|
||||
<div class="stat-label">Info Issues</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="issues-container">
|
||||
<h2>Compliance Issues</h2>
|
||||
|
||||
{% if issues %}
|
||||
{% for issue in issues %}
|
||||
<div class="issue {{ issue.level }}">
|
||||
<div class="issue-header">
|
||||
<div class="issue-section">{{ issue.section }}</div>
|
||||
<span class="badge {{ issue.level }}">{{ issue.level }}</span>
|
||||
</div>
|
||||
<div class="issue-description">
|
||||
{{ issue.description }}
|
||||
</div>
|
||||
{% if issue.reasoning %}
|
||||
<div class="issue-reasoning">
|
||||
<strong>Reasoning:</strong> {{ issue.reasoning }}
|
||||
</div>
|
||||
{% endif %}
|
||||
{% if issue.standard_references and issue.standard_references|length > 0 %}
|
||||
<div class="issue-references">
|
||||
<strong>Standard References:</strong>
|
||||
<ul class="reference-list">
|
||||
{% for reference in issue.standard_references %}
|
||||
<li>{{ reference }}</li>
|
||||
{% endfor %}
|
||||
</ul>
|
||||
</div>
|
||||
{% endif %}
|
||||
<div class="issue-recommendation">
|
||||
<strong>Recommendation:</strong> {{ issue.recommendation }}
|
||||
</div>
|
||||
</div>
|
||||
{% endfor %}
|
||||
{% else %}
|
||||
<p>No compliance issues found. Great job!</p>
|
||||
{% endif %}
|
||||
</div>
|
||||
|
||||
<div class="footer">
|
||||
<p>Generated by Mini SpecsComply Pro</p>
|
||||
<p>This report is for informational purposes only and should be reviewed by a qualified professional.</p>
|
||||
</div>
|
||||
</body>
|
||||
</html>
|
||||
@@ -0,0 +1 @@
|
||||
"""Utility functions for the Mini SpecsComply Pro application."""
|
||||
@@ -0,0 +1,283 @@
|
||||
# Utility functions
|
||||
import re
|
||||
from typing import Dict, List, Any, Optional
|
||||
import os
|
||||
from datetime import datetime
|
||||
import json
|
||||
|
||||
def extract_sections_from_markdown(markdown_text: str) -> Dict[str, str]:
    """
    Split a markdown document into its heading-delimited sections.

    Args:
        markdown_text: The markdown text to parse

    Returns:
        Dictionary that always holds the complete text under "full_document",
        plus one "h<level>_<heading text>" entry per heading whose value is the
        stripped text between that heading and the next one (or the end of the
        document). A repeated key keeps the content of its last occurrence.
    """
    heading_re = re.compile(r'^(#{1,6})\s+(.+)$', re.MULTILINE)
    headings = list(heading_re.finditer(markdown_text))

    # A section's body stops where the following heading starts; the last
    # section runs to the end of the text.
    stops = [following.start() for following in headings[1:]]
    stops.append(len(markdown_text))

    result = {"full_document": markdown_text}
    for heading, stop in zip(headings, stops):
        hashes, title = heading.groups()
        key = f"h{len(hashes)}_{title.strip()}"
        result[key] = markdown_text[heading.end():stop].strip()

    return result
|
||||
|
||||
def detect_file_type(filename: str) -> str:
    """
    Map a filename's extension to a coarse file-type label.

    Args:
        filename: Name of the file; the extension is matched case-insensitively

    Returns:
        'markdown', 'text', 'json', 'yaml' or 'html', or 'unknown' when the
        extension is missing or not recognised
    """
    type_by_extension = {
        'md': 'markdown',
        'markdown': 'markdown',
        'txt': 'text',
        'text': 'text',
        'json': 'json',
        'yaml': 'yaml',
        'yml': 'yaml',
        'html': 'html',
        'htm': 'html',
    }

    suffix = os.path.splitext(filename)[1]
    return type_by_extension.get(suffix.lower().lstrip('.'), 'unknown')
|
||||
|
||||
def parse_code_blocks(content: str) -> List[Dict[str, str]]:
    """
    Extract fenced code blocks from markdown content.

    The text after the opening fence may be any info string ("c++",
    "objective-c", "python title=x", ...); its first word is reported as the
    language. Accepting these keeps fences paired correctly: a fence whose
    info string is rejected would make its closing fence look like the start
    of a new block, and the prose between blocks would be returned as code.

    Args:
        content: Markdown content with code blocks

    Returns:
        List of dictionaries with 'language' ('text' when the fence names
        none) and 'code' (the block body, stripped)
    """
    # Info string: empty, or starts with a non-space character and runs to the
    # end of the line without containing a backtick.
    fence_re = re.compile(r'```((?:[^\s`][^\n`]*)?)\n([\s\S]*?)```')

    code_blocks = []
    for info, code in fence_re.findall(content):
        info_words = info.split()
        code_blocks.append({
            'language': info_words[0] if info_words else 'text',
            'code': code.strip()
        })

    return code_blocks
|
||||
|
||||
def format_timestamp(timestamp: datetime) -> str:
    """
    Render a datetime as "YYYY-MM-DD HH:MM:SS" for display.

    Args:
        timestamp: Datetime object to render

    Returns:
        The timestamp as a string with zero-padded fields and second precision
    """
    display_format = "%Y-%m-%d %H:%M:%S"
    return timestamp.strftime(display_format)
|
||||
|
||||
def calculate_readability_score(text: str) -> float:
    """
    Calculate a simple readability score for text.

    The score depends only on the average number of words per sentence, where
    sentences are the non-blank pieces between runs of '.', '!' and '?'.

    Args:
        text: The text to analyze

    Returns:
        Readability score (0.0-1.0): 0.0 for text without words, 0.7 for an
        average of at most 10 words per sentence, 1.0 for up to 20, 0.8 for
        up to 30 and 0.5 above that
    """
    if not text:
        return 0.0

    # re.split leaves an empty piece after the final terminator (and between
    # terminators separated only by whitespace). Counting those as sentences
    # would understate the average sentence length, so blank pieces are dropped.
    sentences = [piece for piece in re.split(r'[.!?]+', text) if piece.strip()]
    words = re.findall(r'\b\w+\b', text)

    if not words or not sentences:
        return 0.0

    avg_words_per_sentence = len(words) / len(sentences)

    # Optimal is around 15-20 words per sentence
    if avg_words_per_sentence <= 10:
        return 0.7  # Very short sentences
    if avg_words_per_sentence <= 20:
        return 1.0  # Optimal
    if avg_words_per_sentence <= 30:
        return 0.8  # Getting long
    return 0.5  # Too long
|
||||
|
||||
def sanitize_filename(filename: str) -> str:
    """
    Sanitize filename to be safe for filesystem.

    Replaces the characters < > : " / \\ | ? * and all ASCII control
    characters (including NUL) with underscores, then limits the result to
    255 characters while keeping the extension where possible.

    Args:
        filename: Original filename

    Returns:
        Sanitized filename, at most 255 characters long
    """
    max_length = 255

    # Control characters are replaced as well: NUL cannot appear in a path at
    # all, and the rest are invalid on some filesystems.
    sanitized = re.sub(r'[<>:"/\\|?*\x00-\x1f]', '_', filename)

    if len(sanitized) > max_length:
        base, ext = os.path.splitext(sanitized)
        if len(ext) < max_length:
            # Shorten the base name so the extension survives
            sanitized = base[:max_length - len(ext)] + ext
        else:
            # The "extension" alone exceeds the limit; a negative slice bound
            # here would leave the name too long, so cut the whole name.
            sanitized = sanitized[:max_length]

    return sanitized
|
||||
|
||||
def load_standards_from_file(file_path: str) -> List[Dict[str, Any]]:
    """
    Load compliance standards from a JSON file.

    The file is read as UTF-8 so the result does not depend on the platform's
    default encoding.

    Args:
        file_path: Path to the standards JSON file

    Returns:
        The parsed JSON content (expected to be a list of standard
        dictionaries), or an empty list if the file does not exist, is not
        valid UTF-8 or is not valid JSON
    """
    try:
        with open(file_path, 'r', encoding='utf-8') as f:
            return json.load(f)
    except (FileNotFoundError, json.JSONDecodeError, UnicodeDecodeError):
        # Deliberate best effort: a missing or unreadable standards file is
        # treated as "no standards" rather than as an error.
        return []
|
||||
|
||||
def _render_applied_standards(standards: List[str]) -> str:
|
||||
"""
|
||||
Render HTML for applied standards section.
|
||||
|
||||
Args:
|
||||
standards: List of standard names
|
||||
|
||||
Returns:
|
||||
HTML string for the applied standards section
|
||||
"""
|
||||
if not standards:
|
||||
return ""
|
||||
|
||||
html = """<div style="margin-top: 15px; padding: 10px; background-color: #f8f9fa; border-radius: 5px;">
|
||||
<h3 style="margin-top: 0; font-size: 16px; color: #495057;">Applied Standards</h3>
|
||||
<ul style="margin: 5px 0 0 20px; padding: 0;">
|
||||
"""
|
||||
|
||||
for standard in standards:
|
||||
html += f"<li style=\"margin-bottom: 3px;\">{standard}</li>\n"
|
||||
|
||||
html += "</ul></div>"
|
||||
return html
|
||||
|
||||
def generate_html_report(report_data: Dict[str, Any]) -> str:
    """
    Generate HTML for compliance report.

    Every value taken from ``report_data`` is HTML-escaped before it is placed
    in the page, so document names, summaries and issue texts are displayed
    literally instead of being interpreted as markup.

    Args:
        report_data: Report data dictionary. Keys read: 'document_name',
            'timestamp', 'compliance_score' (multiplied by 100 for display),
            'summary', 'applied_standards' and 'issues' (a list of
            dictionaries with 'level', 'section', 'description' and
            'recommendation'). Missing keys fall back to placeholder values.

    Returns:
        HTML string for the report
    """
    # Imported here so this function does not depend on an edit to the
    # module's import block.
    from html import escape

    def esc(value: Any) -> str:
        """Escape an arbitrary value for use in HTML text or attribute values."""
        return escape(str(value))

    document_name = esc(report_data.get('document_name', 'Unknown'))
    timestamp = esc(report_data.get('timestamp', datetime.now().strftime('%Y-%m-%d %H:%M:%S')))
    summary = esc(report_data.get('summary', 'No summary available.'))
    score_percent = report_data.get('compliance_score', 0) * 100
    # Already HTML: the helper is responsible for its own markup
    applied_standards_html = _render_applied_standards(report_data.get('applied_standards', []))

    # Simple HTML template for the report
    parts = [f"""
    <!DOCTYPE html>
    <html>
    <head>
        <title>Compliance Report</title>
        <style>
            body {{ font-family: Arial, sans-serif; margin: 0; padding: 20px; color: #333; }}
            .header {{ background-color: #f5f5f5; padding: 15px; border-bottom: 1px solid #ddd; }}
            .summary {{ margin: 20px 0; padding: 15px; background-color: #e9f7ef; border-left: 4px solid #27ae60; }}
            .issues {{ margin: 20px 0; }}
            .issue {{ margin-bottom: 15px; padding: 15px; background-color: #f9f9f9; border-left: 4px solid #3498db; }}
            .issue.critical {{ background-color: #fdedec; border-left-color: #c0392b; }}
            .issue.major {{ background-color: #fef9e7; border-left-color: #f1c40f; }}
            .issue.minor {{ background-color: #eafaf1; border-left-color: #2ecc71; }}
            .issue.info {{ background-color: #ebf5fb; border-left-color: #3498db; }}
            .issue h3 {{ margin-top: 0; }}
            .issue p {{ margin: 5px 0; }}
            .badge {{ display: inline-block; padding: 3px 7px; border-radius: 3px; font-size: 12px; color: white; }}
            .badge.critical {{ background-color: #c0392b; }}
            .badge.major {{ background-color: #f1c40f; color: #333; }}
            .badge.minor {{ background-color: #2ecc71; }}
            .badge.info {{ background-color: #3498db; }}
            .score {{ font-size: 24px; font-weight: bold; }}
            .score-container {{ text-align: right; }}
        </style>
    </head>
    <body>
        <div class="header">
            <h1>Compliance Report</h1>
            <p>Document: {document_name}</p>
            <p>Generated: {timestamp}</p>
            <div class="score-container">
                <span>Compliance Score: </span>
                <span class="score">{score_percent:.1f}%</span>
            </div>
        </div>

        <div class="summary">
            <h2>Summary</h2>
            <p>{summary}</p>

            {applied_standards_html}
        </div>

        <div class="issues">
            <h2>Compliance Issues</h2>
    """]

    # Add issues
    issues = report_data.get('issues', [])
    if not issues:
        parts.append("<p>No compliance issues found.</p>")
    else:
        for issue in issues:
            raw_level = issue.get('level', 'info').lower()
            # Escaped separately so upper-casing never touches an HTML entity
            level_class = esc(raw_level)
            level_label = esc(raw_level.upper())
            section = esc(issue.get('section', 'Unknown Section'))
            description = esc(issue.get('description', 'No description'))
            recommendation = esc(issue.get('recommendation', 'No recommendation'))
            parts.append(f"""
            <div class="issue {level_class}">
                <h3>{section}</h3>
                <p><span class="badge {level_class}">{level_label}</span> {description}</p>
                <p><strong>Recommendation:</strong> {recommendation}</p>
            </div>
            """)

    # Close HTML
    parts.append("""
        </div>
    </body>
    </html>
    """)

    return "".join(parts)
|
||||
@@ -0,0 +1,80 @@
|
||||
"""
|
||||
Token counting utilities for document processing.
|
||||
"""
|
||||
import tiktoken
|
||||
from typing import Dict, List, Optional, Union
|
||||
from loguru import logger
|
||||
|
||||
# Default model to use for token counting
|
||||
DEFAULT_MODEL = "gpt-4o"
|
||||
|
||||
def count_tokens(text: str, model: str = DEFAULT_MODEL) -> int:
    """
    Count the tokens of a text string with tiktoken.

    Three strategies are tried in order: the encoding tiktoken associates with
    `model`, the "cl100k_base" encoding, and finally a character-based
    estimate. A failing strategy is logged and the next one is used.

    Args:
        text: The text to count tokens for
        model: The model to use for token counting (default: gpt-4o)

    Returns:
        Number of tokens in the text; `len(text) // 4` if both encodings fail
    """
    try:
        return len(tiktoken.encoding_for_model(model).encode(text))
    except Exception as model_error:
        logger.warning(f"Error counting tokens with model {model}: {str(model_error)}")

    # Model-specific encoding failed: fall back to cl100k_base
    try:
        return len(tiktoken.get_encoding("cl100k_base").encode(text))
    except Exception as fallback_error:
        logger.error(f"Error counting tokens with fallback encoding: {str(fallback_error)}")

    # Last resort: rough approximation (4 chars per token)
    return len(text) // 4
|
||||
|
||||
def truncate_by_tokens(text: str, max_tokens: int, model: str = DEFAULT_MODEL) -> str:
    """
    Truncate text to fit within a maximum token count.

    Uses the same fallback chain as `count_tokens`: the encoding tiktoken
    associates with `model`, then the "cl100k_base" encoding, and only if both
    fail a character-based approximation (4 characters per token).

    Args:
        text: The text to truncate
        max_tokens: Maximum number of tokens to allow; negative values are
            treated as 0
        model: The model to use for token counting (default: gpt-4o)

    Returns:
        `text` unchanged if it already fits, otherwise its first `max_tokens`
        tokens followed by the marker "...(truncated)" (the marker is not
        counted against `max_tokens`)
    """
    # A negative budget would otherwise slice from the end and drop only the
    # last few tokens instead of truncating.
    max_tokens = max(max_tokens, 0)

    def _truncate_with(encoding) -> str:
        """Truncate `text` using the given tiktoken encoding."""
        tokens = encoding.encode(text)
        if len(tokens) <= max_tokens:
            return text
        # Truncate tokens, decode, and add truncation indicator
        return encoding.decode(tokens[:max_tokens]) + "...(truncated)"

    try:
        return _truncate_with(tiktoken.encoding_for_model(model))
    except Exception as e:
        logger.warning(f"Error truncating by tokens with model {model}: {str(e)}")

    # Model-specific encoding failed (e.g. unknown model name): fall back to
    # cl100k_base, as count_tokens does, before resorting to characters.
    try:
        return _truncate_with(tiktoken.get_encoding("cl100k_base"))
    except Exception as e:
        logger.error(f"Error truncating by tokens with fallback encoding: {str(e)}")

    # Fallback to character-based truncation if token-based fails
    approx_chars = max_tokens * 4  # Rough approximation
    if len(text) <= approx_chars:
        return text
    return text[:approx_chars] + "...(truncated)"
|
||||
|
||||
def estimate_tokens_from_chars(char_count: int, chars_per_token: int = 4) -> int:
    """
    Estimate the number of tokens from character count.

    This is a rough approximation (4 chars per token on average by default).

    Args:
        char_count: Number of characters
        chars_per_token: Assumed average number of characters per token

    Returns:
        Estimated number of tokens (`char_count` divided by `chars_per_token`,
        rounded down)

    Raises:
        ValueError: If `chars_per_token` is not positive
    """
    if chars_per_token <= 0:
        raise ValueError("chars_per_token must be a positive integer")

    return char_count // chars_per_token
|
||||
Reference in New Issue
Block a user