Initial commit

This commit is contained in:
Aherobo Ovie Victor
2025-07-17 22:20:25 +01:00
commit 0e3e22e8cb
39 changed files with 13295 additions and 0 deletions
+8
View File
@@ -0,0 +1,8 @@
"""
Mini SpecsComply Pro (SCP)
--------------------------
A lightweight document compliance and validation tool designed to analyze
and verify technical documents against predefined standards and project-specific requirements.
"""
__version__ = "0.1.0"
+1
View File
@@ -0,0 +1 @@
"""API routes for the Mini SpecsComply Pro application."""
+256
View File
@@ -0,0 +1,256 @@
# Document API routes
from fastapi import APIRouter, UploadFile, File, HTTPException, BackgroundTasks, Query
from fastapi.responses import JSONResponse, HTMLResponse
from typing import List, Optional, Dict
import uuid
from loguru import logger
from app.core.models import (
DocumentUploadResponse,
DocumentAnalysisResponse,
DocumentStatus
)
from app.services.document import DocumentService
from app.services.embedding import EmbeddingService
from app.services.reasoning import ReasoningService
from app.services.standards import StandardsService
from app.utils.helpers import generate_html_report
# Module-level service instances: these are constructed once, when this module
# is imported, and shared by every request handled by the routes below.
embedding_service = EmbeddingService()
reasoning_service = ReasoningService()
standards_service = StandardsService()
# Log the standards service instance ID to verify singleton pattern.
# NOTE(review): app.api.standards_routes also calls StandardsService() on its
# own; the two modules only share uploaded standards if StandardsService is a
# singleton (not visible from here) — confirm.
logger.info(f"Document API - Using StandardsService instance: {id(standards_service)}")
logger.info(f"Document API - Initial standards count: {len(standards_service.standards)}")
# The document service receives the three collaborators explicitly so that it
# uses the same StandardsService instance logged above.
document_service = DocumentService(
    embedding_service=embedding_service,
    reasoning_service=reasoning_service,
    standards_service=standards_service
)
# Router for all document endpoints; paths below are relative to "/documents".
router = APIRouter(prefix="/documents", tags=["documents"])
@router.post("/upload", response_model=DocumentUploadResponse)
async def upload_document(file: UploadFile = File(...)):
    """
    Upload and process a document.

    Args:
        file: The document file to upload

    Returns:
        DocumentUploadResponse with document ID

    Raises:
        HTTPException: 400 when the filename is missing or the document
            service rejects the upload with a ValueError; 500 for any other
            failure.
    """
    try:
        # Check file extension
        if not file.filename:
            raise HTTPException(status_code=400, detail="Filename is required")

        # Process document
        document = await document_service.upload_document(file.file, file.filename)

        return DocumentUploadResponse(
            document_id=document.id,
            filename=document.metadata.filename,
            status=document.status,
            message="Document uploaded successfully and is being processed."
        )
    except HTTPException:
        # HTTPException is an Exception subclass: without this clause the 400
        # raised above would be caught below and reported as a 500.
        raise
    except ValueError as e:
        raise HTTPException(status_code=400, detail=str(e))
    except Exception as e:
        # Log like the other document routes do before mapping to a 500.
        logger.error(f"Error processing document: {str(e)}")
        raise HTTPException(status_code=500, detail=f"Error processing document: {str(e)}")
@router.get("/{doc_id}/analysis", response_model=DocumentAnalysisResponse)
async def get_document_analysis(doc_id: str, format: Optional[str] = Query(None, description="Response format (json or html)")):
    """
    Return the most recent compliance analysis of a document.

    Args:
        doc_id: The document ID
        format: Response format; "html" renders an HTML report, any other
            value (or none) yields the JSON response model

    Returns:
        DocumentAnalysisResponse with the latest report, a status-only
        DocumentAnalysisResponse while the document is not completed, or an
        HTMLResponse when format == "html"

    Raises:
        HTTPException: 404 when the document, its report list or the latest
            report is missing; 500 for any unexpected error.
    """
    try:
        document = await document_service.get_document(doc_id)
        if not document:
            raise HTTPException(status_code=404, detail=f"Document with ID {doc_id} not found")

        # Anything other than COMPLETED gets a status-only answer.
        if document.status != DocumentStatus.COMPLETED:
            return DocumentAnalysisResponse(
                document_id=doc_id,
                status=document.status,
                message=f"Document is in {document.status} state. Please try again later."
            )

        if not document.reports:
            raise HTTPException(status_code=404, detail=f"No analysis reports found for document {doc_id}")

        # The last appended report ID is the latest analysis.
        latest_report_id = document.reports[-1]
        report = await document_service.get_report(latest_report_id)
        if not report:
            raise HTTPException(status_code=404, detail=f"Report {latest_report_id} not found")

        if format != "html":
            # Default: JSON response
            return DocumentAnalysisResponse(
                document_id=doc_id,
                status=document.status,
                report=report,
                message="Analysis completed successfully."
            )

        # HTML requested: flatten the report into plain data for the renderer.
        report_data = {
            "document_name": document.metadata.filename,
            "timestamp": report.timestamp.strftime("%Y-%m-%d %H:%M:%S"),
            "compliance_score": report.compliance_score,
            "summary": report.summary,
            "applied_standards": report.applied_standards,
        }
        issue_rows = []
        for issue in report.issues:
            issue_rows.append({
                "section": issue.section,
                "description": issue.description,
                "level": issue.level.value,
                "reasoning": issue.reasoning,
                "standard_references": issue.standard_references,
                "recommendation": issue.recommendation
            })
        report_data["issues"] = issue_rows

        return HTMLResponse(content=generate_html_report(report_data))
    except HTTPException:
        # Deliberate HTTP errors pass through untouched.
        raise
    except Exception as e:
        logger.error(f"Error retrieving document analysis: {str(e)}")
        raise HTTPException(status_code=500, detail=f"Error retrieving document analysis: {str(e)}")
@router.post("/{doc_id}/resubmit", response_model=DocumentUploadResponse)
async def resubmit_document(
    doc_id: str,
    file: UploadFile = File(...),
    background_tasks: BackgroundTasks = None
):
    """
    Resubmit a document with changes.

    Args:
        doc_id: The document ID to resubmit
        file: The updated document file
        background_tasks: Background tasks handler (currently unused by this
            handler; kept so the endpoint signature stays unchanged)

    Returns:
        DocumentUploadResponse with document ID

    Raises:
        HTTPException: 404 when the document does not exist, 400 when the
            document service raises ValueError, 500 for any other failure.
    """
    try:
        # Check if document exists
        document = await document_service.get_document(doc_id)
        if not document:
            raise HTTPException(status_code=404, detail=f"Document with ID {doc_id} not found")

        # Process resubmitted document
        updated_document = await document_service.resubmit_document(doc_id, file.file)

        return DocumentUploadResponse(
            document_id=updated_document.id,
            filename=updated_document.metadata.filename,
            status=updated_document.status,
            message=f"Document (version {updated_document.version}) resubmitted successfully and is being processed."
        )
    except HTTPException:
        # Without this clause the 404 above is caught by `except Exception`
        # and returned to the client as a 500.
        raise
    except ValueError as e:
        raise HTTPException(status_code=400, detail=str(e))
    except Exception as e:
        logger.error(f"Error resubmitting document: {str(e)}")
        raise HTTPException(status_code=500, detail=f"Error resubmitting document: {str(e)}")
@router.get("/{doc_id}", response_model=Dict)
async def get_document_info(doc_id: str):
    """
    Return the stored metadata and processing state of one document.

    Args:
        doc_id: The document ID

    Returns:
        Dictionary with the document's ID, file metadata, status, version
        and report IDs

    Raises:
        HTTPException: 404 when the document is unknown, 500 on any
            unexpected error.
    """
    try:
        document = await document_service.get_document(doc_id)
        if document:
            # Flatten the model into a plain dict for the response.
            return {
                "document_id": document.id,
                "filename": document.metadata.filename,
                "file_type": document.metadata.file_type,
                "file_size": document.metadata.file_size,
                "upload_timestamp": document.metadata.upload_timestamp,
                "last_modified": document.metadata.last_modified,
                "status": document.status,
                "version": document.version,
                "reports": document.reports
            }
        raise HTTPException(status_code=404, detail=f"Document with ID {doc_id} not found")
    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"Error retrieving document info: {str(e)}")
        raise HTTPException(status_code=500, detail=f"Error retrieving document info: {str(e)}")
@router.get("/", response_model=List[Dict])
async def list_documents():
    """
    Return a short summary of every document held by the document service.

    Returns:
        List of dictionaries, one per document, with ID, filename, status,
        version and upload timestamp

    Raises:
        HTTPException: 500 when building the listing fails.
    """
    try:
        # Reads the service's in-memory mapping of ID -> Document directly.
        return [
            {
                "document_id": doc_id,
                "filename": document.metadata.filename,
                "status": document.status,
                "version": document.version,
                "upload_timestamp": document.metadata.upload_timestamp
            }
            for doc_id, document in document_service.documents.items()
        ]
    except Exception as e:
        logger.error(f"Error listing documents: {str(e)}")
        raise HTTPException(status_code=500, detail=f"Error listing documents: {str(e)}")
@router.get("/{doc_id}/stats", response_model=Dict)
async def get_document_stats(doc_id: str):
    """
    Return the statistics the document service computes for a document.

    Args:
        doc_id: The document ID

    Returns:
        Document statistics

    Raises:
        HTTPException: 404 when the service raises ValueError (it does so for
            an unknown document ID), 500 on any other error.
    """
    try:
        return await document_service.get_document_stats(doc_id)
    except ValueError as e:
        raise HTTPException(status_code=404, detail=str(e))
    except Exception as e:
        logger.error(f"Error retrieving document stats: {str(e)}")
        raise HTTPException(status_code=500, detail=f"Error retrieving document stats: {str(e)}")
+15
View File
@@ -0,0 +1,15 @@
# API routes - Main router
from fastapi import APIRouter
# Import sub-routers
from app.api.document_routes import router as document_router
from app.api.standards_routes import router as standards_router
# Main API router: it has no prefix of its own and only aggregates the
# feature routers imported above.
router = APIRouter()
# Attach the sub-routers; each one carries its own prefix
# ("/documents" and "/standards" respectively).
router.include_router(document_router)
router.include_router(standards_router)
# Add any additional routes that don't fit in the other routers here
+113
View File
@@ -0,0 +1,113 @@
# Standards API routes
from fastapi import APIRouter, UploadFile, File, HTTPException, Query
from typing import List, Optional
from loguru import logger
from app.core.models import Standard, StandardUploadResponse
from app.services.standards import StandardsService
# Module-level service instance, created once at import time.
# NOTE(review): app.api.document_routes constructs its own StandardsService();
# uploads made here are only visible there if the class is a singleton — confirm.
standards_service = StandardsService()
# Router for all standards endpoints; paths below are relative to "/standards".
router = APIRouter(prefix="/standards", tags=["standards"])
@router.get("/", response_model=List[Standard])
async def get_all_standards():
    """
    Return every compliance standard known to the standards service.

    Returns:
        List of all standards

    Raises:
        HTTPException: 500 when the service call fails.
    """
    try:
        return await standards_service.get_all_standards()
    except Exception as e:
        logger.error(f"Error retrieving standards: {str(e)}")
        raise HTTPException(status_code=500, detail=f"Error retrieving standards: {str(e)}")
@router.get("/{standard_id}", response_model=Standard)
async def get_standard(standard_id: str):
    """
    Look up a single standard by its ID.

    Args:
        standard_id: The standard ID

    Returns:
        Standard details

    Raises:
        HTTPException: 404 when no standard has that ID, 500 on any
            unexpected error.
    """
    try:
        standard = await standards_service.get_standard(standard_id)
        if standard:
            return standard
        raise HTTPException(status_code=404, detail=f"Standard with ID {standard_id} not found")
    except HTTPException:
        # Keep the 404 above from being rewritten as a 500.
        raise
    except Exception as e:
        logger.error(f"Error retrieving standard: {str(e)}")
        raise HTTPException(status_code=500, detail=f"Error retrieving standard: {str(e)}")
@router.post("/upload", response_model=StandardUploadResponse)
async def upload_standard(file: UploadFile = File(...)):
    """
    Upload a new compliance standard definition.

    Args:
        file: JSON file containing standard definition

    Returns:
        StandardUploadResponse with standard ID

    Raises:
        HTTPException: 400 when the filename is missing, is not a ``.json``
            file, or the standards service raises ValueError; 500 for any
            other failure.
    """
    try:
        # Check file extension
        if not file.filename:
            raise HTTPException(status_code=400, detail="Filename is required")

        if not file.filename.lower().endswith('.json'):
            raise HTTPException(status_code=400, detail="Standard definition must be a JSON file")

        # Log the standards service instance ID to verify singleton pattern
        logger.info(f"Standards API - Using StandardsService instance: {id(standards_service)}")
        logger.info(f"Standards API - Standards count before upload: {len(standards_service.standards)}")

        # Process standard
        standard = await standards_service.upload_standard(file.file, file.filename)

        # Log the updated standards count
        logger.info(f"Standards API - Standards count after upload: {len(standards_service.standards)}")
        logger.info(f"Standards API - Uploaded standard: {standard.name} (ID: {standard.id})")

        return StandardUploadResponse(
            standard_id=standard.id,
            name=standard.name,
            requirement_count=len(standard.requirements),
            message="Standard uploaded successfully."
        )
    except HTTPException:
        # The validation errors above are HTTPExceptions; re-raise them as-is
        # so they are not caught by `except Exception` and turned into 500s.
        raise
    except ValueError as e:
        raise HTTPException(status_code=400, detail=str(e))
    except Exception as e:
        logger.error(f"Error processing standard: {str(e)}")
        raise HTTPException(status_code=500, detail=f"Error processing standard: {str(e)}")
@router.get("/search/", response_model=List[Standard])
async def search_standards(name: Optional[str] = Query(None, description="Standard name to search for")):
    """
    Search for standards by name.

    Args:
        name: Standard name to search for (optional); when omitted or empty
            every standard is returned

    Returns:
        List of matching standards (at most one entry for a name lookup)

    Raises:
        HTTPException: 500 when the service call fails.
    """
    try:
        if not name:
            # No filter supplied: fall back to the full listing.
            return await standards_service.get_all_standards()

        standard = await standards_service.get_standard_by_name(name)
        if standard:
            return [standard]
        return []
    except Exception as e:
        logger.error(f"Error searching standards: {str(e)}")
        raise HTTPException(status_code=500, detail=f"Error searching standards: {str(e)}")
+1
View File
@@ -0,0 +1 @@
"""Core functionality for the Mini SpecsComply Pro application."""
+44
View File
@@ -0,0 +1,44 @@
import os
from typing import Optional
from pydantic import BaseModel
from dotenv import load_dotenv
# Load environment variables from .env file.
# This must run before the Settings class body below is executed, because the
# field defaults there read the environment at class-definition time.
load_dotenv()
class Settings(BaseModel):
    """Application configuration whose defaults come from the process environment.

    Each environment lookup below is evaluated once, while this class body
    runs at import time; the result becomes the field's default value.
    """

    # --- Application information ---
    APP_NAME: str = os.environ.get("APP_NAME", "Mini SpecsComply Pro")
    APP_VERSION: str = os.environ.get("APP_VERSION", "0.1.0")
    # Only "true", "1" and "t" (case-insensitive) switch debug mode on.
    DEBUG: bool = os.environ.get("DEBUG", "False").lower() in ("true", "1", "t")

    # --- API keys (None when the variable is not set) ---
    GROQ_API_KEY: Optional[str] = os.environ.get("GROQ_API_KEY")
    COHERE_API_KEY: Optional[str] = os.environ.get("COHERE_API_KEY")

    # --- Vector database settings ---
    # Pinecone
    PINECONE_API_KEY: Optional[str] = os.environ.get("PINECONE_API_KEY")
    PINECONE_INDEX_NAME: str = os.environ.get("PINECONE_INDEX_NAME", "specscomply_documents")

    # Weaviate
    WEAVIATE_URL: Optional[str] = os.environ.get("WEAVIATE_URL")
    WEAVIATE_API_KEY: Optional[str] = os.environ.get("WEAVIATE_API_KEY")

    # --- Model names (fixed defaults, not read from the environment) ---
    EMBEDDING_MODEL: str = "embed-english-v3.0"  # Default embedding model
    RERANKER_MODEL: str = "rerank-english-v2.0"  # Default reranker model
    REASONING_MODEL: str = "llama-3.3-70b-versatile"  # Default reasoning model
    PROCESSING_MODEL: str = "llama-3.3-70b-versatile"  # Default quick processing model

    # Vector database selector (pinecone or weaviate), normalised to lower case
    VECTOR_DB: str = os.environ.get("VECTOR_DB", "pinecone").lower()

    class Config:
        # NOTE(review): env_file / case_sensitive look like BaseSettings
        # options; whether they have any effect on a plain BaseModel should
        # be confirmed against the pydantic version in use.
        env_file = ".env"
        case_sensitive = True
# Create global settings instance; other modules import this object
# (e.g. `from app.core.config import settings`) rather than the class.
settings = Settings()
+127
View File
@@ -0,0 +1,127 @@
# Data models
from pydantic import BaseModel, Field
from typing import List, Dict, Optional, Any, Union
from datetime import datetime
from enum import Enum
import uuid
class DocumentStatus(str, Enum):
    """Enum for document processing status.

    Mixes in ``str`` so each member is also a plain string equal to its value.
    """
    PENDING = "pending"  # default status of a newly created Document
    PROCESSING = "processing"  # analysis has started
    COMPLETED = "completed"  # analysis finished and a report is available
    FAILED = "failed"  # analysis raised an error
class ComplianceLevel(str, Enum):
    """Enum for compliance severity levels.

    Mixes in ``str`` so each member is also a plain string equal to its value.
    Members are listed from most to least severe.
    """
    CRITICAL = "critical"
    MAJOR = "major"
    MINOR = "minor"
    INFO = "info"
class ComplianceIssue(BaseModel):
    """Model for compliance issues found in the document."""
    # Unique issue ID; a fresh UUID4 string is generated when none is given.
    id: str = Field(default_factory=lambda: str(uuid.uuid4()))
    # Name of the document section the issue was found in.
    section: str
    # Short statement of what is wrong.
    description: str
    # Severity of the issue.
    level: ComplianceLevel
    # Optional position of the issue in the document.
    line_number: Optional[int] = None
    reasoning: str = ""  # Detailed explanation of why this is an issue
    standard_references: List[str] = []  # References to specific standards or requirements
    # Suggested way to resolve the issue (required).
    recommendation: str
class DocumentMetadata(BaseModel):
    """Model for document metadata."""
    # Original name of the uploaded file.
    filename: str
    # File type label (DocumentService stores the lower-cased extension here).
    file_type: str
    # NOTE(review): DocumentService fills this with len() of the decoded text,
    # i.e. a character count rather than a byte count — confirm intended unit.
    file_size: int  # In bytes
    # Defaults to the local time at which the model instance is created.
    upload_timestamp: datetime = Field(default_factory=datetime.now)
    # Time of the most recent change, if any.
    last_modified: Optional[datetime] = None
class DocumentEmbedding(BaseModel):
    """Model for document embeddings."""
    # Identifier of the stored embedding.
    embedding_id: str
    # Name of the model that produced the embedding.
    embedding_model: str
    # Name of the vector database holding the vectors.
    vector_db: str
    sections: Dict[str, str]  # Section name to section ID in vector DB
class ComplianceReport(BaseModel):
    """Compliance report produced for one document.

    Besides the stored fields, the model exposes read-only counters that
    tally the issues at each severity level.
    """
    # Unique report ID; a fresh UUID4 string is generated when none is given.
    report_id: str = Field(default_factory=lambda: str(uuid.uuid4()))
    # ID of the document this report belongs to.
    document_id: str
    # Defaults to the local time at which the report instance is created.
    timestamp: datetime = Field(default_factory=datetime.now)
    compliance_score: float  # 0.0 to 1.0
    summary: str
    issues: List[ComplianceIssue] = []
    applied_standards: List[str] = []  # Standards used for analysis

    @property
    def critical_issues_count(self) -> int:
        """Number of issues whose level is CRITICAL."""
        return len([issue for issue in self.issues if issue.level == ComplianceLevel.CRITICAL])

    @property
    def major_issues_count(self) -> int:
        """Number of issues whose level is MAJOR."""
        return len([issue for issue in self.issues if issue.level == ComplianceLevel.MAJOR])

    @property
    def minor_issues_count(self) -> int:
        """Number of issues whose level is MINOR."""
        return len([issue for issue in self.issues if issue.level == ComplianceLevel.MINOR])

    @property
    def info_issues_count(self) -> int:
        """Number of issues whose level is INFO."""
        return len([issue for issue in self.issues if issue.level == ComplianceLevel.INFO])
class Document(BaseModel):
    """Model for document tracking."""
    # Unique document ID; a fresh UUID4 string is generated when none is given.
    id: str = Field(default_factory=lambda: str(uuid.uuid4()))
    # File-level metadata (name, type, size, timestamps).
    metadata: DocumentMetadata
    # Embedding information; None until an embedding has been attached.
    embedding: Optional[DocumentEmbedding] = None
    # Processing state; new documents start as PENDING.
    status: DocumentStatus = DocumentStatus.PENDING
    version: int = 1  # Incremented on resubmissions
    reports: List[str] = []  # List of report IDs
class DocumentUploadResponse(BaseModel):
    """Response model for document uploads."""
    # ID of the uploaded (or resubmitted) document.
    document_id: str
    # Filename recorded for the document.
    filename: str
    # Processing status at the time the response is built.
    status: DocumentStatus
    # Human-readable outcome message.
    message: str
class DocumentAnalysisResponse(BaseModel):
    """Response model for document analysis retrieval."""
    # ID of the analysed document.
    document_id: str
    # Current processing status of the document.
    status: DocumentStatus
    # The compliance report; left as None when no report is included.
    report: Optional[ComplianceReport] = None
    # Human-readable outcome message.
    message: str
class RequirementSeverity(str, Enum):
    """Enum for requirement severity levels.

    Mixes in ``str`` so each member is also a plain string equal to its value.
    The member names and values mirror those of ComplianceLevel.
    """
    CRITICAL = "critical"
    MAJOR = "major"
    MINOR = "minor"
    INFO = "info"
class Requirement(BaseModel):
    """Model for a compliance requirement."""
    # Unique requirement ID; a fresh UUID4 string is generated when none is given.
    id: str = Field(default_factory=lambda: str(uuid.uuid4()))
    # What the requirement demands.
    description: str
    # How severe a violation of this requirement is.
    severity: RequirementSeverity
    # Optional additional explanation.
    details: Optional[str] = None
class Standard(BaseModel):
    """Model for a compliance standard."""
    # Unique standard ID; a fresh UUID4 string is generated when none is given.
    id: str = Field(default_factory=lambda: str(uuid.uuid4()))
    # Display name of the standard.
    name: str
    # Free-text description of the standard.
    description: str
    # Requirements that make up the standard; empty by default.
    requirements: List[Requirement] = []
class StandardUploadResponse(BaseModel):
    """Response model for standard uploads."""
    # ID of the uploaded standard.
    standard_id: str
    # Name of the uploaded standard.
    name: str
    # Number of requirements the uploaded standard contains.
    requirement_count: int
    # Human-readable outcome message.
    message: str
+123
View File
@@ -0,0 +1,123 @@
from fastapi import FastAPI, Request, HTTPException
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import JSONResponse, HTMLResponse
from fastapi.staticfiles import StaticFiles
from fastapi.templating import Jinja2Templates
import os
import uvicorn
from loguru import logger
import sys
from pathlib import Path
from app.core.config import settings
from app.api.routes import router as api_router
# Configure logging: remove the handlers registered so far and log to stdout
# only, at DEBUG level when settings.DEBUG is true and INFO otherwise.
logger.remove()
logger.add(
    sys.stdout,
    format="{time:YYYY-MM-DD HH:mm:ss} | {level} | {message}",
    level="DEBUG" if settings.DEBUG else "INFO",
)
# Create FastAPI app; title and version come from the settings object and the
# interactive docs are served at /docs and /redoc.
app = FastAPI(
    title=settings.APP_NAME,
    version=settings.APP_VERSION,
    description="A lightweight document compliance and validation tool",
    docs_url="/docs",
    redoc_url="/redoc",
    openapi_url="/openapi.json",
)
# Add CORS middleware.
# NOTE(review): wildcard origins are combined with allow_credentials=True;
# confirm this is acceptable before any production deployment.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],  # In production, this should be restricted
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
# Create the templates directory next to this file (if missing) and point the
# Jinja2 template loader at it.
templates_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), "templates")
os.makedirs(templates_dir, exist_ok=True)
templates = Jinja2Templates(directory=templates_dir)
# Create static files directory (next to this file) if it doesn't exist
static_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), "static")
if not os.path.exists(static_dir):
    os.makedirs(static_dir, exist_ok=True)
# Ensure CSS and JS directories exist; exist_ok=True makes this a no-op when
# they are already present.
css_dir = os.path.join(static_dir, "css")
js_dir = os.path.join(static_dir, "js")
os.makedirs(css_dir, exist_ok=True)
os.makedirs(js_dir, exist_ok=True)
# Mount static files: everything under static_dir is served at /static
app.mount("/static", StaticFiles(directory=static_dir), name="static")
# Include API routes: all routers are exposed under the /api prefix
app.include_router(api_router, prefix="/api")
# Root endpoint - serve the frontend
@app.get("/", response_class=HTMLResponse)
async def root(request: Request):
    """
    Serve the frontend entry page.

    Args:
        request: The incoming request (not used by the handler itself)

    Returns:
        HTMLResponse with the contents of ``static/index.html`` when that
        file exists, otherwise a small built-in landing page that links to
        the API documentation
    """
    # Check if index.html exists in static directory
    index_path = os.path.join(static_dir, "index.html")
    if os.path.exists(index_path):
        # Decode explicitly as UTF-8: without an encoding argument open()
        # uses the platform's locale encoding, which can garble or fail on
        # non-ASCII HTML (e.g. on Windows).
        with open(index_path, "r", encoding="utf-8") as f:
            return HTMLResponse(content=f.read())

    # If not found, return a simple HTML response
    return HTMLResponse(
        content=f"""
<!DOCTYPE html>
<html>
<head>
<title>{settings.APP_NAME}</title>
<style>
body {{ font-family: Arial, sans-serif; max-width: 800px; margin: 0 auto; padding: 20px; }}
h1 {{ color: #3498db; }}
.api-link {{ margin-top: 20px; }}
.api-link a {{ display: inline-block; padding: 10px 20px; background-color: #3498db; color: white;
text-decoration: none; border-radius: 4px; }}
.api-link a:hover {{ background-color: #2980b9; }}
</style>
</head>
<body>
<h1>{settings.APP_NAME}</h1>
<p>Welcome to {settings.APP_NAME}, a lightweight document compliance and validation tool.</p>
<p>This application is currently running in API-only mode.</p>
<div class="api-link">
<a href="/docs">View API Documentation</a>
</div>
</body>
</html>
"""
    )
# Health check endpoint
@app.get("/health")
async def health():
    """Report that the application process is up and answering requests."""
    payload = {"status": "healthy"}
    return payload
# Global exception handler
@app.exception_handler(Exception)
async def global_exception_handler(request: Request, exc: Exception):
    """
    Turn any exception not handled elsewhere into a JSON 500 response.

    Args:
        request: The request being processed when the exception occurred
        exc: The unhandled exception

    Returns:
        JSONResponse with status 500 carrying a generic message and the
        exception text
    """
    detail = str(exc)
    logger.error(f"Unhandled exception: {detail}")
    # NOTE(review): the exception text is sent to the client as "detail";
    # confirm that exposing it is intended.
    body = {"message": "An unexpected error occurred", "detail": detail}
    return JSONResponse(status_code=500, content=body)
if __name__ == "__main__":
    # Direct execution: start uvicorn on all interfaces, port 8000.
    # Auto-reload and debug-level server logs follow settings.DEBUG.
    debug_enabled = settings.DEBUG
    uvicorn.run(
        "app.main:app",
        host="0.0.0.0",
        port=8000,
        reload=debug_enabled,
        log_level="debug" if debug_enabled else "info",
    )
+1
View File
@@ -0,0 +1 @@
"""Services for the Mini SpecsComply Pro application."""
+461
View File
@@ -0,0 +1,461 @@
# Document processing
import os
import uuid
from datetime import datetime, timedelta
from typing import Dict, List, Optional, BinaryIO, Tuple
import re
from loguru import logger
from app.core.models import (
Document,
DocumentMetadata,
DocumentStatus,
ComplianceReport,
ComplianceIssue,
ComplianceLevel,
DocumentEmbedding
)
from app.services.embedding import EmbeddingService
from app.services.reasoning import ReasoningService
from app.services.standards import StandardsService
from app.utils.token_counter import count_tokens, truncate_by_tokens
class DocumentService:
"""Service for handling document processing and storage."""
def __init__(self, embedding_service: EmbeddingService, reasoning_service: ReasoningService, standards_service: Optional[StandardsService] = None):
    """
    Wire up the collaborating services and create the in-memory stores.

    Args:
        embedding_service: Service used to embed document sections
        reasoning_service: Service used to run the compliance analysis
        standards_service: Standards lookup service; a new StandardsService
            is created when none (or a falsy value) is supplied
    """
    self.embedding_service = embedding_service
    self.reasoning_service = reasoning_service

    if not standards_service:
        standards_service = StandardsService()
    self.standards_service = standards_service

    # In-memory storage, keyed by ID (replace with DB in production)
    self.documents = {}
    self.reports = {}
async def upload_document(self, file: BinaryIO, filename: str) -> Document:
    """
    Register an uploaded file as a new document and analyse it immediately.

    Args:
        file: The document file
        filename: Name of the uploaded file

    Returns:
        Document object with metadata, stored under a freshly generated ID

    Raises:
        ValueError: If the filename's extension is not supported.
        Exception: Any error raised while processing is logged, the document
            is marked FAILED (it stays in the store) and the error is
            re-raised.
    """
    if not self._validate_file_type(filename):
        raise ValueError(f"Unsupported file type. Supported types: .txt, .md, .rst, .doc, .docx, .pdf")

    content = await self._read_file_content(file)

    # NOTE(review): file_size is len() of the decoded text, i.e. a character
    # count, while DocumentMetadata labels the field "In bytes" — confirm.
    metadata = DocumentMetadata(
        filename=filename,
        file_type=self._get_file_type(filename),
        file_size=len(content),
        upload_timestamp=datetime.now(),
        last_modified=datetime.now()
    )

    new_id = str(uuid.uuid4())
    document = Document(
        id=new_id,
        metadata=metadata,
        status=DocumentStatus.PENDING,
        version=1
    )

    # Register the document before processing so it can be looked up by ID.
    self.documents[new_id] = document

    try:
        await self._process_document(new_id, content)
    except Exception as e:
        logger.error(f"Error processing document {new_id}: {str(e)}")
        document.status = DocumentStatus.FAILED
        raise

    return document
async def get_document(self, document_id: str) -> Optional[Document]:
    """
    Look up a document in the in-memory store.

    Args:
        document_id: The ID of the document to retrieve

    Returns:
        Document object if found, None otherwise
    """
    try:
        return self.documents[document_id]
    except KeyError:
        return None
async def get_report(self, report_id: str) -> Optional[ComplianceReport]:
    """
    Look up a compliance report in the in-memory store.

    Args:
        report_id: The ID of the report to retrieve

    Returns:
        ComplianceReport object if found, None otherwise
    """
    try:
        return self.reports[report_id]
    except KeyError:
        return None
async def resubmit_document(self, document_id: str, file: BinaryIO) -> Document:
    """
    Replace a document's content with a new upload and re-run the analysis.

    Args:
        document_id: The ID of the document to resubmit
        file: The updated document file

    Returns:
        Updated Document object (version incremented); its status is FAILED
        when re-processing raised an error

    Raises:
        ValueError: If no document with the given ID exists.
    """
    document = await self.get_document(document_id)
    if not document:
        raise ValueError(f"Document with ID {document_id} not found")

    content = await self._read_file_content(file)

    # Refresh the bookkeeping before re-processing.
    metadata = document.metadata
    metadata.file_size = len(content)
    metadata.last_modified = datetime.now()
    document.version += 1
    document.status = DocumentStatus.PENDING

    try:
        await self._process_document(document_id, content)
    except Exception as e:
        # NOTE(review): unlike upload_document, the error is logged and
        # swallowed here; callers only see it via the FAILED status.
        logger.error(f"Error processing resubmitted document {document_id}: {str(e)}")
        document.status = DocumentStatus.FAILED

    return document
async def process_document(self, document_id: str, content: str) -> ComplianceReport:
    """
    Run the full analysis pipeline for a stored document.

    The content is split into sections, the sections are embedded, the
    applicable standards are determined and the reasoning service produces
    the report, which is then kept in the in-memory report store.

    Args:
        document_id: The ID of the document
        content: Document content

    Returns:
        ComplianceReport object

    Raises:
        ValueError: If the document ID is unknown.
        Exception: Any pipeline error is logged and re-raised.
    """
    try:
        document = self.documents.get(document_id)
        if not document:
            raise ValueError(f"Document {document_id} not found")

        sections = self._split_into_sections(content)

        # Embed the sections and remember the result on the document.
        document.embedding = await self.embedding_service.embed_document(document_id, sections)

        # Work out which standards apply to this document.
        if not self.standards_service:
            # No standards service: fall back to a fixed list of names.
            logger.warning(f"No StandardsService available for document {document_id}")
            standard_names = ["ISO-9001", "IEEE-829", "RFC-2119"]
        else:
            # Log the standards service instance ID to verify singleton pattern
            logger.info(f"Using StandardsService instance: {id(self.standards_service)}")
            logger.info(f"Standards count before matching: {len(self.standards_service.standards)}")
            standard_names = await self.standards_service.get_standard_names_for_document(content)
            logger.info(f"Identified standards for document {document_id}: {standard_names}")

        # Compliance analysis proper.
        report = await self.reasoning_service.analyze_document(document_id, sections, standard_names)

        self.reports[report.report_id] = report
        return report
    except Exception as e:
        logger.error(f"Error in document processing: {str(e)}")
        raise
async def _read_file_content(self, file: BinaryIO) -> str:
"""
Read and decode file content.
Args:
file: The file to read
Returns:
File content as string
"""
file_content = file.read()
# Try to decode as UTF-8
try:
return file_content.decode('utf-8')
except UnicodeDecodeError:
# Try other encodings if UTF-8 fails
try:
return file_content.decode('latin-1')
except:
raise ValueError("Unable to decode file content. Please ensure file is text-based.")
def _get_file_type(self, filename: str) -> str:
"""
Determine file type from filename.
Args:
filename: The name of the file
Returns:
File type (extension)
"""
_, extension = os.path.splitext(filename)
return extension.lstrip('.').lower()
def _validate_file_type(self, filename: str) -> bool:
"""
Validate if the file type is supported.
Args:
filename: Name of the file to validate
Returns:
bool: True if file type is supported, False otherwise
"""
SUPPORTED_EXTENSIONS = {'.txt', '.md', '.rst', '.doc', '.docx', '.pdf'}
_, ext = os.path.splitext(filename)
return ext.lower() in SUPPORTED_EXTENSIONS
def _split_into_sections(self, content: str) -> Dict[str, str]:
"""
Split document content into sections.
Args:
content: The document content
Returns:
Dictionary mapping section names to section content
"""
# This is a simple implementation - in production, you would use more advanced
# techniques like heading detection, markdown parsing, etc.
# For simplicity, we'll just split by markdown headings
sections = {}
# Add the whole document as one section
sections["full_document"] = content
# Try to split by markdown headings
heading_pattern = re.compile(r'^(#{1,3})\s+(.+)$', re.MULTILINE)
matches = list(heading_pattern.finditer(content))
if matches:
for i, match in enumerate(matches):
heading_level = len(match.group(1))
section_name = match.group(2).strip()
# Get section content (from this heading to the next, or to the end)
start_pos = match.end()
end_pos = matches[i+1].start() if i < len(matches) - 1 else len(content)
section_content = content[start_pos:end_pos].strip()
section_key = f"h{heading_level}_{section_name}"
sections[section_key] = section_content
else:
# No headings found, try to split by newlines into paragraphs
paragraphs = [p for p in content.split('\n\n') if p.strip()]
for i, paragraph in enumerate(paragraphs):
if len(paragraph) > 100: # Only include substantial paragraphs
sections[f"paragraph_{i+1}"] = paragraph
return sections
async def _generate_mock_report(self, document_id: str, sections: Dict[str, str]) -> ComplianceReport:
    """
    Build a rule-of-thumb compliance report for development and testing.

    Three simple text checks are run against the "full_document" section
    (when present); each failed check adds one issue and lowers the score
    by 0.1.

    Args:
        document_id: The ID of the document
        sections: Dictionary of document sections

    Returns:
        ComplianceReport object
    """
    # In production, this would use the reasoning service
    issues = []

    if "full_document" in sections:
        content = sections["full_document"]
        lowered = content.lower()

        # Mock check: the word "introduction" should appear somewhere.
        if "introduction" not in lowered:
            issues.append(ComplianceIssue(
                section="Document Structure",
                description="Missing introduction section",
                level=ComplianceLevel.MAJOR,
                recommendation="Add an introduction section to provide context for the document"
            ))

        # Mock check: fewer than three '#' characters counts as too few headings.
        if content.count('#') < 3:
            issues.append(ComplianceIssue(
                section="Formatting",
                description="Insufficient section headings",
                level=ComplianceLevel.MINOR,
                recommendation="Use markdown headings to better structure the document"
            ))

        # Mock check: talks about compliance without mentioning a standard.
        if "compliance" in lowered and "standard" not in lowered:
            issues.append(ComplianceIssue(
                section="Technical Content",
                description="Mentions compliance but doesn't reference specific standards",
                level=ComplianceLevel.CRITICAL,
                recommendation="Specify which standards or regulations the document complies with"
            ))

    # Each issue costs 0.1, never dropping below zero.
    compliance_score = max(0.0, 1.0 - (len(issues) * 0.1)) if issues else 1.0

    if not issues:
        summary = "The document meets all compliance requirements. No issues found."
    else:
        tally = {
            ComplianceLevel.CRITICAL: 0,
            ComplianceLevel.MAJOR: 0,
            ComplianceLevel.MINOR: 0,
        }
        for issue in issues:
            if issue.level in tally:
                tally[issue.level] += 1

        summary = f"The document has {len(issues)} compliance issues: "
        if tally[ComplianceLevel.CRITICAL]:
            summary += f"{tally[ComplianceLevel.CRITICAL]} critical, "
        if tally[ComplianceLevel.MAJOR]:
            summary += f"{tally[ComplianceLevel.MAJOR]} major, "
        if tally[ComplianceLevel.MINOR]:
            summary += f"{tally[ComplianceLevel.MINOR]} minor."
        else:
            # No minor part follows: drop the dangling ", " and close the sentence.
            summary = summary.rstrip(", ") + "."
        summary += " See detailed report for recommendations."

    return ComplianceReport(
        document_id=document_id,
        compliance_score=compliance_score,
        summary=summary,
        issues=issues
    )
async def _process_document(self, document_id: str, content: str) -> None:
"""
Internal method to process a document and update its status.
Args:
document_id: The ID of the document to process
content: The document content
"""
try:
# Get the document
document = self.documents.get(document_id)
if not document:
raise ValueError(f"Document {document_id} not found")
# Update status to processing
document.status = DocumentStatus.PROCESSING
# Generate compliance report
report = await self.process_document(document_id, content)
# Store report ID in document
document.reports.append(report.report_id)
# Update document status
document.status = DocumentStatus.COMPLETED
except Exception as e:
# Update document status to failed
if document:
document.status = DocumentStatus.FAILED
raise
async def get_document_stats(self, document_id: str) -> Dict[str, object]:
    """
    Get statistics for a document.

    Issue counts and the compliance score are taken from the document's most
    recent report; without a report the score is None and the counts are 0.

    Args:
        document_id: The ID of the document

    Returns:
        Dictionary containing document statistics

    Raises:
        ValueError: If no document with the given ID exists.
    """
    document = await self.get_document(document_id)
    if not document:
        raise ValueError(f"Document {document_id} not found")

    # The last report ID in the list belongs to the most recent analysis.
    latest_report = None
    if document.reports:
        latest_report = await self.get_report(document.reports[-1])

    stats = {
        "document_id": document_id,
        "version": document.version,
        "status": document.status,
        "file_size": document.metadata.file_size,
        "upload_date": document.metadata.upload_timestamp,
        "last_modified": document.metadata.last_modified,
        "num_reports": len(document.reports),
        "latest_compliance_score": latest_report.compliance_score if latest_report else None,
        "critical_issues": latest_report.critical_issues_count if latest_report else 0,
        "major_issues": latest_report.major_issues_count if latest_report else 0,
        "minor_issues": latest_report.minor_issues_count if latest_report else 0
    }

    return stats
async def cleanup_old_documents(self, days: int = 30) -> List[str]:
    """
    Drop every document uploaded more than ``days`` days ago.

    Reports referenced by a removed document are deleted from
    ``self.reports`` as well; report IDs that are not stored are ignored.

    Args:
        days: Number of days after which documents should be removed

    Returns:
        List of removed document IDs
    """
    threshold = datetime.now() - timedelta(days=days)

    # Decide what to delete first so the dict is never mutated mid-iteration.
    expired_ids = [
        doc_id
        for doc_id, document in self.documents.items()
        if document.metadata.upload_timestamp < threshold
    ]

    for doc_id in expired_ids:
        expired = self.documents.pop(doc_id)
        for report_id in expired.reports:
            self.reports.pop(report_id, None)

    return expired_ids
+254
View File
@@ -0,0 +1,254 @@
import cohere
from typing import List, Dict, Any, Optional
import uuid
from pinecone import Pinecone
import weaviate
from loguru import logger
from app.core.config import settings
from app.core.models import DocumentEmbedding
class EmbeddingService:
    """Service for document embedding and vector database operations.

    Embeddings are produced with the Cohere client and stored in the vector
    database selected by ``settings.VECTOR_DB`` ("pinecone" or "weaviate").
    When no usable configuration is found, a ``MockVectorDB`` is used instead.
    """

    def __init__(self):
        """Initialize the embedding service with the Cohere client and vector DB."""
        self.cohere_client = cohere.Client(settings.COHERE_API_KEY)
        # Vector database client is chosen from configuration
        self.vector_db_client = self._init_vector_db()
        self.embedding_model = settings.EMBEDDING_MODEL

    def _uses_weaviate(self) -> bool:
        """Return True when a real Weaviate client (not the mock) is in use."""
        # VECTOR_DB may say "weaviate" while WEAVIATE_URL is unset, in which
        # case _init_vector_db() fell back to the mock, which has no
        # Weaviate-style API.
        return (
            settings.VECTOR_DB == "weaviate"
            and not isinstance(self.vector_db_client, MockVectorDB)
        )

    def _init_vector_db(self) -> Any:
        """Initialize the vector database client based on settings.

        Returns:
            A Pinecone index, a Weaviate client, or a ``MockVectorDB`` when
            neither backend is fully configured.
        """
        if settings.VECTOR_DB == "pinecone" and settings.PINECONE_API_KEY:
            pc = Pinecone(api_key=settings.PINECONE_API_KEY)

            # Create the index on first use
            existing_indexes = [idx["name"] for idx in pc.list_indexes()]
            if settings.PINECONE_INDEX_NAME not in existing_indexes:
                # NOTE(review): newer Pinecone clients appear to require a
                # `spec=` argument for create_index -- confirm against the
                # installed client version.
                pc.create_index(
                    name=settings.PINECONE_INDEX_NAME,
                    dimension=1024,  # Cohere embed-english-v3.0 dimension
                    metric="cosine"
                )
            return pc.Index(settings.PINECONE_INDEX_NAME)

        if settings.VECTOR_DB == "weaviate" and settings.WEAVIATE_URL:
            auth_config = (
                weaviate.auth.AuthApiKey(api_key=settings.WEAVIATE_API_KEY)
                if settings.WEAVIATE_API_KEY
                else None
            )
            client = weaviate.Client(
                url=settings.WEAVIATE_URL,
                auth_client_secret=auth_config
            )

            # schema.contains() returns a bool, so the class list has to come
            # from schema.get(). Check for the "Document" class specifically
            # rather than for "any class at all".
            schema = client.schema.get() or {}
            class_names = {cls.get("class") for cls in schema.get("classes", [])}
            if "Document" not in class_names:
                client.schema.create_class({
                    "class": "Document",
                    "vectorizer": "none",  # We'll provide our own vectors
                    "properties": [
                        {"name": "content", "dataType": ["text"]},
                        {"name": "document_id", "dataType": ["string"]},
                        {"name": "section_name", "dataType": ["string"]},
                    ]
                })
            return client

        logger.warning("No valid vector database configuration found. Using mock implementation.")
        return MockVectorDB()

    async def embed_document(self, document_id: str, sections: Dict[str, str]) -> DocumentEmbedding:
        """
        Embed document sections and store in vector database.

        Args:
            document_id: Unique identifier for the document
            sections: Dictionary mapping section names to section content

        Returns:
            DocumentEmbedding object with embedding metadata

        Raises:
            Exception: Any embedding or storage error is logged and re-raised.
        """
        section_ids = {}
        for section_name, content in sections.items():
            try:
                embedding_response = self.cohere_client.embed(
                    texts=[content],
                    model=self.embedding_model,
                    input_type="search_document"
                )
                embedding_vector = embedding_response.embeddings[0]

                # Generate a unique ID for this section
                section_id = f"{document_id}_{section_name}_{str(uuid.uuid4())[:8]}"

                if settings.VECTOR_DB == "pinecone":
                    self.vector_db_client.upsert(
                        vectors=[{
                            "id": section_id,
                            "values": embedding_vector,
                            "metadata": {
                                "document_id": document_id,
                                "section_name": section_name,
                                "content": content[:1000]  # Store truncated content for context
                            }
                        }],
                        namespace=document_id
                    )
                elif self._uses_weaviate():
                    # Weaviate only accepts valid UUIDs as object IDs, so
                    # derive one deterministically from the readable ID.
                    section_id = str(uuid.uuid5(uuid.NAMESPACE_OID, section_id))
                    self.vector_db_client.data_object.create(
                        class_name="Document",
                        data_object={
                            "content": content,
                            "document_id": document_id,
                            "section_name": section_name
                        },
                        uuid=section_id,
                        vector=embedding_vector
                    )

                section_ids[section_name] = section_id
                logger.info(f"Successfully embedded section '{section_name}' for document {document_id}")
            except Exception as e:
                logger.error(f"Error embedding section '{section_name}': {str(e)}")
                raise

        return DocumentEmbedding(
            embedding_id=str(uuid.uuid4()),
            embedding_model=self.embedding_model,
            vector_db=settings.VECTOR_DB,
            sections=section_ids
        )

    async def retrieve_similar_sections(self, query: str, document_id: Optional[str] = None, top_k: int = 5) -> List[Dict[str, Any]]:
        """
        Retrieve similar document sections for a query.

        Args:
            query: The query text to find similar sections for
            document_id: Optional document ID to restrict search
            top_k: Number of results to return

        Returns:
            List of similar sections with metadata. For Pinecone ``score`` is
            the match score; for Weaviate it is the reported distance.
        """
        query_embedding = self.cohere_client.embed(
            texts=[query],
            model=self.embedding_model,
            input_type="search_query"
        ).embeddings[0]

        if settings.VECTOR_DB == "pinecone":
            results = self.vector_db_client.query(
                vector=query_embedding,
                top_k=top_k,
                namespace=document_id if document_id else None,
                include_metadata=True
            )
            return [
                {
                    "section_id": match.id,
                    "document_id": match.metadata["document_id"],
                    "section_name": match.metadata["section_name"],
                    "content": match.metadata.get("content", ""),
                    "score": match.score
                }
                for match in results.matches
            ]

        if self._uses_weaviate():
            query_builder = (
                self.vector_db_client.query
                .get("Document", ["content", "document_id", "section_name"])
                .with_near_vector({"vector": query_embedding})
                .with_limit(top_k)
                # `_additional` fields are returned only when requested
                .with_additional(["id", "distance"])
            )
            if document_id:
                query_builder = query_builder.with_where({
                    "path": ["document_id"],
                    "operator": "Equal",
                    "valueString": document_id
                })
            results = query_builder.do()

            # "data" can be null when the response only carries errors
            found = ((results.get("data") or {}).get("Get") or {}).get("Document") or []
            return [
                {
                    "section_id": item.get("_additional", {}).get("id"),
                    "document_id": item.get("document_id"),
                    "section_name": item.get("section_name"),
                    "content": item.get("content", ""),
                    "score": item.get("_additional", {}).get("distance")
                }
                for item in found
            ]

        # No searchable backend configured
        return []
class MockVectorDB:
    """Mock vector database for development without actual vector DB.

    Only the metadata of upserted vectors is kept (the vector values are
    discarded), grouped per namespace in ``self.vectors``. ``query`` performs
    no similarity search: it returns the first ``top_k`` stored entries of
    the namespace with a fixed score of 0.8.
    """

    def __init__(self):
        # namespace -> {vector_id: metadata}
        self.vectors = {}
        logger.warning("Using mock vector database. Not suitable for production.")

    def upsert(self, vectors, namespace=None):
        """Store the metadata of each vector under ``namespace``.

        Args:
            vectors: Iterable of dicts with at least ``id`` and ``metadata``.
            namespace: Target namespace; ``None`` or empty maps to "default".
        """
        bucket = self.vectors.setdefault(namespace or "default", {})
        for vector in vectors:
            bucket[vector['id']] = vector['metadata']

    def query(self, vector, top_k=5, namespace=None, include_metadata=True):
        """Return up to ``top_k`` stored entries of ``namespace``.

        Args:
            vector: Query vector (ignored by the mock).
            top_k: Maximum number of matches to return.
            namespace: Namespace to read; ``None`` or empty maps to "default".
            include_metadata: Accepted for API compatibility; metadata is
                always included.

        Returns:
            An object with a ``matches`` list whose items expose ``id``,
            ``score`` and ``metadata``. An unknown namespace yields an empty
            ``matches`` list, so callers can always read ``.matches``.
        """
        from collections import namedtuple

        Match = namedtuple('Match', ['id', 'score', 'metadata'])
        Results = namedtuple('Results', ['matches'])

        stored = self.vectors.get(namespace or "default", {})
        matches = [
            Match(id=vector_id, score=0.8, metadata=metadata)
            for vector_id, metadata in list(stored.items())[:top_k]
        ]
        return Results(matches=matches)
+136
View File
@@ -0,0 +1,136 @@
# Reranking services
import cohere
from typing import List, Dict, Any
from loguru import logger
from tenacity import retry, stop_after_attempt, wait_exponential
from app.core.config import settings
from app.core.models import ComplianceIssue, ComplianceReport, ComplianceLevel
class RankingService:
    """Service for ranking and prioritizing compliance issues using Cohere Reranker."""

    def __init__(self):
        """Initialize the ranking service with the Cohere client."""
        self.cohere_client = cohere.Client(settings.COHERE_API_KEY)
        self.reranker_model = settings.RERANKER_MODEL

    async def prioritize_issues(self, report: ComplianceReport, max_issues: int = 10) -> ComplianceReport:
        """
        Prioritize and rank compliance issues in a report.

        Issues are ordered primarily by severity level and, within a level,
        by the reranker's relevance score. At most ``max_issues`` issues are
        kept.

        Args:
            report: The compliance report with issues to prioritize
            max_issues: Maximum number of issues to include in the final report

        Returns:
            Updated compliance report with prioritized issues. The original
            report is returned unchanged when it has fewer than two issues or
            when prioritization fails.
        """
        if not report.issues or len(report.issues) <= 1:
            # No need to rank if there's only 0 or 1 issues
            return report

        try:
            issue_texts = [
                f"Section: {issue.section}. "
                f"Level: {issue.level.value}. "
                f"Description: {issue.description}. "
                f"Recommendation: {issue.recommendation}"
                for issue in report.issues
            ]

            # Query object representing what we're looking for
            query = "critical compliance issues that require immediate attention"
            reranked_issues = await self._rerank_issues(query, issue_texts)

            level_scores = {
                ComplianceLevel.CRITICAL: 4,
                ComplianceLevel.MAJOR: 3,
                ComplianceLevel.MINOR: 2,
                ComplianceLevel.INFO: 1
            }

            # Index the reranker output once instead of scanning it per issue.
            # setdefault keeps the first entry should an index repeat.
            rerank_by_index: Dict[int, float] = {}
            for item in reranked_issues:
                rerank_by_index.setdefault(item["index"], item["relevance_score"])

            # combined score = level_score * 100 + rerank_score, so the level
            # is always the primary sorting factor
            scored = [
                ((level_scores.get(issue.level, 0) * 100) + rerank_by_index.get(i, 0.0), issue)
                for i, issue in enumerate(report.issues)
            ]
            scored.sort(key=lambda pair: pair[0], reverse=True)
            sorted_issues = [issue for _, issue in scored[:max_issues]]

            report_fields = {
                "report_id": report.report_id,
                "document_id": report.document_id,
                "timestamp": report.timestamp,
                "compliance_score": report.compliance_score,
                "summary": report.summary,
                "issues": sorted_issues,
            }
            # Reports built by the reasoning service carry the standards they
            # were checked against; keep them instead of silently dropping
            # them when the report is rebuilt.
            if hasattr(report, "applied_standards"):
                report_fields["applied_standards"] = report.applied_standards

            return ComplianceReport(**report_fields)
        except Exception as e:
            logger.error(f"Error prioritizing issues: {str(e)}")
            # If ranking fails, return the original report
            return report

    async def _rerank_issues(self, query: str, issue_texts: List[str]) -> List[Dict[str, Any]]:
        """
        Rerank issues using Cohere Reranker.

        The reranker call is retried (see ``_call_reranker``); if it still
        fails, a positional fallback ranking is returned so this method never
        raises.

        Args:
            query: The search query to compare issues against
            issue_texts: List of issue descriptions to rank

        Returns:
            List of dictionaries with ``index`` (position in ``issue_texts``)
            and ``relevance_score``
        """
        try:
            return await self._call_reranker(query, issue_texts)
        except Exception as e:
            logger.error(f"Error calling Cohere Reranker: {str(e)}")
            # Return basic ranking if reranking fails
            return [{"index": i, "relevance_score": 1.0 - (i * 0.1)}
                    for i in range(len(issue_texts))]

    # The retry policy lives on the raw API call: a decorator on a function
    # that swallows every exception can never trigger a retry.
    @retry(stop=stop_after_attempt(3), wait=wait_exponential(multiplier=1, min=2, max=10), reraise=True)
    async def _call_reranker(self, query: str, issue_texts: List[str]) -> List[Dict[str, Any]]:
        """Call the Cohere Rerank endpoint once and format its results.

        Raises:
            Exception: Whatever the Cohere client raises, after the retry
                attempts are exhausted.
        """
        response = self.cohere_client.rerank(
            model=self.reranker_model,
            query=query,
            documents=issue_texts,
            top_n=len(issue_texts)
        )
        return [
            {
                "index": result.index,  # Original index in the issues list
                "relevance_score": result.relevance_score
            }
            for result in response.results
        ]
+168
View File
@@ -0,0 +1,168 @@
# Reasoning with LLMs using Groq
import json
from typing import Dict, List
from loguru import logger
from tenacity import retry, stop_after_attempt, wait_exponential
from app.core.config import settings
from app.core.models import ComplianceIssue, ComplianceLevel, ComplianceReport
from app.utils.token_counter import count_tokens, truncate_by_tokens
from groq import Groq # Assuming groq Python SDK is installed
class ReasoningService:
    """Service for performing deep reasoning on documents using Groq."""

    def __init__(self):
        """Initialize the reasoning service with the Groq client."""
        self.client = Groq(api_key=settings.GROQ_API_KEY)
        self.model = settings.REASONING_MODEL  # e.g., "mixtral-8x7b-32768"

    # NOTE(review): _query_groq is retried as well, so a persistent failure
    # can multiply the number of API calls -- confirm this is intended.
    @retry(stop=stop_after_attempt(3), wait=wait_exponential(multiplier=1, min=2, max=10))
    async def analyze_document(self, document_id: str, sections: Dict[str, str], standards: List[str]) -> ComplianceReport:
        """
        Analyze a document against the given standards.

        Args:
            document_id: ID of the document being analyzed
            sections: Mapping of section name to section content
            standards: Names of the standards to check against

        Returns:
            ComplianceReport parsed from the model's answer

        Raises:
            Exception: Query or parsing errors are logged and re-raised.
        """
        document_content = "\n\n".join(
            f"# {name}\n{content}" for name, content in sections.items()
        )

        # Use token-based truncation instead of character-based
        max_tokens = 30000  # Adjust based on model context window
        token_count = count_tokens(document_content)
        logger.info(f"Document {document_id} has {token_count} tokens before truncation")
        if token_count > max_tokens:
            document_content = truncate_by_tokens(document_content, max_tokens)
            logger.info(f"Document {document_id} truncated to {max_tokens} tokens")

        prompt = self._create_analysis_prompt(document_content, standards)
        try:
            response = await self._query_groq(prompt)
            return self._parse_compliance_response(document_id, response, standards)
        except Exception as e:
            logger.error(f"Error analyzing document with Groq: {str(e)}")
            raise

    def _create_analysis_prompt(self, document_content: str, standards: List[str]) -> str:
        """Build the analysis prompt embedding the document and the standards list."""
        standards_text = "\n".join([f"- {standard}" for standard in standards])
        return f"""<document>
{document_content}
</document>
<standards>
{standards_text}
</standards>
You are an expert in document compliance and technical specifications. Please analyze the document above against the listed standards.
Your job is to identify compliance issues and provide detailed reasoning and recommendations. Focus on:
1. Technical accuracy and completeness
2. Compliance with the specified standards
3. Document structure and organization
4. Clarity and specificity of language
5. Consistency and coherence
For each compliance issue you find, please provide:
- The section where the issue appears
- A detailed description of the issue
- The severity level (critical, major, minor, or info)
- A thorough explanation of why this is an issue and how it impacts compliance
- Specific, actionable recommendations to fix the issue
- References to specific standards or best practices that apply
Respond in the following JSON format:
{{
"summary": "Comprehensive overall assessment of the document",
"compliance_score": 0.0 to 1.0,
"issues": [
{{
"section": "Section name",
"description": "Detailed issue description",
"level": "critical/major/minor/info",
"reasoning": "Thorough explanation of why this is an issue",
"standard_references": ["Specific standards or requirements that are violated"],
"recommendation": "Detailed, actionable recommendation to fix the issue"
}}
]
}}"""

    @retry(stop=stop_after_attempt(3), wait=wait_exponential(multiplier=1, min=2, max=10))
    async def _query_groq(self, prompt: str) -> str:
        """Send the prompt to Groq and return the text of the first choice.

        Raises:
            Exception: Client errors are logged and re-raised.
        """
        try:
            response = self.client.chat.completions.create(
                model=self.model,
                messages=[
                    {"role": "system", "content": "You are an AI assistant specialized in document compliance analysis."},
                    {"role": "user", "content": prompt}
                ],
                max_tokens=4000,
                temperature=0.2,
                top_p=1.0
            )
            return response.choices[0].message.content
        except Exception as e:
            logger.error(f"Error querying Groq: {str(e)}")
            raise

    @staticmethod
    def _coerce_score(raw, default: float = 0.5) -> float:
        """Turn the model's compliance score into a float within [0.0, 1.0].

        Null, non-numeric and NaN values fall back to ``default`` (the same
        value used when the key is missing); out-of-range values are clamped.
        """
        try:
            score = float(raw)
        except (TypeError, ValueError):
            return default
        if score != score:  # NaN compares unequal to itself
            return default
        return min(1.0, max(0.0, score))

    def _parse_compliance_response(self, document_id: str, response: str, standards: List[str]) -> ComplianceReport:
        """
        Parse the model's answer into a ComplianceReport.

        The outermost ``{...}`` span of the response is decoded as JSON. The
        model output is untrusted, so missing, null or malformed fields are
        replaced with defaults rather than crashing.

        Args:
            document_id: ID of the analyzed document
            response: Raw text returned by the model
            standards: Standards that were applied

        Returns:
            The parsed report, or a zero-score report with a single critical
            "System" issue when the JSON cannot be decoded.

        Raises:
            ValueError: If the response contains no JSON object at all.
            Exception: Other parsing errors are logged and re-raised.
        """
        try:
            json_start = response.find('{')
            json_end = response.rfind('}') + 1
            if json_start == -1 or json_end == 0:
                raise ValueError("Could not find JSON in response")

            data = json.loads(response[json_start:json_end])
            summary = data.get("summary", "No summary provided")
            compliance_score = self._coerce_score(data.get("compliance_score", 0.5))

            levels = {
                "critical": ComplianceLevel.CRITICAL,
                "major": ComplianceLevel.MAJOR,
                "info": ComplianceLevel.INFO,
            }

            issues = []
            for issue_data in data.get("issues") or []:
                if not isinstance(issue_data, dict):
                    logger.warning("Skipping malformed issue entry in compliance response")
                    continue

                # Unknown, missing or null levels are treated as minor
                level_str = str(issue_data.get("level") or "minor").strip().lower()
                level = levels.get(level_str, ComplianceLevel.MINOR)

                # The model occasionally returns a single string here
                references = issue_data.get("standard_references") or []
                if isinstance(references, str):
                    references = [references]

                issues.append(ComplianceIssue(
                    section=issue_data.get("section", "Unknown"),
                    description=issue_data.get("description", "No description provided"),
                    level=level,
                    reasoning=issue_data.get("reasoning", "No detailed reasoning provided"),
                    standard_references=references,
                    recommendation=issue_data.get("recommendation", "No recommendation provided")
                ))

            return ComplianceReport(
                document_id=document_id,
                compliance_score=compliance_score,
                summary=summary,
                issues=issues,
                applied_standards=standards
            )
        except json.JSONDecodeError:
            logger.error("Failed to parse JSON from response")
            return ComplianceReport(
                document_id=document_id,
                compliance_score=0.0,
                summary="Failed to analyze document due to parsing error.",
                issues=[
                    ComplianceIssue(
                        section="System",
                        description="Failed to parse compliance analysis results.",
                        level=ComplianceLevel.CRITICAL,
                        reasoning="The system encountered an error while parsing the compliance analysis results.",
                        standard_references=[],
                        recommendation="Please try resubmitting the document or contact support."
                    )
                ],
                applied_standards=[]
            )
        except Exception as e:
            logger.error(f"Error parsing compliance response: {str(e)}")
            raise
+250
View File
@@ -0,0 +1,250 @@
# Standards management
import json
import os
from typing import Dict, List, Optional, BinaryIO, Tuple
import uuid
from loguru import logger
from app.core.models import Standard, Requirement, RequirementSeverity
from app.utils.helpers import load_standards_from_file
from app.services.standards_matcher import StandardsMatcher
# Singleton instance to ensure all parts of the application use the same standards
_standards_service_instance = None


class StandardsService:
    """Service for managing compliance standards.

    The class is a process-wide singleton: every ``StandardsService()`` call
    returns the same instance, whose ``standards`` dict (standard ID ->
    Standard) is held in memory and seeded from the default standards
    directory on first construction.
    """

    def __new__(cls):
        """Implement singleton pattern to ensure all parts of the app use the same standards."""
        global _standards_service_instance
        if _standards_service_instance is None:
            instance = super(StandardsService, cls).__new__(cls)
            instance.standards = {}  # In-memory storage for standards
            instance.matcher = StandardsMatcher()  # Advanced standards matching logic
            _standards_service_instance = instance
            instance._load_default_standards()
        return _standards_service_instance

    def __init__(self):
        """Initialize the standards service."""
        # Initialization is done in __new__ for the singleton pattern

    def _load_default_standards(self):
        """Load default standards from the standards directory.

        Every ``*.json`` file in the ``standard`` directory three levels above
        this module is read. A file that fails to load is logged and skipped.
        """
        standards_dir = os.path.join(
            os.path.dirname(os.path.dirname(os.path.dirname(__file__))), "standard"
        )
        if not os.path.exists(standards_dir):
            logger.warning(f"Standards directory not found: {standards_dir}")
            return

        for filename in os.listdir(standards_dir):
            if not filename.endswith(".json"):
                continue
            try:
                standards_data = load_standards_from_file(os.path.join(standards_dir, filename))
                if "standards" in standards_data:
                    for std_data in standards_data["standards"]:
                        standard = self._create_standard_from_data(std_data)
                        self.standards[standard.id] = standard
                        logger.info(f"Loaded standard: {standard.name} ({standard.id})")
            except Exception as e:
                logger.error(f"Error loading standard from {filename}: {str(e)}")

    def _create_standard_from_data(self, data: Dict) -> Standard:
        """
        Create a Standard object from dictionary data.

        Missing IDs are replaced with a fresh UUID; unknown, missing or null
        severities are treated as minor.

        Args:
            data: Dictionary containing standard data

        Returns:
            Standard object
        """
        severities = {
            "critical": RequirementSeverity.CRITICAL,
            "major": RequirementSeverity.MAJOR,
            "info": RequirementSeverity.INFO,
        }

        requirements = []
        for req_data in data.get("requirements", []):
            # `or "minor"` also covers an explicit null in the JSON
            severity_str = str(req_data.get("severity") or "minor").lower()
            requirements.append(Requirement(
                id=req_data.get("id", str(uuid.uuid4())),
                description=req_data.get("description", ""),
                severity=severities.get(severity_str, RequirementSeverity.MINOR),
                details=req_data.get("details", None)
            ))

        return Standard(
            id=data.get("id", str(uuid.uuid4())),
            name=data.get("name", "Unnamed Standard"),
            description=data.get("description", ""),
            requirements=requirements
        )

    async def get_all_standards(self) -> List[Standard]:
        """
        Get all available standards.

        Returns:
            List of Standard objects
        """
        return list(self.standards.values())

    async def get_standard(self, standard_id: str) -> Optional[Standard]:
        """
        Get a standard by ID.

        Args:
            standard_id: ID of the standard to retrieve

        Returns:
            Standard object if found, None otherwise
        """
        return self.standards.get(standard_id)

    async def get_standard_by_name(self, name: str) -> Optional[Standard]:
        """
        Get a standard by name (case-insensitive).

        Args:
            name: Name of the standard to retrieve

        Returns:
            Standard object if found, None otherwise
        """
        name_lower = name.lower()
        for standard in self.standards.values():
            if standard.name.lower() == name_lower:
                return standard
        return None

    def _register_uploaded(self, std_data: Dict) -> Standard:
        """Build a standard from ``std_data``, store it by ID and log the upload."""
        standard = self._create_standard_from_data(std_data)
        self.standards[standard.id] = standard
        logger.info(f"Uploaded standard: {standard.name} (ID: {standard.id}) with {len(standard.requirements)} requirements")
        return standard

    async def upload_standard(self, file: BinaryIO, filename: str) -> Optional[Standard]:
        """
        Upload and process a standard definition file.

        The file holds either a single standard object or an object with a
        ``standards`` list; every standard found is stored.

        Args:
            file: The standard definition file (JSON)
            filename: Name of the uploaded file

        Returns:
            The stored standard. For a multi-standard file, the first one;
            ``None`` when the ``standards`` list is empty.

        Raises:
            ValueError: If the content is not valid JSON or not a JSON object.
        """
        try:
            content = await self._read_file_content(file)
            data = json.loads(content)
            if not isinstance(data, dict):
                raise ValueError("Standard definition file must contain a JSON object")

            if isinstance(data.get("standards"), list):
                # Multiple standards in file
                uploaded = [self._register_uploaded(std_data) for std_data in data["standards"]]
                logger.info(f"Total standards in system after upload: {len(self.standards)}")
                # Return the first standard for simplicity
                return uploaded[0] if uploaded else None

            # Single standard in file
            standard = self._register_uploaded(data)
            logger.info(f"Total standards in system after upload: {len(self.standards)}")
            return standard
        except json.JSONDecodeError as exc:
            raise ValueError("Invalid JSON format in standard definition file") from exc
        except Exception as e:
            logger.error(f"Error processing standard file: {str(e)}")
            raise

    async def _read_file_content(self, file: BinaryIO) -> str:
        """
        Read and decode file content.

        Accepts binary files, text files, and file-like objects whose
        ``read()`` is a coroutine.

        Args:
            file: The file to read

        Returns:
            File content as string

        Raises:
            ValueError: If ``read()`` yields neither text nor bytes.
        """
        import inspect

        file_content = file.read()
        if inspect.isawaitable(file_content):
            file_content = await file_content

        if isinstance(file_content, str):
            return file_content
        if not isinstance(file_content, (bytes, bytearray)):
            raise ValueError("Unable to decode file content. Please ensure file is text-based.")

        try:
            return file_content.decode('utf-8')
        except UnicodeDecodeError:
            # latin-1 maps every byte value, so this fallback cannot fail
            return file_content.decode('latin-1')

    async def get_standard_names_for_document(self, document_content: str) -> List[str]:
        """
        Identify which standards might be relevant for a document based on content.
        Uses advanced matching logic to find the most relevant standards.

        Args:
            document_content: The document content

        Returns:
            List of standard names that might be relevant. Falls back to a
            fixed default list when no standards are loaded or none match.
        """
        # Default standards to use if no matches are found
        DEFAULT_STANDARDS = ["ISO-9001", "IEEE-829", "RFC-2119"]

        # Log available standards for debugging
        logger.info(f"Available standards in the system: {len(self.standards)}")
        for std_id, std in self.standards.items():
            logger.info(f" - {std.name} (ID: {std_id})")

        if not self.standards:
            logger.warning("No standards available in the system. Using default standards.")
            return DEFAULT_STANDARDS

        standard_scores = self.matcher.find_relevant_standards(
            document_content=document_content,
            standards=list(self.standards.values()),
            threshold=0.1,  # Minimum relevance threshold
            max_standards=5  # Maximum number of standards to return
        )

        if standard_scores:
            logger.info(f"Found {len(standard_scores)} relevant standards:")
            for name, score in standard_scores:
                logger.info(f" - {name}: relevance score {score:.2f}")
        else:
            logger.info("No relevant standards found based on document content.")

        relevant_standards = [name for name, _score in standard_scores]
        if not relevant_standards:
            logger.info(f"Using default standards: {DEFAULT_STANDARDS}")
            return DEFAULT_STANDARDS
        return relevant_standards
+304
View File
@@ -0,0 +1,304 @@
# Standards matching logic
import re
from typing import Dict, List, Set, Tuple, Optional
from loguru import logger
from app.core.models import Standard, Requirement
class StandardsMatcher:
"""
Advanced matching logic to identify relevant standards for documents.
This class implements sophisticated matching algorithms beyond simple text matching.
"""
def __init__(self):
"""Initialize the standards matcher."""
# Common stopwords to filter out when extracting keywords
self.stopwords = {
"the", "a", "an", "and", "or", "in", "on", "at", "to", "for", "with",
"by", "of", "is", "are", "was", "were", "be", "been", "being", "have",
"has", "had", "do", "does", "did", "but", "if", "then", "else", "when",
"where", "why", "how", "all", "any", "both", "each", "few", "more",
"most", "other", "some", "such", "no", "nor", "not", "only", "own",
"same", "so", "than", "too", "very", "can", "will", "just", "should",
"now", "this", "that", "these", "those"
}
# Technical terms that indicate compliance requirements
self.technical_indicators = [
"shall", "must", "required", "should", "recommended", "may", "optional",
"compliant", "compliance", "conform", "standard", "specification", "requirement",
"procedure", "process", "method", "test", "verify", "validate", "certification",
"certified", "approved", "regulation", "regulatory", "guideline", "protocol"
]
# Common standard prefixes and abbreviations
self.standard_prefixes = [
"iso", "ieee", "astm", "ansi", "iec", "din", "bs", "en", "jis",
"gb", "api", "asme", "nfpa", "ul", "mil", "std", "rfc", "itu"
]
def extract_document_sections(self, document_content: str) -> Dict[str, str]:
"""
Extract sections from a document to improve matching.
Args:
document_content: The document content
Returns:
Dictionary of section name to section content
"""
sections = {}
sections["full_document"] = document_content
# Try to identify document sections using markdown headings
heading_pattern = re.compile(r'^(#{1,3})\s+(.+)$', re.MULTILINE)
matches = list(heading_pattern.finditer(document_content))
if matches:
for i, match in enumerate(matches):
section_name = match.group(2).strip()
# Get section content (from this heading to the next, or to the end)
start_pos = match.end()
end_pos = matches[i+1].start() if i < len(matches) - 1 else len(document_content)
section_content = document_content[start_pos:end_pos].strip()
sections[section_name] = section_content
# Look for common document sections by name
common_sections = [
"introduction", "scope", "purpose", "references", "definitions",
"requirements", "compliance", "standards", "conclusion", "summary",
"appendix", "annex"
]
for section in common_sections:
pattern = re.compile(rf'(?i)(?:^|\n)(?:{section}|{section.capitalize()})(?:[\s:]+)(.*?)(?=\n\s*\n|\n\s*[A-Z]|\Z)', re.DOTALL)
match = pattern.search(document_content)
if match:
sections[section] = match.group(1).strip()
return sections
def extract_key_terms(self, document_content: str) -> List[str]:
"""
Extract key technical terms from document content.
Args:
document_content: The document content
Returns:
List of key terms
"""
key_terms = []
# Split into sentences
sentences = re.split(r'[.!?]\s+', document_content)
for sentence in sentences:
words = sentence.split()
# Check if sentence contains technical indicators
if any(indicator in sentence.lower() for indicator in self.technical_indicators):
# Extract noun phrases (simplified approach)
for i in range(len(words) - 1):
if words[i].lower() not in self.stopwords and words[i+1].lower() not in self.stopwords:
key_terms.append(f"{words[i]} {words[i+1]}".lower())
# Look for capitalized terms (often defined terms)
cap_pattern = re.compile(r'\b[A-Z][A-Z0-9]+\b')
cap_terms = cap_pattern.findall(document_content)
key_terms.extend([term.lower() for term in cap_terms])
# Look for standard references (e.g., ISO-9001, IEEE 829)
for prefix in self.standard_prefixes:
pattern = re.compile(rf'\b{prefix}[-\s]?\d+\b', re.IGNORECASE)
matches = pattern.findall(document_content)
key_terms.extend([match.lower() for match in matches])
# Remove duplicates
return list(set(key_terms))
def extract_standard_keywords(self, standard: Standard) -> List[str]:
"""
Extract keywords from a standard that can be used for matching.
Args:
standard: The standard to extract keywords from
Returns:
List of keywords associated with the standard
"""
keywords = []
# Add standard name and variations
keywords.append(standard.name.lower())
keywords.append(standard.name.replace("-", "").lower())
keywords.append(standard.name.replace("-", " ").lower())
# Add standard description words (excluding common words)
if standard.description:
description_words = [word.lower() for word in standard.description.split()
if word.lower() not in self.stopwords]
keywords.extend(description_words)
# Add requirement keywords
for req in standard.requirements:
# Add requirement ID
keywords.append(req.id.lower())
# Add key phrases from requirement description
if req.description:
# Extract noun phrases and technical terms (simplified approach)
phrases = []
words = req.description.split()
for i in range(len(words) - 1):
if words[i].lower() not in self.stopwords and words[i+1].lower() not in self.stopwords:
phrases.append(f"{words[i]} {words[i+1]}".lower())
keywords.extend(phrases)
# Add individual technical terms
for word in words:
if word.lower() in self.technical_indicators:
keywords.append(word.lower())
# Remove duplicates and return
return list(set(keywords))
def calculate_standard_relevance(self, standard: Standard, document_content: str,
sections: Dict[str, str], key_terms: List[str]) -> float:
"""
Calculate a relevance score for a standard based on multiple factors.
Args:
standard: The standard to evaluate
document_content: The document content
sections: Document sections
key_terms: Key terms extracted from the document
Returns:
Relevance score (0.0 to 1.0)
"""
document_content_lower = document_content.lower()
# Extract keywords for this standard
standard_keywords = self.extract_standard_keywords(standard)
# Initialize scores for different matching components
name_match_score = 0.0
keyword_match_score = 0.0
section_match_score = 0.0
term_match_score = 0.0
requirement_match_score = 0.0
# 1. Check for standard name matches (highest weight)
if standard.name.lower() in document_content_lower:
name_match_score = 0.5
elif standard.name.replace("-", "").lower() in document_content_lower:
name_match_score = 0.4
elif standard.name.replace("-", " ").lower() in document_content_lower:
name_match_score = 0.4
# 2. Check for keyword matches
matched_keywords = 0
total_keywords = len(standard_keywords)
if total_keywords > 0:
for keyword in standard_keywords:
if keyword in document_content_lower:
matched_keywords += 1
keyword_match_score = matched_keywords / total_keywords * 0.3
# 3. Check for section-specific matches
important_sections = ["introduction", "scope", "purpose", "references",
"standards", "compliance", "requirements"]
for section_name in important_sections:
if section_name in sections:
section_content = sections[section_name].lower()
# Check for standard name in important sections
if standard.name.lower() in section_content:
section_match_score += 0.1
break
# Check for standard name in section titles
for section_name in sections.keys():
if standard.name.lower() in section_name.lower():
section_match_score += 0.2
break
# 4. Check for key term matches
matching_terms = 0
for term in key_terms:
if any(kw in term or term in kw for kw in standard_keywords):
matching_terms += 1
if len(key_terms) > 0:
term_match_score = min(0.2, 0.01 * matching_terms)
# 5. Check for requirement-specific matches
for req in standard.requirements:
req_desc_lower = req.description.lower()
req_keywords = [word for word in req_desc_lower.split()
if word not in self.stopwords and len(word) > 3]
for keyword in req_keywords:
if keyword in document_content_lower:
requirement_match_score += 0.01
requirement_match_score = min(0.2, requirement_match_score)
# Calculate final score (weighted sum of all components)
final_score = (
name_match_score +
keyword_match_score +
section_match_score +
term_match_score +
requirement_match_score
)
# Cap at 1.0
return min(final_score, 1.0)
def find_relevant_standards(self, document_content: str, standards: List[Standard],
                            threshold: float = 0.1, max_standards: int = 5) -> List[Tuple[str, float]]:
    """
    Rank the supplied standards by how relevant they are to a document.

    Args:
        document_content: Text of the document being analysed
        standards: Candidate standards to score
        threshold: Candidates scoring below this value are dropped
        max_standards: Upper bound on the number of results returned

    Returns:
        (standard_name, relevance_score) tuples, highest score first
    """
    # Nothing to score: skip the section/term extraction entirely
    if not standards:
        return []

    # Both derived views are computed once and reused for every candidate
    doc_sections = self.extract_document_sections(document_content)
    doc_terms = self.extract_key_terms(document_content)

    ranked = []
    for candidate in standards:
        relevance = self.calculate_standard_relevance(
            candidate, document_content, doc_sections, doc_terms
        )
        if relevance >= threshold:
            ranked.append((candidate.name, relevance))
            logger.debug(f"Standard {candidate.name} relevance score: {relevance:.2f}")

    # Best match first; candidates with equal scores keep their input order
    ranked = sorted(ranked, key=lambda pair: pair[1], reverse=True)
    return ranked[:max_standards]
+713
View File
@@ -0,0 +1,713 @@
/* Base styles */
/* Design tokens: the palette and font stack below are read by later rules through var(). */
:root {
--primary-color: #3498db;
--secondary-color: #2980b9;
--accent-color: #f39c12;
--success-color: #2ecc71;
--warning-color: #f1c40f;
--danger-color: #e74c3c;
/* Same value as --primary-color. */
--info-color: #3498db;
--light-color: #f8f9fa;
--dark-color: #343a40;
--gray-color: #6c757d;
--border-color: #dee2e6;
--font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
}
/* Global reset: drop default margin/padding and size every element by its border box. */
* {
margin: 0;
padding: 0;
box-sizing: border-box;
}
/* Page defaults: font, text colour and a light page background. */
body {
font-family: var(--font-family);
line-height: 1.6;
color: var(--dark-color);
background-color: #f4f7fa;
}
/* Centred content column, at most 1200px wide. */
.container {
max-width: 1200px;
margin: 0 auto;
padding: 20px;
}
/* Shared spacing and colour for the first three heading levels. */
h1, h2, h3 {
margin-bottom: 1rem;
color: var(--dark-color);
}
/* Header styles */
/* White, rounded, lightly shadowed banner with centred text. */
header {
text-align: center;
margin-bottom: 2rem;
padding: 1.5rem;
background-color: white;
border-radius: 8px;
box-shadow: 0 2px 10px rgba(0, 0, 0, 0.05);
}
/* Title in the primary colour; overrides the generic heading colour and spacing. */
header h1 {
color: var(--primary-color);
margin-bottom: 0.5rem;
}
/* Muted tagline under the title. */
header p {
color: var(--gray-color);
}
/* Main content sections */
main {
display: block;
width: 100%;
}
/* Two equal-width columns; the 992px media query later in this file collapses it to one. */
.main-grid {
display: grid;
grid-template-columns: 1fr 1fr;
gap: 2rem;
}
/* The documents card runs the full width of the grid. */
.documents-section {
grid-column: 1 / -1; /* Span all columns */
}
/* Every <section> is drawn as a white card with rounded corners and a soft shadow. */
section {
background-color: white;
border-radius: 8px;
padding: 1.5rem;
box-shadow: 0 2px 10px rgba(0, 0, 0, 0.05);
}
/* Card title with a divider line underneath. */
section h2 {
margin-bottom: 1.5rem;
padding-bottom: 0.5rem;
border-bottom: 1px solid var(--border-color);
}
/* Upload area styles */
.upload-container {
    margin-bottom: 1.5rem;
}

/* Dashed drop zone; the background transition animates the highlight below. */
.upload-area {
    border: 2px dashed var(--primary-color);
    border-radius: 8px;
    padding: 3rem 1.5rem;
    text-align: center;
    transition: background-color 0.3s ease;
}

/*
 * Highlight the zone on pointer hover and while a file is dragged over it.
 * The page script adds the `dragover` class on dragover and removes it on
 * dragleave/drop; without a matching rule the drag gave no visual feedback.
 */
.upload-area:hover,
.upload-area.dragover {
    background-color: rgba(52, 152, 219, 0.05);
}
/* Large icon at the top of the drop zone. */
.upload-area i {
font-size: 3rem;
color: var(--primary-color);
margin-bottom: 1rem;
}
/* Muted helper text inside the drop zone. */
.upload-area p {
margin-bottom: 0.5rem;
color: var(--gray-color);
}
/* Bordered panel describing the selected file. */
.file-info {
margin-top: 1.5rem;
padding: 1rem;
border: 1px solid var(--border-color);
border-radius: 8px;
}
/* Icon and text laid out on one row, vertically centred. */
.file-details {
display: flex;
align-items: center;
margin-bottom: 1rem;
}
.file-icon {
font-size: 2rem;
color: var(--primary-color);
margin-right: 1rem;
}
.file-name {
font-weight: bold;
margin-bottom: 0.25rem;
}
.file-size {
color: var(--gray-color);
font-size: 0.9rem;
}
/* Buttons */
/* Base button look; the variants below only override the background colour. */
.button {
display: inline-block;
padding: 0.5rem 1rem;
background-color: var(--primary-color);
color: white;
border: none;
border-radius: 4px;
cursor: pointer;
font-size: 0.9rem;
transition: background-color 0.3s ease;
/* Removes the underline when the class is applied to a link. */
text-decoration: none;
}
.button:hover {
background-color: var(--secondary-color);
}
/* Green confirm-style variant. */
.upload-button {
background-color: var(--success-color);
}
/* Darker green on hover. */
.upload-button:hover {
background-color: #27ae60;
}
/* Grey variant, spaced from the button before it. */
.cancel-button {
background-color: var(--gray-color);
margin-left: 0.5rem;
}
/* Darker grey on hover. */
.cancel-button:hover {
background-color: #5a6268;
}
/* Documents list */
.documents-list {
    list-style: none;
}

/*
 * Empty-state message shown while the documents list has no entries.
 * Mirrors .no-standards so both empty states look the same.
 */
.no-documents {
    color: var(--gray-color);
    text-align: center;
    padding: 2rem 0;
}
/* One bordered row per document: info on the left, actions on the right. */
.document-item {
display: flex;
justify-content: space-between;
align-items: center;
padding: 1rem;
border: 1px solid var(--border-color);
border-radius: 4px;
margin-bottom: 0.5rem;
transition: background-color 0.3s ease;
}
.document-item:hover {
background-color: #f8f9fa;
}
/* Icon and text block on one row. */
.document-info {
display: flex;
align-items: center;
}
.document-icon {
font-size: 1.5rem;
color: var(--primary-color);
margin-right: 1rem;
}
.document-name {
font-weight: bold;
margin-bottom: 0.25rem;
}
.document-date {
color: var(--gray-color);
font-size: 0.9rem;
}
/* Row of per-document buttons. */
.document-actions {
display: flex;
gap: 0.5rem;
}
/* Compact sizing for buttons placed inside list rows. */
.action-button {
font-size: 1rem;
padding: 0.25rem 0.5rem;
}
/* "View Report" action button. */
.view-button {
    background-color: var(--info-color);
}

/* Same hover shade as .button:hover, taken from the shared token instead of a literal. */
.view-button:hover {
    background-color: var(--secondary-color);
}

/* "Resubmit" action button: yellow background needs dark text for contrast. */
.resubmit-button {
    background-color: var(--warning-color);
    color: var(--dark-color);
}

/* --accent-color holds the orange that was previously hard-coded here. */
.resubmit-button:hover {
    background-color: var(--accent-color);
}
/* Report section */
/* Title on the left, close button on the right. */
.report-header {
display: flex;
justify-content: space-between;
align-items: center;
margin-bottom: 1.5rem;
}
/* Bordered frame that the script fills with the rendered report. */
.report-container {
border: 1px solid var(--border-color);
border-radius: 8px;
padding: 1.5rem;
}
/* Summary section in report */
/* Pale green callout with a success-coloured left bar. */
.summary {
background-color: #e9f7ef;
padding: 1.5rem;
border-radius: 8px;
margin-bottom: 1.5rem;
border-left: 4px solid var(--success-color);
}
/* Nested box inside the summary listing the standards that were applied. */
.applied-standards {
margin-top: 1rem;
padding: 0.75rem 1rem;
background-color: #f8f9fa;
border-radius: 5px;
border-left: 3px solid var(--info-color);
}
.applied-standards h4 {
margin-top: 0;
margin-bottom: 0.5rem;
font-size: 1rem;
color: #495057;
}
/*
 * Bulleted list of applied standards inside the report summary.
 * Scoped to .applied-standards because the standards management list also
 * uses the class `standards-list`: unscoped, this margin leaked onto that
 * list and its `list-style: none` removed the bullets here. The global reset
 * zeroes list padding, so the left margin is what leaves room for markers.
 */
.applied-standards .standards-list {
    margin: 0 0 0 1.5rem;
    padding: 0;
    list-style: disc;
}

.applied-standards .standards-list li {
    margin-bottom: 0.25rem;
}
/* Issues section in report */
.issues-container {
margin-top: 1.5rem;
}
/* Neutral issue card; the severity modifiers below recolour background and left bar. */
.issue {
margin-bottom: 1rem;
padding: 1rem;
border-radius: 8px;
background-color: #f8f9fa;
border-left: 4px solid var(--gray-color);
}
/* Severity modifiers: the script adds the issue level as a second class. */
.issue.critical {
background-color: #fdedec;
border-left-color: var(--danger-color);
}
.issue.major {
background-color: #fef9e7;
border-left-color: var(--warning-color);
}
.issue.minor {
background-color: #eafaf1;
border-left-color: var(--success-color);
}
.issue.info {
background-color: #ebf5fb;
border-left-color: var(--info-color);
}
/* Section name on the left, severity badge on the right. */
.issue-header {
display: flex;
justify-content: space-between;
align-items: center;
margin-bottom: 0.5rem;
}
.issue-section {
font-weight: bold;
}
/* Small uppercase pill; coloured by the severity modifiers below. */
.badge {
display: inline-block;
padding: 0.25rem 0.5rem;
border-radius: 4px;
font-size: 0.8rem;
font-weight: bold;
text-transform: uppercase;
color: white;
}
.badge.critical {
background-color: var(--danger-color);
}
/* Yellow badge switches to dark text. */
.badge.major {
background-color: var(--warning-color);
color: var(--dark-color);
}
.badge.minor {
background-color: var(--success-color);
}
.badge.info {
background-color: var(--info-color);
}
.issue-description {
margin-bottom: 0.5rem;
}
/* Italic inset box for the suggested fix. */
.issue-recommendation {
background-color: #f8f9fa;
padding: 0.75rem;
border-radius: 4px;
font-style: italic;
}
/* Loading overlay */
/*
Full-viewport dimmed layer with its content centred. The markup hides it with an
inline display:none and the script switches it to display:flex while busy.
*/
.loading-overlay {
position: fixed;
top: 0;
left: 0;
width: 100%;
height: 100%;
background-color: rgba(0, 0, 0, 0.5);
display: flex;
flex-direction: column;
justify-content: center;
align-items: center;
/* One below the modal's z-index (1001). */
z-index: 1000;
}
/* Ring with one coloured edge, rotated continuously by the spin animation. */
.loading-spinner {
width: 50px;
height: 50px;
border: 5px solid #f3f3f3;
border-top: 5px solid var(--primary-color);
border-radius: 50%;
animation: spin 1s linear infinite;
margin-bottom: 1rem;
}
.loading-overlay p {
color: white;
font-size: 1.2rem;
}
/* One full clockwise turn per cycle. */
@keyframes spin {
0% { transform: rotate(0deg); }
100% { transform: rotate(360deg); }
}
/* Stats display */
/* Row of equally growing stat boxes that wraps when space runs out. */
.stats-container {
display: flex;
justify-content: space-between;
margin-bottom: 1.5rem;
flex-wrap: wrap;
gap: 1rem;
}
/* Each box shares the row equally but never shrinks below 100px. */
.stat-box {
flex: 1;
min-width: 100px;
background-color: #f8f9fa;
padding: 1rem;
border-radius: 8px;
text-align: center;
}
/* The number; its colour is set inline by the report renderer. */
.stat-value {
font-size: 1.5rem;
font-weight: bold;
margin-bottom: 0.25rem;
}
.stat-label {
color: var(--gray-color);
font-size: 0.9rem;
}
/* Score display */
/* Centres the score circle and its label on one row. */
.score-container {
display: flex;
justify-content: center;
align-items: center;
margin-bottom: 1.5rem;
}
/*
100px disc with the score centred inside. Green is only the default: the report
renderer overrides background-color inline according to the score.
*/
.score-circle {
width: 100px;
height: 100px;
border-radius: 50%;
display: flex;
justify-content: center;
align-items: center;
font-size: 2rem;
font-weight: bold;
color: white;
background-color: var(--success-color);
margin-right: 1rem;
}
.score-label {
font-size: 1.2rem;
font-weight: bold;
}
/* Footer */
/* Small, muted, centred line at the bottom of the page. */
footer {
text-align: center;
margin-top: 2rem;
padding: 1rem;
color: var(--gray-color);
font-size: 0.9rem;
}
/* Standards section */
/* Repeats the card styling of the generic `section` rule with identical values. */
.standards-section {
background-color: white;
border-radius: 8px;
padding: 1.5rem;
box-shadow: 0 2px 10px rgba(0, 0, 0, 0.05);
}
/* Description on the left, "Add Standard" button on the right. */
.standards-header {
display: flex;
justify-content: space-between;
align-items: center;
margin-bottom: 1.5rem;
}
.standards-header p {
color: var(--gray-color);
margin: 0;
}
/* Dashed frame around the standard upload panel. */
.standards-upload {
margin-bottom: 1.5rem;
border: 1px dashed var(--border-color);
border-radius: 8px;
padding: 1rem;
}
/* Caps the list height at 300px and scrolls vertically beyond that. */
.standards-list-container {
max-height: 300px;
overflow-y: auto;
}
/*
 * Standards management list: no bullets, no indent.
 * Scoped to .standards-list-container because the report's applied-standards
 * list reuses the class `standards-list`; an unscoped rule stripped that
 * list's bullets. The explicit margin cancels the indent that the report
 * list's rule would otherwise apply to this list.
 */
.standards-list-container .standards-list {
    list-style: none;
    margin: 0;
}
/* One row per standard, separated by a bottom border. */
.standard-item {
display: flex;
justify-content: space-between;
align-items: center;
padding: 1rem;
border-bottom: 1px solid var(--border-color);
}
/* Icon and text block on one row. */
.standard-info {
display: flex;
align-items: center;
}
.standard-icon {
font-size: 1.5rem;
color: var(--primary-color);
margin-right: 1rem;
}
.standard-name {
font-weight: bold;
margin-bottom: 0.25rem;
}
.standard-description {
color: var(--gray-color);
font-size: 0.9rem;
}
.standard-requirements {
color: var(--gray-color);
font-size: 0.9rem;
margin-top: 0.25rem;
}
.standard-actions {
display: flex;
gap: 0.5rem;
}
/* Empty-state message for the standards list. */
.no-standards {
color: var(--gray-color);
text-align: center;
padding: 2rem 0;
}
/* Modal styles */
/* Full-viewport dimmed backdrop that centres the dialog. */
.modal {
position: fixed;
top: 0;
left: 0;
width: 100%;
height: 100%;
background-color: rgba(0, 0, 0, 0.5);
display: flex;
justify-content: center;
align-items: center;
/* One above the loading overlay's z-index (1000). */
z-index: 1001;
}
/* Dialog box: 80% wide up to 800px, scrolls internally past 80% of the viewport height. */
.modal-content {
background-color: white;
border-radius: 8px;
width: 80%;
max-width: 800px;
max-height: 80vh;
overflow-y: auto;
box-shadow: 0 4px 20px rgba(0, 0, 0, 0.2);
}
/* Title on the left, close button on the right. */
.modal-header {
display: flex;
justify-content: space-between;
align-items: center;
padding: 1rem 1.5rem;
border-bottom: 1px solid var(--border-color);
}
.modal-header h2 {
margin: 0;
}
/* Borderless, background-less close control. */
.close-button {
background: none;
border: none;
font-size: 1.5rem;
cursor: pointer;
color: var(--gray-color);
}
.modal-body {
padding: 1.5rem;
}
.standard-detail-header {
margin-bottom: 1.5rem;
}
.standard-detail-header h3 {
margin-bottom: 0.5rem;
}
.standard-requirements-list {
margin-top: 1.5rem;
}
.standard-requirements-list h4 {
margin-bottom: 1rem;
padding-bottom: 0.5rem;
border-bottom: 1px solid var(--border-color);
}
/* Requirement card; the severity modifiers below add a coloured left bar. */
.requirement-item {
margin-bottom: 1rem;
padding: 1rem;
border-radius: 4px;
background-color: #f8f9fa;
}
.requirement-item.critical {
border-left: 4px solid var(--danger-color);
}
.requirement-item.major {
border-left: 4px solid var(--warning-color);
}
.requirement-item.minor {
border-left: 4px solid var(--success-color);
}
.requirement-item.info {
border-left: 4px solid var(--info-color);
}
/* Requirement id on the left, severity badge on the right. */
.requirement-header {
display: flex;
justify-content: space-between;
align-items: center;
margin-bottom: 0.5rem;
}
.requirement-id {
font-weight: bold;
color: var(--gray-color);
}
.requirement-description {
margin-bottom: 0.5rem;
}
/* Smaller, muted inset box for the optional details text. */
.requirement-details {
font-size: 0.9rem;
color: var(--gray-color);
padding: 0.5rem;
background-color: #f1f1f1;
border-radius: 4px;
}
/* Responsive design */
/* Up to 992px wide: single-column grid and a wider, taller modal. */
@media (max-width: 992px) {
.main-grid {
grid-template-columns: 1fr;
}
.modal-content {
width: 95%;
max-height: 90vh;
}
}
/* Up to 768px wide: list rows and the stats row stack vertically. */
@media (max-width: 768px) {
.document-item {
flex-direction: column;
align-items: flex-start;
}
/* Actions drop below the info block and align to the right edge. */
.document-actions {
margin-top: 1rem;
align-self: flex-end;
}
.stats-container {
flex-direction: column;
}
.stat-box {
margin-bottom: 0.5rem;
}
.standard-item {
flex-direction: column;
align-items: flex-start;
}
/* Same treatment as .document-actions above. */
.standard-actions {
margin-top: 1rem;
align-self: flex-end;
}
}
+115
View File
@@ -0,0 +1,115 @@
<!DOCTYPE html>
<!-- Single-page UI: document upload, standards management, recent documents and the compliance report. -->
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Mini SpecsComply Pro</title>
<!-- Application stylesheet, referenced by a path relative to this page. -->
<link rel="stylesheet" href="static/css/styles.css">
<!-- Font Awesome 6.4.0 from a CDN; provides the "fas fa-*" icon classes used below. -->
<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.4.0/css/all.min.css">
</head>
<body>
<div class="container">
<header>
<h1><i class="fas fa-clipboard-check"></i> Mini SpecsComply Pro</h1>
<p>Document Compliance and Validation Tool</p>
</header>
<main>
<div class="main-grid">
<!-- Document upload card: a drop zone, replaced by a file summary once a file is chosen. -->
<section class="upload-section">
<h2>Upload Document</h2>
<div class="upload-container" id="upload-container">
<div class="upload-area" id="upload-area">
<i class="fas fa-cloud-upload-alt"></i>
<p>Drag and drop your document here</p>
<p>or</p>
<!-- The label acts as the visible trigger for the hidden file input. -->
<label for="file-input" class="button">Browse Files</label>
<input type="file" id="file-input" accept=".md,.txt,.json,.yaml,.html,.doc,.docx,.pdf" hidden>
</div>
<!-- Hidden until a file is selected; name and size below are placeholders overwritten by the script. -->
<div class="file-info" id="file-info" style="display: none;">
<div class="file-details">
<i class="fas fa-file-alt file-icon"></i>
<div>
<p class="file-name" id="file-name">document.md</p>
<p class="file-size" id="file-size">0 KB</p>
</div>
</div>
<button class="button upload-button" id="upload-button">Upload for Analysis</button>
<button class="button cancel-button" id="cancel-button">Cancel</button>
</div>
</div>
</section>
<!-- Standards card: upload panel (hidden until "Add Standard" is clicked) and the list of standards. -->
<section class="standards-section">
<h2>Compliance Standards</h2>
<div class="standards-container" id="standards-container">
<div class="standards-header">
<p>Manage the compliance standards used for document analysis</p>
<button class="button" id="upload-standard-button">
<i class="fas fa-plus"></i> Add Standard
</button>
</div>
<div class="standards-upload" id="standards-upload" style="display: none;">
<div class="upload-area" id="standard-upload-area">
<i class="fas fa-cloud-upload-alt"></i>
<p>Drag and drop your standard JSON file here</p>
<p>or</p>
<label for="standard-file-input" class="button">Browse Files</label>
<!-- The picker is limited to .json; the script repeats the extension check. -->
<input type="file" id="standard-file-input" accept=".json" hidden>
</div>
<div class="file-info" id="standard-file-info" style="display: none;">
<div class="file-details">
<i class="fas fa-file-code file-icon"></i>
<div>
<p class="file-name" id="standard-file-name">standard.json</p>
<p class="file-size" id="standard-file-size">0 KB</p>
</div>
</div>
<button class="button upload-button" id="standard-upload-button">Upload Standard</button>
<button class="button cancel-button" id="standard-cancel-button">Cancel</button>
</div>
</div>
<div class="standards-list-container">
<!-- Empty-state text; the script hides it when the list has entries. -->
<p class="no-standards" id="no-standards">No custom standards have been added yet.</p>
<ul class="standards-list" id="standards-list">
<!-- Standards will be added here dynamically -->
</ul>
</div>
</div>
</section>
<!-- Recent documents card; spans the full grid width via .documents-section. -->
<section class="documents-section">
<h2>Recent Documents</h2>
<div class="documents-container" id="documents-container">
<p class="no-documents" id="no-documents">No documents have been analyzed yet.</p>
<ul class="documents-list" id="documents-list">
<!-- Document items will be added here dynamically -->
</ul>
</div>
</section>
</div>
<!-- Report card: hidden until the script renders a report into #report-container. -->
<section class="report-section" id="report-section" style="display: none;">
<div class="report-header">
<h2>Compliance Report</h2>
<button class="button" id="close-report-button">Close Report</button>
</div>
<div class="report-container" id="report-container">
<!-- Report content will be loaded here -->
</div>
</section>
</main>
<!-- Busy overlay: the script switches display to flex while a request is in flight. -->
<div class="loading-overlay" id="loading-overlay" style="display: none;">
<div class="loading-spinner"></div>
<p>Analyzing document...</p>
</div>
<footer>
<p>&copy; 2025 Mini SpecsComply Pro</p>
</footer>
</div>
<!-- Loaded at the end of <body>; the script itself waits for DOMContentLoaded before wiring the page. -->
<script src="static/js/script.js"></script>
</body>
</html>
+825
View File
@@ -0,0 +1,825 @@
document.addEventListener('DOMContentLoaded', function() {
// Document Elements
// References to the document-upload, documents-list, report and overlay nodes,
// looked up once by id and captured by the handlers and helpers below.
const uploadArea = document.getElementById('upload-area');
const fileInput = document.getElementById('file-input');
const fileInfo = document.getElementById('file-info');
const fileName = document.getElementById('file-name');
const fileSize = document.getElementById('file-size');
const uploadButton = document.getElementById('upload-button');
const cancelButton = document.getElementById('cancel-button');
const noDocuments = document.getElementById('no-documents');
const documentsList = document.getElementById('documents-list');
const reportSection = document.getElementById('report-section');
const reportContainer = document.getElementById('report-container');
const closeReportButton = document.getElementById('close-report-button');
const loadingOverlay = document.getElementById('loading-overlay');
// Standards Elements
// Same pattern for the standards panel.
const uploadStandardButton = document.getElementById('upload-standard-button');
const standardsUpload = document.getElementById('standards-upload');
const standardUploadArea = document.getElementById('standard-upload-area');
const standardFileInput = document.getElementById('standard-file-input');
const standardFileInfo = document.getElementById('standard-file-info');
const standardFileName = document.getElementById('standard-file-name');
const standardFileSize = document.getElementById('standard-file-size');
const standardUploadButton = document.getElementById('standard-upload-button');
const standardCancelButton = document.getElementById('standard-cancel-button');
const noStandards = document.getElementById('no-standards');
const standardsList = document.getElementById('standards-list');
// API endpoint base URL
// Prefix for every fetch() URL built below.
const API_BASE_URL = '/api';
// Local storage keys
// Each key holds a JSON-encoded array (documents and standards respectively).
const DOCUMENTS_STORAGE_KEY = 'specscomply_documents';
const STANDARDS_STORAGE_KEY = 'specscomply_standards';
// Drag and drop functionality
uploadArea.addEventListener('dragover', function(e) {
    // Cancelling dragover is what marks the area as a valid drop target
    e.preventDefault();
    uploadArea.classList.add('dragover');
});

uploadArea.addEventListener('dragleave', function() {
    uploadArea.classList.remove('dragover');
});

uploadArea.addEventListener('drop', function(e) {
    // Stop the browser from navigating to / opening the dropped file
    e.preventDefault();
    uploadArea.classList.remove('dragover');

    const droppedFile = e.dataTransfer.files[0];
    if (!droppedFile) {
        return;
    }

    // The upload button reads fileInput.files, which a drop does not populate.
    // Copy the first dropped file into the input so a dropped file can be
    // uploaded exactly like a browsed one.
    try {
        const transfer = new DataTransfer();
        transfer.items.add(droppedFile);
        fileInput.files = transfer.files;
    } catch (err) {
        console.warn('Could not attach dropped file to the file input:', err);
    }

    handleFileSelection(droppedFile);
});
// File input change: preview whichever file the picker returned
fileInput.addEventListener('change', () => {
    if (fileInput.files.length > 0) {
        handleFileSelection(fileInput.files[0]);
    }
});

// Upload button click: send the file currently held by the input
uploadButton.addEventListener('click', () => {
    const pending = fileInput.files;
    if (pending.length > 0) {
        uploadDocument(pending[0]);
    }
});

// Cancel button click: discard the selection
cancelButton.addEventListener('click', () => {
    resetFileInput();
});

// Close report button click: hide the report card
closeReportButton.addEventListener('click', () => {
    reportSection.style.display = 'none';
});

// Fill both lists from local storage on page load
loadDocuments();
loadStandards();

// Standards upload button click: reveal the standard upload panel
uploadStandardButton.addEventListener('click', () => {
    standardsUpload.style.display = 'block';
});
// Standard drag and drop functionality
standardUploadArea.addEventListener('dragover', function(e) {
    // Cancelling dragover is what marks the area as a valid drop target
    e.preventDefault();
    standardUploadArea.classList.add('dragover');
});

standardUploadArea.addEventListener('dragleave', function() {
    standardUploadArea.classList.remove('dragover');
});

standardUploadArea.addEventListener('drop', function(e) {
    // Stop the browser from navigating to / opening the dropped file
    e.preventDefault();
    standardUploadArea.classList.remove('dragover');

    const droppedFile = e.dataTransfer.files[0];
    if (!droppedFile) {
        return;
    }

    // The upload button reads standardFileInput.files, which a drop does not
    // populate. Copy the dropped file into the input, but only when it has the
    // .json extension that handleStandardFileSelection accepts, so a rejected
    // file is never left behind in the input.
    if (droppedFile.name.toLowerCase().endsWith('.json')) {
        try {
            const transfer = new DataTransfer();
            transfer.items.add(droppedFile);
            standardFileInput.files = transfer.files;
        } catch (err) {
            console.warn('Could not attach dropped file to the file input:', err);
        }
    }

    handleStandardFileSelection(droppedFile);
});
// Standard file input change: preview whichever file the picker returned
standardFileInput.addEventListener('change', () => {
    if (standardFileInput.files.length > 0) {
        handleStandardFileSelection(standardFileInput.files[0]);
    }
});

// Standard upload button click: send the file currently held by the input
standardUploadButton.addEventListener('click', () => {
    const pending = standardFileInput.files;
    if (pending.length > 0) {
        uploadStandard(pending[0]);
    }
});

// Standard cancel button click: discard the selection and close the panel
standardCancelButton.addEventListener('click', () => {
    resetStandardFileInput();
});
// Show the chosen file's name and size in place of the drop zone
function handleFileSelection(file) {
    const { name, size } = file;
    fileName.textContent = name;
    fileSize.textContent = formatFileSize(size);

    // Swap the drop zone for the file summary panel
    fileInfo.style.display = 'block';
    uploadArea.style.display = 'none';
}
// Clear the selection and bring the drop zone back
function resetFileInput() {
    fileInfo.style.display = 'none';
    uploadArea.style.display = 'block';
    // An empty value drops whatever file the input was holding
    fileInput.value = '';
}
// Format a byte count as a human-readable string such as "1.5 KB".
//
// bytes: size in bytes (number, or a numeric string).
// Returns the value scaled to the largest unit that keeps it >= 1, rounded to
// at most two decimals. Zero, negative and non-numeric input yields '0 Bytes';
// sizes beyond the largest unit are expressed in that unit.
function formatFileSize(bytes) {
    const k = 1024;
    const sizes = ['Bytes', 'KB', 'MB', 'GB', 'TB'];

    let value = Number(bytes);
    if (!Number.isFinite(value) || value <= 0) return '0 Bytes';

    // Repeated division is exact for powers of 1024, unlike a ratio of
    // logarithms, and the bound keeps the index inside the unit table.
    let unitIndex = 0;
    while (value >= k && unitIndex < sizes.length - 1) {
        value /= k;
        unitIndex += 1;
    }

    // parseFloat strips the trailing zeros that toFixed adds
    return parseFloat(value.toFixed(2)) + ' ' + sizes[unitIndex];
}
// Upload a document for analysis and record it in the local history.
//
// file: the File to send as the multipart field "file".
// Shows the loading overlay for the duration of the request; on failure the
// error is logged and the user is alerted.
async function uploadDocument(file) {
    try {
        // Show loading overlay
        loadingOverlay.style.display = 'flex';

        const formData = new FormData();
        formData.append('file', file);

        const response = await fetch(`${API_BASE_URL}/documents/upload`, {
            method: 'POST',
            body: formData
        });
        if (!response.ok) {
            throw new Error(`Error uploading document: ${response.statusText}`);
        }

        const data = await response.json();
        console.log('Upload response:', data);

        // Store document in local storage. Named `doc` rather than `document`
        // so the global `document` stays usable inside this block.
        const doc = {
            id: data.document_id,
            name: file.name,
            status: data.status,
            date: new Date().toISOString(),
            size: file.size
        };
        saveDocument(doc);

        resetFileInput();
        loadDocuments();
        checkDocumentStatus(data.document_id);
    } catch (error) {
        console.error('Error uploading document:', error);
        alert('Failed to upload document. Please try again.');
    } finally {
        loadingOverlay.style.display = 'none';
    }
}
// Insert or update a document record in the local-storage history.
//
// doc: record with at least `id` and `date`; fields of an existing record
// with the same id are overwritten by the ones supplied here.
// The history is kept newest-first and trimmed to 10 entries.
function saveDocument(doc) {
    // Missing, unparsable or non-array stored data is treated as an empty
    // history instead of throwing.
    let documents = [];
    try {
        const stored = JSON.parse(localStorage.getItem(DOCUMENTS_STORAGE_KEY) || '[]');
        if (Array.isArray(stored)) {
            documents = stored.filter(item => item && typeof item === 'object');
        }
    } catch (error) {
        console.warn('Ignoring unreadable stored documents:', error);
    }

    // Check if document already exists
    const existingIndex = documents.findIndex(item => item.id === doc.id);
    if (existingIndex !== -1) {
        // Update existing document
        documents[existingIndex] = {...documents[existingIndex], ...doc};
    } else {
        // Add new document
        documents.push(doc);
    }

    // Sort documents by date (newest first)
    documents.sort((a, b) => new Date(b.date) - new Date(a.date));

    // Keep only the 10 most recent documents
    if (documents.length > 10) {
        documents = documents.slice(0, 10);
    }

    // Save to local storage
    localStorage.setItem(DOCUMENTS_STORAGE_KEY, JSON.stringify(documents));
}
// Rebuild the documents list from the history kept in local storage.
// Shows the empty-state message when there is nothing to list.
function loadDocuments() {
    // This runs during page start-up, so unreadable stored data must not
    // throw: treat it as an empty history.
    let documents = [];
    try {
        const stored = JSON.parse(localStorage.getItem(DOCUMENTS_STORAGE_KEY) || '[]');
        if (Array.isArray(stored)) {
            documents = stored.filter(item => item && typeof item === 'object');
        }
    } catch (error) {
        console.warn('Ignoring unreadable stored documents:', error);
    }

    // Clear documents list
    documentsList.innerHTML = '';

    if (documents.length === 0) {
        noDocuments.style.display = 'block';
        return;
    }
    noDocuments.style.display = 'none';

    // Add documents to list
    documents.forEach(doc => {
        documentsList.appendChild(createDocumentListItem(doc));
    });
}
// Build the <li> for one stored document, with "View Report" and "Resubmit"
// buttons wired to that document's id.
//
// doc: stored record; reads `id`, `name` and `date`.
// Returns the list item, or a plain error item if building it fails.
function createDocumentListItem(doc) {
    try {
        const li = window.document.createElement('li');
        li.className = 'document-item';

        // An unparsable date yields an Invalid Date object rather than an
        // exception, so it has to be detected explicitly.
        let dateStr = 'Unknown date';
        const parsedDate = new Date(doc.date);
        if (!Number.isNaN(parsedDate.getTime())) {
            dateStr = parsedDate.toLocaleDateString('en-US', {
                year: 'numeric',
                month: 'short',
                day: 'numeric',
                hour: '2-digit',
                minute: '2-digit'
            });
        }

        // Static markup only. Stored values are filled in below through
        // textContent/setAttribute so a file name containing markup is shown
        // as text instead of being parsed as HTML.
        li.innerHTML = `
            <div class="document-info">
                <i class="fas fa-file-alt document-icon"></i>
                <div>
                    <p class="document-name"></p>
                    <p class="document-date"></p>
                </div>
            </div>
            <div class="document-actions">
                <button class="button action-button view-button">
                    <i class="fas fa-eye"></i> View Report
                </button>
                <button class="button action-button resubmit-button">
                    <i class="fas fa-redo"></i> Resubmit
                </button>
            </div>
        `;
        li.querySelector('.document-name').textContent = doc.name || 'Unnamed document';
        li.querySelector('.document-date').textContent = dateStr;

        // Add event listeners to buttons
        const viewButton = li.querySelector('.view-button');
        const resubmitButton = li.querySelector('.resubmit-button');
        viewButton.setAttribute('data-id', doc.id);
        resubmitButton.setAttribute('data-id', doc.id);

        viewButton.addEventListener('click', function() {
            viewDocumentReport(this.getAttribute('data-id'));
        });
        resubmitButton.addEventListener('click', function() {
            resubmitDocument(this.getAttribute('data-id'));
        });

        return li;
    } catch (error) {
        console.error('Error creating document list item:', error);
        const li = window.document.createElement('li');
        li.className = 'document-item';
        li.textContent = 'Error displaying document';
        return li;
    }
}
// Fetch the analysis for a document and, when a report is ready, render and reveal it
async function viewDocumentReport(documentId) {
    // Statuses for which no report can be shown, with the message to display
    const blockedStatusMessages = new Map([
        ['pending', 'Document analysis is still in progress. Please try again later.'],
        ['processing', 'Document analysis is still in progress. Please try again later.'],
        ['failed', 'Document analysis failed. Please try resubmitting the document.']
    ]);

    try {
        // Block the UI while the request is in flight
        loadingOverlay.style.display = 'flex';

        const analysisUrl = `${API_BASE_URL}/documents/${documentId}/analysis`;
        const response = await fetch(analysisUrl);
        if (!response.ok) {
            throw new Error(`Error fetching document analysis: ${response.statusText}`);
        }
        const data = await response.json();

        // Analysis unfinished or failed: explain and stop
        if (blockedStatusMessages.has(data.status)) {
            alert(blockedStatusMessages.get(data.status));
            return;
        }

        // Finished, but nothing to show
        if (!data.report) {
            alert('No analysis report available for this document.');
            return;
        }

        // Render, reveal and bring the report into view
        renderReport(data.report);
        reportSection.style.display = 'block';
        reportSection.scrollIntoView({ behavior: 'smooth' });
    } catch (error) {
        console.error('Error viewing document report:', error);
        alert('Failed to load document report. Please try again.');
    } finally {
        // The overlay is removed on every exit path, including the early returns
        loadingOverlay.style.display = 'none';
    }
}
// Validate a chosen standard file and show its name and size in place of the drop zone
function handleStandardFileSelection(file) {
    // Only files with a .json extension (any letter case) are accepted
    const isJson = file.name.toLowerCase().endsWith('.json');
    if (!isJson) {
        alert('Please select a JSON file for standards');
        return;
    }

    standardFileName.textContent = file.name;
    standardFileSize.textContent = formatFileSize(file.size);

    // Swap the drop zone for the file summary panel
    standardFileInfo.style.display = 'block';
    standardUploadArea.style.display = 'none';
}
// Clear the standard selection, restore the drop zone and close the upload panel
function resetStandardFileInput() {
    standardsUpload.style.display = 'none';
    standardFileInfo.style.display = 'none';
    standardUploadArea.style.display = 'block';
    // An empty value drops whatever file the input was holding
    standardFileInput.value = '';
}
// Send a standard file to the API and record the created standard locally
async function uploadStandard(file) {
    try {
        // Block the UI while the request is in flight
        loadingOverlay.style.display = 'flex';

        const payload = new FormData();
        payload.append('file', file);

        const uploadUrl = `${API_BASE_URL}/standards/upload`;
        const response = await fetch(uploadUrl, { method: 'POST', body: payload });
        if (!response.ok) {
            throw new Error(`Error uploading standard: ${response.statusText}`);
        }

        const data = await response.json();
        console.log('Standard upload response:', data);

        // Remember the new standard so it appears in the list
        saveStandard({
            id: data.standard_id,
            name: data.name,
            requirement_count: data.requirement_count,
            date: new Date().toISOString()
        });

        // Close the upload panel and refresh the list before confirming
        resetStandardFileInput();
        loadStandards();
        alert(`Standard "${data.name}" uploaded successfully with ${data.requirement_count} requirements.`);
    } catch (error) {
        console.error('Error uploading standard:', error);
        alert('Failed to upload standard. Please try again.');
    } finally {
        loadingOverlay.style.display = 'none';
    }
}
// Insert or update a standard record in local storage.
//
// standard: record with at least `id` and `date`; fields of an existing
// record with the same id are overwritten by the ones supplied here.
// The stored list is kept newest-first.
function saveStandard(standard) {
    // Missing, unparsable or non-array stored data is treated as an empty
    // list instead of throwing.
    let standards = [];
    try {
        const stored = JSON.parse(localStorage.getItem(STANDARDS_STORAGE_KEY) || '[]');
        if (Array.isArray(stored)) {
            standards = stored.filter(item => item && typeof item === 'object');
        }
    } catch (error) {
        console.warn('Ignoring unreadable stored standards:', error);
    }

    // Check if standard already exists
    const existingIndex = standards.findIndex(std => std.id === standard.id);
    if (existingIndex !== -1) {
        // Update existing standard
        standards[existingIndex] = {...standards[existingIndex], ...standard};
    } else {
        // Add new standard
        standards.push(standard);
    }

    // Sort standards by date (newest first)
    standards.sort((a, b) => new Date(b.date) - new Date(a.date));

    // Save to local storage
    localStorage.setItem(STANDARDS_STORAGE_KEY, JSON.stringify(standards));
}
// Rebuild the standards list from the records kept in local storage.
// Shows the empty-state message when there is nothing to list.
function loadStandards() {
    // This runs during page start-up, so unreadable stored data must not
    // throw: treat it as an empty list.
    let standards = [];
    try {
        const stored = JSON.parse(localStorage.getItem(STANDARDS_STORAGE_KEY) || '[]');
        if (Array.isArray(stored)) {
            standards = stored.filter(item => item && typeof item === 'object');
        }
    } catch (error) {
        console.warn('Ignoring unreadable stored standards:', error);
    }

    // Clear standards list
    standardsList.innerHTML = '';

    if (standards.length === 0) {
        noStandards.style.display = 'block';
        return;
    }
    noStandards.style.display = 'none';

    // Add standards to list
    standards.forEach(standard => {
        standardsList.appendChild(createStandardListItem(standard));
    });
}
// Build the <li> for one stored standard, with a "View" button wired to its id.
//
// standard: stored record; reads `id`, `name`, `date` and `requirement_count`.
// Returns the list item.
function createStandardListItem(standard) {
    const li = document.createElement('li');
    li.className = 'standard-item';

    // An unparsable date yields an Invalid Date object rather than an
    // exception, so it has to be detected explicitly.
    let dateStr = 'Unknown date';
    const addedOn = new Date(standard.date);
    if (!Number.isNaN(addedOn.getTime())) {
        dateStr = addedOn.toLocaleDateString('en-US', {
            year: 'numeric',
            month: 'short',
            day: 'numeric'
        });
    }

    // Static markup only. Stored values are filled in below through
    // textContent/setAttribute so a standard name containing markup is shown
    // as text instead of being parsed as HTML.
    li.innerHTML = `
        <div class="standard-info">
            <i class="fas fa-book standard-icon"></i>
            <div>
                <p class="standard-name"></p>
                <p class="standard-description"></p>
                <p class="standard-requirements"></p>
            </div>
        </div>
        <div class="standard-actions">
            <button class="button action-button view-standard-button">
                <i class="fas fa-eye"></i> View
            </button>
        </div>
    `;
    li.querySelector('.standard-name').textContent = standard.name || 'Unnamed standard';
    li.querySelector('.standard-description').textContent = `Added on ${dateStr}`;
    li.querySelector('.standard-requirements').textContent = `${standard.requirement_count} requirements`;

    // Add event listeners to buttons
    const viewButton = li.querySelector('.view-standard-button');
    viewButton.setAttribute('data-id', standard.id);
    viewButton.addEventListener('click', function() {
        viewStandard(this.getAttribute('data-id'));
    });

    return li;
}
// Fetch one standard from the API and show its requirements in a modal.
//
// standardId: id of the standard to display.
// Shows the loading overlay for the duration of the request; on failure the
// error is logged and the user is alerted.
async function viewStandard(standardId) {
    // Standard and requirement fields come from the API response and are
    // interpolated into markup, so they are escaped first.
    const htmlEntities = { '&': '&amp;', '<': '&lt;', '>': '&gt;', '"': '&quot;', "'": '&#39;' };
    const escapeHtml = value => String(value).replace(/[&<>"']/g, ch => htmlEntities[ch]);

    try {
        // Show loading overlay
        loadingOverlay.style.display = 'flex';

        // Fetch standard details; the id is encoded so it stays one path segment
        const response = await fetch(`${API_BASE_URL}/standards/${encodeURIComponent(standardId)}`);
        if (!response.ok) {
            throw new Error(`Error fetching standard: ${response.statusText}`);
        }
        const standard = await response.json();

        // A response without a requirements array is shown as having none
        const requirements = Array.isArray(standard.requirements) ? standard.requirements : [];

        const requirementItems = requirements.map(req => `
            <li class="requirement-item ${escapeHtml(req.severity)}">
                <div class="requirement-header">
                    <span class="requirement-id">${escapeHtml(req.id)}</span>
                    <span class="badge ${escapeHtml(req.severity)}">${escapeHtml(req.severity)}</span>
                </div>
                <div class="requirement-description">${escapeHtml(req.description)}</div>
                ${req.details ? `<div class="requirement-details">${escapeHtml(req.details)}</div>` : ''}
            </li>
        `).join('');

        // Create modal content
        const modalContent = `
            <div class="standard-detail-header">
                <h3>${escapeHtml(standard.name)}</h3>
                <p>${escapeHtml(standard.description || 'No description available')}</p>
            </div>
            <div class="standard-requirements-list">
                <h4>Requirements (${requirements.length})</h4>
                ${requirements.length === 0 ? '<p>No requirements defined</p>' : ''}
                <ul>
                    ${requirementItems}
                </ul>
            </div>
        `;

        // Create modal
        const modal = document.createElement('div');
        modal.className = 'modal';
        modal.innerHTML = `
            <div class="modal-content">
                <div class="modal-header">
                    <h2>Standard Details</h2>
                    <button class="close-button">&times;</button>
                </div>
                <div class="modal-body">
                    ${modalContent}
                </div>
            </div>
        `;

        // Add modal to body
        document.body.appendChild(modal);

        const closeModal = () => modal.remove();

        // Add close button event listener
        modal.querySelector('.close-button').addEventListener('click', closeModal);

        // Close modal when clicking outside: only a click on the backdrop
        // itself counts, not one inside the dialog
        modal.addEventListener('click', function(e) {
            if (e.target === modal) {
                closeModal();
            }
        });
    } catch (error) {
        console.error('Error viewing standard:', error);
        alert('Failed to load standard details. Please try again.');
    } finally {
        // Hide loading overlay
        loadingOverlay.style.display = 'none';
    }
}
/**
 * Render a compliance report into the report container.
 *
 * Builds the score circle, per-level issue counters, summary, applied
 * standards and the issue list (most severe first) and assigns the markup
 * to `reportContainer.innerHTML`.
 *
 * All report text is HTML-escaped before interpolation: the markup is
 * assigned through innerHTML, so unescaped API/analysis text would be
 * interpreted as markup (script injection).
 *
 * @param {Object} report - Report object; reads `compliance_score`,
 *   `summary`, `applied_standards` and `issues` (each issue: `level`,
 *   `section`, `description`, `reasoning`, `standard_references`,
 *   `recommendation`).
 */
function renderReport(report) {
    // Escape a value for safe use in HTML text and attribute contexts
    const escapeHtml = function(value) {
        if (value === null || value === undefined) {
            return '';
        }
        return String(value)
            .replace(/&/g, '&amp;')
            .replace(/</g, '&lt;')
            .replace(/>/g, '&gt;')
            .replace(/"/g, '&quot;')
            .replace(/'/g, '&#39;');
    };

    // Tolerate reports that carry no issues / standards arrays
    const issues = Array.isArray(report.issues) ? report.issues : [];
    const appliedStandards = Array.isArray(report.applied_standards) ? report.applied_standards : [];

    // Calculate issue counts
    const countLevel = level => issues.filter(issue => issue.level === level).length;
    const criticalCount = countLevel('critical');
    const majorCount = countLevel('major');
    const minorCount = countLevel('minor');
    const infoCount = countLevel('info');

    // Format score as percentage (a missing score is shown as 0.0)
    const scorePercentage = ((Number(report.compliance_score) || 0) * 100).toFixed(1);
    // Compare the rounded value so the colour always agrees with the number shown
    const scoreNumber = Number(scorePercentage);

    // Determine score color based on percentage
    let scoreColor = '#2ecc71'; // Default green
    if (scoreNumber < 50) {
        scoreColor = '#e74c3c'; // Red for low score
    } else if (scoreNumber < 80) {
        scoreColor = '#f39c12'; // Orange for medium score
    }

    // Create HTML
    let html = `
        <div class="score-container">
            <div class="score-circle" style="background-color: ${scoreColor}">
                ${scorePercentage}%
            </div>
            <div class="score-label">Compliance Score</div>
        </div>
        <div class="stats-container">
            <div class="stat-box">
                <div class="stat-value" style="color: #e74c3c">${criticalCount}</div>
                <div class="stat-label">Critical Issues</div>
            </div>
            <div class="stat-box">
                <div class="stat-value" style="color: #f39c12">${majorCount}</div>
                <div class="stat-label">Major Issues</div>
            </div>
            <div class="stat-box">
                <div class="stat-value" style="color: #2ecc71">${minorCount}</div>
                <div class="stat-label">Minor Issues</div>
            </div>
            <div class="stat-box">
                <div class="stat-value" style="color: #3498db">${infoCount}</div>
                <div class="stat-label">Info Issues</div>
            </div>
        </div>
        <div class="summary">
            <h3>Summary</h3>
            <p>${escapeHtml(report.summary)}</p>
            ${appliedStandards.length > 0 ? `
            <div class="applied-standards">
                <h4>Applied Standards</h4>
                <ul class="standards-list">
                    ${appliedStandards.map(std => `<li>${escapeHtml(std)}</li>`).join('')}
                </ul>
            </div>
            ` : ''}
        </div>
        <div class="issues-container">
            <h3>Compliance Issues</h3>
    `;

    if (issues.length === 0) {
        html += '<p>No compliance issues found. Great job!</p>';
    } else {
        // Sort issues by level (critical first); unknown levels sort last
        // instead of producing NaN from the comparator
        const levelOrder = { 'critical': 0, 'major': 1, 'minor': 2, 'info': 3 };
        const rank = level => (
            Object.prototype.hasOwnProperty.call(levelOrder, level) ? levelOrder[level] : 4
        );
        const sortedIssues = [...issues].sort((a, b) => rank(a.level) - rank(b.level));

        // Add issues to HTML
        sortedIssues.forEach(issue => {
            const level = escapeHtml(issue.level);
            const references = Array.isArray(issue.standard_references) ? issue.standard_references : [];
            html += `
                <div class="issue ${level}">
                    <div class="issue-header">
                        <div class="issue-section">${escapeHtml(issue.section)}</div>
                        <span class="badge ${level}">${level}</span>
                    </div>
                    <div class="issue-description">
                        ${escapeHtml(issue.description)}
                    </div>
                    ${issue.reasoning ? `
                    <div class="issue-reasoning">
                        <strong>Reasoning:</strong> ${escapeHtml(issue.reasoning)}
                    </div>` : ''}
                    ${references.length > 0 ? `
                    <div class="issue-references">
                        <strong>Standard References:</strong>
                        <ul class="reference-list">
                            ${references.map(ref => `<li>${escapeHtml(ref)}</li>`).join('')}
                        </ul>
                    </div>` : ''}
                    <div class="issue-recommendation">
                        <strong>Recommendation:</strong> ${escapeHtml(issue.recommendation)}
                    </div>
                </div>
            `;
        });
    }
    html += '</div>';

    // Set report HTML
    reportContainer.innerHTML = html;
}
/**
 * Poll the API for a document's processing status.
 *
 * Waits before every request (2s at first, growing by 1.5x up to 10s),
 * mirrors the returned status into the locally stored document list, and
 * stops after 10 polls or as soon as the status is 'completed' or 'failed'.
 * A completed document that has at least one report is opened via
 * viewDocumentReport. The document list is refreshed once polling ends;
 * any error is logged to the console and otherwise swallowed.
 *
 * @param {string} documentId - Identifier of the document to poll.
 */
async function checkDocumentStatus(documentId) {
    const MAX_POLLS = 10;
    const MAX_WAIT_MS = 10000;
    const pause = ms => new Promise(resolve => setTimeout(resolve, ms));

    try {
        let waitMs = 2000;
        let polls = 0;

        while (polls < MAX_POLLS) {
            polls += 1;

            // Wait first, then ask the server
            await pause(waitMs);
            const response = await fetch(`${API_BASE_URL}/documents/${documentId}`);
            if (!response.ok) {
                throw new Error(`Error checking document status: ${response.statusText}`);
            }
            const data = await response.json();

            // Mirror the status into the locally stored document entry, if present
            const stored = JSON.parse(localStorage.getItem(DOCUMENTS_STORAGE_KEY) || '[]');
            const entry = stored.find(doc => doc.id === documentId);
            if (entry !== undefined) {
                entry.status = data.status;
                localStorage.setItem(DOCUMENTS_STORAGE_KEY, JSON.stringify(stored));
            }

            // Terminal states end the polling loop
            const finished = data.status === 'completed' || data.status === 'failed';
            if (finished) {
                const hasReports = data.reports && data.reports.length > 0;
                if (data.status === 'completed' && hasReports) {
                    viewDocumentReport(documentId);
                }
                break;
            }

            // Back off before the next poll, capped at MAX_WAIT_MS
            waitMs = Math.min(waitMs * 1.5, MAX_WAIT_MS);
        }

        // Refresh document list
        loadDocuments();
    } catch (error) {
        console.error('Error checking document status:', error);
    }
}
/**
 * Ask the user for a replacement file and resubmit it for a document.
 *
 * Tags the shared file input with `data-resubmit-id`, opens the file
 * picker and, once a file is chosen, passes it to
 * handleDocumentResubmission.
 *
 * The temporary 'change' listener and the `data-resubmit-id` attribute are
 * always cleaned up: after a selection, when the picker is cancelled
 * ('cancel' event), and when a new resubmission starts while an earlier one
 * is still pending. Without that cleanup a cancelled picker left a stale
 * listener and id behind, so a later file selection could resubmit against
 * the wrong document, and repeated clicks stacked up listeners.
 *
 * @param {string} documentId - Identifier of the document being resubmitted.
 */
function resubmitDocument(documentId) {
    // Undo everything this call attached to the shared file input
    const cleanup = function() {
        fileInput.removeAttribute('data-resubmit-id');
        fileInput.removeEventListener('change', handleResubmitFileSelection);
        fileInput.removeEventListener('cancel', cleanup);
        if (fileInput._pendingResubmitCleanup === cleanup) {
            fileInput._pendingResubmitCleanup = null;
        }
    };

    // Listen for file selection (one-time event listener)
    const handleResubmitFileSelection = async function() {
        // Detach first so the handler can never run twice for one request
        fileInput.removeEventListener('change', handleResubmitFileSelection);
        const resubmitId = fileInput.getAttribute('data-resubmit-id');
        const file = fileInput.files[0];
        try {
            if (file && resubmitId) {
                // Handle resubmission
                await handleDocumentResubmission(resubmitId, file);
            }
        } finally {
            // The attribute stays set until the resubmission has finished
            cleanup();
        }
    };

    // Discard a previous resubmission that never completed
    if (typeof fileInput._pendingResubmitCleanup === 'function') {
        fileInput._pendingResubmitCleanup();
    }
    fileInput._pendingResubmitCleanup = cleanup;

    // Register listeners before opening the picker, then trigger the file input
    fileInput.setAttribute('data-resubmit-id', documentId);
    fileInput.addEventListener('change', handleResubmitFileSelection);
    fileInput.addEventListener('cancel', cleanup);
    fileInput.click();
}
/**
 * Upload a replacement file for an existing document.
 *
 * POSTs the file as multipart form data to the document's resubmit
 * endpoint, stores the returned document entry locally, refreshes the
 * document list, starts status polling and notifies the user. The loading
 * overlay is shown for the duration of the request; failures are logged
 * and reported with an alert.
 *
 * @param {string} documentId - Identifier of the document being replaced.
 * @param {File} file - Replacement file chosen by the user.
 */
async function handleDocumentResubmission(documentId, file) {
    try {
        // Show loading overlay
        loadingOverlay.style.display = 'flex';

        // Build the multipart payload
        const payload = new FormData();
        payload.append('file', file);

        // Send request to API
        const endpoint = `${API_BASE_URL}/documents/${documentId}/resubmit`;
        const response = await fetch(endpoint, { method: 'POST', body: payload });
        if (!response.ok) {
            throw new Error(`Error resubmitting document: ${response.statusText}`);
        }
        const result = await response.json();

        // Record the resubmitted document in local storage
        // (named so it does not shadow the global `document`)
        const updatedRecord = {
            id: result.document_id,
            name: file.name,
            status: result.status,
            date: new Date().toISOString(),
            size: file.size
        };
        saveDocument(updatedRecord);

        // Load updated document list
        loadDocuments();

        // Check document status and show report if ready (not awaited)
        checkDocumentStatus(result.document_id);

        // Show success message
        alert('Document resubmitted successfully! The analysis is in progress.');
    } catch (error) {
        console.error('Error resubmitting document:', error);
        alert('Failed to resubmit document. Please try again.');
    } finally {
        // Hide loading overlay
        loadingOverlay.style.display = 'none';
    }
}
});
+295
View File
@@ -0,0 +1,295 @@
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Document Compliance Report</title>
<style>
body {
font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
line-height: 1.6;
color: #333;
max-width: 1200px;
margin: 0 auto;
padding: 20px;
}
.header {
background-color: #f8f9fa;
padding: 20px;
border-radius: 5px;
margin-bottom: 20px;
border-left: 5px solid #007bff;
}
.document-info {
display: flex;
justify-content: space-between;
align-items: center;
margin-bottom: 20px;
}
.document-info-left {
flex: 3;
}
.document-info-right {
flex: 1;
text-align: right;
}
.score-container {
font-size: 24px;
font-weight: bold;
color: #28a745;
}
.summary {
background-color: #e9f7ef;
padding: 20px;
border-radius: 5px;
margin-bottom: 30px;
border-left: 5px solid #27ae60;
}
.applied-standards {
margin-top: 1.5rem;
padding: 1rem;
background-color: #f8f9fa;
border-radius: 5px;
border-left: 3px solid #3498db;
}
.applied-standards h3 {
margin-top: 0;
font-size: 1.2rem;
color: #495057;
}
.standards-list {
margin: 0.5rem 0 0 1.5rem;
padding: 0;
}
.standards-list li {
margin-bottom: 0.25rem;
}
.issues-container {
margin-bottom: 30px;
}
.issue {
margin-bottom: 15px;
padding: 15px;
border-radius: 5px;
background-color: #f8f9fa;
border-left: 4px solid #6c757d;
}
.issue.critical {
background-color: #fdedec;
border-left-color: #e74c3c;
}
.issue.major {
background-color: #fef9e7;
border-left-color: #f39c12;
}
.issue.minor {
background-color: #eafaf1;
border-left-color: #2ecc71;
}
.issue.info {
background-color: #ebf5fb;
border-left-color: #3498db;
}
.issue-header {
display: flex;
justify-content: space-between;
align-items: center;
margin-bottom: 10px;
}
.issue-section {
font-weight: bold;
font-size: 18px;
}
.badge {
display: inline-block;
padding: 5px 10px;
border-radius: 3px;
font-size: 12px;
font-weight: bold;
text-transform: uppercase;
color: white;
}
.badge.critical {
background-color: #e74c3c;
}
.badge.major {
background-color: #f39c12;
}
.badge.minor {
background-color: #2ecc71;
}
.badge.info {
background-color: #3498db;
}
.issue-description {
margin-bottom: 10px;
}
.issue-recommendation {
background-color: #f8f9fa;
padding: 10px;
border-radius: 3px;
margin-top: 10px;
}
.issue-reasoning {
margin-top: 10px;
padding: 10px;
background-color: #ebf5fb;
border-radius: 3px;
}
.issue-references {
margin-top: 10px;
}
.reference-list {
margin-top: 5px;
margin-left: 20px;
font-size: 0.9rem;
}
.stats {
display: flex;
justify-content: space-between;
margin-bottom: 20px;
flex-wrap: wrap;
}
.stat-box {
flex: 1;
min-width: 200px;
background-color: #f8f9fa;
padding: 15px;
border-radius: 5px;
margin: 5px;
text-align: center;
}
.stat-value {
font-size: 24px;
font-weight: bold;
margin-bottom: 5px;
}
.stat-label {
color: #6c757d;
font-size: 14px;
}
.footer {
margin-top: 30px;
text-align: center;
color: #6c757d;
font-size: 14px;
padding-top: 20px;
border-top: 1px solid #dee2e6;
}
@media print {
body {
padding: 0;
font-size: 12px;
}
.issue {
break-inside: avoid;
}
.header, .summary {
break-inside: avoid;
}
}
</style>
</head>
<body>
<div class="header">
<h1>Document Compliance Report</h1>
<div class="document-info">
<div class="document-info-left">
<p><strong>Document:</strong> {{ document_name }}</p>
<p><strong>Generated:</strong> {{ timestamp }}</p>
</div>
<div class="document-info-right">
    <div class="score-container">
        {# Parenthesised on purpose: filters bind tighter than `*`, so without
           the parentheses only the literal 100 would be rounded. #}
        {{ (compliance_score * 100) | round(1) }}%
    </div>
    <div>Compliance Score</div>
</div>
</div>
</div>
<div class="summary">
<h2>Summary</h2>
<p>{{ summary }}</p>
{% if applied_standards and applied_standards|length > 0 %}
<div class="applied-standards">
<h3>Applied Standards</h3>
<ul class="standards-list">
{% for standard in applied_standards %}
<li>{{ standard }}</li>
{% endfor %}
</ul>
</div>
{% endif %}
</div>
<div class="stats">
<div class="stat-box">
<div class="stat-value" style="color: #e74c3c;">{{ critical_count }}</div>
<div class="stat-label">Critical Issues</div>
</div>
<div class="stat-box">
<div class="stat-value" style="color: #f39c12;">{{ major_count }}</div>
<div class="stat-label">Major Issues</div>
</div>
<div class="stat-box">
<div class="stat-value" style="color: #2ecc71;">{{ minor_count }}</div>
<div class="stat-label">Minor Issues</div>
</div>
<div class="stat-box">
<div class="stat-value" style="color: #3498db;">{{ info_count }}</div>
<div class="stat-label">Info Issues</div>
</div>
</div>
<div class="issues-container">
<h2>Compliance Issues</h2>
{% if issues %}
{% for issue in issues %}
<div class="issue {{ issue.level }}">
<div class="issue-header">
<div class="issue-section">{{ issue.section }}</div>
<span class="badge {{ issue.level }}">{{ issue.level }}</span>
</div>
<div class="issue-description">
{{ issue.description }}
</div>
{% if issue.reasoning %}
<div class="issue-reasoning">
<strong>Reasoning:</strong> {{ issue.reasoning }}
</div>
{% endif %}
{% if issue.standard_references and issue.standard_references|length > 0 %}
<div class="issue-references">
<strong>Standard References:</strong>
<ul class="reference-list">
{% for reference in issue.standard_references %}
<li>{{ reference }}</li>
{% endfor %}
</ul>
</div>
{% endif %}
<div class="issue-recommendation">
<strong>Recommendation:</strong> {{ issue.recommendation }}
</div>
</div>
{% endfor %}
{% else %}
<p>No compliance issues found. Great job!</p>
{% endif %}
</div>
<div class="footer">
<p>Generated by Mini SpecsComply Pro</p>
<p>This report is for informational purposes only and should be reviewed by a qualified professional.</p>
</div>
</body>
</html>
+1
View File
@@ -0,0 +1 @@
"""Utility functions for the Mini SpecsComply Pro application."""
+283
View File
@@ -0,0 +1,283 @@
# Utility functions
import re
from typing import Dict, List, Any, Optional
import os
from datetime import datetime
import json
def extract_sections_from_markdown(markdown_text: str) -> Dict[str, str]:
    """
    Extract sections from a markdown document.

    The complete text is always stored under the key ``"full_document"``.
    Every ATX heading (``#`` to ``######`` followed by spaces/tabs and a
    title) adds an entry keyed ``"h<level>_<title>"`` whose value is the
    stripped text between that heading and the next heading (or the end of
    the document). Headings that repeat the same level and title share a
    key, so the later section replaces the earlier one.

    Args:
        markdown_text: The markdown text to parse

    Returns:
        Dictionary mapping section names to section content
    """
    sections = {"full_document": markdown_text}

    # The separator is restricted to spaces/tabs and the title must start
    # with a non-whitespace character. With a plain ``\s+`` the separator
    # could swallow a newline, turning a bare "#" line plus the following
    # line into a bogus heading.
    heading_pattern = re.compile(r'^(#{1,6})[ \t]+(\S.*)$', re.MULTILINE)
    matches = list(heading_pattern.finditer(markdown_text))

    for index, match in enumerate(matches):
        heading_level = len(match.group(1))
        section_name = match.group(2).strip()

        # Section content runs from this heading to the next one, or to the end
        start_pos = match.end()
        if index + 1 < len(matches):
            end_pos = matches[index + 1].start()
        else:
            end_pos = len(markdown_text)

        sections[f"h{heading_level}_{section_name}"] = markdown_text[start_pos:end_pos].strip()

    return sections
def detect_file_type(filename: str) -> str:
    """
    Map a filename's extension to a coarse file type.

    The extension is compared case-insensitively and without its leading dot.

    Args:
        filename: Name of the file

    Returns:
        One of 'markdown', 'text', 'json', 'yaml', 'html', or 'unknown'
        when the extension is missing or not recognised
    """
    type_by_extension = {
        'md': 'markdown',
        'markdown': 'markdown',
        'txt': 'text',
        'text': 'text',
        'json': 'json',
        'yaml': 'yaml',
        'yml': 'yaml',
        'html': 'html',
        'htm': 'html',
    }
    suffix = os.path.splitext(filename)[1].lower().lstrip('.')
    return type_by_extension.get(suffix, 'unknown')
def parse_code_blocks(content: str) -> List[Dict[str, str]]:
    """
    Extract fenced code blocks from markdown content.

    The text after the opening fence (the "info string") may contain any
    characters except a newline or backtick, so languages such as ``c++``
    or ``objective-c`` and info strings with extra attributes
    (``python title="x"``) are recognised. The language is the first
    whitespace-separated token of the info string, or ``'text'`` when the
    info string is empty.

    Args:
        content: Markdown content with code blocks

    Returns:
        List of dictionaries with 'language' and 'code' keys, in document
        order; the code is stripped of surrounding whitespace
    """
    # A ``\w*`` info string would fail on "c++" and friends; the regex would
    # then skip the opening fence and could pair the *closing* fence with a
    # later one, returning prose as code.
    pattern = r'```([^\n`]*)\n([\s\S]*?)```'

    code_blocks = []
    for info, code in re.findall(pattern, content):
        tokens = info.split()
        code_blocks.append({
            'language': tokens[0] if tokens else 'text',
            'code': code.strip()
        })
    return code_blocks
def format_timestamp(timestamp: datetime) -> str:
    """
    Render a datetime as ``YYYY-MM-DD HH:MM:SS`` for display.

    Args:
        timestamp: Datetime object

    Returns:
        Formatted timestamp string
    """
    return f"{timestamp:%Y-%m-%d %H:%M:%S}"
def calculate_readability_score(text: str) -> float:
    """
    Calculate a simple readability score for text.

    The score depends only on the average number of words per sentence:

    * up to 10 words  -> 0.7 (very short sentences)
    * up to 20 words  -> 1.0 (optimal)
    * up to 30 words  -> 0.8 (getting long)
    * more than 30    -> 0.5 (too long)

    Sentences are the non-empty fragments between runs of ``.``, ``!`` and
    ``?``.

    Args:
        text: The text to analyze

    Returns:
        Readability score (0.0-1.0); 0.0 when the text has no words
    """
    if not text:
        return 0.0

    # re.split leaves an empty fragment after the final terminator (and
    # between adjacent ones); counting those as sentences would understate
    # the average sentence length.
    sentences = [part for part in re.split(r'[.!?]+', text) if part.strip()]
    words = re.findall(r'\b\w+\b', text)
    if not words or not sentences:
        return 0.0

    # Average words per sentence
    avg_words_per_sentence = len(words) / len(sentences)

    # Optimal is around 15-20 words per sentence
    if avg_words_per_sentence <= 10:
        return 0.7  # Very short sentences
    if avg_words_per_sentence <= 20:
        return 1.0  # Optimal
    if avg_words_per_sentence <= 30:
        return 0.8  # Getting long
    return 0.5  # Too long
def sanitize_filename(filename: str) -> str:
    """
    Sanitize filename to be safe for filesystem.

    Replaces ``< > : " / \\ | ? *`` and ASCII control characters (including
    NUL and newlines) with underscores, then limits the result to 255
    characters, keeping the extension when it fits.

    Args:
        filename: Original filename

    Returns:
        Sanitized filename, at most 255 characters long
    """
    max_length = 255

    # Replace illegal characters; control characters are included because
    # NUL is rejected by file APIs and the others are invalid on Windows.
    sanitized = re.sub(r'[<>:"/\\|?*\x00-\x1f]', '_', filename)

    # Ensure it's not too long
    if len(sanitized) > max_length:
        base, ext = os.path.splitext(sanitized)
        if len(ext) >= max_length:
            # The "extension" alone is over the limit; preserving it would
            # make the slice bound negative and leave the name too long.
            sanitized = sanitized[:max_length]
        else:
            sanitized = base[:max_length - len(ext)] + ext
    return sanitized
def load_standards_from_file(file_path: str) -> List[Dict[str, Any]]:
    """
    Load compliance standards from a JSON file.

    Loading is best-effort: a missing file, malformed JSON, or content that
    is not valid UTF-8 yields an empty list instead of an exception.

    Args:
        file_path: Path to the standards JSON file

    Returns:
        The parsed JSON content (expected to be a list of standard
        dictionaries), or an empty list if the file cannot be loaded
    """
    try:
        # JSON is UTF-8 by specification; relying on the platform default
        # encoding would misread non-ASCII text on some systems.
        with open(file_path, 'r', encoding='utf-8') as f:
            return json.load(f)
    except (FileNotFoundError, json.JSONDecodeError, UnicodeDecodeError):
        # Return empty list if file not found or invalid
        return []
def _render_applied_standards(standards: List[str]) -> str:
"""
Render HTML for applied standards section.
Args:
standards: List of standard names
Returns:
HTML string for the applied standards section
"""
if not standards:
return ""
html = """<div style="margin-top: 15px; padding: 10px; background-color: #f8f9fa; border-radius: 5px;">
<h3 style="margin-top: 0; font-size: 16px; color: #495057;">Applied Standards</h3>
<ul style="margin: 5px 0 0 20px; padding: 0;">
"""
for standard in standards:
html += f"<li style=\"margin-bottom: 3px;\">{standard}</li>\n"
html += "</ul></div>"
return html
def generate_html_report(report_data: Dict[str, Any]) -> str:
"""
Generate HTML for compliance report.
Args:
report_data: Report data dictionary
Returns:
HTML string for the report
"""
# Simple HTML template for the report
html = f"""
<!DOCTYPE html>
<html>
<head>
<title>Compliance Report</title>
<style>
body {{ font-family: Arial, sans-serif; margin: 0; padding: 20px; color: #333; }}
.header {{ background-color: #f5f5f5; padding: 15px; border-bottom: 1px solid #ddd; }}
.summary {{ margin: 20px 0; padding: 15px; background-color: #e9f7ef; border-left: 4px solid #27ae60; }}
.issues {{ margin: 20px 0; }}
.issue {{ margin-bottom: 15px; padding: 15px; background-color: #f9f9f9; border-left: 4px solid #3498db; }}
.issue.critical {{ background-color: #fdedec; border-left-color: #c0392b; }}
.issue.major {{ background-color: #fef9e7; border-left-color: #f1c40f; }}
.issue.minor {{ background-color: #eafaf1; border-left-color: #2ecc71; }}
.issue.info {{ background-color: #ebf5fb; border-left-color: #3498db; }}
.issue h3 {{ margin-top: 0; }}
.issue p {{ margin: 5px 0; }}
.badge {{ display: inline-block; padding: 3px 7px; border-radius: 3px; font-size: 12px; color: white; }}
.badge.critical {{ background-color: #c0392b; }}
.badge.major {{ background-color: #f1c40f; color: #333; }}
.badge.minor {{ background-color: #2ecc71; }}
.badge.info {{ background-color: #3498db; }}
.score {{ font-size: 24px; font-weight: bold; }}
.score-container {{ text-align: right; }}
</style>
</head>
<body>
<div class="header">
<h1>Compliance Report</h1>
<p>Document: {report_data.get('document_name', 'Unknown')}</p>
<p>Generated: {report_data.get('timestamp', datetime.now().strftime('%Y-%m-%d %H:%M:%S'))}</p>
<div class="score-container">
<span>Compliance Score: </span>
<span class="score">{report_data.get('compliance_score', 0) * 100:.1f}%</span>
</div>
</div>
<div class="summary">
<h2>Summary</h2>
<p>{report_data.get('summary', 'No summary available.')}</p>
{_render_applied_standards(report_data.get('applied_standards', []))}
</div>
<div class="issues">
<h2>Compliance Issues</h2>
"""
# Add issues
issues = report_data.get('issues', [])
if not issues:
html += "<p>No compliance issues found.</p>"
else:
for issue in issues:
level = issue.get('level', 'info').lower()
html += f"""
<div class="issue {level}">
<h3>{issue.get('section', 'Unknown Section')}</h3>
<p><span class="badge {level}">{level.upper()}</span> {issue.get('description', 'No description')}</p>
<p><strong>Recommendation:</strong> {issue.get('recommendation', 'No recommendation')}</p>
</div>
"""
# Close HTML
html += """
</div>
</body>
</html>
"""
return html
+80
View File
@@ -0,0 +1,80 @@
"""
Token counting utilities for document processing.
"""
import tiktoken
from typing import Dict, List, Optional, Union
from loguru import logger
# Default models to use for token counting
DEFAULT_MODEL = "gpt-4o"
def count_tokens(text: str, model: str = DEFAULT_MODEL) -> int:
    """
    Count the number of tokens in a text string using tiktoken.

    Tries the encoding registered for ``model`` first, then the
    ``cl100k_base`` encoding, and finally falls back to a rough estimate of
    four characters per token. Failures are logged, never raised.

    Args:
        text: The text to count tokens for
        model: The model to use for token counting (default: gpt-4o)

    Returns:
        Number of tokens in the text
    """
    try:
        encoding = tiktoken.encoding_for_model(model)
        # disallowed_special=() makes special-token literals such as
        # "<|endoftext|>" count as ordinary text. With tiktoken's default,
        # encode() raises on them, which would push any document that
        # merely mentions such a token onto the chars/4 estimate.
        return len(encoding.encode(text, disallowed_special=()))
    except Exception as model_error:
        logger.warning(f"Error counting tokens with model {model}: {str(model_error)}")

    # Fallback to cl100k_base encoding if model-specific encoding fails
    try:
        encoding = tiktoken.get_encoding("cl100k_base")
        return len(encoding.encode(text, disallowed_special=()))
    except Exception as fallback_error:
        logger.error(f"Error counting tokens with fallback encoding: {str(fallback_error)}")

    # If all else fails, use a rough approximation (4 chars per token)
    return len(text) // 4
def truncate_by_tokens(text: str, max_tokens: int, model: str = DEFAULT_MODEL) -> str:
    """
    Truncate text to fit within a maximum token count.

    Text that already fits is returned unchanged. Otherwise the first
    ``max_tokens`` tokens are decoded and the marker ``"...(truncated)"`` is
    appended, so the returned string is slightly longer than ``max_tokens``
    tokens. If tokenisation fails, the text is cut at ``max_tokens * 4``
    characters instead.

    Args:
        text: The text to truncate
        max_tokens: Maximum number of tokens to allow; negative values are
            treated as 0
        model: The model to use for token counting (default: gpt-4o)

    Returns:
        The original text, or its truncated prefix followed by
        "...(truncated)"
    """
    # A negative limit would slice from the end (tokens[:-n]) and keep
    # almost the whole text; clamp it to "keep nothing".
    max_tokens = max(max_tokens, 0)

    try:
        encoding = tiktoken.encoding_for_model(model)
        # disallowed_special=() encodes special-token literals as plain text
        # instead of raising when the document happens to contain them.
        tokens = encoding.encode(text, disallowed_special=())
        if len(tokens) <= max_tokens:
            return text

        # Truncate tokens and decode, then add truncation indicator
        truncated_text = encoding.decode(tokens[:max_tokens])
        return truncated_text + "...(truncated)"
    except Exception as e:
        logger.warning(f"Error truncating by tokens with model {model}: {str(e)}")

    # Fallback to character-based truncation if token-based fails
    approx_chars = max_tokens * 4  # Rough approximation
    if len(text) <= approx_chars:
        return text
    return text[:approx_chars] + "...(truncated)"
def estimate_tokens_from_chars(char_count: int) -> int:
    """
    Estimate a token count from a character count.

    Uses the rough average of four characters per token and rounds down
    (floor division).

    Args:
        char_count: Number of characters

    Returns:
        Estimated number of tokens
    """
    chars_per_token = 4
    estimated_tokens = char_count // chars_per_token
    return estimated_tokens