Initial commit

2025-07-17 22:20:25 +01:00
commit 0e3e22e8cb
39 changed files with 13295 additions and 0 deletions
@@ -0,0 +1 @@
+"""Services for the Mini SpecsComply Pro application."""
@@ -0,0 +1,461 @@
+# Document processing
+import os
+import uuid
+from datetime import datetime, timedelta
+from typing import Dict, List, Optional, BinaryIO, Tuple
+import re
+from loguru import logger
+
+from app.core.models import (
+    Document,
+    DocumentMetadata,
+    DocumentStatus,
+    ComplianceReport,
+    ComplianceIssue,
+    ComplianceLevel,
+    DocumentEmbedding
+)
+from app.services.embedding import EmbeddingService
+from app.services.reasoning import ReasoningService
+from app.services.standards import StandardsService
+from app.utils.token_counter import count_tokens, truncate_by_tokens
+
+class DocumentService:
+    """Service for handling document processing and storage."""
+
+    def __init__(self, embedding_service: EmbeddingService, reasoning_service: ReasoningService, standards_service: Optional[StandardsService] = None):
+        """
+        Wire up the collaborating services and create the in-memory stores.
+
+        Args:
+            embedding_service: Service used to embed document sections
+            reasoning_service: Service used to run the compliance analysis
+            standards_service: Optional standards service; a new
+                StandardsService is created when a falsy value is passed
+        """
+        # In-memory storage only (replace with DB in production)
+        self.documents = {}
+        self.reports = {}
+
+        self.embedding_service = embedding_service
+        self.reasoning_service = reasoning_service
+
+        if standards_service:
+            self.standards_service = standards_service
+        else:
+            self.standards_service = StandardsService()
+
+    async def upload_document(self, file: BinaryIO, filename: str) -> Document:
+        """
+        Validate, register and process an uploaded document.
+
+        Args:
+            file: The document file
+            filename: Name of the uploaded file
+
+        Returns:
+            Document object with metadata
+
+        Raises:
+            ValueError: If the file extension is not supported
+            Exception: Processing errors are logged and re-raised; the document
+                remains in storage with status FAILED
+        """
+        # Validate file type
+        if not self._validate_file_type(filename):
+            raise ValueError("Unsupported file type. Supported types: .txt, .md, .rst, .doc, .docx, .pdf")
+
+        # Get file content
+        content = await self._read_file_content(file)
+
+        # Take a single timestamp so upload_timestamp and last_modified agree
+        # exactly for a freshly uploaded document.
+        now = datetime.now()
+        metadata = DocumentMetadata(
+            filename=filename,
+            file_type=self._get_file_type(filename),
+            file_size=len(content),  # length of the decoded text, not raw bytes
+            upload_timestamp=now,
+            last_modified=now
+        )
+
+        # Create document object
+        document_id = str(uuid.uuid4())
+        document = Document(
+            id=document_id,
+            metadata=metadata,
+            status=DocumentStatus.PENDING,
+            version=1
+        )
+
+        # Store the document before processing: _process_document looks it up by ID
+        self.documents[document_id] = document
+
+        # Start processing
+        try:
+            await self._process_document(document_id, content)
+        except Exception as e:
+            logger.error(f"Error processing document {document_id}: {str(e)}")
+            document.status = DocumentStatus.FAILED
+            raise
+
+        return document
+
+    async def get_document(self, document_id: str) -> Optional[Document]:
+        """
+        Look up a stored document.
+
+        Args:
+            document_id: The ID of the document to retrieve
+
+        Returns:
+            The stored Document, or None when the ID is unknown
+        """
+        try:
+            return self.documents[document_id]
+        except KeyError:
+            return None
+
+    async def get_report(self, report_id: str) -> Optional[ComplianceReport]:
+        """
+        Look up a stored compliance report.
+
+        Args:
+            report_id: The ID of the report to retrieve
+
+        Returns:
+            The stored ComplianceReport, or None when the ID is unknown
+        """
+        try:
+            return self.reports[report_id]
+        except KeyError:
+            return None
+
+    async def resubmit_document(self, document_id: str, file: BinaryIO) -> Document:
+        """
+        Replace the content of an existing document and re-run processing.
+
+        Processing errors are logged and reflected in the document status
+        (FAILED) rather than raised to the caller.
+
+        Args:
+            document_id: The ID of the document to resubmit
+            file: The updated document file
+
+        Returns:
+            Updated Document object
+
+        Raises:
+            ValueError: If no document with the given ID exists
+        """
+        existing = await self.get_document(document_id)
+        if not existing:
+            raise ValueError(f"Document with ID {document_id} not found")
+
+        content = await self._read_file_content(file)
+
+        # Refresh bookkeeping for the new version before processing starts
+        meta = existing.metadata
+        meta.file_size = len(content)
+        meta.last_modified = datetime.now()
+        existing.version = existing.version + 1
+        existing.status = DocumentStatus.PENDING
+
+        try:
+            await self._process_document(document_id, content)
+        except Exception as e:
+            logger.error(f"Error processing resubmitted document {document_id}: {str(e)}")
+            existing.status = DocumentStatus.FAILED
+
+        return existing
+
+    async def process_document(self, document_id: str, content: str) -> ComplianceReport:
+        """
+        Run the analysis pipeline for a stored document and keep the report.
+
+        The content is split into sections, the sections are embedded, the
+        applicable standards are identified and the reasoning service produces
+        the report, which is stored under its report_id.
+
+        Args:
+            document_id: The ID of the document
+            content: Document content
+
+        Returns:
+            ComplianceReport object
+
+        Raises:
+            ValueError: If the document is not in storage
+            Exception: Any pipeline error is logged and re-raised
+        """
+        try:
+            document = self.documents.get(document_id)
+            if not document:
+                raise ValueError(f"Document {document_id} not found")
+
+            sections = self._split_into_sections(content)
+
+            # Embedding metadata is attached to the document itself
+            document.embedding = await self.embedding_service.embed_document(document_id, sections)
+
+            if not self.standards_service:
+                # Fallback list used when no standards service is available
+                logger.warning(f"No StandardsService available for document {document_id}")
+                standard_names = ["ISO-9001", "IEEE-829", "RFC-2119"]
+            else:
+                # Log the standards service instance ID to verify singleton pattern
+                logger.info(f"Using StandardsService instance: {id(self.standards_service)}")
+                logger.info(f"Standards count before matching: {len(self.standards_service.standards)}")
+
+                standard_names = await self.standards_service.get_standard_names_for_document(content)
+                logger.info(f"Identified standards for document {document_id}: {standard_names}")
+
+            report = await self.reasoning_service.analyze_document(document_id, sections, standard_names)
+            self.reports[report.report_id] = report
+        except Exception as e:
+            logger.error(f"Error in document processing: {str(e)}")
+            raise
+
+        return report
+
+    async def _read_file_content(self, file: BinaryIO) -> str:
+        """
+        Read and decode file content.
+
+        UTF-8 is tried first; if that fails the bytes are decoded as Latin-1.
+
+        Args:
+            file: Binary file-like object to read
+
+        Returns:
+            File content as string
+
+        Raises:
+            ValueError: If the content cannot be decoded
+        """
+        file_content = file.read()
+
+        try:
+            return file_content.decode('utf-8')
+        except UnicodeDecodeError:
+            pass
+
+        # Latin-1 maps every byte value to a code point, so this fallback is
+        # expected to succeed for any bytes input. The handler only catches
+        # decode errors (never KeyboardInterrupt/SystemExit) and keeps the
+        # original cause attached.
+        try:
+            return file_content.decode('latin-1')
+        except UnicodeDecodeError as exc:
+            raise ValueError("Unable to decode file content. Please ensure file is text-based.") from exc
+
+    def _get_file_type(self, filename: str) -> str:
+        """
+        Return the lower-cased extension of a filename without the dot.
+
+        Args:
+            filename: The name of the file
+
+        Returns:
+            File type (extension); empty string when there is no extension
+        """
+        extension = os.path.splitext(filename)[1]
+        return extension.lower().lstrip('.')
+
+    def _validate_file_type(self, filename: str) -> bool:
+        """
+        Check the filename's extension against the supported set.
+
+        The comparison is case-insensitive.
+
+        Args:
+            filename: Name of the file to validate
+
+        Returns:
+            bool: True if file type is supported, False otherwise
+        """
+        supported = ('.txt', '.md', '.rst', '.doc', '.docx', '.pdf')
+        suffix = os.path.splitext(filename)[1].lower()
+        return suffix in supported
+
+    def _split_into_sections(self, content: str) -> Dict[str, str]:
+        """
+        Split document content into sections.
+
+        The whole text is always returned under "full_document". If markdown
+        headings (levels 1-3) are present, each heading yields a section keyed
+        "h<level>_<title>"; a repeated level/title pair gets a numeric suffix
+        ("_2", "_3", ...) so later sections no longer overwrite earlier ones.
+        Without headings, blank-line separated paragraphs longer than 100
+        characters are returned as "paragraph_<n>".
+
+        Args:
+            content: The document content
+
+        Returns:
+            Dictionary mapping section names to section content
+        """
+        # This is a simple implementation - in production, you would use more advanced
+        # techniques like heading detection, markdown parsing, etc.
+        sections = {"full_document": content}
+
+        heading_pattern = re.compile(r'^(#{1,3})\s+(.+)$', re.MULTILINE)
+        matches = list(heading_pattern.finditer(content))
+
+        if not matches:
+            # No headings found, fall back to blank-line separated paragraphs.
+            # Numbering counts every non-empty paragraph, including skipped short ones.
+            paragraphs = [p for p in content.split('\n\n') if p.strip()]
+            for number, paragraph in enumerate(paragraphs, start=1):
+                if len(paragraph) > 100:  # Only include substantial paragraphs
+                    sections[f"paragraph_{number}"] = paragraph
+            return sections
+
+        for i, match in enumerate(matches):
+            heading_level = len(match.group(1))
+            section_name = match.group(2).strip()
+
+            # Section body runs from the end of this heading to the next heading
+            # (or to the end of the document for the last one)
+            start_pos = match.end()
+            end_pos = matches[i + 1].start() if i < len(matches) - 1 else len(content)
+
+            base_key = f"h{heading_level}_{section_name}"
+            section_key = base_key
+            duplicate_number = 2
+            while section_key in sections:
+                section_key = f"{base_key}_{duplicate_number}"
+                duplicate_number += 1
+
+            sections[section_key] = content[start_pos:end_pos].strip()
+
+        return sections
+
+    async def _generate_mock_report(self, document_id: str, sections: Dict[str, str]) -> ComplianceReport:
+        """
+        Build a heuristic stand-in compliance report for development/testing.
+
+        Only the "full_document" section is inspected, using three simple
+        keyword/count checks. In production the reasoning service would be used.
+
+        Args:
+            document_id: The ID of the document
+            sections: Dictionary of document sections
+
+        Returns:
+            ComplianceReport object
+        """
+        found = []
+
+        if "full_document" in sections:
+            text = sections["full_document"]
+            lowered = text.lower()
+
+            # Mock structure check: the word "introduction" must appear somewhere
+            if "introduction" not in lowered:
+                found.append(ComplianceIssue(
+                    section="Document Structure",
+                    description="Missing introduction section",
+                    level=ComplianceLevel.MAJOR,
+                    recommendation="Add an introduction section to provide context for the document"
+                ))
+
+            # Mock formatting check: fewer than three '#' characters in total
+            if text.count('#') < 3:
+                found.append(ComplianceIssue(
+                    section="Formatting",
+                    description="Insufficient section headings",
+                    level=ComplianceLevel.MINOR,
+                    recommendation="Use markdown headings to better structure the document"
+                ))
+
+            # Mock technical check: "compliance" mentioned without "standard"
+            if "compliance" in lowered and "standard" not in lowered:
+                found.append(ComplianceIssue(
+                    section="Technical Content",
+                    description="Mentions compliance but doesn't reference specific standards",
+                    level=ComplianceLevel.CRITICAL,
+                    recommendation="Specify which standards or regulations the document complies with"
+                ))
+
+        # Each issue costs 0.1, floored at 0.0; no issues means a perfect score
+        compliance_score = max(0.0, 1.0 - (len(found) * 0.1)) if found else 1.0
+
+        if found:
+            summary = f"The document has {len(found)} compliance issues: "
+            severity_labels = (
+                ("critical", ComplianceLevel.CRITICAL),
+                ("major", ComplianceLevel.MAJOR),
+                ("minor", ComplianceLevel.MINOR),
+            )
+            for label, level in severity_labels:
+                tally = sum(1 for issue in found if issue.level == level)
+                if tally:
+                    summary += f"{tally} {label}, "
+            # Drop the trailing separator and close the sentence
+            summary = summary.rstrip(", ") + "."
+            summary += " See detailed report for recommendations."
+        else:
+            summary = "The document meets all compliance requirements. No issues found."
+
+        return ComplianceReport(
+            document_id=document_id,
+            compliance_score=compliance_score,
+            summary=summary,
+            issues=found
+        )
+
+    async def _process_document(self, document_id: str, content: str) -> None:
+        """
+        Drive a stored document through processing and track its status.
+
+        The status moves to PROCESSING, then to COMPLETED once the report ID has
+        been appended to the document, or to FAILED if anything raises.
+
+        Args:
+            document_id: The ID of the document to process
+            content: The document content
+
+        Raises:
+            ValueError: If the document is not in storage
+            Exception: Any processing error is re-raised after marking FAILED
+        """
+        document = self.documents.get(document_id)
+        if not document:
+            raise ValueError(f"Document {document_id} not found")
+
+        try:
+            document.status = DocumentStatus.PROCESSING
+
+            report = await self.process_document(document_id, content)
+
+            # Link the new report to the document before marking it done
+            document.reports.append(report.report_id)
+            document.status = DocumentStatus.COMPLETED
+        except Exception:
+            document.status = DocumentStatus.FAILED
+            raise
+
+    async def get_document_stats(self, document_id: str) -> Dict[str, object]:
+        """
+        Get statistics for a document.
+
+        Issue counts and the compliance score come from the most recently
+        appended report; when the document has no report (or the report is no
+        longer stored) the score is None and the counts are 0.
+
+        Args:
+            document_id: The ID of the document
+
+        Returns:
+            Dictionary containing document statistics
+
+        Raises:
+            ValueError: If no document with the given ID exists
+        """
+        document = await self.get_document(document_id)
+        if not document:
+            raise ValueError(f"Document {document_id} not found")
+
+        latest_report = None
+        if document.reports:
+            # The last entry in document.reports is the newest report ID
+            latest_report = await self.get_report(document.reports[-1])
+
+        stats = {
+            "document_id": document_id,
+            "version": document.version,
+            "status": document.status,
+            "file_size": document.metadata.file_size,
+            "upload_date": document.metadata.upload_timestamp,
+            "last_modified": document.metadata.last_modified,
+            "num_reports": len(document.reports),
+            "latest_compliance_score": latest_report.compliance_score if latest_report else None,
+            "critical_issues": latest_report.critical_issues_count if latest_report else 0,
+            "major_issues": latest_report.major_issues_count if latest_report else 0,
+            "minor_issues": latest_report.minor_issues_count if latest_report else 0
+        }
+
+        return stats
+
+    async def cleanup_old_documents(self, days: int = 30) -> List[str]:
+        """
+        Purge documents uploaded more than ``days`` days ago, with their reports.
+
+        Args:
+            days: Number of days after which documents should be removed
+
+        Returns:
+            List of removed document IDs
+        """
+        threshold = datetime.now() - timedelta(days=days)
+
+        # Collect the expired IDs first so the store is not mutated mid-iteration
+        expired_ids = [
+            doc_id
+            for doc_id, stored in self.documents.items()
+            if stored.metadata.upload_timestamp < threshold
+        ]
+
+        for doc_id in expired_ids:
+            stored = self.documents.pop(doc_id)
+            # Reports that are already gone are ignored
+            for report_id in stored.reports:
+                self.reports.pop(report_id, None)
+
+        return expired_ids
+
+
@@ -0,0 +1,254 @@
+import cohere
+from typing import List, Dict, Any, Optional
+import uuid
+from pinecone import Pinecone
+import weaviate
+from loguru import logger
+
+from app.core.config import settings
+from app.core.models import DocumentEmbedding
+
+class EmbeddingService:
+    """Service for document embedding and vector database operations."""
+    
+    def __init__(self):
+        """Create the Cohere client, pick the vector DB backend and record the embedding model."""
+        api_key = settings.COHERE_API_KEY
+        self.cohere_client = cohere.Client(api_key)
+
+        # Backend selection (Pinecone, Weaviate or the mock) is driven by settings
+        self.vector_db_client = self._init_vector_db()
+
+        self.embedding_model = settings.EMBEDDING_MODEL
+    
+    def _init_vector_db(self) -> Any:
+        """
+        Initialize the vector database client based on settings.
+
+        Returns:
+            A Pinecone index, a Weaviate client, or a MockVectorDB when neither
+            backend is fully configured.
+        """
+        if settings.VECTOR_DB == "pinecone" and settings.PINECONE_API_KEY:
+            # Initialize Pinecone with new API
+            pc = Pinecone(api_key=settings.PINECONE_API_KEY)
+
+            # Check if index exists, if not create it
+            # NOTE(review): recent Pinecone clients appear to require a `spec`
+            # argument (serverless/pod) for create_index - confirm against the
+            # installed client version.
+            if settings.PINECONE_INDEX_NAME not in [idx["name"] for idx in pc.list_indexes()]:
+                pc.create_index(
+                    name=settings.PINECONE_INDEX_NAME,
+                    dimension=1024,  # Cohere embed-english-v3.0 dimension
+                    metric="cosine"
+                )
+
+            # Return the index
+            return pc.Index(settings.PINECONE_INDEX_NAME)
+
+        elif settings.VECTOR_DB == "weaviate" and settings.WEAVIATE_URL:
+            # Initialize Weaviate
+            auth_config = weaviate.auth.AuthApiKey(api_key=settings.WEAVIATE_API_KEY) if settings.WEAVIATE_API_KEY else None
+            client = weaviate.Client(
+                url=settings.WEAVIATE_URL,
+                auth_client_secret=auth_config
+            )
+
+            # schema.contains() returns a bool, so the class list has to come from
+            # schema.get(). Create the "Document" class only when it is missing,
+            # regardless of what other classes the instance already holds.
+            existing_classes = client.schema.get().get("classes", [])
+            has_document_class = any(cls.get("class") == "Document" for cls in existing_classes)
+            if not has_document_class:
+                class_obj = {
+                    "class": "Document",
+                    "vectorizer": "none",  # We'll provide our own vectors
+                    "properties": [
+                        {
+                            "name": "content",
+                            "dataType": ["text"]
+                        },
+                        {
+                            "name": "document_id",
+                            "dataType": ["string"]
+                        },
+                        {
+                            "name": "section_name",
+                            "dataType": ["string"]
+                        }
+                    ]
+                }
+                client.schema.create_class(class_obj)
+            return client
+
+        else:
+            logger.warning("No valid vector database configuration found. Using mock implementation.")
+            return MockVectorDB()
+    
+    async def embed_document(self, document_id: str, sections: Dict[str, str]) -> DocumentEmbedding:
+        """
+        Embed document sections and store in vector database.
+
+        Each section is embedded with a separate Cohere call and written to the
+        backend selected by settings.VECTOR_DB. The returned section IDs are the
+        identifiers actually used in the backend.
+
+        Args:
+            document_id: Unique identifier for the document
+            sections: Dictionary mapping section names to section content
+
+        Returns:
+            DocumentEmbedding object with embedding metadata
+
+        Raises:
+            Exception: Any embedding or storage error is logged and re-raised
+        """
+        section_ids = {}
+
+        for section_name, content in sections.items():
+            try:
+                # Generate embedding for section content
+                embedding_response = self.cohere_client.embed(
+                    texts=[content],
+                    model=self.embedding_model,
+                    input_type="search_document"
+                )
+                embedding_vector = embedding_response.embeddings[0]
+
+                # Readable unique ID for this section
+                section_id = f"{document_id}_{section_name}_{str(uuid.uuid4())[:8]}"
+
+                # Store in vector database
+                if settings.VECTOR_DB == "pinecone":
+                    self.vector_db_client.upsert(
+                        vectors=[{
+                            "id": section_id,
+                            "values": embedding_vector,
+                            "metadata": {
+                                "document_id": document_id,
+                                "section_name": section_name,
+                                "content": content[:1000]  # Store truncated content for context
+                            }
+                        }],
+                        namespace=document_id
+                    )
+
+                elif settings.VECTOR_DB == "weaviate":
+                    # Weaviate object IDs must be valid UUIDs; the readable ID is
+                    # not one, so derive a UUID from it and record that instead.
+                    section_id = str(uuid.uuid5(uuid.NAMESPACE_URL, section_id))
+                    self.vector_db_client.data_object.create(
+                        class_name="Document",
+                        data_object={
+                            "content": content,
+                            "document_id": document_id,
+                            "section_name": section_name
+                        },
+                        uuid=section_id,
+                        vector=embedding_vector
+                    )
+
+                # Store the section ID
+                section_ids[section_name] = section_id
+                logger.info(f"Successfully embedded section '{section_name}' for document {document_id}")
+
+            except Exception as e:
+                logger.error(f"Error embedding section '{section_name}': {str(e)}")
+                raise
+
+        # Create and return DocumentEmbedding object
+        embedding = DocumentEmbedding(
+            embedding_id=str(uuid.uuid4()),
+            embedding_model=self.embedding_model,
+            vector_db=settings.VECTOR_DB,
+            sections=section_ids
+        )
+
+        return embedding
+    
+    async def retrieve_similar_sections(self, query: str, document_id: Optional[str] = None, top_k: int = 5) -> List[Dict[str, Any]]:
+        """
+        Retrieve similar document sections for a query.
+
+        Args:
+            query: The query text to find similar sections for
+            document_id: Optional document ID to restrict search
+            top_k: Number of results to return
+
+        Returns:
+            List of dicts with keys "section_id", "document_id", "section_name",
+            "content" and "score". For Pinecone "score" is the match score; for
+            Weaviate it holds the reported `distance` value. An empty list is
+            returned when no supported backend is configured.
+        """
+        # Generate embedding for query
+        query_embedding = self.cohere_client.embed(
+            texts=[query],
+            model=self.embedding_model,
+            input_type="search_query"
+        ).embeddings[0]
+
+        # Search vector database
+        if settings.VECTOR_DB == "pinecone":
+            # Sections are stored in a namespace named after their document
+            namespace = document_id if document_id else None
+            results = self.vector_db_client.query(
+                vector=query_embedding,
+                top_k=top_k,
+                namespace=namespace,
+                include_metadata=True
+            )
+
+            # Format results
+            similar_sections = []
+            for match in results.matches:
+                similar_sections.append({
+                    "section_id": match.id,
+                    "document_id": match.metadata["document_id"],
+                    "section_name": match.metadata["section_name"],
+                    "content": match.metadata.get("content", ""),
+                    "score": match.score
+                })
+
+        elif settings.VECTOR_DB == "weaviate":
+            # `_additional` fields are only returned when explicitly requested;
+            # without this, section_id and score below would always be None.
+            query_builder = self.vector_db_client.query.get(
+                "Document", ["content", "document_id", "section_name"]
+            ).with_near_vector({
+                "vector": query_embedding
+            }).with_limit(top_k).with_additional(["id", "distance"])
+
+            if document_id:
+                query_builder = query_builder.with_where({
+                    "path": ["document_id"],
+                    "operator": "Equal",
+                    "valueString": document_id
+                })
+
+            results = query_builder.do()
+
+            # Format results
+            similar_sections = []
+            for item in results.get("data", {}).get("Get", {}).get("Document", []):
+                similar_sections.append({
+                    "section_id": item.get("_additional", {}).get("id"),
+                    "document_id": item.get("document_id"),
+                    "section_name": item.get("section_name"),
+                    "content": item.get("content", ""),
+                    "score": item.get("_additional", {}).get("distance")
+                })
+
+        else:
+            # Mock implementation
+            similar_sections = []
+
+        return similar_sections
+
+
+class MockVectorDB:
+    """
+    Mock vector database for development without actual vector DB.
+
+    Only metadata is kept (per namespace, keyed by vector ID); vector values are
+    ignored and queries return stored entries with a fixed score.
+    """
+
+    def __init__(self):
+        # namespace -> {vector_id: metadata}
+        self.vectors = {}
+        logger.warning("Using mock vector database. Not suitable for production.")
+
+    def upsert(self, vectors, namespace=None):
+        """
+        Mock upsert method.
+
+        Args:
+            vectors: Iterable of dicts with at least 'id' and 'metadata' keys
+            namespace: Target namespace; "default" when omitted
+        """
+        bucket = self.vectors.setdefault(namespace or "default", {})
+        for vector in vectors:
+            bucket[vector['id']] = vector['metadata']
+
+    def query(self, vector, top_k=5, namespace=None, include_metadata=True):
+        """
+        Mock query method.
+
+        Args:
+            vector: Query vector (ignored by the mock)
+            top_k: Maximum number of matches to return
+            namespace: Namespace to read; "default" when omitted
+            include_metadata: Accepted for API parity; metadata is always returned
+
+        Returns:
+            An object with a ``matches`` list of (id, score, metadata) entries.
+            Unknown namespaces yield an empty ``matches`` list, so callers can
+            read ``.matches`` unconditionally.
+        """
+        from collections import namedtuple
+
+        Match = namedtuple('Match', ['id', 'score', 'metadata'])
+        Results = namedtuple('Results', ['matches'])
+
+        stored = self.vectors.get(namespace or "default", {})
+
+        # Just return some mock results: the first top_k stored entries
+        matches = [
+            Match(id=vector_id, score=0.8, metadata=metadata)
+            for vector_id, metadata in list(stored.items())[:top_k]
+        ]
+
+        return Results(matches=matches)
@@ -0,0 +1,136 @@
+# Reranking services
+import cohere
+from typing import List, Dict, Any
+from loguru import logger
+from tenacity import retry, stop_after_attempt, wait_exponential
+
+from app.core.config import settings
+from app.core.models import ComplianceIssue, ComplianceReport, ComplianceLevel
+
+class RankingService:
+    """Service for ranking and prioritizing compliance issues using Cohere Reranker."""
+    
+    def __init__(self):
+        """Create the Cohere client and record which reranker model to use."""
+        api_key = settings.COHERE_API_KEY
+        self.cohere_client = cohere.Client(api_key)
+        self.reranker_model = settings.RERANKER_MODEL
+    
+    async def prioritize_issues(self, report: ComplianceReport, max_issues: int = 10) -> ComplianceReport:
+        """
+        Prioritize and rank compliance issues in a report.
+
+        Issues are ordered by compliance level first (critical > major > minor >
+        info) and by reranker relevance second, then cut to ``max_issues``.
+
+        Args:
+            report: The compliance report with issues to prioritize
+            max_issues: Maximum number of issues to include in the final report
+
+        Returns:
+            A new ComplianceReport with the prioritized issues, or the original
+            report unchanged when it has fewer than two issues or ranking fails
+        """
+        if not report.issues or len(report.issues) <= 1:
+            # No need to rank if there's only 0 or 1 issues
+            return report
+
+        try:
+            # Prepare issues for ranking
+            issue_texts = [
+                f"Section: {issue.section}. "
+                f"Level: {issue.level.value}. "
+                f"Description: {issue.description}. "
+                f"Recommendation: {issue.recommendation}"
+                for issue in report.issues
+            ]
+
+            # Query object representing what we're looking for
+            query = "critical compliance issues that require immediate attention"
+
+            # Rerank issues based on relevance to the query
+            reranked_issues = await self._rerank_issues(query, issue_texts)
+
+            # Index the reranker scores once instead of scanning the result list
+            # for every issue; setdefault keeps the first score seen per index.
+            score_by_index = {}
+            for item in reranked_issues:
+                score_by_index.setdefault(item["index"], item["relevance_score"])
+
+            level_scores = {
+                ComplianceLevel.CRITICAL: 4,
+                ComplianceLevel.MAJOR: 3,
+                ComplianceLevel.MINOR: 2,
+                ComplianceLevel.INFO: 1
+            }
+
+            # combined = level_score * 100 + rerank_score. The level is the
+            # primary sort key as long as rerank scores stay well below 100;
+            # the rerank score then only orders issues within one level.
+            scored_issues = []
+            for i, issue in enumerate(report.issues):
+                rerank_score = score_by_index.get(i, 0.0)
+                level_score = level_scores.get(issue.level, 0)
+                scored_issues.append(((level_score * 100) + rerank_score, issue))
+
+            # Sort by combined score (descending); the sort is stable for ties
+            scored_issues.sort(key=lambda pair: pair[0], reverse=True)
+
+            # Take top issues based on max_issues limit
+            sorted_issues = [issue for _, issue in scored_issues[:max_issues]]
+
+            # Create updated report, carrying over everything except the issue list
+            prioritized_report = ComplianceReport(
+                report_id=report.report_id,
+                document_id=report.document_id,
+                timestamp=report.timestamp,
+                compliance_score=report.compliance_score,
+                summary=report.summary,
+                issues=sorted_issues
+            )
+
+            return prioritized_report
+
+        except Exception as e:
+            logger.error(f"Error prioritizing issues: {str(e)}")
+            # If ranking fails, return the original report
+            return report
+    
+    async def _rerank_issues(self, query: str, issue_texts: List[str]) -> List[Dict[str, Any]]:
+        """
+        Rerank issues using Cohere Reranker, with a positional fallback.
+
+        The API call itself is retried by ``_call_reranker``; only after the
+        retries are exhausted is the fallback ranking returned. Catching the
+        error here, outside the retried call, is what allows the retry policy
+        to take effect at all.
+
+        Args:
+            query: The search query to compare issues against
+            issue_texts: List of issue descriptions to rank
+
+        Returns:
+            List of dictionaries with keys "index" (position in ``issue_texts``)
+            and "relevance_score"
+        """
+        try:
+            return await self._call_reranker(query, issue_texts)
+        except Exception as e:
+            logger.error(f"Error calling Cohere Reranker: {str(e)}")
+
+            # Return basic ranking if reranking fails: original order, with the
+            # score decreasing by 0.1 per position
+            return [{"index": i, "relevance_score": 1.0 - (i * 0.1)}
+                    for i in range(len(issue_texts))]
+
+    @retry(stop=stop_after_attempt(3), wait=wait_exponential(multiplier=1, min=2, max=10), reraise=True)
+    async def _call_reranker(self, query: str, issue_texts: List[str]) -> List[Dict[str, Any]]:
+        """Call the Cohere Rerank endpoint once; failures propagate so the retry policy applies."""
+        response = self.cohere_client.rerank(
+            model=self.reranker_model,
+            query=query,
+            documents=issue_texts,
+            top_n=len(issue_texts)
+        )
+
+        return [
+            {
+                "index": result.index,  # Original index in the issues list
+                "relevance_score": result.relevance_score
+            }
+            for result in response.results
+        ]
@@ -0,0 +1,168 @@
+# Reasoning with LLMs using Groq
+import json
+from typing import Dict, List
+from loguru import logger
+from tenacity import retry, stop_after_attempt, wait_exponential
+
+from app.core.config import settings
+from app.core.models import ComplianceIssue, ComplianceLevel, ComplianceReport
+from app.utils.token_counter import count_tokens, truncate_by_tokens
+from groq import Groq  # Assuming groq Python SDK is installed
+
+class ReasoningService:
+    """Service for performing deep reasoning on documents using Groq."""
+
+    def __init__(self):
+        """Initialize the reasoning service with the Groq client."""
+        self.client = Groq(api_key=settings.GROQ_API_KEY)
+        self.model = settings.REASONING_MODEL  # e.g., "mixtral-8x7b-32768"
+
+    @retry(stop=stop_after_attempt(3), wait=wait_exponential(multiplier=1, min=2, max=10))
+    async def analyze_document(self, document_id: str, sections: Dict[str, str], standards: List[str]) -> ComplianceReport:
+        document_content = "\n\n".join([f"# {name}\n{content}" for name, content in sections.items()])
+
+        # Use token-based truncation instead of character-based
+        max_tokens = 30000  # Adjust based on model context window
+        token_count = count_tokens(document_content)
+
+        logger.info(f"Document {document_id} has {token_count} tokens before truncation")
+
+        if token_count > max_tokens:
+            document_content = truncate_by_tokens(document_content, max_tokens)
+            logger.info(f"Document {document_id} truncated to {max_tokens} tokens")
+
+        prompt = self._create_analysis_prompt(document_content, standards)
+
+        try:
+            response = await self._query_groq(prompt)
+            compliance_report = self._parse_compliance_response(document_id, response, standards)
+            return compliance_report
+        except Exception as e:
+            logger.error(f"Error analyzing document with Groq: {str(e)}")
+            raise
+
+    def _create_analysis_prompt(self, document_content: str, standards: List[str]) -> str:
+        standards_text = "\n".join([f"- {standard}" for standard in standards])
+        return f"""<document>
+{document_content}
+</document>
+
+<standards>
+{standards_text}
+</standards>
+
+You are an expert in document compliance and technical specifications. Please analyze the document above against the listed standards.
+
+Your job is to identify compliance issues and provide detailed reasoning and recommendations. Focus on:
+1. Technical accuracy and completeness
+2. Compliance with the specified standards
+3. Document structure and organization
+4. Clarity and specificity of language
+5. Consistency and coherence
+
+For each compliance issue you find, please provide:
+- The section where the issue appears
+- A detailed description of the issue
+- The severity level (critical, major, minor, or info)
+- A thorough explanation of why this is an issue and how it impacts compliance
+- Specific, actionable recommendations to fix the issue
+- References to specific standards or best practices that apply
+
+Respond in the following JSON format:
+{{
+  "summary": "Comprehensive overall assessment of the document",
+  "compliance_score": 0.0 to 1.0,
+  "issues": [
+    {{
+      "section": "Section name",
+      "description": "Detailed issue description",
+      "level": "critical/major/minor/info",
+      "reasoning": "Thorough explanation of why this is an issue",
+      "standard_references": ["Specific standards or requirements that are violated"],
+      "recommendation": "Detailed, actionable recommendation to fix the issue"
+    }}
+  ]
+}}"""
+
+    @retry(stop=stop_after_attempt(3), wait=wait_exponential(multiplier=1, min=2, max=10))
+    async def _query_groq(self, prompt: str) -> str:
+        try:
+            response = self.client.chat.completions.create(
+                model=self.model,
+                messages=[
+                    {"role": "system", "content": "You are an AI assistant specialized in document compliance analysis."},
+                    {"role": "user", "content": prompt}
+                ],
+                max_tokens=4000,
+                temperature=0.2,
+                top_p=1.0
+            )
+            return response.choices[0].message.content
+        except Exception as e:
+            logger.error(f"Error querying Groq: {str(e)}")
+            raise
+
+    def _parse_compliance_response(self, document_id: str, response: str, standards: List[str]) -> ComplianceReport:
+        try:
+            json_start = response.find('{')
+            json_end = response.rfind('}') + 1
+
+            if json_start == -1 or json_end == 0:
+                raise ValueError("Could not find JSON in response")
+
+            json_response = response[json_start:json_end]
+            data = json.loads(json_response)
+
+            summary = data.get("summary", "No summary provided")
+            compliance_score = float(data.get("compliance_score", 0.5))
+            issues = []
+
+            for issue_data in data.get("issues", []):
+                level_str = issue_data.get("level", "minor").lower()
+                if level_str == "critical":
+                    level = ComplianceLevel.CRITICAL
+                elif level_str == "major":
+                    level = ComplianceLevel.MAJOR
+                elif level_str == "info":
+                    level = ComplianceLevel.INFO
+                else:
+                    level = ComplianceLevel.MINOR
+
+                issues.append(ComplianceIssue(
+                    section=issue_data.get("section", "Unknown"),
+                    description=issue_data.get("description", "No description provided"),
+                    level=level,
+                    reasoning=issue_data.get("reasoning", "No detailed reasoning provided"),
+                    standard_references=issue_data.get("standard_references", []),
+                    recommendation=issue_data.get("recommendation", "No recommendation provided")
+                ))
+
+            return ComplianceReport(
+                document_id=document_id,
+                compliance_score=compliance_score,
+                summary=summary,
+                issues=issues,
+                applied_standards=standards
+            )
+        except json.JSONDecodeError:
+            logger.error("Failed to parse JSON from response")
+            return ComplianceReport(
+                document_id=document_id,
+                compliance_score=0.0,
+                summary="Failed to analyze document due to parsing error.",
+                issues=[
+                    ComplianceIssue(
+                        section="System",
+                        description="Failed to parse compliance analysis results.",
+                        level=ComplianceLevel.CRITICAL,
+                        reasoning="The system encountered an error while parsing the compliance analysis results.",
+                        standard_references=[],
+                        recommendation="Please try resubmitting the document or contact support."
+                    )
+                ],
+                applied_standards=[]
+            )
+        except Exception as e:
+            logger.error(f"Error parsing compliance response: {str(e)}")
+            raise
@@ -0,0 +1,250 @@
+# Standards management
+import json
+import os
+from typing import Dict, List, Optional, BinaryIO, Tuple
+import uuid
+from loguru import logger
+
+from app.core.models import Standard, Requirement, RequirementSeverity
+from app.utils.helpers import load_standards_from_file
+from app.services.standards_matcher import StandardsMatcher
+
+# Singleton instance to ensure all parts of the application use the same standards.
+# Stays None until StandardsService.__new__ creates the shared instance.
+_standards_service_instance = None
+
+class StandardsService:
+    """Service for managing compliance standards."""
+
+    def __new__(cls):
+        """Implement singleton pattern to ensure all parts of the app use the same standards."""
+        global _standards_service_instance
+        if _standards_service_instance is None:
+            _standards_service_instance = super(StandardsService, cls).__new__(cls)
+            _standards_service_instance.standards = {}  # In-memory storage for standards
+            _standards_service_instance.matcher = StandardsMatcher()  # Advanced standards matching logic
+            _standards_service_instance._load_default_standards()
+        return _standards_service_instance
+
+    def __init__(self):
+        """
+        Initialize the standards service.
+
+        Deliberately does nothing: Python runs __init__ on every
+        StandardsService() call, and the shared instance's state is set up
+        once in __new__.
+        """
+        # Initialization is done in __new__ for the singleton pattern
+
+    def _load_default_standards(self):
+        """Load default standards from the standards directory."""
+        standards_dir = os.path.join(os.path.dirname(os.path.dirname(os.path.dirname(__file__))), "standard")
+
+        if not os.path.exists(standards_dir):
+            logger.warning(f"Standards directory not found: {standards_dir}")
+            return
+
+        for filename in os.listdir(standards_dir):
+            if filename.endswith(".json"):
+                try:
+                    file_path = os.path.join(standards_dir, filename)
+                    standards_data = load_standards_from_file(file_path)
+
+                    if "standards" in standards_data:
+                        for std_data in standards_data["standards"]:
+                            standard = self._create_standard_from_data(std_data)
+                            self.standards[standard.id] = standard
+                            logger.info(f"Loaded standard: {standard.name} ({standard.id})")
+                except Exception as e:
+                    logger.error(f"Error loading standard from {filename}: {str(e)}")
+
+    def _create_standard_from_data(self, data: Dict) -> Standard:
+        """
+        Create a Standard object from dictionary data.
+
+        Args:
+            data: Dictionary containing standard data
+
+        Returns:
+            Standard object
+        """
+        requirements = []
+
+        if "requirements" in data:
+            for req_data in data["requirements"]:
+                # Map severity string to RequirementSeverity enum
+                severity_str = req_data.get("severity", "minor").lower()
+                if severity_str == "critical":
+                    severity = RequirementSeverity.CRITICAL
+                elif severity_str == "major":
+                    severity = RequirementSeverity.MAJOR
+                elif severity_str == "info":
+                    severity = RequirementSeverity.INFO
+                else:
+                    severity = RequirementSeverity.MINOR
+
+                requirement = Requirement(
+                    id=req_data.get("id", str(uuid.uuid4())),
+                    description=req_data.get("description", ""),
+                    severity=severity,
+                    details=req_data.get("details", None)
+                )
+                requirements.append(requirement)
+
+        return Standard(
+            id=data.get("id", str(uuid.uuid4())),
+            name=data.get("name", "Unnamed Standard"),
+            description=data.get("description", ""),
+            requirements=requirements
+        )
+
+    async def get_all_standards(self) -> List[Standard]:
+        """
+        Get all available standards.
+
+        Returns:
+            List of Standard objects
+        """
+        return list(self.standards.values())
+
+    async def get_standard(self, standard_id: str) -> Optional[Standard]:
+        """
+        Get a standard by ID.
+
+        Args:
+            standard_id: ID of the standard to retrieve
+
+        Returns:
+            Standard object if found, None otherwise
+        """
+        return self.standards.get(standard_id)
+
+    async def get_standard_by_name(self, name: str) -> Optional[Standard]:
+        """
+        Get a standard by name (case-insensitive).
+
+        Args:
+            name: Name of the standard to retrieve
+
+        Returns:
+            Standard object if found, None otherwise
+        """
+        name_lower = name.lower()
+        for standard in self.standards.values():
+            if standard.name.lower() == name_lower:
+                return standard
+        return None
+
+    async def upload_standard(self, file: BinaryIO, filename: str) -> Standard:
+        """
+        Upload and process a standard definition file.
+
+        Args:
+            file: The standard definition file (JSON)
+            filename: Name of the uploaded file
+
+        Returns:
+            Standard object
+        """
+        try:
+            # Read file content
+            content = await self._read_file_content(file)
+
+            # Parse JSON
+            data = json.loads(content)
+
+            if "standards" in data and isinstance(data["standards"], list):
+                # Multiple standards in file
+                standards = []
+                for std_data in data["standards"]:
+                    standard = self._create_standard_from_data(std_data)
+                    self.standards[standard.id] = standard
+                    standards.append(standard)
+                    logger.info(f"Uploaded standard: {standard.name} (ID: {standard.id}) with {len(standard.requirements)} requirements")
+
+                # Log the current standards count after upload
+                logger.info(f"Total standards in system after upload: {len(self.standards)}")
+
+                # Return the first standard for simplicity
+                return standards[0] if standards else None
+            else:
+                # Single standard in file
+                standard = self._create_standard_from_data(data)
+                self.standards[standard.id] = standard
+                logger.info(f"Uploaded standard: {standard.name} (ID: {standard.id}) with {len(standard.requirements)} requirements")
+
+                # Log the current standards count after upload
+                logger.info(f"Total standards in system after upload: {len(self.standards)}")
+
+                return standard
+
+        except json.JSONDecodeError:
+            raise ValueError("Invalid JSON format in standard definition file")
+        except Exception as e:
+            logger.error(f"Error processing standard file: {str(e)}")
+            raise
+
+    async def _read_file_content(self, file: BinaryIO) -> str:
+        """
+        Read and decode file content.
+
+        Args:
+            file: The file to read
+
+        Returns:
+            File content as string
+        """
+        file_content = file.read()
+
+        # Try to decode as UTF-8
+        try:
+            return file_content.decode('utf-8')
+        except UnicodeDecodeError:
+            # Try other encodings if UTF-8 fails
+            try:
+                return file_content.decode('latin-1')
+            except:
+                raise ValueError("Unable to decode file content. Please ensure file is text-based.")
+
+    async def get_standard_names_for_document(self, document_content: str) -> List[str]:
+        """
+        Identify which standards might be relevant for a document based on content.
+        Uses advanced matching logic to find the most relevant standards.
+
+        Args:
+            document_content: The document content
+
+        Returns:
+            List of standard names that might be relevant
+        """
+        # Default standards to use if no matches are found
+        DEFAULT_STANDARDS = ["ISO-9001", "IEEE-829", "RFC-2119"]
+
+        # Log available standards for debugging
+        logger.info(f"Available standards in the system: {len(self.standards)}")
+        for std_id, std in self.standards.items():
+            logger.info(f"  - {std.name} (ID: {std_id})")
+
+        # If no standards are available, return defaults
+        if not self.standards:
+            logger.warning("No standards available in the system. Using default standards.")
+            return DEFAULT_STANDARDS
+
+        # Use the standards matcher to find relevant standards
+        standard_scores = self.matcher.find_relevant_standards(
+            document_content=document_content,
+            standards=list(self.standards.values()),
+            threshold=0.1,  # Minimum relevance threshold
+            max_standards=5  # Maximum number of standards to return
+        )
+
+        # Log the matching results
+        if standard_scores:
+            logger.info(f"Found {len(standard_scores)} relevant standards:")
+            for name, score in standard_scores:
+                logger.info(f"  - {name}: relevance score {score:.2f}")
+        else:
+            logger.info("No relevant standards found based on document content.")
+
+        # Extract standard names from the results
+        relevant_standards = [std[0] for std in standard_scores]
+
+        # If no relevant standards found, use defaults
+        if not relevant_standards:
+            logger.info(f"Using default standards: {DEFAULT_STANDARDS}")
+            return DEFAULT_STANDARDS
+
+        return relevant_standards
@@ -0,0 +1,304 @@
+# Standards matching logic
+import re
+from typing import Dict, List, Set, Tuple, Optional
+from loguru import logger
+
+from app.core.models import Standard, Requirement
+
+
+class StandardsMatcher:
+    """
+    Advanced matching logic to identify relevant standards for documents.
+    This class implements sophisticated matching algorithms beyond simple text matching.
+    """
+
+    def __init__(self):
+        """Initialize the standards matcher."""
+        # Common stopwords to filter out when extracting keywords
+        self.stopwords = {
+            "the", "a", "an", "and", "or", "in", "on", "at", "to", "for", "with",
+            "by", "of", "is", "are", "was", "were", "be", "been", "being", "have",
+            "has", "had", "do", "does", "did", "but", "if", "then", "else", "when",
+            "where", "why", "how", "all", "any", "both", "each", "few", "more",
+            "most", "other", "some", "such", "no", "nor", "not", "only", "own",
+            "same", "so", "than", "too", "very", "can", "will", "just", "should",
+            "now", "this", "that", "these", "those"
+        }
+
+        # Technical terms that indicate compliance requirements
+        self.technical_indicators = [
+            "shall", "must", "required", "should", "recommended", "may", "optional",
+            "compliant", "compliance", "conform", "standard", "specification", "requirement",
+            "procedure", "process", "method", "test", "verify", "validate", "certification",
+            "certified", "approved", "regulation", "regulatory", "guideline", "protocol"
+        ]
+
+        # Common standard prefixes and abbreviations
+        self.standard_prefixes = [
+            "iso", "ieee", "astm", "ansi", "iec", "din", "bs", "en", "jis",
+            "gb", "api", "asme", "nfpa", "ul", "mil", "std", "rfc", "itu"
+        ]
+
+    def extract_document_sections(self, document_content: str) -> Dict[str, str]:
+        """
+        Extract sections from a document to improve matching.
+
+        Args:
+            document_content: The document content
+
+        Returns:
+            Dictionary of section name to section content
+        """
+        sections = {}
+        sections["full_document"] = document_content
+
+        # Try to identify document sections using markdown headings
+        heading_pattern = re.compile(r'^(#{1,3})\s+(.+)$', re.MULTILINE)
+        matches = list(heading_pattern.finditer(document_content))
+
+        if matches:
+            for i, match in enumerate(matches):
+                section_name = match.group(2).strip()
+
+                # Get section content (from this heading to the next, or to the end)
+                start_pos = match.end()
+                end_pos = matches[i+1].start() if i < len(matches) - 1 else len(document_content)
+
+                section_content = document_content[start_pos:end_pos].strip()
+                sections[section_name] = section_content
+
+        # Look for common document sections by name
+        common_sections = [
+            "introduction", "scope", "purpose", "references", "definitions",
+            "requirements", "compliance", "standards", "conclusion", "summary",
+            "appendix", "annex"
+        ]
+
+        for section in common_sections:
+            pattern = re.compile(rf'(?i)(?:^|\n)(?:{section}|{section.capitalize()})(?:[\s:]+)(.*?)(?=\n\s*\n|\n\s*[A-Z]|\Z)', re.DOTALL)
+            match = pattern.search(document_content)
+            if match:
+                sections[section] = match.group(1).strip()
+
+        return sections
+
+    def extract_key_terms(self, document_content: str) -> List[str]:
+        """
+        Extract key technical terms from document content.
+
+        Args:
+            document_content: The document content
+
+        Returns:
+            List of key terms
+        """
+        key_terms = []
+
+        # Split into sentences
+        sentences = re.split(r'[.!?]\s+', document_content)
+
+        for sentence in sentences:
+            words = sentence.split()
+
+            # Check if sentence contains technical indicators
+            if any(indicator in sentence.lower() for indicator in self.technical_indicators):
+                # Extract noun phrases (simplified approach)
+                for i in range(len(words) - 1):
+                    if words[i].lower() not in self.stopwords and words[i+1].lower() not in self.stopwords:
+                        key_terms.append(f"{words[i]} {words[i+1]}".lower())
+
+        # Look for capitalized terms (often defined terms)
+        cap_pattern = re.compile(r'\b[A-Z][A-Z0-9]+\b')
+        cap_terms = cap_pattern.findall(document_content)
+        key_terms.extend([term.lower() for term in cap_terms])
+
+        # Look for standard references (e.g., ISO-9001, IEEE 829)
+        for prefix in self.standard_prefixes:
+            pattern = re.compile(rf'\b{prefix}[-\s]?\d+\b', re.IGNORECASE)
+            matches = pattern.findall(document_content)
+            key_terms.extend([match.lower() for match in matches])
+
+        # Remove duplicates
+        return list(set(key_terms))
+
+    def extract_standard_keywords(self, standard: Standard) -> List[str]:
+        """
+        Extract keywords from a standard that can be used for matching.
+
+        Args:
+            standard: The standard to extract keywords from
+
+        Returns:
+            List of keywords associated with the standard
+        """
+        keywords = []
+
+        # Add standard name and variations
+        keywords.append(standard.name.lower())
+        keywords.append(standard.name.replace("-", "").lower())
+        keywords.append(standard.name.replace("-", " ").lower())
+
+        # Add standard description words (excluding common words)
+        if standard.description:
+            description_words = [word.lower() for word in standard.description.split()
+                                if word.lower() not in self.stopwords]
+            keywords.extend(description_words)
+
+        # Add requirement keywords
+        for req in standard.requirements:
+            # Add requirement ID
+            keywords.append(req.id.lower())
+
+            # Add key phrases from requirement description
+            if req.description:
+                # Extract noun phrases and technical terms (simplified approach)
+                phrases = []
+                words = req.description.split()
+                for i in range(len(words) - 1):
+                    if words[i].lower() not in self.stopwords and words[i+1].lower() not in self.stopwords:
+                        phrases.append(f"{words[i]} {words[i+1]}".lower())
+                keywords.extend(phrases)
+
+                # Add individual technical terms
+                for word in words:
+                    if word.lower() in self.technical_indicators:
+                        keywords.append(word.lower())
+
+        # Remove duplicates and return
+        return list(set(keywords))
+
+    def calculate_standard_relevance(self, standard: Standard, document_content: str,
+                                    sections: Dict[str, str], key_terms: List[str]) -> float:
+        """
+        Calculate a relevance score for a standard based on multiple factors.
+
+        Args:
+            standard: The standard to evaluate
+            document_content: The document content
+            sections: Document sections
+            key_terms: Key terms extracted from the document
+
+        Returns:
+            Relevance score (0.0 to 1.0)
+        """
+        document_content_lower = document_content.lower()
+
+        # Extract keywords for this standard
+        standard_keywords = self.extract_standard_keywords(standard)
+
+        # Initialize scores for different matching components
+        name_match_score = 0.0
+        keyword_match_score = 0.0
+        section_match_score = 0.0
+        term_match_score = 0.0
+        requirement_match_score = 0.0
+
+        # 1. Check for standard name matches (highest weight)
+        if standard.name.lower() in document_content_lower:
+            name_match_score = 0.5
+        elif standard.name.replace("-", "").lower() in document_content_lower:
+            name_match_score = 0.4
+        elif standard.name.replace("-", " ").lower() in document_content_lower:
+            name_match_score = 0.4
+
+        # 2. Check for keyword matches
+        matched_keywords = 0
+        total_keywords = len(standard_keywords)
+
+        if total_keywords > 0:
+            for keyword in standard_keywords:
+                if keyword in document_content_lower:
+                    matched_keywords += 1
+
+            keyword_match_score = matched_keywords / total_keywords * 0.3
+
+        # 3. Check for section-specific matches
+        important_sections = ["introduction", "scope", "purpose", "references",
+                             "standards", "compliance", "requirements"]
+
+        for section_name in important_sections:
+            if section_name in sections:
+                section_content = sections[section_name].lower()
+
+                # Check for standard name in important sections
+                if standard.name.lower() in section_content:
+                    section_match_score += 0.1
+                    break
+
+        # Check for standard name in section titles
+        for section_name in sections.keys():
+            if standard.name.lower() in section_name.lower():
+                section_match_score += 0.2
+                break
+
+        # 4. Check for key term matches
+        matching_terms = 0
+        for term in key_terms:
+            if any(kw in term or term in kw for kw in standard_keywords):
+                matching_terms += 1
+
+        if len(key_terms) > 0:
+            term_match_score = min(0.2, 0.01 * matching_terms)
+
+        # 5. Check for requirement-specific matches
+        for req in standard.requirements:
+            req_desc_lower = req.description.lower()
+            req_keywords = [word for word in req_desc_lower.split()
+                           if word not in self.stopwords and len(word) > 3]
+
+            for keyword in req_keywords:
+                if keyword in document_content_lower:
+                    requirement_match_score += 0.01
+
+        requirement_match_score = min(0.2, requirement_match_score)
+
+        # Calculate final score (weighted sum of all components)
+        final_score = (
+            name_match_score +
+            keyword_match_score +
+            section_match_score +
+            term_match_score +
+            requirement_match_score
+        )
+
+        # Cap at 1.0
+        return min(final_score, 1.0)
+
+    def find_relevant_standards(self, document_content: str, standards: List[Standard],
+                               threshold: float = 0.1, max_standards: int = 5) -> List[Tuple[str, float]]:
+        """
+        Find standards relevant to a document with relevance scores.
+
+        Args:
+            document_content: The document content
+            standards: List of available standards
+            threshold: Minimum relevance score threshold
+            max_standards: Maximum number of standards to return
+
+        Returns:
+            List of tuples (standard_name, relevance_score) sorted by relevance
+        """
+        if not standards:
+            return []
+
+        # Extract document sections and key terms
+        sections = self.extract_document_sections(document_content)
+        key_terms = self.extract_key_terms(document_content)
+
+        # Calculate relevance scores for each standard
+        standard_scores = []
+
+        for standard in standards:
+            score = self.calculate_standard_relevance(
+                standard, document_content, sections, key_terms
+            )
+
+            if score >= threshold:
+                standard_scores.append((standard.name, score))
+                logger.debug(f"Standard {standard.name} relevance score: {score:.2f}")
+
+        # Sort by relevance score (highest first)
+        standard_scores.sort(key=lambda x: x[1], reverse=True)
+
+        # Limit to max_standards
+        return standard_scores[:max_standards]
				`@@ -0,0 +1 @@`
				`"""Services for the Mini SpecsComply Pro application."""`