Initial commit

2025-07-17 22:20:25 +01:00
commit 0e3e22e8cb
39 changed files with 13295 additions and 0 deletions
@@ -0,0 +1,168 @@
+# Reasoning with LLMs using Groq
+import asyncio
+import json
+import math
+from functools import partial
+from typing import Dict, List
+
+from groq import Groq  # Assuming groq Python SDK is installed
+from loguru import logger
+from tenacity import retry, stop_after_attempt, wait_exponential
+
+from app.core.config import settings
+from app.core.models import ComplianceIssue, ComplianceLevel, ComplianceReport
+from app.utils.token_counter import count_tokens, truncate_by_tokens
+
+class ReasoningService:
+    """Service for performing deep reasoning on documents using Groq."""
+
+    def __init__(self):
+        """Initialize the reasoning service with the Groq client."""
+        self.client = Groq(api_key=settings.GROQ_API_KEY)
+        self.model = settings.REASONING_MODEL  # e.g., "mixtral-8x7b-32768"
+
+    @retry(stop=stop_after_attempt(3), wait=wait_exponential(multiplier=1, min=2, max=10))
+    async def analyze_document(self, document_id: str, sections: Dict[str, str], standards: List[str]) -> ComplianceReport:
+        document_content = "\n\n".join([f"# {name}\n{content}" for name, content in sections.items()])
+
+        # Use token-based truncation instead of character-based
+        max_tokens = 30000  # Adjust based on model context window
+        token_count = count_tokens(document_content)
+
+        logger.info(f"Document {document_id} has {token_count} tokens before truncation")
+
+        if token_count > max_tokens:
+            document_content = truncate_by_tokens(document_content, max_tokens)
+            logger.info(f"Document {document_id} truncated to {max_tokens} tokens")
+
+        prompt = self._create_analysis_prompt(document_content, standards)
+
+        try:
+            response = await self._query_groq(prompt)
+            compliance_report = self._parse_compliance_response(document_id, response, standards)
+            return compliance_report
+        except Exception as e:
+            logger.error(f"Error analyzing document with Groq: {str(e)}")
+            raise
+
+    def _create_analysis_prompt(self, document_content: str, standards: List[str]) -> str:
+        standards_text = "\n".join([f"- {standard}" for standard in standards])
+        return f"""<document>
+{document_content}
+</document>
+
+<standards>
+{standards_text}
+</standards>
+
+You are an expert in document compliance and technical specifications. Please analyze the document above against the listed standards.
+
+Your job is to identify compliance issues and provide detailed reasoning and recommendations. Focus on:
+1. Technical accuracy and completeness
+2. Compliance with the specified standards
+3. Document structure and organization
+4. Clarity and specificity of language
+5. Consistency and coherence
+
+For each compliance issue you find, please provide:
+- The section where the issue appears
+- A detailed description of the issue
+- The severity level (critical, major, minor, or info)
+- A thorough explanation of why this is an issue and how it impacts compliance
+- Specific, actionable recommendations to fix the issue
+- References to specific standards or best practices that apply
+
+Respond in the following JSON format:
+{{
+  "summary": "Comprehensive overall assessment of the document",
+  "compliance_score": 0.0 to 1.0,
+  "issues": [
+    {{
+      "section": "Section name",
+      "description": "Detailed issue description",
+      "level": "critical/major/minor/info",
+      "reasoning": "Thorough explanation of why this is an issue",
+      "standard_references": ["Specific standards or requirements that are violated"],
+      "recommendation": "Detailed, actionable recommendation to fix the issue"
+    }}
+  ]
+}}"""
+
+    @retry(stop=stop_after_attempt(3), wait=wait_exponential(multiplier=1, min=2, max=10))
+    async def _query_groq(self, prompt: str) -> str:
+        try:
+            response = self.client.chat.completions.create(
+                model=self.model,
+                messages=[
+                    {"role": "system", "content": "You are an AI assistant specialized in document compliance analysis."},
+                    {"role": "user", "content": prompt}
+                ],
+                max_tokens=4000,
+                temperature=0.2,
+                top_p=1.0
+            )
+            return response.choices[0].message.content
+        except Exception as e:
+            logger.error(f"Error querying Groq: {str(e)}")
+            raise
+
+    def _parse_compliance_response(self, document_id: str, response: str, standards: List[str]) -> ComplianceReport:
+        try:
+            json_start = response.find('{')
+            json_end = response.rfind('}') + 1
+
+            if json_start == -1 or json_end == 0:
+                raise ValueError("Could not find JSON in response")
+
+            json_response = response[json_start:json_end]
+            data = json.loads(json_response)
+
+            summary = data.get("summary", "No summary provided")
+            compliance_score = float(data.get("compliance_score", 0.5))
+            issues = []
+
+            for issue_data in data.get("issues", []):
+                level_str = issue_data.get("level", "minor").lower()
+                if level_str == "critical":
+                    level = ComplianceLevel.CRITICAL
+                elif level_str == "major":
+                    level = ComplianceLevel.MAJOR
+                elif level_str == "info":
+                    level = ComplianceLevel.INFO
+                else:
+                    level = ComplianceLevel.MINOR
+
+                issues.append(ComplianceIssue(
+                    section=issue_data.get("section", "Unknown"),
+                    description=issue_data.get("description", "No description provided"),
+                    level=level,
+                    reasoning=issue_data.get("reasoning", "No detailed reasoning provided"),
+                    standard_references=issue_data.get("standard_references", []),
+                    recommendation=issue_data.get("recommendation", "No recommendation provided")
+                ))
+
+            return ComplianceReport(
+                document_id=document_id,
+                compliance_score=compliance_score,
+                summary=summary,
+                issues=issues,
+                applied_standards=standards
+            )
+        except json.JSONDecodeError:
+            logger.error("Failed to parse JSON from response")
+            return ComplianceReport(
+                document_id=document_id,
+                compliance_score=0.0,
+                summary="Failed to analyze document due to parsing error.",
+                issues=[
+                    ComplianceIssue(
+                        section="System",
+                        description="Failed to parse compliance analysis results.",
+                        level=ComplianceLevel.CRITICAL,
+                        reasoning="The system encountered an error while parsing the compliance analysis results.",
+                        standard_references=[],
+                        recommendation="Please try resubmitting the document or contact support."
+                    )
+                ],
+                applied_standards=[]
+            )
+        except Exception as e:
+            logger.error(f"Error parsing compliance response: {str(e)}")
+            raise