# app/services/reasoning.py

# Document compliance reasoning with LLMs via the Groq API
import asyncio
import functools
import json
from typing import Dict, List

from groq import Groq  # Assuming groq Python SDK is installed
from loguru import logger
from tenacity import retry, stop_after_attempt, wait_exponential

from app.core.config import settings
from app.core.models import ComplianceIssue, ComplianceLevel, ComplianceReport
from app.utils.token_counter import count_tokens, truncate_by_tokens

class ReasoningService:
    """Service for performing deep reasoning on documents using Groq.

    The service joins document sections into one text, builds an analysis
    prompt that lists the standards to check against, sends it to a Groq chat
    model and parses the model's JSON reply into a ``ComplianceReport``.
    """

    # Token budget for the document text that is embedded in the prompt.
    # NOTE(review): MAX_DOCUMENT_TOKENS + MAX_COMPLETION_TOKENS plus the
    # instruction text is more than 32768. If the configured model has a 32k
    # context window (as the example model name below suggests), long
    # documents may be rejected by the API -- confirm and lower if needed.
    MAX_DOCUMENT_TOKENS = 30000
    # Upper bound on the number of tokens the model may generate per request.
    MAX_COMPLETION_TOKENS = 4000
    # Score used when the model omits the score or returns an unusable value.
    DEFAULT_COMPLIANCE_SCORE = 0.5

    def __init__(self):
        """Initialize the reasoning service with the Groq client."""
        self.client = Groq(api_key=settings.GROQ_API_KEY)
        self.model = settings.REASONING_MODEL  # e.g., "mixtral-8x7b-32768"

    @retry(stop=stop_after_attempt(3), wait=wait_exponential(multiplier=1, min=2, max=10))
    async def analyze_document(self, document_id: str, sections: Dict[str, str], standards: List[str]) -> ComplianceReport:
        """Analyze a document against the given standards.

        Args:
            document_id: Identifier copied into the report and used in logs.
            sections: Mapping of section name to section text; each entry is
                rendered as a ``# name`` heading followed by its text.
            standards: Names of the standards the document is checked against.

        Returns:
            The compliance report parsed from the model's reply.

        Raises:
            Exception: Any error from querying or parsing is logged and
                re-raised, which triggers the retry decorator (3 attempts,
                exponential backoff). Presumably a ``tenacity.RetryError`` is
                surfaced once attempts are exhausted, since ``reraise`` is not
                set -- confirm against the tenacity documentation.

        NOTE(review): ``_query_groq`` carries its own 3-attempt retry, so one
        call here can issue up to 3 x 3 API requests.
        """
        document_content = "\n\n".join(
            f"# {name}\n{content}" for name, content in sections.items()
        )

        # Use token-based truncation instead of character-based
        token_count = count_tokens(document_content)
        logger.info(f"Document {document_id} has {token_count} tokens before truncation")

        if token_count > self.MAX_DOCUMENT_TOKENS:
            document_content = truncate_by_tokens(document_content, self.MAX_DOCUMENT_TOKENS)
            logger.info(f"Document {document_id} truncated to {self.MAX_DOCUMENT_TOKENS} tokens")

        prompt = self._create_analysis_prompt(document_content, standards)

        try:
            response = await self._query_groq(prompt)
            return self._parse_compliance_response(document_id, response, standards)
        except Exception as e:
            logger.error(f"Error analyzing document with Groq: {e}")
            raise

    def _create_analysis_prompt(self, document_content: str, standards: List[str]) -> str:
        """Build the user prompt sent to the model.

        Args:
            document_content: Document text, placed inside ``<document>`` tags.
            standards: Standard names, rendered as a bullet list inside
                ``<standards>`` tags.

        Returns:
            The prompt text, ending with the JSON schema the model is asked to
            follow (doubled braces are f-string escapes for literal braces).
        """
        standards_text = "\n".join(f"- {standard}" for standard in standards)
        return f"""<document>
{document_content}
</document>

<standards>
{standards_text}
</standards>

You are an expert in document compliance and technical specifications. Please analyze the document above against the listed standards.

Your job is to identify compliance issues and provide detailed reasoning and recommendations. Focus on:
1. Technical accuracy and completeness
2. Compliance with the specified standards
3. Document structure and organization
4. Clarity and specificity of language
5. Consistency and coherence

For each compliance issue you find, please provide:
- The section where the issue appears
- A detailed description of the issue
- The severity level (critical, major, minor, or info)
- A thorough explanation of why this is an issue and how it impacts compliance
- Specific, actionable recommendations to fix the issue
- References to specific standards or best practices that apply

Respond in the following JSON format:
{{
  "summary": "Comprehensive overall assessment of the document",
  "compliance_score": 0.0 to 1.0,
  "issues": [
    {{
      "section": "Section name",
      "description": "Detailed issue description",
      "level": "critical/major/minor/info",
      "reasoning": "Thorough explanation of why this is an issue",
      "standard_references": ["Specific standards or requirements that are violated"],
      "recommendation": "Detailed, actionable recommendation to fix the issue"
    }}
  ]
}}"""

    @retry(stop=stop_after_attempt(3), wait=wait_exponential(multiplier=1, min=2, max=10))
    async def _query_groq(self, prompt: str) -> str:
        """Send the prompt to the Groq chat completions API and return the reply text.

        The client created in ``__init__`` is the synchronous ``Groq`` client,
        so the request runs in the event loop's default executor; calling it
        directly from this coroutine would block the loop for the whole
        network round trip.

        Args:
            prompt: User message content.

        Returns:
            The text of the first choice, or an empty string when the message
            has no content.

        Raises:
            Exception: Any client error is logged and re-raised, which
                triggers the retry decorator (3 attempts, exponential backoff).
        """
        try:
            request = functools.partial(
                self.client.chat.completions.create,
                model=self.model,
                messages=[
                    {"role": "system", "content": "You are an AI assistant specialized in document compliance analysis."},
                    {"role": "user", "content": prompt},
                ],
                max_tokens=self.MAX_COMPLETION_TOKENS,
                temperature=0.2,
                top_p=1.0,
            )
            loop = asyncio.get_running_loop()
            response = await loop.run_in_executor(None, request)
            # Keep the declared ``str`` return type even when content is None.
            return response.choices[0].message.content or ""
        except Exception as e:
            logger.error(f"Error querying Groq: {e}")
            raise

    def _parse_compliance_response(self, document_id: str, response: str, standards: List[str]) -> ComplianceReport:
        """Turn the model's reply into a ``ComplianceReport``.

        The JSON object is taken as the span from the first ``{`` to the last
        ``}``, so surrounding prose or code fences in the reply are ignored.

        Args:
            document_id: Identifier stored on the report.
            response: Raw reply text from the model; ``None`` is treated as empty.
            standards: Standards recorded as applied on a successful report.

        Returns:
            The parsed report. If the extracted span is not valid JSON, a
            fallback report with score 0.0 and a single critical "System"
            issue is returned instead.

        Raises:
            ValueError: If the reply contains no ``{`` or no ``}``.
            Exception: Any other error is logged and re-raised.
        """
        try:
            text = response or ""
            json_start = text.find("{")
            json_end = text.rfind("}") + 1

            if json_start == -1 or json_end == 0:
                raise ValueError("Could not find JSON in response")

            data = json.loads(text[json_start:json_end])

            summary = data.get("summary")
            if summary is None:
                summary = "No summary provided"

            # The model may emit null or a non-list here; treat that as no issues.
            raw_issues = data.get("issues")
            if not isinstance(raw_issues, list):
                raw_issues = []

            issues = [
                self._build_issue(issue_data)
                for issue_data in raw_issues
                if isinstance(issue_data, dict)  # skip malformed entries
            ]

            return ComplianceReport(
                document_id=document_id,
                compliance_score=self._coerce_score(
                    data.get("compliance_score"), self.DEFAULT_COMPLIANCE_SCORE
                ),
                summary=str(summary),
                issues=issues,
                applied_standards=standards,
            )
        except json.JSONDecodeError:
            logger.error("Failed to parse JSON from response")
            return ComplianceReport(
                document_id=document_id,
                compliance_score=0.0,
                summary="Failed to analyze document due to parsing error.",
                issues=[
                    ComplianceIssue(
                        section="System",
                        description="Failed to parse compliance analysis results.",
                        level=ComplianceLevel.CRITICAL,
                        reasoning="The system encountered an error while parsing the compliance analysis results.",
                        standard_references=[],
                        recommendation="Please try resubmitting the document or contact support.",
                    )
                ],
                applied_standards=[],
            )
        except Exception as e:
            logger.error(f"Error parsing compliance response: {e}")
            raise

    @staticmethod
    def _coerce_score(raw, default: float = 0.5) -> float:
        """Convert the model-supplied score to a float within 0.0-1.0.

        Missing, null, non-numeric and NaN values yield ``default``;
        out-of-range values are clamped, matching the 0.0 to 1.0 range the
        prompt asks for.
        """
        if raw is None:
            return default
        try:
            score = float(raw)
        except (TypeError, ValueError):
            logger.warning(f"Unusable compliance_score {raw!r}; using {default}")
            return default
        if score != score:  # NaN is the only value not equal to itself
            logger.warning(f"Unusable compliance_score {raw!r}; using {default}")
            return default
        return min(1.0, max(0.0, score))

    @staticmethod
    def _build_issue(issue_data: dict) -> ComplianceIssue:
        """Build one ``ComplianceIssue`` from a decoded issue object.

        Missing or null fields get placeholder text, unknown severity names
        map to ``ComplianceLevel.MINOR``, and ``standard_references`` is
        normalised to a list of strings.
        """
        levels = {
            "critical": ComplianceLevel.CRITICAL,
            "major": ComplianceLevel.MAJOR,
            "info": ComplianceLevel.INFO,
        }
        level_name = str(issue_data.get("level") or "minor").strip().lower()

        def text(key: str, default: str) -> str:
            value = issue_data.get(key)
            return default if value is None else str(value)

        references = issue_data.get("standard_references")
        if references is None:
            references = []
        elif not isinstance(references, (list, tuple)):
            # A single reference given as a bare value instead of a list.
            references = [references]

        return ComplianceIssue(
            section=text("section", "Unknown"),
            description=text("description", "No description provided"),
            level=levels.get(level_name, ComplianceLevel.MINOR),
            reasoning=text("reasoning", "No detailed reasoning provided"),
            standard_references=[str(ref) for ref in references],
            recommendation=text("recommendation", "No recommendation provided"),
        )
Initial commit 2025-07-17 22:20:25 +01:00			`# Reasoning with LLMs`
			`# Reasoning with LLMs using GROQ`
			`import json`
			`from typing import Dict, List`
			`from loguru import logger`
			`from tenacity import retry, stop_after_attempt, wait_exponential`

			`from app.core.config import settings`
			`from app.core.models import ComplianceIssue, ComplianceLevel, ComplianceReport`
			`from app.utils.token_counter import count_tokens, truncate_by_tokens`
			`from groq import Groq # Assuming groq Python SDK is installed`

			`class ReasoningService:`
			`"""Service for performing deep reasoning on documents using Groq."""`

			`def __init__(self):`
			`"""Initialize the reasoning service with the Groq client."""`
			`self.client = Groq(api_key=settings.GROQ_API_KEY)`
			`self.model = settings.REASONING_MODEL # e.g., "mixtral-8x7b-32768"`

			`@retry(stop=stop_after_attempt(3), wait=wait_exponential(multiplier=1, min=2, max=10))`
			`async def analyze_document(self, document_id: str, sections: Dict[str, str], standards: List[str]) -> ComplianceReport:`
			`document_content = "\n\n".join([f"# {name}\n{content}" for name, content in sections.items()])`

			`# Use token-based truncation instead of character-based`
			`max_tokens = 30000 # Adjust based on model context window`
			`token_count = count_tokens(document_content)`

			`logger.info(f"Document {document_id} has {token_count} tokens before truncation")`

			`if token_count > max_tokens:`
			`document_content = truncate_by_tokens(document_content, max_tokens)`
			`logger.info(f"Document {document_id} truncated to {max_tokens} tokens")`

			`prompt = self._create_analysis_prompt(document_content, standards)`

			`try:`
			`response = await self._query_groq(prompt)`
			`compliance_report = self._parse_compliance_response(document_id, response, standards)`
			`return compliance_report`
			`except Exception as e:`
			`logger.error(f"Error analyzing document with Groq: {str(e)}")`
			`raise`

			`def _create_analysis_prompt(self, document_content: str, standards: List[str]) -> str:`
			`standards_text = "\n".join([f"- {standard}" for standard in standards])`
			`return f"""<document>`
			`{document_content}`
			`</document>`

			`<standards>`
			`{standards_text}`
			`</standards>`

			`You are an expert in document compliance and technical specifications. Please analyze the document above against the listed standards.`

			`Your job is to identify compliance issues and provide detailed reasoning and recommendations. Focus on:`
			`1. Technical accuracy and completeness`
			`2. Compliance with the specified standards`
			`3. Document structure and organization`
			`4. Clarity and specificity of language`
			`5. Consistency and coherence`

			`For each compliance issue you find, please provide:`
			`- The section where the issue appears`
			`- A detailed description of the issue`
			`- The severity level (critical, major, minor, or info)`
			`- A thorough explanation of why this is an issue and how it impacts compliance`
			`- Specific, actionable recommendations to fix the issue`
			`- References to specific standards or best practices that apply`

			`Respond in the following JSON format:`
			`{{`
			`"summary": "Comprehensive overall assessment of the document",`
			`"compliance_score": 0.0 to 1.0,`
			`"issues": [`
			`{{`
			`"section": "Section name",`
			`"description": "Detailed issue description",`
			`"level": "critical/major/minor/info",`
			`"reasoning": "Thorough explanation of why this is an issue",`
			`"standard_references": ["Specific standards or requirements that are violated"],`
			`"recommendation": "Detailed, actionable recommendation to fix the issue"`
			`}}`
			`]`
			`}}"""`

			`@retry(stop=stop_after_attempt(3), wait=wait_exponential(multiplier=1, min=2, max=10))`
			`async def _query_groq(self, prompt: str) -> str:`
			`try:`
			`response = self.client.chat.completions.create(`
			`model=self.model,`
			`messages=[`
			`{"role": "system", "content": "You are an AI assistant specialized in document compliance analysis."},`
			`{"role": "user", "content": prompt}`
			`],`
			`max_tokens=4000,`
			`temperature=0.2,`
			`top_p=1.0`
			`)`
			`return response.choices[0].message.content`
			`except Exception as e:`
			`logger.error(f"Error querying Groq: {str(e)}")`
			`raise`

			`def _parse_compliance_response(self, document_id: str, response: str, standards: List[str]) -> ComplianceReport:`
			`try:`
			`json_start = response.find('{')`
			`json_end = response.rfind('}') + 1`

			`if json_start == -1 or json_end == 0:`
			`raise ValueError("Could not find JSON in response")`

			`json_response = response[json_start:json_end]`
			`data = json.loads(json_response)`

			`summary = data.get("summary", "No summary provided")`
			`compliance_score = float(data.get("compliance_score", 0.5))`
			`issues = []`

			`for issue_data in data.get("issues", []):`
			`level_str = issue_data.get("level", "minor").lower()`
			`if level_str == "critical":`
			`level = ComplianceLevel.CRITICAL`
			`elif level_str == "major":`
			`level = ComplianceLevel.MAJOR`
			`elif level_str == "info":`
			`level = ComplianceLevel.INFO`
			`else:`
			`level = ComplianceLevel.MINOR`

			`issues.append(ComplianceIssue(`
			`section=issue_data.get("section", "Unknown"),`
			`description=issue_data.get("description", "No description provided"),`
			`level=level,`
			`reasoning=issue_data.get("reasoning", "No detailed reasoning provided"),`
			`standard_references=issue_data.get("standard_references", []),`
			`recommendation=issue_data.get("recommendation", "No recommendation provided")`
			`))`

			`return ComplianceReport(`
			`document_id=document_id,`
			`compliance_score=compliance_score,`
			`summary=summary,`
			`issues=issues,`
			`applied_standards=standards`
			`)`
			`except json.JSONDecodeError:`
			`logger.error("Failed to parse JSON from response")`
			`return ComplianceReport(`
			`document_id=document_id,`
			`compliance_score=0.0,`
			`summary="Failed to analyze document due to parsing error.",`
			`issues=[`
			`ComplianceIssue(`
			`section="System",`
			`description="Failed to parse compliance analysis results.",`
			`level=ComplianceLevel.CRITICAL,`
			`reasoning="The system encountered an error while parsing the compliance analysis results.",`
			`standard_references=[],`
			`recommendation="Please try resubmitting the document or contact support."`
			`)`
			`],`
			`applied_standards=[]`
			`)`
			`except Exception as e:`
			`logger.error(f"Error parsing compliance response: {str(e)}")`
			`raise`