Initial commit
This commit is contained in:
@@ -0,0 +1,168 @@
|
||||
# Reasoning with LLMs
|
||||
# Reasoning with LLMs using GROQ
|
||||
import json
|
||||
from typing import Dict, List
|
||||
from loguru import logger
|
||||
from tenacity import retry, stop_after_attempt, wait_exponential
|
||||
|
||||
from app.core.config import settings
|
||||
from app.core.models import ComplianceIssue, ComplianceLevel, ComplianceReport
|
||||
from app.utils.token_counter import count_tokens, truncate_by_tokens
|
||||
from groq import Groq # Assuming groq Python SDK is installed
|
||||
|
||||
class ReasoningService:
    """Service for performing deep reasoning on documents using Groq.

    The service joins document sections into one prompt, asks the configured
    Groq chat model for a JSON compliance assessment, and converts the reply
    into a ``ComplianceReport``.
    """

    # Token budget for the document portion of the prompt.
    # NOTE(review): the prompt instructions and MAX_COMPLETION_TOKENS come on
    # top of this budget; confirm the sum fits the configured model's context
    # window.
    MAX_DOCUMENT_TOKENS = 30000

    # Upper bound on the number of tokens requested for the model's reply.
    MAX_COMPLETION_TOKENS = 4000

    # Score used when the model omits the score or returns an unusable value.
    DEFAULT_COMPLIANCE_SCORE = 0.5

    def __init__(self):
        """Initialize the reasoning service with the Groq client."""
        self.client = Groq(api_key=settings.GROQ_API_KEY)
        self.model = settings.REASONING_MODEL  # e.g., "mixtral-8x7b-32768"

    @retry(stop=stop_after_attempt(3), wait=wait_exponential(multiplier=1, min=2, max=10))
    async def analyze_document(self, document_id: str, sections: Dict[str, str], standards: List[str]) -> ComplianceReport:
        """Analyze a document against the given standards.

        Args:
            document_id: Identifier copied into the resulting report and used
                in log messages.
            sections: Mapping of section name to section text. Sections are
                rendered in mapping order, each under a ``# name`` heading.
            standards: Names of the standards the document is checked against.

        Returns:
            The report built from the model's reply.

        Raises:
            Exception: Any error from querying or parsing is logged and
                re-raised. This method is retried up to 3 times, and
                ``_query_groq`` is retried up to 3 times on its own, so a
                persistent API failure can result in up to 9 requests.
        """
        document_content = "\n\n".join(
            f"# {name}\n{content}" for name, content in sections.items()
        )

        # Use token-based truncation instead of character-based
        max_tokens = self.MAX_DOCUMENT_TOKENS
        token_count = count_tokens(document_content)

        logger.info(f"Document {document_id} has {token_count} tokens before truncation")

        if token_count > max_tokens:
            document_content = truncate_by_tokens(document_content, max_tokens)
            logger.info(f"Document {document_id} truncated to {max_tokens} tokens")

        prompt = self._create_analysis_prompt(document_content, standards)

        try:
            response = await self._query_groq(prompt)
            return self._parse_compliance_response(document_id, response, standards)
        except Exception as e:
            logger.error(f"Error analyzing document with Groq: {str(e)}")
            raise

    def _create_analysis_prompt(self, document_content: str, standards: List[str]) -> str:
        """Build the user prompt sent to the model.

        Args:
            document_content: Document text, already truncated if necessary.
            standards: Names of the standards, rendered as a bulleted list.

        Returns:
            The prompt: the document and standards wrapped in tags, followed
            by the analysis instructions and the expected JSON reply format.
        """
        standards_text = "\n".join(f"- {standard}" for standard in standards)

        # Doubled braces below are literal braces in the f-string; they show
        # the model the JSON shape that _parse_compliance_response reads.
        return f"""<document>
{document_content}
</document>

<standards>
{standards_text}
</standards>

You are an expert in document compliance and technical specifications. Please analyze the document above against the listed standards.

Your job is to identify compliance issues and provide detailed reasoning and recommendations. Focus on:
1. Technical accuracy and completeness
2. Compliance with the specified standards
3. Document structure and organization
4. Clarity and specificity of language
5. Consistency and coherence

For each compliance issue you find, please provide:
- The section where the issue appears
- A detailed description of the issue
- The severity level (critical, major, minor, or info)
- A thorough explanation of why this is an issue and how it impacts compliance
- Specific, actionable recommendations to fix the issue
- References to specific standards or best practices that apply

Respond in the following JSON format:
{{
  "summary": "Comprehensive overall assessment of the document",
  "compliance_score": 0.0 to 1.0,
  "issues": [
    {{
      "section": "Section name",
      "description": "Detailed issue description",
      "level": "critical/major/minor/info",
      "reasoning": "Thorough explanation of why this is an issue",
      "standard_references": ["Specific standards or requirements that are violated"],
      "recommendation": "Detailed, actionable recommendation to fix the issue"
    }}
  ]
}}"""

    @retry(stop=stop_after_attempt(3), wait=wait_exponential(multiplier=1, min=2, max=10))
    async def _query_groq(self, prompt: str) -> str:
        """Send the prompt to the Groq chat completion API.

        Args:
            prompt: User message content.

        Returns:
            The message content of the first choice in the reply.

        Raises:
            Exception: Any client error is logged and re-raised, which
                triggers the retry decorator (up to 3 attempts with
                exponential backoff).
        """
        # Local imports keep this block self-contained.
        import asyncio
        import functools

        try:
            request = functools.partial(
                self.client.chat.completions.create,
                model=self.model,
                messages=[
                    {"role": "system", "content": "You are an AI assistant specialized in document compliance analysis."},
                    {"role": "user", "content": prompt},
                ],
                max_tokens=self.MAX_COMPLETION_TOKENS,
                temperature=0.2,
                top_p=1.0,
            )
            # `create` is called here as a plain blocking call (it is not
            # awaited), so run it in the default executor instead of stalling
            # the event loop while waiting on the network.
            loop = asyncio.get_running_loop()
            response = await loop.run_in_executor(None, request)
            return response.choices[0].message.content
        except Exception as e:
            logger.error(f"Error querying Groq: {str(e)}")
            raise

    @staticmethod
    def _normalize_score(raw_score) -> float:
        """Convert the model's score to a float limited to 0.0-1.0."""
        try:
            score = float(raw_score)
        except (TypeError, ValueError):
            logger.warning(f"Invalid compliance score in response: {raw_score!r}")
            return ReasoningService.DEFAULT_COMPLIANCE_SCORE
        # The prompt asks for 0.0 to 1.0; clamp anything outside that range.
        return min(1.0, max(0.0, score))

    @staticmethod
    def _normalize_references(raw_references) -> List[str]:
        """Convert the model's standard references to a list of strings."""
        if not raw_references:
            return []
        if isinstance(raw_references, str):
            # A single reference given as a bare string instead of a list.
            return [raw_references]
        if isinstance(raw_references, (list, tuple)):
            return [str(reference) for reference in raw_references]
        return [str(raw_references)]

    def _parse_compliance_response(self, document_id: str, response: str, standards: List[str]) -> ComplianceReport:
        """Convert the model's reply into a ``ComplianceReport``.

        The JSON object is taken as the text from the first ``{`` to the last
        ``}``, so any text the model adds around the object is ignored.

        Args:
            document_id: Identifier copied into the report.
            response: Raw text returned by the model.
            standards: Standards recorded as ``applied_standards`` on success.

        Returns:
            The parsed report. If the extracted text is not valid JSON, a
            fallback report with score 0.0 and a single critical "System"
            issue is returned instead.

        Raises:
            ValueError: If the reply contains no ``{`` or no ``}``.
            Exception: Any other parsing error is logged and re-raised.
        """
        level_by_name = {
            "critical": ComplianceLevel.CRITICAL,
            "major": ComplianceLevel.MAJOR,
            "info": ComplianceLevel.INFO,
        }

        try:
            json_start = response.find('{')
            json_end = response.rfind('}') + 1

            # rfind returns -1 when no '}' exists, which makes json_end 0.
            if json_start == -1 or json_end == 0:
                raise ValueError("Could not find JSON in response")

            data = json.loads(response[json_start:json_end])

            summary = data.get("summary", "No summary provided")
            compliance_score = self._normalize_score(
                data.get("compliance_score", self.DEFAULT_COMPLIANCE_SCORE)
            )

            issues = []
            # `or []` tolerates an explicit null for "issues".
            for issue_data in data.get("issues") or []:
                if not isinstance(issue_data, dict):
                    logger.warning(f"Skipping malformed issue entry: {issue_data!r}")
                    continue

                # Missing, null or unrecognized levels fall back to MINOR.
                level_str = str(issue_data.get("level") or "minor").strip().lower()
                level = level_by_name.get(level_str, ComplianceLevel.MINOR)

                issues.append(ComplianceIssue(
                    section=issue_data.get("section", "Unknown"),
                    description=issue_data.get("description", "No description provided"),
                    level=level,
                    reasoning=issue_data.get("reasoning", "No detailed reasoning provided"),
                    standard_references=self._normalize_references(
                        issue_data.get("standard_references")
                    ),
                    recommendation=issue_data.get("recommendation", "No recommendation provided"),
                ))

            return ComplianceReport(
                document_id=document_id,
                compliance_score=compliance_score,
                summary=summary,
                issues=issues,
                applied_standards=standards,
            )
        except json.JSONDecodeError:
            # Must precede the generic handler: malformed JSON yields a
            # fallback report rather than an exception.
            logger.error("Failed to parse JSON from response")
            return ComplianceReport(
                document_id=document_id,
                compliance_score=0.0,
                summary="Failed to analyze document due to parsing error.",
                issues=[
                    ComplianceIssue(
                        section="System",
                        description="Failed to parse compliance analysis results.",
                        level=ComplianceLevel.CRITICAL,
                        reasoning="The system encountered an error while parsing the compliance analysis results.",
                        standard_references=[],
                        recommendation="Please try resubmitting the document or contact support.",
                    )
                ],
                applied_standards=[],
            )
        except Exception as e:
            logger.error(f"Error parsing compliance response: {str(e)}")
            raise
|
||||
Reference in New Issue
Block a user