refactor: Improve analysis display and enhance issue reranking

- Removed debug print statements from analysis retrieval and issue extraction.
- Implemented a reranking method that orders issues and recommendations by their relevance to compliance issues, using Cohere's reranker.
- Updated the analysis HTML template to number each issue and render its text with markdown support.
2025-04-22 12:57:09 +01:00
parent c4145977dd
commit 9259d61e96
3 changed files with 44 additions and 5 deletions
@@ -135,7 +135,6 @@ async def get_analysis(request: Request, doc_id: str):
        analysis = await document_processor.get_analysis(doc_id)
        metadata = database.get_metadata(doc_id)
        
-        print(f"analysis: {analysis}")
        return templates.TemplateResponse(
            "analysis.html", 
            {
@@ -122,6 +122,8 @@ class DocumentProcessor:
                    try:
                        deepseek_result = deepseek_response.json()
                        issues_and_recommendations = self._extract_issues_and_recommendations(deepseek_result['choices'][0]['message']['content'])
+                        # Rerank the issues and recommendations
+                        issues_and_recommendations = self._rerank_issues_and_recommendations(issues_and_recommendations)
                    except (json.JSONDecodeError, KeyError) as e:
                        logging.error(f"Error parsing DeepSeek response: {str(e)}")
                        logging.error(f"Response text: {deepseek_response.text}")
@@ -250,9 +252,45 @@ class DocumentProcessor:
                        
                        issues_and_recommendations.append({'issue': issue, 'recommendation': recommendation})
        
-        print(f"issues_and_recommendations: {issues_and_recommendations}")
        return issues_and_recommendations
        
    def _store_document(self, doc_id: str, file_path: str):
        # save document to vector store
-        self.vector_store.add_document(doc_id, file_path)
+        self.vector_store.add_document(doc_id, file_path)
+
+    def _rerank_issues_and_recommendations(self, issues_and_recommendations: List[Dict[str, str]]) -> List[Dict[str, str]]:
+        """
+        Rerank issue/recommendation pairs by relevance to a fixed compliance query.
+
+        Each pair is sent to Cohere's reranker as one combined document. Returns a
+        new list in reranked order, or the input unchanged (original order) when
+        it is empty or when reranking fails for any reason (best-effort step).
+        """
+        if not issues_and_recommendations:
+            return issues_and_recommendations
+
+        try:
+            logging.info("Reranking issues and recommendations")
+            # Combine issue and recommendation so each pair is scored with full context
+            documents = [
+                f"Issue: {item['issue']}\nRecommendation: {item['recommendation']}"
+                for item in issues_and_recommendations
+            ]
+
+            # Use Cohere's reranker to score the issues
+            response = self.cohere_client.rerank(
+                query="Critical compliance issues and their solutions in technical documents",
+                documents=documents,
+                model=config.COHERE_RERANKER_MODEL,
+                top_n=len(documents)  # Get all results ranked
+            )
+
+            # Current Cohere SDK responses keep the hits under `.results`; iterating
+            # the response itself only works on clients that return an iterable.
+            results = getattr(response, "results", response)
+            return [issues_and_recommendations[result.index] for result in results]
+
+        except Exception as e:
+            logging.error(f"Error reranking issues: {str(e)}")
+            # If reranking fails, return the original order
+            return issues_and_recommendations
@@ -35,8 +35,10 @@
                    {% for item in analysis.issues_and_recommendations %}
                    <div class="list-group-item">
                        <div class="mb-2">
-                            <strong>Issue:</strong>
-                            <p class="mb-3">{{ item.issue }}</p>
+                            <strong>Issue {{ loop.index }}:</strong>
+                            <div class="markdown-body mb-3">
+                                {{ item.issue|markdown|safe }}
+                            </div>
                            <strong>Recommendation:</strong>
                            <div class="alert alert-info markdown-body mt-2">
                                {{ item.recommendation|markdown|safe }}