From 9259d61e96f95f064cdbf918a3d7281b01669b5b Mon Sep 17 00:00:00 2001 From: boladeE Date: Tue, 22 Apr 2025 12:57:09 +0100 Subject: [PATCH] refactor: Improve analysis display and enhance issue reranking - Removed debug print statements from analysis retrieval. - Implemented a reranking method for issues and recommendations based on relevance to compliance. - Updated the analysis HTML template to display issues with improved formatting and markdown support. --- src/main.py | 1 - src/services/document_processor.py | 42 ++++++++++++++++++++++++++++-- src/templates/analysis.html | 6 +++-- 3 files changed, 44 insertions(+), 5 deletions(-) diff --git a/src/main.py b/src/main.py index f435e1c..a5c02f1 100644 --- a/src/main.py +++ b/src/main.py @@ -135,7 +135,6 @@ async def get_analysis(request: Request, doc_id: str): analysis = await document_processor.get_analysis(doc_id) metadata = database.get_metadata(doc_id) - print(f"analysis: {analysis}") return templates.TemplateResponse( "analysis.html", { diff --git a/src/services/document_processor.py b/src/services/document_processor.py index df776cc..fef88b5 100644 --- a/src/services/document_processor.py +++ b/src/services/document_processor.py @@ -122,6 +122,8 @@ class DocumentProcessor: try: deepseek_result = deepseek_response.json() issues_and_recommendations = self._extract_issues_and_recommendations(deepseek_result['choices'][0]['message']['content']) + # Rerank the issues and recommendations + issues_and_recommendations = self._rerank_issues_and_recommendations(issues_and_recommendations) except (json.JSONDecodeError, KeyError) as e: logging.error(f"Error parsing DeepSeek response: {str(e)}") logging.error(f"Response text: {deepseek_response.text}") @@ -250,9 +252,45 @@ class DocumentProcessor: issues_and_recommendations.append({'issue': issue, 'recommendation': recommendation}) - print(f"issues_and_recommendations: {issues_and_recommendations}") return issues_and_recommendations def _store_document(self, doc_id: str, file_path: str): # save document to vector store - self.vector_store.add_document(doc_id, file_path) \ No newline at end of file + self.vector_store.add_document(doc_id, file_path) + + def _rerank_issues_and_recommendations(self, issues_and_recommendations: List[Dict[str, str]]) -> List[Dict[str, str]]: + """ + Rerank issues and recommendations based on their relevance to compliance issues. + Uses Cohere's reranker to score and sort the issues. + """ + if not issues_and_recommendations: + return issues_and_recommendations + + try: + logging.info(f"Reranking issues and recommendations") + # Prepare documents for reranking - combine issue and recommendation for context + documents = [ + f"Issue: {item['issue']}\nRecommendation: {item['recommendation']}" + for item in issues_and_recommendations + ] + + # Use Cohere's reranker to score the issues + reranked = self.cohere_client.rerank( + query="Critical compliance issues and their solutions in technical documents", + documents=documents, + model=config.COHERE_RERANKER_MODEL, + top_n=len(documents) # Get all results ranked + ) + + # Create a new list with reranked issues and recommendations + reranked_results = [] + for result in reranked: + original_index = result.index + reranked_results.append(issues_and_recommendations[original_index]) + + return reranked_results + + except Exception as e: + logging.error(f"Error reranking issues: {str(e)}") + # If reranking fails, return the original order + return issues_and_recommendations \ No newline at end of file diff --git a/src/templates/analysis.html b/src/templates/analysis.html index c82b9ef..30c1370 100644 --- a/src/templates/analysis.html +++ b/src/templates/analysis.html @@ -35,8 +35,10 @@ {% for item in analysis.issues_and_recommendations %}
- Issue: -

{{ item.issue }}

+ Issue {{ loop.index }}: +
+ {{ item.issue|markdown|safe }} +
Recommendation:
{{ item.recommendation|markdown|safe }}