Implement code changes to enhance functionality and improve performance

2025-10-10 17:18:52 +00:00
parent 3559cbe19d
commit c8da3c61ca
4 changed files with 1632 additions and 749 deletions
@@ -2,6 +2,7 @@ import base64
 import json
 import logging
 import os
+import re
 from datetime import datetime
 from typing import Any, Dict

@@ -125,23 +126,36 @@ class DocumentProcessor:
            # Build AI rules context for categorization
            ai_rules_context = ""
            if ai_rules and len(ai_rules) > 0:
-                ai_rules_context = "\n            CATEGORIZATION RULES (IMPORTANT - Apply these first):"
+                # Create a simple, direct instruction for each rule
+                ai_rules_context = "\n            "
                for idx, rule in enumerate(ai_rules, 1):
                    condition = rule.get("condition", "")
                    action = rule.get("action", "")
-                    ai_rules_context += f"\n            {idx}. If {condition} → set category to '{action}'"
-                ai_rules_context += "\n            - Apply these custom rules before using default categorization logic\n            - If multiple rules match, use the first matching rule\n            - If no rules match, use default categorization based on vendor type"
+                    
+                    # Extract the keyword and category from the rule
+                    keyword_match = re.search(r'CONTAINS\s+"([^"]+)"', condition, re.IGNORECASE)
+                    category_match = re.search(r'SET_CATEGORY:\s*(.+)', action, re.IGNORECASE)
+                    
+                    if keyword_match and category_match:
+                        keyword = keyword_match.group(1)
+                        category = category_match.group(1).strip()
+                        # Create one simple instruction per line
+                        ai_rules_context += f'If the Vendor name contains "{keyword}": Set category to "{category}"\n            '
+                
+                ai_rules_context += "\n"

            # Create Groq vision prompt
            prompt = f"""
-            Analyze this receipt image and extract the following information in JSON format:
+            Analyze this receipt image and extract the following information in JSON format.
+            {ai_rules_context}
+            JSON Format:
            {{
                "vendor": "Store/company name",
                "description": "Detailed description of items/services purchased",
                "total_amount": 0.00,
                "tax_amount": 0.00,
                "date": "YYYY-MM-DD",
-                "category": "Food/Transport/Office/Other",
+                "category": "Food/Transport/Office/Other (apply any vendor rules above first)",
                "confidence": 0.95,
                "currency": "USD",
                "location": "Province/State, Country",
@@ -150,10 +164,11 @@ class DocumentProcessor:
                "name_of_asset": null,
                "cca_rate": null,
                "useful_life": null,
-                "residual_value": null
+                "residual_value": null,
+                "extraction_success": true
            }}
            
-            Rules:
+            EXTRACTION Rules:
            - Extract vendor name as it appears on receipt
            - Extract description of items/services purchased (e.g., "Coffee and sandwich", "Gasoline", "Office supplies")
            - Total amount should be the final total including tax
@@ -161,7 +176,7 @@ class DocumentProcessor:
            - Date should be the date on the receipt
            - Confidence score 0-1 based on how clear the receipt is
            - Currency should be the currency used on the receipt (e.g., "USD", "EUR", "CAD")
-            {ai_rules_context}
+            
            {user_location_context}
            LOCATION & TAX RULES:
            - Extract location from receipt (look for store address, province/state, country)
@@ -199,11 +214,9 @@ class DocumentProcessor:
              * residual_value: Estimated value at end of life (typically 10% of purchase price for equipment, 20% for vehicles)
            - If is_depreciable is false, set name_of_asset, cca_rate, useful_life, and residual_value to null

-            CATEGORY RULES:
-            - Assign the category based on all the details in the receipt
            Return only valid JSON.
            """
-
+            logger.info(f"This is the prompt: {prompt}")
            # Call Groq vision API with correct format
            response = self.client.chat.completions.create(
                messages=[
@@ -293,16 +306,27 @@ class DocumentProcessor:
            # Build AI rules context for categorization
            ai_rules_context = ""
            if ai_rules and len(ai_rules) > 0:
-                ai_rules_context = "\n            CATEGORIZATION RULES (IMPORTANT - Apply these first):"
+                # Create a simple, direct instruction for each rule
+                ai_rules_context = "\n            "
                for idx, rule in enumerate(ai_rules, 1):
                    condition = rule.get("condition", "")
                    action = rule.get("action", "")
-                    ai_rules_context += f"\n            {idx}. If {condition} → set category to '{action}'"
-                ai_rules_context += "\n            - Apply these custom rules before using default categorization logic\n            - If multiple rules match, use the first matching rule\n            - If no rules match, use default categorization based on vendor type"
+                    
+                    # Extract the keyword and category from the rule
+                    keyword_match = re.search(r'CONTAINS\s+"([^"]+)"', condition, re.IGNORECASE)
+                    category_match = re.search(r'SET_CATEGORY:\s*(.+)', action, re.IGNORECASE)
+                    
+                    if keyword_match and category_match:
+                        keyword = keyword_match.group(1)
+                        category = category_match.group(1).strip()
+                        # Create one simple instruction per line
+                        ai_rules_context += f'If the Vendor name contains "{keyword}": Set category to "{category}"\n            '
+                
+                ai_rules_context += "\n"

            prompt = f"""
-            Analyze this receipt text and extract the following information in JSON format:
-            
+            Analyze this receipt text and extract the following information in JSON format.
+            {ai_rules_context}
            Receipt Text:
            {text_content}
            
@@ -313,7 +337,7 @@ class DocumentProcessor:
                "total_amount": 0.00,
                "tax_amount": 0.00,
                "date": "YYYY-MM-DD",
-                "category": "Food/Transport/Office/Other",
+                "category": "Food/Transport/Office/Other (apply any vendor rules above first)",
                "confidence": 0.95,
                "currency": "USD",
                "location": "Province/State, Country",
@@ -322,10 +346,11 @@ class DocumentProcessor:
                "name_of_asset": null,
                "cca_rate": null,
                "useful_life": null,
-                "residual_value": null
+                "residual_value": null,
+                "extraction_success": true
            }}
            
-            Rules:
+            EXTRACTION Rules:
            - Extract vendor name as it appears on receipt
            - Extract description of items/services purchased (e.g., "Coffee and sandwich", "Gasoline", "Office supplies")
            - Total amount should be the final total including tax
@@ -333,7 +358,6 @@ class DocumentProcessor:
            - Date should be the date on the receipt
            - Confidence score 0-1 based on clarity
            - Currency should be the currency used on the receipt (e.g., "USD", "EUR", "CAD")
-            {ai_rules_context}
            {user_location_context}
            LOCATION & TAX RULES:
            - Extract location from receipt (look for store address, province/state, country)