Enhance receipt matching by adding user location support and implementing LLM-based tax analysis rules

2025-10-05 13:25:55 +01:00
parent 3d48cf0385
commit c78c4c6fe9
5 changed files with 644 additions and 9 deletions
@@ -0,0 +1,446 @@
+import json
+import logging
+from typing import Any, Dict
+
+import groq
+from config import settings
+from schemas import Receipt, Transaction
+
+logger = logging.getLogger(__name__)
+
+
+class LLMTaxAnalyzer:
+    """
+    Uses LLM to intelligently apply tax rules based on context.
+
+    Implements four core tax rules:
+    1. Sales Tax Rule - Based on receipt location (shipping/billing address)
+    2. Foreign Exchange Rule - Handles currency mismatches
+    3. Depreciation Rule - Capital assets (based on user location)
+    4. Meals & Entertainment Rule - 50% tax deduction, 100% accounting deduction
+
+    The two class-level tables below are reference data: both are serialized
+    to JSON and embedded in the prompt context, and PROVINCIAL_TAX_RATES is
+    additionally used to look up the rate and tax name for the user's province.
+    """
+
+    # Provincial tax rates for reference
+    # Keyed by two-letter province/territory code. Per entry:
+    #   "rate" - combined sales-tax rate as a fraction (0.13 == 13%)
+    #   "name" - tax label shown to the LLM
+    #   "type" - descriptive label for how the rate is composed
+    # NOTE(review): these rates are hard-coded and not date-aware. In
+    # particular, NS HST is believed to have dropped from 15% to 14% on
+    # 2025-04-01 - confirm against the current CRA rate table.
+    PROVINCIAL_TAX_RATES = {
+        "ON": {"rate": 0.13, "name": "HST", "type": "Harmonized"},
+        "QC": {"rate": 0.14975, "name": "QST + GST", "type": "Combined"},
+        "BC": {"rate": 0.12, "name": "PST + GST", "type": "Combined"},
+        "AB": {"rate": 0.05, "name": "GST", "type": "Federal only"},
+        "SK": {"rate": 0.11, "name": "PST + GST", "type": "Combined"},
+        "MB": {"rate": 0.12, "name": "PST + GST", "type": "Combined"},
+        "NS": {"rate": 0.15, "name": "HST", "type": "Harmonized"},
+        "NB": {"rate": 0.15, "name": "HST", "type": "Harmonized"},
+        "NL": {"rate": 0.15, "name": "HST", "type": "Harmonized"},
+        "PE": {"rate": 0.15, "name": "HST", "type": "Harmonized"},
+        "NT": {"rate": 0.05, "name": "GST", "type": "Federal only"},
+        "NU": {"rate": 0.05, "name": "GST", "type": "Federal only"},
+        "YT": {"rate": 0.05, "name": "GST", "type": "Federal only"},
+    }
+
+    # CCA rates by asset class (simplified)
+    # Keyed by asset category; values are annual rates as fractions, matching
+    # the declining-balance CCA rule described in the LLM prompt. The inline
+    # comments give the class number each rate corresponds to.
+    CCA_RATES = {
+        "vehicles": 0.30,  # Class 10
+        "computer_equipment": 0.55,  # Class 50
+        "furniture": 0.20,  # Class 8
+        "buildings": 0.04,  # Class 1
+        "machinery": 0.20,  # Class 8
+    }
+
+    def __init__(self):
+        """
+        Set the model name and retry count, then create the Groq client.
+
+        The client is authenticated with ``settings.GROQ_API_KEY``.
+        ``max_retries`` is stored on the instance; no method visible in this
+        class consults it.
+        """
+        self.max_retries = 3
+        self.model = "llama-3.1-8b-instant"
+        self.client = groq.Groq(api_key=settings.GROQ_API_KEY)
+
+    def analyze_and_apply_tax_rules(
+        self,
+        receipt: Receipt,
+        transaction: Transaction,
+        user_location: str = "ON",  # Default to Ontario
+    ) -> Dict[str, Any]:
+        """
+        Run the LLM-backed tax analysis for one receipt/transaction pair.
+
+        The work is a three-step pipeline: render the pair and the user's
+        province into a text context, send that context to the LLM together
+        with the four tax rules (sales tax, foreign exchange, depreciation,
+        meals & entertainment), then parse the reply into a dictionary.
+
+        Args:
+            receipt: Receipt side of the match.
+            transaction: Transaction side of the match.
+            user_location: Province code of the user; defaults to "ON".
+
+        Returns:
+            The structured analysis produced by
+            ``_structure_analysis_results``.
+        """
+        prompt_context = self._build_analysis_context(
+            receipt, transaction, user_location
+        )
+        raw_analysis = self._get_llm_tax_analysis(prompt_context)
+        return self._structure_analysis_results(
+            raw_analysis, receipt, transaction, user_location
+        )
+
+    def _build_analysis_context(
+        self, receipt: Receipt, transaction: Transaction, user_location: str
+    ) -> str:
+        """
+        Render the receipt, transaction and user province as prompt text.
+
+        The text has labelled sections for the receipt, the transaction, the
+        user's province (with its rate and tax name), the detected receipt
+        location, and JSON dumps of both reference tables.
+        """
+
+        # Location block for the receipt (shipping address wins over billing)
+        receipt_location = self._extract_receipt_location(receipt)
+
+        # Province lookup; unknown codes fall back to a 13% "HST" entry
+        province_code = user_location.upper()
+        province_info = self.PROVINCIAL_TAX_RATES.get(province_code, {})
+        rate_percent = province_info.get("rate", 0.13) * 100
+        tax_label = province_info.get("name", "HST")
+
+        # Reference tables, pretty-printed for the prompt
+        tax_rates_json = json.dumps(self.PROVINCIAL_TAX_RATES, indent=2)
+        cca_rates_json = json.dumps(self.CCA_RATES, indent=2)
+
+        return f"""
+RECEIPT DETAILS:
+- Vendor: {receipt.vendor}
+- Amount: ${receipt.amount:.2f}
+- Tax: ${receipt.tax:.2f}
+- Currency: {receipt.currency}
+- Date: {receipt.receipt_date.strftime("%Y-%m-%d")}
+- Category: {receipt.category}
+- Description: {receipt.description}
+- Billing Address: {self._format_address(receipt.billing_address)}
+- Shipping Address: {self._format_address(receipt.shipping_address)}
+- Is Meals & Entertainment: {receipt.is_meals_entertainment}
+
+TRANSACTION DETAILS:
+- Vendor: {transaction.vendor}
+- Amount: ${transaction.amount:.2f}
+- Currency: {transaction.currency}
+- Date: {transaction.transaction_date.strftime("%Y-%m-%d")}
+- Notes: {transaction.notes}
+- FX Rate: {transaction.fx_rate if transaction.fx_rate else "N/A"}
+
+USER CONTEXT:
+- User Location (Province): {province_code}
+- User Province Tax Rate: {rate_percent}%
+- User Tax Type: {tax_label}
+
+RECEIPT LOCATION DETECTED:
+{receipt_location}
+
+PROVINCIAL TAX RATES REFERENCE:
+{tax_rates_json}
+
+CCA DEPRECIATION RATES BY ASSET CLASS:
+{cca_rates_json}
+"""
+
+    def _extract_receipt_location(self, receipt: Receipt) -> str:
+        """
+        Describe where the receipt was issued, as bullet lines for the prompt.
+
+        The shipping address is used when it is truthy; otherwise the billing
+        address. With neither available, a single line saying that no address
+        information exists is returned.
+        """
+        address = receipt.shipping_address or receipt.billing_address
+
+        if not address:
+            return "- No address information available (will use user location)"
+
+        return f"""
+- Province: {address.province}
+- City: {address.city}
+- Country: {address.country}
+- Postal Code: {address.postal_code}
+"""
+
+    def _format_address(self, address) -> str:
+        """
+        Render an address as "city, province, country (postal_code)".
+
+        Returns "Not provided" when the address is missing or falsy.
+        """
+        if not address:
+            return "Not provided"
+
+        place = f"{address.city}, {address.province}, {address.country}"
+        return f"{place} ({address.postal_code})"
+
+    def _get_llm_tax_analysis(self, context: str) -> str:
+        """
+        Ask the LLM to apply the four tax rules to the supplied context.
+
+        The context is embedded in a fixed prompt that spells out the rules
+        and the JSON shape expected back. The reply text is returned stripped
+        of surrounding whitespace. Any exception raised while calling the
+        model or reading its reply is logged, and the canned fallback analysis
+        is returned instead.
+        """
+
+        prompt = f"""
+You are a Canadian tax expert analyzing a receipt-transaction match. Apply the following tax rules intelligently:
+
+{context}
+
+=== FOUR CORE TAX RULES ===
+
+### 1. SALES TAX RULE
+**Purpose**: Calculate and apply correct sales tax based on shipping and billing addresses.
+
+**Key Principles**:
+- When billing and shipping addresses are THE SAME: Apply sales tax based on that address location.
+- When billing and shipping addresses are DIFFERENT: Apply sales tax based on the SHIPPING address.
+- Tax rate is determined by the RECEIPT'S location, NOT the user's location (unless no receipt location).
+
+**Scenario Examples**:
+a) User in Ontario, Receipt from Quebec:
+   - Apply Quebec's tax rate (14.975% QST+GST), not Ontario's 13% HST
+   - The user's location is only for depreciation purposes
+   
+b) User in Ontario, Receipt from USA (New York):
+   - DO NOT apply Canadian sales tax
+   - This is an international transaction
+   - Flag for FX review instead
+   
+c) User in USA (New York), Receipt from California:
+   - Apply California's sales tax rate (receipt location)
+   - Not New York's rate (user location)
+   
+d) User in Ontario, Receipt has NO address information:
+   - DEFAULT to user's location (Ontario 13% HST)
+   - This is the fallback when receipt location is unknown
+
+**Tax Calculation**:
+- Compare calculated tax vs stated tax on receipt
+- Flag discrepancies for review
+
+### 2. FOREIGN EXCHANGE (FX) RULE
+**Purpose**: Handle currency mismatches between receipts and transactions.
+
+**Actions**:
+- Identify when receipt currency ≠ transaction currency (e.g., USD vs CAD)
+- Calculate the absolute discrepancy: |receipt_amount - transaction_amount|
+- ALWAYS flag for manual review - DO NOT fetch exchange rates automatically
+- If FX rate is provided in transaction data, note it but still require manual review
+
+**Examples**:
+- Transaction: USD $100, Receipt: CAD $125 → Discrepancy: $25, Flag for review
+- The user must manually approve or adjust the FX difference
+
+### 3. DEPRECIATION RULE
+**Purpose**: Calculate depreciation for assets using two methods.
+
+**Key Principle**: Depreciation is ALWAYS based on USER'S location, NOT receipt location.
+
+**Asset Identification**:
+- Only applies to capital assets: vehicles, equipment, furniture, buildings, machinery
+- Identify from receipt category and description
+- Typical threshold: Assets generally > $500
+
+**Two Methods Required**:
+a) **Straight-Line Depreciation** (for accounting purposes):
+   Formula: (Cost - Residual Value) / Useful Life
+   Example: Asset $10,000, 5-year life, $1,000 residual = $1,800/year
+   
+b) **CCA Depreciation** (for tax purposes - Canada):
+   Method: Declining Balance
+   Formula: Book Value × CCA Rate each year
+   Example: Truck $20,000, 30% CCA:
+   - Year 1: $20,000 × 30% = $6,000
+   - Year 2: ($20,000 - $6,000) × 30% = $4,200
+   - Continues declining each year
+
+**CCA Classes** (Canada):
+- Vehicles: 30% (Class 10)
+- Computer Equipment: 55% (Class 50)
+- Furniture/Machinery: 20% (Class 8)
+- Buildings: 4% (Class 1)
+
+### 4. MEALS & ENTERTAINMENT TAX DEDUCTION RULE
+**Purpose**: Apply correct deductions for meals and entertainment expenses.
+
+**Deduction Rules**:
+- **For Tax Purposes**: Only 50% of total receipt amount is deductible
+- **For Accounting Purposes**: 100% of total receipt amount is deductible
+- **Sales Tax**: Full sales tax amount is deductible for accounting
+
+**Example**:
+- Receipt: $100 meal + $12 sales tax = $112 total
+- **Tax Deduction**: $50 (50% of meal) + $12 (full tax) = $62
+- **Accounting Deduction**: $100 (full meal) + $12 (full tax) = $112
+
+=== LOCATION-BASED SCENARIO HANDLING ===
+
+**When Receipt Location ≠ User Location**:
+
+1. **Sales Tax**: Use RECEIPT's location for tax calculation
+   - Exception: If international (different country), no Canadian sales tax + flag FX
+   - Exception: If no location on receipt, use user's location as default
+
+2. **Depreciation**: ALWAYS use USER's location for depreciation rules
+   - Receipt location is irrelevant for depreciation
+   - Apply user's country/province depreciation methods
+
+3. **FX Handling**: 
+   - If receipt currency ≠ transaction currency: Flag for manual review
+   - Do NOT automatically fetch or apply exchange rates
+
+4. **Missing Location**: 
+   - If receipt has no address: Default to user's location for sales tax
+   - Still apply user's location for depreciation
+
+=== ANALYSIS REQUIRED ===
+
+Provide a structured JSON response with the following format:
+
+{{
+  "sales_tax": {{
+    "applicable_province": "XX",
+    "applicable_rate": 0.XX,
+    "tax_name": "HST/GST/PST/QST",
+    "calculated_tax": XX.XX,
+    "stated_tax": XX.XX,
+    "discrepancy": XX.XX,
+    "reason": "Detailed explanation: which address used (billing/shipping), why this location, which scenario applies",
+    "requires_review": true/false
+  }},
+  "foreign_exchange": {{
+    "currency_mismatch": true/false,
+    "receipt_currency": "XXX",
+    "transaction_currency": "XXX",
+    "receipt_amount": XX.XX,
+    "transaction_amount": XX.XX,
+    "discrepancy": XX.XX,
+    "requires_manual_review": true/false,
+    "reason": "Explanation of FX situation"
+  }},
+  "depreciation": {{
+    "is_capital_asset": true/false,
+    "asset_class": "category name or N/A",
+    "suggested_cca_rate": 0.XX,
+    "straight_line_applicable": true/false,
+    "cca_applicable": true/false,
+    "straight_line_example": "Brief calculation example if applicable",
+    "cca_example": "Brief calculation example if applicable",
+    "reason": "Why this is/isn't a capital asset, which CCA class, and why depreciation based on user's location"
+  }},
+  "meals_entertainment": {{
+    "is_meals_entertainment": true/false,
+    "tax_deduction_amount": XX.XX,
+    "accounting_deduction_amount": XX.XX,
+    "sales_tax_included": XX.XX,
+    "reason": "Explanation of M&E rule application"
+  }},
+  "confidence_adjustment": {{
+    "boost": 0.XX,
+    "reduce": 0.XX,
+    "reason": "Why confidence should be adjusted based on tax analysis"
+  }},
+  "overall_assessment": "Comprehensive summary: which rules applied, why, what location used for what purpose, and any required actions"
+}}
+
+**Critical Reminders**:
+- Sales tax uses RECEIPT location (or user location if receipt has none)
+- Depreciation ALWAYS uses USER location
+- For different addresses, use SHIPPING address for sales tax
+- International transactions: no Canadian tax + FX flag
+- Be precise with all calculations
+- Always explain your reasoning clearly
+"""
+
+        # System message pins the persona and output format; the user
+        # message carries the full rule text plus the rendered context.
+        chat_messages = [
+            {
+                "role": "system",
+                "content": "You are a Canadian tax expert. Analyze transactions and apply tax rules accurately. Always return valid JSON.",
+            },
+            {"role": "user", "content": prompt},
+        ]
+
+        try:
+            completion = self.client.chat.completions.create(
+                model=self.model,
+                messages=chat_messages,
+                temperature=0.1,  # Low temperature for consistent, factual responses
+                max_tokens=2000,
+            )
+
+            analysis_text = completion.choices[0].message.content.strip()
+            logger.info(f"LLM tax analysis received: {len(analysis_text)} characters")
+            return analysis_text
+
+        except Exception as e:
+            # Best-effort boundary: any failure degrades to the canned analysis
+            logger.error(f"Error getting LLM tax analysis: {str(e)}")
+            return self._get_fallback_analysis()
+
+    def _get_fallback_analysis(self) -> str:
+        """
+        Build the canned analysis used when the LLM call fails.
+
+        The result is a JSON string with the same top-level sections the LLM
+        is asked for. It uses Ontario HST defaults, flags the sales tax for
+        review, and reduces confidence by 0.1.
+        """
+        unavailable = "Analysis not available"
+
+        fallback = {
+            "sales_tax": {
+                "applicable_province": "ON",
+                "applicable_rate": 0.13,
+                "tax_name": "HST",
+                "calculated_tax": 0.0,
+                "stated_tax": 0.0,
+                "discrepancy": 0.0,
+                "reason": "LLM analysis failed - using defaults",
+                "requires_review": True,
+            },
+            "foreign_exchange": {
+                "currency_mismatch": False,
+                "requires_manual_review": False,
+                "reason": unavailable,
+            },
+            "depreciation": {
+                "is_capital_asset": False,
+                "reason": unavailable,
+            },
+            "meals_entertainment": {
+                "is_meals_entertainment": False,
+                "reason": unavailable,
+            },
+            "confidence_adjustment": {
+                "boost": 0.0,
+                "reduce": 0.1,
+                "reason": "LLM analysis failed - recommend manual review",
+            },
+            "overall_assessment": "Automatic analysis failed. Manual review recommended.",
+        }
+
+        return json.dumps(fallback)
+
+    def _structure_analysis_results(
+        self,
+        llm_response: str,
+        receipt: Receipt,
+        transaction: Transaction,
+        user_location: str,
+    ) -> Dict[str, Any]:
+        """
+        Parse the LLM response and attach identifying metadata.
+
+        Args:
+            llm_response: Raw text returned by the model (or the canned
+                fallback JSON). May be bare JSON, JSON inside a markdown code
+                fence, or JSON surrounded by explanatory prose.
+            receipt: Receipt the analysis refers to; its ``id`` and
+                ``currency`` are read.
+            transaction: Transaction the analysis refers to; its ``id`` and
+                ``currency`` are read.
+            user_location: Province code the analysis was run for.
+
+        Returns:
+            On success, the parsed JSON object with a ``metadata`` section
+            added (``analysis_method`` is "LLM-based"). If no JSON object can
+            be recovered, a conservative fallback dictionary that flags the
+            sales tax for review, flags FX review when the currencies differ,
+            reduces confidence by 0.15, carries the parse error under
+            ``error``, and marks ``analysis_method`` as "fallback".
+        """
+
+        try:
+            analysis = self._parse_llm_json(llm_response)
+        except ValueError as e:
+            # json.JSONDecodeError subclasses ValueError, so this covers both
+            # malformed JSON and well-formed JSON that is not an object.
+            logger.error(f"Failed to parse LLM response as JSON: {str(e)}")
+            logger.error(f"LLM response was: {llm_response}")
+
+            # A currency mismatch is determinable without the LLM, so keep
+            # flagging it even though the analysis itself is unavailable.
+            currency_mismatch = receipt.currency != transaction.currency
+
+            # Return structured fallback with every section the success path
+            # is expected to carry, so consumers can index it uniformly.
+            return {
+                "sales_tax": {
+                    "requires_review": True,
+                    "reason": "Failed to parse LLM response",
+                },
+                "foreign_exchange": {
+                    "currency_mismatch": currency_mismatch,
+                    "requires_manual_review": currency_mismatch,
+                },
+                "depreciation": {"is_capital_asset": False},
+                "meals_entertainment": {
+                    "is_meals_entertainment": bool(receipt.is_meals_entertainment),
+                    "reason": "Taken from receipt flag - LLM analysis not available",
+                },
+                "confidence_adjustment": {
+                    "boost": 0.0,
+                    "reduce": 0.15,
+                    "reason": "Analysis parsing failed",
+                },
+                "overall_assessment": "Analysis failed. Manual review required.",
+                "error": str(e),
+                "metadata": {
+                    "user_location": user_location,
+                    "receipt_id": receipt.id,
+                    "transaction_id": transaction.id,
+                    "analysis_method": "fallback",
+                    "model": self.model,
+                },
+            }
+
+        # Add metadata
+        analysis["metadata"] = {
+            "user_location": user_location,
+            "receipt_id": receipt.id,
+            "transaction_id": transaction.id,
+            "analysis_method": "LLM-based",
+            "model": self.model,
+        }
+
+        return analysis
+
+    @staticmethod
+    def _parse_llm_json(llm_response: str) -> Dict[str, Any]:
+        """
+        Recover the JSON object from an LLM reply.
+
+        Tries, in order: the contents of a markdown code fence if one is
+        present, the text as-is, and finally the first JSON value that starts
+        at the first "{" (which tolerates prose before and after the object).
+
+        Raises:
+            ValueError: If no JSON can be decoded (as ``json.JSONDecodeError``)
+                or the decoded value is not a JSON object.
+        """
+        text = llm_response.strip()
+
+        # Extract JSON from LLM response (may have markdown code blocks)
+        if "```json" in text:
+            text = text.split("```json")[1].split("```")[0].strip()
+        elif "```" in text:
+            text = text.split("```")[1].split("```")[0].strip()
+
+        try:
+            parsed = json.loads(text)
+        except json.JSONDecodeError:
+            start = text.find("{")
+            if start == -1:
+                raise
+            # raw_decode stops at the end of the first complete JSON value,
+            # so trailing commentary after the object is ignored.
+            parsed, _ = json.JSONDecoder().raw_decode(text[start:])
+
+        if not isinstance(parsed, dict):
+            raise ValueError(
+                f"Expected a JSON object, got {type(parsed).__name__}"
+            )
+        return parsed