import json
import logging
import math
from typing import Any, Dict

import groq

from config import settings
from schemas import Receipt, Transaction

logger = logging.getLogger(__name__)


# Prompt templates are rendered with ``str.format``: ``{context}`` (and
# ``{num_matches}`` for the batch template) are the only placeholders, every
# literal JSON brace is doubled.
#
# NOTE(review): the single-match template's M&E example deducts 50% of the meal
# plus the full tax, while its bullet list and the batch template say 50% of the
# total. The wording is kept exactly as found; confirm which rule is intended.
_SINGLE_ANALYSIS_PROMPT = """
You are a tax expert analyzing a receipt-transaction match. Apply the following tax rules intelligently:
And you are to calculate the tax for the receipt based on the context provided.

{context}

=== FOUR CORE TAX RULES ===

### 1. SALES TAX RULE
**Purpose**: Calculate and apply correct sales tax based on shipping and billing addresses.

**Key Principles**:
- When billing and shipping addresses are THE SAME: Apply sales tax based on that address location.
- When billing and shipping addresses are DIFFERENT: Apply sales tax based on the SHIPPING address.
- Tax rate is determined by the RECEIPT'S location, NOT the user's location (unless no receipt location).

**Scenario Examples**:

a) User in Ontario, Receipt from Quebec:
   - Apply Quebec's tax rate (14.975% QST+GST), not Ontario's 13% HST
   - The user's location is only for depreciation purposes

b) User in Ontario, Receipt from USA (New York):
   - DO NOT apply Canadian sales tax
   - This is an international transaction
   - Flag for FX review instead

c) User in USA (New York), Receipt from California:
   - Apply California's sales tax rate (receipt location)
   - Not New York's rate (user location)

d) User in Ontario, Receipt has NO address information:
   - DEFAULT to user's location (Ontario 13% HST)
   - This is the fallback when receipt location is unknown

**Tax Calculation**:
- Compare calculated tax vs stated tax on receipt
- Flag discrepancies for review

### 2. FOREIGN EXCHANGE (FX) RULE
**Purpose**: Handle currency mismatches between receipts and transactions.

**Actions**:
- Identify when receipt currency ≠ transaction currency (e.g., USD vs CAD)
- Calculate the absolute discrepancy: |receipt_amount - transaction_amount|
- ALWAYS flag for manual review
- DO NOT fetch exchange rates automatically
- If FX rate is provided in transaction data, note it but still require manual review

**Examples**:
- Transaction: USD $100, Receipt: CAD $125 → Discrepancy: $25, Flag for review
- The user must manually approve or adjust the FX difference

### 3. DEPRECIATION RULE
**Purpose**: Calculate depreciation for assets using two methods.
**Key Principle**: Depreciation is ALWAYS based on USER'S location, NOT receipt location.

**Asset Identification**:
- Only applies to capital assets: vehicles, equipment, furniture, buildings, machinery
- Identify from receipt category and description
- Typical threshold: Assets generally > $500

**Two Methods Required**:

a) **Straight-Line Depreciation** (for accounting purposes):
   Formula: (Cost - Residual Value) / Useful Life
   Example: Asset $10,000, 5-year life, $1,000 residual = $1,800/year

b) **CCA Depreciation** (for tax purposes - Canada):
   Method: Declining Balance
   Formula: Book Value × CCA Rate each year
   Example: Truck $20,000, 30% CCA:
   - Year 1: $20,000 × 30% = $6,000
   - Year 2: ($20,000 - $6,000) × 30% = $4,200
   - Continues declining each year

**CCA Classes** (Canada):
- Vehicles: 30% (Class 10)
- Computer Equipment: 55% (Class 50)
- Furniture/Machinery: 20% (Class 8)
- Buildings: 4% (Class 1)

### 4. MEALS & ENTERTAINMENT TAX DEDUCTION RULE
**Purpose**: Apply correct deductions for meals and entertainment expenses.

**Deduction Rules**:
- **For Tax Purposes**: Only 50% of total receipt amount is deductible
- **For Accounting Purposes**: 100% of total receipt amount is deductible
- **Sales Tax**: Full sales tax amount is deductible for accounting

**Example**:
- Receipt: $100 meal + $12 sales tax = $112 total
- **Tax Deduction**: $50 (50% of meal) + $12 (full tax) = $62
- **Accounting Deduction**: $100 (full meal) + $12 (full tax) = $112

=== LOCATION-BASED SCENARIO HANDLING ===

**When Receipt Location ≠ User Location**:

1. **Sales Tax**: Use RECEIPT's location for tax calculation
   - Exception: If international (different country), no Canadian sales tax + flag FX
   - Exception: If no location on receipt, use user's location as default

2. **Depreciation**: ALWAYS use USER's location for depreciation rules
   - Receipt location is irrelevant for depreciation
   - Apply user's country/province depreciation methods

3. **FX Handling**:
   - If receipt currency ≠ transaction currency: Flag for manual review
   - Do NOT automatically fetch or apply exchange rates

4. **Missing Location**:
   - If receipt has no address: Default to user's location for sales tax
   - Still apply user's location for depreciation

=== ANALYSIS REQUIRED ===

Provide a structured JSON response with the following format:

**CRITICAL INSTRUCTION FOR final_tax_amount:**
- This field MUST contain ONLY the calculated sales tax amount in dollars
- This is NOT the total amount including tax
- This is ONLY the tax portion (HST/GST/PST/QST)
- Example: If receipt total is $100 and calculated tax is $13, return 13.00 (not 113.00)
- For meals & entertainment: Return the FULL calculated tax amount (not the 50% adjusted amount)

{{
    "final_tax_amount": XX.XX, // ONLY the calculated tax amount (e.g., 13.00 for $100 + $13 HST)
    "sales_tax": {{
        "applicable_province": "XX",
        "applicable_rate": 0.XX,
        "tax_name": "HST/GST/PST/QST",
        "calculated_tax": XX.XX, // This should match final_tax_amount above
        "stated_tax": XX.XX,
        "discrepancy": XX.XX,
        "reason": "Detailed explanation",
        "requires_review": true/false
    }},
    "foreign_exchange": {{
        "currency_mismatch": true/false,
        "receipt_currency": "XXX",
        "transaction_currency": "XXX",
        "receipt_amount": XX.XX,
        "transaction_amount": XX.XX,
        "discrepancy": XX.XX,
        "requires_manual_review": true/false,
        "reason": "Explanation of FX situation"
    }},
    "depreciation": {{
        "is_capital_asset": true/false,
        "asset_class": "category name or N/A",
        "suggested_cca_rate": 0.XX,
        "straight_line_applicable": true/false,
        "cca_applicable": true/false,
        "straight_line_example": "Brief calculation example if applicable",
        "cca_example": "Brief calculation example if applicable",
        "reason": "Why this is/isn't a capital asset, which CCA class, and why depreciation based on user's location"
    }},
    "meals_entertainment": {{
        "is_meals_entertainment": true/false,
        "tax_deduction_amount": XX.XX,
        "accounting_deduction_amount": XX.XX,
        "sales_tax_included": XX.XX,
        "reason": "Explanation of M&E rule application"
    }},
    "confidence_adjustment": {{
        "boost": 0.XX,
        "reduce": 0.XX,
        "reason": "Why confidence should be adjusted based on tax analysis"
    }},
    "overall_assessment": "Comprehensive summary: which rules applied, why, what location used for what purpose, and any required actions"
}}

**IMPORTANT**: The "final_tax_amount" field at the top level must contain the final calculated tax amount. This should be the calculated_tax from sales_tax analysis. If this is a meals & entertainment expense, ensure you return the FULL tax amount here (not the 50% adjusted amount).

**Critical Reminders**:
- Sales tax uses RECEIPT location (or user location if receipt has none)
- Depreciation ALWAYS uses USER location
- For different addresses, use SHIPPING address for sales tax
- International transactions: no Canadian tax + FX flag
- Be precise with all calculations
- Always explain your reasoning clearly
"""

_BATCH_ANALYSIS_PROMPT = """
You are a Canadian tax expert analyzing MULTIPLE receipt-transaction matches.

{context}

=== FOUR CORE TAX RULES ===

### 1. SALES TAX RULE
**Purpose**: Calculate and apply correct sales tax based on shipping and billing addresses.

**Key Principles**:
- When billing and shipping addresses are THE SAME: Apply sales tax based on that address location.
- When billing and shipping addresses are DIFFERENT: Apply sales tax based on the SHIPPING address.
- Tax rate is determined by the RECEIPT'S location, NOT the user's location (unless no receipt location).

**Scenario Examples**:

a) User in Ontario, Receipt from Quebec:
   - Apply Quebec's tax rate (14.975% QST+GST), not Ontario's 13% HST

b) User in Ontario, Receipt from USA (New York):
   - DO NOT apply Canadian sales tax
   - This is an international transaction
   - Flag for FX review instead

c) User in Ontario, Receipt has NO address information:
   - DEFAULT to user's location (Ontario 13% HST)

**Tax Calculation**:
- Compare calculated tax vs stated tax on receipt
- Flag discrepancies for review

### 2. FOREIGN EXCHANGE (FX) RULE
**Purpose**: Handle currency mismatches between receipts and transactions.

**Actions**:
- Identify when receipt currency ≠ transaction currency (e.g., USD vs CAD)
- Calculate expected transaction amount using FX rate if available
- Flag discrepancies > $5 or 5% for manual review
- If FX rate missing but currencies differ, flag for review

### 3. DEPRECIATION RULE
**Purpose**: Identify capital assets requiring depreciation based on USER'S location.

**Critical**: Depreciation is ALWAYS based on the USER'S location (for Canadian tax filing), NOT the receipt location.

**Capital Asset Criteria**:
- Cost > $500 typically
- Useful life > 1 year
- Examples: computers, vehicles, furniture, machinery, buildings

**CCA Classes**: Assign appropriate class and rate based on asset type and user's jurisdiction

### 4. MEALS & ENTERTAINMENT RULE
**Purpose**: Apply 50% tax deduction limit for M&E expenses.

**Actions**:
- Identify M&E expenses (meals, entertainment, client dinners, etc.)
- Tax Deduction: 50% of total amount (including tax)
- Accounting Deduction: 100% of total amount (including tax)
- Always include sales tax in both calculations

=== YOUR TASK ===

Analyze EACH match and return a JSON object where each key is the match ID and the value is the complete tax analysis.

**CRITICAL INSTRUCTION FOR final_tax_amount:**
- This field MUST contain ONLY the calculated sales tax amount in dollars
- This is NOT the total amount including tax
- This is ONLY the tax portion (HST/GST/PST/QST)
- Example: If receipt total is $100 and calculated tax is $13, return 13.00 (not 113.00)
- For meals & entertainment: Return the FULL calculated tax amount (not the 50% adjusted amount)
- VERIFY: final_tax_amount should equal sales_tax.calculated_tax

Return your response as a SINGLE JSON object in this format:

{{
    "match_0": {{
        "final_tax_amount": XX.XX, // ONLY the calculated tax amount
        "sales_tax": {{
            "applicable_province": "XX",
            "applicable_rate": 0.XX,
            "tax_name": "HST/GST/PST",
            "calculated_tax": XX.XX,
            "stated_tax": XX.XX,
            "discrepancy": XX.XX,
            "reason": "Detailed explanation",
            "requires_review": true/false
        }},
        "foreign_exchange": {{
            "currency_mismatch": true/false,
            "receipt_currency": "XXX",
            "transaction_currency": "XXX",
            "expected_transaction_amount": XX.XX,
            "actual_transaction_amount": XX.XX,
            "discrepancy": XX.XX,
            "requires_manual_review": true/false,
            "reason": "Explanation"
        }},
        "depreciation": {{
            "is_capital_asset": true/false,
            "asset_class": "class_XX",
            "cca_rate": 0.XX,
            "applicable_jurisdiction": "XX",
            "reason": "Explanation"
        }},
        "meals_entertainment": {{
            "is_meals_entertainment": true/false,
            "tax_deduction_amount": XX.XX,
            "accounting_deduction_amount": XX.XX,
            "sales_tax_included": XX.XX,
            "reason": "Explanation"
        }},
        "confidence_adjustment": {{
            "boost": 0.XX,
            "reduce": 0.XX,
            "reason": "Why confidence should be adjusted"
        }},
        "overall_assessment": "Summary for this match"
    }},
    "match_1": {{ ... same structure ... }},
    ... for all {num_matches} matches ...
}}
"""


class LLMTaxAnalyzer:
    """
    Uses LLM to intelligently apply tax rules based on context.

    Implements four core tax rules:
    1. Sales Tax Rule - Based on receipt location (shipping/billing address)
    2. Foreign Exchange Rule - Handles currency mismatches
    3. Depreciation Rule - Capital assets (based on user location)
    4. Meals & Entertainment Rule - 50% tax deduction, 100% accounting deduction

    The public entry points are ``analyze_and_apply_tax_rules_batch`` (one LLM
    call for many matches) and ``analyze_and_apply_tax_rules`` (one call per
    receipt/transaction pair). Both treat the LLM reply as untrusted input:
    it is parsed defensively and numeric fields are coerced before use.
    """

    # Provincial tax rates for reference
    PROVINCIAL_TAX_RATES = {
        "ON": {"rate": 0.13, "name": "HST", "type": "Harmonized"},
        "QC": {"rate": 0.14975, "name": "QST + GST", "type": "Combined"},
        "BC": {"rate": 0.12, "name": "PST + GST", "type": "Combined"},
        "AB": {"rate": 0.05, "name": "GST", "type": "Federal only"},
        "SK": {"rate": 0.11, "name": "PST + GST", "type": "Combined"},
        "MB": {"rate": 0.12, "name": "PST + GST", "type": "Combined"},
        "NS": {"rate": 0.15, "name": "HST", "type": "Harmonized"},
        "NB": {"rate": 0.15, "name": "HST", "type": "Harmonized"},
        "NL": {"rate": 0.15, "name": "HST", "type": "Harmonized"},
        "PE": {"rate": 0.15, "name": "HST", "type": "Harmonized"},
        "NT": {"rate": 0.05, "name": "GST", "type": "Federal only"},
        "NU": {"rate": 0.05, "name": "GST", "type": "Federal only"},
        "YT": {"rate": 0.05, "name": "GST", "type": "Federal only"},
    }

    # CCA rates by asset class (simplified)
    CCA_RATES = {
        "vehicles": 0.30,  # Class 10
        "computer_equipment": 0.55,  # Class 50
        "furniture": 0.20,  # Class 8
        "buildings": 0.04,  # Class 1
        "machinery": 0.20,  # Class 8
    }

    # Upper-cased full province names mapped to their two-letter codes.
    PROVINCE_NAME_TO_CODE = {
        "ONTARIO": "ON",
        "QUEBEC": "QC",
        "QUÉBEC": "QC",
        "BRITISH COLUMBIA": "BC",
        "ALBERTA": "AB",
        "SASKATCHEWAN": "SK",
        "MANITOBA": "MB",
        "NOVA SCOTIA": "NS",
        "NEW BRUNSWICK": "NB",
        "NEWFOUNDLAND AND LABRADOR": "NL",
        "NEWFOUNDLAND": "NL",
        "PRINCE EDWARD ISLAND": "PE",
        "NORTHWEST TERRITORIES": "NT",
        "NUNAVUT": "NU",
        "YUKON": "YT",
    }

    # Province used when a location is missing, generic or unrecognised.
    DEFAULT_PROVINCE = "ON"

    # Largest batch for which a failed batch call is retried one match at a
    # time; larger batches are returned un-enhanced to avoid excessive API calls.
    INDIVIDUAL_FALLBACK_MAX_MATCHES = 5

    def __init__(self):
        """Create the Groq client from ``settings.GROQ_API_KEY``."""
        self.client = groq.Groq(api_key=settings.GROQ_API_KEY)
        self.model = "llama-3.1-8b-instant"
        # NOTE(review): no method visible in this file reads max_retries.
        self.max_retries = 3

    # ------------------------------------------------------------------
    # Public entry points
    # ------------------------------------------------------------------

    def analyze_and_apply_tax_rules_batch(
        self,
        matches: list,  # List of Match objects
        user_location: str = "ON",
    ) -> list:
        """
        Batch process all matches in a SINGLE LLM call to reduce costs.

        Analyzes all receipt-transaction pairs together and applies tax rules.
        Falls back to individual processing if the batch call yields nothing
        and the batch is small enough.

        Args:
            matches: Match objects exposing ``receipt``, ``transaction``,
                ``match_reason`` and ``confidence_score``. They are mutated
                in place.
            user_location: Province code, province name or generic country
                string; normalised to a province code.

        Returns:
            The same match objects, in input order. ``matches`` itself is
            returned when it is empty or when no analysis could be applied.
        """
        if not matches:
            return matches

        logger.info("Starting batch tax analysis for %d matches", len(matches))

        try:
            batch_context = self._build_batch_analysis_context(matches, user_location)
        except Exception as e:
            logger.error("Error building batch context: %s", e)
            # If we can't even build the context, return matches as-is
            for match in matches:
                self._append_reason(match, " (Batch analysis setup failed)")
            return matches

        # Get LLM analysis for ALL matches at once
        llm_batch_analysis = self._get_llm_tax_analysis_batch(
            batch_context, len(matches)
        )

        if not llm_batch_analysis:
            logger.warning("Batch LLM analysis returned empty results")
            if len(matches) <= self.INDIVIDUAL_FALLBACK_MAX_MATCHES:
                logger.info(
                    "Attempting individual processing fallback for %d matches",
                    len(matches),
                )
                return self._process_matches_individually(matches, user_location)

            logger.warning(
                "Batch too large (%d matches) for individual fallback - "
                "returning matches without enhanced tax analysis",
                len(matches),
            )
            for match in matches:
                self._append_reason(match, " (Batch tax analysis unavailable)")
            return matches

        logger.info("Received batch analysis for %d matches", len(llm_batch_analysis))

        enhanced_matches = []
        for i, match in enumerate(matches):
            match_key = f"match_{i}"
            try:
                match_analysis = llm_batch_analysis.get(match_key, {})
                if match_analysis and isinstance(match_analysis, dict):
                    enhanced_matches.append(
                        self._apply_tax_analysis_to_match(match, match_analysis)
                    )
                else:
                    # No analysis available for this match, use as-is
                    logger.warning(
                        "No analysis found for match %d (key: %s)", i, match_key
                    )
                    self._append_reason(match, " (Tax analysis incomplete)")
                    enhanced_matches.append(match)
            except Exception as e:
                # One bad entry must not discard the rest of the batch.
                logger.error("Error applying tax analysis to match %d: %s", i, e)
                self._append_reason(match, " (Tax analysis error)")
                enhanced_matches.append(match)

        logger.info(
            "Completed batch tax analysis, enhanced %d matches", len(enhanced_matches)
        )
        return enhanced_matches

    def _process_matches_individually(self, matches: list, user_location: str) -> list:
        """
        Fallback method: Process matches one at a time using the legacy method.

        Only used when batch processing fails and batch size is small. A match
        whose analysis raises is kept, with a note appended to its reason.
        """
        logger.info("Processing %d matches individually as fallback", len(matches))

        enhanced_matches = []
        for i, match in enumerate(matches):
            try:
                tax_analysis = self.analyze_and_apply_tax_rules(
                    match.receipt, match.transaction, user_location
                )
                enhanced_matches.append(
                    self._apply_tax_analysis_to_match(match, tax_analysis)
                )
                logger.info(
                    "Successfully processed match %d/%d individually",
                    i + 1,
                    len(matches),
                )
            except Exception as e:
                logger.error("Error in individual processing for match %d: %s", i, e)
                self._append_reason(match, " (Individual tax analysis failed)")
                enhanced_matches.append(match)

        return enhanced_matches

    def analyze_and_apply_tax_rules(
        self,
        receipt: Receipt,
        transaction: Transaction,
        user_location: str = "ON",  # Default to Ontario
    ) -> Dict[str, Any]:
        """
        Legacy single-match analysis method (kept for backward compatibility).
        Use analyze_and_apply_tax_rules_batch() for better performance.

        Use LLM to intelligently analyze and apply all tax rules:
        1. Sales tax based on receipt location (shipping/billing address priority)
        2. Foreign exchange rules for currency mismatches
        3. Depreciation rules for capital assets (based on user location)
        4. Meals & Entertainment deduction rules

        Returns:
            The parsed analysis dict with a ``metadata`` entry, or a reduced
            fallback dict when the LLM reply could not be parsed.
        """
        analysis_context = self._build_analysis_context(
            receipt, transaction, user_location
        )
        llm_analysis = self._get_llm_tax_analysis(analysis_context)
        return self._structure_analysis_results(
            llm_analysis, receipt, transaction, user_location
        )

    # ------------------------------------------------------------------
    # Small formatting / coercion helpers
    # ------------------------------------------------------------------

    @staticmethod
    def _append_reason(match, note: str) -> None:
        """Append ``note`` to ``match.match_reason``, treating ``None`` as empty."""
        match.match_reason = (match.match_reason or "") + note

    @staticmethod
    def _to_float(value, default=0.0):
        """
        Coerce an LLM-supplied value to a finite float.

        Accepts numbers and numeric strings (a leading ``$`` and thousands
        separators are tolerated). Returns ``default`` for ``None``, booleans,
        non-numeric text and NaN/infinity.
        """
        if value is None or isinstance(value, bool):
            return default
        if isinstance(value, str):
            value = value.replace("$", "").replace(",", "").strip()
        try:
            number = float(value)
        except (TypeError, ValueError):
            return default
        return number if math.isfinite(number) else default

    @staticmethod
    def _section(tax_analysis: Dict[str, Any], key: str) -> Dict[str, Any]:
        """Return ``tax_analysis[key]`` if it is a dict, otherwise an empty dict."""
        section = tax_analysis.get(key)
        return section if isinstance(section, dict) else {}

    @staticmethod
    def _fmt_amount(value) -> str:
        """Format a monetary value as ``$0.00``; ``N/A`` if it cannot be formatted."""
        try:
            return f"${value:.2f}"
        except (TypeError, ValueError):
            return "N/A"

    @staticmethod
    def _fmt_date(value) -> str:
        """Format a date-like value as ``YYYY-MM-DD``; ``N/A`` if it has no strftime."""
        try:
            return value.strftime("%Y-%m-%d")
        except AttributeError:
            return "N/A"

    @staticmethod
    def _extract_json_payload(text) -> str:
        """
        Strip an optional markdown code fence from an LLM reply.

        Returns the text between the first ``` fence and the next one (or the
        end of the text when the closing fence is missing), without a leading
        ``json`` / ``javascript`` / ``js`` language tag. Text without any fence
        is returned stripped. ``None`` yields an empty string.
        """
        payload = (text or "").strip()
        fence = "```"
        if fence not in payload:
            return payload

        body = payload.split(fence, 2)[1].lstrip()
        lowered = body.lower()
        # "json" is tested before "js" so the longer tag wins.
        for tag in ("javascript", "json", "js"):
            if lowered.startswith(tag):
                body = body[len(tag):]
                break
        return body.strip()

    def _receipt_lines(self, receipt: Receipt) -> list:
        """Bullet lines describing a receipt, shared by both prompt contexts."""
        return [
            f"- Vendor: {receipt.vendor}",
            f"- Amount: {self._fmt_amount(receipt.amount)}",
            f"- Currency: {receipt.currency}",
            f"- Date: {self._fmt_date(receipt.receipt_date)}",
            f"- Category: {receipt.category}",
            f"- Description: {receipt.description}",
            f"- Billing Address: {self._format_address(receipt.billing_address)}",
            f"- Shipping Address: {self._format_address(receipt.shipping_address)}",
            f"- Is Meals & Entertainment: {receipt.is_meals_entertainment}",
        ]

    def _transaction_lines(self, transaction: Transaction) -> list:
        """Bullet lines describing a transaction, shared by both prompt contexts."""
        return [
            f"- Vendor: {transaction.vendor}",
            f"- Amount: {self._fmt_amount(transaction.amount)}",
            f"- Currency: {transaction.currency}",
            f"- Date: {self._fmt_date(transaction.transaction_date)}",
            f"- Notes: {transaction.notes}",
            f"- FX Rate: {transaction.fx_rate if transaction.fx_rate else 'N/A'}",
        ]

    def _user_context_lines(self, user_province: str) -> list:
        """Lines describing the user's province and its tax rate."""
        info = self.PROVINCIAL_TAX_RATES.get(user_province, {})
        # ":g" avoids float artefacts such as 14.975000000000001.
        rate_percent = info.get("rate", 0.13) * 100
        return [
            "USER CONTEXT:",
            f"- User Location (Province): {user_province}",
            f"- User Province Tax Rate: {rate_percent:g}%",
            f"- User Tax Type: {info.get('name', 'HST')}",
        ]

    def _reference_lines(self) -> list:
        """Lines embedding the provincial tax and CCA rate tables as JSON."""
        return [
            "PROVINCIAL TAX RATES REFERENCE:",
            json.dumps(self.PROVINCIAL_TAX_RATES, indent=2),
            "",
            "CCA DEPRECIATION RATES BY ASSET CLASS:",
            json.dumps(self.CCA_RATES, indent=2),
        ]

    # ------------------------------------------------------------------
    # Single-match pipeline
    # ------------------------------------------------------------------

    def _build_analysis_context(
        self, receipt: Receipt, transaction: Transaction, user_location: str
    ) -> str:
        """Build comprehensive context for LLM analysis of one match."""
        receipt_location = self._extract_receipt_location(receipt)

        # Normalize user_location to province code (handle "Canada", "Ontario", "ON", etc.)
        user_province = self._normalize_location_to_province(user_location)
        logger.info(
            "Building tax analysis context - User Location: %s → Province Code: %s",
            user_location,
            user_province,
        )

        lines = [
            "RECEIPT DETAILS:",
            *self._receipt_lines(receipt),
            "",
            "TRANSACTION DETAILS:",
            *self._transaction_lines(transaction),
            "",
            *self._user_context_lines(user_province),
            "",
            "RECEIPT LOCATION DETECTED:",
            receipt_location,
            "",
            *self._reference_lines(),
        ]
        return "\n".join(lines) + "\n"

    def _normalize_location_to_province(self, location: str) -> str:
        """
        Normalize various location formats to province code.

        Handles: "ON", "Ontario", "Canada", etc. Anything that is missing,
        generic or unrecognised falls back to ``DEFAULT_PROVINCE`` with a
        warning.
        """
        key = location.upper().strip() if isinstance(location, str) else ""

        # Direct province code match
        if key in self.PROVINCIAL_TAX_RATES:
            return key

        # Full province name
        code = self.PROVINCE_NAME_TO_CODE.get(key)
        if code is not None:
            return code

        # Default to Ontario if country is Canada or unspecified
        if key in ("CANADA", "CAN", "CA", ""):
            logger.warning("Location '%s' is too generic, defaulting to ON", location)
            return self.DEFAULT_PROVINCE

        logger.warning("Could not parse location '%s', defaulting to ON", location)
        return self.DEFAULT_PROVINCE

    def _extract_receipt_location(self, receipt: Receipt) -> str:
        """
        Extract and format receipt location information.

        The shipping address takes priority over the billing address.
        """
        location = receipt.shipping_address or receipt.billing_address
        if not location:
            return "- No address information available (will use user location)"

        return "\n".join(
            [
                f"- Province: {location.province}",
                f"- City: {location.city}",
                f"- Country: {location.country}",
                f"- Postal Code: {location.postal_code}",
            ]
        )

    def _format_address(self, address) -> str:
        """Format address for display; ``"Not provided"`` when it is falsy."""
        if address:
            return f"{address.city}, {address.province}, {address.country} ({address.postal_code})"
        return "Not provided"

    def _get_llm_tax_analysis(self, context: str) -> str:
        """
        Get tax rule analysis from LLM for a single match.

        Returns the raw reply text, or the JSON produced by
        ``_get_fallback_analysis`` if the call fails for any reason.
        """
        prompt = _SINGLE_ANALYSIS_PROMPT.format(context=context)

        try:
            response = self.client.chat.completions.create(
                model=self.model,
                messages=[
                    {
                        "role": "system",
                        "content": "You are a Canadian tax expert. Analyze transactions and apply tax rules accurately. Always return valid JSON.",
                    },
                    {"role": "user", "content": prompt},
                ],
                temperature=0.1,  # Low temperature for consistent, factual responses
                max_tokens=2000,
            )
            content = response.choices[0].message.content.strip()
            logger.info("LLM tax analysis received: %d characters", len(content))
            return content
        except Exception as e:
            # Best-effort boundary: any client/transport error degrades to
            # the static fallback instead of failing the match.
            logger.error("Error getting LLM tax analysis: %s", e)
            return self._get_fallback_analysis()

    def _get_fallback_analysis(self) -> str:
        """Return fallback analysis (as a JSON string) if LLM fails."""
        return json.dumps(
            {
                "final_tax_amount": 0.0,
                "sales_tax": {
                    "applicable_province": "ON",
                    "applicable_rate": 0.13,
                    "tax_name": "HST",
                    "calculated_tax": 0.0,
                    "stated_tax": 0.0,
                    "discrepancy": 0.0,
                    "reason": "LLM analysis failed - using defaults",
                    "requires_review": True,
                },
                "foreign_exchange": {
                    "currency_mismatch": False,
                    "requires_manual_review": False,
                    "reason": "Analysis not available",
                },
                "depreciation": {
                    "is_capital_asset": False,
                    "reason": "Analysis not available",
                },
                "meals_entertainment": {
                    "is_meals_entertainment": False,
                    "reason": "Analysis not available",
                },
                "confidence_adjustment": {
                    "boost": 0.0,
                    "reduce": 0.1,
                    "reason": "LLM analysis failed - recommend manual review",
                },
                "overall_assessment": "Automatic analysis failed. Manual review recommended.",
            }
        )

    def _structure_analysis_results(
        self,
        llm_response: str,
        receipt: Receipt,
        transaction: Transaction,
        user_location: str,
    ) -> Dict[str, Any]:
        """
        Parse LLM response and structure it for application.

        Returns the parsed JSON object with a ``metadata`` entry added. If the
        reply is not valid JSON, or is valid JSON but not an object, a reduced
        fallback dict is returned that keeps the receipt's own tax as
        ``final_tax_amount`` and flags the match for review.
        """
        try:
            # The reply may be wrapped in a markdown code block.
            analysis = json.loads(self._extract_json_payload(llm_response))
            if not isinstance(analysis, dict):
                raise ValueError(
                    f"expected a JSON object, got {type(analysis).__name__}"
                )
        except (TypeError, ValueError) as e:  # JSONDecodeError is a ValueError
            logger.error("Failed to parse LLM response as JSON: %s", e)
            logger.error("LLM response was: %s", llm_response)
            return {
                "final_tax_amount": receipt.tax if receipt.tax else 0.0,
                "sales_tax": {
                    "requires_review": True,
                    "reason": "Failed to parse LLM response",
                },
                "foreign_exchange": {
                    "requires_manual_review": receipt.currency != transaction.currency
                },
                "depreciation": {"is_capital_asset": False},
                "confidence_adjustment": {
                    "boost": 0.0,
                    "reduce": 0.15,
                    "reason": "Analysis parsing failed",
                },
                "overall_assessment": "Analysis failed. Manual review required.",
                "error": str(e),
                "metadata": {
                    "user_location": user_location,
                    "analysis_method": "fallback",
                },
            }

        analysis["metadata"] = {
            "user_location": user_location,
            "receipt_id": receipt.id,
            "transaction_id": transaction.id,
            "analysis_method": "LLM-based",
            "model": self.model,
        }
        return analysis

    # ------------------------------------------------------------------
    # Batch pipeline
    # ------------------------------------------------------------------

    def _build_batch_analysis_context(self, matches: list, user_location: str) -> str:
        """
        Build comprehensive context for batch LLM analysis of all matches.

        Match ``i`` is labelled ``match_{i}``; the batch prompt asks the LLM to
        key its reply by that ID and the caller looks results up by it.
        """
        user_province = self._normalize_location_to_province(user_location)
        logger.info(
            "Building batch tax analysis context for %d matches - "
            "User Location: %s → Province Code: %s",
            len(matches),
            user_location,
            user_province,
        )

        match_sections = []
        for i, match in enumerate(matches):
            receipt = match.receipt
            transaction = match.transaction
            match_sections.append(
                "\n".join(
                    [
                        f"MATCH {i} (ID: match_{i}):",
                        "Receipt Details:",
                        *self._receipt_lines(receipt),
                        "",
                        "Transaction Details:",
                        *self._transaction_lines(transaction),
                        "",
                        "Receipt Location Detected:",
                        self._extract_receipt_location(receipt),
                        "",
                    ]
                )
            )

        lines = [
            *self._user_context_lines(user_province),
            "",
            *self._reference_lines(),
            "",
            f"=== MATCHES TO ANALYZE ({len(matches)} total) ===",
            "\n".join(match_sections),
        ]
        return "\n".join(lines) + "\n"

    def _get_llm_tax_analysis_batch(self, context: str, num_matches: int) -> Dict[str, Any]:
        """
        Get tax rule analysis from LLM for ALL matches in a single call.

        Returns:
            The parsed JSON object keyed by match ID, or ``{}`` when the call
            fails, the reply is empty, or it is not a JSON object. The caller
            treats an empty dict as "no batch analysis available".
        """
        prompt = _BATCH_ANALYSIS_PROMPT.format(context=context, num_matches=num_matches)

        try:
            response = self.client.chat.completions.create(
                model=self.model,
                messages=[
                    {
                        "role": "system",
                        "content": "You are a Canadian tax expert. Analyze multiple transactions in batch and apply tax rules accurately. Return ONLY valid JSON - no markdown code blocks, no explanations, just pure JSON.",
                    },
                    {"role": "user", "content": prompt},
                ],
                temperature=0.1,  # Low temperature for consistent, factual responses
                max_tokens=8000,  # Higher limit for batch processing
            )
            content = response.choices[0].message.content
        except Exception as e:
            logger.error("Error getting batch LLM tax analysis: %s", e)
            logger.error("Exception type: %s", type(e).__name__)
            # Return empty dict so the caller can fall back
            return {}

        if not content:
            logger.error("LLM returned empty response")
            return {}

        content = content.strip()
        if not content:
            logger.error("LLM returned whitespace-only response")
            return {}

        logger.info(
            "LLM batch tax analysis received: %d characters for %d matches",
            len(content),
            num_matches,
        )
        logger.debug("Raw LLM response: %s...", content[:500])  # Log first 500 chars

        json_str = self._extract_json_payload(content)
        if not json_str:
            logger.error("Extracted JSON string is empty")
            logger.error("Original content was: %s", content[:500])
            return {}

        try:
            batch_analysis = json.loads(json_str)
        except json.JSONDecodeError as e:
            logger.error("JSON decode error in batch LLM tax analysis: %s", e)
            logger.error("Failed to parse: %s", json_str[:500])
            return {}

        if not isinstance(batch_analysis, dict):
            logger.error("LLM returned non-dict type: %s", type(batch_analysis))
            return {}

        logger.info(
            "Successfully parsed batch analysis with %d matches", len(batch_analysis)
        )
        return batch_analysis

    # ------------------------------------------------------------------
    # Applying an analysis to a match
    # ------------------------------------------------------------------

    def _apply_tax_analysis_to_match(self, match, tax_analysis: Dict[str, Any]):
        """
        Apply tax analysis results to a match object.

        Reconciles ``final_tax_amount`` with ``sales_tax.calculated_tax``,
        stores the analysis on ``match.tax_analysis``, adjusts
        ``match.confidence_score`` (clamped to [0, 1]) and appends review flags
        to ``match.match_reason``.

        Args:
            match: Object exposing ``receipt``, ``confidence_score`` and
                ``match_reason``; mutated in place.
            tax_analysis: Analysis dict from the LLM or a fallback. It is
                mutated: ``final_tax_amount`` is always set to a float.

        Returns:
            The same ``match`` object.
        """
        sales_tax = self._section(tax_analysis, "sales_tax")
        final_tax = self._to_float(tax_analysis.get("final_tax_amount"))
        # None means "no usable calculated_tax". In that case there is nothing
        # to reconcile against and the reported final amount is kept; this is
        # what preserves the receipt-tax value of the parse-failure fallback.
        calculated_tax = self._to_float(sales_tax.get("calculated_tax"), default=None)

        if calculated_tax is not None:
            zero_final = final_tax == 0.0 and calculated_tax > 0.0

            # On a mismatch, calculated_tax is the source of truth.
            if abs(final_tax - calculated_tax) > 0.01:
                logger.warning(
                    "Correcting final_tax_amount mismatch for %s: "
                    "LLM returned final_tax_amount=%s, but calculated_tax=%s. "
                    "Using calculated_tax as final value.",
                    match.receipt.vendor,
                    final_tax,
                    calculated_tax,
                )
                final_tax = calculated_tax

            # A zero final amount alongside a positive calculated amount is
            # additionally flagged for review.
            if zero_final:
                logger.warning(
                    "Correcting zero final_tax_amount for %s: "
                    "LLM returned 0 but calculated %s HST. Setting final_tax_amount=%s",
                    match.receipt.vendor,
                    calculated_tax,
                    calculated_tax,
                )
                final_tax = calculated_tax
                sales_tax["requires_review"] = True

        tax_analysis["final_tax_amount"] = final_tax

        match.tax_analysis = tax_analysis
        logger.debug(
            "Applied tax analysis to match: %s -> final_tax_amount=%s",
            match.receipt.vendor,
            final_tax,
        )

        # Confidence adjustments
        confidence_adj = self._section(tax_analysis, "confidence_adjustment")

        boost = self._to_float(confidence_adj.get("boost"))
        if boost > 0:
            match.confidence_score = min(1.0, match.confidence_score + boost)
            self._append_reason(
                match, f" (Tax analysis confidence boost: +{boost:.2f})"
            )

        reduction = self._to_float(confidence_adj.get("reduce"))
        if reduction > 0:
            match.confidence_score = max(0.0, match.confidence_score - reduction)
            self._append_reason(match, f" (Tax issues detected: -{reduction:.2f})")

        # Flags for manual review
        review_flags = []

        if sales_tax.get("requires_review", False):
            review_flags.append("Sales Tax Review Required")

        fx_analysis = self._section(tax_analysis, "foreign_exchange")
        if fx_analysis.get("requires_manual_review", False):
            fx_discrepancy = self._to_float(fx_analysis.get("discrepancy"))
            review_flags.append(
                f"FX Review Required (Discrepancy: ${fx_discrepancy:.2f})"
            )

        depreciation = self._section(tax_analysis, "depreciation")
        if depreciation.get("is_capital_asset", False):
            review_flags.append(
                f"Capital Asset - Depreciation Applicable ({depreciation.get('asset_class', 'Unknown')})"
            )

        meals_ent = self._section(tax_analysis, "meals_entertainment")
        if meals_ent.get("is_meals_entertainment", False):
            tax_deduction = self._to_float(meals_ent.get("tax_deduction_amount"))
            accounting_deduction = self._to_float(
                meals_ent.get("accounting_deduction_amount")
            )
            review_flags.append(
                f"M&E Expense - Tax Deduction: ${tax_deduction:.2f} (50%), Accounting: ${accounting_deduction:.2f} (100%)"
            )

        if review_flags:
            self._append_reason(match, " | REVIEW: " + "; ".join(review_flags))

        return match