993 lines
38 KiB
Python
993 lines
38 KiB
Python
import json
|
||
import logging
|
||
from typing import Any, Dict
|
||
|
||
import groq
|
||
from config import settings
|
||
from schemas import Receipt, Transaction
|
||
|
||
logger = logging.getLogger(__name__)
|
||
|
||
|
||
class LLMTaxAnalyzer:
|
||
"""
|
||
Uses LLM to intelligently apply tax rules based on context.
|
||
|
||
Implements four core tax rules:
|
||
1. Sales Tax Rule - Based on receipt location (shipping/billing address)
|
||
2. Foreign Exchange Rule - Handles currency mismatches
|
||
3. Depreciation Rule - Capital assets (based on user location)
|
||
4. Meals & Entertainment Rule - 50% tax deduction, 100% accounting deduction
|
||
"""
|
||
|
||
# Provincial/territorial sales-tax reference table, keyed by two-letter
# province code. Each entry carries:
#   rate - total sales-tax rate expressed as a fraction (0.13 == 13%)
#   name - label of the tax(es) that make up that rate
#   type - "Harmonized", "Combined" or "Federal only"
# The table is serialized verbatim into the LLM prompts and is also used to
# recognise province codes when normalizing the user's location.
# NOTE(review): these rates are hard-coded snapshots - confirm they are still
# current before relying on them; provincial rates change over time.
PROVINCIAL_TAX_RATES = {
    "ON": {"rate": 0.13, "name": "HST", "type": "Harmonized"},
    "QC": {"rate": 0.14975, "name": "QST + GST", "type": "Combined"},
    "BC": {"rate": 0.12, "name": "PST + GST", "type": "Combined"},
    "AB": {"rate": 0.05, "name": "GST", "type": "Federal only"},
    "SK": {"rate": 0.11, "name": "PST + GST", "type": "Combined"},
    "MB": {"rate": 0.12, "name": "PST + GST", "type": "Combined"},
    "NS": {"rate": 0.15, "name": "HST", "type": "Harmonized"},
    "NB": {"rate": 0.15, "name": "HST", "type": "Harmonized"},
    "NL": {"rate": 0.15, "name": "HST", "type": "Harmonized"},
    "PE": {"rate": 0.15, "name": "HST", "type": "Harmonized"},
    "NT": {"rate": 0.05, "name": "GST", "type": "Federal only"},
    "NU": {"rate": 0.05, "name": "GST", "type": "Federal only"},
    "YT": {"rate": 0.05, "name": "GST", "type": "Federal only"},
}
|
||
|
||
# CCA (capital cost allowance) rates by asset category (simplified), stored as
# fractions (0.30 == 30%). The class noted beside each entry is the CCA class
# the rate is taken from. The table is serialized verbatim into the LLM
# prompts as a reference for the depreciation rule.
CCA_RATES = {
    "vehicles": 0.30,  # Class 10
    "computer_equipment": 0.55,  # Class 50
    "furniture": 0.20,  # Class 8
    "buildings": 0.04,  # Class 1
    "machinery": 0.20,  # Class 8
}
|
||
|
||
def __init__(self):
    """Configure the analyzer from the project ``settings`` object."""
    # Retry budget; no method visible in this part of the file consults it.
    self.max_retries = 3
    # Model identifier sent with every chat-completion request.
    self.model = settings.model
    # Groq API client authenticated with the configured key.
    self.client = groq.Groq(api_key=settings.GROQ_API_KEY)
|
||
|
||
def analyze_and_apply_tax_rules_batch(
    self,
    matches: list,  # List of Match objects
    user_location: str = "ON",
    max_individual_fallback: int = 5,
) -> list:
    """
    Batch process all matches in a SINGLE LLM call to reduce costs.

    Analyzes all receipt-transaction pairs together and applies tax rules.
    Falls back to individual processing if the batch call yields nothing
    and the batch is small enough.

    Args:
        matches: Match objects exposing ``receipt``, ``transaction`` and
            ``match_reason``.
        user_location: Province code/name of the user (defaults to "ON").
        max_individual_fallback: Largest batch size for which a failed
            batch call is retried one match at a time. Larger batches are
            returned un-enhanced to avoid excessive API calls.

    Returns:
        A list with one entry per input match, in input order. Matches
        that could not be analyzed are returned unchanged apart from a
        note appended to ``match_reason``.
    """

    def _annotate(match, note: str) -> None:
        # match_reason may be unset (None); appending a note must never
        # raise, since this also runs inside error handlers.
        match.match_reason = (getattr(match, "match_reason", None) or "") + note

    if not matches:
        return matches

    logger.info(f"Starting batch tax analysis for {len(matches)} matches")

    # Build batch context for all matches
    try:
        batch_context = self._build_batch_analysis_context(matches, user_location)
    except Exception as e:
        logger.error(f"Error building batch context: {str(e)}")
        # If we can't even build the context, return matches as-is
        for match in matches:
            _annotate(match, " (Batch analysis setup failed)")
        return matches

    # Get LLM analysis for ALL matches at once
    llm_batch_analysis = self._get_llm_tax_analysis_batch(
        batch_context, len(matches)
    )

    if not llm_batch_analysis:
        logger.warning("Batch LLM analysis returned empty results")

        # Only fall back for small batches to avoid excessive API calls
        if len(matches) <= max_individual_fallback:
            logger.info(
                f"Attempting individual processing fallback for {len(matches)} matches"
            )
            return self._process_matches_individually(matches, user_location)

        logger.warning(
            f"Batch too large ({len(matches)} matches) for individual fallback - returning matches without enhanced tax analysis"
        )
        for match in matches:
            _annotate(match, " (Batch tax analysis unavailable)")
        return matches

    logger.info(f"Received batch analysis for {len(llm_batch_analysis)} matches")

    # Apply results to each match; results are keyed "match_<index>"
    enhanced_matches = []
    for i, match in enumerate(matches):
        match_key = f"match_{i}"
        try:
            match_analysis = llm_batch_analysis.get(match_key, {})

            if match_analysis and isinstance(match_analysis, dict):
                enhanced_matches.append(
                    self._apply_tax_analysis_to_match(match, match_analysis)
                )
                continue

            # No usable analysis for this match, keep it as-is
            logger.warning(f"No analysis found for match {i} (key: {match_key})")
            _annotate(match, " (Tax analysis incomplete)")
        except Exception as e:
            logger.error(f"Error applying tax analysis to match {i}: {str(e)}")
            _annotate(match, " (Tax analysis error)")
        enhanced_matches.append(match)

    logger.info(
        f"Completed batch tax analysis, enhanced {len(enhanced_matches)} matches"
    )
    return enhanced_matches
|
||
|
||
def _process_matches_individually(self, matches: list, user_location: str) -> list:
    """
    Fallback path: analyze each match with its own LLM call.

    Used only when the batch call produced nothing and the batch is small.
    A match whose analysis raises is kept unchanged except for a failure
    note appended to its ``match_reason``.
    """
    total = len(matches)
    logger.info(f"Processing {total} matches individually as fallback")

    results = []
    for index, candidate in enumerate(matches):
        try:
            # Legacy single-match analysis, then apply it to the match
            analysis = self.analyze_and_apply_tax_rules(
                candidate.receipt, candidate.transaction, user_location
            )
            outcome = self._apply_tax_analysis_to_match(candidate, analysis)
        except Exception as exc:
            logger.error(f"Error in individual processing for match {index}: {str(exc)}")
            candidate.match_reason += " (Individual tax analysis failed)"
            outcome = candidate
        else:
            logger.info(
                f"Successfully processed match {index + 1}/{total} individually"
            )
        results.append(outcome)

    return results
|
||
|
||
def analyze_and_apply_tax_rules(
    self,
    receipt: Receipt,
    transaction: Transaction,
    user_location: str = "ON",  # Default to Ontario
) -> Dict[str, Any]:
    """
    Legacy single-match analysis (kept for backward compatibility).

    Prefer analyze_and_apply_tax_rules_batch() for better performance.

    Runs a three-step pipeline for one receipt/transaction pair:
    build the prompt context, ask the LLM to apply the tax rules
    (sales tax, foreign exchange, depreciation, meals & entertainment),
    then parse the reply into a structured dictionary.
    """
    # Step 1: describe the pair and the user's province for the LLM
    prompt_context = self._build_analysis_context(receipt, transaction, user_location)

    # Step 2: raw LLM reply (JSON text)
    raw_reply = self._get_llm_tax_analysis(prompt_context)

    # Step 3: parse the reply and attach metadata
    return self._structure_analysis_results(
        raw_reply, receipt, transaction, user_location
    )
|
||
|
||
def _build_analysis_context(
    self, receipt: Receipt, transaction: Transaction, user_location: str
) -> str:
    """Render the prompt context for one receipt/transaction pair.

    The text lists the receipt fields, the transaction fields, the user's
    normalized province with its tax rate and tax name, the detected
    receipt location, and the two reference tables as JSON.
    """
    # Receipt location block (shipping address preferred over billing)
    receipt_location = self._extract_receipt_location(receipt)

    # Accepts "Canada", "Ontario", "ON", etc. and yields a province code
    user_province = self._normalize_location_to_province(user_location)

    logger.info(
        f"Building tax analysis context - User Location: {user_location} → Province Code: {user_province}"
    )

    tax_rates_info = json.dumps(self.PROVINCIAL_TAX_RATES, indent=2)
    cca_rates_info = json.dumps(self.CCA_RATES, indent=2)

    receipt_lines = [
        "RECEIPT DETAILS:",
        f"- Vendor: {receipt.vendor}",
        f"- Amount: ${receipt.amount:.2f}",
        f"- Currency: {receipt.currency}",
        f"- Date: {receipt.receipt_date.strftime('%Y-%m-%d')}",
        f"- Category: {receipt.category}",
        f"- Description: {receipt.description}",
        f"- Billing Address: {self._format_address(receipt.billing_address)}",
        f"- Shipping Address: {self._format_address(receipt.shipping_address)}",
        f"- Is Meals & Entertainment: {receipt.is_meals_entertainment}",
    ]

    transaction_lines = [
        "TRANSACTION DETAILS:",
        f"- Vendor: {transaction.vendor}",
        f"- Amount: ${transaction.amount:.2f}",
        f"- Currency: {transaction.currency}",
        f"- Date: {transaction.transaction_date.strftime('%Y-%m-%d')}",
        f"- Notes: {transaction.notes}",
        f"- FX Rate: {transaction.fx_rate or 'N/A'}",
    ]

    # Unknown provinces fall back to the 13% / "HST" defaults
    province_entry = self.PROVINCIAL_TAX_RATES.get(user_province, {})
    user_lines = [
        "USER CONTEXT:",
        f"- User Location (Province): {user_province}",
        f"- User Province Tax Rate: {province_entry.get('rate', 0.13) * 100}%",
        f"- User Tax Type: {province_entry.get('name', 'HST')}",
    ]

    # Empty entries produce the blank separator lines, plus the leading
    # and trailing newline of the rendered text.
    sections = [
        "",
        *receipt_lines,
        "",
        *transaction_lines,
        "",
        *user_lines,
        "",
        "RECEIPT LOCATION DETECTED:",
        receipt_location,
        "",
        "PROVINCIAL TAX RATES REFERENCE:",
        tax_rates_info,
        "",
        "CCA DEPRECIATION RATES BY ASSET CLASS:",
        cca_rates_info,
        "",
    ]
    return "\n".join(sections)
|
||
|
||
def _normalize_location_to_province(self, location: str) -> str:
    """
    Normalize various location formats to a two-letter province code.

    Handles bare codes ("ON"), full province names ("Ontario"), and
    comma-separated composites such as "Montreal, QC" or
    "Quebec, Canada" (the first part that names a province wins).
    Matching is case-insensitive and ignores surrounding whitespace.

    Anything that cannot be resolved - including ``None``, an empty
    string, or a bare country name like "Canada" - defaults to "ON"
    and is logged as a warning.
    """
    # Full province/territory names (plus common aliases) -> codes
    province_name_map = {
        "ONTARIO": "ON",
        "QUEBEC": "QC",
        "QUÉBEC": "QC",
        "BRITISH COLUMBIA": "BC",
        "ALBERTA": "AB",
        "SASKATCHEWAN": "SK",
        "MANITOBA": "MB",
        "NOVA SCOTIA": "NS",
        "NEW BRUNSWICK": "NB",
        "NEWFOUNDLAND AND LABRADOR": "NL",
        "NEWFOUNDLAND": "NL",
        "PRINCE EDWARD ISLAND": "PE",
        "PEI": "PE",
        "NORTHWEST TERRITORIES": "NT",
        "NUNAVUT": "NU",
        "YUKON": "YT",
    }

    def _lookup(token: str):
        """Return the province code for *token*, or None if unrecognised."""
        if token in self.PROVINCIAL_TAX_RATES:
            return token
        return province_name_map.get(token)

    # Tolerate a missing location instead of crashing on None.upper()
    location_upper = ("" if location is None else str(location)).upper().strip()

    # Whole-string match: direct province code or full name
    code = _lookup(location_upper)
    if code:
        return code

    # Composite input ("City, Province[, Country]"): try each part so a
    # recognisable province is not silently replaced by the ON default.
    for part in location_upper.split(","):
        code = _lookup(part.strip())
        if code:
            return code

    # Default to Ontario if country is Canada or unspecified
    if location_upper in ["CANADA", "CAN", "CA", ""]:
        logger.warning(f"Location '{location}' is too generic, defaulting to ON")
        return "ON"

    # If nothing matches, default to Ontario
    logger.warning(f"Could not parse location '{location}', defaulting to ON")
    return "ON"
|
||
|
||
def _extract_receipt_location(self, receipt: Receipt) -> str:
    """Describe where the receipt was issued, for inclusion in a prompt.

    The shipping address takes priority; the billing address is used only
    when no shipping address is present. With neither, a placeholder line
    is returned telling the reader that the user location will be used.
    """
    address = receipt.shipping_address or receipt.billing_address

    if not address:
        return "- No address information available (will use user location)"

    details = (
        ("Province", address.province),
        ("City", address.city),
        ("Country", address.country),
        ("Postal Code", address.postal_code),
    )
    # One "- Label: value" line per field, preceded by a newline
    return "\n" + "".join(f"- {label}: {value}\n" for label, value in details)
|
||
|
||
def _format_address(self, address) -> str:
    """Render an address as "City, Province, Country (Postal)".

    Returns "Not provided" when *address* is missing/falsy.
    """
    if not address:
        return "Not provided"
    return "{}, {}, {} ({})".format(
        address.city, address.province, address.country, address.postal_code
    )
|
||
|
||
def _get_llm_tax_analysis(self, context: str) -> str:
    """Ask the LLM to apply the four tax rules to a single match.

    Args:
        context: Prompt context describing the receipt, the transaction,
            the user's province and the reference tables.

    Returns:
        The stripped text of the model's reply (expected to be JSON).
        If the request fails for any reason - including a reply with no
        content - the error is logged and the canned fallback analysis
        from ``_get_fallback_analysis()`` is returned instead.
    """
    system_message = {
        "role": "system",
        "content": "You are a Canadian tax expert. Analyze transactions and apply tax rules accurately. Always return valid JSON.",
    }

    # Doubled braces are literal braces; {context} is the only substitution.
    prompt = f"""
You are a tax expert analyzing a receipt-transaction match. Apply the following tax rules intelligently:
And you are to calculate the tax for the receipt based on the context provided.

{context}

=== FOUR CORE TAX RULES ===

### 1. SALES TAX RULE
**Purpose**: Calculate and apply correct sales tax based on shipping and billing addresses.

**Key Principles**:
- When billing and shipping addresses are THE SAME: Apply sales tax based on that address location.
- When billing and shipping addresses are DIFFERENT: Apply sales tax based on the SHIPPING address.
- Tax rate is determined by the RECEIPT'S location, NOT the user's location (unless no receipt location).

**Scenario Examples**:
a) User in Ontario, Receipt from Quebec:
- Apply Quebec's tax rate (14.975% QST+GST), not Ontario's 13% HST
- The user's location is only for depreciation purposes

b) User in Ontario, Receipt from USA (New York):
- DO NOT apply Canadian sales tax
- This is an international transaction
- Flag for FX review instead

c) User in USA (New York), Receipt from California:
- Apply California's sales tax rate (receipt location)
- Not New York's rate (user location)

d) User in Ontario, Receipt has NO address information:
- DEFAULT to user's location (Ontario 13% HST)
- This is the fallback when receipt location is unknown

**Tax Calculation**:
- Compare calculated tax vs stated tax on receipt
- Flag discrepancies for review

### 2. FOREIGN EXCHANGE (FX) RULE
**Purpose**: Handle currency mismatches between receipts and transactions.

**Actions**:
- Identify when receipt currency ≠ transaction currency (e.g., USD vs CAD)
- Calculate the absolute discrepancy: |receipt_amount - transaction_amount|
- ALWAYS flag for manual review - DO NOT fetch exchange rates automatically
- If FX rate is provided in transaction data, note it but still require manual review

**Examples**:
- Transaction: USD $100, Receipt: CAD $125 → Discrepancy: $25, Flag for review
- The user must manually approve or adjust the FX difference

### 3. DEPRECIATION RULE
**Purpose**: Calculate depreciation for assets using two methods.

**Key Principle**: Depreciation is ALWAYS based on USER'S location, NOT receipt location.

**Asset Identification**:
- Only applies to capital assets: vehicles, equipment, furniture, buildings, machinery
- Identify from receipt category and description
- Typical threshold: Assets generally > $500

**Two Methods Required**:
a) **Straight-Line Depreciation** (for accounting purposes):
Formula: (Cost - Residual Value) / Useful Life
Example: Asset $10,000, 5-year life, $1,000 residual = $1,800/year

b) **CCA Depreciation** (for tax purposes - Canada):
Method: Declining Balance
Formula: Book Value × CCA Rate each year
Example: Truck $20,000, 30% CCA:
- Year 1: $20,000 × 30% = $6,000
- Year 2: ($20,000 - $6,000) × 30% = $4,200
- Continues declining each year

**CCA Classes** (Canada):
- Vehicles: 30% (Class 10)
- Computer Equipment: 55% (Class 50)
- Furniture/Machinery: 20% (Class 8)
- Buildings: 4% (Class 1)

### 4. MEALS & ENTERTAINMENT TAX DEDUCTION RULE
**Purpose**: Apply correct deductions for meals and entertainment expenses.

**Deduction Rules**:
- **For Tax Purposes**: Only 50% of total receipt amount is deductible
- **For Accounting Purposes**: 100% of total receipt amount is deductible
- **Sales Tax**: Full sales tax amount is deductible for accounting

**Example**:
- Receipt: $100 meal + $12 sales tax = $112 total
- **Tax Deduction**: $50 (50% of meal) + $12 (full tax) = $62
- **Accounting Deduction**: $100 (full meal) + $12 (full tax) = $112

=== LOCATION-BASED SCENARIO HANDLING ===

**When Receipt Location ≠ User Location**:

1. **Sales Tax**: Use RECEIPT's location for tax calculation
- Exception: If international (different country), no Canadian sales tax + flag FX
- Exception: If no location on receipt, use user's location as default

2. **Depreciation**: ALWAYS use USER's location for depreciation rules
- Receipt location is irrelevant for depreciation
- Apply user's country/province depreciation methods

3. **FX Handling**:
- If receipt currency ≠ transaction currency: Flag for manual review
- Do NOT automatically fetch or apply exchange rates

4. **Missing Location**:
- If receipt has no address: Default to user's location for sales tax
- Still apply user's location for depreciation

=== ANALYSIS REQUIRED ===

Provide a structured JSON response with the following format:

**CRITICAL INSTRUCTION FOR final_tax_amount:**
- This field MUST contain ONLY the calculated sales tax amount in dollars
- This is NOT the total amount including tax
- This is ONLY the tax portion (HST/GST/PST/QST)
- Example: If receipt total is $100 and calculated tax is $13, return 13.00 (not 113.00)
- For meals & entertainment: Return the FULL calculated tax amount (not the 50% adjusted amount)

{{
"final_tax_amount": XX.XX, // ONLY the calculated tax amount (e.g., 13.00 for $100 + $13 HST)
"sales_tax": {{
"applicable_province": "XX",
"applicable_rate": 0.XX,
"tax_name": "HST/GST/PST/QST",
"calculated_tax": XX.XX, // This should match final_tax_amount above
"stated_tax": XX.XX,
"discrepancy": XX.XX,
"reason": "Detailed explanation",
"requires_review": true/false
}},
"foreign_exchange": {{
"currency_mismatch": true/false,
"receipt_currency": "XXX",
"transaction_currency": "XXX",
"receipt_amount": XX.XX,
"transaction_amount": XX.XX,
"discrepancy": XX.XX,
"requires_manual_review": true/false,
"reason": "Explanation of FX situation"
}},
"depreciation": {{
"is_capital_asset": true/false,
"asset_class": "category name or N/A",
"suggested_cca_rate": 0.XX,
"straight_line_applicable": true/false,
"cca_applicable": true/false,
"straight_line_example": "Brief calculation example if applicable",
"cca_example": "Brief calculation example if applicable",
"reason": "Why this is/isn't a capital asset, which CCA class, and why depreciation based on user's location"
}},
"meals_entertainment": {{
"is_meals_entertainment": true/false,
"tax_deduction_amount": XX.XX,
"accounting_deduction_amount": XX.XX,
"sales_tax_included": XX.XX,
"reason": "Explanation of M&E rule application"
}},
"confidence_adjustment": {{
"boost": 0.XX,
"reduce": 0.XX,
"reason": "Why confidence should be adjusted based on tax analysis"
}},
"overall_assessment": "Comprehensive summary: which rules applied, why, what location used for what purpose, and any required actions"
}}

**IMPORTANT**: The "final_tax_amount" field at the top level must contain the final calculated tax amount. This should be the calculated_tax from sales_tax analysis. If this is a meals & entertainment expense, ensure you return the FULL tax amount here (not the 50% adjusted amount).

**Critical Reminders**:
- Sales tax uses RECEIPT location (or user location if receipt has none)
- Depreciation ALWAYS uses USER location
- For different addresses, use SHIPPING address for sales tax
- International transactions: no Canadian tax + FX flag
- Be precise with all calculations
- Always explain your reasoning clearly
"""

    try:
        completion = self.client.chat.completions.create(
            model=self.model,
            messages=[system_message, {"role": "user", "content": prompt}],
            temperature=0.1,  # Low temperature for consistent, factual responses
            max_tokens=2000,
        )

        # A reply without content makes .strip() raise, which the handler
        # below turns into the fallback analysis.
        reply = completion.choices[0].message.content.strip()
        logger.info(f"LLM tax analysis received: {len(reply)} characters")
        return reply

    except Exception as exc:
        logger.error(f"Error getting LLM tax analysis: {str(exc)}")
        return self._get_fallback_analysis()
|
||
|
||
def _get_fallback_analysis(self) -> str:
    """Return a canned analysis (as JSON text) for when the LLM call fails.

    The payload reports zero tax under Ontario HST defaults, flags the
    sales-tax section for review, and asks for a 0.1 confidence reduction.
    """
    unavailable = "Analysis not available"

    sales_tax = {
        "applicable_province": "ON",
        "applicable_rate": 0.13,
        "tax_name": "HST",
        "calculated_tax": 0.0,
        "stated_tax": 0.0,
        "discrepancy": 0.0,
        "reason": "LLM analysis failed - using defaults",
        "requires_review": True,
    }
    foreign_exchange = {
        "currency_mismatch": False,
        "requires_manual_review": False,
        "reason": unavailable,
    }
    confidence_adjustment = {
        "boost": 0.0,
        "reduce": 0.1,
        "reason": "LLM analysis failed - recommend manual review",
    }

    # Key order is kept stable because it determines the serialized text.
    payload = {
        "final_tax_amount": 0.0,
        "sales_tax": sales_tax,
        "foreign_exchange": foreign_exchange,
        "depreciation": {"is_capital_asset": False, "reason": unavailable},
        "meals_entertainment": {
            "is_meals_entertainment": False,
            "reason": unavailable,
        },
        "confidence_adjustment": confidence_adjustment,
        "overall_assessment": "Automatic analysis failed. Manual review recommended.",
    }
    return json.dumps(payload)
|
||
|
||
def _structure_analysis_results(
    self,
    llm_response: str,
    receipt: Receipt,
    transaction: Transaction,
    user_location: str,
) -> Dict[str, Any]:
    """Parse the LLM reply into an analysis dict and attach metadata.

    The reply may be bare JSON, JSON inside a markdown code fence, or JSON
    surrounded by explanatory prose; all three are accepted.

    Args:
        llm_response: Raw text returned by the LLM.
        receipt: Receipt being analyzed (``id``, ``tax`` and ``currency``
            are read).
        transaction: Matched transaction (``id`` and ``currency`` are read).
        user_location: The user's location as supplied by the caller.

    Returns:
        The parsed analysis with a ``metadata`` entry added. If the reply
        cannot be parsed into a JSON object, a fallback analysis is
        returned that carries the receipt's stated tax, requests review,
        and records the parse error under ``error``.
    """

    def _parse_json_object(text: str) -> Dict[str, Any]:
        """Return the JSON object contained in *text*, else raise ValueError."""
        # Extract JSON from the reply (may be wrapped in markdown code blocks)
        if "```json" in text:
            candidate = text.split("```json")[1].split("```")[0].strip()
        elif "```" in text:
            candidate = text.split("```")[1].split("```")[0].strip()
        else:
            candidate = text

        try:
            parsed = json.loads(candidate)
        except json.JSONDecodeError:
            # Last resort: the reply may wrap the object in prose, so try
            # the outermost {...} span before giving up.
            start, end = text.find("{"), text.rfind("}")
            if not 0 <= start < end:
                raise
            parsed = json.loads(text[start : end + 1])

        # Valid JSON that is not an object (list, string, number) cannot
        # carry the analysis sections or the metadata entry.
        if not isinstance(parsed, dict):
            raise ValueError(
                f"expected a JSON object, got {type(parsed).__name__}"
            )
        return parsed

    try:
        analysis = _parse_json_object(llm_response)
    except (ValueError, TypeError) as e:  # JSONDecodeError is a ValueError
        logger.error(f"Failed to parse LLM response as JSON: {str(e)}")
        logger.error(f"LLM response was: {llm_response}")

        stated_tax = receipt.tax if receipt.tax else 0.0

        # Return structured fallback
        return {
            "final_tax_amount": stated_tax,
            "sales_tax": {
                # _apply_tax_analysis_to_match overwrites final_tax_amount
                # with sales_tax.calculated_tax (default 0.0) when the two
                # differ, so the stated tax must be mirrored here or it is
                # reset to zero downstream.
                "calculated_tax": stated_tax,
                "stated_tax": stated_tax,
                "requires_review": True,
                "reason": "Failed to parse LLM response",
            },
            "foreign_exchange": {
                "requires_manual_review": receipt.currency != transaction.currency
            },
            "depreciation": {"is_capital_asset": False},
            "confidence_adjustment": {
                "boost": 0.0,
                "reduce": 0.15,
                "reason": "Analysis parsing failed",
            },
            "overall_assessment": "Analysis failed. Manual review required.",
            "error": str(e),
            "metadata": {
                "user_location": user_location,
                "analysis_method": "fallback",
            },
        }

    # Add metadata
    analysis["metadata"] = {
        "user_location": user_location,
        "receipt_id": receipt.id,
        "transaction_id": transaction.id,
        "analysis_method": "LLM-based",
        "model": self.model,
    }
    return analysis
|
||
|
||
def _build_batch_analysis_context(self, matches: list, user_location: str) -> str:
    """Render one prompt context covering every match in the batch.

    The text opens with the user's normalized province, its tax rate and
    tax name, then the two reference tables as JSON, followed by one
    section per match labelled ``match_<index>`` (the key under which the
    LLM is asked to return that match's analysis).
    """
    user_province = self._normalize_location_to_province(user_location)

    logger.info(
        f"Building batch tax analysis context for {len(matches)} matches - User Location: {user_location} → Province Code: {user_province}"
    )

    # Reference tables are rendered once for the whole batch
    tax_rates_info = json.dumps(self.PROVINCIAL_TAX_RATES, indent=2)
    cca_rates_info = json.dumps(self.CCA_RATES, indent=2)

    def _render_match(index: int, match) -> str:
        """Return the text section describing a single match."""
        receipt = match.receipt
        transaction = match.transaction
        receipt_location = self._extract_receipt_location(receipt)
        # Empty entries give the blank separator lines and the leading
        # and trailing newline of the section.
        return "\n".join(
            [
                "",
                f"MATCH {index} (ID: match_{index}):",
                "Receipt Details:",
                f"- Vendor: {receipt.vendor}",
                f"- Amount: ${receipt.amount:.2f}",
                f"- Currency: {receipt.currency}",
                f"- Date: {receipt.receipt_date.strftime('%Y-%m-%d')}",
                f"- Category: {receipt.category}",
                f"- Description: {receipt.description}",
                f"- Billing Address: {self._format_address(receipt.billing_address)}",
                f"- Shipping Address: {self._format_address(receipt.shipping_address)}",
                f"- Is Meals & Entertainment: {receipt.is_meals_entertainment}",
                "",
                "Transaction Details:",
                f"- Vendor: {transaction.vendor}",
                f"- Amount: ${transaction.amount:.2f}",
                f"- Currency: {transaction.currency}",
                f"- Date: {transaction.transaction_date.strftime('%Y-%m-%d')}",
                f"- Notes: {transaction.notes}",
                f"- FX Rate: {transaction.fx_rate or 'N/A'}",
                "",
                "Receipt Location Detected:",
                receipt_location,
                "",
            ]
        )

    matches_section = "\n".join(
        _render_match(index, match) for index, match in enumerate(matches)
    )

    # Unknown provinces fall back to the 13% / "HST" defaults
    province_entry = self.PROVINCIAL_TAX_RATES.get(user_province, {})

    return "\n".join(
        [
            "",
            "USER CONTEXT:",
            f"- User Location (Province): {user_province}",
            f"- User Province Tax Rate: {province_entry.get('rate', 0.13) * 100}%",
            f"- User Tax Type: {province_entry.get('name', 'HST')}",
            "",
            "PROVINCIAL TAX RATES REFERENCE:",
            tax_rates_info,
            "",
            "CCA DEPRECIATION RATES BY ASSET CLASS:",
            cca_rates_info,
            "",
            f"=== MATCHES TO ANALYZE ({len(matches)} total) ===",
            matches_section,
            "",
        ]
    )
|
||
|
||
def _get_llm_tax_analysis_batch(self, context: str, num_matches: int) -> Dict[str, Any]:
    """Get tax rule analysis from the LLM for ALL matches in a single call.

    Args:
        context: Batch prompt context describing every match.
        num_matches: Number of matches described in *context*.

    Returns:
        The parsed reply: a dict keyed ``match_<index>`` whose values are
        the per-match analyses. An empty dict is returned when the request
        fails, the reply is empty, or no JSON object can be parsed from it;
        callers treat an empty dict as "batch analysis unavailable".
        This method never raises.
    """

    def _strip_code_fence(text: str) -> str:
        """Return the body of the first markdown code fence, or *text* itself."""
        if "```" not in text:
            return text
        fenced = text.split("```")[1]
        # Drop a language tag ("json", "js", ...) on the fence's first line
        first_line, _, remainder = fenced.partition("\n")
        if first_line.strip().lower() in ("json", "javascript", "js", ""):
            fenced = remainder
        return fenced.strip()

    # Doubled braces are literal braces; {context} and {num_matches} are
    # the only substitutions.
    prompt = f"""
You are a Canadian tax expert analyzing MULTIPLE receipt-transaction matches.

{context}

=== FOUR CORE TAX RULES ===

### 1. SALES TAX RULE
**Purpose**: Calculate and apply correct sales tax based on shipping and billing addresses.

**Key Principles**:
- When billing and shipping addresses are THE SAME: Apply sales tax based on that address location.
- When billing and shipping addresses are DIFFERENT: Apply sales tax based on the SHIPPING address.
- Tax rate is determined by the RECEIPT'S location, NOT the user's location (unless no receipt location).

**Scenario Examples**:
a) User in Ontario, Receipt from Quebec:
- Apply Quebec's tax rate (14.975% QST+GST), not Ontario's 13% HST

b) User in Ontario, Receipt from USA (New York):
- DO NOT apply Canadian sales tax
- This is an international transaction
- Flag for FX review instead

c) User in Ontario, Receipt has NO address information:
- DEFAULT to user's location (Ontario 13% HST)

**Tax Calculation**:
- Compare calculated tax vs stated tax on receipt
- Flag discrepancies for review

### 2. FOREIGN EXCHANGE (FX) RULE
**Purpose**: Handle currency mismatches between receipts and transactions.

**Actions**:
- Identify when receipt currency ≠ transaction currency (e.g., USD vs CAD)
- Calculate expected transaction amount using FX rate if available
- Flag discrepancies > $5 or 5% for manual review
- If FX rate missing but currencies differ, flag for review

### 3. DEPRECIATION RULE
**Purpose**: Identify capital assets requiring depreciation based on USER'S location.

**Critical**: Depreciation is ALWAYS based on the USER'S location (for Canadian tax filing), NOT the receipt location.

**Capital Asset Criteria**:
- Cost > $500 typically
- Useful life > 1 year
- Examples: computers, vehicles, furniture, machinery, buildings

**CCA Classes**: Assign appropriate class and rate based on asset type and user's jurisdiction

### 4. MEALS & ENTERTAINMENT RULE
**Purpose**: Apply 50% tax deduction limit for M&E expenses.

**Actions**:
- Identify M&E expenses (meals, entertainment, client dinners, etc.)
- Tax Deduction: 50% of total amount (including tax)
- Accounting Deduction: 100% of total amount (including tax)
- Always include sales tax in both calculations

=== YOUR TASK ===

Analyze EACH match and return a JSON object where each key is the match ID and the value is the complete tax analysis.

**CRITICAL INSTRUCTION FOR final_tax_amount:**
- This field MUST contain ONLY the calculated sales tax amount in dollars
- This is NOT the total amount including tax
- This is ONLY the tax portion (HST/GST/PST/QST)
- Example: If receipt total is $100 and calculated tax is $13, return 13.00 (not 113.00)
- For meals & entertainment: Return the FULL calculated tax amount (not the 50% adjusted amount)
- VERIFY: final_tax_amount should equal sales_tax.calculated_tax
-

Return your response as a SINGLE JSON object in this format:

{{
"match_0": {{
"final_tax_amount": XX.XX, // ONLY the calculated tax amount
"sales_tax": {{
"applicable_province": "XX",
"applicable_rate": 0.XX,
"tax_name": "HST/GST/PST",
"calculated_tax": XX.XX,
"stated_tax": XX.XX,
"discrepancy": XX.XX,
"reason": "Detailed explanation",
"requires_review": true/false
}},
"foreign_exchange": {{
"currency_mismatch": true/false,
"receipt_currency": "XXX",
"transaction_currency": "XXX",
"expected_transaction_amount": XX.XX,
"actual_transaction_amount": XX.XX,
"discrepancy": XX.XX,
"requires_manual_review": true/false,
"reason": "Explanation"
}},
"depreciation": {{
"is_capital_asset": true/false,
"asset_class": "class_XX",
"cca_rate": 0.XX,
"applicable_jurisdiction": "XX",
"reason": "Explanation"
}},
"meals_entertainment": {{
"is_meals_entertainment": true/false,
"tax_deduction_amount": XX.XX,
"accounting_deduction_amount": XX.XX,
"sales_tax_included": XX.XX,
"reason": "Explanation"
}},
"confidence_adjustment": {{
"boost": 0.XX,
"reduce": 0.XX,
"reason": "Why confidence should be adjusted"
}},
"overall_assessment": "Summary for this match"
}},
"match_1": {{
... same structure ...
}},
... for all {num_matches} matches ...
}}
"""
    try:
        response = self.client.chat.completions.create(
            model=self.model,
            messages=[
                {
                    "role": "system",
                    "content": "You are a Canadian tax expert. Analyze multiple transactions in batch and apply tax rules accurately. Return ONLY valid JSON - no markdown code blocks, no explanations, just pure JSON.",
                },
                {"role": "user", "content": prompt},
            ],
            temperature=0.1,  # Low temperature for consistent, factual responses
            max_tokens=8000,  # Higher limit for batch processing
        )

        content = response.choices[0].message.content

        # Validate that we got content
        if not content:
            logger.error("LLM returned empty response")
            return {}

        content = content.strip()
        if not content:
            logger.error("LLM returned whitespace-only response")
            return {}

        logger.info(
            f"LLM batch tax analysis received: {len(content)} characters for {num_matches} matches"
        )
        logger.debug(f"Raw LLM response: {content[:500]}...")  # Log first 500 chars

        # Candidate JSON texts, most specific first: the fenced/raw body,
        # then the outermost {...} span (covers prose around the object
        # and unclosed fences).
        candidates = []
        fenced = _strip_code_fence(content)
        if fenced:
            candidates.append(fenced)
        start, end = content.find("{"), content.rfind("}")
        if 0 <= start < end:
            braced = content[start : end + 1]
            if braced not in candidates:
                candidates.append(braced)

        if not candidates:
            logger.error("Extracted JSON string is empty")
            logger.error(f"Original content was: {content[:500]}")
            return {}

        for json_str in candidates:
            try:
                batch_analysis = json.loads(json_str)
                break
            except json.JSONDecodeError as e:
                logger.error(f"JSON decode error in batch LLM tax analysis: {str(e)}")
                logger.error(f"Failed to parse: {json_str[:500]}")
        else:
            # No candidate parsed
            return {}

        # Validate we got a dictionary back
        if not isinstance(batch_analysis, dict):
            logger.error(f"LLM returned non-dict type: {type(batch_analysis)}")
            return {}

        logger.info(
            f"Successfully parsed batch analysis with {len(batch_analysis)} matches"
        )
        return batch_analysis

    except Exception as e:
        # Boundary handler: request/transport failures and anything
        # unexpected in the reply shape end up here.
        logger.error(f"Error getting batch LLM tax analysis: {str(e)}")
        logger.error(f"Exception type: {type(e).__name__}")
        # Return empty dict so the caller can fall back
        return {}
|
||
|
||
def _apply_tax_analysis_to_match(self, match, tax_analysis: Dict[str, Any]):
    """Attach an LLM tax analysis to a match and adjust the match accordingly.

    The analysis is parsed LLM JSON, so any field may be missing, ``null``
    or of an unexpected type. Numeric fields are coerced to ``float``
    (falling back to 0.0) and sections that are not dicts are read as
    empty, so an incomplete analysis (including ``{}``) is applied without
    raising.

    Args:
        match: Match object. This method reads ``match.receipt.vendor``
            and updates ``match.tax_analysis``, ``match.confidence_score``
            and ``match.match_reason``.
        tax_analysis: Analysis dict for this match. Mutated in place:
            ``final_tax_amount`` is overwritten with
            ``sales_tax["calculated_tax"]`` when the two disagree, and
            ``sales_tax["requires_review"]`` is set when a zero final tax
            had to be corrected. No keys are added otherwise.

    Returns:
        The same ``match`` object, updated in place.
    """

    def as_number(value: Any) -> float:
        # LLM output may hold null, a numeric string or junk where a number
        # is expected; treat anything unusable as 0.0 instead of crashing.
        try:
            return float(value)
        except (TypeError, ValueError, OverflowError):
            return 0.0

    def section(name: str) -> Dict[str, Any]:
        # Hand back the real sub-dict (so writes reach tax_analysis) or an
        # empty stand-in when the section is missing, null or not a dict.
        value = tax_analysis.get(name)
        return value if isinstance(value, dict) else {}

    vendor = match.receipt.vendor
    sales_tax = section("sales_tax")
    final_tax = as_number(tax_analysis.get("final_tax_amount"))
    calculated_tax = as_number(sales_tax.get("calculated_tax"))

    # calculated_tax is the source of truth whenever the two values disagree
    # by more than a cent.
    if abs(final_tax - calculated_tax) > 0.01:
        logger.warning(
            f"Correcting final_tax_amount mismatch for {vendor}: "
            f"LLM returned final_tax_amount={final_tax}, but calculated_tax={calculated_tax}. "
            f"Using calculated_tax as final value."
        )
        tax_analysis["final_tax_amount"] = calculated_tax

    # A zero final tax with a positive calculated tax is always corrected and
    # additionally flagged for review. calculated_tax > 0 implies sales_tax
    # is the real sub-dict, so the flag lands in tax_analysis.
    if final_tax == 0.0 and calculated_tax > 0.0:
        logger.warning(
            f"Correcting zero final_tax_amount for {vendor}: "
            f"LLM returned 0 but calculated {calculated_tax} HST. Setting final_tax_amount={calculated_tax}"
        )
        tax_analysis["final_tax_amount"] = calculated_tax
        sales_tax["requires_review"] = True

    match.tax_analysis = tax_analysis

    # .get() rather than indexing: the key is legitimately absent when the
    # analysis omitted it and no correction was needed.
    logger.debug(
        f"Applied tax analysis to match: {vendor} -> "
        f"final_tax_amount={tax_analysis.get('final_tax_amount')}"
    )

    # Text appended to match.match_reason, collected in order.
    reason_suffixes = []
    confidence_adj = section("confidence_adjustment")

    # Boost confidence if tax rules validate the match (capped at 1.0).
    boost = as_number(confidence_adj.get("boost"))
    if boost > 0:
        match.confidence_score = min(1.0, match.confidence_score + boost)
        reason_suffixes.append(f" (Tax analysis confidence boost: +{boost:.2f})")

    # Reduce confidence if tax issues were detected (floored at 0.0).
    penalty = as_number(confidence_adj.get("reduce"))
    if penalty > 0:
        match.confidence_score = max(0.0, match.confidence_score - penalty)
        reason_suffixes.append(f" (Tax issues detected: -{penalty:.2f})")

    # Flags that call for manual review, one per tax rule.
    review_flags = []

    if sales_tax.get("requires_review", False):
        review_flags.append("Sales Tax Review Required")

    fx_analysis = section("foreign_exchange")
    if fx_analysis.get("requires_manual_review", False):
        discrepancy = as_number(fx_analysis.get("discrepancy"))
        review_flags.append(f"FX Review Required (Discrepancy: ${discrepancy:.2f})")

    depreciation = section("depreciation")
    if depreciation.get("is_capital_asset", False):
        review_flags.append(
            f"Capital Asset - Depreciation Applicable ({depreciation.get('asset_class', 'Unknown')})"
        )

    meals_ent = section("meals_entertainment")
    if meals_ent.get("is_meals_entertainment", False):
        tax_deduction = as_number(meals_ent.get("tax_deduction_amount"))
        accounting_deduction = as_number(meals_ent.get("accounting_deduction_amount"))
        review_flags.append(
            f"M&E Expense - Tax Deduction: ${tax_deduction:.2f} (50%), Accounting: ${accounting_deduction:.2f} (100%)"
        )

    if review_flags:
        reason_suffixes.append(" | REVIEW: " + "; ".join(review_flags))

    # Single assignment; a None match_reason is treated as an empty string.
    if reason_suffixes:
        match.match_reason = (match.match_reason or "") + "".join(reason_suffixes)

    return match
|