Files
ds_quickbooks/app/services/llm_tax_analyzer.py
T

993 lines
38 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
import json
import logging
from typing import Any, Dict
import groq
from config import settings
from schemas import Receipt, Transaction
logger = logging.getLogger(__name__)
class LLMTaxAnalyzer:
    """
    Uses LLM to intelligently apply tax rules based on context.

    Implements four core tax rules:
    1. Sales Tax Rule - Based on receipt location (shipping/billing address)
    2. Foreign Exchange Rule - Handles currency mismatches
    3. Depreciation Rule - Capital assets (based on user location)
    4. Meals & Entertainment Rule - 50% tax deduction, 100% accounting deduction

    The two class-level tables below are reference data: the context builders
    serialize them with json.dumps and embed them in the prompt text.
    """

    # Provincial tax rates for reference, keyed by two-letter province or
    # territory code. "rate" is a fraction (0.13 means 13%), "name" is the tax
    # label shown to the model and "type" describes how the tax is composed.
    # NOTE(review): values are hard-coded - confirm against current published rates.
    PROVINCIAL_TAX_RATES = {
        "ON": {"rate": 0.13, "name": "HST", "type": "Harmonized"},
        "QC": {"rate": 0.14975, "name": "QST + GST", "type": "Combined"},
        "BC": {"rate": 0.12, "name": "PST + GST", "type": "Combined"},
        "AB": {"rate": 0.05, "name": "GST", "type": "Federal only"},
        "SK": {"rate": 0.11, "name": "PST + GST", "type": "Combined"},
        "MB": {"rate": 0.12, "name": "PST + GST", "type": "Combined"},
        "NS": {"rate": 0.15, "name": "HST", "type": "Harmonized"},
        "NB": {"rate": 0.15, "name": "HST", "type": "Harmonized"},
        "NL": {"rate": 0.15, "name": "HST", "type": "Harmonized"},
        "PE": {"rate": 0.15, "name": "HST", "type": "Harmonized"},
        "NT": {"rate": 0.05, "name": "GST", "type": "Federal only"},
        "NU": {"rate": 0.05, "name": "GST", "type": "Federal only"},
        "YT": {"rate": 0.05, "name": "GST", "type": "Federal only"},
    }
    # CCA rates by asset class (simplified): asset category -> annual
    # declining-balance rate as a fraction. The trailing comments give the
    # CCA class each category is meant to represent.
    CCA_RATES = {
        "vehicles": 0.30,  # Class 10
        "computer_equipment": 0.55,  # Class 50
        "furniture": 0.20,  # Class 8
        "buildings": 0.04,  # Class 1
        "machinery": 0.20,  # Class 8
    }
def __init__(self):
    """Set up the analyzer: retry budget, model name and the Groq API client.

    The model name and API key are read from the project ``settings`` object.
    """
    # NOTE(review): max_retries is stored here but not read by the methods
    # visible in this file - confirm whether a retry loop is still planned.
    self.max_retries = 3
    self.model = settings.model
    self.client = groq.Groq(api_key=settings.GROQ_API_KEY)
def analyze_and_apply_tax_rules_batch(
    self,
    matches: list,  # List of Match objects
    user_location: str = "ON",
) -> list:
    """
    Run tax analysis for every match using one LLM request.

    Args:
        matches: Match objects; each is expected to expose ``receipt``,
            ``transaction`` and a string ``match_reason``.
        user_location: Location of the user (province code, province name, ...).

    Returns:
        The matches, in input order. Matches whose analysis could not be
        produced are returned unchanged apart from a note appended to
        ``match_reason``. An empty/falsy ``matches`` is returned as-is.

    If the batch request yields nothing, batches of at most 5 matches are
    re-processed one by one; larger batches are returned without analysis to
    avoid a burst of API calls.
    """
    if not matches:
        return matches

    logger.info(f"Starting batch tax analysis for {len(matches)} matches")

    # Step 1: one shared context describing every match
    try:
        context_text = self._build_batch_analysis_context(matches, user_location)
    except Exception as e:
        logger.error(f"Error building batch context: {str(e)}")
        # Without a context there is nothing to send - tag and hand back
        for match in matches:
            match.match_reason += " (Batch analysis setup failed)"
        return matches

    # Step 2: single LLM round-trip for the whole batch
    llm_batch_analysis = self._get_llm_tax_analysis_batch(context_text, len(matches))

    if not llm_batch_analysis:
        logger.warning("Batch LLM analysis returned empty results")
        if len(matches) > 5:
            # Too many matches to retry one at a time
            logger.warning(
                f"Batch too large ({len(matches)} matches) for individual fallback - returning matches without enhanced tax analysis"
            )
            for match in matches:
                match.match_reason += " (Batch tax analysis unavailable)"
            return matches
        logger.info(
            f"Attempting individual processing fallback for {len(matches)} matches"
        )
        return self._process_matches_individually(matches, user_location)

    logger.info(f"Received batch analysis for {len(llm_batch_analysis)} matches")

    # Step 3: pair each match with its "match_<index>" entry
    enhanced_matches = []
    for i, match in enumerate(matches):
        match_key = f"match_{i}"
        try:
            per_match = llm_batch_analysis.get(match_key, {})
            if per_match and isinstance(per_match, dict):
                outcome = self._apply_tax_analysis_to_match(match, per_match)
            else:
                # Entry missing or malformed: keep the match untouched
                logger.warning(f"No analysis found for match {i} (key: {match_key})")
                match.match_reason += " (Tax analysis incomplete)"
                outcome = match
        except Exception as e:
            logger.error(f"Error applying tax analysis to match {i}: {str(e)}")
            match.match_reason += " (Tax analysis error)"
            outcome = match
        enhanced_matches.append(outcome)

    logger.info(
        f"Completed batch tax analysis, enhanced {len(enhanced_matches)} matches"
    )
    return enhanced_matches
def _process_matches_individually(self, matches: list, user_location: str) -> list:
    """
    Analyze matches one at a time through the single-match code path.

    Used by the batch entry point as a fallback when the batch request
    produced nothing and the batch is small. A failure on one match is
    logged, noted in that match's ``match_reason`` and does not stop the
    remaining matches from being processed.

    Returns:
        A new list containing every input match, in order.
    """
    logger.info(f"Processing {len(matches)} matches individually as fallback")
    enhanced_matches = []
    for i, match in enumerate(matches):
        try:
            # One LLM call per match via the legacy method
            single_result = self.analyze_and_apply_tax_rules(
                match.receipt, match.transaction, user_location
            )
            updated = self._apply_tax_analysis_to_match(match, single_result)
        except Exception as e:
            logger.error(f"Error in individual processing for match {i}: {str(e)}")
            match.match_reason += " (Individual tax analysis failed)"
            enhanced_matches.append(match)
        else:
            enhanced_matches.append(updated)
            logger.info(
                f"Successfully processed match {i + 1}/{len(matches)} individually"
            )
    return enhanced_matches
def analyze_and_apply_tax_rules(
    self,
    receipt: Receipt,
    transaction: Transaction,
    user_location: str = "ON",  # Default to Ontario
) -> Dict[str, Any]:
    """
    Analyze a single receipt/transaction pair (legacy, one LLM call per pair).

    Prefer analyze_and_apply_tax_rules_batch() when several matches need
    analysis. The model is asked to apply all four rules:
    1. Sales tax based on receipt location (shipping/billing address priority)
    2. Foreign exchange rules for currency mismatches
    3. Depreciation rules for capital assets (based on user location)
    4. Meals & Entertainment deduction rules

    Returns:
        The structured analysis dictionary produced by
        _structure_analysis_results().
    """
    # Context -> raw LLM text -> parsed/structured dictionary
    raw_analysis = self._get_llm_tax_analysis(
        self._build_analysis_context(receipt, transaction, user_location)
    )
    return self._structure_analysis_results(
        raw_analysis, receipt, transaction, user_location
    )
def _build_analysis_context(
    self, receipt: Receipt, transaction: Transaction, user_location: str
) -> str:
    """
    Render the text context for a single-match LLM analysis.

    The context lists the receipt fields, the transaction fields, the user's
    normalized province with its tax rate/name, the receipt location block and
    the two reference tables (provincial rates, CCA rates) as indented JSON.
    """
    receipt_location = self._extract_receipt_location(receipt)
    # Accepts "Canada", "Ontario", "ON", ... and yields a province code
    user_province = self._normalize_location_to_province(user_location)
    logger.info(
        f"Building tax analysis context - User Location: {user_location} → Province Code: {user_province}"
    )
    # Looked up once; the defaults mirror Ontario's HST
    province_info = self.PROVINCIAL_TAX_RATES.get(user_province, {})
    tax_rates_info = json.dumps(self.PROVINCIAL_TAX_RATES, indent=2)
    cca_rates_info = json.dumps(self.CCA_RATES, indent=2)
    return f"""
RECEIPT DETAILS:
- Vendor: {receipt.vendor}
- Amount: ${receipt.amount:.2f}
- Currency: {receipt.currency}
- Date: {receipt.receipt_date.strftime("%Y-%m-%d")}
- Category: {receipt.category}
- Description: {receipt.description}
- Billing Address: {self._format_address(receipt.billing_address)}
- Shipping Address: {self._format_address(receipt.shipping_address)}
- Is Meals & Entertainment: {receipt.is_meals_entertainment}
TRANSACTION DETAILS:
- Vendor: {transaction.vendor}
- Amount: ${transaction.amount:.2f}
- Currency: {transaction.currency}
- Date: {transaction.transaction_date.strftime("%Y-%m-%d")}
- Notes: {transaction.notes}
- FX Rate: {transaction.fx_rate or "N/A"}
USER CONTEXT:
- User Location (Province): {user_province}
- User Province Tax Rate: {province_info.get("rate", 0.13) * 100}%
- User Tax Type: {province_info.get("name", "HST")}
RECEIPT LOCATION DETECTED:
{receipt_location}
PROVINCIAL TAX RATES REFERENCE:
{tax_rates_info}
CCA DEPRECIATION RATES BY ASSET CLASS:
{cca_rates_info}
"""
def _normalize_location_to_province(self, location: str) -> str:
"""
Normalize various location formats to province code.
Handles: "ON", "Ontario", "Canada", etc.
"""
location_upper = location.upper().strip()
# Direct province code match
if location_upper in self.PROVINCIAL_TAX_RATES:
return location_upper
# Map full province names to codes
province_name_map = {
"ONTARIO": "ON",
"QUEBEC": "QC",
"BRITISH COLUMBIA": "BC",
"ALBERTA": "AB",
"SASKATCHEWAN": "SK",
"MANITOBA": "MB",
"NOVA SCOTIA": "NS",
"NEW BRUNSWICK": "NB",
"NEWFOUNDLAND AND LABRADOR": "NL",
"NEWFOUNDLAND": "NL",
"PRINCE EDWARD ISLAND": "PE",
"NORTHWEST TERRITORIES": "NT",
"NUNAVUT": "NU",
"YUKON": "YT",
}
if location_upper in province_name_map:
return province_name_map[location_upper]
# Default to Ontario if country is Canada or unspecified
if location_upper in ["CANADA", "CAN", "CA", ""]:
logger.warning(f"Location '{location}' is too generic, defaulting to ON")
return "ON"
# If nothing matches, default to Ontario
logger.warning(f"Could not parse location '{location}', defaulting to ON")
return "ON"
def _extract_receipt_location(self, receipt: Receipt) -> str:
"""Extract and format receipt location information"""
# Priority: Use shipping address if available, then billing
location = (
receipt.shipping_address
if receipt.shipping_address
else receipt.billing_address
)
if location:
return f"""
- Province: {location.province}
- City: {location.city}
- Country: {location.country}
- Postal Code: {location.postal_code}
"""
else:
return "- No address information available (will use user location)"
def _format_address(self, address) -> str:
"""Format address for display"""
if address:
return f"{address.city}, {address.province}, {address.country} ({address.postal_code})"
return "Not provided"
def _get_llm_tax_analysis(self, context: str) -> str:
    """
    Ask the LLM to analyze one receipt/transaction pair.

    Args:
        context: Text produced by _build_analysis_context().

    Returns:
        The model's reply with surrounding whitespace removed. If the request
        or reading the reply raises, the error is logged and the JSON string
        from _get_fallback_analysis() is returned instead.
    """
    prompt = f"""
You are a tax expert analyzing a receipt-transaction match. Apply the following tax rules intelligently:
And you are to calculate the tax for the receipt based on the context provided.
{context}
=== FOUR CORE TAX RULES ===
### 1. SALES TAX RULE
**Purpose**: Calculate and apply correct sales tax based on shipping and billing addresses.
**Key Principles**:
- When billing and shipping addresses are THE SAME: Apply sales tax based on that address location.
- When billing and shipping addresses are DIFFERENT: Apply sales tax based on the SHIPPING address.
- Tax rate is determined by the RECEIPT'S location, NOT the user's location (unless no receipt location).
**Scenario Examples**:
a) User in Ontario, Receipt from Quebec:
- Apply Quebec's tax rate (14.975% QST+GST), not Ontario's 13% HST
- The user's location is only for depreciation purposes
b) User in Ontario, Receipt from USA (New York):
- DO NOT apply Canadian sales tax
- This is an international transaction
- Flag for FX review instead
c) User in USA (New York), Receipt from California:
- Apply California's sales tax rate (receipt location)
- Not New York's rate (user location)
d) User in Ontario, Receipt has NO address information:
- DEFAULT to user's location (Ontario 13% HST)
- This is the fallback when receipt location is unknown
**Tax Calculation**:
- Compare calculated tax vs stated tax on receipt
- Flag discrepancies for review
### 2. FOREIGN EXCHANGE (FX) RULE
**Purpose**: Handle currency mismatches between receipts and transactions.
**Actions**:
- Identify when receipt currency ≠ transaction currency (e.g., USD vs CAD)
- Calculate the absolute discrepancy: |receipt_amount - transaction_amount|
- ALWAYS flag for manual review - DO NOT fetch exchange rates automatically
- If FX rate is provided in transaction data, note it but still require manual review
**Examples**:
- Transaction: USD $100, Receipt: CAD $125 → Discrepancy: $25, Flag for review
- The user must manually approve or adjust the FX difference
### 3. DEPRECIATION RULE
**Purpose**: Calculate depreciation for assets using two methods.
**Key Principle**: Depreciation is ALWAYS based on USER'S location, NOT receipt location.
**Asset Identification**:
- Only applies to capital assets: vehicles, equipment, furniture, buildings, machinery
- Identify from receipt category and description
- Typical threshold: Assets generally > $500
**Two Methods Required**:
a) **Straight-Line Depreciation** (for accounting purposes):
Formula: (Cost - Residual Value) / Useful Life
Example: Asset $10,000, 5-year life, $1,000 residual = $1,800/year
b) **CCA Depreciation** (for tax purposes - Canada):
Method: Declining Balance
Formula: Book Value × CCA Rate each year
Example: Truck $20,000, 30% CCA:
- Year 1: $20,000 × 30% = $6,000
- Year 2: ($20,000 - $6,000) × 30% = $4,200
- Continues declining each year
**CCA Classes** (Canada):
- Vehicles: 30% (Class 10)
- Computer Equipment: 55% (Class 50)
- Furniture/Machinery: 20% (Class 8)
- Buildings: 4% (Class 1)
### 4. MEALS & ENTERTAINMENT TAX DEDUCTION RULE
**Purpose**: Apply correct deductions for meals and entertainment expenses.
**Deduction Rules**:
- **For Tax Purposes**: Only 50% of total receipt amount is deductible
- **For Accounting Purposes**: 100% of total receipt amount is deductible
- **Sales Tax**: Full sales tax amount is deductible for accounting
**Example**:
- Receipt: $100 meal + $12 sales tax = $112 total
- **Tax Deduction**: $50 (50% of meal) + $12 (full tax) = $62
- **Accounting Deduction**: $100 (full meal) + $12 (full tax) = $112
=== LOCATION-BASED SCENARIO HANDLING ===
**When Receipt Location ≠ User Location**:
1. **Sales Tax**: Use RECEIPT's location for tax calculation
- Exception: If international (different country), no Canadian sales tax + flag FX
- Exception: If no location on receipt, use user's location as default
2. **Depreciation**: ALWAYS use USER's location for depreciation rules
- Receipt location is irrelevant for depreciation
- Apply user's country/province depreciation methods
3. **FX Handling**:
- If receipt currency ≠ transaction currency: Flag for manual review
- Do NOT automatically fetch or apply exchange rates
4. **Missing Location**:
- If receipt has no address: Default to user's location for sales tax
- Still apply user's location for depreciation
=== ANALYSIS REQUIRED ===
Provide a structured JSON response with the following format:
**CRITICAL INSTRUCTION FOR final_tax_amount:**
- This field MUST contain ONLY the calculated sales tax amount in dollars
- This is NOT the total amount including tax
- This is ONLY the tax portion (HST/GST/PST/QST)
- Example: If receipt total is $100 and calculated tax is $13, return 13.00 (not 113.00)
- For meals & entertainment: Return the FULL calculated tax amount (not the 50% adjusted amount)
{{
"final_tax_amount": XX.XX, // ONLY the calculated tax amount (e.g., 13.00 for $100 + $13 HST)
"sales_tax": {{
"applicable_province": "XX",
"applicable_rate": 0.XX,
"tax_name": "HST/GST/PST/QST",
"calculated_tax": XX.XX, // This should match final_tax_amount above
"stated_tax": XX.XX,
"discrepancy": XX.XX,
"reason": "Detailed explanation",
"requires_review": true/false
}},
"foreign_exchange": {{
"currency_mismatch": true/false,
"receipt_currency": "XXX",
"transaction_currency": "XXX",
"receipt_amount": XX.XX,
"transaction_amount": XX.XX,
"discrepancy": XX.XX,
"requires_manual_review": true/false,
"reason": "Explanation of FX situation"
}},
"depreciation": {{
"is_capital_asset": true/false,
"asset_class": "category name or N/A",
"suggested_cca_rate": 0.XX,
"straight_line_applicable": true/false,
"cca_applicable": true/false,
"straight_line_example": "Brief calculation example if applicable",
"cca_example": "Brief calculation example if applicable",
"reason": "Why this is/isn't a capital asset, which CCA class, and why depreciation based on user's location"
}},
"meals_entertainment": {{
"is_meals_entertainment": true/false,
"tax_deduction_amount": XX.XX,
"accounting_deduction_amount": XX.XX,
"sales_tax_included": XX.XX,
"reason": "Explanation of M&E rule application"
}},
"confidence_adjustment": {{
"boost": 0.XX,
"reduce": 0.XX,
"reason": "Why confidence should be adjusted based on tax analysis"
}},
"overall_assessment": "Comprehensive summary: which rules applied, why, what location used for what purpose, and any required actions"
}}
**IMPORTANT**: The "final_tax_amount" field at the top level must contain the final calculated tax amount. This should be the calculated_tax from sales_tax analysis. If this is a meals & entertainment expense, ensure you return the FULL tax amount here (not the 50% adjusted amount).
**Critical Reminders**:
- Sales tax uses RECEIPT location (or user location if receipt has none)
- Depreciation ALWAYS uses USER location
- For different addresses, use SHIPPING address for sales tax
- International transactions: no Canadian tax + FX flag
- Be precise with all calculations
- Always explain your reasoning clearly
"""
    system_message = {
        "role": "system",
        "content": "You are a Canadian tax expert. Analyze transactions and apply tax rules accurately. Always return valid JSON.",
    }
    user_message = {"role": "user", "content": prompt}
    try:
        response = self.client.chat.completions.create(
            model=self.model,
            messages=[system_message, user_message],
            temperature=0.1,  # Low temperature for consistent, factual responses
            max_tokens=2000,
        )
        content = response.choices[0].message.content.strip()
    except Exception as e:
        # Any failure (request error, missing content) degrades to the canned analysis
        logger.error(f"Error getting LLM tax analysis: {str(e)}")
        return self._get_fallback_analysis()
    logger.info(f"LLM tax analysis received: {len(content)} characters")
    return content
def _get_fallback_analysis(self) -> str:
"""Return fallback analysis if LLM fails"""
return json.dumps(
{
"final_tax_amount": 0.0,
"sales_tax": {
"applicable_province": "ON",
"applicable_rate": 0.13,
"tax_name": "HST",
"calculated_tax": 0.0,
"stated_tax": 0.0,
"discrepancy": 0.0,
"reason": "LLM analysis failed - using defaults",
"requires_review": True,
},
"foreign_exchange": {
"currency_mismatch": False,
"requires_manual_review": False,
"reason": "Analysis not available",
},
"depreciation": {
"is_capital_asset": False,
"reason": "Analysis not available",
},
"meals_entertainment": {
"is_meals_entertainment": False,
"reason": "Analysis not available",
},
"confidence_adjustment": {
"boost": 0.0,
"reduce": 0.1,
"reason": "LLM analysis failed - recommend manual review",
},
"overall_assessment": "Automatic analysis failed. Manual review recommended.",
}
)
def _structure_analysis_results(
self,
llm_response: str,
receipt: Receipt,
transaction: Transaction,
user_location: str,
) -> Dict[str, Any]:
"""Parse LLM response and structure it for application"""
try:
# Extract JSON from LLM response (may have markdown code blocks)
json_str = llm_response
if "```json" in llm_response:
json_str = llm_response.split("```json")[1].split("```")[0].strip()
elif "```" in llm_response:
json_str = llm_response.split("```")[1].split("```")[0].strip()
analysis = json.loads(json_str)
# Add metadata
analysis["metadata"] = {
"user_location": user_location,
"receipt_id": receipt.id,
"transaction_id": transaction.id,
"analysis_method": "LLM-based",
"model": self.model,
}
return analysis
except json.JSONDecodeError as e:
logger.error(f"Failed to parse LLM response as JSON: {str(e)}")
logger.error(f"LLM response was: {llm_response}")
# Return structured fallback
return {
"final_tax_amount": receipt.tax if receipt.tax else 0.0,
"sales_tax": {
"requires_review": True,
"reason": "Failed to parse LLM response",
},
"foreign_exchange": {
"requires_manual_review": receipt.currency != transaction.currency
},
"depreciation": {"is_capital_asset": False},
"confidence_adjustment": {
"boost": 0.0,
"reduce": 0.15,
"reason": "Analysis parsing failed",
},
"overall_assessment": "Analysis failed. Manual review required.",
"error": str(e),
"metadata": {
"user_location": user_location,
"analysis_method": "fallback",
},
}
def _build_batch_analysis_context(self, matches: list, user_location: str) -> str:
    """
    Render one text context covering every match for the batch LLM call.

    The shared part (user province, tax rate/name, provincial and CCA
    reference tables) appears once; each match then gets its own section
    labelled ``MATCH <i> (ID: match_<i>)`` holding the receipt fields, the
    transaction fields and the detected receipt location.
    """
    # Normalize user_location to province code
    user_province = self._normalize_location_to_province(user_location)
    logger.info(
        f"Building batch tax analysis context for {len(matches)} matches - User Location: {user_location} → Province Code: {user_province}"
    )
    # Shared reference tables, serialized once for the whole batch
    tax_rates_info = json.dumps(self.PROVINCIAL_TAX_RATES, indent=2)
    cca_rates_info = json.dumps(self.CCA_RATES, indent=2)

    def describe(i, match) -> str:
        # One section per match; the "match_<i>" ID is the key expected back
        receipt = match.receipt
        transaction = match.transaction
        receipt_location = self._extract_receipt_location(receipt)
        return f"""
MATCH {i} (ID: match_{i}):
Receipt Details:
- Vendor: {receipt.vendor}
- Amount: ${receipt.amount:.2f}
- Currency: {receipt.currency}
- Date: {receipt.receipt_date.strftime("%Y-%m-%d")}
- Category: {receipt.category}
- Description: {receipt.description}
- Billing Address: {self._format_address(receipt.billing_address)}
- Shipping Address: {self._format_address(receipt.shipping_address)}
- Is Meals & Entertainment: {receipt.is_meals_entertainment}
Transaction Details:
- Vendor: {transaction.vendor}
- Amount: ${transaction.amount:.2f}
- Currency: {transaction.currency}
- Date: {transaction.transaction_date.strftime("%Y-%m-%d")}
- Notes: {transaction.notes}
- FX Rate: {transaction.fx_rate or "N/A"}
Receipt Location Detected:
{receipt_location}
"""

    matches_section = "\n".join(
        [describe(i, match) for i, match in enumerate(matches)]
    )
    province_info = self.PROVINCIAL_TAX_RATES.get(user_province, {})
    return f"""
USER CONTEXT:
- User Location (Province): {user_province}
- User Province Tax Rate: {province_info.get("rate", 0.13) * 100}%
- User Tax Type: {province_info.get("name", "HST")}
PROVINCIAL TAX RATES REFERENCE:
{tax_rates_info}
CCA DEPRECIATION RATES BY ASSET CLASS:
{cca_rates_info}
=== MATCHES TO ANALYZE ({len(matches)} total) ===
{matches_section}
"""
def _get_llm_tax_analysis_batch(self, context: str, num_matches: int) -> Dict[str, Any]:
    """
    Get tax rule analysis from the LLM for ALL matches in a single call.

    Args:
        context: Text produced by _build_batch_analysis_context().
        num_matches: Number of matches described in ``context``; used in the
            prompt and in log messages.

    Returns:
        A dictionary keyed by match ID ("match_0", "match_1", ...) whose
        values are the per-match analyses. An empty dictionary is returned
        when the request fails, the reply is empty, or the reply cannot be
        parsed into a JSON object - callers treat that as "no analysis".

    The reply may be bare JSON, JSON inside a Markdown code fence (any
    language tag, any letter case, closing fence optional) or JSON surrounded
    by prose; in the last case the outermost ``{ ... }`` span is parsed.
    """
    import re  # local import: only the fence extraction below needs it

    # NOTE(review): the FX and Meals & Entertainment wording below differs
    # from the single-match prompt in _get_llm_tax_analysis (FX: compute an
    # expected amount vs. always flag; M&E: 50% of the total including tax
    # vs. 50% of the meal plus full tax). Confirm which rule set is intended.
    prompt = f"""
You are a Canadian tax expert analyzing MULTIPLE receipt-transaction matches.
{context}
=== FOUR CORE TAX RULES ===
### 1. SALES TAX RULE
**Purpose**: Calculate and apply correct sales tax based on shipping and billing addresses.
**Key Principles**:
- When billing and shipping addresses are THE SAME: Apply sales tax based on that address location.
- When billing and shipping addresses are DIFFERENT: Apply sales tax based on the SHIPPING address.
- Tax rate is determined by the RECEIPT'S location, NOT the user's location (unless no receipt location).
**Scenario Examples**:
a) User in Ontario, Receipt from Quebec:
- Apply Quebec's tax rate (14.975% QST+GST), not Ontario's 13% HST
b) User in Ontario, Receipt from USA (New York):
- DO NOT apply Canadian sales tax
- This is an international transaction
- Flag for FX review instead
c) User in Ontario, Receipt has NO address information:
- DEFAULT to user's location (Ontario 13% HST)
**Tax Calculation**:
- Compare calculated tax vs stated tax on receipt
- Flag discrepancies for review
### 2. FOREIGN EXCHANGE (FX) RULE
**Purpose**: Handle currency mismatches between receipts and transactions.
**Actions**:
- Identify when receipt currency ≠ transaction currency (e.g., USD vs CAD)
- Calculate expected transaction amount using FX rate if available
- Flag discrepancies > $5 or 5% for manual review
- If FX rate missing but currencies differ, flag for review
### 3. DEPRECIATION RULE
**Purpose**: Identify capital assets requiring depreciation based on USER'S location.
**Critical**: Depreciation is ALWAYS based on the USER'S location (for Canadian tax filing), NOT the receipt location.
**Capital Asset Criteria**:
- Cost > $500 typically
- Useful life > 1 year
- Examples: computers, vehicles, furniture, machinery, buildings
**CCA Classes**: Assign appropriate class and rate based on asset type and user's jurisdiction
### 4. MEALS & ENTERTAINMENT RULE
**Purpose**: Apply 50% tax deduction limit for M&E expenses.
**Actions**:
- Identify M&E expenses (meals, entertainment, client dinners, etc.)
- Tax Deduction: 50% of total amount (including tax)
- Accounting Deduction: 100% of total amount (including tax)
- Always include sales tax in both calculations
=== YOUR TASK ===
Analyze EACH match and return a JSON object where each key is the match ID and the value is the complete tax analysis.
**CRITICAL INSTRUCTION FOR final_tax_amount:**
- This field MUST contain ONLY the calculated sales tax amount in dollars
- This is NOT the total amount including tax
- This is ONLY the tax portion (HST/GST/PST/QST)
- Example: If receipt total is $100 and calculated tax is $13, return 13.00 (not 113.00)
- For meals & entertainment: Return the FULL calculated tax amount (not the 50% adjusted amount)
- VERIFY: final_tax_amount should equal sales_tax.calculated_tax
-
Return your response as a SINGLE JSON object in this format:
{{
"match_0": {{
"final_tax_amount": XX.XX, // ONLY the calculated tax amount
"sales_tax": {{
"applicable_province": "XX",
"applicable_rate": 0.XX,
"tax_name": "HST/GST/PST",
"calculated_tax": XX.XX,
"stated_tax": XX.XX,
"discrepancy": XX.XX,
"reason": "Detailed explanation",
"requires_review": true/false
}},
"foreign_exchange": {{
"currency_mismatch": true/false,
"receipt_currency": "XXX",
"transaction_currency": "XXX",
"expected_transaction_amount": XX.XX,
"actual_transaction_amount": XX.XX,
"discrepancy": XX.XX,
"requires_manual_review": true/false,
"reason": "Explanation"
}},
"depreciation": {{
"is_capital_asset": true/false,
"asset_class": "class_XX",
"cca_rate": 0.XX,
"applicable_jurisdiction": "XX",
"reason": "Explanation"
}},
"meals_entertainment": {{
"is_meals_entertainment": true/false,
"tax_deduction_amount": XX.XX,
"accounting_deduction_amount": XX.XX,
"sales_tax_included": XX.XX,
"reason": "Explanation"
}},
"confidence_adjustment": {{
"boost": 0.XX,
"reduce": 0.XX,
"reason": "Why confidence should be adjusted"
}},
"overall_assessment": "Summary for this match"
}},
"match_1": {{
... same structure ...
}},
... for all {num_matches} matches ...
}}
"""
    # Defined before the try block so the JSON error handler can always log it
    json_str = ""
    try:
        response = self.client.chat.completions.create(
            model=self.model,
            messages=[
                {
                    "role": "system",
                    "content": "You are a Canadian tax expert. Analyze multiple transactions in batch and apply tax rules accurately. Return ONLY valid JSON - no markdown code blocks, no explanations, just pure JSON.",
                },
                {"role": "user", "content": prompt},
            ],
            temperature=0.1,  # Low temperature for consistent, factual responses
            max_tokens=8000,  # Higher limit for batch processing
        )
        content = response.choices[0].message.content
        # Validate that we got content
        if not content:
            logger.error("LLM returned empty response")
            return {}
        content = content.strip()
        # Check if content is empty after stripping
        if not content:
            logger.error("LLM returned whitespace-only response")
            return {}
        logger.info(
            f"LLM batch tax analysis received: {len(content)} characters for {num_matches} matches"
        )
        logger.debug(f"Raw LLM response: {content[:500]}...")  # Log first 500 chars

        # Take the body of the first code fence if there is one. The optional
        # language tag is matched case-insensitively by letter class, and an
        # unterminated fence runs to the end of the reply.
        json_str = content
        fenced = re.search(r"```[A-Za-z]*\s*(.*?)(?:```|\Z)", content, re.DOTALL)
        if fenced:
            json_str = fenced.group(1).strip()

        # Validate JSON string is not empty
        if not json_str:
            logger.error("Extracted JSON string is empty")
            logger.error(f"Original content was: {content[:500]}")
            return {}

        try:
            batch_analysis = json.loads(json_str)
        except json.JSONDecodeError:
            # Prose around the object: retry on the outermost braces before giving up
            start = json_str.find("{")
            end = json_str.rfind("}")
            if start == -1 or end <= start:
                raise
            batch_analysis = json.loads(json_str[start : end + 1])

        # Validate we got a dictionary back
        if not isinstance(batch_analysis, dict):
            logger.error(f"LLM returned non-dict type: {type(batch_analysis)}")
            return {}
        logger.info(
            f"Successfully parsed batch analysis with {len(batch_analysis)} matches"
        )
        return batch_analysis
    except json.JSONDecodeError as e:
        logger.error(f"JSON decode error in batch LLM tax analysis: {str(e)}")
        logger.error(f"Failed to parse: {json_str[:500] if json_str else 'N/A'}")
        return {}
    except Exception as e:
        logger.error(f"Error getting batch LLM tax analysis: {str(e)}")
        logger.error(f"Exception type: {type(e).__name__}")
        # Return empty dict so the caller can decide on a fallback
        return {}
def _apply_tax_analysis_to_match(self, match, tax_analysis: Dict[str, Any]):
"""Apply tax analysis results to a match object"""
# **CRITICAL FIX: Ensure final_tax_amount matches calculated_tax**
final_tax = tax_analysis.get("final_tax_amount", 0.0)
calculated_tax = tax_analysis.get("sales_tax", {}).get("calculated_tax", 0.0)
# If there's a mismatch, use calculated_tax as the source of truth
if abs(final_tax - calculated_tax) > 0.01:
logger.warning(
f"Correcting final_tax_amount mismatch for {match.receipt.vendor}: "
f"LLM returned final_tax_amount={final_tax}, but calculated_tax={calculated_tax}. "
f"Using calculated_tax as final value."
)
tax_analysis["final_tax_amount"] = calculated_tax
# Special case: If final_tax is 0 but calculated_tax > 0, always use calculated_tax
if final_tax == 0.0 and calculated_tax > 0.0:
logger.warning(
f"Correcting zero final_tax_amount for {match.receipt.vendor}: "
f"LLM returned 0 but calculated {calculated_tax} HST. Setting final_tax_amount={calculated_tax}"
)
tax_analysis["final_tax_amount"] = calculated_tax
tax_analysis["sales_tax"]["requires_review"] = True
# Apply the corrected tax analysis
match.tax_analysis = tax_analysis
logger.debug(
f"Applied tax analysis to match: {match.receipt.vendor} -> "
f"final_tax_amount={tax_analysis['final_tax_amount']}"
)
# Apply confidence adjustments based on tax analysis
confidence_adj = tax_analysis.get("confidence_adjustment", {})
# Boost confidence if tax rules validate the match
boost = confidence_adj.get("boost", 0.0)
if boost > 0:
match.confidence_score = min(1.0, match.confidence_score + boost)
match.match_reason += f" (Tax analysis confidence boost: +{boost:.2f})"
# Reduce confidence if tax issues detected
reduce = confidence_adj.get("reduce", 0.0)
if reduce > 0:
match.confidence_score = max(0.0, match.confidence_score - reduce)
match.match_reason += f" (Tax issues detected: -{reduce:.2f})"
# Add flags for manual review if needed
review_flags = []
# Check sales tax issues
sales_tax = tax_analysis.get("sales_tax", {})
if sales_tax.get("requires_review", False):
review_flags.append("Sales Tax Review Required")
# Check FX issues
fx_analysis = tax_analysis.get("foreign_exchange", {})
if fx_analysis.get("requires_manual_review", False):
review_flags.append(
f"FX Review Required (Discrepancy: ${fx_analysis.get('discrepancy', 0):.2f})"
)
# Check depreciation
depreciation = tax_analysis.get("depreciation", {})
if depreciation.get("is_capital_asset", False):
review_flags.append(
f"Capital Asset - Depreciation Applicable ({depreciation.get('asset_class', 'Unknown')})"
)
# Check meals & entertainment
meals_ent = tax_analysis.get("meals_entertainment", {})
if meals_ent.get("is_meals_entertainment", False):
tax_deduction = meals_ent.get("tax_deduction_amount", 0)
accounting_deduction = meals_ent.get("accounting_deduction_amount", 0)
review_flags.append(
f"M&E Expense - Tax Deduction: ${tax_deduction:.2f} (50%), Accounting: ${accounting_deduction:.2f} (100%)"
)
# Add review flags to match reason
if review_flags:
match.match_reason += " | REVIEW: " + "; ".join(review_flags)
return match