2025-10-05 13:25:55 +01:00
import json
import logging
from typing import Any , Dict
import groq
from config import settings
from schemas import Receipt , Transaction
logger = logging . getLogger ( __name__ )
class LLMTaxAnalyzer :
"""
Uses LLM to intelligently apply tax rules based on context.
Implements four core tax rules:
1. Sales Tax Rule - Based on receipt location (shipping/billing address)
2. Foreign Exchange Rule - Handles currency mismatches
3. Depreciation Rule - Capital assets (based on user location)
4. Meals & Entertainment Rule - 50% tax deduction, 100% accounting deduction
"""
# Combined sales-tax rates by two-letter province/territory code.
# "rate" is the total rate as a fraction, "name" the tax label shown to the
# LLM, and "type" how the tax is levied. Keys carry no surrounding
# whitespace because _normalize_location_to_province() looks them up with a
# stripped, upper-cased code.
# NOTE: a single static table cannot represent historical rates; receipts
# dated before a rate change need manual review.
PROVINCIAL_TAX_RATES = {
    "ON": {"rate": 0.13, "name": "HST", "type": "Harmonized"},
    "QC": {"rate": 0.14975, "name": "QST + GST", "type": "Combined"},
    "BC": {"rate": 0.12, "name": "PST + GST", "type": "Combined"},
    "AB": {"rate": 0.05, "name": "GST", "type": "Federal only"},
    "SK": {"rate": 0.11, "name": "PST + GST", "type": "Combined"},
    "MB": {"rate": 0.12, "name": "PST + GST", "type": "Combined"},
    # Nova Scotia HST was lowered from 15% to 14% effective 2025-04-01.
    "NS": {"rate": 0.14, "name": "HST", "type": "Harmonized"},
    "NB": {"rate": 0.15, "name": "HST", "type": "Harmonized"},
    "NL": {"rate": 0.15, "name": "HST", "type": "Harmonized"},
    "PE": {"rate": 0.15, "name": "HST", "type": "Harmonized"},
    "NT": {"rate": 0.05, "name": "GST", "type": "Federal only"},
    "NU": {"rate": 0.05, "name": "GST", "type": "Federal only"},
    "YT": {"rate": 0.05, "name": "GST", "type": "Federal only"},
}
# CCA rates by asset class (simplified).
# Maps an asset category name to its annual rate expressed as a fraction
# (0.30 == 30%). The table is serialized with json.dumps and embedded in the
# LLM prompts as reference data; the trailing comment on each entry names
# the CCA class the rate belongs to.
CCA_RATES = {
" vehicles " : 0.30 , # Class 10
" computer_equipment " : 0.55 , # Class 50
" furniture " : 0.20 , # Class 8
" buildings " : 0.04 , # Class 1
" machinery " : 0.20 , # Class 8
}
def __init__(self):
    """Create the Groq client and set the model and retry configuration.

    The API key is read from ``settings.GROQ_API_KEY``.
    """
    self.client = groq.Groq(api_key=settings.GROQ_API_KEY)
    # The model identifier is sent verbatim to the API, so it must not carry
    # any surrounding whitespace.
    self.model = "llama-3.1-8b-instant"
    self.max_retries = 3
2025-10-05 19:38:34 +01:00
def analyze_and_apply_tax_rules_batch(
    self,
    matches: list,  # List of Match objects
    user_location: str = "ON",
    *,
    individual_fallback_limit: int = 5,
) -> list:
    """Apply the tax rules to all matches using a single LLM call.

    All receipt-transaction pairs are described in one prompt to reduce
    cost. If the batch call yields nothing, small batches are re-processed
    one match at a time; larger batches are returned un-enhanced.

    Args:
        matches: Match objects; each is expected to expose ``receipt``,
            ``transaction`` and a string ``match_reason``.
        user_location: Province code or name of the user (default "ON").
        individual_fallback_limit: Largest batch size for which the
            per-match fallback is attempted when the batch call returns
            nothing. Keeps the number of extra API calls bounded.

    Returns:
        A list with one entry per input match, in input order. Matches
        that could not be analyzed are returned with an explanatory suffix
        appended to ``match_reason``. An empty input is returned as-is.
    """
    if not matches:
        return matches

    logger.info(f"Starting batch tax analysis for {len(matches)} matches")

    # Build batch context for all matches
    try:
        batch_context = self._build_batch_analysis_context(matches, user_location)
    except Exception as e:
        logger.error(f"Error building batch context: {str(e)}")
        # If we can't even build the context, return matches as-is
        for match in matches:
            match.match_reason += " (Batch analysis setup failed)"
        return matches

    # Get LLM analysis for ALL matches at once
    llm_batch_analysis = self._get_llm_tax_analysis_batch(
        batch_context, len(matches)
    )

    if not llm_batch_analysis:
        logger.warning("Batch LLM analysis returned empty results")
        # Only fall back for small batches to avoid excessive API calls
        if len(matches) <= individual_fallback_limit:
            logger.info(
                f"Attempting individual processing fallback for {len(matches)} matches"
            )
            return self._process_matches_individually(matches, user_location)

        logger.warning(
            f"Batch too large ({len(matches)} matches) for individual fallback - returning matches without enhanced tax analysis"
        )
        for match in matches:
            match.match_reason += " (Batch tax analysis unavailable)"
        return matches

    logger.info(f"Received batch analysis for {len(llm_batch_analysis)} matches")

    # Apply results to each match
    enhanced_matches = []
    for i, match in enumerate(matches):
        try:
            # The batch prompt asks the model to key each result by this
            # ID, so it must be built without any padding.
            match_key = f"match_{i}"
            match_analysis = llm_batch_analysis.get(match_key, {})

            if match_analysis and isinstance(match_analysis, dict):
                enhanced_matches.append(
                    self._apply_tax_analysis_to_match(match, match_analysis)
                )
            else:
                # No analysis available for this match, use as-is
                logger.warning(f"No analysis found for match {i} (key: {match_key})")
                match.match_reason += " (Tax analysis incomplete)"
                enhanced_matches.append(match)
        except Exception as e:
            logger.error(f"Error applying tax analysis to match {i}: {str(e)}")
            match.match_reason += " (Tax analysis error)"
            enhanced_matches.append(match)

    logger.info(
        f"Completed batch tax analysis, enhanced {len(enhanced_matches)} matches"
    )
    return enhanced_matches
def _process_matches_individually ( self , matches : list , user_location : str ) - > list :
"""
Fallback method: Process matches one at a time using the legacy method.
Only used when batch processing fails and batch size is small.
"""
logger . info ( f " Processing { len ( matches ) } matches individually as fallback " )
enhanced_matches = [ ]
for i , match in enumerate ( matches ) :
try :
# Use the legacy single-match analysis method
tax_analysis = self . analyze_and_apply_tax_rules (
match . receipt , match . transaction , user_location
)
# Apply the analysis to the match
enhanced_match = self . _apply_tax_analysis_to_match ( match , tax_analysis )
enhanced_matches . append ( enhanced_match )
logger . info (
f " Successfully processed match { i + 1 } / { len ( matches ) } individually "
)
except Exception as e :
logger . error ( f " Error in individual processing for match { i } : { str ( e ) } " )
match . match_reason + = " (Individual tax analysis failed) "
enhanced_matches . append ( match )
2025-10-05 19:38:34 +01:00
return enhanced_matches
2025-10-05 13:25:55 +01:00
def analyze_and_apply_tax_rules(
    self,
    receipt: Receipt,
    transaction: Transaction,
    user_location: str = "ON",  # Default to Ontario
) -> Dict[str, Any]:
    """Analyze one receipt-transaction pair with the LLM (legacy entry point).

    Kept for backward compatibility; analyze_and_apply_tax_rules_batch()
    handles many matches in a single LLM call.

    The analysis covers the four tax rules:
    1. Sales tax based on receipt location (shipping/billing address priority)
    2. Foreign exchange rules for currency mismatches
    3. Depreciation rules for capital assets (based on user location)
    4. Meals & Entertainment deduction rules

    Args:
        receipt: The receipt side of the match.
        transaction: The transaction side of the match.
        user_location: Province code or name of the user.

    Returns:
        The structured analysis produced by _structure_analysis_results().
    """
    # Context -> raw LLM reply -> structured result
    prompt_context = self._build_analysis_context(receipt, transaction, user_location)
    raw_reply = self._get_llm_tax_analysis(prompt_context)
    return self._structure_analysis_results(
        raw_reply, receipt, transaction, user_location
    )
def _build_analysis_context (
self , receipt : Receipt , transaction : Transaction , user_location : str
) - > str :
""" Build comprehensive context for LLM analysis """
# Extract location information
receipt_location = self . _extract_receipt_location ( receipt )
2025-10-05 18:34:35 +01:00
# Normalize user_location to province code (handle "Canada", "Ontario", "ON", etc.)
user_province = self . _normalize_location_to_province ( user_location )
logger . info (
f " Building tax analysis context - User Location: { user_location } → Province Code: { user_province } "
)
2025-10-05 13:25:55 +01:00
# Build tax rates reference
tax_rates_info = json . dumps ( self . PROVINCIAL_TAX_RATES , indent = 2 )
cca_rates_info = json . dumps ( self . CCA_RATES , indent = 2 )
context = f """
RECEIPT DETAILS:
- Vendor: { receipt . vendor }
- Amount: $ { receipt . amount : .2f }
- Currency: { receipt . currency }
- Date: { receipt . receipt_date . strftime ( " % Y- % m- %d " ) }
- Category: { receipt . category }
- Description: { receipt . description }
- Billing Address: { self . _format_address ( receipt . billing_address ) }
- Shipping Address: { self . _format_address ( receipt . shipping_address ) }
- Is Meals & Entertainment: { receipt . is_meals_entertainment }
TRANSACTION DETAILS:
- Vendor: { transaction . vendor }
- Amount: $ { transaction . amount : .2f }
- Currency: { transaction . currency }
- Date: { transaction . transaction_date . strftime ( " % Y- % m- %d " ) }
- Notes: { transaction . notes }
- FX Rate: { transaction . fx_rate if transaction . fx_rate else " N/A " }
USER CONTEXT:
- User Location (Province): { user_province }
- User Province Tax Rate: { self . PROVINCIAL_TAX_RATES . get ( user_province , { } ) . get ( " rate " , 0.13 ) * 100 } %
- User Tax Type: { self . PROVINCIAL_TAX_RATES . get ( user_province , { } ) . get ( " name " , " HST " ) }
RECEIPT LOCATION DETECTED:
{ receipt_location }
PROVINCIAL TAX RATES REFERENCE:
{ tax_rates_info }
CCA DEPRECIATION RATES BY ASSET CLASS:
{ cca_rates_info }
"""
return context
2025-10-05 18:34:35 +01:00
def _normalize_location_to_province ( self , location : str ) - > str :
"""
Normalize various location formats to province code.
Handles: " ON " , " Ontario " , " Canada " , etc.
"""
location_upper = location . upper ( ) . strip ( )
# Direct province code match
if location_upper in self . PROVINCIAL_TAX_RATES :
return location_upper
# Map full province names to codes
province_name_map = {
" ONTARIO " : " ON " ,
" QUEBEC " : " QC " ,
" BRITISH COLUMBIA " : " BC " ,
" ALBERTA " : " AB " ,
" SASKATCHEWAN " : " SK " ,
" MANITOBA " : " MB " ,
" NOVA SCOTIA " : " NS " ,
" NEW BRUNSWICK " : " NB " ,
" NEWFOUNDLAND AND LABRADOR " : " NL " ,
" NEWFOUNDLAND " : " NL " ,
" PRINCE EDWARD ISLAND " : " PE " ,
" NORTHWEST TERRITORIES " : " NT " ,
" NUNAVUT " : " NU " ,
" YUKON " : " YT " ,
}
if location_upper in province_name_map :
return province_name_map [ location_upper ]
# Default to Ontario if country is Canada or unspecified
if location_upper in [ " CANADA " , " CAN " , " CA " , " " ] :
logger . warning ( f " Location ' { location } ' is too generic, defaulting to ON " )
return " ON "
# If nothing matches, default to Ontario
logger . warning ( f " Could not parse location ' { location } ' , defaulting to ON " )
return " ON "
2025-10-05 13:25:55 +01:00
def _extract_receipt_location ( self , receipt : Receipt ) - > str :
""" Extract and format receipt location information """
# Priority: Use shipping address if available, then billing
location = (
receipt . shipping_address
if receipt . shipping_address
else receipt . billing_address
)
if location :
return f """
- Province: { location . province }
- City: { location . city }
- Country: { location . country }
- Postal Code: { location . postal_code }
"""
else :
return " - No address information available (will use user location) "
def _format_address ( self , address ) - > str :
""" Format address for display """
if address :
return f " { address . city } , { address . province } , { address . country } ( { address . postal_code } ) "
return " Not provided "
# Send one receipt/transaction context to the chat-completions endpoint and
# return the model's raw reply text.
#
# Args:
#   context: text describing the receipt, transaction and user (the caller
#     builds it with _build_analysis_context); it is interpolated into the
#     prompt below.
# Returns:
#   The stripped message content of the first choice. If anything in the
#   request/response handling raises, the JSON string produced by
#   _get_fallback_analysis() is returned instead, so the caller always
#   receives a string.
# Note: the reply is neither parsed nor validated here.
def _get_llm_tax_analysis ( self , context : str ) - > str :
""" Get tax rule analysis from LLM """
# In the f-string below only { context } is interpolated; the doubled
# braces ({{ and }}) are literal braces of the JSON template.
prompt = f """
2025-10-05 23:38:03 +00:00
You are a tax expert analyzing a receipt-transaction match. Apply the following tax rules intelligently:
And you are to calculate the tax for the receipt based on the context provided.
2025-10-05 13:25:55 +01:00
{ context }
=== FOUR CORE TAX RULES ===
### 1. SALES TAX RULE
**Purpose**: Calculate and apply correct sales tax based on shipping and billing addresses.
**Key Principles**:
- When billing and shipping addresses are THE SAME: Apply sales tax based on that address location.
- When billing and shipping addresses are DIFFERENT: Apply sales tax based on the SHIPPING address.
- Tax rate is determined by the RECEIPT ' S location, NOT the user ' s location (unless no receipt location).
**Scenario Examples**:
a) User in Ontario, Receipt from Quebec:
- Apply Quebec ' s tax rate (14.975% QST+GST), not Ontario ' s 13% HST
- The user ' s location is only for depreciation purposes
b) User in Ontario, Receipt from USA (New York):
- DO NOT apply Canadian sales tax
- This is an international transaction
- Flag for FX review instead
c) User in USA (New York), Receipt from California:
- Apply California ' s sales tax rate (receipt location)
- Not New York ' s rate (user location)
d) User in Ontario, Receipt has NO address information:
- DEFAULT to user ' s location (Ontario 13% HST)
- This is the fallback when receipt location is unknown
**Tax Calculation**:
- Compare calculated tax vs stated tax on receipt
- Flag discrepancies for review
### 2. FOREIGN EXCHANGE (FX) RULE
**Purpose**: Handle currency mismatches between receipts and transactions.
**Actions**:
- Identify when receipt currency ≠ transaction currency (e.g., USD vs CAD)
- Calculate the absolute discrepancy: |receipt_amount - transaction_amount|
- ALWAYS flag for manual review - DO NOT fetch exchange rates automatically
- If FX rate is provided in transaction data, note it but still require manual review
**Examples**:
- Transaction: USD $100, Receipt: CAD $125 → Discrepancy: $25, Flag for review
- The user must manually approve or adjust the FX difference
### 3. DEPRECIATION RULE
**Purpose**: Calculate depreciation for assets using two methods.
**Key Principle**: Depreciation is ALWAYS based on USER ' S location, NOT receipt location.
**Asset Identification**:
- Only applies to capital assets: vehicles, equipment, furniture, buildings, machinery
- Identify from receipt category and description
- Typical threshold: Assets generally > $500
**Two Methods Required**:
a) **Straight-Line Depreciation** (for accounting purposes):
Formula: (Cost - Residual Value) / Useful Life
Example: Asset $10,000, 5-year life, $1,000 residual = $1,800/year
b) **CCA Depreciation** (for tax purposes - Canada):
Method: Declining Balance
Formula: Book Value × CCA Rate each year
Example: Truck $20,000, 30% CCA:
- Year 1: $20,000 × 30% = $6,000
- Year 2: ($20,000 - $6,000) × 30% = $4,200
- Continues declining each year
**CCA Classes** (Canada):
- Vehicles: 30% (Class 10)
- Computer Equipment: 55% (Class 50)
- Furniture/Machinery: 20% (Class 8)
- Buildings: 4% (Class 1)
### 4. MEALS & ENTERTAINMENT TAX DEDUCTION RULE
**Purpose**: Apply correct deductions for meals and entertainment expenses.
**Deduction Rules**:
- **For Tax Purposes**: Only 50% of total receipt amount is deductible
- **For Accounting Purposes**: 100% of total receipt amount is deductible
- **Sales Tax**: Full sales tax amount is deductible for accounting
**Example**:
- Receipt: $100 meal + $12 sales tax = $112 total
- **Tax Deduction**: $50 (50% of meal) + $12 (full tax) = $62
- **Accounting Deduction**: $100 (full meal) + $12 (full tax) = $112
=== LOCATION-BASED SCENARIO HANDLING ===
**When Receipt Location ≠ User Location**:
1. **Sales Tax**: Use RECEIPT ' s location for tax calculation
- Exception: If international (different country), no Canadian sales tax + flag FX
- Exception: If no location on receipt, use user ' s location as default
2. **Depreciation**: ALWAYS use USER ' s location for depreciation rules
- Receipt location is irrelevant for depreciation
- Apply user ' s country/province depreciation methods
3. **FX Handling**:
- If receipt currency ≠ transaction currency: Flag for manual review
- Do NOT automatically fetch or apply exchange rates
4. **Missing Location**:
- If receipt has no address: Default to user ' s location for sales tax
- Still apply user ' s location for depreciation
=== ANALYSIS REQUIRED ===
Provide a structured JSON response with the following format:
2025-10-05 23:38:03 +00:00
**CRITICAL INSTRUCTION FOR final_tax_amount:**
- This field MUST contain ONLY the calculated sales tax amount in dollars
- This is NOT the total amount including tax
- This is ONLY the tax portion (HST/GST/PST/QST)
- Example: If receipt total is $100 and calculated tax is $13, return 13.00 (not 113.00)
- For meals & entertainment: Return the FULL calculated tax amount (not the 50% adjusted amount)
2025-10-05 13:25:55 +01:00
{{
2025-10-05 23:38:03 +00:00
" final_tax_amount " : XX.XX, // ONLY the calculated tax amount (e.g., 13.00 for $100 + $13 HST)
2025-10-05 13:25:55 +01:00
" sales_tax " : {{
" applicable_province " : " XX " ,
" applicable_rate " : 0.XX,
" tax_name " : " HST/GST/PST/QST " ,
2025-10-05 23:38:03 +00:00
" calculated_tax " : XX.XX, // This should match final_tax_amount above
2025-10-05 13:25:55 +01:00
" stated_tax " : XX.XX,
" discrepancy " : XX.XX,
2025-10-05 23:38:03 +00:00
" reason " : " Detailed explanation " ,
2025-10-05 13:25:55 +01:00
" requires_review " : true/false
}} ,
" foreign_exchange " : {{
" currency_mismatch " : true/false,
" receipt_currency " : " XXX " ,
" transaction_currency " : " XXX " ,
" receipt_amount " : XX.XX,
" transaction_amount " : XX.XX,
" discrepancy " : XX.XX,
" requires_manual_review " : true/false,
" reason " : " Explanation of FX situation "
}} ,
" depreciation " : {{
" is_capital_asset " : true/false,
" asset_class " : " category name or N/A " ,
" suggested_cca_rate " : 0.XX,
" straight_line_applicable " : true/false,
" cca_applicable " : true/false,
" straight_line_example " : " Brief calculation example if applicable " ,
" cca_example " : " Brief calculation example if applicable " ,
" reason " : " Why this is/isn ' t a capital asset, which CCA class, and why depreciation based on user ' s location "
}} ,
" meals_entertainment " : {{
" is_meals_entertainment " : true/false,
" tax_deduction_amount " : XX.XX,
" accounting_deduction_amount " : XX.XX,
" sales_tax_included " : XX.XX,
" reason " : " Explanation of M&E rule application "
}} ,
" confidence_adjustment " : {{
" boost " : 0.XX,
" reduce " : 0.XX,
" reason " : " Why confidence should be adjusted based on tax analysis "
}} ,
" overall_assessment " : " Comprehensive summary: which rules applied, why, what location used for what purpose, and any required actions "
}}
2025-10-05 18:34:35 +01:00
**IMPORTANT**: The " final_tax_amount " field at the top level must contain the final calculated tax amount. This should be the calculated_tax from sales_tax analysis. If this is a meals & entertainment expense, ensure you return the FULL tax amount here (not the 50% adjusted amount).
2025-10-05 13:25:55 +01:00
**Critical Reminders**:
- Sales tax uses RECEIPT location (or user location if receipt has none)
- Depreciation ALWAYS uses USER location
- For different addresses, use SHIPPING address for sales tax
- International transactions: no Canadian tax + FX flag
- Be precise with all calculations
- Always explain your reasoning clearly
"""
# Everything from the request to reading the reply is guarded: any
# exception is logged and replaced by the static fallback analysis.
try :
response = self . client . chat . completions . create (
model = self . model ,
messages = [
{
" role " : " system " ,
" content " : " You are a Canadian tax expert. Analyze transactions and apply tax rules accurately. Always return valid JSON. " ,
} ,
{ " role " : " user " , " content " : prompt } ,
] ,
temperature = 0.1 , # Low temperature for consistent, factual responses
max_tokens = 2000 ,
)
# Only the first choice is used; its text is returned unparsed.
content = response . choices [ 0 ] . message . content . strip ( )
logger . info ( f " LLM tax analysis received: { len ( content ) } characters " )
return content
except Exception as e :
logger . error ( f " Error getting LLM tax analysis: { str ( e ) } " )
return self . _get_fallback_analysis ( )
def _get_fallback_analysis ( self ) - > str :
""" Return fallback analysis if LLM fails """
return json . dumps (
{
2025-10-05 18:34:35 +01:00
" final_tax_amount " : 0.0 ,
2025-10-05 13:25:55 +01:00
" sales_tax " : {
" applicable_province " : " ON " ,
" applicable_rate " : 0.13 ,
" tax_name " : " HST " ,
" calculated_tax " : 0.0 ,
" stated_tax " : 0.0 ,
" discrepancy " : 0.0 ,
" reason " : " LLM analysis failed - using defaults " ,
" requires_review " : True ,
} ,
" foreign_exchange " : {
" currency_mismatch " : False ,
" requires_manual_review " : False ,
" reason " : " Analysis not available " ,
} ,
" depreciation " : {
" is_capital_asset " : False ,
" reason " : " Analysis not available " ,
} ,
" meals_entertainment " : {
" is_meals_entertainment " : False ,
" reason " : " Analysis not available " ,
} ,
" confidence_adjustment " : {
" boost " : 0.0 ,
" reduce " : 0.1 ,
" reason " : " LLM analysis failed - recommend manual review " ,
} ,
" overall_assessment " : " Automatic analysis failed. Manual review recommended. " ,
}
)
def _structure_analysis_results (
self ,
llm_response : str ,
receipt : Receipt ,
transaction : Transaction ,
user_location : str ,
) - > Dict [ str , Any ] :
""" Parse LLM response and structure it for application """
try :
# Extract JSON from LLM response (may have markdown code blocks)
json_str = llm_response
if " ```json " in llm_response :
json_str = llm_response . split ( " ```json " ) [ 1 ] . split ( " ``` " ) [ 0 ] . strip ( )
elif " ``` " in llm_response :
json_str = llm_response . split ( " ``` " ) [ 1 ] . split ( " ``` " ) [ 0 ] . strip ( )
analysis = json . loads ( json_str )
# Add metadata
analysis [ " metadata " ] = {
" user_location " : user_location ,
" receipt_id " : receipt . id ,
" transaction_id " : transaction . id ,
" analysis_method " : " LLM-based " ,
" model " : self . model ,
}
return analysis
except json . JSONDecodeError as e :
logger . error ( f " Failed to parse LLM response as JSON: { str ( e ) } " )
logger . error ( f " LLM response was: { llm_response } " )
# Return structured fallback
return {
2025-10-05 18:34:35 +01:00
" final_tax_amount " : receipt . tax if receipt . tax else 0.0 ,
2025-10-05 13:25:55 +01:00
" sales_tax " : {
" requires_review " : True ,
" reason " : " Failed to parse LLM response " ,
} ,
" foreign_exchange " : {
" requires_manual_review " : receipt . currency != transaction . currency
} ,
" depreciation " : { " is_capital_asset " : False } ,
" confidence_adjustment " : {
" boost " : 0.0 ,
" reduce " : 0.15 ,
" reason " : " Analysis parsing failed " ,
} ,
" overall_assessment " : " Analysis failed. Manual review required. " ,
" error " : str ( e ) ,
" metadata " : {
" user_location " : user_location ,
" analysis_method " : " fallback " ,
} ,
}
2025-10-05 19:38:34 +01:00
def _build_batch_analysis_context ( self , matches : list , user_location : str ) - > str :
""" Build comprehensive context for batch LLM analysis of all matches """
# Normalize user_location to province code
user_province = self . _normalize_location_to_province ( user_location )
logger . info (
f " Building batch tax analysis context for { len ( matches ) } matches - User Location: { user_location } → Province Code: { user_province } "
)
# Build tax rates and CCA references once
tax_rates_info = json . dumps ( self . PROVINCIAL_TAX_RATES , indent = 2 )
cca_rates_info = json . dumps ( self . CCA_RATES , indent = 2 )
# Build match entries
matches_info = [ ]
for i , match in enumerate ( matches ) :
receipt = match . receipt
transaction = match . transaction
receipt_location = self . _extract_receipt_location ( receipt )
match_info = f """
MATCH { i } (ID: match_ { i } ):
Receipt Details:
- Vendor: { receipt . vendor }
- Amount: $ { receipt . amount : .2f }
- Currency: { receipt . currency }
- Date: { receipt . receipt_date . strftime ( " % Y- % m- %d " ) }
- Category: { receipt . category }
- Description: { receipt . description }
- Billing Address: { self . _format_address ( receipt . billing_address ) }
- Shipping Address: { self . _format_address ( receipt . shipping_address ) }
- Is Meals & Entertainment: { receipt . is_meals_entertainment }
Transaction Details:
- Vendor: { transaction . vendor }
- Amount: $ { transaction . amount : .2f }
- Currency: { transaction . currency }
- Date: { transaction . transaction_date . strftime ( " % Y- % m- %d " ) }
- Notes: { transaction . notes }
- FX Rate: { transaction . fx_rate if transaction . fx_rate else " N/A " }
Receipt Location Detected:
{ receipt_location }
"""
matches_info . append ( match_info )
matches_section = " \n " . join ( matches_info )
context = f """
USER CONTEXT:
- User Location (Province): { user_province }
- User Province Tax Rate: { self . PROVINCIAL_TAX_RATES . get ( user_province , { } ) . get ( " rate " , 0.13 ) * 100 } %
- User Tax Type: { self . PROVINCIAL_TAX_RATES . get ( user_province , { } ) . get ( " name " , " HST " ) }
PROVINCIAL TAX RATES REFERENCE:
{ tax_rates_info }
CCA DEPRECIATION RATES BY ASSET CLASS:
{ cca_rates_info }
=== MATCHES TO ANALYZE ( { len ( matches ) } total) ===
{ matches_section }
"""
return context
2025-10-05 23:38:03 +00:00
# Ask the model to analyze every match in one chat-completions request and
# return the parsed reply.
#
# Args:
#   context: text describing the user and all matches (the caller builds it
#     with _build_batch_analysis_context); interpolated into the prompt.
#   num_matches: number of matches in the batch; used in the prompt text and
#     in log messages only.
# Returns:
#   The dict parsed from the reply (the prompt asks for one entry per match
#   ID). An empty dict is returned when the reply is empty or whitespace,
#   when no JSON text can be extracted, when the JSON is not an object, on a
#   JSON decode error, and on any other exception - callers treat an empty
#   dict as "no analysis available".
def _get_llm_tax_analysis_batch ( self , context : str , num_matches : int ) - > Dict [ str , Any ] :
2025-10-05 19:38:34 +01:00
""" Get tax rule analysis from LLM for ALL matches in a single call """
# In the f-string below only { context } and { num_matches } are
# interpolated; doubled braces ({{ and }}) are literal JSON braces.
prompt = f """
2025-10-05 23:38:03 +00:00
You are a Canadian tax expert analyzing MULTIPLE receipt-transaction matches.
2025-10-05 19:38:34 +01:00
{ context }
=== FOUR CORE TAX RULES ===
### 1. SALES TAX RULE
**Purpose**: Calculate and apply correct sales tax based on shipping and billing addresses.
**Key Principles**:
- When billing and shipping addresses are THE SAME: Apply sales tax based on that address location.
- When billing and shipping addresses are DIFFERENT: Apply sales tax based on the SHIPPING address.
- Tax rate is determined by the RECEIPT ' S location, NOT the user ' s location (unless no receipt location).
**Scenario Examples**:
a) User in Ontario, Receipt from Quebec:
- Apply Quebec ' s tax rate (14.975% QST+GST), not Ontario ' s 13% HST
b) User in Ontario, Receipt from USA (New York):
- DO NOT apply Canadian sales tax
- This is an international transaction
- Flag for FX review instead
c) User in Ontario, Receipt has NO address information:
- DEFAULT to user ' s location (Ontario 13% HST)
**Tax Calculation**:
- Compare calculated tax vs stated tax on receipt
- Flag discrepancies for review
### 2. FOREIGN EXCHANGE (FX) RULE
**Purpose**: Handle currency mismatches between receipts and transactions.
**Actions**:
- Identify when receipt currency ≠ transaction currency (e.g., USD vs CAD)
- Calculate expected transaction amount using FX rate if available
- Flag discrepancies > $5 or 5% for manual review
- If FX rate missing but currencies differ, flag for review
### 3. DEPRECIATION RULE
**Purpose**: Identify capital assets requiring depreciation based on USER ' S location.
**Critical**: Depreciation is ALWAYS based on the USER ' S location (for Canadian tax filing), NOT the receipt location.
**Capital Asset Criteria**:
- Cost > $500 typically
- Useful life > 1 year
- Examples: computers, vehicles, furniture, machinery, buildings
**CCA Classes**: Assign appropriate class and rate based on asset type and user ' s jurisdiction
### 4. MEALS & ENTERTAINMENT RULE
**Purpose**: Apply 50% tax deduction limit for M&E expenses.
**Actions**:
- Identify M&E expenses (meals, entertainment, client dinners, etc.)
- Tax Deduction: 50% of total amount (including tax)
- Accounting Deduction: 100% of total amount (including tax)
- Always include sales tax in both calculations
=== YOUR TASK ===
2025-10-05 23:38:03 +00:00
Analyze EACH match and return a JSON object where each key is the match ID and the value is the complete tax analysis.
**CRITICAL INSTRUCTION FOR final_tax_amount:**
- This field MUST contain ONLY the calculated sales tax amount in dollars
- This is NOT the total amount including tax
- This is ONLY the tax portion (HST/GST/PST/QST)
- Example: If receipt total is $100 and calculated tax is $13, return 13.00 (not 113.00)
- For meals & entertainment: Return the FULL calculated tax amount (not the 50% adjusted amount)
- VERIFY: final_tax_amount should equal sales_tax.calculated_tax
-
2025-10-05 19:38:34 +01:00
Return your response as a SINGLE JSON object in this format:
{{
" match_0 " : {{
2025-10-05 23:38:03 +00:00
" final_tax_amount " : XX.XX, // ONLY the calculated tax amount
2025-10-05 19:38:34 +01:00
" sales_tax " : {{
" applicable_province " : " XX " ,
" applicable_rate " : 0.XX,
" tax_name " : " HST/GST/PST " ,
" calculated_tax " : XX.XX,
" stated_tax " : XX.XX,
" discrepancy " : XX.XX,
" reason " : " Detailed explanation " ,
" requires_review " : true/false
}} ,
" foreign_exchange " : {{
" currency_mismatch " : true/false,
" receipt_currency " : " XXX " ,
" transaction_currency " : " XXX " ,
" expected_transaction_amount " : XX.XX,
" actual_transaction_amount " : XX.XX,
" discrepancy " : XX.XX,
" requires_manual_review " : true/false,
" reason " : " Explanation "
}} ,
" depreciation " : {{
" is_capital_asset " : true/false,
" asset_class " : " class_XX " ,
" cca_rate " : 0.XX,
" applicable_jurisdiction " : " XX " ,
" reason " : " Explanation "
}} ,
" meals_entertainment " : {{
" is_meals_entertainment " : true/false,
" tax_deduction_amount " : XX.XX,
" accounting_deduction_amount " : XX.XX,
" sales_tax_included " : XX.XX,
" reason " : " Explanation "
}} ,
" confidence_adjustment " : {{
" boost " : 0.XX,
" reduce " : 0.XX,
" reason " : " Why confidence should be adjusted "
}} ,
" overall_assessment " : " Summary for this match "
}} ,
" match_1 " : {{
... same structure ...
}} ,
... for all { num_matches } matches ...
}}
"""
# The request, the fence stripping and the JSON parsing all happen inside
# one try block; every failure path below returns an empty dict.
try :
response = self . client . chat . completions . create (
model = self . model ,
messages = [
{
" role " : " system " ,
2025-10-05 20:36:47 +01:00
" content " : " You are a Canadian tax expert. Analyze multiple transactions in batch and apply tax rules accurately. Return ONLY valid JSON - no markdown code blocks, no explanations, just pure JSON. " ,
2025-10-05 19:38:34 +01:00
} ,
{ " role " : " user " , " content " : prompt } ,
] ,
temperature = 0.1 , # Low temperature for consistent, factual responses
max_tokens = 8000 , # Higher limit for batch processing
)
2025-10-05 20:03:46 +01:00
content = response . choices [ 0 ] . message . content
# Validate that we got content
if not content :
logger . error ( " LLM returned empty response " )
return { }
content = content . strip ( )
# Check if content is empty after stripping
if not content :
logger . error ( " LLM returned whitespace-only response " )
return { }
2025-10-05 19:38:34 +01:00
logger . info (
f " LLM batch tax analysis received: { len ( content ) } characters for { num_matches } matches "
)
2025-10-05 20:03:46 +01:00
logger . debug ( f " Raw LLM response: { content [ : 500 ] } ... " ) # Log first 500 chars
2025-10-05 19:38:34 +01:00
2025-10-05 20:36:47 +01:00
# Parse the JSON response - handle various markdown code block formats
2025-10-05 19:38:34 +01:00
json_str = content
2025-10-05 20:36:47 +01:00
# Check for markdown code blocks with various language identifiers
# The branches are ordered from the most specific fence tag to the
# generic one; the first matching branch wins.
2025-10-05 19:38:34 +01:00
if " ```json " in content :
json_str = content . split ( " ```json " ) [ 1 ] . split ( " ``` " ) [ 0 ] . strip ( )
2025-10-05 20:36:47 +01:00
elif " ```javascript " in content :
json_str = content . split ( " ```javascript " ) [ 1 ] . split ( " ``` " ) [ 0 ] . strip ( )
elif " ```js " in content :
json_str = content . split ( " ```js " ) [ 1 ] . split ( " ``` " ) [ 0 ] . strip ( )
2025-10-05 19:38:34 +01:00
elif " ``` " in content :
2025-10-05 20:36:47 +01:00
# Generic code block - extract content between first ``` and last ```
parts = content . split ( " ``` " )
if len ( parts ) > = 3 :
# Take the second part (index 1), which is between first and second ```
json_str = parts [ 1 ] . strip ( )
# Remove language identifier if it's on the first line
lines = json_str . split ( " \n " , 1 )
if len ( lines ) > 1 and lines [ 0 ] . strip ( ) in [
" json " ,
" javascript " ,
" js " ,
" " ,
] :
json_str = lines [ 1 ] . strip ( )
2025-10-05 19:38:34 +01:00
2025-10-05 20:03:46 +01:00
# Validate JSON string is not empty
if not json_str :
logger . error ( " Extracted JSON string is empty " )
2025-10-05 20:36:47 +01:00
logger . error ( f " Original content was: { content [ : 500 ] } " )
2025-10-05 20:03:46 +01:00
return { }
2025-10-05 19:38:34 +01:00
batch_analysis = json . loads ( json_str )
2025-10-05 20:03:46 +01:00
# Validate we got a dictionary back
if not isinstance ( batch_analysis , dict ) :
logger . error ( f " LLM returned non-dict type: { type ( batch_analysis ) } " )
return { }
logger . info (
f " Successfully parsed batch analysis with { len ( batch_analysis ) } matches "
)
2025-10-05 19:38:34 +01:00
return batch_analysis
2025-10-05 20:03:46 +01:00
except json . JSONDecodeError as e :
logger . error ( f " JSON decode error in batch LLM tax analysis: { str ( e ) } " )
# The locals() check guards the log line in case json_str was never
# assigned before the error was raised.
logger . error (
f " Failed to parse: { json_str [ : 500 ] if ' json_str ' in locals ( ) else ' N/A ' } "
)
return { }
2025-10-05 19:38:34 +01:00
except Exception as e :
logger . error ( f " Error getting batch LLM tax analysis: { str ( e ) } " )
2025-10-05 20:03:46 +01:00
logger . error ( f " Exception type: { type ( e ) . __name__ } " )
2025-10-05 19:38:34 +01:00
# Return empty dict so each match can handle fallback individually
return { }
def _apply_tax_analysis_to_match(self, match, tax_analysis: Dict[str, Any]):
    """Apply tax analysis results to a match object.

    Reconciles ``final_tax_amount`` with ``sales_tax.calculated_tax`` (the
    calculated value is the source of truth), stores the analysis on the
    match, adjusts the match confidence, and appends review flags to
    ``match.match_reason``.

    The analysis comes from parsed LLM output, so missing keys, ``null``
    values, numeric strings, and non-dict sections are tolerated instead of
    raising.

    Args:
        match: Object exposing ``receipt.vendor``, ``confidence_score``,
            ``match_reason`` and a writable ``tax_analysis`` attribute.
        tax_analysis: Parsed analysis for this match. Mutated in place when
            ``final_tax_amount`` has to be corrected.

    Returns:
        The same ``match`` object, updated in place.
    """

    def _as_float(value, default=0.0):
        # Parsed JSON may carry None, a numeric string, or a container here.
        try:
            return float(value)
        except (TypeError, ValueError):
            return default

    def _section(key):
        # A missing, null, or non-dict section is treated as empty.
        value = tax_analysis.get(key)
        return value if isinstance(value, dict) else {}

    vendor = match.receipt.vendor

    # **CRITICAL FIX: Ensure final_tax_amount matches calculated_tax**
    sales_tax = _section("sales_tax")
    final_tax = _as_float(tax_analysis.get("final_tax_amount"))
    calculated_tax = _as_float(sales_tax.get("calculated_tax"))

    # If there's a mismatch, use calculated_tax as the source of truth
    if abs(final_tax - calculated_tax) > 0.01:
        logger.warning(
            f"Correcting final_tax_amount mismatch for {vendor}: "
            f"LLM returned final_tax_amount={final_tax}, but calculated_tax={calculated_tax}. "
            f"Using calculated_tax as final value."
        )
        tax_analysis["final_tax_amount"] = calculated_tax

    # Special case: a zero final amount with a positive calculated tax is
    # always corrected (even below the 0.01 tolerance) and flagged for review.
    if final_tax == 0.0 and calculated_tax > 0.0:
        logger.warning(
            f"Correcting zero final_tax_amount for {vendor}: "
            f"LLM returned 0 but calculated {calculated_tax} HST. Setting final_tax_amount={calculated_tax}"
        )
        tax_analysis["final_tax_amount"] = calculated_tax
        # calculated_tax > 0 implies sales_tax is the dict stored in
        # tax_analysis, so this write is visible through match.tax_analysis.
        sales_tax["requires_review"] = True

    # Apply the corrected tax analysis
    match.tax_analysis = tax_analysis
    # Use .get(): the key is absent when the LLM omitted it and no
    # correction was needed; indexing would raise KeyError.
    logger.debug(
        f"Applied tax analysis to match: {vendor} -> "
        f"final_tax_amount={tax_analysis.get('final_tax_amount', 0.0)}"
    )

    # Apply confidence adjustments based on tax analysis
    confidence_adj = _section("confidence_adjustment")

    # Boost confidence if tax rules validate the match (capped at 1.0)
    boost = _as_float(confidence_adj.get("boost"))
    if boost > 0:
        match.confidence_score = min(1.0, match.confidence_score + boost)
        match.match_reason += f" (Tax analysis confidence boost: +{boost:.2f})"

    # Reduce confidence if tax issues detected (floored at 0.0)
    reduction = _as_float(confidence_adj.get("reduce"))
    if reduction > 0:
        match.confidence_score = max(0.0, match.confidence_score - reduction)
        match.match_reason += f" (Tax issues detected: -{reduction:.2f})"

    # Collect flags for manual review
    review_flags = []

    # Sales tax issues
    if sales_tax.get("requires_review", False):
        review_flags.append("Sales Tax Review Required")

    # FX issues
    fx_analysis = _section("foreign_exchange")
    if fx_analysis.get("requires_manual_review", False):
        discrepancy = _as_float(fx_analysis.get("discrepancy"))
        review_flags.append(
            f"FX Review Required (Discrepancy: ${discrepancy:.2f})"
        )

    # Depreciation
    depreciation = _section("depreciation")
    if depreciation.get("is_capital_asset", False):
        review_flags.append(
            f"Capital Asset - Depreciation Applicable ({depreciation.get('asset_class', 'Unknown')})"
        )

    # Meals & entertainment
    meals_ent = _section("meals_entertainment")
    if meals_ent.get("is_meals_entertainment", False):
        tax_deduction = _as_float(meals_ent.get("tax_deduction_amount"))
        accounting_deduction = _as_float(
            meals_ent.get("accounting_deduction_amount")
        )
        review_flags.append(
            f"M&E Expense - Tax Deduction: ${tax_deduction:.2f} (50%), Accounting: ${accounting_deduction:.2f} (100%)"
        )

    # Add review flags to match reason
    if review_flags:
        match.match_reason += " | REVIEW: " + "; ".join(review_flags)

    return match