447 lines
16 KiB
Python
447 lines
16 KiB
Python
|
|
import json
|
|||
|
|
import logging
|
|||
|
|
from typing import Any, Dict
|
|||
|
|
|
|||
|
|
import groq
|
|||
|
|
from config import settings
|
|||
|
|
from schemas import Receipt, Transaction
|
|||
|
|
|
|||
|
|
# Module-level logger, named after this module via __name__.
logger = logging.getLogger(__name__)
|
|||
|
|
|
|||
|
|
|
|||
|
|
class LLMTaxAnalyzer:
    """
    Uses LLM to intelligently apply tax rules based on context.

    Implements four core tax rules:
    1. Sales Tax Rule - Based on receipt location (shipping/billing address)
    2. Foreign Exchange Rule - Handles currency mismatches
    3. Depreciation Rule - Capital assets (based on user location)
    4. Meals & Entertainment Rule - 50% tax deduction, 100% accounting deduction

    Workflow: receipt/transaction details are rendered into a text context,
    embedded in a rules prompt, sent to a Groq chat model, and the JSON reply
    is parsed into a dict. If the API call or the parsing fails, a structured
    fallback dict is returned instead of raising.
    """

    # Provincial tax rates for reference
    PROVINCIAL_TAX_RATES = {
        "ON": {"rate": 0.13, "name": "HST", "type": "Harmonized"},
        "QC": {"rate": 0.14975, "name": "QST + GST", "type": "Combined"},
        "BC": {"rate": 0.12, "name": "PST + GST", "type": "Combined"},
        "AB": {"rate": 0.05, "name": "GST", "type": "Federal only"},
        "SK": {"rate": 0.11, "name": "PST + GST", "type": "Combined"},
        "MB": {"rate": 0.12, "name": "PST + GST", "type": "Combined"},
        "NS": {"rate": 0.15, "name": "HST", "type": "Harmonized"},
        "NB": {"rate": 0.15, "name": "HST", "type": "Harmonized"},
        "NL": {"rate": 0.15, "name": "HST", "type": "Harmonized"},
        "PE": {"rate": 0.15, "name": "HST", "type": "Harmonized"},
        "NT": {"rate": 0.05, "name": "GST", "type": "Federal only"},
        "NU": {"rate": 0.05, "name": "GST", "type": "Federal only"},
        "YT": {"rate": 0.05, "name": "GST", "type": "Federal only"},
    }

    # CCA rates by asset class (simplified)
    CCA_RATES = {
        "vehicles": 0.30,  # Class 10
        "computer_equipment": 0.55,  # Class 50
        "furniture": 0.20,  # Class 8
        "buildings": 0.04,  # Class 1
        "machinery": 0.20,  # Class 8
    }

    # Value of metadata["analysis_method"] whenever the result did not come
    # from a successfully parsed LLM reply.
    _FALLBACK_METHOD = "fallback"

    def __init__(self):
        """Create the Groq client and set the model used for analysis."""
        self.client = groq.Groq(api_key=settings.GROQ_API_KEY)
        self.model = "llama-3.1-8b-instant"
        # NOTE(review): nothing in this class reads max_retries; each analysis
        # currently makes a single API attempt.
        self.max_retries = 3

    def analyze_and_apply_tax_rules(
        self,
        receipt: Receipt,
        transaction: Transaction,
        user_location: str = "ON",  # Default to Ontario
    ) -> Dict[str, Any]:
        """
        Use LLM to intelligently analyze and apply all tax rules:
        1. Sales tax based on receipt location (shipping/billing address priority)
        2. Foreign exchange rules for currency mismatches
        3. Depreciation rules for capital assets (based on user location)
        4. Meals & Entertainment deduction rules

        Args:
            receipt: Receipt being matched.
            transaction: Transaction the receipt is matched against.
            user_location: Province code of the user; upper-cased before it is
                looked up in PROVINCIAL_TAX_RATES.

        Returns:
            The parsed analysis dict with a "metadata" entry added, or a
            structured fallback dict when the LLM call or parsing failed.
        """
        # Prepare context for LLM
        analysis_context = self._build_analysis_context(
            receipt, transaction, user_location
        )

        # Get LLM analysis (raw text; fallback JSON if the call failed)
        llm_analysis = self._get_llm_tax_analysis(analysis_context)

        # Parse and structure the results
        return self._structure_analysis_results(
            llm_analysis, receipt, transaction, user_location
        )

    def _build_analysis_context(
        self, receipt: Receipt, transaction: Transaction, user_location: str
    ) -> str:
        """Build comprehensive context for LLM analysis.

        Renders the receipt, the transaction, the user's province (with its
        rate and tax name) and the two reference tables into one text block.
        Provinces missing from PROVINCIAL_TAX_RATES fall back to 0.13 / "HST".
        """
        # Extract location information
        receipt_location = self._extract_receipt_location(receipt)
        user_province = user_location.upper()

        province_info = self.PROVINCIAL_TAX_RATES.get(user_province, {})
        # Round after scaling so the prompt never carries binary floating-point
        # artifacts; clean values such as 13.0 are unaffected.
        rate_percent = round(province_info.get("rate", 0.13) * 100, 3)
        tax_name = province_info.get("name", "HST")

        # Falsy rates (None, 0) are shown as "N/A".
        fx_rate_text = transaction.fx_rate if transaction.fx_rate else "N/A"

        # Build tax rates reference
        tax_rates_info = json.dumps(self.PROVINCIAL_TAX_RATES, indent=2)
        cca_rates_info = json.dumps(self.CCA_RATES, indent=2)

        context = f"""
RECEIPT DETAILS:
- Vendor: {receipt.vendor}
- Amount: ${receipt.amount:.2f}
- Tax: ${receipt.tax:.2f}
- Currency: {receipt.currency}
- Date: {receipt.receipt_date.strftime("%Y-%m-%d")}
- Category: {receipt.category}
- Description: {receipt.description}
- Billing Address: {self._format_address(receipt.billing_address)}
- Shipping Address: {self._format_address(receipt.shipping_address)}
- Is Meals & Entertainment: {receipt.is_meals_entertainment}

TRANSACTION DETAILS:
- Vendor: {transaction.vendor}
- Amount: ${transaction.amount:.2f}
- Currency: {transaction.currency}
- Date: {transaction.transaction_date.strftime("%Y-%m-%d")}
- Notes: {transaction.notes}
- FX Rate: {fx_rate_text}

USER CONTEXT:
- User Location (Province): {user_province}
- User Province Tax Rate: {rate_percent}%
- User Tax Type: {tax_name}

RECEIPT LOCATION DETECTED:
{receipt_location}

PROVINCIAL TAX RATES REFERENCE:
{tax_rates_info}

CCA DEPRECIATION RATES BY ASSET CLASS:
{cca_rates_info}
"""
        return context

    def _extract_receipt_location(self, receipt: Receipt) -> str:
        """Extract and format receipt location information.

        The shipping address wins when present; otherwise the billing address
        is used. With neither, a one-line notice is returned.
        """
        # Priority: Use shipping address if available, then billing
        location = receipt.shipping_address or receipt.billing_address

        if not location:
            return "- No address information available (will use user location)"

        return f"""
- Province: {location.province}
- City: {location.city}
- Country: {location.country}
- Postal Code: {location.postal_code}
"""

    def _format_address(self, address: Any) -> str:
        """Format address for display; "Not provided" when address is falsy."""
        if not address:
            return "Not provided"
        return f"{address.city}, {address.province}, {address.country} ({address.postal_code})"

    def _build_tax_prompt(self, context: str) -> str:
        """Embed *context* in the fixed tax-rules prompt sent to the model."""
        return f"""
You are a Canadian tax expert analyzing a receipt-transaction match. Apply the following tax rules intelligently:

{context}

=== FOUR CORE TAX RULES ===

### 1. SALES TAX RULE
**Purpose**: Calculate and apply correct sales tax based on shipping and billing addresses.

**Key Principles**:
- When billing and shipping addresses are THE SAME: Apply sales tax based on that address location.
- When billing and shipping addresses are DIFFERENT: Apply sales tax based on the SHIPPING address.
- Tax rate is determined by the RECEIPT'S location, NOT the user's location (unless no receipt location).

**Scenario Examples**:
a) User in Ontario, Receipt from Quebec:
- Apply Quebec's tax rate (14.975% QST+GST), not Ontario's 13% HST
- The user's location is only for depreciation purposes

b) User in Ontario, Receipt from USA (New York):
- DO NOT apply Canadian sales tax
- This is an international transaction
- Flag for FX review instead

c) User in USA (New York), Receipt from California:
- Apply California's sales tax rate (receipt location)
- Not New York's rate (user location)

d) User in Ontario, Receipt has NO address information:
- DEFAULT to user's location (Ontario 13% HST)
- This is the fallback when receipt location is unknown

**Tax Calculation**:
- Compare calculated tax vs stated tax on receipt
- Flag discrepancies for review

### 2. FOREIGN EXCHANGE (FX) RULE
**Purpose**: Handle currency mismatches between receipts and transactions.

**Actions**:
- Identify when receipt currency ≠ transaction currency (e.g., USD vs CAD)
- Calculate the absolute discrepancy: |receipt_amount - transaction_amount|
- ALWAYS flag for manual review - DO NOT fetch exchange rates automatically
- If FX rate is provided in transaction data, note it but still require manual review

**Examples**:
- Transaction: USD $100, Receipt: CAD $125 → Discrepancy: $25, Flag for review
- The user must manually approve or adjust the FX difference

### 3. DEPRECIATION RULE
**Purpose**: Calculate depreciation for assets using two methods.

**Key Principle**: Depreciation is ALWAYS based on USER'S location, NOT receipt location.

**Asset Identification**:
- Only applies to capital assets: vehicles, equipment, furniture, buildings, machinery
- Identify from receipt category and description
- Typical threshold: Assets generally > $500

**Two Methods Required**:
a) **Straight-Line Depreciation** (for accounting purposes):
Formula: (Cost - Residual Value) / Useful Life
Example: Asset $10,000, 5-year life, $1,000 residual = $1,800/year

b) **CCA Depreciation** (for tax purposes - Canada):
Method: Declining Balance
Formula: Book Value × CCA Rate each year
Example: Truck $20,000, 30% CCA:
- Year 1: $20,000 × 30% = $6,000
- Year 2: ($20,000 - $6,000) × 30% = $4,200
- Continues declining each year

**CCA Classes** (Canada):
- Vehicles: 30% (Class 10)
- Computer Equipment: 55% (Class 50)
- Furniture/Machinery: 20% (Class 8)
- Buildings: 4% (Class 1)

### 4. MEALS & ENTERTAINMENT TAX DEDUCTION RULE
**Purpose**: Apply correct deductions for meals and entertainment expenses.

**Deduction Rules**:
- **For Tax Purposes**: Only 50% of total receipt amount is deductible
- **For Accounting Purposes**: 100% of total receipt amount is deductible
- **Sales Tax**: Full sales tax amount is deductible for accounting

**Example**:
- Receipt: $100 meal + $12 sales tax = $112 total
- **Tax Deduction**: $50 (50% of meal) + $12 (full tax) = $62
- **Accounting Deduction**: $100 (full meal) + $12 (full tax) = $112

=== LOCATION-BASED SCENARIO HANDLING ===

**When Receipt Location ≠ User Location**:

1. **Sales Tax**: Use RECEIPT's location for tax calculation
- Exception: If international (different country), no Canadian sales tax + flag FX
- Exception: If no location on receipt, use user's location as default

2. **Depreciation**: ALWAYS use USER's location for depreciation rules
- Receipt location is irrelevant for depreciation
- Apply user's country/province depreciation methods

3. **FX Handling**:
- If receipt currency ≠ transaction currency: Flag for manual review
- Do NOT automatically fetch or apply exchange rates

4. **Missing Location**:
- If receipt has no address: Default to user's location for sales tax
- Still apply user's location for depreciation

=== ANALYSIS REQUIRED ===

Provide a structured JSON response with the following format:

{{
"sales_tax": {{
"applicable_province": "XX",
"applicable_rate": 0.XX,
"tax_name": "HST/GST/PST/QST",
"calculated_tax": XX.XX,
"stated_tax": XX.XX,
"discrepancy": XX.XX,
"reason": "Detailed explanation: which address used (billing/shipping), why this location, which scenario applies",
"requires_review": true/false
}},
"foreign_exchange": {{
"currency_mismatch": true/false,
"receipt_currency": "XXX",
"transaction_currency": "XXX",
"receipt_amount": XX.XX,
"transaction_amount": XX.XX,
"discrepancy": XX.XX,
"requires_manual_review": true/false,
"reason": "Explanation of FX situation"
}},
"depreciation": {{
"is_capital_asset": true/false,
"asset_class": "category name or N/A",
"suggested_cca_rate": 0.XX,
"straight_line_applicable": true/false,
"cca_applicable": true/false,
"straight_line_example": "Brief calculation example if applicable",
"cca_example": "Brief calculation example if applicable",
"reason": "Why this is/isn't a capital asset, which CCA class, and why depreciation based on user's location"
}},
"meals_entertainment": {{
"is_meals_entertainment": true/false,
"tax_deduction_amount": XX.XX,
"accounting_deduction_amount": XX.XX,
"sales_tax_included": XX.XX,
"reason": "Explanation of M&E rule application"
}},
"confidence_adjustment": {{
"boost": 0.XX,
"reduce": 0.XX,
"reason": "Why confidence should be adjusted based on tax analysis"
}},
"overall_assessment": "Comprehensive summary: which rules applied, why, what location used for what purpose, and any required actions"
}}

**Critical Reminders**:
- Sales tax uses RECEIPT location (or user location if receipt has none)
- Depreciation ALWAYS uses USER location
- For different addresses, use SHIPPING address for sales tax
- International transactions: no Canadian tax + FX flag
- Be precise with all calculations
- Always explain your reasoning clearly
"""

    def _get_llm_tax_analysis(self, context: str) -> str:
        """Get tax rule analysis from LLM.

        Returns:
            The model's reply text, stripped of surrounding whitespace. If
            anything raises while calling the API or reading the reply, the
            error is logged and the canned JSON from _get_fallback_analysis()
            is returned instead.
        """
        prompt = self._build_tax_prompt(context)

        try:
            response = self.client.chat.completions.create(
                model=self.model,
                messages=[
                    {
                        "role": "system",
                        "content": "You are a Canadian tax expert. Analyze transactions and apply tax rules accurately. Always return valid JSON.",
                    },
                    {"role": "user", "content": prompt},
                ],
                temperature=0.1,  # Low temperature for consistent, factual responses
                max_tokens=2000,
            )

            content = response.choices[0].message.content.strip()
            logger.info("LLM tax analysis received: %d characters", len(content))
            return content

        except Exception as e:
            # Deliberate best-effort boundary: any failure degrades to the
            # fallback analysis rather than propagating to the caller.
            logger.error("Error getting LLM tax analysis: %s", e)
            return self._get_fallback_analysis()

    def _get_fallback_analysis(self) -> str:
        """Return fallback analysis if LLM fails.

        The JSON carries a "metadata" marker so _structure_analysis_results()
        can tell this canned payload apart from a genuine model reply.
        """
        return json.dumps(
            {
                "sales_tax": {
                    "applicable_province": "ON",
                    "applicable_rate": 0.13,
                    "tax_name": "HST",
                    "calculated_tax": 0.0,
                    "stated_tax": 0.0,
                    "discrepancy": 0.0,
                    "reason": "LLM analysis failed - using defaults",
                    "requires_review": True,
                },
                "foreign_exchange": {
                    "currency_mismatch": False,
                    "requires_manual_review": False,
                    "reason": "Analysis not available",
                },
                "depreciation": {
                    "is_capital_asset": False,
                    "reason": "Analysis not available",
                },
                "meals_entertainment": {
                    "is_meals_entertainment": False,
                    "reason": "Analysis not available",
                },
                "confidence_adjustment": {
                    "boost": 0.0,
                    "reduce": 0.1,
                    "reason": "LLM analysis failed - recommend manual review",
                },
                "overall_assessment": "Automatic analysis failed. Manual review recommended.",
                "metadata": {"analysis_method": self._FALLBACK_METHOD},
            }
        )

    def _parse_llm_json(self, llm_response: str) -> Dict[str, Any]:
        """Extract the JSON object from an LLM reply.

        Handles replies wrapped in markdown code fences and, when the text
        does not parse as-is, retries on the outermost ``{...}`` span so a
        JSON object surrounded by prose is still recovered.

        Raises:
            ValueError: the reply holds no parseable JSON
                (json.JSONDecodeError, a ValueError subclass) or the parsed
                JSON is not an object.
        """
        payload = llm_response
        if "```json" in llm_response:
            payload = llm_response.split("```json")[1].split("```")[0].strip()
        elif "```" in llm_response:
            payload = llm_response.split("```")[1].strip()

        try:
            parsed = json.loads(payload)
        except json.JSONDecodeError:
            start, end = payload.find("{"), payload.rfind("}")
            if start == -1 or end <= start:
                raise
            parsed = json.loads(payload[start : end + 1])

        # Valid JSON is not enough: callers index the result by key.
        if not isinstance(parsed, dict):
            raise ValueError(
                f"Expected a JSON object from the LLM, got {type(parsed).__name__}"
            )
        return parsed

    def _structure_analysis_results(
        self,
        llm_response: str,
        receipt: Receipt,
        transaction: Transaction,
        user_location: str,
    ) -> Dict[str, Any]:
        """Parse LLM response and structure it for application.

        Returns:
            The parsed analysis with a "metadata" entry attached. If the
            response cannot be parsed into a JSON object, a minimal fallback
            dict is returned that requires review and carries the parse error
            under "error".
        """
        try:
            analysis = self._parse_llm_json(llm_response)
        except ValueError as e:  # includes json.JSONDecodeError
            logger.error("Failed to parse LLM response as JSON: %s", e)
            logger.error("LLM response was: %s", llm_response)

            # Return structured fallback
            return {
                "sales_tax": {
                    "requires_review": True,
                    "reason": "Failed to parse LLM response",
                },
                "foreign_exchange": {
                    "requires_manual_review": receipt.currency != transaction.currency
                },
                "depreciation": {"is_capital_asset": False},
                "confidence_adjustment": {
                    "boost": 0.0,
                    "reduce": 0.15,
                    "reason": "Analysis parsing failed",
                },
                "overall_assessment": "Analysis failed. Manual review required.",
                "error": str(e),
                "metadata": {
                    "user_location": user_location,
                    "analysis_method": self._FALLBACK_METHOD,
                },
            }

        # The canned payload from _get_fallback_analysis() marks itself in
        # "metadata"; it must not be reported as a model result.
        prior_metadata = analysis.get("metadata")
        from_fallback = (
            isinstance(prior_metadata, dict)
            and prior_metadata.get("analysis_method") == self._FALLBACK_METHOD
        )
        if from_fallback:
            # The canned payload cannot know the currencies; derive the FX
            # flags here so a failed API call never hides a mismatch (same
            # comparison the parse-failure fallback uses).
            fx_section = analysis.get("foreign_exchange")
            if isinstance(fx_section, dict):
                currencies_differ = receipt.currency != transaction.currency
                fx_section["currency_mismatch"] = currencies_differ
                fx_section["requires_manual_review"] = currencies_differ

        # Add metadata
        analysis["metadata"] = {
            "user_location": user_location,
            "receipt_id": receipt.id,
            "transaction_id": transaction.id,
            "analysis_method": (
                self._FALLBACK_METHOD if from_fallback else "LLM-based"
            ),
            "model": self.model,
        }

        return analysis
|