Files
ds_quickbooks/app/services/llm_tax_analyzer.py
T

447 lines
16 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
import json
import logging
from typing import Any, Dict
import groq
from config import settings
from schemas import Receipt, Transaction
logger = logging.getLogger(__name__)
class LLMTaxAnalyzer:
    """
    Uses LLM to intelligently apply tax rules based on context.

    A receipt/transaction pair is rendered into a text prompt together with
    the reference tables below, sent to a Groq-hosted chat model, and the
    model's JSON reply is parsed into a plain dict.

    Implements four core tax rules:
    1. Sales Tax Rule - Based on receipt location (shipping/billing address)
    2. Foreign Exchange Rule - Handles currency mismatches
    3. Depreciation Rule - Capital assets (based on user location)
    4. Meals & Entertainment Rule - 50% tax deduction, 100% accounting deduction
    """

    # Provincial tax rates for reference
    # Keyed by two-letter province/territory code. "rate" is the combined
    # sales-tax rate expressed as a fraction (0.13 == 13%); "name" and "type"
    # are descriptive labels. The whole table is serialized into the prompt
    # and is also used to look up the user's own province.
    # NOTE(review): rates are static and not date-aware. Nova Scotia's HST is
    # understood to have dropped from 15% to 14% effective 2025-04-01 - confirm
    # against the current CRA rate table for the receipt dates being processed.
    PROVINCIAL_TAX_RATES = {
        "ON": {"rate": 0.13, "name": "HST", "type": "Harmonized"},
        "QC": {"rate": 0.14975, "name": "QST + GST", "type": "Combined"},
        "BC": {"rate": 0.12, "name": "PST + GST", "type": "Combined"},
        "AB": {"rate": 0.05, "name": "GST", "type": "Federal only"},
        "SK": {"rate": 0.11, "name": "PST + GST", "type": "Combined"},
        "MB": {"rate": 0.12, "name": "PST + GST", "type": "Combined"},
        "NS": {"rate": 0.15, "name": "HST", "type": "Harmonized"},
        "NB": {"rate": 0.15, "name": "HST", "type": "Harmonized"},
        "NL": {"rate": 0.15, "name": "HST", "type": "Harmonized"},
        "PE": {"rate": 0.15, "name": "HST", "type": "Harmonized"},
        "NT": {"rate": 0.05, "name": "GST", "type": "Federal only"},
        "NU": {"rate": 0.05, "name": "GST", "type": "Federal only"},
        "YT": {"rate": 0.05, "name": "GST", "type": "Federal only"},
    }

    # CCA rates by asset class (simplified)
    # Keyed by asset category; values are annual declining-balance rates as
    # fractions. Serialized into the prompt as reference data for the model.
    CCA_RATES = {
        "vehicles": 0.30,  # Class 10
        "computer_equipment": 0.55,  # Class 50
        "furniture": 0.20,  # Class 8
        "buildings": 0.04,  # Class 1
        "machinery": 0.20,  # Class 8
    }
def __init__(
    self,
    model: str = "llama-3.1-8b-instant",
    max_retries: int = 3,
):
    """Create the Groq client and record the model and retry settings.

    Args:
        model: Identifier of the Groq chat model used for every analysis
            request. Defaults to the model this class has always used.
        max_retries: How many times a failed API request is retried. The
            value is kept on the instance and is also handed to the Groq
            client, so the configured retry budget actually takes effect
            instead of being stored and never read.
    """
    self.model = model
    self.max_retries = max_retries
    # The SDK performs the retries itself (connection errors, rate limits,
    # server errors), so no hand-written retry loop is needed elsewhere.
    self.client = groq.Groq(
        api_key=settings.GROQ_API_KEY,
        max_retries=max_retries,
    )
def analyze_and_apply_tax_rules(
    self,
    receipt: Receipt,
    transaction: Transaction,
    user_location: str = "ON",  # Default to Ontario
) -> Dict[str, Any]:
    """
    Run the complete LLM-driven tax analysis for one receipt/transaction pair.

    The work happens in three stages: the inputs are rendered into prompt
    context, the model is asked to apply the four tax rules (sales tax,
    foreign exchange, depreciation, meals & entertainment), and the reply
    is parsed into a dict.

    Args:
        receipt: Receipt being matched.
        transaction: Transaction the receipt is matched against.
        user_location: Province code of the user; defaults to "ON".

    Returns:
        The structured analysis produced by ``_structure_analysis_results``.
    """
    # Stage 1: describe the inputs for the model
    prompt_context = self._build_analysis_context(
        receipt, transaction, user_location
    )

    # Stage 2: ask the model for its analysis
    raw_analysis = self._get_llm_tax_analysis(prompt_context)

    # Stage 3: turn the raw reply into a structured result
    return self._structure_analysis_results(
        raw_analysis, receipt, transaction, user_location
    )
def _build_analysis_context(
self, receipt: Receipt, transaction: Transaction, user_location: str
) -> str:
"""Build comprehensive context for LLM analysis"""
# Extract location information
receipt_location = self._extract_receipt_location(receipt)
user_province = user_location.upper()
# Build tax rates reference
tax_rates_info = json.dumps(self.PROVINCIAL_TAX_RATES, indent=2)
cca_rates_info = json.dumps(self.CCA_RATES, indent=2)
context = f"""
RECEIPT DETAILS:
- Vendor: {receipt.vendor}
- Amount: ${receipt.amount:.2f}
- Tax: ${receipt.tax:.2f}
- Currency: {receipt.currency}
- Date: {receipt.receipt_date.strftime("%Y-%m-%d")}
- Category: {receipt.category}
- Description: {receipt.description}
- Billing Address: {self._format_address(receipt.billing_address)}
- Shipping Address: {self._format_address(receipt.shipping_address)}
- Is Meals & Entertainment: {receipt.is_meals_entertainment}
TRANSACTION DETAILS:
- Vendor: {transaction.vendor}
- Amount: ${transaction.amount:.2f}
- Currency: {transaction.currency}
- Date: {transaction.transaction_date.strftime("%Y-%m-%d")}
- Notes: {transaction.notes}
- FX Rate: {transaction.fx_rate if transaction.fx_rate else "N/A"}
USER CONTEXT:
- User Location (Province): {user_province}
- User Province Tax Rate: {self.PROVINCIAL_TAX_RATES.get(user_province, {}).get("rate", 0.13) * 100}%
- User Tax Type: {self.PROVINCIAL_TAX_RATES.get(user_province, {}).get("name", "HST")}
RECEIPT LOCATION DETECTED:
{receipt_location}
PROVINCIAL TAX RATES REFERENCE:
{tax_rates_info}
CCA DEPRECIATION RATES BY ASSET CLASS:
{cca_rates_info}
"""
return context
def _extract_receipt_location(self, receipt: Receipt) -> str:
"""Extract and format receipt location information"""
# Priority: Use shipping address if available, then billing
location = (
receipt.shipping_address
if receipt.shipping_address
else receipt.billing_address
)
if location:
return f"""
- Province: {location.province}
- City: {location.city}
- Country: {location.country}
- Postal Code: {location.postal_code}
"""
else:
return "- No address information available (will use user location)"
def _format_address(self, address) -> str:
"""Format address for display"""
if address:
return f"{address.city}, {address.province}, {address.country} ({address.postal_code})"
return "Not provided"
def _get_llm_tax_analysis(self, context: str) -> str:
    """Ask the LLM to apply the four tax rules to the given context.

    Args:
        context: Prompt context describing the receipt, the transaction,
            the user and the reference rate tables.

    Returns:
        The model's reply with surrounding whitespace stripped. When the
        request fails, or the reply carries no text, the JSON string from
        ``_get_fallback_analysis`` is returned instead, so this method
        never raises for API errors.
    """
    # NOTE(review): the context embeds receipt/transaction text (vendor,
    # description, notes) verbatim, so that text can influence the model's
    # instructions - confirm whether it needs sanitising upstream.
    # NOTE(review): in rule 4 below the worked example ($50 + $12 = $62)
    # does not match the stated rule (50% of the $112 total would be $56) -
    # confirm which treatment is intended before relying on that output.
    prompt = f"""
You are a Canadian tax expert analyzing a receipt-transaction match. Apply the following tax rules intelligently:
{context}
=== FOUR CORE TAX RULES ===
### 1. SALES TAX RULE
**Purpose**: Calculate and apply correct sales tax based on shipping and billing addresses.
**Key Principles**:
- When billing and shipping addresses are THE SAME: Apply sales tax based on that address location.
- When billing and shipping addresses are DIFFERENT: Apply sales tax based on the SHIPPING address.
- Tax rate is determined by the RECEIPT'S location, NOT the user's location (unless no receipt location).
**Scenario Examples**:
a) User in Ontario, Receipt from Quebec:
- Apply Quebec's tax rate (14.975% QST+GST), not Ontario's 13% HST
- The user's location is only for depreciation purposes
b) User in Ontario, Receipt from USA (New York):
- DO NOT apply Canadian sales tax
- This is an international transaction
- Flag for FX review instead
c) User in USA (New York), Receipt from California:
- Apply California's sales tax rate (receipt location)
- Not New York's rate (user location)
d) User in Ontario, Receipt has NO address information:
- DEFAULT to user's location (Ontario 13% HST)
- This is the fallback when receipt location is unknown
**Tax Calculation**:
- Compare calculated tax vs stated tax on receipt
- Flag discrepancies for review
### 2. FOREIGN EXCHANGE (FX) RULE
**Purpose**: Handle currency mismatches between receipts and transactions.
**Actions**:
- Identify when receipt currency ≠ transaction currency (e.g., USD vs CAD)
- Calculate the absolute discrepancy: |receipt_amount - transaction_amount|
- ALWAYS flag for manual review - DO NOT fetch exchange rates automatically
- If FX rate is provided in transaction data, note it but still require manual review
**Examples**:
- Transaction: USD $100, Receipt: CAD $125 → Discrepancy: $25, Flag for review
- The user must manually approve or adjust the FX difference
### 3. DEPRECIATION RULE
**Purpose**: Calculate depreciation for assets using two methods.
**Key Principle**: Depreciation is ALWAYS based on USER'S location, NOT receipt location.
**Asset Identification**:
- Only applies to capital assets: vehicles, equipment, furniture, buildings, machinery
- Identify from receipt category and description
- Typical threshold: Assets generally > $500
**Two Methods Required**:
a) **Straight-Line Depreciation** (for accounting purposes):
Formula: (Cost - Residual Value) / Useful Life
Example: Asset $10,000, 5-year life, $1,000 residual = $1,800/year
b) **CCA Depreciation** (for tax purposes - Canada):
Method: Declining Balance
Formula: Book Value × CCA Rate each year
Example: Truck $20,000, 30% CCA:
- Year 1: $20,000 × 30% = $6,000
- Year 2: ($20,000 - $6,000) × 30% = $4,200
- Continues declining each year
**CCA Classes** (Canada):
- Vehicles: 30% (Class 10)
- Computer Equipment: 55% (Class 50)
- Furniture/Machinery: 20% (Class 8)
- Buildings: 4% (Class 1)
### 4. MEALS & ENTERTAINMENT TAX DEDUCTION RULE
**Purpose**: Apply correct deductions for meals and entertainment expenses.
**Deduction Rules**:
- **For Tax Purposes**: Only 50% of total receipt amount is deductible
- **For Accounting Purposes**: 100% of total receipt amount is deductible
- **Sales Tax**: Full sales tax amount is deductible for accounting
**Example**:
- Receipt: $100 meal + $12 sales tax = $112 total
- **Tax Deduction**: $50 (50% of meal) + $12 (full tax) = $62
- **Accounting Deduction**: $100 (full meal) + $12 (full tax) = $112
=== LOCATION-BASED SCENARIO HANDLING ===
**When Receipt Location ≠ User Location**:
1. **Sales Tax**: Use RECEIPT's location for tax calculation
- Exception: If international (different country), no Canadian sales tax + flag FX
- Exception: If no location on receipt, use user's location as default
2. **Depreciation**: ALWAYS use USER's location for depreciation rules
- Receipt location is irrelevant for depreciation
- Apply user's country/province depreciation methods
3. **FX Handling**:
- If receipt currency ≠ transaction currency: Flag for manual review
- Do NOT automatically fetch or apply exchange rates
4. **Missing Location**:
- If receipt has no address: Default to user's location for sales tax
- Still apply user's location for depreciation
=== ANALYSIS REQUIRED ===
Provide a structured JSON response with the following format:
{{
"sales_tax": {{
"applicable_province": "XX",
"applicable_rate": 0.XX,
"tax_name": "HST/GST/PST/QST",
"calculated_tax": XX.XX,
"stated_tax": XX.XX,
"discrepancy": XX.XX,
"reason": "Detailed explanation: which address used (billing/shipping), why this location, which scenario applies",
"requires_review": true/false
}},
"foreign_exchange": {{
"currency_mismatch": true/false,
"receipt_currency": "XXX",
"transaction_currency": "XXX",
"receipt_amount": XX.XX,
"transaction_amount": XX.XX,
"discrepancy": XX.XX,
"requires_manual_review": true/false,
"reason": "Explanation of FX situation"
}},
"depreciation": {{
"is_capital_asset": true/false,
"asset_class": "category name or N/A",
"suggested_cca_rate": 0.XX,
"straight_line_applicable": true/false,
"cca_applicable": true/false,
"straight_line_example": "Brief calculation example if applicable",
"cca_example": "Brief calculation example if applicable",
"reason": "Why this is/isn't a capital asset, which CCA class, and why depreciation based on user's location"
}},
"meals_entertainment": {{
"is_meals_entertainment": true/false,
"tax_deduction_amount": XX.XX,
"accounting_deduction_amount": XX.XX,
"sales_tax_included": XX.XX,
"reason": "Explanation of M&E rule application"
}},
"confidence_adjustment": {{
"boost": 0.XX,
"reduce": 0.XX,
"reason": "Why confidence should be adjusted based on tax analysis"
}},
"overall_assessment": "Comprehensive summary: which rules applied, why, what location used for what purpose, and any required actions"
}}
**Critical Reminders**:
- Sales tax uses RECEIPT location (or user location if receipt has none)
- Depreciation ALWAYS uses USER location
- For different addresses, use SHIPPING address for sales tax
- International transactions: no Canadian tax + FX flag
- Be precise with all calculations
- Always explain your reasoning clearly
"""
    try:
        response = self.client.chat.completions.create(
            model=self.model,
            messages=[
                {
                    "role": "system",
                    "content": "You are a Canadian tax expert. Analyze transactions and apply tax rules accurately. Always return valid JSON.",
                },
                {"role": "user", "content": prompt},
            ],
            temperature=0.1,  # Low temperature for consistent, factual responses
            max_tokens=2000,
        )
        content = response.choices[0].message.content
        if content is None:
            # A reply without text is reported as such rather than as an
            # attribute error on None; the outcome is the same fallback.
            logger.error("Error getting LLM tax analysis: response had no content")
            return self._get_fallback_analysis()
        content = content.strip()
        logger.info("LLM tax analysis received: %d characters", len(content))
        return content
    except Exception as e:
        # Deliberate best-effort boundary: any API or response-shape failure
        # degrades to the fallback analysis. The traceback is kept in the log
        # so the root cause stays diagnosable.
        logger.exception("Error getting LLM tax analysis: %s", e)
        return self._get_fallback_analysis()
def _get_fallback_analysis(self) -> str:
"""Return fallback analysis if LLM fails"""
return json.dumps(
{
"sales_tax": {
"applicable_province": "ON",
"applicable_rate": 0.13,
"tax_name": "HST",
"calculated_tax": 0.0,
"stated_tax": 0.0,
"discrepancy": 0.0,
"reason": "LLM analysis failed - using defaults",
"requires_review": True,
},
"foreign_exchange": {
"currency_mismatch": False,
"requires_manual_review": False,
"reason": "Analysis not available",
},
"depreciation": {
"is_capital_asset": False,
"reason": "Analysis not available",
},
"meals_entertainment": {
"is_meals_entertainment": False,
"reason": "Analysis not available",
},
"confidence_adjustment": {
"boost": 0.0,
"reduce": 0.1,
"reason": "LLM analysis failed - recommend manual review",
},
"overall_assessment": "Automatic analysis failed. Manual review recommended.",
}
)
def _structure_analysis_results(
self,
llm_response: str,
receipt: Receipt,
transaction: Transaction,
user_location: str,
) -> Dict[str, Any]:
"""Parse LLM response and structure it for application"""
try:
# Extract JSON from LLM response (may have markdown code blocks)
json_str = llm_response
if "```json" in llm_response:
json_str = llm_response.split("```json")[1].split("```")[0].strip()
elif "```" in llm_response:
json_str = llm_response.split("```")[1].split("```")[0].strip()
analysis = json.loads(json_str)
# Add metadata
analysis["metadata"] = {
"user_location": user_location,
"receipt_id": receipt.id,
"transaction_id": transaction.id,
"analysis_method": "LLM-based",
"model": self.model,
}
return analysis
except json.JSONDecodeError as e:
logger.error(f"Failed to parse LLM response as JSON: {str(e)}")
logger.error(f"LLM response was: {llm_response}")
# Return structured fallback
return {
"sales_tax": {
"requires_review": True,
"reason": "Failed to parse LLM response",
},
"foreign_exchange": {
"requires_manual_review": receipt.currency != transaction.currency
},
"depreciation": {"is_capital_asset": False},
"confidence_adjustment": {
"boost": 0.0,
"reduce": 0.15,
"reason": "Analysis parsing failed",
},
"overall_assessment": "Analysis failed. Manual review required.",
"error": str(e),
"metadata": {
"user_location": user_location,
"analysis_method": "fallback",
},
}