Implement batch processing for LLM-based tax analysis and enhance match confidence scoring
This commit is contained in:
@@ -51,6 +51,50 @@ class LLMTaxAnalyzer:
|
||||
self.model = "llama-3.1-8b-instant"
|
||||
self.max_retries = 3
|
||||
|
||||
def analyze_and_apply_tax_rules_batch(
    self,
    matches: list,  # List of Match objects
    user_location: str = "ON",
) -> list:
    """
    Batch process all matches in a SINGLE LLM call to reduce costs.

    Builds one prompt context covering every receipt-transaction pair,
    requests one batched analysis, and applies the per-match result
    (looked up under the key ``match_<index>``) to each match.

    Args:
        matches: Match objects to analyze. Each one needs a string
            ``match_reason`` attribute, which is appended to in place.
        user_location: User location forwarded to the context builder.
            Defaults to "ON".

    Returns:
        One entry per input match, in input order. A match with no usable
        analysis is returned with " (Tax analysis incomplete)" appended to
        its ``match_reason``; a match whose analysis could not be applied
        gets " (Tax analysis error)". An empty input is returned as-is.
    """
    if not matches:
        return matches

    # Build batch context for all matches
    batch_context = self._build_batch_analysis_context(matches, user_location)

    # Get LLM analysis for ALL matches at once
    llm_batch_analysis = self._get_llm_tax_analysis_batch(
        batch_context, len(matches)
    )

    # The payload is parsed model output, so its shape is not guaranteed.
    # Anything other than a mapping is treated as "no analysis" instead of
    # raising (and logging) once per match on the lookup below.
    if not isinstance(llm_batch_analysis, dict):
        logger.error(
            "Batch LLM tax analysis returned %s instead of a JSON object",
            type(llm_batch_analysis).__name__,
        )
        llm_batch_analysis = {}

    # Apply results to each match
    enhanced_matches = []
    for i, match in enumerate(matches):
        match_analysis = llm_batch_analysis.get(f"match_{i}")

        # Missing, empty, or malformed (non-object) entries are all
        # "incomplete": a non-dict must never be stored on the match.
        if not match_analysis or not isinstance(match_analysis, dict):
            match.match_reason += " (Tax analysis incomplete)"
            enhanced_matches.append(match)
            continue

        try:
            enhanced_match = self._apply_tax_analysis_to_match(
                match, match_analysis
            )
        except Exception as e:
            # Best effort: one bad analysis must not drop the match or
            # abort the rest of the batch.
            logger.error("Error applying tax analysis to match %s: %s", i, e)
            match.match_reason += " (Tax analysis error)"
            enhanced_matches.append(match)
        else:
            enhanced_matches.append(enhanced_match)

    return enhanced_matches
|
||||
|
||||
def analyze_and_apply_tax_rules(
|
||||
self,
|
||||
receipt: Receipt,
|
||||
@@ -58,6 +102,9 @@ class LLMTaxAnalyzer:
|
||||
user_location: str = "ON", # Default to Ontario
|
||||
) -> Dict[str, Any]:
|
||||
"""
|
||||
Legacy single-match analysis method (kept for backward compatibility).
|
||||
Use analyze_and_apply_tax_rules_batch() for better performance.
|
||||
|
||||
Use LLM to intelligently analyze and apply all tax rules:
|
||||
1. Sales tax based on receipt location (shipping/billing address priority)
|
||||
2. Foreign exchange rules for currency mismatches
|
||||
@@ -496,3 +543,294 @@ Provide a structured JSON response with the following format:
|
||||
"analysis_method": "fallback",
|
||||
},
|
||||
}
|
||||
|
||||
def _build_batch_analysis_context(self, matches: list, user_location: str) -> str:
|
||||
"""Build comprehensive context for batch LLM analysis of all matches"""
|
||||
|
||||
# Normalize user_location to province code
|
||||
user_province = self._normalize_location_to_province(user_location)
|
||||
|
||||
logger.info(
|
||||
f"Building batch tax analysis context for {len(matches)} matches - User Location: {user_location} → Province Code: {user_province}"
|
||||
)
|
||||
|
||||
# Build tax rates and CCA references once
|
||||
tax_rates_info = json.dumps(self.PROVINCIAL_TAX_RATES, indent=2)
|
||||
cca_rates_info = json.dumps(self.CCA_RATES, indent=2)
|
||||
|
||||
# Build match entries
|
||||
matches_info = []
|
||||
for i, match in enumerate(matches):
|
||||
receipt = match.receipt
|
||||
transaction = match.transaction
|
||||
receipt_location = self._extract_receipt_location(receipt)
|
||||
|
||||
match_info = f"""
|
||||
MATCH {i} (ID: match_{i}):
|
||||
Receipt Details:
|
||||
- Vendor: {receipt.vendor}
|
||||
- Amount: ${receipt.amount:.2f}
|
||||
- Tax: ${receipt.tax:.2f}
|
||||
- Currency: {receipt.currency}
|
||||
- Date: {receipt.receipt_date.strftime("%Y-%m-%d")}
|
||||
- Category: {receipt.category}
|
||||
- Description: {receipt.description}
|
||||
- Billing Address: {self._format_address(receipt.billing_address)}
|
||||
- Shipping Address: {self._format_address(receipt.shipping_address)}
|
||||
- Is Meals & Entertainment: {receipt.is_meals_entertainment}
|
||||
|
||||
Transaction Details:
|
||||
- Vendor: {transaction.vendor}
|
||||
- Amount: ${transaction.amount:.2f}
|
||||
- Currency: {transaction.currency}
|
||||
- Date: {transaction.transaction_date.strftime("%Y-%m-%d")}
|
||||
- Notes: {transaction.notes}
|
||||
- FX Rate: {transaction.fx_rate if transaction.fx_rate else "N/A"}
|
||||
|
||||
Receipt Location Detected:
|
||||
{receipt_location}
|
||||
"""
|
||||
matches_info.append(match_info)
|
||||
|
||||
matches_section = "\n".join(matches_info)
|
||||
|
||||
context = f"""
|
||||
USER CONTEXT:
|
||||
- User Location (Province): {user_province}
|
||||
- User Province Tax Rate: {self.PROVINCIAL_TAX_RATES.get(user_province, {}).get("rate", 0.13) * 100}%
|
||||
- User Tax Type: {self.PROVINCIAL_TAX_RATES.get(user_province, {}).get("name", "HST")}
|
||||
|
||||
PROVINCIAL TAX RATES REFERENCE:
|
||||
{tax_rates_info}
|
||||
|
||||
CCA DEPRECIATION RATES BY ASSET CLASS:
|
||||
{cca_rates_info}
|
||||
|
||||
=== MATCHES TO ANALYZE ({len(matches)} total) ===
|
||||
{matches_section}
|
||||
"""
|
||||
return context
|
||||
|
||||
def _get_llm_tax_analysis_batch(
|
||||
self, context: str, num_matches: int
|
||||
) -> Dict[str, Any]:
|
||||
"""Get tax rule analysis from LLM for ALL matches in a single call"""
|
||||
|
||||
prompt = f"""
|
||||
You are a Canadian tax expert analyzing MULTIPLE receipt-transaction matches. Apply the following tax rules intelligently to EACH match.
|
||||
|
||||
{context}
|
||||
|
||||
=== FOUR CORE TAX RULES ===
|
||||
|
||||
### 1. SALES TAX RULE
|
||||
**Purpose**: Calculate and apply correct sales tax based on shipping and billing addresses.
|
||||
|
||||
**Key Principles**:
|
||||
- When billing and shipping addresses are THE SAME: Apply sales tax based on that address location.
|
||||
- When billing and shipping addresses are DIFFERENT: Apply sales tax based on the SHIPPING address.
|
||||
- Tax rate is determined by the RECEIPT'S location, NOT the user's location (unless no receipt location).
|
||||
|
||||
**Scenario Examples**:
|
||||
a) User in Ontario, Receipt from Quebec:
|
||||
- Apply Quebec's tax rate (14.975% QST+GST), not Ontario's 13% HST
|
||||
|
||||
b) User in Ontario, Receipt from USA (New York):
|
||||
- DO NOT apply Canadian sales tax
|
||||
- This is an international transaction
|
||||
- Flag for FX review instead
|
||||
|
||||
c) User in Ontario, Receipt has NO address information:
|
||||
- DEFAULT to user's location (Ontario 13% HST)
|
||||
|
||||
**Tax Calculation**:
|
||||
- Compare calculated tax vs stated tax on receipt
|
||||
- Flag discrepancies for review
|
||||
|
||||
### 2. FOREIGN EXCHANGE (FX) RULE
|
||||
**Purpose**: Handle currency mismatches between receipts and transactions.
|
||||
|
||||
**Actions**:
|
||||
- Identify when receipt currency ≠ transaction currency (e.g., USD vs CAD)
|
||||
- Calculate expected transaction amount using FX rate if available
|
||||
- Flag discrepancies > $5 or 5% for manual review
|
||||
- If FX rate missing but currencies differ, flag for review
|
||||
|
||||
### 3. DEPRECIATION RULE
|
||||
**Purpose**: Identify capital assets requiring depreciation based on USER'S location.
|
||||
|
||||
**Critical**: Depreciation is ALWAYS based on the USER'S location (for Canadian tax filing), NOT the receipt location.
|
||||
|
||||
**Capital Asset Criteria**:
|
||||
- Cost > $500 typically
|
||||
- Useful life > 1 year
|
||||
- Examples: computers, vehicles, furniture, machinery, buildings
|
||||
|
||||
**CCA Classes**: Assign appropriate class and rate based on asset type and user's jurisdiction
|
||||
|
||||
### 4. MEALS & ENTERTAINMENT RULE
|
||||
**Purpose**: Apply 50% tax deduction limit for M&E expenses.
|
||||
|
||||
**Actions**:
|
||||
- Identify M&E expenses (meals, entertainment, client dinners, etc.)
|
||||
- Tax Deduction: 50% of total amount (including tax)
|
||||
- Accounting Deduction: 100% of total amount (including tax)
|
||||
- Always include sales tax in both calculations
|
||||
|
||||
=== YOUR TASK ===
|
||||
|
||||
Analyze EACH match (match_0, match_1, match_2, etc.) and return a JSON object where each key is the match ID (e.g., "match_0") and the value is the complete tax analysis for that match.
|
||||
|
||||
Return your response as a SINGLE JSON object in this format:
|
||||
|
||||
{{
|
||||
"match_0": {{
|
||||
"final_tax_amount": XX.XX,
|
||||
"sales_tax": {{
|
||||
"applicable_province": "XX",
|
||||
"applicable_rate": 0.XX,
|
||||
"tax_name": "HST/GST/PST",
|
||||
"calculated_tax": XX.XX,
|
||||
"stated_tax": XX.XX,
|
||||
"discrepancy": XX.XX,
|
||||
"reason": "Detailed explanation",
|
||||
"requires_review": true/false
|
||||
}},
|
||||
"foreign_exchange": {{
|
||||
"currency_mismatch": true/false,
|
||||
"receipt_currency": "XXX",
|
||||
"transaction_currency": "XXX",
|
||||
"expected_transaction_amount": XX.XX,
|
||||
"actual_transaction_amount": XX.XX,
|
||||
"discrepancy": XX.XX,
|
||||
"requires_manual_review": true/false,
|
||||
"reason": "Explanation"
|
||||
}},
|
||||
"depreciation": {{
|
||||
"is_capital_asset": true/false,
|
||||
"asset_class": "class_XX",
|
||||
"cca_rate": 0.XX,
|
||||
"applicable_jurisdiction": "XX",
|
||||
"reason": "Explanation"
|
||||
}},
|
||||
"meals_entertainment": {{
|
||||
"is_meals_entertainment": true/false,
|
||||
"tax_deduction_amount": XX.XX,
|
||||
"accounting_deduction_amount": XX.XX,
|
||||
"sales_tax_included": XX.XX,
|
||||
"reason": "Explanation"
|
||||
}},
|
||||
"confidence_adjustment": {{
|
||||
"boost": 0.XX,
|
||||
"reduce": 0.XX,
|
||||
"reason": "Why confidence should be adjusted"
|
||||
}},
|
||||
"overall_assessment": "Summary for this match"
|
||||
}},
|
||||
"match_1": {{
|
||||
... same structure ...
|
||||
}},
|
||||
... for all {num_matches} matches ...
|
||||
}}
|
||||
|
||||
**Critical Reminders**:
|
||||
- Sales tax uses RECEIPT location (or user location if receipt has none)
|
||||
- Depreciation ALWAYS uses USER location
|
||||
- For different addresses, use SHIPPING address for sales tax
|
||||
- International transactions: no Canadian tax + FX flag
|
||||
- Be precise with all calculations
|
||||
- Always explain your reasoning clearly
|
||||
- Return analysis for ALL {num_matches} matches
|
||||
"""
|
||||
|
||||
try:
|
||||
response = self.client.chat.completions.create(
|
||||
model=self.model,
|
||||
messages=[
|
||||
{
|
||||
"role": "system",
|
||||
"content": "You are a Canadian tax expert. Analyze multiple transactions in batch and apply tax rules accurately. Always return valid JSON with all requested matches.",
|
||||
},
|
||||
{"role": "user", "content": prompt},
|
||||
],
|
||||
temperature=0.1, # Low temperature for consistent, factual responses
|
||||
max_tokens=8000, # Higher limit for batch processing
|
||||
)
|
||||
|
||||
content = response.choices[0].message.content.strip()
|
||||
logger.info(
|
||||
f"LLM batch tax analysis received: {len(content)} characters for {num_matches} matches"
|
||||
)
|
||||
|
||||
# Parse the JSON response
|
||||
json_str = content
|
||||
if "```json" in content:
|
||||
json_str = content.split("```json")[1].split("```")[0].strip()
|
||||
elif "```" in content:
|
||||
json_str = content.split("```")[1].split("```")[0].strip()
|
||||
|
||||
batch_analysis = json.loads(json_str)
|
||||
return batch_analysis
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error getting batch LLM tax analysis: {str(e)}")
|
||||
# Return empty dict so each match can handle fallback individually
|
||||
return {}
|
||||
|
||||
def _apply_tax_analysis_to_match(self, match, tax_analysis: Dict[str, Any]):
|
||||
"""Apply tax analysis results to a match object"""
|
||||
|
||||
# Store the complete tax analysis
|
||||
match.tax_analysis = tax_analysis
|
||||
|
||||
# Apply confidence adjustments based on tax analysis
|
||||
confidence_adj = tax_analysis.get("confidence_adjustment", {})
|
||||
|
||||
# Boost confidence if tax rules validate the match
|
||||
boost = confidence_adj.get("boost", 0.0)
|
||||
if boost > 0:
|
||||
match.confidence_score = min(1.0, match.confidence_score + boost)
|
||||
match.match_reason += f" (Tax analysis confidence boost: +{boost:.2f})"
|
||||
|
||||
# Reduce confidence if tax issues detected
|
||||
reduce = confidence_adj.get("reduce", 0.0)
|
||||
if reduce > 0:
|
||||
match.confidence_score = max(0.0, match.confidence_score - reduce)
|
||||
match.match_reason += f" (Tax issues detected: -{reduce:.2f})"
|
||||
|
||||
# Add flags for manual review if needed
|
||||
review_flags = []
|
||||
|
||||
# Check sales tax issues
|
||||
sales_tax = tax_analysis.get("sales_tax", {})
|
||||
if sales_tax.get("requires_review", False):
|
||||
review_flags.append("Sales Tax Review Required")
|
||||
|
||||
# Check FX issues
|
||||
fx_analysis = tax_analysis.get("foreign_exchange", {})
|
||||
if fx_analysis.get("requires_manual_review", False):
|
||||
review_flags.append(
|
||||
f"FX Review Required (Discrepancy: ${fx_analysis.get('discrepancy', 0):.2f})"
|
||||
)
|
||||
|
||||
# Check depreciation
|
||||
depreciation = tax_analysis.get("depreciation", {})
|
||||
if depreciation.get("is_capital_asset", False):
|
||||
review_flags.append(
|
||||
f"Capital Asset - Depreciation Applicable ({depreciation.get('asset_class', 'Unknown')})"
|
||||
)
|
||||
|
||||
# Check meals & entertainment
|
||||
meals_ent = tax_analysis.get("meals_entertainment", {})
|
||||
if meals_ent.get("is_meals_entertainment", False):
|
||||
tax_deduction = meals_ent.get("tax_deduction_amount", 0)
|
||||
accounting_deduction = meals_ent.get("accounting_deduction_amount", 0)
|
||||
review_flags.append(
|
||||
f"M&E Expense - Tax Deduction: ${tax_deduction:.2f} (50%), Accounting: ${accounting_deduction:.2f} (100%)"
|
||||
)
|
||||
|
||||
# Add review flags to match reason
|
||||
if review_flags:
|
||||
match.match_reason += " | REVIEW: " + "; ".join(review_flags)
|
||||
|
||||
return match
|
||||
|
||||
@@ -25,11 +25,36 @@ class MatchingEngine:
|
||||
receipts, transactions
|
||||
)
|
||||
|
||||
# Apply rules and enhance matches
|
||||
enhanced_matches = []
|
||||
# Apply traditional rules first (lightweight, no API calls)
|
||||
for match in ai_matches:
|
||||
enhanced_match = self._enhance_match_with_rules(match, user_location)
|
||||
enhanced_matches.append(enhanced_match)
|
||||
rule_results = self.rules_engine.apply_rules(
|
||||
match.receipt, match.transaction
|
||||
)
|
||||
|
||||
# Apply confidence boost from traditional rules
|
||||
if rule_results["confidence_boost"] > 0:
|
||||
match.confidence_score = min(
|
||||
1.0, match.confidence_score + rule_results["confidence_boost"]
|
||||
)
|
||||
|
||||
# Auto-approve if rules say so
|
||||
if rule_results["auto_approve"]:
|
||||
match.confidence_score = 1.0
|
||||
match.match_reason += " (Auto-approved by rules)"
|
||||
|
||||
# Now apply LLM-based tax analysis in a SINGLE batch call
|
||||
try:
|
||||
enhanced_matches = self.llm_tax_analyzer.analyze_and_apply_tax_rules_batch(
|
||||
ai_matches, user_location
|
||||
)
|
||||
except Exception as e:
|
||||
# If batch LLM analysis fails, log it and continue with matches as-is
|
||||
import logging
|
||||
|
||||
logging.error(f"Batch LLM tax analysis failed: {str(e)}")
|
||||
for match in ai_matches:
|
||||
match.match_reason += " (Note: Advanced tax analysis unavailable)"
|
||||
enhanced_matches = ai_matches
|
||||
|
||||
return enhanced_matches
|
||||
|
||||
|
||||
Reference in New Issue
Block a user