# File-viewer header (extraction residue): 311 lines, 12 KiB, Python
import logging
from typing import Any, Dict, List

from schemas import Match, Receipt, Transaction
from services.ai_matcher import AIMatcher
from services.ai_rules import AIRulesEngine
from services.feedback_logger import FeedbackLogger
from services.llm_tax_analyzer import LLMTaxAnalyzer
from services.manual_tax_calculator import ManualTaxCalculator


class MatchingEngine:
    """Match receipts to transactions and refine each match with rules and tax analysis.

    The engine delegates to collaborators created in ``__init__``: an AI
    matcher proposes matches, a rules engine adjusts their confidence, and
    either the manual tax calculator or the LLM tax analyzer attaches a tax
    analysis. Approve/reject decisions are recorded through the feedback
    logger.
    """

    # Scores at or above this value count as "high confidence" in
    # get_matching_stats(); everything below counts as "low confidence".
    HIGH_CONFIDENCE_THRESHOLD = 0.8

    # One module-named logger for every code path, so failures from the LLM
    # and manual paths can be filtered and routed the same way.
    _logger = logging.getLogger(__name__)

    def __init__(self, use_manual_tax_calculator: bool = False):
        """Create the collaborators used by the engine.

        Args:
            use_manual_tax_calculator: When True, ``process_matching`` runs
                the rule-based ``ManualTaxCalculator`` on each match;
                otherwise it makes one batch call to the ``LLMTaxAnalyzer``.
        """
        self.ai_matcher = AIMatcher()
        self.rules_engine = AIRulesEngine()
        self.feedback_logger = FeedbackLogger()
        self.llm_tax_analyzer = LLMTaxAnalyzer()
        self.manual_tax_calculator = ManualTaxCalculator()
        self.use_manual_tax_calculator = use_manual_tax_calculator

    # ------------------------------------------------------------------
    # Matching pipeline
    # ------------------------------------------------------------------

    def process_matching(
        self,
        receipts: List[Receipt],
        transactions: List[Transaction],
        user_location: str = "ON",
    ) -> List[Match]:
        """Run the full pipeline: AI matching, traditional rules, tax analysis.

        Args:
            receipts: Receipts to match.
            transactions: Candidate transactions.
            user_location: Location code forwarded unchanged to the tax
                analysis step.

        Returns:
            The matches after rule and tax processing. If the batch LLM
            analysis raises, the rule-adjusted matches are returned with a
            note appended to each ``match_reason``. An empty list is returned
            when the matcher produces no matches.
        """
        ai_matches = self.ai_matcher.match_receipts_to_transactions(
            receipts, transactions
        )
        if not ai_matches:
            # Nothing to refine; skip the tax-analysis step entirely rather
            # than invoking the batch analyzer with no work to do.
            return []

        # Traditional rules run first: they only touch the match objects
        # already in memory.
        for match in ai_matches:
            self._apply_traditional_rules(match)

        if self.use_manual_tax_calculator:
            return self._apply_manual_tax_analysis(ai_matches, user_location)

        try:
            # One batch call for all matches instead of one call per match.
            return self.llm_tax_analyzer.analyze_and_apply_tax_rules_batch(
                ai_matches, user_location
            )
        except Exception as exc:
            # Best effort: tax analysis is an enhancement, so a failure must
            # not discard the matches themselves.
            self._logger.exception("Batch LLM tax analysis failed: %s", exc)
            for match in ai_matches:
                match.match_reason += " (Note: Advanced tax analysis unavailable)"
            return ai_matches

    def _apply_traditional_rules(self, match: Match) -> Dict[str, Any]:
        """Apply rule-engine results to ``match`` in place and return them.

        A positive ``confidence_boost`` is added to the score (capped at
        1.0); a truthy ``auto_approve`` forces the score to 1.0 and tags the
        match reason. Missing keys are treated as "no boost / no approval".
        """
        rule_results = self.rules_engine.apply_rules(match.receipt, match.transaction)

        boost = rule_results.get("confidence_boost") or 0
        if boost > 0:
            match.confidence_score = min(1.0, match.confidence_score + boost)

        if rule_results.get("auto_approve"):
            match.confidence_score = 1.0
            match.match_reason += " (Auto-approved by rules)"

        return rule_results

    @staticmethod
    def _apply_confidence_adjustment(
        match: Match,
        tax_analysis: Dict[str, Any],
        boost_label: str,
        reduce_label: str,
    ) -> None:
        """Apply the analysis' ``confidence_adjustment`` section to ``match``.

        ``boost`` raises the score (capped at 1.0) and ``reduce`` lowers it
        (floored at 0.0); each applied adjustment is appended to the match
        reason using the given label. A missing or ``None`` section/value is
        treated as no adjustment.
        """
        adjustment = tax_analysis.get("confidence_adjustment") or {}

        boost = adjustment.get("boost") or 0.0
        if boost > 0:
            match.confidence_score = min(1.0, match.confidence_score + boost)
            match.match_reason += f" ({boost_label}: +{boost:.2f})"

        reduce = adjustment.get("reduce") or 0.0
        if reduce > 0:
            match.confidence_score = max(0.0, match.confidence_score - reduce)
            match.match_reason += f" ({reduce_label}: -{reduce:.2f})"

    # ------------------------------------------------------------------
    # LLM-based per-match enhancement
    # ------------------------------------------------------------------

    @staticmethod
    def _llm_review_flags(tax_analysis: Dict[str, Any]) -> List[str]:
        """Build the review-flag strings for an LLM tax analysis.

        Sections or amounts that are absent or ``None`` are treated as
        empty/zero so one incomplete section cannot invalidate the rest.
        """
        flags: List[str] = []

        sales_tax = tax_analysis.get("sales_tax") or {}
        if sales_tax.get("requires_review", False):
            flags.append("Sales Tax Review Required")

        fx_analysis = tax_analysis.get("foreign_exchange") or {}
        if fx_analysis.get("requires_manual_review", False):
            discrepancy = fx_analysis.get("discrepancy") or 0
            flags.append(f"FX Review Required (Discrepancy: ${discrepancy:.2f})")

        depreciation = tax_analysis.get("depreciation") or {}
        if depreciation.get("is_capital_asset", False):
            asset_class = depreciation.get("asset_class") or "Unknown"
            flags.append(
                f"Capital Asset - Depreciation Applicable ({asset_class})"
            )

        meals_ent = tax_analysis.get("meals_entertainment") or {}
        if meals_ent.get("is_meals_entertainment", False):
            tax_deduction = meals_ent.get("tax_deduction_amount") or 0
            accounting_deduction = meals_ent.get("accounting_deduction_amount") or 0
            # The "(50%)" / "(100%)" texts are fixed labels; the amounts come
            # from the analysis and are not recomputed here.
            flags.append(
                f"M&E Expense - Tax Deduction: ${tax_deduction:.2f} (50%), Accounting: ${accounting_deduction:.2f} (100%)"
            )

        return flags

    def _enhance_match_with_rules(
        self, match: Match, user_location: str = "ON"
    ) -> Match:
        """Enhance a single match with traditional rules and an LLM tax analysis.

        The LLM analysis is stored on ``match.tax_analysis`` and may carry
        these sections, each of which can add a review flag to the match
        reason:

        1. ``sales_tax`` - flagged when it requires review
        2. ``foreign_exchange`` - flagged when it requires manual review
        3. ``depreciation`` - flagged for capital assets
        4. ``meals_entertainment`` - flagged with tax/accounting deductions

        If the LLM step raises, the failure is logged, a note is appended to
        the match reason, and the rule engine's ``tax_analysis`` (when it
        returned one) is used instead.

        Args:
            match: Match to update in place.
            user_location: Location code forwarded to the LLM analyzer.

        Returns:
            The same ``match`` object.
        """
        rule_results = self._apply_traditional_rules(match)

        try:
            llm_tax_analysis = self.llm_tax_analyzer.analyze_and_apply_tax_rules(
                match.receipt, match.transaction, user_location
            )
            match.tax_analysis = llm_tax_analysis

            self._apply_confidence_adjustment(
                match,
                llm_tax_analysis,
                boost_label="Tax analysis confidence boost",
                reduce_label="Tax issues detected",
            )

            review_flags = self._llm_review_flags(llm_tax_analysis)
            if review_flags:
                match.match_reason += " | REVIEW: " + "; ".join(review_flags)
        except Exception as exc:
            # Best effort: keep the rule-adjusted match and fall back to the
            # rule engine's tax analysis when it supplied one.
            self._logger.exception("LLM tax analysis failed: %s", exc)
            match.match_reason += " (Note: Advanced tax analysis unavailable)"
            if rule_results.get("tax_analysis"):
                match.tax_analysis = rule_results["tax_analysis"]

        return match

    # ------------------------------------------------------------------
    # Manual (rule-based) tax analysis
    # ------------------------------------------------------------------

    @staticmethod
    def _manual_review_flags(tax_analysis: Dict[str, Any]) -> List[str]:
        """Build the review-flag strings for a manual-calculator analysis.

        Missing or ``None`` sections, currencies and amounts fall back to
        empty / "Unknown" / zero instead of raising, so an incomplete FX or
        depreciation section no longer discards every other flag.
        """
        flags: List[str] = []

        sales_tax = tax_analysis.get("sales_tax") or {}
        if sales_tax.get("requires_review"):
            if sales_tax.get("is_international"):
                flags.append("International Transaction - FX Review")
            else:
                discrepancy_pct = sales_tax.get("discrepancy_percentage") or 0
                flags.append(f"Sales Tax Discrepancy: {discrepancy_pct:.1f}%")

        fx = tax_analysis.get("foreign_exchange") or {}
        if fx.get("currency_mismatch"):
            receipt_currency = fx.get("receipt_currency") or "Unknown"
            transaction_currency = fx.get("transaction_currency") or "Unknown"
            discrepancy = fx.get("discrepancy") or 0
            flags.append(
                f"FX: {receipt_currency} → {transaction_currency} (${discrepancy:.2f})"
            )

        depreciation = tax_analysis.get("depreciation") or {}
        if depreciation.get("is_capital_asset"):
            cca_class = depreciation.get("cca_class") or "Unknown"
            cca_depreciation = depreciation.get("cca_depreciation") or {}
            year1_cca = cca_depreciation.get("year_1_depreciation") or 0
            flags.append(f"Capital Asset ({cca_class}) - Year 1 CCA: ${year1_cca:.2f}")

        meals_ent = tax_analysis.get("meals_entertainment") or {}
        if meals_ent.get("is_meals_entertainment"):
            tax_deduction = meals_ent.get("tax_deduction_amount") or 0
            accounting_deduction = meals_ent.get("accounting_deduction_amount") or 0
            # The "(50%)" / "(100%)" texts are fixed labels; the amounts come
            # from the analysis and are not recomputed here.
            flags.append(
                f"M&E: Tax ${tax_deduction:.2f} (50%), Accounting ${accounting_deduction:.2f} (100%)"
            )

        return flags

    def _apply_manual_tax_analysis(
        self, matches: List[Match], user_location: str = "ON"
    ) -> List[Match]:
        """Apply the rule-based tax calculator to every match (no LLM calls).

        Each match is updated in place: the analysis is stored on
        ``match.tax_analysis``, the confidence score is adjusted, and review
        flags are appended to the match reason. A failure on one match is
        logged and tagged on that match only; the remaining matches are
        still processed.

        Args:
            matches: Matches to analyze.
            user_location: Location code forwarded to the calculator.

        Returns:
            A new list containing every input match, in the original order.
        """
        logger = self._logger
        logger.info(
            "Applying manual tax analysis to %d matches using rule-based calculator",
            len(matches),
        )

        enhanced_matches: List[Match] = []
        for match in matches:
            try:
                tax_analysis = self.manual_tax_calculator.calculate_tax_analysis(
                    match.receipt, match.transaction, user_location
                )
                match.tax_analysis = tax_analysis

                self._apply_confidence_adjustment(
                    match,
                    tax_analysis,
                    boost_label="Tax validated",
                    reduce_label="Tax issues",
                )

                review_flags = self._manual_review_flags(tax_analysis)
                if review_flags:
                    match.match_reason += " | " + "; ".join(review_flags)
            except Exception as exc:
                logger.exception("Manual tax analysis failed for match: %s", exc)
                match.match_reason += " (Tax analysis failed)"

            # The match is kept whether or not its analysis succeeded.
            enhanced_matches.append(match)

        logger.info(
            "Manual tax analysis completed for %d matches", len(enhanced_matches)
        )
        return enhanced_matches

    # ------------------------------------------------------------------
    # User feedback
    # ------------------------------------------------------------------

    def _log_decision(
        self, match: Match, correction: str, reason: str, user_id: str
    ) -> None:
        """Record a user decision about ``match`` through the feedback logger."""
        self.feedback_logger.log_override(
            transaction_id=match.transaction.id,
            original_match=f"AI Score: {match.confidence_score}",
            correction=correction,
            reason=reason,
            user_id=user_id,
        )

    def approve_match(self, match: Match, user_id: str):
        """Log that ``user_id`` approved ``match``."""
        self._log_decision(match, "Approved", "User approved match", user_id)

    def reject_match(self, match: Match, reason: str, user_id: str):
        """Log that ``user_id`` rejected ``match`` for the given ``reason``."""
        self._log_decision(match, "Rejected", reason, user_id)

    # ------------------------------------------------------------------
    # Reporting
    # ------------------------------------------------------------------

    def get_matching_stats(self, matches: List[Match]) -> Dict[str, Any]:
        """Summarize confidence scores for a list of matches.

        Returns:
            A dict with ``total``, ``high_confidence`` (score >=
            ``HIGH_CONFIDENCE_THRESHOLD``), ``low_confidence`` (score below
            it) and ``avg_score`` rounded to three decimals. All values are
            0 for an empty list.
        """
        if not matches:
            return {
                "total": 0,
                "high_confidence": 0,
                "low_confidence": 0,
                "avg_score": 0,
            }

        threshold = self.HIGH_CONFIDENCE_THRESHOLD
        high_confidence = sum(1 for m in matches if m.confidence_score >= threshold)
        low_confidence = sum(1 for m in matches if m.confidence_score < threshold)
        avg_score = sum(m.confidence_score for m in matches) / len(matches)

        return {
            "total": len(matches),
            "high_confidence": high_confidence,
            "low_confidence": low_confidence,
            "avg_score": round(avg_score, 3),
        }