"""LLM-assisted matching of receipts to bank transactions."""

import math
from datetime import datetime, timedelta
from typing import List, Optional, Tuple

import groq

import config
from models import Receipt, Transaction, Match


class AIMatcher:
    """Pair receipts with transactions using a chat-completion model as scorer.

    For each receipt, the transactions are first narrowed by a cheap amount
    filter; every surviving candidate is then scored by the model (one API
    call per receipt/candidate pair) and the highest-scoring one is kept.
    """

    DEFAULT_MODEL = "llama3-8b-8192"

    def __init__(self, model: str = DEFAULT_MODEL):
        """Create the Groq client from ``config.GROQ_API_KEY``.

        Args:
            model: Name of the chat model used for scoring. Defaults to the
                model the class has always used.
        """
        self.client = groq.Groq(api_key=config.GROQ_API_KEY)
        self.model = model

    def match_receipts_to_transactions(
        self, receipts: List[Receipt], transactions: List[Transaction]
    ) -> List[Match]:
        """Return the best match per receipt, sorted by confidence (descending).

        Receipts for which no candidate scores above zero are omitted.
        Each receipt is matched independently, so the same transaction may
        appear in more than one returned match.
        """
        matches = []
        for receipt in receipts:
            best_match = self._find_best_match(receipt, transactions)
            if best_match is not None:
                matches.append(best_match)
        return sorted(matches, key=lambda x: x.confidence_score, reverse=True)

    def _find_best_match(
        self, receipt: Receipt, transactions: List[Transaction]
    ) -> Optional[Match]:
        """Return the highest-scoring match for ``receipt``, or ``None``.

        ``None`` is returned when no transaction passes the amount filter or
        when every candidate scores 0.0 (which includes API failures and
        unparseable model responses). On ties the first candidate wins.
        """
        candidates = self._filter_candidates(receipt, transactions)
        if not candidates:
            return None

        best_match = None
        highest_score = 0
        for transaction in candidates:
            score, reason = self._calculate_match_score(receipt, transaction)
            # Strictly-greater comparison: any positive score can win, but a
            # score of exactly 0 never produces a match.
            if score > highest_score:
                highest_score = score
                best_match = Match(receipt, transaction, score, reason)
        return best_match

    def _filter_candidates(
        self, receipt: Receipt, transactions: List[Transaction]
    ) -> List[Transaction]:
        """Return transactions whose amount is within 100% of the receipt's.

        The filter is deliberately loose; it only drops obvious mismatches
        and leaves the real decision to the model. Amounts are compared by
        magnitude so that the sign convention of either side (debits stored
        as negatives, refunds, ...) does not reject every candidate.
        """
        receipt_amount_abs = abs(receipt.amount)
        amount_threshold = receipt_amount_abs * 1.0  # 100% of the receipt amount
        return [
            transaction
            for transaction in transactions
            if abs(receipt_amount_abs - abs(transaction.amount)) <= amount_threshold
        ]

    def _calculate_match_score(
        self, receipt: Receipt, transaction: Transaction
    ) -> Tuple[float, str]:
        """Ask the model how likely ``transaction`` belongs to ``receipt``.

        Returns:
            ``(score, reason)`` with ``score`` clamped to ``[0.0, 1.0]``.
            If the API call fails the result is ``(0.0, "AI error: ...")``;
            if the response cannot be parsed it is
            ``(0.0, "Invalid AI response")``.
        """
        # Pre-computed differences are included in the prompt so the model
        # does not have to do the arithmetic itself.
        date_diff = abs((receipt.receipt_date - transaction.transaction_date).days)
        receipt_amount_abs = abs(receipt.amount)
        transaction_amount_abs = abs(transaction.amount)
        amount_diff = abs(receipt_amount_abs - transaction_amount_abs)
        amount_percent_diff = (
            (amount_diff / receipt_amount_abs) * 100 if receipt_amount_abs > 0 else 0
        )

        prompt = f"""
        Compare this receipt with this transaction and provide a confidence score (0-1) and brief reason:

        Receipt: {receipt.vendor}, ${receipt.amount}, {receipt.receipt_date.strftime('%Y-%m-%d')}
        Transaction: {transaction.vendor}, ${transaction.amount} (absolute: ${transaction_amount_abs}), {transaction.transaction_date.strftime('%Y-%m-%d')}

        Differences:
        - Date difference: {date_diff} days
        - Amount difference: ${amount_diff} ({amount_percent_diff:.1f}%)
        - Vendor comparison: "{receipt.vendor}" vs "{transaction.vendor}"

        Score this potential match based on how likely it is the correct match:
        - Perfect matches (same vendor, amount, date): 0.95-1.0
        - High confidence (minor differences): 0.8-0.94
        - Medium confidence (moderate differences): 0.6-0.79
        - Low confidence (significant differences): 0.4-0.59
        - Very low confidence (major differences): 0.2-0.39
        - Minimal similarity: 0.1-0.19
        - No meaningful similarity: 0.0-0.09

        Examples:
        - Same vendor, same amount, 11 days apart: 0.7-0.8
        - Similar vendor name, same amount, same date: 0.8-0.9
        - Same vendor, 10% amount difference, same date: 0.6-0.7
        - Different vendor, same amount, same date: 0.3-0.4
        - Completely different vendor, amount, date: 0.1-0.2

        Consider vendor name similarity, amount accuracy, and date proximity.
        Score based on overall likelihood this is the correct match.

        Return only: score|reason
        """

        # Best-effort boundary: any failure talking to the API (or reading
        # the response object) degrades to a zero score instead of aborting
        # the whole matching run.
        try:
            response = self.client.chat.completions.create(
                model=self.model,
                messages=[{"role": "user", "content": prompt}],
                max_tokens=100,
                temperature=0.1,
            )
            content = response.choices[0].message.content
        except Exception as e:
            return 0.0, f"AI error: {str(e)}"

        return self._parse_score_response(content)

    @staticmethod
    def _parse_score_response(result: Optional[str]) -> Tuple[float, str]:
        """Parse a ``score|reason`` reply into a clamped score and its reason.

        Anything that does not contain a ``|`` separator preceded by a
        finite number is reported as ``(0.0, "Invalid AI response")``.
        """
        score_str, separator, reason = (result or "").strip().partition('|')
        if not separator:
            return 0.0, "Invalid AI response"
        try:
            score = float(score_str.strip())
        except ValueError:
            return 0.0, "Invalid AI response"
        if math.isnan(score):
            # NaN would slip through min/max clamping unchanged.
            return 0.0, "Invalid AI response"
        return min(max(score, 0.0), 1.0), reason.strip()