Update matching logic: AI scores all candidates, lower threshold, absolute amount, prompt improvements

2025-07-02 16:38:01 +01:00
commit a519c42866
10641 changed files with 3944174 additions and 0 deletions
@@ -0,0 +1,102 @@
+import math
+from datetime import datetime, timedelta
+from typing import List, Optional, Tuple
+
+import groq
+
+import config
+from models import Receipt, Transaction, Match
+
+class AIMatcher:
+    def __init__(self):
+        self.client = groq.Groq(api_key=config.GROQ_API_KEY)
+        self.model = "llama3-8b-8192"
+    
+    def match_receipts_to_transactions(self, receipts: List[Receipt], transactions: List[Transaction]) -> List[Match]:
+        matches = []
+        
+        for receipt in receipts:
+            best_match = self._find_best_match(receipt, transactions)
+            if best_match:
+                matches.append(best_match)
+        
+        return sorted(matches, key=lambda x: x.confidence_score, reverse=True)
+    
+    def _find_best_match(self, receipt: Receipt, transactions: List[Transaction]) -> Match:
+        candidates = self._filter_candidates(receipt, transactions)
+        if not candidates:
+            return None
+            
+        best_match = None
+        highest_score = 0
+        
+        for transaction in candidates:
+            score, reason = self._calculate_match_score(receipt, transaction)
+            if score > highest_score and score >= config.CONFIDENCE_THRESHOLD:
+                highest_score = score
+                best_match = Match(receipt, transaction, score, reason)
+        
+        return best_match
+    
+    def _filter_candidates(self, receipt: Receipt, transactions: List[Transaction]) -> List[Transaction]:
+        # Return ALL transactions - let the AI decide on scoring
+        # Only filter out transactions with completely different amounts (>50% difference) to avoid obvious mismatches
+        candidates = []
+        amount_threshold = receipt.amount * 0.5  # 50% threshold for obvious mismatches
+        
+        for transaction in transactions:
+            # Use absolute value for transaction amount comparison
+            transaction_amount_abs = abs(transaction.amount)
+            # Only exclude transactions with obviously different amounts
+            if abs(receipt.amount - transaction_amount_abs) <= amount_threshold:
+                candidates.append(transaction)
+        
+        return candidates
+    
+    def _calculate_match_score(self, receipt: Receipt, transaction: Transaction) -> Tuple[float, str]:
+        # Calculate differences for the AI to consider
+        date_diff = abs((receipt.receipt_date - transaction.transaction_date).days)
+        transaction_amount_abs = abs(transaction.amount)
+        amount_diff = abs(receipt.amount - transaction_amount_abs)
+        amount_percent_diff = (amount_diff / receipt.amount) * 100 if receipt.amount > 0 else 0
+        
+        prompt = f"""
+        Compare this receipt with this transaction and provide a confidence score (0-1) and brief reason:
+        
+        Receipt: {receipt.vendor}, ${receipt.amount}, {receipt.receipt_date.strftime('%Y-%m-%d')}
+        Transaction: {transaction.vendor}, ${transaction.amount} (absolute: ${transaction_amount_abs}), {transaction.transaction_date.strftime('%Y-%m-%d')}
+        
+        Differences:
+        - Date difference: {date_diff} days
+        - Amount difference: ${amount_diff} ({amount_percent_diff:.1f}%)
+        - Vendor comparison: "{receipt.vendor}" vs "{transaction.vendor}"
+        
+        Scoring guidelines:
+        - Perfect matches (same vendor, amount, date): 0.95-1.0
+        - High confidence (minor differences): 0.8-0.94
+        - Medium confidence (moderate differences): 0.6-0.79
+        - Low confidence (significant differences): 0.4-0.59
+        - Very low confidence (major differences): 0.2-0.39
+        - No match: 0.0-0.19
+        
+        Consider vendor name similarity, amount accuracy, and date proximity.
+        Score based on your discretion - even imperfect matches should get scores if there's reasonable similarity.
+        
+        Return only: score|reason
+        """
+        
+        try:
+            response = self.client.chat.completions.create(
+                model=self.model,
+                messages=[{"role": "user", "content": prompt}],
+                max_tokens=100,
+                temperature=0.1
+            )
+            
+            result = response.choices[0].message.content.strip()
+            if '|' in result:
+                score_str, reason = result.split('|', 1)
+                score = float(score_str.strip())
+                return min(max(score, 0), 1), reason.strip()
+            else:
+                return 0.0, "Invalid AI response"
+                
+        except Exception as e:
+            return 0.0, f"AI error: {str(e)}"