Improve AI matching to show ALL potential matches with confidence scores

2025-07-02 21:15:33 +01:00
parent 08386f8544
commit 58d579700e
1 changed file with 24 additions and 18 deletions
@@ -13,33 +13,33 @@ class AIMatcher:
        matches = []
        
        for receipt in receipts:
-            best_match = self._find_best_match(receipt, transactions)
-            if best_match:
-                matches.append(best_match)
+            # Get ALL potential matches for this receipt, not just the best one
+            receipt_matches = self._find_all_matches(receipt, transactions)
+            matches.extend(receipt_matches)
        
        return sorted(matches, key=lambda x: x.confidence_score, reverse=True)
    
-    def _find_best_match(self, receipt: Receipt, transactions: List[Transaction]) -> Match:
+    def _find_all_matches(self, receipt: Receipt, transactions: List[Transaction]) -> List[Match]:
+        """Find ALL potential matches for a receipt, not just the best one"""
        candidates = self._filter_candidates(receipt, transactions)
        if not candidates:
-            return None
+            return []
            
-        best_match = None
-        highest_score = 0
+        matches = []
        
        for transaction in candidates:
            score, reason = self._calculate_match_score(receipt, transaction)
-            if score > highest_score and score >= config.CONFIDENCE_THRESHOLD:
-                highest_score = score
-                best_match = Match(receipt, transaction, score, reason)
+            # Include ALL matches regardless of score - let the user decide
+            match = Match(receipt, transaction, score, reason)
+            matches.append(match)
        
-        return best_match
+        return matches
    
    def _filter_candidates(self, receipt: Receipt, transactions: List[Transaction]) -> List[Transaction]:
-        # Return ALL transactions - let the AI decide on scoring
-        # Only filter out transactions with completely different amounts (>50% difference) to avoid obvious mismatches
+        # Return MOST transactions - let the AI decide on scoring
+        # Only filter out transactions with completely different amounts (>100% difference) to avoid obvious mismatches
        candidates = []
-        amount_threshold = receipt.amount * 0.5  # 50% threshold for obvious mismatches
+        amount_threshold = receipt.amount * 1.0  # 100% threshold - more inclusive
        
        for transaction in transactions:
            # Use absolute value for transaction amount comparison
@@ -68,16 +68,22 @@ class AIMatcher:
        - Amount difference: ${amount_diff} ({amount_percent_diff:.1f}%)
        - Vendor comparison: "{receipt.vendor}" vs "{transaction.vendor}"
        
-        Scoring guidelines:
+        IMPORTANT: Score ALL potential matches, even imperfect ones. The score should reflect how likely this is a match:
+        
        - Perfect matches (same vendor, amount, date): 0.95-1.0
        - High confidence (minor differences): 0.8-0.94
        - Medium confidence (moderate differences): 0.6-0.79
        - Low confidence (significant differences): 0.4-0.59
        - Very low confidence (major differences): 0.2-0.39
-        - No match: 0.0-0.19
+        - No meaningful similarity: 0.0-0.19
        
-        Consider vendor name similarity, amount accuracy, and date proximity.
-        Score based on your discretion - even imperfect matches should get scores if there's reasonable similarity.
+        Examples:
+        - Same vendor, same amount, 11 days apart: 0.7-0.8
+        - Similar vendor name, same amount, same date: 0.8-0.9
+        - Same vendor, 10% amount difference, same date: 0.6-0.7
+        - Different vendor, same amount, same date: 0.3-0.4
+        
+        Consider vendor name similarity, amount accuracy, and date proximity. Even imperfect matches should get reasonable scores if there's any meaningful similarity.
        
        Return only: score|reason
        """