diff --git a/ai_matcher.py b/ai_matcher.py
index 2e179ac..c484198 100644
--- a/ai_matcher.py
+++ b/ai_matcher.py
@@ -13,27 +13,30 @@ class AIMatcher:
         matches = []
 
         for receipt in receipts:
-            # Get ALL potential matches for this receipt, not just the best one
-            receipt_matches = self._find_all_matches(receipt, transactions)
-            matches.extend(receipt_matches)
+            # Get the BEST match for this receipt (highest confidence score)
+            best_match = self._find_best_match(receipt, transactions)
+            if best_match is not None:
+                matches.append(best_match)
 
         return sorted(matches, key=lambda x: x.confidence_score, reverse=True)
 
-    def _find_all_matches(self, receipt: Receipt, transactions: List[Transaction]) -> List[Match]:
-        """Find ALL potential matches for a receipt, not just the best one"""
+    def _find_best_match(self, receipt: Receipt, transactions: List[Transaction]) -> Match:
+        """Return the highest-scoring Match for a receipt, or None when there are no candidates."""
         candidates = self._filter_candidates(receipt, transactions)
         if not candidates:
-            return []
+            return None
 
-        matches = []
+        best_match = None
+        highest_score = 0
         for transaction in candidates:
             score, reason = self._calculate_match_score(receipt, transaction)
 
-            # Include ALL matches regardless of score - let the user decide
-            match = Match(receipt, transaction, score, reason)
-            matches.append(match)
+            # Seed with the first candidate so even an all-0.0 receipt keeps its best match
+            if best_match is None or score > highest_score:
+                highest_score = score
+                best_match = Match(receipt, transaction, score, reason)
 
-        return matches
+        return best_match
 
     def _filter_candidates(self, receipt: Receipt, transactions: List[Transaction]) -> List[Transaction]:
         # Return MOST transactions - let the AI decide on scoring
@@ -68,22 +71,24 @@ class AIMatcher:
         - Amount difference: ${amount_diff} ({amount_percent_diff:.1f}%)
         - Vendor comparison: "{receipt.vendor}" vs "{transaction.vendor}"
 
-        IMPORTANT: Score ALL potential matches, even imperfect ones. 
         The score should reflect how likely this is a match:
+        Score this potential match based on how likely it is the correct match:
         - Perfect matches (same vendor, amount, date): 0.95-1.0
         - High confidence (minor differences): 0.8-0.94
         - Medium confidence (moderate differences): 0.6-0.79
         - Low confidence (significant differences): 0.4-0.59
         - Very low confidence (major differences): 0.2-0.39
-        - No meaningful similarity: 0.0-0.19
+        - Minimal similarity: 0.1-0.19
+        - No meaningful similarity: 0.0-0.09
 
         Examples:
         - Same vendor, same amount, 11 days apart: 0.7-0.8
         - Similar vendor name, same amount, same date: 0.8-0.9
         - Same vendor, 10% amount difference, same date: 0.6-0.7
         - Different vendor, same amount, same date: 0.3-0.4
+        - Completely different vendor, amount, date: 0.1-0.2
 
-        Consider vendor name similarity, amount accuracy, and date proximity. Even imperfect matches should get reasonable scores if there's any meaningful similarity.
+        Consider vendor name similarity, amount accuracy, and date proximity. Score based on overall likelihood this is the correct match.
 
         Return only: score|reason
         """