# ai_matcher.py

import math
from datetime import datetime, timedelta
from typing import List, Optional, Tuple

import groq

import config
from models import Receipt, Transaction, Match

class AIMatcher:
    """Pair receipts with transactions using scores from a Groq chat model.

    For every receipt the transactions are pre-filtered by amount, each
    surviving candidate is scored by the model, and the highest-scoring
    candidate becomes that receipt's match.
    """

    DEFAULT_MODEL = "llama3-8b-8192"

    def __init__(self, client=None, model: str = DEFAULT_MODEL):
        """Create a matcher.

        Args:
            client: Object exposing ``chat.completions.create``. When omitted,
                a ``groq.Groq`` client is built from ``config.GROQ_API_KEY``.
            model: Model name sent with every scoring request.
        """
        if client is None:
            client = groq.Groq(api_key=config.GROQ_API_KEY)
        self.client = client
        self.model = model

    def match_receipts_to_transactions(self, receipts: List[Receipt], transactions: List[Transaction]) -> List[Match]:
        """Return the best match per receipt, highest confidence first.

        Receipts for which no candidate scores above zero are left out of
        the result.
        """
        matches = []
        for receipt in receipts:
            best_match = self._find_best_match(receipt, transactions)
            # Compare against None explicitly so the outcome does not depend
            # on how Match implements truthiness.
            if best_match is not None:
                matches.append(best_match)

        return sorted(matches, key=lambda match: match.confidence_score, reverse=True)

    def _find_best_match(self, receipt: Receipt, transactions: List[Transaction]) -> Optional[Match]:
        """Return the highest-scoring match for ``receipt``, or ``None``.

        No minimum confidence is enforced, but a candidate must score
        strictly above 0. A score of 0.0 is also what
        ``_calculate_match_score`` reports for failed or unparseable AI
        calls, so those never surface as matches. On ties the earliest
        candidate wins.
        """
        best_match = None
        highest_score = 0

        for transaction in self._filter_candidates(receipt, transactions):
            score, reason = self._calculate_match_score(receipt, transaction)
            if score > highest_score:
                highest_score = score
                best_match = Match(receipt, transaction, score, reason)

        return best_match

    def _filter_candidates(self, receipt: Receipt, transactions: List[Transaction]) -> List[Transaction]:
        """Return the transactions worth sending to the AI for scoring.

        A transaction is kept when its absolute amount differs from the
        receipt's absolute amount by at most 100% of the receipt amount;
        everything else is treated as an obvious mismatch.
        """
        # Compare magnitudes on both sides: a negative receipt amount would
        # otherwise produce a negative threshold and reject every candidate.
        # Avoiding a float multiplier also keeps Decimal amounts working.
        receipt_amount_abs = abs(receipt.amount)
        return [
            transaction
            for transaction in transactions
            if abs(receipt_amount_abs - abs(transaction.amount)) <= receipt_amount_abs
        ]

    def _calculate_match_score(self, receipt: Receipt, transaction: Transaction) -> Tuple[float, str]:
        """Ask the model how likely ``transaction`` belongs to ``receipt``.

        Returns:
            A ``(score, reason)`` pair with ``score`` clamped to [0, 1].
            ``(0.0, "AI error: ...")`` is returned when the API call fails
            and ``(0.0, "Invalid AI response")`` when the reply cannot be
            parsed.
        """
        # Pre-compute the differences so the model does not have to do the
        # arithmetic itself. Amounts are compared by magnitude, matching
        # _filter_candidates.
        date_diff = abs((receipt.receipt_date - transaction.transaction_date).days)
        receipt_amount_abs = abs(receipt.amount)
        transaction_amount_abs = abs(transaction.amount)
        amount_diff = abs(receipt_amount_abs - transaction_amount_abs)
        amount_percent_diff = (amount_diff / receipt_amount_abs) * 100 if receipt_amount_abs > 0 else 0

        prompt = f"""
        Compare this receipt with this transaction and provide a confidence score (0-1) and brief reason:
        
        Receipt: {receipt.vendor}, ${receipt.amount}, {receipt.receipt_date.strftime('%Y-%m-%d')}
        Transaction: {transaction.vendor}, ${transaction.amount} (absolute: ${transaction_amount_abs}), {transaction.transaction_date.strftime('%Y-%m-%d')}
        
        Differences:
        - Date difference: {date_diff} days
        - Amount difference: ${amount_diff} ({amount_percent_diff:.1f}%)
        - Vendor comparison: "{receipt.vendor}" vs "{transaction.vendor}"
        
        Score this potential match based on how likely it is the correct match:
        
        - Perfect matches (same vendor, amount, date): 0.95-1.0
        - High confidence (minor differences): 0.8-0.94
        - Medium confidence (moderate differences): 0.6-0.79
        - Low confidence (significant differences): 0.4-0.59
        - Very low confidence (major differences): 0.2-0.39
        - Minimal similarity: 0.1-0.19
        - No meaningful similarity: 0.0-0.09
        
        Examples:
        - Same vendor, same amount, 11 days apart: 0.7-0.8
        - Similar vendor name, same amount, same date: 0.8-0.9
        - Same vendor, 10% amount difference, same date: 0.6-0.7
        - Different vendor, same amount, same date: 0.3-0.4
        - Completely different vendor, amount, date: 0.1-0.2
        
        Consider vendor name similarity, amount accuracy, and date proximity. Score based on overall likelihood this is the correct match.
        
        Return only: score|reason
        """

        # Best-effort by design: any failure while calling the API is
        # reported as a zero score rather than aborting the whole run. The
        # try block covers only the call so parsing problems are reported
        # separately.
        try:
            response = self.client.chat.completions.create(
                model=self.model,
                messages=[{"role": "user", "content": prompt}],
                max_tokens=100,
                temperature=0.1
            )
            result = response.choices[0].message.content
        except Exception as e:
            return 0.0, f"AI error: {str(e)}"

        return self._parse_ai_response(result)

    @staticmethod
    def _parse_ai_response(result: Optional[str]) -> Tuple[float, str]:
        """Parse a ``score|reason`` reply into ``(score, reason)``.

        The score is clamped to [0, 1]. Missing content, a missing ``|``
        delimiter, a non-numeric score or a non-finite score (``nan``,
        ``inf``) all yield ``(0.0, "Invalid AI response")``.
        """
        if not result or '|' not in result:
            return 0.0, "Invalid AI response"

        score_str, reason = result.strip().split('|', 1)
        try:
            score = float(score_str.strip())
        except ValueError:
            return 0.0, "Invalid AI response"

        # float() accepts "nan" and "inf": NaN would slip through the clamp
        # unchanged and inf would be clamped to a perfect 1.0.
        if not math.isfinite(score):
            return 0.0, "Invalid AI response"

        return min(max(score, 0.0), 1.0), reason.strip()
Update matching logic: AI scores all candidates, lower threshold, absolute amount, prompt improvements 2025-07-02 16:38:01 +01:00			`import groq`
			`from datetime import datetime, timedelta`
			`from typing import List, Tuple`
			`import config`
			`from models import Receipt, Transaction, Match`

			`class AIMatcher:`
			`def __init__(self):`
			`self.client = groq.Groq(api_key=config.GROQ_API_KEY)`
			`self.model = "llama3-8b-8192"`

			`def match_receipts_to_transactions(self, receipts: List[Receipt], transactions: List[Transaction]) -> List[Match]:`
			`matches = []`

			`for receipt in receipts:`
Update AI matcher to return only the best match with confidence score 2025-07-02 21:39:41 +01:00			`# Get the BEST match for this receipt (highest confidence score)`
			`best_match = self._find_best_match(receipt, transactions)`
			`if best_match:`
			`matches.append(best_match)`
Update matching logic: AI scores all candidates, lower threshold, absolute amount, prompt improvements 2025-07-02 16:38:01 +01:00
			`return sorted(matches, key=lambda x: x.confidence_score, reverse=True)`

Update AI matcher to return only the best match with confidence score 2025-07-02 21:39:41 +01:00			`def _find_best_match(self, receipt: Receipt, transactions: List[Transaction]) -> Match:`
			`"""Find the BEST match for a receipt (highest confidence score)"""`
Update matching logic: AI scores all candidates, lower threshold, absolute amount, prompt improvements 2025-07-02 16:38:01 +01:00			`candidates = self._filter_candidates(receipt, transactions)`
			`if not candidates:`
Update AI matcher to return only the best match with confidence score 2025-07-02 21:39:41 +01:00			`return None`
Update matching logic: AI scores all candidates, lower threshold, absolute amount, prompt improvements 2025-07-02 16:38:01 +01:00
Update AI matcher to return only the best match with confidence score 2025-07-02 21:39:41 +01:00			`best_match = None`
			`highest_score = 0`
Update matching logic: AI scores all candidates, lower threshold, absolute amount, prompt improvements 2025-07-02 16:38:01 +01:00
			`for transaction in candidates:`
			`score, reason = self._calculate_match_score(receipt, transaction)`
Update AI matcher to return only the best match with confidence score 2025-07-02 21:39:41 +01:00			`# Keep the match with the highest score, regardless of how low it is`
			`if score > highest_score:`
			`highest_score = score`
			`best_match = Match(receipt, transaction, score, reason)`
Update matching logic: AI scores all candidates, lower threshold, absolute amount, prompt improvements 2025-07-02 16:38:01 +01:00
Update AI matcher to return only the best match with confidence score 2025-07-02 21:39:41 +01:00			`return best_match`
Update matching logic: AI scores all candidates, lower threshold, absolute amount, prompt improvements 2025-07-02 16:38:01 +01:00
			`def _filter_candidates(self, receipt: Receipt, transactions: List[Transaction]) -> List[Transaction]:`
Improve AI matching to show ALL potential matches with confidence scores 2025-07-02 21:15:33 +01:00			`# Return MOST transactions - let the AI decide on scoring`
			`# Only filter out transactions with completely different amounts (>100% difference) to avoid obvious mismatches`
Update matching logic: AI scores all candidates, lower threshold, absolute amount, prompt improvements 2025-07-02 16:38:01 +01:00			`candidates = []`
Improve AI matching to show ALL potential matches with confidence scores 2025-07-02 21:15:33 +01:00			`amount_threshold = receipt.amount * 1.0 # 100% threshold - more inclusive`
Update matching logic: AI scores all candidates, lower threshold, absolute amount, prompt improvements 2025-07-02 16:38:01 +01:00
			`for transaction in transactions:`
			`# Use absolute value for transaction amount comparison`
			`transaction_amount_abs = abs(transaction.amount)`
			`# Only exclude transactions with obviously different amounts`
			`if abs(receipt.amount - transaction_amount_abs) <= amount_threshold:`
			`candidates.append(transaction)`

			`return candidates`

			`def _calculate_match_score(self, receipt: Receipt, transaction: Transaction) -> Tuple[float, str]:`
			`# Calculate differences for the AI to consider`
			`date_diff = abs((receipt.receipt_date - transaction.transaction_date).days)`
			`transaction_amount_abs = abs(transaction.amount)`
			`amount_diff = abs(receipt.amount - transaction_amount_abs)`
			`amount_percent_diff = (amount_diff / receipt.amount) * 100 if receipt.amount > 0 else 0`

			`prompt = f"""`
			`Compare this receipt with this transaction and provide a confidence score (0-1) and brief reason:`

			`Receipt: {receipt.vendor}, ${receipt.amount}, {receipt.receipt_date.strftime('%Y-%m-%d')}`
			`Transaction: {transaction.vendor}, ${transaction.amount} (absolute: ${transaction_amount_abs}), {transaction.transaction_date.strftime('%Y-%m-%d')}`

			`Differences:`
			`- Date difference: {date_diff} days`
			`- Amount difference: ${amount_diff} ({amount_percent_diff:.1f}%)`
			`- Vendor comparison: "{receipt.vendor}" vs "{transaction.vendor}"`

Update AI matcher to return only the best match with confidence score 2025-07-02 21:39:41 +01:00			`Score this potential match based on how likely it is the correct match:`
Improve AI matching to show ALL potential matches with confidence scores 2025-07-02 21:15:33 +01:00
Update matching logic: AI scores all candidates, lower threshold, absolute amount, prompt improvements 2025-07-02 16:38:01 +01:00			`- Perfect matches (same vendor, amount, date): 0.95-1.0`
			`- High confidence (minor differences): 0.8-0.94`
			`- Medium confidence (moderate differences): 0.6-0.79`
			`- Low confidence (significant differences): 0.4-0.59`
			`- Very low confidence (major differences): 0.2-0.39`
Update AI matcher to return only the best match with confidence score 2025-07-02 21:39:41 +01:00			`- Minimal similarity: 0.1-0.19`
			`- No meaningful similarity: 0.0-0.09`
Improve AI matching to show ALL potential matches with confidence scores 2025-07-02 21:15:33 +01:00
			`Examples:`
			`- Same vendor, same amount, 11 days apart: 0.7-0.8`
			`- Similar vendor name, same amount, same date: 0.8-0.9`
			`- Same vendor, 10% amount difference, same date: 0.6-0.7`
			`- Different vendor, same amount, same date: 0.3-0.4`
Update AI matcher to return only the best match with confidence score 2025-07-02 21:39:41 +01:00			`- Completely different vendor, amount, date: 0.1-0.2`
Update matching logic: AI scores all candidates, lower threshold, absolute amount, prompt improvements 2025-07-02 16:38:01 +01:00
Update AI matcher to return only the best match with confidence score 2025-07-02 21:39:41 +01:00			`Consider vendor name similarity, amount accuracy, and date proximity. Score based on overall likelihood this is the correct match.`
Update matching logic: AI scores all candidates, lower threshold, absolute amount, prompt improvements 2025-07-02 16:38:01 +01:00
			`Return only: score\|reason`
			`"""`

			`try:`
			`response = self.client.chat.completions.create(`
			`model=self.model,`
			`messages=[{"role": "user", "content": prompt}],`
			`max_tokens=100,`
			`temperature=0.1`
			`)`

			`result = response.choices[0].message.content.strip()`
			`if '\|' in result:`
			`score_str, reason = result.split('\|', 1)`
			`score = float(score_str.strip())`
			`return min(max(score, 0), 1), reason.strip()`
			`else:`
			`return 0.0, "Invalid AI response"`

			`except Exception as e:`
			`return 0.0, f"AI error: {str(e)}"`