2025-07-02 16:38:01 +01:00
import groq
from datetime import datetime , timedelta
from typing import List , Tuple
import config
from models import Receipt , Transaction , Match
class AIMatcher :
def __init__ ( self ) :
self . client = groq . Groq ( api_key = config . GROQ_API_KEY )
self . model = " llama3-8b-8192 "
def match_receipts_to_transactions ( self , receipts : List [ Receipt ] , transactions : List [ Transaction ] ) - > List [ Match ] :
matches = [ ]
for receipt in receipts :
2025-07-02 21:15:33 +01:00
# Get ALL potential matches for this receipt, not just the best one
receipt_matches = self . _find_all_matches ( receipt , transactions )
matches . extend ( receipt_matches )
2025-07-02 16:38:01 +01:00
return sorted ( matches , key = lambda x : x . confidence_score , reverse = True )
2025-07-02 21:15:33 +01:00
def _find_all_matches ( self , receipt : Receipt , transactions : List [ Transaction ] ) - > List [ Match ] :
""" Find ALL potential matches for a receipt, not just the best one """
2025-07-02 16:38:01 +01:00
candidates = self . _filter_candidates ( receipt , transactions )
if not candidates :
2025-07-02 21:15:33 +01:00
return [ ]
2025-07-02 16:38:01 +01:00
2025-07-02 21:15:33 +01:00
matches = [ ]
2025-07-02 16:38:01 +01:00
for transaction in candidates :
score , reason = self . _calculate_match_score ( receipt , transaction )
2025-07-02 21:15:33 +01:00
# Include ALL matches regardless of score - let the user decide
match = Match ( receipt , transaction , score , reason )
matches . append ( match )
2025-07-02 16:38:01 +01:00
2025-07-02 21:15:33 +01:00
return matches
2025-07-02 16:38:01 +01:00
def _filter_candidates ( self , receipt : Receipt , transactions : List [ Transaction ] ) - > List [ Transaction ] :
2025-07-02 21:15:33 +01:00
# Return MOST transactions - let the AI decide on scoring
# Only filter out transactions with completely different amounts (>100% difference) to avoid obvious mismatches
2025-07-02 16:38:01 +01:00
candidates = [ ]
2025-07-02 21:15:33 +01:00
amount_threshold = receipt . amount * 1.0 # 100% threshold - more inclusive
2025-07-02 16:38:01 +01:00
for transaction in transactions :
# Use absolute value for transaction amount comparison
transaction_amount_abs = abs ( transaction . amount )
# Only exclude transactions with obviously different amounts
if abs ( receipt . amount - transaction_amount_abs ) < = amount_threshold :
candidates . append ( transaction )
return candidates
def _calculate_match_score ( self , receipt : Receipt , transaction : Transaction ) - > Tuple [ float , str ] :
# Calculate differences for the AI to consider
date_diff = abs ( ( receipt . receipt_date - transaction . transaction_date ) . days )
transaction_amount_abs = abs ( transaction . amount )
amount_diff = abs ( receipt . amount - transaction_amount_abs )
amount_percent_diff = ( amount_diff / receipt . amount ) * 100 if receipt . amount > 0 else 0
prompt = f """
Compare this receipt with this transaction and provide a confidence score (0-1) and brief reason:
Receipt: { receipt . vendor } , $ { receipt . amount } , { receipt . receipt_date . strftime ( ' % Y- % m- %d ' ) }
Transaction: { transaction . vendor } , $ { transaction . amount } (absolute: $ { transaction_amount_abs } ), { transaction . transaction_date . strftime ( ' % Y- % m- %d ' ) }
Differences:
- Date difference: { date_diff } days
- Amount difference: $ { amount_diff } ( { amount_percent_diff : .1f } %)
- Vendor comparison: " { receipt . vendor } " vs " { transaction . vendor } "
2025-07-02 21:15:33 +01:00
IMPORTANT: Score ALL potential matches, even imperfect ones. The score should reflect how likely this is a match:
2025-07-02 16:38:01 +01:00
- Perfect matches (same vendor, amount, date): 0.95-1.0
- High confidence (minor differences): 0.8-0.94
- Medium confidence (moderate differences): 0.6-0.79
- Low confidence (significant differences): 0.4-0.59
- Very low confidence (major differences): 0.2-0.39
2025-07-02 21:15:33 +01:00
- No meaningful similarity: 0.0-0.19
Examples:
- Same vendor, same amount, 11 days apart: 0.7-0.8
- Similar vendor name, same amount, same date: 0.8-0.9
- Same vendor, 10% amount difference, same date: 0.6-0.7
- Different vendor, same amount, same date: 0.3-0.4
2025-07-02 16:38:01 +01:00
2025-07-02 21:15:33 +01:00
Consider vendor name similarity, amount accuracy, and date proximity. Even imperfect matches should get reasonable scores if there ' s any meaningful similarity.
2025-07-02 16:38:01 +01:00
Return only: score|reason
"""
try :
response = self . client . chat . completions . create (
model = self . model ,
messages = [ { " role " : " user " , " content " : prompt } ] ,
max_tokens = 100 ,
temperature = 0.1
)
result = response . choices [ 0 ] . message . content . strip ( )
if ' | ' in result :
score_str , reason = result . split ( ' | ' , 1 )
score = float ( score_str . strip ( ) )
return min ( max ( score , 0 ) , 1 ) , reason . strip ( )
else :
return 0.0 , " Invalid AI response "
except Exception as e :
return 0.0 , f " AI error: { str ( e ) } "