Update matching logic: AI scores all candidates, lower threshold, absolute amount, prompt improvements
This commit is contained in:
+102
@@ -0,0 +1,102 @@
|
||||
import logging
import re
from datetime import datetime, timedelta
from typing import List, Optional, Tuple

import groq

import config
from models import Receipt, Transaction, Match
|
||||
|
||||
class AIMatcher:
    """Pair receipts with transactions using a confidence score from a chat model.

    For each receipt, transactions whose amount magnitude is within
    ``AMOUNT_TOLERANCE`` of the receipt's are sent one at a time to the
    model, which answers ``score|reason``. The highest-scoring candidate at
    or above ``config.CONFIDENCE_THRESHOLD`` becomes the receipt's match.
    """

    # Model requested when the caller does not pass one.
    # NOTE(review): confirm this model id is still served by the provider.
    DEFAULT_MODEL = "llama3-8b-8192"

    # A transaction is a candidate when its amount magnitude differs from the
    # receipt's by at most this fraction of the receipt's magnitude.
    AMOUNT_TOLERANCE = 0.5

    # Accepts a number optionally preceded by a label such as "Score:".
    _SCORE_WITH_LABEL = re.compile(
        r"(?:[A-Za-z ]*[:=])?\s*([-+]?(?:\d+(?:\.\d*)?|\.\d+))"
    )

    _log = logging.getLogger(__name__)

    def __init__(self, model: str = DEFAULT_MODEL):
        """Create the Groq client from ``config.GROQ_API_KEY``.

        Args:
            model: Identifier of the chat model used for scoring.
        """
        self.client = groq.Groq(api_key=config.GROQ_API_KEY)
        self.model = model

    def match_receipts_to_transactions(
        self, receipts: List[Receipt], transactions: List[Transaction]
    ) -> List[Match]:
        """Return the best match per receipt, highest confidence first.

        Receipts with no candidate at or above the confidence threshold are
        omitted from the result.

        NOTE(review): each receipt is matched independently, so two receipts
        can be paired with the same transaction; nothing here de-duplicates.
        """
        found = (self._find_best_match(receipt, transactions) for receipt in receipts)
        matches = [match for match in found if match is not None]
        return sorted(matches, key=lambda m: m.confidence_score, reverse=True)

    def _find_best_match(
        self, receipt: Receipt, transactions: List[Transaction]
    ) -> Optional[Match]:
        """Return the highest-scoring acceptable match for ``receipt``, or None.

        A candidate is acceptable when its score is positive and at least
        ``config.CONFIDENCE_THRESHOLD``. On ties the earliest candidate wins.
        """
        best_match = None
        highest_score = 0

        for transaction in self._filter_candidates(receipt, transactions):
            score, reason = self._calculate_match_score(receipt, transaction)
            if score > highest_score and score >= config.CONFIDENCE_THRESHOLD:
                highest_score = score
                best_match = Match(receipt, transaction, score, reason)
            if score >= 1.0:
                # Scores are clamped to 1.0 and replacement requires a strictly
                # higher score, so no later candidate can change the outcome.
                break

        return best_match

    def _filter_candidates(
        self, receipt: Receipt, transactions: List[Transaction]
    ) -> List[Transaction]:
        """Drop transactions whose amount is obviously unrelated to the receipt.

        Amounts are compared by magnitude on both sides, so a sign difference
        between the receipt and the transaction does not exclude a candidate.
        Everything that survives is left for the model to score.
        """
        receipt_amount = abs(receipt.amount)
        tolerance = receipt_amount * self.AMOUNT_TOLERANCE
        return [
            transaction
            for transaction in transactions
            if abs(receipt_amount - abs(transaction.amount)) <= tolerance
        ]

    @staticmethod
    def _build_prompt(receipt: Receipt, transaction: Transaction) -> str:
        """Render the scoring prompt for one receipt/transaction pair."""
        # abs() on the timedelta (not on .days) keeps the gap symmetric: a
        # negative timedelta of one hour has .days == -1, not 0.
        date_diff = abs(receipt.receipt_date - transaction.transaction_date).days
        receipt_amount_abs = abs(receipt.amount)
        transaction_amount_abs = abs(transaction.amount)
        amount_diff = abs(receipt_amount_abs - transaction_amount_abs)
        amount_percent_diff = (
            (amount_diff / receipt_amount_abs) * 100 if receipt_amount_abs > 0 else 0
        )

        # The prompt text is flush-left so no source indentation ends up in
        # the message sent to the model.
        return f"""
Compare this receipt with this transaction and provide a confidence score (0-1) and brief reason:

Receipt: {receipt.vendor}, ${receipt.amount}, {receipt.receipt_date.strftime('%Y-%m-%d')}
Transaction: {transaction.vendor}, ${transaction.amount} (absolute: ${transaction_amount_abs}), {transaction.transaction_date.strftime('%Y-%m-%d')}

Differences:
- Date difference: {date_diff} days
- Amount difference: ${amount_diff} ({amount_percent_diff:.1f}%)
- Vendor comparison: "{receipt.vendor}" vs "{transaction.vendor}"

Scoring guidelines:
- Perfect matches (same vendor, amount, date): 0.95-1.0
- High confidence (minor differences): 0.8-0.94
- Medium confidence (moderate differences): 0.6-0.79
- Low confidence (significant differences): 0.4-0.59
- Very low confidence (major differences): 0.2-0.39
- No match: 0.0-0.19

Consider vendor name similarity, amount accuracy, and date proximity.
Score based on your discretion - even imperfect matches should get scores if there's reasonable similarity.

Return only: score|reason
"""

    @classmethod
    def _parse_score_response(cls, result: str) -> Tuple[float, str]:
        """Turn a ``score|reason`` reply into ``(score, reason)``.

        The score is clamped to the range 0-1. Any reply that has no ``|`` or
        no usable number before it yields ``(0.0, "Invalid AI response")``.
        """
        if "|" not in result:
            return 0.0, "Invalid AI response"

        head, reason = result.split("|", 1)
        # Tolerate chatter before the answer by reading only the line that
        # directly precedes the separator.
        head_lines = head.strip().splitlines()
        score_text = head_lines[-1].strip() if head_lines else ""

        try:
            score = float(score_text)
        except ValueError:
            labelled = cls._SCORE_WITH_LABEL.fullmatch(score_text)
            if labelled is None:
                return 0.0, "Invalid AI response"
            score = float(labelled.group(1))

        if score != score:
            # NaN never compares equal to itself; it cannot be ranked.
            return 0.0, "Invalid AI response"

        return min(max(score, 0.0), 1.0), reason.strip()

    def _calculate_match_score(
        self, receipt: Receipt, transaction: Transaction
    ) -> Tuple[float, str]:
        """Ask the model how well ``transaction`` matches ``receipt``.

        Returns:
            ``(score, reason)`` with the score in the range 0-1. A failed
            request is logged and reported as ``(0.0, "AI error: ...")``
            instead of being raised, so one bad call does not abort a run.
        """
        prompt = self._build_prompt(receipt, transaction)

        try:
            response = self.client.chat.completions.create(
                model=self.model,
                messages=[{"role": "user", "content": prompt}],
                max_tokens=100,
                temperature=0.1,
            )
            content = response.choices[0].message.content
        except Exception as exc:  # service boundary: degrade to "no match"
            self._log.warning("AI scoring request failed: %s", exc)
            return 0.0, f"AI error: {exc}"

        return self._parse_score_response(content or "")
|
||||
Reference in New Issue
Block a user