Files
ds_quickbooks/ai_matcher.py
T

102 lines
4.5 KiB
Python
Raw Normal View History

import logging
import math
import re
from datetime import datetime, timedelta
from typing import List, Optional, Tuple

import groq

import config
from models import Receipt, Transaction, Match
class AIMatcher:
    """Pair receipts with transactions by asking an LLM to score each candidate pair.

    For every receipt, transactions whose amount is roughly comparable are sent
    one at a time to the chat-completion API through the Groq client; the
    highest-scoring transaction that reaches ``config.CONFIDENCE_THRESHOLD``
    becomes that receipt's match.

    Note: nothing here removes a transaction once it has been chosen, so the
    same transaction may appear in the matches of several receipts.
    """

    DEFAULT_MODEL = "llama3-8b-8192"

    # A transaction is a candidate only if its amount is within this fraction
    # of the receipt amount (both compared by magnitude).
    AMOUNT_TOLERANCE = 0.5

    # A standalone number in [0, 1] (e.g. "0.85", ".9", "1", "1.0"), used as a
    # fallback when the score field contains extra text such as "Score: 0.85".
    _SCORE_PATTERN = re.compile(
        r"(?<![\w.])(?:0(?:\.\d+)?|1(?:\.0+)?|\.\d+)(?!\w|\.\d)"
    )

    def __init__(self, model: str = DEFAULT_MODEL):
        """Create the Groq client from ``config.GROQ_API_KEY``.

        Args:
            model: Name of the chat model to query. Defaults to the model the
                class has always used.
        """
        self.client = groq.Groq(api_key=config.GROQ_API_KEY)
        self.model = model

    def match_receipts_to_transactions(
        self, receipts: List["Receipt"], transactions: List["Transaction"]
    ) -> List["Match"]:
        """Return the best match for each receipt, most confident first.

        Receipts for which no transaction reaches the confidence threshold
        are omitted from the result.
        """
        matches = []
        for receipt in receipts:
            best_match = self._find_best_match(receipt, transactions)
            if best_match is not None:
                matches.append(best_match)
        return sorted(matches, key=lambda m: m.confidence_score, reverse=True)

    def _find_best_match(
        self, receipt: "Receipt", transactions: List["Transaction"]
    ) -> Optional["Match"]:
        """Return the highest-scoring acceptable match for ``receipt``, or ``None``.

        A transaction is acceptable when its score is strictly positive and at
        least ``config.CONFIDENCE_THRESHOLD``. On ties the earliest candidate
        wins because later ones must score strictly higher.
        """
        best_match = None
        highest_score = 0
        for transaction in self._filter_candidates(receipt, transactions):
            score, reason = self._calculate_match_score(receipt, transaction)
            if score > highest_score and score >= config.CONFIDENCE_THRESHOLD:
                highest_score = score
                best_match = Match(receipt, transaction, score, reason)
        return best_match

    def _filter_candidates(
        self, receipt: "Receipt", transactions: List["Transaction"]
    ) -> List["Transaction"]:
        """Drop transactions whose amount is obviously unrelated to the receipt.

        Everything else is left for the AI to score. Amounts are compared by
        magnitude on both sides: transaction amounts were already taken as
        absolute values, and doing the same for the receipt keeps a negative
        receipt amount from producing a negative tolerance that would reject
        every transaction.
        """
        receipt_amount = abs(receipt.amount)
        tolerance = receipt_amount * self.AMOUNT_TOLERANCE
        return [
            transaction
            for transaction in transactions
            if abs(receipt_amount - abs(transaction.amount)) <= tolerance
        ]

    def _calculate_match_score(
        self, receipt: "Receipt", transaction: "Transaction"
    ) -> Tuple[float, str]:
        """Ask the model how well ``receipt`` and ``transaction`` match.

        Returns:
            ``(score, reason)`` with ``score`` clamped to ``[0, 1]``. Any
            failure yields a score of ``0.0``: ``"AI error: ..."`` when the
            request itself fails and ``"Invalid AI response"`` when the reply
            cannot be parsed.
        """
        prompt = self._build_prompt(receipt, transaction)
        try:
            response = self.client.chat.completions.create(
                model=self.model,
                messages=[{"role": "user", "content": prompt}],
                max_tokens=100,
                temperature=0.1,
            )
            content = response.choices[0].message.content
        except Exception as e:
            # Deliberately broad: scoring is best-effort and one failed request
            # must not abort the whole matching run. Log it so outages are
            # visible, since a zero score is otherwise silently discarded.
            logging.getLogger(__name__).warning("AI scoring request failed: %s", e)
            return 0.0, f"AI error: {str(e)}"

        if not content:
            return 0.0, "Invalid AI response"
        return self._parse_score_response(content.strip())

    @staticmethod
    def _build_prompt(receipt: "Receipt", transaction: "Transaction") -> str:
        """Render the scoring prompt, including precomputed differences."""
        date_diff = abs((receipt.receipt_date - transaction.transaction_date).days)
        receipt_amount_abs = abs(receipt.amount)
        transaction_amount_abs = abs(transaction.amount)
        amount_diff = abs(receipt_amount_abs - transaction_amount_abs)
        # Guard against division by zero for zero-amount receipts.
        amount_percent_diff = (
            (amount_diff / receipt_amount_abs) * 100 if receipt_amount_abs > 0 else 0
        )
        receipt_day = receipt.receipt_date.strftime("%Y-%m-%d")
        transaction_day = transaction.transaction_date.strftime("%Y-%m-%d")

        lines = [
            "",
            "Compare this receipt with this transaction and provide a confidence score (0-1) and brief reason:",
            f"Receipt: {receipt.vendor}, ${receipt.amount}, {receipt_day}",
            f"Transaction: {transaction.vendor}, ${transaction.amount} (absolute: ${transaction_amount_abs}), {transaction_day}",
            "Differences:",
            f"- Date difference: {date_diff} days",
            f"- Amount difference: ${amount_diff} ({amount_percent_diff:.1f}%)",
            f'- Vendor comparison: "{receipt.vendor}" vs "{transaction.vendor}"',
            "Scoring guidelines:",
            "- Perfect matches (same vendor, amount, date): 0.95-1.0",
            "- High confidence (minor differences): 0.8-0.94",
            "- Medium confidence (moderate differences): 0.6-0.79",
            "- Low confidence (significant differences): 0.4-0.59",
            "- Very low confidence (major differences): 0.2-0.39",
            "- No match: 0.0-0.19",
            "Consider vendor name similarity, amount accuracy, and date proximity.",
            "Score based on your discretion - even imperfect matches should get scores if there's reasonable similarity.",
            "Return only: score|reason",
            "",
        ]
        return "\n".join(lines)

    @classmethod
    def _parse_score_response(cls, result: str) -> Tuple[float, str]:
        """Parse a ``score|reason`` reply into a clamped score and its reason.

        The score field is first read as a plain float; if that fails (for
        example ``"Score: 0.9"``), the first standalone number between 0 and 1
        in the field is used. Replies without a ``|``, without a usable
        number, or with a non-finite score are reported as invalid.
        """
        if "|" not in result:
            return 0.0, "Invalid AI response"

        score_text, reason = result.split("|", 1)
        score_text = score_text.strip()
        try:
            score = float(score_text)
        except ValueError:
            found = cls._SCORE_PATTERN.search(score_text)
            if found is None:
                return 0.0, "Invalid AI response"
            score = float(found.group())

        # float() accepts "nan"/"inf"; NaN would slip through the clamp below.
        if not math.isfinite(score):
            return 0.0, "Invalid AI response"
        return min(max(score, 0.0), 1.0), reason.strip()