Files
ds_quickbooks/ai_matcher.py
T

113 lines
5.0 KiB
Python
Raw Normal View History

from datetime import datetime, timedelta
from typing import List, Optional, Tuple

import groq

import config
from models import Receipt, Transaction, Match
class AIMatcher:
    """Pair receipts with transactions using LLM-assigned confidence scores.

    For each receipt, transactions with a plausible amount are scored one by
    one through a Groq chat completion, and the highest-scoring transaction
    is kept as that receipt's match.

    Note: one API request is issued per (receipt, candidate transaction)
    pair, so the number of requests grows with both input lists.
    """

    # NOTE(review): confirm this model id is still served by Groq.
    DEFAULT_MODEL = "llama3-8b-8192"

    def __init__(self, model: str = DEFAULT_MODEL, client=None):
        """Create a matcher.

        Args:
            model: Model identifier sent with every scoring request.
            client: Object exposing ``chat.completions.create``. When
                omitted, a ``groq.Groq`` client is built from
                ``config.GROQ_API_KEY``.
        """
        if client is None:
            client = groq.Groq(api_key=config.GROQ_API_KEY)
        self.client = client
        self.model = model

    def match_receipts_to_transactions(self, receipts: List[Receipt], transactions: List[Transaction]) -> List[Match]:
        """Return the best match per receipt, most confident first.

        Receipts for which no transaction scores above zero are omitted.
        The same transaction may be returned for more than one receipt.
        """
        matches = []
        for receipt in receipts:
            best_match = self._find_best_match(receipt, transactions)
            if best_match:
                matches.append(best_match)
        return sorted(matches, key=lambda match: match.confidence_score, reverse=True)

    def _find_best_match(self, receipt: Receipt, transactions: List[Transaction]) -> Optional[Match]:
        """Return the highest-scoring match for ``receipt``, or ``None``.

        ``None`` is returned when no transaction passes the amount filter or
        when every candidate scores 0.0 (which includes failed API calls).
        """
        best_match = None
        highest_score = 0
        for transaction in self._filter_candidates(receipt, transactions):
            score, reason = self._calculate_match_score(receipt, transaction)
            # Strictly greater: a zero score never becomes a match, and the
            # first candidate wins ties.
            if score > highest_score:
                highest_score = score
                best_match = Match(receipt, transaction, score, reason)
        return best_match

    def _filter_candidates(self, receipt: Receipt, transactions: List[Transaction]) -> List[Transaction]:
        """Return transactions whose amount is within 100% of the receipt's.

        Amounts are compared by magnitude on both sides, so the sign of
        either amount does not affect the result. The filter is deliberately
        loose; fine-grained judgement is left to the scoring step.
        """
        # Using the magnitude keeps the threshold non-negative; a negative
        # receipt amount would otherwise exclude every transaction.
        receipt_amount_abs = abs(receipt.amount)
        return [
            transaction
            for transaction in transactions
            if abs(receipt_amount_abs - abs(transaction.amount)) <= receipt_amount_abs
        ]

    def _calculate_match_score(self, receipt: Receipt, transaction: Transaction) -> Tuple[float, str]:
        """Ask the model to score one receipt/transaction pair.

        Returns:
            ``(score, reason)`` with ``score`` clamped to ``[0, 1]``. Any
            failure while calling the API yields ``(0.0, "AI error: ...")``;
            a reply that is not in ``score|reason`` form yields
            ``(0.0, "Invalid AI response")``.
        """
        prompt = self._build_prompt(receipt, transaction)
        try:
            response = self.client.chat.completions.create(
                model=self.model,
                messages=[{"role": "user", "content": prompt}],
                max_tokens=100,
                temperature=0.1,
            )
            content = response.choices[0].message.content
        except Exception as e:
            # Best-effort boundary: one failed request must not abort the
            # whole matching run, so it is reported as a zero score.
            return 0.0, f"AI error: {str(e)}"
        return self._parse_response(content)

    @staticmethod
    def _build_prompt(receipt: Receipt, transaction: Transaction) -> str:
        """Build the scoring prompt, including precomputed differences."""
        date_diff = abs((receipt.receipt_date - transaction.transaction_date).days)
        receipt_amount_abs = abs(receipt.amount)
        transaction_amount_abs = abs(transaction.amount)
        amount_diff = abs(receipt_amount_abs - transaction_amount_abs)
        # Guard against division by zero for zero-value receipts.
        amount_percent_diff = (amount_diff / receipt_amount_abs) * 100 if receipt_amount_abs > 0 else 0
        return (
            "\n"
            "Compare this receipt with this transaction and provide a confidence score (0-1) and brief reason:\n"
            f"Receipt: {receipt.vendor}, ${receipt.amount}, {receipt.receipt_date.strftime('%Y-%m-%d')}\n"
            f"Transaction: {transaction.vendor}, ${transaction.amount} (absolute: ${transaction_amount_abs}), {transaction.transaction_date.strftime('%Y-%m-%d')}\n"
            "Differences:\n"
            f"- Date difference: {date_diff} days\n"
            f"- Amount difference: ${amount_diff} ({amount_percent_diff:.1f}%)\n"
            f'- Vendor comparison: "{receipt.vendor}" vs "{transaction.vendor}"\n'
            "Score this potential match based on how likely it is the correct match:\n"
            "- Perfect matches (same vendor, amount, date): 0.95-1.0\n"
            "- High confidence (minor differences): 0.8-0.94\n"
            "- Medium confidence (moderate differences): 0.6-0.79\n"
            "- Low confidence (significant differences): 0.4-0.59\n"
            "- Very low confidence (major differences): 0.2-0.39\n"
            "- Minimal similarity: 0.1-0.19\n"
            "- No meaningful similarity: 0.0-0.09\n"
            "Examples:\n"
            "- Same vendor, same amount, 11 days apart: 0.7-0.8\n"
            "- Similar vendor name, same amount, same date: 0.8-0.9\n"
            "- Same vendor, 10% amount difference, same date: 0.6-0.7\n"
            "- Different vendor, same amount, same date: 0.3-0.4\n"
            "- Completely different vendor, amount, date: 0.1-0.2\n"
            "Consider vendor name similarity, amount accuracy, and date proximity. Score based on overall likelihood this is the correct match.\n"
            "Return only: score|reason\n"
        )

    @staticmethod
    def _parse_response(content: Optional[str]) -> Tuple[float, str]:
        """Parse a ``score|reason`` reply into a clamped score and a reason."""
        result = (content or "").strip()
        score_text, separator, reason = result.partition("|")
        if not separator:
            return 0.0, "Invalid AI response"
        try:
            score = float(score_text.strip())
        except ValueError as e:
            return 0.0, f"AI error: {str(e)}"
        if score != score:
            # NaN compares unequal to itself and would slip through the clamp.
            return 0.0, "Invalid AI response"
        return min(max(score, 0.0), 1.0), reason.strip()