Compare commits

...

3 Commits

Author SHA1 Message Date
Iyeoluwa Akinrinola 6b6633b165 Update AI matcher to return only the best match with confidence score 2025-07-02 21:39:41 +01:00
Iyeoluwa Akinrinola 58d579700e Improve AI matching to show ALL potential matches with confidence scores 2025-07-02 21:15:33 +01:00
Iyeoluwa Akinrinola 08386f8544 Add auto-matching endpoint and data storage for easier demo workflow 2025-07-02 21:02:11 +01:00
2 changed files with 130 additions and 9 deletions
+19 -8
View File
@@ -13,6 +13,7 @@ class AIMatcher:
matches = []
for receipt in receipts:
# Get the BEST match for this receipt (highest confidence score)
best_match = self._find_best_match(receipt, transactions)
if best_match:
matches.append(best_match)
@@ -20,6 +21,7 @@ class AIMatcher:
return sorted(matches, key=lambda x: x.confidence_score, reverse=True)
def _find_best_match(self, receipt: Receipt, transactions: List[Transaction]) -> Match:
"""Find the BEST match for a receipt (highest confidence score)"""
candidates = self._filter_candidates(receipt, transactions)
if not candidates:
return None
@@ -29,17 +31,18 @@ class AIMatcher:
for transaction in candidates:
score, reason = self._calculate_match_score(receipt, transaction)
if score > highest_score and score >= config.CONFIDENCE_THRESHOLD:
# Keep the match with the highest score, regardless of how low it is
if score > highest_score:
highest_score = score
best_match = Match(receipt, transaction, score, reason)
return best_match
def _filter_candidates(self, receipt: Receipt, transactions: List[Transaction]) -> List[Transaction]:
# Return ALL transactions - let the AI decide on scoring
# Only filter out transactions with completely different amounts (>50% difference) to avoid obvious mismatches
# Return MOST transactions - let the AI decide on scoring
# Only filter out transactions with completely different amounts (>100% difference) to avoid obvious mismatches
candidates = []
amount_threshold = receipt.amount * 0.5 # 50% threshold for obvious mismatches
amount_threshold = receipt.amount * 1.0 # 100% threshold - more inclusive
for transaction in transactions:
# Use absolute value for transaction amount comparison
@@ -68,16 +71,24 @@ class AIMatcher:
- Amount difference: ${amount_diff} ({amount_percent_diff:.1f}%)
- Vendor comparison: "{receipt.vendor}" vs "{transaction.vendor}"
Scoring guidelines:
Score this potential match based on how likely it is the correct match:
- Perfect matches (same vendor, amount, date): 0.95-1.0
- High confidence (minor differences): 0.8-0.94
- Medium confidence (moderate differences): 0.6-0.79
- Low confidence (significant differences): 0.4-0.59
- Very low confidence (major differences): 0.2-0.39
- No match: 0.0-0.19
- Minimal similarity: 0.1-0.19
- No meaningful similarity: 0.0-0.09
Consider vendor name similarity, amount accuracy, and date proximity.
Score based on your discretion - even imperfect matches should get scores if there's reasonable similarity.
Examples:
- Same vendor, same amount, 11 days apart: 0.7-0.8
- Similar vendor name, same amount, same date: 0.8-0.9
- Same vendor, 10% amount difference, same date: 0.6-0.7
- Different vendor, same amount, same date: 0.3-0.4
- Completely different vendor, amount, date: 0.1-0.2
Consider vendor name similarity, amount accuracy, and date proximity. Score based on overall likelihood this is the correct match.
Return only: score|reason
"""
+111 -1
View File
@@ -41,6 +41,10 @@ drive_sync = GoogleDriveSync()
# In-memory storage for uploaded files (in production, use a database)
uploaded_files = {}
# Imported transactions kept at module level; the CSV import endpoint rebinds
# this name, and match_auto, get_stats and get_status read it
stored_transactions = []
# Receipt fields extracted by process_document, keyed by file_id
processed_receipts = {}
@app.get("/")
async def root():
"""Health check endpoint"""
@@ -137,6 +141,10 @@ async def import_quickbooks_transactions_csv(file: UploadFile = File(...)):
})
except Exception as e:
errors.append(f"Row {idx+1}: {str(e)}")
# Store transactions globally for auto-matching
global stored_transactions
stored_transactions = transactions
# Use the same logic as the JSON import endpoint
request_obj = QuickBooksImportRequest(transactions=transactions)
response = await import_quickbooks_transactions(request_obj)
@@ -219,6 +227,21 @@ async def process_document(file_id: str):
else:
uploaded_files[file_id]["status"] = "processed"
uploaded_files[file_id]["extracted_data"] = result
# Store processed receipt data for auto-matching
global processed_receipts
processed_receipts[file_id] = {
"filename": file_info["filename"],
"upload_date": file_info["upload_date"],
"extraction_success": result.get("extraction_success", False),
"vendor": result.get("vendor"),
"total_amount": result.get("total_amount"),
"tax_amount": result.get("tax_amount"),
"date": result.get("date"),
"category": result.get("category"),
"confidence": result.get("confidence"),
"error": result.get("error")
}
return DocumentProcessResponse(
file_id=file_id,
@@ -422,6 +445,74 @@ async def match_receipts_transactions(request: MatchingRequest):
except Exception as e:
raise HTTPException(status_code=500, detail=str(e))
@app.post("/match-auto", response_model=MatchingResponse)
async def match_auto():
    """
    Automatically match all processed receipts against all imported transactions.

    This endpoint uses the stored transaction data from CSV import and
    all processed receipts to perform matching without requiring manual data input.

    Returns:
        MatchingResponse with one entry per match returned by the matching
        engine, plus the engine's statistics for those matches.

    Raises:
        HTTPException: 400 when no transactions are stored, no receipts are
            processed, or no receipt was extracted successfully; 500 for any
            other failure while converting or matching.
    """
    try:
        if not stored_transactions:
            raise HTTPException(status_code=400, detail="No transactions imported. Please upload CSV first.")

        if not processed_receipts:
            raise HTTPException(status_code=400, detail="No receipts processed. Please upload and process receipts first.")

        # Convert stored transaction dicts to Transaction models
        transactions = [
            Transaction(
                id=t["id"],
                transaction_date=datetime.strptime(t["txn_date"], "%Y-%m-%d"),
                amount=abs(t["amount"]),
                vendor=t["payee_name"],
                notes=t.get("memo", "")
            ) for t in stored_transactions
        ]

        receipts = []
        for file_id, receipt_data in processed_receipts.items():
            if not receipt_data.get("extraction_success"):
                continue
            # process_document stores every key, possibly as None, so a
            # dict.get default would never apply; `or` also covers None values.
            receipts.append(Receipt(
                id=file_id,
                file_name=receipt_data.get("filename") or "",
                upload_date=receipt_data.get("upload_date") or datetime.now(),
                receipt_date=datetime.strptime(receipt_data.get("date") or "2024-01-01", "%Y-%m-%d"),
                amount=receipt_data.get("total_amount") or 0.0,
                tax=receipt_data.get("tax_amount") or 0.0,
                vendor=receipt_data.get("vendor") or "",
                category=receipt_data.get("category") or ""
            ))

        if not receipts:
            raise HTTPException(status_code=400, detail="No successfully processed receipts found.")

        # Process matching using AI engine
        matches = matching_engine.process_matching(receipts, transactions)

        # Convert to response format
        match_responses = [
            MatchResponse(
                receipt_id=match.receipt.id,
                transaction_id=match.transaction.id,
                confidence_score=match.confidence_score,
                match_reason=match.match_reason,
                receipt_vendor=match.receipt.vendor,
                receipt_amount=match.receipt.amount,
                transaction_vendor=match.transaction.vendor,
                transaction_amount=match.transaction.amount
            ) for match in matches
        ]

        # Get statistics
        stats = matching_engine.get_matching_stats(matches)

        return MatchingResponse(matches=match_responses, stats=stats)

    except HTTPException:
        # Let the deliberate 400 responses above through unchanged instead of
        # having the generic handler below rewrap them as 500.
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e)) from e
@app.post("/approve")
async def approve_match(request: ApprovalRequest):
"""
@@ -504,7 +595,26 @@ async def get_stats():
"recent_feedback_logs": len(recent_logs),
"active_rules": len([r for r in matching_engine.rules_engine.rules if r.status == "active"]),
"uploaded_documents": len(uploaded_files),
"processed_documents": len([f for f in uploaded_files.values() if f["status"] == "processed"])
"processed_documents": len([f for f in uploaded_files.values() if f["status"] == "processed"]),
"stored_transactions": len(stored_transactions),
"processed_receipts": len(processed_receipts)
}
except Exception as e:
raise HTTPException(status_code=500, detail=str(e))
@app.get("/status")
async def get_status():
"""Get current system status for demo purposes"""
try:
return {
"csv_uploaded": len(stored_transactions) > 0,
"transactions_count": len(stored_transactions),
"receipts_uploaded": len(uploaded_files),
"receipts_processed": len(processed_receipts),
"ready_for_matching": len(stored_transactions) > 0 and len(processed_receipts) > 0,
"sample_transactions": stored_transactions[:3] if stored_transactions else [],
"sample_receipts": list(processed_receipts.keys())[:3] if processed_receipts else []
}
except Exception as e: