Update AI matcher to return only the best match with confidence score

Improve AI matching to show ALL potential matches with confidence scores
Add auto-matching endpoint and data storage for easier demo workflow
2025-07-02 21:39:41 +01:00 · 2025-07-02 21:15:33 +01:00 · 2025-07-02 21:02:11 +01:00
2 changed files with 130 additions and 9 deletions
@@ -13,6 +13,7 @@ class AIMatcher:
        matches = []
        for receipt in receipts:
            # Get the BEST match for this receipt (highest confidence score)
            best_match = self._find_best_match(receipt, transactions)
            if best_match:
                matches.append(best_match)
@@ -20,6 +21,7 @@ class AIMatcher:
        return sorted(matches, key=lambda x: x.confidence_score, reverse=True)
    def _find_best_match(self, receipt: Receipt, transactions: List[Transaction]) -> Match:
        """Find the BEST match for a receipt (highest confidence score)"""
        candidates = self._filter_candidates(receipt, transactions)
        if not candidates:
            return None
@@ -29,17 +31,18 @@ class AIMatcher:
        for transaction in candidates:
            score, reason = self._calculate_match_score(receipt, transaction)
-            if score > highest_score and score >= config.CONFIDENCE_THRESHOLD:
+            # Keep the match with the highest score, regardless of how low it is
            if score > highest_score:
                highest_score = score
                best_match = Match(receipt, transaction, score, reason)
        return best_match
    def _filter_candidates(self, receipt: Receipt, transactions: List[Transaction]) -> List[Transaction]:
-        # Return ALL transactions - let the AI decide on scoring
+        # Return MOST transactions - let the AI decide on scoring
-        # Only filter out transactions with completely different amounts (>50% difference) to avoid obvious mismatches
+        # Only filter out transactions with completely different amounts (>100% difference) to avoid obvious mismatches
        candidates = []
-        amount_threshold = receipt.amount * 0.5  # 50% threshold for obvious mismatches
+        amount_threshold = receipt.amount * 1.0  # 100% threshold - more inclusive
        for transaction in transactions:
            # Use absolute value for transaction amount comparison
@@ -68,16 +71,24 @@ class AIMatcher:
        - Amount difference: ${amount_diff} ({amount_percent_diff:.1f}%)
        - Vendor comparison: "{receipt.vendor}" vs "{transaction.vendor}"
-        Scoring guidelines:
+        Score this potential match based on how likely it is the correct match:
        - Perfect matches (same vendor, amount, date): 0.95-1.0
        - High confidence (minor differences): 0.8-0.94
        - Medium confidence (moderate differences): 0.6-0.79
        - Low confidence (significant differences): 0.4-0.59
        - Very low confidence (major differences): 0.2-0.39
-        - No match: 0.0-0.19
+        - Minimal similarity: 0.1-0.19
        - No meaningful similarity: 0.0-0.09
-        Consider vendor name similarity, amount accuracy, and date proximity.
+        Examples:
-        Score based on your discretion - even imperfect matches should get scores if there's reasonable similarity.
+        - Same vendor, same amount, 11 days apart: 0.7-0.8
        - Similar vendor name, same amount, same date: 0.8-0.9
        - Same vendor, 10% amount difference, same date: 0.6-0.7
        - Different vendor, same amount, same date: 0.3-0.4
        - Completely different vendor, amount, date: 0.1-0.2
        Consider vendor name similarity, amount accuracy, and date proximity. Score based on overall likelihood this is the correct match.
        Return only: score|reason
        """
@@ -41,6 +41,10 @@ drive_sync = GoogleDriveSync()
 # In-memory storage for uploaded files (in production, use a database)
 # Keyed by file_id; each entry carries at least "status" and, once processed, "extracted_data".
 uploaded_files = {}
 # Store imported transactions globally for easy access
 # Replaced wholesale by the CSV import endpoint; read by /match-auto, /stats and /status.
 stored_transactions = []
 # Extracted receipt fields keyed by file_id, filled in by process_document
 # and consumed by /match-auto. Values may be None when extraction did not find a field.
 processed_receipts = {}
@app.get("/")
 async def root():
    """Health check endpoint"""
@@ -137,6 +141,10 @@ async def import_quickbooks_transactions_csv(file: UploadFile = File(...)):
                })
            except Exception as e:
                errors.append(f"Row {idx+1}: {str(e)}")
        # Store transactions globally for auto-matching
        global stored_transactions
        stored_transactions = transactions
        # Use the same logic as the JSON import endpoint
        request_obj = QuickBooksImportRequest(transactions=transactions)
        response = await import_quickbooks_transactions(request_obj)
@@ -219,6 +227,21 @@ async def process_document(file_id: str):
        else:
            uploaded_files[file_id]["status"] = "processed"
            uploaded_files[file_id]["extracted_data"] = result
            # Store processed receipt data for auto-matching
            global processed_receipts
            processed_receipts[file_id] = {
                "filename": file_info["filename"],
                "upload_date": file_info["upload_date"],
                "extraction_success": result.get("extraction_success", False),
                "vendor": result.get("vendor"),
                "total_amount": result.get("total_amount"),
                "tax_amount": result.get("tax_amount"),
                "date": result.get("date"),
                "category": result.get("category"),
                "confidence": result.get("confidence"),
                "error": result.get("error")
            }
        return DocumentProcessResponse(
            file_id=file_id,
@@ -422,6 +445,74 @@ async def match_receipts_transactions(request: MatchingRequest):
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
@app.post("/match-auto", response_model=MatchingResponse)
async def match_auto():
    """
    Automatically match all processed receipts against all imported transactions.

    This endpoint uses the stored transaction data from CSV import and
    all processed receipts to perform matching without requiring manual data input.

    Returns:
        MatchingResponse holding one MatchResponse per match returned by the
        matching engine, plus the engine's statistics for those matches.

    Raises:
        HTTPException: 400 when no transactions have been imported, no receipts
            have been processed, or no receipt was extracted successfully;
            500 for any other failure during conversion or matching.
    """
    try:
        if not stored_transactions:
            raise HTTPException(status_code=400, detail="No transactions imported. Please upload CSV first.")
        if not processed_receipts:
            raise HTTPException(status_code=400, detail="No receipts processed. Please upload and process receipts first.")

        # Convert the stored transaction dicts to Transaction models
        transactions = [
            Transaction(
                id=t["id"],
                transaction_date=datetime.strptime(t["txn_date"], "%Y-%m-%d"),
                amount=abs(t["amount"]),
                vendor=t["payee_name"],
                notes=t.get("memo", "")
            ) for t in stored_transactions
        ]

        # Only receipts whose extraction succeeded are eligible for matching.
        # The stored dict always contains every key (possibly with a None value),
        # so fall back with `or` rather than a .get() default, which would never apply.
        receipts = []
        for file_id, receipt_data in processed_receipts.items():
            if not receipt_data.get("extraction_success"):
                continue
            receipts.append(Receipt(
                id=file_id,
                file_name=receipt_data.get("filename") or "",
                upload_date=receipt_data.get("upload_date") or datetime.now(),
                receipt_date=datetime.strptime(receipt_data.get("date") or "2024-01-01", "%Y-%m-%d"),
                amount=receipt_data.get("total_amount") or 0.0,
                tax=receipt_data.get("tax_amount") or 0.0,
                vendor=receipt_data.get("vendor") or "",
                category=receipt_data.get("category") or ""
            ))

        if not receipts:
            raise HTTPException(status_code=400, detail="No successfully processed receipts found.")

        # Process matching using AI engine
        matches = matching_engine.process_matching(receipts, transactions)

        # Convert to response format
        match_responses = [
            MatchResponse(
                receipt_id=match.receipt.id,
                transaction_id=match.transaction.id,
                confidence_score=match.confidence_score,
                match_reason=match.match_reason,
                receipt_vendor=match.receipt.vendor,
                receipt_amount=match.receipt.amount,
                transaction_vendor=match.transaction.vendor,
                transaction_amount=match.transaction.amount
            ) for match in matches
        ]

        # Get statistics
        stats = matching_engine.get_matching_stats(matches)
        return MatchingResponse(matches=match_responses, stats=stats)
    except HTTPException:
        # Let the deliberate 4xx responses above through unchanged; the generic
        # handler below would otherwise rewrap them as 500 errors.
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e)) from e
@app.post("/approve")
 async def approve_match(request: ApprovalRequest):
    """
@@ -504,7 +595,26 @@ async def get_stats():
            "recent_feedback_logs": len(recent_logs),
            "active_rules": len([r for r in matching_engine.rules_engine.rules if r.status == "active"]),
            "uploaded_documents": len(uploaded_files),
-            "processed_documents": len([f for f in uploaded_files.values() if f["status"] == "processed"])
+            "processed_documents": len([f for f in uploaded_files.values() if f["status"] == "processed"]),
            "stored_transactions": len(stored_transactions),
            "processed_receipts": len(processed_receipts)
        }
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
@app.get("/status")
 async def get_status():
    """Get current system status for demo purposes"""
    try:
        return {
            "csv_uploaded": len(stored_transactions) > 0,
            "transactions_count": len(stored_transactions),
            "receipts_uploaded": len(uploaded_files),
            "receipts_processed": len(processed_receipts),
            "ready_for_matching": len(stored_transactions) > 0 and len(processed_receipts) > 0,
            "sample_transactions": stored_transactions[:3] if stored_transactions else [],
            "sample_receipts": list(processed_receipts.keys())[:3] if processed_receipts else []
        }
    except Exception as e:
Author	SHA1	Message	Date
Iyeoluwa Akinrinola	6b6633b165	Update AI matcher to return only the best match with confidence score	2025-07-02 21:39:41 +01:00
Iyeoluwa Akinrinola	58d579700e	Improve AI matching to show ALL potential matches with confidence scores	2025-07-02 21:15:33 +01:00
Iyeoluwa Akinrinola	08386f8544	Add auto-matching endpoint and data storage for easier demo workflow	2025-07-02 21:02:11 +01:00