Update AI matcher to return only the best match with confidence score

Improve AI matching to show ALL potential matches with confidence scores
Add auto-matching endpoint and data storage for easier demo workflow
2025-07-02 21:39:41 +01:00 · 2025-07-02 21:15:33 +01:00 · 2025-07-02 21:02:11 +01:00
2 changed files with 130 additions and 9 deletions
@@ -13,6 +13,7 @@ class AIMatcher:
        matches = []
        
        for receipt in receipts:
+            # Get the BEST match for this receipt (highest confidence score)
            best_match = self._find_best_match(receipt, transactions)
            if best_match:
                matches.append(best_match)
@@ -20,6 +21,7 @@ class AIMatcher:
        return sorted(matches, key=lambda x: x.confidence_score, reverse=True)
    
    def _find_best_match(self, receipt: Receipt, transactions: List[Transaction]) -> Match:
+        """Find the BEST match for a receipt (highest confidence score)"""
        candidates = self._filter_candidates(receipt, transactions)
        if not candidates:
            return None
@@ -29,17 +31,18 @@ class AIMatcher:
        
        for transaction in candidates:
            score, reason = self._calculate_match_score(receipt, transaction)
-            if score > highest_score and score >= config.CONFIDENCE_THRESHOLD:
+            # Keep the match with the highest score, regardless of how low it is
+            if score > highest_score:
                highest_score = score
                best_match = Match(receipt, transaction, score, reason)
        
        return best_match
    
    def _filter_candidates(self, receipt: Receipt, transactions: List[Transaction]) -> List[Transaction]:
-        # Return ALL transactions - let the AI decide on scoring
-        # Only filter out transactions with completely different amounts (>50% difference) to avoid obvious mismatches
+        # Return MOST transactions - let the AI decide on scoring
+        # Only filter out transactions with completely different amounts (>100% difference) to avoid obvious mismatches
        candidates = []
-        amount_threshold = receipt.amount * 0.5  # 50% threshold for obvious mismatches
+        amount_threshold = receipt.amount * 1.0  # 100% threshold - more inclusive
        
        for transaction in transactions:
            # Use absolute value for transaction amount comparison
@@ -68,16 +71,24 @@ class AIMatcher:
        - Amount difference: ${amount_diff} ({amount_percent_diff:.1f}%)
        - Vendor comparison: "{receipt.vendor}" vs "{transaction.vendor}"
        
-        Scoring guidelines:
+        Score this potential match based on how likely it is the correct match:
+        
        - Perfect matches (same vendor, amount, date): 0.95-1.0
        - High confidence (minor differences): 0.8-0.94
        - Medium confidence (moderate differences): 0.6-0.79
        - Low confidence (significant differences): 0.4-0.59
        - Very low confidence (major differences): 0.2-0.39
-        - No match: 0.0-0.19
+        - Minimal similarity: 0.1-0.19
+        - No meaningful similarity: 0.0-0.09
        
-        Consider vendor name similarity, amount accuracy, and date proximity.
-        Score based on your discretion - even imperfect matches should get scores if there's reasonable similarity.
+        Examples:
+        - Same vendor, same amount, 11 days apart: 0.7-0.8
+        - Similar vendor name, same amount, same date: 0.8-0.9
+        - Same vendor, 10% amount difference, same date: 0.6-0.7
+        - Different vendor, same amount, same date: 0.3-0.4
+        - Completely different vendor, amount, date: 0.1-0.2
+        
+        Consider vendor name similarity, amount accuracy, and date proximity. Score based on overall likelihood this is the correct match.
        
        Return only: score|reason
        """
@@ -41,6 +41,10 @@ drive_sync = GoogleDriveSync()
 # In-memory storage for uploaded files (in production, use a database)
 uploaded_files = {}

+# Store imported transactions globally for easy access
+stored_transactions = []
+processed_receipts = {}
+
@app.get("/")
 async def root():
    """Health check endpoint"""
@@ -137,6 +141,10 @@ async def import_quickbooks_transactions_csv(file: UploadFile = File(...)):
                })
            except Exception as e:
                errors.append(f"Row {idx+1}: {str(e)}")
+        # Store transactions globally for auto-matching
+        global stored_transactions
+        stored_transactions = transactions
+        
        # Use the same logic as the JSON import endpoint
        request_obj = QuickBooksImportRequest(transactions=transactions)
        response = await import_quickbooks_transactions(request_obj)
@@ -219,6 +227,21 @@ async def process_document(file_id: str):
        else:
            uploaded_files[file_id]["status"] = "processed"
            uploaded_files[file_id]["extracted_data"] = result
+            
+            # Store processed receipt data for auto-matching
+            global processed_receipts
+            processed_receipts[file_id] = {
+                "filename": file_info["filename"],
+                "upload_date": file_info["upload_date"],
+                "extraction_success": result.get("extraction_success", False),
+                "vendor": result.get("vendor"),
+                "total_amount": result.get("total_amount"),
+                "tax_amount": result.get("tax_amount"),
+                "date": result.get("date"),
+                "category": result.get("category"),
+                "confidence": result.get("confidence"),
+                "error": result.get("error")
+            }
        
        return DocumentProcessResponse(
            file_id=file_id,
@@ -422,6 +445,74 @@ async def match_receipts_transactions(request: MatchingRequest):
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))

+@app.post("/match-auto", response_model=MatchingResponse)
+async def match_auto():
+    """
+    Automatically match all processed receipts against all imported transactions.
+
+    This endpoint uses the stored transaction data from CSV import and
+    all processed receipts to perform matching without requiring manual data input.
+
+    Returns:
+        MatchingResponse with one MatchResponse per match produced by the
+        matching engine, plus the engine's matching statistics.
+
+    Raises:
+        HTTPException(400): no transactions are stored, no receipts have been
+            processed, or none of the processed receipts extracted successfully.
+        HTTPException(500): any other failure while building models or matching.
+    """
+    try:
+        if not stored_transactions:
+            raise HTTPException(status_code=400, detail="No transactions imported. Please upload CSV first.")
+
+        if not processed_receipts:
+            raise HTTPException(status_code=400, detail="No receipts processed. Please upload and process receipts first.")
+
+        # Convert stored transaction dicts to Transaction models
+        transactions = [
+            Transaction(
+                id=t["id"],
+                transaction_date=datetime.strptime(t["txn_date"], "%Y-%m-%d"),
+                amount=abs(t["amount"]),
+                vendor=t["payee_name"],
+                notes=t.get("memo", "")
+            ) for t in stored_transactions
+        ]
+
+        # Entries in processed_receipts always carry these keys, but the values
+        # may be None, so a .get() default would never apply; fall back with `or`.
+        receipts = []
+        for file_id, receipt_data in processed_receipts.items():
+            if receipt_data.get("extraction_success"):
+                receipts.append(Receipt(
+                    id=file_id,
+                    file_name=receipt_data.get("filename") or "",
+                    upload_date=receipt_data.get("upload_date") or datetime.now(),
+                    receipt_date=datetime.strptime(receipt_data.get("date") or "2024-01-01", "%Y-%m-%d"),
+                    amount=receipt_data.get("total_amount") or 0.0,
+                    tax=receipt_data.get("tax_amount") or 0.0,
+                    vendor=receipt_data.get("vendor") or "",
+                    category=receipt_data.get("category") or ""
+                ))
+
+        if not receipts:
+            raise HTTPException(status_code=400, detail="No successfully processed receipts found.")
+
+        # Process matching using AI engine
+        matches = matching_engine.process_matching(receipts, transactions)
+
+        # Convert to response format
+        match_responses = [
+            MatchResponse(
+                receipt_id=match.receipt.id,
+                transaction_id=match.transaction.id,
+                confidence_score=match.confidence_score,
+                match_reason=match.match_reason,
+                receipt_vendor=match.receipt.vendor,
+                receipt_amount=match.receipt.amount,
+                transaction_vendor=match.transaction.vendor,
+                transaction_amount=match.transaction.amount
+            ) for match in matches
+        ]
+
+        # Get statistics
+        stats = matching_engine.get_matching_stats(matches)
+
+        return MatchingResponse(matches=match_responses, stats=stats)
+
+    except HTTPException:
+        # Let the deliberate 400 responses above through unchanged instead of
+        # having the generic handler below re-wrap them as 500 errors.
+        raise
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=str(e)) from e
+
@app.post("/approve")
 async def approve_match(request: ApprovalRequest):
    """
@@ -504,7 +595,26 @@ async def get_stats():
            "recent_feedback_logs": len(recent_logs),
            "active_rules": len([r for r in matching_engine.rules_engine.rules if r.status == "active"]),
            "uploaded_documents": len(uploaded_files),
-            "processed_documents": len([f for f in uploaded_files.values() if f["status"] == "processed"])
+            "processed_documents": len([f for f in uploaded_files.values() if f["status"] == "processed"]),
+            "stored_transactions": len(stored_transactions),
+            "processed_receipts": len(processed_receipts)
+        }
+        
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=str(e))
+
+@app.get("/status")
+async def get_status():
+    """Get current system status for demo purposes"""
+    try:
+        return {
+            "csv_uploaded": len(stored_transactions) > 0,
+            "transactions_count": len(stored_transactions),
+            "receipts_uploaded": len(uploaded_files),
+            "receipts_processed": len(processed_receipts),
+            "ready_for_matching": len(stored_transactions) > 0 and len(processed_receipts) > 0,
+            "sample_transactions": stored_transactions[:3] if stored_transactions else [],
+            "sample_receipts": list(processed_receipts.keys())[:3] if processed_receipts else []
        }
        
    except Exception as e:
Author	SHA1	Message	Date
Iyeoluwa Akinrinola	6b6633b165	Update AI matcher to return only the best match with confidence score	2025-07-02 21:39:41 +01:00
Iyeoluwa Akinrinola	58d579700e	Improve AI matching to show ALL potential matches with confidence scores	2025-07-02 21:15:33 +01:00
Iyeoluwa Akinrinola	08386f8544	Add auto-matching endpoint and data storage for easier demo workflow	2025-07-02 21:02:11 +01:00