From 2e9afe52046d12b50a049854d23017668f43648b Mon Sep 17 00:00:00 2001 From: Iyeoluwa Akinrinola Date: Mon, 7 Jul 2025 17:42:52 +0100 Subject: [PATCH] feat: include receipt tax amount in match-specific endpoint response --- .gitignore | 3 ++- ai_matcher.py | 7 +++++++ api_models.py | 5 +++++ document_processor.py | 7 +++++++ google_drive_sync.py | 1 + main.py | 10 +++++++++- models.py | 1 + 7 files changed, 32 insertions(+), 2 deletions(-) diff --git a/.gitignore b/.gitignore index 3d51a72..dc89393 100644 --- a/.gitignore +++ b/.gitignore @@ -225,4 +225,5 @@ Thumbs.db uploads/ chequing statement.csv -test_images/ \ No newline at end of file +test_images/ +.cursorrules.md \ No newline at end of file diff --git a/ai_matcher.py b/ai_matcher.py index 1ff9ff3..5c9ca0f 100644 --- a/ai_matcher.py +++ b/ai_matcher.py @@ -107,12 +107,17 @@ class AIMatcher: Compare this receipt with this transaction and provide a confidence score (0-1) and brief reason. Receipt: {receipt.vendor}, ${receipt.amount}, {receipt.receipt_date.strftime('%Y-%m-%d')} + Receipt Description: {receipt.description} + Receipt Category: {receipt.category} Transaction: {transaction.vendor}, ${transaction.amount} (absolute: ${transaction_amount_abs}), {transaction.transaction_date.strftime('%Y-%m-%d')} + Transaction Notes: {transaction.notes} Differences: - Date difference: {date_diff} days - Amount difference: ${amount_diff} ({amount_percent_diff:.1f}%) - Vendor comparison: "{receipt.vendor}" vs "{transaction.vendor}" + - Description/Notes comparison: "{receipt.description}" vs "{transaction.notes}" + - Category: {receipt.category} Score this potential match based on how likely it is the correct match: @@ -124,6 +129,8 @@ class AIMatcher: - Minimal similarity: 0.1-0.19 - No meaningful similarity: 0.0-0.09 + Consider description and category similarity in your scoring. + IMPORTANT: Return ONLY the score and reason separated by a pipe character. Format: [score]|[reason] Example: 0.85|Same vendor, same amount, 2 days apart diff --git a/api_models.py b/api_models.py index 471049f..f1e382f 100644 --- a/api_models.py +++ b/api_models.py @@ -11,6 +11,7 @@ class ReceiptRequest(BaseModel): tax: float vendor: str category: str + description: str class TransactionRequest(BaseModel): id: str @@ -49,6 +50,9 @@ class MatchResponse(BaseModel): match_reason: str receipt_vendor: str receipt_amount: float + receipt_description: str + receipt_category: str + receipt_tax_amount: float transaction_vendor: str transaction_amount: float @@ -83,6 +87,7 @@ class DocumentProcessResponse(BaseModel): file_id: str extraction_success: bool vendor: Optional[str] = None + description: Optional[str] = None total_amount: Optional[float] = None tax_amount: Optional[float] = None date: Optional[str] = None diff --git a/document_processor.py b/document_processor.py index f196c58..156f3be 100644 --- a/document_processor.py +++ b/document_processor.py @@ -40,6 +40,7 @@ class DocumentProcessor: Analyze this receipt image and extract the following information in JSON format: { "vendor": "Store/company name", + "description": "Detailed description of items/services purchased", "total_amount": 0.00, "tax_amount": 0.00, "date": "YYYY-MM-DD", @@ -49,6 +50,7 @@ class DocumentProcessor: Rules: - Extract vendor name as it appears on receipt + - Extract description of items/services purchased (e.g., "Coffee and sandwich", "Gasoline", "Office supplies") - Total amount should be the final total including tax - Tax amount is separate tax line if available - Date should be the date on the receipt @@ -125,6 +127,7 @@ class DocumentProcessor: Extract: {{ "vendor": "Store/company name", + "description": "Detailed description of items/services purchased", "total_amount": 0.00, "tax_amount": 0.00, "date": "YYYY-MM-DD", @@ -134,6 +137,7 @@ class DocumentProcessor: Rules: - Extract vendor name as it appears on receipt + - Extract description of items/services purchased (e.g., "Coffee and sandwich", "Gasoline", "Office supplies") - Total amount should be the final total including tax - Tax amount is separate tax line if available - Date should be the date on the receipt @@ -180,6 +184,7 @@ class DocumentProcessor: # Try to extract individual fields using regex vendor_match = re.search(r'"vendor"\s*:\s*"([^"]*)"', json_str) + description_match = re.search(r'"description"\s*:\s*"([^"]*)"', json_str) total_amount_match = re.search(r'"total_amount"\s*:\s*([0-9.]+)', json_str) tax_amount_match = re.search(r'"tax_amount"\s*:\s*([0-9.]+)', json_str) date_match = re.search(r'"date"\s*:\s*"([^"]*)"', json_str) @@ -188,6 +193,7 @@ class DocumentProcessor: data = { "vendor": vendor_match.group(1) if vendor_match else "", + "description": description_match.group(1) if description_match else "", "total_amount": float(total_amount_match.group(1)) if total_amount_match else 0.0, "tax_amount": float(tax_amount_match.group(1)) if tax_amount_match else 0.0, "date": date_match.group(1) if date_match else "", @@ -198,6 +204,7 @@ class DocumentProcessor: # Validate and clean data return { "vendor": str(data.get("vendor", "")).strip(), + "description": str(data.get("description", "")).strip(), "total_amount": float(data.get("total_amount", 0)), "tax_amount": float(data.get("tax_amount", 0)), "date": str(data.get("date", "")).strip(), diff --git a/google_drive_sync.py b/google_drive_sync.py index 53a4516..1596060 100644 --- a/google_drive_sync.py +++ b/google_drive_sync.py @@ -122,6 +122,7 @@ class GoogleDriveSync: "file_size": file.get('size', 0), "extraction_success": True, "vendor": "Demo Vendor", + "description": "Coffee and sandwich", "total_amount": 25.50, "tax_amount": 2.04, "date": "2024-01-15", diff --git a/main.py b/main.py index c4242c2..04cc872 100644 --- a/main.py +++ b/main.py @@ -252,6 +252,7 @@ async def process_document(file_id: str): file_id=file_id, extraction_success=receipt_data.get("extraction_success", False), vendor=receipt_data.get("vendor", ""), + description=receipt_data.get("description", ""), total_amount=receipt_data.get("total_amount", 0.0), tax_amount=receipt_data.get("tax_amount", 0.0), date=receipt_data.get("date", ""), @@ -350,6 +351,9 @@ async def match_specific_receipts(file_ids: List[str]): # Handle missing category field category = receipt_data.get("category", "Other") + # Handle description field + description = receipt_data.get("description", "") + # Handle tax field tax = receipt_data.get("tax", receipt_data.get("tax_amount", 0.0)) try: @@ -365,7 +369,8 @@ async def match_specific_receipts(file_ids: List[str]): amount=amount, tax=tax, vendor=vendor, - category=category + category=category, + description=description ) receipts.append(receipt) logger.info(f"Added receipt: {receipt.vendor} - ${receipt.amount}") @@ -414,6 +419,9 @@ async def match_specific_receipts(file_ids: List[str]): match_reason=match.match_reason, receipt_vendor=match.receipt.vendor, receipt_amount=match.receipt.amount, + receipt_description=match.receipt.description, + receipt_category=match.receipt.category, + receipt_tax_amount=match.receipt.tax, transaction_vendor=match.transaction.vendor, transaction_amount=match.transaction.amount ) diff --git a/models.py b/models.py index 6e52781..684fbd3 100644 --- a/models.py +++ b/models.py @@ -12,6 +12,7 @@ class Receipt: tax: float vendor: str category: str + description: str @dataclass class Transaction: