Refactor main application structure and improve logging

- Reorganized imports in main.py for better readability and structure.
- Enhanced logging configuration and added more detailed log messages throughout the application.
- Improved error handling and response formatting in transaction import endpoints.
- Streamlined transaction processing logic for CSV and image uploads.
- Updated matching engine to enhance match results with rules and improved logging.
- Refactored tax rules engine for better clarity and maintainability.
- Cleaned up requirements.txt by removing specific versioning for easier dependency management.
This commit is contained in:
bolade
2025-08-06 16:12:53 +01:00
parent 5b3c066cea
commit 1f530da7c4
5 changed files with 668 additions and 346 deletions
+237 -157
View File
@@ -1,37 +1,37 @@
from fastapi import FastAPI, HTTPException, UploadFile, File
from fastapi.middleware.cors import CORSMiddleware
from datetime import datetime
from typing import List
import uuid
import csv
import io
import logging
import uuid
from datetime import datetime
from typing import List
from fastapi import FastAPI, File, HTTPException, UploadFile
from fastapi.middleware.cors import CORSMiddleware
# Configure logging
from ai_rules import AIRule
from api_models import (
DocumentProcessResponse,
DocumentUploadResponse,
MatchingResponse,
MatchResponse,
RuleRequest,
)
from document_processor import DocumentProcessor
from matching_engine import MatchingEngine
from models import Receipt, Transaction
# Log to both a file and the console. basicConfig only has an effect the
# first time it runs against an unconfigured root logger, so this must
# execute before anything else attaches handlers.
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
    handlers=[logging.FileHandler("app.log"), logging.StreamHandler()],
)

# Module-level logger shared by every endpoint in this file.
logger = logging.getLogger(__name__)
from api_models import (
MatchingRequest, MatchingResponse, MatchResponse,
ApprovalRequest, RuleRequest, DocumentUploadResponse,
DocumentProcessResponse, TransactionRequest
)
from models import Receipt, Transaction, Match
from matching_engine import MatchingEngine
from ai_rules import AIRule
from document_processor import DocumentProcessor
# The ASGI application object; every endpoint below registers itself on it
# through the @app.<method> decorators.
app = FastAPI(
    title="AI Bookkeeper - Data Science Engine",
    description="AI-powered receipt-to-transaction matching engine. Receives transaction data and provides intelligent matching capabilities.",
    version="1.0.0",
)
# CORS middleware
@@ -54,19 +54,22 @@ uploaded_files = {}
stored_transactions = []
processed_receipts = {}
@app.get("/")
async def root():
    """Health check endpoint.

    Returns:
        A static dict with the service banner (``message``), the
        ``version`` string and ``status`` set to ``"healthy"``.
    """
    return {
        "message": "AI Bookkeeper Data Science Engine is running",
        "version": "1.0.0",
        "status": "healthy",
    }
# ============================================================================
# TRANSACTION IMPORT ENDPOINTS
# ============================================================================
# Date layouts accepted in the "Transaction Date" column, tried in order.
_CSV_DATE_FORMATS = ("%m/%d/%y", "%m/%d/%Y")


def _csv_row_to_transaction(raw_row, row_number):
    """Convert one ``csv.DictReader`` row into a stored-transaction dict.

    Args:
        raw_row: Mapping of header name to cell text as produced by
            ``csv.DictReader``.
        row_number: 1-based position of the row, used to build the ID.

    Returns:
        A dict with ``id``, ``txn_date`` (``YYYY-MM-DD``), ``amount``
        (float), ``payee_name`` and ``memo``.

    Raises:
        ValueError: If the date or amount is missing or cannot be parsed.
    """
    # Strip header names so "Amount " matches "Amount". DictReader stores
    # surplus cells under a None key; those are dropped here.
    row = {
        key.strip(): value
        for key, value in raw_row.items()
        if isinstance(key, str)
    }

    def cell(column):
        # Short rows yield None for the missing cells; treat them as empty.
        value = row.get(column)
        return value.strip() if isinstance(value, str) else ""

    txn_date_str = cell("Transaction Date")
    if not txn_date_str:
        raise ValueError("Missing transaction date")
    txn_date = None
    for fmt in _CSV_DATE_FORMATS:
        try:
            txn_date = datetime.strptime(txn_date_str, fmt).strftime("%Y-%m-%d")
        except ValueError:
            continue
        break
    if txn_date is None:
        raise ValueError(f"Could not parse date: {txn_date_str}")

    amount_str = cell("Amount")
    if not amount_str:
        raise ValueError("Missing amount")
    # Thousands separators ("1,234.50") are removed before conversion.
    amount = float(amount_str.replace(",", ""))

    memo = f"{cell('Account Type')} {cell('Cheque Number')} {cell('Description 1')}".strip()

    return {
        # The account number is used exactly as it appears in the file.
        "id": f"{row.get('Account Number')}_{row_number}",
        "txn_date": txn_date,
        "amount": amount,
        "payee_name": cell("Description 2"),
        "memo": memo,
    }


@app.post("/transactions/import/csv")
async def import_transactions_csv(file: UploadFile = File(...)):
    """Import transactions from an uploaded CSV file.

    The file must be UTF-8 (a leading BOM is tolerated). Columns read are
    ``Account Number``, ``Transaction Date`` (``MM/DD/YY`` or
    ``MM/DD/YYYY``), ``Amount`` and ``Description 2``; ``Account Type``,
    ``Cheque Number`` and ``Description 1`` are joined into the memo.
    Header names are matched after stripping surrounding whitespace.

    Rows that cannot be converted are skipped and reported in ``errors``.
    The converted rows replace the module-level ``stored_transactions``.

    Returns:
        A dict with ``imported_count``, ``converted_transactions`` and
        ``errors`` (one ``"Row N: reason"`` string per rejected row).

    Raises:
        HTTPException: 400 if the upload is not valid UTF-8; 500 for any
            other unexpected failure.
    """
    global stored_transactions
    try:
        content = await file.read()
        try:
            # utf-8-sig reads plain UTF-8 too; it only drops a leading BOM,
            # which would otherwise become part of the first header name.
            decoded = content.decode("utf-8-sig")
        except UnicodeDecodeError as exc:
            raise HTTPException(
                status_code=400,
                detail=f"File is not valid UTF-8: {exc}",
            ) from exc

        reader = csv.DictReader(io.StringIO(decoded))
        transactions = []
        errors = []
        for row_number, raw_row in enumerate(reader, start=1):
            try:
                transactions.append(_csv_row_to_transaction(raw_row, row_number))
            except Exception as e:
                # Best effort: one bad row must not abort the whole import.
                logger.warning("Skipping CSV row %d: %s", row_number, e)
                errors.append(f"Row {row_number}: {str(e)}")

        # Store transactions globally for auto-matching
        stored_transactions = transactions
        return {
            "imported_count": len(transactions),
            "converted_transactions": transactions,
            "errors": errors,
        }
    except HTTPException:
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
@app.post("/transactions/import/image")
async def import_transactions_from_image(file: UploadFile = File(...)):
    """Import transactions extracted from an uploaded image or PDF.

    The upload is saved through ``document_processor.save_uploaded_file``
    and passed to ``document_processor.extract_transactions_from_image``.
    Each extracted entry (``date``, ``amount``, ``vendor``, ``memo``) is
    converted to the stored-transaction shape, and the result replaces the
    module-level ``stored_transactions``.

    Returns:
        A dict with ``imported_count``, ``converted_transactions`` and
        ``errors`` (one ``"Transaction N: reason"`` string per entry that
        could not be converted).

    Raises:
        HTTPException: 400 for an unsupported or missing file extension;
            500 if extraction reports failure or anything unexpected
            happens.
    """
    global stored_transactions
    allowed_types = ["jpg", "jpeg", "png", "gif", "bmp", "pdf"]
    try:
        # Validate file type. A missing filename yields an empty extension
        # and is rejected by the same check.
        filename = file.filename or ""
        file_extension = filename.split(".")[-1].lower()
        if file_extension not in allowed_types:
            raise HTTPException(
                status_code=400,
                detail=f"Unsupported file type. Allowed: {allowed_types}",
            )

        # Read file content
        content = await file.read()

        # Save file to disk
        image_path = await document_processor.save_uploaded_file(content, file.filename)

        # Extract transactions from image (pass file path)
        extraction_result = await document_processor.extract_transactions_from_image(
            image_path
        )
        if not extraction_result.get("extraction_success", False):
            failure = extraction_result.get("error", "Extraction failed")
            logger.error("Transaction extraction failed for %s: %s", file.filename, failure)
            raise HTTPException(status_code=500, detail=failure)

        transactions = []
        errors = []
        extracted = extraction_result.get("transactions", [])
        for position, txn in enumerate(extracted, start=1):
            try:
                txn_date_raw = txn.get("date")
                # Parse date to YYYY-MM-DD format
                txn_date = document_processor._parse_date_to_iso(txn_date_raw)
                if not txn_date:
                    if not txn_date_raw:
                        raise ValueError("Missing transaction date")
                    # Fallback: prefix the current year.
                    # NOTE(review): presumably the raw value is "MM-DD" when
                    # parsing fails - confirm against the extractor output.
                    txn_date = f"{datetime.now().year}-{txn_date_raw}"

                transactions.append(
                    {
                        "id": f"img_{file.filename}_{position}",
                        "txn_date": txn_date,
                        "amount": txn.get("amount"),
                        "payee_name": txn.get("vendor"),
                        "memo": txn.get("memo", ""),
                    }
                )
            except Exception as e:
                # Best effort: report the entry instead of dropping it silently.
                logger.warning("Skipping extracted transaction %d: %s", position, e)
                errors.append(f"Transaction {position}: {str(e)}")

        # Store transactions globally for auto-matching
        stored_transactions = transactions
        return {
            "imported_count": len(stored_transactions),
            "converted_transactions": stored_transactions,
            "errors": errors,
        }
    except HTTPException:
        # Keep the intended status code (e.g. the 400 above) instead of
        # letting the generic handler below turn it into a 500.
        raise
    except Exception as e:
        logger.exception(f"Error importing transactions from image: {str(e)}")
        raise HTTPException(status_code=500, detail=str(e))
# ============================================================================
# DOCUMENT PROCESSING ENDPOINTS
# ============================================================================
@app.post("/upload-multiple", response_model=List[DocumentUploadResponse])
async def upload_multiple_documents(files: List[UploadFile] = File(...)):
    """
    Upload multiple receipt images for processing.

    This endpoint accepts multiple image files and returns file IDs
    that can be used with the /process/{file_id} endpoint.

    Every file is validated before any of them is stored, so a rejected
    batch leaves nothing behind in ``uploaded_files``.

    Returns:
        One ``DocumentUploadResponse`` per uploaded file, in request order.

    Raises:
        HTTPException: 400 if any file has an unsupported or missing
            extension; 500 for any other unexpected failure.
    """
    allowed_types = ["jpg", "jpeg", "png", "gif", "bmp", "pdf"]
    try:
        # First pass: validate file types only, without storing anything.
        validated = []
        for file in files:
            filename = file.filename or ""
            file_extension = filename.split(".")[-1].lower()
            if file_extension not in allowed_types:
                raise HTTPException(
                    status_code=400,
                    detail=f"Unsupported file type for {file.filename}. Allowed: {allowed_types}",
                )
            validated.append((file, file_extension))

        # Second pass: read and register each file.
        responses = []
        for file, file_extension in validated:
            # Generate unique file ID
            file_id = str(uuid.uuid4())

            # Read and store file content
            content = await file.read()
            # One timestamp for both the stored record and the response so
            # they cannot disagree.
            upload_date = datetime.now()
            uploaded_files[file_id] = {
                "filename": file.filename,
                "content": content,
                "upload_date": upload_date,
            }

            responses.append(
                DocumentUploadResponse(
                    file_id=file_id,
                    filename=file.filename,
                    file_type=file_extension,
                    upload_date=upload_date,
                    status="uploaded",
                )
            )

        return responses
    except HTTPException:
        # Keep the 400 raised above instead of re-wrapping it as a 500.
        raise
    except Exception as e:
        logger.exception(f"Error uploading documents: {str(e)}")
        raise HTTPException(status_code=500, detail=str(e))
@app.post("/process/{file_id}", response_model=DocumentProcessResponse)
async def process_document(file_id: str):
"""
Process a previously uploaded document to extract receipt information.
This endpoint uses AI to extract structured data from receipt images,
including vendor, amount, date, and category information.
"""
@@ -237,17 +269,19 @@ async def process_document(file_id: str):
# Check if file exists
if file_id not in uploaded_files:
raise HTTPException(status_code=404, detail=f"File {file_id} not found")
file_data = uploaded_files[file_id]
# Save file temporarily and process it
file_path = await document_processor.save_uploaded_file(file_data["content"], file_data["filename"])
file_type = file_data["filename"].split('.')[-1].lower()
file_path = await document_processor.save_uploaded_file(
file_data["content"], file_data["filename"]
)
file_type = file_data["filename"].split(".")[-1].lower()
receipt_data = await document_processor.process_file(file_path, file_type)
# Store processed receipt
processed_receipts[file_id] = receipt_data
return DocumentProcessResponse(
file_id=file_id,
extraction_success=receipt_data.get("extraction_success", False),
@@ -258,35 +292,40 @@ async def process_document(file_id: str):
date=receipt_data.get("date", ""),
category=receipt_data.get("category", ""),
confidence=receipt_data.get("confidence", 0.0),
error=receipt_data.get("error", None)
error=receipt_data.get("error", None),
)
except Exception as e:
logger.error(f"Error processing document {file_id}: {str(e)}")
raise HTTPException(status_code=500, detail=str(e))
# ============================================================================
# MATCHING ENDPOINTS
# ============================================================================
@app.post("/match-specific", response_model=MatchingResponse)
async def match_specific_receipts(file_ids: List[str]):
"""
Match specific receipts against imported transactions.
This endpoint takes a list of receipt file IDs and matches them against
the currently imported transactions using AI-powered matching logic.
"""
try:
logger.info(f"Starting match-specific for file IDs: {file_ids}")
# Check if transactions are imported
if not stored_transactions:
logger.warning("No transactions imported")
raise HTTPException(status_code=400, detail="No transactions imported. Please upload CSV first.")
raise HTTPException(
status_code=400,
detail="No transactions imported. Please upload CSV first.",
)
logger.info(f"Found {len(stored_transactions)} stored transactions")
# Convert stored transactions to Transaction objects
transactions = []
for txn in stored_transactions:
@@ -297,32 +336,38 @@ async def match_specific_receipts(file_ids: List[str]):
transaction_date=txn_date,
amount=txn["amount"],
vendor=txn["payee_name"],
notes=txn["memo"]
notes=txn["memo"],
)
transactions.append(transaction)
except Exception as e:
logger.warning(f"Error converting transaction {txn['id']}: {str(e)}")
continue
logger.info(f"Converted {len(transactions)} transactions")
# Get receipts for the specified file IDs
receipts = []
missing_files = []
for file_id in file_ids:
if file_id in processed_receipts:
receipt_data = processed_receipts[file_id]
logger.info(f"DEBUG: receipt_data for {file_id}: {receipt_data}")
logger.info(f"DEBUG: receipt_data keys for {file_id}: {list(receipt_data.keys())}")
logger.info(
f"DEBUG: receipt_data keys for {file_id}: {list(receipt_data.keys())}"
)
try:
# Handle missing date field
if "date" not in receipt_data or not receipt_data["date"]:
logger.warning(f"Missing date for receipt {file_id}, using current date")
logger.warning(
f"Missing date for receipt {file_id}, using current date"
)
receipt_date = datetime.now()
else:
receipt_date = datetime.strptime(receipt_data["date"], "%Y-%m-%d")
receipt_date = datetime.strptime(
receipt_data["date"], "%Y-%m-%d"
)
# Handle missing amount field - try multiple possible keys
amount = receipt_data.get("amount")
if amount is None:
@@ -330,37 +375,43 @@ async def match_specific_receipts(file_ids: List[str]):
if amount is None:
amount = receipt_data.get("amount_total")
if amount is None:
logger.warning(f"Missing amount for receipt {file_id}, using 0.0")
logger.warning(
f"Missing amount for receipt {file_id}, using 0.0"
)
amount = 0.0
# Ensure amount is a float
try:
amount = float(amount)
except (ValueError, TypeError):
logger.warning(f"Invalid amount '{amount}' for receipt {file_id}, using 0.0")
logger.warning(
f"Invalid amount '{amount}' for receipt {file_id}, using 0.0"
)
amount = 0.0
logger.info(f"DEBUG: amount for {file_id}: {amount}")
# Handle missing vendor field
vendor = receipt_data.get("vendor", "")
if not vendor:
logger.warning(f"Missing vendor for receipt {file_id}, using 'Unknown'")
logger.warning(
f"Missing vendor for receipt {file_id}, using 'Unknown'"
)
vendor = "Unknown"
# Handle missing category field
category = receipt_data.get("category", "Other")
# Handle description field
description = receipt_data.get("description", "")
# Handle tax field
tax = receipt_data.get("tax", receipt_data.get("tax_amount", 0.0))
try:
tax = float(tax)
except (ValueError, TypeError):
tax = 0.0
receipt = Receipt(
id=file_id,
file_name=uploaded_files[file_id]["filename"],
@@ -370,35 +421,47 @@ async def match_specific_receipts(file_ids: List[str]):
tax=tax,
vendor=vendor,
category=category,
description=description
description=description,
)
receipts.append(receipt)
logger.info(f"Added receipt: {receipt.vendor} - ${receipt.amount}")
except Exception as e:
logger.warning(f"Error creating receipt object for {file_id}: {str(e)}")
logger.warning(
f"Error creating receipt object for {file_id}: {str(e)}"
)
missing_files.append(f"{file_id} (error: {str(e)})")
else:
logger.warning(f"Receipt {file_id} not found in processed_receipts")
missing_files.append(f"{file_id} (not found)")
if missing_files:
logger.error(f"Missing files: {missing_files}")
raise HTTPException(status_code=400, detail=f"Missing files: {missing_files}")
logger.info(f"Processing {len(receipts)} receipts against {len(transactions)} transactions")
raise HTTPException(
status_code=400, detail=f"Missing files: {missing_files}"
)
logger.info(
f"Processing {len(receipts)} receipts against {len(transactions)} transactions"
)
# Perform matching
try:
logger.info("Starting direct matching call (without ThreadPoolExecutor)")
logger.info(f"matching_engine type: {type(matching_engine)}")
logger.info(f"matching_engine.process_matching type: {type(matching_engine.process_matching)}")
logger.info(
f"matching_engine.process_matching type: {type(matching_engine.process_matching)}"
)
logger.info(f"receipts type: {type(receipts)}, length: {len(receipts)}")
logger.info(f"transactions type: {type(transactions)}, length: {len(transactions)}")
logger.info(
f"transactions type: {type(transactions)}, length: {len(transactions)}"
)
matches = matching_engine.process_matching(receipts, transactions)
logger.info(f"Matching completed successfully. Found {len(matches)} matches")
logger.info(
f"Matching completed successfully. Found {len(matches)} matches"
)
# Convert matches to response format
match_responses = []
for match in matches:
@@ -411,7 +474,7 @@ async def match_specific_receipts(file_ids: List[str]):
logger.info(f" receipt_amount: {match.receipt.amount}")
logger.info(f" transaction_vendor: {match.transaction.vendor}")
logger.info(f" transaction_amount: {match.transaction.amount}")
match_response = MatchResponse(
receipt_id=match.receipt.id,
transaction_id=match.transaction.id,
@@ -423,53 +486,62 @@ async def match_specific_receipts(file_ids: List[str]):
receipt_category=match.receipt.category,
receipt_tax_amount=match.receipt.tax,
transaction_vendor=match.transaction.vendor,
transaction_amount=match.transaction.amount
transaction_amount=match.transaction.amount,
)
match_responses.append(match_response)
logger.info(f"Successfully created MatchResponse for {match.receipt.vendor} -> {match.transaction.vendor}")
logger.info(
f"Successfully created MatchResponse for {match.receipt.vendor} -> {match.transaction.vendor}"
)
logger.info(f"Formatted {len(match_responses)} match responses")
# Calculate statistics
if match_responses:
high_confidence = sum(1 for m in match_responses if m.confidence_score >= 0.8)
high_confidence = sum(
1 for m in match_responses if m.confidence_score >= 0.8
)
low_confidence = len(match_responses) - high_confidence
avg_score = sum(m.confidence_score for m in match_responses) / len(match_responses)
avg_score = sum(m.confidence_score for m in match_responses) / len(
match_responses
)
else:
high_confidence = low_confidence = avg_score = 0
stats = {
"total": len(match_responses),
"high_confidence": high_confidence,
"low_confidence": low_confidence,
"avg_score": round(avg_score, 2)
"avg_score": round(avg_score, 2),
}
logger.info(f"Generated stats: {stats}")
logger.info(f"Match-specific completed successfully with {len(match_responses)} matches")
return MatchingResponse(
matches=match_responses,
stats=stats
logger.info(
f"Match-specific completed successfully with {len(match_responses)} matches"
)
return MatchingResponse(matches=match_responses, stats=stats)
except Exception as e:
logger.error(f"Exception in matching section: {str(e)}")
logger.error(f"Exception type: {type(e)}")
logger.error(f"Exception args: {e.args}")
logger.error(f"Traceback: {e.__traceback__}")
raise HTTPException(status_code=500, detail=f"Unexpected matching error: {str(e)}")
raise HTTPException(
status_code=500, detail=f"Unexpected matching error: {str(e)}"
)
except HTTPException:
raise
except Exception as e:
logger.error(f"Unexpected error in match_specific_receipts: {str(e)}")
raise HTTPException(status_code=500, detail=str(e))
# ============================================================================
# RULES MANAGEMENT ENDPOINTS
# ============================================================================
@app.post("/rules")
async def add_rule(request: RuleRequest):
"""
@@ -480,16 +552,17 @@ async def add_rule(request: RuleRequest):
name=request.name,
condition=request.condition,
action=request.action,
source=request.source
source=request.source,
)
matching_engine.rules_engine.rules.append(new_rule)
return {"message": f"Rule '{request.name}' added successfully"}
except Exception as e:
raise HTTPException(status_code=500, detail=str(e))
@app.get("/rules")
async def get_rules():
    """Return every rule currently held by the matching engine.

    Returns:
        ``{"rules": [...]}`` where each entry carries the rule's ``name``,
        ``condition``, ``action``, ``source`` and ``status``.

    Raises:
        HTTPException: 500 if reading the rules fails.
    """
    try:
        return {
            "rules": [
                {
                    "name": rule.name,
                    "condition": rule.condition,
                    "action": rule.action,
                    "source": rule.source,
                    "status": rule.status,
                }
                for rule in matching_engine.rules_engine.rules
            ]
        }
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
@app.delete("/rules/{rule_name}")
async def delete_rule(rule_name: str):
"""
@@ -522,18 +598,20 @@ async def delete_rule(rule_name: str):
if rule.name == rule_name:
del rules[i]
return {"message": f"Rule '{rule_name}' deleted successfully"}
raise HTTPException(status_code=404, detail=f"Rule '{rule_name}' not found")
except HTTPException:
raise
except Exception as e:
raise HTTPException(status_code=500, detail=str(e))
# ============================================================================
# STATISTICS ENDPOINT
# ============================================================================
@app.get("/stats")
async def get_stats():
"""
@@ -544,12 +622,14 @@ async def get_stats():
"total_transactions": len(stored_transactions),
"total_receipts": len(processed_receipts),
"total_uploaded_files": len(uploaded_files),
"rules_count": len(matching_engine.rules_engine.rules)
"rules_count": len(matching_engine.rules_engine.rules),
}
except Exception as e:
raise HTTPException(status_code=500, detail=str(e))
if __name__ == "__main__":
    # Imported here so that importing this module does not require uvicorn;
    # it is only needed when the file is run directly as a script.
    import uvicorn

    # 0.0.0.0 listens on all network interfaces.
    uvicorn.run(app, host="0.0.0.0", port=8343)