first commit
This commit is contained in:
@@ -0,0 +1,555 @@
|
||||
from fastapi import FastAPI, HTTPException, UploadFile, File
|
||||
from fastapi.middleware.cors import CORSMiddleware
|
||||
from datetime import datetime
|
||||
from typing import List
|
||||
import uuid
|
||||
import csv
|
||||
import io
|
||||
import logging
|
||||
|
||||
# Configure logging: records at INFO and above are written both to the
# ``app.log`` file and to a stream handler, using one shared format.
logging.basicConfig(
    handlers=[
        logging.FileHandler('app.log'),
        logging.StreamHandler(),
    ],
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)

# Module-level logger used by the endpoints in this file.
logger = logging.getLogger(__name__)
|
||||
|
||||
from api_models import (
|
||||
MatchingRequest, MatchingResponse, MatchResponse,
|
||||
ApprovalRequest, RuleRequest, DocumentUploadResponse,
|
||||
DocumentProcessResponse, TransactionRequest
|
||||
)
|
||||
from models import Receipt, Transaction, Match
|
||||
from matching_engine import MatchingEngine
|
||||
from ai_rules import AIRule
|
||||
from document_processor import DocumentProcessor
|
||||
|
||||
# The FastAPI application object that every endpoint below is registered on.
app = FastAPI(
    version="1.0.0",
    title="AI Bookkeeper - Data Science Engine",
    description="AI-powered receipt-to-transaction matching engine. Receives transaction data and provides intelligent matching capabilities.",
)

# CORS middleware
# NOTE(review): all origins, methods and headers are allowed while credentials
# are enabled. Confirm this is intended before exposing the service outside a
# trusted network; an explicit origin list is the usual production setup.
app.add_middleware(
    CORSMiddleware,
    allow_headers=["*"],
    allow_methods=["*"],
    allow_credentials=True,
    allow_origins=["*"],
)
|
||||
|
||||
# Initialize DS Engine components (shared by all requests).
document_processor = DocumentProcessor()
matching_engine = MatchingEngine()

# In-memory storage for uploaded files (in production, use a database).
# Maps file_id -> {"filename", "content", "upload_date"}.
uploaded_files = dict()

# Store imported transactions globally for easy access.
# Each entry is a dict with "id", "txn_date", "amount", "payee_name", "memo";
# the import endpoints replace the whole list on every import.
stored_transactions = list()

# Maps file_id -> the extraction result returned by the document processor.
processed_receipts = dict()
|
||||
|
||||
@app.get("/")
async def root():
    """Health check endpoint: report that the service is up and its version."""
    health_payload = dict(
        message="AI Bookkeeper Data Science Engine is running",
        version="1.0.0",
        status="healthy",
    )
    return health_payload
|
||||
|
||||
# ============================================================================
|
||||
# TRANSACTION IMPORT ENDPOINTS
|
||||
# ============================================================================
|
||||
|
||||
@app.post("/transactions/import/csv")
async def import_transactions_csv(file: UploadFile = File(...)):
    """
    Import transactions from a CSV file (custom bank export format).

    The columns read are ``Account Number``, ``Transaction Date``, ``Amount``
    and ``Description 2`` (used as the payee); the memo is built from
    ``Account Type``, ``Cheque Number`` and ``Description 1``. Header names
    and cell values are whitespace-stripped before use, and a leading UTF-8
    byte-order mark is ignored.

    Rows that cannot be converted are skipped and reported in ``errors``.
    The successfully converted rows replace the module-level
    ``stored_transactions`` list.

    Returns:
        A dict with ``imported_count``, ``converted_transactions`` and
        ``errors`` (one message per rejected row, 1-based row numbers).

    Raises:
        HTTPException: 400 if the upload is not valid UTF-8 text, 500 for any
            other unexpected failure.
    """
    global stored_transactions

    try:
        content = await file.read()
        try:
            # utf-8-sig also decodes plain UTF-8 and drops a leading BOM, which
            # would otherwise become part of the first header name.
            decoded = content.decode('utf-8-sig')
        except UnicodeDecodeError as e:
            raise HTTPException(status_code=400, detail=f"File is not valid UTF-8: {e}") from e

        reader = csv.DictReader(io.StringIO(decoded))
        transactions = []
        errors = []
        for idx, raw_row in enumerate(reader):
            try:
                # Normalise the row once: strip header names and values, and
                # turn the None that DictReader uses for short rows into "".
                # The None key (surplus cells of an over-long row) is dropped.
                row = {
                    key.strip(): (value or '').strip()
                    for key, value in raw_row.items()
                    if key is not None
                }

                account_number = row.get('Account Number', '')
                txn_date_str = row.get('Transaction Date', '')
                amount_str = row.get('Amount', '')
                payee_name = row.get('Description 2', '')
                memo = f"{row.get('Account Type', '')} {row.get('Cheque Number', '')} {row.get('Description 1', '')}".strip()

                # Compose ID from the account number and the 1-based row index
                txn_id = f"{account_number}_{idx+1}"

                # Parse date (try two-digit year first, then four-digit year)
                txn_date = None
                for fmt in ("%m/%d/%y", "%m/%d/%Y"):
                    try:
                        txn_date = datetime.strptime(txn_date_str, fmt).strftime("%Y-%m-%d")
                        break
                    except ValueError:
                        continue
                if not txn_date:
                    raise ValueError(f"Could not parse date: {txn_date_str}")

                # Parse amount (thousands separators are removed first)
                if not amount_str:
                    raise ValueError("Missing amount")
                amount = float(amount_str.replace(',', ''))

                transactions.append({
                    "id": txn_id,
                    "txn_date": txn_date,
                    "amount": amount,
                    "payee_name": payee_name,
                    "memo": memo
                })
            except Exception as e:
                # Best effort: a bad row is reported, not fatal to the import.
                errors.append(f"Row {idx+1}: {str(e)}")

        # Store transactions globally for auto-matching
        stored_transactions = transactions

        return {
            "imported_count": len(transactions),
            "converted_transactions": transactions,
            "errors": errors
        }
    except HTTPException:
        # Keep the intended status code instead of rewrapping it as a 500.
        raise
    except Exception as e:
        logger.error(f"Error importing transactions from CSV: {str(e)}")
        raise HTTPException(status_code=500, detail=str(e))
|
||||
|
||||
@app.post("/transactions/import/image")
async def import_transactions_from_image(file: UploadFile = File(...)):
    """
    Import transactions from an image (bank statement, credit card statement, etc.) using AI extraction.

    The upload is saved through the document processor, transactions are
    extracted from the saved file, and each one is converted to the stored
    transaction shape (``id``, ``txn_date``, ``amount``, ``payee_name``,
    ``memo``). The converted list replaces the module-level
    ``stored_transactions`` only after extraction has succeeded.

    Returns:
        A dict with ``imported_count``, ``converted_transactions`` and
        ``errors`` (one message per extracted transaction that was skipped).

    Raises:
        HTTPException: 400 for an unsupported file type, 500 when extraction
            reports failure or anything unexpected goes wrong.
    """
    global stored_transactions

    try:
        # Validate file type. A name without a dot has no extension at all,
        # so it is rejected rather than treated as its own extension.
        allowed_types = ['jpg', 'jpeg', 'png', 'gif', 'bmp', 'pdf']
        filename = file.filename or ""
        file_extension = filename.rsplit('.', 1)[-1].lower() if '.' in filename else ""
        if file_extension not in allowed_types:
            raise HTTPException(status_code=400, detail=f"Unsupported file type. Allowed: {allowed_types}")

        # Read file content and save it to disk
        content = await file.read()
        image_path = await document_processor.save_uploaded_file(content, filename)

        # Extract transactions from image (pass file path)
        extraction_result = await document_processor.extract_transactions_from_image(image_path)
        if not extraction_result.get("extraction_success", False):
            raise HTTPException(status_code=500, detail=extraction_result.get("error", "Extraction failed"))
        extracted_transactions = extraction_result.get("transactions", [])

        transactions = []
        errors = []
        for idx, txn in enumerate(extracted_transactions):
            try:
                txn_date_raw = txn.get("date")

                # Parse date to YYYY-MM-DD format
                txn_date = document_processor._parse_date_to_iso(txn_date_raw)
                if not txn_date:
                    if not txn_date_raw:
                        raise ValueError("Missing transaction date")
                    # Fallback: prefix the raw value with the current year.
                    # NOTE(review): presumably the raw value is month-day
                    # without a year in this case; verify against the
                    # extractor's output.
                    txn_date = f"{datetime.now().year}-{txn_date_raw}"

                transactions.append({
                    "id": f"img_{filename}_{idx+1}",
                    "txn_date": txn_date,
                    "amount": txn.get("amount"),
                    "payee_name": txn.get("vendor"),
                    "memo": txn.get("memo", "")
                })
            except Exception as e:
                # Best effort: report the bad entry instead of dropping it silently.
                logger.warning(f"Skipping extracted transaction {idx+1}: {str(e)}")
                errors.append(f"Transaction {idx+1}: {str(e)}")

        # Store transactions globally for auto-matching
        stored_transactions = transactions

        return {
            "imported_count": len(transactions),
            "converted_transactions": transactions,
            "errors": errors
        }
    except HTTPException:
        # Keep the intended status code (e.g. 400) instead of rewrapping it as 500.
        raise
    except Exception as e:
        logger.error(f"Error importing transactions from image: {str(e)}")
        raise HTTPException(status_code=500, detail=str(e))
|
||||
|
||||
# ============================================================================
|
||||
# DOCUMENT PROCESSING ENDPOINTS
|
||||
# ============================================================================
|
||||
|
||||
@app.post("/upload-multiple", response_model=List[DocumentUploadResponse])
async def upload_multiple_documents(files: List[UploadFile] = File(...)):
    """
    Upload multiple receipt images for processing.

    This endpoint accepts multiple image files and returns file IDs
    that can be used with the /process/{file_id} endpoint.

    The whole batch is validated before anything is stored, and the files are
    added to ``uploaded_files`` only once every one of them has been read, so
    a rejected or failed batch leaves no orphaned entries behind.

    Raises:
        HTTPException: 400 if any file has an unsupported type, 500 for any
            other unexpected failure.
    """
    try:
        allowed_types = ['jpg', 'jpeg', 'png', 'gif', 'bmp', 'pdf']

        # Pass 1: validate every file type up front.
        validated = []
        for file in files:
            filename = file.filename or ""
            file_extension = filename.rsplit('.', 1)[-1].lower() if '.' in filename else ""
            if file_extension not in allowed_types:
                raise HTTPException(status_code=400, detail=f"Unsupported file type for {file.filename}. Allowed: {allowed_types}")
            validated.append((file, file_extension))

        # Pass 2: read the contents into a staging dict.
        staged = {}
        responses = []
        for file, file_extension in validated:
            # Generate unique file ID
            file_id = str(uuid.uuid4())

            content = await file.read()
            # One timestamp, so the stored record and the response agree.
            upload_date = datetime.now()
            staged[file_id] = {
                "filename": file.filename,
                "content": content,
                "upload_date": upload_date
            }
            responses.append(DocumentUploadResponse(
                file_id=file_id,
                filename=file.filename,
                file_type=file_extension,
                upload_date=upload_date,
                status="uploaded"
            ))

        # Commit only after the whole batch succeeded.
        uploaded_files.update(staged)
        return responses

    except HTTPException:
        # Keep the intended 400 instead of rewrapping it as a 500.
        raise
    except Exception as e:
        logger.error(f"Error uploading documents: {str(e)}")
        raise HTTPException(status_code=500, detail=str(e))
|
||||
|
||||
@app.post("/process/{file_id}", response_model=DocumentProcessResponse)
async def process_document(file_id: str):
    """
    Process a previously uploaded document to extract receipt information.

    The stored upload is written to disk through the document processor,
    processed according to its file extension, and the resulting data is
    cached in ``processed_receipts`` under ``file_id``.

    Returns:
        A DocumentProcessResponse built from the extraction result; fields
        missing from the result fall back to empty strings / 0.0 / None.

    Raises:
        HTTPException: 404 if ``file_id`` was never uploaded, 500 for any
            other unexpected failure.
    """
    try:
        # Check if file exists
        if file_id not in uploaded_files:
            raise HTTPException(status_code=404, detail=f"File {file_id} not found")

        file_data = uploaded_files[file_id]

        # Save file temporarily and process it
        file_path = await document_processor.save_uploaded_file(file_data["content"], file_data["filename"])
        file_type = file_data["filename"].split('.')[-1].lower()
        receipt_data = await document_processor.process_file(file_path, file_type)

        # Store processed receipt
        processed_receipts[file_id] = receipt_data

        return DocumentProcessResponse(
            file_id=file_id,
            extraction_success=receipt_data.get("extraction_success", False),
            vendor=receipt_data.get("vendor", ""),
            description=receipt_data.get("description", ""),
            total_amount=receipt_data.get("total_amount", 0.0),
            tax_amount=receipt_data.get("tax_amount", 0.0),
            date=receipt_data.get("date", ""),
            category=receipt_data.get("category", ""),
            confidence=receipt_data.get("confidence", 0.0),
            error=receipt_data.get("error", None)
        )

    except HTTPException:
        # Without this the 404 above would be caught below and returned as a 500.
        raise
    except Exception as e:
        logger.error(f"Error processing document {file_id}: {str(e)}")
        raise HTTPException(status_code=500, detail=str(e))
|
||||
|
||||
# ============================================================================
|
||||
# MATCHING ENDPOINTS
|
||||
# ============================================================================
|
||||
|
||||
@app.post("/match-specific", response_model=MatchingResponse)
async def match_specific_receipts(file_ids: List[str]):
    """
    Match specific receipts against imported transactions.

    This endpoint takes a list of receipt file IDs and matches them against
    the currently imported transactions using the matching engine.

    Returns:
        A MatchingResponse with one MatchResponse per match and a ``stats``
        dict (``total``, ``high_confidence``, ``low_confidence``,
        ``avg_score``).

    Raises:
        HTTPException: 400 if no transactions are imported or if any
            requested receipt is unknown or cannot be converted; 500 if the
            matching step or anything else fails unexpectedly.
    """
    try:
        logger.info(f"Starting match-specific for file IDs: {file_ids}")

        # Check if transactions are imported
        if not stored_transactions:
            logger.warning("No transactions imported")
            raise HTTPException(status_code=400, detail="No transactions imported. Please upload CSV first.")

        logger.info(f"Found {len(stored_transactions)} stored transactions")
        transactions = _transactions_from_stored(stored_transactions)
        logger.info(f"Converted {len(transactions)} transactions")

        # Get receipts for the specified file IDs
        receipts = []
        missing_files = []
        for file_id in file_ids:
            if file_id not in processed_receipts:
                logger.warning(f"Receipt {file_id} not found in processed_receipts")
                missing_files.append(f"{file_id} (not found)")
                continue
            try:
                receipt = _receipt_from_processed(file_id, processed_receipts[file_id])
            except Exception as e:
                logger.warning(f"Error creating receipt object for {file_id}: {str(e)}")
                missing_files.append(f"{file_id} (error: {str(e)})")
                continue
            receipts.append(receipt)
            logger.info(f"Added receipt: {receipt.vendor} - ${receipt.amount}")

        # The request is all-or-nothing: any unusable receipt rejects it.
        if missing_files:
            logger.error(f"Missing files: {missing_files}")
            raise HTTPException(status_code=400, detail=f"Missing files: {missing_files}")

        logger.info(f"Processing {len(receipts)} receipts against {len(transactions)} transactions")

        # Perform matching
        try:
            logger.info("Starting direct matching call (without ThreadPoolExecutor)")
            matches = matching_engine.process_matching(receipts, transactions)
            logger.info(f"Matching completed successfully. Found {len(matches)} matches")

            # Convert matches to response format
            match_responses = [_to_match_response(match) for match in matches]
            logger.info(f"Formatted {len(match_responses)} match responses")

            stats = _match_stats(match_responses)
            logger.info(f"Generated stats: {stats}")
            logger.info(f"Match-specific completed successfully with {len(match_responses)} matches")

            return MatchingResponse(
                matches=match_responses,
                stats=stats
            )
        except Exception as e:
            # logger.exception records the full traceback; formatting
            # e.__traceback__ directly only prints an object repr.
            logger.exception(f"Exception in matching section: {str(e)}")
            raise HTTPException(status_code=500, detail=f"Unexpected matching error: {str(e)}")

    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"Unexpected error in match_specific_receipts: {str(e)}")
        raise HTTPException(status_code=500, detail=str(e))


def _transactions_from_stored(records):
    """Convert stored transaction dicts to Transaction objects, skipping (and logging) bad ones."""
    transactions = []
    for txn in records:
        try:
            txn_date = datetime.strptime(txn["txn_date"], "%Y-%m-%d")
            transactions.append(Transaction(
                id=txn["id"],
                transaction_date=txn_date,
                amount=txn["amount"],
                vendor=txn["payee_name"],
                notes=txn["memo"]
            ))
        except Exception as e:
            # .get() so that a record without an id cannot make the handler itself fail.
            txn_id = txn.get("id") if isinstance(txn, dict) else None
            logger.warning(f"Error converting transaction {txn_id}: {str(e)}")
    return transactions


def _receipt_from_processed(file_id, receipt_data):
    """Build a Receipt for file_id from its stored extraction result.

    Missing or invalid values fall back to defaults (current date, amount
    0.0, vendor 'Unknown', category 'Other', tax 0.0). An unparseable date,
    a missing ``uploaded_files`` entry or a Receipt constructor failure
    propagates to the caller.
    """
    logger.debug(f"DEBUG: receipt_data for {file_id}: {receipt_data}")
    logger.debug(f"DEBUG: receipt_data keys for {file_id}: {list(receipt_data.keys())}")

    # Handle missing date field
    if not receipt_data.get("date"):
        logger.warning(f"Missing date for receipt {file_id}, using current date")
        receipt_date = datetime.now()
    else:
        receipt_date = datetime.strptime(receipt_data["date"], "%Y-%m-%d")

    # Handle missing amount field - try multiple possible keys, in order
    amount = None
    for key in ("amount", "total_amount", "amount_total"):
        amount = receipt_data.get(key)
        if amount is not None:
            break
    if amount is None:
        logger.warning(f"Missing amount for receipt {file_id}, using 0.0")
        amount = 0.0

    # Ensure amount is a float
    try:
        amount = float(amount)
    except (ValueError, TypeError):
        logger.warning(f"Invalid amount '{amount}' for receipt {file_id}, using 0.0")
        amount = 0.0
    logger.debug(f"DEBUG: amount for {file_id}: {amount}")

    # Handle missing vendor field
    vendor = receipt_data.get("vendor", "")
    if not vendor:
        logger.warning(f"Missing vendor for receipt {file_id}, using 'Unknown'")
        vendor = "Unknown"

    # Handle tax field ("tax" takes precedence over "tax_amount")
    tax = receipt_data.get("tax", receipt_data.get("tax_amount", 0.0))
    try:
        tax = float(tax)
    except (ValueError, TypeError):
        tax = 0.0

    upload = uploaded_files[file_id]
    return Receipt(
        id=file_id,
        file_name=upload["filename"],
        upload_date=upload["upload_date"],
        receipt_date=receipt_date,
        amount=amount,
        tax=tax,
        vendor=vendor,
        category=receipt_data.get("category", "Other"),
        description=receipt_data.get("description", "")
    )


def _to_match_response(match):
    """Flatten one engine match into the MatchResponse API model."""
    logger.debug(f"Raw match object: {match}")
    response = MatchResponse(
        receipt_id=match.receipt.id,
        transaction_id=match.transaction.id,
        confidence_score=match.confidence_score,
        match_reason=match.match_reason,
        receipt_vendor=match.receipt.vendor,
        receipt_amount=match.receipt.amount,
        receipt_description=match.receipt.description,
        receipt_category=match.receipt.category,
        receipt_tax_amount=match.receipt.tax,
        transaction_vendor=match.transaction.vendor,
        transaction_amount=match.transaction.amount
    )
    logger.info(f"Successfully created MatchResponse for {match.receipt.vendor} -> {match.transaction.vendor}")
    return response


def _match_stats(match_responses):
    """Summarise match confidence; a score of 0.8 or more counts as high confidence."""
    if match_responses:
        high_confidence = sum(1 for m in match_responses if m.confidence_score >= 0.8)
        low_confidence = len(match_responses) - high_confidence
        avg_score = sum(m.confidence_score for m in match_responses) / len(match_responses)
    else:
        high_confidence = low_confidence = avg_score = 0
    return {
        "total": len(match_responses),
        "high_confidence": high_confidence,
        "low_confidence": low_confidence,
        "avg_score": round(avg_score, 2)
    }
|
||||
|
||||
# ============================================================================
|
||||
# RULES MANAGEMENT ENDPOINTS
|
||||
# ============================================================================
|
||||
|
||||
@app.post("/rules")
async def add_rule(request: RuleRequest):
    """
    Create an AIRule from the request and append it to the matching
    engine's rule list.

    Any failure is reported as an HTTP 500 carrying the error text.
    """
    try:
        rule_fields = {
            "name": request.name,
            "condition": request.condition,
            "action": request.action,
            "source": request.source,
        }
        created_rule = AIRule(**rule_fields)

        engine_rules = matching_engine.rules_engine.rules
        engine_rules.append(created_rule)

        confirmation = f"Rule '{request.name}' added successfully"
        return {"message": confirmation}

    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
|
||||
|
||||
@app.get("/rules")
async def get_rules():
    """
    List every rule currently held by the matching engine.

    Each rule is returned as a dict with its name, condition, action,
    source and status. Any failure is reported as an HTTP 500.
    """
    try:
        serialized = [
            {
                "name": entry.name,
                "condition": entry.condition,
                "action": entry.action,
                "source": entry.source,
                "status": entry.status,
            }
            for entry in matching_engine.rules_engine.rules
        ]
        return {"rules": serialized}

    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
|
||||
|
||||
@app.delete("/rules/{rule_name}")
async def delete_rule(rule_name: str):
    """
    Remove the first rule whose name equals ``rule_name``.

    Responds with 404 when no rule has that name and with 500 on any
    other failure.
    """
    try:
        engine_rules = matching_engine.rules_engine.rules

        # Index of the first rule with a matching name, or None.
        position = next(
            (index for index, candidate in enumerate(engine_rules) if candidate.name == rule_name),
            None,
        )
        if position is None:
            raise HTTPException(status_code=404, detail=f"Rule '{rule_name}' not found")

        del engine_rules[position]
        return {"message": f"Rule '{rule_name}' deleted successfully"}

    except HTTPException:
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
|
||||
|
||||
# ============================================================================
|
||||
# STATISTICS ENDPOINT
|
||||
# ============================================================================
|
||||
|
||||
@app.get("/stats")
async def get_stats():
    """
    Report the sizes of the in-memory stores and of the engine's rule list.
    """
    try:
        counts = {}
        counts["total_transactions"] = len(stored_transactions)
        counts["total_receipts"] = len(processed_receipts)
        counts["total_uploaded_files"] = len(uploaded_files)
        counts["rules_count"] = len(matching_engine.rules_engine.rules)
        return counts

    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
|
||||
|
||||
if __name__ == "__main__":
    # uvicorn is imported only when the module is run as a script.
    from uvicorn import run as serve

    # Listen on all interfaces, port 8343.
    serve(app, host="0.0.0.0", port=8343)
|
||||
Reference in New Issue
Block a user