"""FastAPI application for the AI Bookkeeper data science engine.

Exposes endpoints to import transactions (CSV or image), upload and process
receipt documents, match processed receipts against imported transactions,
and manage the matching rules. All state lives in module-level, in-memory
containers and is lost when the process exits.
"""

from fastapi import FastAPI, HTTPException, UploadFile, File
from fastapi.middleware.cors import CORSMiddleware
from datetime import datetime
from typing import List
import uuid
import csv
import io
import logging

# Configure logging
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    handlers=[
        logging.FileHandler('app.log'),
        logging.StreamHandler()
    ]
)
logger = logging.getLogger(__name__)

# NOTE(review): these project imports come after the logging setup in the
# original file; the order is preserved in case it is intentional.
from api_models import (
    MatchingRequest,
    MatchingResponse,
    MatchResponse,
    ApprovalRequest,
    RuleRequest,
    DocumentUploadResponse,
    DocumentProcessResponse,
    TransactionRequest
)
from models import Receipt, Transaction, Match
from matching_engine import MatchingEngine
from ai_rules import AIRule
from document_processor import DocumentProcessor

app = FastAPI(
    title="AI Bookkeeper - Data Science Engine",
    description="AI-powered receipt-to-transaction matching engine. Receives transaction data and provides intelligent matching capabilities.",
    version="1.0.0"
)

# CORS middleware
# NOTE(review): wildcard origins together with allow_credentials=True is very
# permissive; consider restricting the origin list outside development.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Initialize DS Engine components
matching_engine = MatchingEngine()
document_processor = DocumentProcessor()

# In-memory storage for uploaded files (in production, use a database)
uploaded_files = {}

# Store imported transactions globally for easy access
stored_transactions = []
processed_receipts = {}

# File extensions accepted by the upload/import endpoints.
ALLOWED_FILE_EXTENSIONS = ['jpg', 'jpeg', 'png', 'gif', 'bmp', 'pdf']

# Date layouts tried, in order, when parsing the CSV "Transaction Date" column.
CSV_DATE_FORMATS = ("%m/%d/%y", "%m/%d/%Y")

# Matches scoring at or above this value are counted as high confidence.
HIGH_CONFIDENCE_THRESHOLD = 0.8


# ============================================================================
# INTERNAL HELPERS
# ============================================================================

def _file_extension(filename):
    """Return the lower-cased extension of *filename* without the dot.

    Returns "" when the name is missing or contains no dot.
    """
    _, dot, extension = (filename or "").rpartition(".")
    return extension.lower() if dot else ""


def _require_supported_extension(filename, detail):
    """Return the extension of *filename*, or raise HTTP 400 with *detail*."""
    extension = _file_extension(filename)
    if extension not in ALLOWED_FILE_EXTENSIONS:
        raise HTTPException(status_code=400, detail=detail)
    return extension


def _normalize_csv_row(row):
    """Return a copy of a ``csv.DictReader`` row with stripped keys and values.

    Missing cells (``None``) become empty strings. Surplus cells, which
    ``DictReader`` stores under a non-string key, are dropped.
    """
    cleaned = {}
    for key, value in row.items():
        if not isinstance(key, str):
            continue
        cleaned[key.strip()] = value.strip() if isinstance(value, str) else ""
    return cleaned


def _parse_csv_date(raw):
    """Convert a CSV date string to ``YYYY-MM-DD`` or raise ``ValueError``."""
    for fmt in CSV_DATE_FORMATS:
        try:
            return datetime.strptime(raw, fmt).strftime("%Y-%m-%d")
        except ValueError:
            continue
    raise ValueError(f"Could not parse date: {raw}")


def _parse_csv_transaction(row, row_number):
    """Build one stored-transaction dict from a CSV row.

    Raises ``ValueError`` when the date or amount is missing or malformed.
    """
    fields = _normalize_csv_row(row)

    date_raw = fields.get('Transaction Date', '')
    if not date_raw:
        raise ValueError("Missing 'Transaction Date'")
    amount_raw = fields.get('Amount', '')
    if not amount_raw:
        raise ValueError("Missing 'Amount'")

    memo = (
        f"{fields.get('Account Type', '')} "
        f"{fields.get('Cheque Number', '')} "
        f"{fields.get('Description 1', '')}"
    ).strip()

    return {
        # IDs are only unique within a single import (account + row position).
        "id": f"{fields.get('Account Number')}_{row_number}",
        "txn_date": _parse_csv_date(date_raw),
        "amount": float(amount_raw.replace(',', '')),
        "payee_name": fields.get('Description 2', ''),
        "memo": memo
    }


def _stored_to_transactions(records):
    """Convert stored transaction dicts to ``Transaction`` objects.

    Records that cannot be converted are logged and skipped.
    """
    transactions = []
    for record in records:
        try:
            transactions.append(Transaction(
                id=record["id"],
                transaction_date=datetime.strptime(record["txn_date"], "%Y-%m-%d"),
                amount=record["amount"],
                vendor=record["payee_name"],
                notes=record["memo"]
            ))
        except Exception as e:
            record_id = record.get("id") if isinstance(record, dict) else None
            logger.warning("Error converting transaction %s: %s", record_id, e)
    return transactions


def _build_receipt(file_id, receipt_data):
    """Build a ``Receipt`` from extracted data, defaulting missing fields.

    Raises whatever the date parsing, the ``uploaded_files`` lookup or the
    ``Receipt`` constructor raise; the caller reports these per file.
    """
    # Handle missing date field
    if not receipt_data.get("date"):
        logger.warning("Missing date for receipt %s, using current date", file_id)
        receipt_date = datetime.now()
    else:
        receipt_date = datetime.strptime(receipt_data["date"], "%Y-%m-%d")

    # Handle missing amount field - try multiple possible keys
    amount = None
    for key in ("amount", "total_amount", "amount_total"):
        amount = receipt_data.get(key)
        if amount is not None:
            break
    if amount is None:
        logger.warning("Missing amount for receipt %s, using 0.0", file_id)
        amount = 0.0
    try:
        amount = float(amount)
    except (ValueError, TypeError):
        logger.warning("Invalid amount '%s' for receipt %s, using 0.0", amount, file_id)
        amount = 0.0
    logger.debug("DEBUG: amount for %s: %s", file_id, amount)

    # Handle missing vendor field
    vendor = receipt_data.get("vendor", "")
    if not vendor:
        logger.warning("Missing vendor for receipt %s, using 'Unknown'", file_id)
        vendor = "Unknown"

    # Handle tax field
    try:
        tax = float(receipt_data.get("tax", receipt_data.get("tax_amount", 0.0)))
    except (ValueError, TypeError):
        tax = 0.0

    upload_info = uploaded_files[file_id]
    return Receipt(
        id=file_id,
        file_name=upload_info["filename"],
        upload_date=upload_info["upload_date"],
        receipt_date=receipt_date,
        amount=amount,
        tax=tax,
        vendor=vendor,
        category=receipt_data.get("category", "Other"),
        description=receipt_data.get("description", "")
    )


def _to_match_response(match):
    """Flatten a match object into a ``MatchResponse``."""
    return MatchResponse(
        receipt_id=match.receipt.id,
        transaction_id=match.transaction.id,
        confidence_score=match.confidence_score,
        match_reason=match.match_reason,
        receipt_vendor=match.receipt.vendor,
        receipt_amount=match.receipt.amount,
        receipt_description=match.receipt.description,
        receipt_category=match.receipt.category,
        receipt_tax_amount=match.receipt.tax,
        transaction_vendor=match.transaction.vendor,
        transaction_amount=match.transaction.amount
    )


def _match_stats(match_responses):
    """Summarise match counts and the average confidence score."""
    total = len(match_responses)
    if match_responses:
        high_confidence = sum(
            1 for m in match_responses
            if m.confidence_score >= HIGH_CONFIDENCE_THRESHOLD
        )
        low_confidence = total - high_confidence
        avg_score = sum(m.confidence_score for m in match_responses) / total
    else:
        high_confidence = low_confidence = avg_score = 0
    return {
        "total": total,
        "high_confidence": high_confidence,
        "low_confidence": low_confidence,
        "avg_score": round(avg_score, 2)
    }


@app.get("/")
async def root():
    """Health check endpoint"""
    return {
        "message": "AI Bookkeeper Data Science Engine is running",
        "version": "1.0.0",
        "status": "healthy"
    }


# ============================================================================
# TRANSACTION IMPORT ENDPOINTS
# ============================================================================

@app.post("/transactions/import/csv")
async def import_transactions_csv(file: UploadFile = File(...)):
    """
    Import transactions from a CSV file (custom bank export format).

    Rows that cannot be parsed are reported in "errors" and skipped. The
    successfully parsed rows replace the previously imported transactions.
    """
    global stored_transactions
    try:
        content = await file.read()
        try:
            # utf-8-sig drops a leading BOM, which would otherwise stick to
            # the first header name and hide that column.
            decoded = content.decode('utf-8-sig')
        except UnicodeDecodeError as exc:
            raise HTTPException(
                status_code=400,
                detail=f"File is not valid UTF-8: {exc}"
            ) from exc

        reader = csv.DictReader(io.StringIO(decoded))
        transactions = []
        errors = []
        for row_number, row in enumerate(reader, start=1):
            try:
                transactions.append(_parse_csv_transaction(row, row_number))
            except Exception as e:
                # Best effort: one bad row must not abort the whole import.
                errors.append(f"Row {row_number}: {str(e)}")

        # Store transactions globally for auto-matching
        stored_transactions = transactions
        return {
            "imported_count": len(transactions),
            "converted_transactions": transactions,
            "errors": errors
        }
    except HTTPException:
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))


@app.post("/transactions/import/image")
async def import_transactions_from_image(file: UploadFile = File(...)):
    """
    Import transactions from an image (bank statement, credit card statement, etc.) using AI extraction.

    Transactions that cannot be converted are reported in "errors" and
    skipped. The converted ones replace the previously imported transactions.
    """
    global stored_transactions
    try:
        # Validate file type
        _require_supported_extension(
            file.filename,
            f"Unsupported file type. Allowed: {ALLOWED_FILE_EXTENSIONS}"
        )

        # Read file content
        content = await file.read()

        # Save file to disk
        image_path = await document_processor.save_uploaded_file(content, file.filename)

        # Extract transactions from image (pass file path)
        extraction_result = await document_processor.extract_transactions_from_image(image_path)
        if not extraction_result.get("extraction_success", False):
            raise HTTPException(
                status_code=500,
                detail=extraction_result.get("error", "Extraction failed")
            )

        extracted_transactions = extraction_result.get("transactions", [])

        imported = []
        errors = []
        for position, txn in enumerate(extracted_transactions, start=1):
            try:
                txn_date_raw = txn.get("date")

                # Parse date to YYYY-MM-DD format
                txn_date = document_processor._parse_date_to_iso(txn_date_raw)
                if not txn_date:
                    if not txn_date_raw:
                        raise ValueError("Missing date")
                    # Fallback: use current year if parsing fails
                    txn_date = f"{datetime.now().year}-{txn_date_raw}"

                imported.append({
                    "id": f"img_{file.filename}_{position}",
                    "txn_date": txn_date,
                    "amount": txn.get("amount"),
                    "payee_name": txn.get("vendor"),
                    "memo": txn.get("memo", "")
                })
            except Exception as e:
                # Best effort: skip the bad entry but tell the caller about it.
                logger.warning("Skipping extracted transaction %s: %s", position, e)
                errors.append(f"Transaction {position}: {str(e)}")

        # Store transactions globally for auto-matching
        stored_transactions = imported
        return {
            "imported_count": len(imported),
            "converted_transactions": imported,
            "errors": errors
        }
    except HTTPException:
        # Keep the intended status code (e.g. 400) instead of masking it as 500.
        raise
    except Exception as e:
        logger.error("Error importing transactions from image: %s", e)
        raise HTTPException(status_code=500, detail=str(e))


# ============================================================================
# DOCUMENT PROCESSING ENDPOINTS
# ============================================================================

@app.post("/upload-multiple", response_model=List[DocumentUploadResponse])
async def upload_multiple_documents(files: List[UploadFile] = File(...)):
    """
    Upload multiple receipt images for processing.

    This endpoint accepts multiple image files and returns file IDs that can be used
    with the /process/{file_id} endpoint.
    """
    try:
        # Validate every file before storing any of them, so a rejected
        # request does not leave unreachable entries in uploaded_files.
        extensions = [
            _require_supported_extension(
                upload.filename,
                f"Unsupported file type for {upload.filename}. Allowed: {ALLOWED_FILE_EXTENSIONS}"
            )
            for upload in files
        ]

        responses = []
        for upload, extension in zip(files, extensions):
            # Generate unique file ID
            file_id = str(uuid.uuid4())

            # Read and store file content
            content = await upload.read()
            uploaded_at = datetime.now()
            uploaded_files[file_id] = {
                "filename": upload.filename,
                "content": content,
                "upload_date": uploaded_at
            }

            responses.append(DocumentUploadResponse(
                file_id=file_id,
                filename=upload.filename,
                file_type=extension,
                upload_date=uploaded_at,
                status="uploaded"
            ))

        return responses
    except HTTPException:
        raise
    except Exception as e:
        logger.error("Error uploading documents: %s", e)
        raise HTTPException(status_code=500, detail=str(e))


@app.post("/process/{file_id}", response_model=DocumentProcessResponse)
async def process_document(file_id: str):
    """
    Process a previously uploaded document to extract receipt information.

    This endpoint uses AI to extract structured data from receipt images,
    including vendor, amount, date, and category information.
    """
    try:
        # Check if file exists
        if file_id not in uploaded_files:
            raise HTTPException(status_code=404, detail=f"File {file_id} not found")

        file_data = uploaded_files[file_id]

        # Save file temporarily and process it
        file_path = await document_processor.save_uploaded_file(
            file_data["content"], file_data["filename"]
        )
        file_type = _file_extension(file_data["filename"])
        receipt_data = await document_processor.process_file(file_path, file_type)

        # Store processed receipt
        processed_receipts[file_id] = receipt_data

        return DocumentProcessResponse(
            file_id=file_id,
            extraction_success=receipt_data.get("extraction_success", False),
            vendor=receipt_data.get("vendor", ""),
            description=receipt_data.get("description", ""),
            total_amount=receipt_data.get("total_amount", 0.0),
            tax_amount=receipt_data.get("tax_amount", 0.0),
            date=receipt_data.get("date", ""),
            category=receipt_data.get("category", ""),
            confidence=receipt_data.get("confidence", 0.0),
            error=receipt_data.get("error", None)
        )
    except HTTPException:
        # Preserve the 404 for unknown file IDs.
        raise
    except Exception as e:
        logger.error("Error processing document %s: %s", file_id, e)
        raise HTTPException(status_code=500, detail=str(e))


# ============================================================================
# MATCHING ENDPOINTS
# ============================================================================

@app.post("/match-specific", response_model=MatchingResponse)
async def match_specific_receipts(file_ids: List[str]):
    """
    Match specific receipts against imported transactions.

    This endpoint takes a list of receipt file IDs and matches them against
    the currently imported transactions using AI-powered matching logic.
    """
    try:
        logger.info("Starting match-specific for file IDs: %s", file_ids)

        # Check if transactions are imported
        if not stored_transactions:
            logger.warning("No transactions imported")
            raise HTTPException(
                status_code=400,
                detail="No transactions imported. Please upload CSV first."
            )

        logger.info("Found %s stored transactions", len(stored_transactions))

        # Convert stored transactions to Transaction objects
        transactions = _stored_to_transactions(stored_transactions)
        logger.info("Converted %s transactions", len(transactions))

        # Get receipts for the specified file IDs
        receipts = []
        missing_files = []
        for file_id in file_ids:
            if file_id not in processed_receipts:
                logger.warning("Receipt %s not found in processed_receipts", file_id)
                missing_files.append(f"{file_id} (not found)")
                continue

            receipt_data = processed_receipts[file_id]
            logger.debug("DEBUG: receipt_data for %s: %s", file_id, receipt_data)
            try:
                receipt = _build_receipt(file_id, receipt_data)
            except Exception as e:
                logger.warning("Error creating receipt object for %s: %s", file_id, e)
                missing_files.append(f"{file_id} (error: {str(e)})")
                continue

            receipts.append(receipt)
            logger.info("Added receipt: %s - $%s", receipt.vendor, receipt.amount)

        if missing_files:
            logger.error("Missing files: %s", missing_files)
            raise HTTPException(status_code=400, detail=f"Missing files: {missing_files}")

        logger.info(
            "Processing %s receipts against %s transactions",
            len(receipts), len(transactions)
        )

        # Perform matching
        try:
            matches = matching_engine.process_matching(receipts, transactions)
            logger.info("Matching completed successfully. Found %s matches", len(matches))

            # Convert matches to response format
            match_responses = []
            for match in matches:
                logger.debug("Raw match object: %s", match)
                match_responses.append(_to_match_response(match))
                logger.info(
                    "Successfully created MatchResponse for %s -> %s",
                    match.receipt.vendor, match.transaction.vendor
                )

            logger.info("Formatted %s match responses", len(match_responses))

            # Calculate statistics
            stats = _match_stats(match_responses)
            logger.info("Generated stats: %s", stats)
            logger.info(
                "Match-specific completed successfully with %s matches",
                len(match_responses)
            )

            return MatchingResponse(
                matches=match_responses,
                stats=stats
            )
        except Exception as e:
            # logger.exception records the full traceback, not just its repr.
            logger.exception("Exception in matching section: %s", e)
            raise HTTPException(
                status_code=500,
                detail=f"Unexpected matching error: {str(e)}"
            )

    except HTTPException:
        raise
    except Exception as e:
        logger.error("Unexpected error in match_specific_receipts: %s", e)
        raise HTTPException(status_code=500, detail=str(e))


# ============================================================================
# RULES MANAGEMENT ENDPOINTS
# ============================================================================

@app.post("/rules")
async def add_rule(request: RuleRequest):
    """
    Add a new AI rule for transaction matching.
    """
    try:
        new_rule = AIRule(
            name=request.name,
            condition=request.condition,
            action=request.action,
            source=request.source
        )
        matching_engine.rules_engine.rules.append(new_rule)
        return {"message": f"Rule '{request.name}' added successfully"}
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))


@app.get("/rules")
async def get_rules():
    """
    Get all current AI rules.
    """
    try:
        rules = [
            {
                "name": rule.name,
                "condition": rule.condition,
                "action": rule.action,
                "source": rule.source,
                "status": rule.status
            }
            for rule in matching_engine.rules_engine.rules
        ]
        return {"rules": rules}
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))


@app.delete("/rules/{rule_name}")
async def delete_rule(rule_name: str):
    """
    Delete an AI rule by name.

    Only the first rule with a matching name is removed.
    """
    try:
        rules = matching_engine.rules_engine.rules
        for index, rule in enumerate(rules):
            if rule.name == rule_name:
                # Returning right after the deletion keeps the loop safe.
                del rules[index]
                return {"message": f"Rule '{rule_name}' deleted successfully"}
        raise HTTPException(status_code=404, detail=f"Rule '{rule_name}' not found")
    except HTTPException:
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))


# ============================================================================
# STATISTICS ENDPOINT
# ============================================================================

@app.get("/stats")
async def get_stats():
    """
    Get system statistics.
    """
    try:
        return {
            "total_transactions": len(stored_transactions),
            "total_receipts": len(processed_receipts),
            "total_uploaded_files": len(uploaded_files),
            "rules_count": len(matching_engine.rules_engine.rules)
        }
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))


if __name__ == "__main__":
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=8343)