Files
ds_quickbooks/main.py
T

1060 lines
39 KiB
Python
Raw Normal View History

2025-08-05 22:25:51 +01:00
import csv
import io
import logging
import uuid
from datetime import datetime
from typing import List
2025-08-07 09:46:04 +01:00
from fastapi import FastAPI, File, Form, HTTPException, UploadFile
from fastapi.middleware.cors import CORSMiddleware
from sqlalchemy.orm import Session
2025-08-05 22:25:51 +01:00
from ai_rules import AIRule
from api_models import (
DocumentProcessResponse,
DocumentUploadResponse,
MatchingResponse,
MatchResponse,
2025-08-07 09:46:04 +01:00
MatchSpecificRequest,
RuleRequest,
)
from database import Receipt as DBReceipt
from database import Transaction as DBTransaction
from database import create_db_tables, db_dependency
from document_processor import DocumentProcessor
from matching_engine import MatchingEngine
from models import Receipt, Transaction
# Run the table setup routine imported from the `database` module as soon as
# this module is imported, i.e. before any request handler can execute.
create_db_tables()

# Send log records both to a local "app.log" file and to the console stream.
_log_handlers = [logging.FileHandler("app.log"), logging.StreamHandler()]
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
    handlers=_log_handlers,
)
logger = logging.getLogger(__name__)

_app_metadata = {
    "title": "AI Bookkeeper - Data Science Engine",
    "description": "AI-powered receipt-to-transaction matching engine. Receives transaction data and provides intelligent matching capabilities.",
    "version": "1.0.0",
}
app = FastAPI(**_app_metadata)

# CORS middleware: every origin, method and header is accepted.
# NOTE(review): wildcard origins combined with allow_credentials=True is very
# permissive -- confirm this is intended outside of local development.
_cors_options = {
    "allow_origins": ["*"],
    "allow_credentials": True,
    "allow_methods": ["*"],
    "allow_headers": ["*"],
}
app.add_middleware(CORSMiddleware, **_cors_options)

# Initialize DS Engine components (shared by all request handlers below).
matching_engine = MatchingEngine()
document_processor = DocumentProcessor()

# In-memory storage for uploaded files (in production, use a database).
# Maps a generated file_id to {"filename", "content", "upload_date"}.
uploaded_files = {}
# Helper functions for database operations
def get_transactions_from_db(
    db: Session, user_id: str = None, categorization_id: str = None
):
    """Fetch stored transactions, optionally narrowed by owner and batch.

    Args:
        db: Open database session used to run the query.
        user_id: When truthy, only transactions with this ``user_id`` are
            returned.
        categorization_id: When truthy, only transactions whose
            ``categorisation_id`` equals this value are returned.

    Returns:
        A list with every matching ``DBTransaction`` row.
    """
    optional_filters = (
        (DBTransaction.user_id, user_id),
        (DBTransaction.categorisation_id, categorization_id),
    )
    selection = db.query(DBTransaction)
    for column, wanted in optional_filters:
        # Falsy values (None, "") mean "do not filter on this column".
        if wanted:
            selection = selection.filter(column == wanted)
    return selection.all()
def get_receipt_from_db(db: Session, file_id: str):
    """Look up a single receipt row by the file it was extracted from.

    Args:
        db: Open database session used to run the query.
        file_id: Value to compare against ``DBReceipt.file_id``.

    Returns:
        The first matching ``DBReceipt`` row, or ``None`` when nothing matches.
    """
    matching_receipts = db.query(DBReceipt).filter(DBReceipt.file_id == file_id)
    return matching_receipts.first()
def get_receipts_from_db(db: Session, file_ids: List[str]):
    """Load every receipt row whose ``file_id`` is one of *file_ids*.

    Args:
        db: Open database session used to run the query.
        file_ids: File identifiers to look for.

    Returns:
        A list of the matching ``DBReceipt`` rows; identifiers without a
        stored receipt are simply absent from the result.
    """
    belongs_to_requested_files = DBReceipt.file_id.in_(file_ids)
    return db.query(DBReceipt).filter(belongs_to_requested_files).all()
2025-08-05 22:25:51 +01:00
2025-08-05 22:25:51 +01:00
@app.get("/")
async def root():
    """Liveness probe: return a static payload describing the service."""
    service_info = dict(
        message="AI Bookkeeper Data Science Engine is running",
        version="1.0.0",
        status="healthy",
    )
    return service_info
2025-08-05 22:25:51 +01:00
# ============================================================================
# TRANSACTION IMPORT ENDPOINTS
# ============================================================================
2025-08-07 09:46:04 +01:00
# @app.post("/transactions/import/csv")
# async def import_transactions_csv(file: UploadFile = File(...), user_id: str = "", categorization_id: str = ""):
# """
# Import transactions from a CSV file (custom bank export format).
# """
# try:
# content = await file.read()
# decoded = content.decode("utf-8")
# reader = csv.DictReader(io.StringIO(decoded))
# transactions = []
# errors = []
# for idx, row in enumerate(reader):
# try:
# # Use correct headers and strip whitespace
# account_number = row.get("Account Number") or row.get(
# "Account Number ".strip()
# )
# txn_date_raw = row.get("Transaction Date") or row.get(
# "Transaction Date ".strip()
# )
# amount_raw = row.get("Amount") or row.get("Amount ".strip())
# payee_name = row.get("Description 2") or row.get(
# "Description 2 ".strip()
# )
# memo = f"{row.get('Account Type', '').strip()} {row.get('Cheque Number', '').strip()} {row.get('Description 1', '').strip()}".strip()
# # Compose ID
# txn_id = f"{account_number}_{idx + 1}"
# # Parse date (try multiple formats)
# txn_date_str = txn_date_raw.strip()
# txn_date = None
# for fmt in ("%m/%d/%y", "%m/%d/%Y"):
# try:
# txn_date = datetime.strptime(txn_date_str, fmt).strftime(
# "%Y-%m-%d"
# )
# break
# except Exception:
# continue
# if not txn_date:
# raise ValueError(f"Could not parse date: {txn_date_str}")
# # Parse amount
# amount = float(amount_raw.replace(",", "").strip())
# transactions.append(
# {
# "id": txn_id,
# "txn_date": txn_date,
# "amount": amount,
# "payee_name": payee_name.strip(),
# "memo": memo,
# }
# )
# except Exception as e:
# errors.append(f"Row {idx + 1}: {str(e)}")
# # Store transactions globally for auto-matching
# global stored_transactions
# stored_transactions = transactions
# return {
# "imported_count": len(transactions),
# "converted_transactions": transactions,
# "errors": errors,
# }
# except Exception as e:
# raise HTTPException(status_code=500, detail=str(e))
2025-08-05 22:25:51 +01:00
def _normalise_csv_row(row):
    """Return a copy of *row* with surrounding whitespace removed from header names."""
    # csv.DictReader stores surplus cells under a non-string key (None by
    # default); those are not named columns, so they are dropped here.
    return {
        header.strip(): value
        for header, value in row.items()
        if isinstance(header, str)
    }


def _parse_csv_transaction_date(raw_date):
    """Parse a ``MM/DD/YY`` or ``MM/DD/YYYY`` string into a ``datetime``."""
    if raw_date is None:
        raise ValueError("Missing 'Transaction Date'")
    date_text = raw_date.strip()
    for fmt in ("%m/%d/%y", "%m/%d/%Y"):
        try:
            return datetime.strptime(date_text, fmt)
        except ValueError:
            continue
    raise ValueError(f"Could not parse date: {date_text}")


def _parse_csv_transaction_row(row, row_number):
    """Extract the transaction fields from one bank-export CSV row.

    Raises ``ValueError`` when a required column is missing or malformed.
    """
    fields = _normalise_csv_row(row)

    amount_raw = fields.get("Amount")
    if amount_raw is None:
        raise ValueError("Missing 'Amount'")
    payee_raw = fields.get("Description 2")
    if payee_raw is None:
        raise ValueError("Missing 'Description 2'")

    txn_date = _parse_csv_transaction_date(fields.get("Transaction Date"))
    # Thousands separators are removed before conversion ("1,234.50").
    amount = float(amount_raw.replace(",", "").strip())

    # Optional descriptive columns; absent or empty cells count as "".
    memo_parts = [
        (fields.get(column) or "").strip()
        for column in ("Account Type", "Cheque Number", "Description 1")
    ]
    return {
        "txn_id": f"{fields.get('Account Number')}_{row_number}",
        "date": txn_date,
        "amount": amount,
        "payee": payee_raw.strip(),
        "memo": " ".join(memo_parts).strip(),
    }


@app.post("/transactions/import/csv")
async def import_transactions_csv(
    db: db_dependency,
    file: UploadFile = File(...),
    categorization_id: str = Form(...),
    user_id: str = Form(...),
):
    """
    Import transactions from a CSV file (custom bank export format).

    Each data row is parsed independently: rows that cannot be parsed are
    reported in the ``errors`` list and skipped, while the remaining rows are
    stored as ``DBTransaction`` records and committed in a single transaction.

    Args:
        db: Database session injected by FastAPI.
        file: Uploaded CSV file, expected to be UTF-8 (a leading BOM is
            tolerated).
        categorization_id: Identifier stored on every imported transaction.
        user_id: Owner identifier stored on every imported transaction.

    Returns:
        A dict with ``imported_count``, ``converted_transactions``, ``errors``,
        ``categorization_id`` and ``user_id``.

    Raises:
        HTTPException: 400 when the upload is not valid UTF-8; 500 for any
            other failure (the pending database work is rolled back first).
    """
    try:
        content = await file.read()
        try:
            # "utf-8-sig" drops a leading byte-order mark, which would
            # otherwise become part of the first header name.
            decoded = content.decode("utf-8-sig")
        except UnicodeDecodeError as exc:
            raise HTTPException(
                status_code=400, detail="CSV file must be UTF-8 encoded"
            ) from exc

        reader = csv.DictReader(io.StringIO(decoded))
        transactions = []
        errors = []
        for idx, row in enumerate(reader):
            try:
                parsed = _parse_csv_transaction_row(row, idx + 1)
                db.add(
                    DBTransaction(
                        transaction_id=parsed["txn_id"],
                        amount=parsed["amount"],
                        date=parsed["date"],
                        vendor=parsed["payee"],
                        description=parsed["memo"],
                        categorisation_id=categorization_id,
                        user_id=user_id,
                    )
                )
                transactions.append(
                    {
                        "id": parsed["txn_id"],
                        "txn_date": parsed["date"].strftime("%Y-%m-%d"),
                        "amount": parsed["amount"],
                        "payee_name": parsed["payee"],
                        "memo": parsed["memo"],
                        "categorization_id": categorization_id,
                        "user_id": user_id,
                    }
                )
            except Exception as e:
                # Best effort: a bad row is reported and must not abort the
                # rest of the import.
                errors.append(f"Row {idx + 1}: {str(e)}")

        # Commit all transactions to database
        db.commit()
        return {
            "imported_count": len(transactions),
            "converted_transactions": transactions,
            "errors": errors,
            "categorization_id": categorization_id,
            "user_id": user_id,
        }
    except HTTPException:
        raise
    except Exception as e:
        # Discard partially staged rows so the session is left clean.
        db.rollback()
        logger.exception("Error importing transactions from CSV")
        raise HTTPException(status_code=500, detail=str(e))
2025-08-05 22:25:51 +01:00
@app.post("/transactions/import/image")
async def import_transactions_from_image(
    db: db_dependency,
    file: UploadFile = File(...),
    categorization_id: str = Form("image_import"),
    user_id: str = Form("default"),
):
    """
    Import transactions from an image (bank statement, credit card statement, etc.) using AI extraction.

    The upload is saved through ``document_processor``, transactions are
    extracted from it, and every transaction that can be converted is stored
    as a ``DBTransaction``. Transactions that fail conversion are skipped and
    described in the ``errors`` list of the response.

    Args:
        db: Database session injected by FastAPI.
        file: Uploaded image or PDF.
        categorization_id: Identifier stored on every imported transaction.
        user_id: Owner identifier stored on every imported transaction.

    Returns:
        A dict with ``imported_count``, ``converted_transactions`` and
        ``errors``.

    Raises:
        HTTPException: 400 for an unsupported or missing file extension; 500
            when extraction fails or an unexpected error occurs.
    """
    allowed_types = ["jpg", "jpeg", "png", "gif", "bmp", "pdf"]
    try:
        # Validate file type (a missing filename is treated as unsupported).
        filename = file.filename or ""
        file_extension = filename.split(".")[-1].lower()
        if file_extension not in allowed_types:
            raise HTTPException(
                status_code=400,
                detail=f"Unsupported file type. Allowed: {allowed_types}",
            )

        # Read file content and save it to disk for the extractor.
        content = await file.read()
        image_path = await document_processor.save_uploaded_file(content, filename)

        # Extract transactions from image (pass file path)
        extraction_result = await document_processor.extract_transactions_from_image(
            image_path
        )
        if not extraction_result.get("extraction_success", False):
            raise HTTPException(
                status_code=500,
                detail=extraction_result.get("error", "Extraction failed"),
            )

        extracted_transactions = extraction_result.get("transactions", [])
        transactions = []
        errors = []
        for idx, txn in enumerate(extracted_transactions):
            try:
                txn_id = f"img_{filename}_{idx + 1}"
                txn_date_raw = txn.get("date")
                amount = txn.get("amount")
                vendor = txn.get("vendor")
                memo = txn.get("memo", "")

                # Parse date to YYYY-MM-DD format
                txn_date = document_processor._parse_date_to_iso(txn_date_raw)
                if not txn_date:
                    # Fallback: prefix the current year. This only yields a
                    # valid date when the raw value looks like "MM-DD";
                    # anything else fails strptime below and is skipped.
                    txn_date = f"{datetime.now().year}-{txn_date_raw}"
                txn_date_obj = datetime.strptime(txn_date, "%Y-%m-%d")

                db.add(
                    DBTransaction(
                        transaction_id=txn_id,
                        amount=float(amount),
                        date=txn_date_obj,
                        vendor=vendor,
                        description=memo,
                        categorisation_id=categorization_id,
                        user_id=user_id,
                    )
                )
                transactions.append(
                    {
                        "id": txn_id,
                        "txn_date": txn_date,
                        "amount": amount,
                        "payee_name": vendor,
                        "memo": memo,
                    }
                )
            except Exception as e:
                # Best effort: skip the transaction but tell the caller why.
                logger.warning(f"Error processing transaction {idx}: {str(e)}")
                errors.append(f"Transaction {idx + 1}: {str(e)}")

        # Commit all transactions to database
        db.commit()
        return {
            "imported_count": len(transactions),
            "converted_transactions": transactions,
            "errors": errors,
        }
    except HTTPException:
        # Keep the intended status code (e.g. 400) instead of wrapping it
        # in a generic 500 below.
        raise
    except Exception as e:
        db.rollback()
        logger.error(f"Error importing transactions from image: {str(e)}")
        raise HTTPException(status_code=500, detail=str(e))
2025-08-05 22:25:51 +01:00
# ============================================================================
# DOCUMENT PROCESSING ENDPOINTS
# ============================================================================
2025-08-05 22:25:51 +01:00
@app.post("/upload-multiple", response_model=List[DocumentUploadResponse])
async def upload_multiple_documents(files: List[UploadFile] = File(...)):
    """
    Upload multiple receipt images for processing.

    This endpoint accepts multiple image files and returns file IDs
    that can be used with the /process/{file_id} endpoint.

    All file extensions are validated before anything is stored, so a
    rejected request leaves no partial uploads in memory.

    Args:
        files: Uploaded images or PDFs.

    Returns:
        One ``DocumentUploadResponse`` per uploaded file, in request order.

    Raises:
        HTTPException: 400 when any file has an unsupported or missing
            extension; 500 for unexpected failures.
    """
    allowed_types = ["jpg", "jpeg", "png", "gif", "bmp", "pdf"]
    try:
        # First pass: validate every file type before touching the store.
        extensions = []
        for file in files:
            file_extension = (file.filename or "").split(".")[-1].lower()
            if file_extension not in allowed_types:
                raise HTTPException(
                    status_code=400,
                    detail=f"Unsupported file type for {file.filename}. Allowed: {allowed_types}",
                )
            extensions.append(file_extension)

        # Second pass: read and store the content under a fresh unique ID.
        responses = []
        for file, file_extension in zip(files, extensions):
            file_id = str(uuid.uuid4())
            content = await file.read()
            # One timestamp so the stored record and the response agree.
            uploaded_at = datetime.now()
            uploaded_files[file_id] = {
                "filename": file.filename,
                "content": content,
                "upload_date": uploaded_at,
            }
            responses.append(
                DocumentUploadResponse(
                    file_id=file_id,
                    filename=file.filename,
                    file_type=file_extension,
                    upload_date=uploaded_at,
                    status="uploaded",
                )
            )
        return responses
    except HTTPException:
        # Preserve the 400 raised for unsupported file types.
        raise
    except Exception as e:
        logger.error(f"Error uploading documents: {str(e)}")
        raise HTTPException(status_code=500, detail=str(e))
2025-08-05 22:25:51 +01:00
@app.post("/process/{file_id}", response_model=DocumentProcessResponse)
async def process_document(file_id: str, db: db_dependency):
    """
    Process a previously uploaded document to extract receipt information.

    This endpoint uses AI to extract structured data from receipt images,
    including vendor, amount, date, and category information. The extracted
    data is stored as a ``DBReceipt`` row and echoed back to the caller.

    Args:
        file_id: Identifier returned by the upload endpoint.
        db: Database session injected by FastAPI.

    Returns:
        A ``DocumentProcessResponse`` built from the extracted receipt data.

    Raises:
        HTTPException: 404 when *file_id* is not among the uploaded files;
            500 for any processing or database failure.
    """
    try:
        # Check if file exists
        file_data = uploaded_files.get(file_id)
        if file_data is None:
            raise HTTPException(status_code=404, detail=f"File {file_id} not found")

        # Save file temporarily and process it
        file_path = await document_processor.save_uploaded_file(
            file_data["content"], file_data["filename"]
        )
        file_type = file_data["filename"].split(".")[-1].lower()
        receipt_data = await document_processor.process_file(file_path, file_type)

        # Parse date for database storage; fall back to "now" when the
        # extracted date is missing, malformed or not a string.
        try:
            receipt_date = datetime.strptime(receipt_data.get("date") or "", "%Y-%m-%d")
        except (ValueError, TypeError):
            receipt_date = datetime.now()

        db.add(
            DBReceipt(
                receipt_id=f"receipt_{file_id}",
                file_id=file_id,
                amount=receipt_data.get("total_amount", 0.0),
                date=receipt_date,
                vendor=receipt_data.get("vendor", ""),
                description=receipt_data.get("description", ""),
                category=receipt_data.get("category", ""),
                tax_amount=receipt_data.get("tax_amount", 0.0),
                confidence=receipt_data.get("confidence", 0.0),
                # Stored as the text "True"/"False".
                extraction_success=str(receipt_data.get("extraction_success", False)),
                error_message=receipt_data.get("error"),
            )
        )
        db.commit()

        return DocumentProcessResponse(
            file_id=file_id,
            extraction_success=receipt_data.get("extraction_success", False),
            vendor=receipt_data.get("vendor", ""),
            description=receipt_data.get("description", ""),
            total_amount=receipt_data.get("total_amount", 0.0),
            tax_amount=receipt_data.get("tax_amount", 0.0),
            date=receipt_data.get("date", ""),
            category=receipt_data.get("category", ""),
            confidence=receipt_data.get("confidence", 0.0),
            error=receipt_data.get("error", None),
        )
    except HTTPException:
        # Preserve the 404 for unknown file IDs.
        raise
    except Exception as e:
        db.rollback()
        logger.error(f"Error processing document {file_id}: {str(e)}")
        raise HTTPException(status_code=500, detail=str(e))
2025-08-05 22:25:51 +01:00
# ============================================================================
# MATCHING ENDPOINTS
# ============================================================================
2025-08-07 09:46:04 +01:00
# @app.post("/match-specific", response_model=MatchingResponse)
# async def match_specific_receipts(file_ids: List[str]):
# """
# Match specific receipts against imported transactions.
# This endpoint takes a list of receipt file IDs and matches them against
# the currently imported transactions using AI-powered matching logic.
# """
# try:
# logger.info(f"Starting match-specific for file IDs: {file_ids}")
# # Check if transactions are imported
# if not stored_transactions:
# logger.warning("No transactions imported")
# raise HTTPException(
# status_code=400,
# detail="No transactions imported. Please upload CSV first.",
# )
# logger.info(f"Found {len(stored_transactions)} stored transactions")
# # Convert stored transactions to Transaction objects
# transactions = []
# for txn in stored_transactions:
# try:
# txn_date = datetime.strptime(txn["txn_date"], "%Y-%m-%d")
# transaction = Transaction(
# id=txn["id"],
# transaction_date=txn_date,
# amount=txn["amount"],
# vendor=txn["payee_name"],
# notes=txn["memo"],
# )
# transactions.append(transaction)
# except Exception as e:
# logger.warning(f"Error converting transaction {txn['id']}: {str(e)}")
# continue
# logger.info(f"Converted {len(transactions)} transactions")
# # Get receipts for the specified file IDs
# receipts = []
# missing_files = []
# for file_id in file_ids:
# if file_id in processed_receipts:
# receipt_data = processed_receipts[file_id]
# logger.info(f"DEBUG: receipt_data for {file_id}: {receipt_data}")
# logger.info(
# f"DEBUG: receipt_data keys for {file_id}: {list(receipt_data.keys())}"
# )
# try:
# # Handle missing date field
# if "date" not in receipt_data or not receipt_data["date"]:
# logger.warning(
# f"Missing date for receipt {file_id}, using current date"
# )
# receipt_date = datetime.now()
# else:
# receipt_date = datetime.strptime(
# receipt_data["date"], "%Y-%m-%d"
# )
# # Handle missing amount field - try multiple possible keys
# amount = receipt_data.get("amount")
# if amount is None:
# amount = receipt_data.get("total_amount")
# if amount is None:
# amount = receipt_data.get("amount_total")
# if amount is None:
# logger.warning(
# f"Missing amount for receipt {file_id}, using 0.0"
# )
# amount = 0.0
# # Ensure amount is a float
# try:
# amount = float(amount)
# except (ValueError, TypeError):
# logger.warning(
# f"Invalid amount '{amount}' for receipt {file_id}, using 0.0"
# )
# amount = 0.0
# logger.info(f"DEBUG: amount for {file_id}: {amount}")
# # Handle missing vendor field
# vendor = receipt_data.get("vendor", "")
# if not vendor:
# logger.warning(
# f"Missing vendor for receipt {file_id}, using 'Unknown'"
# )
# vendor = "Unknown"
# # Handle missing category field
# category = receipt_data.get("category", "Other")
# # Handle description field
# description = receipt_data.get("description", "")
# # Handle tax field
# tax = receipt_data.get("tax", receipt_data.get("tax_amount", 0.0))
# try:
# tax = float(tax)
# except (ValueError, TypeError):
# tax = 0.0
# receipt = Receipt(
# id=file_id,
# file_name=uploaded_files[file_id]["filename"],
# upload_date=uploaded_files[file_id]["upload_date"],
# receipt_date=receipt_date,
# amount=amount,
# tax=tax,
# vendor=vendor,
# category=category,
# description=description,
# )
# receipts.append(receipt)
# logger.info(f"Added receipt: {receipt.vendor} - ${receipt.amount}")
# except Exception as e:
# logger.warning(
# f"Error creating receipt object for {file_id}: {str(e)}"
# )
# missing_files.append(f"{file_id} (error: {str(e)})")
# else:
# logger.warning(f"Receipt {file_id} not found in processed_receipts")
# missing_files.append(f"{file_id} (not found)")
# if missing_files:
# logger.error(f"Missing files: {missing_files}")
# raise HTTPException(
# status_code=400, detail=f"Missing files: {missing_files}"
# )
# logger.info(
# f"Processing {len(receipts)} receipts against {len(transactions)} transactions"
# )
# # Perform matching
# try:
# logger.info("Starting direct matching call (without ThreadPoolExecutor)")
# logger.info(f"matching_engine type: {type(matching_engine)}")
# logger.info(
# f"matching_engine.process_matching type: {type(matching_engine.process_matching)}"
# )
# logger.info(f"receipts type: {type(receipts)}, length: {len(receipts)}")
# logger.info(
# f"transactions type: {type(transactions)}, length: {len(transactions)}"
# )
# matches = matching_engine.process_matching(receipts, transactions)
# logger.info(
# f"Matching completed successfully. Found {len(matches)} matches"
# )
# # Convert matches to response format
# match_responses = []
# for match in matches:
# logger.info(f"Raw match object: {match}")
# logger.info(f" receipt_id: {match.receipt.id}")
# logger.info(f" transaction_id: {match.transaction.id}")
# logger.info(f" confidence_score: {match.confidence_score}")
# logger.info(f" match_reason: {match.match_reason}")
# logger.info(f" receipt_vendor: {match.receipt.vendor}")
# logger.info(f" receipt_amount: {match.receipt.amount}")
# logger.info(f" transaction_vendor: {match.transaction.vendor}")
# logger.info(f" transaction_amount: {match.transaction.amount}")
# match_response = MatchResponse(
# receipt_id=match.receipt.id,
# transaction_id=match.transaction.id,
# confidence_score=match.confidence_score,
# match_reason=match.match_reason,
# receipt_vendor=match.receipt.vendor,
# receipt_amount=match.receipt.amount,
# receipt_description=match.receipt.description,
# receipt_category=match.receipt.category,
# receipt_tax_amount=match.receipt.tax,
# transaction_vendor=match.transaction.vendor,
# transaction_amount=match.transaction.amount,
# )
# match_responses.append(match_response)
# logger.info(
# f"Successfully created MatchResponse for {match.receipt.vendor} -> {match.transaction.vendor}"
# )
# logger.info(f"Formatted {len(match_responses)} match responses")
# # Calculate statistics
# if match_responses:
# high_confidence = sum(
# 1 for m in match_responses if m.confidence_score >= 0.8
# )
# low_confidence = len(match_responses) - high_confidence
# avg_score = sum(m.confidence_score for m in match_responses) / len(
# match_responses
# )
# else:
# high_confidence = low_confidence = avg_score = 0
# stats = {
# "total": len(match_responses),
# "high_confidence": high_confidence,
# "low_confidence": low_confidence,
# "avg_score": round(avg_score, 2),
# }
# logger.info(f"Generated stats: {stats}")
# logger.info(
# f"Match-specific completed successfully with {len(match_responses)} matches"
# )
# return MatchingResponse(matches=match_responses, stats=stats)
# except Exception as e:
# logger.error(f"Exception in matching section: {str(e)}")
# logger.error(f"Exception type: {type(e)}")
# logger.error(f"Exception args: {e.args}")
# logger.error(f"Traceback: {e.__traceback__}")
# raise HTTPException(
# status_code=500, detail=f"Unexpected matching error: {str(e)}"
# )
# except HTTPException:
# raise
# except Exception as e:
# logger.error(f"Unexpected error in match_specific_receipts: {str(e)}")
# raise HTTPException(status_code=500, detail=str(e))
2025-08-05 22:25:51 +01:00
@app.post("/match-specific", response_model=MatchingResponse)
async def match_specific_receipts(request: MatchSpecificRequest, db: db_dependency):
    """
    Match specific receipts against imported transactions.

    This endpoint takes a request with receipt file IDs and categorization ID,
    and matches them against the currently imported transactions using AI-powered matching logic.

    Steps: load the transactions stored under the categorization ID, load the
    receipts for the requested file IDs, run ``matching_engine.process_matching``
    and return the matches with summary statistics. Receipts that are missing
    or cannot be converted are logged and left out of the matching run.

    Raises:
        HTTPException: 400 when no transactions or no usable receipts are
            found; 500 when matching or anything else fails unexpectedly.
    """
    try:
        file_ids = request.file_ids
        categorization_id = request.categorization_id
        logger.info(
            f"Starting match-specific for file IDs: {file_ids}, categorization_id: {categorization_id}"
        )

        # --- Transactions -------------------------------------------------
        db_transactions = get_transactions_from_db(
            db, categorization_id=categorization_id
        )
        if not db_transactions:
            logger.warning("No transactions found in database")
            raise HTTPException(
                status_code=400,
                detail="No transactions found. Please upload CSV first.",
            )
        logger.info(f"Found {len(db_transactions)} transactions in database")

        # Convert database rows into the matching engine's Transaction model;
        # rows that fail conversion are logged and skipped.
        transactions = []
        for row in db_transactions:
            try:
                converted = Transaction(
                    id=row.transaction_id,
                    transaction_date=row.date,
                    amount=row.amount,
                    vendor=row.vendor,
                    notes=row.description or "",
                )
                transactions.append(converted)
            except Exception as exc:
                logger.warning(
                    f"Error converting transaction {row.transaction_id}: {str(exc)}"
                )
        logger.info(f"Converted {len(transactions)} transactions")

        # --- Receipts -----------------------------------------------------
        db_receipts = get_receipts_from_db(db, file_ids)
        receipts = []
        missing_files = []
        for file_id in file_ids:
            # First stored receipt that belongs to this file, if any.
            stored = next(
                (candidate for candidate in db_receipts if candidate.file_id == file_id),
                None,
            )
            if not stored:
                logger.warning(f"Receipt {file_id} not found in database")
                missing_files.append(file_id)
                continue
            try:
                loaded = Receipt(
                    id=stored.receipt_id,
                    receipt_date=stored.date,
                    amount=stored.amount,
                    vendor=stored.vendor,
                    category=stored.category or "Other",
                    description=stored.description or "",
                    tax=stored.tax_amount or 0.0,
                    file_name=stored.file_id,
                    upload_date=datetime.now(),
                )
                receipts.append(loaded)
                logger.info(f"Successfully loaded receipt for file_id: {file_id}")
            except Exception as exc:
                logger.error(
                    f"Error creating receipt object for {file_id}: {str(exc)}"
                )
                missing_files.append(file_id)

        logger.info(f"Found {len(receipts)} receipts, {len(missing_files)} missing")
        if missing_files:
            logger.warning(f"Missing files: {missing_files}")
        if not receipts:
            logger.warning("No valid receipts found")
            raise HTTPException(
                status_code=400,
                detail="No valid receipts found for matching.",
            )

        # --- Matching -----------------------------------------------------
        logger.info(
            f"Starting matching with {len(receipts)} receipts and {len(transactions)} transactions"
        )
        try:
            matching_results = matching_engine.process_matching(receipts, transactions)
            logger.info(f"Matching completed, got {len(matching_results)} results")

            # Convert matching results to response format. A result without a
            # transaction is reported with placeholder transaction fields.
            match_responses = []
            for result in matching_results:
                if result.transaction:
                    txn_id = result.transaction.id
                else:
                    txn_id = "no_match"
                if result.transaction:
                    txn_vendor = result.transaction.vendor
                else:
                    txn_vendor = ""
                if result.transaction:
                    txn_amount = result.transaction.amount
                else:
                    txn_amount = 0.0
                match_responses.append(
                    MatchResponse(
                        receipt_id=result.receipt.id,
                        transaction_id=txn_id,
                        confidence_score=result.confidence_score,
                        match_reason=result.match_reason,
                        receipt_vendor=result.receipt.vendor,
                        receipt_amount=result.receipt.amount,
                        receipt_description=result.receipt.description,
                        receipt_category=result.receipt.category,
                        receipt_tax_amount=result.receipt.tax,
                        transaction_vendor=txn_vendor,
                        transaction_amount=txn_amount,
                    )
                )

            # Calculate statistics: scores >= 0.8 count as high confidence and
            # scores < 0.5 as low; scores in between are counted in neither.
            high_confidence = sum(
                1 for item in matching_results if item.confidence_score >= 0.8
            )
            low_confidence = sum(
                1 for item in matching_results if item.confidence_score < 0.5
            )
            if matching_results:
                avg_score = sum(
                    item.confidence_score for item in matching_results
                ) / len(matching_results)
            else:
                avg_score = 0

            stats = {
                "total": len(match_responses),
                "high_confidence": high_confidence,
                "low_confidence": low_confidence,
                "avg_score": round(avg_score, 2),
            }
            logger.info(f"Generated stats: {stats}")
            logger.info(
                f"Match-specific completed successfully with {len(match_responses)} matches"
            )
            return MatchingResponse(matches=match_responses, stats=stats)
        except Exception as exc:
            logger.error(f"Exception in matching section: {str(exc)}")
            logger.error(f"Exception type: {type(exc)}")
            logger.error(f"Exception args: {exc.args}")
            raise HTTPException(
                status_code=500, detail=f"Unexpected matching error: {str(exc)}"
            )
    except HTTPException:
        # Deliberate HTTP errors raised above pass through unchanged.
        raise
    except Exception as exc:
        logger.error(f"Unexpected error in match_specific_receipts: {str(exc)}")
        raise HTTPException(status_code=500, detail=str(exc))
# ============================================================================
# DATABASE QUERY ENDPOINTS
# ============================================================================
@app.get("/transactions")
async def get_transactions(
    db: db_dependency,
    user_id: str = None,
    categorization_id: str = None,
    limit: int = 100,
):
    """
    Get transactions from the database.

    Args:
        db: Database session injected by FastAPI.
        user_id: Optional owner filter.
        categorization_id: Optional batch filter.
        limit: Upper bound applied to the result by slicing the loaded rows.

    Returns:
        A dict with the serialised ``transactions`` and their ``count``.

    Raises:
        HTTPException: 500 when loading or serialising the rows fails.
    """
    try:
        matching_rows = get_transactions_from_db(db, user_id, categorization_id)
        # NOTE(review): all matching rows are loaded before slicing; pushing
        # the limit into the query would avoid that for large tables.
        serialised = [
            {
                "id": row.transaction_id,
                "amount": row.amount,
                "date": row.date.strftime("%Y-%m-%d"),
                "vendor": row.vendor,
                "description": row.description,
                "category": row.category,
                "tax_amount": row.tax_amount,
                "categorisation_id": row.categorisation_id,
                "user_id": row.user_id,
            }
            for row in matching_rows[:limit]
        ]
        return {"transactions": serialised, "count": len(serialised)}
    except Exception as exc:
        raise HTTPException(status_code=500, detail=str(exc))
@app.get("/receipts")
async def get_receipts(db: db_dependency, limit: int = 100):
    """
    Get receipts from the database.

    Args:
        db: Database session injected by FastAPI.
        limit: Maximum number of rows requested from the database.

    Returns:
        A dict with the serialised ``receipts`` and their ``count``.

    Raises:
        HTTPException: 500 when loading or serialising the rows fails.
    """
    try:
        stored_receipts = db.query(DBReceipt).limit(limit).all()
        serialised = [
            {
                "id": row.receipt_id,
                "file_id": row.file_id,
                "amount": row.amount,
                "date": row.date.strftime("%Y-%m-%d"),
                "vendor": row.vendor,
                "description": row.description,
                "category": row.category,
                "tax_amount": row.tax_amount,
                "confidence": row.confidence,
                "extraction_success": row.extraction_success,
                "error_message": row.error_message,
            }
            for row in stored_receipts
        ]
        return {"receipts": serialised, "count": len(serialised)}
    except Exception as exc:
        raise HTTPException(status_code=500, detail=str(exc))
2025-08-05 22:25:51 +01:00
# ============================================================================
# RULES MANAGEMENT ENDPOINTS
# ============================================================================
2025-08-05 22:25:51 +01:00
@app.post("/rules")
async def add_rule(request: RuleRequest):
    """
    Add a new AI rule for transaction matching.

    The rule is built from the request fields and appended to the matching
    engine's in-memory rule list.

    Raises:
        HTTPException: 500 when the rule cannot be created or appended.
    """
    try:
        rule_fields = {
            "name": request.name,
            "condition": request.condition,
            "action": request.action,
            "source": request.source,
        }
        active_rules = matching_engine.rules_engine.rules
        active_rules.append(AIRule(**rule_fields))
        return {"message": f"Rule '{request.name}' added successfully"}
    except Exception as exc:
        raise HTTPException(status_code=500, detail=str(exc))
2025-08-05 22:25:51 +01:00
@app.get("/rules")
async def get_rules():
    """
    Get all current AI rules.

    Returns:
        A dict whose ``rules`` entry lists name, condition, action, source
        and status for each rule held by the matching engine.

    Raises:
        HTTPException: 500 when a rule cannot be serialised.
    """
    try:
        serialised_rules = [
            {
                "name": entry.name,
                "condition": entry.condition,
                "action": entry.action,
                "source": entry.source,
                "status": entry.status,
            }
            for entry in matching_engine.rules_engine.rules
        ]
        return {"rules": serialised_rules}
    except Exception as exc:
        raise HTTPException(status_code=500, detail=str(exc))
2025-08-05 22:25:51 +01:00
@app.delete("/rules/{rule_name}")
async def delete_rule(rule_name: str):
    """
    Delete an AI rule by name.

    Only the first rule whose name equals *rule_name* is removed.

    Raises:
        HTTPException: 404 when no rule has that name; 500 for unexpected
            failures.
    """
    try:
        active_rules = matching_engine.rules_engine.rules
        # Index of the first rule carrying the requested name, if any.
        position = next(
            (
                index
                for index, candidate in enumerate(active_rules)
                if candidate.name == rule_name
            ),
            None,
        )
        if position is None:
            raise HTTPException(
                status_code=404, detail=f"Rule '{rule_name}' not found"
            )
        del active_rules[position]
        return {"message": f"Rule '{rule_name}' deleted successfully"}
    except HTTPException:
        raise
    except Exception as exc:
        raise HTTPException(status_code=500, detail=str(exc))
2025-08-05 22:25:51 +01:00
# ============================================================================
# STATISTICS ENDPOINT
# ============================================================================
2025-08-05 22:25:51 +01:00
@app.get("/stats")
async def get_stats(db: db_dependency):
    """
    Get system statistics.

    Returns:
        A dict with the number of stored transactions and receipts, the
        number of files held in the in-memory upload store, and the number of
        rules in the matching engine.

    Raises:
        HTTPException: 500 when a count cannot be obtained.
    """
    try:
        # Row counts come from the database; the remaining figures come from
        # in-process state.
        transaction_count = db.query(DBTransaction).count()
        receipt_count = db.query(DBReceipt).count()
        summary = {
            "total_transactions": transaction_count,
            "total_receipts": receipt_count,
        }
        summary["total_uploaded_files"] = len(uploaded_files)
        summary["rules_count"] = len(matching_engine.rules_engine.rules)
        return summary
    except Exception as exc:
        raise HTTPException(status_code=500, detail=str(exc))
2025-08-05 22:25:51 +01:00
if __name__ == "__main__":
    # Script entry point: serve the app with uvicorn on all interfaces.
    import uvicorn

    server_options = {"host": "0.0.0.0", "port": 8343}
    uvicorn.run(app, **server_options)