added categorisation and user id

This commit is contained in:
bolade
2025-08-07 09:46:04 +01:00
parent 9698e2fcaf
commit 55ffc52339
3 changed files with 320 additions and 7 deletions
+315 -6
View File
@@ -5,7 +5,7 @@ import uuid
from datetime import datetime
from typing import List
from fastapi import FastAPI, File, HTTPException, UploadFile
from fastapi import FastAPI, File, Form, HTTPException, UploadFile
from fastapi.middleware.cors import CORSMiddleware
from ai_rules import AIRule
@@ -14,6 +14,7 @@ from api_models import (
DocumentUploadResponse,
MatchingResponse,
MatchResponse,
MatchSpecificRequest,
RuleRequest,
)
from document_processor import DocumentProcessor
@@ -69,8 +70,78 @@ async def root():
# ============================================================================
# @app.post("/transactions/import/csv")
# async def import_transactions_csv(file: UploadFile = File(...), user_id: str = "", categorization_id: str = ""):
# """
# Import transactions from a CSV file (custom bank export format).
# """
# try:
# content = await file.read()
# decoded = content.decode("utf-8")
# reader = csv.DictReader(io.StringIO(decoded))
# transactions = []
# errors = []
# for idx, row in enumerate(reader):
# try:
# # Use correct headers and strip whitespace
# account_number = row.get("Account Number") or row.get(
# "Account Number ".strip()
# )
# txn_date_raw = row.get("Transaction Date") or row.get(
# "Transaction Date ".strip()
# )
# amount_raw = row.get("Amount") or row.get("Amount ".strip())
# payee_name = row.get("Description 2") or row.get(
# "Description 2 ".strip()
# )
# memo = f"{row.get('Account Type', '').strip()} {row.get('Cheque Number', '').strip()} {row.get('Description 1', '').strip()}".strip()
# # Compose ID
# txn_id = f"{account_number}_{idx + 1}"
# # Parse date (try multiple formats)
# txn_date_str = txn_date_raw.strip()
# txn_date = None
# for fmt in ("%m/%d/%y", "%m/%d/%Y"):
# try:
# txn_date = datetime.strptime(txn_date_str, fmt).strftime(
# "%Y-%m-%d"
# )
# break
# except Exception:
# continue
# if not txn_date:
# raise ValueError(f"Could not parse date: {txn_date_str}")
# # Parse amount
# amount = float(amount_raw.replace(",", "").strip())
# transactions.append(
# {
# "id": txn_id,
# "txn_date": txn_date,
# "amount": amount,
# "payee_name": payee_name.strip(),
# "memo": memo,
# }
# )
# except Exception as e:
# errors.append(f"Row {idx + 1}: {str(e)}")
# # Store transactions globally for auto-matching
# global stored_transactions
# stored_transactions = transactions
# return {
# "imported_count": len(transactions),
# "converted_transactions": transactions,
# "errors": errors,
# }
# except Exception as e:
# raise HTTPException(status_code=500, detail=str(e))
@app.post("/transactions/import/csv")
async def import_transactions_csv(file: UploadFile = File(...), user_id: str = "", categorization_id: str = ""):
async def import_transactions_csv(
file: UploadFile = File(...),
categorization_id: str = Form(...),
user_id: str = Form(...),
):
"""
Import transactions from a CSV file (custom bank export format).
"""
@@ -130,6 +201,8 @@ async def import_transactions_csv(file: UploadFile = File(...), user_id: str = "
"imported_count": len(transactions),
"converted_transactions": transactions,
"errors": errors,
"categorization_id": categorization_id,
"user_id": user_id,
}
except Exception as e:
raise HTTPException(status_code=500, detail=str(e))
@@ -304,16 +377,252 @@ async def process_document(file_id: str):
# ============================================================================
# @app.post("/match-specific", response_model=MatchingResponse)
# async def match_specific_receipts(file_ids: List[str]):
# """
# Match specific receipts against imported transactions.
# This endpoint takes a list of receipt file IDs and matches them against
# the currently imported transactions using AI-powered matching logic.
# """
# try:
# logger.info(f"Starting match-specific for file IDs: {file_ids}")
# # Check if transactions are imported
# if not stored_transactions:
# logger.warning("No transactions imported")
# raise HTTPException(
# status_code=400,
# detail="No transactions imported. Please upload CSV first.",
# )
# logger.info(f"Found {len(stored_transactions)} stored transactions")
# # Convert stored transactions to Transaction objects
# transactions = []
# for txn in stored_transactions:
# try:
# txn_date = datetime.strptime(txn["txn_date"], "%Y-%m-%d")
# transaction = Transaction(
# id=txn["id"],
# transaction_date=txn_date,
# amount=txn["amount"],
# vendor=txn["payee_name"],
# notes=txn["memo"],
# )
# transactions.append(transaction)
# except Exception as e:
# logger.warning(f"Error converting transaction {txn['id']}: {str(e)}")
# continue
# logger.info(f"Converted {len(transactions)} transactions")
# # Get receipts for the specified file IDs
# receipts = []
# missing_files = []
# for file_id in file_ids:
# if file_id in processed_receipts:
# receipt_data = processed_receipts[file_id]
# logger.info(f"DEBUG: receipt_data for {file_id}: {receipt_data}")
# logger.info(
# f"DEBUG: receipt_data keys for {file_id}: {list(receipt_data.keys())}"
# )
# try:
# # Handle missing date field
# if "date" not in receipt_data or not receipt_data["date"]:
# logger.warning(
# f"Missing date for receipt {file_id}, using current date"
# )
# receipt_date = datetime.now()
# else:
# receipt_date = datetime.strptime(
# receipt_data["date"], "%Y-%m-%d"
# )
# # Handle missing amount field - try multiple possible keys
# amount = receipt_data.get("amount")
# if amount is None:
# amount = receipt_data.get("total_amount")
# if amount is None:
# amount = receipt_data.get("amount_total")
# if amount is None:
# logger.warning(
# f"Missing amount for receipt {file_id}, using 0.0"
# )
# amount = 0.0
# # Ensure amount is a float
# try:
# amount = float(amount)
# except (ValueError, TypeError):
# logger.warning(
# f"Invalid amount '{amount}' for receipt {file_id}, using 0.0"
# )
# amount = 0.0
# logger.info(f"DEBUG: amount for {file_id}: {amount}")
# # Handle missing vendor field
# vendor = receipt_data.get("vendor", "")
# if not vendor:
# logger.warning(
# f"Missing vendor for receipt {file_id}, using 'Unknown'"
# )
# vendor = "Unknown"
# # Handle missing category field
# category = receipt_data.get("category", "Other")
# # Handle description field
# description = receipt_data.get("description", "")
# # Handle tax field
# tax = receipt_data.get("tax", receipt_data.get("tax_amount", 0.0))
# try:
# tax = float(tax)
# except (ValueError, TypeError):
# tax = 0.0
# receipt = Receipt(
# id=file_id,
# file_name=uploaded_files[file_id]["filename"],
# upload_date=uploaded_files[file_id]["upload_date"],
# receipt_date=receipt_date,
# amount=amount,
# tax=tax,
# vendor=vendor,
# category=category,
# description=description,
# )
# receipts.append(receipt)
# logger.info(f"Added receipt: {receipt.vendor} - ${receipt.amount}")
# except Exception as e:
# logger.warning(
# f"Error creating receipt object for {file_id}: {str(e)}"
# )
# missing_files.append(f"{file_id} (error: {str(e)})")
# else:
# logger.warning(f"Receipt {file_id} not found in processed_receipts")
# missing_files.append(f"{file_id} (not found)")
# if missing_files:
# logger.error(f"Missing files: {missing_files}")
# raise HTTPException(
# status_code=400, detail=f"Missing files: {missing_files}"
# )
# logger.info(
# f"Processing {len(receipts)} receipts against {len(transactions)} transactions"
# )
# # Perform matching
# try:
# logger.info("Starting direct matching call (without ThreadPoolExecutor)")
# logger.info(f"matching_engine type: {type(matching_engine)}")
# logger.info(
# f"matching_engine.process_matching type: {type(matching_engine.process_matching)}"
# )
# logger.info(f"receipts type: {type(receipts)}, length: {len(receipts)}")
# logger.info(
# f"transactions type: {type(transactions)}, length: {len(transactions)}"
# )
# matches = matching_engine.process_matching(receipts, transactions)
# logger.info(
# f"Matching completed successfully. Found {len(matches)} matches"
# )
# # Convert matches to response format
# match_responses = []
# for match in matches:
# logger.info(f"Raw match object: {match}")
# logger.info(f" receipt_id: {match.receipt.id}")
# logger.info(f" transaction_id: {match.transaction.id}")
# logger.info(f" confidence_score: {match.confidence_score}")
# logger.info(f" match_reason: {match.match_reason}")
# logger.info(f" receipt_vendor: {match.receipt.vendor}")
# logger.info(f" receipt_amount: {match.receipt.amount}")
# logger.info(f" transaction_vendor: {match.transaction.vendor}")
# logger.info(f" transaction_amount: {match.transaction.amount}")
# match_response = MatchResponse(
# receipt_id=match.receipt.id,
# transaction_id=match.transaction.id,
# confidence_score=match.confidence_score,
# match_reason=match.match_reason,
# receipt_vendor=match.receipt.vendor,
# receipt_amount=match.receipt.amount,
# receipt_description=match.receipt.description,
# receipt_category=match.receipt.category,
# receipt_tax_amount=match.receipt.tax,
# transaction_vendor=match.transaction.vendor,
# transaction_amount=match.transaction.amount,
# )
# match_responses.append(match_response)
# logger.info(
# f"Successfully created MatchResponse for {match.receipt.vendor} -> {match.transaction.vendor}"
# )
# logger.info(f"Formatted {len(match_responses)} match responses")
# # Calculate statistics
# if match_responses:
# high_confidence = sum(
# 1 for m in match_responses if m.confidence_score >= 0.8
# )
# low_confidence = len(match_responses) - high_confidence
# avg_score = sum(m.confidence_score for m in match_responses) / len(
# match_responses
# )
# else:
# high_confidence = low_confidence = avg_score = 0
# stats = {
# "total": len(match_responses),
# "high_confidence": high_confidence,
# "low_confidence": low_confidence,
# "avg_score": round(avg_score, 2),
# }
# logger.info(f"Generated stats: {stats}")
# logger.info(
# f"Match-specific completed successfully with {len(match_responses)} matches"
# )
# return MatchingResponse(matches=match_responses, stats=stats)
# except Exception as e:
# logger.error(f"Exception in matching section: {str(e)}")
# logger.error(f"Exception type: {type(e)}")
# logger.error(f"Exception args: {e.args}")
# logger.error(f"Traceback: {e.__traceback__}")
# raise HTTPException(
# status_code=500, detail=f"Unexpected matching error: {str(e)}"
# )
# except HTTPException:
# raise
# except Exception as e:
# logger.error(f"Unexpected error in match_specific_receipts: {str(e)}")
# raise HTTPException(status_code=500, detail=str(e))
@app.post("/match-specific", response_model=MatchingResponse)
async def match_specific_receipts(file_ids: List[str]):
async def match_specific_receipts(request: MatchSpecificRequest):
"""
Match specific receipts against imported transactions.
This endpoint takes a list of receipt file IDs and matches them against
the currently imported transactions using AI-powered matching logic.
This endpoint takes a request with receipt file IDs and categorization ID,
and matches them against the currently imported transactions using AI-powered matching logic.
"""
try:
logger.info(f"Starting match-specific for file IDs: {file_ids}")
file_ids = request.file_ids
categorization_id = request.categorization_id
logger.info(
f"Starting match-specific for file IDs: {file_ids}, categorization_id: {categorization_id}"
)
# Check if transactions are imported
if not stored_transactions: