# ds_quickbooks/main.py

from fastapi import FastAPI, HTTPException, UploadFile, File
from fastapi.middleware.cors import CORSMiddleware
from datetime import datetime
from typing import List
import uuid
import csv
import io
import logging

# Logging setup: request INFO-level logging with one shared record format,
# written to ``app.log`` (relative to the working directory) and to a
# ``StreamHandler`` created with its default stream.
_LOG_FORMAT = '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
_log_handlers = [
    logging.FileHandler('app.log'),
    logging.StreamHandler(),
]
logging.basicConfig(level=logging.INFO, format=_LOG_FORMAT, handlers=_log_handlers)

# Module-level logger used by the endpoints below.
logger = logging.getLogger(__name__)

from api_models import (
    MatchingRequest, MatchingResponse, MatchResponse,
    ApprovalRequest, RuleRequest, DocumentUploadResponse,
    DocumentProcessResponse, TransactionRequest
)
from models import Receipt, Transaction, Match
from matching_engine import MatchingEngine
from ai_rules import AIRule
from document_processor import DocumentProcessor

# FastAPI application object; every endpoint below registers itself on it.
app = FastAPI(
    title="AI Bookkeeper - Data Science Engine",
    description="AI-powered receipt-to-transaction matching engine. Receives transaction data and provides intelligent matching capabilities.",
    version="1.0.0"
)

# CORS middleware
# NOTE(review): every origin, method and header is allowed *and* credentials
# are enabled. That is the most permissive combination available; confirm it
# is intended before exposing this service beyond a trusted network.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Initialize DS Engine components
# Both are created once at import time and shared by all requests.
matching_engine = MatchingEngine()
document_processor = DocumentProcessor()

# In-memory storage for uploaded files (in production, use a database)
# Maps file_id -> {"filename", "content", "upload_date"}; written by
# /upload-multiple and read by /process/{file_id} and /match-specific.
uploaded_files = {}

# Store imported transactions globally for easy access
# Replaced wholesale by each /transactions/import/* call; every entry is a
# dict with the keys "id", "txn_date", "amount", "payee_name" and "memo".
stored_transactions = []
# Maps file_id -> the extraction result returned by
# document_processor.process_file (populated by /process/{file_id}).
processed_receipts = {}

@app.get("/")
async def root():
    """Report that the service is running, together with its version."""
    health = dict(
        message="AI Bookkeeper Data Science Engine is running",
        version="1.0.0",
        status="healthy",
    )
    return health

# ============================================================================
# TRANSACTION IMPORT ENDPOINTS
# ============================================================================

def _clean_csv_row(row):
    """Return *row* with whitespace stripped from every header and cell.

    ``csv.DictReader`` keeps headers verbatim, so a column exported as
    ``"Amount "`` would otherwise never match ``"Amount"``. Cells missing
    from a short row arrive as ``None`` and are normalized to ``""``; surplus
    cells (which ``DictReader`` stores under the ``None`` key) are dropped.
    """
    return {
        key.strip(): (value or "").strip()
        for key, value in row.items()
        if isinstance(key, str)
    }


def _parse_csv_transaction(row, row_number):
    """Convert one cleaned bank-export row into a transaction dict.

    Args:
        row: Mapping produced by ``_clean_csv_row``.
        row_number: 1-based position of the row, used to build the ID.

    Returns:
        A dict with the keys ``id``, ``txn_date`` (``YYYY-MM-DD``),
        ``amount`` (float), ``payee_name`` and ``memo``.

    Raises:
        ValueError: if the date or amount is missing or cannot be parsed.
    """
    import math  # local import: the module header does not import math

    for required in ("Transaction Date", "Amount"):
        if not row.get(required):
            raise ValueError(f"Missing value for '{required}'")

    # Parse date (two-digit and four-digit year variants of MM/DD/YY).
    txn_date_str = row["Transaction Date"]
    txn_date = None
    for fmt in ("%m/%d/%y", "%m/%d/%Y"):
        try:
            txn_date = datetime.strptime(txn_date_str, fmt).strftime("%Y-%m-%d")
            break
        except ValueError:
            continue
    if txn_date is None:
        raise ValueError(f"Could not parse date: {txn_date_str}")

    # Parse amount; thousands separators are removed first.
    amount = float(row["Amount"].replace(',', ''))
    if not math.isfinite(amount):
        # float() accepts "nan"/"inf", which cannot be serialized as JSON.
        raise ValueError(f"Amount is not a finite number: {row['Amount']}")

    memo = f"{row.get('Account Type', '')} {row.get('Cheque Number', '')} {row.get('Description 1', '')}".strip()
    return {
        "id": f"{row.get('Account Number', '')}_{row_number}",
        "txn_date": txn_date,
        "amount": amount,
        # "Description 2" is optional: an empty payee is accepted.
        "payee_name": row.get("Description 2", ""),
        "memo": memo,
    }


@app.post("/transactions/import/csv")
async def import_transactions_csv(file: UploadFile = File(...)):
    """
    Import transactions from a CSV file (custom bank export format).

    Columns read: ``Account Type``, ``Account Number``, ``Transaction Date``,
    ``Cheque Number``, ``Description 1``, ``Description 2`` and ``Amount``.
    Header names and cell values are matched after stripping surrounding
    whitespace, and a leading UTF-8 byte-order mark is ignored.

    Rows that cannot be parsed are skipped and reported in ``errors``; the
    successfully parsed rows replace the module-level ``stored_transactions``.

    Returns:
        ``{"imported_count", "converted_transactions", "errors"}``.

    Raises:
        HTTPException: 400 if the upload is not valid UTF-8, 500 for any other
            unexpected failure.
    """
    global stored_transactions
    try:
        content = await file.read()
        try:
            # utf-8-sig also decodes plain UTF-8 and drops the BOM that
            # spreadsheet exports prepend (it would corrupt the first header).
            decoded = content.decode('utf-8-sig')
        except UnicodeDecodeError as e:
            raise HTTPException(status_code=400, detail=f"File is not valid UTF-8: {e}") from e

        reader = csv.DictReader(io.StringIO(decoded))
        transactions = []
        errors = []
        for row_number, raw_row in enumerate(reader, start=1):
            try:
                transactions.append(
                    _parse_csv_transaction(_clean_csv_row(raw_row), row_number)
                )
            except Exception as e:
                # Best effort: one bad row must not abort the whole import.
                errors.append(f"Row {row_number}: {str(e)}")

        # Store transactions globally for auto-matching
        stored_transactions = transactions

        return {
            "imported_count": len(transactions),
            "converted_transactions": transactions,
            "errors": errors
        }
    except HTTPException:
        # Keep deliberate status codes instead of rewriting them as 500.
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e)) from e

@app.post("/transactions/import/image")
async def import_transactions_from_image(file: UploadFile = File(...)):
    """
    Import transactions from an image (bank statement, credit card statement, etc.) using AI extraction.

    The upload is saved through ``document_processor`` and handed to its
    ``extract_transactions_from_image`` coroutine. Each extracted entry is
    converted to the stored transaction shape (``id``, ``txn_date``,
    ``amount``, ``payee_name``, ``memo``); on success the converted list
    replaces the module-level ``stored_transactions``.

    Entries that cannot be converted are skipped and reported in ``errors``.

    Raises:
        HTTPException: 400 for an unsupported or missing file extension, 500
            when extraction reports failure or anything unexpected happens.
    """
    global stored_transactions
    try:
        # Validate file type. A name without a dot has no extension, so a
        # file literally called "pdf" is rejected rather than accepted.
        allowed_types = ['jpg', 'jpeg', 'png', 'gif', 'bmp', 'pdf']
        filename = file.filename or ""
        file_extension = filename.rpartition('.')[2].lower() if '.' in filename else ""
        if file_extension not in allowed_types:
            raise HTTPException(status_code=400, detail=f"Unsupported file type. Allowed: {allowed_types}")

        # Read file content
        content = await file.read()
        # Save file to disk
        image_path = await document_processor.save_uploaded_file(content, file.filename)
        # Extract transactions from image (pass file path)
        extraction_result = await document_processor.extract_transactions_from_image(image_path)
        if not extraction_result.get("extraction_success", False):
            raise HTTPException(status_code=500, detail=extraction_result.get("error", "Extraction failed"))
        extracted_transactions = extraction_result.get("transactions", [])

        transactions = []
        errors = []
        for idx, txn in enumerate(extracted_transactions, start=1):
            try:
                txn_date_raw = txn.get("date")

                # Parse date to YYYY-MM-DD format
                txn_date = document_processor._parse_date_to_iso(txn_date_raw)
                if not txn_date:
                    if not txn_date_raw:
                        raise ValueError("Missing transaction date")
                    # Fallback: assume the raw value lacks a year and prefix
                    # the current one (previously hard-coded to 2024).
                    txn_date = f"{datetime.now().year}-{txn_date_raw}"

                transactions.append({
                    "id": f"img_{file.filename}_{idx}",
                    "txn_date": txn_date,
                    "amount": txn.get("amount"),
                    "payee_name": txn.get("vendor"),
                    "memo": txn.get("memo", "")
                })
            except Exception as e:
                # Best effort: skip the entry, but report it instead of
                # silently dropping it.
                logger.warning(f"Skipping extracted transaction {idx}: {str(e)}")
                errors.append(f"Transaction {idx}: {str(e)}")

        # Store transactions globally for auto-matching
        stored_transactions = transactions

        return {
            "imported_count": len(transactions),
            "converted_transactions": transactions,
            "errors": errors
        }
    except HTTPException:
        # Keep deliberate status codes (e.g. the 400 above) instead of
        # letting the generic handler rewrite them as 500.
        raise
    except Exception as e:
        logger.exception(f"Error importing transactions from image: {str(e)}")
        raise HTTPException(status_code=500, detail=str(e)) from e

# ============================================================================
# DOCUMENT PROCESSING ENDPOINTS
# ============================================================================

@app.post("/upload-multiple", response_model=List[DocumentUploadResponse])
async def upload_multiple_documents(files: List[UploadFile] = File(...)):
    """
    Upload multiple receipt images for processing.

    This endpoint accepts multiple image files and returns file IDs
    that can be used with the /process/{file_id} endpoint.

    All file names are validated before anything is stored, and the batch is
    committed to ``uploaded_files`` only after every file has been read, so a
    failed request never leaves files stored under IDs the client was not
    given.

    Raises:
        HTTPException: 400 if any file has an unsupported or missing
            extension, 500 for unexpected failures.
    """
    try:
        allowed_types = ['jpg', 'jpeg', 'png', 'gif', 'bmp', 'pdf']

        # Validate every file up front. A name without a dot has no extension.
        extensions = []
        for file in files:
            filename = file.filename or ""
            file_extension = filename.rpartition('.')[2].lower() if '.' in filename else ""
            if file_extension not in allowed_types:
                raise HTTPException(status_code=400, detail=f"Unsupported file type for {file.filename}. Allowed: {allowed_types}")
            extensions.append(file_extension)

        staged = {}
        responses = []
        for file, file_extension in zip(files, extensions):
            # Generate unique file ID
            file_id = str(uuid.uuid4())

            # Read file content; one timestamp keeps the stored record and
            # the response in agreement.
            content = await file.read()
            upload_date = datetime.now()
            staged[file_id] = {
                "filename": file.filename,
                "content": content,
                "upload_date": upload_date
            }

            responses.append(DocumentUploadResponse(
                file_id=file_id,
                filename=file.filename,
                file_type=file_extension,
                upload_date=upload_date,
                status="uploaded"
            ))

        # Commit the whole batch at once.
        uploaded_files.update(staged)
        return responses

    except HTTPException:
        # Keep the 400 above instead of rewriting it as a 500.
        raise
    except Exception as e:
        logger.exception(f"Error uploading documents: {str(e)}")
        raise HTTPException(status_code=500, detail=str(e)) from e

@app.post("/process/{file_id}", response_model=DocumentProcessResponse)
async def process_document(file_id: str):
    """
    Process a previously uploaded document to extract receipt information.

    The stored upload is written to disk via ``document_processor`` and passed
    to its ``process_file`` coroutine; the resulting dict is cached in
    ``processed_receipts`` under *file_id* and summarized in the response.

    Raises:
        HTTPException: 404 if *file_id* was never uploaded, 500 if processing
            fails.
    """
    # Looked up outside the try block so the 404 is never rewritten as a 500.
    file_data = uploaded_files.get(file_id)
    if file_data is None:
        raise HTTPException(status_code=404, detail=f"File {file_id} not found")

    try:
        # Save file temporarily and process it
        file_path = await document_processor.save_uploaded_file(file_data["content"], file_data["filename"])
        file_type = file_data["filename"].split('.')[-1].lower()
        receipt_data = await document_processor.process_file(file_path, file_type)

        # Store processed receipt
        processed_receipts[file_id] = receipt_data

        return DocumentProcessResponse(
            file_id=file_id,
            extraction_success=receipt_data.get("extraction_success", False),
            vendor=receipt_data.get("vendor", ""),
            description=receipt_data.get("description", ""),
            total_amount=receipt_data.get("total_amount", 0.0),
            tax_amount=receipt_data.get("tax_amount", 0.0),
            date=receipt_data.get("date", ""),
            category=receipt_data.get("category", ""),
            confidence=receipt_data.get("confidence", 0.0),
            error=receipt_data.get("error", None)
        )

    except HTTPException:
        # Preserve any status code chosen deliberately further down the stack.
        raise
    except Exception as e:
        logger.exception(f"Error processing document {file_id}: {str(e)}")
        raise HTTPException(status_code=500, detail=str(e)) from e

# ============================================================================
# MATCHING ENDPOINTS
# ============================================================================

def _transaction_from_stored(txn):
    """Build a ``Transaction`` from one dict in ``stored_transactions``.

    Raises ``KeyError`` for a missing field and ``ValueError`` when
    ``txn_date`` is not in ``YYYY-MM-DD`` form; the caller skips such entries.
    """
    txn_date = datetime.strptime(txn["txn_date"], "%Y-%m-%d")
    return Transaction(
        id=txn["id"],
        transaction_date=txn_date,
        amount=txn["amount"],
        vendor=txn["payee_name"],
        notes=txn["memo"]
    )


def _first_present(data, keys):
    """Return the first non-``None`` value stored under *keys*, else ``None``."""
    for key in keys:
        value = data.get(key)
        if value is not None:
            return value
    return None


def _receipt_from_processed(file_id, receipt_data):
    """Build a ``Receipt`` from the extraction result cached for *file_id*.

    Missing fields fall back to defaults, each with a logged warning where the
    original data is unusable: current time for the date, ``0.0`` for the
    amount and tax, ``"Unknown"`` for the vendor, ``"Other"`` for the category.

    Raises:
        ValueError: if a date is present but not in ``YYYY-MM-DD`` form.
        KeyError: if *file_id* is absent from ``uploaded_files``.
    """
    # Handle missing date field
    if not receipt_data.get("date"):
        logger.warning(f"Missing date for receipt {file_id}, using current date")
        receipt_date = datetime.now()
    else:
        receipt_date = datetime.strptime(receipt_data["date"], "%Y-%m-%d")

    # Handle missing amount field - try multiple possible keys
    amount = _first_present(receipt_data, ("amount", "total_amount", "amount_total"))
    if amount is None:
        logger.warning(f"Missing amount for receipt {file_id}, using 0.0")
        amount = 0.0

    # Ensure amount is a float
    try:
        amount = float(amount)
    except (ValueError, TypeError):
        logger.warning(f"Invalid amount '{amount}' for receipt {file_id}, using 0.0")
        amount = 0.0

    logger.info(f"DEBUG: amount for {file_id}: {amount}")

    # Handle missing vendor field
    vendor = receipt_data.get("vendor", "")
    if not vendor:
        logger.warning(f"Missing vendor for receipt {file_id}, using 'Unknown'")
        vendor = "Unknown"

    # Handle tax field. "tax" wins, but a present-yet-None "tax" no longer
    # hides a usable "tax_amount".
    try:
        tax = float(_first_present(receipt_data, ("tax", "tax_amount")))
    except (ValueError, TypeError):
        tax = 0.0

    return Receipt(
        id=file_id,
        file_name=uploaded_files[file_id]["filename"],
        upload_date=uploaded_files[file_id]["upload_date"],
        receipt_date=receipt_date,
        amount=amount,
        tax=tax,
        vendor=vendor,
        category=receipt_data.get("category", "Other"),
        description=receipt_data.get("description", "")
    )


def _match_stats(match_responses):
    """Summarize match responses: totals, confidence split at 0.8, mean score."""
    if match_responses:
        high_confidence = sum(1 for m in match_responses if m.confidence_score >= 0.8)
        low_confidence = len(match_responses) - high_confidence
        avg_score = sum(m.confidence_score for m in match_responses) / len(match_responses)
    else:
        high_confidence = low_confidence = avg_score = 0

    return {
        "total": len(match_responses),
        "high_confidence": high_confidence,
        "low_confidence": low_confidence,
        "avg_score": round(avg_score, 2)
    }


@app.post("/match-specific", response_model=MatchingResponse)
async def match_specific_receipts(file_ids: List[str]):
    """
    Match specific receipts against imported transactions.

    This endpoint takes a list of receipt file IDs and matches them against
    the currently imported transactions by calling
    ``matching_engine.process_matching``.

    Stored transactions that cannot be converted are skipped with a warning.
    Every requested receipt must have been processed and be convertible;
    otherwise the request is rejected and the offending IDs are listed.

    Raises:
        HTTPException: 400 if no transactions are imported or any receipt is
            missing/unusable, 500 if matching or response building fails.
    """
    try:
        logger.info(f"Starting match-specific for file IDs: {file_ids}")

        # Check if transactions are imported
        if not stored_transactions:
            logger.warning("No transactions imported")
            raise HTTPException(status_code=400, detail="No transactions imported. Please upload CSV first.")

        logger.info(f"Found {len(stored_transactions)} stored transactions")

        # Convert stored transactions to Transaction objects
        transactions = []
        for txn in stored_transactions:
            try:
                transactions.append(_transaction_from_stored(txn))
            except Exception as e:
                # Look the ID up defensively: indexing txn["id"] here could
                # raise again inside the handler and abort the whole request.
                txn_id = txn.get("id", "<unknown>") if isinstance(txn, dict) else "<unknown>"
                logger.warning(f"Error converting transaction {txn_id}: {str(e)}")
                continue

        logger.info(f"Converted {len(transactions)} transactions")

        # Get receipts for the specified file IDs
        receipts = []
        missing_files = []

        for file_id in file_ids:
            if file_id not in processed_receipts:
                logger.warning(f"Receipt {file_id} not found in processed_receipts")
                missing_files.append(f"{file_id} (not found)")
                continue

            receipt_data = processed_receipts[file_id]
            logger.info(f"DEBUG: receipt_data for {file_id}: {receipt_data}")
            logger.info(f"DEBUG: receipt_data keys for {file_id}: {list(receipt_data.keys())}")
            try:
                receipt = _receipt_from_processed(file_id, receipt_data)
                receipts.append(receipt)
                logger.info(f"Added receipt: {receipt.vendor} - ${receipt.amount}")
            except Exception as e:
                logger.warning(f"Error creating receipt object for {file_id}: {str(e)}")
                missing_files.append(f"{file_id} (error: {str(e)})")

        if missing_files:
            logger.error(f"Missing files: {missing_files}")
            raise HTTPException(status_code=400, detail=f"Missing files: {missing_files}")

        logger.info(f"Processing {len(receipts)} receipts against {len(transactions)} transactions")

        # Perform matching
        try:
            logger.info("Starting direct matching call (without ThreadPoolExecutor)")
            logger.info(f"matching_engine type: {type(matching_engine)}")
            logger.info(f"matching_engine.process_matching type: {type(matching_engine.process_matching)}")
            logger.info(f"receipts type: {type(receipts)}, length: {len(receipts)}")
            logger.info(f"transactions type: {type(transactions)}, length: {len(transactions)}")

            # NOTE(review): this is a synchronous call inside an async
            # endpoint; if process_matching blocks on I/O it would stall the
            # event loop - TODO confirm.
            matches = matching_engine.process_matching(receipts, transactions)

            logger.info(f"Matching completed successfully. Found {len(matches)} matches")

            # Convert matches to response format
            match_responses = []
            for match in matches:
                logger.info(f"Raw match object: {match}")
                logger.info(f"  receipt_id: {match.receipt.id}")
                logger.info(f"  transaction_id: {match.transaction.id}")
                logger.info(f"  confidence_score: {match.confidence_score}")
                logger.info(f"  match_reason: {match.match_reason}")
                logger.info(f"  receipt_vendor: {match.receipt.vendor}")
                logger.info(f"  receipt_amount: {match.receipt.amount}")
                logger.info(f"  transaction_vendor: {match.transaction.vendor}")
                logger.info(f"  transaction_amount: {match.transaction.amount}")

                match_response = MatchResponse(
                    receipt_id=match.receipt.id,
                    transaction_id=match.transaction.id,
                    confidence_score=match.confidence_score,
                    match_reason=match.match_reason,
                    receipt_vendor=match.receipt.vendor,
                    receipt_amount=match.receipt.amount,
                    receipt_description=match.receipt.description,
                    receipt_category=match.receipt.category,
                    receipt_tax_amount=match.receipt.tax,
                    transaction_vendor=match.transaction.vendor,
                    transaction_amount=match.transaction.amount
                )
                match_responses.append(match_response)
                logger.info(f"Successfully created MatchResponse for {match.receipt.vendor} -> {match.transaction.vendor}")

            logger.info(f"Formatted {len(match_responses)} match responses")

            # Calculate statistics
            stats = _match_stats(match_responses)

            logger.info(f"Generated stats: {stats}")
            logger.info(f"Match-specific completed successfully with {len(match_responses)} matches")

            return MatchingResponse(
                matches=match_responses,
                stats=stats
            )

        except Exception as e:
            # logger.exception records the exception type, message and full
            # traceback; formatting e.__traceback__ only printed an object repr.
            logger.exception(f"Exception in matching section: {str(e)}")
            raise HTTPException(status_code=500, detail=f"Unexpected matching error: {str(e)}") from e

    except HTTPException:
        raise
    except Exception as e:
        logger.exception(f"Unexpected error in match_specific_receipts: {str(e)}")
        raise HTTPException(status_code=500, detail=str(e)) from e

# ============================================================================
# RULES MANAGEMENT ENDPOINTS
# ============================================================================

@app.post("/rules")
async def add_rule(request: RuleRequest):
    """
    Register a new AI matching rule built from the request payload.

    The rule is appended to ``matching_engine.rules_engine.rules``; any
    failure is reported as an HTTP 500.
    """
    try:
        rule_fields = {
            "name": request.name,
            "condition": request.condition,
            "action": request.action,
            "source": request.source,
        }
        rule = AIRule(**rule_fields)
        matching_engine.rules_engine.rules.append(rule)
    except Exception as exc:
        raise HTTPException(status_code=500, detail=str(exc))
    else:
        return {"message": f"Rule '{request.name}' added successfully"}

@app.get("/rules")
async def get_rules():
    """
    List every rule currently held by the matching engine.

    Each rule is serialized to its ``name``, ``condition``, ``action``,
    ``source`` and ``status`` attributes.
    """
    exported_fields = ("name", "condition", "action", "source", "status")
    try:
        serialized = [
            {field: getattr(rule, field) for field in exported_fields}
            for rule in matching_engine.rules_engine.rules
        ]
        return {"rules": serialized}
    except Exception as exc:
        raise HTTPException(status_code=500, detail=str(exc))

@app.delete("/rules/{rule_name}")
async def delete_rule(rule_name: str):
    """
    Remove the first rule whose name equals *rule_name*.

    Responds with 404 when no rule has that name and 500 on any other failure.
    """
    try:
        registered = matching_engine.rules_engine.rules
        # Position of the first rule with a matching name, or None.
        position = next(
            (idx for idx, candidate in enumerate(registered) if candidate.name == rule_name),
            None,
        )
        if position is None:
            raise HTTPException(status_code=404, detail=f"Rule '{rule_name}' not found")

        del registered[position]
        return {"message": f"Rule '{rule_name}' deleted successfully"}

    except HTTPException:
        raise
    except Exception as exc:
        raise HTTPException(status_code=500, detail=str(exc))

# ============================================================================
# STATISTICS ENDPOINT
# ============================================================================

@app.get("/stats")
async def get_stats():
    """
    Report how many transactions, receipts, uploads and rules are held in memory.
    """
    try:
        counts = {}
        counts["total_transactions"] = len(stored_transactions)
        counts["total_receipts"] = len(processed_receipts)
        counts["total_uploaded_files"] = len(uploaded_files)
        counts["rules_count"] = len(matching_engine.rules_engine.rules)
        return counts
    except Exception as exc:
        raise HTTPException(status_code=500, detail=str(exc))

if __name__ == "__main__":
    # Imported here so uvicorn is only needed when this module is run as a script.
    import uvicorn
    # Serve the app on all network interfaces (0.0.0.0), port 8343.
    uvicorn.run(app, host="0.0.0.0", port=8343)