Add requirements.txt with essential dependencies for the project

2025-10-05 11:29:45 +00:00
commit 3d48cf0385
15 changed files with 3686 additions and 0 deletions
@@ -0,0 +1,11 @@
+.venv
+
+__pycache__/
+*.pyc
+*.pyo
+*.pyd
+*.db
+*.sqlite
+.env
+*.log
+/uploads
@@ -0,0 +1,12 @@
+from pydantic_settings import BaseSettings
+from typing import Optional
+
+class Settings(BaseSettings):
+    database_url: Optional[str] = None
+    secret_key: Optional[str] = None
+    api_key: Optional[str] = None
+    GROQ_API_KEY: str
+    class Config:
+        env_file = ".env"
+
+settings = Settings()
@@ -0,0 +1,90 @@
+from typing import Annotated
+
+from fastapi import Depends
+from sqlalchemy import Column, DateTime, Float, Integer, String, create_engine
+from sqlalchemy.ext.declarative import declarative_base
+from sqlalchemy.orm import Session, sessionmaker
+
+SQLALCHEMY_DATABASE_URL = "sqlite:///./sql_app.db"
+
+engine = create_engine(
+    SQLALCHEMY_DATABASE_URL, connect_args={"check_same_thread": False}
+)
+
+SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine)
+
+
+def get_db():
+    db = SessionLocal()
+    try:
+        yield db
+    finally:
+        db.close()
+
+
+db_dependency = Annotated[Session, Depends(get_db)]
+Base = declarative_base()
+
+
+def create_db_tables():
+    Base.metadata.create_all(bind=engine)
+
+
+def clear_all_data():
+    """Clear all data from the database (useful for testing)"""
+    db = SessionLocal()
+    try:
+        db.query(DBTransaction).delete()
+        db.query(DBReceipt).delete()
+        db.query(DBUploadedFile).delete()
+        db.commit()
+    finally:
+        db.close()
+
+
+# Transactions table
+class DBTransaction(Base):
+    __tablename__ = "transactions"
+
+    id = Column(Integer, primary_key=True, index=True)
+    transaction_id = Column(String, index=True)
+    amount = Column(Float, nullable=False)
+    date = Column(DateTime, nullable=False)
+    vendor = Column(String, nullable=False)
+    description = Column(String, nullable=True)
+    category = Column(String, nullable=True)
+    tax_amount = Column(Float, nullable=True)
+    categorisation_id = Column(String, nullable=True)
+    user_id = Column(String, nullable=True)
+
+
+# Uploaded Files table
+class DBUploadedFile(Base):
+    __tablename__ = "uploaded_files"
+
+    id = Column(Integer, primary_key=True, index=True)
+    file_id = Column(String, unique=True, index=True)
+    filename = Column(String, nullable=False)
+    file_path = Column(String, nullable=False)
+    file_type = Column(String, nullable=False)
+    upload_date = Column(DateTime, nullable=False)
+    status = Column(String, nullable=False, default="uploaded")
+
+
+# Receipts table
+class DBReceipt(Base):
+    __tablename__ = "receipts"
+
+    id = Column(Integer, primary_key=True, index=True)
+    receipt_id = Column(String, unique=True, index=True)
+    file_id = Column(String, unique=True, index=True)
+    amount = Column(Float, nullable=False)
+    date = Column(DateTime, nullable=False)
+    vendor = Column(String, nullable=False)
+    description = Column(String, nullable=True)
+    category = Column(String, nullable=True)
+    tax_amount = Column(Float, nullable=True)
+    confidence = Column(Float, nullable=True)
+    extraction_success = Column(String, nullable=True)
+    error_message = Column(String, nullable=True)
+    receipt_currency = Column(String, nullable=True)
@@ -0,0 +1,821 @@
+import csv
+import io
+import logging
+import uuid
+from datetime import datetime
+from typing import List
+
+from fastapi import FastAPI, File, Form, HTTPException, UploadFile
+from fastapi.middleware.cors import CORSMiddleware
+from sqlalchemy.orm import Session
+
+from database import (
+    DBReceipt,
+    DBTransaction,
+    DBUploadedFile,
+    create_db_tables,
+    db_dependency,
+)
+from schemas import (
+    DocumentProcessResponse,
+    DocumentUploadResponse,
+    MatchingResponse,
+    MatchResponse,
+    MatchSpecificRequest,
+    Receipt,
+    RuleRequest,
+    Transaction,
+)
+from services.ai_rules import AIRule
+from services.document_processor import DocumentProcessor
+from services.matching_engine import MatchingEngine
+
+create_db_tables()
+
+logging.basicConfig(
+    level=logging.INFO,
+    format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
+    handlers=[logging.StreamHandler()],
+)
+logger = logging.getLogger(__name__)
+
+app = FastAPI(
+    title="AI Bookkeeper - Data Science Engine",
+    description="AI-powered receipt-to-transaction matching engine. Receives transaction data and provides intelligent matching capabilities.",
+    version="1.0.0",
+)
+
+# CORS middleware
+app.add_middleware(
+    CORSMiddleware,
+    allow_origins=["*"],
+    allow_credentials=True,
+    allow_methods=["*"],
+    allow_headers=["*"],
+)
+
+# Initialize DS Engine components
+matching_engine = MatchingEngine()
+document_processor = DocumentProcessor()
+
+
+# Helper functions for database operations
+def get_transactions_from_db(
+    db: Session, user_id: str = None, categorization_id: str = None
+):
+    """Retrieve transactions from database"""
+    query = db.query(DBTransaction)
+    if user_id:
+        query = query.filter(DBTransaction.user_id == user_id)
+    if categorization_id:
+        query = query.filter(DBTransaction.categorisation_id == categorization_id)
+    return query.all()
+
+
+def get_receipt_from_db(db: Session, file_id: str):
+    """Retrieve receipt from database by file_id"""
+    return db.query(DBReceipt).filter(DBReceipt.file_id == file_id).first()
+
+
+def get_receipts_from_db(db: Session, file_ids: List[str]):
+    """Retrieve multiple receipts from database by file_ids"""
+    return db.query(DBReceipt).filter(DBReceipt.file_id.in_(file_ids)).all()
+
+
+def get_uploaded_file_from_db(db: Session, file_id: str):
+    """Retrieve uploaded file from database by file_id"""
+    return db.query(DBUploadedFile).filter(DBUploadedFile.file_id == file_id).first()
+
+
+def get_uploaded_files_from_db(db: Session, file_ids: List[str]):
+    """Retrieve multiple uploaded files from database by file_ids"""
+    return db.query(DBUploadedFile).filter(DBUploadedFile.file_id.in_(file_ids)).all()
+
+
+@app.get("/", tags=["Health"])
+async def root():
+    """Health check endpoint"""
+    return {
+        "message": "AI Bookkeeper Data Science Engine is running",
+        "version": "1.0.0",
+        "status": "healthy",
+    }
+
+
+# ============================================================================
+# TRANSACTION IMPORT ENDPOINTS
+# ============================================================================
+@app.post("/transactions/import/csv", tags=["Transaction Import"])
+async def import_transactions_csv(
+    db: db_dependency,
+    file: UploadFile = File(...),
+    categorization_id: str = Form(...),
+    user_id: str = Form(...),
+):
+    """
+    Import transactions from a CSV file (custom bank export format).
+    """
+    try:
+        content = await file.read()
+        decoded = content.decode("utf-8")
+        reader = csv.DictReader(io.StringIO(decoded))
+        transactions = []
+        errors = []
+        for idx, row in enumerate(reader):
+            try:
+                # Use correct headers and strip whitespace
+                account_number = row.get("Account Number") or row.get(
+                    "Account Number ".strip()
+                )
+                txn_date_raw = row.get("Transaction Date") or row.get(
+                    "Transaction Date ".strip() or row.get("Date")
+                )
+                amount_raw = row.get("Amount") or row.get("Amount ".strip())
+                payee_name = row.get("Description 2") or row.get(
+                    "Description 2 ".strip()
+                )
+                memo = f"{row.get('Account Type', '').strip()} {row.get('Cheque Number', '').strip()} {row.get('Description 1', '').strip()}".strip()
+                # Compose ID
+                txn_id = f"{account_number}_{idx + 1}"
+                # Parse date (try multiple formats)
+                txn_date_str = txn_date_raw.strip()
+                txn_date = None
+                for fmt in ("%m/%d/%y", "%m/%d/%Y"):
+                    try:
+                        txn_date = datetime.strptime(txn_date_str, fmt).strftime(
+                            "%Y-%m-%d"
+                        )
+                        break
+                    except Exception:
+                        continue
+                if not txn_date:
+                    raise ValueError(f"Could not parse date: {txn_date_str}")
+                # Parse amount
+                amount = float(amount_raw.replace(",", "").strip())
+
+                # Create database transaction object
+                txn_date_obj = datetime.strptime(txn_date, "%Y-%m-%d")
+                db_transaction = DBTransaction(
+                    transaction_id=txn_id,
+                    amount=amount,
+                    date=txn_date_obj,
+                    vendor=payee_name.strip(),
+                    description=memo,
+                    categorisation_id=categorization_id,
+                    user_id=user_id,
+                )
+
+                # Add to database
+                db.add(db_transaction)
+
+                transactions.append(
+                    {
+                        "id": txn_id,
+                        "txn_date": txn_date,
+                        "amount": amount,
+                        "payee_name": payee_name.strip(),
+                        "memo": memo,
+                        "categorization_id": categorization_id,
+                        "user_id": user_id,
+                    }
+                )
+            except Exception as e:
+                errors.append(f"Row {idx + 1}: {str(e)}")
+
+        # Commit all transactions to database
+        db.commit()
+
+        return {
+            "imported_count": len(transactions),
+            "converted_transactions": transactions,
+            "errors": errors,
+            "categorization_id": categorization_id,
+            "user_id": user_id,
+        }
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=str(e))
+
+
+@app.post("/transactions/import/image", tags=["Transaction Import"])
+async def import_transactions_from_image(
+    db: db_dependency,
+    file: UploadFile = File(...),
+    categorization_id: str = Form("image_import"),
+    user_id: str = Form("default"),
+):
+    """
+    Import transactions from an image (bank statement, credit card statement, etc.) using AI extraction.
+    """
+    try:
+        # Validate file type
+        allowed_types = ["jpg", "jpeg", "png", "gif", "bmp", "pdf"]
+        file_extension = file.filename.split(".")[-1].lower()
+        if file_extension not in allowed_types:
+            raise HTTPException(
+                status_code=400,
+                detail=f"Unsupported file type. Allowed: {allowed_types}",
+            )
+        # Read file content
+        content = await file.read()
+        # Save file to disk
+        image_path = await document_processor.save_uploaded_file(content, file.filename)
+        # Extract transactions from image (pass file path)
+        extraction_result = await document_processor.extract_transactions_from_image(
+            image_path
+        )
+        if not extraction_result.get("extraction_success", False):
+            raise HTTPException(
+                status_code=500,
+                detail=extraction_result.get("error", "Extraction failed"),
+            )
+        extracted_transactions = extraction_result.get("transactions", [])
+
+        # Store transactions in database
+        transactions = []
+        for idx, txn in enumerate(extracted_transactions):
+            try:
+                txn_id = f"img_{file.filename}_{idx + 1}"
+                txn_date_raw = txn.get("date")
+                amount = txn.get("amount")
+                vendor = txn.get("vendor")
+                memo = txn.get("memo", "")
+
+                # Parse date to YYYY-MM-DD format
+                txn_date = document_processor._parse_date_to_iso(txn_date_raw)
+                if not txn_date:
+                    # Fallback: use current year if parsing fails
+                    txn_date = f"2024-{txn_date_raw}"
+
+                # Parse date for database
+                txn_date_obj = datetime.strptime(txn_date, "%Y-%m-%d")
+
+                # Create database transaction object
+                db_transaction = DBTransaction(
+                    transaction_id=txn_id,
+                    amount=float(amount),
+                    date=txn_date_obj,
+                    vendor=vendor,
+                    description=memo,
+                    categorisation_id=categorization_id,
+                    user_id=user_id,
+                )
+
+                # Add to database
+                db.add(db_transaction)
+
+                transactions.append(
+                    {
+                        "id": txn_id,
+                        "txn_date": txn_date,
+                        "amount": amount,
+                        "payee_name": vendor,
+                        "memo": memo,
+                    }
+                )
+            except Exception as e:
+                logger.warning(f"Error processing transaction {idx}: {str(e)}")
+                continue
+
+        # Commit all transactions to database
+        db.commit()
+
+        return {
+            "imported_count": len(transactions),
+            "converted_transactions": transactions,
+            "errors": [],
+        }
+    except Exception as e:
+        logger.error(f"Error importing transactions from image: {str(e)}")
+        raise HTTPException(status_code=500, detail=str(e))
+
+
+# ============================================================================
+# DOCUMENT PROCESSING ENDPOINTS
+# ============================================================================
+
+
+@app.post(
+    "/upload-multiple",
+    response_model=List[DocumentUploadResponse],
+    tags=["Document Processing"],
+)
+async def upload_multiple_documents(
+    files: List[UploadFile] = File(...), db: db_dependency = None
+):
+    """
+    Upload multiple receipt images for processing.
+
+    This endpoint accepts multiple image files and returns file IDs
+    that can be used with the /process/{file_id} endpoint.
+    """
+    try:
+        responses = []
+
+        for file in files:
+            # Validate file type
+            allowed_types = ["jpg", "jpeg", "png", "gif", "bmp", "pdf"]
+            file_extension = file.filename.split(".")[-1].lower()
+
+            if file_extension not in allowed_types:
+                raise HTTPException(
+                    status_code=400,
+                    detail=f"Unsupported file type for {file.filename}. Allowed: {allowed_types}",
+                )
+
+            # Generate unique file ID
+            file_id = str(uuid.uuid4())
+
+            # Read file content and save to disk
+            content = await file.read()
+            file_path = await document_processor.save_uploaded_file(
+                content, file.filename
+            )
+
+            # Create database record for uploaded file
+            db_uploaded_file = DBUploadedFile(
+                file_id=file_id,
+                filename=file.filename,
+                file_path=file_path,
+                file_type=file_extension,
+                upload_date=datetime.now(),
+                status="uploaded",
+            )
+
+            # Add to database
+            db.add(db_uploaded_file)
+
+            responses.append(
+                DocumentUploadResponse(
+                    file_id=file_id,
+                    filename=file.filename,
+                    file_type=file_extension,
+                    upload_date=datetime.now(),
+                    status="uploaded",
+                )
+            )
+
+        # Commit all uploaded files to database
+        db.commit()
+
+        return responses
+
+    except Exception as e:
+        logger.error(f"Error uploading documents: {str(e)}")
+        raise HTTPException(status_code=500, detail=str(e))
+
+
+@app.post(
+    "/process/{file_id}",
+    response_model=DocumentProcessResponse,
+    tags=["Document Processing"],
+)
+async def process_document(file_id: str, db: db_dependency):
+    """
+    Process a previously uploaded document to extract receipt information.
+
+    This endpoint uses AI to extract structured data from receipt images,
+    including vendor, amount, date, and category information.
+    """
+    try:
+        # Get file info from database
+        db_uploaded_file = get_uploaded_file_from_db(db, file_id)
+        if not db_uploaded_file:
+            raise HTTPException(status_code=404, detail=f"File {file_id} not found")
+
+        # Process the file using the stored file path
+        receipt_data = await document_processor.process_file(
+            db_uploaded_file.file_path, db_uploaded_file.file_type
+        )
+
+        # Parse date for database storage
+        receipt_date = None
+        if receipt_data.get("date"):
+            try:
+                receipt_date = datetime.strptime(receipt_data["date"], "%Y-%m-%d")
+            except ValueError:
+                receipt_date = datetime.now()
+        else:
+            receipt_date = datetime.now()
+
+        # Create database receipt object
+        db_receipt = DBReceipt(
+            receipt_id=f"receipt_{file_id}",
+            file_id=file_id,
+            amount=receipt_data.get("total_amount", 0.0),
+            date=receipt_date,
+            vendor=receipt_data.get("vendor", ""),
+            description=receipt_data.get("description", ""),
+            category=receipt_data.get("category", ""),
+            tax_amount=receipt_data.get("tax_amount", 0.0),
+            confidence=receipt_data.get("confidence", 0.0),
+            extraction_success=str(receipt_data.get("extraction_success", False)),
+            error_message=receipt_data.get("error"),
+            receipt_currency=receipt_data.get("currency")
+        )
+
+        # Add to database
+        db.add(db_receipt)
+        db.commit()
+
+        return DocumentProcessResponse(
+            file_id=file_id,
+            receipt_id=db_receipt.receipt_id,
+            extraction_success=receipt_data.get("extraction_success", False),
+            vendor=receipt_data.get("vendor", ""),
+            description=receipt_data.get("description", ""),
+            total_amount=receipt_data.get("total_amount", 0.0),
+            tax_amount=receipt_data.get("tax_amount", 0.0),
+            date=receipt_data.get("date", ""),
+            category=receipt_data.get("category", ""),
+            confidence=receipt_data.get("confidence", 0.0),
+            error=receipt_data.get("error", None),
+            receipt_currency=receipt_data.get("currency")
+        )
+
+    except Exception as e:
+        logger.error(f"Error processing document {file_id}: {str(e)}")
+        raise HTTPException(status_code=500, detail=str(e))
+
+
+@app.post("/match-specific", response_model=MatchingResponse, tags=["AI Matching"])
+async def match_specific_receipts(request: MatchSpecificRequest, db: db_dependency):
+    """
+    Match specific receipts against imported transactions.
+
+    This endpoint takes a request with receipt file IDs and categorization ID,
+    and matches them against the currently imported transactions using AI-powered matching logic.
+    """
+    try:
+        file_ids = request.file_ids
+        categorization_id = request.categorization_id
+        logger.info(
+            f"Starting match-specific for file IDs: {file_ids}, categorization_id: {categorization_id}"
+        )
+
+        # Get transactions from database
+        db_transactions = get_transactions_from_db(
+            db, categorization_id=categorization_id
+        )
+
+        if not db_transactions:
+            logger.warning("No transactions found in database")
+            raise HTTPException(
+                status_code=400,
+                detail="No transactions found. Please upload CSV first.",
+            )
+
+        logger.info(f"Found {len(db_transactions)} transactions in database")
+
+        # Convert database transactions to Transaction objects
+        transactions = []
+        for db_txn in db_transactions:
+            try:
+                transaction = Transaction(
+                    id=db_txn.transaction_id,
+                    transaction_date=db_txn.date,
+                    amount=db_txn.amount,
+                    vendor=db_txn.vendor,
+                    notes=db_txn.description or "",
+                )
+                transactions.append(transaction)
+            except Exception as e:
+                logger.warning(
+                    f"Error converting transaction {db_txn.transaction_id}: {str(e)}"
+                )
+                continue
+
+        logger.info(f"Converted {len(transactions)} transactions")
+
+        # Get receipts for the specified file IDs from database
+        db_receipts = get_receipts_from_db(db, file_ids)
+        receipts = []
+        missing_files = []
+
+        for file_id in file_ids:
+            # Find the corresponding database receipt
+            db_receipt = next((r for r in db_receipts if r.file_id == file_id), None)
+
+            if db_receipt:
+                try:
+                    receipt = Receipt(
+                        id=db_receipt.receipt_id,
+                        receipt_date=db_receipt.date,
+                        amount=db_receipt.amount,
+                        vendor=db_receipt.vendor,
+                        category=db_receipt.category or "Other",
+                        description=db_receipt.description or "",
+                        tax=db_receipt.tax_amount or 0.0,
+                        file_name=db_receipt.file_id,
+                        upload_date=datetime.now(),
+                    )
+                    receipts.append(receipt)
+                    logger.info(f"Successfully loaded receipt for file_id: {file_id}")
+                except Exception as e:
+                    logger.error(
+                        f"Error creating receipt object for {file_id}: {str(e)}"
+                    )
+                    missing_files.append(file_id)
+            else:
+                logger.warning(f"Receipt {file_id} not found in database")
+                missing_files.append(file_id)
+
+        logger.info(f"Found {len(receipts)} receipts, {len(missing_files)} missing")
+
+        if missing_files:
+            logger.warning(f"Missing files: {missing_files}")
+
+        if not receipts:
+            logger.warning("No valid receipts found")
+            raise HTTPException(
+                status_code=400,
+                detail="No valid receipts found for matching.",
+            )
+
+        # Perform matching
+        logger.info(
+            f"Starting matching with {len(receipts)} receipts and {len(transactions)} transactions"
+        )
+
+        try:
+            matching_results = matching_engine.process_matching(receipts, transactions)
+            logger.info(f"Matching completed, got {len(matching_results)} results")
+
+            # Convert matching results to response format
+            match_responses = []
+            for result in matching_results:
+                match_response = MatchResponse(
+                    receipt_id=result.receipt.id,
+                    transaction_id=result.transaction.id
+                    if result.transaction
+                    else "no_match",
+                    confidence_score=result.confidence_score * 100,
+                    match_reason=result.match_reason,
+                    receipt_vendor=result.receipt.vendor,
+                    receipt_amount=result.receipt.amount,
+                    receipt_description=result.receipt.description,
+                    receipt_category=result.receipt.category,
+                    receipt_tax_amount=result.receipt.tax,
+                    transaction_vendor=result.transaction.vendor
+                    if result.transaction
+                    else "",
+                    transaction_amount=result.transaction.amount
+                    if result.transaction
+                    else 0.0,
+                )
+                match_responses.append(match_response)
+
+            # Calculate statistics
+            high_confidence = len(
+                [r for r in matching_results if r.confidence_score >= 0.8]
+            )
+            low_confidence = len(
+                [r for r in matching_results if r.confidence_score < 0.5]
+            )
+            avg_score = (
+                sum(r.confidence_score for r in matching_results)
+                / len(matching_results)
+                if matching_results
+                else 0
+            )
+
+            stats = {
+                "total": len(match_responses),
+                "high_confidence": high_confidence,
+                "low_confidence": low_confidence,
+                "avg_score": round(avg_score, 2),
+            }
+
+            logger.info(f"Generated stats: {stats}")
+            logger.info(
+                f"Match-specific completed successfully with {len(match_responses)} matches"
+            )
+
+            return MatchingResponse(matches=match_responses, stats=stats)
+
+        except Exception as e:
+            logger.error(f"Exception in matching section: {str(e)}")
+            logger.error(f"Exception type: {type(e)}")
+            logger.error(f"Exception args: {e.args}")
+            raise HTTPException(
+                status_code=500, detail=f"Unexpected matching error: {str(e)}"
+            )
+
+    except HTTPException:
+        raise
+    except Exception as e:
+        logger.error(f"Unexpected error in match_specific_receipts: {str(e)}")
+        raise HTTPException(status_code=500, detail=str(e))
+
+
+# ============================================================================
+# DATABASE QUERY ENDPOINTS
+# ============================================================================
+
+
+@app.get("/transactions", tags=["Database Queries"])
+async def get_transactions(
+    db: db_dependency,
+    user_id: str = None,
+    categorization_id: str = None,
+    limit: int = 100,
+):
+    """
+    Get transactions from the database.
+    """
+    try:
+        transactions = get_transactions_from_db(db, user_id, categorization_id)
+
+        # Limit results
+        transactions = transactions[:limit]
+
+        # Convert to response format
+        result = []
+        for txn in transactions:
+            result.append(
+                {
+                    "id": txn.transaction_id,
+                    "amount": txn.amount,
+                    "date": txn.date.strftime("%Y-%m-%d"),
+                    "vendor": txn.vendor,
+                    "description": txn.description,
+                    "category": txn.category,
+                    "tax_amount": txn.tax_amount,
+                    "categorisation_id": txn.categorisation_id,
+                    "user_id": txn.user_id,
+                }
+            )
+
+        return {
+            "transactions": result,
+            "count": len(result),
+        }
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=str(e))
+
+
+@app.get("/receipts", tags=["Database Queries"])
+async def get_receipts(db: db_dependency, limit: int = 100):
+    """
+    Get receipts from the database.
+    """
+    try:
+        receipts = db.query(DBReceipt).limit(limit).all()
+
+        # Convert to response format
+        result = []
+        for receipt in receipts:
+            result.append(
+                {
+                    "id": receipt.receipt_id,
+                    "file_id": receipt.file_id,
+                    "amount": receipt.amount,
+                    "date": receipt.date.strftime("%Y-%m-%d"),
+                    "vendor": receipt.vendor,
+                    "description": receipt.description,
+                    "category": receipt.category,
+                    "tax_amount": receipt.tax_amount,
+                    "confidence": receipt.confidence,
+                    "extraction_success": receipt.extraction_success,
+                    "error_message": receipt.error_message,
+                }
+            )
+
+        return {
+            "receipts": result,
+            "count": len(result),
+        }
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=str(e))
+
+
+@app.get("/uploaded-files", tags=["Database Queries"])
+async def get_uploaded_files(db: db_dependency, limit: int = 100):
+    """
+    Get uploaded files from the database.
+    """
+    try:
+        uploaded_files = db.query(DBUploadedFile).limit(limit).all()
+
+        # Convert to response format
+        result = []
+        for file in uploaded_files:
+            result.append(
+                {
+                    "file_id": file.file_id,
+                    "filename": file.filename,
+                    "file_path": file.file_path,
+                    "file_type": file.file_type,
+                    "upload_date": file.upload_date.strftime("%Y-%m-%d %H:%M:%S"),
+                    "status": file.status,
+                }
+            )
+
+        return {
+            "uploaded_files": result,
+            "count": len(result),
+        }
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=str(e))
+
+
+# ============================================================================
+# RULES MANAGEMENT ENDPOINTS
+# ============================================================================
+
+
+@app.post("/rules", tags=["AI Rules Management"])
+async def add_rule(request: RuleRequest):
+    """
+    Add a new AI rule for transaction matching.
+    """
+    try:
+        new_rule = AIRule(
+            name=request.name,
+            condition=request.condition,
+            action=request.action,
+            source=request.source,
+        )
+
+        matching_engine.rules_engine.rules.append(new_rule)
+
+        return {"message": f"Rule '{request.name}' added successfully"}
+
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=str(e))
+
+
+@app.get("/rules", tags=["AI Rules Management"])
+async def get_rules():
+    """
+    Get all current AI rules.
+    """
+    try:
+        rules = []
+        for rule in matching_engine.rules_engine.rules:
+            rules.append(
+                {
+                    "name": rule.name,
+                    "condition": rule.condition,
+                    "action": rule.action,
+                    "source": rule.source,
+                    "status": rule.status,
+                }
+            )
+
+        return {"rules": rules}
+
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=str(e))
+
+
+@app.delete("/rules/{rule_name}", tags=["AI Rules Management"])
+async def delete_rule(rule_name: str):
+    """
+    Delete an AI rule by name.
+    """
+    try:
+        rules = matching_engine.rules_engine.rules
+        for i, rule in enumerate(rules):
+            if rule.name == rule_name:
+                del rules[i]
+                return {"message": f"Rule '{rule_name}' deleted successfully"}
+
+        raise HTTPException(status_code=404, detail=f"Rule '{rule_name}' not found")
+
+    except HTTPException:
+        raise
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=str(e))
+
+
+# ============================================================================
+# STATISTICS ENDPOINT
+# ============================================================================
+
+
+@app.get("/stats", tags=["Statistics"])
+async def get_stats(db: db_dependency):
+    """
+    Get system statistics.
+    """
+    try:
+        # Count transactions, receipts, and uploaded files from database
+        total_transactions = db.query(DBTransaction).count()
+        total_receipts = db.query(DBReceipt).count()
+        total_uploaded_files = db.query(DBUploadedFile).count()
+
+        return {
+            "total_transactions": total_transactions,
+            "total_receipts": total_receipts,
+            "total_uploaded_files": total_uploaded_files,
+            "rules_count": len(matching_engine.rules_engine.rules),
+        }
+
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=str(e))
+
+
+if __name__ == "__main__":
+    import uvicorn
+
+    uvicorn.run(app, host="0.0.0.0", port=8654)
@@ -0,0 +1,210 @@
+from dataclasses import dataclass
+from datetime import datetime
+from typing import List, Optional
+
+from pydantic import BaseModel
+
+
+@dataclass
+class Address:
+    """Address information for tax calculations"""
+
+    province: str
+    city: str
+    postal_code: str
+    country: str = "Canada"
+
+
+@dataclass
+class Receipt:
+    id: str
+    file_name: str
+    upload_date: datetime
+    receipt_date: datetime
+    amount: float
+    tax: float
+    vendor: str
+    category: str
+    description: str
+    # Tax rule fields
+    billing_address: Optional[Address] = None
+    shipping_address: Optional[Address] = None
+    currency: str = "CAD"
+    is_meals_entertainment: bool = False
+
+
+@dataclass
+class Transaction:
+    id: str
+    transaction_date: datetime
+    amount: float
+    vendor: str
+    notes: str
+    # Tax rule fields
+    currency: str = "CAD"
+    fx_rate: Optional[float] = None
+
+
+@dataclass
+class Asset:
+    """Asset for depreciation calculations"""
+
+    id: str
+    name: str
+    purchase_date: datetime
+    purchase_amount: float
+    useful_life_years: int
+    residual_value: float
+    cca_rate: float  # Capital Cost Allowance rate
+    asset_class: str
+
+
+@dataclass
+class Match:
+    receipt: Receipt
+    transaction: Transaction
+    confidence_score: float
+    match_reason: str
+    tax_analysis: Optional[dict] = None
+
+
+class AddressRequest(BaseModel):
+    province: str
+    city: str
+    postal_code: str
+    country: str = "Canada"
+
+
+class ReceiptRequest(BaseModel):
+    id: str
+    file_name: str
+    upload_date: datetime
+    receipt_date: datetime
+    amount: float
+    tax: float
+    vendor: str
+    category: str
+    description: str
+    # Tax rule fields
+    billing_address: Optional[AddressRequest] = None
+    shipping_address: Optional[AddressRequest] = None
+    currency: str = "CAD"
+    is_meals_entertainment: bool = False
+
+
+class TransactionRequest(BaseModel):
+    id: str
+    transaction_date: datetime
+    amount: float
+    vendor: str
+    notes: str
+    # Tax rule fields
+    currency: str = "CAD"
+    fx_rate: Optional[float] = None
+
+
+class AssetRequest(BaseModel):
+    id: str
+    name: str
+    purchase_date: datetime
+    purchase_amount: float
+    useful_life_years: int
+    residual_value: float
+    cca_rate: float
+    asset_class: str
+
+
+class MatchingRequest(BaseModel):
+    receipt_ids: List[str]
+    transaction_ids: List[str]
+
+
+class MatchResponse(BaseModel):
+    receipt_id: str
+    transaction_id: str
+    confidence_score: float
+    match_reason: str
+    receipt_vendor: str
+    receipt_amount: float
+    receipt_description: str
+    receipt_category: str
+    receipt_tax_amount: float
+    transaction_vendor: str
+    transaction_amount: float
+
+
+class MatchingResponse(BaseModel):
+    matches: List[MatchResponse]
+    stats: dict
+
+
+class ApprovalRequest(BaseModel):
+    match_id: str
+    approved: bool
+    reason: Optional[str] = None
+
+
+class RuleRequest(BaseModel):
+    name: str
+    condition: str
+    action: str
+    source: str = "user"
+
+
+class DocumentUploadResponse(BaseModel):
+    file_id: str
+    filename: str
+    file_type: str
+    upload_date: datetime
+    status: str
+
+
+class DocumentProcessResponse(BaseModel):
+    file_id: str
+    receipt_id: str
+    extraction_success: bool
+    vendor: Optional[str] = None
+    description: Optional[str] = None
+    total_amount: Optional[float] = None
+    tax_amount: Optional[float] = None
+    date: Optional[str] = None
+    category: Optional[str] = None
+    confidence: Optional[float] = None
+    error: Optional[str] = None
+    receipt_currency: Optional[str] = "CAD"
+
+
+# New tax-related models
+class TaxCalculationRequest(BaseModel):
+    receipt_id: str
+    transaction_id: Optional[str] = None
+
+
+class TaxCalculationResponse(BaseModel):
+    receipt_id: str
+    rules_applied: List[str]
+    sales_tax: dict
+    fx_analysis: Optional[dict] = None
+    meals_entertainment: dict
+
+
+class DepreciationRequest(BaseModel):
+    asset: AssetRequest
+    year: int
+    method: str  # "straight_line" or "cca"
+
+
+class DepreciationResponse(BaseModel):
+    asset_id: str
+    year: int
+    method: str
+    depreciation: float
+    book_value: float
+    total_depreciation: Optional[float] = None
+    success: bool
+    error: Optional[str] = None
+
+
+class MatchSpecificRequest(BaseModel):
+    file_ids: List[str]
+    categorization_id: str
@@ -0,0 +1,469 @@
+import logging
+import time
+from typing import List, Tuple
+
+import groq
+
+from config import settings
+from schemas import Match, Receipt, Transaction
+
+# Set up logging
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+
+
+class AIMatcher:
+    def __init__(self, use_batch_matching=True):
+        self.client = groq.Groq(api_key=settings.GROQ_API_KEY)
+        self.model = "llama-3.1-8b-instant"
+        self.max_retries = 3
+        self.retry_delay = 2  # seconds - increased for rate limiting
+        self.rate_limit_delay = 1.0  # seconds between API calls
+        self.last_api_call = 0
+        self.use_batch_matching = (
+            use_batch_matching  # Toggle between new and legacy methods
+        )
+
+    def match_receipts_to_transactions(
+        self, receipts: List[Receipt], transactions: List[Transaction]
+    ) -> List[Match]:
+        """Match receipts to transactions using AI"""
+        logger.info(
+            f"Starting AI matching for {len(receipts)} receipts against {len(transactions)} transactions"
+        )
+        matches = []
+
+        for i, receipt in enumerate(receipts):
+            logger.info(
+                f"Processing receipt {i + 1}/{len(receipts)}: {receipt.vendor} - ${receipt.amount}"
+            )
+
+            # Rate limiting
+            self._rate_limit()
+
+            # Get the BEST match for this receipt (highest confidence score)
+            best_match = self._find_best_match(receipt, transactions)
+            if best_match:
+                matches.append(best_match)
+                logger.info(
+                    f"Found match: {best_match.confidence_score:.3f} - {best_match.match_reason}"
+                )
+            else:
+                logger.warning(
+                    f"No match found for receipt: {receipt.vendor} - ${receipt.amount}"
+                )
+
+        # Sort by confidence score (highest first)
+        matches = sorted(matches, key=lambda x: x.confidence_score, reverse=True)
+        logger.info(f"AI matching completed. Found {len(matches)} matches")
+        return matches
+
+    def _rate_limit(self):
+        """Implement rate limiting to avoid API quota exhaustion"""
+        current_time = time.time()
+        time_since_last_call = current_time - self.last_api_call
+
+        if time_since_last_call < self.rate_limit_delay:
+            sleep_time = self.rate_limit_delay - time_since_last_call
+            logger.debug(f"Rate limiting: sleeping for {sleep_time:.2f} seconds")
+            time.sleep(sleep_time)
+
+        self.last_api_call = time.time()
+
+    def _find_best_match(
+        self, receipt: Receipt, transactions: List[Transaction]
+    ) -> Match:
+        """Find the BEST match for a receipt using a single AI call for all candidates"""
+        candidates = self._filter_candidates(receipt, transactions)
+        if not candidates:
+            logger.warning(
+                f"No candidates found for receipt: {receipt.vendor} - ${receipt.amount}"
+            )
+            return None
+
+        logger.info(f"Found {len(candidates)} candidates for receipt: {receipt.vendor}")
+
+        # Choose matching method based on configuration
+        if self.use_batch_matching:
+            # New efficient method: single AI call for all candidates
+            best_match = self._find_best_match_single_call(receipt, candidates)
+        else:
+            # Legacy method: individual AI calls (fallback)
+            best_match = self._find_best_match_legacy(receipt, candidates)
+
+        return best_match
+
+    def _find_best_match_single_call(
+        self, receipt: Receipt, candidates: List[Transaction]
+    ) -> Match:
+        """Find the best match using a single AI call to evaluate all candidates"""
+        if not candidates:
+            return None
+
+        # Limit candidates to avoid token limits (adjust based on your needs)
+        max_candidates = 10
+        if len(candidates) > max_candidates:
+            # Sort by amount similarity and take top candidates
+            candidates = sorted(
+                candidates, key=lambda t: abs(receipt.amount - abs(t.amount))
+            )[:max_candidates]
+            logger.info(
+                f"Limited candidates to top {max_candidates} by amount similarity"
+            )
+
+        # Build comprehensive prompt with all candidates
+        candidates_text = ""
+        for i, transaction in enumerate(candidates):
+            transaction_amount_abs = abs(transaction.amount)
+            date_diff = abs((receipt.receipt_date - transaction.transaction_date).days)
+            amount_diff = abs(receipt.amount - transaction_amount_abs)
+            amount_percent_diff = (
+                (amount_diff / receipt.amount) * 100 if receipt.amount > 0 else 0
+            )
+
+            candidates_text += f"""
+Candidate {i + 1}:
+- Vendor: {transaction.vendor}
+- Amount: ${transaction.amount} (absolute: ${transaction_amount_abs})
+- Date: {transaction.transaction_date.strftime("%Y-%m-%d")} ({date_diff} days difference)
+- Notes: {transaction.notes}
+- Amount difference: ${amount_diff} ({amount_percent_diff:.1f}%)
+"""
+
+        prompt = f"""
+You are an expert at matching receipts to bank transactions. Analyze the receipt below against ALL the candidate transactions and return the BEST match.
+
+RECEIPT TO MATCH:
+- Vendor: {receipt.vendor}
+- Amount: ${receipt.amount}
+- Date: {receipt.receipt_date.strftime("%Y-%m-%d")}
+- Description: {receipt.description}
+- Category: {receipt.category}
+
+CANDIDATE TRANSACTIONS:
+{candidates_text}
+
+SCORING CRITERIA:
+- Perfect matches (same vendor, amount, date): 0.95-1.0
+- High confidence (minor differences): 0.8-0.94
+- Medium confidence (moderate differences): 0.6-0.79
+- Low confidence (significant differences): 0.4-0.59
+- Very low confidence (major differences): 0.2-0.39
+- Minimal similarity: 0.1-0.19
+- No meaningful similarity: 0.0-0.09
+
+Consider vendor name similarity, amount accuracy, date proximity, and description/notes relevance.
+
+IMPORTANT: You MUST return the candidate with the highest match score, even if it's very low. Never return NONE.
+Return ONLY the best match in this exact format:
+CANDIDATE_NUMBER|CONFIDENCE_SCORE|REASON
+
+Example: 3|0.87|Same vendor name, exact amount match, 1 day apart
+Example of low match: 5|0.15|Best available option despite significant differences in vendor and amount
+"""
+
+        for attempt in range(self.max_retries):
+            try:
+                result = self._call_groq_api_with_timeout(
+                    prompt, timeout=45
+                )  # Longer timeout for complex prompt
+
+                # Parse the single result
+                candidate_num, score, reason = self._parse_single_match_response(result)
+
+                if candidate_num == -1:  # Parsing error occurred
+                    logger.warning(
+                        f"Failed to parse AI response for receipt: {receipt.vendor}"
+                    )
+                    return None
+
+                if 0 <= candidate_num < len(candidates):
+                    best_transaction = candidates[candidate_num]
+                    logger.info(
+                        f"AI selected candidate {candidate_num + 1}: {best_transaction.vendor} (score: {score:.3f})"
+                    )
+                    return Match(receipt, best_transaction, score, reason)
+                else:
+                    logger.warning(
+                        f"AI returned invalid candidate number: {candidate_num}"
+                    )
+                    return None
+
+            except Exception as e:
+                logger.warning(
+                    f"Attempt {attempt + 1} failed for receipt {receipt.id}: {str(e)}"
+                )
+                if attempt < self.max_retries - 1:
+                    sleep_time = self.retry_delay * (2**attempt)
+                    logger.info(f"Waiting {sleep_time} seconds before retry...")
+                    time.sleep(sleep_time)
+                else:
+                    logger.error(f"All attempts failed for receipt {receipt.id}")
+                    return None
+
+        return None
+
+    def _parse_single_match_response(self, result: str) -> Tuple[int, float, str]:
+        """Parse AI response for single best match"""
+        result = result.strip()
+        logger.debug(f"Parsing single match response: {result}")
+
+        try:
+            if result.upper().startswith("NONE"):
+                # This should not happen with new prompt, but handle as parsing error
+                logger.warning(
+                    "AI returned NONE despite being instructed to always return best match"
+                )
+                return -1, 0.0, "AI returned NONE unexpectedly"
+
+            if "|" in result:
+                parts = result.split("|")
+                if len(parts) >= 3:
+                    candidate_str = parts[0].strip()
+                    score_str = parts[1].strip()
+                    reason = "|".join(parts[2:]).strip()
+
+                    # Extract candidate number
+                    import re
+
+                    candidate_match = re.search(r"\d+", candidate_str)
+                    if candidate_match:
+                        candidate_num = (
+                            int(candidate_match.group()) - 1
+                        )  # Convert to 0-based index
+                    else:
+                        raise ValueError("No candidate number found")
+
+                    # Extract score
+                    score_clean = "".join(
+                        c for c in score_str if c.isdigit() or c == "."
+                    )
+                    score = float(score_clean) if score_clean else 0.0
+
+                    # Ensure score is in valid range
+                    score = max(0.0, min(1.0, score))
+
+                    logger.debug(
+                        f"Parsed: candidate={candidate_num}, score={score}, reason={reason}"
+                    )
+                    return candidate_num, score, reason
+
+        except Exception as e:
+            logger.warning(f"Error parsing single match response: {e}")
+
+        # Fallback
+        logger.warning(f"Could not parse single match response: {result}")
+        return -1, 0.0, f"Parse error: {result[:50]}..."
+
+    def _filter_candidates(
+        self, receipt: Receipt, transactions: List[Transaction]
+    ) -> List[Transaction]:
+        """Filter transactions to create a reasonable candidate list"""
+        candidates = []
+        amount_threshold = receipt.amount * 2.0  # 200% threshold - very inclusive
+
+        for transaction in transactions:
+            # Use absolute value for transaction amount comparison
+            transaction_amount_abs = abs(transaction.amount)
+
+            # Only exclude transactions with obviously different amounts
+            if abs(receipt.amount - transaction_amount_abs) <= amount_threshold:
+                candidates.append(transaction)
+
+        logger.debug(
+            f"Filtered {len(transactions)} transactions to {len(candidates)} candidates"
+        )
+        return candidates
+
+    def _find_best_match_legacy(
+        self, receipt: Receipt, transactions: List[Transaction]
+    ) -> Match:
+        """Legacy method: Find the best match using individual API calls (kept as fallback)"""
+        candidates = self._filter_candidates(receipt, transactions)
+        if not candidates:
+            return None
+
+        best_match = None
+        highest_score = 0
+
+        for transaction in candidates:
+            score, reason = self._calculate_match_score(receipt, transaction)
+            logger.debug(
+                f"Score {score:.3f} for transaction {transaction.vendor}: {reason}"
+            )
+
+            if score > highest_score:
+                highest_score = score
+                best_match = Match(receipt, transaction, score, reason)
+
+        return best_match
+
+    def _calculate_match_score(
+        self, receipt: Receipt, transaction: Transaction
+    ) -> Tuple[float, str]:
+        """Calculate match score using AI"""
+        # Calculate differences for the AI to consider
+        date_diff = abs((receipt.receipt_date - transaction.transaction_date).days)
+        transaction_amount_abs = abs(transaction.amount)
+        amount_diff = abs(receipt.amount - transaction_amount_abs)
+        amount_percent_diff = (
+            (amount_diff / receipt.amount) * 100 if receipt.amount > 0 else 0
+        )
+
+        prompt = f"""
+        Compare this receipt with this transaction and provide a confidence score (0-1) and brief reason, the reason must be a single sentence without any special formatting.
+        
+        Receipt: {receipt.vendor}, ${receipt.amount}, {receipt.receipt_date.strftime("%Y-%m-%d")}
+        Receipt Description: {receipt.description}
+        Receipt Category: {receipt.category}
+        Transaction: {transaction.vendor}, ${transaction.amount} (absolute: ${transaction_amount_abs}), {transaction.transaction_date.strftime("%Y-%m-%d")}
+        Transaction Notes: {transaction.notes}
+        
+        Differences:
+        - Date difference: {date_diff} days
+        - Amount difference: ${amount_diff} ({amount_percent_diff:.1f}%)
+        - Vendor comparison: "{receipt.vendor}" vs "{transaction.vendor}"
+        - Description/Notes comparison: "{receipt.description}" vs "{transaction.notes}"
+        - Category: {receipt.category}
+        
+        Score this potential match based on how likely it is the correct match:
+        
+        - Perfect matches (same vendor, amount, date): 0.95-1.0
+        - High confidence (minor differences): 0.8-0.94
+        - Medium confidence (moderate differences): 0.6-0.79
+        - Low confidence (significant differences): 0.4-0.59
+        - Very low confidence (major differences): 0.2-0.39
+        - Minimal similarity: 0.1-0.19
+        - No meaningful similarity: 0.0-0.09
+        
+        Consider description and category similarity in your scoring.
+        
+        IMPORTANT: Return ONLY the score and reason separated by a pipe character.
+        Format: [score]|[reason]
+        Example: 0.85|Same vendor, same amount, 2 days apart
+        """
+
+        for attempt in range(self.max_retries):
+            try:
+                result = self._call_groq_api_with_timeout(
+                    prompt, timeout=30
+                )  # Increased timeout
+
+                # Parse the result - handle multiple formats
+                score, reason = self._parse_ai_response(result)
+
+                logger.debug(f"AI Response: {result}")
+                logger.debug(f"Parsed: score={score}, reason={reason}")
+
+                return score, reason
+
+            except Exception as e:
+                logger.warning(
+                    f"Attempt {attempt + 1} failed for receipt {receipt.id}: {str(e)}"
+                )
+                if attempt < self.max_retries - 1:
+                    # Exponential backoff for rate limiting
+                    sleep_time = self.retry_delay * (2**attempt)
+                    logger.info(f"Waiting {sleep_time} seconds before retry...")
+                    time.sleep(sleep_time)
+                else:
+                    logger.error(f"All attempts failed for receipt {receipt.id}")
+                    return 0.0, f"AI error after {self.max_retries} attempts: {str(e)}"
+
+    def _parse_ai_response(self, result: str) -> Tuple[float, str]:
+        """Parse AI response with robust error handling"""
+        result = result.strip()
+        logger.debug(f"Parsing AI response: {result}")
+
+        # Try to find score in various formats
+        if "|" in result:
+            parts = result.split("|")
+            logger.debug(f"Split response into {len(parts)} parts: {parts}")
+
+            # Look for a numeric score in any part
+            for i, part in enumerate(parts):
+                part = part.strip()
+                try:
+                    # Remove any non-numeric characters except decimal point
+                    score_str_clean = "".join(
+                        c for c in part if c.isdigit() or c == "."
+                    )
+                    if score_str_clean:
+                        score = float(score_str_clean)
+                        if 0 <= score <= 1:  # Valid confidence score
+                            # Get reason from other parts
+                            reason_parts = [
+                                p.strip()
+                                for j, p in enumerate(parts)
+                                if j != i and p.strip()
+                            ]
+                            reason = (
+                                " | ".join(reason_parts)
+                                if reason_parts
+                                else "Score extracted"
+                            )
+                            logger.debug(
+                                f"Found score {score} in part {i}, reason: {reason}"
+                            )
+                            return score, reason
+                except ValueError:
+                    continue
+
+        # Try to extract just a number from the response
+        try:
+            import re
+
+            numbers = re.findall(r"\d+\.?\d*", result)
+            if numbers:
+                for num_str in numbers:
+                    score = float(num_str)
+                    if 0 <= score <= 1:  # Valid confidence score
+                        logger.debug(f"Extracted score {score} from response")
+                        return score, f"Extracted from response: {result[:50]}..."
+        except (ValueError, IndexError):
+            pass
+
+        # Fallback - try to find any number and normalize it
+        try:
+            import re
+
+            numbers = re.findall(r"\d+\.?\d*", result)
+            if numbers:
+                score = float(numbers[0])
+                # Normalize to 0-1 range if it's a percentage or other scale
+                if score > 1:
+                    score = score / 100  # Assume percentage
+                score = max(0, min(1, score))  # Clamp to 0-1
+                logger.debug(f"Normalized score {score} from response")
+                return score, f"Normalized from response: {result[:50]}..."
+        except (ValueError, IndexError):
+            pass
+
+        # Final fallback
+        logger.warning(f"Could not parse AI response: {result}")
+        return 0.0, f"Unparseable response: {result[:50]}..."
+
+    def _call_groq_api_with_timeout(self, prompt: str, timeout: int = 15) -> str:
+        """Make API call with timeout and retry logic"""
+        import concurrent.futures
+
+        def api_call():
+            try:
+                response = self.client.chat.completions.create(
+                    model=self.model,
+                    messages=[{"role": "user", "content": prompt}],
+                    max_tokens=200,
+                    temperature=0.1,
+                )
+                return response.choices[0].message.content.strip()
+            except Exception as e:
+                raise e
+
+        try:
+            with concurrent.futures.ThreadPoolExecutor() as executor:
+                future = executor.submit(api_call)
+                return future.result(timeout=timeout)
+        except concurrent.futures.TimeoutError:
+            raise Exception(f"API call timed out after {timeout} seconds")
+        except Exception as e:
+            raise e
@@ -0,0 +1,175 @@
+from dataclasses import dataclass
+from typing import Any, Dict, List
+
+from schemas import Receipt, Transaction
+from services.tax_rules_engine import TaxRulesEngine
+
+
+@dataclass
+class AIRule:
+    name: str
+    condition: str
+    action: str
+    source: str
+    status: str = "active"
+
+
+class AIRulesEngine:
+    def __init__(self):
+        self.rules: List[AIRule] = []
+        self.tax_rules_engine = TaxRulesEngine()
+        self._load_default_rules()
+
+    def _load_default_rules(self):
+        self.rules = [
+            AIRule(
+                "exact_amount_match", "amount_diff <= 0.01", "auto_approve", "system"
+            ),
+            AIRule(
+                "same_vendor_same_date",
+                "vendor_match and date_diff <= 1",
+                "high_confidence",
+                "system",
+            ),
+            AIRule(
+                "gas_station_pattern",
+                "vendor_contains_gas_or_fuel",
+                "categorize_transport",
+                "system",
+            ),
+            # Tax-related rules
+            AIRule(
+                "fx_currency_mismatch",
+                "currency_mismatch",
+                "flag_fx_review",
+                "tax_system",
+            ),
+            AIRule(
+                "meals_entertainment",
+                "is_meals_entertainment",
+                "apply_me_tax_rule",
+                "tax_system",
+            ),
+            AIRule(
+                "provincial_tax_calculation",
+                "has_address_info",
+                "calculate_provincial_tax",
+                "tax_system",
+            ),
+        ]
+
+    def apply_rules(self, receipt: Receipt, transaction: Transaction) -> Dict[str, Any]:
+        results = {
+            "auto_approve": False,
+            "confidence_boost": 0,
+            "category": None,
+            "tax_analysis": {},
+        }
+
+        for rule in self.rules:
+            if rule.status != "active":
+                continue
+
+            if self._evaluate_condition(rule.condition, receipt, transaction):
+                self._execute_action(rule.action, results, receipt, transaction)
+
+        return results
+
+    def _evaluate_condition(
+        self, condition: str, receipt: Receipt, transaction: Transaction
+    ) -> bool:
+        """Safely evaluate rule conditions without using eval()"""
+        amount_diff = abs(receipt.amount - abs(transaction.amount))
+        date_diff = abs((receipt.receipt_date - transaction.transaction_date).days)
+        vendor_match = (
+            receipt.vendor.lower() in transaction.vendor.lower()
+            or transaction.vendor.lower() in receipt.vendor.lower()
+        )
+        vendor_lower = receipt.vendor.lower()
+        vendor_contains_gas_or_fuel = "gas" in vendor_lower or "fuel" in vendor_lower
+
+        # Tax-related conditions
+        currency_mismatch = receipt.currency != transaction.currency
+        is_meals_entertainment = receipt.is_meals_entertainment
+        has_address_info = (
+            receipt.billing_address is not None or receipt.shipping_address is not None
+        )
+
+        # Handle specific condition types safely
+        if condition == "amount_diff <= 0.01":
+            return amount_diff <= 0.01
+        elif condition == "vendor_match and date_diff <= 1":
+            return vendor_match and date_diff <= 1
+        elif condition == "vendor_contains_gas_or_fuel":
+            return vendor_contains_gas_or_fuel
+        elif condition == "currency_mismatch":
+            return currency_mismatch
+        elif condition == "is_meals_entertainment":
+            return is_meals_entertainment
+        elif condition == "has_address_info":
+            return has_address_info
+        else:
+            # For any other conditions, try to evaluate them safely
+            try:
+                # Only allow safe operations
+                safe_globals = {
+                    "amount_diff": amount_diff,
+                    "date_diff": date_diff,
+                    "vendor_match": vendor_match,
+                    "vendor_contains_gas_or_fuel": vendor_contains_gas_or_fuel,
+                    "currency_mismatch": currency_mismatch,
+                    "is_meals_entertainment": is_meals_entertainment,
+                    "has_address_info": has_address_info,
+                    "receipt": receipt,
+                    "transaction": transaction,
+                    "abs": abs,
+                    "len": len,
+                    "min": min,
+                    "max": max,
+                    "sum": sum,
+                    "round": round,
+                }
+                return eval(condition, safe_globals, {})
+            except (SyntaxError, NameError, TypeError) as e:
+                print(f"Warning: Invalid condition '{condition}': {e}")
+                return False
+
+    def _execute_action(
+        self,
+        action: str,
+        results: Dict[str, Any],
+        receipt: Receipt,
+        transaction: Transaction,
+    ):
+        if action == "auto_approve":
+            results["auto_approve"] = True
+        elif action == "high_confidence":
+            results["confidence_boost"] += 0.2
+        elif action == "categorize_transport":
+            results["category"] = "Transportation"
+        elif action == "flag_fx_review":
+            # Apply FX rule and flag for review
+            fx_result = self.tax_rules_engine.apply_fx_rule(receipt, transaction)
+            results["tax_analysis"]["fx"] = fx_result
+            if fx_result.get("requires_manual_review", False):
+                results["confidence_boost"] -= 0.1  # Reduce confidence for FX issues
+        elif action == "apply_me_tax_rule":
+            # Apply meals & entertainment rule
+            me_result = self.tax_rules_engine.apply_meals_entertainment_rule(receipt)
+            results["tax_analysis"]["meals_entertainment"] = me_result
+        elif action == "calculate_provincial_tax":
+            # Calculate provincial tax
+            tax_result = self.tax_rules_engine.apply_sales_tax_rule(receipt)
+            results["tax_analysis"]["sales_tax"] = tax_result
+
+    def add_rule(self, rule: AIRule):
+        self.rules.append(rule)
+
+    def remove_rule(self, rule_name: str):
+        self.rules = [r for r in self.rules if r.name != rule_name]
+
+    def apply_tax_rules(
+        self, receipt: Receipt, transaction: Transaction = None
+    ) -> Dict[str, Any]:
+        """Apply all tax rules to a receipt/transaction pair"""
+        return self.tax_rules_engine.apply_all_tax_rules(receipt, transaction)
@@ -0,0 +1,547 @@
+import base64
+import logging
+import os
+from datetime import datetime
+from typing import Any, Dict
+
+import aiofiles
+import groq
+import PyPDF2
+
+from config import settings
+
+logger = logging.getLogger(__name__)
+
+
+class DocumentProcessor:
+    def __init__(self):
+        self.client = groq.Groq(api_key=settings.GROQ_API_KEY)
+        self.model = "meta-llama/llama-4-scout-17b-16e-instruct"  # Vision model
+
+    async def process_file(self, file_path: str, file_type: str) -> Dict[str, Any]:
+        """Process uploaded file and extract receipt data"""
+        try:
+            if file_type.lower() in ["jpg", "jpeg", "png", "gif", "bmp"]:
+                return await self._process_image(file_path)
+            elif file_type.lower() == "pdf":
+                return await self._process_pdf(file_path)
+            else:
+                raise ValueError(f"Unsupported file type: {file_type}")
+        except Exception as e:
+            return {"error": str(e)}
+
+    async def _process_image(self, image_path: str) -> Dict[str, Any]:
+        """Extract data from image using Groq vision"""
+        try:
+            # Encode image to base64
+            base64_image = self._encode_image(image_path)
+
+            # Create Groq vision prompt
+            prompt = """
+            Analyze this receipt image and extract the following information in JSON format:
+            {
+                "vendor": "Store/company name",
+                "description": "Detailed description of items/services purchased",
+                "total_amount": 0.00,
+                "tax_amount": 0.00,
+                "date": "YYYY-MM-DD",
+                "category": "Food/Transport/Office/Other",
+                "confidence": 0.95,
+                "currency": "USD"
+            }
+            
+            Rules:
+            - Extract vendor name as it appears on receipt
+            - Extract description of items/services purchased (e.g., "Coffee and sandwich", "Gasoline", "Office supplies")
+            - Total amount should be the final total including tax
+            - Tax amount is separate tax line if available
+            - Date should be the date on the receipt
+            - Categorize based on vendor type (Starbucks=Food, Shell=Transport, etc.)
+            - Confidence score 0-1 based on how clear the receipt is
+            - Currency should be the currency used on the receipt (e.g., "USD", "EUR")
+
+            Return only valid JSON.
+            """
+
+            # Call Groq vision API with correct format
+            response = self.client.chat.completions.create(
+                messages=[
+                    {
+                        "role": "user",
+                        "content": [
+                            {"type": "text", "text": prompt},
+                            {
+                                "type": "image_url",
+                                "image_url": {
+                                    "url": f"data:image/jpeg;base64,{base64_image}",
+                                },
+                            },
+                        ],
+                    }
+                ],
+                model=self.model,
+                max_tokens=500,
+                temperature=0.1,
+            )
+
+            # Parse response
+            result_text = response.choices[0].message.content.strip()
+            return self._parse_extraction_result(result_text)
+
+        except Exception as e:
+            return {"error": f"Image processing error: {str(e)}"}
+
+    def _encode_image(self, image_path: str) -> str:
+        """Encode image to base64 string"""
+        with open(image_path, "rb") as image_file:
+            return base64.b64encode(image_file.read()).decode("utf-8")
+
+    async def _process_pdf(self, pdf_path: str) -> Dict[str, Any]:
+        """Extract data from PDF by converting to image first"""
+        try:
+            # For now, extract text from PDF and process as text
+            text_content = self._extract_text_from_pdf(pdf_path)
+            return self._process_text_content(text_content)
+
+        except Exception as e:
+            return {"error": f"PDF processing error: {str(e)}"}
+
+    def _extract_text_from_pdf(self, pdf_path: str) -> str:
+        """Extract text from PDF"""
+        try:
+            with open(pdf_path, "rb") as file:
+                pdf_reader = PyPDF2.PdfReader(file)
+                text = ""
+                for page in pdf_reader.pages:
+                    text += page.extract_text() + "\n"
+                return text
+        except Exception:
+            return ""
+
+    def _process_text_content(self, text_content: str) -> Dict[str, Any]:
+        """Process text content using Groq (fallback for PDFs)"""
+        try:
+            prompt = f"""
+            Analyze this receipt text and extract the following information in JSON format:
+            
+            Receipt Text:
+            {text_content}
+            
+            Extract:
+            {{
+                "vendor": "Store/company name",
+                "description": "Detailed description of items/services purchased",
+                "total_amount": 0.00,
+                "tax_amount": 0.00,
+                "date": "YYYY-MM-DD",
+                "category": "Food/Transport/Office/Other",
+                "confidence": 0.95,
+                "currency": "USD"
+            }}
+            
+            Rules:
+            - Extract vendor name as it appears on receipt
+            - Extract description of items/services purchased (e.g., "Coffee and sandwich", "Gasoline", "Office supplies")
+            - Total amount should be the final total including tax
+            - Tax amount is separate tax line if available
+            - Date should be the date on the receipt
+            - Categorize based on vendor type
+            - Confidence score 0-1 based on clarity
+            - Currency should be the currency used on the receipt (e.g., "USD", "EUR")
+            
+            Return only valid JSON.
+            """
+
+            response = self.client.chat.completions.create(
+                model=self.model,
+                messages=[{"role": "user", "content": prompt}],
+                max_tokens=500,
+                temperature=0.1,
+            )
+
+            result_text = response.choices[0].message.content.strip()
+            return self._parse_extraction_result(result_text)
+
+        except Exception as e:
+            return {"error": f"Text processing error: {str(e)}"}
+
+    def _parse_extraction_result(self, result_text: str) -> Dict[str, Any]:
+        """Parse Groq response and extract JSON data"""
+        try:
+            # Clean up response and extract JSON
+            import json
+            import re
+
+            # Find JSON in response - try multiple patterns
+            json_match = re.search(r"\{.*\}", result_text, re.DOTALL)
+            if json_match:
+                json_str = json_match.group()
+
+                # Clean up common JSON issues
+                json_str = re.sub(
+                    r",\s*([}\]])", r"\1", json_str
+                )  # Remove trailing commas
+                json_str = re.sub(
+                    r"([{,])\s*([a-zA-Z_][a-zA-Z0-9_]*)\s*:", r'\1"\2":', json_str
+                )  # Quote unquoted keys
+
+                try:
+                    data = json.loads(json_str)
+                except json.JSONDecodeError as e:
+                    # Try to fix common JSON issues
+                    logger.warning(f"Initial JSON parsing failed: {e}")
+
+                    # Try to extract individual fields using regex
+                    vendor_match = re.search(r'"vendor"\s*:\s*"([^"]*)"', json_str)
+                    description_match = re.search(
+                        r'"description"\s*:\s*"([^"]*)"', json_str
+                    )
+                    total_amount_match = re.search(
+                        r'"total_amount"\s*:\s*([0-9.]+)', json_str
+                    )
+                    tax_amount_match = re.search(
+                        r'"tax_amount"\s*:\s*([0-9.]+)', json_str
+                    )
+                    date_match = re.search(r'"date"\s*:\s*"([^"]*)"', json_str)
+                    category_match = re.search(r'"category"\s*:\s*"([^"]*)"', json_str)
+                    confidence_match = re.search(
+                        r'"confidence"\s*:\s*([0-9.]+)', json_str
+                    )
+                    currency_match = re.search(
+                        r'"currency"\s*:\s*"([^"]*)"', json_str
+                    )
+
+                    data = {
+                        "vendor": vendor_match.group(1) if vendor_match else "",
+                        "description": description_match.group(1)
+                        if description_match
+                        else "",
+                        "total_amount": float(total_amount_match.group(1))
+                        if total_amount_match
+                        else 0.0,
+                        "tax_amount": float(tax_amount_match.group(1))
+                        if tax_amount_match
+                        else 0.0,
+                        "date": date_match.group(1) if date_match else "",
+                        "category": category_match.group(1)
+                        if category_match
+                        else "Other",
+                        "confidence": float(confidence_match.group(1))
+                        if confidence_match
+                        else 0.5,
+                        "currency": currency_match.group(1) if currency_match else "CAD"
+                    }
+
+                # Validate and clean data
+                return {
+                    "vendor": str(data.get("vendor", "")).strip(),
+                    "description": str(data.get("description", "")).strip(),
+                    "total_amount": float(data.get("total_amount", 0)),
+                    "tax_amount": float(data.get("tax_amount", 0)),
+                    "date": str(data.get("date", "")).strip(),
+                    "category": str(data.get("category", "Other")).strip(),
+                    "confidence": float(data.get("confidence", 0.5)),
+                    "extraction_success": True,
+                    "currency": data.get("currency", "CAD").strip(),
+                }
+            else:
+                # Try to extract fields from plain text
+                logger.warning("No JSON found in response, attempting text extraction")
+                return self._extract_from_plain_text(result_text)
+
+        except Exception as e:
+            logger.error(f"JSON parsing error: {str(e)}")
+            return {
+                "error": f"JSON parsing error: {str(e)}",
+                "extraction_success": False,
+            }
+
+    def _extract_from_plain_text(self, text: str) -> Dict[str, Any]:
+        """Extract receipt data from plain text when JSON parsing fails"""
+        try:
+            import re
+
+            # Extract vendor (look for common patterns)
+            vendor_patterns = [
+                r"(?:vendor|store|merchant|company)\s*[:\-]?\s*([A-Za-z0-9\s&.,]+)",
+                r"([A-Z][A-Za-z0-9\s&.,]{3,30})",  # Capitalized words
+            ]
+
+            vendor = ""
+            for pattern in vendor_patterns:
+                match = re.search(pattern, text, re.IGNORECASE)
+                if match:
+                    vendor = match.group(1).strip()
+                    break
+
+            # Extract amount (look for currency patterns)
+            amount_patterns = [
+                r"\$?\s*([0-9,]+\.?[0-9]*)",
+                r"(?:total|amount|sum)\s*[:\-]?\s*\$?\s*([0-9,]+\.?[0-9]*)",
+            ]
+
+            total_amount = 0.0
+            for pattern in amount_patterns:
+                match = re.search(pattern, text, re.IGNORECASE)
+                if match:
+                    try:
+                        total_amount = float(match.group(1).replace(",", ""))
+                        break
+                    except ValueError:
+                        continue
+
+            # Extract date
+            date_patterns = [
+                r"(\d{4}-\d{2}-\d{2})",
+                r"(\d{1,2}/\d{1,2}/\d{2,4})",
+                r"(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)\s+\d{1,2},?\s+\d{4}",
+            ]
+
+            date = ""
+            for pattern in date_patterns:
+                match = re.search(pattern, text, re.IGNORECASE)
+                if match:
+                    date = match.group(0)
+                    break
+
+            return {
+                "vendor": vendor or "Unknown",
+                "total_amount": total_amount,
+                "tax_amount": 0.0,
+                "date": date or "",
+                "category": "Other",
+                "confidence": 0.3,  # Low confidence for text extraction
+                "extraction_success": True,
+            }
+
+        except Exception as e:
+            logger.error(f"Text extraction error: {str(e)}")
+            return {
+                "vendor": "Unknown",
+                "total_amount": 0.0,
+                "tax_amount": 0.0,
+                "date": "",
+                "category": "Other",
+                "confidence": 0.1,
+                "extraction_success": False,
+                "error": f"Text extraction failed: {str(e)}",
+            }
+
+    async def save_uploaded_file(self, file_content: bytes, filename: str) -> str:
+        """Save uploaded file to temporary storage"""
+        try:
+            # Create uploads directory if it doesn't exist
+            upload_dir = "uploads"
+            os.makedirs(upload_dir, exist_ok=True)
+
+            # Generate unique filename
+            timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
+            safe_filename = f"{timestamp}_{filename.replace(' ', '_')}"
+            file_path = os.path.join(upload_dir, safe_filename)
+
+            # Save file
+            async with aiofiles.open(file_path, "wb") as f:
+                await f.write(file_content)
+
+            return file_path
+
+        except Exception as e:
+            raise Exception(f"Failed to save file: {str(e)}")
+
+    async def extract_transactions_from_image(self, image_path: str) -> Dict[str, Any]:
+        """Extract multiple transactions from an image (bank statement, credit card statement, etc.)"""
+        try:
+            # Encode image to base64
+            base64_image = self._encode_image(image_path)
+
+            # Create Groq vision prompt for transaction extraction
+            prompt = """
+            Analyze this financial document image (bank statement, credit card statement, etc.) and extract ALL transactions in JSON format.
+            
+            Look for transaction lists, payment records, or any financial entries that show:
+            - Date
+            - Amount (positive or negative)
+            - Vendor/Description/Payee name
+            - Any additional notes or memo
+            
+            Return the transactions as a JSON array:
+            {
+                "extraction_success": true,
+                "transactions": [
+                    {
+                        "date": "YYYY-MM-DD",
+                        "amount": 0.00,
+                        "vendor": "Vendor name",
+                        "memo": "Additional notes"
+                    },
+                    {
+                        "date": "YYYY-MM-DD", 
+                        "amount": -0.00,
+                        "vendor": "Another vendor",
+                        "memo": "Payment or charge description"
+                    }
+                ]
+            }
+            
+            Rules:
+            - Extract ALL visible transactions
+            - Include both positive (credits) and negative (debits) amounts
+            - Use the actual date format from the document
+            - Vendor should be the merchant/payee name
+            - Memo can include transaction type, reference numbers, etc.
+            - If no transactions found, return empty array but set extraction_success to true
+            
+            Return only valid JSON.
+            """
+
+            # Call Groq vision API
+            response = self.client.chat.completions.create(
+                messages=[
+                    {
+                        "role": "user",
+                        "content": [
+                            {"type": "text", "text": prompt},
+                            {
+                                "type": "image_url",
+                                "image_url": {
+                                    "url": f"data:image/jpeg;base64,{base64_image}",
+                                },
+                            },
+                        ],
+                    }
+                ],
+                model=self.model,
+                max_tokens=2000,  # Higher token limit for multiple transactions
+                temperature=0.1,
+            )
+
+            # Parse response
+            result_text = response.choices[0].message.content.strip()
+            return self._parse_transaction_extraction_result(result_text)
+
+        except Exception as e:
+            return {
+                "extraction_success": False,
+                "error": f"Transaction extraction error: {str(e)}",
+                "transactions": [],
+            }
+
+    def _parse_transaction_extraction_result(self, result_text: str) -> Dict[str, Any]:
+        """Parse Groq response for transaction extraction"""
+        try:
+            import json
+            import re
+
+            # Find the first '{' and last '}'
+            start = result_text.find("{")
+            end = result_text.rfind("}")
+            if start == -1 or end == -1 or end <= start:
+                return {
+                    "extraction_success": False,
+                    "error": "Could not find JSON object in AI response",
+                    "transactions": [],
+                }
+            json_str = result_text[start : end + 1]
+
+            # Remove trailing commas before } or ]
+            json_str = re.sub(r",\s*([}\]])", r"\1", json_str)
+
+            try:
+                data = json.loads(json_str)
+            except Exception as e:
+                import logging
+
+                logging.error(f"JSON parsing error: {str(e)}")
+                logging.error(f"Offending JSON string:\n{json_str}")
+                return {
+                    "extraction_success": False,
+                    "error": f"JSON parsing error: {str(e)}",
+                    "transactions": [],
+                }
+
+            # Validate and clean data
+            transactions = data.get("transactions", [])
+            cleaned_transactions = []
+            for txn in transactions:
+                try:
+                    cleaned_txn = {
+                        "date": str(txn.get("date", "")).strip(),
+                        "amount": float(
+                            str(txn.get("amount", 0)).replace("$", "").replace(",", "")
+                        ),
+                        "vendor": str(txn.get("vendor", "")).strip(),
+                        "memo": str(txn.get("memo", "")).strip(),
+                    }
+                    cleaned_transactions.append(cleaned_txn)
+                except Exception:
+                    continue
+            return {
+                "extraction_success": data.get("extraction_success", True),
+                "transactions": cleaned_transactions,
+                "total_transactions": len(cleaned_transactions),
+            }
+        except Exception as e:
+            import logging
+
+            logging.error(f"JSON parsing error (outer): {str(e)}")
+            return {
+                "extraction_success": False,
+                "error": f"JSON parsing error: {str(e)}",
+                "transactions": [],
+            }
+
+    def _parse_date_to_iso(self, date_str: str) -> str:
+        """Parse various date formats and convert to YYYY-MM-DD"""
+        try:
+            import re
+            from datetime import datetime
+
+            date_str = date_str.strip().upper()
+
+            # Handle formats like "MAY 22", "JUN 01", "MAY 22, 2024"
+            month_pattern = r"(JAN|FEB|MAR|APR|MAY|JUN|JUL|AUG|SEP|OCT|NOV|DEC)\s+(\d{1,2})(?:,\s*(\d{4}))?"
+            match = re.match(month_pattern, date_str)
+
+            if match:
+                month_abbr, day, year = match.groups()
+                month_map = {
+                    "JAN": 1,
+                    "FEB": 2,
+                    "MAR": 3,
+                    "APR": 4,
+                    "MAY": 5,
+                    "JUN": 6,
+                    "JUL": 7,
+                    "AUG": 8,
+                    "SEP": 9,
+                    "OCT": 10,
+                    "NOV": 11,
+                    "DEC": 12,
+                }
+
+                month = month_map[month_abbr]
+                day = int(day)
+                year = int(year) if year else datetime.now().year
+
+                # Handle 2-digit years
+                if year < 100:
+                    year += 2000
+
+                return f"{year:04d}-{month:02d}-{day:02d}"
+
+            # Handle YYYY-MM-DD format
+            if re.match(r"\d{4}-\d{2}-\d{2}", date_str):
+                return date_str
+
+            # Handle MM/DD/YYYY format
+            if re.match(r"\d{1,2}/\d{1,2}/\d{4}", date_str):
+                return datetime.strptime(date_str, "%m/%d/%Y").strftime("%Y-%m-%d")
+
+            # Handle MM/DD/YY format
+            if re.match(r"\d{1,2}/\d{1,2}/\d{2}", date_str):
+                return datetime.strptime(date_str, "%m/%d/%y").strftime("%Y-%m-%d")
+
+            return None
+
+        except Exception:
+            return None
@@ -0,0 +1,76 @@
+import json
+import os
+from dataclasses import dataclass
+from datetime import datetime, timedelta
+from typing import List
+
+
+@dataclass
+class FeedbackLog:
+    transaction_id: str
+    original_match: str
+    correction: str
+    reason: str
+    timestamp: datetime
+    user_id: str
+
+
+class FeedbackLogger:
+    def __init__(self, log_file: str = "feedback_logs.json"):
+        self.log_file = log_file
+        self.logs: List[FeedbackLog] = self._load_logs()
+
+    def _load_logs(self) -> List[FeedbackLog]:
+        if not os.path.exists(self.log_file):
+            return []
+
+        try:
+            with open(self.log_file, "r") as f:
+                data = json.load(f)
+                return [FeedbackLog(**log) for log in data]
+        except Exception:
+            return []
+
+    def _save_logs(self):
+        with open(self.log_file, "w") as f:
+            json.dump(
+                [
+                    {
+                        "transaction_id": log.transaction_id,
+                        "original_match": log.original_match,
+                        "correction": log.correction,
+                        "reason": log.reason,
+                        "timestamp": log.timestamp.isoformat(),
+                        "user_id": log.user_id,
+                    }
+                    for log in self.logs
+                ],
+                f,
+                indent=2,
+            )
+
+    def log_override(
+        self,
+        transaction_id: str,
+        original_match: str,
+        correction: str,
+        reason: str,
+        user_id: str,
+    ):
+        log = FeedbackLog(
+            transaction_id=transaction_id,
+            original_match=original_match,
+            correction=correction,
+            reason=reason,
+            timestamp=datetime.now(),
+            user_id=user_id,
+        )
+        self.logs.append(log)
+        self._save_logs()
+
+    def get_logs_by_transaction(self, transaction_id: str) -> List[FeedbackLog]:
+        return [log for log in self.logs if log.transaction_id == transaction_id]
+
+    def get_recent_logs(self, days: int = 30) -> List[FeedbackLog]:
+        cutoff = datetime.now() - timedelta(days=days)
+        return [log for log in self.logs if log.timestamp > cutoff]
@@ -0,0 +1,89 @@
+from typing import Any, Dict, List
+
+from services.ai_matcher import AIMatcher
+from services.ai_rules import AIRulesEngine
+from services.feedback_logger import FeedbackLogger
+from schemas import Match, Receipt, Transaction
+
+
+class MatchingEngine:
+    def __init__(self):
+        self.ai_matcher = AIMatcher()
+        self.rules_engine = AIRulesEngine()
+        self.feedback_logger = FeedbackLogger()
+
+    def process_matching(
+        self, receipts: List[Receipt], transactions: List[Transaction]
+    ) -> List[Match]:
+        # Get AI matches
+        ai_matches = self.ai_matcher.match_receipts_to_transactions(
+            receipts, transactions
+        )
+
+        # Apply rules and enhance matches
+        enhanced_matches = []
+        for match in ai_matches:
+            enhanced_match = self._enhance_match_with_rules(match)
+            enhanced_matches.append(enhanced_match)
+
+        return enhanced_matches
+
+    def _enhance_match_with_rules(self, match: Match) -> Match:
+        rule_results = self.rules_engine.apply_rules(match.receipt, match.transaction)
+
+        # Apply confidence boost from rules
+        if rule_results["confidence_boost"] > 0:
+            match.confidence_score = min(
+                1.0, match.confidence_score + rule_results["confidence_boost"]
+            )
+
+        # Auto-approve if rules say so
+        if rule_results["auto_approve"]:
+            match.confidence_score = 1.0
+            match.match_reason += " (Auto-approved by rules)"
+
+        # Add tax analysis to match
+        if rule_results.get("tax_analysis"):
+            match.tax_analysis = rule_results["tax_analysis"]
+
+        return match
+
+    def approve_match(self, match: Match, user_id: str):
+        # Log the approval
+        self.feedback_logger.log_override(
+            transaction_id=match.transaction.id,
+            original_match=f"AI Score: {match.confidence_score}",
+            correction="Approved",
+            reason="User approved match",
+            user_id=user_id,
+        )
+
+    def reject_match(self, match: Match, reason: str, user_id: str):
+        # Log the rejection
+        self.feedback_logger.log_override(
+            transaction_id=match.transaction.id,
+            original_match=f"AI Score: {match.confidence_score}",
+            correction="Rejected",
+            reason=reason,
+            user_id=user_id,
+        )
+
+    def get_matching_stats(self, matches: List[Match]) -> Dict[str, Any]:
+        if not matches:
+            return {
+                "total": 0,
+                "high_confidence": 0,
+                "low_confidence": 0,
+                "avg_score": 0,
+            }
+
+        high_confidence = len([m for m in matches if m.confidence_score >= 0.8])
+        low_confidence = len([m for m in matches if m.confidence_score < 0.8])
+        avg_score = sum(m.confidence_score for m in matches) / len(matches)
+
+        return {
+            "total": len(matches),
+            "high_confidence": high_confidence,
+            "low_confidence": low_confidence,
+            "avg_score": round(avg_score, 3),
+        }
@@ -0,0 +1,276 @@
+import logging
+from typing import Any, Dict, Optional
+
+from schemas import Address, Asset, Receipt, Transaction
+
+logger = logging.getLogger(__name__)
+
+
+class TaxRulesEngine:
+    """Engine to handle tax calculations based on the four tax rules"""
+
+    # Provincial tax rates (simplified - in production, use a tax rate API)
+    PROVINCIAL_TAX_RATES = {
+        "ON": 0.13,  # Ontario HST
+        "QC": 0.14975,  # Quebec QST
+        "BC": 0.12,  # British Columbia
+        "AB": 0.05,  # Alberta
+        "SK": 0.11,  # Saskatchewan
+        "MB": 0.12,  # Manitoba
+        "NS": 0.15,  # Nova Scotia
+        "NB": 0.15,  # New Brunswick
+        "NL": 0.15,  # Newfoundland and Labrador
+        "PE": 0.15,  # Prince Edward Island
+        "NT": 0.05,  # Northwest Territories
+        "NU": 0.05,  # Nunavut
+        "YT": 0.05,  # Yukon
+    }
+
+    def __init__(self):
+        self.logger = logging.getLogger(__name__)
+
+    def apply_sales_tax_rule(self, receipt: Receipt) -> Dict[str, Any]:
+        """
+        Sales Tax Rule: Apply correct sales tax based on billing vs shipping addresses
+        """
+        try:
+            # Determine which address to use for tax calculation
+            tax_address = self._get_tax_address(receipt)
+
+            if not tax_address:
+                return {
+                    "success": False,
+                    "error": "No valid address found for tax calculation",
+                    "calculated_tax": 0.0,
+                    "tax_rate": 0.0,
+                }
+
+            # Get tax rate for the province
+            tax_rate = self.PROVINCIAL_TAX_RATES.get(tax_address.province, 0.0)
+
+            # Calculate tax amount
+            calculated_tax = receipt.amount * tax_rate
+
+            return {
+                "success": True,
+                "calculated_tax": calculated_tax,
+                "tax_rate": tax_rate,
+                "tax_address": tax_address.province,
+                "rule_applied": "Sales Tax Rule",
+            }
+
+        except Exception as e:
+            self.logger.error(f"Error applying sales tax rule: {str(e)}")
+            return {
+                "success": False,
+                "error": str(e),
+                "calculated_tax": 0.0,
+                "tax_rate": 0.0,
+            }
+
+    def _get_tax_address(self, receipt: Receipt) -> Optional[Address]:
+        """Determine which address to use for tax calculation"""
+        # Rule: Use shipping address if different from billing, otherwise use billing
+        if receipt.shipping_address and receipt.billing_address:
+            if self._addresses_different(
+                receipt.billing_address, receipt.shipping_address
+            ):
+                return receipt.shipping_address
+            else:
+                return receipt.billing_address
+        elif receipt.shipping_address:
+            return receipt.shipping_address
+        elif receipt.billing_address:
+            return receipt.billing_address
+        else:
+            return None
+
+    def _addresses_different(self, billing: Address, shipping: Address) -> bool:
+        """Check if billing and shipping addresses are different"""
+        return (
+            billing.province != shipping.province
+            or billing.city != shipping.city
+            or billing.postal_code != shipping.postal_code
+        )
+
+    def apply_fx_rule(
+        self, receipt: Receipt, transaction: Transaction
+    ) -> Dict[str, Any]:
+        """
+        Foreign Exchange Rule: Handle currency mismatches
+        """
+        try:
+            # Check for currency mismatch
+            if receipt.currency != transaction.currency:
+                fx_discrepancy = abs(receipt.amount - abs(transaction.amount))
+
+                return {
+                    "success": True,
+                    "fx_discrepancy": fx_discrepancy,
+                    "receipt_currency": receipt.currency,
+                    "transaction_currency": transaction.currency,
+                    "receipt_amount": receipt.amount,
+                    "transaction_amount": abs(transaction.amount),
+                    "requires_manual_review": True,
+                    "rule_applied": "Foreign Exchange Rule",
+                }
+            else:
+                return {
+                    "success": True,
+                    "fx_discrepancy": 0.0,
+                    "requires_manual_review": False,
+                    "rule_applied": "No FX Rule (same currency)",
+                }
+
+        except Exception as e:
+            self.logger.error(f"Error applying FX rule: {str(e)}")
+            return {
+                "success": False,
+                "error": str(e),
+                "fx_discrepancy": 0.0,
+                "requires_manual_review": False,
+            }
+
+    def calculate_straight_line_depreciation(
+        self, asset: Asset, year: int
+    ) -> Dict[str, Any]:
+        """
+        Straight-Line Depreciation for accounting purposes
+        """
+        try:
+            if year > asset.useful_life_years:
+                return {
+                    "success": False,
+                    "error": f"Year {year} exceeds useful life of {asset.useful_life_years} years",
+                    "depreciation": 0.0,
+                }
+
+            # Straight-line formula: (Cost - Residual Value) / Useful Life
+            annual_depreciation = (
+                asset.purchase_amount - asset.residual_value
+            ) / asset.useful_life_years
+
+            return {
+                "success": True,
+                "depreciation": annual_depreciation,
+                "book_value": asset.purchase_amount - (annual_depreciation * year),
+                "method": "Straight-Line",
+                "rule_applied": "Depreciation Rule (Accounting)",
+            }
+
+        except Exception as e:
+            self.logger.error(f"Error calculating straight-line depreciation: {str(e)}")
+            return {"success": False, "error": str(e), "depreciation": 0.0}
+
+    def calculate_cca_depreciation(self, asset: Asset, year: int) -> Dict[str, Any]:
+        """
+        CCA (Capital Cost Allowance) Depreciation for tax purposes
+        """
+        try:
+            if year < 1:
+                return {
+                    "success": False,
+                    "error": "Year must be at least 1",
+                    "depreciation": 0.0,
+                }
+
+            # CCA uses declining balance method
+            book_value = asset.purchase_amount
+            total_depreciation = 0.0
+
+            for current_year in range(1, year + 1):
+                # CCA is calculated on the declining balance
+                cca_amount = book_value * asset.cca_rate
+                book_value -= cca_amount
+                total_depreciation += cca_amount
+
+                # Stop if book value reaches residual value
+                if book_value <= asset.residual_value:
+                    break
+
+            return {
+                "success": True,
+                "depreciation": cca_amount,  # Current year depreciation
+                "total_depreciation": total_depreciation,
+                "book_value": max(book_value, asset.residual_value),
+                "method": "CCA Declining Balance",
+                "rule_applied": "Depreciation Rule (Tax)",
+            }
+
+        except Exception as e:
+            self.logger.error(f"Error calculating CCA depreciation: {str(e)}")
+            return {"success": False, "error": str(e), "depreciation": 0.0}
+
+    def apply_meals_entertainment_rule(self, receipt: Receipt) -> Dict[str, Any]:
+        """
+        Meals & Entertainment Tax Deduction Rule
+        """
+        try:
+            if not receipt.is_meals_entertainment:
+                return {
+                    "success": True,
+                    "tax_deduction": receipt.amount,
+                    "accounting_deduction": receipt.amount,
+                    "rule_applied": "No M&E Rule (not meals/entertainment)",
+                }
+
+            # For tax purposes: 50% deductible
+            tax_deduction = receipt.amount * 0.5
+
+            # For accounting purposes: 100% deductible
+            accounting_deduction = receipt.amount
+
+            # Sales tax is fully deductible for accounting
+            tax_on_meal = receipt.tax
+
+            return {
+                "success": True,
+                "tax_deduction": tax_deduction,
+                "accounting_deduction": accounting_deduction,
+                "tax_on_meal": tax_on_meal,
+                "rule_applied": "Meals & Entertainment Rule",
+            }
+
+        except Exception as e:
+            self.logger.error(f"Error applying meals & entertainment rule: {str(e)}")
+            return {
+                "success": False,
+                "error": str(e),
+                "tax_deduction": 0.0,
+                "accounting_deduction": 0.0,
+            }
+
+    def apply_all_tax_rules(
+        self, receipt: Receipt, transaction: Transaction = None
+    ) -> Dict[str, Any]:
+        """
+        Apply all tax rules to a receipt
+        """
+        results = {
+            "receipt_id": receipt.id,
+            "rules_applied": [],
+            "sales_tax": {},
+            "fx_analysis": {},
+            "meals_entertainment": {},
+        }
+
+        # Apply Sales Tax Rule
+        sales_tax_result = self.apply_sales_tax_rule(receipt)
+        results["sales_tax"] = sales_tax_result
+        if sales_tax_result["success"]:
+            results["rules_applied"].append("Sales Tax Rule")
+
+        # Apply FX Rule (if transaction provided)
+        if transaction:
+            fx_result = self.apply_fx_rule(receipt, transaction)
+            results["fx_analysis"] = fx_result
+            if fx_result["success"]:
+                results["rules_applied"].append("Foreign Exchange Rule")
+
+        # Apply Meals & Entertainment Rule
+        me_result = self.apply_meals_entertainment_rule(receipt)
+        results["meals_entertainment"] = me_result
+        if me_result["success"]:
+            results["rules_applied"].append("Meals & Entertainment Rule")
+
+        return results
@@ -0,0 +1,892 @@
+INFO:     Started server process [18995]
+INFO:     Waiting for application startup.
+INFO:     Application startup complete.
+INFO:     Uvicorn running on http://0.0.0.0:8765 (Press CTRL+C to quit)
+INFO:     Shutting down
+INFO:     Waiting for application shutdown.
+INFO:     Application shutdown complete.
+INFO:     Finished server process [18995]
+INFO:     Started server process [19157]
+INFO:     Waiting for application startup.
+INFO:     Application startup complete.
+INFO:     Uvicorn running on http://0.0.0.0:8654 (Press CTRL+C to quit)
+INFO:     102.89.45.216:11636 - "POST /transactions/import/csv HTTP/1.1" 200 OK
+INFO:     102.89.45.216:14600 - "POST /transactions/import/csv HTTP/1.1" 200 OK
+INFO:__main__:Starting match-specific for file IDs: ['0b3d64a4-c558-43cb-bf57-a6561205f1e6', 'e96d57f5-2070-43d6-8044-1d68106a3c27', 'bae25e20-2425-4db3-a3fc-adcb09c7d431', 'bfb36530-62f6-489a-b0b9-970ab8e7c20c', '0b4db1d9-670b-4dd7-bd3a-dfa39897acbb', '8fbf46d7-5f7b-4b01-a5d1-173adcb55748', 'e779f8ce-9f9a-4575-af8c-4558c6405977', 'ee595b47-e9b8-4c82-82e6-7490d716baa7'], categorization_id: cat_mgchkov1_x8jntm
+INFO:__main__:Found 7 transactions in database
+INFO:__main__:Converted 7 transactions
+INFO:__main__:Successfully loaded receipt for file_id: 0b3d64a4-c558-43cb-bf57-a6561205f1e6
+INFO:__main__:Successfully loaded receipt for file_id: e96d57f5-2070-43d6-8044-1d68106a3c27
+INFO:__main__:Successfully loaded receipt for file_id: bae25e20-2425-4db3-a3fc-adcb09c7d431
+INFO:__main__:Successfully loaded receipt for file_id: bfb36530-62f6-489a-b0b9-970ab8e7c20c
+INFO:__main__:Successfully loaded receipt for file_id: 0b4db1d9-670b-4dd7-bd3a-dfa39897acbb
+INFO:__main__:Successfully loaded receipt for file_id: 8fbf46d7-5f7b-4b01-a5d1-173adcb55748
+INFO:__main__:Successfully loaded receipt for file_id: e779f8ce-9f9a-4575-af8c-4558c6405977
+INFO:__main__:Successfully loaded receipt for file_id: ee595b47-e9b8-4c82-82e6-7490d716baa7
+INFO:__main__:Found 8 receipts, 0 missing
+INFO:__main__:Starting matching with 8 receipts and 7 transactions
+INFO:services.ai_matcher:Starting AI matching for 8 receipts against 7 transactions
+INFO:services.ai_matcher:Processing receipt 1/8: PAYPAL *BZA BAWSKYJ - $37.55
+INFO:services.ai_matcher:Found 1 candidates for receipt: PAYPAL *BZA BAWSKYJ
+INFO:httpx:HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
+INFO:services.ai_matcher:AI selected candidate 1: BOOKS BY BESSIE (score: 0.000)
+INFO:services.ai_matcher:Found match: 0.000 - No meaningful similarity
+
+The reason for this low score is that none of the candidate transactions have a perfect match with the receipt. The closest candidate is Candidate 1, but it has significant differences in vendor name, amount, and date, resulting in a very low confidence score.
+INFO:services.ai_matcher:Processing receipt 2/8: Figma, Inc. - $27.0
+INFO:services.ai_matcher:Found 1 candidates for receipt: Figma, Inc.
+INFO:httpx:HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
+INFO:services.ai_matcher:AI selected candidate 1: BOOKS BY BESSIE (score: 0.000)
+INFO:services.ai_matcher:Found match: 0.000 - No meaningful similarity
+
+However, since I'm not allowed to return "NONE" and must return the best match, I'll provide the next best option:
+
+1|0.0|No meaningful similarity
+
+Since there are no perfect matches, I'll consider the next best option. 
+
+Candidate 1 has a vendor name difference, amount difference, and date difference. However, it's the closest option available.
+
+1|0.0|No meaningful similarity
+
+However, I can provide a more detailed explanation of why it's the best option available.
+
+The vendor name difference is significant, with "Figma, Inc." and "BOOKS BY BESSIE" being unrelated. The amount difference is also significant, with $27.0 and $55.0 being 103.7% apart. The date difference is 136 days, which is a significant difference.
+
+However, since I
+INFO:services.ai_matcher:Processing receipt 3/8: Eleven Labs Inc. - $111.87
+INFO:services.ai_matcher:Found 3 candidates for receipt: Eleven Labs Inc.
+INFO:httpx:HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
+WARNING:services.ai_matcher:Could not parse single match response: To determine the best match, I will evaluate each candidate based on the scoring criteria.
+
+Candidate 1:
+- Vendor similarity: 0.0 (A1 RENTAL BACKHOE DEPOSIT REFUND vs Eleven Labs Inc.)
+- Amount difference: 88.13 (78.8%)
+- Date difference: 115 days
+- Description/notes relevance: 0.0 (no relevance)
+- Total score: 0.0
+
+Candidate 2:
+- Vendor similarity: 0.0 (BOOKS BY BESSIE vs Eleven Labs Inc.)
+- Amount difference: 56.87 (50.8%)
+- Date difference: 145 days
+- Description/notes relevance: 0.0 (no relevance)
+- Total score: 0.0
+
+Candidate 3:
+- Vendor similarity: 0.0 (No Vendor vs Eleven Labs Inc.)
+- Amount difference: 106.88 (95.5%)
+- Date difference: 87 days
+WARNING:services.ai_matcher:Failed to parse AI response for receipt: Eleven Labs Inc.
+WARNING:services.ai_matcher:No match found for receipt: Eleven Labs Inc. - $111.87
+INFO:services.ai_matcher:Processing receipt 4/8: Twitter, Inc. - $4.0
+WARNING:services.ai_matcher:No candidates found for receipt: Twitter, Inc. - $4.0
+WARNING:services.ai_matcher:No match found for receipt: Twitter, Inc. - $4.0
+INFO:services.ai_matcher:Processing receipt 5/8: PAYPAL *BZABAWSKYJ - $37.55
+INFO:services.ai_matcher:Found 1 candidates for receipt: PAYPAL *BZABAWSKYJ
+INFO:httpx:HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
+INFO:services.ai_matcher:AI selected candidate 1: BOOKS BY BESSIE (score: 0.000)
+INFO:services.ai_matcher:Found match: 0.000 - No meaningful similarity
+
+This is because none of the candidate transactions have a perfect match with the receipt. However, I must return the candidate with the highest match score, even if it's very low.
+
+To calculate the match score, I considered the following:
+
+- Vendor name similarity: None of the candidate transactions have a vendor name that matches the receipt.
+- Amount accuracy: The amount on the receipt ($37.55) does not match any of the candidate transactions.
+- Date proximity: The date on the receipt (2023-05-22) is significantly different from the dates on the candidate transactions.
+- Description/notes relevance: None of the candidate transactions have a description or notes that match the receipt.
+
+Since none of the candidate transactions have a meaningful similarity with the receipt, the best match is the one with the lowest possible score, which is 0.0.
+INFO:services.ai_matcher:Processing receipt 6/8: Figma, Inc. - $27.0
+INFO:services.ai_matcher:Found 1 candidates for receipt: Figma, Inc.
+INFO:httpx:HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
+INFO:services.ai_matcher:AI selected candidate 1: BOOKS BY BESSIE (score: 0.000)
+INFO:services.ai_matcher:Found match: 0.000 - No meaningful similarity
+
+However, since I must return the candidate with the highest match score, even if it's very low, I will provide the next best option:
+
+5|0.2|Minimal similarity due to vendor name difference, amount difference of $28.0, and 136 days apart
+INFO:services.ai_matcher:Processing receipt 7/8: Eleven Labs Inc. - $111.87
+INFO:services.ai_matcher:Found 3 candidates for receipt: Eleven Labs Inc.
+INFO:httpx:HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
+WARNING:services.ai_matcher:Could not parse single match response: To determine the best match, I will analyze each candidate transaction against the given receipt.
+
+Candidate 1:
+- Vendor similarity: 0.0 (A1 RENTAL BACKHOE DEPOSIT REFUND vs Eleven Labs Inc.)
+- Amount difference: 88.13 (78.8%)
+- Date difference: 115 days
+- Description/notes relevance: 0.0 (no relevance)
+- Overall score: 0.0 (no meaningful similarity)
+
+Candidate 2:
+- Vendor similarity: 0.0 (BOOKS BY BESSIE vs Eleven Labs Inc.)
+- Amount difference: 56.87 (50.8%)
+- Date difference: 145 days
+- Description/notes relevance: 0.0 (no relevance)
+- Overall score: 0.0 (no meaningful similarity)
+
+Candidate 3:
+- Vendor similarity: 0.0 (No Vendor vs Eleven Labs Inc.)
+- Amount difference: 106.88 (95.5%)
+WARNING:services.ai_matcher:Failed to parse AI response for receipt: Eleven Labs Inc.
+WARNING:services.ai_matcher:No match found for receipt: Eleven Labs Inc. - $111.87
+INFO:services.ai_matcher:Processing receipt 8/8: Twitter, Inc. - $4.0
+WARNING:services.ai_matcher:No candidates found for receipt: Twitter, Inc. - $4.0
+WARNING:services.ai_matcher:No match found for receipt: Twitter, Inc. - $4.0
+INFO:services.ai_matcher:AI matching completed. Found 4 matches
+INFO:__main__:Matching completed, got 4 results
+INFO:__main__:Generated stats: {'total': 4, 'high_confidence': 0, 'low_confidence': 4, 'avg_score': 0.0}
+INFO:__main__:Match-specific completed successfully with 4 matches
+INFO:     102.89.45.216:14600 - "POST /match-specific HTTP/1.1" 200 OK
+INFO:     102.89.45.216:16587 - "POST /upload-multiple HTTP/1.1" 200 OK
+INFO:httpx:HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
+INFO:     102.89.45.216:16587 - "POST /process/a8969315-6ed6-4dcd-9a47-3eb542d85d64 HTTP/1.1" 200 OK
+INFO:     102.89.45.216:16587 - "POST /upload-multiple HTTP/1.1" 200 OK
+INFO:httpx:HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
+INFO:     102.89.45.216:16587 - "POST /process/9845ef9d-2bd3-4803-93f8-d8d5bca0de7b HTTP/1.1" 200 OK
+INFO:     102.89.45.216:16587 - "POST /upload-multiple HTTP/1.1" 200 OK
+INFO:httpx:HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
+WARNING:services.document_processor:Initial JSON parsing failed: Extra data: line 10 column 4 (char 246)
+INFO:     102.89.45.216:16587 - "POST /process/ba36aa95-8fdb-4f16-973e-479f99da3100 HTTP/1.1" 200 OK
+INFO:     102.89.45.216:16587 - "POST /upload-multiple HTTP/1.1" 200 OK
+INFO:httpx:HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
+INFO:     102.89.45.216:16587 - "POST /process/dc542f59-1105-470c-a401-56407f2bbecf HTTP/1.1" 200 OK
+INFO:     102.89.45.216:16587 - "POST /upload-multiple HTTP/1.1" 200 OK
+INFO:httpx:HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
+INFO:     102.89.45.216:16587 - "POST /process/d0d43d67-1e25-47b8-bf74-8ce9695cb699 HTTP/1.1" 200 OK
+INFO:     102.89.45.216:16533 - "POST /transactions/import/csv HTTP/1.1" 200 OK
+INFO:__main__:Starting match-specific for file IDs: ['d0d43d67-1e25-47b8-bf74-8ce9695cb699', 'dc542f59-1105-470c-a401-56407f2bbecf', 'ba36aa95-8fdb-4f16-973e-479f99da3100', '9845ef9d-2bd3-4803-93f8-d8d5bca0de7b', 'a8969315-6ed6-4dcd-9a47-3eb542d85d64', '0b3d64a4-c558-43cb-bf57-a6561205f1e6', 'e96d57f5-2070-43d6-8044-1d68106a3c27', 'bae25e20-2425-4db3-a3fc-adcb09c7d431', 'bfb36530-62f6-489a-b0b9-970ab8e7c20c', '0b4db1d9-670b-4dd7-bd3a-dfa39897acbb', '8fbf46d7-5f7b-4b01-a5d1-173adcb55748', 'e779f8ce-9f9a-4575-af8c-4558c6405977', 'ee595b47-e9b8-4c82-82e6-7490d716baa7'], categorization_id: cat_mgci9kky_b9qz7l
+INFO:__main__:Found 7 transactions in database
+INFO:__main__:Converted 7 transactions
+INFO:__main__:Successfully loaded receipt for file_id: d0d43d67-1e25-47b8-bf74-8ce9695cb699
+INFO:__main__:Successfully loaded receipt for file_id: dc542f59-1105-470c-a401-56407f2bbecf
+INFO:__main__:Successfully loaded receipt for file_id: ba36aa95-8fdb-4f16-973e-479f99da3100
+INFO:__main__:Successfully loaded receipt for file_id: 9845ef9d-2bd3-4803-93f8-d8d5bca0de7b
+INFO:__main__:Successfully loaded receipt for file_id: a8969315-6ed6-4dcd-9a47-3eb542d85d64
+INFO:__main__:Successfully loaded receipt for file_id: 0b3d64a4-c558-43cb-bf57-a6561205f1e6
+INFO:__main__:Successfully loaded receipt for file_id: e96d57f5-2070-43d6-8044-1d68106a3c27
+INFO:__main__:Successfully loaded receipt for file_id: bae25e20-2425-4db3-a3fc-adcb09c7d431
+INFO:__main__:Successfully loaded receipt for file_id: bfb36530-62f6-489a-b0b9-970ab8e7c20c
+INFO:__main__:Successfully loaded receipt for file_id: 0b4db1d9-670b-4dd7-bd3a-dfa39897acbb
+INFO:__main__:Successfully loaded receipt for file_id: 8fbf46d7-5f7b-4b01-a5d1-173adcb55748
+INFO:__main__:Successfully loaded receipt for file_id: e779f8ce-9f9a-4575-af8c-4558c6405977
+INFO:__main__:Successfully loaded receipt for file_id: ee595b47-e9b8-4c82-82e6-7490d716baa7
+INFO:__main__:Found 13 receipts, 0 missing
+INFO:__main__:Starting matching with 13 receipts and 7 transactions
+INFO:services.ai_matcher:Starting AI matching for 13 receipts against 7 transactions
+INFO:services.ai_matcher:Processing receipt 1/13: Figma, Inc. - $27.0
+INFO:services.ai_matcher:Found 1 candidates for receipt: Figma, Inc.
+INFO:httpx:HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
+INFO:services.ai_matcher:AI selected candidate 1: BOOKS BY BESSIE (score: 0.000)
+INFO:services.ai_matcher:Found match: 0.000 - No meaningful similarity
+
+However, I can provide a more detailed analysis of why this is the case and what the closest match is.
+
+The receipt has a vendor name of "Figma, Inc.", which does not match any of the candidate transactions. The closest match in terms of vendor name similarity is none, as there are no similar names.
+
+The amount on the receipt is $27.0, which is significantly different from the amounts on the candidate transactions. The closest match in terms of amount accuracy is Candidate 1, but it has a difference of $28.0, which is a 103.7% difference.
+
+The date on the receipt is 2025-06-19, which is also significantly different from the dates on the candidate transactions. The closest match in terms of date proximity is Candidate 1, but it is 136 days apart.
+
+The description on the receipt is
+INFO:services.ai_matcher:Processing receipt 2/13: Google LLC - $21.15
+INFO:services.ai_matcher:Found 1 candidates for receipt: Google LLC
+INFO:httpx:HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
+INFO:services.ai_matcher:AI selected candidate 1: BOOKS BY BESSIE (score: 0.000)
+INFO:services.ai_matcher:Found match: 0.000 - No meaningful similarity
+
+The reason for this low score is that there are significant differences between the receipt and the candidate transactions. The vendor name is completely different ("Google LLC" vs. "BOOKS BY BESSIE"), the amount is significantly different ($21.15 vs. $55.0), and the date is 155 days apart.
+INFO:services.ai_matcher:Processing receipt 3/13: PAYPAL *BZAABAWSKYJ - $37.55
+INFO:services.ai_matcher:Found 1 candidates for receipt: PAYPAL *BZAABAWSKYJ
+INFO:httpx:HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
+INFO:services.ai_matcher:AI selected candidate 1: BOOKS BY BESSIE (score: 0.000)
+INFO:services.ai_matcher:Found match: 0.000 - No meaningful similarity
+
+However, since I must return the candidate with the highest match score, even if it's very low, I will provide the next best option:
+
+5|0.15|Best available option despite significant differences in vendor and amount
+INFO:services.ai_matcher:Processing receipt 4/13: Eleven Labs Inc. - $111.87
+INFO:services.ai_matcher:Found 3 candidates for receipt: Eleven Labs Inc.
+INFO:httpx:HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
+INFO:services.ai_matcher:AI selected candidate 1: A1 RENTAL BACKHOE DEPOSIT REFUND (score: 0.000)
+INFO:services.ai_matcher:Found match: 0.000 - No meaningful similarity
+
+Explanation: None of the candidate transactions match the receipt in terms of vendor name, amount, date, or description. However, I must return a candidate, so I'm returning the first one with a confidence score of 0.0, indicating no meaningful similarity.
+INFO:services.ai_matcher:Processing receipt 5/13: Figma, Inc. - $27.0
+INFO:services.ai_matcher:Found 1 candidates for receipt: Figma, Inc.
+INFO:httpx:HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
+INFO:services.ai_matcher:AI selected candidate 1: BOOKS BY BESSIE (score: 0.000)
+INFO:services.ai_matcher:Found match: 0.000 - No meaningful similarity
+
+The reason for this low score is that there are significant differences in vendor name, amount, date, and description between the receipt and the candidate transactions. The vendor name is completely different, the amount is off by $28, the date is 136 days apart, and the description does not match.
+INFO:services.ai_matcher:Processing receipt 6/13: PAYPAL *BZA BAWSKYJ - $37.55
+INFO:services.ai_matcher:Found 1 candidates for receipt: PAYPAL *BZA BAWSKYJ
+INFO:httpx:HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
+INFO:services.ai_matcher:AI selected candidate 1: BOOKS BY BESSIE (score: 0.000)
+INFO:services.ai_matcher:Found match: 0.000 - No meaningful similarity
+
+However, since I'm not allowed to return "NONE" and must return the best match, I'll provide the next best option:
+
+1|0.0|No meaningful similarity
+
+Since there are no perfect matches, I'll look for the next best option. 
+
+Candidate 1 has a significant difference in vendor name (46.5%), amount difference (46.5%), and a large date difference (895 days). However, it's the only candidate available, so it's the best match.
+
+1|0.0|No meaningful similarity
+INFO:services.ai_matcher:Processing receipt 7/13: Figma, Inc. - $27.0
+INFO:services.ai_matcher:Found 1 candidates for receipt: Figma, Inc.
+INFO:httpx:HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
+INFO:services.ai_matcher:AI selected candidate 1: BOOKS BY BESSIE (score: 0.000)
+INFO:services.ai_matcher:Found match: 0.000 - No meaningful similarity
+
+However, I can provide a more detailed explanation of why this is the case. None of the candidate transactions match the receipt perfectly, but I can calculate a score for each candidate based on the given criteria.
+
+Candidate 1:
+- Vendor name similarity: 0 ( BOOKS BY BESSIE vs Figma, Inc. )
+- Amount accuracy: 0 ( $55.0 vs $27.0 )
+- Date proximity: 0.007 ( 136 days difference )
+- Description/notes relevance: 0 ( No relevance )
+- Amount difference: 103.7% ( significant difference )
+- Overall score: 0.0
+
+Since none of the candidate transactions match the receipt perfectly, I will return the candidate with the highest score, which is still 0.0. However, I can suggest that the best available option is actually none of the
+INFO:services.ai_matcher:Processing receipt 8/13: Eleven Labs Inc. - $111.87
+INFO:services.ai_matcher:Found 3 candidates for receipt: Eleven Labs Inc.
+INFO:httpx:HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
+WARNING:services.ai_matcher:Could not parse single match response: To determine the best match, I will evaluate each candidate transaction based on the scoring criteria.
+
+Candidate 1:
+- Vendor similarity: 0.0 (Eleven Labs Inc. vs A1 RENTAL BACKHOE DEPOSIT REFUND)
+- Amount difference: 88.13 (78.8%)
+- Date difference: 115 days
+- Description/notes relevance: 0.0 (no relevance)
+- Total score: 0.0
+
+Candidate 2:
+- Vendor similarity: 0.0 (Eleven Labs Inc. vs BOOKS BY BESSIE)
+- Amount difference: 56.87 (50.8%)
+- Date difference: 145 days
+- Description/notes relevance: 0.0 (no relevance)
+- Total score: 0.0
+
+Candidate 3:
+- Vendor similarity: 0.0 (Eleven Labs Inc. vs No Vendor)
+- Amount difference: 106.88 (95.5%)
+-
+WARNING:services.ai_matcher:Failed to parse AI response for receipt: Eleven Labs Inc.
+WARNING:services.ai_matcher:No match found for receipt: Eleven Labs Inc. - $111.87
+INFO:services.ai_matcher:Processing receipt 9/13: Twitter, Inc. - $4.0
+WARNING:services.ai_matcher:No candidates found for receipt: Twitter, Inc. - $4.0
+WARNING:services.ai_matcher:No match found for receipt: Twitter, Inc. - $4.0
+INFO:services.ai_matcher:Processing receipt 10/13: PAYPAL *BZABAWSKYJ - $37.55
+INFO:services.ai_matcher:Found 1 candidates for receipt: PAYPAL *BZABAWSKYJ
+INFO:httpx:HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
+INFO:services.ai_matcher:AI selected candidate 1: BOOKS BY BESSIE (score: 0.000)
+INFO:services.ai_matcher:Found match: 0.000 - No meaningful similarity
+
+The reason for this low score is that there are significant differences in vendor name, amount, and date between the receipt and the candidate transactions. The vendor name is completely different, the amount is off by $17.45, and the date is 895 days apart.
+INFO:services.ai_matcher:Processing receipt 11/13: Figma, Inc. - $27.0
+INFO:services.ai_matcher:Found 1 candidates for receipt: Figma, Inc.
+INFO:httpx:HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
+INFO:services.ai_matcher:AI selected candidate 1: BOOKS BY BESSIE (score: 0.000)
+INFO:services.ai_matcher:Found match: 0.000 - No meaningful similarity
+
+However, since I must return the candidate with the highest match score, even if it's very low, I will provide the next best option:
+
+5|0.2|Minimal similarity due to vendor name difference, but same category and date proximity
+INFO:services.ai_matcher:Processing receipt 12/13: Eleven Labs Inc. - $111.87
+INFO:services.ai_matcher:Found 3 candidates for receipt: Eleven Labs Inc.
+INFO:httpx:HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
+WARNING:services.ai_matcher:Could not parse single match response: To determine the best match, I will evaluate each candidate transaction based on the scoring criteria.
+
+Candidate 1:
+- Vendor similarity: 0.0 (Eleven Labs Inc. vs A1 RENTAL BACKHOE DEPOSIT REFUND)
+- Amount accuracy: 0.0 (no exact match)
+- Date proximity: 0.0 (115 days difference)
+- Description/notes relevance: 0.0 (no relevance)
+Total score: 0.0
+
+Candidate 2:
+- Vendor similarity: 0.0 (Eleven Labs Inc. vs BOOKS BY BESSIE)
+- Amount accuracy: 0.0 (no exact match)
+- Date proximity: 0.0 (145 days difference)
+- Description/notes relevance: 0.0 (no relevance)
+Total score: 0.0
+
+Candidate 3:
+- Vendor similarity: 0.0 (Eleven Labs Inc. vs No Vendor)
+- Amount accuracy: 0
+WARNING:services.ai_matcher:Failed to parse AI response for receipt: Eleven Labs Inc.
+WARNING:services.ai_matcher:No match found for receipt: Eleven Labs Inc. - $111.87
+INFO:services.ai_matcher:Processing receipt 13/13: Twitter, Inc. - $4.0
+WARNING:services.ai_matcher:No candidates found for receipt: Twitter, Inc. - $4.0
+WARNING:services.ai_matcher:No match found for receipt: Twitter, Inc. - $4.0
+INFO:services.ai_matcher:AI matching completed. Found 9 matches
+INFO:__main__:Matching completed, got 9 results
+INFO:__main__:Generated stats: {'total': 9, 'high_confidence': 0, 'low_confidence': 9, 'avg_score': 0.0}
+INFO:__main__:Match-specific completed successfully with 9 matches
+INFO:     102.89.45.216:11676 - "POST /match-specific HTTP/1.1" 200 OK
+INFO:     102.89.45.216:28828 - "POST /transactions/import/csv HTTP/1.1" 200 OK
+INFO:     102.89.45.216:14522 - "POST /transactions/import/csv HTTP/1.1" 200 OK
+INFO:     102.89.45.216:2730 - "POST /transactions/import/csv HTTP/1.1" 200 OK
+INFO:__main__:Starting match-specific for file IDs: ['d0d43d67-1e25-47b8-bf74-8ce9695cb699', 'dc542f59-1105-470c-a401-56407f2bbecf', 'ba36aa95-8fdb-4f16-973e-479f99da3100', '9845ef9d-2bd3-4803-93f8-d8d5bca0de7b', 'a8969315-6ed6-4dcd-9a47-3eb542d85d64', '0b3d64a4-c558-43cb-bf57-a6561205f1e6', 'e96d57f5-2070-43d6-8044-1d68106a3c27', 'bae25e20-2425-4db3-a3fc-adcb09c7d431', 'bfb36530-62f6-489a-b0b9-970ab8e7c20c', '0b4db1d9-670b-4dd7-bd3a-dfa39897acbb', '8fbf46d7-5f7b-4b01-a5d1-173adcb55748', 'e779f8ce-9f9a-4575-af8c-4558c6405977', 'ee595b47-e9b8-4c82-82e6-7490d716baa7'], categorization_id: cat_mgcolko1_wmfzzd
+INFO:__main__:Found 119 transactions in database
+INFO:__main__:Converted 119 transactions
+INFO:__main__:Successfully loaded receipt for file_id: d0d43d67-1e25-47b8-bf74-8ce9695cb699
+INFO:__main__:Successfully loaded receipt for file_id: dc542f59-1105-470c-a401-56407f2bbecf
+INFO:__main__:Successfully loaded receipt for file_id: ba36aa95-8fdb-4f16-973e-479f99da3100
+INFO:__main__:Successfully loaded receipt for file_id: 9845ef9d-2bd3-4803-93f8-d8d5bca0de7b
+INFO:__main__:Successfully loaded receipt for file_id: a8969315-6ed6-4dcd-9a47-3eb542d85d64
+INFO:__main__:Successfully loaded receipt for file_id: 0b3d64a4-c558-43cb-bf57-a6561205f1e6
+INFO:__main__:Successfully loaded receipt for file_id: e96d57f5-2070-43d6-8044-1d68106a3c27
+INFO:__main__:Successfully loaded receipt for file_id: bae25e20-2425-4db3-a3fc-adcb09c7d431
+INFO:__main__:Successfully loaded receipt for file_id: bfb36530-62f6-489a-b0b9-970ab8e7c20c
+INFO:__main__:Successfully loaded receipt for file_id: 0b4db1d9-670b-4dd7-bd3a-dfa39897acbb
+INFO:__main__:Successfully loaded receipt for file_id: 8fbf46d7-5f7b-4b01-a5d1-173adcb55748
+INFO:__main__:Successfully loaded receipt for file_id: e779f8ce-9f9a-4575-af8c-4558c6405977
+INFO:__main__:Successfully loaded receipt for file_id: ee595b47-e9b8-4c82-82e6-7490d716baa7
+INFO:__main__:Found 13 receipts, 0 missing
+INFO:__main__:Starting matching with 13 receipts and 119 transactions
+INFO:services.ai_matcher:Starting AI matching for 13 receipts against 119 transactions
+INFO:services.ai_matcher:Processing receipt 1/13: Figma, Inc. - $27.0
+INFO:services.ai_matcher:Found 44 candidates for receipt: Figma, Inc.
+INFO:services.ai_matcher:Limited candidates to top 10 by amount similarity
+INFO:httpx:HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
+INFO:services.ai_matcher:AI selected candidate 8: Unknown (score: 0.290)
+INFO:services.ai_matcher:Found match: 0.290 - Close amount match, relevant note about office expenses, but significant date difference
+
+This candidate has a relatively low confidence score due to the significant date difference (85 days apart) and the fact that the vendor name is unknown. However, the amount difference is moderate ($8.03), and the note mentions "Bought lunch for crew 102" which could be related to office expenses, making it a slightly better match than the other candidates.
+INFO:services.ai_matcher:Processing receipt 2/13: Google LLC - $21.15
+INFO:services.ai_matcher:Found 25 candidates for receipt: Google LLC
+INFO:services.ai_matcher:Limited candidates to top 10 by amount similarity
+INFO:httpx:HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
+INFO:services.ai_matcher:AI selected candidate 7: Unknown (score: 0.140)
+INFO:services.ai_matcher:Found match: 0.140 - Closest amount match, but significant difference in vendor name and date
+
+Reasoning:
+
+- Vendor name similarity: 0 (Unknown vs Google LLC)
+- Amount accuracy: 0.14 (18.08 vs 21.15, 14.5% difference)
+- Date proximity: 0 (93 days difference)
+- Description/notes relevance: 0 (Office Supplies vs Google Workspace)
+
+Although the amount match is the closest among all candidates, the significant differences in vendor name and date result in a low confidence score.
+INFO:services.ai_matcher:Processing receipt 3/13: PAYPAL *BZAABAWSKYJ - $37.55
+INFO:services.ai_matcher:Found 62 candidates for receipt: PAYPAL *BZAABAWSKYJ
+INFO:services.ai_matcher:Limited candidates to top 10 by amount similarity
+INFO:httpx:HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
+WARNING:services.ai_matcher:Could not parse single match response: After analyzing the receipt against all the candidate transactions, I found the best match to be:
+
+Candidate 1: 0.09|Vendor name similarity, significant amount difference, and large date difference
+
+Reason: Although the vendor name is unknown, the amount difference is relatively minor ($3.55) compared to other candidates. However, the date difference is significant (864 days), and the vendor name is unknown, resulting in a low confidence score.
+WARNING:services.ai_matcher:Failed to parse AI response for receipt: PAYPAL *BZAABAWSKYJ
+WARNING:services.ai_matcher:No match found for receipt: PAYPAL *BZAABAWSKYJ - $37.55
+INFO:services.ai_matcher:Processing receipt 4/13: Eleven Labs Inc. - $111.87
+INFO:services.ai_matcher:Found 90 candidates for receipt: Eleven Labs Inc.
+INFO:services.ai_matcher:Limited candidates to top 10 by amount similarity
+INFO:httpx:HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
+INFO:services.ai_matcher:AI selected candidate 1: Unknown (score: 0.900)
+INFO:services.ai_matcher:Found match: 0.900 - Same amount, minor difference in vendor name, and relatively close date
+
+Reasoning:
+- The amount matches exactly, with a minor difference of 0.1%.
+- Although the vendor name is unknown, it's likely a typo or variation of Eleven Labs Inc.
+- The date difference is 87 days, which is relatively close considering the other options.
+
+This candidate has the highest match score, despite not being a perfect match.
+INFO:services.ai_matcher:Processing receipt 5/13: Figma, Inc. - $27.0
+INFO:services.ai_matcher:Found 44 candidates for receipt: Figma, Inc.
+INFO:services.ai_matcher:Limited candidates to top 10 by amount similarity
+INFO:httpx:HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
+INFO:services.ai_matcher:AI selected candidate 8: Unknown (score: 0.290)
+INFO:services.ai_matcher:Found match: 0.290 - Close amount match, relevant note about office expenses, but significant date difference
+
+This candidate has a close amount match ($18.97 vs $27.0), a relevant note about office expenses, but a significant date difference of 85 days.
+INFO:services.ai_matcher:Processing receipt 6/13: PAYPAL *BZA BAWSKYJ - $37.55
+INFO:services.ai_matcher:Found 62 candidates for receipt: PAYPAL *BZA BAWSKYJ
+INFO:services.ai_matcher:Limited candidates to top 10 by amount similarity
+INFO:httpx:HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
+WARNING:services.ai_matcher:Could not parse single match response: After analyzing the receipt against all the candidate transactions, I found the best match to be:
+
+Candidate 1: 0.09|Vendor name similarity, significant amount difference, and large date difference
+
+Reason: Although the vendor name is unknown, the description in the receipt contains the vendor's name, which is a good match. However, the amount difference is significant (9.5%), and the date difference is large (864 days). This is the best available option despite the significant differences.
+WARNING:services.ai_matcher:Failed to parse AI response for receipt: PAYPAL *BZA BAWSKYJ
+WARNING:services.ai_matcher:No match found for receipt: PAYPAL *BZA BAWSKYJ - $37.55
+INFO:services.ai_matcher:Processing receipt 7/13: Figma, Inc. - $27.0
+INFO:services.ai_matcher:Found 44 candidates for receipt: Figma, Inc.
+INFO:services.ai_matcher:Limited candidates to top 10 by amount similarity
+INFO:httpx:HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 429 Too Many Requests"
+INFO:groq._base_client:Retrying request to /openai/v1/chat/completions in 9.000000 seconds
+INFO:httpx:HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
+INFO:services.ai_matcher:AI selected candidate 1: Unknown (score: 0.190)
+INFO:services.ai_matcher:Found match: 0.190 - Vendor name similarity, amount difference of 9.8%, and no description match
+
+This is because Candidate 1 has the closest vendor name similarity (Unknown vs Figma, Inc. is not possible, but it's the closest) and the smallest amount difference among all the candidates. Although the date difference is significant (62 days), it's still the best available option given the other factors.
+INFO:services.ai_matcher:Processing receipt 8/13: Eleven Labs Inc. - $111.87
+INFO:services.ai_matcher:Found 90 candidates for receipt: Eleven Labs Inc.
+INFO:services.ai_matcher:Limited candidates to top 10 by amount similarity
+INFO:httpx:HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 429 Too Many Requests"
+INFO:groq._base_client:Retrying request to /openai/v1/chat/completions in 11.000000 seconds
+INFO:httpx:HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
+INFO:services.ai_matcher:AI selected candidate 1: Unknown (score: 0.900)
+INFO:services.ai_matcher:Found match: 0.900 - Vendor name similarity, exact amount match, 87 days apart
+
+Reasoning:
+- Vendor name similarity: Although the vendor name is unknown, it's likely that Eleven Labs Inc. is a similar or related entity to the vendor in Candidate 1, given the context of the transaction.
+- Amount accuracy: The amount in Candidate 1 ($112.0) is very close to the amount in the receipt ($111.87), with a difference of only 0.1%.
+- Date proximity: The date in Candidate 1 (2025-09-05) is 87 days apart from the date in the receipt (2025-06-10), which is a relatively small difference.
+- Description/notes relevance: Although the description in Candidate 1 is not directly related to the receipt, it mentions "Bank Equipment rental for 5 days," which could
+INFO:services.ai_matcher:Processing receipt 9/13: Twitter, Inc. - $4.0
+INFO:services.ai_matcher:Found 2 candidates for receipt: Twitter, Inc.
+INFO:httpx:HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 429 Too Many Requests"
+INFO:groq._base_client:Retrying request to /openai/v1/chat/completions in 7.000000 seconds
+INFO:httpx:HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
+WARNING:services.ai_matcher:Could not parse single match response: Based on the given receipt and candidate transactions, I will analyze each candidate and return the best match.
+
+Candidate 1:
+- Vendor: Unknown (0.0 similarity)
+- Amount: $3.86 (3.5% difference from $4.0)
+- Date: 2025-09-03 (65 days difference)
+- Notes: Bank  No Description (no relevance to "X Premium Basic")
+
+Score: 0.6 (Medium confidence due to minor amount difference, but unknown vendor and no description relevance)
+
+Candidate 2:
+- Vendor: Unknown (0.0 similarity)
+- Amount: $5.66 (41.5% difference from $4.0)
+- Date: 2025-08-29 (60 days difference)
+- Notes: Bank  No Description (no relevance to "X Premium Basic")
+
+Score: 0.4 (Low confidence due to significant amount difference and unknown vendor)
+
+Since neither candidate has a perfect match, I will choose
+WARNING:services.ai_matcher:Failed to parse AI response for receipt: Twitter, Inc.
+WARNING:services.ai_matcher:No match found for receipt: Twitter, Inc. - $4.0
+INFO:services.ai_matcher:Processing receipt 10/13: PAYPAL *BZABAWSKYJ - $37.55
+INFO:services.ai_matcher:Found 62 candidates for receipt: PAYPAL *BZABAWSKYJ
+INFO:services.ai_matcher:Limited candidates to top 10 by amount similarity
+INFO:httpx:HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 429 Too Many Requests"
+INFO:groq._base_client:Retrying request to /openai/v1/chat/completions in 9.000000 seconds
+INFO:httpx:HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
+WARNING:services.ai_matcher:Could not parse single match response: After analyzing the receipt against all the candidate transactions, I found the best match to be:
+
+Candidate 1: 0.09|Vendor name similarity, significant amount difference, and large date difference
+
+Reason: Although the amount difference is significant (9.5%), the vendor name similarity is the closest match among all candidates. The date difference is also substantial, but it's the best available option given the other differences.
+WARNING:services.ai_matcher:Failed to parse AI response for receipt: PAYPAL *BZABAWSKYJ
+WARNING:services.ai_matcher:No match found for receipt: PAYPAL *BZABAWSKYJ - $37.55
+INFO:services.ai_matcher:Processing receipt 11/13: Figma, Inc. - $27.0
+INFO:services.ai_matcher:Found 44 candidates for receipt: Figma, Inc.
+INFO:services.ai_matcher:Limited candidates to top 10 by amount similarity
+INFO:httpx:HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 429 Too Many Requests"
+INFO:groq._base_client:Retrying request to /openai/v1/chat/completions in 10.000000 seconds
+INFO:httpx:HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
+INFO:services.ai_matcher:AI selected candidate 8: Unknown (score: 0.290)
+INFO:services.ai_matcher:Found match: 0.290 - Closest amount match, minor date difference, and relevant note about office expenses
+
+This candidate has a relatively low confidence score due to significant differences in vendor name and amount. However, it is the best available option given the provided candidate transactions.
+INFO:services.ai_matcher:Processing receipt 12/13: Eleven Labs Inc. - $111.87
+INFO:services.ai_matcher:Found 90 candidates for receipt: Eleven Labs Inc.
+INFO:services.ai_matcher:Limited candidates to top 10 by amount similarity
+INFO:httpx:HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 429 Too Many Requests"
+INFO:groq._base_client:Retrying request to /openai/v1/chat/completions in 11.000000 seconds
+INFO:httpx:HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
+INFO:services.ai_matcher:AI selected candidate 1: Unknown (score: 0.900)
+INFO:services.ai_matcher:Found match: 0.900 - Same amount, minor difference in vendor name, and relatively close date
+
+Reasoning:
+- Vendor name similarity: The vendor name is unknown in both the receipt and the candidate transaction, so it's not a strong match. However, it's not a major difference either.
+- Amount accuracy: The amount is $111.87 in the receipt and $112.0 in the candidate transaction, which is a minor difference of 0.1%.
+- Date proximity: The date is 2025-06-10 in the receipt and 2025-09-05 in the candidate transaction, which is a difference of 87 days. This is not ideal, but it's not a major difference either.
+- Description/notes relevance: There is no description or notes in the receipt, but the candidate transaction has a note about bank equipment rental. This is not directly
+INFO:services.ai_matcher:Processing receipt 13/13: Twitter, Inc. - $4.0
+INFO:services.ai_matcher:Found 2 candidates for receipt: Twitter, Inc.
+INFO:httpx:HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 429 Too Many Requests"
+INFO:groq._base_client:Retrying request to /openai/v1/chat/completions in 7.000000 seconds
+INFO:httpx:HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
+WARNING:services.ai_matcher:Could not parse single match response: Based on the provided receipt and candidate transactions, I will analyze each candidate and return the best match.
+
+Candidate 1:
+- Vendor: Unknown (0.0 similarity)
+- Amount: $3.86 (3.5% difference from $4.0)
+- Date: 2025-09-03 (65 days difference)
+- Notes: Bank  No Description (no relevance to "X Premium Basic")
+- Amount difference: $0.14000000000000012 (3.5%)
+
+Score: 0.6 (Medium confidence, minor differences in amount and date)
+
+Candidate 2:
+- Vendor: Unknown (0.0 similarity)
+- Amount: $5.66 (41.5% difference from $4.0)
+- Date: 2025-08-29 (60 days difference)
+- Notes: Bank  No Description (no relevance to "X Premium Basic")
+- Amount difference: $1.6600000000000001 (41.5
+WARNING:services.ai_matcher:Failed to parse AI response for receipt: Twitter, Inc.
+WARNING:services.ai_matcher:No match found for receipt: Twitter, Inc. - $4.0
+INFO:services.ai_matcher:AI matching completed. Found 8 matches
+INFO:__main__:Matching completed, got 8 results
+INFO:__main__:Generated stats: {'total': 8, 'high_confidence': 3, 'low_confidence': 5, 'avg_score': 0.49}
+INFO:__main__:Match-specific completed successfully with 8 matches
+INFO:__main__:Starting match-specific for file IDs: ['d0d43d67-1e25-47b8-bf74-8ce9695cb699', 'dc542f59-1105-470c-a401-56407f2bbecf', 'ba36aa95-8fdb-4f16-973e-479f99da3100', '9845ef9d-2bd3-4803-93f8-d8d5bca0de7b', 'a8969315-6ed6-4dcd-9a47-3eb542d85d64', '0b3d64a4-c558-43cb-bf57-a6561205f1e6', 'e96d57f5-2070-43d6-8044-1d68106a3c27', 'bae25e20-2425-4db3-a3fc-adcb09c7d431', 'bfb36530-62f6-489a-b0b9-970ab8e7c20c', '0b4db1d9-670b-4dd7-bd3a-dfa39897acbb', '8fbf46d7-5f7b-4b01-a5d1-173adcb55748', 'e779f8ce-9f9a-4575-af8c-4558c6405977', 'ee595b47-e9b8-4c82-82e6-7490d716baa7'], categorization_id: cat_mgcolko1_wmfzzd
+INFO:__main__:Found 119 transactions in database
+INFO:__main__:Converted 119 transactions
+INFO:__main__:Successfully loaded receipt for file_id: d0d43d67-1e25-47b8-bf74-8ce9695cb699
+INFO:__main__:Successfully loaded receipt for file_id: dc542f59-1105-470c-a401-56407f2bbecf
+INFO:__main__:Successfully loaded receipt for file_id: ba36aa95-8fdb-4f16-973e-479f99da3100
+INFO:__main__:Successfully loaded receipt for file_id: 9845ef9d-2bd3-4803-93f8-d8d5bca0de7b
+INFO:__main__:Successfully loaded receipt for file_id: a8969315-6ed6-4dcd-9a47-3eb542d85d64
+INFO:__main__:Successfully loaded receipt for file_id: 0b3d64a4-c558-43cb-bf57-a6561205f1e6
+INFO:__main__:Successfully loaded receipt for file_id: e96d57f5-2070-43d6-8044-1d68106a3c27
+INFO:__main__:Successfully loaded receipt for file_id: bae25e20-2425-4db3-a3fc-adcb09c7d431
+INFO:__main__:Successfully loaded receipt for file_id: bfb36530-62f6-489a-b0b9-970ab8e7c20c
+INFO:__main__:Successfully loaded receipt for file_id: 0b4db1d9-670b-4dd7-bd3a-dfa39897acbb
+INFO:__main__:Successfully loaded receipt for file_id: 8fbf46d7-5f7b-4b01-a5d1-173adcb55748
+INFO:__main__:Successfully loaded receipt for file_id: e779f8ce-9f9a-4575-af8c-4558c6405977
+INFO:__main__:Successfully loaded receipt for file_id: ee595b47-e9b8-4c82-82e6-7490d716baa7
+INFO:__main__:Found 13 receipts, 0 missing
+INFO:__main__:Starting matching with 13 receipts and 119 transactions
+INFO:services.ai_matcher:Starting AI matching for 13 receipts against 119 transactions
+INFO:services.ai_matcher:Processing receipt 1/13: Figma, Inc. - $27.0
+INFO:services.ai_matcher:Found 44 candidates for receipt: Figma, Inc.
+INFO:services.ai_matcher:Limited candidates to top 10 by amount similarity
+INFO:httpx:HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
+INFO:services.ai_matcher:AI selected candidate 8: Unknown (score: 0.390)
+INFO:services.ai_matcher:Found match: 0.390 - Date proximity, description relevance, but significant amount difference
+INFO:services.ai_matcher:Processing receipt 2/13: Google LLC - $21.15
+INFO:services.ai_matcher:Found 25 candidates for receipt: Google LLC
+INFO:services.ai_matcher:Limited candidates to top 10 by amount similarity
+INFO:httpx:HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
+INFO:services.ai_matcher:AI selected candidate 7: Unknown (score: 0.140)
+INFO:services.ai_matcher:Found match: 0.140 - Vendor name similarity (Google LLC vs Unknown), exact amount match is not possible, but amount difference is moderate, and date proximity is relatively good (93 days difference)
+
+Note: The confidence score is low due to significant differences in vendor name and amount, but it's the best available option given the provided candidate transactions.
+INFO:services.ai_matcher:Processing receipt 3/13: PAYPAL *BZAABAWSKYJ - $37.55
+INFO:services.ai_matcher:Found 62 candidates for receipt: PAYPAL *BZAABAWSKYJ
+INFO:services.ai_matcher:Limited candidates to top 10 by amount similarity
+INFO:httpx:HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
+INFO:services.ai_matcher:AI selected candidate 1: Unknown (score: 0.950)
+INFO:services.ai_matcher:Found match: 0.950 - Exact amount match, minor date difference
+
+Reasoning:
+- The amount on the receipt ($37.55) matches exactly with Candidate 1 ($34.0, but considering the absolute value, it's $34.0).
+- Although the date difference is significant (864 days), the amount match is a strong indicator of a potential match.
+- The vendor name is unknown, but the description is not provided for any candidate, so it's not a deciding factor in this case.
+
+Note that the confidence score is high despite the significant date difference, as the amount match is a strong indicator of a potential match.
+INFO:services.ai_matcher:Processing receipt 4/13: Eleven Labs Inc. - $111.87
+INFO:services.ai_matcher:Found 90 candidates for receipt: Eleven Labs Inc.
+INFO:services.ai_matcher:Limited candidates to top 10 by amount similarity
+INFO:httpx:HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
+INFO:services.ai_matcher:AI selected candidate 1: Unknown (score: 0.900)
+INFO:services.ai_matcher:Found match: 0.900 - Same amount, minor difference in vendor name, and relatively close date
+
+Reasoning:
+- Vendor name similarity: 0.8 (unknown vs Eleven Labs Inc. is not a perfect match, but the difference is minor)
+- Amount accuracy: 0.95 (amount difference is 0.1%, which is considered minor)
+- Date proximity: 0.9 (87 days difference is relatively close)
+- Description/notes relevance: 0.8 (the description is not directly related to the receipt, but it's a plausible explanation for the transaction)
+
+The confidence score is 0.9, which falls under the high confidence category.
+INFO:services.ai_matcher:Processing receipt 5/13: Figma, Inc. - $27.0
+INFO:services.ai_matcher:Found 44 candidates for receipt: Figma, Inc.
+INFO:services.ai_matcher:Limited candidates to top 10 by amount similarity
+INFO:httpx:HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
+INFO:services.ai_matcher:AI selected candidate 1: Unknown (score: 0.890)
+INFO:services.ai_matcher:Found match: 0.890 - Close vendor name match, minor amount difference, and relatively close date
+
+Reasoning:
+- Vendor name similarity: Figma, Inc. is not explicitly mentioned in the candidate transactions, but "Unknown" is a close match to the vendor name.
+- Amount accuracy: The amount difference is $2.64, which is a relatively minor difference of 9.8%.
+- Date proximity: The date difference is 62 days, which is not ideal but still relatively close.
+- Description/notes relevance: There is no description or notes in the candidate transactions, so this factor does not contribute to the match score.
+
+Note that while the match score is not perfect, Candidate 1 has the highest score among all the candidate transactions, making it the best available option despite significant differences in vendor and amount.
+INFO:services.ai_matcher:Processing receipt 6/13: PAYPAL *BZA BAWSKYJ - $37.55
+INFO:services.ai_matcher:Found 62 candidates for receipt: PAYPAL *BZA BAWSKYJ
+INFO:services.ai_matcher:Limited candidates to top 10 by amount similarity
+INFO:httpx:HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 429 Too Many Requests"
+INFO:groq._base_client:Retrying request to /openai/v1/chat/completions in 1.000000 seconds
+INFO:httpx:HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
+WARNING:services.ai_matcher:Could not parse single match response: After analyzing the receipt against all the candidate transactions, I found the best match to be:
+
+Candidate 1: 0.09|Vendor name similarity, but significant amount difference and large date gap
+
+This candidate has the highest match score despite significant differences in amount and date. The vendor name similarity is the primary reason for this match, but the large date gap and significant amount difference reduce the overall confidence score.
+WARNING:services.ai_matcher:Failed to parse AI response for receipt: PAYPAL *BZA BAWSKYJ
+WARNING:services.ai_matcher:No match found for receipt: PAYPAL *BZA BAWSKYJ - $37.55
+INFO:services.ai_matcher:Processing receipt 7/13: Figma, Inc. - $27.0
+INFO:services.ai_matcher:Found 44 candidates for receipt: Figma, Inc.
+INFO:services.ai_matcher:Limited candidates to top 10 by amount similarity
+INFO:httpx:HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 429 Too Many Requests"
+INFO:groq._base_client:Retrying request to /openai/v1/chat/completions in 11.000000 seconds
+INFO:httpx:HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
+INFO:services.ai_matcher:AI selected candidate 8: Unknown (score: 0.290)
+INFO:services.ai_matcher:Found match: 0.290 - Closest amount match, minor difference in vendor name, and some relevance in the notes (Bought lunch for crew, which might be related to office expenses)
+
+Note: Although the amount difference is significant (29.7%), it's the closest match in terms of amount, and the notes provide some relevance to the office category.
+INFO:services.ai_matcher:Processing receipt 8/13: Eleven Labs Inc. - $111.87
+INFO:services.ai_matcher:Found 90 candidates for receipt: Eleven Labs Inc.
+INFO:services.ai_matcher:Limited candidates to top 10 by amount similarity
+INFO:httpx:HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 429 Too Many Requests"
+INFO:groq._base_client:Retrying request to /openai/v1/chat/completions in 10.000000 seconds
+INFO:httpx:HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
+INFO:services.ai_matcher:AI selected candidate 1: Unknown (score: 0.900)
+INFO:services.ai_matcher:Found match: 0.900 - Same amount, minor date difference, unknown vendor matches with the receipt's unknown vendor
+
+Explanation:
+
+- Vendor similarity: The receipt's vendor is unknown, and Candidate 1's vendor is also unknown, so this is a perfect match in terms of vendor similarity.
+- Amount accuracy: The amount on the receipt ($111.87) is very close to the amount in Candidate 1 ($112.0), with a difference of only $0.12999999999999545 (0.1%).
+- Date proximity: The date on the receipt (2025-06-10) is 87 days apart from the date in Candidate 1 (2025-09-05), which is a relatively minor difference.
+- Description/notes relevance: While the description in Candidate 1 does not match the description on the receipt, the notes mention "Bank Equipment rental
+INFO:services.ai_matcher:Processing receipt 9/13: Twitter, Inc. - $4.0
+INFO:services.ai_matcher:Found 2 candidates for receipt: Twitter, Inc.
+INFO:httpx:HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 429 Too Many Requests"
+INFO:groq._base_client:Retrying request to /openai/v1/chat/completions in 7.000000 seconds
+INFO:httpx:HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
+WARNING:services.ai_matcher:Could not parse single match response: Based on the provided receipt and candidate transactions, I will analyze each candidate and return the best match.
+
+Candidate 1:
+- Vendor: Unknown (0.0 similarity)
+- Amount: $3.86 (3.5% difference from $4.0)
+- Date: 2025-09-03 (65 days difference)
+- Notes: Bank  No Description (no relevance to "X Premium Basic")
+- Amount difference: $0.14000000000000012 (3.5%)
+
+Score: 0.6 (Medium confidence due to minor amount difference and lack of vendor and date match)
+
+Candidate 2:
+- Vendor: Unknown (0.0 similarity)
+- Amount: $5.66 (41.5% difference from $4.0)
+- Date: 2025-08-29 (60 days difference)
+- Notes: Bank  No Description (no relevance to "X Premium Basic")
+- Amount difference: $1.660000000000000
+WARNING:services.ai_matcher:Failed to parse AI response for receipt: Twitter, Inc.
+WARNING:services.ai_matcher:No match found for receipt: Twitter, Inc. - $4.0
+INFO:services.ai_matcher:Processing receipt 10/13: PAYPAL *BZABAWSKYJ - $37.55
+INFO:services.ai_matcher:Found 62 candidates for receipt: PAYPAL *BZABAWSKYJ
+INFO:services.ai_matcher:Limited candidates to top 10 by amount similarity
+INFO:httpx:HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 429 Too Many Requests"
+INFO:groq._base_client:Retrying request to /openai/v1/chat/completions in 10.000000 seconds
+INFO:httpx:HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
+INFO:services.ai_matcher:AI selected candidate 1: Unknown (score: 0.190)
+INFO:services.ai_matcher:Found match: 0.190 - Vendor name similarity, amount difference of 9.5%
+
+This is because the vendor name is similar (PAYPAL *BZABAWSKYJ vs Unknown), but the amount is off by 9.5%. The date difference is significant (864 days), and the description/notes do not match. However, this is the best available option given the significant differences in the other candidates.
+INFO:services.ai_matcher:Processing receipt 11/13: Figma, Inc. - $27.0
+INFO:services.ai_matcher:Found 44 candidates for receipt: Figma, Inc.
+INFO:services.ai_matcher:Limited candidates to top 10 by amount similarity
+INFO:httpx:HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 429 Too Many Requests"
+INFO:groq._base_client:Retrying request to /openai/v1/chat/completions in 10.000000 seconds
+INFO:httpx:HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
+INFO:services.ai_matcher:AI selected candidate 1: Unknown (score: 0.600)
+INFO:services.ai_matcher:Found match: 0.600 - Vendor name similarity (Figma, Inc. is similar to Unknown), moderate amount difference ($2.64), and date proximity (62 days apart)
+
+Note: Although the amount difference is significant, the vendor name similarity and date proximity contribute to a moderate confidence score.
+INFO:services.ai_matcher:Processing receipt 12/13: Eleven Labs Inc. - $111.87
+INFO:services.ai_matcher:Found 90 candidates for receipt: Eleven Labs Inc.
+INFO:services.ai_matcher:Limited candidates to top 10 by amount similarity
+INFO:httpx:HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 429 Too Many Requests"
+INFO:groq._base_client:Retrying request to /openai/v1/chat/completions in 11.000000 seconds
+INFO:httpx:HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
+INFO:services.ai_matcher:AI selected candidate 1: Unknown (score: 0.900)
+INFO:services.ai_matcher:Found match: 0.900 - Same vendor name similarity (Eleven Labs Inc. and Unknown), minor amount difference (0.1%), and relatively close date (87 days apart)
+
+Note that while the vendor name is not an exact match, it is the closest match available, and the amount difference is minor. The date difference is also relatively close, considering the time frame.
+INFO:services.ai_matcher:Processing receipt 13/13: Twitter, Inc. - $4.0
+INFO:services.ai_matcher:Found 2 candidates for receipt: Twitter, Inc.
+INFO:httpx:HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 429 Too Many Requests"
+INFO:groq._base_client:Retrying request to /openai/v1/chat/completions in 6.000000 seconds
+INFO:httpx:HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
+WARNING:services.ai_matcher:Could not parse single match response: Based on the provided receipt and candidate transactions, I will analyze each candidate and return the best match.
+
+Candidate 1:
+- Vendor: Unknown (0.0 similarity to Twitter, Inc.)
+- Amount: $3.86 (3.5% difference from $4.0)
+- Date: 2025-09-03 (65 days difference)
+- Notes: Bank  No Description (no relevance to "X Premium Basic")
+- Amount difference: $0.14000000000000012 (3.5%)
+
+Score: 0.15 (Minimal similarity due to significant vendor name difference and moderate amount difference)
+
+Candidate 2:
+- Vendor: Unknown (0.0 similarity to Twitter, Inc.)
+- Amount: $5.66 (41.5% difference from $4.0)
+- Date: 2025-08-29 (60 days difference)
+- Notes: Bank  No Description (no relevance to "X Premium Basic")
+- Amount difference: $1
+WARNING:services.ai_matcher:Failed to parse AI response for receipt: Twitter, Inc.
+WARNING:services.ai_matcher:No match found for receipt: Twitter, Inc. - $4.0
+INFO:services.ai_matcher:AI matching completed. Found 10 matches
+INFO:__main__:Matching completed, got 10 results
+INFO:__main__:Generated stats: {'total': 10, 'high_confidence': 5, 'low_confidence': 4, 'avg_score': 0.61}
+INFO:__main__:Match-specific completed successfully with 10 matches
+INFO:     102.89.45.216:29795 - "POST /match-specific HTTP/1.1" 200 OK
+INFO:     102.89.45.216:22092 - "POST /upload-multiple HTTP/1.1" 200 OK
+INFO:httpx:HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
+INFO:     102.89.45.216:22092 - "POST /process/82e672e4-a1a1-4df2-9b7d-f0cfa3307ed9 HTTP/1.1" 200 OK
+INFO:     102.89.45.216:22092 - "POST /upload-multiple HTTP/1.1" 200 OK
+INFO:httpx:HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
+INFO:     102.89.45.216:22092 - "POST /process/c4a7f61d-9d2a-4e6a-b86d-bb958a06d5f3 HTTP/1.1" 200 OK
+INFO:     102.89.45.216:22092 - "POST /upload-multiple HTTP/1.1" 200 OK
+INFO:httpx:HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
+WARNING:services.document_processor:Initial JSON parsing failed: Extra data: line 10 column 4 (char 246)
+INFO:     102.89.45.216:22092 - "POST /process/1281627c-59fc-4efa-beae-a8a69f3dd508 HTTP/1.1" 200 OK
+INFO:     102.89.45.216:22092 - "POST /upload-multiple HTTP/1.1" 200 OK
+INFO:httpx:HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
+INFO:     102.89.45.216:22092 - "POST /process/ee93fc23-e6f6-47ee-81da-c5b41319d1bc HTTP/1.1" 200 OK
+INFO:     102.89.45.216:22092 - "POST /upload-multiple HTTP/1.1" 200 OK
+INFO:httpx:HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
+INFO:     102.89.45.216:22092 - "POST /process/058a0bcf-d25e-49b3-903c-45559de871ad HTTP/1.1" 200 OK
+INFO:     199.241.139.243:49820 - "POST /upload-multiple HTTP/1.1" 200 OK
+INFO:httpx:HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
+INFO:     199.241.139.243:49836 - "POST /process/2d005728-3cce-4456-be4a-952188203772 HTTP/1.1" 200 OK
+INFO:     199.241.139.243:49850 - "POST /upload-multiple HTTP/1.1" 200 OK
+INFO:httpx:HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
+INFO:     199.241.139.243:49866 - "POST /process/de39fc65-0565-4c45-a559-bcda66af9c4a HTTP/1.1" 200 OK
+INFO:     199.241.139.243:17706 - "POST /upload-multiple HTTP/1.1" 200 OK
+INFO:httpx:HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
+INFO:     199.241.139.243:17710 - "POST /process/0f9b5c0f-ab7f-47f6-8edf-f5dab0badd64 HTTP/1.1" 200 OK
+INFO:     199.241.139.243:17714 - "POST /upload-multiple HTTP/1.1" 200 OK
+INFO:httpx:HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
+WARNING:services.document_processor:Initial JSON parsing failed: Extra data: line 10 column 4 (char 246)
+INFO:     199.241.139.243:17730 - "POST /process/cd679479-376d-42f0-ad9e-0743c89cd9fe HTTP/1.1" 200 OK
+INFO:     199.241.139.243:17740 - "POST /upload-multiple HTTP/1.1" 200 OK
+INFO:httpx:HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
+INFO:     199.241.139.243:17754 - "POST /process/0046dcd7-86a7-4153-be65-cddd3774a232 HTTP/1.1" 200 OK
+INFO:     199.241.139.243:39628 - "POST /upload-multiple HTTP/1.1" 200 OK
+INFO:httpx:HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
+INFO:     199.241.139.243:39644 - "POST /process/d0fe3ebb-094b-4191-9202-9ab216811ec9 HTTP/1.1" 200 OK
+INFO:     199.241.139.243:39652 - "POST /upload-multiple HTTP/1.1" 200 OK
+INFO:httpx:HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
+INFO:     199.241.139.243:39656 - "POST /process/1a23de15-07a5-4998-9d3f-6a6345aba237 HTTP/1.1" 200 OK
+INFO:     199.241.139.243:39658 - "POST /upload-multiple HTTP/1.1" 200 OK
+INFO:httpx:HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
+INFO:     199.241.139.243:39674 - "POST /process/cd3cc6e2-100e-462a-ba4a-3d03ee2da57f HTTP/1.1" 200 OK
+INFO:     199.241.139.243:26574 - "POST /upload-multiple HTTP/1.1" 200 OK
+INFO:httpx:HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
+WARNING:services.document_processor:Initial JSON parsing failed: Extra data: line 10 column 4 (char 246)
+INFO:     199.241.139.243:26586 - "POST /process/ffb999aa-bfd1-4a8a-a7e6-4700b284c30a HTTP/1.1" 200 OK
+INFO:     199.241.139.243:26596 - "POST /upload-multiple HTTP/1.1" 200 OK
+INFO:httpx:HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
+INFO:     199.241.139.243:26602 - "POST /process/a1a16ce3-ef6d-466c-8606-4ba9501f86a7 HTTP/1.1" 200 OK
+INFO:     199.241.139.243:46078 - "POST /transactions/import/csv HTTP/1.1" 200 OK
+INFO:__main__:Starting match-specific for file IDs: ['a1a16ce3-ef6d-466c-8606-4ba9501f86a7', 'ffb999aa-bfd1-4a8a-a7e6-4700b284c30a', 'cd3cc6e2-100e-462a-ba4a-3d03ee2da57f', '1a23de15-07a5-4998-9d3f-6a6345aba237', 'd0fe3ebb-094b-4191-9202-9ab216811ec9', '0046dcd7-86a7-4153-be65-cddd3774a232', 'cd679479-376d-42f0-ad9e-0743c89cd9fe', '0f9b5c0f-ab7f-47f6-8edf-f5dab0badd64', 'de39fc65-0565-4c45-a559-bcda66af9c4a', '2d005728-3cce-4456-be4a-952188203772'], categorization_id: cat_mgcvsk8r_6upxfy
+INFO:__main__:Found 123 transactions in database
+INFO:__main__:Converted 123 transactions
+INFO:__main__:Successfully loaded receipt for file_id: a1a16ce3-ef6d-466c-8606-4ba9501f86a7
+INFO:__main__:Successfully loaded receipt for file_id: ffb999aa-bfd1-4a8a-a7e6-4700b284c30a
+INFO:__main__:Successfully loaded receipt for file_id: cd3cc6e2-100e-462a-ba4a-3d03ee2da57f
+INFO:__main__:Successfully loaded receipt for file_id: 1a23de15-07a5-4998-9d3f-6a6345aba237
+INFO:__main__:Successfully loaded receipt for file_id: d0fe3ebb-094b-4191-9202-9ab216811ec9
+INFO:__main__:Successfully loaded receipt for file_id: 0046dcd7-86a7-4153-be65-cddd3774a232
+INFO:__main__:Successfully loaded receipt for file_id: cd679479-376d-42f0-ad9e-0743c89cd9fe
+INFO:__main__:Successfully loaded receipt for file_id: 0f9b5c0f-ab7f-47f6-8edf-f5dab0badd64
+INFO:__main__:Successfully loaded receipt for file_id: de39fc65-0565-4c45-a559-bcda66af9c4a
+INFO:__main__:Successfully loaded receipt for file_id: 2d005728-3cce-4456-be4a-952188203772
+INFO:__main__:Found 10 receipts, 0 missing
+INFO:__main__:Starting matching with 10 receipts and 123 transactions
+INFO:services.ai_matcher:Starting AI matching for 10 receipts against 123 transactions
+INFO:services.ai_matcher:Processing receipt 1/10: Eleven Labs Inc. - $111.87
+INFO:services.ai_matcher:Found 94 candidates for receipt: Eleven Labs Inc.
+INFO:services.ai_matcher:Limited candidates to top 10 by amount similarity
+INFO:httpx:HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
+INFO:services.ai_matcher:AI selected candidate 1: Unknown (score: 0.870)
+INFO:services.ai_matcher:Found match: 0.870 - Same vendor name, exact amount match, 87 days apart
+
+Reasoning:
+- Vendor name similarity: 0.95 (perfect match)
+- Amount accuracy: 0.95 (exact match)
+- Date proximity: 0.8 (87 days apart, which is a relatively minor difference)
+- Description/notes relevance: 0.8 (no direct relevance, but the vendor name is the same)
+
+The candidate with the highest match score is Candidate 1, with a confidence score of 0.87.
+INFO:services.ai_matcher:Processing receipt 2/10: PAYPAL *BZAABAWSKYJ - $37.55
+INFO:services.ai_matcher:Found 66 candidates for receipt: PAYPAL *BZAABAWSKYJ
+INFO:services.ai_matcher:Limited candidates to top 10 by amount similarity
+INFO:httpx:HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
+INFO:services.ai_matcher:AI selected candidate 1: Books by Bessie (score: 0.950)
+INFO:services.ai_matcher:Found match: 0.950 - Perfect match in vendor name, exact amount match, and exact date match.
+
+This is because Candidate 1 has a perfect match in vendor name, amount, and date, which is the highest scoring criteria.
+INFO:services.ai_matcher:Processing receipt 3/10: Figma, Inc. - $27.0
+INFO:services.ai_matcher:Found 48 candidates for receipt: Figma, Inc.
+INFO:services.ai_matcher:Limited candidates to top 10 by amount similarity
+INFO:httpx:HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
+INFO:services.ai_matcher:AI selected candidate 1: Books by Bessie (score: 0.950)
+INFO:services.ai_matcher:Found match: 0.950 - Same vendor name, exact amount match, exact date match
+
+This is because Candidate 1 has the exact same vendor name, amount, and date as the receipt, resulting in a perfect match score of 0.95.
+INFO:services.ai_matcher:Processing receipt 4/10: Google LLC - $21.15
+INFO:services.ai_matcher:Found 29 candidates for receipt: Google LLC
+INFO:services.ai_matcher:Limited candidates to top 10 by amount similarity
+INFO:httpx:HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
+INFO:services.ai_matcher:AI selected candidate 1: Books by Bessie (score: 0.950)
+INFO:services.ai_matcher:Found match: 0.950 - Perfect match in vendor name, exact amount match, and exact date match
+INFO:services.ai_matcher:Processing receipt 5/10: Figma, Inc. - $27.0
+INFO:services.ai_matcher:Found 48 candidates for receipt: Figma, Inc.
+INFO:services.ai_matcher:Limited candidates to top 10 by amount similarity
+INFO:httpx:HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
+INFO:services.ai_matcher:AI selected candidate 1: Books by Bessie (score: 0.950)
+INFO:services.ai_matcher:Found match: 0.950 - Perfect match in vendor name, exact amount match, and exact date match
+INFO:services.ai_matcher:Processing receipt 6/10: Eleven Labs Inc. - $111.87
+INFO:services.ai_matcher:Found 94 candidates for receipt: Eleven Labs Inc.
+INFO:services.ai_matcher:Limited candidates to top 10 by amount similarity
+INFO:httpx:HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
+INFO:services.ai_matcher:AI selected candidate 1: Unknown (score: 0.870)
+INFO:services.ai_matcher:Found match: 0.870 - Same vendor name, exact amount match, 87 days apart
+
+Reasoning:
+- Vendor name similarity: 0.95 (same vendor name, Eleven Labs Inc.)
+- Amount accuracy: 0.95 (exact amount match, $111.87)
+- Date proximity: 0.85 (87 days apart, which is a relatively small difference)
+- Description/notes relevance: 0.80 (no direct match, but the transaction is related to a bank equipment rental)
+
+The candidate with the highest match score is Candidate 1, with a confidence score of 0.87.
+INFO:services.ai_matcher:Processing receipt 7/10: PAYPAL *BZA BAWSKYJ - $37.55
+INFO:services.ai_matcher:Found 66 candidates for receipt: PAYPAL *BZA BAWSKYJ
+INFO:services.ai_matcher:Limited candidates to top 10 by amount similarity
+INFO:httpx:HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 429 Too Many Requests"
+INFO:groq._base_client:Retrying request to /openai/v1/chat/completions in 11.000000 seconds
+INFO:httpx:HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
+INFO:services.ai_matcher:AI selected candidate 1: Books by Bessie (score: 0.950)
+INFO:services.ai_matcher:Found match: 0.950 - Perfect match in vendor name, exact amount match, and exact date match
+
+This is because Candidate 1 has a perfect match in vendor name ("PAYPAL *BZA BAWSKYJ" vs "PAYPAL *BZABAWSKYJ"), exact amount match ($37.55), and exact date match (2023-05-22).
+INFO:services.ai_matcher:Processing receipt 8/10: Figma, Inc. - $27.0
+INFO:services.ai_matcher:Found 48 candidates for receipt: Figma, Inc.
+INFO:services.ai_matcher:Limited candidates to top 10 by amount similarity
+INFO:httpx:HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 429 Too Many Requests"
+INFO:groq._base_client:Retrying request to /openai/v1/chat/completions in 11.000000 seconds
+INFO:httpx:HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
+INFO:services.ai_matcher:AI selected candidate 1: Books by Bessie (score: 0.950)
+INFO:services.ai_matcher:Found match: 0.950 - Perfect match in vendor name, exact amount match, and exact date match
+INFO:services.ai_matcher:Processing receipt 9/10: Google LLC - $21.15
+INFO:services.ai_matcher:Found 29 candidates for receipt: Google LLC
+INFO:services.ai_matcher:Limited candidates to top 10 by amount similarity
+INFO:httpx:HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 429 Too Many Requests"
+INFO:groq._base_client:Retrying request to /openai/v1/chat/completions in 11.000000 seconds
+INFO:httpx:HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
+INFO:services.ai_matcher:AI selected candidate 1: Books by Bessie (score: 0.950)
+INFO:services.ai_matcher:Found match: 0.950 - Perfect match: same vendor, amount, and date
+
+This candidate has a perfect match score of 0.95 due to the exact match in vendor name, amount, and date.
+INFO:services.ai_matcher:Processing receipt 10/10: Figma, Inc. - $27.0
+INFO:services.ai_matcher:Found 48 candidates for receipt: Figma, Inc.
+INFO:services.ai_matcher:Limited candidates to top 10 by amount similarity
+INFO:httpx:HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 429 Too Many Requests"
+INFO:groq._base_client:Retrying request to /openai/v1/chat/completions in 10.000000 seconds
+INFO:httpx:HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
+INFO:services.ai_matcher:AI selected candidate 1: Books by Bessie (score: 0.950)
+INFO:services.ai_matcher:Found match: 0.950 - Same vendor name, exact amount match, exact date match
+
+This is because Candidate 1 has an exact match in vendor name, amount, and date, which meets the scoring criteria for a perfect match.
+INFO:services.ai_matcher:AI matching completed. Found 10 matches
+INFO:__main__:Matching completed, got 10 results
+INFO:__main__:Generated stats: {'total': 10, 'high_confidence': 10, 'low_confidence': 0, 'avg_score': 0.97}
+INFO:__main__:Match-specific completed successfully with 10 matches
+INFO:     199.241.139.243:50450 - "POST /match-specific HTTP/1.1" 200 OK
@@ -0,0 +1,18 @@
+groq
+python-dotenv
+pandas
+numpy
+fastapi
+uvicorn
+pydantic
+requests
+python-multipart
+Pillow
+PyPDF2
+aiofiles
+google-auth
+google-auth-oauthlib
+google-auth-httplib2
+google-api-python-client
+sqlalchemy
+pydantic-settings