Refactor main application structure and improve logging

- Reorganized imports in main.py for better readability and structure.
- Enhanced logging configuration and added more detailed log messages throughout the application.
- Improved error handling and response formatting in transaction import endpoints.
- Streamlined transaction processing logic for CSV and image uploads.
- Updated the matching engine to enhance match results with rules and to improve logging.
- Refactored tax rules engine for better clarity and maintainability.
- Cleaned up requirements.txt by removing pinned version specifiers for easier dependency management.
2025-08-06 16:12:53 +01:00
parent 5b3c066cea
commit 1f530da7c4
5 changed files with 668 additions and 346 deletions
@@ -1,32 +1,42 @@
-import groq
-from datetime import datetime, timedelta
-from typing import List, Tuple
-import config
-from models import Receipt, Transaction, Match
-import time
 import logging
-import asyncio
+import time
+from typing import List, Tuple
+
+import groq
+
+import config
+from models import Match, Receipt, Transaction

 # Set up logging
 logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger(__name__)

+
 class AIMatcher:
-    def __init__(self):
+    def __init__(self, use_batch_matching=True):
        self.client = groq.Groq(api_key=config.GROQ_API_KEY)
        self.model = "llama3-8b-8192"
        self.max_retries = 3
        self.retry_delay = 2  # seconds - increased for rate limiting
        self.rate_limit_delay = 1.0  # seconds between API calls
        self.last_api_call = 0
+        self.use_batch_matching = (
+            use_batch_matching  # Toggle between new and legacy methods
+        )

-    def match_receipts_to_transactions(self, receipts: List[Receipt], transactions: List[Transaction]) -> List[Match]:
+    def match_receipts_to_transactions(
+        self, receipts: List[Receipt], transactions: List[Transaction]
+    ) -> List[Match]:
        """Match receipts to transactions using AI"""
-        logger.info(f"Starting AI matching for {len(receipts)} receipts against {len(transactions)} transactions")
+        logger.info(
+            f"Starting AI matching for {len(receipts)} receipts against {len(transactions)} transactions"
+        )
        matches = []

        for i, receipt in enumerate(receipts):
-            logger.info(f"Processing receipt {i+1}/{len(receipts)}: {receipt.vendor} - ${receipt.amount}")
+            logger.info(
+                f"Processing receipt {i + 1}/{len(receipts)}: {receipt.vendor} - ${receipt.amount}"
+            )

            # Rate limiting
            self._rate_limit()
@@ -35,9 +45,13 @@ class AIMatcher:
            best_match = self._find_best_match(receipt, transactions)
            if best_match:
                matches.append(best_match)
-                logger.info(f"Found match: {best_match.confidence_score:.3f} - {best_match.match_reason}")
+                logger.info(
+                    f"Found match: {best_match.confidence_score:.3f} - {best_match.match_reason}"
+                )
            else:
-                logger.warning(f"No match found for receipt: {receipt.vendor} - ${receipt.amount}")
+                logger.warning(
+                    f"No match found for receipt: {receipt.vendor} - ${receipt.amount}"
+                )

        # Sort by confidence score (highest first)
        matches = sorted(matches, key=lambda x: x.confidence_score, reverse=True)
@@ -56,30 +70,194 @@ class AIMatcher:

        self.last_api_call = time.time()

-    def _find_best_match(self, receipt: Receipt, transactions: List[Transaction]) -> Match:
-        """Find the BEST match for a receipt (highest confidence score)"""
+    def _find_best_match(
+        self, receipt: Receipt, transactions: List[Transaction]
+    ) -> Match:
+        """Find the BEST match for a receipt using a single AI call for all candidates"""
        candidates = self._filter_candidates(receipt, transactions)
        if not candidates:
-            logger.warning(f"No candidates found for receipt: {receipt.vendor} - ${receipt.amount}")
+            logger.warning(
+                f"No candidates found for receipt: {receipt.vendor} - ${receipt.amount}"
+            )
            return None

        logger.info(f"Found {len(candidates)} candidates for receipt: {receipt.vendor}")

-        best_match = None
-        highest_score = 0
-        
-        for transaction in candidates:
-            score, reason = self._calculate_match_score(receipt, transaction)
-            logger.debug(f"Score {score:.3f} for transaction {transaction.vendor}: {reason}")
-            
-            # Keep the match with the highest score, regardless of how low it is
-            if score > highest_score:
-                highest_score = score
-                best_match = Match(receipt, transaction, score, reason)
+        # Choose matching method based on configuration
+        if self.use_batch_matching:
+            # New efficient method: single AI call for all candidates
+            best_match = self._find_best_match_single_call(receipt, candidates)
+        else:
+            # Legacy method: individual AI calls (fallback)
+            best_match = self._find_best_match_legacy(receipt, candidates)

        return best_match

-    def _filter_candidates(self, receipt: Receipt, transactions: List[Transaction]) -> List[Transaction]:
+    def _find_best_match_single_call(
+        self, receipt: Receipt, candidates: List[Transaction]
+    ) -> Match:
+        """Find the best match using a single AI call to evaluate all candidates"""
+        if not candidates:
+            return None
+
+        # Limit candidates to avoid token limits (adjust based on your needs)
+        max_candidates = 10
+        if len(candidates) > max_candidates:
+            # Sort by amount similarity and take top candidates
+            candidates = sorted(
+                candidates, key=lambda t: abs(receipt.amount - abs(t.amount))
+            )[:max_candidates]
+            logger.info(
+                f"Limited candidates to top {max_candidates} by amount similarity"
+            )
+
+        # Build comprehensive prompt with all candidates
+        candidates_text = ""
+        for i, transaction in enumerate(candidates):
+            transaction_amount_abs = abs(transaction.amount)
+            date_diff = abs((receipt.receipt_date - transaction.transaction_date).days)
+            amount_diff = abs(receipt.amount - transaction_amount_abs)
+            amount_percent_diff = (
+                (amount_diff / receipt.amount) * 100 if receipt.amount > 0 else 0
+            )
+
+            candidates_text += f"""
+Candidate {i + 1}:
+- Vendor: {transaction.vendor}
+- Amount: ${transaction.amount} (absolute: ${transaction_amount_abs})
+- Date: {transaction.transaction_date.strftime("%Y-%m-%d")} ({date_diff} days difference)
+- Notes: {transaction.notes}
+- Amount difference: ${amount_diff} ({amount_percent_diff:.1f}%)
+"""
+
+        prompt = f"""
+You are an expert at matching receipts to bank transactions. Analyze the receipt below against ALL the candidate transactions and return the BEST match.
+
+RECEIPT TO MATCH:
+- Vendor: {receipt.vendor}
+- Amount: ${receipt.amount}
+- Date: {receipt.receipt_date.strftime("%Y-%m-%d")}
+- Description: {receipt.description}
+- Category: {receipt.category}
+
+CANDIDATE TRANSACTIONS:
+{candidates_text}
+
+SCORING CRITERIA:
+- Perfect matches (same vendor, amount, date): 0.95-1.0
+- High confidence (minor differences): 0.8-0.94
+- Medium confidence (moderate differences): 0.6-0.79
+- Low confidence (significant differences): 0.4-0.59
+- Very low confidence (major differences): 0.2-0.39
+- Minimal similarity: 0.1-0.19
+- No meaningful similarity: 0.0-0.09
+
+Consider vendor name similarity, amount accuracy, date proximity, and description/notes relevance.
+
+IMPORTANT: You MUST return the candidate with the highest match score, even if it's very low. Never return NONE.
+Return ONLY the best match in this exact format:
+CANDIDATE_NUMBER|CONFIDENCE_SCORE|REASON
+
+Example: 3|0.87|Same vendor name, exact amount match, 1 day apart
+Example of low match: 5|0.15|Best available option despite significant differences in vendor and amount
+"""
+
+        for attempt in range(self.max_retries):
+            try:
+                result = self._call_groq_api_with_timeout(
+                    prompt, timeout=45
+                )  # Longer timeout for complex prompt
+
+                # Parse the single result
+                candidate_num, score, reason = self._parse_single_match_response(result)
+
+                if candidate_num == -1:  # Parsing error occurred
+                    logger.warning(
+                        f"Failed to parse AI response for receipt: {receipt.vendor}"
+                    )
+                    return None
+
+                if 0 <= candidate_num < len(candidates):
+                    best_transaction = candidates[candidate_num]
+                    logger.info(
+                        f"AI selected candidate {candidate_num + 1}: {best_transaction.vendor} (score: {score:.3f})"
+                    )
+                    return Match(receipt, best_transaction, score, reason)
+                else:
+                    logger.warning(
+                        f"AI returned invalid candidate number: {candidate_num}"
+                    )
+                    return None
+
+            except Exception as e:
+                logger.warning(
+                    f"Attempt {attempt + 1} failed for receipt {receipt.id}: {str(e)}"
+                )
+                if attempt < self.max_retries - 1:
+                    sleep_time = self.retry_delay * (2**attempt)
+                    logger.info(f"Waiting {sleep_time} seconds before retry...")
+                    time.sleep(sleep_time)
+                else:
+                    logger.error(f"All attempts failed for receipt {receipt.id}")
+                    return None
+
+        return None
+
+    def _parse_single_match_response(self, result: str) -> Tuple[int, float, str]:
+        """Parse AI response for single best match"""
+        result = result.strip()
+        logger.debug(f"Parsing single match response: {result}")
+
+        try:
+            if result.upper().startswith("NONE"):
+                # This should not happen with new prompt, but handle as parsing error
+                logger.warning(
+                    "AI returned NONE despite being instructed to always return best match"
+                )
+                return -1, 0.0, "AI returned NONE unexpectedly"
+
+            if "|" in result:
+                parts = result.split("|")
+                if len(parts) >= 3:
+                    candidate_str = parts[0].strip()
+                    score_str = parts[1].strip()
+                    reason = "|".join(parts[2:]).strip()
+
+                    # Extract candidate number
+                    import re
+
+                    candidate_match = re.search(r"\d+", candidate_str)
+                    if candidate_match:
+                        candidate_num = (
+                            int(candidate_match.group()) - 1
+                        )  # Convert to 0-based index
+                    else:
+                        raise ValueError("No candidate number found")
+
+                    # Extract score
+                    score_clean = "".join(
+                        c for c in score_str if c.isdigit() or c == "."
+                    )
+                    score = float(score_clean) if score_clean else 0.0
+
+                    # Ensure score is in valid range
+                    score = max(0.0, min(1.0, score))
+
+                    logger.debug(
+                        f"Parsed: candidate={candidate_num}, score={score}, reason={reason}"
+                    )
+                    return candidate_num, score, reason
+
+        except Exception as e:
+            logger.warning(f"Error parsing single match response: {e}")
+
+        # Fallback
+        logger.warning(f"Could not parse single match response: {result}")
+        return -1, 0.0, f"Parse error: {result[:50]}..."
+
+    def _filter_candidates(
+        self, receipt: Receipt, transactions: List[Transaction]
+    ) -> List[Transaction]:
        """Filter transactions to create a reasonable candidate list"""
        candidates = []
        amount_threshold = receipt.amount * 2.0  # 200% threshold - very inclusive
@@ -92,24 +270,53 @@ class AIMatcher:
            if abs(receipt.amount - transaction_amount_abs) <= amount_threshold:
                candidates.append(transaction)

-        logger.debug(f"Filtered {len(transactions)} transactions to {len(candidates)} candidates")
+        logger.debug(
+            f"Filtered {len(transactions)} transactions to {len(candidates)} candidates"
+        )
        return candidates

-    def _calculate_match_score(self, receipt: Receipt, transaction: Transaction) -> Tuple[float, str]:
+    def _find_best_match_legacy(
+        self, receipt: Receipt, transactions: List[Transaction]
+    ) -> Match:
+        """Legacy method: Find the best match using individual API calls (kept as fallback)"""
+        candidates = self._filter_candidates(receipt, transactions)
+        if not candidates:
+            return None
+
+        best_match = None
+        highest_score = 0
+
+        for transaction in candidates:
+            score, reason = self._calculate_match_score(receipt, transaction)
+            logger.debug(
+                f"Score {score:.3f} for transaction {transaction.vendor}: {reason}"
+            )
+
+            if score > highest_score:
+                highest_score = score
+                best_match = Match(receipt, transaction, score, reason)
+
+        return best_match
+
+    def _calculate_match_score(
+        self, receipt: Receipt, transaction: Transaction
+    ) -> Tuple[float, str]:
        """Calculate match score using AI"""
        # Calculate differences for the AI to consider
        date_diff = abs((receipt.receipt_date - transaction.transaction_date).days)
        transaction_amount_abs = abs(transaction.amount)
        amount_diff = abs(receipt.amount - transaction_amount_abs)
-        amount_percent_diff = (amount_diff / receipt.amount) * 100 if receipt.amount > 0 else 0
+        amount_percent_diff = (
+            (amount_diff / receipt.amount) * 100 if receipt.amount > 0 else 0
+        )

        prompt = f"""
-        Compare this receipt with this transaction and provide a confidence score (0-1) and brief reason.
+        Compare this receipt with this transaction and provide a confidence score (0-1) and brief reason, the reason must be a single sentence without any special formatting.
        
-        Receipt: {receipt.vendor}, ${receipt.amount}, {receipt.receipt_date.strftime('%Y-%m-%d')}
+        Receipt: {receipt.vendor}, ${receipt.amount}, {receipt.receipt_date.strftime("%Y-%m-%d")}
        Receipt Description: {receipt.description}
        Receipt Category: {receipt.category}
-        Transaction: {transaction.vendor}, ${transaction.amount} (absolute: ${transaction_amount_abs}), {transaction.transaction_date.strftime('%Y-%m-%d')}
+        Transaction: {transaction.vendor}, ${transaction.amount} (absolute: ${transaction_amount_abs}), {transaction.transaction_date.strftime("%Y-%m-%d")}
        Transaction Notes: {transaction.notes}
        
        Differences:
@@ -138,7 +345,9 @@ class AIMatcher:

        for attempt in range(self.max_retries):
            try:
-                result = self._call_groq_api_with_timeout(prompt, timeout=30)  # Increased timeout
+                result = self._call_groq_api_with_timeout(
+                    prompt, timeout=30
+                )  # Increased timeout

                # Parse the result - handle multiple formats
                score, reason = self._parse_ai_response(result)
@@ -149,7 +358,9 @@ class AIMatcher:
                return score, reason

            except Exception as e:
-                logger.warning(f"Attempt {attempt + 1} failed for receipt {receipt.id}: {str(e)}")
+                logger.warning(
+                    f"Attempt {attempt + 1} failed for receipt {receipt.id}: {str(e)}"
+                )
                if attempt < self.max_retries - 1:
                    # Exponential backoff for rate limiting
                    sleep_time = self.retry_delay * (2**attempt)
@@ -165,8 +376,8 @@ class AIMatcher:
        logger.debug(f"Parsing AI response: {result}")

        # Try to find score in various formats
-        if '|' in result:
-            parts = result.split('|')
+        if "|" in result:
+            parts = result.split("|")
            logger.debug(f"Split response into {len(parts)} parts: {parts}")

            # Look for a numeric score in any part
@@ -174,14 +385,26 @@ class AIMatcher:
                part = part.strip()
                try:
                    # Remove any non-numeric characters except decimal point
-                    score_str_clean = ''.join(c for c in part if c.isdigit() or c == '.')
+                    score_str_clean = "".join(
+                        c for c in part if c.isdigit() or c == "."
+                    )
                    if score_str_clean:
                        score = float(score_str_clean)
                        if 0 <= score <= 1:  # Valid confidence score
                            # Get reason from other parts
-                            reason_parts = [p.strip() for j, p in enumerate(parts) if j != i and p.strip()]
-                            reason = ' | '.join(reason_parts) if reason_parts else "Score extracted"
-                            logger.debug(f"Found score {score} in part {i}, reason: {reason}")
+                            reason_parts = [
+                                p.strip()
+                                for j, p in enumerate(parts)
+                                if j != i and p.strip()
+                            ]
+                            reason = (
+                                " | ".join(reason_parts)
+                                if reason_parts
+                                else "Score extracted"
+                            )
+                            logger.debug(
+                                f"Found score {score} in part {i}, reason: {reason}"
+                            )
                            return score, reason
                except ValueError:
                    continue
@@ -189,7 +412,8 @@ class AIMatcher:
        # Try to extract just a number from the response
        try:
            import re
-            numbers = re.findall(r'\d+\.?\d*', result)
+
+            numbers = re.findall(r"\d+\.?\d*", result)
            if numbers:
                for num_str in numbers:
                    score = float(num_str)
@@ -202,7 +426,8 @@ class AIMatcher:
        # Fallback - try to find any number and normalize it
        try:
            import re
-            numbers = re.findall(r'\d+\.?\d*', result)
+
+            numbers = re.findall(r"\d+\.?\d*", result)
            if numbers:
                score = float(numbers[0])
                # Normalize to 0-1 range if it's a percentage or other scale
@@ -228,7 +453,7 @@ class AIMatcher:
                    model=self.model,
                    messages=[{"role": "user", "content": prompt}],
                    max_tokens=200,
-                    temperature=0.1
+                    temperature=0.1,
                )
                return response.choices[0].message.content.strip()
            except Exception as e:
@@ -1,37 +1,37 @@
-from fastapi import FastAPI, HTTPException, UploadFile, File
-from fastapi.middleware.cors import CORSMiddleware
-from datetime import datetime
-from typing import List
-import uuid
 import csv
 import io
 import logging
+import uuid
+from datetime import datetime
+from typing import List
+
+from fastapi import FastAPI, File, HTTPException, UploadFile
+from fastapi.middleware.cors import CORSMiddleware

 # Configure logging
+from ai_rules import AIRule
+from api_models import (
+    DocumentProcessResponse,
+    DocumentUploadResponse,
+    MatchingResponse,
+    MatchResponse,
+    RuleRequest,
+)
+from document_processor import DocumentProcessor
+from matching_engine import MatchingEngine
+from models import Receipt, Transaction
+
 logging.basicConfig(
    level=logging.INFO,
-    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
-    handlers=[
-        logging.FileHandler('app.log'),
-        logging.StreamHandler()
-    ]
+    format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
+    handlers=[logging.FileHandler("app.log"), logging.StreamHandler()],
 )
 logger = logging.getLogger(__name__)

-from api_models import (
-    MatchingRequest, MatchingResponse, MatchResponse,
-    ApprovalRequest, RuleRequest, DocumentUploadResponse, 
-    DocumentProcessResponse, TransactionRequest
-)
-from models import Receipt, Transaction, Match
-from matching_engine import MatchingEngine
-from ai_rules import AIRule
-from document_processor import DocumentProcessor
-
 app = FastAPI(
    title="AI Bookkeeper - Data Science Engine",
    description="AI-powered receipt-to-transaction matching engine. Receives transaction data and provides intelligent matching capabilities.",
-    version="1.0.0"
+    version="1.0.0",
 )

 # CORS middleware
@@ -54,19 +54,22 @@ uploaded_files = {}
 stored_transactions = []
 processed_receipts = {}

+
@app.get("/")
 async def root():
    """Health check endpoint"""
    return {
        "message": "AI Bookkeeper Data Science Engine is running",
        "version": "1.0.0",
-        "status": "healthy"
+        "status": "healthy",
    }

+
 # ============================================================================
 # TRANSACTION IMPORT ENDPOINTS
 # ============================================================================

+
@app.post("/transactions/import/csv")
 async def import_transactions_csv(file: UploadFile = File(...)):
    """
@@ -74,17 +77,23 @@ async def import_transactions_csv(file: UploadFile = File(...)):
    """
    try:
        content = await file.read()
-        decoded = content.decode('utf-8')
+        decoded = content.decode("utf-8")
        reader = csv.DictReader(io.StringIO(decoded))
        transactions = []
        errors = []
        for idx, row in enumerate(reader):
            try:
                # Use correct headers and strip whitespace
-                account_number = row.get('Account Number') or row.get('Account Number '.strip())
-                txn_date_raw = row.get('Transaction Date') or row.get('Transaction Date '.strip())
-                amount_raw = row.get('Amount') or row.get('Amount '.strip())
-                payee_name = row.get('Description 2') or row.get('Description 2 '.strip())
+                account_number = row.get("Account Number") or row.get(
+                    "Account Number ".strip()
+                )
+                txn_date_raw = row.get("Transaction Date") or row.get(
+                    "Transaction Date ".strip()
+                )
+                amount_raw = row.get("Amount") or row.get("Amount ".strip())
+                payee_name = row.get("Description 2") or row.get(
+                    "Description 2 ".strip()
+                )
                memo = f"{row.get('Account Type', '').strip()} {row.get('Cheque Number', '').strip()} {row.get('Description 1', '').strip()}".strip()
                # Compose ID
                txn_id = f"{account_number}_{idx + 1}"
@@ -93,21 +102,25 @@ async def import_transactions_csv(file: UploadFile = File(...)):
                txn_date = None
                for fmt in ("%m/%d/%y", "%m/%d/%Y"):
                    try:
-                        txn_date = datetime.strptime(txn_date_str, fmt).strftime("%Y-%m-%d")
+                        txn_date = datetime.strptime(txn_date_str, fmt).strftime(
+                            "%Y-%m-%d"
+                        )
                        break
                    except Exception:
                        continue
                if not txn_date:
                    raise ValueError(f"Could not parse date: {txn_date_str}")
                # Parse amount
-                amount = float(amount_raw.replace(',', '').strip())
-                transactions.append({
+                amount = float(amount_raw.replace(",", "").strip())
+                transactions.append(
+                    {
                        "id": txn_id,
                        "txn_date": txn_date,
                        "amount": amount,
                        "payee_name": payee_name.strip(),
-                    "memo": memo
-                })
+                        "memo": memo,
+                    }
+                )
            except Exception as e:
                errors.append(f"Row {idx + 1}: {str(e)}")
        # Store transactions globally for auto-matching
@@ -117,11 +130,12 @@ async def import_transactions_csv(file: UploadFile = File(...)):
        return {
            "imported_count": len(transactions),
            "converted_transactions": transactions,
-            "errors": errors
+            "errors": errors,
        }
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))

+
@app.post("/transactions/import/image")
 async def import_transactions_from_image(file: UploadFile = File(...)):
    """
@@ -129,18 +143,26 @@ async def import_transactions_from_image(file: UploadFile = File(...)):
    """
    try:
        # Validate file type
-        allowed_types = ['jpg', 'jpeg', 'png', 'gif', 'bmp', 'pdf']
-        file_extension = file.filename.split('.')[-1].lower()
+        allowed_types = ["jpg", "jpeg", "png", "gif", "bmp", "pdf"]
+        file_extension = file.filename.split(".")[-1].lower()
        if file_extension not in allowed_types:
-            raise HTTPException(status_code=400, detail=f"Unsupported file type. Allowed: {allowed_types}")
+            raise HTTPException(
+                status_code=400,
+                detail=f"Unsupported file type. Allowed: {allowed_types}",
+            )
        # Read file content
        content = await file.read()
        # Save file to disk
        image_path = await document_processor.save_uploaded_file(content, file.filename)
        # Extract transactions from image (pass file path)
-        extraction_result = await document_processor.extract_transactions_from_image(image_path)
+        extraction_result = await document_processor.extract_transactions_from_image(
+            image_path
+        )
        if not extraction_result.get("extraction_success", False):
-            raise HTTPException(status_code=500, detail=extraction_result.get("error", "Extraction failed"))
+            raise HTTPException(
+                status_code=500,
+                detail=extraction_result.get("error", "Extraction failed"),
+            )
        extracted_transactions = extraction_result.get("transactions", [])
        # Store transactions globally for auto-matching
        global stored_transactions
@@ -159,28 +181,32 @@ async def import_transactions_from_image(file: UploadFile = File(...)):
                    # Fallback: use current year if parsing fails
                    txn_date = f"2024-{txn_date_raw}"

-                stored_transactions.append({
+                stored_transactions.append(
+                    {
                        "id": txn_id,
                        "txn_date": txn_date,
                        "amount": amount,
                        "payee_name": vendor,
-                    "memo": memo
-                })
-            except Exception as e:
+                        "memo": memo,
+                    }
+                )
+            except Exception:
                continue
        return {
            "imported_count": len(stored_transactions),
            "converted_transactions": stored_transactions,
-            "errors": []
+            "errors": [],
        }
    except Exception as e:
        logger.error(f"Error importing transactions from image: {str(e)}")
        raise HTTPException(status_code=500, detail=str(e))

+
 # ============================================================================
 # DOCUMENT PROCESSING ENDPOINTS
 # ============================================================================

+
@app.post("/upload-multiple", response_model=List[DocumentUploadResponse])
 async def upload_multiple_documents(files: List[UploadFile] = File(...)):
    """
@@ -194,11 +220,14 @@ async def upload_multiple_documents(files: List[UploadFile] = File(...)):

        for file in files:
            # Validate file type
-            allowed_types = ['jpg', 'jpeg', 'png', 'gif', 'bmp', 'pdf']
-            file_extension = file.filename.split('.')[-1].lower()
+            allowed_types = ["jpg", "jpeg", "png", "gif", "bmp", "pdf"]
+            file_extension = file.filename.split(".")[-1].lower()

            if file_extension not in allowed_types:
-                raise HTTPException(status_code=400, detail=f"Unsupported file type for {file.filename}. Allowed: {allowed_types}")
+                raise HTTPException(
+                    status_code=400,
+                    detail=f"Unsupported file type for {file.filename}. Allowed: {allowed_types}",
+                )

            # Generate unique file ID
            file_id = str(uuid.uuid4())
@@ -208,16 +237,18 @@ async def upload_multiple_documents(files: List[UploadFile] = File(...)):
            uploaded_files[file_id] = {
                "filename": file.filename,
                "content": content,
-                "upload_date": datetime.now()
+                "upload_date": datetime.now(),
            }

-            responses.append(DocumentUploadResponse(
+            responses.append(
+                DocumentUploadResponse(
                    file_id=file_id,
                    filename=file.filename,
                    file_type=file_extension,
                    upload_date=datetime.now(),
-                status="uploaded"
-            ))
+                    status="uploaded",
+                )
+            )

        return responses

@@ -225,6 +256,7 @@ async def upload_multiple_documents(files: List[UploadFile] = File(...)):
        logger.error(f"Error uploading documents: {str(e)}")
        raise HTTPException(status_code=500, detail=str(e))

+
@app.post("/process/{file_id}", response_model=DocumentProcessResponse)
 async def process_document(file_id: str):
    """
@@ -241,8 +273,10 @@ async def process_document(file_id: str):
        file_data = uploaded_files[file_id]

        # Save file temporarily and process it
-        file_path = await document_processor.save_uploaded_file(file_data["content"], file_data["filename"])
-        file_type = file_data["filename"].split('.')[-1].lower()
+        file_path = await document_processor.save_uploaded_file(
+            file_data["content"], file_data["filename"]
+        )
+        file_type = file_data["filename"].split(".")[-1].lower()
        receipt_data = await document_processor.process_file(file_path, file_type)

        # Store processed receipt
@@ -258,17 +292,19 @@ async def process_document(file_id: str):
            date=receipt_data.get("date", ""),
            category=receipt_data.get("category", ""),
            confidence=receipt_data.get("confidence", 0.0),
-            error=receipt_data.get("error", None)
+            error=receipt_data.get("error", None),
        )

    except Exception as e:
        logger.error(f"Error processing document {file_id}: {str(e)}")
        raise HTTPException(status_code=500, detail=str(e))

+
 # ============================================================================
 # MATCHING ENDPOINTS
 # ============================================================================

+
@app.post("/match-specific", response_model=MatchingResponse)
 async def match_specific_receipts(file_ids: List[str]):
    """
@@ -283,7 +319,10 @@ async def match_specific_receipts(file_ids: List[str]):
        # Check if transactions are imported
        if not stored_transactions:
            logger.warning("No transactions imported")
-            raise HTTPException(status_code=400, detail="No transactions imported. Please upload CSV first.")
+            raise HTTPException(
+                status_code=400,
+                detail="No transactions imported. Please upload CSV first.",
+            )

        logger.info(f"Found {len(stored_transactions)} stored transactions")

@@ -297,7 +336,7 @@ async def match_specific_receipts(file_ids: List[str]):
                    transaction_date=txn_date,
                    amount=txn["amount"],
                    vendor=txn["payee_name"],
-                    notes=txn["memo"]
+                    notes=txn["memo"],
                )
                transactions.append(transaction)
            except Exception as e:
@@ -314,14 +353,20 @@ async def match_specific_receipts(file_ids: List[str]):
            if file_id in processed_receipts:
                receipt_data = processed_receipts[file_id]
                logger.info(f"DEBUG: receipt_data for {file_id}: {receipt_data}")
-                logger.info(f"DEBUG: receipt_data keys for {file_id}: {list(receipt_data.keys())}")
+                logger.info(
+                    f"DEBUG: receipt_data keys for {file_id}: {list(receipt_data.keys())}"
+                )
                try:
                    # Handle missing date field
                    if "date" not in receipt_data or not receipt_data["date"]:
-                        logger.warning(f"Missing date for receipt {file_id}, using current date")
+                        logger.warning(
+                            f"Missing date for receipt {file_id}, using current date"
+                        )
                        receipt_date = datetime.now()
                    else:
-                        receipt_date = datetime.strptime(receipt_data["date"], "%Y-%m-%d")
+                        receipt_date = datetime.strptime(
+                            receipt_data["date"], "%Y-%m-%d"
+                        )

                    # Handle missing amount field - try multiple possible keys
                    amount = receipt_data.get("amount")
@@ -330,14 +375,18 @@ async def match_specific_receipts(file_ids: List[str]):
                    if amount is None:
                        amount = receipt_data.get("amount_total")
                    if amount is None:
-                        logger.warning(f"Missing amount for receipt {file_id}, using 0.0")
+                        logger.warning(
+                            f"Missing amount for receipt {file_id}, using 0.0"
+                        )
                        amount = 0.0

                    # Ensure amount is a float
                    try:
                        amount = float(amount)
                    except (ValueError, TypeError):
-                        logger.warning(f"Invalid amount '{amount}' for receipt {file_id}, using 0.0")
+                        logger.warning(
+                            f"Invalid amount '{amount}' for receipt {file_id}, using 0.0"
+                        )
                        amount = 0.0

                    logger.info(f"DEBUG: amount for {file_id}: {amount}")
@@ -345,7 +394,9 @@ async def match_specific_receipts(file_ids: List[str]):
                    # Handle missing vendor field
                    vendor = receipt_data.get("vendor", "")
                    if not vendor:
-                        logger.warning(f"Missing vendor for receipt {file_id}, using 'Unknown'")
+                        logger.warning(
+                            f"Missing vendor for receipt {file_id}, using 'Unknown'"
+                        )
                        vendor = "Unknown"

                    # Handle missing category field
@@ -370,12 +421,14 @@ async def match_specific_receipts(file_ids: List[str]):
                        tax=tax,
                        vendor=vendor,
                        category=category,
-                        description=description
+                        description=description,
                    )
                    receipts.append(receipt)
                    logger.info(f"Added receipt: {receipt.vendor} - ${receipt.amount}")
                except Exception as e:
-                    logger.warning(f"Error creating receipt object for {file_id}: {str(e)}")
+                    logger.warning(
+                        f"Error creating receipt object for {file_id}: {str(e)}"
+                    )
                    missing_files.append(f"{file_id} (error: {str(e)})")
            else:
                logger.warning(f"Receipt {file_id} not found in processed_receipts")
@@ -383,21 +436,31 @@ async def match_specific_receipts(file_ids: List[str]):

        if missing_files:
            logger.error(f"Missing files: {missing_files}")
-            raise HTTPException(status_code=400, detail=f"Missing files: {missing_files}")
+            raise HTTPException(
+                status_code=400, detail=f"Missing files: {missing_files}"
+            )

-        logger.info(f"Processing {len(receipts)} receipts against {len(transactions)} transactions")
+        logger.info(
+            f"Processing {len(receipts)} receipts against {len(transactions)} transactions"
+        )

        # Perform matching
        try:
            logger.info("Starting direct matching call (without ThreadPoolExecutor)")
            logger.info(f"matching_engine type: {type(matching_engine)}")
-            logger.info(f"matching_engine.process_matching type: {type(matching_engine.process_matching)}")
+            logger.info(
+                f"matching_engine.process_matching type: {type(matching_engine.process_matching)}"
+            )
            logger.info(f"receipts type: {type(receipts)}, length: {len(receipts)}")
-            logger.info(f"transactions type: {type(transactions)}, length: {len(transactions)}")
+            logger.info(
+                f"transactions type: {type(transactions)}, length: {len(transactions)}"
+            )

            matches = matching_engine.process_matching(receipts, transactions)

-            logger.info(f"Matching completed successfully. Found {len(matches)} matches")
+            logger.info(
+                f"Matching completed successfully. Found {len(matches)} matches"
+            )

            # Convert matches to response format
            match_responses = []
@@ -423,18 +486,24 @@ async def match_specific_receipts(file_ids: List[str]):
                    receipt_category=match.receipt.category,
                    receipt_tax_amount=match.receipt.tax,
                    transaction_vendor=match.transaction.vendor,
-                    transaction_amount=match.transaction.amount
+                    transaction_amount=match.transaction.amount,
                )
                match_responses.append(match_response)
-                logger.info(f"Successfully created MatchResponse for {match.receipt.vendor} -> {match.transaction.vendor}")
+                logger.info(
+                    f"Successfully created MatchResponse for {match.receipt.vendor} -> {match.transaction.vendor}"
+                )

            logger.info(f"Formatted {len(match_responses)} match responses")

            # Calculate statistics
            if match_responses:
-                high_confidence = sum(1 for m in match_responses if m.confidence_score >= 0.8)
+                high_confidence = sum(
+                    1 for m in match_responses if m.confidence_score >= 0.8
+                )
                low_confidence = len(match_responses) - high_confidence
-                avg_score = sum(m.confidence_score for m in match_responses) / len(match_responses)
+                avg_score = sum(m.confidence_score for m in match_responses) / len(
+                    match_responses
+                )
            else:
                high_confidence = low_confidence = avg_score = 0

@@ -442,23 +511,24 @@ async def match_specific_receipts(file_ids: List[str]):
                "total": len(match_responses),
                "high_confidence": high_confidence,
                "low_confidence": low_confidence,
-                "avg_score": round(avg_score, 2)
+                "avg_score": round(avg_score, 2),
            }

            logger.info(f"Generated stats: {stats}")
-            logger.info(f"Match-specific completed successfully with {len(match_responses)} matches")
-            
-            return MatchingResponse(
-                matches=match_responses,
-                stats=stats
+            logger.info(
+                f"Match-specific completed successfully with {len(match_responses)} matches"
            )

+            return MatchingResponse(matches=match_responses, stats=stats)
+
        except Exception as e:
            logger.error(f"Exception in matching section: {str(e)}")
            logger.error(f"Exception type: {type(e)}")
            logger.error(f"Exception args: {e.args}")
            logger.error(f"Traceback: {e.__traceback__}")
-            raise HTTPException(status_code=500, detail=f"Unexpected matching error: {str(e)}")
+            raise HTTPException(
+                status_code=500, detail=f"Unexpected matching error: {str(e)}"
+            )

    except HTTPException:
        raise
@@ -466,10 +536,12 @@ async def match_specific_receipts(file_ids: List[str]):
        logger.error(f"Unexpected error in match_specific_receipts: {str(e)}")
        raise HTTPException(status_code=500, detail=str(e))

+
 # ============================================================================
 # RULES MANAGEMENT ENDPOINTS
 # ============================================================================

+
@app.post("/rules")
 async def add_rule(request: RuleRequest):
    """
@@ -480,7 +552,7 @@ async def add_rule(request: RuleRequest):
            name=request.name,
            condition=request.condition,
            action=request.action,
-            source=request.source
+            source=request.source,
        )

        matching_engine.rules_engine.rules.append(new_rule)
@@ -490,6 +562,7 @@ async def add_rule(request: RuleRequest):
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))

+
@app.get("/rules")
 async def get_rules():
    """
@@ -498,19 +571,22 @@ async def get_rules():
    try:
        rules = []
        for rule in matching_engine.rules_engine.rules:
-            rules.append({
+            rules.append(
+                {
                    "name": rule.name,
                    "condition": rule.condition,
                    "action": rule.action,
                    "source": rule.source,
-                "status": rule.status
-            })
+                    "status": rule.status,
+                }
+            )

        return {"rules": rules}

    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))

+
@app.delete("/rules/{rule_name}")
 async def delete_rule(rule_name: str):
    """
@@ -530,10 +606,12 @@ async def delete_rule(rule_name: str):
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))

+
 # ============================================================================
 # STATISTICS ENDPOINT
 # ============================================================================

+
@app.get("/stats")
 async def get_stats():
    """
@@ -544,12 +622,14 @@ async def get_stats():
            "total_transactions": len(stored_transactions),
            "total_receipts": len(processed_receipts),
            "total_uploaded_files": len(uploaded_files),
-            "rules_count": len(matching_engine.rules_engine.rules)
+            "rules_count": len(matching_engine.rules_engine.rules),
        }

    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))

+
 if __name__ == "__main__":
    import uvicorn
+
    uvicorn.run(app, host="0.0.0.0", port=8343)
@@ -1,9 +1,10 @@
-from typing import List, Dict, Any
-from datetime import datetime
+from typing import Any, Dict, List
+
 from ai_matcher import AIMatcher
 from ai_rules import AIRulesEngine
 from feedback_logger import FeedbackLogger
-from models import Receipt, Transaction, Match
+from models import Match, Receipt, Transaction
+

 class MatchingEngine:
    def __init__(self):
@@ -11,9 +12,13 @@ class MatchingEngine:
        self.rules_engine = AIRulesEngine()
        self.feedback_logger = FeedbackLogger()

-    def process_matching(self, receipts: List[Receipt], transactions: List[Transaction]) -> List[Match]:
+    def process_matching(
+        self, receipts: List[Receipt], transactions: List[Transaction]
+    ) -> List[Match]:
        # Get AI matches
-        ai_matches = self.ai_matcher.match_receipts_to_transactions(receipts, transactions)
+        ai_matches = self.ai_matcher.match_receipts_to_transactions(
+            receipts, transactions
+        )

        # Apply rules and enhance matches
        enhanced_matches = []
@@ -28,7 +33,9 @@ class MatchingEngine:

        # Apply confidence boost from rules
        if rule_results["confidence_boost"] > 0:
-            match.confidence_score = min(1.0, match.confidence_score + rule_results["confidence_boost"])
+            match.confidence_score = min(
+                1.0, match.confidence_score + rule_results["confidence_boost"]
+            )

        # Auto-approve if rules say so
        if rule_results["auto_approve"]:
@@ -48,7 +55,7 @@ class MatchingEngine:
            original_match=f"AI Score: {match.confidence_score}",
            correction="Approved",
            reason="User approved match",
-            user_id=user_id
+            user_id=user_id,
        )

    def reject_match(self, match: Match, reason: str, user_id: str):
@@ -58,12 +65,17 @@ class MatchingEngine:
            original_match=f"AI Score: {match.confidence_score}",
            correction="Rejected",
            reason=reason,
-            user_id=user_id
+            user_id=user_id,
        )

    def get_matching_stats(self, matches: List[Match]) -> Dict[str, Any]:
        if not matches:
-            return {"total": 0, "high_confidence": 0, "low_confidence": 0, "avg_score": 0}
+            return {
+                "total": 0,
+                "high_confidence": 0,
+                "low_confidence": 0,
+                "avg_score": 0,
+            }

        high_confidence = len([m for m in matches if m.confidence_score >= 0.8])
        low_confidence = len([m for m in matches if m.confidence_score < 0.8])
@@ -73,5 +85,5 @@ class MatchingEngine:
            "total": len(matches),
            "high_confidence": high_confidence,
            "low_confidence": low_confidence,
-            "avg_score": round(avg_score, 3)
+            "avg_score": round(avg_score, 3),
        }
@@ -1,16 +1,16 @@
-groq>=0.5.0
-python-dotenv==1.0.0
-pandas==2.1.4
-numpy==1.24.3
-fastapi==0.104.1
-uvicorn==0.24.0
-pydantic==2.5.0
-requests==2.31.0
-python-multipart==0.0.6
-Pillow==10.0.1
-PyPDF2==3.0.1
-aiofiles==23.2.1
-google-auth==2.23.4
-google-auth-oauthlib==1.1.0
-google-auth-httplib2==0.1.1
-google-api-python-client==2.108.0 
+groq
+python-dotenv
+pandas
+numpy
+fastapi
+uvicorn
+pydantic
+requests
+python-multipart
+Pillow
+PyPDF2
+aiofiles
+google-auth
+google-auth-oauthlib
+google-auth-httplib2
+google-api-python-client
@@ -1,10 +1,11 @@
-from typing import Dict, Any, Optional, Tuple
-from datetime import datetime
-from models import Receipt, Transaction, Address, Asset
 import logging
+from typing import Any, Dict, Optional
+
+from models import Address, Asset, Receipt, Transaction

 logger = logging.getLogger(__name__)

+
 class TaxRulesEngine:
    """Engine to handle tax calculations based on the four tax rules"""

@@ -41,7 +42,7 @@ class TaxRulesEngine:
                    "success": False,
                    "error": "No valid address found for tax calculation",
                    "calculated_tax": 0.0,
-                    "tax_rate": 0.0
+                    "tax_rate": 0.0,
                }

            # Get tax rate for the province
@@ -55,7 +56,7 @@ class TaxRulesEngine:
                "calculated_tax": calculated_tax,
                "tax_rate": tax_rate,
                "tax_address": tax_address.province,
-                "rule_applied": "Sales Tax Rule"
+                "rule_applied": "Sales Tax Rule",
            }

        except Exception as e:
@@ -64,14 +65,16 @@ class TaxRulesEngine:
                "success": False,
                "error": str(e),
                "calculated_tax": 0.0,
-                "tax_rate": 0.0
+                "tax_rate": 0.0,
            }

    def _get_tax_address(self, receipt: Receipt) -> Optional[Address]:
        """Determine which address to use for tax calculation"""
        # Rule: Use shipping address if different from billing, otherwise use billing
        if receipt.shipping_address and receipt.billing_address:
-            if self._addresses_different(receipt.billing_address, receipt.shipping_address):
+            if self._addresses_different(
+                receipt.billing_address, receipt.shipping_address
+            ):
                return receipt.shipping_address
            else:
                return receipt.billing_address
@@ -84,11 +87,15 @@ class TaxRulesEngine:

    def _addresses_different(self, billing: Address, shipping: Address) -> bool:
        """Check if billing and shipping addresses are different"""
-        return (billing.province != shipping.province or 
-                billing.city != shipping.city or 
-                billing.postal_code != shipping.postal_code)
+        return (
+            billing.province != shipping.province
+            or billing.city != shipping.city
+            or billing.postal_code != shipping.postal_code
+        )

-    def apply_fx_rule(self, receipt: Receipt, transaction: Transaction) -> Dict[str, Any]:
+    def apply_fx_rule(
+        self, receipt: Receipt, transaction: Transaction
+    ) -> Dict[str, Any]:
        """
        Foreign Exchange Rule: Handle currency mismatches
        """
@@ -105,14 +112,14 @@ class TaxRulesEngine:
                    "receipt_amount": receipt.amount,
                    "transaction_amount": abs(transaction.amount),
                    "requires_manual_review": True,
-                    "rule_applied": "Foreign Exchange Rule"
+                    "rule_applied": "Foreign Exchange Rule",
                }
            else:
                return {
                    "success": True,
                    "fx_discrepancy": 0.0,
                    "requires_manual_review": False,
-                    "rule_applied": "No FX Rule (same currency)"
+                    "rule_applied": "No FX Rule (same currency)",
                }

        except Exception as e:
@@ -121,10 +128,12 @@ class TaxRulesEngine:
                "success": False,
                "error": str(e),
                "fx_discrepancy": 0.0,
-                "requires_manual_review": False
+                "requires_manual_review": False,
            }

-    def calculate_straight_line_depreciation(self, asset: Asset, year: int) -> Dict[str, Any]:
+    def calculate_straight_line_depreciation(
+        self, asset: Asset, year: int
+    ) -> Dict[str, Any]:
        """
        Straight-Line Depreciation for accounting purposes
        """
@@ -133,27 +142,25 @@ class TaxRulesEngine:
                return {
                    "success": False,
                    "error": f"Year {year} exceeds useful life of {asset.useful_life_years} years",
-                    "depreciation": 0.0
+                    "depreciation": 0.0,
                }

            # Straight-line formula: (Cost - Residual Value) / Useful Life
-            annual_depreciation = (asset.purchase_amount - asset.residual_value) / asset.useful_life_years
+            annual_depreciation = (
+                asset.purchase_amount - asset.residual_value
+            ) / asset.useful_life_years

            return {
                "success": True,
                "depreciation": annual_depreciation,
                "book_value": asset.purchase_amount - (annual_depreciation * year),
                "method": "Straight-Line",
-                "rule_applied": "Depreciation Rule (Accounting)"
+                "rule_applied": "Depreciation Rule (Accounting)",
            }

        except Exception as e:
            self.logger.error(f"Error calculating straight-line depreciation: {str(e)}")
-            return {
-                "success": False,
-                "error": str(e),
-                "depreciation": 0.0
-            }
+            return {"success": False, "error": str(e), "depreciation": 0.0}

    def calculate_cca_depreciation(self, asset: Asset, year: int) -> Dict[str, Any]:
        """
@@ -164,7 +171,7 @@ class TaxRulesEngine:
                return {
                    "success": False,
                    "error": "Year must be at least 1",
-                    "depreciation": 0.0
+                    "depreciation": 0.0,
                }

            # CCA uses declining balance method
@@ -187,16 +194,12 @@ class TaxRulesEngine:
                "total_depreciation": total_depreciation,
                "book_value": max(book_value, asset.residual_value),
                "method": "CCA Declining Balance",
-                "rule_applied": "Depreciation Rule (Tax)"
+                "rule_applied": "Depreciation Rule (Tax)",
            }

        except Exception as e:
            self.logger.error(f"Error calculating CCA depreciation: {str(e)}")
-            return {
-                "success": False,
-                "error": str(e),
-                "depreciation": 0.0
-            }
+            return {"success": False, "error": str(e), "depreciation": 0.0}

    def apply_meals_entertainment_rule(self, receipt: Receipt) -> Dict[str, Any]:
        """
@@ -208,7 +211,7 @@ class TaxRulesEngine:
                    "success": True,
                    "tax_deduction": receipt.amount,
                    "accounting_deduction": receipt.amount,
-                    "rule_applied": "No M&E Rule (not meals/entertainment)"
+                    "rule_applied": "No M&E Rule (not meals/entertainment)",
                }

            # For tax purposes: 50% deductible
@@ -225,7 +228,7 @@ class TaxRulesEngine:
                "tax_deduction": tax_deduction,
                "accounting_deduction": accounting_deduction,
                "tax_on_meal": tax_on_meal,
-                "rule_applied": "Meals & Entertainment Rule"
+                "rule_applied": "Meals & Entertainment Rule",
            }

        except Exception as e:
@@ -234,10 +237,12 @@ class TaxRulesEngine:
                "success": False,
                "error": str(e),
                "tax_deduction": 0.0,
-                "accounting_deduction": 0.0
+                "accounting_deduction": 0.0,
            }

-    def apply_all_tax_rules(self, receipt: Receipt, transaction: Transaction = None) -> Dict[str, Any]:
+    def apply_all_tax_rules(
+        self, receipt: Receipt, transaction: Transaction = None
+    ) -> Dict[str, Any]:
        """
        Apply all tax rules to a receipt
        """
@@ -246,7 +251,7 @@ class TaxRulesEngine:
            "rules_applied": [],
            "sales_tax": {},
            "fx_analysis": {},
-            "meals_entertainment": {}
+            "meals_entertainment": {},
        }

        # Apply Sales Tax Rule