first commit

2025-08-05 22:25:51 +01:00
commit 5b3c066cea
14 changed files with 2670 additions and 0 deletions
@@ -0,0 +1,229 @@
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+
+# PyInstaller
+#  Usually these files are written by a python script from a template
+#  before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+*.py,cover
+.hypothesis/
+.pytest_cache/
+cover/
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+db.sqlite3-journal
+
+# Flask stuff:
+instance/
+.webassets-cache
+
+# Scrapy stuff:
+.scrapy
+
+# Sphinx documentation
+docs/_build/
+
+# PyBuilder
+.pybuilder/
+target/
+
+# Jupyter Notebook
+.ipynb_checkpoints
+
+# IPython
+profile_default/
+ipython_config.py
+
+# pyenv
+#   For a library or package, you might want to ignore these files since the code is
+#   intended to run in multiple environments; otherwise, check them in:
+# .python-version
+
+# pipenv
+#   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+#   However, in case of collaboration, if having platform-specific dependencies or dependencies
+#   having no cross-platform support, pipenv may install dependencies that don't work, or not
+#   install all needed dependencies.
+#Pipfile.lock
+
+# poetry
+#   Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
+#   This is especially recommended for binary packages to ensure reproducibility, and is more
+#   commonly ignored for libraries.
+#   https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
+#poetry.lock
+
+# pdm
+#   Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
+#pdm.lock
+#   pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
+#   in version control.
+#   https://pdm.fming.dev/#use-with-ide
+.pdm.toml
+
+# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
+__pypackages__/
+
+# Celery stuff
+celerybeat-schedule
+celerybeat.pid
+
+# SageMath parsed files
+*.sage.py
+
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+
+# Spyder project settings
+.spyderproject
+.spyproject
+
+# Rope project settings
+.ropeproject
+
+# mkdocs documentation
+/site
+
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+
+# Pyre type checker
+.pyre/
+
+# pytype static type analyzer
+.pytype/
+
+# Cython debug symbols
+cython_debug/
+
+# PyCharm
+#  JetBrains specific template is maintained in a separate JetBrains.gitignore that can
+#  be added to the global gitignore or merged into this project gitignore.  For a PyCharm
+#  project, it is recommended to include the following files:
+#  .idea/
+#  *.iml
+#  *.ipr
+#  *.iws
+
+# VS Code
+.vscode/
+
+# macOS
+.DS_Store
+.AppleDouble
+.LSOverride
+
+# Windows
+Thumbs.db
+ehthumbs.db
+Desktop.ini
+
+# Linux
+*~
+
+# Temporary files
+*.tmp
+*.temp
+*.swp
+*.swo
+*~
+
+# Log files
+*.log
+
+# Database files
+*.db
+*.sqlite
+*.sqlite3
+
+# Configuration files with sensitive data
+config.ini
+secrets.json
+.env.local
+.env.production
+
+# Test files
+test_*.py
+*_test.py
+tests/
+
+# Documentation
+docs/
+*.md
+!README.md
+
+# IDE files
+.idea/
+.vscode/
+*.sublime-*
+.atom/
+
+# OS generated files
+.DS_Store
+.DS_Store?
+._*
+.Spotlight-V100
+.Trashes
+ehthumbs.db
+Thumbs.db
+
+uploads/
+chequing statement.csv
+test_images/ 
+.cursorrules.md
@@ -0,0 +1,262 @@
+# AI Bookkeeper - Data Science Engine
+
+AI-powered receipt-to-transaction matching engine using Groq LLM. This is a **Data Science Engine** that provides intelligent matching capabilities for backend applications.
+
+## 🎯 Purpose
+
+This Data Science Engine receives QuickBooks transaction data from backend applications and provides:
+- **AI-powered receipt processing** (OCR and data extraction)
+- **Intelligent receipt-transaction matching** with confidence scores
+- **Configurable AI rules** for business logic
+- **Feedback logging** for continuous improvement
+- **RESTful API** for easy integration
+
+## 🚀 Quick Start
+
+### 1. Install Dependencies
+```bash
+pip install -r requirements.txt
+```
+
+### 2. Configure API Keys
+Create a `.env` file in the project root with your Groq API key:
+
+```bash
+# Create .env file
+echo "GROQ_API_KEY=your_actual_groq_api_key_here" > .env
+```
+
+**Important**: Get your API key from [Groq Console](https://console.groq.com/)
+
+### 3. Start the Server
+```bash
+# Option 1: Using the main script
+python main.py
+
+# Option 2: Using uvicorn directly
+uvicorn main:app --host 0.0.0.0 --port 8343 --reload
+```
+
+### 4. Access API Documentation
+- **Swagger UI**: http://localhost:8343/docs
+- **ReDoc**: http://localhost:8343/redoc
+
+## 📋 API Endpoints
+
+### Transaction Import
+- `POST /transactions/import/csv` - Import transactions from CSV file
+- `POST /transactions/import/image` - Import transactions from image/PDF
+
+### Receipt Processing
+- `POST /upload-multiple` - Upload multiple receipt documents
+- `POST /process/{file_id}` - Extract data from uploaded documents
+
+### AI Matching Engine
+- `POST /match-specific` - Match specific receipts to transactions using AI
+
+### AI Rules Management
+- `POST /rules` - Add new AI rules
+- `GET /rules` - List all active rules
+- `DELETE /rules/{rule_name}` - Delete rules
+
+### System Monitoring
+- `GET /stats` - Get system statistics and performance metrics
+- `GET /` - Health check endpoint
+
+## 🔧 Core Components
+
+### **AIMatcher** (`ai_matcher.py`)
+- Uses Groq LLM to compare receipts and transactions
+- Provides confidence scores and reasoning
+- Configurable matching criteria (amount, date, vendor)
+- Rate limiting to prevent API quota exhaustion
+
+### **AIRulesEngine** (`ai_rules.py`)
+- Applies business rules for auto-approval and categorization
+- Configurable rule conditions and actions
+- Supports system and user-generated rules
+- Built-in conditions are matched directly; custom conditions are evaluated as Python expressions via `eval()`, so only accept rules from trusted sources
+
+### **DocumentProcessor** (`document_processor.py`)
+- AI-powered receipt data extraction using Groq vision model
+- Supports PDF and image formats
+- Robust JSON parsing with error handling
+- Extracts vendor, amount, date, tax, and category information
+
+### **MatchingEngine** (`matching_engine.py`)
+- Main orchestrator combining all components
+- Handles the complete matching workflow
+- Provides statistics and feedback logging
+- Configurable confidence thresholds
+
+### **FeedbackLogger** (`feedback_logger.py`)
+- Tracks manual overrides for AI training
+- Maintains audit trail of user decisions
+- Enables continuous model improvement
+
+## 📊 Configuration
+
+Edit `config.py` to adjust:
+- **Confidence threshold** (default: 0.3)
+- **Date tolerance days** (default: 7)
+- **Amount tolerance percent** (default: 5%)
+- **Groq API key** (from environment variable)
+
+## 🔄 Integration Workflow
+
+### 1. Import Transactions
+```bash
+# Import from CSV
+curl -X POST -F "file=@transactions.csv" http://localhost:8343/transactions/import/csv
+
+# Import from image
+curl -X POST -F "file=@statement.jpg" http://localhost:8343/transactions/import/image
+```
+
+### 2. Upload and Process Receipts
+```bash
+# Upload receipts
+curl -X POST -F "files=@receipt1.jpg" -F "files=@receipt2.jpg" http://localhost:8343/upload-multiple
+
+# Process a specific receipt
+curl -X POST http://localhost:8343/process/{file_id}
+```
+
+### 3. AI Matching
+```bash
+# Match specific receipts
+curl -X POST -H "Content-Type: application/json" \
+  -d '["file_id_1", "file_id_2"]' \
+  http://localhost:8343/match-specific
+```
+
+### 4. Check Results
+```bash
+# Get system stats
+curl http://localhost:8343/stats
+
+# View AI rules
+curl http://localhost:8343/rules
+```
+
+## 🎯 Key Features
+
+- **AI-powered matching** with confidence scores
+- **Rule-based auto-approval** and categorization
+- **Feedback logging** for continuous improvement
+- **Configurable matching parameters**
+- **RESTful JSON API** for easy backend integration
+- **Comprehensive error handling**
+- **Rate limiting** to prevent API quota exhaustion
+- **Robust JSON parsing** for AI responses
+
+## 📝 Data Formats
+
+### Transaction Input (CSV)
+```csv
+Date,Description,Amount,Category
+2024-01-15,Starbucks Coffee,12.50,Food & Dining
+2024-01-16,Office Supplies,45.99,Office
+```
+
+### Receipt Processing Output
+```json
+{
+  "vendor": "Starbucks",
+  "total_amount": 12.50,
+  "tax_amount": 1.25,
+  "date": "2024-01-15",
+  "category": "Food & Dining",
+  "confidence": 0.95,
+  "extraction_success": true
+}
+```
+
+### Match Result Output
+```json
+{
+  "receipt_id": "uuid",
+  "transaction_id": "transaction_123",
+  "confidence_score": 0.95,
+  "match_reason": "Same vendor, minor date difference (Auto-approved by rules)",
+  "receipt_vendor": "Starbucks",
+  "receipt_amount": 12.50,
+  "transaction_vendor": "STARBUCKS",
+  "transaction_amount": 12.50
+}
+```
+
+## 🔍 AI Matching Criteria
+
+The engine uses multiple criteria for matching:
+
+1. **Amount Similarity** - Compares receipt and transaction amounts (5% tolerance)
+2. **Date Proximity** - Checks date closeness (7-day tolerance)
+3. **Vendor Matching** - AI-powered vendor name comparison using Groq LLM
+4. **Rule-based Auto-approval** - Automatic approval for exact matches and high-confidence matches
+
+## 🛠️ Development
+
+### Project Structure
+```
+├── main.py                 # FastAPI application entry point
+├── ai_matcher.py           # AI-powered matching logic
+├── ai_rules.py            # Business rules engine
+├── document_processor.py   # Receipt data extraction
+├── matching_engine.py      # Main matching orchestrator
+├── feedback_logger.py      # User feedback tracking
+├── models.py              # Pydantic data models
+├── api_models.py          # API request/response models
+├── config.py              # Configuration settings
+├── tax_rules_engine.py    # Tax rules engine (imported by ai_rules.py)
+├── requirements.txt       # Python dependencies
+└── test_images/           # Test image files
+```
+
+### Running Tests
+```bash
+# Test the server
+curl http://localhost:8343/
+
+# Test stats endpoint
+curl http://localhost:8343/stats
+
+# Test rules endpoint
+curl http://localhost:8343/rules
+```
+
+## 🚀 Production Deployment
+
+For production deployment:
+- Replace in-memory storage with a database (PostgreSQL recommended)
+- Configure proper authentication and authorization
+- Set up monitoring and logging (ELK stack recommended)
+- Use environment variables for all configuration
+- Implement proper error handling and retries
+- Set up rate limiting and API quotas
+- Configure CORS for frontend integration
+- Use HTTPS in production
+
+## 📞 Support
+
+This Data Science Engine is designed to be integrated with backend applications that handle:
+- QuickBooks API connections
+- User interface and workflows
+- Data persistence and management
+- External integrations
+
+The engine focuses purely on AI/ML capabilities and provides a clean JSON API for backend integration.
+
+## 🔧 Troubleshooting
+
+### Common Issues
+
+1. **API Key Error**: Ensure `GROQ_API_KEY` is set in your `.env` file
+2. **Port Already in Use**: Kill existing process with `pkill -f "python main.py"`
+3. **Import Errors**: Install dependencies with `pip install -r requirements.txt`
+4. **Rate Limiting**: The system includes built-in rate limiting to prevent API quota exhaustion
+
+### Logs
+Check the application logs for detailed error information:
+```bash
+tail -f app.log
+``` 
@@ -0,0 +1,244 @@
+import groq
+from datetime import datetime, timedelta
+from typing import List, Tuple
+import config
+from models import Receipt, Transaction, Match
+import time
+import logging
+import asyncio
+
+# Set up logging
+# NOTE(review): basicConfig() at import time configures the root logger for every
+# process that imports this module; applications normally do this once at startup.
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+
+class AIMatcher:
+    def __init__(self, model: str = "llama3-8b-8192", max_retries: int = 3,
+                 retry_delay: float = 2, rate_limit_delay: float = 1.0):
+        """Create a matcher backed by the Groq chat-completions API.
+
+        Args:
+            model: Groq model identifier used for scoring prompts.
+                NOTE(review): confirm the default is still served by Groq.
+            max_retries: Maximum number of attempts per scoring request.
+            retry_delay: Base delay in seconds for the exponential backoff
+                applied between failed attempts.
+            rate_limit_delay: Minimum spacing in seconds enforced by
+                ``_rate_limit``.
+        """
+        self.client = groq.Groq(api_key=config.GROQ_API_KEY)
+        self.model = model
+        self.max_retries = max_retries
+        self.retry_delay = retry_delay
+        self.rate_limit_delay = rate_limit_delay
+        # time.time() of the most recent rate-limited call; 0 means "never called"
+        self.last_api_call = 0
+    
+    def match_receipts_to_transactions(self, receipts: List[Receipt], transactions: List[Transaction]) -> List[Match]:
+        """Return the best AI match for each receipt, highest confidence first.
+
+        Receipts for which ``_find_best_match`` yields nothing are logged and
+        left out of the result.
+        """
+        logger.info(f"Starting AI matching for {len(receipts)} receipts against {len(transactions)} transactions")
+        matches = []
+
+        for position, receipt in enumerate(receipts, start=1):
+            logger.info(f"Processing receipt {position}/{len(receipts)}: {receipt.vendor} - ${receipt.amount}")
+
+            # Throttle once per receipt before its candidates are scored
+            self._rate_limit()
+
+            # Only the single highest-confidence candidate is kept per receipt
+            candidate = self._find_best_match(receipt, transactions)
+            if not candidate:
+                logger.warning(f"No match found for receipt: {receipt.vendor} - ${receipt.amount}")
+                continue
+
+            matches.append(candidate)
+            logger.info(f"Found match: {candidate.confidence_score:.3f} - {candidate.match_reason}")
+
+        # Order by confidence score, best first (stable for ties)
+        matches.sort(key=lambda m: m.confidence_score, reverse=True)
+        logger.info(f"AI matching completed. Found {len(matches)} matches")
+        return matches
+    
+    def _rate_limit(self):
+        """Sleep as needed so successive calls are at least ``rate_limit_delay`` seconds apart."""
+        elapsed = time.time() - self.last_api_call
+
+        if elapsed < self.rate_limit_delay:
+            pause = self.rate_limit_delay - elapsed
+            logger.debug(f"Rate limiting: sleeping for {pause:.2f} seconds")
+            time.sleep(pause)
+
+        # Remember when this call was let through
+        self.last_api_call = time.time()
+    
+    def _find_best_match(self, receipt: Receipt, transactions: List[Transaction]) -> Match:
+        """Return the highest-scoring candidate Match for ``receipt``, or None.
+
+        None is returned when no transaction passes ``_filter_candidates`` or
+        when no candidate scores above 0.
+        """
+        shortlist = self._filter_candidates(receipt, transactions)
+        if not shortlist:
+            logger.warning(f"No candidates found for receipt: {receipt.vendor} - ${receipt.amount}")
+            return None
+
+        logger.info(f"Found {len(shortlist)} candidates for receipt: {receipt.vendor}")
+
+        winner, top_score = None, 0
+        for txn in shortlist:
+            score, reason = self._calculate_match_score(receipt, txn)
+            logger.debug(f"Score {score:.3f} for transaction {txn.vendor}: {reason}")
+
+            # Strictly greater: a tie keeps the earlier candidate, and a score of 0 never wins
+            if score > top_score:
+                top_score = score
+                winner = Match(receipt, txn, score, reason)
+
+        return winner
+    
+    def _filter_candidates(self, receipt: Receipt, transactions: List[Transaction]) -> List[Transaction]:
+        """Keep transactions whose absolute amount is within 200% of the receipt amount of it."""
+        # 200% threshold - very inclusive; only obviously different amounts are dropped
+        tolerance = receipt.amount * 2.0
+
+        # Transaction amounts are compared by absolute value
+        shortlist = [
+            txn for txn in transactions
+            if abs(receipt.amount - abs(txn.amount)) <= tolerance
+        ]
+
+        logger.debug(f"Filtered {len(transactions)} transactions to {len(shortlist)} candidates")
+        return shortlist
+    
+    def _calculate_match_score(self, receipt: Receipt, transaction: Transaction) -> Tuple[float, str]:
+        """Ask the LLM how likely ``transaction`` is the charge behind ``receipt``.
+
+        The prompt includes both records plus precomputed date and amount
+        differences. Each attempt is throttled through ``_rate_limit`` and
+        failed attempts are retried with exponential backoff.
+
+        Returns:
+            ``(score, reason)`` as parsed by ``_parse_ai_response``. When every
+            attempt fails (or ``max_retries`` is 0) the result is
+            ``(0.0, "AI error after N attempts: ...")`` rather than None.
+        """
+        # Calculate differences for the AI to consider
+        date_diff = abs((receipt.receipt_date - transaction.transaction_date).days)
+        transaction_amount_abs = abs(transaction.amount)
+        amount_diff = abs(receipt.amount - transaction_amount_abs)
+        # Guard against division by zero for zero/negative receipt amounts
+        amount_percent_diff = (amount_diff / receipt.amount) * 100 if receipt.amount > 0 else 0
+
+        prompt = f"""
+        Compare this receipt with this transaction and provide a confidence score (0-1) and brief reason.
+        
+        Receipt: {receipt.vendor}, ${receipt.amount}, {receipt.receipt_date.strftime('%Y-%m-%d')}
+        Receipt Description: {receipt.description}
+        Receipt Category: {receipt.category}
+        Transaction: {transaction.vendor}, ${transaction.amount} (absolute: ${transaction_amount_abs}), {transaction.transaction_date.strftime('%Y-%m-%d')}
+        Transaction Notes: {transaction.notes}
+        
+        Differences:
+        - Date difference: {date_diff} days
+        - Amount difference: ${amount_diff} ({amount_percent_diff:.1f}%)
+        - Vendor comparison: "{receipt.vendor}" vs "{transaction.vendor}"
+        - Description/Notes comparison: "{receipt.description}" vs "{transaction.notes}"
+        - Category: {receipt.category}
+        
+        Score this potential match based on how likely it is the correct match:
+        
+        - Perfect matches (same vendor, amount, date): 0.95-1.0
+        - High confidence (minor differences): 0.8-0.94
+        - Medium confidence (moderate differences): 0.6-0.79
+        - Low confidence (significant differences): 0.4-0.59
+        - Very low confidence (major differences): 0.2-0.39
+        - Minimal similarity: 0.1-0.19
+        - No meaningful similarity: 0.0-0.09
+        
+        Consider description and category similarity in your scoring.
+        
+        IMPORTANT: Return ONLY the score and reason separated by a pipe character.
+        Format: [score]|[reason]
+        Example: 0.85|Same vendor, same amount, 2 days apart
+        """
+
+        last_error = None
+        for attempt in range(self.max_retries):
+            # One API request is made per candidate, so throttle per request;
+            # throttling only once per receipt does not bound the request rate.
+            self._rate_limit()
+            try:
+                result = self._call_groq_api_with_timeout(prompt, timeout=30)  # Increased timeout
+
+                # Parse the result - handle multiple formats
+                score, reason = self._parse_ai_response(result)
+
+                logger.debug(f"AI Response: {result}")
+                logger.debug(f"Parsed: score={score}, reason={reason}")
+
+                return score, reason
+
+            except Exception as e:
+                last_error = e
+                logger.warning(f"Attempt {attempt + 1} failed for receipt {receipt.id}: {str(e)}")
+                if attempt < self.max_retries - 1:
+                    # Exponential backoff for rate limiting
+                    sleep_time = self.retry_delay * (2 ** attempt)
+                    logger.info(f"Waiting {sleep_time} seconds before retry...")
+                    time.sleep(sleep_time)
+
+        # Reached only when no attempt succeeded; always hand back a tuple
+        logger.error(f"All attempts failed for receipt {receipt.id}")
+        return 0.0, f"AI error after {self.max_retries} attempts: {str(last_error)}"
+    
+    def _parse_ai_response(self, result: str) -> Tuple[float, str]:
+        """Extract a ``(score, reason)`` pair from the model's free-form reply.
+
+        Strategies, tried in order:
+
+        1. Pipe-delimited reply: the first segment whose leading number lies in
+           [0, 1] is the score; the other non-empty segments, joined with
+           " | ", form the reason.
+        2. The first number in [0, 1] found anywhere in the reply.
+        3. The first number of any size, read as a percentage and capped at 1.
+        4. ``(0.0, "Unparseable response: ...")`` when the reply has no number.
+        """
+        import re
+
+        number_pattern = r'\d+\.?\d*'
+
+        result = result.strip()
+        logger.debug(f"Parsing AI response: {result}")
+
+        # Try to find score in various formats
+        if '|' in result:
+            parts = result.split('|')
+            logger.debug(f"Split response into {len(parts)} parts: {parts}")
+
+            for i, part in enumerate(parts):
+                # Read the first numeric token of the segment. Filtering the
+                # segment down to digits and dots would fuse unrelated
+                # characters ("0.85." stays "0.85.", "2 of 3" becomes "23").
+                token = re.search(number_pattern, part)
+                if token is None:
+                    continue
+                score = float(token.group())
+                if 0 <= score <= 1:  # Valid confidence score
+                    # Get reason from other parts
+                    reason_parts = [p.strip() for j, p in enumerate(parts) if j != i and p.strip()]
+                    reason = ' | '.join(reason_parts) if reason_parts else "Score extracted"
+                    logger.debug(f"Found score {score} in part {i}, reason: {reason}")
+                    return score, reason
+
+        # Every token matched by the pattern is a valid float literal
+        numbers = [float(token) for token in re.findall(number_pattern, result)]
+
+        # Try to extract just a number from the response
+        for score in numbers:
+            if 0 <= score <= 1:  # Valid confidence score
+                logger.debug(f"Extracted score {score} from response")
+                return score, f"Extracted from response: {result[:50]}..."
+
+        # Fallback - every number found is above 1, so assume a percentage
+        if numbers:
+            score = min(1.0, numbers[0] / 100)  # Clamp to the 0-1 range
+            logger.debug(f"Normalized score {score} from response")
+            return score, f"Normalized from response: {result[:50]}..."
+
+        # Final fallback
+        logger.warning(f"Could not parse AI response: {result}")
+        return 0.0, f"Unparseable response: {result[:50]}..."
+    
+    def _call_groq_api_with_timeout(self, prompt: str, timeout: int = 15) -> str:
+        """Send ``prompt`` to the chat-completions API and return the stripped reply.
+
+        The request runs on a worker thread so the caller waits at most
+        ``timeout`` seconds. Retries are handled by the caller.
+
+        Raises:
+            Exception: "API call timed out after N seconds" when the worker has
+                not finished in time; any error raised by the API call itself
+                propagates unchanged.
+        """
+        import concurrent.futures
+
+        def api_call():
+            response = self.client.chat.completions.create(
+                model=self.model,
+                messages=[{"role": "user", "content": prompt}],
+                max_tokens=200,
+                temperature=0.1
+            )
+            return response.choices[0].message.content.strip()
+
+        # Do not use the executor as a context manager: leaving the `with` block
+        # calls shutdown(wait=True), which blocks until the request finishes and
+        # defeats the timeout.
+        executor = concurrent.futures.ThreadPoolExecutor(max_workers=1)
+        try:
+            future = executor.submit(api_call)
+            return future.result(timeout=timeout)
+        except concurrent.futures.TimeoutError:
+            raise Exception(f"API call timed out after {timeout} seconds")
+        finally:
+            # Return immediately; an abandoned worker finishes in the background
+            executor.shutdown(wait=False)
@@ -0,0 +1,126 @@
+from dataclasses import dataclass
+from typing import Dict, Any, List
+import config
+from models import Receipt, Transaction
+from tax_rules_engine import TaxRulesEngine
+
+# A named rule: when `condition` holds for a receipt/transaction pair,
+# AIRulesEngine performs `action`.
+@dataclass
+class AIRule:
+    name: str  # identifier; remove_rule() matches on this
+    condition: str  # condition string checked by _evaluate_condition()
+    action: str  # action key handled by _execute_action()
+    source: str  # origin label, e.g. "system" or "tax_system"
+    status: str = "active"  # apply_rules() skips any rule whose status is not "active"
+
+class AIRulesEngine:
+    def __init__(self):
+        """Create the engine with its own TaxRulesEngine and the default rule set."""
+        self.rules: List[AIRule] = []
+        self.tax_rules_engine = TaxRulesEngine()
+        # Replaces the empty list above with the built-in rules
+        self._load_default_rules()
+    
+    def _load_default_rules(self):
+        """Reset ``self.rules`` to the built-in system and tax rules."""
+        # Each entry is (name, condition, action, source)
+        default_specs = (
+            ("exact_amount_match", "amount_diff <= 0.01", "auto_approve", "system"),
+            ("same_vendor_same_date", "vendor_match and date_diff <= 1", "high_confidence", "system"),
+            ("gas_station_pattern", "vendor_contains_gas_or_fuel", "categorize_transport", "system"),
+            # Tax-related rules
+            ("fx_currency_mismatch", "currency_mismatch", "flag_fx_review", "tax_system"),
+            ("meals_entertainment", "is_meals_entertainment", "apply_me_tax_rule", "tax_system"),
+            ("provincial_tax_calculation", "has_address_info", "calculate_provincial_tax", "tax_system"),
+        )
+        self.rules = [AIRule(*spec) for spec in default_specs]
+    
+    def apply_rules(self, receipt: Receipt, transaction: Transaction) -> Dict[str, Any]:
+        """Run every active rule against the pair and return the accumulated outcome.
+
+        The returned dict has the keys ``auto_approve``, ``confidence_boost``,
+        ``category`` and ``tax_analysis``.
+        """
+        outcome = {"auto_approve": False, "confidence_boost": 0, "category": None, "tax_analysis": {}}
+
+        # Rules with any status other than "active" are skipped
+        active_rules = (rule for rule in self.rules if rule.status == "active")
+        for rule in active_rules:
+            if self._evaluate_condition(rule.condition, receipt, transaction):
+                self._execute_action(rule.action, outcome, receipt, transaction)
+
+        return outcome
+    
+    def _evaluate_condition(self, condition: str, receipt: Receipt, transaction: Transaction) -> bool:
+        """Evaluate a rule condition for a receipt/transaction pair.
+
+        The built-in condition strings are answered from precomputed values.
+        Any other string is evaluated as a Python expression that may use the
+        derived values (``amount_diff``, ``date_diff``, ``vendor_match``, ...),
+        ``receipt``, ``transaction`` and a few numeric helpers.
+
+        SECURITY: custom conditions go through ``eval()``. Builtins are removed
+        from the evaluation namespace, but attribute access on the passed
+        objects remains possible, so this is NOT a sandbox. Conditions must
+        come from trusted sources only.
+
+        Returns:
+            The condition's value; False when a custom condition fails to
+            evaluate.
+        """
+        receipt_vendor = receipt.vendor.lower()
+        transaction_vendor = transaction.vendor.lower()
+
+        amount_diff = abs(receipt.amount - abs(transaction.amount))
+        date_diff = abs((receipt.receipt_date - transaction.transaction_date).days)
+        # Vendors match when either name contains the other, case-insensitively
+        vendor_match = receipt_vendor in transaction_vendor or transaction_vendor in receipt_vendor
+        vendor_contains_gas_or_fuel = 'gas' in receipt_vendor or 'fuel' in receipt_vendor
+
+        # Tax-related conditions
+        currency_mismatch = receipt.currency != transaction.currency
+        is_meals_entertainment = receipt.is_meals_entertainment
+        has_address_info = receipt.billing_address is not None or receipt.shipping_address is not None
+
+        # Built-in conditions never reach eval()
+        builtin_conditions = {
+            "amount_diff <= 0.01": amount_diff <= 0.01,
+            "vendor_match and date_diff <= 1": vendor_match and date_diff <= 1,
+            "vendor_contains_gas_or_fuel": vendor_contains_gas_or_fuel,
+            "currency_mismatch": currency_mismatch,
+            "is_meals_entertainment": is_meals_entertainment,
+            "has_address_info": has_address_info,
+        }
+        if condition in builtin_conditions:
+            return builtin_conditions[condition]
+
+        eval_globals = {
+            # Without this key eval() injects the full builtins module
+            # (open, __import__, ...) into the namespace.
+            "__builtins__": {},
+            "amount_diff": amount_diff,
+            "date_diff": date_diff,
+            "vendor_match": vendor_match,
+            "vendor_contains_gas_or_fuel": vendor_contains_gas_or_fuel,
+            "currency_mismatch": currency_mismatch,
+            "is_meals_entertainment": is_meals_entertainment,
+            "has_address_info": has_address_info,
+            "receipt": receipt,
+            "transaction": transaction,
+            "abs": abs,
+            "len": len,
+            "min": min,
+            "max": max,
+            "sum": sum,
+            "round": round,
+        }
+        try:
+            return bool(eval(condition, eval_globals, {}))
+        except Exception as e:
+            # An arbitrary expression can raise anything (AttributeError,
+            # ZeroDivisionError, ...); a broken rule must not abort matching.
+            print(f"Warning: Invalid condition '{condition}': {e}")
+            return False
+    
+    def _execute_action(self, action: str, results: Dict[str, Any], receipt: Receipt, transaction: Transaction):
+        """Apply the effect of ``action`` to ``results`` in place.
+
+        Unrecognised action names are ignored.
+        """
+        if action == "auto_approve":
+            results["auto_approve"] = True
+            return
+
+        if action == "high_confidence":
+            results["confidence_boost"] += 0.2
+            return
+
+        if action == "categorize_transport":
+            results["category"] = "Transportation"
+            return
+
+        if action == "flag_fx_review":
+            # Record the FX analysis; lower confidence when it asks for manual review
+            fx_outcome = self.tax_rules_engine.apply_fx_rule(receipt, transaction)
+            results["tax_analysis"]["fx"] = fx_outcome
+            if fx_outcome.get("requires_manual_review", False):
+                results["confidence_boost"] -= 0.1
+            return
+
+        if action == "apply_me_tax_rule":
+            # Meals & entertainment analysis
+            results["tax_analysis"]["meals_entertainment"] = (
+                self.tax_rules_engine.apply_meals_entertainment_rule(receipt)
+            )
+            return
+
+        if action == "calculate_provincial_tax":
+            # Sales tax analysis
+            results["tax_analysis"]["sales_tax"] = self.tax_rules_engine.apply_sales_tax_rule(receipt)
+    
+    def add_rule(self, rule: AIRule):
+        """Append ``rule`` to the rule list; no duplicate-name check is performed."""
+        self.rules.append(rule)
+    
+    def remove_rule(self, rule_name: str):
+        """Drop every rule named ``rule_name``; a no-op when none matches."""
+        self.rules = list(filter(lambda rule: rule.name != rule_name, self.rules))
+    
+    def apply_tax_rules(self, receipt: Receipt, transaction: Transaction = None) -> Dict[str, Any]:
+        """Apply all tax rules to a receipt/transaction pair.
+
+        Delegates to ``TaxRulesEngine.apply_all_tax_rules`` and returns its
+        result unchanged; ``transaction`` may be omitted (passed as None).
+        """
+        return self.tax_rules_engine.apply_all_tax_rules(receipt, transaction)
@@ -0,0 +1,120 @@
+from pydantic import BaseModel
+from datetime import datetime
+from typing import List, Optional
+
+# Address payload; `country` defaults to "Canada".
+class AddressRequest(BaseModel):
+    province: str
+    city: str
+    postal_code: str
+    country: str = "Canada"
+
+# Receipt payload: core receipt fields plus optional tax-rule fields
+# (addresses, currency defaulting to "CAD", meals-and-entertainment flag).
+class ReceiptRequest(BaseModel):
+    id: str
+    file_name: str
+    upload_date: datetime
+    receipt_date: datetime
+    amount: float
+    tax: float
+    vendor: str
+    category: str
+    description: str
+    # Tax rule fields
+    billing_address: Optional[AddressRequest] = None
+    shipping_address: Optional[AddressRequest] = None
+    currency: str = "CAD"
+    is_meals_entertainment: bool = False
+
+# Transaction payload; `currency` defaults to "CAD" and `fx_rate` is optional.
+class TransactionRequest(BaseModel):
+    id: str
+    transaction_date: datetime
+    amount: float
+    vendor: str
+    notes: str
+    # Tax rule fields
+    currency: str = "CAD"
+    fx_rate: Optional[float] = None
+
+# Asset payload embedded in DepreciationRequest; every field is required.
+class AssetRequest(BaseModel):
+    id: str
+    name: str
+    purchase_date: datetime
+    purchase_amount: float
+    useful_life_years: int
+    residual_value: float
+    cca_rate: float
+    asset_class: str
+
+# Matching request carrying the ids of the receipts and transactions involved.
+class MatchingRequest(BaseModel):
+    receipt_ids: List[str]
+    transaction_ids: List[str]
+
+# One receipt/transaction match with its score, reason, optional tax analysis
+# and currency details (both currencies default to "CAD", `currency_match` to True).
+class MatchResponse(BaseModel):
+    receipt_id: str
+    transaction_id: str
+    confidence_score: float
+    match_reason: str
+    tax_analysis: Optional[dict] = None
+    # Currency information
+    receipt_currency: str = "CAD"
+    transaction_currency: str = "CAD"
+    currency_match: bool = True
+
+# Matching result: the list of matches plus a free-form `stats` dict.
+class MatchingResponse(BaseModel):
+    matches: List[MatchResponse]
+    stats: dict
+
+# Approval decision for a match, with an optional free-text reason.
+class ApprovalRequest(BaseModel):
+    match_id: str
+    approved: bool
+    reason: Optional[str] = None
+
+# Rule definition payload; `source` defaults to "user".
+# NOTE(review): the fields mirror ai_rules.AIRule; presumably converted to one by the API layer.
+class RuleRequest(BaseModel):
+    name: str
+    condition: str
+    action: str
+    source: str = "user"
+
+# Upload result: file id, filename, upload timestamp and a status string.
+class DocumentUploadResponse(BaseModel):
+    file_id: str
+    filename: str
+    upload_date: datetime
+    status: str
+
+# Extraction result for an uploaded document; every extracted field and
+# `error` are optional and default to None.
+class DocumentProcessResponse(BaseModel):
+    file_id: str
+    extraction_success: bool
+    vendor: Optional[str] = None
+    description: Optional[str] = None
+    total_amount: Optional[float] = None
+    tax_amount: Optional[float] = None
+    date: Optional[str] = None
+    category: Optional[str] = None
+    confidence: Optional[float] = None
+    error: Optional[str] = None
+
+# New tax-related models
+# Tax calculation request for a receipt; the transaction id is optional.
+class TaxCalculationRequest(BaseModel):
+    receipt_id: str
+    transaction_id: Optional[str] = None
+
+# Tax calculation result: names of the rules applied plus per-rule result
+# dicts; `fx_analysis` is optional.
+class TaxCalculationResponse(BaseModel):
+    receipt_id: str
+    rules_applied: List[str]
+    sales_tax: dict
+    fx_analysis: Optional[dict] = None
+    meals_entertainment: dict
+
+# Depreciation request for one asset, year and method.
+class DepreciationRequest(BaseModel):
+    asset: AssetRequest
+    year: int
+    method: str  # "straight_line" or "cca"
+
+# Depreciation result; `total_depreciation` and `error` are optional.
+class DepreciationResponse(BaseModel):
+    asset_id: str
+    year: int
+    method: str
+    depreciation: float
+    book_value: float
+    total_depreciation: Optional[float] = None
+    success: bool
+    error: Optional[str] = None
@@ -0,0 +1,15 @@
+import os
+from dotenv import load_dotenv
+
+load_dotenv()
+
+# Get API key from environment variable with fallback
+GROQ_API_KEY = os.getenv("GROQ_API_KEY", "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")
+
+# Validate API key
+if not GROQ_API_KEY or GROQ_API_KEY == "your_api_key_here":
+    raise ValueError("GROQ_API_KEY environment variable is not set or invalid. Please set it in your .env file.")
+
+CONFIDENCE_THRESHOLD = 0.3
+DATE_TOLERANCE_DAYS = 7
+AMOUNT_TOLERANCE_PERCENT = 0.05 
@@ -0,0 +1,498 @@
+import groq
+import base64
+import io
+from PIL import Image
+import PyPDF2
+from typing import Dict, Any, List, Optional
+import config
+import os
+import aiofiles
+from datetime import datetime
+import logging
+
+logger = logging.getLogger(__name__)
+
+class DocumentProcessor:
+    def __init__(self):
+        self.client = groq.Groq(api_key=config.GROQ_API_KEY)
+        self.model = "meta-llama/llama-4-scout-17b-16e-instruct"  # Vision model
+    
+    async def process_file(self, file_path: str, file_type: str) -> Dict[str, Any]:
+        """Process uploaded file and extract receipt data"""
+        try:
+            if file_type.lower() in ['jpg', 'jpeg', 'png', 'gif', 'bmp']:
+                return await self._process_image(file_path)
+            elif file_type.lower() == 'pdf':
+                return await self._process_pdf(file_path)
+            else:
+                raise ValueError(f"Unsupported file type: {file_type}")
+        except Exception as e:
+            return {"error": str(e)}
+    
+    async def _process_image(self, image_path: str) -> Dict[str, Any]:
+        """Extract data from image using Groq vision"""
+        try:
+            # Encode image to base64
+            base64_image = self._encode_image(image_path)
+            
+            # Create Groq vision prompt
+            prompt = """
+            Analyze this receipt image and extract the following information in JSON format:
+            {
+                "vendor": "Store/company name",
+                "description": "Detailed description of items/services purchased",
+                "total_amount": 0.00,
+                "tax_amount": 0.00,
+                "date": "YYYY-MM-DD",
+                "category": "Food/Transport/Office/Other",
+                "confidence": 0.95
+            }
+            
+            Rules:
+            - Extract vendor name as it appears on receipt
+            - Extract description of items/services purchased (e.g., "Coffee and sandwich", "Gasoline", "Office supplies")
+            - Total amount should be the final total including tax
+            - Tax amount is separate tax line if available
+            - Date should be the date on the receipt
+            - Categorize based on vendor type (Starbucks=Food, Shell=Transport, etc.)
+            - Confidence score 0-1 based on how clear the receipt is
+            
+            Return only valid JSON.
+            """
+            
+            # Call Groq vision API with correct format
+            response = self.client.chat.completions.create(
+                messages=[
+                    {
+                        "role": "user",
+                        "content": [
+                            {"type": "text", "text": prompt},
+                            {
+                                "type": "image_url",
+                                "image_url": {
+                                    "url": f"data:image/jpeg;base64,{base64_image}",
+                                },
+                            },
+                        ],
+                    }
+                ],
+                model=self.model,
+                max_tokens=500,
+                temperature=0.1
+            )
+            
+            # Parse response
+            result_text = response.choices[0].message.content.strip()
+            return self._parse_extraction_result(result_text)
+            
+        except Exception as e:
+            return {"error": f"Image processing error: {str(e)}"}
+    
+    def _encode_image(self, image_path: str) -> str:
+        """Encode image to base64 string"""
+        with open(image_path, "rb") as image_file:
+            return base64.b64encode(image_file.read()).decode('utf-8')
+    
+    async def _process_pdf(self, pdf_path: str) -> Dict[str, Any]:
+        """Extract data from PDF by converting to image first"""
+        try:
+            # For now, extract text from PDF and process as text
+            text_content = self._extract_text_from_pdf(pdf_path)
+            return self._process_text_content(text_content)
+            
+        except Exception as e:
+            return {"error": f"PDF processing error: {str(e)}"}
+    
+    def _extract_text_from_pdf(self, pdf_path: str) -> str:
+        """Extract text from PDF"""
+        try:
+            with open(pdf_path, 'rb') as file:
+                pdf_reader = PyPDF2.PdfReader(file)
+                text = ""
+                for page in pdf_reader.pages:
+                    text += page.extract_text() + "\n"
+                return text
+        except Exception as e:
+            return ""
+    
+    def _process_text_content(self, text_content: str) -> Dict[str, Any]:
+        """Process text content using Groq (fallback for PDFs)"""
+        try:
+            prompt = f"""
+            Analyze this receipt text and extract the following information in JSON format:
+            
+            Receipt Text:
+            {text_content}
+            
+            Extract:
+            {{
+                "vendor": "Store/company name",
+                "description": "Detailed description of items/services purchased",
+                "total_amount": 0.00,
+                "tax_amount": 0.00,
+                "date": "YYYY-MM-DD",
+                "category": "Food/Transport/Office/Other",
+                "confidence": 0.95
+            }}
+            
+            Rules:
+            - Extract vendor name as it appears on receipt
+            - Extract description of items/services purchased (e.g., "Coffee and sandwich", "Gasoline", "Office supplies")
+            - Total amount should be the final total including tax
+            - Tax amount is separate tax line if available
+            - Date should be the date on the receipt
+            - Categorize based on vendor type
+            - Confidence score 0-1 based on clarity
+            
+            Return only valid JSON.
+            """
+            
+            response = self.client.chat.completions.create(
+                model=self.model,
+                messages=[{"role": "user", "content": prompt}],
+                max_tokens=500,
+                temperature=0.1
+            )
+            
+            result_text = response.choices[0].message.content.strip()
+            return self._parse_extraction_result(result_text)
+            
+        except Exception as e:
+            return {"error": f"Text processing error: {str(e)}"}
+    
+    def _parse_extraction_result(self, result_text: str) -> Dict[str, Any]:
+        """Parse Groq response and extract JSON data"""
+        try:
+            # Clean up response and extract JSON
+            import json
+            import re
+            
+            # Find JSON in response - try multiple patterns
+            json_match = re.search(r'\{.*\}', result_text, re.DOTALL)
+            if json_match:
+                json_str = json_match.group()
+                
+                # Clean up common JSON issues
+                json_str = re.sub(r',\s*([}\]])', r'\1', json_str)  # Remove trailing commas
+                json_str = re.sub(r'([{,])\s*([a-zA-Z_][a-zA-Z0-9_]*)\s*:', r'\1"\2":', json_str)  # Quote unquoted keys
+                
+                try:
+                    data = json.loads(json_str)
+                except json.JSONDecodeError as e:
+                    # Try to fix common JSON issues
+                    logger.warning(f"Initial JSON parsing failed: {e}")
+                    
+                    # Try to extract individual fields using regex
+                    vendor_match = re.search(r'"vendor"\s*:\s*"([^"]*)"', json_str)
+                    description_match = re.search(r'"description"\s*:\s*"([^"]*)"', json_str)
+                    total_amount_match = re.search(r'"total_amount"\s*:\s*([0-9.]+)', json_str)
+                    tax_amount_match = re.search(r'"tax_amount"\s*:\s*([0-9.]+)', json_str)
+                    date_match = re.search(r'"date"\s*:\s*"([^"]*)"', json_str)
+                    category_match = re.search(r'"category"\s*:\s*"([^"]*)"', json_str)
+                    confidence_match = re.search(r'"confidence"\s*:\s*([0-9.]+)', json_str)
+                    
+                    data = {
+                        "vendor": vendor_match.group(1) if vendor_match else "",
+                        "description": description_match.group(1) if description_match else "",
+                        "total_amount": float(total_amount_match.group(1)) if total_amount_match else 0.0,
+                        "tax_amount": float(tax_amount_match.group(1)) if tax_amount_match else 0.0,
+                        "date": date_match.group(1) if date_match else "",
+                        "category": category_match.group(1) if category_match else "Other",
+                        "confidence": float(confidence_match.group(1)) if confidence_match else 0.5
+                    }
+                
+                # Validate and clean data
+                return {
+                    "vendor": str(data.get("vendor", "")).strip(),
+                    "description": str(data.get("description", "")).strip(),
+                    "total_amount": float(data.get("total_amount", 0)),
+                    "tax_amount": float(data.get("tax_amount", 0)),
+                    "date": str(data.get("date", "")).strip(),
+                    "category": str(data.get("category", "Other")).strip(),
+                    "confidence": float(data.get("confidence", 0.5)),
+                    "extraction_success": True
+                }
+            else:
+                # Try to extract fields from plain text
+                logger.warning("No JSON found in response, attempting text extraction")
+                return self._extract_from_plain_text(result_text)
+                
+        except Exception as e:
+            logger.error(f"JSON parsing error: {str(e)}")
+            return {"error": f"JSON parsing error: {str(e)}", "extraction_success": False}
+    
+    def _extract_from_plain_text(self, text: str) -> Dict[str, Any]:
+        """Extract receipt data from plain text when JSON parsing fails"""
+        try:
+            import re
+            
+            # Extract vendor (look for common patterns)
+            vendor_patterns = [
+                r'(?:vendor|store|merchant|company)\s*[:\-]?\s*([A-Za-z0-9\s&.,]+)',
+                r'([A-Z][A-Za-z0-9\s&.,]{3,30})',  # Capitalized words
+            ]
+            
+            vendor = ""
+            for pattern in vendor_patterns:
+                match = re.search(pattern, text, re.IGNORECASE)
+                if match:
+                    vendor = match.group(1).strip()
+                    break
+            
+            # Extract amount (look for currency patterns)
+            amount_patterns = [
+                r'\$?\s*([0-9,]+\.?[0-9]*)',
+                r'(?:total|amount|sum)\s*[:\-]?\s*\$?\s*([0-9,]+\.?[0-9]*)',
+            ]
+            
+            total_amount = 0.0
+            for pattern in amount_patterns:
+                match = re.search(pattern, text, re.IGNORECASE)
+                if match:
+                    try:
+                        total_amount = float(match.group(1).replace(',', ''))
+                        break
+                    except ValueError:
+                        continue
+            
+            # Extract date
+            date_patterns = [
+                r'(\d{4}-\d{2}-\d{2})',
+                r'(\d{1,2}/\d{1,2}/\d{2,4})',
+                r'(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)\s+\d{1,2},?\s+\d{4}',
+            ]
+            
+            date = ""
+            for pattern in date_patterns:
+                match = re.search(pattern, text, re.IGNORECASE)
+                if match:
+                    date = match.group(0)
+                    break
+            
+            return {
+                "vendor": vendor or "Unknown",
+                "total_amount": total_amount,
+                "tax_amount": 0.0,
+                "date": date or "",
+                "category": "Other",
+                "confidence": 0.3,  # Low confidence for text extraction
+                "extraction_success": True
+            }
+            
+        except Exception as e:
+            logger.error(f"Text extraction error: {str(e)}")
+            return {
+                "vendor": "Unknown",
+                "total_amount": 0.0,
+                "tax_amount": 0.0,
+                "date": "",
+                "category": "Other",
+                "confidence": 0.1,
+                "extraction_success": False,
+                "error": f"Text extraction failed: {str(e)}"
+            }
+    
+    async def save_uploaded_file(self, file_content: bytes, filename: str) -> str:
+        """Save uploaded file to temporary storage"""
+        try:
+            # Create uploads directory if it doesn't exist
+            upload_dir = "uploads"
+            os.makedirs(upload_dir, exist_ok=True)
+            
+            # Generate unique filename
+            timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
+            safe_filename = f"{timestamp}_{filename.replace(' ', '_')}"
+            file_path = os.path.join(upload_dir, safe_filename)
+            
+            # Save file
+            async with aiofiles.open(file_path, 'wb') as f:
+                await f.write(file_content)
+            
+            return file_path
+            
+        except Exception as e:
+            raise Exception(f"Failed to save file: {str(e)}")
+
+    async def extract_transactions_from_image(self, image_path: str) -> Dict[str, Any]:
+        """Extract multiple transactions from an image (bank statement, credit card statement, etc.)"""
+        try:
+            # Encode image to base64
+            base64_image = self._encode_image(image_path)
+            
+            # Create Groq vision prompt for transaction extraction
+            prompt = """
+            Analyze this financial document image (bank statement, credit card statement, etc.) and extract ALL transactions in JSON format.
+            
+            Look for transaction lists, payment records, or any financial entries that show:
+            - Date
+            - Amount (positive or negative)
+            - Vendor/Description/Payee name
+            - Any additional notes or memo
+            
+            Return the transactions as a JSON array:
+            {
+                "extraction_success": true,
+                "transactions": [
+                    {
+                        "date": "YYYY-MM-DD",
+                        "amount": 0.00,
+                        "vendor": "Vendor name",
+                        "memo": "Additional notes"
+                    },
+                    {
+                        "date": "YYYY-MM-DD", 
+                        "amount": -0.00,
+                        "vendor": "Another vendor",
+                        "memo": "Payment or charge description"
+                    }
+                ]
+            }
+            
+            Rules:
+            - Extract ALL visible transactions
+            - Include both positive (credits) and negative (debits) amounts
+            - Use the actual date format from the document
+            - Vendor should be the merchant/payee name
+            - Memo can include transaction type, reference numbers, etc.
+            - If no transactions found, return empty array but set extraction_success to true
+            
+            Return only valid JSON.
+            """
+            
+            # Call Groq vision API
+            response = self.client.chat.completions.create(
+                messages=[
+                    {
+                        "role": "user",
+                        "content": [
+                            {"type": "text", "text": prompt},
+                            {
+                                "type": "image_url",
+                                "image_url": {
+                                    "url": f"data:image/jpeg;base64,{base64_image}",
+                                },
+                            },
+                        ],
+                    }
+                ],
+                model=self.model,
+                max_tokens=2000,  # Higher token limit for multiple transactions
+                temperature=0.1
+            )
+            
+            # Parse response
+            result_text = response.choices[0].message.content.strip()
+            return self._parse_transaction_extraction_result(result_text)
+            
+        except Exception as e:
+            return {
+                "extraction_success": False,
+                "error": f"Transaction extraction error: {str(e)}",
+                "transactions": []
+            }
+
+    def _parse_transaction_extraction_result(self, result_text: str) -> Dict[str, Any]:
+        """Parse Groq response for transaction extraction"""
+        try:
+            import json
+            import re
+
+            # Find the first '{' and last '}'
+            start = result_text.find('{')
+            end = result_text.rfind('}')
+            if start == -1 or end == -1 or end <= start:
+                return {
+                    "extraction_success": False,
+                    "error": "Could not find JSON object in AI response",
+                    "transactions": []
+                }
+            json_str = result_text[start:end+1]
+
+            # Remove trailing commas before } or ]
+            json_str = re.sub(r',\s*([}\]])', r'\1', json_str)
+
+            try:
+                data = json.loads(json_str)
+            except Exception as e:
+                import logging
+                logging.error(f"JSON parsing error: {str(e)}")
+                logging.error(f"Offending JSON string:\n{json_str}")
+                return {
+                    "extraction_success": False,
+                    "error": f"JSON parsing error: {str(e)}",
+                    "transactions": []
+                }
+
+            # Validate and clean data
+            transactions = data.get("transactions", [])
+            cleaned_transactions = []
+            for txn in transactions:
+                try:
+                    cleaned_txn = {
+                        "date": str(txn.get("date", "")).strip(),
+                        "amount": float(str(txn.get("amount", 0)).replace('$', '').replace(',', '')),
+                        "vendor": str(txn.get("vendor", "")).strip(),
+                        "memo": str(txn.get("memo", "")).strip()
+                    }
+                    cleaned_transactions.append(cleaned_txn)
+                except Exception as e:
+                    continue
+            return {
+                "extraction_success": data.get("extraction_success", True),
+                "transactions": cleaned_transactions,
+                "total_transactions": len(cleaned_transactions)
+            }
+        except Exception as e:
+            import logging
+            logging.error(f"JSON parsing error (outer): {str(e)}")
+            return {
+                "extraction_success": False,
+                "error": f"JSON parsing error: {str(e)}",
+                "transactions": []
+            }
+
+    def _parse_date_to_iso(self, date_str: str) -> str:
+        """Parse various date formats and convert to YYYY-MM-DD"""
+        try:
+            import re
+            from datetime import datetime
+            
+            date_str = date_str.strip().upper()
+            
+            # Handle formats like "MAY 22", "JUN 01", "MAY 22, 2024"
+            month_pattern = r'(JAN|FEB|MAR|APR|MAY|JUN|JUL|AUG|SEP|OCT|NOV|DEC)\s+(\d{1,2})(?:,\s*(\d{4}))?'
+            match = re.match(month_pattern, date_str)
+            
+            if match:
+                month_abbr, day, year = match.groups()
+                month_map = {
+                    'JAN': 1, 'FEB': 2, 'MAR': 3, 'APR': 4, 'MAY': 5, 'JUN': 6,
+                    'JUL': 7, 'AUG': 8, 'SEP': 9, 'OCT': 10, 'NOV': 11, 'DEC': 12
+                }
+                
+                month = month_map[month_abbr]
+                day = int(day)
+                year = int(year) if year else datetime.now().year
+                
+                # Handle 2-digit years
+                if year < 100:
+                    year += 2000
+                
+                return f"{year:04d}-{month:02d}-{day:02d}"
+            
+            # Handle YYYY-MM-DD format
+            if re.match(r'\d{4}-\d{2}-\d{2}', date_str):
+                return date_str
+            
+            # Handle MM/DD/YYYY format
+            if re.match(r'\d{1,2}/\d{1,2}/\d{4}', date_str):
+                return datetime.strptime(date_str, '%m/%d/%Y').strftime('%Y-%m-%d')
+            
+            # Handle MM/DD/YY format
+            if re.match(r'\d{1,2}/\d{1,2}/\d{2}', date_str):
+                return datetime.strptime(date_str, '%m/%d/%y').strftime('%Y-%m-%d')
+            
+            return None
+            
+        except Exception:
+            return None 
@@ -0,0 +1,60 @@
+from dataclasses import dataclass
+from datetime import datetime, timedelta
+from typing import List, Optional
+import json
+import os
+
+@dataclass
+class FeedbackLog:
+    """One recorded override of a match.
+
+    Instances are created by FeedbackLogger.log_override and written to disk
+    field by field by FeedbackLogger._save_logs.
+    """
+    transaction_id: str
+    original_match: str
+    correction: str
+    reason: str
+    # Set to datetime.now() by log_override; stored on disk as ISO-8601 text.
+    timestamp: datetime
+    user_id: str
+
+class FeedbackLogger:
+    def __init__(self, log_file: str = "feedback_logs.json"):
+        self.log_file = log_file
+        self.logs: List[FeedbackLog] = self._load_logs()
+    
+    def _load_logs(self) -> List[FeedbackLog]:
+        if not os.path.exists(self.log_file):
+            return []
+        
+        try:
+            with open(self.log_file, 'r') as f:
+                data = json.load(f)
+                return [FeedbackLog(**log) for log in data]
+        except:
+            return []
+    
+    def _save_logs(self):
+        with open(self.log_file, 'w') as f:
+            json.dump([{
+                'transaction_id': log.transaction_id,
+                'original_match': log.original_match,
+                'correction': log.correction,
+                'reason': log.reason,
+                'timestamp': log.timestamp.isoformat(),
+                'user_id': log.user_id
+            } for log in self.logs], f, indent=2)
+    
+    def log_override(self, transaction_id: str, original_match: str, correction: str, reason: str, user_id: str):
+        log = FeedbackLog(
+            transaction_id=transaction_id,
+            original_match=original_match,
+            correction=correction,
+            reason=reason,
+            timestamp=datetime.now(),
+            user_id=user_id
+        )
+        self.logs.append(log)
+        self._save_logs()
+    
+    def get_logs_by_transaction(self, transaction_id: str) -> List[FeedbackLog]:
+        return [log for log in self.logs if log.transaction_id == transaction_id]
+    
+    def get_recent_logs(self, days: int = 30) -> List[FeedbackLog]:
+        cutoff = datetime.now() - timedelta(days=days)
+        return [log for log in self.logs if log.timestamp > cutoff] 
@@ -0,0 +1,138 @@
+import os
+import io
+from typing import List, Dict, Any, Optional
+from datetime import datetime, timedelta
+
+class GoogleDriveSync:
+    def __init__(self):
+        self.service = None
+        self.processed_files = set()
+        
+    def authenticate(self):
+        """Authenticate with Google Drive API"""
+        try:
+            from google.auth.transport.requests import Request
+            from google.oauth2.credentials import Credentials
+            from google_auth_oauthlib.flow import InstalledAppFlow
+            from googleapiclient.discovery import build
+            
+            SCOPES = ['https://www.googleapis.com/auth/drive.readonly']
+            
+            # Load existing credentials
+            if os.path.exists('token.json'):
+                self.creds = Credentials.from_authorized_user_file('token.json', SCOPES)
+            
+            # If no valid credentials available, let user log in
+            if not self.creds or not self.creds.valid:
+                if self.creds and self.creds.expired and self.creds.refresh_token:
+                    self.creds.refresh(Request())
+                else:
+                    if not os.path.exists('credentials.json'):
+                        raise Exception("credentials.json not found. Please download from Google Cloud Console.")
+                    
+                    flow = InstalledAppFlow.from_client_secrets_file('credentials.json', SCOPES)
+                    self.creds = flow.run_local_server(port=0)
+                
+                # Save credentials for next run
+                with open('token.json', 'w') as token:
+                    token.write(self.creds.to_json())
+            
+            # Build the Drive service
+            self.service = build('drive', 'v3', credentials=self.creds)
+            return True
+            
+        except Exception as e:
+            print(f"Authentication error: {e}")
+            return False
+    
+    def list_folders(self) -> List[Dict[str, Any]]:
+        """List all folders in Google Drive"""
+        if not self.service:
+            if not self.authenticate():
+                return []
+        
+        try:
+            results = self.service.files().list(
+                q="mimeType='application/vnd.google-apps.folder'",
+                pageSize=100,
+                fields="nextPageToken, files(id, name, createdTime, modifiedTime)"
+            ).execute()
+            
+            return results.get('files', [])
+            
+        except Exception as e:
+            print(f"Error listing folders: {e}")
+            return []
+    
+    def get_folder_info(self, folder_id: str) -> Dict[str, Any]:
+        """Get information about a Google Drive folder"""
+        if not self.service:
+            if not self.authenticate():
+                return {}
+        
+        try:
+            folder = self.service.files().get(
+                fileId=folder_id,
+                fields="id, name, createdTime, modifiedTime"
+            ).execute()
+            
+            return folder
+            
+        except Exception as e:
+            print(f"Error getting folder info: {e}")
+            return {}
+    
+    async def process_drive_files(self, folder_id: str = None) -> List[Dict[str, Any]]:
+        """Process all receipt files from Google Drive"""
+        if not self.service:
+            if not self.authenticate():
+                return []
+        
+        results = []
+        
+        try:
+            # File types to look for
+            file_types = ["'application/pdf'", "'image/jpeg'", "'image/png'", "'image/gif'", "'image/bmp'"]
+            mime_types = " or ".join(file_types)
+            
+            # Build query
+            query = f"mimeType contains {mime_types}"
+            if folder_id:
+                query += f" and '{folder_id}' in parents"
+            
+            # Add date filter (last 30 days)
+            thirty_days_ago = (datetime.now() - timedelta(days=30)).isoformat() + 'Z'
+            query += f" and modifiedTime > '{thirty_days_ago}'"
+            
+            results_files = self.service.files().list(
+                q=query,
+                pageSize=100,
+                fields="nextPageToken, files(id, name, mimeType, modifiedTime, size)"
+            ).execute()
+            
+            files = results_files.get('files', [])
+            files = [file for file in files if file['id'] not in self.processed_files]
+            
+            # For demo purposes, return mock results
+            for file in files[:3]:  # Process first 3 files
+                mock_result = {
+                    "file_id": file['id'],
+                    "filename": file['name'],
+                    "drive_modified": file['modifiedTime'],
+                    "file_size": file.get('size', 0),
+                    "extraction_success": True,
+                    "vendor": "Demo Vendor",
+                    "description": "Coffee and sandwich",
+                    "total_amount": 25.50,
+                    "tax_amount": 2.04,
+                    "date": "2024-01-15",
+                    "category": "Food",
+                    "confidence": 0.95
+                }
+                results.append(mock_result)
+                self.processed_files.add(file['id'])
+            
+        except Exception as e:
+            print(f"Error processing Drive files: {e}")
+        
+        return results 
@@ -0,0 +1,555 @@
+from fastapi import FastAPI, HTTPException, UploadFile, File
+from fastapi.middleware.cors import CORSMiddleware
+from datetime import datetime
+from typing import List
+import uuid
+import csv
+import io
+import logging
+
+# Root logging setup: every record at INFO or above goes both to app.log
+# (relative to the working directory) and to a default stream handler,
+# using one shared timestamped format.
+_LOG_FORMAT = '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
+_log_handlers = [logging.FileHandler('app.log'), logging.StreamHandler()]
+logging.basicConfig(level=logging.INFO, format=_LOG_FORMAT, handlers=_log_handlers)
+logger = logging.getLogger(__name__)
+
+from api_models import (
+    MatchingRequest, MatchingResponse, MatchResponse,
+    ApprovalRequest, RuleRequest, DocumentUploadResponse, 
+    DocumentProcessResponse, TransactionRequest
+)
+from models import Receipt, Transaction, Match
+from matching_engine import MatchingEngine
+from ai_rules import AIRule
+from document_processor import DocumentProcessor
+
+# FastAPI application object served by this module.
+app = FastAPI(
+    version="1.0.0",
+    title="AI Bookkeeper - Data Science Engine",
+    description="AI-powered receipt-to-transaction matching engine. Receives transaction data and provides intelligent matching capabilities.",
+)
+
+# CORS: any origin, method and header is accepted, with credentials enabled.
+# NOTE(review): wildcard origins together with allow_credentials=True is very
+# permissive -- confirm this is intended outside local development.
+app.add_middleware(
+    CORSMiddleware,
+    allow_credentials=True,
+    allow_origins=["*"],
+    allow_methods=["*"],
+    allow_headers=["*"],
+)
+
+# Engine components shared by every request handler
+matching_engine = MatchingEngine()
+document_processor = DocumentProcessor()
+
+# Process-local state (in production, use a database):
+# file_id -> {"filename", "content", "upload_date"} for each uploaded file
+uploaded_files = {}
+# Transactions from the most recent import; each import replaces the list
+stored_transactions = []
+# file_id -> extraction result produced by /process/{file_id}
+processed_receipts = {}
+
+@app.get("/")
+async def root():
+    """Report that the service is running, with its version and status."""
+    payload = dict(
+        message="AI Bookkeeper Data Science Engine is running",
+        version="1.0.0",
+        status="healthy",
+    )
+    return payload
+
+# ============================================================================
+# TRANSACTION IMPORT ENDPOINTS
+# ============================================================================
+
+@app.post("/transactions/import/csv")
+async def import_transactions_csv(file: UploadFile = File(...)):
+    """
+    Import transactions from a CSV file (custom bank export format).
+
+    Columns read: ``Account Number``, ``Transaction Date`` (MM/DD/YY or
+    MM/DD/YYYY), ``Amount`` and ``Description 2``; ``Account Type``,
+    ``Cheque Number`` and ``Description 1`` are joined into the memo.
+    Header names are matched after stripping surrounding whitespace.
+
+    Rows that cannot be parsed are reported in ``errors`` and skipped. The
+    successfully parsed rows replace the module-level ``stored_transactions``.
+
+    Returns:
+        dict with ``imported_count``, ``converted_transactions`` and ``errors``.
+
+    Raises:
+        HTTPException: 400 when the upload is not UTF-8 text, 500 on any
+            other unexpected failure.
+    """
+    global stored_transactions
+
+    def _cell(row, column):
+        """Return the stripped text of *column*, or '' when absent or empty."""
+        value = row.get(column)
+        return value.strip() if isinstance(value, str) else ""
+
+    try:
+        content = await file.read()
+        try:
+            # utf-8-sig drops a leading BOM, which would otherwise become
+            # part of the first header name and hide that column.
+            decoded = content.decode('utf-8-sig')
+        except UnicodeDecodeError as e:
+            raise HTTPException(status_code=400, detail=f"File is not valid UTF-8 text: {e}")
+        reader = csv.DictReader(io.StringIO(decoded))
+        transactions = []
+        errors = []
+        for idx, raw_row in enumerate(reader):
+            try:
+                # Normalise header whitespace ("Amount " -> "Amount"). DictReader
+                # stores surplus cells under a None key; those are ignored.
+                row = {
+                    key.strip(): value
+                    for key, value in raw_row.items()
+                    if isinstance(key, str)
+                }
+                account_number = _cell(row, 'Account Number')
+                txn_date_str = _cell(row, 'Transaction Date')
+                amount_str = _cell(row, 'Amount')
+                payee_name = _cell(row, 'Description 2')
+                memo = f"{_cell(row, 'Account Type')} {_cell(row, 'Cheque Number')} {_cell(row, 'Description 1')}".strip()
+                # Compose ID from the account number and the 1-based row index
+                txn_id = f"{account_number}_{idx+1}"
+                # Parse date (try multiple formats)
+                if not txn_date_str:
+                    raise ValueError("Missing transaction date")
+                txn_date = None
+                for fmt in ("%m/%d/%y", "%m/%d/%Y"):
+                    try:
+                        txn_date = datetime.strptime(txn_date_str, fmt).strftime("%Y-%m-%d")
+                        break
+                    except ValueError:
+                        continue
+                if not txn_date:
+                    raise ValueError(f"Could not parse date: {txn_date_str}")
+                # Parse amount (thousands separators are removed first)
+                if not amount_str:
+                    raise ValueError("Missing amount")
+                amount = float(amount_str.replace(',', ''))
+                transactions.append({
+                    "id": txn_id,
+                    "txn_date": txn_date,
+                    "amount": amount,
+                    "payee_name": payee_name,
+                    "memo": memo
+                })
+            except Exception as e:
+                errors.append(f"Row {idx+1}: {str(e)}")
+        # Store transactions globally for auto-matching
+        stored_transactions = transactions
+
+        return {
+            "imported_count": len(transactions),
+            "converted_transactions": transactions,
+            "errors": errors
+        }
+    except HTTPException:
+        # Keep deliberate status codes instead of rewrapping them as 500
+        raise
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=str(e))
+
+@app.post("/transactions/import/image")
+async def import_transactions_from_image(file: UploadFile = File(...)):
+    """
+    Import transactions from an image (bank statement, credit card statement, etc.) using AI extraction.
+
+    The upload is saved through ``document_processor`` and passed to its
+    transaction extractor. Extracted entries replace the module-level
+    ``stored_transactions``; entries that cannot be converted are reported in
+    ``errors`` and skipped.
+
+    Returns:
+        dict with ``imported_count``, ``converted_transactions`` and ``errors``.
+
+    Raises:
+        HTTPException: 400 for an unsupported file extension, 500 when the
+            extraction reports failure or an unexpected error occurs.
+    """
+    global stored_transactions
+    try:
+        # Validate file type; a name without a dot has no extension at all
+        allowed_types = ['jpg', 'jpeg', 'png', 'gif', 'bmp', 'pdf']
+        filename = file.filename or ""
+        _, dot, suffix = filename.rpartition('.')
+        file_extension = suffix.lower() if dot else ""
+        if file_extension not in allowed_types:
+            raise HTTPException(status_code=400, detail=f"Unsupported file type. Allowed: {allowed_types}")
+        # Read file content
+        content = await file.read()
+        # Save file to disk
+        image_path = await document_processor.save_uploaded_file(content, filename)
+        # Extract transactions from image (pass file path)
+        extraction_result = await document_processor.extract_transactions_from_image(image_path)
+        if not extraction_result.get("extraction_success", False):
+            raise HTTPException(status_code=500, detail=extraction_result.get("error", "Extraction failed"))
+        extracted_transactions = extraction_result.get("transactions", [])
+
+        imported = []
+        errors = []
+        for idx, txn in enumerate(extracted_transactions):
+            try:
+                txn_date_raw = txn.get("date")
+                if not txn_date_raw:
+                    raise ValueError("Missing date")
+
+                # Parse date to YYYY-MM-DD format
+                txn_date = document_processor._parse_date_to_iso(txn_date_raw)
+                if not txn_date:
+                    # Fallback: prefix the current year if parsing fails
+                    txn_date = f"{datetime.now().year}-{txn_date_raw}"
+
+                imported.append({
+                    "id": f"img_{filename}_{idx+1}",
+                    "txn_date": txn_date,
+                    "amount": txn.get("amount"),
+                    "payee_name": txn.get("vendor"),
+                    "memo": txn.get("memo", "")
+                })
+            except Exception as e:
+                # Report the skipped entry instead of dropping it silently
+                logger.warning(f"Skipping extracted transaction {idx+1}: {str(e)}")
+                errors.append(f"Transaction {idx+1}: {str(e)}")
+
+        # Store transactions globally for auto-matching
+        stored_transactions = imported
+        return {
+            "imported_count": len(imported),
+            "converted_transactions": imported,
+            "errors": errors
+        }
+    except HTTPException:
+        # Keep deliberate status codes (e.g. 400) instead of rewrapping as 500
+        raise
+    except Exception as e:
+        logger.error(f"Error importing transactions from image: {str(e)}")
+        raise HTTPException(status_code=500, detail=str(e))
+
+# ============================================================================
+# DOCUMENT PROCESSING ENDPOINTS
+# ============================================================================
+
+@app.post("/upload-multiple", response_model=List[DocumentUploadResponse])
+async def upload_multiple_documents(files: List[UploadFile] = File(...)):
+    """
+    Upload multiple receipt images for processing.
+
+    This endpoint accepts multiple image files and returns file IDs
+    that can be used with the /process/{file_id} endpoint.
+
+    All files are validated before any is stored, so a rejected batch leaves
+    nothing behind in ``uploaded_files``.
+
+    Raises:
+        HTTPException: 400 when a file has an unsupported extension, 500 on
+            any other unexpected failure.
+    """
+    try:
+        allowed_types = ['jpg', 'jpeg', 'png', 'gif', 'bmp', 'pdf']
+
+        # Pass 1: validate every file type before touching storage
+        validated = []
+        for file in files:
+            filename = file.filename or ""
+            # A name without a dot has no extension at all
+            _, dot, suffix = filename.rpartition('.')
+            file_extension = suffix.lower() if dot else ""
+
+            if file_extension not in allowed_types:
+                raise HTTPException(status_code=400, detail=f"Unsupported file type for {file.filename}. Allowed: {allowed_types}")
+            validated.append((file, file_extension))
+
+        # Pass 2: read and store the content under a fresh unique file ID
+        responses = []
+        for file, file_extension in validated:
+            file_id = str(uuid.uuid4())
+            content = await file.read()
+            # One timestamp, so the stored entry and the response agree
+            upload_date = datetime.now()
+            uploaded_files[file_id] = {
+                "filename": file.filename,
+                "content": content,
+                "upload_date": upload_date
+            }
+
+            responses.append(DocumentUploadResponse(
+                file_id=file_id,
+                filename=file.filename,
+                file_type=file_extension,
+                upload_date=upload_date,
+                status="uploaded"
+            ))
+
+        return responses
+
+    except HTTPException:
+        # Keep the 400 instead of rewrapping it as a 500
+        raise
+    except Exception as e:
+        logger.error(f"Error uploading documents: {str(e)}")
+        raise HTTPException(status_code=500, detail=str(e))
+
+@app.post("/process/{file_id}", response_model=DocumentProcessResponse)
+async def process_document(file_id: str):
+    """
+    Process a previously uploaded document to extract receipt information.
+
+    This endpoint uses AI to extract structured data from receipt images,
+    including vendor, amount, date, and category information. The raw
+    extraction result is kept in ``processed_receipts`` under ``file_id``.
+
+    Raises:
+        HTTPException: 404 when ``file_id`` was never uploaded, 500 on any
+            other unexpected failure.
+    """
+    try:
+        # Check if file exists
+        if file_id not in uploaded_files:
+            raise HTTPException(status_code=404, detail=f"File {file_id} not found")
+
+        file_data = uploaded_files[file_id]
+
+        # Save file temporarily and process it
+        file_path = await document_processor.save_uploaded_file(file_data["content"], file_data["filename"])
+        file_type = file_data["filename"].split('.')[-1].lower()
+        receipt_data = await document_processor.process_file(file_path, file_type)
+
+        # Store processed receipt
+        processed_receipts[file_id] = receipt_data
+
+        return DocumentProcessResponse(
+            file_id=file_id,
+            extraction_success=receipt_data.get("extraction_success", False),
+            vendor=receipt_data.get("vendor", ""),
+            description=receipt_data.get("description", ""),
+            total_amount=receipt_data.get("total_amount", 0.0),
+            tax_amount=receipt_data.get("tax_amount", 0.0),
+            date=receipt_data.get("date", ""),
+            category=receipt_data.get("category", ""),
+            confidence=receipt_data.get("confidence", 0.0),
+            error=receipt_data.get("error", None)
+        )
+
+    except HTTPException:
+        # Keep the 404 instead of rewrapping it as a 500
+        raise
+    except Exception as e:
+        logger.error(f"Error processing document {file_id}: {str(e)}")
+        raise HTTPException(status_code=500, detail=str(e))
+
+# ============================================================================
+# MATCHING ENDPOINTS
+# ============================================================================
+
+def _transactions_from_stored(raw_transactions):
+    """Convert stored transaction dicts into Transaction objects, logging and skipping bad entries."""
+    converted = []
+    for txn in raw_transactions:
+        try:
+            converted.append(Transaction(
+                id=txn["id"],
+                transaction_date=datetime.strptime(txn["txn_date"], "%Y-%m-%d"),
+                amount=txn["amount"],
+                vendor=txn["payee_name"],
+                notes=txn["memo"]
+            ))
+        except Exception as e:
+            # .get(): the id itself may be the key that is missing
+            logger.warning(f"Error converting transaction {txn.get('id')}: {str(e)}")
+    return converted
+
+
+def _receipt_from_extraction(file_id, receipt_data):
+    """Build a Receipt from a stored extraction result, substituting defaults for missing fields."""
+    # Handle missing date field
+    raw_date = receipt_data.get("date")
+    if raw_date:
+        receipt_date = datetime.strptime(raw_date, "%Y-%m-%d")
+    else:
+        logger.warning(f"Missing date for receipt {file_id}, using current date")
+        receipt_date = datetime.now()
+
+    # Handle missing amount field - try multiple possible keys, first hit wins
+    amount = None
+    for key in ("amount", "total_amount", "amount_total"):
+        amount = receipt_data.get(key)
+        if amount is not None:
+            break
+    if amount is None:
+        logger.warning(f"Missing amount for receipt {file_id}, using 0.0")
+        amount = 0.0
+
+    # Ensure amount is a float
+    try:
+        amount = float(amount)
+    except (ValueError, TypeError):
+        logger.warning(f"Invalid amount '{amount}' for receipt {file_id}, using 0.0")
+        amount = 0.0
+
+    logger.info(f"DEBUG: amount for {file_id}: {amount}")
+
+    # Handle missing vendor field
+    vendor = receipt_data.get("vendor", "")
+    if not vendor:
+        logger.warning(f"Missing vendor for receipt {file_id}, using 'Unknown'")
+        vendor = "Unknown"
+
+    # Handle tax field ("tax" takes precedence over "tax_amount")
+    tax = receipt_data.get("tax", receipt_data.get("tax_amount", 0.0))
+    try:
+        tax = float(tax)
+    except (ValueError, TypeError):
+        tax = 0.0
+
+    return Receipt(
+        id=file_id,
+        file_name=uploaded_files[file_id]["filename"],
+        upload_date=uploaded_files[file_id]["upload_date"],
+        receipt_date=receipt_date,
+        amount=amount,
+        tax=tax,
+        vendor=vendor,
+        category=receipt_data.get("category", "Other"),
+        description=receipt_data.get("description", "")
+    )
+
+
+@app.post("/match-specific", response_model=MatchingResponse)
+async def match_specific_receipts(file_ids: List[str]):
+    """
+    Match specific receipts against imported transactions.
+
+    This endpoint takes a list of receipt file IDs and matches them against
+    the currently imported transactions using AI-powered matching logic.
+
+    Returns:
+        MatchingResponse with one entry per match plus summary statistics
+        (total, high/low confidence counts split at 0.8, average score).
+
+    Raises:
+        HTTPException: 400 when no transactions are imported or when any
+            requested receipt is unprocessed or cannot be converted; 500 when
+            matching itself fails or on any other unexpected error.
+    """
+    try:
+        logger.info(f"Starting match-specific for file IDs: {file_ids}")
+
+        # Check if transactions are imported
+        if not stored_transactions:
+            logger.warning("No transactions imported")
+            raise HTTPException(status_code=400, detail="No transactions imported. Please upload CSV first.")
+
+        logger.info(f"Found {len(stored_transactions)} stored transactions")
+
+        # Convert stored transactions to Transaction objects
+        transactions = _transactions_from_stored(stored_transactions)
+        logger.info(f"Converted {len(transactions)} transactions")
+
+        # Get receipts for the specified file IDs
+        receipts = []
+        missing_files = []
+
+        for file_id in file_ids:
+            if file_id not in processed_receipts:
+                logger.warning(f"Receipt {file_id} not found in processed_receipts")
+                missing_files.append(f"{file_id} (not found)")
+                continue
+
+            receipt_data = processed_receipts[file_id]
+            logger.info(f"DEBUG: receipt_data for {file_id}: {receipt_data}")
+            logger.info(f"DEBUG: receipt_data keys for {file_id}: {list(receipt_data.keys())}")
+            try:
+                receipt = _receipt_from_extraction(file_id, receipt_data)
+                receipts.append(receipt)
+                logger.info(f"Added receipt: {receipt.vendor} - ${receipt.amount}")
+            except Exception as e:
+                logger.warning(f"Error creating receipt object for {file_id}: {str(e)}")
+                missing_files.append(f"{file_id} (error: {str(e)})")
+
+        # Any unusable receipt fails the whole request
+        if missing_files:
+            logger.error(f"Missing files: {missing_files}")
+            raise HTTPException(status_code=400, detail=f"Missing files: {missing_files}")
+
+        logger.info(f"Processing {len(receipts)} receipts against {len(transactions)} transactions")
+
+        # Perform matching
+        try:
+            logger.info("Starting direct matching call (without ThreadPoolExecutor)")
+            logger.info(f"matching_engine type: {type(matching_engine)}")
+            logger.info(f"matching_engine.process_matching type: {type(matching_engine.process_matching)}")
+            logger.info(f"receipts type: {type(receipts)}, length: {len(receipts)}")
+            logger.info(f"transactions type: {type(transactions)}, length: {len(transactions)}")
+
+            matches = matching_engine.process_matching(receipts, transactions)
+
+            logger.info(f"Matching completed successfully. Found {len(matches)} matches")
+
+            # Convert matches to response format
+            match_responses = []
+            for match in matches:
+                logger.info(f"Raw match object: {match}")
+                logger.info(f"  receipt_id: {match.receipt.id}")
+                logger.info(f"  transaction_id: {match.transaction.id}")
+                logger.info(f"  confidence_score: {match.confidence_score}")
+                logger.info(f"  match_reason: {match.match_reason}")
+                logger.info(f"  receipt_vendor: {match.receipt.vendor}")
+                logger.info(f"  receipt_amount: {match.receipt.amount}")
+                logger.info(f"  transaction_vendor: {match.transaction.vendor}")
+                logger.info(f"  transaction_amount: {match.transaction.amount}")
+
+                match_response = MatchResponse(
+                    receipt_id=match.receipt.id,
+                    transaction_id=match.transaction.id,
+                    confidence_score=match.confidence_score,
+                    match_reason=match.match_reason,
+                    receipt_vendor=match.receipt.vendor,
+                    receipt_amount=match.receipt.amount,
+                    receipt_description=match.receipt.description,
+                    receipt_category=match.receipt.category,
+                    receipt_tax_amount=match.receipt.tax,
+                    transaction_vendor=match.transaction.vendor,
+                    transaction_amount=match.transaction.amount
+                )
+                match_responses.append(match_response)
+                logger.info(f"Successfully created MatchResponse for {match.receipt.vendor} -> {match.transaction.vendor}")
+
+            logger.info(f"Formatted {len(match_responses)} match responses")
+
+            # Calculate statistics
+            if match_responses:
+                high_confidence = sum(1 for m in match_responses if m.confidence_score >= 0.8)
+                low_confidence = len(match_responses) - high_confidence
+                avg_score = sum(m.confidence_score for m in match_responses) / len(match_responses)
+            else:
+                high_confidence = low_confidence = avg_score = 0
+
+            stats = {
+                "total": len(match_responses),
+                "high_confidence": high_confidence,
+                "low_confidence": low_confidence,
+                "avg_score": round(avg_score, 2)
+            }
+
+            logger.info(f"Generated stats: {stats}")
+            logger.info(f"Match-specific completed successfully with {len(match_responses)} matches")
+
+            return MatchingResponse(
+                matches=match_responses,
+                stats=stats
+            )
+
+        except Exception as e:
+            # logger.exception records the formatted traceback; formatting
+            # e.__traceback__ directly only prints the object's repr.
+            logger.exception(f"Exception in matching section: {str(e)}")
+            logger.error(f"Exception type: {type(e)}")
+            logger.error(f"Exception args: {e.args}")
+            raise HTTPException(status_code=500, detail=f"Unexpected matching error: {str(e)}")
+
+    except HTTPException:
+        raise
+    except Exception as e:
+        logger.error(f"Unexpected error in match_specific_receipts: {str(e)}")
+        raise HTTPException(status_code=500, detail=str(e))
+
+# ============================================================================
+# RULES MANAGEMENT ENDPOINTS
+# ============================================================================
+
+@app.post("/rules")
+async def add_rule(request: RuleRequest):
+    """
+    Build an AIRule from the request fields and append it to the rules engine.
+
+    Any failure is reported as an HTTP 500 carrying the error text.
+    """
+    try:
+        rule_fields = {
+            "name": request.name,
+            "condition": request.condition,
+            "action": request.action,
+            "source": request.source,
+        }
+        active_rules = matching_engine.rules_engine.rules
+        active_rules.append(AIRule(**rule_fields))
+        return {"message": f"Rule '{request.name}' added successfully"}
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=str(e))
+
+@app.get("/rules")
+async def get_rules():
+    """
+    List every rule held by the rules engine as a plain dict.
+
+    Each entry carries the rule's name, condition, action, source and status.
+    """
+    try:
+        serialized = [
+            {
+                "name": rule.name,
+                "condition": rule.condition,
+                "action": rule.action,
+                "source": rule.source,
+                "status": rule.status,
+            }
+            for rule in matching_engine.rules_engine.rules
+        ]
+        return {"rules": serialized}
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=str(e))
+
+@app.delete("/rules/{rule_name}")
+async def delete_rule(rule_name: str):
+    """
+    Remove the first rule whose name equals ``rule_name``.
+
+    Responds with 404 when no rule carries that name.
+    """
+    try:
+        rules = matching_engine.rules_engine.rules
+        # Index of the first rule with a matching name, or None
+        position = next(
+            (i for i, rule in enumerate(rules) if rule.name == rule_name),
+            None,
+        )
+        if position is None:
+            raise HTTPException(status_code=404, detail=f"Rule '{rule_name}' not found")
+
+        del rules[position]
+        return {"message": f"Rule '{rule_name}' deleted successfully"}
+
+    except HTTPException:
+        raise
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=str(e))
+
+# ============================================================================
+# STATISTICS ENDPOINT
+# ============================================================================
+
+@app.get("/stats")
+async def get_stats():
+    """
+    Report how many transactions, processed receipts, uploaded files and
+    rules the service currently holds in memory.
+    """
+    try:
+        transaction_count = len(stored_transactions)
+        receipt_count = len(processed_receipts)
+        upload_count = len(uploaded_files)
+        rule_count = len(matching_engine.rules_engine.rules)
+        return {
+            "total_transactions": transaction_count,
+            "total_receipts": receipt_count,
+            "total_uploaded_files": upload_count,
+            "rules_count": rule_count,
+        }
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=str(e))
+
+if __name__ == "__main__":
+    # Script entry point: serve the app on all interfaces, port 8343.
+    from uvicorn import run as serve
+    serve(app, host="0.0.0.0", port=8343)
@@ -0,0 +1,77 @@
+from typing import List, Dict, Any
+from datetime import datetime
+from ai_matcher import AIMatcher
+from ai_rules import AIRulesEngine
+from feedback_logger import FeedbackLogger
+from models import Receipt, Transaction, Match
+
+class MatchingEngine:
+    """Combines AI matching, rule-based adjustment and feedback logging."""
+
+    def __init__(self):
+        self.ai_matcher = AIMatcher()
+        self.rules_engine = AIRulesEngine()
+        self.feedback_logger = FeedbackLogger()
+
+    def process_matching(self, receipts: List[Receipt], transactions: List[Transaction]) -> List[Match]:
+        """Return the AI matcher's matches, each adjusted by the rules engine."""
+        candidates = self.ai_matcher.match_receipts_to_transactions(receipts, transactions)
+        return [self._enhance_match_with_rules(candidate) for candidate in candidates]
+
+    def _enhance_match_with_rules(self, match: Match) -> Match:
+        """Apply rule results to ``match`` in place and return it."""
+        outcome = self.rules_engine.apply_rules(match.receipt, match.transaction)
+
+        # A positive boost raises the score, capped at 1.0
+        boost = outcome["confidence_boost"]
+        if boost > 0:
+            match.confidence_score = min(1.0, match.confidence_score + boost)
+
+        # Auto-approval overrides the score and annotates the reason
+        if outcome["auto_approve"]:
+            match.confidence_score = 1.0
+            match.match_reason += " (Auto-approved by rules)"
+
+        # Attach the tax analysis only when the rules produced one
+        tax_analysis = outcome.get("tax_analysis")
+        if tax_analysis:
+            match.tax_analysis = outcome["tax_analysis"]
+
+        return match
+
+    def approve_match(self, match: Match, user_id: str):
+        """Log a user's approval of ``match`` through the feedback logger."""
+        self.feedback_logger.log_override(
+            transaction_id=match.transaction.id,
+            original_match=f"AI Score: {match.confidence_score}",
+            correction="Approved",
+            reason="User approved match",
+            user_id=user_id
+        )
+
+    def reject_match(self, match: Match, reason: str, user_id: str):
+        """Log a user's rejection of ``match``, with the given reason."""
+        self.feedback_logger.log_override(
+            transaction_id=match.transaction.id,
+            original_match=f"AI Score: {match.confidence_score}",
+            correction="Rejected",
+            reason=reason,
+            user_id=user_id
+        )
+
+    def get_matching_stats(self, matches: List[Match]) -> Dict[str, Any]:
+        """Summarise matches: total, counts at/above and below 0.8, and mean score to 3 decimals."""
+        total = len(matches)
+        if total == 0:
+            return {"total": 0, "high_confidence": 0, "low_confidence": 0, "avg_score": 0}
+
+        scores = [m.confidence_score for m in matches]
+        at_or_above = sum(1 for score in scores if score >= 0.8)
+        below = sum(1 for score in scores if score < 0.8)
+        mean_score = sum(scores) / total
+
+        return {
+            "total": total,
+            "high_confidence": at_or_above,
+            "low_confidence": below,
+            "avg_score": round(mean_score, 3)
+        }
@@ -0,0 +1,59 @@
+from dataclasses import dataclass
+from datetime import datetime
+from typing import Optional
+
+@dataclass
+class Address:
+    """Address information for tax calculations"""
+    # Province identifier. TaxRulesEngine looks this value up in its
+    # PROVINCIAL_TAX_RATES table, whose keys are two-letter codes ("ON", "QC", ...).
+    province: str
+    city: str
+    postal_code: str
+    # Country name; defaults to "Canada" when not supplied.
+    country: str = "Canada"
+
+@dataclass
+class Receipt:
+    """A receipt, as built by the API from an uploaded file's extraction result."""
+    # The API passes the upload's file_id here.
+    id: str
+    # Original filename and time of the upload.
+    file_name: str
+    upload_date: datetime
+    # Date of the receipt; the API substitutes the current time when extraction found none.
+    receipt_date: datetime
+    # NOTE(review): the API fills this from "amount"/"total_amount"/"amount_total";
+    # confirm whether it is tax-inclusive.
+    amount: float
+    # Tax amount taken from the extraction ("tax" or "tax_amount").
+    tax: float
+    vendor: str
+    category: str
+    description: str
+    # Tax rule fields
+    billing_address: Optional[Address] = None
+    shipping_address: Optional[Address] = None
+    # Currency code; defaults to "CAD".
+    currency: str = "CAD"
+    is_meals_entertainment: bool = False
+    
+@dataclass
+class Transaction:
+    """An imported transaction that receipts are matched against."""
+    id: str
+    transaction_date: datetime
+    amount: float
+    # The API fills vendor from the stored "payee_name" and notes from "memo".
+    vendor: str
+    notes: str
+    # Tax rule fields
+    # Currency code; defaults to "CAD".
+    currency: str = "CAD"
+    # Exchange rate, None when not set.
+    # NOTE(review): conversion direction is not shown here -- confirm.
+    fx_rate: Optional[float] = None
+    
+@dataclass
+class Asset:
+    """Asset for depreciation calculations"""
+    id: str
+    name: str
+    purchase_date: datetime
+    purchase_amount: float
+    # Useful life in whole years.
+    useful_life_years: int
+    residual_value: float
+    # NOTE(review): presumably a fraction (0.30 for 30%) like the tax rates -- confirm.
+    cca_rate: float  # Capital Cost Allowance rate
+    asset_class: str
+    
+@dataclass
+class Match:
+    """A proposed pairing of one receipt with one transaction."""
+    receipt: Receipt
+    transaction: Transaction
+    # MatchingEngine caps this at 1.0 and treats >= 0.8 as high confidence.
+    confidence_score: float
+    # Human-readable explanation; MatchingEngine appends a note on auto-approval.
+    match_reason: str
+    # Set by MatchingEngine when the rules engine returns a tax analysis.
+    tax_analysis: Optional[dict] = None 
@@ -0,0 +1,16 @@
+groq>=0.5.0
+python-dotenv==1.0.0
+pandas==2.1.4
+numpy==1.24.3
+fastapi==0.104.1
+uvicorn==0.24.0
+pydantic==2.5.0
+requests==2.31.0
+python-multipart==0.0.6
+Pillow==10.0.1
+PyPDF2==3.0.1
+aiofiles==23.2.1
+google-auth==2.23.4
+google-auth-oauthlib==1.1.0
+google-auth-httplib2==0.1.1
+google-api-python-client==2.108.0 
@@ -0,0 +1,271 @@
+from typing import Dict, Any, Optional, Tuple
+from datetime import datetime
+from models import Receipt, Transaction, Address, Asset
+import logging
+
+logger = logging.getLogger(__name__)
+
+class TaxRulesEngine:
+    """Engine to handle tax calculations based on the four tax rules"""
+    
+    # Provincial tax rates (simplified - in production, use a tax rate API)
+    PROVINCIAL_TAX_RATES = {
+        "ON": 0.13,  # Ontario HST
+        "QC": 0.14975,  # Quebec QST
+        "BC": 0.12,  # British Columbia
+        "AB": 0.05,  # Alberta
+        "SK": 0.11,  # Saskatchewan
+        "MB": 0.12,  # Manitoba
+        "NS": 0.15,  # Nova Scotia
+        "NB": 0.15,  # New Brunswick
+        "NL": 0.15,  # Newfoundland and Labrador
+        "PE": 0.15,  # Prince Edward Island
+        "NT": 0.05,  # Northwest Territories
+        "NU": 0.05,  # Nunavut
+        "YT": 0.05,  # Yukon
+    }
+    
+    def __init__(self):
+        self.logger = logging.getLogger(__name__)
+    
+    def apply_sales_tax_rule(self, receipt: Receipt) -> Dict[str, Any]:
+        """
+        Sales Tax Rule: Apply correct sales tax based on billing vs shipping addresses
+        """
+        try:
+            # Determine which address to use for tax calculation
+            tax_address = self._get_tax_address(receipt)
+            
+            if not tax_address:
+                return {
+                    "success": False,
+                    "error": "No valid address found for tax calculation",
+                    "calculated_tax": 0.0,
+                    "tax_rate": 0.0
+                }
+            
+            # Get tax rate for the province
+            tax_rate = self.PROVINCIAL_TAX_RATES.get(tax_address.province, 0.0)
+            
+            # Calculate tax amount
+            calculated_tax = receipt.amount * tax_rate
+            
+            return {
+                "success": True,
+                "calculated_tax": calculated_tax,
+                "tax_rate": tax_rate,
+                "tax_address": tax_address.province,
+                "rule_applied": "Sales Tax Rule"
+            }
+            
+        except Exception as e:
+            self.logger.error(f"Error applying sales tax rule: {str(e)}")
+            return {
+                "success": False,
+                "error": str(e),
+                "calculated_tax": 0.0,
+                "tax_rate": 0.0
+            }
+    
+    def _get_tax_address(self, receipt: Receipt) -> Optional[Address]:
+        """Determine which address to use for tax calculation"""
+        # Rule: Use shipping address if different from billing, otherwise use billing
+        if receipt.shipping_address and receipt.billing_address:
+            if self._addresses_different(receipt.billing_address, receipt.shipping_address):
+                return receipt.shipping_address
+            else:
+                return receipt.billing_address
+        elif receipt.shipping_address:
+            return receipt.shipping_address
+        elif receipt.billing_address:
+            return receipt.billing_address
+        else:
+            return None
+    
+    def _addresses_different(self, billing: Address, shipping: Address) -> bool:
+        """Check if billing and shipping addresses are different"""
+        return (billing.province != shipping.province or 
+                billing.city != shipping.city or 
+                billing.postal_code != shipping.postal_code)
+    
+    def apply_fx_rule(self, receipt: Receipt, transaction: Transaction) -> Dict[str, Any]:
+        """
+        Foreign Exchange Rule: Handle currency mismatches
+        """
+        try:
+            # Check for currency mismatch
+            if receipt.currency != transaction.currency:
+                fx_discrepancy = abs(receipt.amount - abs(transaction.amount))
+                
+                return {
+                    "success": True,
+                    "fx_discrepancy": fx_discrepancy,
+                    "receipt_currency": receipt.currency,
+                    "transaction_currency": transaction.currency,
+                    "receipt_amount": receipt.amount,
+                    "transaction_amount": abs(transaction.amount),
+                    "requires_manual_review": True,
+                    "rule_applied": "Foreign Exchange Rule"
+                }
+            else:
+                return {
+                    "success": True,
+                    "fx_discrepancy": 0.0,
+                    "requires_manual_review": False,
+                    "rule_applied": "No FX Rule (same currency)"
+                }
+                
+        except Exception as e:
+            self.logger.error(f"Error applying FX rule: {str(e)}")
+            return {
+                "success": False,
+                "error": str(e),
+                "fx_discrepancy": 0.0,
+                "requires_manual_review": False
+            }
+    
+    def calculate_straight_line_depreciation(self, asset: Asset, year: int) -> Dict[str, Any]:
+        """
+        Straight-Line Depreciation for accounting purposes
+        """
+        try:
+            if year > asset.useful_life_years:
+                return {
+                    "success": False,
+                    "error": f"Year {year} exceeds useful life of {asset.useful_life_years} years",
+                    "depreciation": 0.0
+                }
+            
+            # Straight-line formula: (Cost - Residual Value) / Useful Life
+            annual_depreciation = (asset.purchase_amount - asset.residual_value) / asset.useful_life_years
+            
+            return {
+                "success": True,
+                "depreciation": annual_depreciation,
+                "book_value": asset.purchase_amount - (annual_depreciation * year),
+                "method": "Straight-Line",
+                "rule_applied": "Depreciation Rule (Accounting)"
+            }
+            
+        except Exception as e:
+            self.logger.error(f"Error calculating straight-line depreciation: {str(e)}")
+            return {
+                "success": False,
+                "error": str(e),
+                "depreciation": 0.0
+            }
+    
+    def calculate_cca_depreciation(self, asset: Asset, year: int) -> Dict[str, Any]:
+        """
+        CCA (Capital Cost Allowance) Depreciation for tax purposes
+        """
+        try:
+            if year < 1:
+                return {
+                    "success": False,
+                    "error": "Year must be at least 1",
+                    "depreciation": 0.0
+                }
+            
+            # CCA uses declining balance method
+            book_value = asset.purchase_amount
+            total_depreciation = 0.0
+            
+            for current_year in range(1, year + 1):
+                # CCA is calculated on the declining balance
+                cca_amount = book_value * asset.cca_rate
+                book_value -= cca_amount
+                total_depreciation += cca_amount
+                
+                # Stop if book value reaches residual value
+                if book_value <= asset.residual_value:
+                    break
+            
+            return {
+                "success": True,
+                "depreciation": cca_amount,  # Current year depreciation
+                "total_depreciation": total_depreciation,
+                "book_value": max(book_value, asset.residual_value),
+                "method": "CCA Declining Balance",
+                "rule_applied": "Depreciation Rule (Tax)"
+            }
+            
+        except Exception as e:
+            self.logger.error(f"Error calculating CCA depreciation: {str(e)}")
+            return {
+                "success": False,
+                "error": str(e),
+                "depreciation": 0.0
+            }
+    
+    def apply_meals_entertainment_rule(self, receipt: Receipt) -> Dict[str, Any]:
+        """
+        Meals & Entertainment Tax Deduction Rule
+        """
+        try:
+            if not receipt.is_meals_entertainment:
+                return {
+                    "success": True,
+                    "tax_deduction": receipt.amount,
+                    "accounting_deduction": receipt.amount,
+                    "rule_applied": "No M&E Rule (not meals/entertainment)"
+                }
+            
+            # For tax purposes: 50% deductible
+            tax_deduction = receipt.amount * 0.5
+            
+            # For accounting purposes: 100% deductible
+            accounting_deduction = receipt.amount
+            
+            # Sales tax is fully deductible for accounting
+            tax_on_meal = receipt.tax
+            
+            return {
+                "success": True,
+                "tax_deduction": tax_deduction,
+                "accounting_deduction": accounting_deduction,
+                "tax_on_meal": tax_on_meal,
+                "rule_applied": "Meals & Entertainment Rule"
+            }
+            
+        except Exception as e:
+            self.logger.error(f"Error applying meals & entertainment rule: {str(e)}")
+            return {
+                "success": False,
+                "error": str(e),
+                "tax_deduction": 0.0,
+                "accounting_deduction": 0.0
+            }
+    
+    def apply_all_tax_rules(self, receipt: Receipt, transaction: Transaction = None) -> Dict[str, Any]:
+        """
+        Apply all tax rules to a receipt
+        """
+        results = {
+            "receipt_id": receipt.id,
+            "rules_applied": [],
+            "sales_tax": {},
+            "fx_analysis": {},
+            "meals_entertainment": {}
+        }
+        
+        # Apply Sales Tax Rule
+        sales_tax_result = self.apply_sales_tax_rule(receipt)
+        results["sales_tax"] = sales_tax_result
+        if sales_tax_result["success"]:
+            results["rules_applied"].append("Sales Tax Rule")
+        
+        # Apply FX Rule (if transaction provided)
+        if transaction:
+            fx_result = self.apply_fx_rule(receipt, transaction)
+            results["fx_analysis"] = fx_result
+            if fx_result["success"]:
+                results["rules_applied"].append("Foreign Exchange Rule")
+        
+        # Apply Meals & Entertainment Rule
+        me_result = self.apply_meals_entertainment_rule(receipt)
+        results["meals_entertainment"] = me_result
+        if me_result["success"]:
+            results["rules_applied"].append("Meals & Entertainment Rule")
+        
+        return results