first commit
This commit is contained in:
+229
@@ -0,0 +1,229 @@
|
|||||||
|
# Byte-compiled / optimized / DLL files
|
||||||
|
__pycache__/
|
||||||
|
*.py[cod]
|
||||||
|
*$py.class
|
||||||
|
|
||||||
|
# C extensions
|
||||||
|
*.so
|
||||||
|
|
||||||
|
# Distribution / packaging
|
||||||
|
.Python
|
||||||
|
build/
|
||||||
|
develop-eggs/
|
||||||
|
dist/
|
||||||
|
downloads/
|
||||||
|
eggs/
|
||||||
|
.eggs/
|
||||||
|
lib/
|
||||||
|
lib64/
|
||||||
|
parts/
|
||||||
|
sdist/
|
||||||
|
var/
|
||||||
|
wheels/
|
||||||
|
share/python-wheels/
|
||||||
|
*.egg-info/
|
||||||
|
.installed.cfg
|
||||||
|
*.egg
|
||||||
|
MANIFEST
|
||||||
|
|
||||||
|
# PyInstaller
|
||||||
|
# Usually these files are written by a python script from a template
|
||||||
|
# before PyInstaller builds the exe, so as to inject date/other infos into it.
|
||||||
|
*.manifest
|
||||||
|
*.spec
|
||||||
|
|
||||||
|
# Installer logs
|
||||||
|
pip-log.txt
|
||||||
|
pip-delete-this-directory.txt
|
||||||
|
|
||||||
|
# Unit test / coverage reports
|
||||||
|
htmlcov/
|
||||||
|
.tox/
|
||||||
|
.nox/
|
||||||
|
.coverage
|
||||||
|
.coverage.*
|
||||||
|
.cache
|
||||||
|
nosetests.xml
|
||||||
|
coverage.xml
|
||||||
|
*.cover
|
||||||
|
*.py,cover
|
||||||
|
.hypothesis/
|
||||||
|
.pytest_cache/
|
||||||
|
cover/
|
||||||
|
|
||||||
|
# Translations
|
||||||
|
*.mo
|
||||||
|
*.pot
|
||||||
|
|
||||||
|
# Django stuff:
|
||||||
|
*.log
|
||||||
|
local_settings.py
|
||||||
|
db.sqlite3
|
||||||
|
db.sqlite3-journal
|
||||||
|
|
||||||
|
# Flask stuff:
|
||||||
|
instance/
|
||||||
|
.webassets-cache
|
||||||
|
|
||||||
|
# Scrapy stuff:
|
||||||
|
.scrapy
|
||||||
|
|
||||||
|
# Sphinx documentation
|
||||||
|
docs/_build/
|
||||||
|
|
||||||
|
# PyBuilder
|
||||||
|
.pybuilder/
|
||||||
|
target/
|
||||||
|
|
||||||
|
# Jupyter Notebook
|
||||||
|
.ipynb_checkpoints
|
||||||
|
|
||||||
|
# IPython
|
||||||
|
profile_default/
|
||||||
|
ipython_config.py
|
||||||
|
|
||||||
|
# pyenv
|
||||||
|
# For a library or package, you might want to ignore these files since the code is
|
||||||
|
# intended to run in multiple environments; otherwise, check them in:
|
||||||
|
# .python-version
|
||||||
|
|
||||||
|
# pipenv
|
||||||
|
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
|
||||||
|
# However, in case of collaboration, if having platform-specific dependencies or dependencies
|
||||||
|
# having no cross-platform support, pipenv may install dependencies that don't work, or not
|
||||||
|
# install all needed dependencies.
|
||||||
|
#Pipfile.lock
|
||||||
|
|
||||||
|
# poetry
|
||||||
|
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
|
||||||
|
# This is especially recommended for binary packages to ensure reproducibility, and is more
|
||||||
|
# commonly ignored for libraries.
|
||||||
|
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
|
||||||
|
#poetry.lock
|
||||||
|
|
||||||
|
# pdm
|
||||||
|
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
|
||||||
|
#pdm.lock
|
||||||
|
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
|
||||||
|
# in version control.
|
||||||
|
# https://pdm.fming.dev/#use-with-ide
|
||||||
|
.pdm.toml
|
||||||
|
|
||||||
|
# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
|
||||||
|
__pypackages__/
|
||||||
|
|
||||||
|
# Celery stuff
|
||||||
|
celerybeat-schedule
|
||||||
|
celerybeat.pid
|
||||||
|
|
||||||
|
# SageMath parsed files
|
||||||
|
*.sage.py
|
||||||
|
|
||||||
|
# Environments
|
||||||
|
.env
|
||||||
|
.venv
|
||||||
|
env/
|
||||||
|
venv/
|
||||||
|
ENV/
|
||||||
|
env.bak/
|
||||||
|
venv.bak/
|
||||||
|
|
||||||
|
# Spyder project settings
|
||||||
|
.spyderproject
|
||||||
|
.spyproject
|
||||||
|
|
||||||
|
# Rope project settings
|
||||||
|
.ropeproject
|
||||||
|
|
||||||
|
# mkdocs documentation
|
||||||
|
/site
|
||||||
|
|
||||||
|
# mypy
|
||||||
|
.mypy_cache/
|
||||||
|
.dmypy.json
|
||||||
|
dmypy.json
|
||||||
|
|
||||||
|
# Pyre type checker
|
||||||
|
.pyre/
|
||||||
|
|
||||||
|
# pytype static type analyzer
|
||||||
|
.pytype/
|
||||||
|
|
||||||
|
# Cython debug symbols
|
||||||
|
cython_debug/
|
||||||
|
|
||||||
|
# PyCharm
|
||||||
|
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
|
||||||
|
# be added to the global gitignore or merged into this project gitignore. For a PyCharm
|
||||||
|
# project, it is recommended to include the following files:
|
||||||
|
# .idea/
|
||||||
|
# *.iml
|
||||||
|
# *.ipr
|
||||||
|
# *.iws
|
||||||
|
|
||||||
|
# VS Code
|
||||||
|
.vscode/
|
||||||
|
|
||||||
|
# macOS
|
||||||
|
.DS_Store
|
||||||
|
.AppleDouble
|
||||||
|
.LSOverride
|
||||||
|
|
||||||
|
# Windows
|
||||||
|
Thumbs.db
|
||||||
|
ehthumbs.db
|
||||||
|
Desktop.ini
|
||||||
|
|
||||||
|
# Linux
|
||||||
|
*~
|
||||||
|
|
||||||
|
# Temporary files
|
||||||
|
*.tmp
|
||||||
|
*.temp
|
||||||
|
*.swp
|
||||||
|
*.swo
|
||||||
|
*~
|
||||||
|
|
||||||
|
# Log files
|
||||||
|
*.log
|
||||||
|
|
||||||
|
# Database files
|
||||||
|
*.db
|
||||||
|
*.sqlite
|
||||||
|
*.sqlite3
|
||||||
|
|
||||||
|
# Configuration files with sensitive data
|
||||||
|
config.ini
|
||||||
|
secrets.json
|
||||||
|
.env.local
|
||||||
|
.env.production
|
||||||
|
|
||||||
|
# Test files
|
||||||
|
test_*.py
|
||||||
|
*_test.py
|
||||||
|
tests/
|
||||||
|
|
||||||
|
# Documentation
|
||||||
|
docs/
|
||||||
|
*.md
|
||||||
|
!README.md
|
||||||
|
|
||||||
|
# IDE files
|
||||||
|
.idea/
|
||||||
|
.vscode/
|
||||||
|
*.sublime-*
|
||||||
|
.atom/
|
||||||
|
|
||||||
|
# OS generated files
|
||||||
|
.DS_Store
|
||||||
|
.DS_Store?
|
||||||
|
._*
|
||||||
|
.Spotlight-V100
|
||||||
|
.Trashes
|
||||||
|
ehthumbs.db
|
||||||
|
Thumbs.db
|
||||||
|
|
||||||
|
uploads/
|
||||||
|
chequing statement.csv
|
||||||
|
test_images/
|
||||||
|
.cursorrules.md
|
||||||
@@ -0,0 +1,262 @@
|
|||||||
|
# AI Bookkeeper - Data Science Engine
|
||||||
|
|
||||||
|
AI-powered receipt-to-transaction matching engine using Groq LLM. This is a **Data Science Engine** that provides intelligent matching capabilities for backend applications.
|
||||||
|
|
||||||
|
## 🎯 Purpose
|
||||||
|
|
||||||
|
This Data Science Engine receives QuickBooks transaction data from backend applications and provides:
|
||||||
|
- **AI-powered receipt processing** (OCR and data extraction)
|
||||||
|
- **Intelligent receipt-transaction matching** with confidence scores
|
||||||
|
- **Configurable AI rules** for business logic
|
||||||
|
- **Feedback logging** for continuous improvement
|
||||||
|
- **RESTful API** for easy integration
|
||||||
|
|
||||||
|
## 🚀 Quick Start
|
||||||
|
|
||||||
|
### 1. Install Dependencies
|
||||||
|
```bash
|
||||||
|
pip install -r requirements.txt
|
||||||
|
```
|
||||||
|
|
||||||
|
### 2. Configure API Keys
|
||||||
|
Create a `.env` file in the project root with your Groq API key:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Create .env file
|
||||||
|
echo "GROQ_API_KEY=your_actual_groq_api_key_here" > .env
|
||||||
|
```
|
||||||
|
|
||||||
|
**Important**: Get your API key from [Groq Console](https://console.groq.com/)
|
||||||
|
|
||||||
|
### 3. Start the Server
|
||||||
|
```bash
|
||||||
|
# Option 1: Using the main script
|
||||||
|
python main.py
|
||||||
|
|
||||||
|
# Option 2: Using uvicorn directly
|
||||||
|
uvicorn main:app --host 0.0.0.0 --port 8343 --reload
|
||||||
|
```
|
||||||
|
|
||||||
|
### 4. Access API Documentation
|
||||||
|
- **Swagger UI**: http://localhost:8343/docs
|
||||||
|
- **ReDoc**: http://localhost:8343/redoc
|
||||||
|
|
||||||
|
## 📋 API Endpoints
|
||||||
|
|
||||||
|
### Transaction Import
|
||||||
|
- `POST /transactions/import/csv` - Import transactions from CSV file
|
||||||
|
- `POST /transactions/import/image` - Import transactions from image/PDF
|
||||||
|
|
||||||
|
### Receipt Processing
|
||||||
|
- `POST /upload-multiple` - Upload multiple receipt documents
|
||||||
|
- `POST /process/{file_id}` - Extract data from uploaded documents
|
||||||
|
|
||||||
|
### AI Matching Engine
|
||||||
|
- `POST /match-specific` - Match specific receipts to transactions using AI
|
||||||
|
|
||||||
|
### AI Rules Management
|
||||||
|
- `POST /rules` - Add new AI rules
|
||||||
|
- `GET /rules` - List all active rules
|
||||||
|
- `DELETE /rules/{rule_name}` - Delete rules
|
||||||
|
|
||||||
|
### System Monitoring
|
||||||
|
- `GET /stats` - Get system statistics and performance metrics
|
||||||
|
- `GET /` - Health check endpoint
|
||||||
|
|
||||||
|
## 🔧 Core Components
|
||||||
|
|
||||||
|
### **AIMatcher** (`ai_matcher.py`)
|
||||||
|
- Uses Groq LLM to compare receipts and transactions
|
||||||
|
- Provides confidence scores and reasoning
|
||||||
|
- Configurable matching criteria (amount, date, vendor)
|
||||||
|
- Rate limiting to prevent API quota exhaustion
|
||||||
|
|
||||||
|
### **AIRulesEngine** (`ai_rules.py`)
|
||||||
|
- Applies business rules for auto-approval and categorization
|
||||||
|
- Configurable rule conditions and actions
|
||||||
|
- Supports system and user-generated rules
|
||||||
|
- Safe condition evaluation with proper error handling
|
||||||
|
|
||||||
|
### **DocumentProcessor** (`document_processor.py`)
|
||||||
|
- AI-powered receipt data extraction using Groq vision model
|
||||||
|
- Supports PDF and image formats
|
||||||
|
- Robust JSON parsing with error handling
|
||||||
|
- Extracts vendor, amount, date, tax, and category information
|
||||||
|
|
||||||
|
### **MatchingEngine** (`matching_engine.py`)
|
||||||
|
- Main orchestrator combining all components
|
||||||
|
- Handles the complete matching workflow
|
||||||
|
- Provides statistics and feedback logging
|
||||||
|
- Configurable confidence thresholds
|
||||||
|
|
||||||
|
### **FeedbackLogger** (`feedback_logger.py`)
|
||||||
|
- Tracks manual overrides for AI training
|
||||||
|
- Maintains audit trail of user decisions
|
||||||
|
- Enables continuous model improvement
|
||||||
|
|
||||||
|
## 📊 Configuration
|
||||||
|
|
||||||
|
Edit `config.py` to adjust:
|
||||||
|
- **Confidence threshold** (default: 0.3)
|
||||||
|
- **Date tolerance days** (default: 7)
|
||||||
|
- **Amount tolerance percent** (default: 5%)
|
||||||
|
- **Groq API key** (from environment variable)
|
||||||
|
|
||||||
|
## 🔄 Integration Workflow
|
||||||
|
|
||||||
|
### 1. Import Transactions
|
||||||
|
```bash
|
||||||
|
# Import from CSV
|
||||||
|
curl -X POST -F "file=@transactions.csv" http://localhost:8343/transactions/import/csv
|
||||||
|
|
||||||
|
# Import from image
|
||||||
|
curl -X POST -F "file=@statement.jpg" http://localhost:8343/transactions/import/image
|
||||||
|
```
|
||||||
|
|
||||||
|
### 2. Upload and Process Receipts
|
||||||
|
```bash
|
||||||
|
# Upload receipts
|
||||||
|
curl -X POST -F "files=@receipt1.jpg" -F "files=@receipt2.jpg" http://localhost:8343/upload-multiple
|
||||||
|
|
||||||
|
# Process a specific receipt
|
||||||
|
curl -X POST http://localhost:8343/process/{file_id}
|
||||||
|
```
|
||||||
|
|
||||||
|
### 3. AI Matching
|
||||||
|
```bash
|
||||||
|
# Match specific receipts
|
||||||
|
curl -X POST -H "Content-Type: application/json" \
|
||||||
|
-d '["file_id_1", "file_id_2"]' \
|
||||||
|
http://localhost:8343/match-specific
|
||||||
|
```
|
||||||
|
|
||||||
|
### 4. Check Results
|
||||||
|
```bash
|
||||||
|
# Get system stats
|
||||||
|
curl http://localhost:8343/stats
|
||||||
|
|
||||||
|
# View AI rules
|
||||||
|
curl http://localhost:8343/rules
|
||||||
|
```
|
||||||
|
|
||||||
|
## 🎯 Key Features
|
||||||
|
|
||||||
|
- **AI-powered matching** with confidence scores
|
||||||
|
- **Rule-based auto-approval** and categorization
|
||||||
|
- **Feedback logging** for continuous improvement
|
||||||
|
- **Configurable matching parameters**
|
||||||
|
- **RESTful JSON API** for easy backend integration
|
||||||
|
- **Comprehensive error handling**
|
||||||
|
- **Rate limiting** to prevent API quota exhaustion
|
||||||
|
- **Robust JSON parsing** for AI responses
|
||||||
|
|
||||||
|
## 📝 Data Formats
|
||||||
|
|
||||||
|
### Transaction Input (CSV)
|
||||||
|
```csv
|
||||||
|
Date,Description,Amount,Category
|
||||||
|
2024-01-15,Starbucks Coffee,12.50,Food & Dining
|
||||||
|
2024-01-16,Office Supplies,45.99,Office
|
||||||
|
```
|
||||||
|
|
||||||
|
### Receipt Processing Output
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"vendor": "Starbucks",
|
||||||
|
"total_amount": 12.50,
|
||||||
|
"tax_amount": 1.25,
|
||||||
|
"date": "2024-01-15",
|
||||||
|
"category": "Food & Dining",
|
||||||
|
"confidence": 0.95,
|
||||||
|
"extraction_success": true
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
### Match Result Output
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"receipt_id": "uuid",
|
||||||
|
"transaction_id": "transaction_123",
|
||||||
|
"confidence_score": 0.95,
|
||||||
|
"match_reason": "Same vendor, minor date difference (Auto-approved by rules)",
|
||||||
|
"receipt_vendor": "Starbucks",
|
||||||
|
"receipt_amount": 12.50,
|
||||||
|
"transaction_vendor": "STARBUCKS",
|
||||||
|
"transaction_amount": 12.50
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
## 🔍 AI Matching Criteria
|
||||||
|
|
||||||
|
The engine uses multiple criteria for matching:
|
||||||
|
|
||||||
|
1. **Amount Similarity** - Compares receipt and transaction amounts (5% tolerance)
|
||||||
|
2. **Date Proximity** - Checks date closeness (7-day tolerance)
|
||||||
|
3. **Vendor Matching** - AI-powered vendor name comparison using Groq LLM
|
||||||
|
4. **Rule-based Auto-approval** - Automatic approval for exact matches and high-confidence matches
|
||||||
|
|
||||||
|
## 🛠️ Development
|
||||||
|
|
||||||
|
### Project Structure
|
||||||
|
```
|
||||||
|
├── main.py # FastAPI application entry point
|
||||||
|
├── ai_matcher.py # AI-powered matching logic
|
||||||
|
├── ai_rules.py # Business rules engine
|
||||||
|
├── document_processor.py # Receipt data extraction
|
||||||
|
├── matching_engine.py # Main matching orchestrator
|
||||||
|
├── feedback_logger.py # User feedback tracking
|
||||||
|
├── models.py # Pydantic data models
|
||||||
|
├── api_models.py # API request/response models
|
||||||
|
├── config.py # Configuration settings
|
||||||
|
├── requirements.txt # Python dependencies
|
||||||
|
└── test_images/ # Test image files
|
||||||
|
```
|
||||||
|
|
||||||
|
### Running Tests
|
||||||
|
```bash
|
||||||
|
# Test the server
|
||||||
|
curl http://localhost:8343/
|
||||||
|
|
||||||
|
# Test stats endpoint
|
||||||
|
curl http://localhost:8343/stats
|
||||||
|
|
||||||
|
# Test rules endpoint
|
||||||
|
curl http://localhost:8343/rules
|
||||||
|
```
|
||||||
|
|
||||||
|
## 🚀 Production Deployment
|
||||||
|
|
||||||
|
For production deployment:
|
||||||
|
- Replace in-memory storage with a database (PostgreSQL recommended)
|
||||||
|
- Configure proper authentication and authorization
|
||||||
|
- Set up monitoring and logging (ELK stack recommended)
|
||||||
|
- Use environment variables for all configuration
|
||||||
|
- Implement proper error handling and retries
|
||||||
|
- Set up rate limiting and API quotas
|
||||||
|
- Configure CORS for frontend integration
|
||||||
|
- Use HTTPS in production
|
||||||
|
|
||||||
|
## 📞 Support
|
||||||
|
|
||||||
|
This Data Science Engine is designed to be integrated with backend applications that handle:
|
||||||
|
- QuickBooks API connections
|
||||||
|
- User interface and workflows
|
||||||
|
- Data persistence and management
|
||||||
|
- External integrations
|
||||||
|
|
||||||
|
The engine focuses purely on AI/ML capabilities and provides a clean JSON API for backend integration.
|
||||||
|
|
||||||
|
## 🔧 Troubleshooting
|
||||||
|
|
||||||
|
### Common Issues
|
||||||
|
|
||||||
|
1. **API Key Error**: Ensure `GROQ_API_KEY` is set in your `.env` file
|
||||||
|
2. **Port Already in Use**: Kill existing process with `pkill -f "python main.py"`
|
||||||
|
3. **Import Errors**: Install dependencies with `pip install -r requirements.txt`
|
||||||
|
4. **Rate Limiting**: The system includes built-in rate limiting to prevent API quota exhaustion
|
||||||
|
|
||||||
|
### Logs
|
||||||
|
Check the application logs for detailed error information:
|
||||||
|
```bash
|
||||||
|
tail -f app.log
|
||||||
|
```
|
||||||
+244
@@ -0,0 +1,244 @@
|
|||||||
|
import groq
|
||||||
|
from datetime import datetime, timedelta
|
||||||
|
from typing import List, Tuple
|
||||||
|
import config
|
||||||
|
from models import Receipt, Transaction, Match
|
||||||
|
import time
|
||||||
|
import logging
|
||||||
|
import asyncio
|
||||||
|
|
||||||
|
# Set up logging
|
||||||
|
logging.basicConfig(level=logging.INFO)
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
class AIMatcher:
|
||||||
|
def __init__(self):
|
||||||
|
self.client = groq.Groq(api_key=config.GROQ_API_KEY)
|
||||||
|
self.model = "llama3-8b-8192"
|
||||||
|
self.max_retries = 3
|
||||||
|
self.retry_delay = 2 # seconds - increased for rate limiting
|
||||||
|
self.rate_limit_delay = 1.0 # seconds between API calls
|
||||||
|
self.last_api_call = 0
|
||||||
|
|
||||||
|
def match_receipts_to_transactions(self, receipts: List[Receipt], transactions: List[Transaction]) -> List[Match]:
|
||||||
|
"""Match receipts to transactions using AI"""
|
||||||
|
logger.info(f"Starting AI matching for {len(receipts)} receipts against {len(transactions)} transactions")
|
||||||
|
matches = []
|
||||||
|
|
||||||
|
for i, receipt in enumerate(receipts):
|
||||||
|
logger.info(f"Processing receipt {i+1}/{len(receipts)}: {receipt.vendor} - ${receipt.amount}")
|
||||||
|
|
||||||
|
# Rate limiting
|
||||||
|
self._rate_limit()
|
||||||
|
|
||||||
|
# Get the BEST match for this receipt (highest confidence score)
|
||||||
|
best_match = self._find_best_match(receipt, transactions)
|
||||||
|
if best_match:
|
||||||
|
matches.append(best_match)
|
||||||
|
logger.info(f"Found match: {best_match.confidence_score:.3f} - {best_match.match_reason}")
|
||||||
|
else:
|
||||||
|
logger.warning(f"No match found for receipt: {receipt.vendor} - ${receipt.amount}")
|
||||||
|
|
||||||
|
# Sort by confidence score (highest first)
|
||||||
|
matches = sorted(matches, key=lambda x: x.confidence_score, reverse=True)
|
||||||
|
logger.info(f"AI matching completed. Found {len(matches)} matches")
|
||||||
|
return matches
|
||||||
|
|
||||||
|
def _rate_limit(self):
|
||||||
|
"""Implement rate limiting to avoid API quota exhaustion"""
|
||||||
|
current_time = time.time()
|
||||||
|
time_since_last_call = current_time - self.last_api_call
|
||||||
|
|
||||||
|
if time_since_last_call < self.rate_limit_delay:
|
||||||
|
sleep_time = self.rate_limit_delay - time_since_last_call
|
||||||
|
logger.debug(f"Rate limiting: sleeping for {sleep_time:.2f} seconds")
|
||||||
|
time.sleep(sleep_time)
|
||||||
|
|
||||||
|
self.last_api_call = time.time()
|
||||||
|
|
||||||
|
def _find_best_match(self, receipt: Receipt, transactions: List[Transaction]) -> Match:
|
||||||
|
"""Find the BEST match for a receipt (highest confidence score)"""
|
||||||
|
candidates = self._filter_candidates(receipt, transactions)
|
||||||
|
if not candidates:
|
||||||
|
logger.warning(f"No candidates found for receipt: {receipt.vendor} - ${receipt.amount}")
|
||||||
|
return None
|
||||||
|
|
||||||
|
logger.info(f"Found {len(candidates)} candidates for receipt: {receipt.vendor}")
|
||||||
|
|
||||||
|
best_match = None
|
||||||
|
highest_score = 0
|
||||||
|
|
||||||
|
for transaction in candidates:
|
||||||
|
score, reason = self._calculate_match_score(receipt, transaction)
|
||||||
|
logger.debug(f"Score {score:.3f} for transaction {transaction.vendor}: {reason}")
|
||||||
|
|
||||||
|
# Keep the match with the highest score, regardless of how low it is
|
||||||
|
if score > highest_score:
|
||||||
|
highest_score = score
|
||||||
|
best_match = Match(receipt, transaction, score, reason)
|
||||||
|
|
||||||
|
return best_match
|
||||||
|
|
||||||
|
def _filter_candidates(self, receipt: Receipt, transactions: List[Transaction]) -> List[Transaction]:
|
||||||
|
"""Filter transactions to create a reasonable candidate list"""
|
||||||
|
candidates = []
|
||||||
|
amount_threshold = receipt.amount * 2.0 # 200% threshold - very inclusive
|
||||||
|
|
||||||
|
for transaction in transactions:
|
||||||
|
# Use absolute value for transaction amount comparison
|
||||||
|
transaction_amount_abs = abs(transaction.amount)
|
||||||
|
|
||||||
|
# Only exclude transactions with obviously different amounts
|
||||||
|
if abs(receipt.amount - transaction_amount_abs) <= amount_threshold:
|
||||||
|
candidates.append(transaction)
|
||||||
|
|
||||||
|
logger.debug(f"Filtered {len(transactions)} transactions to {len(candidates)} candidates")
|
||||||
|
return candidates
|
||||||
|
|
||||||
|
def _calculate_match_score(self, receipt: Receipt, transaction: Transaction) -> Tuple[float, str]:
|
||||||
|
"""Calculate match score using AI"""
|
||||||
|
# Calculate differences for the AI to consider
|
||||||
|
date_diff = abs((receipt.receipt_date - transaction.transaction_date).days)
|
||||||
|
transaction_amount_abs = abs(transaction.amount)
|
||||||
|
amount_diff = abs(receipt.amount - transaction_amount_abs)
|
||||||
|
amount_percent_diff = (amount_diff / receipt.amount) * 100 if receipt.amount > 0 else 0
|
||||||
|
|
||||||
|
prompt = f"""
|
||||||
|
Compare this receipt with this transaction and provide a confidence score (0-1) and brief reason.
|
||||||
|
|
||||||
|
Receipt: {receipt.vendor}, ${receipt.amount}, {receipt.receipt_date.strftime('%Y-%m-%d')}
|
||||||
|
Receipt Description: {receipt.description}
|
||||||
|
Receipt Category: {receipt.category}
|
||||||
|
Transaction: {transaction.vendor}, ${transaction.amount} (absolute: ${transaction_amount_abs}), {transaction.transaction_date.strftime('%Y-%m-%d')}
|
||||||
|
Transaction Notes: {transaction.notes}
|
||||||
|
|
||||||
|
Differences:
|
||||||
|
- Date difference: {date_diff} days
|
||||||
|
- Amount difference: ${amount_diff} ({amount_percent_diff:.1f}%)
|
||||||
|
- Vendor comparison: "{receipt.vendor}" vs "{transaction.vendor}"
|
||||||
|
- Description/Notes comparison: "{receipt.description}" vs "{transaction.notes}"
|
||||||
|
- Category: {receipt.category}
|
||||||
|
|
||||||
|
Score this potential match based on how likely it is the correct match:
|
||||||
|
|
||||||
|
- Perfect matches (same vendor, amount, date): 0.95-1.0
|
||||||
|
- High confidence (minor differences): 0.8-0.94
|
||||||
|
- Medium confidence (moderate differences): 0.6-0.79
|
||||||
|
- Low confidence (significant differences): 0.4-0.59
|
||||||
|
- Very low confidence (major differences): 0.2-0.39
|
||||||
|
- Minimal similarity: 0.1-0.19
|
||||||
|
- No meaningful similarity: 0.0-0.09
|
||||||
|
|
||||||
|
Consider description and category similarity in your scoring.
|
||||||
|
|
||||||
|
IMPORTANT: Return ONLY the score and reason separated by a pipe character.
|
||||||
|
Format: [score]|[reason]
|
||||||
|
Example: 0.85|Same vendor, same amount, 2 days apart
|
||||||
|
"""
|
||||||
|
|
||||||
|
for attempt in range(self.max_retries):
|
||||||
|
try:
|
||||||
|
result = self._call_groq_api_with_timeout(prompt, timeout=30) # Increased timeout
|
||||||
|
|
||||||
|
# Parse the result - handle multiple formats
|
||||||
|
score, reason = self._parse_ai_response(result)
|
||||||
|
|
||||||
|
logger.debug(f"AI Response: {result}")
|
||||||
|
logger.debug(f"Parsed: score={score}, reason={reason}")
|
||||||
|
|
||||||
|
return score, reason
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
logger.warning(f"Attempt {attempt + 1} failed for receipt {receipt.id}: {str(e)}")
|
||||||
|
if attempt < self.max_retries - 1:
|
||||||
|
# Exponential backoff for rate limiting
|
||||||
|
sleep_time = self.retry_delay * (2 ** attempt)
|
||||||
|
logger.info(f"Waiting {sleep_time} seconds before retry...")
|
||||||
|
time.sleep(sleep_time)
|
||||||
|
else:
|
||||||
|
logger.error(f"All attempts failed for receipt {receipt.id}")
|
||||||
|
return 0.0, f"AI error after {self.max_retries} attempts: {str(e)}"
|
||||||
|
|
||||||
|
def _parse_ai_response(self, result: str) -> Tuple[float, str]:
|
||||||
|
"""Parse AI response with robust error handling"""
|
||||||
|
result = result.strip()
|
||||||
|
logger.debug(f"Parsing AI response: {result}")
|
||||||
|
|
||||||
|
# Try to find score in various formats
|
||||||
|
if '|' in result:
|
||||||
|
parts = result.split('|')
|
||||||
|
logger.debug(f"Split response into {len(parts)} parts: {parts}")
|
||||||
|
|
||||||
|
# Look for a numeric score in any part
|
||||||
|
for i, part in enumerate(parts):
|
||||||
|
part = part.strip()
|
||||||
|
try:
|
||||||
|
# Remove any non-numeric characters except decimal point
|
||||||
|
score_str_clean = ''.join(c for c in part if c.isdigit() or c == '.')
|
||||||
|
if score_str_clean:
|
||||||
|
score = float(score_str_clean)
|
||||||
|
if 0 <= score <= 1: # Valid confidence score
|
||||||
|
# Get reason from other parts
|
||||||
|
reason_parts = [p.strip() for j, p in enumerate(parts) if j != i and p.strip()]
|
||||||
|
reason = ' | '.join(reason_parts) if reason_parts else "Score extracted"
|
||||||
|
logger.debug(f"Found score {score} in part {i}, reason: {reason}")
|
||||||
|
return score, reason
|
||||||
|
except ValueError:
|
||||||
|
continue
|
||||||
|
|
||||||
|
# Try to extract just a number from the response
|
||||||
|
try:
|
||||||
|
import re
|
||||||
|
numbers = re.findall(r'\d+\.?\d*', result)
|
||||||
|
if numbers:
|
||||||
|
for num_str in numbers:
|
||||||
|
score = float(num_str)
|
||||||
|
if 0 <= score <= 1: # Valid confidence score
|
||||||
|
logger.debug(f"Extracted score {score} from response")
|
||||||
|
return score, f"Extracted from response: {result[:50]}..."
|
||||||
|
except (ValueError, IndexError):
|
||||||
|
pass
|
||||||
|
|
||||||
|
# Fallback - try to find any number and normalize it
|
||||||
|
try:
|
||||||
|
import re
|
||||||
|
numbers = re.findall(r'\d+\.?\d*', result)
|
||||||
|
if numbers:
|
||||||
|
score = float(numbers[0])
|
||||||
|
# Normalize to 0-1 range if it's a percentage or other scale
|
||||||
|
if score > 1:
|
||||||
|
score = score / 100 # Assume percentage
|
||||||
|
score = max(0, min(1, score)) # Clamp to 0-1
|
||||||
|
logger.debug(f"Normalized score {score} from response")
|
||||||
|
return score, f"Normalized from response: {result[:50]}..."
|
||||||
|
except (ValueError, IndexError):
|
||||||
|
pass
|
||||||
|
|
||||||
|
# Final fallback
|
||||||
|
logger.warning(f"Could not parse AI response: {result}")
|
||||||
|
return 0.0, f"Unparseable response: {result[:50]}..."
|
||||||
|
|
||||||
|
def _call_groq_api_with_timeout(self, prompt: str, timeout: int = 15) -> str:
|
||||||
|
"""Make API call with timeout and retry logic"""
|
||||||
|
import concurrent.futures
|
||||||
|
|
||||||
|
def api_call():
|
||||||
|
try:
|
||||||
|
response = self.client.chat.completions.create(
|
||||||
|
model=self.model,
|
||||||
|
messages=[{"role": "user", "content": prompt}],
|
||||||
|
max_tokens=200,
|
||||||
|
temperature=0.1
|
||||||
|
)
|
||||||
|
return response.choices[0].message.content.strip()
|
||||||
|
except Exception as e:
|
||||||
|
raise e
|
||||||
|
|
||||||
|
try:
|
||||||
|
with concurrent.futures.ThreadPoolExecutor() as executor:
|
||||||
|
future = executor.submit(api_call)
|
||||||
|
return future.result(timeout=timeout)
|
||||||
|
except concurrent.futures.TimeoutError:
|
||||||
|
raise Exception(f"API call timed out after {timeout} seconds")
|
||||||
|
except Exception as e:
|
||||||
|
raise e
|
||||||
+126
@@ -0,0 +1,126 @@
|
|||||||
|
from dataclasses import dataclass
|
||||||
|
from typing import Dict, Any, List
|
||||||
|
import config
|
||||||
|
from models import Receipt, Transaction
|
||||||
|
from tax_rules_engine import TaxRulesEngine
|
||||||
|
|
||||||
|
@dataclass
|
||||||
|
class AIRule:
|
||||||
|
name: str
|
||||||
|
condition: str
|
||||||
|
action: str
|
||||||
|
source: str
|
||||||
|
status: str = "active"
|
||||||
|
|
||||||
|
class AIRulesEngine:
|
||||||
|
def __init__(self):
|
||||||
|
self.rules: List[AIRule] = []
|
||||||
|
self.tax_rules_engine = TaxRulesEngine()
|
||||||
|
self._load_default_rules()
|
||||||
|
|
||||||
|
def _load_default_rules(self):
|
||||||
|
self.rules = [
|
||||||
|
AIRule("exact_amount_match", "amount_diff <= 0.01", "auto_approve", "system"),
|
||||||
|
AIRule("same_vendor_same_date", "vendor_match and date_diff <= 1", "high_confidence", "system"),
|
||||||
|
AIRule("gas_station_pattern", "vendor_contains_gas_or_fuel", "categorize_transport", "system"),
|
||||||
|
# Tax-related rules
|
||||||
|
AIRule("fx_currency_mismatch", "currency_mismatch", "flag_fx_review", "tax_system"),
|
||||||
|
AIRule("meals_entertainment", "is_meals_entertainment", "apply_me_tax_rule", "tax_system"),
|
||||||
|
AIRule("provincial_tax_calculation", "has_address_info", "calculate_provincial_tax", "tax_system")
|
||||||
|
]
|
||||||
|
|
||||||
|
def apply_rules(self, receipt: Receipt, transaction: Transaction) -> Dict[str, Any]:
|
||||||
|
results = {"auto_approve": False, "confidence_boost": 0, "category": None, "tax_analysis": {}}
|
||||||
|
|
||||||
|
for rule in self.rules:
|
||||||
|
if rule.status != "active":
|
||||||
|
continue
|
||||||
|
|
||||||
|
if self._evaluate_condition(rule.condition, receipt, transaction):
|
||||||
|
self._execute_action(rule.action, results, receipt, transaction)
|
||||||
|
|
||||||
|
return results
|
||||||
|
|
||||||
|
def _evaluate_condition(self, condition: str, receipt: Receipt, transaction: Transaction) -> bool:
|
||||||
|
"""Safely evaluate rule conditions without using eval()"""
|
||||||
|
amount_diff = abs(receipt.amount - abs(transaction.amount))
|
||||||
|
date_diff = abs((receipt.receipt_date - transaction.transaction_date).days)
|
||||||
|
vendor_match = receipt.vendor.lower() in transaction.vendor.lower() or transaction.vendor.lower() in receipt.vendor.lower()
|
||||||
|
vendor_lower = receipt.vendor.lower()
|
||||||
|
vendor_contains_gas_or_fuel = 'gas' in vendor_lower or 'fuel' in vendor_lower
|
||||||
|
|
||||||
|
# Tax-related conditions
|
||||||
|
currency_mismatch = receipt.currency != transaction.currency
|
||||||
|
is_meals_entertainment = receipt.is_meals_entertainment
|
||||||
|
has_address_info = receipt.billing_address is not None or receipt.shipping_address is not None
|
||||||
|
|
||||||
|
# Handle specific condition types safely
|
||||||
|
if condition == "amount_diff <= 0.01":
|
||||||
|
return amount_diff <= 0.01
|
||||||
|
elif condition == "vendor_match and date_diff <= 1":
|
||||||
|
return vendor_match and date_diff <= 1
|
||||||
|
elif condition == "vendor_contains_gas_or_fuel":
|
||||||
|
return vendor_contains_gas_or_fuel
|
||||||
|
elif condition == "currency_mismatch":
|
||||||
|
return currency_mismatch
|
||||||
|
elif condition == "is_meals_entertainment":
|
||||||
|
return is_meals_entertainment
|
||||||
|
elif condition == "has_address_info":
|
||||||
|
return has_address_info
|
||||||
|
else:
|
||||||
|
# For any other conditions, try to evaluate them safely
|
||||||
|
try:
|
||||||
|
# Only allow safe operations
|
||||||
|
safe_globals = {
|
||||||
|
"amount_diff": amount_diff,
|
||||||
|
"date_diff": date_diff,
|
||||||
|
"vendor_match": vendor_match,
|
||||||
|
"vendor_contains_gas_or_fuel": vendor_contains_gas_or_fuel,
|
||||||
|
"currency_mismatch": currency_mismatch,
|
||||||
|
"is_meals_entertainment": is_meals_entertainment,
|
||||||
|
"has_address_info": has_address_info,
|
||||||
|
"receipt": receipt,
|
||||||
|
"transaction": transaction,
|
||||||
|
"abs": abs,
|
||||||
|
"len": len,
|
||||||
|
"min": min,
|
||||||
|
"max": max,
|
||||||
|
"sum": sum,
|
||||||
|
"round": round
|
||||||
|
}
|
||||||
|
return eval(condition, safe_globals, {})
|
||||||
|
except (SyntaxError, NameError, TypeError) as e:
|
||||||
|
print(f"Warning: Invalid condition '{condition}': {e}")
|
||||||
|
return False
|
||||||
|
|
||||||
|
def _execute_action(self, action: str, results: Dict[str, Any], receipt: Receipt, transaction: Transaction):
|
||||||
|
if action == "auto_approve":
|
||||||
|
results["auto_approve"] = True
|
||||||
|
elif action == "high_confidence":
|
||||||
|
results["confidence_boost"] += 0.2
|
||||||
|
elif action == "categorize_transport":
|
||||||
|
results["category"] = "Transportation"
|
||||||
|
elif action == "flag_fx_review":
|
||||||
|
# Apply FX rule and flag for review
|
||||||
|
fx_result = self.tax_rules_engine.apply_fx_rule(receipt, transaction)
|
||||||
|
results["tax_analysis"]["fx"] = fx_result
|
||||||
|
if fx_result.get("requires_manual_review", False):
|
||||||
|
results["confidence_boost"] -= 0.1 # Reduce confidence for FX issues
|
||||||
|
elif action == "apply_me_tax_rule":
|
||||||
|
# Apply meals & entertainment rule
|
||||||
|
me_result = self.tax_rules_engine.apply_meals_entertainment_rule(receipt)
|
||||||
|
results["tax_analysis"]["meals_entertainment"] = me_result
|
||||||
|
elif action == "calculate_provincial_tax":
|
||||||
|
# Calculate provincial tax
|
||||||
|
tax_result = self.tax_rules_engine.apply_sales_tax_rule(receipt)
|
||||||
|
results["tax_analysis"]["sales_tax"] = tax_result
|
||||||
|
|
||||||
|
def add_rule(self, rule: AIRule):
|
||||||
|
self.rules.append(rule)
|
||||||
|
|
||||||
|
def remove_rule(self, rule_name: str):
|
||||||
|
self.rules = [r for r in self.rules if r.name != rule_name]
|
||||||
|
|
||||||
|
def apply_tax_rules(self, receipt: Receipt, transaction: Transaction = None) -> Dict[str, Any]:
|
||||||
|
"""Apply all tax rules to a receipt/transaction pair"""
|
||||||
|
return self.tax_rules_engine.apply_all_tax_rules(receipt, transaction)
|
||||||
+120
@@ -0,0 +1,120 @@
|
|||||||
|
from pydantic import BaseModel
|
||||||
|
from datetime import datetime
|
||||||
|
from typing import List, Optional
|
||||||
|
|
||||||
|
class AddressRequest(BaseModel):
|
||||||
|
province: str
|
||||||
|
city: str
|
||||||
|
postal_code: str
|
||||||
|
country: str = "Canada"
|
||||||
|
|
||||||
|
class ReceiptRequest(BaseModel):
|
||||||
|
id: str
|
||||||
|
file_name: str
|
||||||
|
upload_date: datetime
|
||||||
|
receipt_date: datetime
|
||||||
|
amount: float
|
||||||
|
tax: float
|
||||||
|
vendor: str
|
||||||
|
category: str
|
||||||
|
description: str
|
||||||
|
# Tax rule fields
|
||||||
|
billing_address: Optional[AddressRequest] = None
|
||||||
|
shipping_address: Optional[AddressRequest] = None
|
||||||
|
currency: str = "CAD"
|
||||||
|
is_meals_entertainment: bool = False
|
||||||
|
|
||||||
|
class TransactionRequest(BaseModel):
|
||||||
|
id: str
|
||||||
|
transaction_date: datetime
|
||||||
|
amount: float
|
||||||
|
vendor: str
|
||||||
|
notes: str
|
||||||
|
# Tax rule fields
|
||||||
|
currency: str = "CAD"
|
||||||
|
fx_rate: Optional[float] = None
|
||||||
|
|
||||||
|
class AssetRequest(BaseModel):
|
||||||
|
id: str
|
||||||
|
name: str
|
||||||
|
purchase_date: datetime
|
||||||
|
purchase_amount: float
|
||||||
|
useful_life_years: int
|
||||||
|
residual_value: float
|
||||||
|
cca_rate: float
|
||||||
|
asset_class: str
|
||||||
|
|
||||||
|
class MatchingRequest(BaseModel):
|
||||||
|
receipt_ids: List[str]
|
||||||
|
transaction_ids: List[str]
|
||||||
|
|
||||||
|
class MatchResponse(BaseModel):
|
||||||
|
receipt_id: str
|
||||||
|
transaction_id: str
|
||||||
|
confidence_score: float
|
||||||
|
match_reason: str
|
||||||
|
tax_analysis: Optional[dict] = None
|
||||||
|
# Currency information
|
||||||
|
receipt_currency: str = "CAD"
|
||||||
|
transaction_currency: str = "CAD"
|
||||||
|
currency_match: bool = True
|
||||||
|
|
||||||
|
class MatchingResponse(BaseModel):
|
||||||
|
matches: List[MatchResponse]
|
||||||
|
stats: dict
|
||||||
|
|
||||||
|
class ApprovalRequest(BaseModel):
|
||||||
|
match_id: str
|
||||||
|
approved: bool
|
||||||
|
reason: Optional[str] = None
|
||||||
|
|
||||||
|
class RuleRequest(BaseModel):
|
||||||
|
name: str
|
||||||
|
condition: str
|
||||||
|
action: str
|
||||||
|
source: str = "user"
|
||||||
|
|
||||||
|
class DocumentUploadResponse(BaseModel):
|
||||||
|
file_id: str
|
||||||
|
filename: str
|
||||||
|
upload_date: datetime
|
||||||
|
status: str
|
||||||
|
|
||||||
|
class DocumentProcessResponse(BaseModel):
|
||||||
|
file_id: str
|
||||||
|
extraction_success: bool
|
||||||
|
vendor: Optional[str] = None
|
||||||
|
description: Optional[str] = None
|
||||||
|
total_amount: Optional[float] = None
|
||||||
|
tax_amount: Optional[float] = None
|
||||||
|
date: Optional[str] = None
|
||||||
|
category: Optional[str] = None
|
||||||
|
confidence: Optional[float] = None
|
||||||
|
error: Optional[str] = None
|
||||||
|
|
||||||
|
# New tax-related models
|
||||||
|
class TaxCalculationRequest(BaseModel):
|
||||||
|
receipt_id: str
|
||||||
|
transaction_id: Optional[str] = None
|
||||||
|
|
||||||
|
class TaxCalculationResponse(BaseModel):
|
||||||
|
receipt_id: str
|
||||||
|
rules_applied: List[str]
|
||||||
|
sales_tax: dict
|
||||||
|
fx_analysis: Optional[dict] = None
|
||||||
|
meals_entertainment: dict
|
||||||
|
|
||||||
|
class DepreciationRequest(BaseModel):
|
||||||
|
asset: AssetRequest
|
||||||
|
year: int
|
||||||
|
method: str # "straight_line" or "cca"
|
||||||
|
|
||||||
|
class DepreciationResponse(BaseModel):
|
||||||
|
asset_id: str
|
||||||
|
year: int
|
||||||
|
method: str
|
||||||
|
depreciation: float
|
||||||
|
book_value: float
|
||||||
|
total_depreciation: Optional[float] = None
|
||||||
|
success: bool
|
||||||
|
error: Optional[str] = None
|
||||||
@@ -0,0 +1,15 @@
|
|||||||
|
import os
|
||||||
|
from dotenv import load_dotenv
|
||||||
|
|
||||||
|
load_dotenv()
|
||||||
|
|
||||||
|
# Get API key from environment variable with fallback
|
||||||
|
GROQ_API_KEY = os.getenv("GROQ_API_KEY", "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")
|
||||||
|
|
||||||
|
# Validate API key
|
||||||
|
if not GROQ_API_KEY or GROQ_API_KEY == "your_api_key_here":
|
||||||
|
raise ValueError("GROQ_API_KEY environment variable is not set or invalid. Please set it in your .env file.")
|
||||||
|
|
||||||
|
CONFIDENCE_THRESHOLD = 0.3
|
||||||
|
DATE_TOLERANCE_DAYS = 7
|
||||||
|
AMOUNT_TOLERANCE_PERCENT = 0.05
|
||||||
@@ -0,0 +1,498 @@
|
|||||||
|
import groq
|
||||||
|
import base64
|
||||||
|
import io
|
||||||
|
from PIL import Image
|
||||||
|
import PyPDF2
|
||||||
|
from typing import Dict, Any, List, Optional
|
||||||
|
import config
|
||||||
|
import os
|
||||||
|
import aiofiles
|
||||||
|
from datetime import datetime
|
||||||
|
import logging
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
class DocumentProcessor:
|
||||||
|
def __init__(self):
|
||||||
|
self.client = groq.Groq(api_key=config.GROQ_API_KEY)
|
||||||
|
self.model = "meta-llama/llama-4-scout-17b-16e-instruct" # Vision model
|
||||||
|
|
||||||
|
async def process_file(self, file_path: str, file_type: str) -> Dict[str, Any]:
|
||||||
|
"""Process uploaded file and extract receipt data"""
|
||||||
|
try:
|
||||||
|
if file_type.lower() in ['jpg', 'jpeg', 'png', 'gif', 'bmp']:
|
||||||
|
return await self._process_image(file_path)
|
||||||
|
elif file_type.lower() == 'pdf':
|
||||||
|
return await self._process_pdf(file_path)
|
||||||
|
else:
|
||||||
|
raise ValueError(f"Unsupported file type: {file_type}")
|
||||||
|
except Exception as e:
|
||||||
|
return {"error": str(e)}
|
||||||
|
|
||||||
|
async def _process_image(self, image_path: str) -> Dict[str, Any]:
|
||||||
|
"""Extract data from image using Groq vision"""
|
||||||
|
try:
|
||||||
|
# Encode image to base64
|
||||||
|
base64_image = self._encode_image(image_path)
|
||||||
|
|
||||||
|
# Create Groq vision prompt
|
||||||
|
prompt = """
|
||||||
|
Analyze this receipt image and extract the following information in JSON format:
|
||||||
|
{
|
||||||
|
"vendor": "Store/company name",
|
||||||
|
"description": "Detailed description of items/services purchased",
|
||||||
|
"total_amount": 0.00,
|
||||||
|
"tax_amount": 0.00,
|
||||||
|
"date": "YYYY-MM-DD",
|
||||||
|
"category": "Food/Transport/Office/Other",
|
||||||
|
"confidence": 0.95
|
||||||
|
}
|
||||||
|
|
||||||
|
Rules:
|
||||||
|
- Extract vendor name as it appears on receipt
|
||||||
|
- Extract description of items/services purchased (e.g., "Coffee and sandwich", "Gasoline", "Office supplies")
|
||||||
|
- Total amount should be the final total including tax
|
||||||
|
- Tax amount is separate tax line if available
|
||||||
|
- Date should be the date on the receipt
|
||||||
|
- Categorize based on vendor type (Starbucks=Food, Shell=Transport, etc.)
|
||||||
|
- Confidence score 0-1 based on how clear the receipt is
|
||||||
|
|
||||||
|
Return only valid JSON.
|
||||||
|
"""
|
||||||
|
|
||||||
|
# Call Groq vision API with correct format
|
||||||
|
response = self.client.chat.completions.create(
|
||||||
|
messages=[
|
||||||
|
{
|
||||||
|
"role": "user",
|
||||||
|
"content": [
|
||||||
|
{"type": "text", "text": prompt},
|
||||||
|
{
|
||||||
|
"type": "image_url",
|
||||||
|
"image_url": {
|
||||||
|
"url": f"data:image/jpeg;base64,{base64_image}",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
],
|
||||||
|
}
|
||||||
|
],
|
||||||
|
model=self.model,
|
||||||
|
max_tokens=500,
|
||||||
|
temperature=0.1
|
||||||
|
)
|
||||||
|
|
||||||
|
# Parse response
|
||||||
|
result_text = response.choices[0].message.content.strip()
|
||||||
|
return self._parse_extraction_result(result_text)
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
return {"error": f"Image processing error: {str(e)}"}
|
||||||
|
|
||||||
|
def _encode_image(self, image_path: str) -> str:
|
||||||
|
"""Encode image to base64 string"""
|
||||||
|
with open(image_path, "rb") as image_file:
|
||||||
|
return base64.b64encode(image_file.read()).decode('utf-8')
|
||||||
|
|
||||||
|
async def _process_pdf(self, pdf_path: str) -> Dict[str, Any]:
|
||||||
|
"""Extract data from PDF by converting to image first"""
|
||||||
|
try:
|
||||||
|
# For now, extract text from PDF and process as text
|
||||||
|
text_content = self._extract_text_from_pdf(pdf_path)
|
||||||
|
return self._process_text_content(text_content)
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
return {"error": f"PDF processing error: {str(e)}"}
|
||||||
|
|
||||||
|
def _extract_text_from_pdf(self, pdf_path: str) -> str:
|
||||||
|
"""Extract text from PDF"""
|
||||||
|
try:
|
||||||
|
with open(pdf_path, 'rb') as file:
|
||||||
|
pdf_reader = PyPDF2.PdfReader(file)
|
||||||
|
text = ""
|
||||||
|
for page in pdf_reader.pages:
|
||||||
|
text += page.extract_text() + "\n"
|
||||||
|
return text
|
||||||
|
except Exception as e:
|
||||||
|
return ""
|
||||||
|
|
||||||
|
def _process_text_content(self, text_content: str) -> Dict[str, Any]:
|
||||||
|
"""Process text content using Groq (fallback for PDFs)"""
|
||||||
|
try:
|
||||||
|
prompt = f"""
|
||||||
|
Analyze this receipt text and extract the following information in JSON format:
|
||||||
|
|
||||||
|
Receipt Text:
|
||||||
|
{text_content}
|
||||||
|
|
||||||
|
Extract:
|
||||||
|
{{
|
||||||
|
"vendor": "Store/company name",
|
||||||
|
"description": "Detailed description of items/services purchased",
|
||||||
|
"total_amount": 0.00,
|
||||||
|
"tax_amount": 0.00,
|
||||||
|
"date": "YYYY-MM-DD",
|
||||||
|
"category": "Food/Transport/Office/Other",
|
||||||
|
"confidence": 0.95
|
||||||
|
}}
|
||||||
|
|
||||||
|
Rules:
|
||||||
|
- Extract vendor name as it appears on receipt
|
||||||
|
- Extract description of items/services purchased (e.g., "Coffee and sandwich", "Gasoline", "Office supplies")
|
||||||
|
- Total amount should be the final total including tax
|
||||||
|
- Tax amount is separate tax line if available
|
||||||
|
- Date should be the date on the receipt
|
||||||
|
- Categorize based on vendor type
|
||||||
|
- Confidence score 0-1 based on clarity
|
||||||
|
|
||||||
|
Return only valid JSON.
|
||||||
|
"""
|
||||||
|
|
||||||
|
response = self.client.chat.completions.create(
|
||||||
|
model=self.model,
|
||||||
|
messages=[{"role": "user", "content": prompt}],
|
||||||
|
max_tokens=500,
|
||||||
|
temperature=0.1
|
||||||
|
)
|
||||||
|
|
||||||
|
result_text = response.choices[0].message.content.strip()
|
||||||
|
return self._parse_extraction_result(result_text)
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
return {"error": f"Text processing error: {str(e)}"}
|
||||||
|
|
||||||
|
def _parse_extraction_result(self, result_text: str) -> Dict[str, Any]:
|
||||||
|
"""Parse Groq response and extract JSON data"""
|
||||||
|
try:
|
||||||
|
# Clean up response and extract JSON
|
||||||
|
import json
|
||||||
|
import re
|
||||||
|
|
||||||
|
# Find JSON in response - try multiple patterns
|
||||||
|
json_match = re.search(r'\{.*\}', result_text, re.DOTALL)
|
||||||
|
if json_match:
|
||||||
|
json_str = json_match.group()
|
||||||
|
|
||||||
|
# Clean up common JSON issues
|
||||||
|
json_str = re.sub(r',\s*([}\]])', r'\1', json_str) # Remove trailing commas
|
||||||
|
json_str = re.sub(r'([{,])\s*([a-zA-Z_][a-zA-Z0-9_]*)\s*:', r'\1"\2":', json_str) # Quote unquoted keys
|
||||||
|
|
||||||
|
try:
|
||||||
|
data = json.loads(json_str)
|
||||||
|
except json.JSONDecodeError as e:
|
||||||
|
# Try to fix common JSON issues
|
||||||
|
logger.warning(f"Initial JSON parsing failed: {e}")
|
||||||
|
|
||||||
|
# Try to extract individual fields using regex
|
||||||
|
vendor_match = re.search(r'"vendor"\s*:\s*"([^"]*)"', json_str)
|
||||||
|
description_match = re.search(r'"description"\s*:\s*"([^"]*)"', json_str)
|
||||||
|
total_amount_match = re.search(r'"total_amount"\s*:\s*([0-9.]+)', json_str)
|
||||||
|
tax_amount_match = re.search(r'"tax_amount"\s*:\s*([0-9.]+)', json_str)
|
||||||
|
date_match = re.search(r'"date"\s*:\s*"([^"]*)"', json_str)
|
||||||
|
category_match = re.search(r'"category"\s*:\s*"([^"]*)"', json_str)
|
||||||
|
confidence_match = re.search(r'"confidence"\s*:\s*([0-9.]+)', json_str)
|
||||||
|
|
||||||
|
data = {
|
||||||
|
"vendor": vendor_match.group(1) if vendor_match else "",
|
||||||
|
"description": description_match.group(1) if description_match else "",
|
||||||
|
"total_amount": float(total_amount_match.group(1)) if total_amount_match else 0.0,
|
||||||
|
"tax_amount": float(tax_amount_match.group(1)) if tax_amount_match else 0.0,
|
||||||
|
"date": date_match.group(1) if date_match else "",
|
||||||
|
"category": category_match.group(1) if category_match else "Other",
|
||||||
|
"confidence": float(confidence_match.group(1)) if confidence_match else 0.5
|
||||||
|
}
|
||||||
|
|
||||||
|
# Validate and clean data
|
||||||
|
return {
|
||||||
|
"vendor": str(data.get("vendor", "")).strip(),
|
||||||
|
"description": str(data.get("description", "")).strip(),
|
||||||
|
"total_amount": float(data.get("total_amount", 0)),
|
||||||
|
"tax_amount": float(data.get("tax_amount", 0)),
|
||||||
|
"date": str(data.get("date", "")).strip(),
|
||||||
|
"category": str(data.get("category", "Other")).strip(),
|
||||||
|
"confidence": float(data.get("confidence", 0.5)),
|
||||||
|
"extraction_success": True
|
||||||
|
}
|
||||||
|
else:
|
||||||
|
# Try to extract fields from plain text
|
||||||
|
logger.warning("No JSON found in response, attempting text extraction")
|
||||||
|
return self._extract_from_plain_text(result_text)
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"JSON parsing error: {str(e)}")
|
||||||
|
return {"error": f"JSON parsing error: {str(e)}", "extraction_success": False}
|
||||||
|
|
||||||
|
def _extract_from_plain_text(self, text: str) -> Dict[str, Any]:
|
||||||
|
"""Extract receipt data from plain text when JSON parsing fails"""
|
||||||
|
try:
|
||||||
|
import re
|
||||||
|
|
||||||
|
# Extract vendor (look for common patterns)
|
||||||
|
vendor_patterns = [
|
||||||
|
r'(?:vendor|store|merchant|company)\s*[:\-]?\s*([A-Za-z0-9\s&.,]+)',
|
||||||
|
r'([A-Z][A-Za-z0-9\s&.,]{3,30})', # Capitalized words
|
||||||
|
]
|
||||||
|
|
||||||
|
vendor = ""
|
||||||
|
for pattern in vendor_patterns:
|
||||||
|
match = re.search(pattern, text, re.IGNORECASE)
|
||||||
|
if match:
|
||||||
|
vendor = match.group(1).strip()
|
||||||
|
break
|
||||||
|
|
||||||
|
# Extract amount (look for currency patterns)
|
||||||
|
amount_patterns = [
|
||||||
|
r'\$?\s*([0-9,]+\.?[0-9]*)',
|
||||||
|
r'(?:total|amount|sum)\s*[:\-]?\s*\$?\s*([0-9,]+\.?[0-9]*)',
|
||||||
|
]
|
||||||
|
|
||||||
|
total_amount = 0.0
|
||||||
|
for pattern in amount_patterns:
|
||||||
|
match = re.search(pattern, text, re.IGNORECASE)
|
||||||
|
if match:
|
||||||
|
try:
|
||||||
|
total_amount = float(match.group(1).replace(',', ''))
|
||||||
|
break
|
||||||
|
except ValueError:
|
||||||
|
continue
|
||||||
|
|
||||||
|
# Extract date
|
||||||
|
date_patterns = [
|
||||||
|
r'(\d{4}-\d{2}-\d{2})',
|
||||||
|
r'(\d{1,2}/\d{1,2}/\d{2,4})',
|
||||||
|
r'(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)\s+\d{1,2},?\s+\d{4}',
|
||||||
|
]
|
||||||
|
|
||||||
|
date = ""
|
||||||
|
for pattern in date_patterns:
|
||||||
|
match = re.search(pattern, text, re.IGNORECASE)
|
||||||
|
if match:
|
||||||
|
date = match.group(0)
|
||||||
|
break
|
||||||
|
|
||||||
|
return {
|
||||||
|
"vendor": vendor or "Unknown",
|
||||||
|
"total_amount": total_amount,
|
||||||
|
"tax_amount": 0.0,
|
||||||
|
"date": date or "",
|
||||||
|
"category": "Other",
|
||||||
|
"confidence": 0.3, # Low confidence for text extraction
|
||||||
|
"extraction_success": True
|
||||||
|
}
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Text extraction error: {str(e)}")
|
||||||
|
return {
|
||||||
|
"vendor": "Unknown",
|
||||||
|
"total_amount": 0.0,
|
||||||
|
"tax_amount": 0.0,
|
||||||
|
"date": "",
|
||||||
|
"category": "Other",
|
||||||
|
"confidence": 0.1,
|
||||||
|
"extraction_success": False,
|
||||||
|
"error": f"Text extraction failed: {str(e)}"
|
||||||
|
}
|
||||||
|
|
||||||
|
async def save_uploaded_file(self, file_content: bytes, filename: str) -> str:
|
||||||
|
"""Save uploaded file to temporary storage"""
|
||||||
|
try:
|
||||||
|
# Create uploads directory if it doesn't exist
|
||||||
|
upload_dir = "uploads"
|
||||||
|
os.makedirs(upload_dir, exist_ok=True)
|
||||||
|
|
||||||
|
# Generate unique filename
|
||||||
|
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
|
||||||
|
safe_filename = f"{timestamp}_{filename.replace(' ', '_')}"
|
||||||
|
file_path = os.path.join(upload_dir, safe_filename)
|
||||||
|
|
||||||
|
# Save file
|
||||||
|
async with aiofiles.open(file_path, 'wb') as f:
|
||||||
|
await f.write(file_content)
|
||||||
|
|
||||||
|
return file_path
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
raise Exception(f"Failed to save file: {str(e)}")
|
||||||
|
|
||||||
|
async def extract_transactions_from_image(self, image_path: str) -> Dict[str, Any]:
|
||||||
|
"""Extract multiple transactions from an image (bank statement, credit card statement, etc.)"""
|
||||||
|
try:
|
||||||
|
# Encode image to base64
|
||||||
|
base64_image = self._encode_image(image_path)
|
||||||
|
|
||||||
|
# Create Groq vision prompt for transaction extraction
|
||||||
|
prompt = """
|
||||||
|
Analyze this financial document image (bank statement, credit card statement, etc.) and extract ALL transactions in JSON format.
|
||||||
|
|
||||||
|
Look for transaction lists, payment records, or any financial entries that show:
|
||||||
|
- Date
|
||||||
|
- Amount (positive or negative)
|
||||||
|
- Vendor/Description/Payee name
|
||||||
|
- Any additional notes or memo
|
||||||
|
|
||||||
|
Return the transactions as a JSON array:
|
||||||
|
{
|
||||||
|
"extraction_success": true,
|
||||||
|
"transactions": [
|
||||||
|
{
|
||||||
|
"date": "YYYY-MM-DD",
|
||||||
|
"amount": 0.00,
|
||||||
|
"vendor": "Vendor name",
|
||||||
|
"memo": "Additional notes"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"date": "YYYY-MM-DD",
|
||||||
|
"amount": -0.00,
|
||||||
|
"vendor": "Another vendor",
|
||||||
|
"memo": "Payment or charge description"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
|
||||||
|
Rules:
|
||||||
|
- Extract ALL visible transactions
|
||||||
|
- Include both positive (credits) and negative (debits) amounts
|
||||||
|
- Use the actual date format from the document
|
||||||
|
- Vendor should be the merchant/payee name
|
||||||
|
- Memo can include transaction type, reference numbers, etc.
|
||||||
|
- If no transactions found, return empty array but set extraction_success to true
|
||||||
|
|
||||||
|
Return only valid JSON.
|
||||||
|
"""
|
||||||
|
|
||||||
|
# Call Groq vision API
|
||||||
|
response = self.client.chat.completions.create(
|
||||||
|
messages=[
|
||||||
|
{
|
||||||
|
"role": "user",
|
||||||
|
"content": [
|
||||||
|
{"type": "text", "text": prompt},
|
||||||
|
{
|
||||||
|
"type": "image_url",
|
||||||
|
"image_url": {
|
||||||
|
"url": f"data:image/jpeg;base64,{base64_image}",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
],
|
||||||
|
}
|
||||||
|
],
|
||||||
|
model=self.model,
|
||||||
|
max_tokens=2000, # Higher token limit for multiple transactions
|
||||||
|
temperature=0.1
|
||||||
|
)
|
||||||
|
|
||||||
|
# Parse response
|
||||||
|
result_text = response.choices[0].message.content.strip()
|
||||||
|
return self._parse_transaction_extraction_result(result_text)
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
return {
|
||||||
|
"extraction_success": False,
|
||||||
|
"error": f"Transaction extraction error: {str(e)}",
|
||||||
|
"transactions": []
|
||||||
|
}
|
||||||
|
|
||||||
|
def _parse_transaction_extraction_result(self, result_text: str) -> Dict[str, Any]:
|
||||||
|
"""Parse Groq response for transaction extraction"""
|
||||||
|
try:
|
||||||
|
import json
|
||||||
|
import re
|
||||||
|
|
||||||
|
# Find the first '{' and last '}'
|
||||||
|
start = result_text.find('{')
|
||||||
|
end = result_text.rfind('}')
|
||||||
|
if start == -1 or end == -1 or end <= start:
|
||||||
|
return {
|
||||||
|
"extraction_success": False,
|
||||||
|
"error": "Could not find JSON object in AI response",
|
||||||
|
"transactions": []
|
||||||
|
}
|
||||||
|
json_str = result_text[start:end+1]
|
||||||
|
|
||||||
|
# Remove trailing commas before } or ]
|
||||||
|
json_str = re.sub(r',\s*([}\]])', r'\1', json_str)
|
||||||
|
|
||||||
|
try:
|
||||||
|
data = json.loads(json_str)
|
||||||
|
except Exception as e:
|
||||||
|
import logging
|
||||||
|
logging.error(f"JSON parsing error: {str(e)}")
|
||||||
|
logging.error(f"Offending JSON string:\n{json_str}")
|
||||||
|
return {
|
||||||
|
"extraction_success": False,
|
||||||
|
"error": f"JSON parsing error: {str(e)}",
|
||||||
|
"transactions": []
|
||||||
|
}
|
||||||
|
|
||||||
|
# Validate and clean data
|
||||||
|
transactions = data.get("transactions", [])
|
||||||
|
cleaned_transactions = []
|
||||||
|
for txn in transactions:
|
||||||
|
try:
|
||||||
|
cleaned_txn = {
|
||||||
|
"date": str(txn.get("date", "")).strip(),
|
||||||
|
"amount": float(str(txn.get("amount", 0)).replace('$', '').replace(',', '')),
|
||||||
|
"vendor": str(txn.get("vendor", "")).strip(),
|
||||||
|
"memo": str(txn.get("memo", "")).strip()
|
||||||
|
}
|
||||||
|
cleaned_transactions.append(cleaned_txn)
|
||||||
|
except Exception as e:
|
||||||
|
continue
|
||||||
|
return {
|
||||||
|
"extraction_success": data.get("extraction_success", True),
|
||||||
|
"transactions": cleaned_transactions,
|
||||||
|
"total_transactions": len(cleaned_transactions)
|
||||||
|
}
|
||||||
|
except Exception as e:
|
||||||
|
import logging
|
||||||
|
logging.error(f"JSON parsing error (outer): {str(e)}")
|
||||||
|
return {
|
||||||
|
"extraction_success": False,
|
||||||
|
"error": f"JSON parsing error: {str(e)}",
|
||||||
|
"transactions": []
|
||||||
|
}
|
||||||
|
|
||||||
|
def _parse_date_to_iso(self, date_str: str) -> str:
|
||||||
|
"""Parse various date formats and convert to YYYY-MM-DD"""
|
||||||
|
try:
|
||||||
|
import re
|
||||||
|
from datetime import datetime
|
||||||
|
|
||||||
|
date_str = date_str.strip().upper()
|
||||||
|
|
||||||
|
# Handle formats like "MAY 22", "JUN 01", "MAY 22, 2024"
|
||||||
|
month_pattern = r'(JAN|FEB|MAR|APR|MAY|JUN|JUL|AUG|SEP|OCT|NOV|DEC)\s+(\d{1,2})(?:,\s*(\d{4}))?'
|
||||||
|
match = re.match(month_pattern, date_str)
|
||||||
|
|
||||||
|
if match:
|
||||||
|
month_abbr, day, year = match.groups()
|
||||||
|
month_map = {
|
||||||
|
'JAN': 1, 'FEB': 2, 'MAR': 3, 'APR': 4, 'MAY': 5, 'JUN': 6,
|
||||||
|
'JUL': 7, 'AUG': 8, 'SEP': 9, 'OCT': 10, 'NOV': 11, 'DEC': 12
|
||||||
|
}
|
||||||
|
|
||||||
|
month = month_map[month_abbr]
|
||||||
|
day = int(day)
|
||||||
|
year = int(year) if year else datetime.now().year
|
||||||
|
|
||||||
|
# Handle 2-digit years
|
||||||
|
if year < 100:
|
||||||
|
year += 2000
|
||||||
|
|
||||||
|
return f"{year:04d}-{month:02d}-{day:02d}"
|
||||||
|
|
||||||
|
# Handle YYYY-MM-DD format
|
||||||
|
if re.match(r'\d{4}-\d{2}-\d{2}', date_str):
|
||||||
|
return date_str
|
||||||
|
|
||||||
|
# Handle MM/DD/YYYY format
|
||||||
|
if re.match(r'\d{1,2}/\d{1,2}/\d{4}', date_str):
|
||||||
|
return datetime.strptime(date_str, '%m/%d/%Y').strftime('%Y-%m-%d')
|
||||||
|
|
||||||
|
# Handle MM/DD/YY format
|
||||||
|
if re.match(r'\d{1,2}/\d{1,2}/\d{2}', date_str):
|
||||||
|
return datetime.strptime(date_str, '%m/%d/%y').strftime('%Y-%m-%d')
|
||||||
|
|
||||||
|
return None
|
||||||
|
|
||||||
|
except Exception:
|
||||||
|
return None
|
||||||
@@ -0,0 +1,60 @@
|
|||||||
|
from dataclasses import dataclass
|
||||||
|
from datetime import datetime, timedelta
|
||||||
|
from typing import List, Optional
|
||||||
|
import json
|
||||||
|
import os
|
||||||
|
|
||||||
|
@dataclass
|
||||||
|
class FeedbackLog:
|
||||||
|
transaction_id: str
|
||||||
|
original_match: str
|
||||||
|
correction: str
|
||||||
|
reason: str
|
||||||
|
timestamp: datetime
|
||||||
|
user_id: str
|
||||||
|
|
||||||
|
class FeedbackLogger:
|
||||||
|
def __init__(self, log_file: str = "feedback_logs.json"):
|
||||||
|
self.log_file = log_file
|
||||||
|
self.logs: List[FeedbackLog] = self._load_logs()
|
||||||
|
|
||||||
|
def _load_logs(self) -> List[FeedbackLog]:
|
||||||
|
if not os.path.exists(self.log_file):
|
||||||
|
return []
|
||||||
|
|
||||||
|
try:
|
||||||
|
with open(self.log_file, 'r') as f:
|
||||||
|
data = json.load(f)
|
||||||
|
return [FeedbackLog(**log) for log in data]
|
||||||
|
except:
|
||||||
|
return []
|
||||||
|
|
||||||
|
def _save_logs(self):
|
||||||
|
with open(self.log_file, 'w') as f:
|
||||||
|
json.dump([{
|
||||||
|
'transaction_id': log.transaction_id,
|
||||||
|
'original_match': log.original_match,
|
||||||
|
'correction': log.correction,
|
||||||
|
'reason': log.reason,
|
||||||
|
'timestamp': log.timestamp.isoformat(),
|
||||||
|
'user_id': log.user_id
|
||||||
|
} for log in self.logs], f, indent=2)
|
||||||
|
|
||||||
|
def log_override(self, transaction_id: str, original_match: str, correction: str, reason: str, user_id: str):
|
||||||
|
log = FeedbackLog(
|
||||||
|
transaction_id=transaction_id,
|
||||||
|
original_match=original_match,
|
||||||
|
correction=correction,
|
||||||
|
reason=reason,
|
||||||
|
timestamp=datetime.now(),
|
||||||
|
user_id=user_id
|
||||||
|
)
|
||||||
|
self.logs.append(log)
|
||||||
|
self._save_logs()
|
||||||
|
|
||||||
|
def get_logs_by_transaction(self, transaction_id: str) -> List[FeedbackLog]:
|
||||||
|
return [log for log in self.logs if log.transaction_id == transaction_id]
|
||||||
|
|
||||||
|
def get_recent_logs(self, days: int = 30) -> List[FeedbackLog]:
|
||||||
|
cutoff = datetime.now() - timedelta(days=days)
|
||||||
|
return [log for log in self.logs if log.timestamp > cutoff]
|
||||||
@@ -0,0 +1,138 @@
|
|||||||
|
import os
|
||||||
|
import io
|
||||||
|
from typing import List, Dict, Any, Optional
|
||||||
|
from datetime import datetime, timedelta
|
||||||
|
|
||||||
|
class GoogleDriveSync:
|
||||||
|
def __init__(self):
|
||||||
|
self.service = None
|
||||||
|
self.processed_files = set()
|
||||||
|
|
||||||
|
def authenticate(self):
|
||||||
|
"""Authenticate with Google Drive API"""
|
||||||
|
try:
|
||||||
|
from google.auth.transport.requests import Request
|
||||||
|
from google.oauth2.credentials import Credentials
|
||||||
|
from google_auth_oauthlib.flow import InstalledAppFlow
|
||||||
|
from googleapiclient.discovery import build
|
||||||
|
|
||||||
|
SCOPES = ['https://www.googleapis.com/auth/drive.readonly']
|
||||||
|
|
||||||
|
# Load existing credentials
|
||||||
|
if os.path.exists('token.json'):
|
||||||
|
self.creds = Credentials.from_authorized_user_file('token.json', SCOPES)
|
||||||
|
|
||||||
|
# If no valid credentials available, let user log in
|
||||||
|
if not self.creds or not self.creds.valid:
|
||||||
|
if self.creds and self.creds.expired and self.creds.refresh_token:
|
||||||
|
self.creds.refresh(Request())
|
||||||
|
else:
|
||||||
|
if not os.path.exists('credentials.json'):
|
||||||
|
raise Exception("credentials.json not found. Please download from Google Cloud Console.")
|
||||||
|
|
||||||
|
flow = InstalledAppFlow.from_client_secrets_file('credentials.json', SCOPES)
|
||||||
|
self.creds = flow.run_local_server(port=0)
|
||||||
|
|
||||||
|
# Save credentials for next run
|
||||||
|
with open('token.json', 'w') as token:
|
||||||
|
token.write(self.creds.to_json())
|
||||||
|
|
||||||
|
# Build the Drive service
|
||||||
|
self.service = build('drive', 'v3', credentials=self.creds)
|
||||||
|
return True
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
print(f"Authentication error: {e}")
|
||||||
|
return False
|
||||||
|
|
||||||
|
def list_folders(self) -> List[Dict[str, Any]]:
|
||||||
|
"""List all folders in Google Drive"""
|
||||||
|
if not self.service:
|
||||||
|
if not self.authenticate():
|
||||||
|
return []
|
||||||
|
|
||||||
|
try:
|
||||||
|
results = self.service.files().list(
|
||||||
|
q="mimeType='application/vnd.google-apps.folder'",
|
||||||
|
pageSize=100,
|
||||||
|
fields="nextPageToken, files(id, name, createdTime, modifiedTime)"
|
||||||
|
).execute()
|
||||||
|
|
||||||
|
return results.get('files', [])
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
print(f"Error listing folders: {e}")
|
||||||
|
return []
|
||||||
|
|
||||||
|
def get_folder_info(self, folder_id: str) -> Dict[str, Any]:
|
||||||
|
"""Get information about a Google Drive folder"""
|
||||||
|
if not self.service:
|
||||||
|
if not self.authenticate():
|
||||||
|
return {}
|
||||||
|
|
||||||
|
try:
|
||||||
|
folder = self.service.files().get(
|
||||||
|
fileId=folder_id,
|
||||||
|
fields="id, name, createdTime, modifiedTime"
|
||||||
|
).execute()
|
||||||
|
|
||||||
|
return folder
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
print(f"Error getting folder info: {e}")
|
||||||
|
return {}
|
||||||
|
|
||||||
|
async def process_drive_files(self, folder_id: str = None) -> List[Dict[str, Any]]:
|
||||||
|
"""Process all receipt files from Google Drive"""
|
||||||
|
if not self.service:
|
||||||
|
if not self.authenticate():
|
||||||
|
return []
|
||||||
|
|
||||||
|
results = []
|
||||||
|
|
||||||
|
try:
|
||||||
|
# File types to look for
|
||||||
|
file_types = ["'application/pdf'", "'image/jpeg'", "'image/png'", "'image/gif'", "'image/bmp'"]
|
||||||
|
mime_types = " or ".join(file_types)
|
||||||
|
|
||||||
|
# Build query
|
||||||
|
query = f"mimeType contains {mime_types}"
|
||||||
|
if folder_id:
|
||||||
|
query += f" and '{folder_id}' in parents"
|
||||||
|
|
||||||
|
# Add date filter (last 30 days)
|
||||||
|
thirty_days_ago = (datetime.now() - timedelta(days=30)).isoformat() + 'Z'
|
||||||
|
query += f" and modifiedTime > '{thirty_days_ago}'"
|
||||||
|
|
||||||
|
results_files = self.service.files().list(
|
||||||
|
q=query,
|
||||||
|
pageSize=100,
|
||||||
|
fields="nextPageToken, files(id, name, mimeType, modifiedTime, size)"
|
||||||
|
).execute()
|
||||||
|
|
||||||
|
files = results_files.get('files', [])
|
||||||
|
files = [file for file in files if file['id'] not in self.processed_files]
|
||||||
|
|
||||||
|
# For demo purposes, return mock results
|
||||||
|
for file in files[:3]: # Process first 3 files
|
||||||
|
mock_result = {
|
||||||
|
"file_id": file['id'],
|
||||||
|
"filename": file['name'],
|
||||||
|
"drive_modified": file['modifiedTime'],
|
||||||
|
"file_size": file.get('size', 0),
|
||||||
|
"extraction_success": True,
|
||||||
|
"vendor": "Demo Vendor",
|
||||||
|
"description": "Coffee and sandwich",
|
||||||
|
"total_amount": 25.50,
|
||||||
|
"tax_amount": 2.04,
|
||||||
|
"date": "2024-01-15",
|
||||||
|
"category": "Food",
|
||||||
|
"confidence": 0.95
|
||||||
|
}
|
||||||
|
results.append(mock_result)
|
||||||
|
self.processed_files.add(file['id'])
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
print(f"Error processing Drive files: {e}")
|
||||||
|
|
||||||
|
return results
|
||||||
@@ -0,0 +1,555 @@
|
|||||||
|
from fastapi import FastAPI, HTTPException, UploadFile, File
|
||||||
|
from fastapi.middleware.cors import CORSMiddleware
|
||||||
|
from datetime import datetime
|
||||||
|
from typing import List
|
||||||
|
import uuid
|
||||||
|
import csv
|
||||||
|
import io
|
||||||
|
import logging
|
||||||
|
|
||||||
|
# Configure logging
|
||||||
|
logging.basicConfig(
|
||||||
|
level=logging.INFO,
|
||||||
|
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
|
||||||
|
handlers=[
|
||||||
|
logging.FileHandler('app.log'),
|
||||||
|
logging.StreamHandler()
|
||||||
|
]
|
||||||
|
)
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
from api_models import (
|
||||||
|
MatchingRequest, MatchingResponse, MatchResponse,
|
||||||
|
ApprovalRequest, RuleRequest, DocumentUploadResponse,
|
||||||
|
DocumentProcessResponse, TransactionRequest
|
||||||
|
)
|
||||||
|
from models import Receipt, Transaction, Match
|
||||||
|
from matching_engine import MatchingEngine
|
||||||
|
from ai_rules import AIRule
|
||||||
|
from document_processor import DocumentProcessor
|
||||||
|
|
||||||
|
app = FastAPI(
|
||||||
|
title="AI Bookkeeper - Data Science Engine",
|
||||||
|
description="AI-powered receipt-to-transaction matching engine. Receives transaction data and provides intelligent matching capabilities.",
|
||||||
|
version="1.0.0"
|
||||||
|
)
|
||||||
|
|
||||||
|
# CORS middleware
|
||||||
|
app.add_middleware(
|
||||||
|
CORSMiddleware,
|
||||||
|
allow_origins=["*"],
|
||||||
|
allow_credentials=True,
|
||||||
|
allow_methods=["*"],
|
||||||
|
allow_headers=["*"],
|
||||||
|
)
|
||||||
|
|
||||||
|
# Initialize DS Engine components
|
||||||
|
matching_engine = MatchingEngine()
|
||||||
|
document_processor = DocumentProcessor()
|
||||||
|
|
||||||
|
# In-memory storage for uploaded files (in production, use a database)
|
||||||
|
uploaded_files = {}
|
||||||
|
|
||||||
|
# Store imported transactions globally for easy access
|
||||||
|
stored_transactions = []
|
||||||
|
processed_receipts = {}
|
||||||
|
|
||||||
|
@app.get("/")
|
||||||
|
async def root():
|
||||||
|
"""Health check endpoint"""
|
||||||
|
return {
|
||||||
|
"message": "AI Bookkeeper Data Science Engine is running",
|
||||||
|
"version": "1.0.0",
|
||||||
|
"status": "healthy"
|
||||||
|
}
|
||||||
|
|
||||||
|
# ============================================================================
|
||||||
|
# TRANSACTION IMPORT ENDPOINTS
|
||||||
|
# ============================================================================
|
||||||
|
|
||||||
|
@app.post("/transactions/import/csv")
|
||||||
|
async def import_transactions_csv(file: UploadFile = File(...)):
|
||||||
|
"""
|
||||||
|
Import transactions from a CSV file (custom bank export format).
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
content = await file.read()
|
||||||
|
decoded = content.decode('utf-8')
|
||||||
|
reader = csv.DictReader(io.StringIO(decoded))
|
||||||
|
transactions = []
|
||||||
|
errors = []
|
||||||
|
for idx, row in enumerate(reader):
|
||||||
|
try:
|
||||||
|
# Use correct headers and strip whitespace
|
||||||
|
account_number = row.get('Account Number') or row.get('Account Number '.strip())
|
||||||
|
txn_date_raw = row.get('Transaction Date') or row.get('Transaction Date '.strip())
|
||||||
|
amount_raw = row.get('Amount') or row.get('Amount '.strip())
|
||||||
|
payee_name = row.get('Description 2') or row.get('Description 2 '.strip())
|
||||||
|
memo = f"{row.get('Account Type','').strip()} {row.get('Cheque Number','').strip()} {row.get('Description 1','').strip()}".strip()
|
||||||
|
# Compose ID
|
||||||
|
txn_id = f"{account_number}_{idx+1}"
|
||||||
|
# Parse date (try multiple formats)
|
||||||
|
txn_date_str = txn_date_raw.strip()
|
||||||
|
txn_date = None
|
||||||
|
for fmt in ("%m/%d/%y", "%m/%d/%Y"):
|
||||||
|
try:
|
||||||
|
txn_date = datetime.strptime(txn_date_str, fmt).strftime("%Y-%m-%d")
|
||||||
|
break
|
||||||
|
except Exception:
|
||||||
|
continue
|
||||||
|
if not txn_date:
|
||||||
|
raise ValueError(f"Could not parse date: {txn_date_str}")
|
||||||
|
# Parse amount
|
||||||
|
amount = float(amount_raw.replace(',', '').strip())
|
||||||
|
transactions.append({
|
||||||
|
"id": txn_id,
|
||||||
|
"txn_date": txn_date,
|
||||||
|
"amount": amount,
|
||||||
|
"payee_name": payee_name.strip(),
|
||||||
|
"memo": memo
|
||||||
|
})
|
||||||
|
except Exception as e:
|
||||||
|
errors.append(f"Row {idx+1}: {str(e)}")
|
||||||
|
# Store transactions globally for auto-matching
|
||||||
|
global stored_transactions
|
||||||
|
stored_transactions = transactions
|
||||||
|
|
||||||
|
return {
|
||||||
|
"imported_count": len(transactions),
|
||||||
|
"converted_transactions": transactions,
|
||||||
|
"errors": errors
|
||||||
|
}
|
||||||
|
except Exception as e:
|
||||||
|
raise HTTPException(status_code=500, detail=str(e))
|
||||||
|
|
||||||
|
@app.post("/transactions/import/image")
|
||||||
|
async def import_transactions_from_image(file: UploadFile = File(...)):
|
||||||
|
"""
|
||||||
|
Import transactions from an image (bank statement, credit card statement, etc.) using AI extraction.
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
# Validate file type
|
||||||
|
allowed_types = ['jpg', 'jpeg', 'png', 'gif', 'bmp', 'pdf']
|
||||||
|
file_extension = file.filename.split('.')[-1].lower()
|
||||||
|
if file_extension not in allowed_types:
|
||||||
|
raise HTTPException(status_code=400, detail=f"Unsupported file type. Allowed: {allowed_types}")
|
||||||
|
# Read file content
|
||||||
|
content = await file.read()
|
||||||
|
# Save file to disk
|
||||||
|
image_path = await document_processor.save_uploaded_file(content, file.filename)
|
||||||
|
# Extract transactions from image (pass file path)
|
||||||
|
extraction_result = await document_processor.extract_transactions_from_image(image_path)
|
||||||
|
if not extraction_result.get("extraction_success", False):
|
||||||
|
raise HTTPException(status_code=500, detail=extraction_result.get("error", "Extraction failed"))
|
||||||
|
extracted_transactions = extraction_result.get("transactions", [])
|
||||||
|
# Store transactions globally for auto-matching
|
||||||
|
global stored_transactions
|
||||||
|
stored_transactions = []
|
||||||
|
for idx, txn in enumerate(extracted_transactions):
|
||||||
|
try:
|
||||||
|
txn_id = f"img_{file.filename}_{idx+1}"
|
||||||
|
txn_date_raw = txn.get("date")
|
||||||
|
amount = txn.get("amount")
|
||||||
|
vendor = txn.get("vendor")
|
||||||
|
memo = txn.get("memo", "")
|
||||||
|
|
||||||
|
# Parse date to YYYY-MM-DD format
|
||||||
|
txn_date = document_processor._parse_date_to_iso(txn_date_raw)
|
||||||
|
if not txn_date:
|
||||||
|
# Fallback: use current year if parsing fails
|
||||||
|
txn_date = f"2024-{txn_date_raw}"
|
||||||
|
|
||||||
|
stored_transactions.append({
|
||||||
|
"id": txn_id,
|
||||||
|
"txn_date": txn_date,
|
||||||
|
"amount": amount,
|
||||||
|
"payee_name": vendor,
|
||||||
|
"memo": memo
|
||||||
|
})
|
||||||
|
except Exception as e:
|
||||||
|
continue
|
||||||
|
return {
|
||||||
|
"imported_count": len(stored_transactions),
|
||||||
|
"converted_transactions": stored_transactions,
|
||||||
|
"errors": []
|
||||||
|
}
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Error importing transactions from image: {str(e)}")
|
||||||
|
raise HTTPException(status_code=500, detail=str(e))
|
||||||
|
|
||||||
|
# ============================================================================
|
||||||
|
# DOCUMENT PROCESSING ENDPOINTS
|
||||||
|
# ============================================================================
|
||||||
|
|
||||||
|
@app.post("/upload-multiple", response_model=List[DocumentUploadResponse])
|
||||||
|
async def upload_multiple_documents(files: List[UploadFile] = File(...)):
|
||||||
|
"""
|
||||||
|
Upload multiple receipt images for processing.
|
||||||
|
|
||||||
|
This endpoint accepts multiple image files and returns file IDs
|
||||||
|
that can be used with the /process/{file_id} endpoint.
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
responses = []
|
||||||
|
|
||||||
|
for file in files:
|
||||||
|
# Validate file type
|
||||||
|
allowed_types = ['jpg', 'jpeg', 'png', 'gif', 'bmp', 'pdf']
|
||||||
|
file_extension = file.filename.split('.')[-1].lower()
|
||||||
|
|
||||||
|
if file_extension not in allowed_types:
|
||||||
|
raise HTTPException(status_code=400, detail=f"Unsupported file type for {file.filename}. Allowed: {allowed_types}")
|
||||||
|
|
||||||
|
# Generate unique file ID
|
||||||
|
file_id = str(uuid.uuid4())
|
||||||
|
|
||||||
|
# Read and store file content
|
||||||
|
content = await file.read()
|
||||||
|
uploaded_files[file_id] = {
|
||||||
|
"filename": file.filename,
|
||||||
|
"content": content,
|
||||||
|
"upload_date": datetime.now()
|
||||||
|
}
|
||||||
|
|
||||||
|
responses.append(DocumentUploadResponse(
|
||||||
|
file_id=file_id,
|
||||||
|
filename=file.filename,
|
||||||
|
file_type=file_extension,
|
||||||
|
upload_date=datetime.now(),
|
||||||
|
status="uploaded"
|
||||||
|
))
|
||||||
|
|
||||||
|
return responses
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Error uploading documents: {str(e)}")
|
||||||
|
raise HTTPException(status_code=500, detail=str(e))
|
||||||
|
|
||||||
|
@app.post("/process/{file_id}", response_model=DocumentProcessResponse)
|
||||||
|
async def process_document(file_id: str):
|
||||||
|
"""
|
||||||
|
Process a previously uploaded document to extract receipt information.
|
||||||
|
|
||||||
|
This endpoint uses AI to extract structured data from receipt images,
|
||||||
|
including vendor, amount, date, and category information.
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
# Check if file exists
|
||||||
|
if file_id not in uploaded_files:
|
||||||
|
raise HTTPException(status_code=404, detail=f"File {file_id} not found")
|
||||||
|
|
||||||
|
file_data = uploaded_files[file_id]
|
||||||
|
|
||||||
|
# Save file temporarily and process it
|
||||||
|
file_path = await document_processor.save_uploaded_file(file_data["content"], file_data["filename"])
|
||||||
|
file_type = file_data["filename"].split('.')[-1].lower()
|
||||||
|
receipt_data = await document_processor.process_file(file_path, file_type)
|
||||||
|
|
||||||
|
# Store processed receipt
|
||||||
|
processed_receipts[file_id] = receipt_data
|
||||||
|
|
||||||
|
return DocumentProcessResponse(
|
||||||
|
file_id=file_id,
|
||||||
|
extraction_success=receipt_data.get("extraction_success", False),
|
||||||
|
vendor=receipt_data.get("vendor", ""),
|
||||||
|
description=receipt_data.get("description", ""),
|
||||||
|
total_amount=receipt_data.get("total_amount", 0.0),
|
||||||
|
tax_amount=receipt_data.get("tax_amount", 0.0),
|
||||||
|
date=receipt_data.get("date", ""),
|
||||||
|
category=receipt_data.get("category", ""),
|
||||||
|
confidence=receipt_data.get("confidence", 0.0),
|
||||||
|
error=receipt_data.get("error", None)
|
||||||
|
)
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Error processing document {file_id}: {str(e)}")
|
||||||
|
raise HTTPException(status_code=500, detail=str(e))
|
||||||
|
|
||||||
|
# ============================================================================
|
||||||
|
# MATCHING ENDPOINTS
|
||||||
|
# ============================================================================
|
||||||
|
|
||||||
|
@app.post("/match-specific", response_model=MatchingResponse)
|
||||||
|
async def match_specific_receipts(file_ids: List[str]):
|
||||||
|
"""
|
||||||
|
Match specific receipts against imported transactions.
|
||||||
|
|
||||||
|
This endpoint takes a list of receipt file IDs and matches them against
|
||||||
|
the currently imported transactions using AI-powered matching logic.
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
logger.info(f"Starting match-specific for file IDs: {file_ids}")
|
||||||
|
|
||||||
|
# Check if transactions are imported
|
||||||
|
if not stored_transactions:
|
||||||
|
logger.warning("No transactions imported")
|
||||||
|
raise HTTPException(status_code=400, detail="No transactions imported. Please upload CSV first.")
|
||||||
|
|
||||||
|
logger.info(f"Found {len(stored_transactions)} stored transactions")
|
||||||
|
|
||||||
|
# Convert stored transactions to Transaction objects
|
||||||
|
transactions = []
|
||||||
|
for txn in stored_transactions:
|
||||||
|
try:
|
||||||
|
txn_date = datetime.strptime(txn["txn_date"], "%Y-%m-%d")
|
||||||
|
transaction = Transaction(
|
||||||
|
id=txn["id"],
|
||||||
|
transaction_date=txn_date,
|
||||||
|
amount=txn["amount"],
|
||||||
|
vendor=txn["payee_name"],
|
||||||
|
notes=txn["memo"]
|
||||||
|
)
|
||||||
|
transactions.append(transaction)
|
||||||
|
except Exception as e:
|
||||||
|
logger.warning(f"Error converting transaction {txn['id']}: {str(e)}")
|
||||||
|
continue
|
||||||
|
|
||||||
|
logger.info(f"Converted {len(transactions)} transactions")
|
||||||
|
|
||||||
|
# Get receipts for the specified file IDs
|
||||||
|
receipts = []
|
||||||
|
missing_files = []
|
||||||
|
|
||||||
|
for file_id in file_ids:
|
||||||
|
if file_id in processed_receipts:
|
||||||
|
receipt_data = processed_receipts[file_id]
|
||||||
|
logger.info(f"DEBUG: receipt_data for {file_id}: {receipt_data}")
|
||||||
|
logger.info(f"DEBUG: receipt_data keys for {file_id}: {list(receipt_data.keys())}")
|
||||||
|
try:
|
||||||
|
# Handle missing date field
|
||||||
|
if "date" not in receipt_data or not receipt_data["date"]:
|
||||||
|
logger.warning(f"Missing date for receipt {file_id}, using current date")
|
||||||
|
receipt_date = datetime.now()
|
||||||
|
else:
|
||||||
|
receipt_date = datetime.strptime(receipt_data["date"], "%Y-%m-%d")
|
||||||
|
|
||||||
|
# Handle missing amount field - try multiple possible keys
|
||||||
|
amount = receipt_data.get("amount")
|
||||||
|
if amount is None:
|
||||||
|
amount = receipt_data.get("total_amount")
|
||||||
|
if amount is None:
|
||||||
|
amount = receipt_data.get("amount_total")
|
||||||
|
if amount is None:
|
||||||
|
logger.warning(f"Missing amount for receipt {file_id}, using 0.0")
|
||||||
|
amount = 0.0
|
||||||
|
|
||||||
|
# Ensure amount is a float
|
||||||
|
try:
|
||||||
|
amount = float(amount)
|
||||||
|
except (ValueError, TypeError):
|
||||||
|
logger.warning(f"Invalid amount '{amount}' for receipt {file_id}, using 0.0")
|
||||||
|
amount = 0.0
|
||||||
|
|
||||||
|
logger.info(f"DEBUG: amount for {file_id}: {amount}")
|
||||||
|
|
||||||
|
# Handle missing vendor field
|
||||||
|
vendor = receipt_data.get("vendor", "")
|
||||||
|
if not vendor:
|
||||||
|
logger.warning(f"Missing vendor for receipt {file_id}, using 'Unknown'")
|
||||||
|
vendor = "Unknown"
|
||||||
|
|
||||||
|
# Handle missing category field
|
||||||
|
category = receipt_data.get("category", "Other")
|
||||||
|
|
||||||
|
# Handle description field
|
||||||
|
description = receipt_data.get("description", "")
|
||||||
|
|
||||||
|
# Handle tax field
|
||||||
|
tax = receipt_data.get("tax", receipt_data.get("tax_amount", 0.0))
|
||||||
|
try:
|
||||||
|
tax = float(tax)
|
||||||
|
except (ValueError, TypeError):
|
||||||
|
tax = 0.0
|
||||||
|
|
||||||
|
receipt = Receipt(
|
||||||
|
id=file_id,
|
||||||
|
file_name=uploaded_files[file_id]["filename"],
|
||||||
|
upload_date=uploaded_files[file_id]["upload_date"],
|
||||||
|
receipt_date=receipt_date,
|
||||||
|
amount=amount,
|
||||||
|
tax=tax,
|
||||||
|
vendor=vendor,
|
||||||
|
category=category,
|
||||||
|
description=description
|
||||||
|
)
|
||||||
|
receipts.append(receipt)
|
||||||
|
logger.info(f"Added receipt: {receipt.vendor} - ${receipt.amount}")
|
||||||
|
except Exception as e:
|
||||||
|
logger.warning(f"Error creating receipt object for {file_id}: {str(e)}")
|
||||||
|
missing_files.append(f"{file_id} (error: {str(e)})")
|
||||||
|
else:
|
||||||
|
logger.warning(f"Receipt {file_id} not found in processed_receipts")
|
||||||
|
missing_files.append(f"{file_id} (not found)")
|
||||||
|
|
||||||
|
if missing_files:
|
||||||
|
logger.error(f"Missing files: {missing_files}")
|
||||||
|
raise HTTPException(status_code=400, detail=f"Missing files: {missing_files}")
|
||||||
|
|
||||||
|
logger.info(f"Processing {len(receipts)} receipts against {len(transactions)} transactions")
|
||||||
|
|
||||||
|
# Perform matching
|
||||||
|
try:
|
||||||
|
logger.info("Starting direct matching call (without ThreadPoolExecutor)")
|
||||||
|
logger.info(f"matching_engine type: {type(matching_engine)}")
|
||||||
|
logger.info(f"matching_engine.process_matching type: {type(matching_engine.process_matching)}")
|
||||||
|
logger.info(f"receipts type: {type(receipts)}, length: {len(receipts)}")
|
||||||
|
logger.info(f"transactions type: {type(transactions)}, length: {len(transactions)}")
|
||||||
|
|
||||||
|
matches = matching_engine.process_matching(receipts, transactions)
|
||||||
|
|
||||||
|
logger.info(f"Matching completed successfully. Found {len(matches)} matches")
|
||||||
|
|
||||||
|
# Convert matches to response format
|
||||||
|
match_responses = []
|
||||||
|
for match in matches:
|
||||||
|
logger.info(f"Raw match object: {match}")
|
||||||
|
logger.info(f" receipt_id: {match.receipt.id}")
|
||||||
|
logger.info(f" transaction_id: {match.transaction.id}")
|
||||||
|
logger.info(f" confidence_score: {match.confidence_score}")
|
||||||
|
logger.info(f" match_reason: {match.match_reason}")
|
||||||
|
logger.info(f" receipt_vendor: {match.receipt.vendor}")
|
||||||
|
logger.info(f" receipt_amount: {match.receipt.amount}")
|
||||||
|
logger.info(f" transaction_vendor: {match.transaction.vendor}")
|
||||||
|
logger.info(f" transaction_amount: {match.transaction.amount}")
|
||||||
|
|
||||||
|
match_response = MatchResponse(
|
||||||
|
receipt_id=match.receipt.id,
|
||||||
|
transaction_id=match.transaction.id,
|
||||||
|
confidence_score=match.confidence_score,
|
||||||
|
match_reason=match.match_reason,
|
||||||
|
receipt_vendor=match.receipt.vendor,
|
||||||
|
receipt_amount=match.receipt.amount,
|
||||||
|
receipt_description=match.receipt.description,
|
||||||
|
receipt_category=match.receipt.category,
|
||||||
|
receipt_tax_amount=match.receipt.tax,
|
||||||
|
transaction_vendor=match.transaction.vendor,
|
||||||
|
transaction_amount=match.transaction.amount
|
||||||
|
)
|
||||||
|
match_responses.append(match_response)
|
||||||
|
logger.info(f"Successfully created MatchResponse for {match.receipt.vendor} -> {match.transaction.vendor}")
|
||||||
|
|
||||||
|
logger.info(f"Formatted {len(match_responses)} match responses")
|
||||||
|
|
||||||
|
# Calculate statistics
|
||||||
|
if match_responses:
|
||||||
|
high_confidence = sum(1 for m in match_responses if m.confidence_score >= 0.8)
|
||||||
|
low_confidence = len(match_responses) - high_confidence
|
||||||
|
avg_score = sum(m.confidence_score for m in match_responses) / len(match_responses)
|
||||||
|
else:
|
||||||
|
high_confidence = low_confidence = avg_score = 0
|
||||||
|
|
||||||
|
stats = {
|
||||||
|
"total": len(match_responses),
|
||||||
|
"high_confidence": high_confidence,
|
||||||
|
"low_confidence": low_confidence,
|
||||||
|
"avg_score": round(avg_score, 2)
|
||||||
|
}
|
||||||
|
|
||||||
|
logger.info(f"Generated stats: {stats}")
|
||||||
|
logger.info(f"Match-specific completed successfully with {len(match_responses)} matches")
|
||||||
|
|
||||||
|
return MatchingResponse(
|
||||||
|
matches=match_responses,
|
||||||
|
stats=stats
|
||||||
|
)
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Exception in matching section: {str(e)}")
|
||||||
|
logger.error(f"Exception type: {type(e)}")
|
||||||
|
logger.error(f"Exception args: {e.args}")
|
||||||
|
logger.error(f"Traceback: {e.__traceback__}")
|
||||||
|
raise HTTPException(status_code=500, detail=f"Unexpected matching error: {str(e)}")
|
||||||
|
|
||||||
|
except HTTPException:
|
||||||
|
raise
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Unexpected error in match_specific_receipts: {str(e)}")
|
||||||
|
raise HTTPException(status_code=500, detail=str(e))
|
||||||
|
|
||||||
|
# ============================================================================
|
||||||
|
# RULES MANAGEMENT ENDPOINTS
|
||||||
|
# ============================================================================
|
||||||
|
|
||||||
|
@app.post("/rules")
|
||||||
|
async def add_rule(request: RuleRequest):
|
||||||
|
"""
|
||||||
|
Add a new AI rule for transaction matching.
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
new_rule = AIRule(
|
||||||
|
name=request.name,
|
||||||
|
condition=request.condition,
|
||||||
|
action=request.action,
|
||||||
|
source=request.source
|
||||||
|
)
|
||||||
|
|
||||||
|
matching_engine.rules_engine.rules.append(new_rule)
|
||||||
|
|
||||||
|
return {"message": f"Rule '{request.name}' added successfully"}
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
raise HTTPException(status_code=500, detail=str(e))
|
||||||
|
|
||||||
|
@app.get("/rules")
|
||||||
|
async def get_rules():
|
||||||
|
"""
|
||||||
|
Get all current AI rules.
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
rules = []
|
||||||
|
for rule in matching_engine.rules_engine.rules:
|
||||||
|
rules.append({
|
||||||
|
"name": rule.name,
|
||||||
|
"condition": rule.condition,
|
||||||
|
"action": rule.action,
|
||||||
|
"source": rule.source,
|
||||||
|
"status": rule.status
|
||||||
|
})
|
||||||
|
|
||||||
|
return {"rules": rules}
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
raise HTTPException(status_code=500, detail=str(e))
|
||||||
|
|
||||||
|
@app.delete("/rules/{rule_name}")
|
||||||
|
async def delete_rule(rule_name: str):
|
||||||
|
"""
|
||||||
|
Delete an AI rule by name.
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
rules = matching_engine.rules_engine.rules
|
||||||
|
for i, rule in enumerate(rules):
|
||||||
|
if rule.name == rule_name:
|
||||||
|
del rules[i]
|
||||||
|
return {"message": f"Rule '{rule_name}' deleted successfully"}
|
||||||
|
|
||||||
|
raise HTTPException(status_code=404, detail=f"Rule '{rule_name}' not found")
|
||||||
|
|
||||||
|
except HTTPException:
|
||||||
|
raise
|
||||||
|
except Exception as e:
|
||||||
|
raise HTTPException(status_code=500, detail=str(e))
|
||||||
|
|
||||||
|
# ============================================================================
|
||||||
|
# STATISTICS ENDPOINT
|
||||||
|
# ============================================================================
|
||||||
|
|
||||||
|
@app.get("/stats")
|
||||||
|
async def get_stats():
|
||||||
|
"""
|
||||||
|
Get system statistics.
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
return {
|
||||||
|
"total_transactions": len(stored_transactions),
|
||||||
|
"total_receipts": len(processed_receipts),
|
||||||
|
"total_uploaded_files": len(uploaded_files),
|
||||||
|
"rules_count": len(matching_engine.rules_engine.rules)
|
||||||
|
}
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
raise HTTPException(status_code=500, detail=str(e))
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
import uvicorn
|
||||||
|
uvicorn.run(app, host="0.0.0.0", port=8343)
|
||||||
@@ -0,0 +1,77 @@
|
|||||||
|
from typing import List, Dict, Any
|
||||||
|
from datetime import datetime
|
||||||
|
from ai_matcher import AIMatcher
|
||||||
|
from ai_rules import AIRulesEngine
|
||||||
|
from feedback_logger import FeedbackLogger
|
||||||
|
from models import Receipt, Transaction, Match
|
||||||
|
|
||||||
|
class MatchingEngine:
|
||||||
|
def __init__(self):
|
||||||
|
self.ai_matcher = AIMatcher()
|
||||||
|
self.rules_engine = AIRulesEngine()
|
||||||
|
self.feedback_logger = FeedbackLogger()
|
||||||
|
|
||||||
|
def process_matching(self, receipts: List[Receipt], transactions: List[Transaction]) -> List[Match]:
|
||||||
|
# Get AI matches
|
||||||
|
ai_matches = self.ai_matcher.match_receipts_to_transactions(receipts, transactions)
|
||||||
|
|
||||||
|
# Apply rules and enhance matches
|
||||||
|
enhanced_matches = []
|
||||||
|
for match in ai_matches:
|
||||||
|
enhanced_match = self._enhance_match_with_rules(match)
|
||||||
|
enhanced_matches.append(enhanced_match)
|
||||||
|
|
||||||
|
return enhanced_matches
|
||||||
|
|
||||||
|
def _enhance_match_with_rules(self, match: Match) -> Match:
|
||||||
|
rule_results = self.rules_engine.apply_rules(match.receipt, match.transaction)
|
||||||
|
|
||||||
|
# Apply confidence boost from rules
|
||||||
|
if rule_results["confidence_boost"] > 0:
|
||||||
|
match.confidence_score = min(1.0, match.confidence_score + rule_results["confidence_boost"])
|
||||||
|
|
||||||
|
# Auto-approve if rules say so
|
||||||
|
if rule_results["auto_approve"]:
|
||||||
|
match.confidence_score = 1.0
|
||||||
|
match.match_reason += " (Auto-approved by rules)"
|
||||||
|
|
||||||
|
# Add tax analysis to match
|
||||||
|
if rule_results.get("tax_analysis"):
|
||||||
|
match.tax_analysis = rule_results["tax_analysis"]
|
||||||
|
|
||||||
|
return match
|
||||||
|
|
||||||
|
def approve_match(self, match: Match, user_id: str):
|
||||||
|
# Log the approval
|
||||||
|
self.feedback_logger.log_override(
|
||||||
|
transaction_id=match.transaction.id,
|
||||||
|
original_match=f"AI Score: {match.confidence_score}",
|
||||||
|
correction="Approved",
|
||||||
|
reason="User approved match",
|
||||||
|
user_id=user_id
|
||||||
|
)
|
||||||
|
|
||||||
|
def reject_match(self, match: Match, reason: str, user_id: str):
|
||||||
|
# Log the rejection
|
||||||
|
self.feedback_logger.log_override(
|
||||||
|
transaction_id=match.transaction.id,
|
||||||
|
original_match=f"AI Score: {match.confidence_score}",
|
||||||
|
correction="Rejected",
|
||||||
|
reason=reason,
|
||||||
|
user_id=user_id
|
||||||
|
)
|
||||||
|
|
||||||
|
def get_matching_stats(self, matches: List[Match]) -> Dict[str, Any]:
|
||||||
|
if not matches:
|
||||||
|
return {"total": 0, "high_confidence": 0, "low_confidence": 0, "avg_score": 0}
|
||||||
|
|
||||||
|
high_confidence = len([m for m in matches if m.confidence_score >= 0.8])
|
||||||
|
low_confidence = len([m for m in matches if m.confidence_score < 0.8])
|
||||||
|
avg_score = sum(m.confidence_score for m in matches) / len(matches)
|
||||||
|
|
||||||
|
return {
|
||||||
|
"total": len(matches),
|
||||||
|
"high_confidence": high_confidence,
|
||||||
|
"low_confidence": low_confidence,
|
||||||
|
"avg_score": round(avg_score, 3)
|
||||||
|
}
|
||||||
@@ -0,0 +1,59 @@
|
|||||||
|
from dataclasses import dataclass
|
||||||
|
from datetime import datetime
|
||||||
|
from typing import Optional
|
||||||
|
|
||||||
|
@dataclass
|
||||||
|
class Address:
|
||||||
|
"""Address information for tax calculations"""
|
||||||
|
province: str
|
||||||
|
city: str
|
||||||
|
postal_code: str
|
||||||
|
country: str = "Canada"
|
||||||
|
|
||||||
|
@dataclass
|
||||||
|
class Receipt:
|
||||||
|
id: str
|
||||||
|
file_name: str
|
||||||
|
upload_date: datetime
|
||||||
|
receipt_date: datetime
|
||||||
|
amount: float
|
||||||
|
tax: float
|
||||||
|
vendor: str
|
||||||
|
category: str
|
||||||
|
description: str
|
||||||
|
# Tax rule fields
|
||||||
|
billing_address: Optional[Address] = None
|
||||||
|
shipping_address: Optional[Address] = None
|
||||||
|
currency: str = "CAD"
|
||||||
|
is_meals_entertainment: bool = False
|
||||||
|
|
||||||
|
@dataclass
|
||||||
|
class Transaction:
|
||||||
|
id: str
|
||||||
|
transaction_date: datetime
|
||||||
|
amount: float
|
||||||
|
vendor: str
|
||||||
|
notes: str
|
||||||
|
# Tax rule fields
|
||||||
|
currency: str = "CAD"
|
||||||
|
fx_rate: Optional[float] = None
|
||||||
|
|
||||||
|
@dataclass
|
||||||
|
class Asset:
|
||||||
|
"""Asset for depreciation calculations"""
|
||||||
|
id: str
|
||||||
|
name: str
|
||||||
|
purchase_date: datetime
|
||||||
|
purchase_amount: float
|
||||||
|
useful_life_years: int
|
||||||
|
residual_value: float
|
||||||
|
cca_rate: float # Capital Cost Allowance rate
|
||||||
|
asset_class: str
|
||||||
|
|
||||||
|
@dataclass
|
||||||
|
class Match:
|
||||||
|
receipt: Receipt
|
||||||
|
transaction: Transaction
|
||||||
|
confidence_score: float
|
||||||
|
match_reason: str
|
||||||
|
tax_analysis: Optional[dict] = None
|
||||||
@@ -0,0 +1,16 @@
|
|||||||
|
groq>=0.5.0
|
||||||
|
python-dotenv==1.0.0
|
||||||
|
pandas==2.1.4
|
||||||
|
numpy==1.24.3
|
||||||
|
fastapi==0.104.1
|
||||||
|
uvicorn==0.24.0
|
||||||
|
pydantic==2.5.0
|
||||||
|
requests==2.31.0
|
||||||
|
python-multipart==0.0.6
|
||||||
|
Pillow==10.0.1
|
||||||
|
PyPDF2==3.0.1
|
||||||
|
aiofiles==23.2.1
|
||||||
|
google-auth==2.23.4
|
||||||
|
google-auth-oauthlib==1.1.0
|
||||||
|
google-auth-httplib2==0.1.1
|
||||||
|
google-api-python-client==2.108.0
|
||||||
@@ -0,0 +1,271 @@
|
|||||||
|
from typing import Dict, Any, Optional, Tuple
|
||||||
|
from datetime import datetime
|
||||||
|
from models import Receipt, Transaction, Address, Asset
|
||||||
|
import logging
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
class TaxRulesEngine:
|
||||||
|
"""Engine to handle tax calculations based on the four tax rules"""
|
||||||
|
|
||||||
|
# Provincial tax rates (simplified - in production, use a tax rate API)
|
||||||
|
PROVINCIAL_TAX_RATES = {
|
||||||
|
"ON": 0.13, # Ontario HST
|
||||||
|
"QC": 0.14975, # Quebec QST
|
||||||
|
"BC": 0.12, # British Columbia
|
||||||
|
"AB": 0.05, # Alberta
|
||||||
|
"SK": 0.11, # Saskatchewan
|
||||||
|
"MB": 0.12, # Manitoba
|
||||||
|
"NS": 0.15, # Nova Scotia
|
||||||
|
"NB": 0.15, # New Brunswick
|
||||||
|
"NL": 0.15, # Newfoundland and Labrador
|
||||||
|
"PE": 0.15, # Prince Edward Island
|
||||||
|
"NT": 0.05, # Northwest Territories
|
||||||
|
"NU": 0.05, # Nunavut
|
||||||
|
"YT": 0.05, # Yukon
|
||||||
|
}
|
||||||
|
|
||||||
|
def __init__(self):
|
||||||
|
self.logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
def apply_sales_tax_rule(self, receipt: Receipt) -> Dict[str, Any]:
|
||||||
|
"""
|
||||||
|
Sales Tax Rule: Apply correct sales tax based on billing vs shipping addresses
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
# Determine which address to use for tax calculation
|
||||||
|
tax_address = self._get_tax_address(receipt)
|
||||||
|
|
||||||
|
if not tax_address:
|
||||||
|
return {
|
||||||
|
"success": False,
|
||||||
|
"error": "No valid address found for tax calculation",
|
||||||
|
"calculated_tax": 0.0,
|
||||||
|
"tax_rate": 0.0
|
||||||
|
}
|
||||||
|
|
||||||
|
# Get tax rate for the province
|
||||||
|
tax_rate = self.PROVINCIAL_TAX_RATES.get(tax_address.province, 0.0)
|
||||||
|
|
||||||
|
# Calculate tax amount
|
||||||
|
calculated_tax = receipt.amount * tax_rate
|
||||||
|
|
||||||
|
return {
|
||||||
|
"success": True,
|
||||||
|
"calculated_tax": calculated_tax,
|
||||||
|
"tax_rate": tax_rate,
|
||||||
|
"tax_address": tax_address.province,
|
||||||
|
"rule_applied": "Sales Tax Rule"
|
||||||
|
}
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
self.logger.error(f"Error applying sales tax rule: {str(e)}")
|
||||||
|
return {
|
||||||
|
"success": False,
|
||||||
|
"error": str(e),
|
||||||
|
"calculated_tax": 0.0,
|
||||||
|
"tax_rate": 0.0
|
||||||
|
}
|
||||||
|
|
||||||
|
def _get_tax_address(self, receipt: Receipt) -> Optional[Address]:
|
||||||
|
"""Determine which address to use for tax calculation"""
|
||||||
|
# Rule: Use shipping address if different from billing, otherwise use billing
|
||||||
|
if receipt.shipping_address and receipt.billing_address:
|
||||||
|
if self._addresses_different(receipt.billing_address, receipt.shipping_address):
|
||||||
|
return receipt.shipping_address
|
||||||
|
else:
|
||||||
|
return receipt.billing_address
|
||||||
|
elif receipt.shipping_address:
|
||||||
|
return receipt.shipping_address
|
||||||
|
elif receipt.billing_address:
|
||||||
|
return receipt.billing_address
|
||||||
|
else:
|
||||||
|
return None
|
||||||
|
|
||||||
|
def _addresses_different(self, billing: Address, shipping: Address) -> bool:
|
||||||
|
"""Check if billing and shipping addresses are different"""
|
||||||
|
return (billing.province != shipping.province or
|
||||||
|
billing.city != shipping.city or
|
||||||
|
billing.postal_code != shipping.postal_code)
|
||||||
|
|
||||||
|
def apply_fx_rule(self, receipt: Receipt, transaction: Transaction) -> Dict[str, Any]:
|
||||||
|
"""
|
||||||
|
Foreign Exchange Rule: Handle currency mismatches
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
# Check for currency mismatch
|
||||||
|
if receipt.currency != transaction.currency:
|
||||||
|
fx_discrepancy = abs(receipt.amount - abs(transaction.amount))
|
||||||
|
|
||||||
|
return {
|
||||||
|
"success": True,
|
||||||
|
"fx_discrepancy": fx_discrepancy,
|
||||||
|
"receipt_currency": receipt.currency,
|
||||||
|
"transaction_currency": transaction.currency,
|
||||||
|
"receipt_amount": receipt.amount,
|
||||||
|
"transaction_amount": abs(transaction.amount),
|
||||||
|
"requires_manual_review": True,
|
||||||
|
"rule_applied": "Foreign Exchange Rule"
|
||||||
|
}
|
||||||
|
else:
|
||||||
|
return {
|
||||||
|
"success": True,
|
||||||
|
"fx_discrepancy": 0.0,
|
||||||
|
"requires_manual_review": False,
|
||||||
|
"rule_applied": "No FX Rule (same currency)"
|
||||||
|
}
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
self.logger.error(f"Error applying FX rule: {str(e)}")
|
||||||
|
return {
|
||||||
|
"success": False,
|
||||||
|
"error": str(e),
|
||||||
|
"fx_discrepancy": 0.0,
|
||||||
|
"requires_manual_review": False
|
||||||
|
}
|
||||||
|
|
||||||
|
def calculate_straight_line_depreciation(self, asset: Asset, year: int) -> Dict[str, Any]:
|
||||||
|
"""
|
||||||
|
Straight-Line Depreciation for accounting purposes
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
if year > asset.useful_life_years:
|
||||||
|
return {
|
||||||
|
"success": False,
|
||||||
|
"error": f"Year {year} exceeds useful life of {asset.useful_life_years} years",
|
||||||
|
"depreciation": 0.0
|
||||||
|
}
|
||||||
|
|
||||||
|
# Straight-line formula: (Cost - Residual Value) / Useful Life
|
||||||
|
annual_depreciation = (asset.purchase_amount - asset.residual_value) / asset.useful_life_years
|
||||||
|
|
||||||
|
return {
|
||||||
|
"success": True,
|
||||||
|
"depreciation": annual_depreciation,
|
||||||
|
"book_value": asset.purchase_amount - (annual_depreciation * year),
|
||||||
|
"method": "Straight-Line",
|
||||||
|
"rule_applied": "Depreciation Rule (Accounting)"
|
||||||
|
}
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
self.logger.error(f"Error calculating straight-line depreciation: {str(e)}")
|
||||||
|
return {
|
||||||
|
"success": False,
|
||||||
|
"error": str(e),
|
||||||
|
"depreciation": 0.0
|
||||||
|
}
|
||||||
|
|
||||||
|
def calculate_cca_depreciation(self, asset: Asset, year: int) -> Dict[str, Any]:
|
||||||
|
"""
|
||||||
|
CCA (Capital Cost Allowance) Depreciation for tax purposes
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
if year < 1:
|
||||||
|
return {
|
||||||
|
"success": False,
|
||||||
|
"error": "Year must be at least 1",
|
||||||
|
"depreciation": 0.0
|
||||||
|
}
|
||||||
|
|
||||||
|
# CCA uses declining balance method
|
||||||
|
book_value = asset.purchase_amount
|
||||||
|
total_depreciation = 0.0
|
||||||
|
|
||||||
|
for current_year in range(1, year + 1):
|
||||||
|
# CCA is calculated on the declining balance
|
||||||
|
cca_amount = book_value * asset.cca_rate
|
||||||
|
book_value -= cca_amount
|
||||||
|
total_depreciation += cca_amount
|
||||||
|
|
||||||
|
# Stop if book value reaches residual value
|
||||||
|
if book_value <= asset.residual_value:
|
||||||
|
break
|
||||||
|
|
||||||
|
return {
|
||||||
|
"success": True,
|
||||||
|
"depreciation": cca_amount, # Current year depreciation
|
||||||
|
"total_depreciation": total_depreciation,
|
||||||
|
"book_value": max(book_value, asset.residual_value),
|
||||||
|
"method": "CCA Declining Balance",
|
||||||
|
"rule_applied": "Depreciation Rule (Tax)"
|
||||||
|
}
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
self.logger.error(f"Error calculating CCA depreciation: {str(e)}")
|
||||||
|
return {
|
||||||
|
"success": False,
|
||||||
|
"error": str(e),
|
||||||
|
"depreciation": 0.0
|
||||||
|
}
|
||||||
|
|
||||||
|
def apply_meals_entertainment_rule(self, receipt: Receipt) -> Dict[str, Any]:
|
||||||
|
"""
|
||||||
|
Meals & Entertainment Tax Deduction Rule
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
if not receipt.is_meals_entertainment:
|
||||||
|
return {
|
||||||
|
"success": True,
|
||||||
|
"tax_deduction": receipt.amount,
|
||||||
|
"accounting_deduction": receipt.amount,
|
||||||
|
"rule_applied": "No M&E Rule (not meals/entertainment)"
|
||||||
|
}
|
||||||
|
|
||||||
|
# For tax purposes: 50% deductible
|
||||||
|
tax_deduction = receipt.amount * 0.5
|
||||||
|
|
||||||
|
# For accounting purposes: 100% deductible
|
||||||
|
accounting_deduction = receipt.amount
|
||||||
|
|
||||||
|
# Sales tax is fully deductible for accounting
|
||||||
|
tax_on_meal = receipt.tax
|
||||||
|
|
||||||
|
return {
|
||||||
|
"success": True,
|
||||||
|
"tax_deduction": tax_deduction,
|
||||||
|
"accounting_deduction": accounting_deduction,
|
||||||
|
"tax_on_meal": tax_on_meal,
|
||||||
|
"rule_applied": "Meals & Entertainment Rule"
|
||||||
|
}
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
self.logger.error(f"Error applying meals & entertainment rule: {str(e)}")
|
||||||
|
return {
|
||||||
|
"success": False,
|
||||||
|
"error": str(e),
|
||||||
|
"tax_deduction": 0.0,
|
||||||
|
"accounting_deduction": 0.0
|
||||||
|
}
|
||||||
|
|
||||||
|
def apply_all_tax_rules(self, receipt: Receipt, transaction: Transaction = None) -> Dict[str, Any]:
|
||||||
|
"""
|
||||||
|
Apply all tax rules to a receipt
|
||||||
|
"""
|
||||||
|
results = {
|
||||||
|
"receipt_id": receipt.id,
|
||||||
|
"rules_applied": [],
|
||||||
|
"sales_tax": {},
|
||||||
|
"fx_analysis": {},
|
||||||
|
"meals_entertainment": {}
|
||||||
|
}
|
||||||
|
|
||||||
|
# Apply Sales Tax Rule
|
||||||
|
sales_tax_result = self.apply_sales_tax_rule(receipt)
|
||||||
|
results["sales_tax"] = sales_tax_result
|
||||||
|
if sales_tax_result["success"]:
|
||||||
|
results["rules_applied"].append("Sales Tax Rule")
|
||||||
|
|
||||||
|
# Apply FX Rule (if transaction provided)
|
||||||
|
if transaction:
|
||||||
|
fx_result = self.apply_fx_rule(receipt, transaction)
|
||||||
|
results["fx_analysis"] = fx_result
|
||||||
|
if fx_result["success"]:
|
||||||
|
results["rules_applied"].append("Foreign Exchange Rule")
|
||||||
|
|
||||||
|
# Apply Meals & Entertainment Rule
|
||||||
|
me_result = self.apply_meals_entertainment_rule(receipt)
|
||||||
|
results["meals_entertainment"] = me_result
|
||||||
|
if me_result["success"]:
|
||||||
|
results["rules_applied"].append("Meals & Entertainment Rule")
|
||||||
|
|
||||||
|
return results
|
||||||
Reference in New Issue
Block a user