Compare commits

...

4 Commits

Author SHA1 Message Date
bolade 1784d2e406 Implement database integration for transactions and receipts, including CRUD operations and data retrieval endpoints 2025-08-07 10:58:35 +01:00
bolade 55ffc52339 added categorisation and user id 2025-08-07 09:46:04 +01:00
bolade 9698e2fcaf Refactor code for improved readability and maintainability across multiple files 2025-08-07 09:06:05 +01:00
bolade 1f530da7c4 Refactor main application structure and improve logging
- Reorganized imports in main.py for better readability and structure.
- Enhanced logging configuration and added more detailed log messages throughout the application.
- Improved error handling and response formatting in transaction import endpoints.
- Streamlined transaction processing logic for CSV and image uploads.
- Updated matching engine to enhance match results with rules and improved logging.
- Refactored tax rules engine for better clarity and maintainability.
- Cleaned up requirements.txt by removing specific versioning for easier dependency management.
2025-08-06 16:12:53 +01:00
10 changed files with 1509 additions and 582 deletions
+304 -79
View File
@@ -1,115 +1,322 @@
import groq
from datetime import datetime, timedelta
from typing import List, Tuple
import config
from models import Receipt, Transaction, Match
import time
import logging
import asyncio
import time
from typing import List, Tuple
import groq
import config
from models import Match, Receipt, Transaction
# Set up logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
class AIMatcher:
def __init__(self, use_batch_matching=True):
    """Create the matcher and its Groq client.

    Args:
        use_batch_matching: When true, ``_find_best_match`` scores all
            candidates for a receipt in a single AI call; when false it
            uses the legacy path that makes one call per candidate.
    """
    self.client = groq.Groq(api_key=config.GROQ_API_KEY)
    self.model = "llama3-8b-8192"
    self.max_retries = 3
    self.retry_delay = 2  # seconds - increased for rate limiting
    self.rate_limit_delay = 1.0  # seconds between API calls
    self.last_api_call = 0
    # Toggle between new and legacy methods
    self.use_batch_matching = use_batch_matching
def match_receipts_to_transactions(
    self, receipts: List[Receipt], transactions: List[Transaction]
) -> List[Match]:
    """Match receipts to transactions using AI.

    Receipts are processed in order. For each one the rate limiter runs
    first, then the single best match is looked up. Receipts for which no
    match is returned are logged and left out of the result.

    Args:
        receipts: Receipts to find matches for.
        transactions: Transactions every receipt is compared against.

    Returns:
        The matches found, sorted by confidence score, highest first.
    """
    logger.info(
        f"Starting AI matching for {len(receipts)} receipts against {len(transactions)} transactions"
    )
    matches = []
    for position, receipt in enumerate(receipts, start=1):
        logger.info(
            f"Processing receipt {position}/{len(receipts)}: {receipt.vendor} - ${receipt.amount}"
        )
        # Rate limiting
        self._rate_limit()
        # Get the BEST match for this receipt (highest confidence score)
        best_match = self._find_best_match(receipt, transactions)
        if best_match:
            matches.append(best_match)
            logger.info(
                f"Found match: {best_match.confidence_score:.3f} - {best_match.match_reason}"
            )
        else:
            logger.warning(
                f"No match found for receipt: {receipt.vendor} - ${receipt.amount}"
            )
    # Sort by confidence score (highest first); list.sort is stable, so
    # equal scores keep receipt order.
    matches.sort(key=lambda match: match.confidence_score, reverse=True)
    logger.info(f"AI matching completed. Found {len(matches)} matches")
    return matches
def _rate_limit(self):
"""Implement rate limiting to avoid API quota exhaustion"""
current_time = time.time()
time_since_last_call = current_time - self.last_api_call
if time_since_last_call < self.rate_limit_delay:
sleep_time = self.rate_limit_delay - time_since_last_call
logger.debug(f"Rate limiting: sleeping for {sleep_time:.2f} seconds")
time.sleep(sleep_time)
self.last_api_call = time.time()
def _find_best_match(
    self, receipt: Receipt, transactions: List[Transaction]
) -> Match:
    """Find the BEST match for a receipt among the given transactions.

    Transactions are first narrowed with ``_filter_candidates``. The
    surviving candidates are then handed to exactly one strategy, chosen
    by ``self.use_batch_matching``.

    Args:
        receipt: Receipt to match.
        transactions: Transactions to consider.

    Returns:
        The match produced by the selected strategy, or ``None`` when no
        candidate survives filtering or the strategy returns nothing.
    """
    candidates = self._filter_candidates(receipt, transactions)
    if not candidates:
        logger.warning(
            f"No candidates found for receipt: {receipt.vendor} - ${receipt.amount}"
        )
        return None
    logger.info(f"Found {len(candidates)} candidates for receipt: {receipt.vendor}")
    # Choose matching method based on configuration. Only one strategy runs;
    # scoring every candidate here as well would spend one extra API call
    # per candidate on a result that is then discarded.
    if self.use_batch_matching:
        # New efficient method: single AI call for all candidates
        return self._find_best_match_single_call(receipt, candidates)
    # Legacy method: individual AI calls (fallback)
    return self._find_best_match_legacy(receipt, candidates)
def _filter_candidates(self, receipt: Receipt, transactions: List[Transaction]) -> List[Transaction]:
def _find_best_match_single_call(
    self, receipt: Receipt, candidates: List[Transaction]
) -> Match:
    """Find the best match using a single AI call to evaluate all candidates.

    Builds one prompt describing the receipt and up to ten candidate
    transactions, sends it through ``_call_groq_api_with_timeout`` and
    parses the reply with ``_parse_single_match_response``.

    Args:
        receipt: Receipt to match.
        candidates: Pre-filtered transactions to choose from.

    Returns:
        A ``Match`` for the candidate the model selected, or ``None`` when
        there are no candidates, the reply cannot be parsed, the selected
        index is out of range, or every API attempt raised.
    """
    if not candidates:
        return None
    # Limit candidates to avoid token limits (adjust based on your needs)
    max_candidates = 10
    if len(candidates) > max_candidates:
        # Sort by amount similarity and take top candidates
        candidates = sorted(
            candidates, key=lambda t: abs(receipt.amount - abs(t.amount))
        )[:max_candidates]
        logger.info(
            f"Limited candidates to top {max_candidates} by amount similarity"
        )
    # Build comprehensive prompt with all candidates.
    # The multi-line f-strings below are kept flush-left because their
    # whitespace is part of the text sent to the model.
    candidates_text = ""
    for i, transaction in enumerate(candidates):
        transaction_amount_abs = abs(transaction.amount)
        date_diff = abs((receipt.receipt_date - transaction.transaction_date).days)
        amount_diff = abs(receipt.amount - transaction_amount_abs)
        # Guard against division by zero for zero/negative receipt amounts.
        amount_percent_diff = (
            (amount_diff / receipt.amount) * 100 if receipt.amount > 0 else 0
        )
        candidates_text += f"""
Candidate {i + 1}:
- Vendor: {transaction.vendor}
- Amount: ${transaction.amount} (absolute: ${transaction_amount_abs})
- Date: {transaction.transaction_date.strftime("%Y-%m-%d")} ({date_diff} days difference)
- Notes: {transaction.notes}
- Amount difference: ${amount_diff} ({amount_percent_diff:.1f}%)
"""
    prompt = f"""
You are an expert at matching receipts to bank transactions. Analyze the receipt below against ALL the candidate transactions and return the BEST match.
RECEIPT TO MATCH:
- Vendor: {receipt.vendor}
- Amount: ${receipt.amount}
- Date: {receipt.receipt_date.strftime("%Y-%m-%d")}
- Description: {receipt.description}
- Category: {receipt.category}
CANDIDATE TRANSACTIONS:
{candidates_text}
SCORING CRITERIA:
- Perfect matches (same vendor, amount, date): 0.95-1.0
- High confidence (minor differences): 0.8-0.94
- Medium confidence (moderate differences): 0.6-0.79
- Low confidence (significant differences): 0.4-0.59
- Very low confidence (major differences): 0.2-0.39
- Minimal similarity: 0.1-0.19
- No meaningful similarity: 0.0-0.09
Consider vendor name similarity, amount accuracy, date proximity, and description/notes relevance.
IMPORTANT: You MUST return the candidate with the highest match score, even if it's very low. Never return NONE.
Return ONLY the best match in this exact format:
CANDIDATE_NUMBER|CONFIDENCE_SCORE|REASON
Example: 3|0.87|Same vendor name, exact amount match, 1 day apart
Example of low match: 5|0.15|Best available option despite significant differences in vendor and amount
"""
    # Only exceptions trigger a retry; a reply that parses badly or names
    # an out-of-range candidate ends the search immediately with None.
    for attempt in range(self.max_retries):
        try:
            result = self._call_groq_api_with_timeout(
                prompt, timeout=45
            )  # Longer timeout for complex prompt
            # Parse the single result (candidate_num comes back 0-based)
            candidate_num, score, reason = self._parse_single_match_response(result)
            if candidate_num == -1:  # Parsing error occurred
                logger.warning(
                    f"Failed to parse AI response for receipt: {receipt.vendor}"
                )
                return None
            if 0 <= candidate_num < len(candidates):
                best_transaction = candidates[candidate_num]
                logger.info(
                    f"AI selected candidate {candidate_num + 1}: {best_transaction.vendor} (score: {score:.3f})"
                )
                return Match(receipt, best_transaction, score, reason)
            else:
                logger.warning(
                    f"AI returned invalid candidate number: {candidate_num}"
                )
                return None
        except Exception as e:
            logger.warning(
                f"Attempt {attempt + 1} failed for receipt {receipt.id}: {str(e)}"
            )
            if attempt < self.max_retries - 1:
                # Exponential backoff: retry_delay, 2x, 4x, ...
                sleep_time = self.retry_delay * (2**attempt)
                logger.info(f"Waiting {sleep_time} seconds before retry...")
                time.sleep(sleep_time)
            else:
                logger.error(f"All attempts failed for receipt {receipt.id}")
                return None
    # Reached only when max_retries is zero and the loop never runs.
    return None
def _parse_single_match_response(self, result: str) -> Tuple[int, float, str]:
    """Parse the AI reply for the single best match.

    The expected reply format is ``CANDIDATE_NUMBER|CONFIDENCE_SCORE|REASON``.
    Any ``|`` characters after the second one are treated as part of the
    reason.

    Args:
        result: Raw text returned by the model.

    Returns:
        ``(candidate_index, score, reason)``. ``candidate_index`` is the
        0-based form of the number in the reply and ``score`` is clamped to
        ``[0.0, 1.0]``. On any parsing problem the tuple is
        ``(-1, 0.0, <explanation>)``.
    """
    import re  # kept function-local, as the original method did

    result = result.strip()
    logger.debug(f"Parsing single match response: {result}")
    try:
        if result.upper().startswith("NONE"):
            # This should not happen with new prompt, but handle as parsing error
            logger.warning(
                "AI returned NONE despite being instructed to always return best match"
            )
            return -1, 0.0, "AI returned NONE unexpectedly"
        parts = result.split("|")
        if len(parts) >= 3:
            # Extract candidate number (first run of digits in the field)
            candidate_match = re.search(r"\d+", parts[0])
            if not candidate_match:
                raise ValueError("No candidate number found")
            candidate_num = int(candidate_match.group()) - 1  # 0-based index
            reason = "|".join(parts[2:]).strip()
            # Take the FIRST number in the score field. Filtering the field
            # down to digits and dots would glue separate numbers together
            # ("0.87 (87%)" -> "0.8787") and silently drop a minus sign.
            score_match = re.search(r"-?(?:\d+\.?\d*|\.\d+)", parts[1])
            score = float(score_match.group()) if score_match else 0.0
            # Ensure score is in valid range
            score = max(0.0, min(1.0, score))
            logger.debug(
                f"Parsed: candidate={candidate_num}, score={score}, reason={reason}"
            )
            return candidate_num, score, reason
    except Exception as e:
        logger.warning(f"Error parsing single match response: {e}")
    # Fallback
    logger.warning(f"Could not parse single match response: {result}")
    return -1, 0.0, f"Parse error: {result[:50]}..."
def _filter_candidates(
    self, receipt: Receipt, transactions: List[Transaction]
) -> List[Transaction]:
    """Filter transactions to create a reasonable candidate list.

    A transaction is kept when the absolute value of its amount differs
    from the receipt amount by at most twice the receipt amount. Input
    order is preserved.

    Args:
        receipt: Receipt whose amount defines the tolerance.
        transactions: Transactions to filter.

    Returns:
        The transactions that pass the amount check.
    """
    # NOTE(review): a negative receipt amount makes this threshold negative,
    # so nothing passes - confirm receipts are always positive.
    amount_threshold = receipt.amount * 2.0  # 200% threshold - very inclusive
    # Use absolute value for transaction amount comparison; only exclude
    # transactions with obviously different amounts.
    candidates = [
        transaction
        for transaction in transactions
        if abs(receipt.amount - abs(transaction.amount)) <= amount_threshold
    ]
    logger.debug(
        f"Filtered {len(transactions)} transactions to {len(candidates)} candidates"
    )
    return candidates
def _calculate_match_score(self, receipt: Receipt, transaction: Transaction) -> Tuple[float, str]:
def _find_best_match_legacy(
    self, receipt: Receipt, transactions: List[Transaction]
) -> Match:
    """Legacy strategy: score each candidate with its own AI call.

    The transactions are filtered with ``_filter_candidates`` and every
    survivor is scored via ``_calculate_match_score``. The first candidate
    with the strictly highest score above zero wins.

    Returns:
        The winning ``Match``, or ``None`` when nothing survives filtering
        or no candidate scores above zero.
    """
    shortlist = self._filter_candidates(receipt, transactions)
    if not shortlist:
        return None
    winner = None
    top_score = 0
    for candidate in shortlist:
        score, reason = self._calculate_match_score(receipt, candidate)
        logger.debug(
            f"Score {score:.3f} for transaction {candidate.vendor}: {reason}"
        )
        if score > top_score:
            top_score, winner = score, Match(receipt, candidate, score, reason)
    return winner
def _calculate_match_score(
self, receipt: Receipt, transaction: Transaction
) -> Tuple[float, str]:
"""Calculate match score using AI"""
# Calculate differences for the AI to consider
date_diff = abs((receipt.receipt_date - transaction.transaction_date).days)
transaction_amount_abs = abs(transaction.amount)
amount_diff = abs(receipt.amount - transaction_amount_abs)
amount_percent_diff = (amount_diff / receipt.amount) * 100 if receipt.amount > 0 else 0
amount_percent_diff = (
(amount_diff / receipt.amount) * 100 if receipt.amount > 0 else 0
)
prompt = f"""
Compare this receipt with this transaction and provide a confidence score (0-1) and brief reason.
Compare this receipt with this transaction and provide a confidence score (0-1) and brief reason, the reason must be a single sentence without any special formatting.
Receipt: {receipt.vendor}, ${receipt.amount}, {receipt.receipt_date.strftime('%Y-%m-%d')}
Receipt: {receipt.vendor}, ${receipt.amount}, {receipt.receipt_date.strftime("%Y-%m-%d")}
Receipt Description: {receipt.description}
Receipt Category: {receipt.category}
Transaction: {transaction.vendor}, ${transaction.amount} (absolute: ${transaction_amount_abs}), {transaction.transaction_date.strftime('%Y-%m-%d')}
Transaction: {transaction.vendor}, ${transaction.amount} (absolute: ${transaction_amount_abs}), {transaction.transaction_date.strftime("%Y-%m-%d")}
Transaction Notes: {transaction.notes}
Differences:
@@ -135,61 +342,78 @@ class AIMatcher:
Format: [score]|[reason]
Example: 0.85|Same vendor, same amount, 2 days apart
"""
for attempt in range(self.max_retries):
try:
result = self._call_groq_api_with_timeout(prompt, timeout=30) # Increased timeout
result = self._call_groq_api_with_timeout(
prompt, timeout=30
) # Increased timeout
# Parse the result - handle multiple formats
score, reason = self._parse_ai_response(result)
logger.debug(f"AI Response: {result}")
logger.debug(f"Parsed: score={score}, reason={reason}")
return score, reason
except Exception as e:
logger.warning(f"Attempt {attempt + 1} failed for receipt {receipt.id}: {str(e)}")
logger.warning(
f"Attempt {attempt + 1} failed for receipt {receipt.id}: {str(e)}"
)
if attempt < self.max_retries - 1:
# Exponential backoff for rate limiting
sleep_time = self.retry_delay * (2 ** attempt)
sleep_time = self.retry_delay * (2**attempt)
logger.info(f"Waiting {sleep_time} seconds before retry...")
time.sleep(sleep_time)
else:
logger.error(f"All attempts failed for receipt {receipt.id}")
return 0.0, f"AI error after {self.max_retries} attempts: {str(e)}"
def _parse_ai_response(self, result: str) -> Tuple[float, str]:
"""Parse AI response with robust error handling"""
result = result.strip()
logger.debug(f"Parsing AI response: {result}")
# Try to find score in various formats
if '|' in result:
parts = result.split('|')
if "|" in result:
parts = result.split("|")
logger.debug(f"Split response into {len(parts)} parts: {parts}")
# Look for a numeric score in any part
for i, part in enumerate(parts):
part = part.strip()
try:
# Remove any non-numeric characters except decimal point
score_str_clean = ''.join(c for c in part if c.isdigit() or c == '.')
score_str_clean = "".join(
c for c in part if c.isdigit() or c == "."
)
if score_str_clean:
score = float(score_str_clean)
if 0 <= score <= 1: # Valid confidence score
# Get reason from other parts
reason_parts = [p.strip() for j, p in enumerate(parts) if j != i and p.strip()]
reason = ' | '.join(reason_parts) if reason_parts else "Score extracted"
logger.debug(f"Found score {score} in part {i}, reason: {reason}")
reason_parts = [
p.strip()
for j, p in enumerate(parts)
if j != i and p.strip()
]
reason = (
" | ".join(reason_parts)
if reason_parts
else "Score extracted"
)
logger.debug(
f"Found score {score} in part {i}, reason: {reason}"
)
return score, reason
except ValueError:
continue
# Try to extract just a number from the response
try:
import re
numbers = re.findall(r'\d+\.?\d*', result)
numbers = re.findall(r"\d+\.?\d*", result)
if numbers:
for num_str in numbers:
score = float(num_str)
@@ -198,11 +422,12 @@ class AIMatcher:
return score, f"Extracted from response: {result[:50]}..."
except (ValueError, IndexError):
pass
# Fallback - try to find any number and normalize it
try:
import re
numbers = re.findall(r'\d+\.?\d*', result)
numbers = re.findall(r"\d+\.?\d*", result)
if numbers:
score = float(numbers[0])
# Normalize to 0-1 range if it's a percentage or other scale
@@ -213,27 +438,27 @@ class AIMatcher:
return score, f"Normalized from response: {result[:50]}..."
except (ValueError, IndexError):
pass
# Final fallback
logger.warning(f"Could not parse AI response: {result}")
return 0.0, f"Unparseable response: {result[:50]}..."
def _call_groq_api_with_timeout(self, prompt: str, timeout: int = 15) -> str:
"""Make API call with timeout and retry logic"""
import concurrent.futures
def api_call():
try:
response = self.client.chat.completions.create(
model=self.model,
messages=[{"role": "user", "content": prompt}],
max_tokens=200,
temperature=0.1
temperature=0.1,
)
return response.choices[0].message.content.strip()
except Exception as e:
raise e
try:
with concurrent.futures.ThreadPoolExecutor() as executor:
future = executor.submit(api_call)
@@ -241,4 +466,4 @@ class AIMatcher:
except concurrent.futures.TimeoutError:
raise Exception(f"API call timed out after {timeout} seconds")
except Exception as e:
raise e
raise e
+78 -29
View File
@@ -1,9 +1,10 @@
from dataclasses import dataclass
from typing import Dict, Any, List
import config
from typing import Any, Dict, List
from models import Receipt, Transaction
from tax_rules_engine import TaxRulesEngine
@dataclass
class AIRule:
name: str
@@ -12,48 +13,88 @@ class AIRule:
source: str
status: str = "active"
class AIRulesEngine:
def __init__(self):
    """Create the engine with its tax rules engine and the default rules."""
    self.rules: List[AIRule] = []
    self.tax_rules_engine = TaxRulesEngine()
    # Replaces the empty list above with the built-in rule set.
    self._load_default_rules()
def _load_default_rules(self):
    """Reset ``self.rules`` to the built-in rule set.

    Each rule is listed exactly once. A duplicated entry would have its
    action executed twice per receipt/transaction pair in ``apply_rules``.
    """
    self.rules = [
        AIRule(
            "exact_amount_match", "amount_diff <= 0.01", "auto_approve", "system"
        ),
        AIRule(
            "same_vendor_same_date",
            "vendor_match and date_diff <= 1",
            "high_confidence",
            "system",
        ),
        AIRule(
            "gas_station_pattern",
            "vendor_contains_gas_or_fuel",
            "categorize_transport",
            "system",
        ),
        # Tax-related rules
        AIRule(
            "fx_currency_mismatch",
            "currency_mismatch",
            "flag_fx_review",
            "tax_system",
        ),
        AIRule(
            "meals_entertainment",
            "is_meals_entertainment",
            "apply_me_tax_rule",
            "tax_system",
        ),
        AIRule(
            "provincial_tax_calculation",
            "has_address_info",
            "calculate_provincial_tax",
            "tax_system",
        ),
    ]
def apply_rules(self, receipt: Receipt, transaction: Transaction) -> Dict[str, Any]:
    """Run every active rule against a receipt/transaction pair.

    For each rule whose status is ``"active"`` the condition is evaluated
    with ``_evaluate_condition``; when it holds, ``_execute_action`` is
    called to update the shared results dict.

    Args:
        receipt: Receipt side of the pair.
        transaction: Transaction side of the pair.

    Returns:
        A dict with the keys ``auto_approve``, ``confidence_boost``,
        ``category`` and ``tax_analysis``, as left by the executed actions.
    """
    results = {
        "auto_approve": False,
        "confidence_boost": 0,
        "category": None,
        "tax_analysis": {},
    }
    for rule in self.rules:
        if rule.status != "active":
            continue
        if self._evaluate_condition(rule.condition, receipt, transaction):
            self._execute_action(rule.action, results, receipt, transaction)
    return results
def _evaluate_condition(self, condition: str, receipt: Receipt, transaction: Transaction) -> bool:
def _evaluate_condition(
self, condition: str, receipt: Receipt, transaction: Transaction
) -> bool:
"""Safely evaluate rule conditions without using eval()"""
amount_diff = abs(receipt.amount - abs(transaction.amount))
date_diff = abs((receipt.receipt_date - transaction.transaction_date).days)
vendor_match = receipt.vendor.lower() in transaction.vendor.lower() or transaction.vendor.lower() in receipt.vendor.lower()
vendor_match = (
receipt.vendor.lower() in transaction.vendor.lower()
or transaction.vendor.lower() in receipt.vendor.lower()
)
vendor_lower = receipt.vendor.lower()
vendor_contains_gas_or_fuel = 'gas' in vendor_lower or 'fuel' in vendor_lower
vendor_contains_gas_or_fuel = "gas" in vendor_lower or "fuel" in vendor_lower
# Tax-related conditions
currency_mismatch = receipt.currency != transaction.currency
is_meals_entertainment = receipt.is_meals_entertainment
has_address_info = receipt.billing_address is not None or receipt.shipping_address is not None
has_address_info = (
receipt.billing_address is not None or receipt.shipping_address is not None
)
# Handle specific condition types safely
if condition == "amount_diff <= 0.01":
return amount_diff <= 0.01
@@ -86,14 +127,20 @@ class AIRulesEngine:
"min": min,
"max": max,
"sum": sum,
"round": round
"round": round,
}
return eval(condition, safe_globals, {})
except (SyntaxError, NameError, TypeError) as e:
print(f"Warning: Invalid condition '{condition}': {e}")
return False
def _execute_action(self, action: str, results: Dict[str, Any], receipt: Receipt, transaction: Transaction):
def _execute_action(
self,
action: str,
results: Dict[str, Any],
receipt: Receipt,
transaction: Transaction,
):
if action == "auto_approve":
results["auto_approve"] = True
elif action == "high_confidence":
@@ -114,13 +161,15 @@ class AIRulesEngine:
# Calculate provincial tax
tax_result = self.tax_rules_engine.apply_sales_tax_rule(receipt)
results["tax_analysis"]["sales_tax"] = tax_result
def add_rule(self, rule: AIRule):
    """Append ``rule`` to the end of the rule list (no duplicate check)."""
    self.rules.append(rule)
def remove_rule(self, rule_name: str):
self.rules = [r for r in self.rules if r.name != rule_name]
def apply_tax_rules(
    self, receipt: Receipt, transaction: Transaction = None
) -> Dict[str, Any]:
    """Apply all tax rules to a receipt/transaction pair.

    Delegates to ``TaxRulesEngine.apply_all_tax_rules``.

    Args:
        receipt: Receipt to analyse.
        transaction: Optional matching transaction; passed through as-is.

    Returns:
        Whatever the tax rules engine returns for the pair.
    """
    return self.tax_rules_engine.apply_all_tax_rules(receipt, transaction)
+29 -7
View File
@@ -1,6 +1,7 @@
from pydantic import BaseModel
from datetime import datetime
from typing import List, Optional
from pydantic import BaseModel
class AddressRequest(BaseModel):
province: str
@@ -8,6 +9,7 @@ class AddressRequest(BaseModel):
postal_code: str
country: str = "Canada"
class ReceiptRequest(BaseModel):
id: str
file_name: str
@@ -24,6 +26,7 @@ class ReceiptRequest(BaseModel):
currency: str = "CAD"
is_meals_entertainment: bool = False
class TransactionRequest(BaseModel):
id: str
transaction_date: datetime
@@ -34,6 +37,7 @@ class TransactionRequest(BaseModel):
currency: str = "CAD"
fx_rate: Optional[float] = None
class AssetRequest(BaseModel):
id: str
name: str
@@ -44,42 +48,51 @@ class AssetRequest(BaseModel):
cca_rate: float
asset_class: str
class MatchingRequest(BaseModel):
    """Request body carrying the receipt and transaction ids for a matching run."""

    receipt_ids: List[str]
    transaction_ids: List[str]
class MatchResponse(BaseModel):
    """One receipt/transaction match, with score, reason and display fields."""

    receipt_id: str
    transaction_id: str
    confidence_score: float
    match_reason: str
    tax_analysis: Optional[dict] = None
    # Currency information
    receipt_currency: str = "CAD"
    transaction_currency: str = "CAD"
    currency_match: bool = True
    # Receipt details copied into the response (all required)
    receipt_vendor: str
    receipt_amount: float
    receipt_description: str
    receipt_category: str
    receipt_tax_amount: float
    # Transaction details copied into the response (all required)
    transaction_vendor: str
    transaction_amount: float
class MatchingResponse(BaseModel):
    """Response for a matching run: the matches plus a free-form stats dict."""

    matches: List[MatchResponse]
    stats: dict
class ApprovalRequest(BaseModel):
    """Request body recording an approve/reject decision for a match."""

    match_id: str
    approved: bool
    reason: Optional[str] = None  # optional explanation for the decision
class RuleRequest(BaseModel):
    """Request body describing a rule by name, condition and action."""

    name: str
    condition: str
    action: str
    source: str = "user"  # defaults to "user" when the caller omits it
class DocumentUploadResponse(BaseModel):
    """Response describing an uploaded document."""

    file_id: str
    filename: str
    upload_date: datetime
    status: str
class DocumentProcessResponse(BaseModel):
file_id: str
extraction_success: bool
@@ -92,11 +105,13 @@ class DocumentProcessResponse(BaseModel):
confidence: Optional[float] = None
error: Optional[str] = None
# New tax-related models
class TaxCalculationRequest(BaseModel):
    """Request body for a tax calculation on a receipt."""

    receipt_id: str
    transaction_id: Optional[str] = None  # optional paired transaction
class TaxCalculationResponse(BaseModel):
receipt_id: str
rules_applied: List[str]
@@ -104,11 +119,13 @@ class TaxCalculationResponse(BaseModel):
fx_analysis: Optional[dict] = None
meals_entertainment: dict
class DepreciationRequest(BaseModel):
    """Request body for a depreciation calculation on one asset and year."""

    asset: AssetRequest
    year: int
    method: str  # "straight_line" or "cca"
class DepreciationResponse(BaseModel):
asset_id: str
year: int
@@ -117,4 +134,9 @@ class DepreciationResponse(BaseModel):
book_value: float
total_depreciation: Optional[float] = None
success: bool
error: Optional[str] = None
error: Optional[str] = None
class MatchSpecificRequest(BaseModel):
    """Request body carrying a list of file ids and one categorization id."""

    file_ids: List[str]
    categorization_id: str
+75
View File
@@ -0,0 +1,75 @@
from typing import Annotated
from fastapi import Depends
from sqlalchemy import Column, DateTime, Float, Integer, String, create_engine
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import Session, sessionmaker
# SQLite database stored in ``sql_app.db`` relative to the working directory.
SQLALCHEMY_DATABASE_URL = "sqlite:///./sql_app.db"
# ``check_same_thread=False`` disables sqlite3's same-thread check so a
# connection may be used from a thread other than the one that opened it.
engine = create_engine(
    SQLALCHEMY_DATABASE_URL, connect_args={"check_same_thread": False}
)
# Session factory bound to the engine; commits and flushes are explicit.
SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine)
def get_db():
    """Yield a new database session and close it when the consumer is done.

    Written as a generator so it can be used with ``Depends``; the session
    is closed in ``finally`` even if the consumer raises.
    """
    session = SessionLocal()
    try:
        yield session
    finally:
        session.close()
# Annotated alias combining the ``Session`` type with the ``get_db`` dependency.
db_dependency = Annotated[Session, Depends(get_db)]
# Declarative base class that the ORM models in this module inherit from.
Base = declarative_base()
def create_db_tables():
    """Create the tables for all models registered on ``Base``."""
    Base.metadata.create_all(bind=engine)
def clear_all_data():
    """Clear all data from the database (useful for testing).

    Deletes every row from the transactions table, then the receipts
    table, and commits. The session is always closed afterwards.
    """
    session = SessionLocal()
    try:
        # Transactions first, then receipts.
        for model in (Transaction, Receipt):
            session.query(model).delete()
        session.commit()
    finally:
        session.close()
# Transactions table
class Transaction(Base):
    """ORM model for rows of the ``transactions`` table."""

    __tablename__ = "transactions"
    # Surrogate integer primary key
    id = Column(Integer, primary_key=True, index=True)
    # String identifier, unique and indexed
    transaction_id = Column(String, unique=True, index=True)
    # Required fields
    amount = Column(Float, nullable=False)
    date = Column(DateTime, nullable=False)
    vendor = Column(String, nullable=False)
    # Optional fields
    description = Column(String, nullable=True)
    category = Column(String, nullable=True)
    tax_amount = Column(Float, nullable=True)
    categorisation_id = Column(String, nullable=True)
    user_id = Column(String, nullable=True)
# Receipts table
class Receipt(Base):
    """ORM model for rows of the ``receipts`` table."""

    __tablename__ = "receipts"
    # Surrogate integer primary key
    id = Column(Integer, primary_key=True, index=True)
    # String identifiers, each unique and indexed
    receipt_id = Column(String, unique=True, index=True)
    file_id = Column(String, unique=True, index=True)
    # Required fields
    amount = Column(Float, nullable=False)
    date = Column(DateTime, nullable=False)
    vendor = Column(String, nullable=False)
    # Optional fields
    description = Column(String, nullable=True)
    category = Column(String, nullable=True)
    tax_amount = Column(Float, nullable=True)
    confidence = Column(Float, nullable=True)
    # NOTE(review): stored as a string although the name reads like a flag -
    # confirm what values callers write here.
    extraction_success = Column(String, nullable=True)
    error_message = Column(String, nullable=True)
+39 -23
View File
@@ -1,8 +1,9 @@
from dataclasses import dataclass
from datetime import datetime, timedelta
from typing import List, Optional
import json
import os
from dataclasses import dataclass
from datetime import datetime, timedelta
from typing import List
@dataclass
class FeedbackLog:
@@ -13,48 +14,63 @@ class FeedbackLog:
timestamp: datetime
user_id: str
class FeedbackLogger:
    """Keep user override feedback in memory and mirror it to a JSON file.

    The file holds a JSON list of objects with the keys ``transaction_id``,
    ``original_match``, ``correction``, ``reason``, ``timestamp`` (ISO 8601
    string) and ``user_id``.
    """

    def __init__(self, log_file: str = "feedback_logs.json"):
        """Remember the file path and load any logs already stored there."""
        self.log_file = log_file
        self.logs: List[FeedbackLog] = self._load_logs()

    @staticmethod
    def _parse_timestamp(value):
        """Turn an ISO 8601 string into ``datetime``; pass other values through."""
        if isinstance(value, str):
            return datetime.fromisoformat(value)
        return value

    def _load_logs(self) -> List[FeedbackLog]:
        """Read the log file, returning ``[]`` if it is missing or unreadable.

        Timestamps are written as ISO strings by ``_save_logs``. They are
        converted back to ``datetime`` here so ``get_recent_logs`` can
        compare them against a ``datetime`` cutoff.
        """
        if not os.path.exists(self.log_file):
            return []
        try:
            with open(self.log_file, "r") as f:
                data = json.load(f)
            return [
                FeedbackLog(
                    **{**log, "timestamp": self._parse_timestamp(log["timestamp"])}
                )
                for log in data
            ]
        except Exception:
            # Best effort: a corrupt or incompatible file yields no history.
            return []

    def _save_logs(self):
        """Rewrite the whole log file from the in-memory list."""
        with open(self.log_file, "w") as f:
            json.dump(
                [
                    {
                        "transaction_id": log.transaction_id,
                        "original_match": log.original_match,
                        "correction": log.correction,
                        "reason": log.reason,
                        "timestamp": log.timestamp.isoformat(),
                        "user_id": log.user_id,
                    }
                    for log in self.logs
                ],
                f,
                indent=2,
            )

    def log_override(
        self,
        transaction_id: str,
        original_match: str,
        correction: str,
        reason: str,
        user_id: str,
    ):
        """Record a user override stamped with the current time and persist it."""
        log = FeedbackLog(
            transaction_id=transaction_id,
            original_match=original_match,
            correction=correction,
            reason=reason,
            timestamp=datetime.now(),
            user_id=user_id,
        )
        self.logs.append(log)
        self._save_logs()

    def get_logs_by_transaction(self, transaction_id: str) -> List[FeedbackLog]:
        """Return the logs recorded for ``transaction_id``, in insertion order."""
        return [log for log in self.logs if log.transaction_id == transaction_id]

    def get_recent_logs(self, days: int = 30) -> List[FeedbackLog]:
        """Return the logs whose timestamp is within the last ``days`` days."""
        cutoff = datetime.now() - timedelta(days=days)
        return [log for log in self.logs if log.timestamp > cutoff]
+81 -62
View File
@@ -1,13 +1,13 @@
import os
import io
from typing import List, Dict, Any, Optional
from datetime import datetime, timedelta
from typing import Any, Dict, List
class GoogleDriveSync:
def __init__(self):
self.service = None
self.processed_files = set()
def authenticate(self):
"""Authenticate with Google Drive API"""
try:
@@ -15,111 +15,130 @@ class GoogleDriveSync:
from google.oauth2.credentials import Credentials
from google_auth_oauthlib.flow import InstalledAppFlow
from googleapiclient.discovery import build
SCOPES = ['https://www.googleapis.com/auth/drive.readonly']
SCOPES = ["https://www.googleapis.com/auth/drive.readonly"]
# Load existing credentials
if os.path.exists('token.json'):
self.creds = Credentials.from_authorized_user_file('token.json', SCOPES)
if os.path.exists("token.json"):
self.creds = Credentials.from_authorized_user_file("token.json", SCOPES)
# If no valid credentials available, let user log in
if not self.creds or not self.creds.valid:
if self.creds and self.creds.expired and self.creds.refresh_token:
self.creds.refresh(Request())
else:
if not os.path.exists('credentials.json'):
raise Exception("credentials.json not found. Please download from Google Cloud Console.")
flow = InstalledAppFlow.from_client_secrets_file('credentials.json', SCOPES)
if not os.path.exists("credentials.json"):
raise Exception(
"credentials.json not found. Please download from Google Cloud Console."
)
flow = InstalledAppFlow.from_client_secrets_file(
"credentials.json", SCOPES
)
self.creds = flow.run_local_server(port=0)
# Save credentials for next run
with open('token.json', 'w') as token:
with open("token.json", "w") as token:
token.write(self.creds.to_json())
# Build the Drive service
self.service = build('drive', 'v3', credentials=self.creds)
self.service = build("drive", "v3", credentials=self.creds)
return True
except Exception as e:
print(f"Authentication error: {e}")
return False
def list_folders(self) -> List[Dict[str, Any]]:
"""List all folders in Google Drive"""
if not self.service:
if not self.authenticate():
return []
try:
results = self.service.files().list(
q="mimeType='application/vnd.google-apps.folder'",
pageSize=100,
fields="nextPageToken, files(id, name, createdTime, modifiedTime)"
).execute()
return results.get('files', [])
results = (
self.service.files()
.list(
q="mimeType='application/vnd.google-apps.folder'",
pageSize=100,
fields="nextPageToken, files(id, name, createdTime, modifiedTime)",
)
.execute()
)
return results.get("files", [])
except Exception as e:
print(f"Error listing folders: {e}")
return []
def get_folder_info(self, folder_id: str) -> Dict[str, Any]:
"""Get information about a Google Drive folder"""
if not self.service:
if not self.authenticate():
return {}
try:
folder = self.service.files().get(
fileId=folder_id,
fields="id, name, createdTime, modifiedTime"
).execute()
folder = (
self.service.files()
.get(fileId=folder_id, fields="id, name, createdTime, modifiedTime")
.execute()
)
return folder
except Exception as e:
print(f"Error getting folder info: {e}")
return {}
async def process_drive_files(self, folder_id: str = None) -> List[Dict[str, Any]]:
"""Process all receipt files from Google Drive"""
if not self.service:
if not self.authenticate():
return []
results = []
try:
# File types to look for
file_types = ["'application/pdf'", "'image/jpeg'", "'image/png'", "'image/gif'", "'image/bmp'"]
file_types = [
"'application/pdf'",
"'image/jpeg'",
"'image/png'",
"'image/gif'",
"'image/bmp'",
]
mime_types = " or ".join(file_types)
# Build query
query = f"mimeType contains {mime_types}"
if folder_id:
query += f" and '{folder_id}' in parents"
# Add date filter (last 30 days)
thirty_days_ago = (datetime.now() - timedelta(days=30)).isoformat() + 'Z'
thirty_days_ago = (datetime.now() - timedelta(days=30)).isoformat() + "Z"
query += f" and modifiedTime > '{thirty_days_ago}'"
results_files = self.service.files().list(
q=query,
pageSize=100,
fields="nextPageToken, files(id, name, mimeType, modifiedTime, size)"
).execute()
files = results_files.get('files', [])
files = [file for file in files if file['id'] not in self.processed_files]
results_files = (
self.service.files()
.list(
q=query,
pageSize=100,
fields="nextPageToken, files(id, name, mimeType, modifiedTime, size)",
)
.execute()
)
files = results_files.get("files", [])
files = [file for file in files if file["id"] not in self.processed_files]
# For demo purposes, return mock results
for file in files[:3]: # Process first 3 files
mock_result = {
"file_id": file['id'],
"filename": file['name'],
"drive_modified": file['modifiedTime'],
"file_size": file.get('size', 0),
"file_id": file["id"],
"filename": file["name"],
"drive_modified": file["modifiedTime"],
"file_size": file.get("size", 0),
"extraction_success": True,
"vendor": "Demo Vendor",
"description": "Coffee and sandwich",
@@ -127,12 +146,12 @@ class GoogleDriveSync:
"tax_amount": 2.04,
"date": "2024-01-15",
"category": "Food",
"confidence": 0.95
"confidence": 0.95,
}
results.append(mock_result)
self.processed_files.add(file['id'])
self.processed_files.add(file["id"])
except Exception as e:
print(f"Error processing Drive files: {e}")
return results
return results
+776 -272
View File
File diff suppressed because it is too large Load Diff
+36 -24
View File
@@ -1,46 +1,53 @@
from typing import List, Dict, Any
from datetime import datetime
from typing import Any, Dict, List
from ai_matcher import AIMatcher
from ai_rules import AIRulesEngine
from feedback_logger import FeedbackLogger
from models import Receipt, Transaction, Match
from models import Match, Receipt, Transaction
class MatchingEngine:
def __init__(self):
    """Create the collaborators the matching pipeline relies on."""
    # Produces the initial receipt/transaction matches.
    self.ai_matcher = AIMatcher()
    # Supplies rule results (confidence boost, auto-approve, tax analysis).
    self.rules_engine = AIRulesEngine()
    # Records user approvals and rejections of matches.
    self.feedback_logger = FeedbackLogger()
def process_matching(
    self, receipts: List[Receipt], transactions: List[Transaction]
) -> List[Match]:
    """Match receipts to transactions and post-process each match with rules.

    Args:
        receipts: Receipts to be matched.
        transactions: Candidate transactions to match against.

    Returns:
        The AI matcher's matches, each passed through
        ``_enhance_match_with_rules``, in the order the matcher produced them.
    """
    # Query the AI matcher exactly once; it is the expensive step.
    ai_matches = self.ai_matcher.match_receipts_to_transactions(
        receipts, transactions
    )
    return [self._enhance_match_with_rules(match) for match in ai_matches]
def _enhance_match_with_rules(self, match: Match) -> Match:
    """Adjust a match in place using the rules engine's verdict.

    Args:
        match: The match to enhance; it is mutated and returned.

    Returns:
        The same ``match`` object, with its confidence score, match reason
        and tax analysis updated according to the rule results.
    """
    rule_results = self.rules_engine.apply_rules(match.receipt, match.transaction)

    # Apply the confidence boost once, capping the score at 1.0.
    boost = rule_results["confidence_boost"]
    if boost > 0:
        match.confidence_score = min(1.0, match.confidence_score + boost)

    # Auto-approval overrides whatever score the boost produced.
    if rule_results["auto_approve"]:
        match.confidence_score = 1.0
        match.match_reason += " (Auto-approved by rules)"

    # Tax analysis is optional in the rule results; attach it only if present.
    if rule_results.get("tax_analysis"):
        match.tax_analysis = rule_results["tax_analysis"]

    return match
def approve_match(self, match: Match, user_id: str):
# Log the approval
self.feedback_logger.log_override(
@@ -48,9 +55,9 @@ class MatchingEngine:
original_match=f"AI Score: {match.confidence_score}",
correction="Approved",
reason="User approved match",
user_id=user_id
user_id=user_id,
)
def reject_match(self, match: Match, reason: str, user_id: str):
# Log the rejection
self.feedback_logger.log_override(
@@ -58,20 +65,25 @@ class MatchingEngine:
original_match=f"AI Score: {match.confidence_score}",
correction="Rejected",
reason=reason,
user_id=user_id
user_id=user_id,
)
def get_matching_stats(self, matches: List[Match]) -> Dict[str, Any]:
    """Summarise confidence scores across a list of matches.

    Args:
        matches: Matches whose ``confidence_score`` values are summarised.

    Returns:
        A dict with ``total``, ``high_confidence`` (score >= 0.8),
        ``low_confidence`` (score < 0.8) and ``avg_score`` (mean score
        rounded to 3 decimals). All values are 0 for an empty input.
    """
    if not matches:
        return {
            "total": 0,
            "high_confidence": 0,
            "low_confidence": 0,
            "avg_score": 0,
        }

    scores = [m.confidence_score for m in matches]
    high_confidence = sum(1 for score in scores if score >= 0.8)
    low_confidence = sum(1 for score in scores if score < 0.8)
    avg_score = sum(scores) / len(scores)

    return {
        "total": len(scores),
        "high_confidence": high_confidence,
        "low_confidence": low_confidence,
        "avg_score": round(avg_score, 3),
    }
+16 -16
View File
@@ -1,16 +1,16 @@
groq>=0.5.0
python-dotenv==1.0.0
pandas==2.1.4
numpy==1.24.3
fastapi==0.104.1
uvicorn==0.24.0
pydantic==2.5.0
requests==2.31.0
python-multipart==0.0.6
Pillow==10.0.1
PyPDF2==3.0.1
aiofiles==23.2.1
google-auth==2.23.4
google-auth-oauthlib==1.1.0
google-auth-httplib2==0.1.1
google-api-python-client==2.108.0
groq
python-dotenv
pandas
numpy
fastapi
uvicorn
pydantic
requests
python-multipart
Pillow
PyPDF2
aiofiles
google-auth
google-auth-oauthlib
google-auth-httplib2
google-api-python-client
+75 -70
View File
@@ -1,13 +1,14 @@
from typing import Dict, Any, Optional, Tuple
from datetime import datetime
from models import Receipt, Transaction, Address, Asset
import logging
from typing import Any, Dict, Optional
from models import Address, Asset, Receipt, Transaction
logger = logging.getLogger(__name__)
class TaxRulesEngine:
"""Engine to handle tax calculations based on the four tax rules"""
# Provincial tax rates (simplified - in production, use a tax rate API)
PROVINCIAL_TAX_RATES = {
"ON": 0.13, # Ontario HST
@@ -24,10 +25,10 @@ class TaxRulesEngine:
"NU": 0.05, # Nunavut
"YT": 0.05, # Yukon
}
def __init__(self):
    """Attach a logger named after this module to the instance."""
    self.logger = logging.getLogger(__name__)
def apply_sales_tax_rule(self, receipt: Receipt) -> Dict[str, Any]:
"""
Sales Tax Rule: Apply correct sales tax based on billing vs shipping addresses
@@ -35,43 +36,45 @@ class TaxRulesEngine:
try:
# Determine which address to use for tax calculation
tax_address = self._get_tax_address(receipt)
if not tax_address:
return {
"success": False,
"error": "No valid address found for tax calculation",
"calculated_tax": 0.0,
"tax_rate": 0.0
"tax_rate": 0.0,
}
# Get tax rate for the province
tax_rate = self.PROVINCIAL_TAX_RATES.get(tax_address.province, 0.0)
# Calculate tax amount
calculated_tax = receipt.amount * tax_rate
return {
"success": True,
"calculated_tax": calculated_tax,
"tax_rate": tax_rate,
"tax_address": tax_address.province,
"rule_applied": "Sales Tax Rule"
"rule_applied": "Sales Tax Rule",
}
except Exception as e:
self.logger.error(f"Error applying sales tax rule: {str(e)}")
return {
"success": False,
"error": str(e),
"calculated_tax": 0.0,
"tax_rate": 0.0
"tax_rate": 0.0,
}
def _get_tax_address(self, receipt: Receipt) -> Optional[Address]:
"""Determine which address to use for tax calculation"""
# Rule: Use shipping address if different from billing, otherwise use billing
if receipt.shipping_address and receipt.billing_address:
if self._addresses_different(receipt.billing_address, receipt.shipping_address):
if self._addresses_different(
receipt.billing_address, receipt.shipping_address
):
return receipt.shipping_address
else:
return receipt.billing_address
@@ -81,14 +84,18 @@ class TaxRulesEngine:
return receipt.billing_address
else:
return None
def _addresses_different(self, billing: Address, shipping: Address) -> bool:
    """Report whether the billing and shipping addresses differ.

    Two addresses count as different when any of province, city or
    postal code is unequal; no other fields are compared.

    Args:
        billing: The billing address.
        shipping: The shipping address.

    Returns:
        True if at least one of the three compared fields differs.
    """
    return (
        billing.province != shipping.province
        or billing.city != shipping.city
        or billing.postal_code != shipping.postal_code
    )
def apply_fx_rule(
self, receipt: Receipt, transaction: Transaction
) -> Dict[str, Any]:
"""
Foreign Exchange Rule: Handle currency mismatches
"""
@@ -96,7 +103,7 @@ class TaxRulesEngine:
# Check for currency mismatch
if receipt.currency != transaction.currency:
fx_discrepancy = abs(receipt.amount - abs(transaction.amount))
return {
"success": True,
"fx_discrepancy": fx_discrepancy,
@@ -105,26 +112,28 @@ class TaxRulesEngine:
"receipt_amount": receipt.amount,
"transaction_amount": abs(transaction.amount),
"requires_manual_review": True,
"rule_applied": "Foreign Exchange Rule"
"rule_applied": "Foreign Exchange Rule",
}
else:
return {
"success": True,
"fx_discrepancy": 0.0,
"requires_manual_review": False,
"rule_applied": "No FX Rule (same currency)"
"rule_applied": "No FX Rule (same currency)",
}
except Exception as e:
self.logger.error(f"Error applying FX rule: {str(e)}")
return {
"success": False,
"error": str(e),
"fx_discrepancy": 0.0,
"requires_manual_review": False
"requires_manual_review": False,
}
def calculate_straight_line_depreciation(self, asset: Asset, year: int) -> Dict[str, Any]:
def calculate_straight_line_depreciation(
self, asset: Asset, year: int
) -> Dict[str, Any]:
"""
Straight-Line Depreciation for accounting purposes
"""
@@ -133,28 +142,26 @@ class TaxRulesEngine:
return {
"success": False,
"error": f"Year {year} exceeds useful life of {asset.useful_life_years} years",
"depreciation": 0.0
"depreciation": 0.0,
}
# Straight-line formula: (Cost - Residual Value) / Useful Life
annual_depreciation = (asset.purchase_amount - asset.residual_value) / asset.useful_life_years
annual_depreciation = (
asset.purchase_amount - asset.residual_value
) / asset.useful_life_years
return {
"success": True,
"depreciation": annual_depreciation,
"book_value": asset.purchase_amount - (annual_depreciation * year),
"method": "Straight-Line",
"rule_applied": "Depreciation Rule (Accounting)"
"rule_applied": "Depreciation Rule (Accounting)",
}
except Exception as e:
self.logger.error(f"Error calculating straight-line depreciation: {str(e)}")
return {
"success": False,
"error": str(e),
"depreciation": 0.0
}
return {"success": False, "error": str(e), "depreciation": 0.0}
def calculate_cca_depreciation(self, asset: Asset, year: int) -> Dict[str, Any]:
"""
CCA (Capital Cost Allowance) Depreciation for tax purposes
@@ -164,40 +171,36 @@ class TaxRulesEngine:
return {
"success": False,
"error": "Year must be at least 1",
"depreciation": 0.0
"depreciation": 0.0,
}
# CCA uses declining balance method
book_value = asset.purchase_amount
total_depreciation = 0.0
for current_year in range(1, year + 1):
# CCA is calculated on the declining balance
cca_amount = book_value * asset.cca_rate
book_value -= cca_amount
total_depreciation += cca_amount
# Stop if book value reaches residual value
if book_value <= asset.residual_value:
break
return {
"success": True,
"depreciation": cca_amount, # Current year depreciation
"total_depreciation": total_depreciation,
"book_value": max(book_value, asset.residual_value),
"method": "CCA Declining Balance",
"rule_applied": "Depreciation Rule (Tax)"
"rule_applied": "Depreciation Rule (Tax)",
}
except Exception as e:
self.logger.error(f"Error calculating CCA depreciation: {str(e)}")
return {
"success": False,
"error": str(e),
"depreciation": 0.0
}
return {"success": False, "error": str(e), "depreciation": 0.0}
def apply_meals_entertainment_rule(self, receipt: Receipt) -> Dict[str, Any]:
"""
Meals & Entertainment Tax Deduction Rule
@@ -208,36 +211,38 @@ class TaxRulesEngine:
"success": True,
"tax_deduction": receipt.amount,
"accounting_deduction": receipt.amount,
"rule_applied": "No M&E Rule (not meals/entertainment)"
"rule_applied": "No M&E Rule (not meals/entertainment)",
}
# For tax purposes: 50% deductible
tax_deduction = receipt.amount * 0.5
# For accounting purposes: 100% deductible
accounting_deduction = receipt.amount
# Sales tax is fully deductible for accounting
tax_on_meal = receipt.tax
return {
"success": True,
"tax_deduction": tax_deduction,
"accounting_deduction": accounting_deduction,
"tax_on_meal": tax_on_meal,
"rule_applied": "Meals & Entertainment Rule"
"rule_applied": "Meals & Entertainment Rule",
}
except Exception as e:
self.logger.error(f"Error applying meals & entertainment rule: {str(e)}")
return {
"success": False,
"error": str(e),
"tax_deduction": 0.0,
"accounting_deduction": 0.0
"accounting_deduction": 0.0,
}
def apply_all_tax_rules(self, receipt: Receipt, transaction: Transaction = None) -> Dict[str, Any]:
def apply_all_tax_rules(
self, receipt: Receipt, transaction: Transaction = None
) -> Dict[str, Any]:
"""
Apply all tax rules to a receipt
"""
@@ -246,26 +251,26 @@ class TaxRulesEngine:
"rules_applied": [],
"sales_tax": {},
"fx_analysis": {},
"meals_entertainment": {}
"meals_entertainment": {},
}
# Apply Sales Tax Rule
sales_tax_result = self.apply_sales_tax_rule(receipt)
results["sales_tax"] = sales_tax_result
if sales_tax_result["success"]:
results["rules_applied"].append("Sales Tax Rule")
# Apply FX Rule (if transaction provided)
if transaction:
fx_result = self.apply_fx_rule(receipt, transaction)
results["fx_analysis"] = fx_result
if fx_result["success"]:
results["rules_applied"].append("Foreign Exchange Rule")
# Apply Meals & Entertainment Rule
me_result = self.apply_meals_entertainment_rule(receipt)
results["meals_entertainment"] = me_result
if me_result["success"]:
results["rules_applied"].append("Meals & Entertainment Rule")
return results
return results