Compare commits
16 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| f6535908fe | |||
| 8d745c1f8e | |||
| 2b83ffe00c | |||
| 85fafae311 | |||
| fa25f7bafd | |||
| 2f917ec085 | |||
| 7296d09319 | |||
| 01aa2efa43 | |||
| c8da3c61ca | |||
| 3559cbe19d | |||
| 2e020437a8 | |||
| f582110674 | |||
| 5116fb5efb | |||
| b2bf631448 | |||
| 659ca4ff15 | |||
| d8315f13ac |
@@ -9,3 +9,6 @@ __pycache__/
|
|||||||
.env
|
.env
|
||||||
*.log
|
*.log
|
||||||
/uploads
|
/uploads
|
||||||
|
server_manager.sh
|
||||||
|
server.log
|
||||||
|
server.pid
|
||||||
@@ -5,6 +5,7 @@ class Settings(BaseSettings):
|
|||||||
database_url: Optional[str] = None
|
database_url: Optional[str] = None
|
||||||
secret_key: Optional[str] = None
|
secret_key: Optional[str] = None
|
||||||
api_key: Optional[str] = None
|
api_key: Optional[str] = None
|
||||||
|
model: str = "openai/gpt-oss-120b"
|
||||||
GROQ_API_KEY: str
|
GROQ_API_KEY: str
|
||||||
class Config:
|
class Config:
|
||||||
env_file = ".env"
|
env_file = ".env"
|
||||||
|
|||||||
+50
-1
@@ -27,7 +27,34 @@ Base = declarative_base()
|
|||||||
|
|
||||||
|
|
||||||
def create_db_tables():
|
def create_db_tables():
|
||||||
Base.metadata.create_all(bind=engine)
|
"""Create database tables safely with error handling"""
|
||||||
|
import logging
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
try:
|
||||||
|
# Check if tables already exist to avoid unnecessary DDL operations
|
||||||
|
from sqlalchemy import inspect
|
||||||
|
|
||||||
|
inspector = inspect(engine)
|
||||||
|
existing_tables = inspector.get_table_names()
|
||||||
|
|
||||||
|
if existing_tables:
|
||||||
|
logger.info(f"Database tables already exist: {existing_tables}")
|
||||||
|
return
|
||||||
|
|
||||||
|
# Create tables with timeout protection
|
||||||
|
logger.info("Creating database tables...")
|
||||||
|
Base.metadata.create_all(bind=engine, checkfirst=True)
|
||||||
|
logger.info("Database tables created successfully")
|
||||||
|
|
||||||
|
except KeyboardInterrupt:
|
||||||
|
logger.warning("Database creation interrupted by user")
|
||||||
|
raise
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Error creating database tables: {e}")
|
||||||
|
# Don't crash the app - tables might already exist
|
||||||
|
pass
|
||||||
|
|
||||||
|
|
||||||
def clear_all_data():
|
def clear_all_data():
|
||||||
@@ -56,6 +83,21 @@ class DBTransaction(Base):
|
|||||||
tax_amount = Column(Float, nullable=True)
|
tax_amount = Column(Float, nullable=True)
|
||||||
categorisation_id = Column(String, nullable=True)
|
categorisation_id = Column(String, nullable=True)
|
||||||
user_id = Column(String, nullable=True)
|
user_id = Column(String, nullable=True)
|
||||||
|
source = Column(String, nullable=True) # e.g., "csv", "image", "manual", "api"
|
||||||
|
|
||||||
|
# Additional QuickBooks CSV columns
|
||||||
|
TxnId = Column(String, nullable=True)
|
||||||
|
AccountType = Column(String, nullable=True)
|
||||||
|
AccountNumber = Column(String, nullable=True)
|
||||||
|
TransactionDate = Column(String, nullable=True)
|
||||||
|
TransactionType = Column(String, nullable=True)
|
||||||
|
ChequeNumber = Column(String, nullable=True)
|
||||||
|
Description1 = Column(String, nullable=True)
|
||||||
|
Description2 = Column(String, nullable=True)
|
||||||
|
VendorId = Column(String, nullable=True)
|
||||||
|
VendorName = Column(String, nullable=True)
|
||||||
|
AccountId = Column(String, nullable=True)
|
||||||
|
AccountName = Column(String, nullable=True)
|
||||||
|
|
||||||
|
|
||||||
# Uploaded Files table
|
# Uploaded Files table
|
||||||
@@ -88,3 +130,10 @@ class DBReceipt(Base):
|
|||||||
extraction_success = Column(String, nullable=True)
|
extraction_success = Column(String, nullable=True)
|
||||||
error_message = Column(String, nullable=True)
|
error_message = Column(String, nullable=True)
|
||||||
receipt_currency = Column(String, nullable=True)
|
receipt_currency = Column(String, nullable=True)
|
||||||
|
receipt_location = Column(String, nullable=True)
|
||||||
|
calculated_tax = Column(Float, nullable=True)
|
||||||
|
is_depreciable = Column(String, nullable=True) # Store as string "True"/"False"
|
||||||
|
name_of_asset = Column(String, nullable=True) # Name/description of the asset
|
||||||
|
cca_rate = Column(Float, nullable=True)
|
||||||
|
useful_life = Column(Integer, nullable=True)
|
||||||
|
residual_value = Column(Float, nullable=True)
|
||||||
|
|||||||
+344
-65
@@ -1,7 +1,6 @@
|
|||||||
import csv
|
import csv
|
||||||
import io
|
import io
|
||||||
import logging
|
import logging
|
||||||
import uuid
|
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
from typing import List
|
from typing import List
|
||||||
|
|
||||||
@@ -15,6 +14,7 @@ from database import (
|
|||||||
from fastapi import FastAPI, File, Form, HTTPException, UploadFile
|
from fastapi import FastAPI, File, Form, HTTPException, UploadFile
|
||||||
from fastapi.middleware.cors import CORSMiddleware
|
from fastapi.middleware.cors import CORSMiddleware
|
||||||
from schemas import (
|
from schemas import (
|
||||||
|
DocumentProcessRequest,
|
||||||
DocumentProcessResponse,
|
DocumentProcessResponse,
|
||||||
DocumentUploadResponse,
|
DocumentUploadResponse,
|
||||||
MatchingResponse,
|
MatchingResponse,
|
||||||
@@ -29,7 +29,8 @@ from services.document_processor import DocumentProcessor
|
|||||||
from services.matching_engine import MatchingEngine
|
from services.matching_engine import MatchingEngine
|
||||||
from sqlalchemy.orm import Session
|
from sqlalchemy.orm import Session
|
||||||
|
|
||||||
create_db_tables()
|
# Don't create tables at import time - do it on startup
|
||||||
|
# create_db_tables()
|
||||||
|
|
||||||
logging.basicConfig(
|
logging.basicConfig(
|
||||||
level=logging.INFO,
|
level=logging.INFO,
|
||||||
@@ -53,6 +54,15 @@ app.add_middleware(
|
|||||||
allow_headers=["*"],
|
allow_headers=["*"],
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
@app.on_event("startup")
|
||||||
|
async def startup_event():
|
||||||
|
"""Initialize database on startup"""
|
||||||
|
logger.info("Starting up application...")
|
||||||
|
create_db_tables()
|
||||||
|
logger.info("Application startup complete")
|
||||||
|
|
||||||
|
|
||||||
# Initialize DS Engine components
|
# Initialize DS Engine components
|
||||||
matching_engine = MatchingEngine()
|
matching_engine = MatchingEngine()
|
||||||
document_processor = DocumentProcessor()
|
document_processor = DocumentProcessor()
|
||||||
@@ -122,46 +132,77 @@ async def import_transactions_csv(
|
|||||||
errors = []
|
errors = []
|
||||||
for idx, row in enumerate(reader):
|
for idx, row in enumerate(reader):
|
||||||
try:
|
try:
|
||||||
# Use correct headers and strip whitespace
|
# Extract all CSV columns with proper header handling
|
||||||
account_number = row.get("Account Number") or row.get(
|
txn_id = row.get("TxnId", "").strip()
|
||||||
"Account Number ".strip()
|
account_type = row.get("Account Type", "").strip()
|
||||||
|
account_number = row.get("Account Number", "").strip()
|
||||||
|
transaction_date = row.get("Transaction Date", "").strip()
|
||||||
|
transaction_type = row.get("Transaction Type", "").strip()
|
||||||
|
cheque_number = row.get("Cheque Number", "").strip()
|
||||||
|
description_1 = row.get("Description 1", "").strip()
|
||||||
|
description_2 = row.get("Description 2", "").strip()
|
||||||
|
amount_raw = row.get("Amount", "").strip()[1:]
|
||||||
|
vendor_id = row.get("VendorId", "").strip()
|
||||||
|
vendor_name = row.get("VendorName", "").strip()
|
||||||
|
account_id = row.get("AccountId", "").strip()
|
||||||
|
account_name = row.get("AccountName", "").strip()
|
||||||
|
source = row.get("Source", "").strip()
|
||||||
|
|
||||||
|
# Compose internal ID
|
||||||
|
internal_txn_id = (
|
||||||
|
f"{account_number}_{idx + 1}"
|
||||||
|
if account_number
|
||||||
|
else f"txn_{idx + 1}"
|
||||||
)
|
)
|
||||||
txn_date_raw = row.get("Transaction Date") or row.get(
|
|
||||||
"Transaction Date ".strip() or row.get("Date")
|
|
||||||
)
|
|
||||||
amount_raw = row.get("Amount") or row.get("Amount ".strip())
|
|
||||||
payee_name = row.get("Description 2") or row.get(
|
|
||||||
"Description 2 ".strip()
|
|
||||||
)
|
|
||||||
memo = f"{row.get('Account Type', '').strip()} {row.get('Cheque Number', '').strip()} {row.get('Description 1', '').strip()}".strip()
|
|
||||||
# Compose ID
|
|
||||||
txn_id = f"{account_number}_{idx + 1}"
|
|
||||||
# Parse date (try multiple formats)
|
# Parse date (try multiple formats)
|
||||||
txn_date_str = txn_date_raw.strip()
|
|
||||||
txn_date = None
|
txn_date = None
|
||||||
for fmt in ("%m/%d/%y", "%m/%d/%Y"):
|
for fmt in ("%m/%d/%y", "%m/%d/%Y", "%Y-%m-%d"):
|
||||||
try:
|
try:
|
||||||
txn_date = datetime.strptime(txn_date_str, fmt).strftime(
|
txn_date = datetime.strptime(transaction_date, fmt).strftime(
|
||||||
"%Y-%m-%d"
|
"%Y-%m-%d"
|
||||||
)
|
)
|
||||||
break
|
break
|
||||||
except Exception:
|
except Exception:
|
||||||
continue
|
continue
|
||||||
if not txn_date:
|
if not txn_date:
|
||||||
raise ValueError(f"Could not parse date: {txn_date_str}")
|
raise ValueError(f"Could not parse date: {transaction_date}")
|
||||||
# Parse amount
|
|
||||||
amount = float(amount_raw.replace(",", "").strip())
|
|
||||||
|
|
||||||
# Create database transaction object
|
# Parse amount
|
||||||
|
amount = (
|
||||||
|
float(amount_raw.replace(",", "").strip()) if amount_raw else 0.0
|
||||||
|
)
|
||||||
|
|
||||||
|
# Use vendor_name (Description 2) as the vendor, fallback to description_2
|
||||||
|
vendor = vendor_name if vendor_name else description_2
|
||||||
|
|
||||||
|
# Compose description/memo from multiple fields
|
||||||
|
memo = f"{account_type} {cheque_number} {description_1}".strip()
|
||||||
|
|
||||||
|
# Create database transaction object with all QuickBooks fields
|
||||||
txn_date_obj = datetime.strptime(txn_date, "%Y-%m-%d")
|
txn_date_obj = datetime.strptime(txn_date, "%Y-%m-%d")
|
||||||
db_transaction = DBTransaction(
|
db_transaction = DBTransaction(
|
||||||
transaction_id=txn_id,
|
transaction_id=internal_txn_id,
|
||||||
amount=amount,
|
amount=amount,
|
||||||
date=txn_date_obj,
|
date=txn_date_obj,
|
||||||
vendor=payee_name.strip(),
|
vendor=vendor,
|
||||||
description=memo,
|
description=memo,
|
||||||
categorisation_id=categorization_id,
|
categorisation_id=categorization_id,
|
||||||
user_id=user_id,
|
user_id=user_id,
|
||||||
|
source=source, # Source of this transaction
|
||||||
|
# QuickBooks CSV columns
|
||||||
|
TxnId=txn_id,
|
||||||
|
AccountType=account_type,
|
||||||
|
AccountNumber=account_number,
|
||||||
|
TransactionDate=transaction_date,
|
||||||
|
TransactionType=transaction_type,
|
||||||
|
ChequeNumber=cheque_number,
|
||||||
|
Description1=description_1,
|
||||||
|
Description2=description_2,
|
||||||
|
VendorId=vendor_id,
|
||||||
|
VendorName=vendor_name,
|
||||||
|
AccountId=account_id,
|
||||||
|
AccountName=account_name,
|
||||||
)
|
)
|
||||||
|
|
||||||
# Add to database
|
# Add to database
|
||||||
@@ -169,13 +210,27 @@ async def import_transactions_csv(
|
|||||||
|
|
||||||
transactions.append(
|
transactions.append(
|
||||||
{
|
{
|
||||||
"id": txn_id,
|
"id": internal_txn_id,
|
||||||
"txn_date": txn_date,
|
"txn_date": txn_date,
|
||||||
"amount": amount,
|
"amount": amount,
|
||||||
"payee_name": payee_name.strip(),
|
"payee_name": vendor,
|
||||||
"memo": memo,
|
"memo": memo,
|
||||||
"categorization_id": categorization_id,
|
"categorization_id": categorization_id,
|
||||||
"user_id": user_id,
|
"user_id": user_id,
|
||||||
|
# Include QuickBooks fields in response
|
||||||
|
"TxnId": txn_id,
|
||||||
|
"AccountType": account_type,
|
||||||
|
"AccountNumber": account_number,
|
||||||
|
"TransactionDate": transaction_date,
|
||||||
|
"TransactionType": transaction_type,
|
||||||
|
"ChequeNumber": cheque_number,
|
||||||
|
"Description1": description_1,
|
||||||
|
"Description2": description_2,
|
||||||
|
"VendorId": vendor_id,
|
||||||
|
"VendorName": vendor_name,
|
||||||
|
"AccountId": account_id,
|
||||||
|
"AccountName": account_name,
|
||||||
|
"source": source,
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
@@ -307,51 +362,77 @@ async def upload_multiple_documents(
|
|||||||
This endpoint accepts multiple image files and returns file IDs
|
This endpoint accepts multiple image files and returns file IDs
|
||||||
that can be used with the /process/{file_id} endpoint.
|
that can be used with the /process/{file_id} endpoint.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
try:
|
try:
|
||||||
responses = []
|
responses = []
|
||||||
|
|
||||||
for file in files:
|
for file in files:
|
||||||
# Validate file type
|
# Validate file type
|
||||||
allowed_types = ["jpg", "jpeg", "png", "gif", "bmp", "pdf"]
|
|
||||||
file_extension = file.filename.split(".")[-1].lower()
|
|
||||||
|
|
||||||
|
allowed_types = ["jpg", "jpeg", "png", "gif", "bmp", "pdf"]
|
||||||
|
|
||||||
|
file_extension = file.filename.split(".")[-1].lower()
|
||||||
|
google_file_id, file_name = file.filename.split("|")
|
||||||
if file_extension not in allowed_types:
|
if file_extension not in allowed_types:
|
||||||
raise HTTPException(
|
raise HTTPException(
|
||||||
status_code=400,
|
status_code=400,
|
||||||
detail=f"Unsupported file type for {file.filename}. Allowed: {allowed_types}",
|
detail=f"Unsupported file type for {file.filename}. Allowed: {allowed_types}",
|
||||||
)
|
)
|
||||||
|
logger.info(
|
||||||
|
f"filename: {file_name}, extension: {file_extension}, google_file_id: {google_file_id}"
|
||||||
|
)
|
||||||
|
|
||||||
# Generate unique file ID
|
# Generate unique file ID
|
||||||
file_id = str(uuid.uuid4())
|
file_id = google_file_id # Using Google Drive file ID as file_id
|
||||||
|
|
||||||
# Read file content and save to disk
|
# Check if file already exists in database
|
||||||
content = await file.read()
|
existing_file = get_uploaded_file_from_db(db, file_id)
|
||||||
file_path = await document_processor.save_uploaded_file(
|
|
||||||
content, file.filename
|
|
||||||
)
|
|
||||||
|
|
||||||
# Create database record for uploaded file
|
if existing_file:
|
||||||
db_uploaded_file = DBUploadedFile(
|
# File already exists, return existing record
|
||||||
file_id=file_id,
|
logger.info(
|
||||||
filename=file.filename,
|
f"File {file_name} with ID {file_id} already exists, returning existing record"
|
||||||
file_path=file_path,
|
)
|
||||||
file_type=file_extension,
|
responses.append(
|
||||||
upload_date=datetime.now(),
|
DocumentUploadResponse(
|
||||||
status="uploaded",
|
file_id=existing_file.file_id,
|
||||||
)
|
filename=existing_file.filename,
|
||||||
|
file_type=existing_file.file_type,
|
||||||
|
upload_date=existing_file.upload_date,
|
||||||
|
status=existing_file.status,
|
||||||
|
)
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
# File doesn't exist, create new record
|
||||||
|
# Read file content and save to disk
|
||||||
|
content = await file.read()
|
||||||
|
file_path = await document_processor.save_uploaded_file(
|
||||||
|
content, file_name
|
||||||
|
)
|
||||||
|
|
||||||
# Add to database
|
# Create database record for uploaded file
|
||||||
db.add(db_uploaded_file)
|
db_uploaded_file = DBUploadedFile(
|
||||||
|
|
||||||
responses.append(
|
|
||||||
DocumentUploadResponse(
|
|
||||||
file_id=file_id,
|
file_id=file_id,
|
||||||
filename=file.filename,
|
filename=file_name,
|
||||||
|
file_path=file_path,
|
||||||
file_type=file_extension,
|
file_type=file_extension,
|
||||||
upload_date=datetime.now(),
|
upload_date=datetime.now(),
|
||||||
status="uploaded",
|
status="uploaded",
|
||||||
)
|
)
|
||||||
)
|
logger.info(f"Uploaded new file {file_name} with ID {file_id}")
|
||||||
|
|
||||||
|
# Add to database
|
||||||
|
db.add(db_uploaded_file)
|
||||||
|
|
||||||
|
responses.append(
|
||||||
|
DocumentUploadResponse(
|
||||||
|
file_id=file_id,
|
||||||
|
filename=file_name,
|
||||||
|
file_type=file_extension,
|
||||||
|
upload_date=datetime.now(),
|
||||||
|
status="uploaded",
|
||||||
|
)
|
||||||
|
)
|
||||||
|
|
||||||
# Commit all uploaded files to database
|
# Commit all uploaded files to database
|
||||||
db.commit()
|
db.commit()
|
||||||
@@ -368,24 +449,76 @@ async def upload_multiple_documents(
|
|||||||
response_model=DocumentProcessResponse,
|
response_model=DocumentProcessResponse,
|
||||||
tags=["Document Processing"],
|
tags=["Document Processing"],
|
||||||
)
|
)
|
||||||
async def process_document(file_id: str, db: db_dependency):
|
async def process_document(
|
||||||
|
file_id: str, request: DocumentProcessRequest, db: db_dependency
|
||||||
|
):
|
||||||
"""
|
"""
|
||||||
Process a previously uploaded document to extract receipt information.
|
Process a previously uploaded document to extract receipt information.
|
||||||
|
|
||||||
This endpoint uses AI to extract structured data from receipt images,
|
This endpoint uses AI to extract structured data from receipt images,
|
||||||
including vendor, amount, date, and category information.
|
including vendor, amount, date, and category information.
|
||||||
|
|
||||||
|
Optionally accepts:
|
||||||
|
- user_location: Guide tax calculations and depreciation based on location
|
||||||
|
(format: "State/Province, Country" e.g., "Ontario, Canada")
|
||||||
|
- ai_rules: Custom categorization rules to override default logic
|
||||||
|
(e.g., [{"condition": "vendor is Starbucks", "action": "Food"}])
|
||||||
"""
|
"""
|
||||||
|
logger.info(f"Request: {request}")
|
||||||
try:
|
try:
|
||||||
# Get file info from database
|
# Get file info from database
|
||||||
db_uploaded_file = get_uploaded_file_from_db(db, file_id)
|
db_uploaded_file = get_uploaded_file_from_db(db, file_id)
|
||||||
if not db_uploaded_file:
|
if not db_uploaded_file:
|
||||||
raise HTTPException(status_code=404, detail=f"File {file_id} not found")
|
raise HTTPException(status_code=404, detail=f"File {file_id} not found")
|
||||||
|
|
||||||
|
# Convert ai_rules from Pydantic models to dictionaries if provided
|
||||||
|
ai_rules_list = None
|
||||||
|
if request.ai_rules:
|
||||||
|
ai_rules_list = [
|
||||||
|
{"condition": rule.condition, "action": rule.action}
|
||||||
|
for rule in request.ai_rules
|
||||||
|
]
|
||||||
|
|
||||||
|
# Check if receipt already exists for this file_id
|
||||||
|
existing_receipt = get_receipt_from_db(db, file_id)
|
||||||
|
|
||||||
|
if existing_receipt:
|
||||||
|
# Receipt already processed, return existing data
|
||||||
|
logger.info(
|
||||||
|
f"Receipt for file {file_id} already exists, returning existing record"
|
||||||
|
)
|
||||||
|
return DocumentProcessResponse(
|
||||||
|
file_id=file_id,
|
||||||
|
receipt_id=existing_receipt.receipt_id,
|
||||||
|
extraction_success=existing_receipt.extraction_success == "True",
|
||||||
|
vendor=existing_receipt.vendor,
|
||||||
|
description=existing_receipt.description,
|
||||||
|
total_amount=existing_receipt.amount,
|
||||||
|
tax_amount=existing_receipt.tax_amount,
|
||||||
|
date=existing_receipt.date.strftime("%Y-%m-%d"),
|
||||||
|
category=existing_receipt.category,
|
||||||
|
confidence=existing_receipt.confidence,
|
||||||
|
error=existing_receipt.error_message,
|
||||||
|
receipt_currency=existing_receipt.receipt_currency,
|
||||||
|
receipt_location=existing_receipt.receipt_location,
|
||||||
|
calculated_tax=existing_receipt.calculated_tax,
|
||||||
|
is_depreciable=existing_receipt.is_depreciable == "True"
|
||||||
|
if existing_receipt.is_depreciable
|
||||||
|
else None,
|
||||||
|
name_of_asset=existing_receipt.name_of_asset,
|
||||||
|
cca_rate=existing_receipt.cca_rate,
|
||||||
|
useful_life=existing_receipt.useful_life,
|
||||||
|
residual_value=existing_receipt.residual_value,
|
||||||
|
)
|
||||||
|
|
||||||
# Process the file using the stored file path
|
# Process the file using the stored file path
|
||||||
receipt_data = await document_processor.process_file(
|
receipt_data = await document_processor.process_file(
|
||||||
db_uploaded_file.file_path, db_uploaded_file.file_type
|
db_uploaded_file.file_path,
|
||||||
|
db_uploaded_file.file_type,
|
||||||
|
user_location=request.user_location,
|
||||||
|
ai_rules=ai_rules_list,
|
||||||
)
|
)
|
||||||
|
logger.info(f"Extracted receipt data: {receipt_data}")
|
||||||
# Parse date for database storage
|
# Parse date for database storage
|
||||||
receipt_date = None
|
receipt_date = None
|
||||||
if receipt_data.get("date"):
|
if receipt_data.get("date"):
|
||||||
@@ -410,6 +543,15 @@ async def process_document(file_id: str, db: db_dependency):
|
|||||||
extraction_success=str(receipt_data.get("extraction_success", False)),
|
extraction_success=str(receipt_data.get("extraction_success", False)),
|
||||||
error_message=receipt_data.get("error"),
|
error_message=receipt_data.get("error"),
|
||||||
receipt_currency=receipt_data.get("currency"),
|
receipt_currency=receipt_data.get("currency"),
|
||||||
|
receipt_location=receipt_data.get("location"),
|
||||||
|
calculated_tax=receipt_data.get("calculated_tax"),
|
||||||
|
is_depreciable=str(receipt_data.get("is_depreciable"))
|
||||||
|
if receipt_data.get("is_depreciable") is not None
|
||||||
|
else None,
|
||||||
|
name_of_asset=receipt_data.get("name_of_asset"),
|
||||||
|
cca_rate=receipt_data.get("cca_rate"),
|
||||||
|
useful_life=receipt_data.get("useful_life"),
|
||||||
|
residual_value=receipt_data.get("residual_value"),
|
||||||
)
|
)
|
||||||
|
|
||||||
# Add to database
|
# Add to database
|
||||||
@@ -429,10 +571,17 @@ async def process_document(file_id: str, db: db_dependency):
|
|||||||
confidence=receipt_data.get("confidence", 0.0),
|
confidence=receipt_data.get("confidence", 0.0),
|
||||||
error=receipt_data.get("error", None),
|
error=receipt_data.get("error", None),
|
||||||
receipt_currency=receipt_data.get("currency"),
|
receipt_currency=receipt_data.get("currency"),
|
||||||
|
receipt_location=receipt_data.get("location"),
|
||||||
|
calculated_tax=receipt_data.get("calculated_tax"),
|
||||||
|
is_depreciable=receipt_data.get("is_depreciable"),
|
||||||
|
name_of_asset=receipt_data.get("name_of_asset"),
|
||||||
|
cca_rate=receipt_data.get("cca_rate"),
|
||||||
|
useful_life=receipt_data.get("useful_life"),
|
||||||
|
residual_value=receipt_data.get("residual_value"),
|
||||||
)
|
)
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"Error processing document {file_id}: {str(e)}")
|
logger.error(f"Error processing document {request.file_id}: {str(e)}")
|
||||||
raise HTTPException(status_code=500, detail=str(e))
|
raise HTTPException(status_code=500, detail=str(e))
|
||||||
|
|
||||||
|
|
||||||
@@ -475,6 +624,20 @@ async def match_specific_receipts(request: MatchSpecificRequest, db: db_dependen
|
|||||||
amount=db_txn.amount,
|
amount=db_txn.amount,
|
||||||
vendor=db_txn.vendor,
|
vendor=db_txn.vendor,
|
||||||
notes=db_txn.description or "",
|
notes=db_txn.description or "",
|
||||||
|
source=db_txn.source,
|
||||||
|
# QuickBooks CSV fields
|
||||||
|
TxnId=db_txn.TxnId,
|
||||||
|
AccountType=db_txn.AccountType,
|
||||||
|
AccountNumber=db_txn.AccountNumber,
|
||||||
|
TransactionDate=db_txn.TransactionDate,
|
||||||
|
TransactionType=db_txn.TransactionType,
|
||||||
|
ChequeNumber=db_txn.ChequeNumber,
|
||||||
|
Description1=db_txn.Description1,
|
||||||
|
Description2=db_txn.Description2,
|
||||||
|
VendorId=db_txn.VendorId,
|
||||||
|
VendorName=db_txn.VendorName,
|
||||||
|
AccountId=db_txn.AccountId,
|
||||||
|
AccountName=db_txn.AccountName,
|
||||||
)
|
)
|
||||||
transactions.append(transaction)
|
transactions.append(transaction)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
@@ -546,19 +709,56 @@ async def match_specific_receipts(request: MatchSpecificRequest, db: db_dependen
|
|||||||
else:
|
else:
|
||||||
logger.info(f"Using default/provided user_location: {user_location}")
|
logger.info(f"Using default/provided user_location: {user_location}")
|
||||||
|
|
||||||
|
# Convert ai_rules from Pydantic models to dictionaries if provided
|
||||||
|
ai_rules_list = None
|
||||||
|
if request.ai_rules:
|
||||||
|
ai_rules_list = [
|
||||||
|
{"condition": rule.condition, "action": rule.action}
|
||||||
|
for rule in request.ai_rules
|
||||||
|
]
|
||||||
|
logger.info(f"Applying {len(ai_rules_list)} custom AI rules to matching")
|
||||||
|
|
||||||
try:
|
try:
|
||||||
matching_results = matching_engine.process_matching(
|
matching_results = matching_engine.process_matching(
|
||||||
receipts, transactions, user_location=user_location
|
receipts,
|
||||||
|
transactions,
|
||||||
|
user_location=user_location,
|
||||||
|
ai_rules=ai_rules_list,
|
||||||
)
|
)
|
||||||
logger.info(f"Matching completed, got {len(matching_results)} results")
|
logger.info(f"Matching completed, got {len(matching_results)} results")
|
||||||
|
|
||||||
|
# Filter results by confidence threshold (10% minimum)
|
||||||
|
CONFIDENCE_THRESHOLD = 0.10
|
||||||
|
filtered_results = [r for r in matching_results if r.confidence_score >= CONFIDENCE_THRESHOLD]
|
||||||
|
logger.info(f"After filtering by {CONFIDENCE_THRESHOLD*100}% threshold: {len(filtered_results)} matches remain")
|
||||||
|
|
||||||
|
# Track matched transaction IDs
|
||||||
|
matched_transaction_ids = set()
|
||||||
|
for result in filtered_results:
|
||||||
|
if result.transaction:
|
||||||
|
matched_transaction_ids.add(result.transaction.id)
|
||||||
|
|
||||||
|
# Find unmatched transactions
|
||||||
|
unmatched_transactions = [
|
||||||
|
txn for txn in transactions
|
||||||
|
if txn.id not in matched_transaction_ids
|
||||||
|
]
|
||||||
|
logger.info(f"Found {len(unmatched_transactions)} unmatched transactions")
|
||||||
|
|
||||||
# Convert matching results to response format
|
# Convert matching results to response format
|
||||||
match_responses = []
|
match_responses = []
|
||||||
for result in matching_results:
|
for result in filtered_results:
|
||||||
# Get final tax amount from LLM analysis if available, otherwise use receipt's stated tax
|
# Get final tax amount from LLM analysis if available, otherwise use receipt's stated tax
|
||||||
final_tax = result.receipt.tax
|
final_tax = result.receipt.tax
|
||||||
if result.tax_analysis and "final_tax_amount" in result.tax_analysis:
|
# if result.tax_analysis and "final_tax_amount" in result.tax_analysis:
|
||||||
final_tax = result.tax_analysis["final_tax_amount"]
|
# final_tax = result.tax_analysis["final_tax_amount"]
|
||||||
|
|
||||||
|
# Extract flag_for_review and auto_approve from tax_analysis if available
|
||||||
|
flag_for_review = None
|
||||||
|
auto_approve = None
|
||||||
|
if result.tax_analysis:
|
||||||
|
flag_for_review = result.tax_analysis.get("flag_for_review")
|
||||||
|
auto_approve = result.tax_analysis.get("auto_approve")
|
||||||
|
|
||||||
match_response = MatchResponse(
|
match_response = MatchResponse(
|
||||||
receipt_id=result.receipt.id,
|
receipt_id=result.receipt.id,
|
||||||
@@ -579,33 +779,112 @@ async def match_specific_receipts(request: MatchSpecificRequest, db: db_dependen
|
|||||||
if result.transaction
|
if result.transaction
|
||||||
else 0.0,
|
else 0.0,
|
||||||
tax_analysis=result.tax_analysis,
|
tax_analysis=result.tax_analysis,
|
||||||
|
flag_for_review=flag_for_review,
|
||||||
|
auto_approve=auto_approve,
|
||||||
|
# Transaction metadata
|
||||||
|
transaction_source=result.transaction.source
|
||||||
|
if result.transaction
|
||||||
|
else None,
|
||||||
|
# QuickBooks CSV fields
|
||||||
|
TxnId=result.transaction.TxnId if result.transaction else None,
|
||||||
|
AccountType=result.transaction.AccountType
|
||||||
|
if result.transaction
|
||||||
|
else None,
|
||||||
|
AccountNumber=result.transaction.AccountNumber
|
||||||
|
if result.transaction
|
||||||
|
else None,
|
||||||
|
TransactionDate=result.transaction.TransactionDate
|
||||||
|
if result.transaction
|
||||||
|
else None,
|
||||||
|
TransactionType=result.transaction.TransactionType
|
||||||
|
if result.transaction
|
||||||
|
else None,
|
||||||
|
ChequeNumber=result.transaction.ChequeNumber
|
||||||
|
if result.transaction
|
||||||
|
else None,
|
||||||
|
Description1=result.transaction.Description1
|
||||||
|
if result.transaction
|
||||||
|
else None,
|
||||||
|
Description2=result.transaction.Description2
|
||||||
|
if result.transaction
|
||||||
|
else None,
|
||||||
|
VendorId=result.transaction.VendorId
|
||||||
|
if result.transaction
|
||||||
|
else None,
|
||||||
|
VendorName=result.transaction.VendorName
|
||||||
|
if result.transaction
|
||||||
|
else None,
|
||||||
|
AccountId=result.transaction.AccountId
|
||||||
|
if result.transaction
|
||||||
|
else None,
|
||||||
|
AccountName=result.transaction.AccountName
|
||||||
|
if result.transaction
|
||||||
|
else None,
|
||||||
|
Source=result.transaction.source if result.transaction else None,
|
||||||
)
|
)
|
||||||
match_responses.append(match_response)
|
match_responses.append(match_response)
|
||||||
|
|
||||||
# Calculate statistics
|
# Add unmatched transactions as MatchResponse objects with empty receipt data
|
||||||
|
for txn in unmatched_transactions:
|
||||||
|
unmatched_match = MatchResponse(
|
||||||
|
receipt_id="",
|
||||||
|
transaction_id=txn.id,
|
||||||
|
confidence_score=0.0,
|
||||||
|
match_reason="Unmatched transaction",
|
||||||
|
receipt_vendor="",
|
||||||
|
receipt_amount=0.0,
|
||||||
|
receipt_description="",
|
||||||
|
receipt_category="",
|
||||||
|
receipt_tax_amount=0.0,
|
||||||
|
transaction_vendor=txn.vendor,
|
||||||
|
transaction_amount=txn.amount,
|
||||||
|
tax_analysis=None,
|
||||||
|
flag_for_review=None,
|
||||||
|
auto_approve=None,
|
||||||
|
transaction_source=txn.source,
|
||||||
|
TxnId=txn.TxnId,
|
||||||
|
AccountType=txn.AccountType,
|
||||||
|
AccountNumber=txn.AccountNumber,
|
||||||
|
TransactionDate=txn.TransactionDate,
|
||||||
|
TransactionType=txn.TransactionType,
|
||||||
|
ChequeNumber=txn.ChequeNumber,
|
||||||
|
Description1=txn.Description1,
|
||||||
|
Description2=txn.Description2,
|
||||||
|
VendorId=txn.VendorId,
|
||||||
|
VendorName=txn.VendorName,
|
||||||
|
AccountId=txn.AccountId,
|
||||||
|
AccountName=txn.AccountName,
|
||||||
|
Source=txn.source,
|
||||||
|
)
|
||||||
|
match_responses.append(unmatched_match)
|
||||||
|
|
||||||
|
# Calculate statistics on filtered results
|
||||||
high_confidence = len(
|
high_confidence = len(
|
||||||
[r for r in matching_results if r.confidence_score >= 0.8]
|
[r for r in filtered_results if r.confidence_score >= 0.8]
|
||||||
)
|
)
|
||||||
low_confidence = len(
|
low_confidence = len(
|
||||||
[r for r in matching_results if r.confidence_score < 0.5]
|
[r for r in filtered_results if r.confidence_score < 0.5]
|
||||||
)
|
)
|
||||||
avg_score = (
|
avg_score = (
|
||||||
sum(r.confidence_score for r in matching_results)
|
sum(r.confidence_score for r in filtered_results)
|
||||||
/ len(matching_results)
|
/ len(filtered_results)
|
||||||
if matching_results
|
if filtered_results
|
||||||
else 0
|
else 0
|
||||||
)
|
)
|
||||||
|
|
||||||
stats = {
|
stats = {
|
||||||
"total": len(match_responses),
|
"total": len(match_responses),
|
||||||
|
"matched": len(filtered_results),
|
||||||
|
"unmatched_transactions": len(unmatched_transactions),
|
||||||
"high_confidence": high_confidence,
|
"high_confidence": high_confidence,
|
||||||
"low_confidence": low_confidence,
|
"low_confidence": low_confidence,
|
||||||
"avg_score": round(avg_score, 2),
|
"avg_score": round(avg_score, 2),
|
||||||
}
|
}
|
||||||
|
|
||||||
logger.info(f"Generated stats: {stats}")
|
logger.info(f"Generated stats: {stats}")
|
||||||
|
logger.info(f"Total responses (matched + unmatched): {len(match_responses)}")
|
||||||
logger.info(
|
logger.info(
|
||||||
f"Match-specific completed successfully with {len(match_responses)} matches"
|
f"Match-specific completed successfully with {len(filtered_results)} matches and {len(unmatched_transactions)} unmatched transactions"
|
||||||
)
|
)
|
||||||
|
|
||||||
return MatchingResponse(matches=match_responses, stats=stats)
|
return MatchingResponse(matches=match_responses, stats=stats)
|
||||||
|
|||||||
@@ -43,6 +43,21 @@ class Transaction:
|
|||||||
# Tax rule fields
|
# Tax rule fields
|
||||||
currency: str = "CAD"
|
currency: str = "CAD"
|
||||||
fx_rate: Optional[float] = None
|
fx_rate: Optional[float] = None
|
||||||
|
source: Optional[str] = None # e.g., "csv", "image", "manual", "api"
|
||||||
|
|
||||||
|
# QuickBooks CSV fields
|
||||||
|
TxnId: Optional[str] = None
|
||||||
|
AccountType: Optional[str] = None
|
||||||
|
AccountNumber: Optional[str] = None
|
||||||
|
TransactionDate: Optional[str] = None
|
||||||
|
TransactionType: Optional[str] = None
|
||||||
|
ChequeNumber: Optional[str] = None
|
||||||
|
Description1: Optional[str] = None
|
||||||
|
Description2: Optional[str] = None
|
||||||
|
VendorId: Optional[str] = None
|
||||||
|
VendorName: Optional[str] = None
|
||||||
|
AccountId: Optional[str] = None
|
||||||
|
AccountName: Optional[str] = None
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
@@ -101,6 +116,21 @@ class TransactionRequest(BaseModel):
|
|||||||
# Tax rule fields
|
# Tax rule fields
|
||||||
currency: str = "CAD"
|
currency: str = "CAD"
|
||||||
fx_rate: Optional[float] = None
|
fx_rate: Optional[float] = None
|
||||||
|
source: Optional[str] = None # e.g., "csv", "image", "manual", "api"
|
||||||
|
|
||||||
|
# QuickBooks CSV fields
|
||||||
|
TxnId: Optional[str] = None
|
||||||
|
AccountType: Optional[str] = None
|
||||||
|
AccountNumber: Optional[str] = None
|
||||||
|
TransactionDate: Optional[str] = None
|
||||||
|
TransactionType: Optional[str] = None
|
||||||
|
ChequeNumber: Optional[str] = None
|
||||||
|
Description1: Optional[str] = None
|
||||||
|
Description2: Optional[str] = None
|
||||||
|
VendorId: Optional[str] = None
|
||||||
|
VendorName: Optional[str] = None
|
||||||
|
AccountId: Optional[str] = None
|
||||||
|
AccountName: Optional[str] = None
|
||||||
|
|
||||||
|
|
||||||
class AssetRequest(BaseModel):
|
class AssetRequest(BaseModel):
|
||||||
@@ -132,6 +162,26 @@ class MatchResponse(BaseModel):
|
|||||||
transaction_vendor: str
|
transaction_vendor: str
|
||||||
transaction_amount: float
|
transaction_amount: float
|
||||||
tax_analysis: Optional[dict] = None
|
tax_analysis: Optional[dict] = None
|
||||||
|
flag_for_review: Optional[bool] = None
|
||||||
|
auto_approve: Optional[bool] = None
|
||||||
|
|
||||||
|
# Transaction metadata
|
||||||
|
transaction_source: Optional[str] = None # Source of the transaction
|
||||||
|
|
||||||
|
# QuickBooks CSV fields from transaction
|
||||||
|
TxnId: Optional[str] = None
|
||||||
|
AccountType: Optional[str] = None
|
||||||
|
AccountNumber: Optional[str] = None
|
||||||
|
TransactionDate: Optional[str] = None
|
||||||
|
TransactionType: Optional[str] = None
|
||||||
|
ChequeNumber: Optional[str] = None
|
||||||
|
Description1: Optional[str] = None
|
||||||
|
Description2: Optional[str] = None
|
||||||
|
VendorId: Optional[str] = None
|
||||||
|
VendorName: Optional[str] = None
|
||||||
|
AccountId: Optional[str] = None
|
||||||
|
AccountName: Optional[str] = None
|
||||||
|
Source: Optional[str] = None
|
||||||
|
|
||||||
|
|
||||||
class MatchingResponse(BaseModel):
|
class MatchingResponse(BaseModel):
|
||||||
@@ -160,6 +210,19 @@ class DocumentUploadResponse(BaseModel):
|
|||||||
status: str
|
status: str
|
||||||
|
|
||||||
|
|
||||||
|
# One custom post-matching rule expressed as a condition/action pair.
class AIRules(BaseModel):
    # Free-text description of when the rule applies.
    condition: str
    # What to do when the condition holds, e.g. "flag_for_review" or
    # "auto_approve" (the action names the rules prompt recognises).
    action: str
|
||||||
|
|
||||||
|
|
||||||
|
# Request model for document processing; every field is optional and
# defaults to None.
class DocumentProcessRequest(BaseModel):
    file_id: Optional[str] = None
    # Format: "State/Province, Country" (e.g., "Ontario, Canada")
    user_location: Optional[str] = None
    # Custom condition/action rules supplied with the request.
    ai_rules: Optional[List[AIRules]] = None
|
||||||
|
|
||||||
|
|
||||||
class DocumentProcessResponse(BaseModel):
|
class DocumentProcessResponse(BaseModel):
|
||||||
file_id: str
|
file_id: str
|
||||||
receipt_id: str
|
receipt_id: str
|
||||||
@@ -173,6 +236,21 @@ class DocumentProcessResponse(BaseModel):
|
|||||||
confidence: Optional[float] = None
|
confidence: Optional[float] = None
|
||||||
error: Optional[str] = None
|
error: Optional[str] = None
|
||||||
receipt_currency: Optional[str] = "CAD"
|
receipt_currency: Optional[str] = "CAD"
|
||||||
|
receipt_location: Optional[str] = (
|
||||||
|
None # Location from receipt (e.g., "Ontario, Canada" or "California, USA")
|
||||||
|
)
|
||||||
|
calculated_tax: Optional[float] = None # Calculated sales tax if not clearly shown
|
||||||
|
is_depreciable: Optional[bool] = None # Whether item is a depreciable asset
|
||||||
|
name_of_asset: Optional[str] = None # Name/description of the asset if depreciable
|
||||||
|
cca_rate: Optional[float] = (
|
||||||
|
None # CCA rate for tax depreciation (e.g., 0.30 for 30%)
|
||||||
|
)
|
||||||
|
useful_life: Optional[int] = (
|
||||||
|
None # Useful life in years for straight-line depreciation
|
||||||
|
)
|
||||||
|
residual_value: Optional[float] = (
|
||||||
|
None # Residual value for straight-line depreciation
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
# New tax-related models
|
# New tax-related models
|
||||||
@@ -270,3 +348,4 @@ class MatchSpecificRequest(BaseModel):
|
|||||||
categorization_id: str
|
categorization_id: str
|
||||||
user_location: Optional[str] = "Canada" # Kept for backward compatibility
|
user_location: Optional[str] = "Canada" # Kept for backward compatibility
|
||||||
user_tax_info: Optional[UserTaxInfo] = None
|
user_tax_info: Optional[UserTaxInfo] = None
|
||||||
|
ai_rules: Optional[List[AIRules]] = None
|
||||||
|
|||||||
+166
-66
@@ -15,7 +15,7 @@ logger = logging.getLogger(__name__)
|
|||||||
class AIMatcher:
|
class AIMatcher:
|
||||||
def __init__(self, use_batch_matching=True):
|
def __init__(self, use_batch_matching=True):
|
||||||
self.client = groq.Groq(api_key=settings.GROQ_API_KEY)
|
self.client = groq.Groq(api_key=settings.GROQ_API_KEY)
|
||||||
self.model = "llama-3.1-8b-instant"
|
self.model = settings.model
|
||||||
self.max_retries = 3
|
self.max_retries = 3
|
||||||
self.retry_delay = 2 # seconds - increased for rate limiting
|
self.retry_delay = 2 # seconds - increased for rate limiting
|
||||||
self.rate_limit_delay = 1.0 # seconds between API calls
|
self.rate_limit_delay = 1.0 # seconds between API calls
|
||||||
@@ -116,7 +116,7 @@ class AIMatcher:
|
|||||||
for i, transaction in enumerate(candidates):
|
for i, transaction in enumerate(candidates):
|
||||||
transaction_amount_abs = abs(transaction.amount)
|
transaction_amount_abs = abs(transaction.amount)
|
||||||
date_diff = abs((receipt.receipt_date - transaction.transaction_date).days)
|
date_diff = abs((receipt.receipt_date - transaction.transaction_date).days)
|
||||||
amount_diff = abs(receipt.amount - transaction_amount_abs)
|
amount_diff = abs(receipt.amount - transaction_amount_abs - receipt.tax)
|
||||||
amount_percent_diff = (
|
amount_percent_diff = (
|
||||||
(amount_diff / receipt.amount) * 100 if receipt.amount > 0 else 0
|
(amount_diff / receipt.amount) * 100 if receipt.amount > 0 else 0
|
||||||
)
|
)
|
||||||
@@ -127,11 +127,12 @@ Candidate {i + 1}:
|
|||||||
- Amount: ${transaction.amount} (absolute: ${transaction_amount_abs})
|
- Amount: ${transaction.amount} (absolute: ${transaction_amount_abs})
|
||||||
- Date: {transaction.transaction_date.strftime("%Y-%m-%d")} ({date_diff} days difference)
|
- Date: {transaction.transaction_date.strftime("%Y-%m-%d")} ({date_diff} days difference)
|
||||||
- Notes: {transaction.notes}
|
- Notes: {transaction.notes}
|
||||||
- Amount difference: ${amount_diff} ({amount_percent_diff:.1f}%)
|
- Amount difference: ${amount_diff} ({amount_percent_diff:.1f}%) Taking in account receipt tax
|
||||||
"""
|
"""
|
||||||
|
logger.info(f"\nThis is the receipt: {receipt}\n")
|
||||||
|
logger.info(f"\nCandidate text: {candidates_text}\n")
|
||||||
|
|
||||||
prompt = f"""
|
prompt = f"""You are an expert at matching receipts to bank transactions. Your PRIMARY goal is to find the candidate with the CLOSEST AMOUNT match.
|
||||||
You are an expert at matching receipts to bank transactions. Analyze the receipt below against ALL the candidate transactions and return the BEST match.
|
|
||||||
|
|
||||||
RECEIPT TO MATCH:
|
RECEIPT TO MATCH:
|
||||||
- Vendor: {receipt.vendor}
|
- Vendor: {receipt.vendor}
|
||||||
@@ -143,25 +144,52 @@ RECEIPT TO MATCH:
|
|||||||
CANDIDATE TRANSACTIONS:
|
CANDIDATE TRANSACTIONS:
|
||||||
{candidates_text}
|
{candidates_text}
|
||||||
|
|
||||||
SCORING CRITERIA:
|
CRITICAL INSTRUCTIONS FOR SELECTION:
|
||||||
- Perfect matches (same vendor, amount, date): 0.95-1.0
|
1. FIRST: Find the candidate(s) with the SMALLEST amount percentage difference
|
||||||
- High confidence (minor differences): 0.8-0.94
|
2. ONLY if multiple candidates have similar amounts (within 2% of each other), THEN consider vendor/date/notes
|
||||||
- Medium confidence (moderate differences): 0.6-0.79
|
3. USE THE PERCENTAGE DIFFERENCE PROVIDED for each candidate - DO NOT calculate yourself
|
||||||
- Low confidence (significant differences): 0.4-0.59
|
4. IGNORE vendor/description matches if amounts are far apart (>20% difference)
|
||||||
- Very low confidence (major differences): 0.2-0.39
|
5. The candidate with the closest amount is almost always the correct match
|
||||||
- Minimal similarity: 0.1-0.19
|
|
||||||
- No meaningful similarity: 0.0-0.09
|
|
||||||
|
|
||||||
Consider vendor name similarity, amount accuracy, date proximity, and description/notes relevance.
|
SCORING CRITERIA - AMOUNT DIFFERENCE IS 90% OF THE DECISION:
|
||||||
|
|
||||||
IMPORTANT: You MUST return the candidate with the highest match score, even if it's very low. Never return NONE.
|
Step 1: Calculate BASE SCORE using the provided amount percentage difference:
|
||||||
Return ONLY the best match in this exact format:
|
- 0-1% difference: Base score = 0.95
|
||||||
CANDIDATE_NUMBER|CONFIDENCE_SCORE|REASON
|
- 1-2% difference: Base score = 0.90
|
||||||
|
- 2-3% difference: Base score = 0.85
|
||||||
|
- 3-5% difference: Base score = 0.75
|
||||||
|
- 5-7% difference: Base score = 0.65
|
||||||
|
- 7-10% difference: Base score = 0.55
|
||||||
|
- 10-15% difference: Base score = 0.40
|
||||||
|
- 15-20% difference: Base score = 0.25
|
||||||
|
- 20-30% difference: Base score = 0.15
|
||||||
|
- 30-50% difference: Base score = 0.08
|
||||||
|
- 50-100% difference: Base score = 0.03
|
||||||
|
- >100% difference: Base score = 0.01
|
||||||
|
|
||||||
Example: 3|0.87|Same vendor name, exact amount match, 1 day apart
|
Step 2: ADJUST the base score (±0.10 maximum):
|
||||||
Example of low match: 5|0.15|Best available option despite significant differences in vendor and amount
|
- Vendor exact match: +0.10
|
||||||
"""
|
- Vendor similar/partial match: +0.05
|
||||||
|
- Date within 7 days: +0.05
|
||||||
|
- Date within 30 days: +0.02
|
||||||
|
- Description/notes keywords match: +0.02
|
||||||
|
- Vendor completely different: -0.05
|
||||||
|
- Date >90 days apart: -0.03
|
||||||
|
|
||||||
|
Step 3: Ensure final score is between 0.0 and 1.0
|
||||||
|
|
||||||
|
|
||||||
|
CRITICAL: You MUST return valid JSON only. No explanations, no text before or after.
|
||||||
|
|
||||||
|
Return format:
|
||||||
|
{{"candidate_number": 1, "confidence_score": 0.65, "reason": "5.8% amount difference with similar vendor"}}
|
||||||
|
|
||||||
|
Another example:
|
||||||
|
{{"candidate_number": 2, "confidence_score": 0.01, "reason": "9850% amount difference, extremely poor match"}}
|
||||||
|
|
||||||
|
Return ONLY JSON for the best candidate:"""
|
||||||
|
|
||||||
|
# logger.info(f"This is the prompt: {prompt}")
|
||||||
for attempt in range(self.max_retries):
|
for attempt in range(self.max_retries):
|
||||||
try:
|
try:
|
||||||
result = self._call_groq_api_with_timeout(
|
result = self._call_groq_api_with_timeout(
|
||||||
@@ -179,6 +207,22 @@ Example of low match: 5|0.15|Best available option despite significant differenc
|
|||||||
|
|
||||||
if 0 <= candidate_num < len(candidates):
|
if 0 <= candidate_num < len(candidates):
|
||||||
best_transaction = candidates[candidate_num]
|
best_transaction = candidates[candidate_num]
|
||||||
|
|
||||||
|
# Validate the match - catch AI errors with extreme amount differences
|
||||||
|
transaction_amount_abs = abs(best_transaction.amount)
|
||||||
|
amount_diff = abs(receipt.amount - transaction_amount_abs)
|
||||||
|
amount_percent_diff = (
|
||||||
|
(amount_diff / receipt.amount) * 100 if receipt.amount > 0 else 0
|
||||||
|
)
|
||||||
|
|
||||||
|
# If amount difference is >100%, force very low score
|
||||||
|
if amount_percent_diff > 100:
|
||||||
|
logger.warning(
|
||||||
|
f"Overriding AI score for extreme mismatch: {receipt.amount} vs {transaction_amount_abs} ({amount_percent_diff:.1f}% diff)"
|
||||||
|
)
|
||||||
|
score = min(0.05, score) # Cap at 0.05 for extreme mismatches
|
||||||
|
reason = f"{amount_percent_diff:.1f}% amount difference, extreme mismatch"
|
||||||
|
|
||||||
logger.info(
|
logger.info(
|
||||||
f"AI selected candidate {candidate_num + 1}: {best_transaction.vendor} (score: {score:.3f})"
|
f"AI selected candidate {candidate_num + 1}: {best_transaction.vendor} (score: {score:.3f})"
|
||||||
)
|
)
|
||||||
@@ -204,55 +248,93 @@ Example of low match: 5|0.15|Best available option despite significant differenc
|
|||||||
return None
|
return None
|
||||||
|
|
||||||
def _parse_single_match_response(self, result: str) -> Tuple[int, float, str]:
    """Parse the model's reply for a single best match.

    The reply is tried, in order, as:

    1. a bare JSON object;
    2. a flat JSON object embedded in other text (e.g. a markdown fence);
    3. loose ``"key": value`` pairs found anywhere in the text;
    4. the legacy ``NUMBER|SCORE|REASON`` pipe-delimited line.

    Args:
        result: Raw text returned by the model.

    Returns:
        ``(candidate_index, score, reason)``.  ``candidate_index`` is
        0-based (the model numbers candidates from 1) and is ``-1`` when the
        reply carries no usable candidate number.  ``score`` is clamped to
        ``[0.0, 1.0]``.  When nothing can be parsed the result is
        ``(-1, 0.0, "Parse error: <first 50 chars>...")``.
    """
    import json
    import math
    import re

    def from_object(payload: dict, label: str) -> Tuple[int, float, str]:
        """Coerce a decoded JSON object into the (index, score, reason) triple."""
        # A missing, null or non-numeric candidate number maps to the -1
        # sentinel instead of raising (or silently becoming -2).
        try:
            index = int(payload.get("candidate_number")) - 1
        except (TypeError, ValueError, OverflowError):
            index = -1
        if index < 0:
            index = -1

        try:
            score = float(payload.get("confidence_score", 0.0))
        except (TypeError, ValueError):
            score = 0.0
        # min()/max() would turn NaN into 1.0, so neutralise it first.
        score = 0.0 if math.isnan(score) else max(0.0, min(1.0, score))

        raw_reason = payload.get("reason")
        reason = "No reason provided" if raw_reason is None else str(raw_reason)

        logger.debug(f"{label}: candidate={index}, score={score}, reason={reason}")
        return index, score, reason

    result = result.strip()
    logger.debug(f"Parsing single match response: {result}")

    try:
        # Stage 1: the whole reply is JSON.  Valid JSON that is not an
        # object (a number, a list, ...) falls through to the later stages.
        try:
            payload = json.loads(result)
        except json.JSONDecodeError:
            payload = None
        if isinstance(payload, dict):
            return from_object(payload, "Parsed JSON")

        # Stage 2: a flat object (no nested braces) with the three keys in
        # the documented order, surrounded by arbitrary text.
        json_pattern = r'\{[^{}]*"candidate_number"[^{}]*"confidence_score"[^{}]*"reason"[^{}]*\}'
        json_match = re.search(json_pattern, result)
        if json_match:
            return from_object(json.loads(json_match.group()), "Parsed extracted JSON")

        # Stage 3: pick the three fields out individually, in any order.
        candidate_match = re.search(r'"candidate_number"\s*:\s*(\d+)', result)
        score_match = re.search(r'"confidence_score"\s*:\s*([\d.]+)', result)
        reason_match = re.search(r'"reason"\s*:\s*"([^"]*)"', result)
        if candidate_match and score_match and reason_match:
            candidate_num = int(candidate_match.group(1)) - 1
            score = max(0.0, min(1.0, float(score_match.group(1))))
            reason = reason_match.group(1)
            logger.debug(
                f"Parsed fields individually: candidate={candidate_num}, score={score}, reason={reason}"
            )
            return candidate_num, score, reason

    except ValueError as e:  # json.JSONDecodeError is a ValueError subclass
        logger.warning(f"Error parsing JSON response: {e}")

    # Stage 4: old pipe-delimited format, kept for backwards compatibility.
    if "|" in result:
        parts = result.split("|")
        if len(parts) >= 3:
            try:
                candidate_match = re.search(r"\d+", parts[0])
                if candidate_match is None:
                    raise ValueError("No candidate number found")
                candidate_num = int(candidate_match.group()) - 1  # to 0-based

                score_clean = "".join(
                    c for c in parts[1] if c.isdigit() or c == "."
                )
                score = float(score_clean) if score_clean else 0.0
                score = max(0.0, min(1.0, score))
            except ValueError as fallback_error:
                logger.warning(f"Fallback parsing also failed: {fallback_error}")
            else:
                # The reason may itself contain pipes; re-join the remainder.
                reason = "|".join(parts[2:]).strip()
                logger.debug(
                    f"Parsed (fallback): candidate={candidate_num}, score={score}, reason={reason}"
                )
                return candidate_num, score, reason

    # Nothing matched: report the sentinel together with a short excerpt.
    return -1, 0.0, f"Parse error: {result[:50]}..."
|
||||||
|
|
||||||
def _filter_candidates(
|
def _filter_candidates(
|
||||||
@@ -260,18 +342,29 @@ Example of low match: 5|0.15|Best available option despite significant differenc
|
|||||||
) -> List[Transaction]:
|
) -> List[Transaction]:
|
||||||
"""Filter transactions to create a reasonable candidate list"""
|
"""Filter transactions to create a reasonable candidate list"""
|
||||||
candidates = []
|
candidates = []
|
||||||
amount_threshold = receipt.amount * 2.0 # 200% threshold - very inclusive
|
|
||||||
|
|
||||||
for transaction in transactions:
|
for transaction in transactions:
|
||||||
# Use absolute value for transaction amount comparison
|
# Use absolute value for transaction amount comparison
|
||||||
transaction_amount_abs = abs(transaction.amount)
|
transaction_amount_abs = abs(transaction.amount)
|
||||||
|
amount_diff = abs(receipt.amount - transaction_amount_abs)
|
||||||
|
|
||||||
# Only exclude transactions with obviously different amounts
|
# Calculate percentage difference
|
||||||
if abs(receipt.amount - transaction_amount_abs) <= amount_threshold:
|
if receipt.amount > 0:
|
||||||
|
percent_diff = (amount_diff / receipt.amount) * 100
|
||||||
|
else:
|
||||||
|
percent_diff = 0
|
||||||
|
|
||||||
|
# Be more restrictive: exclude transactions with >300% difference
|
||||||
|
# This prevents extreme mismatches while still being generous
|
||||||
|
if percent_diff <= 300:
|
||||||
candidates.append(transaction)
|
candidates.append(transaction)
|
||||||
|
else:
|
||||||
|
logger.debug(
|
||||||
|
f"Filtered out transaction ${transaction_amount_abs} for receipt ${receipt.amount} ({percent_diff:.1f}% difference)"
|
||||||
|
)
|
||||||
|
|
||||||
logger.debug(
|
logger.debug(
|
||||||
f"Filtered {len(transactions)} transactions to {len(candidates)} candidates"
|
f"Filtered {len(transactions)} transactions to {len(candidates)} candidates for receipt ${receipt.amount}"
|
||||||
)
|
)
|
||||||
return candidates
|
return candidates
|
||||||
|
|
||||||
@@ -338,6 +431,10 @@ Example of low match: 5|0.15|Best available option despite significant differenc
|
|||||||
|
|
||||||
Consider description and category similarity in your scoring.
|
Consider description and category similarity in your scoring.
|
||||||
|
|
||||||
|
THINGS TO NOTE:
|
||||||
|
The most important factor to consider is the Amount for both the transaction and the receipt, the closer the amounts, the higher the score.
|
||||||
|
If the amounts are different or not close return a low score (0-0.1) based on other factors.
|
||||||
|
|
||||||
IMPORTANT: Return ONLY the score and reason separated by a pipe character.
|
IMPORTANT: Return ONLY the score and reason separated by a pipe character.
|
||||||
Format: [score]|[reason]
|
Format: [score]|[reason]
|
||||||
Example: 0.85|Same vendor, same amount, 2 days apart
|
Example: 0.85|Same vendor, same amount, 2 days apart
|
||||||
@@ -352,8 +449,8 @@ Example of low match: 5|0.15|Best available option despite significant differenc
|
|||||||
# Parse the result - handle multiple formats
|
# Parse the result - handle multiple formats
|
||||||
score, reason = self._parse_ai_response(result)
|
score, reason = self._parse_ai_response(result)
|
||||||
|
|
||||||
logger.debug(f"AI Response: {result}")
|
# logger.debug(f"AI Response: {result}")
|
||||||
logger.debug(f"Parsed: score={score}, reason={reason}")
|
# logger.debug(f"Parsed: score={score}, reason={reason}")
|
||||||
|
|
||||||
return score, reason
|
return score, reason
|
||||||
|
|
||||||
@@ -451,9 +548,12 @@ Example of low match: 5|0.15|Best available option despite significant differenc
|
|||||||
try:
|
try:
|
||||||
response = self.client.chat.completions.create(
|
response = self.client.chat.completions.create(
|
||||||
model=self.model,
|
model=self.model,
|
||||||
messages=[{"role": "user", "content": prompt}],
|
messages=[
|
||||||
max_tokens=200,
|
{"role": "system", "content": "You are a JSON-only response assistant. Return only valid JSON, no explanations."},
|
||||||
temperature=0.1,
|
{"role": "user", "content": prompt}
|
||||||
|
],
|
||||||
|
max_tokens=150,
|
||||||
|
temperature=0,
|
||||||
)
|
)
|
||||||
return response.choices[0].message.content.strip()
|
return response.choices[0].message.content.strip()
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
|
|||||||
@@ -0,0 +1,273 @@
|
|||||||
|
import json
|
||||||
|
import logging
|
||||||
|
from typing import Dict, List, Optional
|
||||||
|
|
||||||
|
import groq
|
||||||
|
from config import settings
|
||||||
|
from schemas import Match
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
class AIRulesMatcher:
|
||||||
|
"""
|
||||||
|
AI-powered rules engine for post-matching evaluation.
|
||||||
|
Uses LLM to intelligently apply custom rules and determine if matches should be:
|
||||||
|
- Flagged for manual review (flag_for_review=True)
|
||||||
|
- Auto-approved (auto_approve=True)
|
||||||
|
"""
|
||||||
|
|
||||||
|
def __init__(self):
    """Set up the Groq client and the model name from the shared settings."""
    api_key = settings.GROQ_API_KEY
    self.client = groq.Groq(api_key=api_key)
    # The model name is read from settings rather than hard-coded here.
    self.model = settings.model
|
||||||
|
|
||||||
|
def apply_rules_to_matches(
    self, matches: List[Match], ai_rules: Optional[List[Dict]] = None
) -> List[Match]:
    """Evaluate the rules for every match and record the outcome on it.

    Each match is updated in place: ``match.tax_analysis`` (created as a dict
    when it is ``None``) receives the keys ``flag_for_review``,
    ``auto_approve``, ``rules_applied`` and ``rule_reasons``, and a marker is
    appended to ``match.match_reason`` when a flag is set.

    Args:
        matches: Match objects produced by the matching engine.
        ai_rules: Optional custom rules, each with a ``condition`` and an
            ``action``.  They are evaluated after the built-in
            currency-mismatch rule.

    Returns:
        The same ``matches`` list that was passed in.  If evaluating a match
        raises, that match is flagged for review (fail safe) and processing
        continues with the next one.
    """
    if not matches:
        return matches

    custom_rules = list(ai_rules) if ai_rules else []
    logger.info(
        f"Applying AI rules to {len(matches)} matches with {len(custom_rules)} custom rules"
    )

    # Built-in rule first: a currency mismatch should always flag for review.
    all_rules = [
        {
            "condition": "receipt currency differs from transaction currency",
            "action": "flag_for_review",
        },
        *custom_rules,
    ]

    for match in matches:
        try:
            rule_evaluation = self._evaluate_rules_for_match(match, all_rules)

            if match.tax_analysis is None:
                match.tax_analysis = {}

            match.tax_analysis["flag_for_review"] = rule_evaluation["flag_for_review"]
            match.tax_analysis["auto_approve"] = rule_evaluation["auto_approve"]
            match.tax_analysis["rules_applied"] = rule_evaluation["rules_applied"]
            match.tax_analysis["rule_reasons"] = rule_evaluation["reasons"]

            # Surface the decision in the human-readable reason.  A missing
            # reason is treated as empty so it cannot trip the fail-safe below.
            if rule_evaluation["flag_for_review"]:
                match.match_reason = (match.match_reason or "") + " | 🚩 FLAGGED FOR REVIEW"
            if rule_evaluation["auto_approve"]:
                match.match_reason = (match.match_reason or "") + " | ✅ AUTO-APPROVED"

            logger.info(
                f"Match {match.receipt.id} → {match.transaction.id}: "
                f"flag_for_review={rule_evaluation['flag_for_review']}, "
                f"auto_approve={rule_evaluation['auto_approve']}"
            )

        except Exception as e:
            # Deliberately broad: any failure must degrade to manual review,
            # never to a silent approval.  logger.exception keeps the traceback.
            logger.exception(f"Error applying rules to match: {str(e)}")
            if match.tax_analysis is None:
                match.tax_analysis = {}
            match.tax_analysis["flag_for_review"] = True
            match.tax_analysis["auto_approve"] = False
            # Reset as well, so the dict has the same keys as on success and
            # never carries rules from a partially applied evaluation.
            match.tax_analysis["rules_applied"] = []
            match.tax_analysis["rule_reasons"] = [
                f"Rule evaluation error: {str(e)}"
            ]

    return matches
|
||||||
|
|
||||||
|
def _evaluate_rules_for_match(
    self, match: Match, rules: List[Dict]
) -> Dict[str, object]:
    """Use the LLM to evaluate all rules for a single match.

    Args:
        match: The receipt/transaction pairing to evaluate.
        rules: Rules to apply; each is rendered into the prompt through
            ``_build_rules_context``.

    Returns:
        A dict shaped like::

            {
                "flag_for_review": bool,
                "auto_approve": bool,
                "rules_applied": List[str],
                "reasons": List[str]
            }

        When the model sets both flags, ``auto_approve`` is reset to
        ``False`` and a note is appended to ``reasons``.  If the API call or
        the handling of its reply raises, a fail-safe result that flags the
        match for review is returned.  Errors raised while building the
        prompt context are not caught here and propagate to the caller.
    """
    match_context = self._build_match_context(match)
    rules_context = self._build_rules_context(rules)

    prompt = f"""You are a financial matching rules engine. Analyze the following receipt-to-transaction match and apply the specified rules.

MATCH DETAILS:
{match_context}

RULES TO APPLY:
{rules_context}

INSTRUCTIONS:
1. Evaluate each rule's condition against the match details
2. If a rule's condition is TRUE, apply the action:
- If action is "flag_for_review" or "review" → set flag_for_review = true
- If action is "auto_approve" or "approve" → set auto_approve = true
- For other actions, determine if they imply review or approval
3. If BOTH flag_for_review and auto_approve are triggered, flag_for_review takes priority
4. If NO rules match, set both to false (default behavior)

IMPORTANT BUILT-IN RULE:
- If receipt currency differs from transaction currency → ALWAYS set flag_for_review = true

Return ONLY a valid JSON object with this exact format:
{{
"flag_for_review": boolean,
"auto_approve": boolean,
"rules_applied": ["list of rule conditions that matched"],
"reasons": ["list of reasons for the decisions"]
}}
"""

    try:
        response = self.client.chat.completions.create(
            model=self.model,
            messages=[
                {
                    "role": "system",
                    "content": "You are a financial rules evaluation assistant. You analyze transaction matches and apply business rules. Always respond with valid JSON only.",
                },
                {"role": "user", "content": prompt},
            ],
            temperature=0.1,
            max_tokens=500,
        )

        result_text = response.choices[0].message.content.strip()
        result = self._parse_llm_response(result_text)

        # The prompt asks for this priority already; enforce it in code too,
        # since the model's output is not guaranteed to follow it.
        if result["flag_for_review"] and result["auto_approve"]:
            logger.warning(
                "Both flag_for_review and auto_approve were true, prioritizing flag_for_review"
            )
            result["auto_approve"] = False
            # The model may return "reasons" as a bare string (or omit it);
            # normalise to a list so a valid decision is not thrown away.
            reasons = result.get("reasons")
            if not isinstance(reasons, list):
                reasons = [] if reasons is None else [str(reasons)]
                result["reasons"] = reasons
            reasons.append(
                "Conflicting rules: prioritized manual review over auto-approval"
            )

        return result

    except Exception as e:
        # Deliberately broad: any failure degrades to manual review.
        logger.exception(f"LLM evaluation failed: {str(e)}")
        return {
            "flag_for_review": True,
            "auto_approve": False,
            "rules_applied": [],
            "reasons": [f"Error evaluating rules: {str(e)}"],
        }
|
||||||
|
|
||||||
|
def _build_match_context(self, match: Match) -> str:
    """Render the receipt, the transaction and the match quality as prompt text.

    The text has three sections ("Receipt Information", "Transaction
    Information", "Match Quality"); a "Tax Analysis" section holding the
    JSON-dumped ``match.tax_analysis`` is appended when that value is truthy.
    """
    rcpt = match.receipt
    txn = match.transaction

    sections = [
        "Receipt Information:",
        f"- ID: {rcpt.id}",
        f"- Vendor: {rcpt.vendor}",
        f"- Amount: ${rcpt.amount:.2f}",
        f"- Tax: ${rcpt.tax:.2f}",
        f"- Category: {rcpt.category}",
        f"- Description: {rcpt.description}",
        f"- Date: {rcpt.receipt_date}",
        f"- Currency: {rcpt.currency}",
        "",
        "Transaction Information:",
        f"- ID: {txn.id}",
        f"- Vendor: {txn.vendor}",
        f"- Amount: ${txn.amount:.2f}",
        f"- Date: {txn.transaction_date}",
        f"- Notes: {txn.notes}",
        f"- Currency: {txn.currency}",
        "",
        "Match Quality:",
        f"- Confidence Score: {match.confidence_score:.2%}",
        f"- Match Reason: {match.match_reason}",
    ]
    # Every line, including the last, is newline-terminated.
    text = "\n".join(sections) + "\n"

    if not match.tax_analysis:
        return text

    tax_json = json.dumps(match.tax_analysis, indent=2)
    return text + f"\nTax Analysis:\n{tax_json}"
|
||||||
|
|
||||||
|
def _build_rules_context(self, rules: List[Dict]) -> str:
|
||||||
|
"""Build a formatted list of rules for the LLM"""
|
||||||
|
if not rules:
|
||||||
|
return "No custom rules provided. Apply default evaluation."
|
||||||
|
|
||||||
|
rules_text = ""
|
||||||
|
for idx, rule in enumerate(rules, 1):
|
||||||
|
condition = rule.get("condition", "")
|
||||||
|
action = rule.get("action", "")
|
||||||
|
rules_text += f"{idx}. IF {condition} → THEN {action}\n"
|
||||||
|
|
||||||
|
return rules_text
|
||||||
|
|
||||||
|
def _parse_llm_response(self, response_text: str) -> Dict:
|
||||||
|
"""Parse and validate LLM JSON response"""
|
||||||
|
try:
|
||||||
|
# Remove markdown code blocks if present
|
||||||
|
if "```json" in response_text:
|
||||||
|
response_text = response_text.split("```json")[1].split("```")[0]
|
||||||
|
elif "```" in response_text:
|
||||||
|
response_text = response_text.split("```")[1].split("```")[0]
|
||||||
|
|
||||||
|
# Parse JSON
|
||||||
|
result = json.loads(response_text.strip())
|
||||||
|
|
||||||
|
# Validate required fields
|
||||||
|
if "flag_for_review" not in result:
|
||||||
|
result["flag_for_review"] = False
|
||||||
|
if "auto_approve" not in result:
|
||||||
|
result["auto_approve"] = False
|
||||||
|
if "rules_applied" not in result:
|
||||||
|
result["rules_applied"] = []
|
||||||
|
if "reasons" not in result:
|
||||||
|
result["reasons"] = []
|
||||||
|
|
||||||
|
# Ensure boolean types
|
||||||
|
result["flag_for_review"] = bool(result["flag_for_review"])
|
||||||
|
result["auto_approve"] = bool(result["auto_approve"])
|
||||||
|
|
||||||
|
return result
|
||||||
|
|
||||||
|
except json.JSONDecodeError as e:
|
||||||
|
logger.error(f"Failed to parse LLM response as JSON: {str(e)}")
|
||||||
|
logger.error(f"Response text: {response_text}")
|
||||||
|
# Return safe defaults
|
||||||
|
return {
|
||||||
|
"flag_for_review": True, # Fail safe to manual review
|
||||||
|
"auto_approve": False,
|
||||||
|
"rules_applied": [],
|
||||||
|
"reasons": ["Failed to parse LLM response"],
|
||||||
|
}
|
||||||
@@ -1,13 +1,14 @@
|
|||||||
import base64
|
import base64
|
||||||
|
import json
|
||||||
import logging
|
import logging
|
||||||
import os
|
import os
|
||||||
|
import re
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
from typing import Any, Dict
|
from typing import Any, Dict
|
||||||
|
|
||||||
import aiofiles
|
import aiofiles
|
||||||
import groq
|
import groq
|
||||||
import PyPDF2
|
import PyPDF2
|
||||||
|
|
||||||
from config import settings
|
from config import settings
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
@@ -18,51 +19,204 @@ class DocumentProcessor:
|
|||||||
self.client = groq.Groq(api_key=settings.GROQ_API_KEY)
|
self.client = groq.Groq(api_key=settings.GROQ_API_KEY)
|
||||||
self.model = "meta-llama/llama-4-scout-17b-16e-instruct" # Vision model
|
self.model = "meta-llama/llama-4-scout-17b-16e-instruct" # Vision model
|
||||||
|
|
||||||
async def process_file(self, file_path: str, file_type: str) -> Dict[str, Any]:
|
def _extract_first_json(self, raw: str) -> dict:
|
||||||
"""Process uploaded file and extract receipt data"""
|
"""Extract the first valid JSON object from raw LLM output.
|
||||||
|
|
||||||
|
Handles cases where LLM returns extra text after/before the JSON.
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
# First try direct parsing (fastest path)
|
||||||
|
return json.loads(raw)
|
||||||
|
except json.JSONDecodeError:
|
||||||
|
pass
|
||||||
|
|
||||||
|
# Find the first '{' and match closing '}'
|
||||||
|
start = raw.find("{")
|
||||||
|
if start == -1:
|
||||||
|
raise ValueError("No JSON object found in LLM output")
|
||||||
|
|
||||||
|
depth = 0
|
||||||
|
end = -1
|
||||||
|
in_string = False
|
||||||
|
escape_next = False
|
||||||
|
|
||||||
|
for i in range(start, len(raw)):
|
||||||
|
ch = raw[i]
|
||||||
|
|
||||||
|
# Handle string escaping
|
||||||
|
if escape_next:
|
||||||
|
escape_next = False
|
||||||
|
continue
|
||||||
|
if ch == "\\":
|
||||||
|
escape_next = True
|
||||||
|
continue
|
||||||
|
|
||||||
|
# Track if we're inside a string
|
||||||
|
if ch == '"':
|
||||||
|
in_string = not in_string
|
||||||
|
continue
|
||||||
|
|
||||||
|
# Only count braces outside of strings
|
||||||
|
if not in_string:
|
||||||
|
if ch == "{":
|
||||||
|
depth += 1
|
||||||
|
elif ch == "}":
|
||||||
|
depth -= 1
|
||||||
|
if depth == 0:
|
||||||
|
end = i + 1
|
||||||
|
break
|
||||||
|
|
||||||
|
if end == -1:
|
||||||
|
raise ValueError("Unbalanced JSON braces in LLM output")
|
||||||
|
|
||||||
|
json_str = raw[start:end]
|
||||||
|
return json.loads(json_str)
|
||||||
|
|
||||||
|
async def process_file(
|
||||||
|
self,
|
||||||
|
file_path: str,
|
||||||
|
file_type: str,
|
||||||
|
user_location: str = None,
|
||||||
|
ai_rules: list = None,
|
||||||
|
) -> Dict[str, Any]:
|
||||||
|
"""Process uploaded file and extract receipt data
|
||||||
|
|
||||||
|
Args:
|
||||||
|
file_path: Path to the file to process
|
||||||
|
file_type: Type of file (jpg, pdf, etc.)
|
||||||
|
user_location: User's location string in format "State/Province, Country" (e.g., "Ontario, Canada")
|
||||||
|
ai_rules: List of AI rules for categorization (e.g., [{"condition": "vendor is Starbucks", "action": "Food"}])
|
||||||
|
"""
|
||||||
try:
|
try:
|
||||||
if file_type.lower() in ["jpg", "jpeg", "png", "gif", "bmp"]:
|
if file_type.lower() in ["jpg", "jpeg", "png", "gif", "bmp"]:
|
||||||
return await self._process_image(file_path)
|
return await self._process_image(file_path, user_location, ai_rules)
|
||||||
elif file_type.lower() == "pdf":
|
elif file_type.lower() == "pdf":
|
||||||
return await self._process_pdf(file_path)
|
return await self._process_pdf(file_path, user_location, ai_rules)
|
||||||
else:
|
else:
|
||||||
raise ValueError(f"Unsupported file type: {file_type}")
|
raise ValueError(f"Unsupported file type: {file_type}")
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
return {"error": str(e)}
|
return {"error": str(e)}
|
||||||
|
|
||||||
async def _process_image(self, image_path: str) -> Dict[str, Any]:
|
async def _process_image(
|
||||||
"""Extract data from image using Groq vision"""
|
self, image_path: str, user_location: str = None, ai_rules: list = None
|
||||||
|
) -> Dict[str, Any]:
|
||||||
|
"""Extract data from image using Groq vision
|
||||||
|
|
||||||
|
Args:
|
||||||
|
image_path: Path to the image file
|
||||||
|
user_location: User's location string in format "State/Province, Country" (e.g., "Ontario, Canada")
|
||||||
|
ai_rules: List of AI rules for categorization
|
||||||
|
"""
|
||||||
try:
|
try:
|
||||||
# Encode image to base64
|
# Encode image to base64
|
||||||
base64_image = self._encode_image(image_path)
|
base64_image = self._encode_image(image_path)
|
||||||
|
|
||||||
|
# Build user location context
|
||||||
|
user_location_context = ""
|
||||||
|
if user_location:
|
||||||
|
user_location_context = f"""
|
||||||
|
|
||||||
|
USER LOCATION CONTEXT:
|
||||||
|
The user is located in {user_location}.
|
||||||
|
- If the receipt location is MISSING or UNCLEAR, use the user's location ({user_location}) for tax calculations.
|
||||||
|
- If the receipt clearly shows a different location, use the receipt's location instead.
|
||||||
|
- Apply depreciation rules based on the user's location.
|
||||||
|
"""
|
||||||
|
|
||||||
|
# Build AI rules context for categorization
|
||||||
|
ai_rules_context = ""
|
||||||
|
if ai_rules and len(ai_rules) > 0:
|
||||||
|
# Create a simple, direct instruction for each rule
|
||||||
|
ai_rules_context = "\n "
|
||||||
|
for idx, rule in enumerate(ai_rules, 1):
|
||||||
|
condition = rule.get("condition", "")
|
||||||
|
action = rule.get("action", "")
|
||||||
|
|
||||||
|
# Extract the keyword and category from the rule
|
||||||
|
keyword_match = re.search(r'CONTAINS\s+"([^"]+)"', condition, re.IGNORECASE)
|
||||||
|
category_match = re.search(r'SET_CATEGORY:\s*(.+)', action, re.IGNORECASE)
|
||||||
|
|
||||||
|
if keyword_match and category_match:
|
||||||
|
keyword = keyword_match.group(1)
|
||||||
|
category = category_match.group(1).strip()
|
||||||
|
# Create one simple instruction per line
|
||||||
|
ai_rules_context += f'If the Vendor name contains "{keyword}": Set category to "{category}"\n '
|
||||||
|
|
||||||
|
ai_rules_context += "\n"
|
||||||
|
|
||||||
# Create Groq vision prompt
|
# Create Groq vision prompt
|
||||||
prompt = """
|
prompt = f"""
|
||||||
Analyze this receipt image and extract the following information in JSON format:
|
Analyze this receipt image and extract the following information in JSON format.
|
||||||
{
|
{ai_rules_context}
|
||||||
|
JSON Format:
|
||||||
|
{{
|
||||||
"vendor": "Store/company name",
|
"vendor": "Store/company name",
|
||||||
"description": "Detailed description of items/services purchased",
|
"description": "Detailed description of items/services purchased",
|
||||||
"total_amount": 0.00,
|
"total_amount": 0.00,
|
||||||
"tax_amount": 0.00,
|
"tax_amount": 0.00,
|
||||||
"date": "YYYY-MM-DD",
|
"date": "YYYY-MM-DD",
|
||||||
"category": "Food/Transport/Office/Other",
|
"category": "Check rules above first",
|
||||||
"confidence": 0.95,
|
"confidence": 0.95,
|
||||||
"currency": "USD"
|
"currency": "USD",
|
||||||
}
|
"location": "Province/State, Country",
|
||||||
|
"calculated_tax": 0.00,
|
||||||
|
"is_depreciable": false,
|
||||||
|
"name_of_asset": null,
|
||||||
|
"cca_rate": null,
|
||||||
|
"useful_life": null,
|
||||||
|
"residual_value": null,
|
||||||
|
"extraction_success": True
|
||||||
|
}}
|
||||||
|
|
||||||
Rules:
|
EXTRACTION Rules:
|
||||||
- Extract vendor name as it appears on receipt
|
- Extract vendor name as it appears on receipt
|
||||||
- Extract description of items/services purchased (e.g., "Coffee and sandwich", "Gasoline", "Office supplies")
|
- Extract description of items/services purchased (e.g., "Coffee and sandwich", "Gasoline", "Office supplies")
|
||||||
- Total amount should be the final total including tax
|
- Total amount should be the final total including tax
|
||||||
- Tax amount is separate tax line if available
|
- Tax amount is separate tax line if available (if not clearly shown, calculate based on location)
|
||||||
- Date should be the date on the receipt
|
- Date should be the date on the receipt
|
||||||
- Categorize based on vendor type (Starbucks=Food, Shell=Transport, etc.)
|
|
||||||
- Confidence score 0-1 based on how clear the receipt is
|
- Confidence score 0-1 based on how clear the receipt is
|
||||||
- Currency should be the currency used on the receipt (e.g., "USD", "EUR")
|
- Currency should be the currency used on the receipt (e.g., "USD", "EUR", "CAD")
|
||||||
|
|
||||||
|
{user_location_context}
|
||||||
|
LOCATION & TAX RULES:
|
||||||
|
- Extract location from receipt (look for store address, province/state, country)
|
||||||
|
- Format location as "Province/State, Country" (e.g., "Ontario, Canada" or "California, USA")
|
||||||
|
- If location not shown on receipt, return null for location (system will use user location as fallback)
|
||||||
|
|
||||||
|
TAX EXTRACTION RULES (IMPORTANT):
|
||||||
|
- If tax is EXPLICITLY shown on receipt (even if $0 or 0%), use that exact value:
|
||||||
|
* If receipt shows "Tax: $0", "Tax: $0.00", "Tax (0%)", or similar → set tax_amount to 0.00 and calculated_tax to null
|
||||||
|
* If receipt shows any other tax amount → set tax_amount to that value and calculated_tax to null
|
||||||
|
|
||||||
|
- If tax_amount is NOT shown or UNCLEAR on receipt, calculate it based on location:
|
||||||
|
* Ontario, Canada: 13% HST
|
||||||
|
* Quebec, Canada: 9.975% QST + 5% GST = 14.975% total
|
||||||
|
* British Columbia, Canada: 12% (5% GST + 7% PST)
|
||||||
|
* Alberta, Canada: 5% GST
|
||||||
|
* California, USA: ~7.25% (varies by locality)
|
||||||
|
* New York, USA: ~8.875% (varies by locality)
|
||||||
|
* Texas, USA: 6.25%
|
||||||
|
* For other locations, estimate based on typical rates
|
||||||
|
* Store calculated tax in "calculated_tax" field and set tax_amount to the calculated value
|
||||||
|
|
||||||
|
DEPRECIATION RULES:
|
||||||
|
- Determine if item is a depreciable asset (vehicles, machinery, equipment, computers, furniture, buildings)
|
||||||
|
- Set is_depreciable to true only for capital assets, false for consumables/services
|
||||||
|
- If is_depreciable is true, provide:
|
||||||
|
* name_of_asset: Specific name/model of the asset (e.g., "2024 Honda Accord", "Dell Laptop XPS 15", "Office Desk")
|
||||||
|
* cca_rate: CCA rate as decimal (e.g., 0.30 for 30%, 0.20 for 20%, 0.04 for 4%)
|
||||||
|
- Class 10 (Vehicles): 30%
|
||||||
|
- Class 8 (Furniture, equipment): 20%
|
||||||
|
- Class 50 (Computers, software): 55%
|
||||||
|
- Class 1 (Buildings): 4%
|
||||||
|
- Class 10.1 (Passenger vehicles >$30k): 30%
|
||||||
|
* useful_life: Expected years of use (e.g., 5 for computers, 8 for vehicles, 10 for furniture)
|
||||||
|
* residual_value: Estimated value at end of life (typically 10% of purchase price for equipment, 20% for vehicles)
|
||||||
|
- If is_depreciable is false, set name_of_asset, cca_rate, useful_life, and residual_value to null
|
||||||
|
|
||||||
Return only valid JSON.
|
Return only valid JSON.
|
||||||
"""
|
"""
|
||||||
|
logger.info(f"This is the prompt: {prompt}")
|
||||||
# Call Groq vision API with correct format
|
# Call Groq vision API with correct format
|
||||||
response = self.client.chat.completions.create(
|
response = self.client.chat.completions.create(
|
||||||
messages=[
|
messages=[
|
||||||
@@ -80,7 +234,7 @@ class DocumentProcessor:
|
|||||||
}
|
}
|
||||||
],
|
],
|
||||||
model=self.model,
|
model=self.model,
|
||||||
max_tokens=500,
|
max_tokens=800,
|
||||||
temperature=0.1,
|
temperature=0.1,
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -96,12 +250,20 @@ class DocumentProcessor:
|
|||||||
with open(image_path, "rb") as image_file:
|
with open(image_path, "rb") as image_file:
|
||||||
return base64.b64encode(image_file.read()).decode("utf-8")
|
return base64.b64encode(image_file.read()).decode("utf-8")
|
||||||
|
|
||||||
async def _process_pdf(self, pdf_path: str) -> Dict[str, Any]:
|
async def _process_pdf(
|
||||||
"""Extract data from PDF by converting to image first"""
|
self, pdf_path: str, user_location: str = None, ai_rules: list = None
|
||||||
|
) -> Dict[str, Any]:
|
||||||
|
"""Extract data from PDF by converting to image first
|
||||||
|
|
||||||
|
Args:
|
||||||
|
pdf_path: Path to the PDF file
|
||||||
|
user_location: User's location string in format "State/Province, Country" (e.g., "Ontario, Canada")
|
||||||
|
ai_rules: List of AI rules for categorization
|
||||||
|
"""
|
||||||
try:
|
try:
|
||||||
# For now, extract text from PDF and process as text
|
# For now, extract text from PDF and process as text
|
||||||
text_content = self._extract_text_from_pdf(pdf_path)
|
text_content = self._extract_text_from_pdf(pdf_path)
|
||||||
return self._process_text_content(text_content)
|
return self._process_text_content(text_content, user_location, ai_rules)
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
return {"error": f"PDF processing error: {str(e)}"}
|
return {"error": f"PDF processing error: {str(e)}"}
|
||||||
@@ -118,12 +280,53 @@ class DocumentProcessor:
|
|||||||
except Exception:
|
except Exception:
|
||||||
return ""
|
return ""
|
||||||
|
|
||||||
def _process_text_content(self, text_content: str) -> Dict[str, Any]:
|
def _process_text_content(
|
||||||
"""Process text content using Groq (fallback for PDFs)"""
|
self, text_content: str, user_location: str = None, ai_rules: list = None
|
||||||
try:
|
) -> Dict[str, Any]:
|
||||||
prompt = f"""
|
"""Process text content using Groq (fallback for PDFs)
|
||||||
Analyze this receipt text and extract the following information in JSON format:
|
|
||||||
|
|
||||||
|
Args:
|
||||||
|
text_content: Extracted text from PDF
|
||||||
|
user_location: User's location string in format "State/Province, Country" (e.g., "Ontario, Canada")
|
||||||
|
ai_rules: List of AI rules for categorization
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
# Build user location context
|
||||||
|
user_location_context = ""
|
||||||
|
if user_location:
|
||||||
|
user_location_context = f"""
|
||||||
|
|
||||||
|
USER LOCATION CONTEXT:
|
||||||
|
The user is located in {user_location}.
|
||||||
|
- If the receipt location is MISSING or UNCLEAR, use the user's location ({user_location}) for tax calculations.
|
||||||
|
- If the receipt clearly shows a different location, use the receipt's location instead.
|
||||||
|
- Apply depreciation rules based on the user's location.
|
||||||
|
"""
|
||||||
|
|
||||||
|
# Build AI rules context for categorization
|
||||||
|
ai_rules_context = ""
|
||||||
|
if ai_rules and len(ai_rules) > 0:
|
||||||
|
# Create a simple, direct instruction for each rule
|
||||||
|
ai_rules_context = "\n "
|
||||||
|
for idx, rule in enumerate(ai_rules, 1):
|
||||||
|
condition = rule.get("condition", "")
|
||||||
|
action = rule.get("action", "")
|
||||||
|
|
||||||
|
# Extract the keyword and category from the rule
|
||||||
|
keyword_match = re.search(r'CONTAINS\s+"([^"]+)"', condition, re.IGNORECASE)
|
||||||
|
category_match = re.search(r'SET_CATEGORY:\s*(.+)', action, re.IGNORECASE)
|
||||||
|
|
||||||
|
if keyword_match and category_match:
|
||||||
|
keyword = keyword_match.group(1)
|
||||||
|
category = category_match.group(1).strip()
|
||||||
|
# Create one simple instruction per line
|
||||||
|
ai_rules_context += f'If the Vendor name contains "{keyword}": Set category to "{category}"\n '
|
||||||
|
|
||||||
|
ai_rules_context += "\n"
|
||||||
|
|
||||||
|
prompt = f"""
|
||||||
|
Analyze this receipt text and extract the following information in JSON format.
|
||||||
|
{ai_rules_context}
|
||||||
Receipt Text:
|
Receipt Text:
|
||||||
{text_content}
|
{text_content}
|
||||||
|
|
||||||
@@ -134,20 +337,63 @@ class DocumentProcessor:
|
|||||||
"total_amount": 0.00,
|
"total_amount": 0.00,
|
||||||
"tax_amount": 0.00,
|
"tax_amount": 0.00,
|
||||||
"date": "YYYY-MM-DD",
|
"date": "YYYY-MM-DD",
|
||||||
"category": "Food/Transport/Office/Other",
|
"category": "Check rules above first",
|
||||||
"confidence": 0.95,
|
"confidence": 0.95,
|
||||||
"currency": "USD"
|
"currency": "USD",
|
||||||
|
"location": "Province/State, Country",
|
||||||
|
"calculated_tax": 0.00,
|
||||||
|
"is_depreciable": false,
|
||||||
|
"name_of_asset": null,
|
||||||
|
"cca_rate": null,
|
||||||
|
"useful_life": null,
|
||||||
|
"residual_value": null,
|
||||||
|
"extraction_success": True
|
||||||
}}
|
}}
|
||||||
|
|
||||||
Rules:
|
EXTRACTION Rules:
|
||||||
- Extract vendor name as it appears on receipt
|
- Extract vendor name as it appears on receipt
|
||||||
- Extract description of items/services purchased (e.g., "Coffee and sandwich", "Gasoline", "Office supplies")
|
- Extract description of items/services purchased (e.g., "Coffee and sandwich", "Gasoline", "Office supplies")
|
||||||
- Total amount should be the final total including tax
|
- Total amount should be the final total including tax
|
||||||
- Tax amount is separate tax line if available
|
- Tax amount is separate tax line if available (if not clearly shown, calculate based on location)
|
||||||
- Date should be the date on the receipt
|
- Date should be the date on the receipt
|
||||||
- Categorize based on vendor type
|
|
||||||
- Confidence score 0-1 based on clarity
|
- Confidence score 0-1 based on clarity
|
||||||
- Currency should be the currency used on the receipt (e.g., "USD", "EUR")
|
- Currency should be the currency used on the receipt (e.g., "USD", "EUR", "CAD")
|
||||||
|
{user_location_context}
|
||||||
|
LOCATION & TAX RULES:
|
||||||
|
- Extract location from receipt (look for store address, province/state, country)
|
||||||
|
- Format location as "Province/State, Country" (e.g., "Ontario, Canada" or "California, USA")
|
||||||
|
- If location not shown on receipt, return null for location (system will use user location as fallback)
|
||||||
|
|
||||||
|
TAX EXTRACTION RULES (IMPORTANT):
|
||||||
|
- If tax is EXPLICITLY shown on receipt (even if $0 or 0%), use that exact value:
|
||||||
|
* If receipt shows "Tax: $0", "Tax: $0.00", "Tax (0%)", or similar → set tax_amount to 0.00 and calculated_tax to null
|
||||||
|
* If receipt shows any other tax amount → set tax_amount to that value and calculated_tax to null
|
||||||
|
|
||||||
|
- If tax_amount is NOT shown or UNCLEAR on receipt, calculate it based on location:
|
||||||
|
* Ontario, Canada: 13% HST
|
||||||
|
* Quebec, Canada: 9.975% QST + 5% GST = 14.975% total
|
||||||
|
* British Columbia, Canada: 12% (5% GST + 7% PST)
|
||||||
|
* Alberta, Canada: 5% GST
|
||||||
|
* California, USA: ~7.25% (varies by locality)
|
||||||
|
* New York, USA: ~8.875% (varies by locality)
|
||||||
|
* Texas, USA: 6.25%
|
||||||
|
* For other locations, estimate based on typical rates
|
||||||
|
* Store calculated tax in "calculated_tax" field and set tax_amount to the calculated value
|
||||||
|
|
||||||
|
DEPRECIATION RULES:
|
||||||
|
- Determine if item is a depreciable asset (vehicles, machinery, equipment, computers, furniture, buildings)
|
||||||
|
- Set is_depreciable to true only for capital assets, false for consumables/services
|
||||||
|
- If is_depreciable is true, provide:
|
||||||
|
* name_of_asset: Specific name/model of the asset (e.g., "2024 Honda Accord", "Dell Laptop XPS 15", "Office Desk")
|
||||||
|
* cca_rate: CCA rate as decimal (e.g., 0.30 for 30%, 0.20 for 20%, 0.04 for 4%)
|
||||||
|
- Class 10 (Vehicles): 30%
|
||||||
|
- Class 8 (Furniture, equipment): 20%
|
||||||
|
- Class 50 (Computers, software): 55%
|
||||||
|
- Class 1 (Buildings): 4%
|
||||||
|
- Class 10.1 (Passenger vehicles >$30k): 30%
|
||||||
|
* useful_life: Expected years of use (e.g., 5 for computers, 8 for vehicles, 10 for furniture)
|
||||||
|
* residual_value: Estimated value at end of life (typically 10% of purchase price for equipment, 20% for vehicles)
|
||||||
|
- If is_depreciable is false, set name_of_asset, cca_rate, useful_life, and residual_value to null
|
||||||
|
|
||||||
Return only valid JSON.
|
Return only valid JSON.
|
||||||
"""
|
"""
|
||||||
@@ -155,7 +401,7 @@ class DocumentProcessor:
|
|||||||
response = self.client.chat.completions.create(
|
response = self.client.chat.completions.create(
|
||||||
model=self.model,
|
model=self.model,
|
||||||
messages=[{"role": "user", "content": prompt}],
|
messages=[{"role": "user", "content": prompt}],
|
||||||
max_tokens=500,
|
max_tokens=800,
|
||||||
temperature=0.1,
|
temperature=0.1,
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -168,11 +414,16 @@ class DocumentProcessor:
|
|||||||
def _parse_extraction_result(self, result_text: str) -> Dict[str, Any]:
|
def _parse_extraction_result(self, result_text: str) -> Dict[str, Any]:
|
||||||
"""Parse Groq response and extract JSON data"""
|
"""Parse Groq response and extract JSON data"""
|
||||||
try:
|
try:
|
||||||
# Clean up response and extract JSON
|
|
||||||
import json
|
|
||||||
import re
|
import re
|
||||||
|
|
||||||
# Find JSON in response - try multiple patterns
|
# Try robust JSON extraction first (handles extra text)
|
||||||
|
try:
|
||||||
|
data = self._extract_first_json(result_text)
|
||||||
|
return data
|
||||||
|
except (json.JSONDecodeError, ValueError) as e:
|
||||||
|
logger.warning(f"Robust JSON extraction failed: {e}. Trying fallback methods...")
|
||||||
|
|
||||||
|
# Fallback: Find JSON in response - try multiple patterns
|
||||||
json_match = re.search(r"\{.*\}", result_text, re.DOTALL)
|
json_match = re.search(r"\{.*\}", result_text, re.DOTALL)
|
||||||
if json_match:
|
if json_match:
|
||||||
json_str = json_match.group()
|
json_str = json_match.group()
|
||||||
@@ -189,7 +440,7 @@ class DocumentProcessor:
|
|||||||
data = json.loads(json_str)
|
data = json.loads(json_str)
|
||||||
except json.JSONDecodeError as e:
|
except json.JSONDecodeError as e:
|
||||||
# Try to fix common JSON issues
|
# Try to fix common JSON issues
|
||||||
logger.warning(f"Initial JSON parsing failed: {e}")
|
logger.warning(f"Fallback JSON parsing also failed: {e}")
|
||||||
|
|
||||||
# Try to extract individual fields using regex
|
# Try to extract individual fields using regex
|
||||||
vendor_match = re.search(r'"vendor"\s*:\s*"([^"]*)"', json_str)
|
vendor_match = re.search(r'"vendor"\s*:\s*"([^"]*)"', json_str)
|
||||||
@@ -207,8 +458,25 @@ class DocumentProcessor:
|
|||||||
confidence_match = re.search(
|
confidence_match = re.search(
|
||||||
r'"confidence"\s*:\s*([0-9.]+)', json_str
|
r'"confidence"\s*:\s*([0-9.]+)', json_str
|
||||||
)
|
)
|
||||||
currency_match = re.search(
|
currency_match = re.search(r'"currency"\s*:\s*"([^"]*)"', json_str)
|
||||||
r'"currency"\s*:\s*"([^"]*)"', json_str
|
location_match = re.search(r'"location"\s*:\s*"([^"]*)"', json_str)
|
||||||
|
calculated_tax_match = re.search(
|
||||||
|
r'"calculated_tax"\s*:\s*([0-9.]+|null)', json_str
|
||||||
|
)
|
||||||
|
is_depreciable_match = re.search(
|
||||||
|
r'"is_depreciable"\s*:\s*(true|false)', json_str
|
||||||
|
)
|
||||||
|
name_of_asset_match = re.search(
|
||||||
|
r'"name_of_asset"\s*:\s*"([^"]*)"', json_str
|
||||||
|
)
|
||||||
|
cca_rate_match = re.search(
|
||||||
|
r'"cca_rate"\s*:\s*([0-9.]+|null)', json_str
|
||||||
|
)
|
||||||
|
useful_life_match = re.search(
|
||||||
|
r'"useful_life"\s*:\s*([0-9]+|null)', json_str
|
||||||
|
)
|
||||||
|
residual_value_match = re.search(
|
||||||
|
r'"residual_value"\s*:\s*([0-9.]+|null)', json_str
|
||||||
)
|
)
|
||||||
|
|
||||||
data = {
|
data = {
|
||||||
@@ -229,7 +497,30 @@ class DocumentProcessor:
|
|||||||
"confidence": float(confidence_match.group(1))
|
"confidence": float(confidence_match.group(1))
|
||||||
if confidence_match
|
if confidence_match
|
||||||
else 0.5,
|
else 0.5,
|
||||||
"currency": currency_match.group(1) if currency_match else "CAD"
|
"currency": currency_match.group(1)
|
||||||
|
if currency_match
|
||||||
|
else "CAD",
|
||||||
|
"location": location_match.group(1) if location_match else None,
|
||||||
|
"calculated_tax": float(calculated_tax_match.group(1))
|
||||||
|
if calculated_tax_match
|
||||||
|
and calculated_tax_match.group(1) != "null"
|
||||||
|
else None,
|
||||||
|
"is_depreciable": is_depreciable_match.group(1) == "true"
|
||||||
|
if is_depreciable_match
|
||||||
|
else None,
|
||||||
|
"name_of_asset": name_of_asset_match.group(1)
|
||||||
|
if name_of_asset_match
|
||||||
|
else None,
|
||||||
|
"cca_rate": float(cca_rate_match.group(1))
|
||||||
|
if cca_rate_match and cca_rate_match.group(1) != "null"
|
||||||
|
else None,
|
||||||
|
"useful_life": int(useful_life_match.group(1))
|
||||||
|
if useful_life_match and useful_life_match.group(1) != "null"
|
||||||
|
else None,
|
||||||
|
"residual_value": float(residual_value_match.group(1))
|
||||||
|
if residual_value_match
|
||||||
|
and residual_value_match.group(1) != "null"
|
||||||
|
else None,
|
||||||
}
|
}
|
||||||
|
|
||||||
# Validate and clean data
|
# Validate and clean data
|
||||||
@@ -243,6 +534,13 @@ class DocumentProcessor:
|
|||||||
"confidence": float(data.get("confidence", 0.5)),
|
"confidence": float(data.get("confidence", 0.5)),
|
||||||
"extraction_success": True,
|
"extraction_success": True,
|
||||||
"currency": data.get("currency", "CAD").strip(),
|
"currency": data.get("currency", "CAD").strip(),
|
||||||
|
"location": data.get("location"),
|
||||||
|
"calculated_tax": data.get("calculated_tax"),
|
||||||
|
"is_depreciable": data.get("is_depreciable"),
|
||||||
|
"name_of_asset": data.get("name_of_asset"),
|
||||||
|
"cca_rate": data.get("cca_rate"),
|
||||||
|
"useful_life": data.get("useful_life"),
|
||||||
|
"residual_value": data.get("residual_value"),
|
||||||
}
|
}
|
||||||
else:
|
else:
|
||||||
# Try to extract fields from plain text
|
# Try to extract fields from plain text
|
||||||
@@ -312,6 +610,13 @@ class DocumentProcessor:
|
|||||||
"category": "Other",
|
"category": "Other",
|
||||||
"confidence": 0.3, # Low confidence for text extraction
|
"confidence": 0.3, # Low confidence for text extraction
|
||||||
"extraction_success": True,
|
"extraction_success": True,
|
||||||
|
"location": None,
|
||||||
|
"calculated_tax": None,
|
||||||
|
"is_depreciable": None,
|
||||||
|
"name_of_asset": None,
|
||||||
|
"cca_rate": None,
|
||||||
|
"useful_life": None,
|
||||||
|
"residual_value": None,
|
||||||
}
|
}
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
@@ -325,6 +630,13 @@ class DocumentProcessor:
|
|||||||
"confidence": 0.1,
|
"confidence": 0.1,
|
||||||
"extraction_success": False,
|
"extraction_success": False,
|
||||||
"error": f"Text extraction failed: {str(e)}",
|
"error": f"Text extraction failed: {str(e)}",
|
||||||
|
"location": None,
|
||||||
|
"calculated_tax": None,
|
||||||
|
"is_depreciable": None,
|
||||||
|
"name_of_asset": None,
|
||||||
|
"cca_rate": None,
|
||||||
|
"useful_life": None,
|
||||||
|
"residual_value": None,
|
||||||
}
|
}
|
||||||
|
|
||||||
async def save_uploaded_file(self, file_content: bytes, filename: str) -> str:
|
async def save_uploaded_file(self, file_content: bytes, filename: str) -> str:
|
||||||
|
|||||||
@@ -48,7 +48,7 @@ class LLMTaxAnalyzer:
|
|||||||
|
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
self.client = groq.Groq(api_key=settings.GROQ_API_KEY)
|
self.client = groq.Groq(api_key=settings.GROQ_API_KEY)
|
||||||
self.model = "llama-3.1-8b-instant"
|
self.model = settings.model
|
||||||
self.max_retries = 3
|
self.max_retries = 3
|
||||||
|
|
||||||
def analyze_and_apply_tax_rules_batch(
|
def analyze_and_apply_tax_rules_batch(
|
||||||
|
|||||||
@@ -1,8 +1,9 @@
|
|||||||
from typing import Any, Dict, List
|
from typing import Any, Dict, List, Optional
|
||||||
|
|
||||||
from schemas import Match, Receipt, Transaction
|
from schemas import Match, Receipt, Transaction
|
||||||
from services.ai_matcher import AIMatcher
|
from services.ai_matcher import AIMatcher
|
||||||
from services.ai_rules import AIRulesEngine
|
from services.ai_rules import AIRulesEngine
|
||||||
|
from services.ai_rules_matcher import AIRulesMatcher
|
||||||
from services.feedback_logger import FeedbackLogger
|
from services.feedback_logger import FeedbackLogger
|
||||||
from services.llm_tax_analyzer import LLMTaxAnalyzer
|
from services.llm_tax_analyzer import LLMTaxAnalyzer
|
||||||
from services.manual_tax_calculator import ManualTaxCalculator
|
from services.manual_tax_calculator import ManualTaxCalculator
|
||||||
@@ -15,6 +16,7 @@ class MatchingEngine:
|
|||||||
self.feedback_logger = FeedbackLogger()
|
self.feedback_logger = FeedbackLogger()
|
||||||
self.llm_tax_analyzer = LLMTaxAnalyzer()
|
self.llm_tax_analyzer = LLMTaxAnalyzer()
|
||||||
self.manual_tax_calculator = ManualTaxCalculator()
|
self.manual_tax_calculator = ManualTaxCalculator()
|
||||||
|
self.ai_rules_matcher = AIRulesMatcher()
|
||||||
self.use_manual_tax_calculator = use_manual_tax_calculator
|
self.use_manual_tax_calculator = use_manual_tax_calculator
|
||||||
|
|
||||||
def process_matching(
|
def process_matching(
|
||||||
@@ -22,51 +24,51 @@ class MatchingEngine:
|
|||||||
receipts: List[Receipt],
|
receipts: List[Receipt],
|
||||||
transactions: List[Transaction],
|
transactions: List[Transaction],
|
||||||
user_location: str = "ON",
|
user_location: str = "ON",
|
||||||
|
ai_rules: Optional[List[Dict]] = None,
|
||||||
) -> List[Match]:
|
) -> List[Match]:
|
||||||
# Get AI matches
|
# Get AI matches
|
||||||
ai_matches = self.ai_matcher.match_receipts_to_transactions(
|
ai_matches = self.ai_matcher.match_receipts_to_transactions(
|
||||||
receipts, transactions
|
receipts, transactions
|
||||||
)
|
)
|
||||||
|
|
||||||
# Apply traditional rules first (lightweight, no API calls)
|
# # Apply traditional rules first (lightweight, no API calls)
|
||||||
for match in ai_matches:
|
# for match in ai_matches:
|
||||||
rule_results = self.rules_engine.apply_rules(
|
# rule_results = self.rules_engine.apply_rules(
|
||||||
match.receipt, match.transaction
|
# match.receipt, match.transaction
|
||||||
)
|
# )
|
||||||
|
|
||||||
# Apply confidence boost from traditional rules
|
# # Apply confidence boost from traditional rules
|
||||||
if rule_results["confidence_boost"] > 0:
|
# if rule_results["confidence_boost"] > 0:
|
||||||
match.confidence_score = min(
|
# match.confidence_score = min(
|
||||||
1.0, match.confidence_score + rule_results["confidence_boost"]
|
# 1.0, match.confidence_score + rule_results["confidence_boost"]
|
||||||
)
|
# )
|
||||||
|
|
||||||
# Auto-approve if rules say so
|
# # Auto-approve if rules say so
|
||||||
if rule_results["auto_approve"]:
|
# if rule_results["auto_approve"]:
|
||||||
match.confidence_score = 1.0
|
# match.confidence_score = 1.0
|
||||||
match.match_reason += " (Auto-approved by rules)"
|
# match.match_reason += " (Auto-approved by rules)"
|
||||||
|
|
||||||
# Apply tax analysis - use manual calculator or LLM based on configuration
|
# # Apply tax analysis - use manual calculator or LLM based on configuration
|
||||||
if self.use_manual_tax_calculator:
|
# if self.use_manual_tax_calculator:
|
||||||
# Use deterministic rule-based calculator
|
# # Use deterministic rule-based calculator
|
||||||
enhanced_matches = self._apply_manual_tax_analysis(
|
# enhanced_matches = self._apply_manual_tax_analysis(
|
||||||
ai_matches, user_location
|
# ai_matches, user_location
|
||||||
|
# )
|
||||||
|
# else:
|
||||||
|
# # No tax analysis, just use the matches as-is
|
||||||
|
# enhanced_matches = ai_matches
|
||||||
|
|
||||||
|
# Apply AI rules for post-matching evaluation
|
||||||
|
# This adds flag_for_review and auto_approve fields based on custom rules
|
||||||
|
if ai_rules:
|
||||||
|
enhanced_matches = self.ai_rules_matcher.apply_rules_to_matches(
|
||||||
|
ai_matches, ai_rules
|
||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
# Use LLM-based tax analysis in a SINGLE batch call
|
# Even without custom rules, apply built-in rules (e.g., currency mismatch)
|
||||||
try:
|
enhanced_matches = self.ai_rules_matcher.apply_rules_to_matches(
|
||||||
enhanced_matches = (
|
ai_matches, None
|
||||||
self.llm_tax_analyzer.analyze_and_apply_tax_rules_batch(
|
)
|
||||||
ai_matches, user_location
|
|
||||||
)
|
|
||||||
)
|
|
||||||
except Exception as e:
|
|
||||||
# If batch LLM analysis fails, log it and continue with matches as-is
|
|
||||||
import logging
|
|
||||||
|
|
||||||
logging.error(f"Batch LLM tax analysis failed: {str(e)}")
|
|
||||||
for match in ai_matches:
|
|
||||||
match.match_reason += " (Note: Advanced tax analysis unavailable)"
|
|
||||||
enhanced_matches = ai_matches
|
|
||||||
|
|
||||||
return enhanced_matches
|
return enhanced_matches
|
||||||
|
|
||||||
|
|||||||
@@ -44,16 +44,6 @@ If the location is missing, apply the user’s location sales tax by default.
|
|||||||
**FX (Foreign Exchange):** If the receipt is in a different currency, flag the FX difference for manual review but don’t fetch exchange rates.
|
**FX (Foreign Exchange):** If the receipt is in a different currency, flag the FX difference for manual review but don’t fetch exchange rates.
|
||||||
|
|
||||||
|
|
||||||
### Tax Rules:
|
|
||||||
Four Rules for Tax and Depreciation Handling
|
|
||||||
### 1. **Sales Tax Rule**
|
|
||||||
**Purpose**: To calculate and apply the correct sales tax based on the shipping and billing addresses.
|
|
||||||
- **When Billing and Shipping Address are the Same**: Apply the sales tax rate based on the billing address.
|
|
||||||
- **When Billing and Shipping Address are Different**: Apply the sales tax rate based on the shipping address.
|
|
||||||
|
|
||||||
**Example**:
|
|
||||||
1. If the billing and shipping address are in Ontario, the system will apply the 13% HST tax rate based on Ontario's tax rate.
|
|
||||||
2. If the billing address is in Ontario but the shipping address is in Quebec, the system will apply the 14.975% QST tax rate based on the shipping address.
|
|
||||||
|
|
||||||
### 2. **Foreign Exchange (FX) Rule**
|
### 2. **Foreign Exchange (FX) Rule**
|
||||||
**Purpose**: To handle discrepancies when transactions and receipts are in different currencies (e.g., USD vs. CAD).
|
**Purpose**: To handle discrepancies when transactions and receipts are in different currencies (e.g., USD vs. CAD).
|
||||||
|
|||||||
@@ -1,892 +0,0 @@
|
|||||||
INFO: Started server process [18995]
|
|
||||||
INFO: Waiting for application startup.
|
|
||||||
INFO: Application startup complete.
|
|
||||||
INFO: Uvicorn running on http://0.0.0.0:8765 (Press CTRL+C to quit)
|
|
||||||
INFO: Shutting down
|
|
||||||
INFO: Waiting for application shutdown.
|
|
||||||
INFO: Application shutdown complete.
|
|
||||||
INFO: Finished server process [18995]
|
|
||||||
INFO: Started server process [19157]
|
|
||||||
INFO: Waiting for application startup.
|
|
||||||
INFO: Application startup complete.
|
|
||||||
INFO: Uvicorn running on http://0.0.0.0:8654 (Press CTRL+C to quit)
|
|
||||||
INFO: 102.89.45.216:11636 - "POST /transactions/import/csv HTTP/1.1" 200 OK
|
|
||||||
INFO: 102.89.45.216:14600 - "POST /transactions/import/csv HTTP/1.1" 200 OK
|
|
||||||
INFO:__main__:Starting match-specific for file IDs: ['0b3d64a4-c558-43cb-bf57-a6561205f1e6', 'e96d57f5-2070-43d6-8044-1d68106a3c27', 'bae25e20-2425-4db3-a3fc-adcb09c7d431', 'bfb36530-62f6-489a-b0b9-970ab8e7c20c', '0b4db1d9-670b-4dd7-bd3a-dfa39897acbb', '8fbf46d7-5f7b-4b01-a5d1-173adcb55748', 'e779f8ce-9f9a-4575-af8c-4558c6405977', 'ee595b47-e9b8-4c82-82e6-7490d716baa7'], categorization_id: cat_mgchkov1_x8jntm
|
|
||||||
INFO:__main__:Found 7 transactions in database
|
|
||||||
INFO:__main__:Converted 7 transactions
|
|
||||||
INFO:__main__:Successfully loaded receipt for file_id: 0b3d64a4-c558-43cb-bf57-a6561205f1e6
|
|
||||||
INFO:__main__:Successfully loaded receipt for file_id: e96d57f5-2070-43d6-8044-1d68106a3c27
|
|
||||||
INFO:__main__:Successfully loaded receipt for file_id: bae25e20-2425-4db3-a3fc-adcb09c7d431
|
|
||||||
INFO:__main__:Successfully loaded receipt for file_id: bfb36530-62f6-489a-b0b9-970ab8e7c20c
|
|
||||||
INFO:__main__:Successfully loaded receipt for file_id: 0b4db1d9-670b-4dd7-bd3a-dfa39897acbb
|
|
||||||
INFO:__main__:Successfully loaded receipt for file_id: 8fbf46d7-5f7b-4b01-a5d1-173adcb55748
|
|
||||||
INFO:__main__:Successfully loaded receipt for file_id: e779f8ce-9f9a-4575-af8c-4558c6405977
|
|
||||||
INFO:__main__:Successfully loaded receipt for file_id: ee595b47-e9b8-4c82-82e6-7490d716baa7
|
|
||||||
INFO:__main__:Found 8 receipts, 0 missing
|
|
||||||
INFO:__main__:Starting matching with 8 receipts and 7 transactions
|
|
||||||
INFO:services.ai_matcher:Starting AI matching for 8 receipts against 7 transactions
|
|
||||||
INFO:services.ai_matcher:Processing receipt 1/8: PAYPAL *BZA BAWSKYJ - $37.55
|
|
||||||
INFO:services.ai_matcher:Found 1 candidates for receipt: PAYPAL *BZA BAWSKYJ
|
|
||||||
INFO:httpx:HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
|
|
||||||
INFO:services.ai_matcher:AI selected candidate 1: BOOKS BY BESSIE (score: 0.000)
|
|
||||||
INFO:services.ai_matcher:Found match: 0.000 - No meaningful similarity
|
|
||||||
|
|
||||||
The reason for this low score is that none of the candidate transactions have a perfect match with the receipt. The closest candidate is Candidate 1, but it has significant differences in vendor name, amount, and date, resulting in a very low confidence score.
|
|
||||||
INFO:services.ai_matcher:Processing receipt 2/8: Figma, Inc. - $27.0
|
|
||||||
INFO:services.ai_matcher:Found 1 candidates for receipt: Figma, Inc.
|
|
||||||
INFO:httpx:HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
|
|
||||||
INFO:services.ai_matcher:AI selected candidate 1: BOOKS BY BESSIE (score: 0.000)
|
|
||||||
INFO:services.ai_matcher:Found match: 0.000 - No meaningful similarity
|
|
||||||
|
|
||||||
However, since I'm not allowed to return "NONE" and must return the best match, I'll provide the next best option:
|
|
||||||
|
|
||||||
1|0.0|No meaningful similarity
|
|
||||||
|
|
||||||
Since there are no perfect matches, I'll consider the next best option.
|
|
||||||
|
|
||||||
Candidate 1 has a vendor name difference, amount difference, and date difference. However, it's the closest option available.
|
|
||||||
|
|
||||||
1|0.0|No meaningful similarity
|
|
||||||
|
|
||||||
However, I can provide a more detailed explanation of why it's the best option available.
|
|
||||||
|
|
||||||
The vendor name difference is significant, with "Figma, Inc." and "BOOKS BY BESSIE" being unrelated. The amount difference is also significant, with $27.0 and $55.0 being 103.7% apart. The date difference is 136 days, which is a significant difference.
|
|
||||||
|
|
||||||
However, since I
|
|
||||||
INFO:services.ai_matcher:Processing receipt 3/8: Eleven Labs Inc. - $111.87
|
|
||||||
INFO:services.ai_matcher:Found 3 candidates for receipt: Eleven Labs Inc.
|
|
||||||
INFO:httpx:HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
|
|
||||||
WARNING:services.ai_matcher:Could not parse single match response: To determine the best match, I will evaluate each candidate based on the scoring criteria.
|
|
||||||
|
|
||||||
Candidate 1:
|
|
||||||
- Vendor similarity: 0.0 (A1 RENTAL BACKHOE DEPOSIT REFUND vs Eleven Labs Inc.)
|
|
||||||
- Amount difference: 88.13 (78.8%)
|
|
||||||
- Date difference: 115 days
|
|
||||||
- Description/notes relevance: 0.0 (no relevance)
|
|
||||||
- Total score: 0.0
|
|
||||||
|
|
||||||
Candidate 2:
|
|
||||||
- Vendor similarity: 0.0 (BOOKS BY BESSIE vs Eleven Labs Inc.)
|
|
||||||
- Amount difference: 56.87 (50.8%)
|
|
||||||
- Date difference: 145 days
|
|
||||||
- Description/notes relevance: 0.0 (no relevance)
|
|
||||||
- Total score: 0.0
|
|
||||||
|
|
||||||
Candidate 3:
|
|
||||||
- Vendor similarity: 0.0 (No Vendor vs Eleven Labs Inc.)
|
|
||||||
- Amount difference: 106.88 (95.5%)
|
|
||||||
- Date difference: 87 days
|
|
||||||
WARNING:services.ai_matcher:Failed to parse AI response for receipt: Eleven Labs Inc.
|
|
||||||
WARNING:services.ai_matcher:No match found for receipt: Eleven Labs Inc. - $111.87
|
|
||||||
INFO:services.ai_matcher:Processing receipt 4/8: Twitter, Inc. - $4.0
|
|
||||||
WARNING:services.ai_matcher:No candidates found for receipt: Twitter, Inc. - $4.0
|
|
||||||
WARNING:services.ai_matcher:No match found for receipt: Twitter, Inc. - $4.0
|
|
||||||
INFO:services.ai_matcher:Processing receipt 5/8: PAYPAL *BZABAWSKYJ - $37.55
|
|
||||||
INFO:services.ai_matcher:Found 1 candidates for receipt: PAYPAL *BZABAWSKYJ
|
|
||||||
INFO:httpx:HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
|
|
||||||
INFO:services.ai_matcher:AI selected candidate 1: BOOKS BY BESSIE (score: 0.000)
|
|
||||||
INFO:services.ai_matcher:Found match: 0.000 - No meaningful similarity
|
|
||||||
|
|
||||||
This is because none of the candidate transactions have a perfect match with the receipt. However, I must return the candidate with the highest match score, even if it's very low.
|
|
||||||
|
|
||||||
To calculate the match score, I considered the following:
|
|
||||||
|
|
||||||
- Vendor name similarity: None of the candidate transactions have a vendor name that matches the receipt.
|
|
||||||
- Amount accuracy: The amount on the receipt ($37.55) does not match any of the candidate transactions.
|
|
||||||
- Date proximity: The date on the receipt (2023-05-22) is significantly different from the dates on the candidate transactions.
|
|
||||||
- Description/notes relevance: None of the candidate transactions have a description or notes that match the receipt.
|
|
||||||
|
|
||||||
Since none of the candidate transactions have a meaningful similarity with the receipt, the best match is the one with the lowest possible score, which is 0.0.
|
|
||||||
INFO:services.ai_matcher:Processing receipt 6/8: Figma, Inc. - $27.0
|
|
||||||
INFO:services.ai_matcher:Found 1 candidates for receipt: Figma, Inc.
|
|
||||||
INFO:httpx:HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
|
|
||||||
INFO:services.ai_matcher:AI selected candidate 1: BOOKS BY BESSIE (score: 0.000)
|
|
||||||
INFO:services.ai_matcher:Found match: 0.000 - No meaningful similarity
|
|
||||||
|
|
||||||
However, since I must return the candidate with the highest match score, even if it's very low, I will provide the next best option:
|
|
||||||
|
|
||||||
5|0.2|Minimal similarity due to vendor name difference, amount difference of $28.0, and 136 days apart
|
|
||||||
INFO:services.ai_matcher:Processing receipt 7/8: Eleven Labs Inc. - $111.87
|
|
||||||
INFO:services.ai_matcher:Found 3 candidates for receipt: Eleven Labs Inc.
|
|
||||||
INFO:httpx:HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
|
|
||||||
WARNING:services.ai_matcher:Could not parse single match response: To determine the best match, I will analyze each candidate transaction against the given receipt.
|
|
||||||
|
|
||||||
Candidate 1:
|
|
||||||
- Vendor similarity: 0.0 (A1 RENTAL BACKHOE DEPOSIT REFUND vs Eleven Labs Inc.)
|
|
||||||
- Amount difference: 88.13 (78.8%)
|
|
||||||
- Date difference: 115 days
|
|
||||||
- Description/notes relevance: 0.0 (no relevance)
|
|
||||||
- Overall score: 0.0 (no meaningful similarity)
|
|
||||||
|
|
||||||
Candidate 2:
|
|
||||||
- Vendor similarity: 0.0 (BOOKS BY BESSIE vs Eleven Labs Inc.)
|
|
||||||
- Amount difference: 56.87 (50.8%)
|
|
||||||
- Date difference: 145 days
|
|
||||||
- Description/notes relevance: 0.0 (no relevance)
|
|
||||||
- Overall score: 0.0 (no meaningful similarity)
|
|
||||||
|
|
||||||
Candidate 3:
|
|
||||||
- Vendor similarity: 0.0 (No Vendor vs Eleven Labs Inc.)
|
|
||||||
- Amount difference: 106.88 (95.5%)
|
|
||||||
WARNING:services.ai_matcher:Failed to parse AI response for receipt: Eleven Labs Inc.
|
|
||||||
WARNING:services.ai_matcher:No match found for receipt: Eleven Labs Inc. - $111.87
|
|
||||||
INFO:services.ai_matcher:Processing receipt 8/8: Twitter, Inc. - $4.0
|
|
||||||
WARNING:services.ai_matcher:No candidates found for receipt: Twitter, Inc. - $4.0
|
|
||||||
WARNING:services.ai_matcher:No match found for receipt: Twitter, Inc. - $4.0
|
|
||||||
INFO:services.ai_matcher:AI matching completed. Found 4 matches
|
|
||||||
INFO:__main__:Matching completed, got 4 results
|
|
||||||
INFO:__main__:Generated stats: {'total': 4, 'high_confidence': 0, 'low_confidence': 4, 'avg_score': 0.0}
|
|
||||||
INFO:__main__:Match-specific completed successfully with 4 matches
|
|
||||||
INFO: 102.89.45.216:14600 - "POST /match-specific HTTP/1.1" 200 OK
|
|
||||||
INFO: 102.89.45.216:16587 - "POST /upload-multiple HTTP/1.1" 200 OK
|
|
||||||
INFO:httpx:HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
|
|
||||||
INFO: 102.89.45.216:16587 - "POST /process/a8969315-6ed6-4dcd-9a47-3eb542d85d64 HTTP/1.1" 200 OK
|
|
||||||
INFO: 102.89.45.216:16587 - "POST /upload-multiple HTTP/1.1" 200 OK
|
|
||||||
INFO:httpx:HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
|
|
||||||
INFO: 102.89.45.216:16587 - "POST /process/9845ef9d-2bd3-4803-93f8-d8d5bca0de7b HTTP/1.1" 200 OK
|
|
||||||
INFO: 102.89.45.216:16587 - "POST /upload-multiple HTTP/1.1" 200 OK
|
|
||||||
INFO:httpx:HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
|
|
||||||
WARNING:services.document_processor:Initial JSON parsing failed: Extra data: line 10 column 4 (char 246)
|
|
||||||
INFO: 102.89.45.216:16587 - "POST /process/ba36aa95-8fdb-4f16-973e-479f99da3100 HTTP/1.1" 200 OK
|
|
||||||
INFO: 102.89.45.216:16587 - "POST /upload-multiple HTTP/1.1" 200 OK
|
|
||||||
INFO:httpx:HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
|
|
||||||
INFO: 102.89.45.216:16587 - "POST /process/dc542f59-1105-470c-a401-56407f2bbecf HTTP/1.1" 200 OK
|
|
||||||
INFO: 102.89.45.216:16587 - "POST /upload-multiple HTTP/1.1" 200 OK
|
|
||||||
INFO:httpx:HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
|
|
||||||
INFO: 102.89.45.216:16587 - "POST /process/d0d43d67-1e25-47b8-bf74-8ce9695cb699 HTTP/1.1" 200 OK
|
|
||||||
INFO: 102.89.45.216:16533 - "POST /transactions/import/csv HTTP/1.1" 200 OK
|
|
||||||
INFO:__main__:Starting match-specific for file IDs: ['d0d43d67-1e25-47b8-bf74-8ce9695cb699', 'dc542f59-1105-470c-a401-56407f2bbecf', 'ba36aa95-8fdb-4f16-973e-479f99da3100', '9845ef9d-2bd3-4803-93f8-d8d5bca0de7b', 'a8969315-6ed6-4dcd-9a47-3eb542d85d64', '0b3d64a4-c558-43cb-bf57-a6561205f1e6', 'e96d57f5-2070-43d6-8044-1d68106a3c27', 'bae25e20-2425-4db3-a3fc-adcb09c7d431', 'bfb36530-62f6-489a-b0b9-970ab8e7c20c', '0b4db1d9-670b-4dd7-bd3a-dfa39897acbb', '8fbf46d7-5f7b-4b01-a5d1-173adcb55748', 'e779f8ce-9f9a-4575-af8c-4558c6405977', 'ee595b47-e9b8-4c82-82e6-7490d716baa7'], categorization_id: cat_mgci9kky_b9qz7l
|
|
||||||
INFO:__main__:Found 7 transactions in database
|
|
||||||
INFO:__main__:Converted 7 transactions
|
|
||||||
INFO:__main__:Successfully loaded receipt for file_id: d0d43d67-1e25-47b8-bf74-8ce9695cb699
|
|
||||||
INFO:__main__:Successfully loaded receipt for file_id: dc542f59-1105-470c-a401-56407f2bbecf
|
|
||||||
INFO:__main__:Successfully loaded receipt for file_id: ba36aa95-8fdb-4f16-973e-479f99da3100
|
|
||||||
INFO:__main__:Successfully loaded receipt for file_id: 9845ef9d-2bd3-4803-93f8-d8d5bca0de7b
|
|
||||||
INFO:__main__:Successfully loaded receipt for file_id: a8969315-6ed6-4dcd-9a47-3eb542d85d64
|
|
||||||
INFO:__main__:Successfully loaded receipt for file_id: 0b3d64a4-c558-43cb-bf57-a6561205f1e6
|
|
||||||
INFO:__main__:Successfully loaded receipt for file_id: e96d57f5-2070-43d6-8044-1d68106a3c27
|
|
||||||
INFO:__main__:Successfully loaded receipt for file_id: bae25e20-2425-4db3-a3fc-adcb09c7d431
|
|
||||||
INFO:__main__:Successfully loaded receipt for file_id: bfb36530-62f6-489a-b0b9-970ab8e7c20c
|
|
||||||
INFO:__main__:Successfully loaded receipt for file_id: 0b4db1d9-670b-4dd7-bd3a-dfa39897acbb
|
|
||||||
INFO:__main__:Successfully loaded receipt for file_id: 8fbf46d7-5f7b-4b01-a5d1-173adcb55748
|
|
||||||
INFO:__main__:Successfully loaded receipt for file_id: e779f8ce-9f9a-4575-af8c-4558c6405977
|
|
||||||
INFO:__main__:Successfully loaded receipt for file_id: ee595b47-e9b8-4c82-82e6-7490d716baa7
|
|
||||||
INFO:__main__:Found 13 receipts, 0 missing
|
|
||||||
INFO:__main__:Starting matching with 13 receipts and 7 transactions
|
|
||||||
INFO:services.ai_matcher:Starting AI matching for 13 receipts against 7 transactions
|
|
||||||
INFO:services.ai_matcher:Processing receipt 1/13: Figma, Inc. - $27.0
|
|
||||||
INFO:services.ai_matcher:Found 1 candidates for receipt: Figma, Inc.
|
|
||||||
INFO:httpx:HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
|
|
||||||
INFO:services.ai_matcher:AI selected candidate 1: BOOKS BY BESSIE (score: 0.000)
|
|
||||||
INFO:services.ai_matcher:Found match: 0.000 - No meaningful similarity
|
|
||||||
|
|
||||||
However, I can provide a more detailed analysis of why this is the case and what the closest match is.
|
|
||||||
|
|
||||||
The receipt has a vendor name of "Figma, Inc.", which does not match any of the candidate transactions. The closest match in terms of vendor name similarity is none, as there are no similar names.
|
|
||||||
|
|
||||||
The amount on the receipt is $27.0, which is significantly different from the amounts on the candidate transactions. The closest match in terms of amount accuracy is Candidate 1, but it has a difference of $28.0, which is a 103.7% difference.
|
|
||||||
|
|
||||||
The date on the receipt is 2025-06-19, which is also significantly different from the dates on the candidate transactions. The closest match in terms of date proximity is Candidate 1, but it is 136 days apart.
|
|
||||||
|
|
||||||
The description on the receipt is
|
|
||||||
INFO:services.ai_matcher:Processing receipt 2/13: Google LLC - $21.15
|
|
||||||
INFO:services.ai_matcher:Found 1 candidates for receipt: Google LLC
|
|
||||||
INFO:httpx:HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
|
|
||||||
INFO:services.ai_matcher:AI selected candidate 1: BOOKS BY BESSIE (score: 0.000)
|
|
||||||
INFO:services.ai_matcher:Found match: 0.000 - No meaningful similarity
|
|
||||||
|
|
||||||
The reason for this low score is that there are significant differences between the receipt and the candidate transactions. The vendor name is completely different ("Google LLC" vs. "BOOKS BY BESSIE"), the amount is significantly different ($21.15 vs. $55.0), and the date is 155 days apart.
|
|
||||||
INFO:services.ai_matcher:Processing receipt 3/13: PAYPAL *BZAABAWSKYJ - $37.55
|
|
||||||
INFO:services.ai_matcher:Found 1 candidates for receipt: PAYPAL *BZAABAWSKYJ
|
|
||||||
INFO:httpx:HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
|
|
||||||
INFO:services.ai_matcher:AI selected candidate 1: BOOKS BY BESSIE (score: 0.000)
|
|
||||||
INFO:services.ai_matcher:Found match: 0.000 - No meaningful similarity
|
|
||||||
|
|
||||||
However, since I must return the candidate with the highest match score, even if it's very low, I will provide the next best option:
|
|
||||||
|
|
||||||
5|0.15|Best available option despite significant differences in vendor and amount
|
|
||||||
INFO:services.ai_matcher:Processing receipt 4/13: Eleven Labs Inc. - $111.87
|
|
||||||
INFO:services.ai_matcher:Found 3 candidates for receipt: Eleven Labs Inc.
|
|
||||||
INFO:httpx:HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
|
|
||||||
INFO:services.ai_matcher:AI selected candidate 1: A1 RENTAL BACKHOE DEPOSIT REFUND (score: 0.000)
|
|
||||||
INFO:services.ai_matcher:Found match: 0.000 - No meaningful similarity
|
|
||||||
|
|
||||||
Explanation: None of the candidate transactions match the receipt in terms of vendor name, amount, date, or description. However, I must return a candidate, so I'm returning the first one with a confidence score of 0.0, indicating no meaningful similarity.
|
|
||||||
INFO:services.ai_matcher:Processing receipt 5/13: Figma, Inc. - $27.0
|
|
||||||
INFO:services.ai_matcher:Found 1 candidates for receipt: Figma, Inc.
|
|
||||||
INFO:httpx:HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
|
|
||||||
INFO:services.ai_matcher:AI selected candidate 1: BOOKS BY BESSIE (score: 0.000)
|
|
||||||
INFO:services.ai_matcher:Found match: 0.000 - No meaningful similarity
|
|
||||||
|
|
||||||
The reason for this low score is that there are significant differences in vendor name, amount, date, and description between the receipt and the candidate transactions. The vendor name is completely different, the amount is off by $28, the date is 136 days apart, and the description does not match.
|
|
||||||
INFO:services.ai_matcher:Processing receipt 6/13: PAYPAL *BZA BAWSKYJ - $37.55
|
|
||||||
INFO:services.ai_matcher:Found 1 candidates for receipt: PAYPAL *BZA BAWSKYJ
|
|
||||||
INFO:httpx:HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
|
|
||||||
INFO:services.ai_matcher:AI selected candidate 1: BOOKS BY BESSIE (score: 0.000)
|
|
||||||
INFO:services.ai_matcher:Found match: 0.000 - No meaningful similarity
|
|
||||||
|
|
||||||
However, since I'm not allowed to return "NONE" and must return the best match, I'll provide the next best option:
|
|
||||||
|
|
||||||
1|0.0|No meaningful similarity
|
|
||||||
|
|
||||||
Since there are no perfect matches, I'll look for the next best option.
|
|
||||||
|
|
||||||
Candidate 1 has a significant difference in vendor name (46.5%), amount difference (46.5%), and a large date difference (895 days). However, it's the only candidate available, so it's the best match.
|
|
||||||
|
|
||||||
1|0.0|No meaningful similarity
|
|
||||||
INFO:services.ai_matcher:Processing receipt 7/13: Figma, Inc. - $27.0
|
|
||||||
INFO:services.ai_matcher:Found 1 candidates for receipt: Figma, Inc.
|
|
||||||
INFO:httpx:HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
|
|
||||||
INFO:services.ai_matcher:AI selected candidate 1: BOOKS BY BESSIE (score: 0.000)
|
|
||||||
INFO:services.ai_matcher:Found match: 0.000 - No meaningful similarity
|
|
||||||
|
|
||||||
However, I can provide a more detailed explanation of why this is the case. None of the candidate transactions match the receipt perfectly, but I can calculate a score for each candidate based on the given criteria.
|
|
||||||
|
|
||||||
Candidate 1:
|
|
||||||
- Vendor name similarity: 0 ( BOOKS BY BESSIE vs Figma, Inc. )
|
|
||||||
- Amount accuracy: 0 ( $55.0 vs $27.0 )
|
|
||||||
- Date proximity: 0.007 ( 136 days difference )
|
|
||||||
- Description/notes relevance: 0 ( No relevance )
|
|
||||||
- Amount difference: 103.7% ( significant difference )
|
|
||||||
- Overall score: 0.0
|
|
||||||
|
|
||||||
Since none of the candidate transactions match the receipt perfectly, I will return the candidate with the highest score, which is still 0.0. However, I can suggest that the best available option is actually none of the
|
|
||||||
INFO:services.ai_matcher:Processing receipt 8/13: Eleven Labs Inc. - $111.87
|
|
||||||
INFO:services.ai_matcher:Found 3 candidates for receipt: Eleven Labs Inc.
|
|
||||||
INFO:httpx:HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
|
|
||||||
WARNING:services.ai_matcher:Could not parse single match response: To determine the best match, I will evaluate each candidate transaction based on the scoring criteria.
|
|
||||||
|
|
||||||
Candidate 1:
|
|
||||||
- Vendor similarity: 0.0 (Eleven Labs Inc. vs A1 RENTAL BACKHOE DEPOSIT REFUND)
|
|
||||||
- Amount difference: 88.13 (78.8%)
|
|
||||||
- Date difference: 115 days
|
|
||||||
- Description/notes relevance: 0.0 (no relevance)
|
|
||||||
- Total score: 0.0
|
|
||||||
|
|
||||||
Candidate 2:
|
|
||||||
- Vendor similarity: 0.0 (Eleven Labs Inc. vs BOOKS BY BESSIE)
|
|
||||||
- Amount difference: 56.87 (50.8%)
|
|
||||||
- Date difference: 145 days
|
|
||||||
- Description/notes relevance: 0.0 (no relevance)
|
|
||||||
- Total score: 0.0
|
|
||||||
|
|
||||||
Candidate 3:
|
|
||||||
- Vendor similarity: 0.0 (Eleven Labs Inc. vs No Vendor)
|
|
||||||
- Amount difference: 106.88 (95.5%)
|
|
||||||
-
|
|
||||||
WARNING:services.ai_matcher:Failed to parse AI response for receipt: Eleven Labs Inc.
|
|
||||||
WARNING:services.ai_matcher:No match found for receipt: Eleven Labs Inc. - $111.87
|
|
||||||
INFO:services.ai_matcher:Processing receipt 9/13: Twitter, Inc. - $4.0
|
|
||||||
WARNING:services.ai_matcher:No candidates found for receipt: Twitter, Inc. - $4.0
|
|
||||||
WARNING:services.ai_matcher:No match found for receipt: Twitter, Inc. - $4.0
|
|
||||||
INFO:services.ai_matcher:Processing receipt 10/13: PAYPAL *BZABAWSKYJ - $37.55
|
|
||||||
INFO:services.ai_matcher:Found 1 candidates for receipt: PAYPAL *BZABAWSKYJ
|
|
||||||
INFO:httpx:HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
|
|
||||||
INFO:services.ai_matcher:AI selected candidate 1: BOOKS BY BESSIE (score: 0.000)
|
|
||||||
INFO:services.ai_matcher:Found match: 0.000 - No meaningful similarity
|
|
||||||
|
|
||||||
The reason for this low score is that there are significant differences in vendor name, amount, and date between the receipt and the candidate transactions. The vendor name is completely different, the amount is off by $17.45, and the date is 895 days apart.
|
|
||||||
INFO:services.ai_matcher:Processing receipt 11/13: Figma, Inc. - $27.0
|
|
||||||
INFO:services.ai_matcher:Found 1 candidates for receipt: Figma, Inc.
|
|
||||||
INFO:httpx:HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
|
|
||||||
INFO:services.ai_matcher:AI selected candidate 1: BOOKS BY BESSIE (score: 0.000)
|
|
||||||
INFO:services.ai_matcher:Found match: 0.000 - No meaningful similarity
|
|
||||||
|
|
||||||
However, since I must return the candidate with the highest match score, even if it's very low, I will provide the next best option:
|
|
||||||
|
|
||||||
5|0.2|Minimal similarity due to vendor name difference, but same category and date proximity
|
|
||||||
INFO:services.ai_matcher:Processing receipt 12/13: Eleven Labs Inc. - $111.87
|
|
||||||
INFO:services.ai_matcher:Found 3 candidates for receipt: Eleven Labs Inc.
|
|
||||||
INFO:httpx:HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
|
|
||||||
WARNING:services.ai_matcher:Could not parse single match response: To determine the best match, I will evaluate each candidate transaction based on the scoring criteria.
|
|
||||||
|
|
||||||
Candidate 1:
|
|
||||||
- Vendor similarity: 0.0 (Eleven Labs Inc. vs A1 RENTAL BACKHOE DEPOSIT REFUND)
|
|
||||||
- Amount accuracy: 0.0 (no exact match)
|
|
||||||
- Date proximity: 0.0 (115 days difference)
|
|
||||||
- Description/notes relevance: 0.0 (no relevance)
|
|
||||||
Total score: 0.0
|
|
||||||
|
|
||||||
Candidate 2:
|
|
||||||
- Vendor similarity: 0.0 (Eleven Labs Inc. vs BOOKS BY BESSIE)
|
|
||||||
- Amount accuracy: 0.0 (no exact match)
|
|
||||||
- Date proximity: 0.0 (145 days difference)
|
|
||||||
- Description/notes relevance: 0.0 (no relevance)
|
|
||||||
Total score: 0.0
|
|
||||||
|
|
||||||
Candidate 3:
|
|
||||||
- Vendor similarity: 0.0 (Eleven Labs Inc. vs No Vendor)
|
|
||||||
- Amount accuracy: 0
|
|
||||||
WARNING:services.ai_matcher:Failed to parse AI response for receipt: Eleven Labs Inc.
|
|
||||||
WARNING:services.ai_matcher:No match found for receipt: Eleven Labs Inc. - $111.87
|
|
||||||
INFO:services.ai_matcher:Processing receipt 13/13: Twitter, Inc. - $4.0
|
|
||||||
WARNING:services.ai_matcher:No candidates found for receipt: Twitter, Inc. - $4.0
|
|
||||||
WARNING:services.ai_matcher:No match found for receipt: Twitter, Inc. - $4.0
|
|
||||||
INFO:services.ai_matcher:AI matching completed. Found 9 matches
|
|
||||||
INFO:__main__:Matching completed, got 9 results
|
|
||||||
INFO:__main__:Generated stats: {'total': 9, 'high_confidence': 0, 'low_confidence': 9, 'avg_score': 0.0}
|
|
||||||
INFO:__main__:Match-specific completed successfully with 9 matches
|
|
||||||
INFO: 102.89.45.216:11676 - "POST /match-specific HTTP/1.1" 200 OK
|
|
||||||
INFO: 102.89.45.216:28828 - "POST /transactions/import/csv HTTP/1.1" 200 OK
|
|
||||||
INFO: 102.89.45.216:14522 - "POST /transactions/import/csv HTTP/1.1" 200 OK
|
|
||||||
INFO: 102.89.45.216:2730 - "POST /transactions/import/csv HTTP/1.1" 200 OK
|
|
||||||
INFO:__main__:Starting match-specific for file IDs: ['d0d43d67-1e25-47b8-bf74-8ce9695cb699', 'dc542f59-1105-470c-a401-56407f2bbecf', 'ba36aa95-8fdb-4f16-973e-479f99da3100', '9845ef9d-2bd3-4803-93f8-d8d5bca0de7b', 'a8969315-6ed6-4dcd-9a47-3eb542d85d64', '0b3d64a4-c558-43cb-bf57-a6561205f1e6', 'e96d57f5-2070-43d6-8044-1d68106a3c27', 'bae25e20-2425-4db3-a3fc-adcb09c7d431', 'bfb36530-62f6-489a-b0b9-970ab8e7c20c', '0b4db1d9-670b-4dd7-bd3a-dfa39897acbb', '8fbf46d7-5f7b-4b01-a5d1-173adcb55748', 'e779f8ce-9f9a-4575-af8c-4558c6405977', 'ee595b47-e9b8-4c82-82e6-7490d716baa7'], categorization_id: cat_mgcolko1_wmfzzd
|
|
||||||
INFO:__main__:Found 119 transactions in database
|
|
||||||
INFO:__main__:Converted 119 transactions
|
|
||||||
INFO:__main__:Successfully loaded receipt for file_id: d0d43d67-1e25-47b8-bf74-8ce9695cb699
|
|
||||||
INFO:__main__:Successfully loaded receipt for file_id: dc542f59-1105-470c-a401-56407f2bbecf
|
|
||||||
INFO:__main__:Successfully loaded receipt for file_id: ba36aa95-8fdb-4f16-973e-479f99da3100
|
|
||||||
INFO:__main__:Successfully loaded receipt for file_id: 9845ef9d-2bd3-4803-93f8-d8d5bca0de7b
|
|
||||||
INFO:__main__:Successfully loaded receipt for file_id: a8969315-6ed6-4dcd-9a47-3eb542d85d64
|
|
||||||
INFO:__main__:Successfully loaded receipt for file_id: 0b3d64a4-c558-43cb-bf57-a6561205f1e6
|
|
||||||
INFO:__main__:Successfully loaded receipt for file_id: e96d57f5-2070-43d6-8044-1d68106a3c27
|
|
||||||
INFO:__main__:Successfully loaded receipt for file_id: bae25e20-2425-4db3-a3fc-adcb09c7d431
|
|
||||||
INFO:__main__:Successfully loaded receipt for file_id: bfb36530-62f6-489a-b0b9-970ab8e7c20c
|
|
||||||
INFO:__main__:Successfully loaded receipt for file_id: 0b4db1d9-670b-4dd7-bd3a-dfa39897acbb
|
|
||||||
INFO:__main__:Successfully loaded receipt for file_id: 8fbf46d7-5f7b-4b01-a5d1-173adcb55748
|
|
||||||
INFO:__main__:Successfully loaded receipt for file_id: e779f8ce-9f9a-4575-af8c-4558c6405977
|
|
||||||
INFO:__main__:Successfully loaded receipt for file_id: ee595b47-e9b8-4c82-82e6-7490d716baa7
|
|
||||||
INFO:__main__:Found 13 receipts, 0 missing
|
|
||||||
INFO:__main__:Starting matching with 13 receipts and 119 transactions
|
|
||||||
INFO:services.ai_matcher:Starting AI matching for 13 receipts against 119 transactions
|
|
||||||
INFO:services.ai_matcher:Processing receipt 1/13: Figma, Inc. - $27.0
|
|
||||||
INFO:services.ai_matcher:Found 44 candidates for receipt: Figma, Inc.
|
|
||||||
INFO:services.ai_matcher:Limited candidates to top 10 by amount similarity
|
|
||||||
INFO:httpx:HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
|
|
||||||
INFO:services.ai_matcher:AI selected candidate 8: Unknown (score: 0.290)
|
|
||||||
INFO:services.ai_matcher:Found match: 0.290 - Close amount match, relevant note about office expenses, but significant date difference
|
|
||||||
|
|
||||||
This candidate has a relatively low confidence score due to the significant date difference (85 days apart) and the fact that the vendor name is unknown. However, the amount difference is moderate ($8.03), and the note mentions "Bought lunch for crew 102" which could be related to office expenses, making it a slightly better match than the other candidates.
|
|
||||||
INFO:services.ai_matcher:Processing receipt 2/13: Google LLC - $21.15
|
|
||||||
INFO:services.ai_matcher:Found 25 candidates for receipt: Google LLC
|
|
||||||
INFO:services.ai_matcher:Limited candidates to top 10 by amount similarity
|
|
||||||
INFO:httpx:HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
|
|
||||||
INFO:services.ai_matcher:AI selected candidate 7: Unknown (score: 0.140)
|
|
||||||
INFO:services.ai_matcher:Found match: 0.140 - Closest amount match, but significant difference in vendor name and date
|
|
||||||
|
|
||||||
Reasoning:
|
|
||||||
|
|
||||||
- Vendor name similarity: 0 (Unknown vs Google LLC)
|
|
||||||
- Amount accuracy: 0.14 (18.08 vs 21.15, 14.5% difference)
|
|
||||||
- Date proximity: 0 (93 days difference)
|
|
||||||
- Description/notes relevance: 0 (Office Supplies vs Google Workspace)
|
|
||||||
|
|
||||||
Although the amount match is the closest among all candidates, the significant differences in vendor name and date result in a low confidence score.
|
|
||||||
INFO:services.ai_matcher:Processing receipt 3/13: PAYPAL *BZAABAWSKYJ - $37.55
|
|
||||||
INFO:services.ai_matcher:Found 62 candidates for receipt: PAYPAL *BZAABAWSKYJ
|
|
||||||
INFO:services.ai_matcher:Limited candidates to top 10 by amount similarity
|
|
||||||
INFO:httpx:HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
|
|
||||||
WARNING:services.ai_matcher:Could not parse single match response: After analyzing the receipt against all the candidate transactions, I found the best match to be:
|
|
||||||
|
|
||||||
Candidate 1: 0.09|Vendor name similarity, significant amount difference, and large date difference
|
|
||||||
|
|
||||||
Reason: Although the vendor name is unknown, the amount difference is relatively minor ($3.55) compared to other candidates. However, the date difference is significant (864 days), and the vendor name is unknown, resulting in a low confidence score.
|
|
||||||
WARNING:services.ai_matcher:Failed to parse AI response for receipt: PAYPAL *BZAABAWSKYJ
|
|
||||||
WARNING:services.ai_matcher:No match found for receipt: PAYPAL *BZAABAWSKYJ - $37.55
|
|
||||||
INFO:services.ai_matcher:Processing receipt 4/13: Eleven Labs Inc. - $111.87
|
|
||||||
INFO:services.ai_matcher:Found 90 candidates for receipt: Eleven Labs Inc.
|
|
||||||
INFO:services.ai_matcher:Limited candidates to top 10 by amount similarity
|
|
||||||
INFO:httpx:HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
|
|
||||||
INFO:services.ai_matcher:AI selected candidate 1: Unknown (score: 0.900)
|
|
||||||
INFO:services.ai_matcher:Found match: 0.900 - Same amount, minor difference in vendor name, and relatively close date
|
|
||||||
|
|
||||||
Reasoning:
|
|
||||||
- The amount matches exactly, with a minor difference of 0.1%.
|
|
||||||
- Although the vendor name is unknown, it's likely a typo or variation of Eleven Labs Inc.
|
|
||||||
- The date difference is 87 days, which is relatively close considering the other options.
|
|
||||||
|
|
||||||
This candidate has the highest match score, despite not being a perfect match.
|
|
||||||
INFO:services.ai_matcher:Processing receipt 5/13: Figma, Inc. - $27.0
|
|
||||||
INFO:services.ai_matcher:Found 44 candidates for receipt: Figma, Inc.
|
|
||||||
INFO:services.ai_matcher:Limited candidates to top 10 by amount similarity
|
|
||||||
INFO:httpx:HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
|
|
||||||
INFO:services.ai_matcher:AI selected candidate 8: Unknown (score: 0.290)
|
|
||||||
INFO:services.ai_matcher:Found match: 0.290 - Close amount match, relevant note about office expenses, but significant date difference
|
|
||||||
|
|
||||||
This candidate has a close amount match ($18.97 vs $27.0), a relevant note about office expenses, but a significant date difference of 85 days.
|
|
||||||
INFO:services.ai_matcher:Processing receipt 6/13: PAYPAL *BZA BAWSKYJ - $37.55
|
|
||||||
INFO:services.ai_matcher:Found 62 candidates for receipt: PAYPAL *BZA BAWSKYJ
|
|
||||||
INFO:services.ai_matcher:Limited candidates to top 10 by amount similarity
|
|
||||||
INFO:httpx:HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
|
|
||||||
WARNING:services.ai_matcher:Could not parse single match response: After analyzing the receipt against all the candidate transactions, I found the best match to be:
|
|
||||||
|
|
||||||
Candidate 1: 0.09|Vendor name similarity, significant amount difference, and large date difference
|
|
||||||
|
|
||||||
Reason: Although the vendor name is unknown, the description in the receipt contains the vendor's name, which is a good match. However, the amount difference is significant (9.5%), and the date difference is large (864 days). This is the best available option despite the significant differences.
|
|
||||||
WARNING:services.ai_matcher:Failed to parse AI response for receipt: PAYPAL *BZA BAWSKYJ
|
|
||||||
WARNING:services.ai_matcher:No match found for receipt: PAYPAL *BZA BAWSKYJ - $37.55
|
|
||||||
INFO:services.ai_matcher:Processing receipt 7/13: Figma, Inc. - $27.0
|
|
||||||
INFO:services.ai_matcher:Found 44 candidates for receipt: Figma, Inc.
|
|
||||||
INFO:services.ai_matcher:Limited candidates to top 10 by amount similarity
|
|
||||||
INFO:httpx:HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 429 Too Many Requests"
|
|
||||||
INFO:groq._base_client:Retrying request to /openai/v1/chat/completions in 9.000000 seconds
|
|
||||||
INFO:httpx:HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
|
|
||||||
INFO:services.ai_matcher:AI selected candidate 1: Unknown (score: 0.190)
|
|
||||||
INFO:services.ai_matcher:Found match: 0.190 - Vendor name similarity, amount difference of 9.8%, and no description match
|
|
||||||
|
|
||||||
This is because Candidate 1 has the closest vendor name similarity (Unknown vs Figma, Inc. is not possible, but it's the closest) and the smallest amount difference among all the candidates. Although the date difference is significant (62 days), it's still the best available option given the other factors.
|
|
||||||
INFO:services.ai_matcher:Processing receipt 8/13: Eleven Labs Inc. - $111.87
|
|
||||||
INFO:services.ai_matcher:Found 90 candidates for receipt: Eleven Labs Inc.
|
|
||||||
INFO:services.ai_matcher:Limited candidates to top 10 by amount similarity
|
|
||||||
INFO:httpx:HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 429 Too Many Requests"
|
|
||||||
INFO:groq._base_client:Retrying request to /openai/v1/chat/completions in 11.000000 seconds
|
|
||||||
INFO:httpx:HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
|
|
||||||
INFO:services.ai_matcher:AI selected candidate 1: Unknown (score: 0.900)
|
|
||||||
INFO:services.ai_matcher:Found match: 0.900 - Vendor name similarity, exact amount match, 87 days apart
|
|
||||||
|
|
||||||
Reasoning:
|
|
||||||
- Vendor name similarity: Although the vendor name is unknown, it's likely that Eleven Labs Inc. is a similar or related entity to the vendor in Candidate 1, given the context of the transaction.
|
|
||||||
- Amount accuracy: The amount in Candidate 1 ($112.0) is very close to the amount in the receipt ($111.87), with a difference of only 0.1%.
|
|
||||||
- Date proximity: The date in Candidate 1 (2025-09-05) is 87 days apart from the date in the receipt (2025-06-10), which is a relatively small difference.
|
|
||||||
- Description/notes relevance: Although the description in Candidate 1 is not directly related to the receipt, it mentions "Bank Equipment rental for 5 days," which could
|
|
||||||
INFO:services.ai_matcher:Processing receipt 9/13: Twitter, Inc. - $4.0
|
|
||||||
INFO:services.ai_matcher:Found 2 candidates for receipt: Twitter, Inc.
|
|
||||||
INFO:httpx:HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 429 Too Many Requests"
|
|
||||||
INFO:groq._base_client:Retrying request to /openai/v1/chat/completions in 7.000000 seconds
|
|
||||||
INFO:httpx:HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
|
|
||||||
WARNING:services.ai_matcher:Could not parse single match response: Based on the given receipt and candidate transactions, I will analyze each candidate and return the best match.
|
|
||||||
|
|
||||||
Candidate 1:
|
|
||||||
- Vendor: Unknown (0.0 similarity)
|
|
||||||
- Amount: $3.86 (3.5% difference from $4.0)
|
|
||||||
- Date: 2025-09-03 (65 days difference)
|
|
||||||
- Notes: Bank No Description (no relevance to "X Premium Basic")
|
|
||||||
|
|
||||||
Score: 0.6 (Medium confidence due to minor amount difference, but unknown vendor and no description relevance)
|
|
||||||
|
|
||||||
Candidate 2:
|
|
||||||
- Vendor: Unknown (0.0 similarity)
|
|
||||||
- Amount: $5.66 (41.5% difference from $4.0)
|
|
||||||
- Date: 2025-08-29 (60 days difference)
|
|
||||||
- Notes: Bank No Description (no relevance to "X Premium Basic")
|
|
||||||
|
|
||||||
Score: 0.4 (Low confidence due to significant amount difference and unknown vendor)
|
|
||||||
|
|
||||||
Since neither candidate has a perfect match, I will choose
|
|
||||||
WARNING:services.ai_matcher:Failed to parse AI response for receipt: Twitter, Inc.
|
|
||||||
WARNING:services.ai_matcher:No match found for receipt: Twitter, Inc. - $4.0
|
|
||||||
INFO:services.ai_matcher:Processing receipt 10/13: PAYPAL *BZABAWSKYJ - $37.55
|
|
||||||
INFO:services.ai_matcher:Found 62 candidates for receipt: PAYPAL *BZABAWSKYJ
|
|
||||||
INFO:services.ai_matcher:Limited candidates to top 10 by amount similarity
|
|
||||||
INFO:httpx:HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 429 Too Many Requests"
|
|
||||||
INFO:groq._base_client:Retrying request to /openai/v1/chat/completions in 9.000000 seconds
|
|
||||||
INFO:httpx:HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
|
|
||||||
WARNING:services.ai_matcher:Could not parse single match response: After analyzing the receipt against all the candidate transactions, I found the best match to be:
|
|
||||||
|
|
||||||
Candidate 1: 0.09|Vendor name similarity, significant amount difference, and large date difference
|
|
||||||
|
|
||||||
Reason: Although the amount difference is significant (9.5%), the vendor name similarity is the closest match among all candidates. The date difference is also substantial, but it's the best available option given the other differences.
|
|
||||||
WARNING:services.ai_matcher:Failed to parse AI response for receipt: PAYPAL *BZABAWSKYJ
|
|
||||||
WARNING:services.ai_matcher:No match found for receipt: PAYPAL *BZABAWSKYJ - $37.55
|
|
||||||
INFO:services.ai_matcher:Processing receipt 11/13: Figma, Inc. - $27.0
|
|
||||||
INFO:services.ai_matcher:Found 44 candidates for receipt: Figma, Inc.
|
|
||||||
INFO:services.ai_matcher:Limited candidates to top 10 by amount similarity
|
|
||||||
INFO:httpx:HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 429 Too Many Requests"
|
|
||||||
INFO:groq._base_client:Retrying request to /openai/v1/chat/completions in 10.000000 seconds
|
|
||||||
INFO:httpx:HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
|
|
||||||
INFO:services.ai_matcher:AI selected candidate 8: Unknown (score: 0.290)
|
|
||||||
INFO:services.ai_matcher:Found match: 0.290 - Closest amount match, minor date difference, and relevant note about office expenses
|
|
||||||
|
|
||||||
This candidate has a relatively low confidence score due to significant differences in vendor name and amount. However, it is the best available option given the provided candidate transactions.
|
|
||||||
INFO:services.ai_matcher:Processing receipt 12/13: Eleven Labs Inc. - $111.87
|
|
||||||
INFO:services.ai_matcher:Found 90 candidates for receipt: Eleven Labs Inc.
|
|
||||||
INFO:services.ai_matcher:Limited candidates to top 10 by amount similarity
|
|
||||||
INFO:httpx:HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 429 Too Many Requests"
|
|
||||||
INFO:groq._base_client:Retrying request to /openai/v1/chat/completions in 11.000000 seconds
|
|
||||||
INFO:httpx:HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
|
|
||||||
INFO:services.ai_matcher:AI selected candidate 1: Unknown (score: 0.900)
|
|
||||||
INFO:services.ai_matcher:Found match: 0.900 - Same amount, minor difference in vendor name, and relatively close date
|
|
||||||
|
|
||||||
Reasoning:
|
|
||||||
- Vendor name similarity: The vendor name is unknown in both the receipt and the candidate transaction, so it's not a strong match. However, it's not a major difference either.
|
|
||||||
- Amount accuracy: The amount is $111.87 in the receipt and $112.0 in the candidate transaction, which is a minor difference of 0.1%.
|
|
||||||
- Date proximity: The date is 2025-06-10 in the receipt and 2025-09-05 in the candidate transaction, which is a difference of 87 days. This is not ideal, but it's not a major difference either.
|
|
||||||
- Description/notes relevance: There is no description or notes in the receipt, but the candidate transaction has a note about bank equipment rental. This is not directly
|
|
||||||
INFO:services.ai_matcher:Processing receipt 13/13: Twitter, Inc. - $4.0
|
|
||||||
INFO:services.ai_matcher:Found 2 candidates for receipt: Twitter, Inc.
|
|
||||||
INFO:httpx:HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 429 Too Many Requests"
|
|
||||||
INFO:groq._base_client:Retrying request to /openai/v1/chat/completions in 7.000000 seconds
|
|
||||||
INFO:httpx:HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
|
|
||||||
WARNING:services.ai_matcher:Could not parse single match response: Based on the provided receipt and candidate transactions, I will analyze each candidate and return the best match.
|
|
||||||
|
|
||||||
Candidate 1:
|
|
||||||
- Vendor: Unknown (0.0 similarity)
|
|
||||||
- Amount: $3.86 (3.5% difference from $4.0)
|
|
||||||
- Date: 2025-09-03 (65 days difference)
|
|
||||||
- Notes: Bank No Description (no relevance to "X Premium Basic")
|
|
||||||
- Amount difference: $0.14000000000000012 (3.5%)
|
|
||||||
|
|
||||||
Score: 0.6 (Medium confidence, minor differences in amount and date)
|
|
||||||
|
|
||||||
Candidate 2:
|
|
||||||
- Vendor: Unknown (0.0 similarity)
|
|
||||||
- Amount: $5.66 (41.5% difference from $4.0)
|
|
||||||
- Date: 2025-08-29 (60 days difference)
|
|
||||||
- Notes: Bank No Description (no relevance to "X Premium Basic")
|
|
||||||
- Amount difference: $1.6600000000000001 (41.5
|
|
||||||
WARNING:services.ai_matcher:Failed to parse AI response for receipt: Twitter, Inc.
|
|
||||||
WARNING:services.ai_matcher:No match found for receipt: Twitter, Inc. - $4.0
|
|
||||||
INFO:services.ai_matcher:AI matching completed. Found 8 matches
|
|
||||||
INFO:__main__:Matching completed, got 8 results
|
|
||||||
INFO:__main__:Generated stats: {'total': 8, 'high_confidence': 3, 'low_confidence': 5, 'avg_score': 0.49}
|
|
||||||
INFO:__main__:Match-specific completed successfully with 8 matches
|
|
||||||
INFO:__main__:Starting match-specific for file IDs: ['d0d43d67-1e25-47b8-bf74-8ce9695cb699', 'dc542f59-1105-470c-a401-56407f2bbecf', 'ba36aa95-8fdb-4f16-973e-479f99da3100', '9845ef9d-2bd3-4803-93f8-d8d5bca0de7b', 'a8969315-6ed6-4dcd-9a47-3eb542d85d64', '0b3d64a4-c558-43cb-bf57-a6561205f1e6', 'e96d57f5-2070-43d6-8044-1d68106a3c27', 'bae25e20-2425-4db3-a3fc-adcb09c7d431', 'bfb36530-62f6-489a-b0b9-970ab8e7c20c', '0b4db1d9-670b-4dd7-bd3a-dfa39897acbb', '8fbf46d7-5f7b-4b01-a5d1-173adcb55748', 'e779f8ce-9f9a-4575-af8c-4558c6405977', 'ee595b47-e9b8-4c82-82e6-7490d716baa7'], categorization_id: cat_mgcolko1_wmfzzd
|
|
||||||
INFO:__main__:Found 119 transactions in database
|
|
||||||
INFO:__main__:Converted 119 transactions
|
|
||||||
INFO:__main__:Successfully loaded receipt for file_id: d0d43d67-1e25-47b8-bf74-8ce9695cb699
|
|
||||||
INFO:__main__:Successfully loaded receipt for file_id: dc542f59-1105-470c-a401-56407f2bbecf
|
|
||||||
INFO:__main__:Successfully loaded receipt for file_id: ba36aa95-8fdb-4f16-973e-479f99da3100
|
|
||||||
INFO:__main__:Successfully loaded receipt for file_id: 9845ef9d-2bd3-4803-93f8-d8d5bca0de7b
|
|
||||||
INFO:__main__:Successfully loaded receipt for file_id: a8969315-6ed6-4dcd-9a47-3eb542d85d64
|
|
||||||
INFO:__main__:Successfully loaded receipt for file_id: 0b3d64a4-c558-43cb-bf57-a6561205f1e6
|
|
||||||
INFO:__main__:Successfully loaded receipt for file_id: e96d57f5-2070-43d6-8044-1d68106a3c27
|
|
||||||
INFO:__main__:Successfully loaded receipt for file_id: bae25e20-2425-4db3-a3fc-adcb09c7d431
|
|
||||||
INFO:__main__:Successfully loaded receipt for file_id: bfb36530-62f6-489a-b0b9-970ab8e7c20c
|
|
||||||
INFO:__main__:Successfully loaded receipt for file_id: 0b4db1d9-670b-4dd7-bd3a-dfa39897acbb
|
|
||||||
INFO:__main__:Successfully loaded receipt for file_id: 8fbf46d7-5f7b-4b01-a5d1-173adcb55748
|
|
||||||
INFO:__main__:Successfully loaded receipt for file_id: e779f8ce-9f9a-4575-af8c-4558c6405977
|
|
||||||
INFO:__main__:Successfully loaded receipt for file_id: ee595b47-e9b8-4c82-82e6-7490d716baa7
|
|
||||||
INFO:__main__:Found 13 receipts, 0 missing
|
|
||||||
INFO:__main__:Starting matching with 13 receipts and 119 transactions
|
|
||||||
INFO:services.ai_matcher:Starting AI matching for 13 receipts against 119 transactions
|
|
||||||
INFO:services.ai_matcher:Processing receipt 1/13: Figma, Inc. - $27.0
|
|
||||||
INFO:services.ai_matcher:Found 44 candidates for receipt: Figma, Inc.
|
|
||||||
INFO:services.ai_matcher:Limited candidates to top 10 by amount similarity
|
|
||||||
INFO:httpx:HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
|
|
||||||
INFO:services.ai_matcher:AI selected candidate 8: Unknown (score: 0.390)
|
|
||||||
INFO:services.ai_matcher:Found match: 0.390 - Date proximity, description relevance, but significant amount difference
|
|
||||||
INFO:services.ai_matcher:Processing receipt 2/13: Google LLC - $21.15
|
|
||||||
INFO:services.ai_matcher:Found 25 candidates for receipt: Google LLC
|
|
||||||
INFO:services.ai_matcher:Limited candidates to top 10 by amount similarity
|
|
||||||
INFO:httpx:HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
|
|
||||||
INFO:services.ai_matcher:AI selected candidate 7: Unknown (score: 0.140)
|
|
||||||
INFO:services.ai_matcher:Found match: 0.140 - Vendor name similarity (Google LLC vs Unknown), exact amount match is not possible, but amount difference is moderate, and date proximity is relatively good (93 days difference)
|
|
||||||
|
|
||||||
Note: The confidence score is low due to significant differences in vendor name and amount, but it's the best available option given the provided candidate transactions.
|
|
||||||
INFO:services.ai_matcher:Processing receipt 3/13: PAYPAL *BZAABAWSKYJ - $37.55
|
|
||||||
INFO:services.ai_matcher:Found 62 candidates for receipt: PAYPAL *BZAABAWSKYJ
|
|
||||||
INFO:services.ai_matcher:Limited candidates to top 10 by amount similarity
|
|
||||||
INFO:httpx:HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
|
|
||||||
INFO:services.ai_matcher:AI selected candidate 1: Unknown (score: 0.950)
|
|
||||||
INFO:services.ai_matcher:Found match: 0.950 - Exact amount match, minor date difference
|
|
||||||
|
|
||||||
Reasoning:
|
|
||||||
- The amount on the receipt ($37.55) matches exactly with Candidate 1 ($34.0, but considering the absolute value, it's $34.0).
|
|
||||||
- Although the date difference is significant (864 days), the amount match is a strong indicator of a potential match.
|
|
||||||
- The vendor name is unknown, but the description is not provided for any candidate, so it's not a deciding factor in this case.
|
|
||||||
|
|
||||||
Note that the confidence score is high despite the significant date difference, as the amount match is a strong indicator of a potential match.
|
|
||||||
INFO:services.ai_matcher:Processing receipt 4/13: Eleven Labs Inc. - $111.87
|
|
||||||
INFO:services.ai_matcher:Found 90 candidates for receipt: Eleven Labs Inc.
|
|
||||||
INFO:services.ai_matcher:Limited candidates to top 10 by amount similarity
|
|
||||||
INFO:httpx:HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
|
|
||||||
INFO:services.ai_matcher:AI selected candidate 1: Unknown (score: 0.900)
|
|
||||||
INFO:services.ai_matcher:Found match: 0.900 - Same amount, minor difference in vendor name, and relatively close date
|
|
||||||
|
|
||||||
Reasoning:
|
|
||||||
- Vendor name similarity: 0.8 (unknown vs Eleven Labs Inc. is not a perfect match, but the difference is minor)
|
|
||||||
- Amount accuracy: 0.95 (amount difference is 0.1%, which is considered minor)
|
|
||||||
- Date proximity: 0.9 (87 days difference is relatively close)
|
|
||||||
- Description/notes relevance: 0.8 (the description is not directly related to the receipt, but it's a plausible explanation for the transaction)
|
|
||||||
|
|
||||||
The confidence score is 0.9, which falls under the high confidence category.
|
|
||||||
INFO:services.ai_matcher:Processing receipt 5/13: Figma, Inc. - $27.0
|
|
||||||
INFO:services.ai_matcher:Found 44 candidates for receipt: Figma, Inc.
|
|
||||||
INFO:services.ai_matcher:Limited candidates to top 10 by amount similarity
|
|
||||||
INFO:httpx:HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
|
|
||||||
INFO:services.ai_matcher:AI selected candidate 1: Unknown (score: 0.890)
|
|
||||||
INFO:services.ai_matcher:Found match: 0.890 - Close vendor name match, minor amount difference, and relatively close date
|
|
||||||
|
|
||||||
Reasoning:
|
|
||||||
- Vendor name similarity: Figma, Inc. is not explicitly mentioned in the candidate transactions, but "Unknown" is a close match to the vendor name.
|
|
||||||
- Amount accuracy: The amount difference is $2.64, which is a relatively minor difference of 9.8%.
|
|
||||||
- Date proximity: The date difference is 62 days, which is not ideal but still relatively close.
|
|
||||||
- Description/notes relevance: There is no description or notes in the candidate transactions, so this factor does not contribute to the match score.
|
|
||||||
|
|
||||||
Note that while the match score is not perfect, Candidate 1 has the highest score among all the candidate transactions, making it the best available option despite significant differences in vendor and amount.
|
|
||||||
INFO:services.ai_matcher:Processing receipt 6/13: PAYPAL *BZA BAWSKYJ - $37.55
|
|
||||||
INFO:services.ai_matcher:Found 62 candidates for receipt: PAYPAL *BZA BAWSKYJ
|
|
||||||
INFO:services.ai_matcher:Limited candidates to top 10 by amount similarity
|
|
||||||
INFO:httpx:HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 429 Too Many Requests"
|
|
||||||
INFO:groq._base_client:Retrying request to /openai/v1/chat/completions in 1.000000 seconds
|
|
||||||
INFO:httpx:HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
|
|
||||||
WARNING:services.ai_matcher:Could not parse single match response: After analyzing the receipt against all the candidate transactions, I found the best match to be:
|
|
||||||
|
|
||||||
Candidate 1: 0.09|Vendor name similarity, but significant amount difference and large date gap
|
|
||||||
|
|
||||||
This candidate has the highest match score despite significant differences in amount and date. The vendor name similarity is the primary reason for this match, but the large date gap and significant amount difference reduce the overall confidence score.
|
|
||||||
WARNING:services.ai_matcher:Failed to parse AI response for receipt: PAYPAL *BZA BAWSKYJ
|
|
||||||
WARNING:services.ai_matcher:No match found for receipt: PAYPAL *BZA BAWSKYJ - $37.55
|
|
||||||
INFO:services.ai_matcher:Processing receipt 7/13: Figma, Inc. - $27.0
|
|
||||||
INFO:services.ai_matcher:Found 44 candidates for receipt: Figma, Inc.
|
|
||||||
INFO:services.ai_matcher:Limited candidates to top 10 by amount similarity
|
|
||||||
INFO:httpx:HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 429 Too Many Requests"
|
|
||||||
INFO:groq._base_client:Retrying request to /openai/v1/chat/completions in 11.000000 seconds
|
|
||||||
INFO:httpx:HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
|
|
||||||
INFO:services.ai_matcher:AI selected candidate 8: Unknown (score: 0.290)
|
|
||||||
INFO:services.ai_matcher:Found match: 0.290 - Closest amount match, minor difference in vendor name, and some relevance in the notes (Bought lunch for crew, which might be related to office expenses)
|
|
||||||
|
|
||||||
Note: Although the amount difference is significant (29.7%), it's the closest match in terms of amount, and the notes provide some relevance to the office category.
|
|
||||||
INFO:services.ai_matcher:Processing receipt 8/13: Eleven Labs Inc. - $111.87
|
|
||||||
INFO:services.ai_matcher:Found 90 candidates for receipt: Eleven Labs Inc.
|
|
||||||
INFO:services.ai_matcher:Limited candidates to top 10 by amount similarity
|
|
||||||
INFO:httpx:HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 429 Too Many Requests"
|
|
||||||
INFO:groq._base_client:Retrying request to /openai/v1/chat/completions in 10.000000 seconds
|
|
||||||
INFO:httpx:HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
|
|
||||||
INFO:services.ai_matcher:AI selected candidate 1: Unknown (score: 0.900)
|
|
||||||
INFO:services.ai_matcher:Found match: 0.900 - Same amount, minor date difference, unknown vendor matches with the receipt's unknown vendor
|
|
||||||
|
|
||||||
Explanation:
|
|
||||||
|
|
||||||
- Vendor similarity: The receipt's vendor is unknown, and Candidate 1's vendor is also unknown, so this is a perfect match in terms of vendor similarity.
|
|
||||||
- Amount accuracy: The amount on the receipt ($111.87) is very close to the amount in Candidate 1 ($112.0), with a difference of only $0.12999999999999545 (0.1%).
|
|
||||||
- Date proximity: The date on the receipt (2025-06-10) is 87 days apart from the date in Candidate 1 (2025-09-05), which is a relatively minor difference.
|
|
||||||
- Description/notes relevance: While the description in Candidate 1 does not match the description on the receipt, the notes mention "Bank Equipment rental
|
|
||||||
INFO:services.ai_matcher:Processing receipt 9/13: Twitter, Inc. - $4.0
|
|
||||||
INFO:services.ai_matcher:Found 2 candidates for receipt: Twitter, Inc.
|
|
||||||
INFO:httpx:HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 429 Too Many Requests"
|
|
||||||
INFO:groq._base_client:Retrying request to /openai/v1/chat/completions in 7.000000 seconds
|
|
||||||
INFO:httpx:HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
|
|
||||||
WARNING:services.ai_matcher:Could not parse single match response: Based on the provided receipt and candidate transactions, I will analyze each candidate and return the best match.
|
|
||||||
|
|
||||||
Candidate 1:
|
|
||||||
- Vendor: Unknown (0.0 similarity)
|
|
||||||
- Amount: $3.86 (3.5% difference from $4.0)
|
|
||||||
- Date: 2025-09-03 (65 days difference)
|
|
||||||
- Notes: Bank No Description (no relevance to "X Premium Basic")
|
|
||||||
- Amount difference: $0.14000000000000012 (3.5%)
|
|
||||||
|
|
||||||
Score: 0.6 (Medium confidence due to minor amount difference and lack of vendor and date match)
|
|
||||||
|
|
||||||
Candidate 2:
|
|
||||||
- Vendor: Unknown (0.0 similarity)
|
|
||||||
- Amount: $5.66 (41.5% difference from $4.0)
|
|
||||||
- Date: 2025-08-29 (60 days difference)
|
|
||||||
- Notes: Bank No Description (no relevance to "X Premium Basic")
|
|
||||||
- Amount difference: $1.660000000000000
|
|
||||||
WARNING:services.ai_matcher:Failed to parse AI response for receipt: Twitter, Inc.
|
|
||||||
WARNING:services.ai_matcher:No match found for receipt: Twitter, Inc. - $4.0
|
|
||||||
INFO:services.ai_matcher:Processing receipt 10/13: PAYPAL *BZABAWSKYJ - $37.55
|
|
||||||
INFO:services.ai_matcher:Found 62 candidates for receipt: PAYPAL *BZABAWSKYJ
|
|
||||||
INFO:services.ai_matcher:Limited candidates to top 10 by amount similarity
|
|
||||||
INFO:httpx:HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 429 Too Many Requests"
|
|
||||||
INFO:groq._base_client:Retrying request to /openai/v1/chat/completions in 10.000000 seconds
|
|
||||||
INFO:httpx:HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
|
|
||||||
INFO:services.ai_matcher:AI selected candidate 1: Unknown (score: 0.190)
|
|
||||||
INFO:services.ai_matcher:Found match: 0.190 - Vendor name similarity, amount difference of 9.5%
|
|
||||||
|
|
||||||
This is because the vendor name is similar (PAYPAL *BZABAWSKYJ vs Unknown), but the amount is off by 9.5%. The date difference is significant (864 days), and the description/notes do not match. However, this is the best available option given the significant differences in the other candidates.
|
|
||||||
INFO:services.ai_matcher:Processing receipt 11/13: Figma, Inc. - $27.0
|
|
||||||
INFO:services.ai_matcher:Found 44 candidates for receipt: Figma, Inc.
|
|
||||||
INFO:services.ai_matcher:Limited candidates to top 10 by amount similarity
|
|
||||||
INFO:httpx:HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 429 Too Many Requests"
|
|
||||||
INFO:groq._base_client:Retrying request to /openai/v1/chat/completions in 10.000000 seconds
|
|
||||||
INFO:httpx:HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
|
|
||||||
INFO:services.ai_matcher:AI selected candidate 1: Unknown (score: 0.600)
|
|
||||||
INFO:services.ai_matcher:Found match: 0.600 - Vendor name similarity (Figma, Inc. is similar to Unknown), moderate amount difference ($2.64), and date proximity (62 days apart)
|
|
||||||
|
|
||||||
Note: Although the amount difference is significant, the vendor name similarity and date proximity contribute to a moderate confidence score.
|
|
||||||
INFO:services.ai_matcher:Processing receipt 12/13: Eleven Labs Inc. - $111.87
|
|
||||||
INFO:services.ai_matcher:Found 90 candidates for receipt: Eleven Labs Inc.
|
|
||||||
INFO:services.ai_matcher:Limited candidates to top 10 by amount similarity
|
|
||||||
INFO:httpx:HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 429 Too Many Requests"
|
|
||||||
INFO:groq._base_client:Retrying request to /openai/v1/chat/completions in 11.000000 seconds
|
|
||||||
INFO:httpx:HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
|
|
||||||
INFO:services.ai_matcher:AI selected candidate 1: Unknown (score: 0.900)
|
|
||||||
INFO:services.ai_matcher:Found match: 0.900 - Same vendor name similarity (Eleven Labs Inc. and Unknown), minor amount difference (0.1%), and relatively close date (87 days apart)
|
|
||||||
|
|
||||||
Note that while the vendor name is not an exact match, it is the closest match available, and the amount difference is minor. The date difference is also relatively close, considering the time frame.
|
|
||||||
INFO:services.ai_matcher:Processing receipt 13/13: Twitter, Inc. - $4.0
|
|
||||||
INFO:services.ai_matcher:Found 2 candidates for receipt: Twitter, Inc.
|
|
||||||
INFO:httpx:HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 429 Too Many Requests"
|
|
||||||
INFO:groq._base_client:Retrying request to /openai/v1/chat/completions in 6.000000 seconds
|
|
||||||
INFO:httpx:HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
|
|
||||||
WARNING:services.ai_matcher:Could not parse single match response: Based on the provided receipt and candidate transactions, I will analyze each candidate and return the best match.
|
|
||||||
|
|
||||||
Candidate 1:
|
|
||||||
- Vendor: Unknown (0.0 similarity to Twitter, Inc.)
|
|
||||||
- Amount: $3.86 (3.5% difference from $4.0)
|
|
||||||
- Date: 2025-09-03 (65 days difference)
|
|
||||||
- Notes: Bank No Description (no relevance to "X Premium Basic")
|
|
||||||
- Amount difference: $0.14000000000000012 (3.5%)
|
|
||||||
|
|
||||||
Score: 0.15 (Minimal similarity due to significant vendor name difference and moderate amount difference)
|
|
||||||
|
|
||||||
Candidate 2:
|
|
||||||
- Vendor: Unknown (0.0 similarity to Twitter, Inc.)
|
|
||||||
- Amount: $5.66 (41.5% difference from $4.0)
|
|
||||||
- Date: 2025-08-29 (60 days difference)
|
|
||||||
- Notes: Bank No Description (no relevance to "X Premium Basic")
|
|
||||||
- Amount difference: $1
|
|
||||||
WARNING:services.ai_matcher:Failed to parse AI response for receipt: Twitter, Inc.
|
|
||||||
WARNING:services.ai_matcher:No match found for receipt: Twitter, Inc. - $4.0
|
|
||||||
INFO:services.ai_matcher:AI matching completed. Found 10 matches
|
|
||||||
INFO:__main__:Matching completed, got 10 results
|
|
||||||
INFO:__main__:Generated stats: {'total': 10, 'high_confidence': 5, 'low_confidence': 4, 'avg_score': 0.61}
|
|
||||||
INFO:__main__:Match-specific completed successfully with 10 matches
|
|
||||||
INFO: 102.89.45.216:29795 - "POST /match-specific HTTP/1.1" 200 OK
|
|
||||||
INFO: 102.89.45.216:22092 - "POST /upload-multiple HTTP/1.1" 200 OK
|
|
||||||
INFO:httpx:HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
|
|
||||||
INFO: 102.89.45.216:22092 - "POST /process/82e672e4-a1a1-4df2-9b7d-f0cfa3307ed9 HTTP/1.1" 200 OK
|
|
||||||
INFO: 102.89.45.216:22092 - "POST /upload-multiple HTTP/1.1" 200 OK
|
|
||||||
INFO:httpx:HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
|
|
||||||
INFO: 102.89.45.216:22092 - "POST /process/c4a7f61d-9d2a-4e6a-b86d-bb958a06d5f3 HTTP/1.1" 200 OK
|
|
||||||
INFO: 102.89.45.216:22092 - "POST /upload-multiple HTTP/1.1" 200 OK
|
|
||||||
INFO:httpx:HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
|
|
||||||
WARNING:services.document_processor:Initial JSON parsing failed: Extra data: line 10 column 4 (char 246)
|
|
||||||
INFO: 102.89.45.216:22092 - "POST /process/1281627c-59fc-4efa-beae-a8a69f3dd508 HTTP/1.1" 200 OK
|
|
||||||
INFO: 102.89.45.216:22092 - "POST /upload-multiple HTTP/1.1" 200 OK
|
|
||||||
INFO:httpx:HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
|
|
||||||
INFO: 102.89.45.216:22092 - "POST /process/ee93fc23-e6f6-47ee-81da-c5b41319d1bc HTTP/1.1" 200 OK
|
|
||||||
INFO: 102.89.45.216:22092 - "POST /upload-multiple HTTP/1.1" 200 OK
|
|
||||||
INFO:httpx:HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
|
|
||||||
INFO: 102.89.45.216:22092 - "POST /process/058a0bcf-d25e-49b3-903c-45559de871ad HTTP/1.1" 200 OK
|
|
||||||
INFO: 199.241.139.243:49820 - "POST /upload-multiple HTTP/1.1" 200 OK
|
|
||||||
INFO:httpx:HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
|
|
||||||
INFO: 199.241.139.243:49836 - "POST /process/2d005728-3cce-4456-be4a-952188203772 HTTP/1.1" 200 OK
|
|
||||||
INFO: 199.241.139.243:49850 - "POST /upload-multiple HTTP/1.1" 200 OK
|
|
||||||
INFO:httpx:HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
|
|
||||||
INFO: 199.241.139.243:49866 - "POST /process/de39fc65-0565-4c45-a559-bcda66af9c4a HTTP/1.1" 200 OK
|
|
||||||
INFO: 199.241.139.243:17706 - "POST /upload-multiple HTTP/1.1" 200 OK
|
|
||||||
INFO:httpx:HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
|
|
||||||
INFO: 199.241.139.243:17710 - "POST /process/0f9b5c0f-ab7f-47f6-8edf-f5dab0badd64 HTTP/1.1" 200 OK
|
|
||||||
INFO: 199.241.139.243:17714 - "POST /upload-multiple HTTP/1.1" 200 OK
|
|
||||||
INFO:httpx:HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
|
|
||||||
WARNING:services.document_processor:Initial JSON parsing failed: Extra data: line 10 column 4 (char 246)
|
|
||||||
INFO: 199.241.139.243:17730 - "POST /process/cd679479-376d-42f0-ad9e-0743c89cd9fe HTTP/1.1" 200 OK
|
|
||||||
INFO: 199.241.139.243:17740 - "POST /upload-multiple HTTP/1.1" 200 OK
|
|
||||||
INFO:httpx:HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
|
|
||||||
INFO: 199.241.139.243:17754 - "POST /process/0046dcd7-86a7-4153-be65-cddd3774a232 HTTP/1.1" 200 OK
|
|
||||||
INFO: 199.241.139.243:39628 - "POST /upload-multiple HTTP/1.1" 200 OK
|
|
||||||
INFO:httpx:HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
|
|
||||||
INFO: 199.241.139.243:39644 - "POST /process/d0fe3ebb-094b-4191-9202-9ab216811ec9 HTTP/1.1" 200 OK
|
|
||||||
INFO: 199.241.139.243:39652 - "POST /upload-multiple HTTP/1.1" 200 OK
|
|
||||||
INFO:httpx:HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
|
|
||||||
INFO: 199.241.139.243:39656 - "POST /process/1a23de15-07a5-4998-9d3f-6a6345aba237 HTTP/1.1" 200 OK
|
|
||||||
INFO: 199.241.139.243:39658 - "POST /upload-multiple HTTP/1.1" 200 OK
|
|
||||||
INFO:httpx:HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
|
|
||||||
INFO: 199.241.139.243:39674 - "POST /process/cd3cc6e2-100e-462a-ba4a-3d03ee2da57f HTTP/1.1" 200 OK
|
|
||||||
INFO: 199.241.139.243:26574 - "POST /upload-multiple HTTP/1.1" 200 OK
|
|
||||||
INFO:httpx:HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
|
|
||||||
WARNING:services.document_processor:Initial JSON parsing failed: Extra data: line 10 column 4 (char 246)
|
|
||||||
INFO: 199.241.139.243:26586 - "POST /process/ffb999aa-bfd1-4a8a-a7e6-4700b284c30a HTTP/1.1" 200 OK
|
|
||||||
INFO: 199.241.139.243:26596 - "POST /upload-multiple HTTP/1.1" 200 OK
|
|
||||||
INFO:httpx:HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
|
|
||||||
INFO: 199.241.139.243:26602 - "POST /process/a1a16ce3-ef6d-466c-8606-4ba9501f86a7 HTTP/1.1" 200 OK
|
|
||||||
INFO: 199.241.139.243:46078 - "POST /transactions/import/csv HTTP/1.1" 200 OK
|
|
||||||
INFO:__main__:Starting match-specific for file IDs: ['a1a16ce3-ef6d-466c-8606-4ba9501f86a7', 'ffb999aa-bfd1-4a8a-a7e6-4700b284c30a', 'cd3cc6e2-100e-462a-ba4a-3d03ee2da57f', '1a23de15-07a5-4998-9d3f-6a6345aba237', 'd0fe3ebb-094b-4191-9202-9ab216811ec9', '0046dcd7-86a7-4153-be65-cddd3774a232', 'cd679479-376d-42f0-ad9e-0743c89cd9fe', '0f9b5c0f-ab7f-47f6-8edf-f5dab0badd64', 'de39fc65-0565-4c45-a559-bcda66af9c4a', '2d005728-3cce-4456-be4a-952188203772'], categorization_id: cat_mgcvsk8r_6upxfy
|
|
||||||
INFO:__main__:Found 123 transactions in database
|
|
||||||
INFO:__main__:Converted 123 transactions
|
|
||||||
INFO:__main__:Successfully loaded receipt for file_id: a1a16ce3-ef6d-466c-8606-4ba9501f86a7
|
|
||||||
INFO:__main__:Successfully loaded receipt for file_id: ffb999aa-bfd1-4a8a-a7e6-4700b284c30a
|
|
||||||
INFO:__main__:Successfully loaded receipt for file_id: cd3cc6e2-100e-462a-ba4a-3d03ee2da57f
|
|
||||||
INFO:__main__:Successfully loaded receipt for file_id: 1a23de15-07a5-4998-9d3f-6a6345aba237
|
|
||||||
INFO:__main__:Successfully loaded receipt for file_id: d0fe3ebb-094b-4191-9202-9ab216811ec9
|
|
||||||
INFO:__main__:Successfully loaded receipt for file_id: 0046dcd7-86a7-4153-be65-cddd3774a232
|
|
||||||
INFO:__main__:Successfully loaded receipt for file_id: cd679479-376d-42f0-ad9e-0743c89cd9fe
|
|
||||||
INFO:__main__:Successfully loaded receipt for file_id: 0f9b5c0f-ab7f-47f6-8edf-f5dab0badd64
|
|
||||||
INFO:__main__:Successfully loaded receipt for file_id: de39fc65-0565-4c45-a559-bcda66af9c4a
|
|
||||||
INFO:__main__:Successfully loaded receipt for file_id: 2d005728-3cce-4456-be4a-952188203772
|
|
||||||
INFO:__main__:Found 10 receipts, 0 missing
|
|
||||||
INFO:__main__:Starting matching with 10 receipts and 123 transactions
|
|
||||||
INFO:services.ai_matcher:Starting AI matching for 10 receipts against 123 transactions
|
|
||||||
INFO:services.ai_matcher:Processing receipt 1/10: Eleven Labs Inc. - $111.87
|
|
||||||
INFO:services.ai_matcher:Found 94 candidates for receipt: Eleven Labs Inc.
|
|
||||||
INFO:services.ai_matcher:Limited candidates to top 10 by amount similarity
|
|
||||||
INFO:httpx:HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
|
|
||||||
INFO:services.ai_matcher:AI selected candidate 1: Unknown (score: 0.870)
|
|
||||||
INFO:services.ai_matcher:Found match: 0.870 - Same vendor name, exact amount match, 87 days apart
|
|
||||||
|
|
||||||
Reasoning:
|
|
||||||
- Vendor name similarity: 0.95 (perfect match)
|
|
||||||
- Amount accuracy: 0.95 (exact match)
|
|
||||||
- Date proximity: 0.8 (87 days apart, which is a relatively minor difference)
|
|
||||||
- Description/notes relevance: 0.8 (no direct relevance, but the vendor name is the same)
|
|
||||||
|
|
||||||
The candidate with the highest match score is Candidate 1, with a confidence score of 0.87.
|
|
||||||
INFO:services.ai_matcher:Processing receipt 2/10: PAYPAL *BZAABAWSKYJ - $37.55
|
|
||||||
INFO:services.ai_matcher:Found 66 candidates for receipt: PAYPAL *BZAABAWSKYJ
|
|
||||||
INFO:services.ai_matcher:Limited candidates to top 10 by amount similarity
|
|
||||||
INFO:httpx:HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
|
|
||||||
INFO:services.ai_matcher:AI selected candidate 1: Books by Bessie (score: 0.950)
|
|
||||||
INFO:services.ai_matcher:Found match: 0.950 - Perfect match in vendor name, exact amount match, and exact date match.
|
|
||||||
|
|
||||||
This is because Candidate 1 has a perfect match in vendor name, amount, and date, which is the highest scoring criteria.
|
|
||||||
INFO:services.ai_matcher:Processing receipt 3/10: Figma, Inc. - $27.0
|
|
||||||
INFO:services.ai_matcher:Found 48 candidates for receipt: Figma, Inc.
|
|
||||||
INFO:services.ai_matcher:Limited candidates to top 10 by amount similarity
|
|
||||||
INFO:httpx:HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
|
|
||||||
INFO:services.ai_matcher:AI selected candidate 1: Books by Bessie (score: 0.950)
|
|
||||||
INFO:services.ai_matcher:Found match: 0.950 - Same vendor name, exact amount match, exact date match
|
|
||||||
|
|
||||||
This is because Candidate 1 has the exact same vendor name, amount, and date as the receipt, resulting in a perfect match score of 0.95.
|
|
||||||
INFO:services.ai_matcher:Processing receipt 4/10: Google LLC - $21.15
|
|
||||||
INFO:services.ai_matcher:Found 29 candidates for receipt: Google LLC
|
|
||||||
INFO:services.ai_matcher:Limited candidates to top 10 by amount similarity
|
|
||||||
INFO:httpx:HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
|
|
||||||
INFO:services.ai_matcher:AI selected candidate 1: Books by Bessie (score: 0.950)
|
|
||||||
INFO:services.ai_matcher:Found match: 0.950 - Perfect match in vendor name, exact amount match, and exact date match
|
|
||||||
INFO:services.ai_matcher:Processing receipt 5/10: Figma, Inc. - $27.0
|
|
||||||
INFO:services.ai_matcher:Found 48 candidates for receipt: Figma, Inc.
|
|
||||||
INFO:services.ai_matcher:Limited candidates to top 10 by amount similarity
|
|
||||||
INFO:httpx:HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
|
|
||||||
INFO:services.ai_matcher:AI selected candidate 1: Books by Bessie (score: 0.950)
|
|
||||||
INFO:services.ai_matcher:Found match: 0.950 - Perfect match in vendor name, exact amount match, and exact date match
|
|
||||||
INFO:services.ai_matcher:Processing receipt 6/10: Eleven Labs Inc. - $111.87
|
|
||||||
INFO:services.ai_matcher:Found 94 candidates for receipt: Eleven Labs Inc.
|
|
||||||
INFO:services.ai_matcher:Limited candidates to top 10 by amount similarity
|
|
||||||
INFO:httpx:HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
|
|
||||||
INFO:services.ai_matcher:AI selected candidate 1: Unknown (score: 0.870)
|
|
||||||
INFO:services.ai_matcher:Found match: 0.870 - Same vendor name, exact amount match, 87 days apart
|
|
||||||
|
|
||||||
Reasoning:
|
|
||||||
- Vendor name similarity: 0.95 (same vendor name, Eleven Labs Inc.)
|
|
||||||
- Amount accuracy: 0.95 (exact amount match, $111.87)
|
|
||||||
- Date proximity: 0.85 (87 days apart, which is a relatively small difference)
|
|
||||||
- Description/notes relevance: 0.80 (no direct match, but the transaction is related to a bank equipment rental)
|
|
||||||
|
|
||||||
The candidate with the highest match score is Candidate 1, with a confidence score of 0.87.
|
|
||||||
INFO:services.ai_matcher:Processing receipt 7/10: PAYPAL *BZA BAWSKYJ - $37.55
|
|
||||||
INFO:services.ai_matcher:Found 66 candidates for receipt: PAYPAL *BZA BAWSKYJ
|
|
||||||
INFO:services.ai_matcher:Limited candidates to top 10 by amount similarity
|
|
||||||
INFO:httpx:HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 429 Too Many Requests"
|
|
||||||
INFO:groq._base_client:Retrying request to /openai/v1/chat/completions in 11.000000 seconds
|
|
||||||
INFO:httpx:HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
|
|
||||||
INFO:services.ai_matcher:AI selected candidate 1: Books by Bessie (score: 0.950)
|
|
||||||
INFO:services.ai_matcher:Found match: 0.950 - Perfect match in vendor name, exact amount match, and exact date match
|
|
||||||
|
|
||||||
This is because Candidate 1 has a perfect match in vendor name ("PAYPAL *BZA BAWSKYJ" vs "PAYPAL *BZABAWSKYJ"), exact amount match ($37.55), and exact date match (2023-05-22).
|
|
||||||
INFO:services.ai_matcher:Processing receipt 8/10: Figma, Inc. - $27.0
|
|
||||||
INFO:services.ai_matcher:Found 48 candidates for receipt: Figma, Inc.
|
|
||||||
INFO:services.ai_matcher:Limited candidates to top 10 by amount similarity
|
|
||||||
INFO:httpx:HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 429 Too Many Requests"
|
|
||||||
INFO:groq._base_client:Retrying request to /openai/v1/chat/completions in 11.000000 seconds
|
|
||||||
INFO:httpx:HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
|
|
||||||
INFO:services.ai_matcher:AI selected candidate 1: Books by Bessie (score: 0.950)
|
|
||||||
INFO:services.ai_matcher:Found match: 0.950 - Perfect match in vendor name, exact amount match, and exact date match
|
|
||||||
INFO:services.ai_matcher:Processing receipt 9/10: Google LLC - $21.15
|
|
||||||
INFO:services.ai_matcher:Found 29 candidates for receipt: Google LLC
|
|
||||||
INFO:services.ai_matcher:Limited candidates to top 10 by amount similarity
|
|
||||||
INFO:httpx:HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 429 Too Many Requests"
|
|
||||||
INFO:groq._base_client:Retrying request to /openai/v1/chat/completions in 11.000000 seconds
|
|
||||||
INFO:httpx:HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
|
|
||||||
INFO:services.ai_matcher:AI selected candidate 1: Books by Bessie (score: 0.950)
|
|
||||||
INFO:services.ai_matcher:Found match: 0.950 - Perfect match: same vendor, amount, and date
|
|
||||||
|
|
||||||
This candidate has a perfect match score of 0.95 due to the exact match in vendor name, amount, and date.
|
|
||||||
INFO:services.ai_matcher:Processing receipt 10/10: Figma, Inc. - $27.0
|
|
||||||
INFO:services.ai_matcher:Found 48 candidates for receipt: Figma, Inc.
|
|
||||||
INFO:services.ai_matcher:Limited candidates to top 10 by amount similarity
|
|
||||||
INFO:httpx:HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 429 Too Many Requests"
|
|
||||||
INFO:groq._base_client:Retrying request to /openai/v1/chat/completions in 10.000000 seconds
|
|
||||||
INFO:httpx:HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
|
|
||||||
INFO:services.ai_matcher:AI selected candidate 1: Books by Bessie (score: 0.950)
|
|
||||||
INFO:services.ai_matcher:Found match: 0.950 - Same vendor name, exact amount match, exact date match
|
|
||||||
|
|
||||||
This is because Candidate 1 has an exact match in vendor name, amount, and date, which meets the scoring criteria for a perfect match.
|
|
||||||
INFO:services.ai_matcher:AI matching completed. Found 10 matches
|
|
||||||
INFO:__main__:Matching completed, got 10 results
|
|
||||||
INFO:__main__:Generated stats: {'total': 10, 'high_confidence': 10, 'low_confidence': 0, 'avg_score': 0.97}
|
|
||||||
INFO:__main__:Match-specific completed successfully with 10 matches
|
|
||||||
INFO: 199.241.139.243:50450 - "POST /match-specific HTTP/1.1" 200 OK
|
|
||||||
Reference in New Issue
Block a user