Compare commits
2 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 659ca4ff15 | |||
| d8315f13ac |
@@ -88,3 +88,9 @@ class DBReceipt(Base):
|
|||||||
extraction_success = Column(String, nullable=True)
|
extraction_success = Column(String, nullable=True)
|
||||||
error_message = Column(String, nullable=True)
|
error_message = Column(String, nullable=True)
|
||||||
receipt_currency = Column(String, nullable=True)
|
receipt_currency = Column(String, nullable=True)
|
||||||
|
receipt_location = Column(String, nullable=True)
|
||||||
|
calculated_tax = Column(Float, nullable=True)
|
||||||
|
is_depreciable = Column(String, nullable=True) # Store as string "True"/"False"
|
||||||
|
cca_rate = Column(Float, nullable=True)
|
||||||
|
useful_life = Column(Integer, nullable=True)
|
||||||
|
residual_value = Column(Float, nullable=True)
|
||||||
|
|||||||
+31
-9
@@ -15,6 +15,7 @@ from database import (
|
|||||||
from fastapi import FastAPI, File, Form, HTTPException, UploadFile
|
from fastapi import FastAPI, File, Form, HTTPException, UploadFile
|
||||||
from fastapi.middleware.cors import CORSMiddleware
|
from fastapi.middleware.cors import CORSMiddleware
|
||||||
from schemas import (
|
from schemas import (
|
||||||
|
DocumentProcessRequest,
|
||||||
DocumentProcessResponse,
|
DocumentProcessResponse,
|
||||||
DocumentUploadResponse,
|
DocumentUploadResponse,
|
||||||
MatchingResponse,
|
MatchingResponse,
|
||||||
@@ -364,26 +365,33 @@ async def upload_multiple_documents(
|
|||||||
|
|
||||||
|
|
||||||
@app.post(
|
@app.post(
|
||||||
"/process/{file_id}",
|
"/process",
|
||||||
response_model=DocumentProcessResponse,
|
response_model=DocumentProcessResponse,
|
||||||
tags=["Document Processing"],
|
tags=["Document Processing"],
|
||||||
)
|
)
|
||||||
async def process_document(file_id: str, db: db_dependency):
|
async def process_document(request: DocumentProcessRequest, db: db_dependency):
|
||||||
"""
|
"""
|
||||||
Process a previously uploaded document to extract receipt information.
|
Process a previously uploaded document to extract receipt information.
|
||||||
|
|
||||||
This endpoint uses AI to extract structured data from receipt images,
|
This endpoint uses AI to extract structured data from receipt images,
|
||||||
including vendor, amount, date, and category information.
|
including vendor, amount, date, and category information.
|
||||||
|
|
||||||
|
Optionally accepts user_location to guide tax calculations and depreciation
|
||||||
|
based on the user's location (format: "State/Province, Country" e.g., "Ontario, Canada").
|
||||||
"""
|
"""
|
||||||
try:
|
try:
|
||||||
# Get file info from database
|
# Get file info from database
|
||||||
db_uploaded_file = get_uploaded_file_from_db(db, file_id)
|
db_uploaded_file = get_uploaded_file_from_db(db, request.file_id)
|
||||||
if not db_uploaded_file:
|
if not db_uploaded_file:
|
||||||
raise HTTPException(status_code=404, detail=f"File {file_id} not found")
|
raise HTTPException(
|
||||||
|
status_code=404, detail=f"File {request.file_id} not found"
|
||||||
|
)
|
||||||
|
|
||||||
# Process the file using the stored file path
|
# Process the file using the stored file path
|
||||||
receipt_data = await document_processor.process_file(
|
receipt_data = await document_processor.process_file(
|
||||||
db_uploaded_file.file_path, db_uploaded_file.file_type
|
db_uploaded_file.file_path,
|
||||||
|
db_uploaded_file.file_type,
|
||||||
|
user_location=request.user_location,
|
||||||
)
|
)
|
||||||
|
|
||||||
# Parse date for database storage
|
# Parse date for database storage
|
||||||
@@ -398,8 +406,8 @@ async def process_document(file_id: str, db: db_dependency):
|
|||||||
|
|
||||||
# Create database receipt object
|
# Create database receipt object
|
||||||
db_receipt = DBReceipt(
|
db_receipt = DBReceipt(
|
||||||
receipt_id=f"receipt_{file_id}",
|
receipt_id=f"receipt_{request.file_id}",
|
||||||
file_id=file_id,
|
file_id=request.file_id,
|
||||||
amount=receipt_data.get("total_amount", 0.0),
|
amount=receipt_data.get("total_amount", 0.0),
|
||||||
date=receipt_date,
|
date=receipt_date,
|
||||||
vendor=receipt_data.get("vendor", ""),
|
vendor=receipt_data.get("vendor", ""),
|
||||||
@@ -410,6 +418,14 @@ async def process_document(file_id: str, db: db_dependency):
|
|||||||
extraction_success=str(receipt_data.get("extraction_success", False)),
|
extraction_success=str(receipt_data.get("extraction_success", False)),
|
||||||
error_message=receipt_data.get("error"),
|
error_message=receipt_data.get("error"),
|
||||||
receipt_currency=receipt_data.get("currency"),
|
receipt_currency=receipt_data.get("currency"),
|
||||||
|
receipt_location=receipt_data.get("location"),
|
||||||
|
calculated_tax=receipt_data.get("calculated_tax"),
|
||||||
|
is_depreciable=str(receipt_data.get("is_depreciable"))
|
||||||
|
if receipt_data.get("is_depreciable") is not None
|
||||||
|
else None,
|
||||||
|
cca_rate=receipt_data.get("cca_rate"),
|
||||||
|
useful_life=receipt_data.get("useful_life"),
|
||||||
|
residual_value=receipt_data.get("residual_value"),
|
||||||
)
|
)
|
||||||
|
|
||||||
# Add to database
|
# Add to database
|
||||||
@@ -417,7 +433,7 @@ async def process_document(file_id: str, db: db_dependency):
|
|||||||
db.commit()
|
db.commit()
|
||||||
|
|
||||||
return DocumentProcessResponse(
|
return DocumentProcessResponse(
|
||||||
file_id=file_id,
|
file_id=request.file_id,
|
||||||
receipt_id=db_receipt.receipt_id,
|
receipt_id=db_receipt.receipt_id,
|
||||||
extraction_success=receipt_data.get("extraction_success", False),
|
extraction_success=receipt_data.get("extraction_success", False),
|
||||||
vendor=receipt_data.get("vendor", ""),
|
vendor=receipt_data.get("vendor", ""),
|
||||||
@@ -429,10 +445,16 @@ async def process_document(file_id: str, db: db_dependency):
|
|||||||
confidence=receipt_data.get("confidence", 0.0),
|
confidence=receipt_data.get("confidence", 0.0),
|
||||||
error=receipt_data.get("error", None),
|
error=receipt_data.get("error", None),
|
||||||
receipt_currency=receipt_data.get("currency"),
|
receipt_currency=receipt_data.get("currency"),
|
||||||
|
receipt_location=receipt_data.get("location"),
|
||||||
|
calculated_tax=receipt_data.get("calculated_tax"),
|
||||||
|
is_depreciable=receipt_data.get("is_depreciable"),
|
||||||
|
cca_rate=receipt_data.get("cca_rate"),
|
||||||
|
useful_life=receipt_data.get("useful_life"),
|
||||||
|
residual_value=receipt_data.get("residual_value"),
|
||||||
)
|
)
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"Error processing document {file_id}: {str(e)}")
|
logger.error(f"Error processing document {request.file_id}: {str(e)}")
|
||||||
raise HTTPException(status_code=500, detail=str(e))
|
raise HTTPException(status_code=500, detail=str(e))
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -160,6 +160,13 @@ class DocumentUploadResponse(BaseModel):
|
|||||||
status: str
|
status: str
|
||||||
|
|
||||||
|
|
||||||
|
class DocumentProcessRequest(BaseModel):
    """Request body for processing a previously uploaded document.

    Carries the id of the uploaded file and, optionally, the user's
    location.
    """

    # Identifier of the uploaded file to process.
    file_id: str

    # Optional user location, formatted "State/Province, Country"
    # (e.g., "Ontario, Canada"). Defaults to None when not supplied.
    user_location: Optional[str] = None
|
||||||
|
|
||||||
|
|
||||||
class DocumentProcessResponse(BaseModel):
|
class DocumentProcessResponse(BaseModel):
|
||||||
file_id: str
|
file_id: str
|
||||||
receipt_id: str
|
receipt_id: str
|
||||||
@@ -173,6 +180,20 @@ class DocumentProcessResponse(BaseModel):
|
|||||||
confidence: Optional[float] = None
|
confidence: Optional[float] = None
|
||||||
error: Optional[str] = None
|
error: Optional[str] = None
|
||||||
receipt_currency: Optional[str] = "CAD"
|
receipt_currency: Optional[str] = "CAD"
|
||||||
|
receipt_location: Optional[str] = (
|
||||||
|
None # Location from receipt (e.g., "Ontario, Canada" or "California, USA")
|
||||||
|
)
|
||||||
|
calculated_tax: Optional[float] = None # Calculated sales tax if not clearly shown
|
||||||
|
is_depreciable: Optional[bool] = None # Whether item is a depreciable asset
|
||||||
|
cca_rate: Optional[float] = (
|
||||||
|
None # CCA rate for tax depreciation (e.g., 0.30 for 30%)
|
||||||
|
)
|
||||||
|
useful_life: Optional[int] = (
|
||||||
|
None # Useful life in years for straight-line depreciation
|
||||||
|
)
|
||||||
|
residual_value: Optional[float] = (
|
||||||
|
None # Residual value for straight-line depreciation
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
# New tax-related models
|
# New tax-related models
|
||||||
|
|||||||
@@ -7,7 +7,6 @@ from typing import Any, Dict
|
|||||||
import aiofiles
|
import aiofiles
|
||||||
import groq
|
import groq
|
||||||
import PyPDF2
|
import PyPDF2
|
||||||
|
|
||||||
from config import settings
|
from config import settings
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
@@ -18,28 +17,55 @@ class DocumentProcessor:
|
|||||||
self.client = groq.Groq(api_key=settings.GROQ_API_KEY)
|
self.client = groq.Groq(api_key=settings.GROQ_API_KEY)
|
||||||
self.model = "meta-llama/llama-4-scout-17b-16e-instruct" # Vision model
|
self.model = "meta-llama/llama-4-scout-17b-16e-instruct" # Vision model
|
||||||
|
|
||||||
async def process_file(
    self, file_path: str, file_type: str, user_location: str = None
) -> Dict[str, Any]:
    """Process an uploaded file and extract receipt data.

    Dispatches on ``file_type`` to the image or PDF extraction helper.

    Args:
        file_path: Path to the file to process.
        file_type: Type of file (jpg, pdf, etc.). Matched case-insensitively;
            surrounding whitespace and a leading dot (".pdf") are ignored.
        user_location: User's location string in format
            "State/Province, Country" (e.g., "Ontario, Canada"). May be
            None; it is forwarded unchanged to the extraction helper.

    Returns:
        The dict produced by the extraction helper, or
        ``{"error": <message>}`` when the type is unsupported or the
        processing raises.
    """
    image_types = {"jpg", "jpeg", "png", "gif", "bmp"}
    try:
        # Normalise once so "PNG", ".png" and " png " all hit the same branch.
        kind = file_type.strip().lstrip(".").lower()
        if kind in image_types:
            return await self._process_image(file_path, user_location)
        if kind == "pdf":
            return await self._process_pdf(file_path, user_location)
        raise ValueError(f"Unsupported file type: {file_type}")
    except Exception as e:
        # Best-effort contract: callers receive an error dict instead of an
        # exception, but the failure is logged so it is not silently lost.
        logger.exception("Failed to process %s file %s", file_type, file_path)
        return {"error": str(e)}
|
||||||
|
|
||||||
async def _process_image(self, image_path: str) -> Dict[str, Any]:
|
async def _process_image(
|
||||||
"""Extract data from image using Groq vision"""
|
self, image_path: str, user_location: str = None
|
||||||
|
) -> Dict[str, Any]:
|
||||||
|
"""Extract data from image using Groq vision
|
||||||
|
|
||||||
|
Args:
|
||||||
|
image_path: Path to the image file
|
||||||
|
user_location: User's location string in format "State/Province, Country" (e.g., "Ontario, Canada")
|
||||||
|
"""
|
||||||
try:
|
try:
|
||||||
# Encode image to base64
|
# Encode image to base64
|
||||||
base64_image = self._encode_image(image_path)
|
base64_image = self._encode_image(image_path)
|
||||||
|
|
||||||
|
# Build user location context
|
||||||
|
user_location_context = ""
|
||||||
|
if user_location:
|
||||||
|
user_location_context = f"""
|
||||||
|
|
||||||
|
USER LOCATION CONTEXT:
|
||||||
|
The user is located in {user_location}.
|
||||||
|
- If the receipt location is MISSING or UNCLEAR, use the user's location ({user_location}) for tax calculations.
|
||||||
|
- If the receipt clearly shows a different location, use the receipt's location instead.
|
||||||
|
- Apply depreciation rules based on the user's location.
|
||||||
|
"""
|
||||||
|
|
||||||
# Create Groq vision prompt
|
# Create Groq vision prompt
|
||||||
prompt = """
|
prompt = f"""
|
||||||
Analyze this receipt image and extract the following information in JSON format:
|
Analyze this receipt image and extract the following information in JSON format:
|
||||||
{
|
{{
|
||||||
"vendor": "Store/company name",
|
"vendor": "Store/company name",
|
||||||
"description": "Detailed description of items/services purchased",
|
"description": "Detailed description of items/services purchased",
|
||||||
"total_amount": 0.00,
|
"total_amount": 0.00,
|
||||||
@@ -47,18 +73,55 @@ class DocumentProcessor:
|
|||||||
"date": "YYYY-MM-DD",
|
"date": "YYYY-MM-DD",
|
||||||
"category": "Food/Transport/Office/Other",
|
"category": "Food/Transport/Office/Other",
|
||||||
"confidence": 0.95,
|
"confidence": 0.95,
|
||||||
"currency": "USD"
|
"currency": "USD",
|
||||||
}
|
"location": "Province/State, Country",
|
||||||
|
"calculated_tax": 0.00,
|
||||||
|
"is_depreciable": false,
|
||||||
|
"cca_rate": null,
|
||||||
|
"useful_life": null,
|
||||||
|
"residual_value": null
|
||||||
|
}}
|
||||||
|
|
||||||
Rules:
|
Rules:
|
||||||
- Extract vendor name as it appears on receipt
|
- Extract vendor name as it appears on receipt
|
||||||
- Extract description of items/services purchased (e.g., "Coffee and sandwich", "Gasoline", "Office supplies")
|
- Extract description of items/services purchased (e.g., "Coffee and sandwich", "Gasoline", "Office supplies")
|
||||||
- Total amount should be the final total including tax
|
- Total amount should be the final total including tax
|
||||||
- Tax amount is separate tax line if available
|
- Tax amount is separate tax line if available (if not clearly shown, calculate based on location)
|
||||||
- Date should be the date on the receipt
|
- Date should be the date on the receipt
|
||||||
- Categorize based on vendor type (Starbucks=Food, Shell=Transport, etc.)
|
- Categorize based on vendor type (Starbucks=Food, Shell=Transport, etc.)
|
||||||
- Confidence score 0-1 based on how clear the receipt is
|
- Confidence score 0-1 based on how clear the receipt is
|
||||||
- Currency should be the currency used on the receipt (e.g., "USD", "EUR")
|
- Currency should be the currency used on the receipt (e.g., "USD", "EUR", "CAD")
|
||||||
|
{user_location_context}
|
||||||
|
LOCATION & TAX RULES:
|
||||||
|
- Extract location from receipt (look for store address, province/state, country)
|
||||||
|
- Format location as "Province/State, Country" (e.g., "Ontario, Canada" or "California, USA")
|
||||||
|
- If location not shown on receipt, return null for location (system will use user location as fallback)
|
||||||
|
|
||||||
|
- If tax_amount is NOT clearly shown on receipt, calculate it based on location:
|
||||||
|
* Ontario, Canada: 13% HST
|
||||||
|
* Quebec, Canada: 9.975% QST + 5% GST = 14.975% total
|
||||||
|
* British Columbia, Canada: 12% (5% GST + 7% PST)
|
||||||
|
* Alberta, Canada: 5% GST
|
||||||
|
* California, USA: ~7.25% (varies by locality)
|
||||||
|
* New York, USA: ~8.875% (varies by locality)
|
||||||
|
* Texas, USA: 6.25%
|
||||||
|
* For other locations, estimate based on typical rates
|
||||||
|
- Store calculated tax in "calculated_tax" field (set to null if tax clearly shown)
|
||||||
|
- If tax is clearly shown on receipt, use that value for tax_amount and set calculated_tax to null
|
||||||
|
|
||||||
|
DEPRECIATION RULES:
|
||||||
|
- Determine if item is a depreciable asset (vehicles, machinery, equipment, computers, furniture, buildings)
|
||||||
|
- Set is_depreciable to true only for capital assets, false for consumables/services
|
||||||
|
- If is_depreciable is true, provide:
|
||||||
|
* cca_rate: CCA rate as decimal (e.g., 0.30 for 30%, 0.20 for 20%, 0.04 for 4%)
|
||||||
|
- Class 10 (Vehicles): 30%
|
||||||
|
- Class 8 (Furniture, equipment): 20%
|
||||||
|
- Class 50 (Computers, software): 55%
|
||||||
|
- Class 1 (Buildings): 4%
|
||||||
|
- Class 10.1 (Passenger vehicles >$30k): 30%
|
||||||
|
* useful_life: Expected years of use (e.g., 5 for computers, 8 for vehicles, 10 for furniture)
|
||||||
|
* residual_value: Estimated value at end of life (typically 10% of purchase price for equipment, 20% for vehicles)
|
||||||
|
- If is_depreciable is false, set cca_rate, useful_life, and residual_value to null
|
||||||
|
|
||||||
Return only valid JSON.
|
Return only valid JSON.
|
||||||
"""
|
"""
|
||||||
@@ -80,7 +143,7 @@ class DocumentProcessor:
|
|||||||
}
|
}
|
||||||
],
|
],
|
||||||
model=self.model,
|
model=self.model,
|
||||||
max_tokens=500,
|
max_tokens=800,
|
||||||
temperature=0.1,
|
temperature=0.1,
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -96,12 +159,19 @@ class DocumentProcessor:
|
|||||||
with open(image_path, "rb") as image_file:
|
with open(image_path, "rb") as image_file:
|
||||||
return base64.b64encode(image_file.read()).decode("utf-8")
|
return base64.b64encode(image_file.read()).decode("utf-8")
|
||||||
|
|
||||||
async def _process_pdf(
    self, pdf_path: str, user_location: str = None
) -> Dict[str, Any]:
    """Extract receipt data from a PDF using its embedded text.

    The PDF is not rendered to an image: its text is pulled out with
    ``_extract_text_from_pdf`` and handed to ``_process_text_content``.

    Args:
        pdf_path: Path to the PDF file.
        user_location: User's location string in format
            "State/Province, Country" (e.g., "Ontario, Canada"). May be
            None; it is forwarded unchanged to the text processor.

    Returns:
        The dict from ``_process_text_content``, or ``{"error": <message>}``
        when no text could be extracted or processing raises.
    """
    try:
        # For now, extract text from PDF and process as text
        text_content = self._extract_text_from_pdf(pdf_path)

        # An empty result leaves nothing to analyse (e.g. a PDF without a
        # text layer); report that instead of prompting the model with
        # blank input and getting made-up receipt fields back.
        if not text_content or not text_content.strip():
            return {
                "error": "PDF processing error: no extractable text found in PDF"
            }

        return self._process_text_content(text_content, user_location)

    except Exception as e:
        return {"error": f"PDF processing error: {str(e)}"}
|
||||||
@@ -118,9 +188,28 @@ class DocumentProcessor:
|
|||||||
except Exception:
|
except Exception:
|
||||||
return ""
|
return ""
|
||||||
|
|
||||||
def _process_text_content(self, text_content: str) -> Dict[str, Any]:
|
def _process_text_content(
|
||||||
"""Process text content using Groq (fallback for PDFs)"""
|
self, text_content: str, user_location: str = None
|
||||||
|
) -> Dict[str, Any]:
|
||||||
|
"""Process text content using Groq (fallback for PDFs)
|
||||||
|
|
||||||
|
Args:
|
||||||
|
text_content: Extracted text from PDF
|
||||||
|
user_location: User's location string in format "State/Province, Country" (e.g., "Ontario, Canada")
|
||||||
|
"""
|
||||||
try:
|
try:
|
||||||
|
# Build user location context
|
||||||
|
user_location_context = ""
|
||||||
|
if user_location:
|
||||||
|
user_location_context = f"""
|
||||||
|
|
||||||
|
USER LOCATION CONTEXT:
|
||||||
|
The user is located in {user_location}.
|
||||||
|
- If the receipt location is MISSING or UNCLEAR, use the user's location ({user_location}) for tax calculations.
|
||||||
|
- If the receipt clearly shows a different location, use the receipt's location instead.
|
||||||
|
- Apply depreciation rules based on the user's location.
|
||||||
|
"""
|
||||||
|
|
||||||
prompt = f"""
|
prompt = f"""
|
||||||
Analyze this receipt text and extract the following information in JSON format:
|
Analyze this receipt text and extract the following information in JSON format:
|
||||||
|
|
||||||
@@ -136,18 +225,55 @@ class DocumentProcessor:
|
|||||||
"date": "YYYY-MM-DD",
|
"date": "YYYY-MM-DD",
|
||||||
"category": "Food/Transport/Office/Other",
|
"category": "Food/Transport/Office/Other",
|
||||||
"confidence": 0.95,
|
"confidence": 0.95,
|
||||||
"currency": "USD"
|
"currency": "USD",
|
||||||
|
"location": "Province/State, Country",
|
||||||
|
"calculated_tax": 0.00,
|
||||||
|
"is_depreciable": false,
|
||||||
|
"cca_rate": null,
|
||||||
|
"useful_life": null,
|
||||||
|
"residual_value": null
|
||||||
}}
|
}}
|
||||||
|
|
||||||
Rules:
|
Rules:
|
||||||
- Extract vendor name as it appears on receipt
|
- Extract vendor name as it appears on receipt
|
||||||
- Extract description of items/services purchased (e.g., "Coffee and sandwich", "Gasoline", "Office supplies")
|
- Extract description of items/services purchased (e.g., "Coffee and sandwich", "Gasoline", "Office supplies")
|
||||||
- Total amount should be the final total including tax
|
- Total amount should be the final total including tax
|
||||||
- Tax amount is separate tax line if available
|
- Tax amount is separate tax line if available (if not clearly shown, calculate based on location)
|
||||||
- Date should be the date on the receipt
|
- Date should be the date on the receipt
|
||||||
- Categorize based on vendor type
|
- Categorize based on vendor type
|
||||||
- Confidence score 0-1 based on clarity
|
- Confidence score 0-1 based on clarity
|
||||||
- Currency should be the currency used on the receipt (e.g., "USD", "EUR")
|
- Currency should be the currency used on the receipt (e.g., "USD", "EUR", "CAD")
|
||||||
|
{user_location_context}
|
||||||
|
LOCATION & TAX RULES:
|
||||||
|
- Extract location from receipt (look for store address, province/state, country)
|
||||||
|
- Format location as "Province/State, Country" (e.g., "Ontario, Canada" or "California, USA")
|
||||||
|
- If location not shown on receipt, return null for location (system will use user location as fallback)
|
||||||
|
|
||||||
|
- If tax_amount is NOT clearly shown on receipt, calculate it based on location:
|
||||||
|
* Ontario, Canada: 13% HST
|
||||||
|
* Quebec, Canada: 9.975% QST + 5% GST = 14.975% total
|
||||||
|
* British Columbia, Canada: 12% (5% GST + 7% PST)
|
||||||
|
* Alberta, Canada: 5% GST
|
||||||
|
* California, USA: ~7.25% (varies by locality)
|
||||||
|
* New York, USA: ~8.875% (varies by locality)
|
||||||
|
* Texas, USA: 6.25%
|
||||||
|
* For other locations, estimate based on typical rates
|
||||||
|
- Store calculated tax in "calculated_tax" field (set to null if tax clearly shown)
|
||||||
|
- If tax is clearly shown on receipt, use that value for tax_amount and set calculated_tax to null
|
||||||
|
|
||||||
|
DEPRECIATION RULES:
|
||||||
|
- Determine if item is a depreciable asset (vehicles, machinery, equipment, computers, furniture, buildings)
|
||||||
|
- Set is_depreciable to true only for capital assets, false for consumables/services
|
||||||
|
- If is_depreciable is true, provide:
|
||||||
|
* cca_rate: CCA rate as decimal (e.g., 0.30 for 30%, 0.20 for 20%, 0.04 for 4%)
|
||||||
|
- Class 10 (Vehicles): 30%
|
||||||
|
- Class 8 (Furniture, equipment): 20%
|
||||||
|
- Class 50 (Computers, software): 55%
|
||||||
|
- Class 1 (Buildings): 4%
|
||||||
|
- Class 10.1 (Passenger vehicles >$30k): 30%
|
||||||
|
* useful_life: Expected years of use (e.g., 5 for computers, 8 for vehicles, 10 for furniture)
|
||||||
|
* residual_value: Estimated value at end of life (typically 10% of purchase price for equipment, 20% for vehicles)
|
||||||
|
- If is_depreciable is false, set cca_rate, useful_life, and residual_value to null
|
||||||
|
|
||||||
Return only valid JSON.
|
Return only valid JSON.
|
||||||
"""
|
"""
|
||||||
@@ -155,7 +281,7 @@ class DocumentProcessor:
|
|||||||
response = self.client.chat.completions.create(
|
response = self.client.chat.completions.create(
|
||||||
model=self.model,
|
model=self.model,
|
||||||
messages=[{"role": "user", "content": prompt}],
|
messages=[{"role": "user", "content": prompt}],
|
||||||
max_tokens=500,
|
max_tokens=800,
|
||||||
temperature=0.1,
|
temperature=0.1,
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -207,8 +333,22 @@ class DocumentProcessor:
|
|||||||
confidence_match = re.search(
|
confidence_match = re.search(
|
||||||
r'"confidence"\s*:\s*([0-9.]+)', json_str
|
r'"confidence"\s*:\s*([0-9.]+)', json_str
|
||||||
)
|
)
|
||||||
currency_match = re.search(
|
currency_match = re.search(r'"currency"\s*:\s*"([^"]*)"', json_str)
|
||||||
r'"currency"\s*:\s*"([^"]*)"', json_str
|
location_match = re.search(r'"location"\s*:\s*"([^"]*)"', json_str)
|
||||||
|
calculated_tax_match = re.search(
|
||||||
|
r'"calculated_tax"\s*:\s*([0-9.]+|null)', json_str
|
||||||
|
)
|
||||||
|
is_depreciable_match = re.search(
|
||||||
|
r'"is_depreciable"\s*:\s*(true|false)', json_str
|
||||||
|
)
|
||||||
|
cca_rate_match = re.search(
|
||||||
|
r'"cca_rate"\s*:\s*([0-9.]+|null)', json_str
|
||||||
|
)
|
||||||
|
useful_life_match = re.search(
|
||||||
|
r'"useful_life"\s*:\s*([0-9]+|null)', json_str
|
||||||
|
)
|
||||||
|
residual_value_match = re.search(
|
||||||
|
r'"residual_value"\s*:\s*([0-9.]+|null)', json_str
|
||||||
)
|
)
|
||||||
|
|
||||||
data = {
|
data = {
|
||||||
@@ -229,7 +369,27 @@ class DocumentProcessor:
|
|||||||
"confidence": float(confidence_match.group(1))
|
"confidence": float(confidence_match.group(1))
|
||||||
if confidence_match
|
if confidence_match
|
||||||
else 0.5,
|
else 0.5,
|
||||||
"currency": currency_match.group(1) if currency_match else "CAD"
|
"currency": currency_match.group(1)
|
||||||
|
if currency_match
|
||||||
|
else "CAD",
|
||||||
|
"location": location_match.group(1) if location_match else None,
|
||||||
|
"calculated_tax": float(calculated_tax_match.group(1))
|
||||||
|
if calculated_tax_match
|
||||||
|
and calculated_tax_match.group(1) != "null"
|
||||||
|
else None,
|
||||||
|
"is_depreciable": is_depreciable_match.group(1) == "true"
|
||||||
|
if is_depreciable_match
|
||||||
|
else None,
|
||||||
|
"cca_rate": float(cca_rate_match.group(1))
|
||||||
|
if cca_rate_match and cca_rate_match.group(1) != "null"
|
||||||
|
else None,
|
||||||
|
"useful_life": int(useful_life_match.group(1))
|
||||||
|
if useful_life_match and useful_life_match.group(1) != "null"
|
||||||
|
else None,
|
||||||
|
"residual_value": float(residual_value_match.group(1))
|
||||||
|
if residual_value_match
|
||||||
|
and residual_value_match.group(1) != "null"
|
||||||
|
else None,
|
||||||
}
|
}
|
||||||
|
|
||||||
# Validate and clean data
|
# Validate and clean data
|
||||||
@@ -243,6 +403,12 @@ class DocumentProcessor:
|
|||||||
"confidence": float(data.get("confidence", 0.5)),
|
"confidence": float(data.get("confidence", 0.5)),
|
||||||
"extraction_success": True,
|
"extraction_success": True,
|
||||||
"currency": data.get("currency", "CAD").strip(),
|
"currency": data.get("currency", "CAD").strip(),
|
||||||
|
"location": data.get("location"),
|
||||||
|
"calculated_tax": data.get("calculated_tax"),
|
||||||
|
"is_depreciable": data.get("is_depreciable"),
|
||||||
|
"cca_rate": data.get("cca_rate"),
|
||||||
|
"useful_life": data.get("useful_life"),
|
||||||
|
"residual_value": data.get("residual_value"),
|
||||||
}
|
}
|
||||||
else:
|
else:
|
||||||
# Try to extract fields from plain text
|
# Try to extract fields from plain text
|
||||||
@@ -312,6 +478,12 @@ class DocumentProcessor:
|
|||||||
"category": "Other",
|
"category": "Other",
|
||||||
"confidence": 0.3, # Low confidence for text extraction
|
"confidence": 0.3, # Low confidence for text extraction
|
||||||
"extraction_success": True,
|
"extraction_success": True,
|
||||||
|
"location": None,
|
||||||
|
"calculated_tax": None,
|
||||||
|
"is_depreciable": None,
|
||||||
|
"cca_rate": None,
|
||||||
|
"useful_life": None,
|
||||||
|
"residual_value": None,
|
||||||
}
|
}
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
@@ -325,6 +497,12 @@ class DocumentProcessor:
|
|||||||
"confidence": 0.1,
|
"confidence": 0.1,
|
||||||
"extraction_success": False,
|
"extraction_success": False,
|
||||||
"error": f"Text extraction failed: {str(e)}",
|
"error": f"Text extraction failed: {str(e)}",
|
||||||
|
"location": None,
|
||||||
|
"calculated_tax": None,
|
||||||
|
"is_depreciable": None,
|
||||||
|
"cca_rate": None,
|
||||||
|
"useful_life": None,
|
||||||
|
"residual_value": None,
|
||||||
}
|
}
|
||||||
|
|
||||||
async def save_uploaded_file(self, file_content: bytes, filename: str) -> str:
|
async def save_uploaded_file(self, file_content: bytes, filename: str) -> str:
|
||||||
|
|||||||
+1
-11
@@ -44,17 +44,7 @@ If the location is missing, apply the user’s location sales tax by default.
|
|||||||
**FX (Foreign Exchange):** If the receipt is in a different currency, flag the FX difference for manual review but don’t fetch exchange rates.
|
**FX (Foreign Exchange):** If the receipt is in a different currency, flag the FX difference for manual review but don’t fetch exchange rates.
|
||||||
|
|
||||||
|
|
||||||
### Tax Rules:
|
|
||||||
Four Rules for Tax and Depreciation Handling
|
|
||||||
### 1. **Sales Tax Rule**
|
|
||||||
**Purpose**: To calculate and apply the correct sales tax based on the shipping and billing addresses.
|
|
||||||
- **When Billing and Shipping Addresses Are the Same**: Apply the sales tax rate based on the billing address.
|
|
||||||
- **When Billing and Shipping Addresses Are Different**: Apply the sales tax rate based on the shipping address.
|
|
||||||
|
|
||||||
**Example**:
|
|
||||||
1. If the billing and shipping addresses are both in Ontario, the system will apply Ontario's 13% HST rate.
|
|
||||||
2. If the billing address is in Ontario but the shipping address is in Quebec, the system will apply Quebec's combined 14.975% rate (5% GST + 9.975% QST) based on the shipping address.
|
|
||||||
|
|
||||||
### 2. **Foreign Exchange (FX) Rule**
|
### 2. **Foreign Exchange (FX) Rule**
|
||||||
**Purpose**: To handle discrepancies when transactions and receipts are in different currencies (e.g., USD vs. CAD).
|
**Purpose**: To handle discrepancies when transactions and receipts are in different currencies (e.g., USD vs. CAD).
|
||||||
- **Action**: Identify the currency mismatch, but do not automatically fetch the exchange rate. Flag the FX difference for manual review, allowing the user to approve or adjust the balance.
|
- **Action**: Identify the currency mismatch, but do not automatically fetch the exchange rate. Flag the FX difference for manual review, allowing the user to approve or adjust the balance.
|
||||||
|
|||||||
Reference in New Issue
Block a user