Add new fields for tax and depreciation in receipt processing
- Introduced fields: receipt_location, calculated_tax, is_depreciable, cca_rate, useful_life, and residual_value in the DBReceipt model.
- Updated the process_document function to handle the new receipt data attributes.
- Enhanced the DocumentProcessResponse schema to include the new fields.
- Updated the document processing rules to incorporate location-based tax calculation and depreciation rules.
This commit is contained in:
@@ -7,7 +7,6 @@ from typing import Any, Dict
|
||||
import aiofiles
|
||||
import groq
|
||||
import PyPDF2
|
||||
|
||||
from config import settings
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
@@ -47,18 +46,55 @@ class DocumentProcessor:
|
||||
"date": "YYYY-MM-DD",
|
||||
"category": "Food/Transport/Office/Other",
|
||||
"confidence": 0.95,
|
||||
"currency": "USD"
|
||||
"currency": "USD",
|
||||
"location": "Province/State, Country",
|
||||
"calculated_tax": 0.00,
|
||||
"is_depreciable": false,
|
||||
"cca_rate": null,
|
||||
"useful_life": null,
|
||||
"residual_value": null
|
||||
}
|
||||
|
||||
Rules:
|
||||
- Extract vendor name as it appears on receipt
|
||||
- Extract description of items/services purchased (e.g., "Coffee and sandwich", "Gasoline", "Office supplies")
|
||||
- Total amount should be the final total including tax
|
||||
- Tax amount is separate tax line if available
|
||||
- Tax amount is separate tax line if available (if not clearly shown, calculate based on location)
|
||||
- Date should be the date on the receipt
|
||||
- Categorize based on vendor type (Starbucks=Food, Shell=Transport, etc.)
|
||||
- Confidence score 0-1 based on how clear the receipt is
|
||||
- Currency should be the currency used on the receipt (e.g., "USD", "EUR")
|
||||
- Currency should be the currency used on the receipt (e.g., "USD", "EUR", "CAD")
|
||||
|
||||
LOCATION & TAX RULES:
|
||||
- Extract location from receipt (look for store address, province/state, country)
|
||||
- Format location as "Province/State, Country" (e.g., "Ontario, Canada" or "California, USA")
|
||||
- If location not shown, return null for location
|
||||
|
||||
- If tax_amount is NOT clearly shown on receipt, calculate it based on location:
|
||||
* Ontario, Canada: 13% HST
|
||||
* Quebec, Canada: 9.975% QST + 5% GST = 14.975% total
|
||||
* British Columbia, Canada: 12% (5% GST + 7% PST)
|
||||
* Alberta, Canada: 5% GST
|
||||
* California, USA: ~7.25% (varies by locality)
|
||||
* New York, USA: ~8.875% (varies by locality)
|
||||
* Texas, USA: 6.25%
|
||||
* For other locations, estimate based on typical rates
|
||||
- Store calculated tax in "calculated_tax" field (set to null if tax clearly shown)
|
||||
- If tax is clearly shown on receipt, use that value for tax_amount and set calculated_tax to null
|
||||
|
||||
DEPRECIATION RULES:
|
||||
- Determine if item is a depreciable asset (vehicles, machinery, equipment, computers, furniture, buildings)
|
||||
- Set is_depreciable to true only for capital assets, false for consumables/services
|
||||
- If is_depreciable is true, provide:
|
||||
* cca_rate: CCA rate as decimal (e.g., 0.30 for 30%, 0.20 for 20%, 0.04 for 4%)
|
||||
- Class 10 (Vehicles): 30%
|
||||
- Class 8 (Furniture, equipment): 20%
|
||||
- Class 50 (Computers, software): 55%
|
||||
- Class 1 (Buildings): 4%
|
||||
- Class 10.1 (Passenger vehicles >$30k): 30%
|
||||
* useful_life: Expected years of use (e.g., 5 for computers, 8 for vehicles, 10 for furniture)
|
||||
* residual_value: Estimated value at end of life (typically 10% of purchase price for equipment, 20% for vehicles)
|
||||
- If is_depreciable is false, set cca_rate, useful_life, and residual_value to null
|
||||
|
||||
Return only valid JSON.
|
||||
"""
|
||||
@@ -80,7 +116,7 @@ class DocumentProcessor:
|
||||
}
|
||||
],
|
||||
model=self.model,
|
||||
max_tokens=500,
|
||||
max_tokens=800,
|
||||
temperature=0.1,
|
||||
)
|
||||
|
||||
@@ -136,18 +172,55 @@ class DocumentProcessor:
|
||||
"date": "YYYY-MM-DD",
|
||||
"category": "Food/Transport/Office/Other",
|
||||
"confidence": 0.95,
|
||||
"currency": "USD"
|
||||
"currency": "USD",
|
||||
"location": "Province/State, Country",
|
||||
"calculated_tax": 0.00,
|
||||
"is_depreciable": false,
|
||||
"cca_rate": null,
|
||||
"useful_life": null,
|
||||
"residual_value": null
|
||||
}}
|
||||
|
||||
Rules:
|
||||
- Extract vendor name as it appears on receipt
|
||||
- Extract description of items/services purchased (e.g., "Coffee and sandwich", "Gasoline", "Office supplies")
|
||||
- Total amount should be the final total including tax
|
||||
- Tax amount is separate tax line if available
|
||||
- Tax amount is separate tax line if available (if not clearly shown, calculate based on location)
|
||||
- Date should be the date on the receipt
|
||||
- Categorize based on vendor type
|
||||
- Confidence score 0-1 based on clarity
|
||||
- Currency should be the currency used on the receipt (e.g., "USD", "EUR")
|
||||
- Currency should be the currency used on the receipt (e.g., "USD", "EUR", "CAD")
|
||||
|
||||
LOCATION & TAX RULES:
|
||||
- Extract location from receipt (look for store address, province/state, country)
|
||||
- Format location as "Province/State, Country" (e.g., "Ontario, Canada" or "California, USA")
|
||||
- If location not shown, return null for location
|
||||
|
||||
- If tax_amount is NOT clearly shown on receipt, calculate it based on location:
|
||||
* Ontario, Canada: 13% HST
|
||||
* Quebec, Canada: 9.975% QST + 5% GST = 14.975% total
|
||||
* British Columbia, Canada: 12% (5% GST + 7% PST)
|
||||
* Alberta, Canada: 5% GST
|
||||
* California, USA: ~7.25% (varies by locality)
|
||||
* New York, USA: ~8.875% (varies by locality)
|
||||
* Texas, USA: 6.25%
|
||||
* For other locations, estimate based on typical rates
|
||||
- Store calculated tax in "calculated_tax" field (set to null if tax clearly shown)
|
||||
- If tax is clearly shown on receipt, use that value for tax_amount and set calculated_tax to null
|
||||
|
||||
DEPRECIATION RULES:
|
||||
- Determine if item is a depreciable asset (vehicles, machinery, equipment, computers, furniture, buildings)
|
||||
- Set is_depreciable to true only for capital assets, false for consumables/services
|
||||
- If is_depreciable is true, provide:
|
||||
* cca_rate: CCA rate as decimal (e.g., 0.30 for 30%, 0.20 for 20%, 0.04 for 4%)
|
||||
- Class 10 (Vehicles): 30%
|
||||
- Class 8 (Furniture, equipment): 20%
|
||||
- Class 50 (Computers, software): 55%
|
||||
- Class 1 (Buildings): 4%
|
||||
- Class 10.1 (Passenger vehicles >$30k): 30%
|
||||
* useful_life: Expected years of use (e.g., 5 for computers, 8 for vehicles, 10 for furniture)
|
||||
* residual_value: Estimated value at end of life (typically 10% of purchase price for equipment, 20% for vehicles)
|
||||
- If is_depreciable is false, set cca_rate, useful_life, and residual_value to null
|
||||
|
||||
Return only valid JSON.
|
||||
"""
|
||||
@@ -155,7 +228,7 @@ class DocumentProcessor:
|
||||
response = self.client.chat.completions.create(
|
||||
model=self.model,
|
||||
messages=[{"role": "user", "content": prompt}],
|
||||
max_tokens=500,
|
||||
max_tokens=800,
|
||||
temperature=0.1,
|
||||
)
|
||||
|
||||
@@ -207,8 +280,22 @@ class DocumentProcessor:
|
||||
confidence_match = re.search(
|
||||
r'"confidence"\s*:\s*([0-9.]+)', json_str
|
||||
)
|
||||
currency_match = re.search(
|
||||
r'"currency"\s*:\s*"([^"]*)"', json_str
|
||||
currency_match = re.search(r'"currency"\s*:\s*"([^"]*)"', json_str)
|
||||
location_match = re.search(r'"location"\s*:\s*"([^"]*)"', json_str)
|
||||
calculated_tax_match = re.search(
|
||||
r'"calculated_tax"\s*:\s*([0-9.]+|null)', json_str
|
||||
)
|
||||
is_depreciable_match = re.search(
|
||||
r'"is_depreciable"\s*:\s*(true|false)', json_str
|
||||
)
|
||||
cca_rate_match = re.search(
|
||||
r'"cca_rate"\s*:\s*([0-9.]+|null)', json_str
|
||||
)
|
||||
useful_life_match = re.search(
|
||||
r'"useful_life"\s*:\s*([0-9]+|null)', json_str
|
||||
)
|
||||
residual_value_match = re.search(
|
||||
r'"residual_value"\s*:\s*([0-9.]+|null)', json_str
|
||||
)
|
||||
|
||||
data = {
|
||||
@@ -229,7 +316,27 @@ class DocumentProcessor:
|
||||
"confidence": float(confidence_match.group(1))
|
||||
if confidence_match
|
||||
else 0.5,
|
||||
"currency": currency_match.group(1) if currency_match else "CAD"
|
||||
"currency": currency_match.group(1)
|
||||
if currency_match
|
||||
else "CAD",
|
||||
"location": location_match.group(1) if location_match else None,
|
||||
"calculated_tax": float(calculated_tax_match.group(1))
|
||||
if calculated_tax_match
|
||||
and calculated_tax_match.group(1) != "null"
|
||||
else None,
|
||||
"is_depreciable": is_depreciable_match.group(1) == "true"
|
||||
if is_depreciable_match
|
||||
else None,
|
||||
"cca_rate": float(cca_rate_match.group(1))
|
||||
if cca_rate_match and cca_rate_match.group(1) != "null"
|
||||
else None,
|
||||
"useful_life": int(useful_life_match.group(1))
|
||||
if useful_life_match and useful_life_match.group(1) != "null"
|
||||
else None,
|
||||
"residual_value": float(residual_value_match.group(1))
|
||||
if residual_value_match
|
||||
and residual_value_match.group(1) != "null"
|
||||
else None,
|
||||
}
|
||||
|
||||
# Validate and clean data
|
||||
@@ -243,6 +350,12 @@ class DocumentProcessor:
|
||||
"confidence": float(data.get("confidence", 0.5)),
|
||||
"extraction_success": True,
|
||||
"currency": data.get("currency", "CAD").strip(),
|
||||
"location": data.get("location"),
|
||||
"calculated_tax": data.get("calculated_tax"),
|
||||
"is_depreciable": data.get("is_depreciable"),
|
||||
"cca_rate": data.get("cca_rate"),
|
||||
"useful_life": data.get("useful_life"),
|
||||
"residual_value": data.get("residual_value"),
|
||||
}
|
||||
else:
|
||||
# Try to extract fields from plain text
|
||||
@@ -312,6 +425,12 @@ class DocumentProcessor:
|
||||
"category": "Other",
|
||||
"confidence": 0.3, # Low confidence for text extraction
|
||||
"extraction_success": True,
|
||||
"location": None,
|
||||
"calculated_tax": None,
|
||||
"is_depreciable": None,
|
||||
"cca_rate": None,
|
||||
"useful_life": None,
|
||||
"residual_value": None,
|
||||
}
|
||||
|
||||
except Exception as e:
|
||||
@@ -325,6 +444,12 @@ class DocumentProcessor:
|
||||
"confidence": 0.1,
|
||||
"extraction_success": False,
|
||||
"error": f"Text extraction failed: {str(e)}",
|
||||
"location": None,
|
||||
"calculated_tax": None,
|
||||
"is_depreciable": None,
|
||||
"cca_rate": None,
|
||||
"useful_life": None,
|
||||
"residual_value": None,
|
||||
}
|
||||
|
||||
async def save_uploaded_file(self, file_content: bytes, filename: str) -> str:
|
||||
|
||||
Reference in New Issue
Block a user