Add new fields for tax and depreciation in receipt processing

- Introduced fields: receipt_location, calculated_tax, is_depreciable, cca_rate, useful_life, and residual_value in DBReceipt model.
- Updated process_document function to handle new receipt data attributes.
- Enhanced DocumentProcessResponse schema to include new fields.
- Updated document processing rules to incorporate tax calculation based on location and depreciation rules.
This commit is contained in:
bolade
2025-10-07 11:15:26 +01:00
parent 823c05f78d
commit d8315f13ac
5 changed files with 172 additions and 23 deletions
+6
View File
@@ -88,3 +88,9 @@ class DBReceipt(Base):
extraction_success = Column(String, nullable=True) extraction_success = Column(String, nullable=True)
error_message = Column(String, nullable=True) error_message = Column(String, nullable=True)
receipt_currency = Column(String, nullable=True) receipt_currency = Column(String, nullable=True)
receipt_location = Column(String, nullable=True)
calculated_tax = Column(Float, nullable=True)
is_depreciable = Column(String, nullable=True) # Store as string "True"/"False"
cca_rate = Column(Float, nullable=True)
useful_life = Column(Integer, nullable=True)
residual_value = Column(Float, nullable=True)
+14
View File
@@ -410,6 +410,14 @@ async def process_document(file_id: str, db: db_dependency):
extraction_success=str(receipt_data.get("extraction_success", False)), extraction_success=str(receipt_data.get("extraction_success", False)),
error_message=receipt_data.get("error"), error_message=receipt_data.get("error"),
receipt_currency=receipt_data.get("currency"), receipt_currency=receipt_data.get("currency"),
receipt_location=receipt_data.get("location"),
calculated_tax=receipt_data.get("calculated_tax"),
is_depreciable=str(receipt_data.get("is_depreciable"))
if receipt_data.get("is_depreciable") is not None
else None,
cca_rate=receipt_data.get("cca_rate"),
useful_life=receipt_data.get("useful_life"),
residual_value=receipt_data.get("residual_value"),
) )
# Add to database # Add to database
@@ -429,6 +437,12 @@ async def process_document(file_id: str, db: db_dependency):
confidence=receipt_data.get("confidence", 0.0), confidence=receipt_data.get("confidence", 0.0),
error=receipt_data.get("error", None), error=receipt_data.get("error", None),
receipt_currency=receipt_data.get("currency"), receipt_currency=receipt_data.get("currency"),
receipt_location=receipt_data.get("location"),
calculated_tax=receipt_data.get("calculated_tax"),
is_depreciable=receipt_data.get("is_depreciable"),
cca_rate=receipt_data.get("cca_rate"),
useful_life=receipt_data.get("useful_life"),
residual_value=receipt_data.get("residual_value"),
) )
except Exception as e: except Exception as e:
+14
View File
@@ -173,6 +173,20 @@ class DocumentProcessResponse(BaseModel):
confidence: Optional[float] = None confidence: Optional[float] = None
error: Optional[str] = None error: Optional[str] = None
receipt_currency: Optional[str] = "CAD" receipt_currency: Optional[str] = "CAD"
receipt_location: Optional[str] = (
None # Location from receipt (e.g., "Ontario, Canada" or "California, USA")
)
calculated_tax: Optional[float] = None # Calculated sales tax if not clearly shown
is_depreciable: Optional[bool] = None # Whether item is a depreciable asset
cca_rate: Optional[float] = (
None # CCA rate for tax depreciation (e.g., 0.30 for 30%)
)
useful_life: Optional[int] = (
None # Useful life in years for straight-line depreciation
)
residual_value: Optional[float] = (
None # Residual value for straight-line depreciation
)
# New tax-related models # New tax-related models
+137 -12
View File
@@ -7,7 +7,6 @@ from typing import Any, Dict
import aiofiles import aiofiles
import groq import groq
import PyPDF2 import PyPDF2
from config import settings from config import settings
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
@@ -47,18 +46,55 @@ class DocumentProcessor:
"date": "YYYY-MM-DD", "date": "YYYY-MM-DD",
"category": "Food/Transport/Office/Other", "category": "Food/Transport/Office/Other",
"confidence": 0.95, "confidence": 0.95,
"currency": "USD" "currency": "USD",
"location": "Province/State, Country",
"calculated_tax": 0.00,
"is_depreciable": false,
"cca_rate": null,
"useful_life": null,
"residual_value": null
} }
Rules: Rules:
- Extract vendor name as it appears on receipt - Extract vendor name as it appears on receipt
- Extract description of items/services purchased (e.g., "Coffee and sandwich", "Gasoline", "Office supplies") - Extract description of items/services purchased (e.g., "Coffee and sandwich", "Gasoline", "Office supplies")
- Total amount should be the final total including tax - Total amount should be the final total including tax
- Tax amount is separate tax line if available - Tax amount is separate tax line if available (if not clearly shown, calculate based on location)
- Date should be the date on the receipt - Date should be the date on the receipt
- Categorize based on vendor type (Starbucks=Food, Shell=Transport, etc.) - Categorize based on vendor type (Starbucks=Food, Shell=Transport, etc.)
- Confidence score 0-1 based on how clear the receipt is - Confidence score 0-1 based on how clear the receipt is
- Currency should be the currency used on the receipt (e.g., "USD", "EUR") - Currency should be the currency used on the receipt (e.g., "USD", "EUR", "CAD")
LOCATION & TAX RULES:
- Extract location from receipt (look for store address, province/state, country)
- Format location as "Province/State, Country" (e.g., "Ontario, Canada" or "California, USA")
- If location not shown, return null for location
- If tax_amount is NOT clearly shown on receipt, calculate it based on location:
* Ontario, Canada: 13% HST
* Quebec, Canada: 9.975% QST + 5% GST = 14.975% total
* British Columbia, Canada: 12% (5% GST + 7% PST)
* Alberta, Canada: 5% GST
* California, USA: ~7.25% (varies by locality)
* New York, USA: ~8.875% (varies by locality)
* Texas, USA: 6.25%
* For other locations, estimate based on typical rates
- Store calculated tax in "calculated_tax" field (set to null if tax clearly shown)
- If tax is clearly shown on receipt, use that value for tax_amount and set calculated_tax to null
DEPRECIATION RULES:
- Determine if item is a depreciable asset (vehicles, machinery, equipment, computers, furniture, buildings)
- Set is_depreciable to true only for capital assets, false for consumables/services
- If is_depreciable is true, provide:
* cca_rate: CCA rate as decimal (e.g., 0.30 for 30%, 0.20 for 20%, 0.04 for 4%)
- Class 10 (Vehicles): 30%
- Class 8 (Furniture, equipment): 20%
- Class 50 (Computers, software): 55%
- Class 1 (Buildings): 4%
- Class 10.1 (Passenger vehicles >$30k): 30%
* useful_life: Expected years of use (e.g., 5 for computers, 8 for vehicles, 10 for furniture)
* residual_value: Estimated value at end of life (typically 10% of purchase price for equipment, 20% for vehicles)
- If is_depreciable is false, set cca_rate, useful_life, and residual_value to null
Return only valid JSON. Return only valid JSON.
""" """
@@ -80,7 +116,7 @@ class DocumentProcessor:
} }
], ],
model=self.model, model=self.model,
max_tokens=500, max_tokens=800,
temperature=0.1, temperature=0.1,
) )
@@ -136,18 +172,55 @@ class DocumentProcessor:
"date": "YYYY-MM-DD", "date": "YYYY-MM-DD",
"category": "Food/Transport/Office/Other", "category": "Food/Transport/Office/Other",
"confidence": 0.95, "confidence": 0.95,
"currency": "USD" "currency": "USD",
"location": "Province/State, Country",
"calculated_tax": 0.00,
"is_depreciable": false,
"cca_rate": null,
"useful_life": null,
"residual_value": null
}} }}
Rules: Rules:
- Extract vendor name as it appears on receipt - Extract vendor name as it appears on receipt
- Extract description of items/services purchased (e.g., "Coffee and sandwich", "Gasoline", "Office supplies") - Extract description of items/services purchased (e.g., "Coffee and sandwich", "Gasoline", "Office supplies")
- Total amount should be the final total including tax - Total amount should be the final total including tax
- Tax amount is separate tax line if available - Tax amount is separate tax line if available (if not clearly shown, calculate based on location)
- Date should be the date on the receipt - Date should be the date on the receipt
- Categorize based on vendor type - Categorize based on vendor type
- Confidence score 0-1 based on clarity - Confidence score 0-1 based on clarity
- Currency should be the currency used on the receipt (e.g., "USD", "EUR") - Currency should be the currency used on the receipt (e.g., "USD", "EUR", "CAD")
LOCATION & TAX RULES:
- Extract location from receipt (look for store address, province/state, country)
- Format location as "Province/State, Country" (e.g., "Ontario, Canada" or "California, USA")
- If location not shown, return null for location
- If tax_amount is NOT clearly shown on receipt, calculate it based on location:
* Ontario, Canada: 13% HST
* Quebec, Canada: 9.975% QST + 5% GST = 14.975% total
* British Columbia, Canada: 12% (5% GST + 7% PST)
* Alberta, Canada: 5% GST
* California, USA: ~7.25% (varies by locality)
* New York, USA: ~8.875% (varies by locality)
* Texas, USA: 6.25%
* For other locations, estimate based on typical rates
- Store calculated tax in "calculated_tax" field (set to null if tax clearly shown)
- If tax is clearly shown on receipt, use that value for tax_amount and set calculated_tax to null
DEPRECIATION RULES:
- Determine if item is a depreciable asset (vehicles, machinery, equipment, computers, furniture, buildings)
- Set is_depreciable to true only for capital assets, false for consumables/services
- If is_depreciable is true, provide:
* cca_rate: CCA rate as decimal (e.g., 0.30 for 30%, 0.20 for 20%, 0.04 for 4%)
- Class 10 (Vehicles): 30%
- Class 8 (Furniture, equipment): 20%
- Class 50 (Computers, software): 55%
- Class 1 (Buildings): 4%
- Class 10.1 (Passenger vehicles >$30k): 30%
* useful_life: Expected years of use (e.g., 5 for computers, 8 for vehicles, 10 for furniture)
* residual_value: Estimated value at end of life (typically 10% of purchase price for equipment, 20% for vehicles)
- If is_depreciable is false, set cca_rate, useful_life, and residual_value to null
Return only valid JSON. Return only valid JSON.
""" """
@@ -155,7 +228,7 @@ class DocumentProcessor:
response = self.client.chat.completions.create( response = self.client.chat.completions.create(
model=self.model, model=self.model,
messages=[{"role": "user", "content": prompt}], messages=[{"role": "user", "content": prompt}],
max_tokens=500, max_tokens=800,
temperature=0.1, temperature=0.1,
) )
@@ -207,8 +280,22 @@ class DocumentProcessor:
confidence_match = re.search( confidence_match = re.search(
r'"confidence"\s*:\s*([0-9.]+)', json_str r'"confidence"\s*:\s*([0-9.]+)', json_str
) )
currency_match = re.search( currency_match = re.search(r'"currency"\s*:\s*"([^"]*)"', json_str)
r'"currency"\s*:\s*"([^"]*)"', json_str location_match = re.search(r'"location"\s*:\s*"([^"]*)"', json_str)
calculated_tax_match = re.search(
r'"calculated_tax"\s*:\s*([0-9.]+|null)', json_str
)
is_depreciable_match = re.search(
r'"is_depreciable"\s*:\s*(true|false)', json_str
)
cca_rate_match = re.search(
r'"cca_rate"\s*:\s*([0-9.]+|null)', json_str
)
useful_life_match = re.search(
r'"useful_life"\s*:\s*([0-9]+|null)', json_str
)
residual_value_match = re.search(
r'"residual_value"\s*:\s*([0-9.]+|null)', json_str
) )
data = { data = {
@@ -229,7 +316,27 @@ class DocumentProcessor:
"confidence": float(confidence_match.group(1)) "confidence": float(confidence_match.group(1))
if confidence_match if confidence_match
else 0.5, else 0.5,
"currency": currency_match.group(1) if currency_match else "CAD" "currency": currency_match.group(1)
if currency_match
else "CAD",
"location": location_match.group(1) if location_match else None,
"calculated_tax": float(calculated_tax_match.group(1))
if calculated_tax_match
and calculated_tax_match.group(1) != "null"
else None,
"is_depreciable": is_depreciable_match.group(1) == "true"
if is_depreciable_match
else None,
"cca_rate": float(cca_rate_match.group(1))
if cca_rate_match and cca_rate_match.group(1) != "null"
else None,
"useful_life": int(useful_life_match.group(1))
if useful_life_match and useful_life_match.group(1) != "null"
else None,
"residual_value": float(residual_value_match.group(1))
if residual_value_match
and residual_value_match.group(1) != "null"
else None,
} }
# Validate and clean data # Validate and clean data
@@ -243,6 +350,12 @@ class DocumentProcessor:
"confidence": float(data.get("confidence", 0.5)), "confidence": float(data.get("confidence", 0.5)),
"extraction_success": True, "extraction_success": True,
"currency": data.get("currency", "CAD").strip(), "currency": data.get("currency", "CAD").strip(),
"location": data.get("location"),
"calculated_tax": data.get("calculated_tax"),
"is_depreciable": data.get("is_depreciable"),
"cca_rate": data.get("cca_rate"),
"useful_life": data.get("useful_life"),
"residual_value": data.get("residual_value"),
} }
else: else:
# Try to extract fields from plain text # Try to extract fields from plain text
@@ -312,6 +425,12 @@ class DocumentProcessor:
"category": "Other", "category": "Other",
"confidence": 0.3, # Low confidence for text extraction "confidence": 0.3, # Low confidence for text extraction
"extraction_success": True, "extraction_success": True,
"location": None,
"calculated_tax": None,
"is_depreciable": None,
"cca_rate": None,
"useful_life": None,
"residual_value": None,
} }
except Exception as e: except Exception as e:
@@ -325,6 +444,12 @@ class DocumentProcessor:
"confidence": 0.1, "confidence": 0.1,
"extraction_success": False, "extraction_success": False,
"error": f"Text extraction failed: {str(e)}", "error": f"Text extraction failed: {str(e)}",
"location": None,
"calculated_tax": None,
"is_depreciable": None,
"cca_rate": None,
"useful_life": None,
"residual_value": None,
} }
async def save_uploaded_file(self, file_content: bytes, filename: str) -> str: async def save_uploaded_file(self, file_content: bytes, filename: str) -> str:
+1 -11
View File
@@ -44,17 +44,7 @@ If the location is missing, apply the user's location sales tax by default.
**FX (Foreign Exchange):** If the receipt is in a different currency, flag the FX difference for manual review but don't fetch exchange rates. **FX (Foreign Exchange):** If the receipt is in a different currency, flag the FX difference for manual review but don't fetch exchange rates.
### Tax Rules:
Four Rules for Tax and Depreciation Handling
### 1. **Sales Tax Rule**
**Purpose**: To calculate and apply the correct sales tax based on the shipping and billing addresses.
- **When Billing and Shipping Address are the Same**: Apply the sales tax rate based on the billing address.
- **When Billing and Shipping Address are Different**: Apply the sales tax rate based on the shipping address.
**Example**:
1. If the billing and shipping addresses are both in Ontario, the system applies Ontario's 13% HST rate.
2. If the billing address is in Ontario but the shipping address is in Quebec, the system applies Quebec's combined 14.975% rate (5% GST + 9.975% QST), based on the shipping address.
### 2. **Foreign Exchange (FX) Rule** ### 2. **Foreign Exchange (FX) Rule**
**Purpose**: To handle discrepancies when transactions and receipts are in different currencies (e.g., USD vs. CAD). **Purpose**: To handle discrepancies when transactions and receipts are in different currencies (e.g., USD vs. CAD).
- **Action**: Identify the currency mismatch, but do not automatically fetch the exchange rate. Flag the FX difference for manual review, allowing the user to approve or adjust the balance. - **Action**: Identify the currency mismatch, but do not automatically fetch the exchange rate. Flag the FX difference for manual review, allowing the user to approve or adjust the balance.