diff --git a/app/database.py b/app/database.py index 9f8ea5b..0e7abda 100644 --- a/app/database.py +++ b/app/database.py @@ -88,3 +88,9 @@ class DBReceipt(Base): extraction_success = Column(String, nullable=True) error_message = Column(String, nullable=True) receipt_currency = Column(String, nullable=True) + receipt_location = Column(String, nullable=True) + calculated_tax = Column(Float, nullable=True) + is_depreciable = Column(String, nullable=True) # Store as string "True"/"False" + cca_rate = Column(Float, nullable=True) + useful_life = Column(Integer, nullable=True) + residual_value = Column(Float, nullable=True) diff --git a/app/main.py b/app/main.py index 37f506d..6a06ffc 100644 --- a/app/main.py +++ b/app/main.py @@ -410,6 +410,14 @@ async def process_document(file_id: str, db: db_dependency): extraction_success=str(receipt_data.get("extraction_success", False)), error_message=receipt_data.get("error"), receipt_currency=receipt_data.get("currency"), + receipt_location=receipt_data.get("location"), + calculated_tax=receipt_data.get("calculated_tax"), + is_depreciable=str(receipt_data.get("is_depreciable")) + if receipt_data.get("is_depreciable") is not None + else None, + cca_rate=receipt_data.get("cca_rate"), + useful_life=receipt_data.get("useful_life"), + residual_value=receipt_data.get("residual_value"), ) # Add to database @@ -429,6 +437,12 @@ async def process_document(file_id: str, db: db_dependency): confidence=receipt_data.get("confidence", 0.0), error=receipt_data.get("error", None), receipt_currency=receipt_data.get("currency"), + receipt_location=receipt_data.get("location"), + calculated_tax=receipt_data.get("calculated_tax"), + is_depreciable=receipt_data.get("is_depreciable"), + cca_rate=receipt_data.get("cca_rate"), + useful_life=receipt_data.get("useful_life"), + residual_value=receipt_data.get("residual_value"), ) except Exception as e: diff --git a/app/schemas.py b/app/schemas.py index 0910f5d..6649e76 100644 --- a/app/schemas.py +++ b/app/schemas.py @@ -173,6 +173,20 @@ class DocumentProcessResponse(BaseModel): confidence: Optional[float] = None error: Optional[str] = None receipt_currency: Optional[str] = "CAD" + receipt_location: Optional[str] = ( + None # Location from receipt (e.g., "Ontario, Canada" or "California, USA") + ) + calculated_tax: Optional[float] = None # Calculated sales tax if not clearly shown + is_depreciable: Optional[bool] = None # Whether item is a depreciable asset + cca_rate: Optional[float] = ( + None # CCA rate for tax depreciation (e.g., 0.30 for 30%) + ) + useful_life: Optional[int] = ( + None # Useful life in years for straight-line depreciation + ) + residual_value: Optional[float] = ( + None # Residual value for straight-line depreciation + ) # New tax-related models diff --git a/app/services/document_processor.py b/app/services/document_processor.py index 6745e72..0200100 100644 --- a/app/services/document_processor.py +++ b/app/services/document_processor.py @@ -7,7 +7,6 @@ from typing import Any, Dict import aiofiles import groq import PyPDF2 - from config import settings logger = logging.getLogger(__name__) @@ -47,18 +46,55 @@ class DocumentProcessor: "date": "YYYY-MM-DD", "category": "Food/Transport/Office/Other", "confidence": 0.95, - "currency": "USD" + "currency": "USD", + "location": "Province/State, Country", + "calculated_tax": 0.00, + "is_depreciable": false, + "cca_rate": null, + "useful_life": null, + "residual_value": null } Rules: - Extract vendor name as it appears on receipt - Extract description of items/services purchased (e.g., "Coffee and sandwich", "Gasoline", "Office supplies") - Total amount should be the final total including tax - - Tax amount is separate tax line if available + - Tax amount is separate tax line if available (if not clearly shown, calculate based on location) - Date should be the date on the receipt - Categorize based on vendor type (Starbucks=Food, Shell=Transport, etc.) - Confidence score 0-1 based on how clear the receipt is - - Currency should be the currency used on the receipt (e.g., "USD", "EUR") + - Currency should be the currency used on the receipt (e.g., "USD", "EUR", "CAD") + + LOCATION & TAX RULES: + - Extract location from receipt (look for store address, province/state, country) + - Format location as "Province/State, Country" (e.g., "Ontario, Canada" or "California, USA") + - If location not shown, return null for location + + - If tax_amount is NOT clearly shown on receipt, calculate it based on location: + * Ontario, Canada: 13% HST + * Quebec, Canada: 9.975% QST + 5% GST = 14.975% total + * British Columbia, Canada: 12% (5% GST + 7% PST) + * Alberta, Canada: 5% GST + * California, USA: ~7.25% (varies by locality) + * New York, USA: ~8.875% (varies by locality) + * Texas, USA: 6.25% + * For other locations, estimate based on typical rates + - Store calculated tax in "calculated_tax" field (set to null if tax clearly shown) + - If tax is clearly shown on receipt, use that value for tax_amount and set calculated_tax to null + + DEPRECIATION RULES: + - Determine if item is a depreciable asset (vehicles, machinery, equipment, computers, furniture, buildings) + - Set is_depreciable to true only for capital assets, false for consumables/services + - If is_depreciable is true, provide: + * cca_rate: CCA rate as decimal (e.g., 0.30 for 30%, 0.20 for 20%, 0.04 for 4%) + - Class 10 (Vehicles): 30% + - Class 8 (Furniture, equipment): 20% + - Class 50 (Computers, software): 55% + - Class 1 (Buildings): 4% + - Class 10.1 (Passenger vehicles >$30k): 30% + * useful_life: Expected years of use (e.g., 5 for computers, 8 for vehicles, 10 for furniture) + * residual_value: Estimated value at end of life (typically 10% of purchase price for equipment, 20% for vehicles) + - If is_depreciable is false, set cca_rate, useful_life, and residual_value to null Return only valid JSON. """ @@ -80,7 +116,7 @@ class DocumentProcessor: } ], model=self.model, - max_tokens=500, + max_tokens=800, temperature=0.1, ) @@ -136,18 +172,55 @@ class DocumentProcessor: "date": "YYYY-MM-DD", "category": "Food/Transport/Office/Other", "confidence": 0.95, - "currency": "USD" + "currency": "USD", + "location": "Province/State, Country", + "calculated_tax": 0.00, + "is_depreciable": false, + "cca_rate": null, + "useful_life": null, + "residual_value": null }} Rules: - Extract vendor name as it appears on receipt - Extract description of items/services purchased (e.g., "Coffee and sandwich", "Gasoline", "Office supplies") - Total amount should be the final total including tax - - Tax amount is separate tax line if available + - Tax amount is separate tax line if available (if not clearly shown, calculate based on location) - Date should be the date on the receipt - Categorize based on vendor type - Confidence score 0-1 based on clarity - - Currency should be the currency used on the receipt (e.g., "USD", "EUR") + - Currency should be the currency used on the receipt (e.g., "USD", "EUR", "CAD") + + LOCATION & TAX RULES: + - Extract location from receipt (look for store address, province/state, country) + - Format location as "Province/State, Country" (e.g., "Ontario, Canada" or "California, USA") + - If location not shown, return null for location + + - If tax_amount is NOT clearly shown on receipt, calculate it based on location: + * Ontario, Canada: 13% HST + * Quebec, Canada: 9.975% QST + 5% GST = 14.975% total + * British Columbia, Canada: 12% (5% GST + 7% PST) + * Alberta, Canada: 5% GST + * California, USA: ~7.25% (varies by locality) + * New York, USA: ~8.875% (varies by locality) + * Texas, USA: 6.25% + * For other locations, estimate based on typical rates + - Store calculated tax in "calculated_tax" field (set to null if tax clearly shown) + - If tax is clearly shown on receipt, use that value for tax_amount and set calculated_tax to null + + DEPRECIATION RULES: + - Determine if item is a depreciable asset (vehicles, machinery, equipment, computers, furniture, buildings) + - Set is_depreciable to true only for capital assets, false for consumables/services + - If is_depreciable is true, provide: + * cca_rate: CCA rate as decimal (e.g., 0.30 for 30%, 0.20 for 20%, 0.04 for 4%) + - Class 10 (Vehicles): 30% + - Class 8 (Furniture, equipment): 20% + - Class 50 (Computers, software): 55% + - Class 1 (Buildings): 4% + - Class 10.1 (Passenger vehicles >$30k): 30% + * useful_life: Expected years of use (e.g., 5 for computers, 8 for vehicles, 10 for furniture) + * residual_value: Estimated value at end of life (typically 10% of purchase price for equipment, 20% for vehicles) + - If is_depreciable is false, set cca_rate, useful_life, and residual_value to null Return only valid JSON. """ @@ -155,7 +228,7 @@ class DocumentProcessor: response = self.client.chat.completions.create( model=self.model, messages=[{"role": "user", "content": prompt}], - max_tokens=500, + max_tokens=800, temperature=0.1, ) @@ -207,8 +280,22 @@ class DocumentProcessor: confidence_match = re.search( r'"confidence"\s*:\s*([0-9.]+)', json_str ) - currency_match = re.search( - r'"currency"\s*:\s*"([^"]*)"', json_str + currency_match = re.search(r'"currency"\s*:\s*"([^"]*)"', json_str) + location_match = re.search(r'"location"\s*:\s*"([^"]*)"', json_str) + calculated_tax_match = re.search( + r'"calculated_tax"\s*:\s*([0-9.]+|null)', json_str + ) + is_depreciable_match = re.search( + r'"is_depreciable"\s*:\s*(true|false)', json_str + ) + cca_rate_match = re.search( + r'"cca_rate"\s*:\s*([0-9.]+|null)', json_str + ) + useful_life_match = re.search( + r'"useful_life"\s*:\s*([0-9]+|null)', json_str + ) + residual_value_match = re.search( + r'"residual_value"\s*:\s*([0-9.]+|null)', json_str ) data = { @@ -229,7 +316,27 @@ class DocumentProcessor: "confidence": float(confidence_match.group(1)) if confidence_match else 0.5, - "currency": currency_match.group(1) if currency_match else "CAD" + "currency": currency_match.group(1) + if currency_match + else "CAD", + "location": location_match.group(1) if location_match else None, + "calculated_tax": float(calculated_tax_match.group(1)) + if calculated_tax_match + and calculated_tax_match.group(1) != "null" + else None, + "is_depreciable": is_depreciable_match.group(1) == "true" + if is_depreciable_match + else None, + "cca_rate": float(cca_rate_match.group(1)) + if cca_rate_match and cca_rate_match.group(1) != "null" + else None, + "useful_life": int(useful_life_match.group(1)) + if useful_life_match and useful_life_match.group(1) != "null" + else None, + "residual_value": float(residual_value_match.group(1)) + if residual_value_match + and residual_value_match.group(1) != "null" + else None, } # Validate and clean data @@ -243,6 +350,12 @@ class DocumentProcessor: "confidence": float(data.get("confidence", 0.5)), "extraction_success": True, "currency": data.get("currency", "CAD").strip(), + "location": data.get("location"), + "calculated_tax": data.get("calculated_tax"), + "is_depreciable": data.get("is_depreciable"), + "cca_rate": data.get("cca_rate"), + "useful_life": data.get("useful_life"), + "residual_value": data.get("residual_value"), } else: # Try to extract fields from plain text @@ -312,6 +425,12 @@ class DocumentProcessor: "category": "Other", "confidence": 0.3, # Low confidence for text extraction "extraction_success": True, + "location": None, + "calculated_tax": None, + "is_depreciable": None, + "cca_rate": None, + "useful_life": None, + "residual_value": None, } except Exception as e: @@ -325,6 +444,12 @@ class DocumentProcessor: "confidence": 0.1, "extraction_success": False, "error": f"Text extraction failed: {str(e)}", + "location": None, + "calculated_tax": None, + "is_depreciable": None, + "cca_rate": None, + "useful_life": None, + "residual_value": None, } async def save_uploaded_file(self, file_content: bytes, filename: str) -> str: diff --git a/app/services/rules.py b/app/services/rules.py index cb3f3cf..30b10c0 100644 --- a/app/services/rules.py +++ b/app/services/rules.py @@ -44,17 +44,7 @@ If the location is missing, apply the user’s location sales tax by default. **FX (Foreign Exchange):** If the receipt is in a different currency, flag the FX difference for manual review but don’t fetch exchange rates. -### Tax Rules: -Four Rules for Tax and Depreciation Handling -### 1. **Sales Tax Rule** -**Purpose**: To calculate and apply the correct sales tax based on the shipping and billing addresses. -- **When Billing and Shipping Address are the Same**: Apply the sales tax rate based on the billing address. -- **When Billing and Shipping Address are Different**: Apply the sales tax rate based on the shipping address. - -**Example**: -1. If the billing and shipping address are in Ontario, the system will apply the 13% HST tax rate based on Ontario's tax rate. -2. If the billing address is in Ontario but the shipping address is in Quebec, the system will apply the 14.975% QST tax rate based on the shipping address. - + ### 2. **Foreign Exchange (FX) Rule** **Purpose**: To handle discrepancies when transactions and receipts are in different currencies (e.g., USD vs. CAD). - **Action**: Identify the currency mismatch, but do not automatically fetch the exchange rate. Flag the FX difference for manual review, allowing the user to approve or adjust the balance.