Add user location support for tax calculations in document processing
This commit is contained in:
+17
-9
@@ -15,6 +15,7 @@ from database import (
|
||||
from fastapi import FastAPI, File, Form, HTTPException, UploadFile
|
||||
from fastapi.middleware.cors import CORSMiddleware
|
||||
from schemas import (
|
||||
DocumentProcessRequest,
|
||||
DocumentProcessResponse,
|
||||
DocumentUploadResponse,
|
||||
MatchingResponse,
|
||||
@@ -364,26 +365,33 @@ async def upload_multiple_documents(
|
||||
|
||||
|
||||
@app.post(
|
||||
"/process/{file_id}",
|
||||
"/process",
|
||||
response_model=DocumentProcessResponse,
|
||||
tags=["Document Processing"],
|
||||
)
|
||||
async def process_document(file_id: str, db: db_dependency):
|
||||
async def process_document(request: DocumentProcessRequest, db: db_dependency):
|
||||
"""
|
||||
Process a previously uploaded document to extract receipt information.
|
||||
|
||||
This endpoint uses AI to extract structured data from receipt images,
|
||||
including vendor, amount, date, and category information.
|
||||
|
||||
Optionally accepts user_location to guide tax calculations and depreciation
|
||||
based on the user's location (format: "State/Province, Country" e.g., "Ontario, Canada").
|
||||
"""
|
||||
try:
|
||||
# Get file info from database
|
||||
db_uploaded_file = get_uploaded_file_from_db(db, file_id)
|
||||
db_uploaded_file = get_uploaded_file_from_db(db, request.file_id)
|
||||
if not db_uploaded_file:
|
||||
raise HTTPException(status_code=404, detail=f"File {file_id} not found")
|
||||
raise HTTPException(
|
||||
status_code=404, detail=f"File {request.file_id} not found"
|
||||
)
|
||||
|
||||
# Process the file using the stored file path
|
||||
receipt_data = await document_processor.process_file(
|
||||
db_uploaded_file.file_path, db_uploaded_file.file_type
|
||||
db_uploaded_file.file_path,
|
||||
db_uploaded_file.file_type,
|
||||
user_location=request.user_location,
|
||||
)
|
||||
|
||||
# Parse date for database storage
|
||||
@@ -398,8 +406,8 @@ async def process_document(file_id: str, db: db_dependency):
|
||||
|
||||
# Create database receipt object
|
||||
db_receipt = DBReceipt(
|
||||
receipt_id=f"receipt_{file_id}",
|
||||
file_id=file_id,
|
||||
receipt_id=f"receipt_{request.file_id}",
|
||||
file_id=request.file_id,
|
||||
amount=receipt_data.get("total_amount", 0.0),
|
||||
date=receipt_date,
|
||||
vendor=receipt_data.get("vendor", ""),
|
||||
@@ -425,7 +433,7 @@ async def process_document(file_id: str, db: db_dependency):
|
||||
db.commit()
|
||||
|
||||
return DocumentProcessResponse(
|
||||
file_id=file_id,
|
||||
file_id=request.file_id,
|
||||
receipt_id=db_receipt.receipt_id,
|
||||
extraction_success=receipt_data.get("extraction_success", False),
|
||||
vendor=receipt_data.get("vendor", ""),
|
||||
@@ -446,7 +454,7 @@ async def process_document(file_id: str, db: db_dependency):
|
||||
)
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error processing document {file_id}: {str(e)}")
|
||||
logger.error(f"Error processing document {request.file_id}: {str(e)}")
|
||||
raise HTTPException(status_code=500, detail=str(e))
|
||||
|
||||
|
||||
|
||||
@@ -160,6 +160,13 @@ class DocumentUploadResponse(BaseModel):
|
||||
status: str
|
||||
|
||||
|
||||
class DocumentProcessRequest(BaseModel):
|
||||
file_id: str
|
||||
user_location: Optional[str] = (
|
||||
None # Format: "State/Province, Country" (e.g., "Ontario, Canada")
|
||||
)
|
||||
|
||||
|
||||
class DocumentProcessResponse(BaseModel):
|
||||
file_id: str
|
||||
receipt_id: str
|
||||
|
||||
@@ -17,28 +17,55 @@ class DocumentProcessor:
|
||||
self.client = groq.Groq(api_key=settings.GROQ_API_KEY)
|
||||
self.model = "meta-llama/llama-4-scout-17b-16e-instruct" # Vision model
|
||||
|
||||
async def process_file(self, file_path: str, file_type: str) -> Dict[str, Any]:
|
||||
"""Process uploaded file and extract receipt data"""
|
||||
async def process_file(
|
||||
self, file_path: str, file_type: str, user_location: str = None
|
||||
) -> Dict[str, Any]:
|
||||
"""Process uploaded file and extract receipt data
|
||||
|
||||
Args:
|
||||
file_path: Path to the file to process
|
||||
file_type: Type of file (jpg, pdf, etc.)
|
||||
user_location: User's location string in format "State/Province, Country" (e.g., "Ontario, Canada")
|
||||
"""
|
||||
try:
|
||||
if file_type.lower() in ["jpg", "jpeg", "png", "gif", "bmp"]:
|
||||
return await self._process_image(file_path)
|
||||
return await self._process_image(file_path, user_location)
|
||||
elif file_type.lower() == "pdf":
|
||||
return await self._process_pdf(file_path)
|
||||
return await self._process_pdf(file_path, user_location)
|
||||
else:
|
||||
raise ValueError(f"Unsupported file type: {file_type}")
|
||||
except Exception as e:
|
||||
return {"error": str(e)}
|
||||
|
||||
async def _process_image(self, image_path: str) -> Dict[str, Any]:
|
||||
"""Extract data from image using Groq vision"""
|
||||
async def _process_image(
|
||||
self, image_path: str, user_location: str = None
|
||||
) -> Dict[str, Any]:
|
||||
"""Extract data from image using Groq vision
|
||||
|
||||
Args:
|
||||
image_path: Path to the image file
|
||||
user_location: User's location string in format "State/Province, Country" (e.g., "Ontario, Canada")
|
||||
"""
|
||||
try:
|
||||
# Encode image to base64
|
||||
base64_image = self._encode_image(image_path)
|
||||
|
||||
# Build user location context
|
||||
user_location_context = ""
|
||||
if user_location:
|
||||
user_location_context = f"""
|
||||
|
||||
USER LOCATION CONTEXT:
|
||||
The user is located in {user_location}.
|
||||
- If the receipt location is MISSING or UNCLEAR, use the user's location ({user_location}) for tax calculations.
|
||||
- If the receipt clearly shows a different location, use the receipt's location instead.
|
||||
- Apply depreciation rules based on the user's location.
|
||||
"""
|
||||
|
||||
# Create Groq vision prompt
|
||||
prompt = """
|
||||
prompt = f"""
|
||||
Analyze this receipt image and extract the following information in JSON format:
|
||||
{
|
||||
{{
|
||||
"vendor": "Store/company name",
|
||||
"description": "Detailed description of items/services purchased",
|
||||
"total_amount": 0.00,
|
||||
@@ -53,7 +80,7 @@ class DocumentProcessor:
|
||||
"cca_rate": null,
|
||||
"useful_life": null,
|
||||
"residual_value": null
|
||||
}
|
||||
}}
|
||||
|
||||
Rules:
|
||||
- Extract vendor name as it appears on receipt
|
||||
@@ -64,11 +91,11 @@ class DocumentProcessor:
|
||||
- Categorize based on vendor type (Starbucks=Food, Shell=Transport, etc.)
|
||||
- Confidence score 0-1 based on how clear the receipt is
|
||||
- Currency should be the currency used on the receipt (e.g., "USD", "EUR", "CAD")
|
||||
|
||||
{user_location_context}
|
||||
LOCATION & TAX RULES:
|
||||
- Extract location from receipt (look for store address, province/state, country)
|
||||
- Format location as "Province/State, Country" (e.g., "Ontario, Canada" or "California, USA")
|
||||
- If location not shown, return null for location
|
||||
- If location not shown on receipt, return null for location (system will use user location as fallback)
|
||||
|
||||
- If tax_amount is NOT clearly shown on receipt, calculate it based on location:
|
||||
* Ontario, Canada: 13% HST
|
||||
@@ -132,12 +159,19 @@ class DocumentProcessor:
|
||||
with open(image_path, "rb") as image_file:
|
||||
return base64.b64encode(image_file.read()).decode("utf-8")
|
||||
|
||||
async def _process_pdf(self, pdf_path: str) -> Dict[str, Any]:
|
||||
"""Extract data from PDF by converting to image first"""
|
||||
async def _process_pdf(
|
||||
self, pdf_path: str, user_location: str = None
|
||||
) -> Dict[str, Any]:
|
||||
"""Extract data from PDF by extracting its text and processing it as text
|
||||
|
||||
Args:
|
||||
pdf_path: Path to the PDF file
|
||||
user_location: User's location string in format "State/Province, Country" (e.g., "Ontario, Canada")
|
||||
"""
|
||||
try:
|
||||
# For now, extract text from PDF and process as text
|
||||
text_content = self._extract_text_from_pdf(pdf_path)
|
||||
return self._process_text_content(text_content)
|
||||
return self._process_text_content(text_content, user_location)
|
||||
|
||||
except Exception as e:
|
||||
return {"error": f"PDF processing error: {str(e)}"}
|
||||
@@ -154,9 +188,28 @@ class DocumentProcessor:
|
||||
except Exception:
|
||||
return ""
|
||||
|
||||
def _process_text_content(self, text_content: str) -> Dict[str, Any]:
|
||||
"""Process text content using Groq (fallback for PDFs)"""
|
||||
def _process_text_content(
|
||||
self, text_content: str, user_location: str = None
|
||||
) -> Dict[str, Any]:
|
||||
"""Process text content using Groq (fallback for PDFs)
|
||||
|
||||
Args:
|
||||
text_content: Extracted text from PDF
|
||||
user_location: User's location string in format "State/Province, Country" (e.g., "Ontario, Canada")
|
||||
"""
|
||||
try:
|
||||
# Build user location context
|
||||
user_location_context = ""
|
||||
if user_location:
|
||||
user_location_context = f"""
|
||||
|
||||
USER LOCATION CONTEXT:
|
||||
The user is located in {user_location}.
|
||||
- If the receipt location is MISSING or UNCLEAR, use the user's location ({user_location}) for tax calculations.
|
||||
- If the receipt clearly shows a different location, use the receipt's location instead.
|
||||
- Apply depreciation rules based on the user's location.
|
||||
"""
|
||||
|
||||
prompt = f"""
|
||||
Analyze this receipt text and extract the following information in JSON format:
|
||||
|
||||
@@ -190,11 +243,11 @@ class DocumentProcessor:
|
||||
- Categorize based on vendor type
|
||||
- Confidence score 0-1 based on clarity
|
||||
- Currency should be the currency used on the receipt (e.g., "USD", "EUR", "CAD")
|
||||
|
||||
{user_location_context}
|
||||
LOCATION & TAX RULES:
|
||||
- Extract location from receipt (look for store address, province/state, country)
|
||||
- Format location as "Province/State, Country" (e.g., "Ontario, Canada" or "California, USA")
|
||||
- If location not shown, return null for location
|
||||
- If location not shown on receipt, return null for location (system will use user location as fallback)
|
||||
|
||||
- If tax_amount is NOT clearly shown on receipt, calculate it based on location:
|
||||
* Ontario, Canada: 13% HST
|
||||
|
||||
Reference in New Issue
Block a user