Add user location support for tax calculations in document processing

This commit is contained in:
bolade
2025-10-07 12:03:26 +01:00
parent d8315f13ac
commit 659ca4ff15
3 changed files with 95 additions and 27 deletions
+71 -18
View File
@@ -17,28 +17,55 @@ class DocumentProcessor:
self.client = groq.Groq(api_key=settings.GROQ_API_KEY)
self.model = "meta-llama/llama-4-scout-17b-16e-instruct" # Vision model
async def process_file(self, file_path: str, file_type: str) -> Dict[str, Any]:
"""Process uploaded file and extract receipt data"""
async def process_file(
self, file_path: str, file_type: str, user_location: str = None
) -> Dict[str, Any]:
"""Process uploaded file and extract receipt data
Args:
file_path: Path to the file to process
file_type: Type of file (jpg, pdf, etc.)
user_location: User's location string in format "State/Province, Country" (e.g., "Ontario, Canada")
"""
try:
if file_type.lower() in ["jpg", "jpeg", "png", "gif", "bmp"]:
return await self._process_image(file_path)
return await self._process_image(file_path, user_location)
elif file_type.lower() == "pdf":
return await self._process_pdf(file_path)
return await self._process_pdf(file_path, user_location)
else:
raise ValueError(f"Unsupported file type: {file_type}")
except Exception as e:
return {"error": str(e)}
async def _process_image(self, image_path: str) -> Dict[str, Any]:
"""Extract data from image using Groq vision"""
async def _process_image(
self, image_path: str, user_location: str = None
) -> Dict[str, Any]:
"""Extract data from image using Groq vision
Args:
image_path: Path to the image file
user_location: User's location string in format "State/Province, Country" (e.g., "Ontario, Canada")
"""
try:
# Encode image to base64
base64_image = self._encode_image(image_path)
# Build user location context
user_location_context = ""
if user_location:
user_location_context = f"""
USER LOCATION CONTEXT:
The user is located in {user_location}.
- If the receipt location is MISSING or UNCLEAR, use the user's location ({user_location}) for tax calculations.
- If the receipt clearly shows a different location, use the receipt's location instead.
- Apply depreciation rules based on the user's location.
"""
# Create Groq vision prompt
prompt = """
prompt = f"""
Analyze this receipt image and extract the following information in JSON format:
{
{{
"vendor": "Store/company name",
"description": "Detailed description of items/services purchased",
"total_amount": 0.00,
@@ -53,7 +80,7 @@ class DocumentProcessor:
"cca_rate": null,
"useful_life": null,
"residual_value": null
}
}}
Rules:
- Extract vendor name as it appears on receipt
@@ -64,11 +91,11 @@ class DocumentProcessor:
- Categorize based on vendor type (Starbucks=Food, Shell=Transport, etc.)
- Confidence score 0-1 based on how clear the receipt is
- Currency should be the currency used on the receipt (e.g., "USD", "EUR", "CAD")
{user_location_context}
LOCATION & TAX RULES:
- Extract location from receipt (look for store address, province/state, country)
- Format location as "Province/State, Country" (e.g., "Ontario, Canada" or "California, USA")
- If location not shown, return null for location
- If location not shown on receipt, return null for location (system will use user location as fallback)
- If tax_amount is NOT clearly shown on receipt, calculate it based on location:
* Ontario, Canada: 13% HST
@@ -132,12 +159,19 @@ class DocumentProcessor:
with open(image_path, "rb") as image_file:
return base64.b64encode(image_file.read()).decode("utf-8")
async def _process_pdf(self, pdf_path: str) -> Dict[str, Any]:
"""Extract data from PDF by converting to image first"""
async def _process_pdf(
self, pdf_path: str, user_location: str = None
) -> Dict[str, Any]:
"""Extract data from PDF (for now via text extraction; conversion to image is not implemented yet)
Args:
pdf_path: Path to the PDF file
user_location: User's location string in format "State/Province, Country" (e.g., "Ontario, Canada")
"""
try:
# For now, extract text from PDF and process as text
text_content = self._extract_text_from_pdf(pdf_path)
return self._process_text_content(text_content)
return self._process_text_content(text_content, user_location)
except Exception as e:
return {"error": f"PDF processing error: {str(e)}"}
@@ -154,9 +188,28 @@ class DocumentProcessor:
except Exception:
return ""
def _process_text_content(self, text_content: str) -> Dict[str, Any]:
"""Process text content using Groq (fallback for PDFs)"""
def _process_text_content(
self, text_content: str, user_location: str = None
) -> Dict[str, Any]:
"""Process text content using Groq (fallback for PDFs)
Args:
text_content: Extracted text from PDF
user_location: User's location string in format "State/Province, Country" (e.g., "Ontario, Canada")
"""
try:
# Build user location context
user_location_context = ""
if user_location:
user_location_context = f"""
USER LOCATION CONTEXT:
The user is located in {user_location}.
- If the receipt location is MISSING or UNCLEAR, use the user's location ({user_location}) for tax calculations.
- If the receipt clearly shows a different location, use the receipt's location instead.
- Apply depreciation rules based on the user's location.
"""
prompt = f"""
Analyze this receipt text and extract the following information in JSON format:
@@ -190,11 +243,11 @@ class DocumentProcessor:
- Categorize based on vendor type
- Confidence score 0-1 based on clarity
- Currency should be the currency used on the receipt (e.g., "USD", "EUR", "CAD")
{user_location_context}
LOCATION & TAX RULES:
- Extract location from receipt (look for store address, province/state, country)
- Format location as "Province/State, Country" (e.g., "Ontario, Canada" or "California, USA")
- If location not shown, return null for location
- If location not shown on receipt, return null for location (system will use user location as fallback)
- If tax_amount is NOT clearly shown on receipt, calculate it based on location:
* Ontario, Canada: 13% HST