Implement code changes to enhance functionality and improve performance

This commit is contained in:
2025-10-10 17:18:52 +00:00
parent 3559cbe19d
commit c8da3c61ca
4 changed files with 1632 additions and 749 deletions
+44 -20
View File
@@ -2,6 +2,7 @@ import base64
import json
import logging
import os
import re
from datetime import datetime
from typing import Any, Dict
@@ -125,23 +126,36 @@ class DocumentProcessor:
# Build AI rules context for categorization
ai_rules_context = ""
if ai_rules and len(ai_rules) > 0:
ai_rules_context = "\n CATEGORIZATION RULES (IMPORTANT - Apply these first):"
# Create a simple, direct instruction for each rule
ai_rules_context = "\n "
for idx, rule in enumerate(ai_rules, 1):
condition = rule.get("condition", "")
action = rule.get("action", "")
ai_rules_context += f"\n {idx}. If {condition} → set category to '{action}'"
ai_rules_context += "\n - Apply these custom rules before using default categorization logic\n - If multiple rules match, use the first matching rule\n - If no rules match, use default categorization based on vendor type"
# Extract the keyword and category from the rule
keyword_match = re.search(r'CONTAINS\s+"([^"]+)"', condition, re.IGNORECASE)
category_match = re.search(r'SET_CATEGORY:\s*(.+)', action, re.IGNORECASE)
if keyword_match and category_match:
keyword = keyword_match.group(1)
category = category_match.group(1).strip()
# Create one simple instruction per line
ai_rules_context += f'If the Vendor name contains "{keyword}": Set category to "{category}"\n '
ai_rules_context += "\n"
# Create Groq vision prompt
prompt = f"""
Analyze this receipt image and extract the following information in JSON format:
Analyze this receipt image and extract the following information in JSON format.
{ai_rules_context}
JSON Format:
{{
"vendor": "Store/company name",
"description": "Detailed description of items/services purchased",
"total_amount": 0.00,
"tax_amount": 0.00,
"date": "YYYY-MM-DD",
"category": "Food/Transport/Office/Other",
"category": "Check rules above first",
"confidence": 0.95,
"currency": "USD",
"location": "Province/State, Country",
@@ -150,10 +164,11 @@ class DocumentProcessor:
"name_of_asset": null,
"cca_rate": null,
"useful_life": null,
"residual_value": null
"residual_value": null,
"extraction_success": true
}}
Rules:
EXTRACTION Rules:
- Extract vendor name as it appears on receipt
- Extract description of items/services purchased (e.g., "Coffee and sandwich", "Gasoline", "Office supplies")
- Total amount should be the final total including tax
@@ -161,7 +176,7 @@ class DocumentProcessor:
- Date should be the date on the receipt
- Confidence score 0-1 based on how clear the receipt is
- Currency should be the currency used on the receipt (e.g., "USD", "EUR", "CAD")
{ai_rules_context}
{user_location_context}
LOCATION & TAX RULES:
- Extract location from receipt (look for store address, province/state, country)
@@ -199,11 +214,9 @@ class DocumentProcessor:
* residual_value: Estimated value at end of life (typically 10% of purchase price for equipment, 20% for vehicles)
- If is_depreciable is false, set name_of_asset, cca_rate, useful_life, and residual_value to null
CATEGORY RULES:
- Assign the category based on all the details in the receipt
Return only valid JSON.
"""
logger.info(f"This is the prompt: {prompt}")
# Call Groq vision API with correct format
response = self.client.chat.completions.create(
messages=[
@@ -293,16 +306,27 @@ class DocumentProcessor:
# Build AI rules context for categorization
ai_rules_context = ""
if ai_rules and len(ai_rules) > 0:
ai_rules_context = "\n CATEGORIZATION RULES (IMPORTANT - Apply these first):"
# Create a simple, direct instruction for each rule
ai_rules_context = "\n "
for idx, rule in enumerate(ai_rules, 1):
condition = rule.get("condition", "")
action = rule.get("action", "")
ai_rules_context += f"\n {idx}. If {condition} → set category to '{action}'"
ai_rules_context += "\n - Apply these custom rules before using default categorization logic\n - If multiple rules match, use the first matching rule\n - If no rules match, use default categorization based on vendor type"
# Extract the keyword and category from the rule
keyword_match = re.search(r'CONTAINS\s+"([^"]+)"', condition, re.IGNORECASE)
category_match = re.search(r'SET_CATEGORY:\s*(.+)', action, re.IGNORECASE)
if keyword_match and category_match:
keyword = keyword_match.group(1)
category = category_match.group(1).strip()
# Create one simple instruction per line
ai_rules_context += f'If the Vendor name contains "{keyword}": Set category to "{category}"\n '
ai_rules_context += "\n"
prompt = f"""
Analyze this receipt text and extract the following information in JSON format:
Analyze this receipt text and extract the following information in JSON format.
{ai_rules_context}
Receipt Text:
{text_content}
@@ -313,7 +337,7 @@ class DocumentProcessor:
"total_amount": 0.00,
"tax_amount": 0.00,
"date": "YYYY-MM-DD",
"category": "Food/Transport/Office/Other",
"category": "Check rules above first",
"confidence": 0.95,
"currency": "USD",
"location": "Province/State, Country",
@@ -322,10 +346,11 @@ class DocumentProcessor:
"name_of_asset": null,
"cca_rate": null,
"useful_life": null,
"residual_value": null
"residual_value": null,
"extraction_success": true
}}
Rules:
EXTRACTION Rules:
- Extract vendor name as it appears on receipt
- Extract description of items/services purchased (e.g., "Coffee and sandwich", "Gasoline", "Office supplies")
- Total amount should be the final total including tax
@@ -333,7 +358,6 @@ class DocumentProcessor:
- Date should be the date on the receipt
- Confidence score 0-1 based on clarity
- Currency should be the currency used on the receipt (e.g., "USD", "EUR", "CAD")
{ai_rules_context}
{user_location_context}
LOCATION & TAX RULES:
- Extract location from receipt (look for store address, province/state, country)