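Rework AI categorization rules in receipt prompts: parse CONTAINS/SET_CATEGORY rules with regex into per-vendor instructions, move the rules to the top of the image and text prompts, and add an extraction_success field to the JSON template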
This commit is contained in:
@@ -2,6 +2,7 @@ import base64
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import re
|
||||
from datetime import datetime
|
||||
from typing import Any, Dict
|
||||
|
||||
@@ -125,23 +126,36 @@ class DocumentProcessor:
|
||||
# Build AI rules context for categorization
|
||||
ai_rules_context = ""
|
||||
if ai_rules and len(ai_rules) > 0:
|
||||
ai_rules_context = "\n CATEGORIZATION RULES (IMPORTANT - Apply these first):"
|
||||
# Create a simple, direct instruction for each rule
|
||||
ai_rules_context = "\n "
|
||||
for idx, rule in enumerate(ai_rules, 1):
|
||||
condition = rule.get("condition", "")
|
||||
action = rule.get("action", "")
|
||||
ai_rules_context += f"\n {idx}. If {condition} → set category to '{action}'"
|
||||
ai_rules_context += "\n - Apply these custom rules before using default categorization logic\n - If multiple rules match, use the first matching rule\n - If no rules match, use default categorization based on vendor type"
|
||||
|
||||
# Extract the keyword and category from the rule
|
||||
keyword_match = re.search(r'CONTAINS\s+"([^"]+)"', condition, re.IGNORECASE)
|
||||
category_match = re.search(r'SET_CATEGORY:\s*(.+)', action, re.IGNORECASE)
|
||||
|
||||
if keyword_match and category_match:
|
||||
keyword = keyword_match.group(1)
|
||||
category = category_match.group(1).strip()
|
||||
# Create one simple instruction per line
|
||||
ai_rules_context += f'If the Vendor name contains "{keyword}": Set category to "{category}"\n '
|
||||
|
||||
ai_rules_context += "\n"
|
||||
|
||||
# Create Groq vision prompt
|
||||
prompt = f"""
|
||||
Analyze this receipt image and extract the following information in JSON format:
|
||||
Analyze this receipt image and extract the following information in JSON format.
|
||||
{ai_rules_context}
|
||||
JSON Format:
|
||||
{{
|
||||
"vendor": "Store/company name",
|
||||
"description": "Detailed description of items/services purchased",
|
||||
"total_amount": 0.00,
|
||||
"tax_amount": 0.00,
|
||||
"date": "YYYY-MM-DD",
|
||||
"category": "Food/Transport/Office/Other",
|
||||
"category": "Check rules above first",
|
||||
"confidence": 0.95,
|
||||
"currency": "USD",
|
||||
"location": "Province/State, Country",
|
||||
@@ -150,10 +164,11 @@ class DocumentProcessor:
|
||||
"name_of_asset": null,
|
||||
"cca_rate": null,
|
||||
"useful_life": null,
|
||||
"residual_value": null
|
||||
"residual_value": null,
|
||||
"extraction_success": True
|
||||
}}
|
||||
|
||||
Rules:
|
||||
EXTRACTION Rules:
|
||||
- Extract vendor name as it appears on receipt
|
||||
- Extract description of items/services purchased (e.g., "Coffee and sandwich", "Gasoline", "Office supplies")
|
||||
- Total amount should be the final total including tax
|
||||
@@ -161,7 +176,7 @@ class DocumentProcessor:
|
||||
- Date should be the date on the receipt
|
||||
- Confidence score 0-1 based on how clear the receipt is
|
||||
- Currency should be the currency used on the receipt (e.g., "USD", "EUR", "CAD")
|
||||
{ai_rules_context}
|
||||
|
||||
{user_location_context}
|
||||
LOCATION & TAX RULES:
|
||||
- Extract location from receipt (look for store address, province/state, country)
|
||||
@@ -199,11 +214,9 @@ class DocumentProcessor:
|
||||
* residual_value: Estimated value at end of life (typically 10% of purchase price for equipment, 20% for vehicles)
|
||||
- If is_depreciable is false, set name_of_asset, cca_rate, useful_life, and residual_value to null
|
||||
|
||||
CATEGORY RULES:
|
||||
- Assign the category based on all the details in the receipt
|
||||
Return only valid JSON.
|
||||
"""
|
||||
|
||||
logger.info(f"This is the prompt: {prompt}")
|
||||
# Call Groq vision API with correct format
|
||||
response = self.client.chat.completions.create(
|
||||
messages=[
|
||||
@@ -293,16 +306,27 @@ class DocumentProcessor:
|
||||
# Build AI rules context for categorization
|
||||
ai_rules_context = ""
|
||||
if ai_rules and len(ai_rules) > 0:
|
||||
ai_rules_context = "\n CATEGORIZATION RULES (IMPORTANT - Apply these first):"
|
||||
# Create a simple, direct instruction for each rule
|
||||
ai_rules_context = "\n "
|
||||
for idx, rule in enumerate(ai_rules, 1):
|
||||
condition = rule.get("condition", "")
|
||||
action = rule.get("action", "")
|
||||
ai_rules_context += f"\n {idx}. If {condition} → set category to '{action}'"
|
||||
ai_rules_context += "\n - Apply these custom rules before using default categorization logic\n - If multiple rules match, use the first matching rule\n - If no rules match, use default categorization based on vendor type"
|
||||
|
||||
# Extract the keyword and category from the rule
|
||||
keyword_match = re.search(r'CONTAINS\s+"([^"]+)"', condition, re.IGNORECASE)
|
||||
category_match = re.search(r'SET_CATEGORY:\s*(.+)', action, re.IGNORECASE)
|
||||
|
||||
if keyword_match and category_match:
|
||||
keyword = keyword_match.group(1)
|
||||
category = category_match.group(1).strip()
|
||||
# Create one simple instruction per line
|
||||
ai_rules_context += f'If the Vendor name contains "{keyword}": Set category to "{category}"\n '
|
||||
|
||||
ai_rules_context += "\n"
|
||||
|
||||
prompt = f"""
|
||||
Analyze this receipt text and extract the following information in JSON format:
|
||||
|
||||
Analyze this receipt text and extract the following information in JSON format.
|
||||
{ai_rules_context}
|
||||
Receipt Text:
|
||||
{text_content}
|
||||
|
||||
@@ -313,7 +337,7 @@ class DocumentProcessor:
|
||||
"total_amount": 0.00,
|
||||
"tax_amount": 0.00,
|
||||
"date": "YYYY-MM-DD",
|
||||
"category": "Food/Transport/Office/Other",
|
||||
"category": "Check rules above first",
|
||||
"confidence": 0.95,
|
||||
"currency": "USD",
|
||||
"location": "Province/State, Country",
|
||||
@@ -322,10 +346,11 @@ class DocumentProcessor:
|
||||
"name_of_asset": null,
|
||||
"cca_rate": null,
|
||||
"useful_life": null,
|
||||
"residual_value": null
|
||||
"residual_value": null,
|
||||
"extraction_success": True
|
||||
}}
|
||||
|
||||
Rules:
|
||||
EXTRACTION Rules:
|
||||
- Extract vendor name as it appears on receipt
|
||||
- Extract description of items/services purchased (e.g., "Coffee and sandwich", "Gasoline", "Office supplies")
|
||||
- Total amount should be the final total including tax
|
||||
@@ -333,7 +358,6 @@ class DocumentProcessor:
|
||||
- Date should be the date on the receipt
|
||||
- Confidence score 0-1 based on clarity
|
||||
- Currency should be the currency used on the receipt (e.g., "USD", "EUR", "CAD")
|
||||
{ai_rules_context}
|
||||
{user_location_context}
|
||||
LOCATION & TAX RULES:
|
||||
- Extract location from receipt (look for store address, province/state, country)
|
||||
|
||||
Reference in New Issue
Block a user