Fix date parsing for image import to handle various date formats
This commit is contained in:
@@ -329,3 +329,49 @@ class DocumentProcessor:
|
|||||||
"error": f"JSON parsing error: {str(e)}",
|
"error": f"JSON parsing error: {str(e)}",
|
||||||
"transactions": []
|
"transactions": []
|
||||||
}
|
}
|
||||||
|
|
||||||
|
def _parse_date_to_iso(self, date_str: str) -> str:
|
||||||
|
"""Parse various date formats and convert to YYYY-MM-DD"""
|
||||||
|
try:
|
||||||
|
import re
|
||||||
|
from datetime import datetime
|
||||||
|
|
||||||
|
date_str = date_str.strip().upper()
|
||||||
|
|
||||||
|
# Handle formats like "MAY 22", "JUN 01", "MAY 22, 2024"
|
||||||
|
month_pattern = r'(JAN|FEB|MAR|APR|MAY|JUN|JUL|AUG|SEP|OCT|NOV|DEC)\s+(\d{1,2})(?:,\s*(\d{4}))?'
|
||||||
|
match = re.match(month_pattern, date_str)
|
||||||
|
|
||||||
|
if match:
|
||||||
|
month_abbr, day, year = match.groups()
|
||||||
|
month_map = {
|
||||||
|
'JAN': 1, 'FEB': 2, 'MAR': 3, 'APR': 4, 'MAY': 5, 'JUN': 6,
|
||||||
|
'JUL': 7, 'AUG': 8, 'SEP': 9, 'OCT': 10, 'NOV': 11, 'DEC': 12
|
||||||
|
}
|
||||||
|
|
||||||
|
month = month_map[month_abbr]
|
||||||
|
day = int(day)
|
||||||
|
year = int(year) if year else datetime.now().year
|
||||||
|
|
||||||
|
# Handle 2-digit years
|
||||||
|
if year < 100:
|
||||||
|
year += 2000
|
||||||
|
|
||||||
|
return f"{year:04d}-{month:02d}-{day:02d}"
|
||||||
|
|
||||||
|
# Handle YYYY-MM-DD format
|
||||||
|
if re.match(r'\d{4}-\d{2}-\d{2}', date_str):
|
||||||
|
return date_str
|
||||||
|
|
||||||
|
# Handle MM/DD/YYYY format
|
||||||
|
if re.match(r'\d{1,2}/\d{1,2}/\d{4}', date_str):
|
||||||
|
return datetime.strptime(date_str, '%m/%d/%Y').strftime('%Y-%m-%d')
|
||||||
|
|
||||||
|
# Handle MM/DD/YY format
|
||||||
|
if re.match(r'\d{1,2}/\d{1,2}/\d{2}', date_str):
|
||||||
|
return datetime.strptime(date_str, '%m/%d/%y').strftime('%Y-%m-%d')
|
||||||
|
|
||||||
|
return None
|
||||||
|
|
||||||
|
except Exception:
|
||||||
|
return None
|
||||||
@@ -200,11 +200,16 @@ async def import_transactions_from_image(file: UploadFile = File(...)):
|
|||||||
# Generate unique ID
|
# Generate unique ID
|
||||||
txn_id = f"img_{file.filename}_{idx+1}"
|
txn_id = f"img_{file.filename}_{idx+1}"
|
||||||
|
|
||||||
# Parse date
|
# Parse date - handle various formats
|
||||||
txn_date = txn.get("date", "")
|
txn_date_raw = txn.get("date", "")
|
||||||
if not txn_date:
|
if not txn_date_raw:
|
||||||
raise ValueError("No date found in transaction")
|
raise ValueError("No date found in transaction")
|
||||||
|
|
||||||
|
# Convert date to YYYY-MM-DD format
|
||||||
|
txn_date = document_processor._parse_date_to_iso(txn_date_raw)
|
||||||
|
if not txn_date:
|
||||||
|
raise ValueError(f"Could not parse date: {txn_date_raw}")
|
||||||
|
|
||||||
# Parse amount
|
# Parse amount
|
||||||
amount_str = str(txn.get("amount", "0"))
|
amount_str = str(txn.get("amount", "0"))
|
||||||
amount = float(amount_str.replace('$', '').replace(',', '').strip())
|
amount = float(amount_str.replace('$', '').replace(',', '').strip())
|
||||||
|
|||||||
Reference in New Issue
Block a user