Fix date parsing for image import to handle various date formats

This commit is contained in:
Iyeoluwa Akinrinola
2025-07-03 00:44:01 +01:00
parent e81745b638
commit a202abf5c0
2 changed files with 55 additions and 4 deletions
+46
View File
@@ -329,3 +329,49 @@ class DocumentProcessor:
"error": f"JSON parsing error: {str(e)}",
"transactions": []
}
def _parse_date_to_iso(self, date_str: str) -> str:
"""Parse various date formats and convert to YYYY-MM-DD"""
try:
import re
from datetime import datetime
date_str = date_str.strip().upper()
# Handle formats like "MAY 22", "JUN 01", "MAY 22, 2024"
month_pattern = r'(JAN|FEB|MAR|APR|MAY|JUN|JUL|AUG|SEP|OCT|NOV|DEC)\s+(\d{1,2})(?:,\s*(\d{4}))?'
match = re.match(month_pattern, date_str)
if match:
month_abbr, day, year = match.groups()
month_map = {
'JAN': 1, 'FEB': 2, 'MAR': 3, 'APR': 4, 'MAY': 5, 'JUN': 6,
'JUL': 7, 'AUG': 8, 'SEP': 9, 'OCT': 10, 'NOV': 11, 'DEC': 12
}
month = month_map[month_abbr]
day = int(day)
year = int(year) if year else datetime.now().year
# Handle 2-digit years
if year < 100:
year += 2000
return f"{year:04d}-{month:02d}-{day:02d}"
# Handle YYYY-MM-DD format
if re.match(r'\d{4}-\d{2}-\d{2}', date_str):
return date_str
# Handle MM/DD/YYYY format
if re.match(r'\d{1,2}/\d{1,2}/\d{4}', date_str):
return datetime.strptime(date_str, '%m/%d/%Y').strftime('%Y-%m-%d')
# Handle MM/DD/YY format
if re.match(r'\d{1,2}/\d{1,2}/\d{2}', date_str):
return datetime.strptime(date_str, '%m/%d/%y').strftime('%Y-%m-%d')
return None
except Exception:
return None
+8 -3
View File
@@ -200,11 +200,16 @@ async def import_transactions_from_image(file: UploadFile = File(...)):
# Generate unique ID
txn_id = f"img_{file.filename}_{idx+1}"
# Parse date
txn_date = txn.get("date", "")
if not txn_date:
# Parse date - handle various formats
txn_date_raw = txn.get("date", "")
if not txn_date_raw:
raise ValueError("No date found in transaction")
# Convert date to YYYY-MM-DD format
txn_date = document_processor._parse_date_to_iso(txn_date_raw)
if not txn_date:
raise ValueError(f"Could not parse date: {txn_date_raw}")
# Parse amount
amount_str = str(txn.get("amount", "0"))
amount = float(amount_str.replace('$', '').replace(',', '').strip())