Fix date parsing for image import to handle various date formats
This commit is contained in:
@@ -329,3 +329,49 @@ class DocumentProcessor:
|
||||
"error": f"JSON parsing error: {str(e)}",
|
||||
"transactions": []
|
||||
}
|
||||
|
||||
def _parse_date_to_iso(self, date_str: str) -> str:
|
||||
"""Parse various date formats and convert to YYYY-MM-DD"""
|
||||
try:
|
||||
import re
|
||||
from datetime import datetime
|
||||
|
||||
date_str = date_str.strip().upper()
|
||||
|
||||
# Handle formats like "MAY 22", "JUN 01", "MAY 22, 2024"
|
||||
month_pattern = r'(JAN|FEB|MAR|APR|MAY|JUN|JUL|AUG|SEP|OCT|NOV|DEC)\s+(\d{1,2})(?:,\s*(\d{4}))?'
|
||||
match = re.match(month_pattern, date_str)
|
||||
|
||||
if match:
|
||||
month_abbr, day, year = match.groups()
|
||||
month_map = {
|
||||
'JAN': 1, 'FEB': 2, 'MAR': 3, 'APR': 4, 'MAY': 5, 'JUN': 6,
|
||||
'JUL': 7, 'AUG': 8, 'SEP': 9, 'OCT': 10, 'NOV': 11, 'DEC': 12
|
||||
}
|
||||
|
||||
month = month_map[month_abbr]
|
||||
day = int(day)
|
||||
year = int(year) if year else datetime.now().year
|
||||
|
||||
# Handle 2-digit years
|
||||
if year < 100:
|
||||
year += 2000
|
||||
|
||||
return f"{year:04d}-{month:02d}-{day:02d}"
|
||||
|
||||
# Handle YYYY-MM-DD format
|
||||
if re.match(r'\d{4}-\d{2}-\d{2}', date_str):
|
||||
return date_str
|
||||
|
||||
# Handle MM/DD/YYYY format
|
||||
if re.match(r'\d{1,2}/\d{1,2}/\d{4}', date_str):
|
||||
return datetime.strptime(date_str, '%m/%d/%Y').strftime('%Y-%m-%d')
|
||||
|
||||
# Handle MM/DD/YY format
|
||||
if re.match(r'\d{1,2}/\d{1,2}/\d{2}', date_str):
|
||||
return datetime.strptime(date_str, '%m/%d/%y').strftime('%Y-%m-%d')
|
||||
|
||||
return None
|
||||
|
||||
except Exception:
|
||||
return None
|
||||
@@ -200,11 +200,16 @@ async def import_transactions_from_image(file: UploadFile = File(...)):
|
||||
# Generate unique ID
|
||||
txn_id = f"img_{file.filename}_{idx+1}"
|
||||
|
||||
# Parse date
|
||||
txn_date = txn.get("date", "")
|
||||
if not txn_date:
|
||||
# Parse date - handle various formats
|
||||
txn_date_raw = txn.get("date", "")
|
||||
if not txn_date_raw:
|
||||
raise ValueError("No date found in transaction")
|
||||
|
||||
# Convert date to YYYY-MM-DD format
|
||||
txn_date = document_processor._parse_date_to_iso(txn_date_raw)
|
||||
if not txn_date:
|
||||
raise ValueError(f"Could not parse date: {txn_date_raw}")
|
||||
|
||||
# Parse amount
|
||||
amount_str = str(txn.get("amount", "0"))
|
||||
amount = float(amount_str.replace('$', '').replace(',', '').strip())
|
||||
|
||||
Reference in New Issue
Block a user