Fix date parsing for image import to handle various date formats
This commit is contained in:
+47
-1
@@ -328,4 +328,50 @@ class DocumentProcessor:
|
||||
"extraction_success": False,
|
||||
"error": f"JSON parsing error: {str(e)}",
|
||||
"transactions": []
|
||||
}
|
||||
}
|
||||
|
||||
def _parse_date_to_iso(self, date_str: str) -> "str | None":
    """Parse a date string in one of several formats into ``YYYY-MM-DD``.

    Matching is case-insensitive and ignores surrounding whitespace. Each
    pattern is matched at the start of the string, so trailing text after
    the date (for example a time component) is ignored.

    Recognised formats:

    * ``MON D``, ``MON D, YYYY``, ``MON D YYYY`` and ``MON D, YY``, where
      ``MON`` is a three-letter English month abbreviation. When no year
      is given the current local year is used; a two-digit year is taken
      to be in the 2000s.
    * ``YYYY-MM-DD``.
    * ``M/D/YYYY`` and ``M/D/YY`` (month first). Two-digit years follow
      ``strptime``'s ``%y`` rule.

    Args:
        date_str: The raw date text to convert.

    Returns:
        The date as a ``YYYY-MM-DD`` string, or ``None`` when the input is
        not a string, matches none of the formats, or does not name a real
        calendar date (e.g. ``FEB 31``).
    """
    # Imported locally so the method does not depend on module-level imports.
    import re
    from datetime import date, datetime

    if not isinstance(date_str, str):
        return None

    text = date_str.strip().upper()

    month_map = {
        'JAN': 1, 'FEB': 2, 'MAR': 3, 'APR': 4, 'MAY': 5, 'JUN': 6,
        'JUL': 7, 'AUG': 8, 'SEP': 9, 'OCT': 10, 'NOV': 11, 'DEC': 12,
    }

    try:
        # Formats like "MAY 22", "JUN 01", "MAY 22, 2024", "MAY 22 2024".
        # The (?!\d) guards stop a longer digit run (e.g. "MAY 222") from
        # being silently truncated into a plausible-looking date.
        # A two-digit year is only accepted after a comma, so a bare
        # trailing number is not mistaken for a year.
        month_match = re.match(
            r'(JAN|FEB|MAR|APR|MAY|JUN|JUL|AUG|SEP|OCT|NOV|DEC)'
            r'\s+(\d{1,2})(?!\d)'
            r'(?:\s*,\s*(\d{4}|\d{2})(?!\d)|\s+(\d{4})(?!\d))?',
            text,
        )
        if month_match:
            month_abbr, day_text, comma_year, spaced_year = month_match.groups()
            year_text = comma_year or spaced_year
            year = int(year_text) if year_text else datetime.now().year

            # Two-digit years are interpreted as 20YY.
            if year_text and len(year_text) == 2:
                year += 2000

            # date() rejects impossible days/months with ValueError.
            return date(year, month_map[month_abbr], int(day_text)).isoformat()

        # YYYY-MM-DD: validate and return only the date portion.
        iso_match = re.match(r'(\d{4})-(\d{2})-(\d{2})(?!\d)', text)
        if iso_match:
            year, month, day = (int(part) for part in iso_match.groups())
            return date(year, month, day).isoformat()

        # MM/DD/YYYY and MM/DD/YY: parse only the matched text so trailing
        # characters do not make strptime fail.
        slash_match = re.match(r'\d{1,2}/\d{1,2}/(\d{4}|\d{2})(?!\d)', text)
        if slash_match:
            fmt = '%m/%d/%Y' if len(slash_match.group(1)) == 4 else '%m/%d/%y'
            return datetime.strptime(slash_match.group(0), fmt).date().isoformat()
    except ValueError:
        # Matched a pattern but the values are not a real calendar date.
        return None

    return None
|
||||
Reference in New Issue
Block a user