# Code-viewer metadata: 548 lines, 21 KiB, Python
|
|
import asyncio
import base64
import json
import logging
import mimetypes
import os
import re
from datetime import datetime
from typing import Any, Dict, Optional

import aiofiles
import groq
import PyPDF2

from config import settings
|
||
|
|
|
||
|
|
# Module-level logger, named after this module via __name__.
logger = logging.getLogger(__name__)
|
||
|
|
|
||
|
|
|
||
|
|
class DocumentProcessor:
    """Extract structured receipt and transaction data from uploaded files.

    Images are sent to a Groq-hosted vision model. PDFs are reduced to their
    text layer with PyPDF2 and sent to the same model as plain text. The
    extraction methods return a dict and report failure through an ``"error"``
    key together with ``"extraction_success": False`` instead of raising.
    """

    # Extensions (lower-case, no dot) that process_file() routes to the vision model.
    IMAGE_TYPES = frozenset({"jpg", "jpeg", "png", "gif", "bmp"})
    # Currency reported when the model response does not name one.
    DEFAULT_CURRENCY = "CAD"

    def __init__(self):
        """Create the Groq client from ``settings.GROQ_API_KEY``."""
        self.client = groq.Groq(api_key=settings.GROQ_API_KEY)
        self.model = "meta-llama/llama-4-scout-17b-16e-instruct"  # Vision model

    # ------------------------------------------------------------------
    # Small shared helpers
    # ------------------------------------------------------------------

    @staticmethod
    async def _run_blocking(func, *args):
        """Run a blocking callable in the default executor and await its result.

        The Groq client and the file/PDF reads used here are synchronous;
        calling them directly inside a coroutine would stall the event loop.
        """
        loop = asyncio.get_running_loop()
        return await loop.run_in_executor(None, func, *args)

    @staticmethod
    def _to_text(value: Any, default: str = "") -> str:
        """Return ``value`` as a stripped string, or ``default`` for ``None``."""
        if value is None:
            return default
        return str(value).strip()

    @staticmethod
    def _to_float(value: Any, default: Optional[float] = 0.0) -> Optional[float]:
        """Coerce a model-supplied number to ``float``.

        Accepts real numbers and strings such as ``"$1,234.50"`` or the
        accounting form ``"(45.00)"`` (treated as negative). Returns
        ``default`` for ``None``, booleans and text without a number in it.
        """
        if value is None or isinstance(value, bool):
            return default
        if isinstance(value, (int, float)):
            return float(value)
        text = str(value).strip()
        negative = text.startswith("(") and text.endswith(")")
        cleaned = text.strip("()").replace(",", "").replace("$", "").strip()
        try:
            number = float(cleaned)
        except ValueError:
            match = re.search(r"-?\d+(?:\.\d+)?", cleaned)
            if not match:
                return default
            number = float(match.group())
        return -abs(number) if negative else number

    @staticmethod
    def _load_lenient_json(json_str: str) -> Any:
        """Parse ``json_str``, repairing common LLM mistakes only when needed.

        The text is tried unchanged first, because the repair regexes can
        damage valid JSON (they also match inside string values). Only if
        that fails are trailing commas removed and, as a last resort, bare
        keys quoted.

        Raises:
            json.JSONDecodeError: if no variant parses.
        """
        without_trailing_commas = re.sub(r",\s*([}\]])", r"\1", json_str)
        with_quoted_keys = re.sub(
            r"([{,])\s*([a-zA-Z_][a-zA-Z0-9_]*)\s*:",
            r'\1"\2":',
            without_trailing_commas,
        )
        last_error = None
        for candidate in (json_str, without_trailing_commas, with_quoted_keys):
            try:
                return json.loads(candidate)
            except json.JSONDecodeError as e:
                last_error = e
        raise last_error

    @staticmethod
    def _regex_extract_fields(json_str: str) -> Dict[str, Any]:
        """Pull receipt fields out of JSON-like text that could not be parsed.

        Missing fields come back as ``None`` so the caller can apply its
        usual defaults.
        """
        fields: Dict[str, Any] = {}
        for key in ("vendor", "description", "date", "category", "currency"):
            match = re.search(rf'"{key}"\s*:\s*"([^"]*)"', json_str)
            fields[key] = match.group(1) if match else None
        for key in ("total_amount", "tax_amount", "confidence"):
            match = re.search(
                rf'"{key}"\s*:\s*"?\$?(-?[0-9][0-9,]*\.?[0-9]*)', json_str
            )
            fields[key] = match.group(1) if match else None
        return fields

    # ------------------------------------------------------------------
    # Receipt extraction
    # ------------------------------------------------------------------

    async def process_file(self, file_path: str, file_type: str) -> Dict[str, Any]:
        """Process an uploaded file and extract receipt data.

        Args:
            file_path: Path of the stored upload.
            file_type: File extension, with or without a leading dot, in any
                case (``"pdf"``, ``".PNG"``).

        Returns:
            The extracted receipt dict, or a dict with ``"error"`` and
            ``"extraction_success": False`` when the type is unsupported or
            processing fails.
        """
        kind = (file_type or "").strip().lower().lstrip(".")
        try:
            if kind in self.IMAGE_TYPES:
                return await self._process_image(file_path)
            if kind == "pdf":
                return await self._process_pdf(file_path)
        except Exception as e:
            logger.exception("Failed to process %s", file_path)
            return {"error": str(e), "extraction_success": False}
        return {
            "error": f"Unsupported file type: {file_type}",
            "extraction_success": False,
        }

    async def _process_image(self, image_path: str) -> Dict[str, Any]:
        """Extract receipt data from an image using the Groq vision model."""
        prompt = """
        Analyze this receipt image and extract the following information in JSON format:
        {
            "vendor": "Store/company name",
            "description": "Detailed description of items/services purchased",
            "total_amount": 0.00,
            "tax_amount": 0.00,
            "date": "YYYY-MM-DD",
            "category": "Food/Transport/Office/Other",
            "confidence": 0.95,
            "currency": "USD"
        }

        Rules:
        - Extract vendor name as it appears on receipt
        - Extract description of items/services purchased (e.g., "Coffee and sandwich", "Gasoline", "Office supplies")
        - Total amount should be the final total including tax
        - Tax amount is separate tax line if available
        - Date should be the date on the receipt
        - Categorize based on vendor type (Starbucks=Food, Shell=Transport, etc.)
        - Confidence score 0-1 based on how clear the receipt is
        - Currency should be the currency used on the receipt (e.g., "USD", "EUR")

        Return only valid JSON.
        """
        try:
            result_text = await self._run_blocking(
                self._vision_request, image_path, prompt, 500
            )
            return self._parse_extraction_result(result_text)
        except Exception as e:
            return {
                "error": f"Image processing error: {str(e)}",
                "extraction_success": False,
            }

    def _encode_image(self, image_path: str) -> str:
        """Read the file at ``image_path`` and return its base64 text."""
        with open(image_path, "rb") as image_file:
            return base64.b64encode(image_file.read()).decode("utf-8")

    def _image_data_url(self, image_path: str) -> str:
        """Return the image as a ``data:`` URL labelled with its real MIME type.

        The type is guessed from the file extension; ``image/jpeg`` is used
        when the extension is unknown or not an image type.
        """
        mime_type, _ = mimetypes.guess_type(image_path)
        if not mime_type or not mime_type.startswith("image/"):
            mime_type = "image/jpeg"
        return f"data:{mime_type};base64,{self._encode_image(image_path)}"

    def _vision_request(self, image_path: str, prompt: str, max_tokens: int) -> str:
        """Send ``prompt`` plus the image to the model and return the reply text.

        Blocking (file read + HTTP call); coroutines should call it through
        ``_run_blocking``. Returns ``""`` if the model sends no content.
        """
        response = self.client.chat.completions.create(
            messages=[
                {
                    "role": "user",
                    "content": [
                        {"type": "text", "text": prompt},
                        {
                            "type": "image_url",
                            "image_url": {"url": self._image_data_url(image_path)},
                        },
                    ],
                }
            ],
            model=self.model,
            max_tokens=max_tokens,
            temperature=0.1,
        )
        return (response.choices[0].message.content or "").strip()

    async def _process_pdf(self, pdf_path: str) -> Dict[str, Any]:
        """Extract receipt data from a PDF via its text layer.

        The PDF is not rendered to an image; only embedded text is used, so
        a scanned (image-only) PDF yields an error result rather than a
        model call on empty input.
        """
        try:
            text_content = await self._run_blocking(
                self._extract_text_from_pdf, pdf_path
            )
            if not text_content.strip():
                return {
                    "error": "PDF processing error: no extractable text found in PDF",
                    "extraction_success": False,
                }
            return await self._run_blocking(self._process_text_content, text_content)
        except Exception as e:
            return {
                "error": f"PDF processing error: {str(e)}",
                "extraction_success": False,
            }

    def _extract_text_from_pdf(self, pdf_path: str) -> str:
        """Return the text of every page, one page per line block.

        Returns ``""`` (and logs the exception) if the file cannot be read.
        """
        try:
            with open(pdf_path, "rb") as file:
                pdf_reader = PyPDF2.PdfReader(file)
                # extract_text() may yield None for pages without a text layer.
                return "".join(
                    f"{page.extract_text() or ''}\n" for page in pdf_reader.pages
                )
        except Exception:
            logger.exception("Failed to extract text from PDF %s", pdf_path)
            return ""

    def _process_text_content(self, text_content: str) -> Dict[str, Any]:
        """Extract receipt data from plain text using Groq (used for PDFs)."""
        try:
            prompt = f"""
            Analyze this receipt text and extract the following information in JSON format:

            Receipt Text:
            {text_content}

            Extract:
            {{
                "vendor": "Store/company name",
                "description": "Detailed description of items/services purchased",
                "total_amount": 0.00,
                "tax_amount": 0.00,
                "date": "YYYY-MM-DD",
                "category": "Food/Transport/Office/Other",
                "confidence": 0.95,
                "currency": "USD"
            }}

            Rules:
            - Extract vendor name as it appears on receipt
            - Extract description of items/services purchased (e.g., "Coffee and sandwich", "Gasoline", "Office supplies")
            - Total amount should be the final total including tax
            - Tax amount is separate tax line if available
            - Date should be the date on the receipt
            - Categorize based on vendor type
            - Confidence score 0-1 based on clarity
            - Currency should be the currency used on the receipt (e.g., "USD", "EUR")

            Return only valid JSON.
            """

            response = self.client.chat.completions.create(
                model=self.model,
                messages=[{"role": "user", "content": prompt}],
                max_tokens=500,
                temperature=0.1,
            )

            result_text = (response.choices[0].message.content or "").strip()
            return self._parse_extraction_result(result_text)

        except Exception as e:
            return {
                "error": f"Text processing error: {str(e)}",
                "extraction_success": False,
            }

    def _parse_extraction_result(self, result_text: str) -> Dict[str, Any]:
        """Turn the model's reply into a normalised receipt dict.

        The outermost ``{...}`` span is parsed as JSON (with light repair).
        If it still cannot be parsed, individual fields are pulled out with
        regexes; if the reply holds no braces at all, the plain-text
        heuristics are used instead.

        Returns:
            A dict with vendor, description, total_amount, tax_amount, date,
            category, confidence, currency and ``extraction_success``; or an
            ``"error"`` dict if something unexpected goes wrong.
        """
        try:
            json_match = re.search(r"\{.*\}", result_text, re.DOTALL)
            if not json_match:
                logger.warning("No JSON found in response, attempting text extraction")
                return self._extract_from_plain_text(result_text)

            json_str = json_match.group()
            try:
                data = self._load_lenient_json(json_str)
            except json.JSONDecodeError as e:
                logger.warning("Initial JSON parsing failed: %s", e)
                data = self._regex_extract_fields(json_str)

            if not isinstance(data, dict):
                data = {}

            # Null / wrongly typed values from the model fall back to defaults
            # instead of failing the whole extraction.
            confidence = self._to_float(data.get("confidence"), 0.5)
            return {
                "vendor": self._to_text(data.get("vendor")),
                "description": self._to_text(data.get("description")),
                "total_amount": self._to_float(data.get("total_amount")),
                "tax_amount": self._to_float(data.get("tax_amount")),
                "date": self._to_text(data.get("date")),
                "category": self._to_text(data.get("category")) or "Other",
                "confidence": min(1.0, max(0.0, confidence)),
                "extraction_success": True,
                "currency": self._to_text(data.get("currency"))
                or self.DEFAULT_CURRENCY,
            }

        except Exception as e:
            logger.error("JSON parsing error: %s", e)
            return {
                "error": f"JSON parsing error: {str(e)}",
                "extraction_success": False,
            }

    def _extract_from_plain_text(self, text: str) -> Dict[str, Any]:
        """Best-effort receipt extraction from free text (no JSON available).

        Returns the same keys as ``_parse_extraction_result`` with a low
        confidence of 0.3, or 0.1 plus an ``"error"`` key on failure.
        """
        try:
            # Vendor: prefer an explicit label; captures stay on one line.
            vendor = ""
            vendor_patterns = [
                (
                    r"\b(?:vendor|store|merchant|company)\b[ \t]*[:\-]?[ \t]*"
                    r"([A-Za-z0-9 \t&.,]+)",
                    re.IGNORECASE,
                ),
                # Capitalized words: must stay case-sensitive to mean anything.
                (r"([A-Z][A-Za-z0-9 \t&.,]{3,30})", 0),
            ]
            for pattern, flags in vendor_patterns:
                match = re.search(pattern, text, flags)
                if match and match.group(1).strip():
                    vendor = match.group(1).strip()
                    break

            # Amount: most specific pattern first so an invoice number or a
            # date is not mistaken for the total.
            number = r"([0-9][0-9,]*(?:\.[0-9]+)?)"
            amount_patterns = [
                r"\b(?:total|amount|sum)\b\s*[:\-]?\s*\$?\s*" + number,
                r"\$\s*" + number,
                number,
            ]
            total_amount = 0.0
            for pattern in amount_patterns:
                match = re.search(pattern, text, re.IGNORECASE)
                if not match:
                    continue
                try:
                    total_amount = float(match.group(1).replace(",", ""))
                    break
                except ValueError:
                    continue

            # Date: first recognisable date, returned as written.
            date_patterns = [
                r"(\d{4}-\d{2}-\d{2})",
                r"(\d{1,2}/\d{1,2}/\d{2,4})",
                r"(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)\s+\d{1,2},?\s+\d{4}",
            ]
            date = ""
            for pattern in date_patterns:
                match = re.search(pattern, text, re.IGNORECASE)
                if match:
                    date = match.group(0)
                    break

            return {
                "vendor": vendor or "Unknown",
                "description": "",
                "total_amount": total_amount,
                "tax_amount": 0.0,
                "date": date or "",
                "category": "Other",
                "confidence": 0.3,  # Low confidence for text extraction
                "extraction_success": True,
                "currency": self.DEFAULT_CURRENCY,
            }

        except Exception as e:
            logger.error("Text extraction error: %s", e)
            return {
                "vendor": "Unknown",
                "description": "",
                "total_amount": 0.0,
                "tax_amount": 0.0,
                "date": "",
                "category": "Other",
                "confidence": 0.1,
                "extraction_success": False,
                "currency": self.DEFAULT_CURRENCY,
                "error": f"Text extraction failed: {str(e)}",
            }

    # ------------------------------------------------------------------
    # Upload storage
    # ------------------------------------------------------------------

    async def save_uploaded_file(self, file_content: bytes, filename: str) -> str:
        """Write an upload into the ``uploads`` directory and return its path.

        The client-supplied ``filename`` is untrusted: directory components
        are dropped and unusual characters replaced before it is used. A
        microsecond timestamp prefix keeps concurrent uploads apart.

        Raises:
            Exception: if the file cannot be written (original error chained).
        """
        try:
            # Create uploads directory if it doesn't exist
            upload_dir = "uploads"
            os.makedirs(upload_dir, exist_ok=True)

            # Keep only the final path component, whatever separator was used.
            base_name = os.path.basename((filename or "").replace("\\", "/"))
            base_name = re.sub(r"[^\w.\-]", "_", base_name).lstrip(".") or "upload"

            timestamp = datetime.now().strftime("%Y%m%d_%H%M%S_%f")
            file_path = os.path.join(upload_dir, f"{timestamp}_{base_name}")

            async with aiofiles.open(file_path, "wb") as f:
                await f.write(file_content)

            return file_path

        except Exception as e:
            raise Exception(f"Failed to save file: {str(e)}") from e

    # ------------------------------------------------------------------
    # Statement / transaction extraction
    # ------------------------------------------------------------------

    async def extract_transactions_from_image(self, image_path: str) -> Dict[str, Any]:
        """Extract multiple transactions from an image (bank statement, credit card statement, etc.).

        Returns:
            ``{"extraction_success", "transactions", "total_transactions"}``
            on success, or ``{"extraction_success": False, "error",
            "transactions": []}`` on failure.
        """
        prompt = """
        Analyze this financial document image (bank statement, credit card statement, etc.) and extract ALL transactions in JSON format.

        Look for transaction lists, payment records, or any financial entries that show:
        - Date
        - Amount (positive or negative)
        - Vendor/Description/Payee name
        - Any additional notes or memo

        Return the transactions as a JSON array:
        {
            "extraction_success": true,
            "transactions": [
                {
                    "date": "YYYY-MM-DD",
                    "amount": 0.00,
                    "vendor": "Vendor name",
                    "memo": "Additional notes"
                },
                {
                    "date": "YYYY-MM-DD",
                    "amount": -0.00,
                    "vendor": "Another vendor",
                    "memo": "Payment or charge description"
                }
            ]
        }

        Rules:
        - Extract ALL visible transactions
        - Include both positive (credits) and negative (debits) amounts
        - Use the actual date format from the document
        - Vendor should be the merchant/payee name
        - Memo can include transaction type, reference numbers, etc.
        - If no transactions found, return empty array but set extraction_success to true

        Return only valid JSON.
        """
        try:
            # Higher token limit than receipts: a statement lists many rows.
            result_text = await self._run_blocking(
                self._vision_request, image_path, prompt, 2000
            )
            return self._parse_transaction_extraction_result(result_text)
        except Exception as e:
            return {
                "extraction_success": False,
                "error": f"Transaction extraction error: {str(e)}",
                "transactions": [],
            }

    def _parse_transaction_extraction_result(self, result_text: str) -> Dict[str, Any]:
        """Turn the model's reply into a cleaned list of transactions.

        Rows that are not objects, or whose amount cannot be read as a
        number, are skipped and logged rather than failing the whole result.
        """

        def failure(message: str) -> Dict[str, Any]:
            return {"extraction_success": False, "error": message, "transactions": []}

        try:
            # Find the first '{' and last '}'
            start = result_text.find("{")
            end = result_text.rfind("}")
            if start == -1 or end <= start:
                return failure("Could not find JSON object in AI response")
            json_str = result_text[start : end + 1]

            try:
                data = self._load_lenient_json(json_str)
            except json.JSONDecodeError as e:
                logger.error("JSON parsing error: %s", e)
                # The payload is financial data; keep it out of error-level logs.
                logger.debug("Offending JSON string:\n%s", json_str)
                return failure(f"JSON parsing error: {str(e)}")

            transactions = data.get("transactions")
            if not isinstance(transactions, list):
                transactions = []

            cleaned_transactions = []
            for txn in transactions:
                if not isinstance(txn, dict):
                    logger.warning("Skipping non-object transaction entry: %r", txn)
                    continue
                amount = self._to_float(txn.get("amount", 0), default=None)
                if amount is None:
                    logger.warning(
                        "Skipping transaction with unreadable amount: %r",
                        txn.get("amount"),
                    )
                    continue
                cleaned_transactions.append(
                    {
                        "date": self._to_text(txn.get("date")),
                        "amount": amount,
                        "vendor": self._to_text(txn.get("vendor")),
                        "memo": self._to_text(txn.get("memo")),
                    }
                )

            return {
                "extraction_success": bool(data.get("extraction_success", True)),
                "transactions": cleaned_transactions,
                "total_transactions": len(cleaned_transactions),
            }
        except Exception as e:
            logger.error("JSON parsing error (outer): %s", e)
            return failure(f"JSON parsing error: {str(e)}")

    def _parse_date_to_iso(self, date_str: str) -> Optional[str]:
        """Convert a date in one of several formats to ``YYYY-MM-DD``.

        Recognised (case-insensitive, leading/trailing space ignored, any
        text after the date ignored):

        - ``MAY 22`` (current year assumed), ``MAY 22, 2024``, ``MAY 22 2024``
        - ``2024-05-22``
        - ``5/22/2024`` and ``5/22/24`` (month first)

        Returns:
            The ISO date, or ``None`` if the text is not a string, matches no
            format, or names a date that does not exist (e.g. ``FEB 31``).
        """
        if not isinstance(date_str, str):
            return None
        text = date_str.strip().upper()

        months = {
            "JAN": 1, "FEB": 2, "MAR": 3, "APR": 4, "MAY": 5, "JUN": 6,
            "JUL": 7, "AUG": 8, "SEP": 9, "OCT": 10, "NOV": 11, "DEC": 12,
        }

        try:
            # "MAY 22", "MAY 22, 2024", "MAY 22 2024". The (?!\d) guards stop
            # "MAY 2024" from being read as day 20.
            match = re.match(
                r"(JAN|FEB|MAR|APR|MAY|JUN|JUL|AUG|SEP|OCT|NOV|DEC)\s+"
                r"(\d{1,2})(?!\d)(?:(?:\s*,\s*|\s+)(\d{4})(?!\d))?",
                text,
            )
            if match:
                month_abbr, day, year = match.groups()
                resolved_year = int(year) if year else datetime.now().year
                parsed = datetime(resolved_year, months[month_abbr], int(day))
                return parsed.date().isoformat()

            match = re.match(r"\d{4}-\d{2}-\d{2}(?!\d)", text)
            if match:
                parsed = datetime.strptime(match.group(), "%Y-%m-%d")
                return parsed.date().isoformat()

            match = re.match(r"(\d{1,2}/\d{1,2}/)(\d{4}|\d{2})(?!\d)", text)
            if match:
                year_format = "%Y" if len(match.group(2)) == 4 else "%y"
                parsed = datetime.strptime(match.group(), "%m/%d/" + year_format)
                return parsed.date().isoformat()

            return None
        except ValueError:
            # Matched a pattern but the calendar date is impossible.
            return None