import base64
import logging
import os
from datetime import datetime
from typing import Any, Dict, Optional

import aiofiles
import groq
import PyPDF2

from config import settings

# Module-level logger named after this module.
logger = logging.getLogger(__name__)


class DocumentProcessor:
    def __init__(self):
        """Create the processor: pick the extraction model and build the Groq client."""
        # Vision-capable model id passed as ``model`` to the chat-completion calls.
        self.model = "meta-llama/llama-4-scout-17b-16e-instruct"
        self.client = groq.Groq(api_key=settings.GROQ_API_KEY)

    async def process_file(self, file_path: str, file_type: str) -> Dict[str, Any]:
        """Process uploaded file and extract receipt data"""
        try:
            if file_type.lower() in ["jpg", "jpeg", "png", "gif", "bmp"]:
                return await self._process_image(file_path)
            elif file_type.lower() == "pdf":
                return await self._process_pdf(file_path)
            else:
                raise ValueError(f"Unsupported file type: {file_type}")
        except Exception as e:
            return {"error": str(e)}

    async def _process_image(self, image_path: str) -> Dict[str, Any]:
        """Extract data from image using Groq vision"""
        try:
            # Encode image to base64
            base64_image = self._encode_image(image_path)

            # Create Groq vision prompt
            prompt = """
            Analyze this receipt image and extract the following information in JSON format:
            {
                "vendor": "Store/company name",
                "description": "Detailed description of items/services purchased",
                "total_amount": 0.00,
                "tax_amount": 0.00,
                "date": "YYYY-MM-DD",
                "category": "Food/Transport/Office/Other",
                "confidence": 0.95,
                "currency": "USD"
            }
            
            Rules:
            - Extract vendor name as it appears on receipt
            - Extract description of items/services purchased (e.g., "Coffee and sandwich", "Gasoline", "Office supplies")
            - Total amount should be the final total including tax
            - Tax amount is separate tax line if available
            - Date should be the date on the receipt
            - Categorize based on vendor type (Starbucks=Food, Shell=Transport, etc.)
            - Confidence score 0-1 based on how clear the receipt is
            - Currency should be the currency used on the receipt (e.g., "USD", "EUR")

            Return only valid JSON.
            """

            # Call Groq vision API with correct format
            response = self.client.chat.completions.create(
                messages=[
                    {
                        "role": "user",
                        "content": [
                            {"type": "text", "text": prompt},
                            {
                                "type": "image_url",
                                "image_url": {
                                    "url": f"data:image/jpeg;base64,{base64_image}",
                                },
                            },
                        ],
                    }
                ],
                model=self.model,
                max_tokens=500,
                temperature=0.1,
            )

            # Parse response
            result_text = response.choices[0].message.content.strip()
            return self._parse_extraction_result(result_text)

        except Exception as e:
            return {"error": f"Image processing error: {str(e)}"}

    def _encode_image(self, image_path: str) -> str:
        """Encode image to base64 string"""
        with open(image_path, "rb") as image_file:
            return base64.b64encode(image_file.read()).decode("utf-8")

    async def _process_pdf(self, pdf_path: str) -> Dict[str, Any]:
        """Extract data from PDF by converting to image first"""
        try:
            # For now, extract text from PDF and process as text
            text_content = self._extract_text_from_pdf(pdf_path)
            return self._process_text_content(text_content)

        except Exception as e:
            return {"error": f"PDF processing error: {str(e)}"}

    def _extract_text_from_pdf(self, pdf_path: str) -> str:
        """Return the text of every page in the PDF, one newline after each page.

        Args:
            pdf_path: Path of the PDF file to read.

        Returns:
            The concatenated page text, or ``""`` when the file cannot be
            opened or parsed. A page whose text extraction fails contributes
            an empty string instead of discarding the other pages.
        """
        page_texts = []
        try:
            with open(pdf_path, "rb") as file:
                pdf_reader = PyPDF2.PdfReader(file)
                for page_number, page in enumerate(pdf_reader.pages, start=1):
                    try:
                        page_texts.append(page.extract_text() or "")
                    except Exception:
                        # Best effort: keep the pages that could be read.
                        logger.warning(
                            "Could not extract text from page %d of %s",
                            page_number,
                            pdf_path,
                            exc_info=True,
                        )
                        page_texts.append("")
        except Exception:
            logger.warning("Could not read PDF %s", pdf_path, exc_info=True)
            return ""
        return "".join(f"{page_text}\n" for page_text in page_texts)

    def _process_text_content(self, text_content: str) -> Dict[str, Any]:
        """Process text content using Groq (fallback for PDFs)"""
        try:
            prompt = f"""
            Analyze this receipt text and extract the following information in JSON format:
            
            Receipt Text:
            {text_content}
            
            Extract:
            {{
                "vendor": "Store/company name",
                "description": "Detailed description of items/services purchased",
                "total_amount": 0.00,
                "tax_amount": 0.00,
                "date": "YYYY-MM-DD",
                "category": "Food/Transport/Office/Other",
                "confidence": 0.95,
                "currency": "USD"
            }}
            
            Rules:
            - Extract vendor name as it appears on receipt
            - Extract description of items/services purchased (e.g., "Coffee and sandwich", "Gasoline", "Office supplies")
            - Total amount should be the final total including tax
            - Tax amount is separate tax line if available
            - Date should be the date on the receipt
            - Categorize based on vendor type
            - Confidence score 0-1 based on clarity
            - Currency should be the currency used on the receipt (e.g., "USD", "EUR")
            
            Return only valid JSON.
            """

            response = self.client.chat.completions.create(
                model=self.model,
                messages=[{"role": "user", "content": prompt}],
                max_tokens=500,
                temperature=0.1,
            )

            result_text = response.choices[0].message.content.strip()
            return self._parse_extraction_result(result_text)

        except Exception as e:
            return {"error": f"Text processing error: {str(e)}"}

    def _parse_extraction_result(self, result_text: str) -> Dict[str, Any]:
        """Turn the model's reply into a normalized receipt dict.

        Parsing is attempted in this order:

        1. the outermost ``{...}`` span of the reply, parsed untouched;
        2. the same span after repairing trailing commas and unquoted keys;
        3. field-by-field regex extraction from the repaired span.

        When the reply contains no ``{...}`` span at all, the work is handed
        to ``_extract_from_plain_text``.

        Args:
            result_text: Raw text returned by the model.

        Returns:
            A dict with ``vendor``, ``description``, ``total_amount``,
            ``tax_amount``, ``date``, ``category``, ``confidence``,
            ``currency`` and ``extraction_success`` set to True. Missing or
            null fields get defaults ("" / 0.0 / "Other" / 0.5 / "CAD"). On an
            unexpected failure, a dict with ``error`` and
            ``extraction_success`` set to False.
        """
        import json
        import re

        def _to_float(value: Any, default: float) -> float:
            """Coerce to float, tolerating null and strings like "$1,234.50"."""
            if value is None:
                return default
            if isinstance(value, (int, float)):
                return float(value)
            try:
                return float(re.sub(r"[^0-9.\-]", "", str(value)))
            except ValueError:
                return default

        def _to_text(value: Any, default: str = "") -> str:
            """Coerce to stripped text; null becomes the default, not "None"."""
            return default if value is None else str(value).strip()

        try:
            json_match = re.search(r"\{.*\}", result_text, re.DOTALL)
            if not json_match:
                logger.warning("No JSON found in response, attempting text extraction")
                return self._extract_from_plain_text(result_text)

            json_str = json_match.group()
            try:
                # Parse the reply untouched first: the repair regexes below
                # also rewrite text inside string values (for example
                # "Coffee, total: 5"), which would corrupt valid JSON.
                data = json.loads(json_str)
            except json.JSONDecodeError:
                repaired = re.sub(r",\s*([}\]])", r"\1", json_str)  # trailing commas
                repaired = re.sub(
                    r"([{,])\s*([a-zA-Z_][a-zA-Z0-9_]*)\s*:", r'\1"\2":', repaired
                )  # unquoted keys
                try:
                    data = json.loads(repaired)
                except json.JSONDecodeError as e:
                    logger.warning("JSON parsing failed even after repair: %s", e)
                    # Last resort: pull out each known field on its own.
                    data = {}
                    for field in ("vendor", "description", "date", "category", "currency"):
                        text_match = re.search(
                            rf'"{field}"\s*:\s*"([^"]*)"', repaired
                        )
                        if text_match:
                            data[field] = text_match.group(1)
                    for field in ("total_amount", "tax_amount", "confidence"):
                        number_match = re.search(
                            rf'"{field}"\s*:\s*(-?[0-9.]+)', repaired
                        )
                        if number_match:
                            data[field] = number_match.group(1)

            # Validate and clean data
            return {
                "vendor": _to_text(data.get("vendor")),
                "description": _to_text(data.get("description")),
                "total_amount": _to_float(data.get("total_amount"), 0.0),
                "tax_amount": _to_float(data.get("tax_amount"), 0.0),
                "date": _to_text(data.get("date")),
                "category": _to_text(data.get("category"), "Other"),
                "confidence": _to_float(data.get("confidence"), 0.5),
                "extraction_success": True,
                "currency": _to_text(data.get("currency"), "CAD"),
            }

        except Exception as e:
            logger.error(f"JSON parsing error: {str(e)}")
            return {
                "error": f"JSON parsing error: {str(e)}",
                "extraction_success": False,
            }

    def _extract_from_plain_text(self, text: str) -> Dict[str, Any]:
        """Best-effort extraction of receipt fields from free text.

        Called when the model's reply contains no JSON object. Fields are
        located with regular expressions, so the result is marked with a low
        confidence.

        Args:
            text: The plain-text reply to scan.

        Returns:
            A dict with the same field names as the JSON path (``vendor``,
            ``description``, ``total_amount``, ``tax_amount``, ``date``,
            ``category``, ``confidence``, ``currency``,
            ``extraction_success``). On an unexpected failure the dict also
            carries ``error`` and ``extraction_success`` is False.
        """
        import re

        try:
            # Vendor: a labelled value first, then the first capitalized
            # phrase. The character classes exclude newlines so a match stays
            # on one line, and the capitalized pattern is case-sensitive
            # (IGNORECASE would let "[A-Z]" match any letter).
            vendor_patterns = [
                (
                    r"\b(?:vendor|store|merchant|company)\b[ \t]*[:\-]?[ \t]*"
                    r"([A-Za-z0-9 \t&.,]+)",
                    re.IGNORECASE,
                ),
                (r"([A-Z][A-Za-z0-9 \t&.,]{3,30})", 0),
            ]

            vendor = ""
            for pattern, flags in vendor_patterns:
                match = re.search(pattern, text, flags)
                if match and match.group(1).strip():
                    vendor = match.group(1).strip()
                    break

            # Amount: most specific pattern first. Trying the bare-number
            # pattern first would return the first number in the text (often
            # part of a date) and never reach the labelled total.
            number = r"([0-9][0-9,]*(?:\.[0-9]+)?)"
            amount_patterns = [
                r"\b(?:total|amount|sum)\b\s*[:\-]?\s*\$?\s*" + number,
                r"\$\s*" + number,
                number,
            ]

            total_amount = 0.0
            for pattern in amount_patterns:
                match = re.search(pattern, text, re.IGNORECASE)
                if match:
                    try:
                        total_amount = float(match.group(1).replace(",", ""))
                        break
                    except ValueError:
                        continue

            # Extract date
            date_patterns = [
                r"(\d{4}-\d{2}-\d{2})",
                r"(\d{1,2}/\d{1,2}/\d{2,4})",
                r"(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)\s+\d{1,2},?\s+\d{4}",
            ]

            date = ""
            for pattern in date_patterns:
                match = re.search(pattern, text, re.IGNORECASE)
                if match:
                    # group(0): the third pattern's only group is the month.
                    date = match.group(0)
                    break

            return {
                "vendor": vendor or "Unknown",
                "description": "",
                "total_amount": total_amount,
                "tax_amount": 0.0,
                "date": date or "",
                "category": "Other",
                "confidence": 0.3,  # Low confidence for text extraction
                "currency": "CAD",  # same default as the JSON parsing path
                "extraction_success": True,
            }

        except Exception as e:
            logger.error(f"Text extraction error: {str(e)}")
            return {
                "vendor": "Unknown",
                "description": "",
                "total_amount": 0.0,
                "tax_amount": 0.0,
                "date": "",
                "category": "Other",
                "confidence": 0.1,
                "currency": "CAD",
                "extraction_success": False,
                "error": f"Text extraction failed: {str(e)}",
            }

    async def save_uploaded_file(self, file_content: bytes, filename: str) -> str:
        """Write uploaded bytes into the ``uploads`` directory.

        Args:
            file_content: Raw bytes of the uploaded file.
            filename: Name supplied with the upload. Only its final path
                component is used, with spaces replaced by underscores.

        Returns:
            The path of the saved file, ``uploads/<timestamp>_<name>``.

        Raises:
            Exception: If the directory cannot be created or the file cannot
                be written; the original error is chained as the cause.
        """
        try:
            # Create uploads directory if it doesn't exist
            upload_dir = "uploads"
            os.makedirs(upload_dir, exist_ok=True)

            # The name comes from the client: drop any directory part
            # (either slash style) so the file cannot land outside
            # ``upload_dir``.
            base_name = os.path.basename(filename.replace("\\", "/"))
            base_name = base_name.replace(" ", "_")
            if not base_name.strip("."):
                base_name = "upload"

            # Microseconds keep two uploads of the same name within one
            # second from overwriting each other.
            timestamp = datetime.now().strftime("%Y%m%d_%H%M%S_%f")
            file_path = os.path.join(upload_dir, f"{timestamp}_{base_name}")

            # Save file
            async with aiofiles.open(file_path, "wb") as f:
                await f.write(file_content)

            return file_path

        except Exception as e:
            raise Exception(f"Failed to save file: {str(e)}") from e

    async def extract_transactions_from_image(self, image_path: str) -> Dict[str, Any]:
        """Extract multiple transactions from an image (bank statement, credit card statement, etc.)"""
        try:
            # Encode image to base64
            base64_image = self._encode_image(image_path)

            # Create Groq vision prompt for transaction extraction
            prompt = """
            Analyze this financial document image (bank statement, credit card statement, etc.) and extract ALL transactions in JSON format.
            
            Look for transaction lists, payment records, or any financial entries that show:
            - Date
            - Amount (positive or negative)
            - Vendor/Description/Payee name
            - Any additional notes or memo
            
            Return the transactions as a JSON array:
            {
                "extraction_success": true,
                "transactions": [
                    {
                        "date": "YYYY-MM-DD",
                        "amount": 0.00,
                        "vendor": "Vendor name",
                        "memo": "Additional notes"
                    },
                    {
                        "date": "YYYY-MM-DD", 
                        "amount": -0.00,
                        "vendor": "Another vendor",
                        "memo": "Payment or charge description"
                    }
                ]
            }
            
            Rules:
            - Extract ALL visible transactions
            - Include both positive (credits) and negative (debits) amounts
            - Use the actual date format from the document
            - Vendor should be the merchant/payee name
            - Memo can include transaction type, reference numbers, etc.
            - If no transactions found, return empty array but set extraction_success to true
            
            Return only valid JSON.
            """

            # Call Groq vision API
            response = self.client.chat.completions.create(
                messages=[
                    {
                        "role": "user",
                        "content": [
                            {"type": "text", "text": prompt},
                            {
                                "type": "image_url",
                                "image_url": {
                                    "url": f"data:image/jpeg;base64,{base64_image}",
                                },
                            },
                        ],
                    }
                ],
                model=self.model,
                max_tokens=2000,  # Higher token limit for multiple transactions
                temperature=0.1,
            )

            # Parse response
            result_text = response.choices[0].message.content.strip()
            return self._parse_transaction_extraction_result(result_text)

        except Exception as e:
            return {
                "extraction_success": False,
                "error": f"Transaction extraction error: {str(e)}",
                "transactions": [],
            }

    def _parse_transaction_extraction_result(self, result_text: str) -> Dict[str, Any]:
        """Parse the model's reply for a transaction-extraction request.

        The text between the first ``{`` and the last ``}`` is parsed as JSON,
        first untouched and then, if that fails, with trailing commas removed.

        Args:
            result_text: Raw text returned by the model.

        Returns:
            On success, a dict with ``extraction_success``, the cleaned
            ``transactions`` list (``date``, ``amount``, ``vendor``, ``memo``
            per entry) and ``total_transactions``. Entries that are not
            objects or whose amount cannot be parsed are skipped. On failure,
            a dict with ``extraction_success`` False, ``error`` and an empty
            ``transactions`` list.
        """
        import json
        import re

        def _failure(message: str) -> Dict[str, Any]:
            """Build the error result shape returned by this method."""
            return {
                "extraction_success": False,
                "error": message,
                "transactions": [],
            }

        def _text(value: Any) -> str:
            """Coerce to stripped text; null becomes "" rather than "None"."""
            return "" if value is None else str(value).strip()

        try:
            # Find the first '{' and last '}'
            start = result_text.find("{")
            end = result_text.rfind("}")
            if start == -1 or end <= start:
                return _failure("Could not find JSON object in AI response")
            json_str = result_text[start : end + 1]

            try:
                # Parse untouched first: the comma repair below would also
                # rewrite string values that happen to contain ", }".
                data = json.loads(json_str)
            except json.JSONDecodeError:
                repaired = re.sub(r",\s*([}\]])", r"\1", json_str)
                try:
                    data = json.loads(repaired)
                except json.JSONDecodeError as e:
                    logger.error("JSON parsing error: %s", e)
                    logger.error("Offending JSON string:\n%s", json_str)
                    return _failure(f"JSON parsing error: {str(e)}")

            # Validate and clean data; a null or non-list value means
            # "no transactions".
            transactions = data.get("transactions")
            if not isinstance(transactions, list):
                transactions = []

            cleaned_transactions = []
            for txn in transactions:
                if not isinstance(txn, dict):
                    logger.warning("Skipping non-object transaction entry: %r", txn)
                    continue
                amount_text = _text(txn.get("amount", 0)).replace("$", "").replace(",", "")
                try:
                    amount = float(amount_text)
                except ValueError:
                    logger.warning("Skipping transaction with unparseable amount: %r", txn)
                    continue
                cleaned_transactions.append(
                    {
                        "date": _text(txn.get("date")),
                        "amount": amount,
                        "vendor": _text(txn.get("vendor")),
                        "memo": _text(txn.get("memo")),
                    }
                )

            return {
                "extraction_success": data.get("extraction_success", True),
                "transactions": cleaned_transactions,
                "total_transactions": len(cleaned_transactions),
            }
        except Exception as e:
            logger.error("JSON parsing error (outer): %s", e)
            return _failure(f"JSON parsing error: {str(e)}")

    def _parse_date_to_iso(self, date_str: str) -> Optional[str]:
        """Convert a date in one of several formats to ``YYYY-MM-DD``.

        Recognized inputs (case-insensitive, surrounding whitespace ignored):

        - month name or abbreviation plus day, with an optional 4-digit year:
          ``"MAY 22"``, ``"May 22, 2024"``, ``"MAY 22 2024"``,
          ``"September 5, 2021"``; without a year the current year is used;
        - ``YYYY-MM-DD``, optionally followed by a non-digit suffix such as a
          time, which is dropped;
        - ``MM/DD/YYYY`` and ``MM/DD/YY``.

        Args:
            date_str: The date text to convert.

        Returns:
            The ISO date string, or None when the input is not a string, is
            in no recognized format, or is not a real calendar date (for
            example ``"FEB 30"``).
        """
        import re

        month_numbers = {
            "JAN": 1,
            "FEB": 2,
            "MAR": 3,
            "APR": 4,
            "MAY": 5,
            "JUN": 6,
            "JUL": 7,
            "AUG": 8,
            "SEP": 9,
            "OCT": 10,
            "NOV": 11,
            "DEC": 12,
        }

        try:
            date_str = date_str.strip().upper()

            # Month-name formats. The year may follow a comma or plain
            # whitespace; requiring four digits keeps a trailing time such as
            # "10:30" from being read as a year.
            month_match = re.match(
                r"(JAN|FEB|MAR|APR|MAY|JUN|JUL|AUG|SEP|OCT|NOV|DEC)[A-Z]*\.?\s+"
                r"(\d{1,2})(?:(?:\s*,\s*|\s+)(\d{4})(?!\d))?",
                date_str,
            )
            if month_match:
                month_name, day_text, year_text = month_match.groups()
                year = int(year_text) if year_text else datetime.now().year
                # Building a datetime rejects impossible dates such as Feb 30.
                parsed = datetime(year, month_numbers[month_name], int(day_text))
                return parsed.date().isoformat()

            # YYYY-MM-DD, validated and reduced to the date part.
            iso_match = re.match(r"(\d{4})-(\d{2})-(\d{2})(?!\d)", date_str)
            if iso_match:
                year, month, day = (int(part) for part in iso_match.groups())
                return datetime(year, month, day).date().isoformat()

            # MM/DD/YYYY, then MM/DD/YY.
            slash_formats = (
                (r"\d{1,2}/\d{1,2}/\d{4}", "%m/%d/%Y"),
                (r"\d{1,2}/\d{1,2}/\d{2}", "%m/%d/%y"),
            )
            for pattern, date_format in slash_formats:
                if re.fullmatch(pattern, date_str):
                    return datetime.strptime(date_str, date_format).date().isoformat()

            return None

        except (AttributeError, TypeError, ValueError):
            # Non-string input or a calendar-invalid date.
            return None