# app/services/ai_matcher.py

import logging
import time
from typing import List, Tuple

import groq

from config import settings
from schemas import Match, Receipt, Transaction

# Configure the root logger at import time so INFO-and-above records are
# emitted (basicConfig does nothing if the root logger already has handlers),
# then create the logger used throughout this module.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)


class AIMatcher:
    def __init__(self, use_batch_matching=True):
        """Create the Groq client and set the retry and throttling defaults.

        Args:
            use_batch_matching: When true, each receipt is matched with one
                prompt that lists all of its candidates; otherwise every
                candidate is scored with its own call (legacy behaviour).
        """
        # Remote model access, configured from the project settings.
        self.client = groq.Groq(api_key=settings.GROQ_API_KEY)
        self.model = settings.model

        # Strategy switch read by _find_best_match.
        self.use_batch_matching = use_batch_matching

        # Throttling state read and updated by _rate_limit.
        self.last_api_call = 0  # time.time() of the last throttled call
        self.rate_limit_delay = 1.0  # minimum spacing in seconds

        # Retry policy: attempts per prompt and the base delay in seconds,
        # which the retry loops double after every failed attempt.
        self.max_retries = 3
        self.retry_delay = 2

    def match_receipts_to_transactions(
        self, receipts: List[Receipt], transactions: List[Transaction]
    ) -> List[Match]:
        """Match receipts to transactions using AI"""
        logger.info(
            f"Starting AI matching for {len(receipts)} receipts against {len(transactions)} transactions"
        )
        matches = []

        for i, receipt in enumerate(receipts):
            logger.info(
                f"Processing receipt {i + 1}/{len(receipts)}: {receipt.vendor} - ${receipt.amount}"
            )

            # Rate limiting
            self._rate_limit()

            # Get the BEST match for this receipt (highest confidence score)
            best_match = self._find_best_match(receipt, transactions)
            if best_match:
                matches.append(best_match)
                logger.info(
                    f"Found match: {best_match.confidence_score:.3f} - {best_match.match_reason}"
                )
            else:
                logger.warning(
                    f"No match found for receipt: {receipt.vendor} - ${receipt.amount}"
                )

        # Sort by confidence score (highest first)
        matches = sorted(matches, key=lambda x: x.confidence_score, reverse=True)
        logger.info(f"AI matching completed. Found {len(matches)} matches")
        return matches

    def _rate_limit(self):
        """Implement rate limiting to avoid API quota exhaustion"""
        current_time = time.time()
        time_since_last_call = current_time - self.last_api_call

        if time_since_last_call < self.rate_limit_delay:
            sleep_time = self.rate_limit_delay - time_since_last_call
            logger.debug(f"Rate limiting: sleeping for {sleep_time:.2f} seconds")
            time.sleep(sleep_time)

        self.last_api_call = time.time()

    def _find_best_match(
        self, receipt: Receipt, transactions: List[Transaction]
    ) -> Match:
        """Find the BEST match for a receipt using a single AI call for all candidates"""
        candidates = self._filter_candidates(receipt, transactions)
        if not candidates:
            logger.warning(
                f"No candidates found for receipt: {receipt.vendor} - ${receipt.amount}"
            )
            return None

        logger.info(f"Found {len(candidates)} candidates for receipt: {receipt.vendor}")

        # Choose matching method based on configuration
        if self.use_batch_matching:
            # New efficient method: single AI call for all candidates
            best_match = self._find_best_match_single_call(receipt, candidates)
        else:
            # Legacy method: individual AI calls (fallback)
            best_match = self._find_best_match_legacy(receipt, candidates)

        return best_match

    def _find_best_match_single_call(
        self, receipt: Receipt, candidates: List[Transaction]
    ) -> Match:
        """Find the best match using a single AI call to evaluate all candidates"""
        if not candidates:
            return None

        # Limit candidates to avoid token limits (adjust based on your needs)
        max_candidates = 10
        if len(candidates) > max_candidates:
            # Sort by amount similarity and take top candidates
            candidates = sorted(
                candidates, key=lambda t: abs(receipt.amount - abs(t.amount))
            )[:max_candidates]
            logger.info(
                f"Limited candidates to top {max_candidates} by amount similarity"
            )

        # Build comprehensive prompt with all candidates
        candidates_text = ""
        for i, transaction in enumerate(candidates):
            transaction_amount_abs = abs(transaction.amount)
            date_diff = abs((receipt.receipt_date - transaction.transaction_date).days)
            amount_diff = abs(receipt.amount - transaction_amount_abs - receipt.tax)
            amount_percent_diff = (
                (amount_diff / receipt.amount) * 100 if receipt.amount > 0 else 0
            )

            candidates_text += f"""
Candidate {i + 1}:
- Vendor: {transaction.vendor}
- Amount: ${transaction.amount} (absolute: ${transaction_amount_abs})
- Date: {transaction.transaction_date.strftime("%Y-%m-%d")} ({date_diff} days difference)
- Notes: {transaction.notes}
- Amount difference: ${amount_diff} ({amount_percent_diff:.1f}%) Taking in account receipt tax
"""
        logger.info(f"\nThis is the receipt: {receipt}\n")
        logger.info(f"\nCandidate text: {candidates_text}\n")

        prompt = f"""You are an expert at matching receipts to bank transactions. Your PRIMARY goal is to find the candidate with the CLOSEST AMOUNT match.

RECEIPT TO MATCH:
- Vendor: {receipt.vendor}
- Amount: ${receipt.amount}
- Date: {receipt.receipt_date.strftime("%Y-%m-%d")}
- Description: {receipt.description}
- Category: {receipt.category}

CANDIDATE TRANSACTIONS:
{candidates_text}

CRITICAL INSTRUCTIONS FOR SELECTION:
1. FIRST: Find the candidate(s) with the SMALLEST amount percentage difference
2. ONLY if multiple candidates have similar amounts (within 2% of each other), THEN consider vendor/date/notes
3. USE THE PERCENTAGE DIFFERENCE PROVIDED for each candidate - DO NOT calculate yourself
4. IGNORE vendor/description matches if amounts are far apart (>20% difference)
5. The candidate with the closest amount is almost always the correct match

SCORING CRITERIA - AMOUNT DIFFERENCE IS 90% OF THE DECISION:

Step 1: Calculate BASE SCORE using the provided amount percentage difference:
- 0-1% difference: Base score = 0.95
- 1-2% difference: Base score = 0.90
- 2-3% difference: Base score = 0.85
- 3-5% difference: Base score = 0.75
- 5-7% difference: Base score = 0.65
- 7-10% difference: Base score = 0.55
- 10-15% difference: Base score = 0.40
- 15-20% difference: Base score = 0.25
- 20-30% difference: Base score = 0.15
- 30-50% difference: Base score = 0.08
- 50-100% difference: Base score = 0.03
- >100% difference: Base score = 0.01

Step 2: ADJUST the base score (±0.10 maximum):
- Vendor exact match: +0.10
- Vendor similar/partial match: +0.05
- Date within 7 days: +0.05
- Date within 30 days: +0.02
- Description/notes keywords match: +0.02
- Vendor completely different: -0.05
- Date >90 days apart: -0.03

Step 3: Ensure final score is between 0.0 and 1.0


CRITICAL: You MUST return valid JSON only. No explanations, no text before or after.

Return format:
{{"candidate_number": 1, "confidence_score": 0.65, "reason": "5.8% amount difference with similar vendor"}}

Another example:
{{"candidate_number": 2, "confidence_score": 0.01, "reason": "9850% amount difference, extremely poor match"}}

Return ONLY JSON for the best candidate:"""

        # logger.info(f"This is the prompt: {prompt}")
        for attempt in range(self.max_retries):
            try:
                result = self._call_groq_api_with_timeout(
                    prompt, timeout=45
                )  # Longer timeout for complex prompt

                # Parse the single result
                candidate_num, score, reason = self._parse_single_match_response(result)

                if candidate_num == -1:  # Parsing error occurred
                    logger.warning(
                        f"Failed to parse AI response for receipt: {receipt.vendor}"
                    )
                    return None

                if 0 <= candidate_num < len(candidates):
                    best_transaction = candidates[candidate_num]
                    
                    # Validate the match - catch AI errors with extreme amount differences
                    transaction_amount_abs = abs(best_transaction.amount)
                    amount_diff = abs(receipt.amount - transaction_amount_abs)
                    amount_percent_diff = (
                        (amount_diff / receipt.amount) * 100 if receipt.amount > 0 else 0
                    )
                    
                    # If amount difference is >100%, force very low score
                    if amount_percent_diff > 100:
                        logger.warning(
                            f"Overriding AI score for extreme mismatch: {receipt.amount} vs {transaction_amount_abs} ({amount_percent_diff:.1f}% diff)"
                        )
                        score = min(0.05, score)  # Cap at 0.05 for extreme mismatches
                        reason = f"{amount_percent_diff:.1f}% amount difference, extreme mismatch"
                    
                    logger.info(
                        f"AI selected candidate {candidate_num + 1}: {best_transaction.vendor} (score: {score:.3f})"
                    )
                    return Match(receipt, best_transaction, score, reason)
                else:
                    logger.warning(
                        f"AI returned invalid candidate number: {candidate_num}"
                    )
                    return None

            except Exception as e:
                logger.warning(
                    f"Attempt {attempt + 1} failed for receipt {receipt.id}: {str(e)}"
                )
                if attempt < self.max_retries - 1:
                    sleep_time = self.retry_delay * (2**attempt)
                    logger.info(f"Waiting {sleep_time} seconds before retry...")
                    time.sleep(sleep_time)
                else:
                    logger.error(f"All attempts failed for receipt {receipt.id}")
                    return None

        return None

    def _parse_single_match_response(self, result: str) -> Tuple[int, float, str]:
        """Parse AI response for single best match (JSON format)"""
        import json
        import re
        
        result = result.strip()
        logger.debug(f"Parsing single match response: {result}")

        try:
            # First, try to parse the entire result as JSON
            try:
                data = json.loads(result)
                candidate_num = int(data.get("candidate_number", -1)) - 1
                score = float(data.get("confidence_score", 0.0))
                reason = str(data.get("reason", "No reason provided"))
                score = max(0.0, min(1.0, score))
                logger.debug(f"Parsed JSON: candidate={candidate_num}, score={score}, reason={reason}")
                return candidate_num, score, reason
            except json.JSONDecodeError:
                pass
            
            # Try to extract JSON object from the response using improved regex
            # This handles nested braces better
            json_pattern = r'\{[^{}]*"candidate_number"[^{}]*"confidence_score"[^{}]*"reason"[^{}]*\}'
            json_match = re.search(json_pattern, result)
            
            if json_match:
                json_str = json_match.group()
                data = json.loads(json_str)
                candidate_num = int(data.get("candidate_number", -1)) - 1
                score = float(data.get("confidence_score", 0.0))
                reason = str(data.get("reason", "No reason provided"))
                score = max(0.0, min(1.0, score))
                logger.debug(f"Parsed extracted JSON: candidate={candidate_num}, score={score}, reason={reason}")
                return candidate_num, score, reason
            
            # Try to find any JSON-like structure with the required fields
            candidate_match = re.search(r'"candidate_number"\s*:\s*(\d+)', result)
            score_match = re.search(r'"confidence_score"\s*:\s*([\d.]+)', result)
            reason_match = re.search(r'"reason"\s*:\s*"([^"]*)"', result)
            
            if candidate_match and score_match and reason_match:
                candidate_num = int(candidate_match.group(1)) - 1
                score = float(score_match.group(1))
                reason = reason_match.group(1)
                score = max(0.0, min(1.0, score))
                logger.debug(f"Parsed fields individually: candidate={candidate_num}, score={score}, reason={reason}")
                return candidate_num, score, reason

        except (json.JSONDecodeError, ValueError, KeyError) as e:
            logger.warning(f"Error parsing JSON response: {e}")
            
            # Fallback to old pipe-delimited format for backwards compatibility
            try:
                if "|" in result:
                    parts = result.split("|")
                    if len(parts) >= 3:
                        candidate_str = parts[0].strip()
                        score_str = parts[1].strip()
                        reason = "|".join(parts[2:]).strip()

                        # Extract candidate number
                        candidate_match = re.search(r"\d+", candidate_str)
                        if candidate_match:
                            candidate_num = (
                                int(candidate_match.group()) - 1
                            )  # Convert to 0-based index
                        else:
                            raise ValueError("No candidate number found")

                        # Extract score
                        score_clean = "".join(
                            c for c in score_str if c.isdigit() or c == "."
                        )
                        score = float(score_clean) if score_clean else 0.0

                        # Ensure score is in valid range
                        score = max(0.0, min(1.0, score))

                        logger.debug(
                            f"Parsed (fallback): candidate={candidate_num}, score={score}, reason={reason}"
                        )
                        return candidate_num, score, reason
            except Exception as fallback_error:
                logger.warning(f"Fallback parsing also failed: {fallback_error}")

        # Final fallback
        # logger.warning(f"Could not parse single match response: {result}")
        return -1, 0.0, f"Parse error: {result[:50]}..."

    def _filter_candidates(
        self, receipt: Receipt, transactions: List[Transaction]
    ) -> List[Transaction]:
        """Filter transactions to create a reasonable candidate list"""
        candidates = []
        
        for transaction in transactions:
            # Use absolute value for transaction amount comparison
            transaction_amount_abs = abs(transaction.amount)
            amount_diff = abs(receipt.amount - transaction_amount_abs)
            
            # Calculate percentage difference
            if receipt.amount > 0:
                percent_diff = (amount_diff / receipt.amount) * 100
            else:
                percent_diff = 0
            
            # Be more restrictive: exclude transactions with >300% difference
            # This prevents extreme mismatches while still being generous
            if percent_diff <= 300:
                candidates.append(transaction)
            else:
                logger.debug(
                    f"Filtered out transaction ${transaction_amount_abs} for receipt ${receipt.amount} ({percent_diff:.1f}% difference)"
                )

        logger.debug(
            f"Filtered {len(transactions)} transactions to {len(candidates)} candidates for receipt ${receipt.amount}"
        )
        return candidates

    def _find_best_match_legacy(
        self, receipt: Receipt, transactions: List[Transaction]
    ) -> Match:
        """Legacy method: Find the best match using individual API calls (kept as fallback)"""
        candidates = self._filter_candidates(receipt, transactions)
        if not candidates:
            return None

        best_match = None
        highest_score = 0

        for transaction in candidates:
            score, reason = self._calculate_match_score(receipt, transaction)
            logger.debug(
                f"Score {score:.3f} for transaction {transaction.vendor}: {reason}"
            )

            if score > highest_score:
                highest_score = score
                best_match = Match(receipt, transaction, score, reason)

        return best_match

    def _calculate_match_score(
        self, receipt: Receipt, transaction: Transaction
    ) -> Tuple[float, str]:
        """Calculate match score using AI"""
        # Calculate differences for the AI to consider
        date_diff = abs((receipt.receipt_date - transaction.transaction_date).days)
        transaction_amount_abs = abs(transaction.amount)
        amount_diff = abs(receipt.amount - transaction_amount_abs)
        amount_percent_diff = (
            (amount_diff / receipt.amount) * 100 if receipt.amount > 0 else 0
        )

        prompt = f"""
        Compare this receipt with this transaction and provide a confidence score (0-1) and brief reason, the reason must be a single sentence without any special formatting.
        
        Receipt: {receipt.vendor}, ${receipt.amount}, {receipt.receipt_date.strftime("%Y-%m-%d")}
        Receipt Description: {receipt.description}
        Receipt Category: {receipt.category}
        Transaction: {transaction.vendor}, ${transaction.amount} (absolute: ${transaction_amount_abs}), {transaction.transaction_date.strftime("%Y-%m-%d")}
        Transaction Notes: {transaction.notes}
        
        Differences:
        - Date difference: {date_diff} days
        - Amount difference: ${amount_diff} ({amount_percent_diff:.1f}%)
        - Vendor comparison: "{receipt.vendor}" vs "{transaction.vendor}"
        - Description/Notes comparison: "{receipt.description}" vs "{transaction.notes}"
        - Category: {receipt.category}
        
        Score this potential match based on how likely it is the correct match:
        
        - Perfect matches (same vendor, amount, date): 0.95-1.0
        - High confidence (minor differences): 0.8-0.94
        - Medium confidence (moderate differences): 0.6-0.79
        - Low confidence (significant differences): 0.4-0.59
        - Very low confidence (major differences): 0.2-0.39
        - Minimal similarity: 0.1-0.19
        - No meaningful similarity: 0.0-0.09
        
        Consider description and category similarity in your scoring.
        
        THINGS TO NOTE:
        The most important factor to consider is the Amount for both the transaction and the receipt, the closer the amounts, the higher the score. 
        If the amounts are different or not close return a low score (0-0.1) based on other factors.
        
        IMPORTANT: Return ONLY the score and reason separated by a pipe character.
        Format: [score]|[reason]
        Example: 0.85|Same vendor, same amount, 2 days apart
        """

        for attempt in range(self.max_retries):
            try:
                result = self._call_groq_api_with_timeout(
                    prompt, timeout=30
                )  # Increased timeout

                # Parse the result - handle multiple formats
                score, reason = self._parse_ai_response(result)

                # logger.debug(f"AI Response: {result}")
                # logger.debug(f"Parsed: score={score}, reason={reason}")

                return score, reason

            except Exception as e:
                logger.warning(
                    f"Attempt {attempt + 1} failed for receipt {receipt.id}: {str(e)}"
                )
                if attempt < self.max_retries - 1:
                    # Exponential backoff for rate limiting
                    sleep_time = self.retry_delay * (2**attempt)
                    logger.info(f"Waiting {sleep_time} seconds before retry...")
                    time.sleep(sleep_time)
                else:
                    logger.error(f"All attempts failed for receipt {receipt.id}")
                    return 0.0, f"AI error after {self.max_retries} attempts: {str(e)}"

    def _parse_ai_response(self, result: str) -> Tuple[float, str]:
        """Parse AI response with robust error handling"""
        result = result.strip()
        logger.debug(f"Parsing AI response: {result}")

        # Try to find score in various formats
        if "|" in result:
            parts = result.split("|")
            logger.debug(f"Split response into {len(parts)} parts: {parts}")

            # Look for a numeric score in any part
            for i, part in enumerate(parts):
                part = part.strip()
                try:
                    # Remove any non-numeric characters except decimal point
                    score_str_clean = "".join(
                        c for c in part if c.isdigit() or c == "."
                    )
                    if score_str_clean:
                        score = float(score_str_clean)
                        if 0 <= score <= 1:  # Valid confidence score
                            # Get reason from other parts
                            reason_parts = [
                                p.strip()
                                for j, p in enumerate(parts)
                                if j != i and p.strip()
                            ]
                            reason = (
                                " | ".join(reason_parts)
                                if reason_parts
                                else "Score extracted"
                            )
                            logger.debug(
                                f"Found score {score} in part {i}, reason: {reason}"
                            )
                            return score, reason
                except ValueError:
                    continue

        # Try to extract just a number from the response
        try:
            import re

            numbers = re.findall(r"\d+\.?\d*", result)
            if numbers:
                for num_str in numbers:
                    score = float(num_str)
                    if 0 <= score <= 1:  # Valid confidence score
                        logger.debug(f"Extracted score {score} from response")
                        return score, f"Extracted from response: {result[:50]}..."
        except (ValueError, IndexError):
            pass

        # Fallback - try to find any number and normalize it
        try:
            import re

            numbers = re.findall(r"\d+\.?\d*", result)
            if numbers:
                score = float(numbers[0])
                # Normalize to 0-1 range if it's a percentage or other scale
                if score > 1:
                    score = score / 100  # Assume percentage
                score = max(0, min(1, score))  # Clamp to 0-1
                logger.debug(f"Normalized score {score} from response")
                return score, f"Normalized from response: {result[:50]}..."
        except (ValueError, IndexError):
            pass

        # Final fallback
        logger.warning(f"Could not parse AI response: {result}")
        return 0.0, f"Unparseable response: {result[:50]}..."

    def _call_groq_api_with_timeout(self, prompt: str, timeout: int = 15) -> str:
        """Make API call with timeout and retry logic"""
        import concurrent.futures

        def api_call():
            try:
                response = self.client.chat.completions.create(
                    model=self.model,
                    messages=[
                        {"role": "system", "content": "You are a JSON-only response assistant. Return only valid JSON, no explanations."},
                        {"role": "user", "content": prompt}
                    ],
                    max_tokens=150,
                    temperature=0,
                )
                return response.choices[0].message.content.strip()
            except Exception as e:
                raise e

        try:
            with concurrent.futures.ThreadPoolExecutor() as executor:
                future = executor.submit(api_call)
                return future.result(timeout=timeout)
        except concurrent.futures.TimeoutError:
            raise Exception(f"API call timed out after {timeout} seconds")
        except Exception as e:
            raise e