# ds_quickbooks/app/services/llm_tax_analyzer.py

import json
import logging
from typing import Any, Dict

import groq
from config import settings
from schemas import Receipt, Transaction

logger = logging.getLogger(__name__)


class LLMTaxAnalyzer:
    """
    Uses LLM to intelligently apply tax rules based on context.

    Implements four core tax rules:
    1. Sales Tax Rule - Based on receipt location (shipping/billing address)
    2. Foreign Exchange Rule - Handles currency mismatches
    3. Depreciation Rule - Capital assets (based on user location)
    4. Meals & Entertainment Rule - 50% tax deduction, 100% accounting deduction

    The rules themselves are expressed in the prompts sent to the model; the
    two lookup tables below are serialized to JSON and embedded in those
    prompts as reference data.
    """

    # Provincial tax rates for reference.
    # Keyed by two-letter province/territory code. "rate" is a fraction
    # (0.13 == 13%): the context builders multiply it by 100 before showing it
    # as a percentage. "name" and "type" are descriptive labels; "name" is also
    # shown to the model as the user's tax type.
    PROVINCIAL_TAX_RATES = {
        "ON": {"rate": 0.13, "name": "HST", "type": "Harmonized"},
        "QC": {"rate": 0.14975, "name": "QST + GST", "type": "Combined"},
        "BC": {"rate": 0.12, "name": "PST + GST", "type": "Combined"},
        "AB": {"rate": 0.05, "name": "GST", "type": "Federal only"},
        "SK": {"rate": 0.11, "name": "PST + GST", "type": "Combined"},
        "MB": {"rate": 0.12, "name": "PST + GST", "type": "Combined"},
        "NS": {"rate": 0.15, "name": "HST", "type": "Harmonized"},
        "NB": {"rate": 0.15, "name": "HST", "type": "Harmonized"},
        "NL": {"rate": 0.15, "name": "HST", "type": "Harmonized"},
        "PE": {"rate": 0.15, "name": "HST", "type": "Harmonized"},
        "NT": {"rate": 0.05, "name": "GST", "type": "Federal only"},
        "NU": {"rate": 0.05, "name": "GST", "type": "Federal only"},
        "YT": {"rate": 0.05, "name": "GST", "type": "Federal only"},
    }

    # CCA rates by asset class (simplified).
    # Keyed by asset category name; values are fractional declining-balance
    # rates. NOTE(review): rates and class numbers are hard-coded here and
    # repeated in the single-match prompt text - keep the two in sync and
    # confirm against current CRA tables.
    CCA_RATES = {
        "vehicles": 0.30,  # Class 10
        "computer_equipment": 0.55,  # Class 50
        "furniture": 0.20,  # Class 8
        "buildings": 0.04,  # Class 1
        "machinery": 0.20,  # Class 8
    }

    def __init__(self):
        """Create the Groq client and record the model name and retry count."""
        # API key and model name both come from the shared settings object.
        api_key = settings.GROQ_API_KEY
        self.client = groq.Groq(api_key=api_key)
        self.model = settings.model
        self.max_retries = 3

    def analyze_and_apply_tax_rules_batch(
        self,
        matches: list,  # List of Match objects
        user_location: str = "ON",
    ) -> list:
        """
        Batch process all matches in a SINGLE LLM call to reduce costs.
        Analyzes all receipt-transaction pairs together and applies tax rules.
        Falls back to individual processing if batch fails.
        """
        if not matches:
            return matches

        logger.info(f"Starting batch tax analysis for {len(matches)} matches")

        # Build batch context for all matches
        try:
            batch_context = self._build_batch_analysis_context(matches, user_location)
        except Exception as e:
            logger.error(f"Error building batch context: {str(e)}")
            # If we can't even build the context, return matches as-is
            for match in matches:
                match.match_reason += " (Batch analysis setup failed)"
            return matches

        # Get LLM analysis for ALL matches at once
        llm_batch_analysis = self._get_llm_tax_analysis_batch(
            batch_context, len(matches)
        )

        # Check if we got any analysis back
        if not llm_batch_analysis:
            logger.warning("Batch LLM analysis returned empty results")

            # Fallback: Try processing each match individually if batch size is small
            if (
                len(matches) <= 5
            ):  # Only fallback for small batches to avoid excessive API calls
                logger.info(
                    f"Attempting individual processing fallback for {len(matches)} matches"
                )
                return self._process_matches_individually(matches, user_location)
            else:
                logger.warning(
                    f"Batch too large ({len(matches)} matches) for individual fallback - returning matches without enhanced tax analysis"
                )
                for match in matches:
                    match.match_reason += " (Batch tax analysis unavailable)"
                return matches

        logger.info(f"Received batch analysis for {len(llm_batch_analysis)} matches")

        # Apply results to each match
        enhanced_matches = []
        for i, match in enumerate(matches):
            try:
                # Get the analysis for this specific match from the batch results
                match_key = f"match_{i}"
                match_analysis = llm_batch_analysis.get(match_key, {})

                if match_analysis and isinstance(match_analysis, dict):
                    # Apply the tax analysis to this match
                    enhanced_match = self._apply_tax_analysis_to_match(
                        match, match_analysis
                    )
                    enhanced_matches.append(enhanced_match)
                else:
                    # No analysis available for this match, use as-is
                    logger.warning(
                        f"No analysis found for match {i} (key: {match_key})"
                    )
                    match.match_reason += " (Tax analysis incomplete)"
                    enhanced_matches.append(match)
            except Exception as e:
                logger.error(f"Error applying tax analysis to match {i}: {str(e)}")
                match.match_reason += " (Tax analysis error)"
                enhanced_matches.append(match)

        logger.info(
            f"Completed batch tax analysis, enhanced {len(enhanced_matches)} matches"
        )
        # logger.info(
        #     f"\n\n\nFinal batch enhanced matches: {enhanced_matches}"
        # )
        return enhanced_matches

    def _process_matches_individually(self, matches: list, user_location: str) -> list:
        """
        Fallback method: Process matches one at a time using the legacy method.
        Only used when batch processing fails and batch size is small.
        """
        logger.info(f"Processing {len(matches)} matches individually as fallback")
        enhanced_matches = []

        for i, match in enumerate(matches):
            try:
                # Use the legacy single-match analysis method
                tax_analysis = self.analyze_and_apply_tax_rules(
                    match.receipt, match.transaction, user_location
                )

                # Apply the analysis to the match
                enhanced_match = self._apply_tax_analysis_to_match(match, tax_analysis)
                enhanced_matches.append(enhanced_match)
                logger.info(
                    f"Successfully processed match {i + 1}/{len(matches)} individually"
                )

            except Exception as e:
                logger.error(f"Error in individual processing for match {i}: {str(e)}")
                match.match_reason += " (Individual tax analysis failed)"
                enhanced_matches.append(match)

        return enhanced_matches

    def analyze_and_apply_tax_rules(
        self,
        receipt: Receipt,
        transaction: Transaction,
        user_location: str = "ON",  # Default to Ontario
    ) -> Dict[str, Any]:
        """
        Legacy single-match analysis method (kept for backward compatibility).
        Use analyze_and_apply_tax_rules_batch() for better performance.

        Use LLM to intelligently analyze and apply all tax rules:
        1. Sales tax based on receipt location (shipping/billing address priority)
        2. Foreign exchange rules for currency mismatches
        3. Depreciation rules for capital assets (based on user location)
        4. Meals & Entertainment deduction rules
        """

        # Prepare context for LLM
        analysis_context = self._build_analysis_context(
            receipt, transaction, user_location
        )

        # Get LLM analysis
        llm_analysis = self._get_llm_tax_analysis(analysis_context)

        # Parse and structure the results
        structured_results = self._structure_analysis_results(
            llm_analysis, receipt, transaction, user_location
        )

        return structured_results

    def _build_analysis_context(
        self, receipt: Receipt, transaction: Transaction, user_location: str
    ) -> str:
        """Build comprehensive context for LLM analysis"""

        # Extract location information
        receipt_location = self._extract_receipt_location(receipt)

        # Normalize user_location to province code (handle "Canada", "Ontario", "ON", etc.)
        user_province = self._normalize_location_to_province(user_location)

        logger.info(
            f"Building tax analysis context - User Location: {user_location} → Province Code: {user_province}"
        )

        # Build tax rates reference
        tax_rates_info = json.dumps(self.PROVINCIAL_TAX_RATES, indent=2)
        cca_rates_info = json.dumps(self.CCA_RATES, indent=2)

        context = f"""
RECEIPT DETAILS:
- Vendor: {receipt.vendor}
- Amount: ${receipt.amount:.2f}
- Currency: {receipt.currency}
- Date: {receipt.receipt_date.strftime("%Y-%m-%d")}
- Category: {receipt.category}
- Description: {receipt.description}
- Billing Address: {self._format_address(receipt.billing_address)}
- Shipping Address: {self._format_address(receipt.shipping_address)}
- Is Meals & Entertainment: {receipt.is_meals_entertainment}

TRANSACTION DETAILS:
- Vendor: {transaction.vendor}
- Amount: ${transaction.amount:.2f}
- Currency: {transaction.currency}
- Date: {transaction.transaction_date.strftime("%Y-%m-%d")}
- Notes: {transaction.notes}
- FX Rate: {transaction.fx_rate if transaction.fx_rate else "N/A"}

USER CONTEXT:
- User Location (Province): {user_province}
- User Province Tax Rate: {self.PROVINCIAL_TAX_RATES.get(user_province, {}).get("rate", 0.13) * 100}%
- User Tax Type: {self.PROVINCIAL_TAX_RATES.get(user_province, {}).get("name", "HST")}

RECEIPT LOCATION DETECTED:
{receipt_location}

PROVINCIAL TAX RATES REFERENCE:
{tax_rates_info}

CCA DEPRECIATION RATES BY ASSET CLASS:
{cca_rates_info}
"""
        return context

    def _normalize_location_to_province(self, location: str) -> str:
        """
        Normalize various location formats to province code.
        Handles: "ON", "Ontario", "Canada", etc.
        """
        location_upper = location.upper().strip()

        # Direct province code match
        if location_upper in self.PROVINCIAL_TAX_RATES:
            return location_upper

        # Map full province names to codes
        province_name_map = {
            "ONTARIO": "ON",
            "QUEBEC": "QC",
            "BRITISH COLUMBIA": "BC",
            "ALBERTA": "AB",
            "SASKATCHEWAN": "SK",
            "MANITOBA": "MB",
            "NOVA SCOTIA": "NS",
            "NEW BRUNSWICK": "NB",
            "NEWFOUNDLAND AND LABRADOR": "NL",
            "NEWFOUNDLAND": "NL",
            "PRINCE EDWARD ISLAND": "PE",
            "NORTHWEST TERRITORIES": "NT",
            "NUNAVUT": "NU",
            "YUKON": "YT",
        }

        if location_upper in province_name_map:
            return province_name_map[location_upper]

        # Default to Ontario if country is Canada or unspecified
        if location_upper in ["CANADA", "CAN", "CA", ""]:
            logger.warning(f"Location '{location}' is too generic, defaulting to ON")
            return "ON"

        # If nothing matches, default to Ontario
        logger.warning(f"Could not parse location '{location}', defaulting to ON")
        return "ON"

    def _extract_receipt_location(self, receipt: Receipt) -> str:
        """Extract and format receipt location information"""

        # Priority: Use shipping address if available, then billing
        location = (
            receipt.shipping_address
            if receipt.shipping_address
            else receipt.billing_address
        )

        if location:
            return f"""
- Province: {location.province}
- City: {location.city}
- Country: {location.country}
- Postal Code: {location.postal_code}
"""
        else:
            return "- No address information available (will use user location)"

    def _format_address(self, address) -> str:
        """Format address for display"""
        if address:
            return f"{address.city}, {address.province}, {address.country} ({address.postal_code})"
        return "Not provided"

    def _get_llm_tax_analysis(self, context: str) -> str:
        """Get tax rule analysis from LLM"""

        prompt = f"""
You are a tax expert analyzing a receipt-transaction match. Apply the following tax rules intelligently:
And you are to calculate the tax for the receipt based on the context provided.

{context}

=== FOUR CORE TAX RULES ===

### 1. SALES TAX RULE
**Purpose**: Calculate and apply correct sales tax based on shipping and billing addresses.

**Key Principles**:
- When billing and shipping addresses are THE SAME: Apply sales tax based on that address location.
- When billing and shipping addresses are DIFFERENT: Apply sales tax based on the SHIPPING address.
- Tax rate is determined by the RECEIPT'S location, NOT the user's location (unless no receipt location).

**Scenario Examples**:
a) User in Ontario, Receipt from Quebec:
   - Apply Quebec's tax rate (14.975% QST+GST), not Ontario's 13% HST
   - The user's location is only for depreciation purposes

b) User in Ontario, Receipt from USA (New York):
   - DO NOT apply Canadian sales tax
   - This is an international transaction
   - Flag for FX review instead

c) User in USA (New York), Receipt from California:
   - Apply California's sales tax rate (receipt location)
   - Not New York's rate (user location)

d) User in Ontario, Receipt has NO address information:
   - DEFAULT to user's location (Ontario 13% HST)
   - This is the fallback when receipt location is unknown

**Tax Calculation**:
- Compare calculated tax vs stated tax on receipt
- Flag discrepancies for review

### 2. FOREIGN EXCHANGE (FX) RULE
**Purpose**: Handle currency mismatches between receipts and transactions.

**Actions**:
- Identify when receipt currency ≠ transaction currency (e.g., USD vs CAD)
- Calculate the absolute discrepancy: |receipt_amount - transaction_amount|
- ALWAYS flag for manual review - DO NOT fetch exchange rates automatically
- If FX rate is provided in transaction data, note it but still require manual review

**Examples**:
- Transaction: USD $100, Receipt: CAD $125 → Discrepancy: $25, Flag for review
- The user must manually approve or adjust the FX difference

### 3. DEPRECIATION RULE
**Purpose**: Calculate depreciation for assets using two methods.

**Key Principle**: Depreciation is ALWAYS based on USER'S location, NOT receipt location.

**Asset Identification**:
- Only applies to capital assets: vehicles, equipment, furniture, buildings, machinery
- Identify from receipt category and description
- Typical threshold: Assets generally > $500

**Two Methods Required**:
a) **Straight-Line Depreciation** (for accounting purposes):
   Formula: (Cost - Residual Value) / Useful Life
   Example: Asset $10,000, 5-year life, $1,000 residual = $1,800/year

b) **CCA Depreciation** (for tax purposes - Canada):
   Method: Declining Balance
   Formula: Book Value × CCA Rate each year
   Example: Truck $20,000, 30% CCA:
   - Year 1: $20,000 × 30% = $6,000
   - Year 2: ($20,000 - $6,000) × 30% = $4,200
   - Continues declining each year

**CCA Classes** (Canada):
- Vehicles: 30% (Class 10)
- Computer Equipment: 55% (Class 50)
- Furniture/Machinery: 20% (Class 8)
- Buildings: 4% (Class 1)

### 4. MEALS & ENTERTAINMENT TAX DEDUCTION RULE
**Purpose**: Apply correct deductions for meals and entertainment expenses.

**Deduction Rules**:
- **For Tax Purposes**: Only 50% of total receipt amount is deductible
- **For Accounting Purposes**: 100% of total receipt amount is deductible
- **Sales Tax**: Full sales tax amount is deductible for accounting

**Example**:
- Receipt: $100 meal + $12 sales tax = $112 total
- **Tax Deduction**: $50 (50% of meal) + $12 (full tax) = $62
- **Accounting Deduction**: $100 (full meal) + $12 (full tax) = $112

=== LOCATION-BASED SCENARIO HANDLING ===

**When Receipt Location ≠ User Location**:

1. **Sales Tax**: Use RECEIPT's location for tax calculation
   - Exception: If international (different country), no Canadian sales tax + flag FX
   - Exception: If no location on receipt, use user's location as default

2. **Depreciation**: ALWAYS use USER's location for depreciation rules
   - Receipt location is irrelevant for depreciation
   - Apply user's country/province depreciation methods

3. **FX Handling**:
   - If receipt currency ≠ transaction currency: Flag for manual review
   - Do NOT automatically fetch or apply exchange rates

4. **Missing Location**:
   - If receipt has no address: Default to user's location for sales tax
   - Still apply user's location for depreciation

=== ANALYSIS REQUIRED ===

Provide a structured JSON response with the following format:

**CRITICAL INSTRUCTION FOR final_tax_amount:**
- This field MUST contain ONLY the calculated sales tax amount in dollars
- This is NOT the total amount including tax
- This is ONLY the tax portion (HST/GST/PST/QST)
- Example: If receipt total is $100 and calculated tax is $13, return 13.00 (not 113.00)
- For meals & entertainment: Return the FULL calculated tax amount (not the 50% adjusted amount)

{{
  "final_tax_amount": XX.XX,  // ONLY the calculated tax amount (e.g., 13.00 for $100 + $13 HST)
  "sales_tax": {{
    "applicable_province": "XX",
    "applicable_rate": 0.XX,
    "tax_name": "HST/GST/PST/QST",
    "calculated_tax": XX.XX,  // This should match final_tax_amount above
    "stated_tax": XX.XX,
    "discrepancy": XX.XX,
    "reason": "Detailed explanation",
    "requires_review": true/false
  }},
  "foreign_exchange": {{
    "currency_mismatch": true/false,
    "receipt_currency": "XXX",
    "transaction_currency": "XXX",
    "receipt_amount": XX.XX,
    "transaction_amount": XX.XX,
    "discrepancy": XX.XX,
    "requires_manual_review": true/false,
    "reason": "Explanation of FX situation"
  }},
  "depreciation": {{
    "is_capital_asset": true/false,
    "asset_class": "category name or N/A",
    "suggested_cca_rate": 0.XX,
    "straight_line_applicable": true/false,
    "cca_applicable": true/false,
    "straight_line_example": "Brief calculation example if applicable",
    "cca_example": "Brief calculation example if applicable",
    "reason": "Why this is/isn't a capital asset, which CCA class, and why depreciation based on user's location"
  }},
  "meals_entertainment": {{
    "is_meals_entertainment": true/false,
    "tax_deduction_amount": XX.XX,
    "accounting_deduction_amount": XX.XX,
    "sales_tax_included": XX.XX,
    "reason": "Explanation of M&E rule application"
  }},
  "confidence_adjustment": {{
    "boost": 0.XX,
    "reduce": 0.XX,
    "reason": "Why confidence should be adjusted based on tax analysis"
  }},
  "overall_assessment": "Comprehensive summary: which rules applied, why, what location used for what purpose, and any required actions"
}}

**IMPORTANT**: The "final_tax_amount" field at the top level must contain the final calculated tax amount. This should be the calculated_tax from sales_tax analysis. If this is a meals & entertainment expense, ensure you return the FULL tax amount here (not the 50% adjusted amount).

**Critical Reminders**:
- Sales tax uses RECEIPT location (or user location if receipt has none)
- Depreciation ALWAYS uses USER location
- For different addresses, use SHIPPING address for sales tax
- International transactions: no Canadian tax + FX flag
- Be precise with all calculations
- Always explain your reasoning clearly
"""

        try:
            response = self.client.chat.completions.create(
                model=self.model,
                messages=[
                    {
                        "role": "system",
                        "content": "You are a Canadian tax expert. Analyze transactions and apply tax rules accurately. Always return valid JSON.",
                    },
                    {"role": "user", "content": prompt},
                ],
                temperature=0.1,  # Low temperature for consistent, factual responses
                max_tokens=2000,
            )

            content = response.choices[0].message.content.strip()
            logger.info(f"LLM tax analysis received: {len(content)} characters")
            return content

        except Exception as e:
            logger.error(f"Error getting LLM tax analysis: {str(e)}")
            return self._get_fallback_analysis()

    def _get_fallback_analysis(self) -> str:
        """Return fallback analysis if LLM fails"""
        return json.dumps(
            {
                "final_tax_amount": 0.0,
                "sales_tax": {
                    "applicable_province": "ON",
                    "applicable_rate": 0.13,
                    "tax_name": "HST",
                    "calculated_tax": 0.0,
                    "stated_tax": 0.0,
                    "discrepancy": 0.0,
                    "reason": "LLM analysis failed - using defaults",
                    "requires_review": True,
                },
                "foreign_exchange": {
                    "currency_mismatch": False,
                    "requires_manual_review": False,
                    "reason": "Analysis not available",
                },
                "depreciation": {
                    "is_capital_asset": False,
                    "reason": "Analysis not available",
                },
                "meals_entertainment": {
                    "is_meals_entertainment": False,
                    "reason": "Analysis not available",
                },
                "confidence_adjustment": {
                    "boost": 0.0,
                    "reduce": 0.1,
                    "reason": "LLM analysis failed - recommend manual review",
                },
                "overall_assessment": "Automatic analysis failed. Manual review recommended.",
            }
        )

    def _structure_analysis_results(
        self,
        llm_response: str,
        receipt: Receipt,
        transaction: Transaction,
        user_location: str,
    ) -> Dict[str, Any]:
        """Parse LLM response and structure it for application"""

        try:
            # Extract JSON from LLM response (may have markdown code blocks)
            json_str = llm_response
            if "```json" in llm_response:
                json_str = llm_response.split("```json")[1].split("```")[0].strip()
            elif "```" in llm_response:
                json_str = llm_response.split("```")[1].split("```")[0].strip()

            analysis = json.loads(json_str)

            # Add metadata
            analysis["metadata"] = {
                "user_location": user_location,
                "receipt_id": receipt.id,
                "transaction_id": transaction.id,
                "analysis_method": "LLM-based",
                "model": self.model,
            }

            return analysis

        except json.JSONDecodeError as e:
            logger.error(f"Failed to parse LLM response as JSON: {str(e)}")
            logger.error(f"LLM response was: {llm_response}")

            # Return structured fallback
            return {
                "final_tax_amount": receipt.tax if receipt.tax else 0.0,
                "sales_tax": {
                    "requires_review": True,
                    "reason": "Failed to parse LLM response",
                },
                "foreign_exchange": {
                    "requires_manual_review": receipt.currency != transaction.currency
                },
                "depreciation": {"is_capital_asset": False},
                "confidence_adjustment": {
                    "boost": 0.0,
                    "reduce": 0.15,
                    "reason": "Analysis parsing failed",
                },
                "overall_assessment": "Analysis failed. Manual review required.",
                "error": str(e),
                "metadata": {
                    "user_location": user_location,
                    "analysis_method": "fallback",
                },
            }

    def _build_batch_analysis_context(self, matches: list, user_location: str) -> str:
        """Build comprehensive context for batch LLM analysis of all matches"""

        # Normalize user_location to province code
        user_province = self._normalize_location_to_province(user_location)

        logger.info(
            f"Building batch tax analysis context for {len(matches)} matches - User Location: {user_location} → Province Code: {user_province}"
        )

        # Build tax rates and CCA references once
        tax_rates_info = json.dumps(self.PROVINCIAL_TAX_RATES, indent=2)
        cca_rates_info = json.dumps(self.CCA_RATES, indent=2)

        # Build match entries
        matches_info = []
        for i, match in enumerate(matches):
            receipt = match.receipt
            transaction = match.transaction
            receipt_location = self._extract_receipt_location(receipt)

            match_info = f"""
MATCH {i} (ID: match_{i}):
Receipt Details:
  - Vendor: {receipt.vendor}
  - Amount: ${receipt.amount:.2f}
  - Currency: {receipt.currency}
  - Date: {receipt.receipt_date.strftime("%Y-%m-%d")}
  - Category: {receipt.category}
  - Description: {receipt.description}
  - Billing Address: {self._format_address(receipt.billing_address)}
  - Shipping Address: {self._format_address(receipt.shipping_address)}
  - Is Meals & Entertainment: {receipt.is_meals_entertainment}

Transaction Details:
  - Vendor: {transaction.vendor}
  - Amount: ${transaction.amount:.2f}
  - Currency: {transaction.currency}
  - Date: {transaction.transaction_date.strftime("%Y-%m-%d")}
  - Notes: {transaction.notes}
  - FX Rate: {transaction.fx_rate if transaction.fx_rate else "N/A"}

Receipt Location Detected:
{receipt_location}
"""
            matches_info.append(match_info)

        matches_section = "\n".join(matches_info)

        context = f"""
USER CONTEXT:
- User Location (Province): {user_province}
- User Province Tax Rate: {self.PROVINCIAL_TAX_RATES.get(user_province, {}).get("rate", 0.13) * 100}%
- User Tax Type: {self.PROVINCIAL_TAX_RATES.get(user_province, {}).get("name", "HST")}

PROVINCIAL TAX RATES REFERENCE:
{tax_rates_info}

CCA DEPRECIATION RATES BY ASSET CLASS:
{cca_rates_info}

=== MATCHES TO ANALYZE ({len(matches)} total) ===
{matches_section}
"""
        return context

    def _get_llm_tax_analysis_batch(self, context: str, num_matches: int) -> Dict[str, Any]:
        """Get tax rule analysis from LLM for ALL matches in a single call"""

        prompt = f"""
You are a Canadian tax expert analyzing MULTIPLE receipt-transaction matches.

{context}

=== FOUR CORE TAX RULES ===

### 1. SALES TAX RULE
**Purpose**: Calculate and apply correct sales tax based on shipping and billing addresses.

**Key Principles**:
- When billing and shipping addresses are THE SAME: Apply sales tax based on that address location.
- When billing and shipping addresses are DIFFERENT: Apply sales tax based on the SHIPPING address.
- Tax rate is determined by the RECEIPT'S location, NOT the user's location (unless no receipt location).

**Scenario Examples**:
a) User in Ontario, Receipt from Quebec:
   - Apply Quebec's tax rate (14.975% QST+GST), not Ontario's 13% HST

b) User in Ontario, Receipt from USA (New York):
   - DO NOT apply Canadian sales tax
   - This is an international transaction
   - Flag for FX review instead

c) User in Ontario, Receipt has NO address information:
   - DEFAULT to user's location (Ontario 13% HST)

**Tax Calculation**:
- Compare calculated tax vs stated tax on receipt
- Flag discrepancies for review

### 2. FOREIGN EXCHANGE (FX) RULE
**Purpose**: Handle currency mismatches between receipts and transactions.

**Actions**:
- Identify when receipt currency ≠ transaction currency (e.g., USD vs CAD)
- Calculate expected transaction amount using FX rate if available
- Flag discrepancies > $5 or 5% for manual review
- If FX rate missing but currencies differ, flag for review

### 3. DEPRECIATION RULE
**Purpose**: Identify capital assets requiring depreciation based on USER'S location.

**Critical**: Depreciation is ALWAYS based on the USER'S location (for Canadian tax filing), NOT the receipt location.

**Capital Asset Criteria**:
- Cost > $500 typically
- Useful life > 1 year
- Examples: computers, vehicles, furniture, machinery, buildings

**CCA Classes**: Assign appropriate class and rate based on asset type and user's jurisdiction

### 4. MEALS & ENTERTAINMENT RULE
**Purpose**: Apply 50% tax deduction limit for M&E expenses.

**Actions**:
- Identify M&E expenses (meals, entertainment, client dinners, etc.)
- Tax Deduction: 50% of total amount (including tax)
- Accounting Deduction: 100% of total amount (including tax)
- Always include sales tax in both calculations

=== YOUR TASK ===

Analyze EACH match and return a JSON object where each key is the match ID and the value is the complete tax analysis.

**CRITICAL INSTRUCTION FOR final_tax_amount:**
- This field MUST contain ONLY the calculated sales tax amount in dollars
- This is NOT the total amount including tax
- This is ONLY the tax portion (HST/GST/PST/QST)
- Example: If receipt total is $100 and calculated tax is $13, return 13.00 (not 113.00)
- For meals & entertainment: Return the FULL calculated tax amount (not the 50% adjusted amount)
- VERIFY: final_tax_amount should equal sales_tax.calculated_tax
-

Return your response as a SINGLE JSON object in this format:

{{
  "match_0": {{
    "final_tax_amount": XX.XX, // ONLY the calculated tax amount
    "sales_tax": {{
      "applicable_province": "XX",
      "applicable_rate": 0.XX,
      "tax_name": "HST/GST/PST",
      "calculated_tax": XX.XX,
      "stated_tax": XX.XX,
      "discrepancy": XX.XX,
      "reason": "Detailed explanation",
      "requires_review": true/false
    }},
    "foreign_exchange": {{
      "currency_mismatch": true/false,
      "receipt_currency": "XXX",
      "transaction_currency": "XXX",
      "expected_transaction_amount": XX.XX,
      "actual_transaction_amount": XX.XX,
      "discrepancy": XX.XX,
      "requires_manual_review": true/false,
      "reason": "Explanation"
    }},
    "depreciation": {{
      "is_capital_asset": true/false,
      "asset_class": "class_XX",
      "cca_rate": 0.XX,
      "applicable_jurisdiction": "XX",
      "reason": "Explanation"
    }},
    "meals_entertainment": {{
      "is_meals_entertainment": true/false,
      "tax_deduction_amount": XX.XX,
      "accounting_deduction_amount": XX.XX,
      "sales_tax_included": XX.XX,
      "reason": "Explanation"
    }},
    "confidence_adjustment": {{
      "boost": 0.XX,
      "reduce": 0.XX,
      "reason": "Why confidence should be adjusted"
    }},
    "overall_assessment": "Summary for this match"
  }},
  "match_1": {{
    ... same structure ...
  }},
  ... for all {num_matches} matches ...
}}
"""
        try:
            response = self.client.chat.completions.create(
                model=self.model,
                messages=[
                    {
                        "role": "system",
                        "content": "You are a Canadian tax expert. Analyze multiple transactions in batch and apply tax rules accurately. Return ONLY valid JSON - no markdown code blocks, no explanations, just pure JSON.",
                    },
                    {"role": "user", "content": prompt},
                ],
                temperature=0.1,  # Low temperature for consistent, factual responses
                max_tokens=8000,  # Higher limit for batch processing
            )

            content = response.choices[0].message.content

            # Validate that we got content
            if not content:
                logger.error("LLM returned empty response")
                return {}

            content = content.strip()

            # Check if content is empty after stripping
            if not content:
                logger.error("LLM returned whitespace-only response")
                return {}

            logger.info(
                f"LLM batch tax analysis received: {len(content)} characters for {num_matches} matches"
            )
            logger.debug(f"Raw LLM response: {content[:500]}...")  # Log first 500 chars

            # Parse the JSON response - handle various markdown code block formats
            json_str = content

            # Check for markdown code blocks with various language identifiers
            if "```json" in content:
                json_str = content.split("```json")[1].split("```")[0].strip()
            elif "```javascript" in content:
                json_str = content.split("```javascript")[1].split("```")[0].strip()
            elif "```js" in content:
                json_str = content.split("```js")[1].split("```")[0].strip()
            elif "```" in content:
                # Generic code block - extract content between first ``` and last ```
                parts = content.split("```")
                if len(parts) >= 3:
                    # Take the second part (index 1), which is between first and second ```
                    json_str = parts[1].strip()
                    # Remove language identifier if it's on the first line
                    lines = json_str.split("\n", 1)
                    if len(lines) > 1 and lines[0].strip() in [
                        "json",
                        "javascript",
                        "js",
                        "",
                    ]:
                        json_str = lines[1].strip()

            # Validate JSON string is not empty
            if not json_str:
                logger.error("Extracted JSON string is empty")
                logger.error(f"Original content was: {content[:500]}")
                return {}

            batch_analysis = json.loads(json_str)

            # Validate we got a dictionary back
            if not isinstance(batch_analysis, dict):
                logger.error(f"LLM returned non-dict type: {type(batch_analysis)}")
                return {}

            logger.info(
                f"Successfully parsed batch analysis with {len(batch_analysis)} matches"
            )
            return batch_analysis

        except json.JSONDecodeError as e:
            logger.error(f"JSON decode error in batch LLM tax analysis: {str(e)}")
            logger.error(
                f"Failed to parse: {json_str[:500] if 'json_str' in locals() else 'N/A'}"
            )
            return {}
        except Exception as e:
            logger.error(f"Error getting batch LLM tax analysis: {str(e)}")
            logger.error(f"Exception type: {type(e).__name__}")
            # Return empty dict so each match can handle fallback individually
            return {}

    def _apply_tax_analysis_to_match(self, match, tax_analysis: Dict[str, Any]):
        """Attach a tax analysis to a match and fold its findings into the match.

        The analysis is parsed LLM output, so it is treated as untrusted:
        sections may be missing or ``null`` and numeric fields may be missing,
        ``null`` or strings. Such values fall back to ``{}`` / ``0`` instead of
        raising.

        Steps performed:
        1. Reconcile ``final_tax_amount`` with ``sales_tax.calculated_tax``;
           the calculated value wins when they differ by more than 0.01 or
           when the final amount is 0 while a positive tax was calculated
           (the latter also sets ``sales_tax.requires_review``).
        2. Store the analysis on ``match.tax_analysis``.
        3. Apply ``confidence_adjustment.boost`` / ``.reduce`` to
           ``match.confidence_score``, clamped to ``[0.0, 1.0]``.
        4. Append review flags (sales tax, FX, capital asset, M&E) to
           ``match.match_reason``.

        Args:
            match: Object exposing ``receipt.vendor``, ``confidence_score`` and
                ``match_reason``; it is mutated in place.
            tax_analysis: Per-match analysis dict. It is mutated in place and
                always carries a ``final_tax_amount`` key afterwards. A
                non-dict value is replaced by an empty analysis.

        Returns:
            The same ``match`` object, updated.
        """

        def as_number(value, default=0.0):
            """Return ``value`` as a number, or ``default`` if it is not numeric."""
            if isinstance(value, (int, float)):
                return value
            try:
                return float(value)
            except (TypeError, ValueError):
                return default

        def section(name):
            """Return the named sub-dict of the analysis, or ``{}`` if absent/malformed."""
            value = tax_analysis.get(name)
            return value if isinstance(value, dict) else {}

        # A missing per-match entry should degrade to "no findings", not crash.
        if not isinstance(tax_analysis, dict):
            tax_analysis = {}

        # When present, this is the same dict object stored in tax_analysis,
        # so flagging it below is visible through match.tax_analysis.
        sales_tax = section("sales_tax")

        final_tax = as_number(tax_analysis.get("final_tax_amount"))
        calculated_tax = as_number(sales_tax.get("calculated_tax"))
        resolved_tax = final_tax

        # calculated_tax is the source of truth whenever the two disagree.
        if final_tax == 0.0 and calculated_tax > 0.0:
            logger.warning(
                f"Correcting zero final_tax_amount for {match.receipt.vendor}: "
                f"LLM returned 0 but calculated {calculated_tax} HST. Setting final_tax_amount={calculated_tax}"
            )
            resolved_tax = calculated_tax
            sales_tax["requires_review"] = True
        elif abs(final_tax - calculated_tax) > 0.01:
            logger.warning(
                f"Correcting final_tax_amount mismatch for {match.receipt.vendor}: "
                f"LLM returned final_tax_amount={final_tax}, but calculated_tax={calculated_tax}. "
                f"Using calculated_tax as final value."
            )
            resolved_tax = calculated_tax

        # Always write the key back so later readers never hit a missing or
        # null final_tax_amount.
        tax_analysis["final_tax_amount"] = resolved_tax

        # Apply the corrected tax analysis
        match.tax_analysis = tax_analysis

        logger.debug(
            f"Applied tax analysis to match: {match.receipt.vendor} -> "
            f"final_tax_amount={resolved_tax}"
        )

        # Text appended to match.match_reason, collected in emission order.
        reason_suffix = ""

        # Apply confidence adjustments based on tax analysis
        confidence_adj = section("confidence_adjustment")

        # Boost confidence if tax rules validate the match
        boost = as_number(confidence_adj.get("boost"))
        if boost > 0:
            match.confidence_score = min(1.0, match.confidence_score + boost)
            reason_suffix += f" (Tax analysis confidence boost: +{boost:.2f})"

        # Reduce confidence if tax issues detected
        reduce = as_number(confidence_adj.get("reduce"))
        if reduce > 0:
            match.confidence_score = max(0.0, match.confidence_score - reduce)
            reason_suffix += f" (Tax issues detected: -{reduce:.2f})"

        # Add flags for manual review if needed
        review_flags = []

        # Check sales tax issues
        if sales_tax.get("requires_review", False):
            review_flags.append("Sales Tax Review Required")

        # Check FX issues
        fx_analysis = section("foreign_exchange")
        if fx_analysis.get("requires_manual_review", False):
            discrepancy = as_number(fx_analysis.get("discrepancy"), 0)
            review_flags.append(
                f"FX Review Required (Discrepancy: ${discrepancy:.2f})"
            )

        # Check depreciation
        depreciation = section("depreciation")
        if depreciation.get("is_capital_asset", False):
            review_flags.append(
                f"Capital Asset - Depreciation Applicable ({depreciation.get('asset_class', 'Unknown')})"
            )

        # Check meals & entertainment
        meals_ent = section("meals_entertainment")
        if meals_ent.get("is_meals_entertainment", False):
            tax_deduction = as_number(meals_ent.get("tax_deduction_amount"), 0)
            accounting_deduction = as_number(
                meals_ent.get("accounting_deduction_amount"), 0
            )
            review_flags.append(
                f"M&E Expense - Tax Deduction: ${tax_deduction:.2f} (50%), Accounting: ${accounting_deduction:.2f} (100%)"
            )

        # Add review flags to match reason
        if review_flags:
            reason_suffix += " | REVIEW: " + "; ".join(review_flags)

        # Only touch match_reason when there is something to add; tolerate a
        # match whose reason has not been set yet.
        if reason_suffix:
            match.match_reason = (match.match_reason or "") + reason_suffix

        return match