From 7c412bcf9e67e1073f5db4ec434a2e6860ce709f Mon Sep 17 00:00:00 2001
From: bolade <babawale030@gmail.com>
Date: Sun, 5 Oct 2025 20:03:46 +0100
Subject: [PATCH] Enhance batch processing in LLMTaxAnalyzer with fallback to
 individual analysis on failure

---
 app/services/llm_tax_analyzer.py | 112 +++++++++++++++++++++++++++++--
 1 file changed, 108 insertions(+), 4 deletions(-)

diff --git a/app/services/llm_tax_analyzer.py b/app/services/llm_tax_analyzer.py
index a4e9280..07f5b49 100644
--- a/app/services/llm_tax_analyzer.py
+++ b/app/services/llm_tax_analyzer.py
@@ -59,26 +59,59 @@ class LLMTaxAnalyzer:
         """
         Batch process all matches in a SINGLE LLM call to reduce costs.
         Analyzes all receipt-transaction pairs together and applies tax rules.
+        Falls back to individual processing (5 or fewer matches) if batch returns nothing.
         """
         if not matches:
             return matches
 
+        logger.info(f"Starting batch tax analysis for {len(matches)} matches")
+
         # Build batch context for all matches
-        batch_context = self._build_batch_analysis_context(matches, user_location)
+        try:
+            batch_context = self._build_batch_analysis_context(matches, user_location)
+        except Exception as e:
+            logger.error(f"Error building batch context: {str(e)}")
+            # If we can't even build the context, return matches with only a failure note
+            for match in matches:
+                match.match_reason += " (Batch analysis setup failed)"
+            return matches
 
         # Get LLM analysis for ALL matches at once
         llm_batch_analysis = self._get_llm_tax_analysis_batch(
             batch_context, len(matches)
         )
 
+        # Check if we got any analysis back
+        if not llm_batch_analysis:
+            logger.warning("Batch LLM analysis returned empty results")
+
+            # Fallback: Try processing each match individually if batch size is small
+            if (
+                len(matches) <= 5
+            ):  # Only fallback for small batches to avoid excessive API calls
+                logger.info(
+                    f"Attempting individual processing fallback for {len(matches)} matches"
+                )
+                return self._process_matches_individually(matches, user_location)
+            else:
+                logger.warning(
+                    f"Batch too large ({len(matches)} matches) for individual fallback - returning matches without enhanced tax analysis"
+                )
+                for match in matches:
+                    match.match_reason += " (Batch tax analysis unavailable)"
+                return matches
+
+        logger.info(f"Received batch analysis for {len(llm_batch_analysis)} matches")
+
         # Apply results to each match
         enhanced_matches = []
         for i, match in enumerate(matches):
             try:
                 # Get the analysis for this specific match from the batch results
-                match_analysis = llm_batch_analysis.get(f"match_{i}", {})
+                match_key = f"match_{i}"
+                match_analysis = llm_batch_analysis.get(match_key, {})
 
-                if match_analysis:
+                if match_analysis and isinstance(match_analysis, dict):
                     # Apply the tax analysis to this match
                     enhanced_match = self._apply_tax_analysis_to_match(
                         match, match_analysis
@@ -86,6 +119,9 @@ class LLMTaxAnalyzer:
                     enhanced_matches.append(enhanced_match)
                 else:
                     # No analysis available for this match, use as-is
+                    logger.warning(
+                        f"No analysis found for match {i} (key: {match_key})"
+                    )
                     match.match_reason += " (Tax analysis incomplete)"
                     enhanced_matches.append(match)
             except Exception as e:
@@ -93,6 +129,38 @@ class LLMTaxAnalyzer:
                 match.match_reason += " (Tax analysis error)"
                 enhanced_matches.append(match)
 
+        logger.info(
+            f"Completed batch tax analysis, enhanced {len(enhanced_matches)} matches"
+        )
+        return enhanced_matches
+
+    def _process_matches_individually(self, matches: list, user_location: str) -> list:
+        """
+        Fallback method: Process matches one at a time using the legacy method.
+        Only used when batch processing fails and batch size is small.
+        """
+        logger.info(f"Processing {len(matches)} matches individually as fallback")
+        enhanced_matches = []
+
+        for i, match in enumerate(matches):
+            try:
+                # Use the legacy single-match analysis method
+                tax_analysis = self.analyze_and_apply_tax_rules(
+                    match.receipt, match.transaction, user_location
+                )
+
+                # Apply the analysis to the match
+                enhanced_match = self._apply_tax_analysis_to_match(match, tax_analysis)
+                enhanced_matches.append(enhanced_match)
+                logger.info(
+                    f"Successfully processed match {i + 1}/{len(matches)} individually"
+                )
+
+            except Exception as e:
+                logger.error(f"Error in individual processing for match {i}: {str(e)}")
+                match.match_reason += " (Individual tax analysis failed)"
+                enhanced_matches.append(match)
+
         return enhanced_matches
 
     def analyze_and_apply_tax_rules(
@@ -757,10 +825,24 @@ Return your response as a SINGLE JSON object in this format:
                 max_tokens=8000,  # Higher limit for batch processing
             )
 
-            content = response.choices[0].message.content.strip()
+            content = response.choices[0].message.content
+
+            # Validate that we got content
+            if not content:
+                logger.error("LLM returned empty response")
+                return {}
+
+            content = content.strip()
+
+            # Check if content is empty after stripping
+            if not content:
+                logger.error("LLM returned whitespace-only response")
+                return {}
+
             logger.info(
                 f"LLM batch tax analysis received: {len(content)} characters for {num_matches} matches"
             )
+            logger.debug(f"Raw LLM response: {content[:500]}...")  # Log first 500 chars
 
             # Parse the JSON response
             json_str = content
@@ -769,11 +851,33 @@ Return your response as a SINGLE JSON object in this format:
             elif "```" in content:
                 json_str = content.split("```")[1].split("```")[0].strip()
 
+            # Validate JSON string is not empty
+            if not json_str:
+                logger.error("Extracted JSON string is empty")
+                logger.error(f"Original content was: {content}")
+                return {}
+
             batch_analysis = json.loads(json_str)
+
+            # Validate we got a dictionary back
+            if not isinstance(batch_analysis, dict):
+                logger.error(f"LLM returned non-dict type: {type(batch_analysis)}")
+                return {}
+
+            logger.info(
+                f"Successfully parsed batch analysis with {len(batch_analysis)} matches"
+            )
             return batch_analysis
 
+        except json.JSONDecodeError as e:
+            logger.error(f"JSON decode error in batch LLM tax analysis: {str(e)}")
+            logger.error(
+                f"Failed to parse: {json_str[:500] if 'json_str' in locals() else 'N/A'}"
+            )
+            return {}
         except Exception as e:
             logger.error(f"Error getting batch LLM tax analysis: {str(e)}")
+            logger.error(f"Exception type: {type(e).__name__}")
             # Return empty dict so each match can handle fallback individually
             return {}