Enhance batch processing in LLMTaxAnalyzer with fallback to individual analysis on failure

2025-10-05 20:03:46 +01:00
parent ae200bd30f
commit 7c412bcf9e
1 changed files with 108 additions and 4 deletions
@@ -59,26 +59,59 @@ class LLMTaxAnalyzer:
        """
        Batch process all matches in a SINGLE LLM call to reduce costs.
        Analyzes all receipt-transaction pairs together and applies tax rules.
        Falls back to individual processing if batch fails.
        """
        if not matches:
            return matches
        logger.info(f"Starting batch tax analysis for {len(matches)} matches")
        # Build batch context for all matches
-        batch_context = self._build_batch_analysis_context(matches, user_location)
+        try:
            batch_context = self._build_batch_analysis_context(matches, user_location)
        except Exception as e:
            logger.error(f"Error building batch context: {str(e)}")
            # If we can't even build the context, return matches as-is
            for match in matches:
                match.match_reason += " (Batch analysis setup failed)"
            return matches
        # Get LLM analysis for ALL matches at once
        llm_batch_analysis = self._get_llm_tax_analysis_batch(
            batch_context, len(matches)
        )
        # Check if we got any analysis back
        if not llm_batch_analysis:
            logger.warning("Batch LLM analysis returned empty results")
            # Fallback: Try processing each match individually if batch size is small
            if (
                len(matches) <= 5
            ):  # Only fallback for small batches to avoid excessive API calls
                logger.info(
                    f"Attempting individual processing fallback for {len(matches)} matches"
                )
                return self._process_matches_individually(matches, user_location)
            else:
                logger.warning(
                    f"Batch too large ({len(matches)} matches) for individual fallback - returning matches without enhanced tax analysis"
                )
                for match in matches:
                    match.match_reason += " (Batch tax analysis unavailable)"
                return matches
        logger.info(f"Received batch analysis for {len(llm_batch_analysis)} matches")
        # Apply results to each match
        enhanced_matches = []
        for i, match in enumerate(matches):
            try:
                # Get the analysis for this specific match from the batch results
-                match_analysis = llm_batch_analysis.get(f"match_{i}", {})
+                match_key = f"match_{i}"
                match_analysis = llm_batch_analysis.get(match_key, {})
-                if match_analysis:
+                if match_analysis and isinstance(match_analysis, dict):
                    # Apply the tax analysis to this match
                    enhanced_match = self._apply_tax_analysis_to_match(
                        match, match_analysis
@@ -86,6 +119,9 @@ class LLMTaxAnalyzer:
                    enhanced_matches.append(enhanced_match)
                else:
                    # No analysis available for this match, use as-is
                    logger.warning(
                        f"No analysis found for match {i} (key: {match_key})"
                    )
                    match.match_reason += " (Tax analysis incomplete)"
                    enhanced_matches.append(match)
            except Exception as e:
@@ -93,6 +129,38 @@ class LLMTaxAnalyzer:
                match.match_reason += " (Tax analysis error)"
                enhanced_matches.append(match)
        logger.info(
            f"Completed batch tax analysis, enhanced {len(enhanced_matches)} matches"
        )
        return enhanced_matches
    def _process_matches_individually(self, matches: list, user_location: str) -> list:
        """
        Run the legacy single-match tax analysis on every match, one at a time.

        This is the fallback path for when the batch analysis yields nothing.
        A failure on one match is logged and that match is kept, with a note
        appended to its ``match_reason``; the remaining matches are still
        processed.

        Args:
            matches: Match objects exposing ``receipt``, ``transaction`` and
                ``match_reason`` attributes.
            user_location: Passed through unchanged to the per-match analysis.

        Returns:
            A new list holding one entry per input match, in input order:
            the enhanced match on success, the original match on failure.
        """
        total = len(matches)
        logger.info(f"Processing {total} matches individually as fallback")

        results = []
        for index, candidate in enumerate(matches):
            try:
                # Legacy path: one analysis call per receipt/transaction pair.
                analysis = self.analyze_and_apply_tax_rules(
                    candidate.receipt, candidate.transaction, user_location
                )
                results.append(
                    self._apply_tax_analysis_to_match(candidate, analysis)
                )
                # Progress is reported 1-based; the error log below is 0-based.
                logger.info(
                    f"Successfully processed match {index + 1}/{total} individually"
                )
            except Exception as exc:
                # Best-effort: keep the un-enhanced match so the caller still
                # gets an entry for it, and flag the failure in its reason.
                logger.error(
                    f"Error in individual processing for match {index}: {str(exc)}"
                )
                candidate.match_reason += " (Individual tax analysis failed)"
                results.append(candidate)
        return results
    def analyze_and_apply_tax_rules(
@@ -757,10 +825,24 @@ Return your response as a SINGLE JSON object in this format:
                max_tokens=8000,  # Higher limit for batch processing
            )
-            content = response.choices[0].message.content.strip()
+            content = response.choices[0].message.content
            # Validate that we got content
            if not content:
                logger.error("LLM returned empty response")
                return {}
            content = content.strip()
            # Check if content is empty after stripping
            if not content:
                logger.error("LLM returned whitespace-only response")
                return {}
            logger.info(
                f"LLM batch tax analysis received: {len(content)} characters for {num_matches} matches"
            )
            logger.debug(f"Raw LLM response: {content[:500]}...")  # Log first 500 chars
            # Parse the JSON response
            json_str = content
@@ -769,11 +851,33 @@ Return your response as a SINGLE JSON object in this format:
            elif "```" in content:
                json_str = content.split("```")[1].split("```")[0].strip()
            # Validate JSON string is not empty
            if not json_str:
                logger.error("Extracted JSON string is empty")
                logger.error(f"Original content was: {content}")
                return {}
            batch_analysis = json.loads(json_str)
            # Validate we got a dictionary back
            if not isinstance(batch_analysis, dict):
                logger.error(f"LLM returned non-dict type: {type(batch_analysis)}")
                return {}
            logger.info(
                f"Successfully parsed batch analysis with {len(batch_analysis)} matches"
            )
            return batch_analysis
        except json.JSONDecodeError as e:
            logger.error(f"JSON decode error in batch LLM tax analysis: {str(e)}")
            logger.error(
                f"Failed to parse: {json_str[:500] if 'json_str' in locals() else 'N/A'}"
            )
            return {}
        except Exception as e:
            logger.error(f"Error getting batch LLM tax analysis: {str(e)}")
            logger.error(f"Exception type: {type(e).__name__}")
            # Return empty dict so each match can handle fallback individually
            return {}