diff --git a/app/services/llm_tax_analyzer.py b/app/services/llm_tax_analyzer.py index 07f5b49..996fe9b 100644 --- a/app/services/llm_tax_analyzer.py +++ b/app/services/llm_tax_analyzer.py @@ -817,7 +817,7 @@ Return your response as a SINGLE JSON object in this format: messages=[ { "role": "system", - "content": "You are a Canadian tax expert. Analyze multiple transactions in batch and apply tax rules accurately. Always return valid JSON with all requested matches.", + "content": "You are a Canadian tax expert. Analyze multiple transactions in batch and apply tax rules accurately. Return ONLY valid JSON - no markdown code blocks, no explanations, just pure JSON.", }, {"role": "user", "content": prompt}, ], @@ -844,17 +844,36 @@ Return your response as a SINGLE JSON object in this format: ) logger.debug(f"Raw LLM response: {content[:500]}...") # Log first 500 chars - # Parse the JSON response + # Parse the JSON response - handle various markdown code block formats json_str = content + + # Check for markdown code blocks with various language identifiers if "```json" in content: json_str = content.split("```json")[1].split("```")[0].strip() + elif "```javascript" in content: + json_str = content.split("```javascript")[1].split("```")[0].strip() + elif "```js" in content: + json_str = content.split("```js")[1].split("```")[0].strip() elif "```" in content: - json_str = content.split("```")[1].split("```")[0].strip() + # Generic code block - extract content between the first and second ``` + parts = content.split("```") + if len(parts) >= 3: + # Take the second part (index 1), which is between first and second ``` + json_str = parts[1].strip() + # Remove language identifier if it's on the first line + lines = json_str.split("\n", 1) + if len(lines) > 1 and lines[0].strip() in [ + "json", + "javascript", + "js", + "", + ]: + json_str = lines[1].strip() # Validate JSON string is not empty if not json_str: logger.error("Extracted JSON string is empty") - 
logger.error(f"Original content was: {content}") + logger.error(f"Original content was: {content[:500]}") return {} batch_analysis = json.loads(json_str)