Refine JSON response handling in batch analysis to exclude markdown code blocks and improve extraction logic

This commit is contained in:
bolade
2025-10-05 20:36:47 +01:00
parent 7c412bcf9e
commit e3f610e01a
+23 -4
View File
@@ -817,7 +817,7 @@ Return your response as a SINGLE JSON object in this format:
messages=[
{
"role": "system",
"content": "You are a Canadian tax expert. Analyze multiple transactions in batch and apply tax rules accurately. Always return valid JSON with all requested matches.",
"content": "You are a Canadian tax expert. Analyze multiple transactions in batch and apply tax rules accurately. Return ONLY valid JSON - no markdown code blocks, no explanations, just pure JSON.",
},
{"role": "user", "content": prompt},
],
@@ -844,17 +844,36 @@ Return your response as a SINGLE JSON object in this format:
)
logger.debug(f"Raw LLM response: {content[:500]}...") # Log first 500 chars
# Parse the JSON response
# Parse the JSON response - handle various markdown code block formats
json_str = content
# Check for markdown code blocks with various language identifiers
if "```json" in content:
json_str = content.split("```json")[1].split("```")[0].strip()
elif "```javascript" in content:
json_str = content.split("```javascript")[1].split("```")[0].strip()
elif "```js" in content:
json_str = content.split("```js")[1].split("```")[0].strip()
elif "```" in content:
json_str = content.split("```")[1].split("```")[0].strip()
# Generic code block - extract content between the first and second ``` fences
parts = content.split("```")
if len(parts) >= 3:
# Take the second part (index 1), which is between first and second ```
json_str = parts[1].strip()
# Remove language identifier if it's on the first line
lines = json_str.split("\n", 1)
if len(lines) > 1 and lines[0].strip() in [
"json",
"javascript",
"js",
"",
]:
json_str = lines[1].strip()
# Validate JSON string is not empty
if not json_str:
logger.error("Extracted JSON string is empty")
logger.error(f"Original content was: {content}")
logger.error(f"Original content was: {content[:500]}")
return {}
batch_analysis = json.loads(json_str)