Refine JSON response handling in batch analysis to exclude markdown code blocks and improve extraction logic
This commit is contained in:
@@ -817,7 +817,7 @@ Return your response as a SINGLE JSON object in this format:
|
|||||||
messages=[
|
messages=[
|
||||||
{
|
{
|
||||||
"role": "system",
|
"role": "system",
|
||||||
"content": "You are a Canadian tax expert. Analyze multiple transactions in batch and apply tax rules accurately. Always return valid JSON with all requested matches.",
|
"content": "You are a Canadian tax expert. Analyze multiple transactions in batch and apply tax rules accurately. Return ONLY valid JSON - no markdown code blocks, no explanations, just pure JSON.",
|
||||||
},
|
},
|
||||||
{"role": "user", "content": prompt},
|
{"role": "user", "content": prompt},
|
||||||
],
|
],
|
||||||
@@ -844,17 +844,36 @@ Return your response as a SINGLE JSON object in this format:
|
|||||||
)
|
)
|
||||||
logger.debug(f"Raw LLM response: {content[:500]}...") # Log first 500 chars
|
logger.debug(f"Raw LLM response: {content[:500]}...") # Log first 500 chars
|
||||||
|
|
||||||
# Parse the JSON response
|
# Parse the JSON response - handle various markdown code block formats
|
||||||
json_str = content
|
json_str = content
|
||||||
|
|
||||||
|
# Check for markdown code blocks with various language identifiers
|
||||||
if "```json" in content:
|
if "```json" in content:
|
||||||
json_str = content.split("```json")[1].split("```")[0].strip()
|
json_str = content.split("```json")[1].split("```")[0].strip()
|
||||||
|
elif "```javascript" in content:
|
||||||
|
json_str = content.split("```javascript")[1].split("```")[0].strip()
|
||||||
|
elif "```js" in content:
|
||||||
|
json_str = content.split("```js")[1].split("```")[0].strip()
|
||||||
elif "```" in content:
|
elif "```" in content:
|
||||||
json_str = content.split("```")[1].split("```")[0].strip()
|
# Generic code block - extract content between the first and second ``` fences
|
||||||
|
parts = content.split("```")
|
||||||
|
if len(parts) >= 3:
|
||||||
|
# Take the second part (index 1), which is between first and second ```
|
||||||
|
json_str = parts[1].strip()
|
||||||
|
# Remove language identifier if it's on the first line
|
||||||
|
lines = json_str.split("\n", 1)
|
||||||
|
if len(lines) > 1 and lines[0].strip() in [
|
||||||
|
"json",
|
||||||
|
"javascript",
|
||||||
|
"js",
|
||||||
|
"",
|
||||||
|
]:
|
||||||
|
json_str = lines[1].strip()
|
||||||
|
|
||||||
# Validate JSON string is not empty
|
# Validate JSON string is not empty
|
||||||
if not json_str:
|
if not json_str:
|
||||||
logger.error("Extracted JSON string is empty")
|
logger.error("Extracted JSON string is empty")
|
||||||
logger.error(f"Original content was: {content}")
|
logger.error(f"Original content was: {content[:500]}")
|
||||||
return {}
|
return {}
|
||||||
|
|
||||||
batch_analysis = json.loads(json_str)
|
batch_analysis = json.loads(json_str)
|
||||||
|
|||||||
Reference in New Issue
Block a user