feat: Implement async AI analysis for email threads
- Added `get_latest_email_date()` function in `database.py` to retrieve the most recent email date for a given account and folder. - Enhanced `fetch_folder_emails()` in `zoho_client.py` to intelligently determine the start date for fetching emails based on the latest email date in the database. - Introduced `analyze_and_update_threads_async()` for asynchronous analysis of email threads, allowing concurrent processing. - Created a synchronous wrapper `analyze_and_update_threads()` for easier integration. - Updated `fetch_emails()` to support database session and account email parameters. - Added comprehensive documentation in `AI_ANALYSIS_GUIDE.md` detailing the new AI analysis functionality. - Implemented tests for the new features, including `test_fetch_with_db.py`, `test_ai_analysis.py`, and `test_single_analysis.py`. - Added error handling and logging improvements throughout the codebase.
This commit is contained in:
@@ -1,9 +1,27 @@
|
||||
import json
|
||||
import os
|
||||
import time
|
||||
from typing import Dict, List
|
||||
|
||||
from groq import Groq
|
||||
|
||||
# Rate limiting state, shared by every caller in this process.
# _last_api_call holds the time.monotonic() reading taken when the previous
# call was released; 0 means "no call has been made yet".
_last_api_call = 0
_min_interval = 1.0  # Minimum seconds between API calls


def _rate_limit():
    """Block until at least ``_min_interval`` seconds separate two API calls.

    Simple rate limiting to avoid overwhelming the Groq API. The first call
    in a process returns immediately; later calls sleep for whatever part of
    the interval has not yet elapsed since the previous call.

    The interval is measured with ``time.monotonic()`` rather than the wall
    clock, so a system clock adjustment can neither skip the wait nor stretch
    it. The wait is additionally clamped to ``_min_interval`` so a corrupted
    timestamp can never stall the caller for longer than one interval.

    Note: this sleeps with ``time.sleep`` (blocking the calling thread) and
    updates module-level state without a lock.
    """
    global _last_api_call

    if _last_api_call > 0:
        elapsed = time.monotonic() - _last_api_call
        # Never wait longer than one full interval, whatever the stored
        # timestamp says.
        remaining = min(_min_interval - elapsed, _min_interval)
        if remaining > 0:
            time.sleep(remaining)

    # Re-read the clock after any sleep so the next caller measures from the
    # moment this call was actually released.
    _last_api_call = time.monotonic()
|
||||
|
||||
|
||||
def _format_messages_for_context(messages: List[dict]) -> str:
|
||||
lines = []
|
||||
@@ -76,8 +94,40 @@ def analyze_thread(
|
||||
)
|
||||
|
||||
try:
|
||||
# Validate input before sending to API
|
||||
if not msgs:
|
||||
return _heuristic_analyze(msgs)
|
||||
|
||||
# Check message content length to avoid oversized requests
|
||||
formatted_context = _format_messages_for_context(msgs)
|
||||
if len(formatted_context) > 10000: # Limit context size
|
||||
# Truncate messages if too long
|
||||
truncated_msgs = msgs[-2:] # Use only last 2 messages
|
||||
formatted_context = _format_messages_for_context(truncated_msgs)
|
||||
print(
|
||||
f"Warning: Truncated message context due to length ({len(formatted_context)} chars)"
|
||||
)
|
||||
|
||||
user_prompt = (
|
||||
f"Thread subject: {thread_subject or ''}\n\n"
|
||||
"Recent messages (oldest to newest):\n\n"
|
||||
f"{formatted_context}\n\n"
|
||||
"Respond with only JSON, no extra commentary."
|
||||
)
|
||||
|
||||
# Validate prompt length
|
||||
total_prompt_length = len(system_prompt) + len(user_prompt)
|
||||
if total_prompt_length > 15000: # Further reduce if still too long
|
||||
print(
|
||||
f"Warning: Prompt too long ({total_prompt_length} chars), falling back to heuristic"
|
||||
)
|
||||
return _heuristic_analyze(msgs)
|
||||
|
||||
# Apply rate limiting before API call
|
||||
_rate_limit()
|
||||
|
||||
completion = client.chat.completions.create(
|
||||
model=os.getenv("GROQ_MODEL", "llama-3.1-70b-versatile"),
|
||||
model=os.getenv("GROQ_MODEL", "llama3-8b-8192"),
|
||||
messages=[
|
||||
{"role": "system", "content": system_prompt},
|
||||
{"role": "user", "content": user_prompt},
|
||||
@@ -96,6 +146,23 @@ def analyze_thread(
|
||||
):
|
||||
raise ValueError("Invalid schema from model")
|
||||
return data
|
||||
except Exception:
|
||||
except json.JSONDecodeError as e:
|
||||
print(f"JSON decode error from Groq API: {e}")
|
||||
return _heuristic_analyze(msgs)
|
||||
except Exception as e:
|
||||
# Log the specific error for debugging
|
||||
error_msg = str(e)
|
||||
print(f"Groq API error: {error_msg}")
|
||||
|
||||
# Check for specific error types
|
||||
if "400" in error_msg or "Bad Request" in error_msg:
|
||||
print("400 Bad Request - likely prompt too long or invalid format")
|
||||
elif "429" in error_msg or "rate limit" in error_msg.lower():
|
||||
print("Rate limit exceeded - consider reducing concurrent requests")
|
||||
elif "401" in error_msg or "unauthorized" in error_msg.lower():
|
||||
print("Unauthorized - check GROQ_API_KEY")
|
||||
elif "503" in error_msg or "service unavailable" in error_msg.lower():
|
||||
print("Service unavailable - Groq API may be down")
|
||||
|
||||
# Fallback to heuristic
|
||||
return _heuristic_analyze(msgs)
|
||||
|
||||
Reference in New Issue
Block a user