"""Triage email threads with the Groq chat API, falling back to a local heuristic."""

import json
import math
import os
import time
from typing import Dict, List, Optional

try:
    from groq import Groq
except ImportError:
    # The SDK is optional at import time: without it analyze_thread() still
    # works, it just always uses the heuristic fallback.
    Groq = None

# Rate limiting variables
# time.monotonic() reading of the previous API call; -inf means "never called",
# so the very first call is never delayed.
_last_api_call = float("-inf")
_min_interval = 1.0  # Minimum seconds between API calls

# Model used when GROQ_MODEL is unset or empty. The same value is sent to the
# API and reported back in the result.
# NOTE(review): confirm this model id is still served by Groq.
_DEFAULT_MODEL = "llama3-8b-8192"

_MAX_BODY_CHARS = 1000  # per-message body budget inside the prompt
_MAX_CONTEXT_CHARS = 10000  # above this, only the last two messages are sent
_MAX_PROMPT_CHARS = 15000  # above this, the API is skipped entirely
_DEFAULT_CONFIDENCE = 0.5  # used when the model omits or garbles "confidence"

_SYSTEM_PROMPT = (
    "You are a helpful assistant that triages email threads and writes concise summaries. "
    "Decide if the thread requires a reply from our side now, based on the last few messages. "
    "Ignore newsletters/automations (e.g., from no-reply), and focus on whether there's a clear question or request. "
    "Return a strict JSON object with keys: actionable (true/false), summary (<= 80 words), confidence (0..1)."
)

# (HTTP status, lower-case phrase to look for in the error text, hint to print)
_ERROR_HINTS = (
    (400, "bad request", "400 Bad Request - likely prompt too long or invalid format"),
    (429, "rate limit", "Rate limit exceeded - consider reducing concurrent requests"),
    (401, "unauthorized", "Unauthorized - check GROQ_API_KEY"),
    (503, "service unavailable", "Service unavailable - Groq API may be down"),
)


def _rate_limit():
    """Sleep as needed so API calls are at least ``_min_interval`` seconds apart.

    Uses the monotonic clock: with wall-clock time, a backwards clock
    adjustment would produce a negative elapsed time and an arbitrarily
    long sleep.
    """
    global _last_api_call
    elapsed = time.monotonic() - _last_api_call
    if elapsed < _min_interval:
        time.sleep(_min_interval - elapsed)
    _last_api_call = time.monotonic()


def _format_messages_for_context(messages: List[dict]) -> str:
    """Render messages as prompt text, one section per message.

    Each section shows date, direction (IN/OUT), sender -> recipient, subject
    and the body truncated to ``_MAX_BODY_CHARS`` characters. Sections are
    separated by a ``---`` line.
    """
    sections = []
    for m in messages:
        direction = "IN" if m.get("is_incoming", True) else "OUT"
        date = m.get("date_sent")
        subj = m.get("subject") or ""
        from_email = m.get("from_email") or ""
        to_email = m.get("to_email") or ""
        body = (m.get("body") or "").strip()
        if len(body) > _MAX_BODY_CHARS:
            body = body[:_MAX_BODY_CHARS] + "..."
        sections.append(
            f"[{date}] [{direction}] {from_email} -> {to_email}\nSubject: {subj}\n{body}"
        )
    return "\n\n---\n\n".join(sections)


def _heuristic_analyze(messages: List[dict]) -> Dict:
    """Keyword-based fallback used when the Groq API is unavailable.

    Looks at the bodies of the last four messages and flags the thread as
    actionable when they contain a question mark or a request-like phrase.
    The summary is the concatenated bodies, cut to 350 characters.
    """
    body_concat = "\n\n".join((m.get("body") or "") for m in messages[-4:])
    lowered = body_concat.lower()
    question_like = "?" in body_concat or any(
        kw in lowered
        for kw in ("could you", "can you", "please", "let me know", "need", "request")
    )
    last_subj = (messages[-1].get("subject") or "") if messages else ""
    return {
        "actionable": bool(question_like),
        "summary": (body_concat[:350] + "...") if len(body_concat) > 350 else body_concat,
        "subject": last_subj,
        "confidence": 0.35,
        "model": "heuristic",
    }


def _fallback(thread_subject: str, msgs: List[dict]) -> Dict:
    """Run the heuristic and fill an empty subject from the thread subject."""
    result = _heuristic_analyze(msgs)
    if not result["subject"]:
        result["subject"] = thread_subject or ""
    return result


def _build_user_prompt(thread_subject: str, context: str) -> str:
    """Assemble the user-role prompt around already-formatted message context."""
    return (
        f"Thread subject: {thread_subject or ''}\n\n"
        "Recent messages (oldest to newest):\n\n"
        f"{context}\n\n"
        "Respond with only JSON, no extra commentary."
    )


def _parse_model_reply(content: str) -> Dict:
    """Parse the model reply into a dict with valid ``actionable``/``summary``.

    Models sometimes wrap the JSON in code fences or add a sentence around it,
    so when the whole reply is not valid JSON the outermost ``{...}`` span is
    tried before giving up.

    Raises:
        json.JSONDecodeError: no parseable JSON could be found.
        ValueError: the JSON is not an object or lacks a bool ``actionable``
            and a str ``summary``.
    """
    text = content.strip()
    try:
        data = json.loads(text)
    except json.JSONDecodeError:
        start, end = text.find("{"), text.rfind("}")
        if start == -1 or end <= start:
            raise
        data = json.loads(text[start : end + 1])
    if (
        not isinstance(data, dict)
        or not isinstance(data.get("actionable"), bool)
        or not isinstance(data.get("summary"), str)
    ):
        raise ValueError("Invalid schema from model")
    return data


def _normalize_confidence(value) -> float:
    """Coerce a model-supplied confidence to a float clamped to [0, 1].

    Missing, boolean, non-numeric and NaN values become
    ``_DEFAULT_CONFIDENCE`` so callers can rely on the documented type.
    """
    if isinstance(value, bool):
        return _DEFAULT_CONFIDENCE
    try:
        number = float(value)
    except (TypeError, ValueError):
        return _DEFAULT_CONFIDENCE
    if math.isnan(number):
        return _DEFAULT_CONFIDENCE
    return min(1.0, max(0.0, number))


def _error_hint(exc: Exception) -> Optional[str]:
    """Return a human-readable hint for a failed API call, or None.

    Prefers the exception's integer ``status_code`` attribute when it has one;
    matching on the error text is only a fallback because digits such as
    "400" can appear in unrelated messages.
    """
    status = getattr(exc, "status_code", None)
    if isinstance(status, int) and not isinstance(status, bool):
        for code, _phrase, hint in _ERROR_HINTS:
            if status == code:
                return hint
        return None
    text = str(exc)
    lowered = text.lower()
    for code, phrase, hint in _ERROR_HINTS:
        if str(code) in text or phrase in lowered:
            return hint
    return None


def analyze_thread(
    thread_subject: str, messages: List[dict], max_messages: int = 4
) -> Dict:
    """Analyze a thread using the Groq LLM.

    Args:
        thread_subject: Subject of the thread; may be empty or None.
        messages: Message dicts, oldest first. The keys read are
            ``is_incoming``, ``date_sent``, ``subject``, ``from_email``,
            ``to_email`` and ``body``. None is treated as an empty list.
        max_messages: How many of the most recent messages to consider.
            Zero, None or a negative value means "all messages".

    Returns:
        A dict with keys:
        - actionable: bool
        - summary: str
        - subject: str
        - confidence: float (0..1)
        - model: str ("heuristic" when the fallback produced the result)

    Never raises for API problems: it falls back to a heuristic when
    GROQ_API_KEY is missing, the groq package is not installed, there are no
    messages, the prompt is too long, the call fails, or the reply is not
    valid JSON of the expected shape.
    """
    messages = messages or []
    if max_messages and max_messages > 0:
        msgs = messages[-max_messages:]
    else:
        msgs = messages

    api_key = os.getenv("GROQ_API_KEY")
    if not api_key or Groq is None or not msgs:
        return _fallback(thread_subject, msgs)

    # Keep the request small: first drop to the last two messages, then give
    # up on the API altogether if the prompt is still oversized.
    context = _format_messages_for_context(msgs)
    if len(context) > _MAX_CONTEXT_CHARS:
        full_length = len(context)
        context = _format_messages_for_context(msgs[-2:])
        print(
            "Warning: Truncated message context due to length "
            f"({full_length} -> {len(context)} chars)"
        )
    user_prompt = _build_user_prompt(thread_subject, context)

    total_prompt_length = len(_SYSTEM_PROMPT) + len(user_prompt)
    if total_prompt_length > _MAX_PROMPT_CHARS:
        print(
            f"Warning: Prompt too long ({total_prompt_length} chars), falling back to heuristic"
        )
        return _fallback(thread_subject, msgs)

    # Resolve the model once so the value sent and the value reported agree.
    model = os.getenv("GROQ_MODEL") or _DEFAULT_MODEL

    try:
        client = Groq(api_key=api_key)
        _rate_limit()
        completion = client.chat.completions.create(
            model=model,
            messages=[
                {"role": "system", "content": _SYSTEM_PROMPT},
                {"role": "user", "content": user_prompt},
            ],
            temperature=0.2,
            max_tokens=300,
        )
        content = completion.choices[0].message.content
    except Exception as e:
        # Deliberately broad: any SDK or network failure must degrade to the
        # heuristic rather than break the caller.
        print(f"Groq API error: {e}")
        hint = _error_hint(e)
        if hint:
            print(hint)
        return _fallback(thread_subject, msgs)

    try:
        # content can be None; treat it as an empty (unparseable) reply.
        data = _parse_model_reply(content or "")
    except json.JSONDecodeError as e:
        print(f"JSON decode error from Groq API: {e}")
        return _fallback(thread_subject, msgs)
    except ValueError as e:
        print(f"Groq API error: {e}")
        return _fallback(thread_subject, msgs)

    data["confidence"] = _normalize_confidence(data.get("confidence"))
    data.setdefault("subject", thread_subject or "")
    data.setdefault("model", model)
    return data