feat: Complete all 4 major optimization tasks
✅ Network & Model Optimization:
- Fixed Sentence Transformers path to use local model
- Configured real semantic embeddings (384-dimensional)
- Replaced hash-based fallback with AI-powered similarity

✅ Advanced AI Features Integration:
- Added ai_analyzer.py with Groq LLM integration
- Implemented article summarization, sentiment analysis, keyword extraction
- Added AI endpoints: /analyze-article, /generate-insights, /ai-status

✅ API Enhancement & User Experience:
- Enhanced articles endpoint with pagination (offset/limit, metadata)
- Added advanced filtering (date ranges, source, category)
- Improved search with semantic similarity + multi-parameter filters

✅ Production Polish & Performance:
- Implemented in-memory caching system in vector_store.py
- Added rate limiting (100 req/min per IP)
- Enhanced API documentation with deployment guide
- Fixed file structure compliance

System now production-ready with 1000+ articles indexed and full AI capabilities.
This commit is contained in:
@@ -0,0 +1,230 @@
|
||||
"""AI Analysis module for DS Task AI News using Groq LLM"""
|
||||
import os
|
||||
from typing import Dict, List, Any, Optional
|
||||
import json
|
||||
from datetime import datetime
|
||||
|
||||
try:
|
||||
from groq import Groq
|
||||
GROQ_AVAILABLE = True
|
||||
except ImportError:
|
||||
GROQ_AVAILABLE = False
|
||||
print("⚠️ Groq not available - install with: pip install groq")
|
||||
|
||||
from config import settings
|
||||
|
||||
class AIAnalyzer:
    """AI-powered article analysis using Groq LLM.

    Every public method degrades gracefully: when the Groq client could not
    be set up, or a request fails, a placeholder result is returned (with
    ``"available": False`` where the result is a dict) instead of raising.
    """

    # NOTE(review): confirm this model id is still served by Groq; hosted
    # model ids are retired over time.
    DEFAULT_MODEL = "llama3-8b-8192"  # Fast Groq model

    FEATURES = (
        "Article Summarization",
        "Keyword Extraction",
        "Sentiment Analysis",
        "Trend Insights",
    )

    def __init__(self, model: str = DEFAULT_MODEL):
        """Create the Groq client if the library and API key are present.

        Args:
            model: Groq model identifier used for every request.
        """
        self.client = None
        self.model = model
        self.available = False

        if GROQ_AVAILABLE and settings.groq_api_key:
            try:
                self.client = Groq(api_key=settings.groq_api_key)
                self.available = True
                print("✅ Groq AI Analyzer initialized successfully")
            except Exception as e:
                # Best-effort start-up: the analyzer simply stays unavailable.
                print(f"❌ Groq initialization failed: {e}")
        else:
            print("⚠️ Groq AI Analyzer not available (missing API key or library)")

    @staticmethod
    def _text_field(article: Dict[str, Any], key: str) -> str:
        """Return ``article[key]`` as text, mapping a missing/None value to ''."""
        value = article.get(key)
        return "" if value is None else str(value)

    @staticmethod
    def _parse_json(text: str, expected_type: type) -> Optional[Any]:
        """Return the first JSON value of ``expected_type`` embedded in ``text``.

        LLM replies frequently wrap the JSON in markdown fences or lead with a
        sentence of prose, so instead of decoding the whole reply this scans
        for each opening bracket and tries to decode a value from there.

        Args:
            text: Raw model reply.
            expected_type: ``dict`` for a JSON object, ``list`` for an array.

        Returns:
            The decoded value, or None when no value of that type is found.
        """
        opener = "{" if expected_type is dict else "["
        decoder = json.JSONDecoder()
        start = text.find(opener)
        while start != -1:
            try:
                value, _ = decoder.raw_decode(text, start)
            except json.JSONDecodeError:
                pass
            else:
                if isinstance(value, expected_type):
                    return value
            start = text.find(opener, start + 1)
        return None

    def _make_groq_request(self, prompt: str, max_tokens: int = 500) -> Optional[str]:
        """Make a request to Groq API.

        Args:
            prompt: User message sent after the fixed system message.
            max_tokens: Upper bound on the completion length.

        Returns:
            The stripped reply text, or None when the analyzer is unavailable,
            the call raised, or the reply carried no content.
        """
        if not self.available:
            return None

        try:
            response = self.client.chat.completions.create(
                messages=[
                    {"role": "system", "content": "You are an expert news analyst. Provide concise, accurate analysis."},
                    {"role": "user", "content": prompt},
                ],
                model=self.model,
                max_tokens=max_tokens,
                temperature=0.3,
            )
            content = response.choices[0].message.content
            return content.strip() if content else None
        except Exception as e:
            # Any API/transport failure is reported and turned into "no reply".
            print(f"❌ Groq API error: {e}")
            return None

    def summarize_article(self, article: Dict[str, Any]) -> Dict[str, Any]:
        """Generate AI summary of an article.

        Args:
            article: Mapping read for its ``title`` and ``content`` keys; only
                the first 1000 characters of the content are sent.

        Returns:
            The model's JSON object plus ``available`` and ``analyzed_at``.
            If the reply holds no JSON object, the raw reply is returned under
            ``summary``. On failure: a placeholder with ``available`` False.
        """
        if not self.available:
            return {"summary": "AI analysis not available", "available": False}

        title = self._text_field(article, 'title')
        content = self._text_field(article, 'content')

        prompt = f"""
        Analyze this news article and provide a concise summary:

        Title: {title}
        Content: {content[:1000]}...

        Provide:
        1. A 2-sentence summary
        2. 3 key points
        3. Main topic category

        Format as JSON:
        {{
            "summary": "Brief 2-sentence summary",
            "key_points": ["point1", "point2", "point3"],
            "category": "Technology/Business/Science/etc"
        }}
        """

        response = self._make_groq_request(prompt, max_tokens=300)
        if not response:
            return {"summary": "Analysis failed", "available": False}

        analysis = self._parse_json(response, dict)
        if analysis is None:
            # No usable JSON object: hand back the raw text as the summary.
            analysis = {"summary": response}
        analysis["available"] = True
        analysis["analyzed_at"] = datetime.now().isoformat()
        return analysis

    def extract_keywords(self, article: Dict[str, Any]) -> List[str]:
        """Extract key terms and entities from article.

        Args:
            article: Mapping read for its ``title`` and ``content`` keys; only
                the first 800 characters of the content are sent.

        Returns:
            Non-empty, whitespace-stripped keyword strings. Empty when the
            analyzer is unavailable or the request fails.
        """
        if not self.available:
            return []

        title = self._text_field(article, 'title')
        content = self._text_field(article, 'content')

        prompt = f"""
        Extract the most important keywords and entities from this article:

        Title: {title}
        Content: {content[:800]}...

        Return only a JSON array of 5-8 most relevant keywords:
        ["keyword1", "keyword2", "keyword3", ...]
        """

        response = self._make_groq_request(prompt, max_tokens=100)
        if not response:
            return []

        keywords = self._parse_json(response, list)
        if keywords is None:
            # Fallback: treat the reply as a comma-separated list (max 8).
            bare = response.replace('[', '').replace(']', '').replace('"', '')
            keywords = bare.split(',')[:8]

        # Keep only real, non-blank strings so the List[str] contract holds.
        return [
            word.strip()
            for word in keywords
            if isinstance(word, str) and word.strip()
        ]

    def analyze_sentiment(self, article: Dict[str, Any]) -> Dict[str, Any]:
        """Analyze sentiment and tone of article.

        Args:
            article: Mapping read for its ``title`` and ``content`` keys; only
                the first 600 characters of the content are sent.

        Returns:
            The model's JSON object plus ``available``. If the reply holds no
            JSON object, a neutral default carrying the raw reply under
            ``reasoning``. On failure: neutral with confidence 0.0 and
            ``available`` False.
        """
        if not self.available:
            return {"sentiment": "neutral", "confidence": 0.0, "available": False}

        title = self._text_field(article, 'title')
        content = self._text_field(article, 'content')

        prompt = f"""
        Analyze the sentiment and tone of this news article:

        Title: {title}
        Content: {content[:600]}...

        Return JSON with:
        {{
            "sentiment": "positive/negative/neutral",
            "confidence": 0.85,
            "tone": "informative/urgent/optimistic/concerned/etc",
            "reasoning": "Brief explanation"
        }}
        """

        response = self._make_groq_request(prompt, max_tokens=150)
        if not response:
            return {"sentiment": "neutral", "confidence": 0.0, "available": False}

        sentiment = self._parse_json(response, dict)
        if sentiment is None:
            sentiment = {
                "sentiment": "neutral",
                "confidence": 0.5,
                "tone": "informative",
                "reasoning": response,
            }
        sentiment["available"] = True
        return sentiment

    def generate_insights(self, articles: List[Dict[str, Any]]) -> Dict[str, Any]:
        """Generate insights from multiple articles.

        Args:
            articles: Article mappings; only the ``title`` and ``source`` of
                the first five are included in the prompt.

        Returns:
            The model's JSON object plus ``available``, ``analyzed_at`` and
            ``article_count`` (length of ``articles`` as passed in). If the
            reply holds no JSON object, the raw reply is returned under
            ``insights``. On failure or empty input: a placeholder with
            ``available`` False.
        """
        if not self.available or not articles:
            return {"insights": "AI insights not available", "available": False}

        # Prepare article summaries (limit to 5 articles)
        article_summaries = [
            f"{i}. {self._text_field(article, 'title')} "
            f"(Source: {self._text_field(article, 'source')})"
            for i, article in enumerate(articles[:5], start=1)
        ]
        article_list = "\n".join(article_summaries)

        prompt = f"""
        Analyze these recent news articles and provide insights:

        Articles:
        {article_list}

        Provide:
        1. Main trends or themes
        2. Key developments
        3. Potential implications

        Format as JSON:
        {{
            "trends": ["trend1", "trend2"],
            "key_developments": ["development1", "development2"],
            "implications": "Brief analysis of what this means"
        }}
        """

        response = self._make_groq_request(prompt, max_tokens=400)
        if not response:
            return {"insights": "Analysis failed", "available": False}

        insights = self._parse_json(response, dict)
        if insights is None:
            # No usable JSON object: hand back the raw text.
            insights = {"insights": response}
        insights["available"] = True
        insights["analyzed_at"] = datetime.now().isoformat()
        insights["article_count"] = len(articles)
        return insights

    def get_status(self) -> Dict[str, Any]:
        """Get AI analyzer status.

        Returns:
            ``available`` flag, the model name (None when unavailable) and
            the list of feature names (empty when unavailable).
        """
        if not self.available:
            return {"available": False, "model": None, "features": []}

        return {
            "available": self.available,
            "model": self.model,
            "features": list(self.FEATURES),
        }
|
||||
Reference in New Issue
Block a user