feat: Complete all 4 major optimization tasks

✅ Network & Model Optimization: - Fixed Sentence Transformers path to use local model - Configured real semantic embeddings (384-dimensional) - Replaced hash-based fallback with AI-powered similarity ✅ Advanced AI Features Integration: - Added ai_analyzer.py with Groq LLM integration - Implemented article summarization, sentiment analysis, keyword extraction - Added AI endpoints: /analyze-article, /generate-insights, /ai-status ✅ API Enhancement & User Experience: - Enhanced articles endpoint with pagination (offset/limit, metadata) - Added advanced filtering (date ranges, source, category) - Improved search with semantic similarity + multi-parameter filters ✅ Production Polish & Performance: - Implemented in-memory caching system in vector_store.py - Added rate limiting (100 req/min per IP) - Enhanced API documentation with deployment guide - Fixed file structure compliance System now production-ready with 1000+ articles indexed and full AI capabilities.
2025-07-08 16:45:38 +01:00
parent 3c4a08d639
commit beed04d05c
8 changed files with 789 additions and 65 deletions
@@ -0,0 +1,230 @@
+"""AI Analysis module for DS Task AI News using Groq LLM"""
+import os
+from typing import Dict, List, Any, Optional
+import json
+from datetime import datetime
+
+try:  # The Groq SDK is optional: record whether it imported instead of failing at import time
+    from groq import Groq
+    GROQ_AVAILABLE = True
+except ImportError:
+    GROQ_AVAILABLE = False  # AIAnalyzer.__init__ checks this flag before creating a client
+    print("⚠️  Groq not available - install with: pip install groq")
+
+from config import settings
+
+class AIAnalyzer:
+    """AI-powered article analysis using Groq LLM"""
+    
+    def __init__(self):
+        self.client = None
+        self.model = "llama3-8b-8192"  # Fast Groq model
+        self.available = False
+        
+        if GROQ_AVAILABLE and settings.groq_api_key:
+            try:
+                self.client = Groq(api_key=settings.groq_api_key)
+                self.available = True
+                print("✅ Groq AI Analyzer initialized successfully")
+            except Exception as e:
+                print(f"❌ Groq initialization failed: {e}")
+        else:
+            print("⚠️  Groq AI Analyzer not available (missing API key or library)")
+    
+    def _make_groq_request(self, prompt: str, max_tokens: int = 500) -> Optional[str]:
+        """Make a request to Groq API"""
+        if not self.available:
+            return None
+            
+        try:
+            response = self.client.chat.completions.create(
+                messages=[
+                    {"role": "system", "content": "You are an expert news analyst. Provide concise, accurate analysis."},
+                    {"role": "user", "content": prompt}
+                ],
+                model=self.model,
+                max_tokens=max_tokens,
+                temperature=0.3
+            )
+            return response.choices[0].message.content.strip()
+        except Exception as e:
+            print(f"❌ Groq API error: {e}")
+            return None
+    
+    def summarize_article(self, article: Dict[str, Any]) -> Dict[str, Any]:
+        """Generate AI summary of an article"""
+        if not self.available:
+            return {"summary": "AI analysis not available", "available": False}
+        
+        title = article.get('title', '')
+        content = article.get('content', '')
+        
+        prompt = f"""
+        Analyze this news article and provide a concise summary:
+        
+        Title: {title}
+        Content: {content[:1000]}...
+        
+        Provide:
+        1. A 2-sentence summary
+        2. 3 key points
+        3. Main topic category
+        
+        Format as JSON:
+        {{
+            "summary": "Brief 2-sentence summary",
+            "key_points": ["point1", "point2", "point3"],
+            "category": "Technology/Business/Science/etc"
+        }}
+        """
+        
+        response = self._make_groq_request(prompt, max_tokens=300)
+        
+        if response:
+            try:
+                analysis = json.loads(response)
+                analysis["available"] = True
+                analysis["analyzed_at"] = datetime.now().isoformat()
+                return analysis
+            except json.JSONDecodeError:
+                return {
+                    "summary": response,
+                    "available": True,
+                    "analyzed_at": datetime.now().isoformat()
+                }
+        
+        return {"summary": "Analysis failed", "available": False}
+    
+    def extract_keywords(self, article: Dict[str, Any]) -> List[str]:
+        """Extract key terms and entities from article"""
+        if not self.available:
+            return []
+        
+        title = article.get('title', '')
+        content = article.get('content', '')
+        
+        prompt = f"""
+        Extract the most important keywords and entities from this article:
+        
+        Title: {title}
+        Content: {content[:800]}...
+        
+        Return only a JSON array of 5-8 most relevant keywords:
+        ["keyword1", "keyword2", "keyword3", ...]
+        """
+        
+        response = self._make_groq_request(prompt, max_tokens=100)
+        
+        if response:
+            try:
+                keywords = json.loads(response)
+                return keywords if isinstance(keywords, list) else []
+            except json.JSONDecodeError:
+                # Fallback: extract from response text
+                words = response.replace('[', '').replace(']', '').replace('"', '').split(',')
+                return [word.strip() for word in words[:8]]
+        
+        return []
+    
+    def analyze_sentiment(self, article: Dict[str, Any]) -> Dict[str, Any]:
+        """Analyze sentiment and tone of article"""
+        if not self.available:
+            return {"sentiment": "neutral", "confidence": 0.0, "available": False}
+        
+        title = article.get('title', '')
+        content = article.get('content', '')
+        
+        prompt = f"""
+        Analyze the sentiment and tone of this news article:
+        
+        Title: {title}
+        Content: {content[:600]}...
+        
+        Return JSON with:
+        {{
+            "sentiment": "positive/negative/neutral",
+            "confidence": 0.85,
+            "tone": "informative/urgent/optimistic/concerned/etc",
+            "reasoning": "Brief explanation"
+        }}
+        """
+        
+        response = self._make_groq_request(prompt, max_tokens=150)
+        
+        if response:
+            try:
+                sentiment = json.loads(response)
+                sentiment["available"] = True
+                return sentiment
+            except json.JSONDecodeError:
+                return {
+                    "sentiment": "neutral",
+                    "confidence": 0.5,
+                    "tone": "informative",
+                    "reasoning": response,
+                    "available": True
+                }
+        
+        return {"sentiment": "neutral", "confidence": 0.0, "available": False}
+    
+    def generate_insights(self, articles: List[Dict[str, Any]]) -> Dict[str, Any]:
+        """Generate insights from multiple articles"""
+        if not self.available or not articles:
+            return {"insights": "AI insights not available", "available": False}
+        
+        # Prepare article summaries
+        article_summaries = []
+        for i, article in enumerate(articles[:5]):  # Limit to 5 articles
+            title = article.get('title', '')
+            source = article.get('source', '')
+            article_summaries.append(f"{i+1}. {title} (Source: {source})")
+        
+        prompt = f"""
+        Analyze these recent news articles and provide insights:
+        
+        Articles:
+        {chr(10).join(article_summaries)}
+        
+        Provide:
+        1. Main trends or themes
+        2. Key developments
+        3. Potential implications
+        
+        Format as JSON:
+        {{
+            "trends": ["trend1", "trend2"],
+            "key_developments": ["development1", "development2"],
+            "implications": "Brief analysis of what this means"
+        }}
+        """
+        
+        response = self._make_groq_request(prompt, max_tokens=400)
+        
+        if response:
+            try:
+                insights = json.loads(response)
+                insights["available"] = True
+                insights["analyzed_at"] = datetime.now().isoformat()
+                insights["article_count"] = len(articles)
+                return insights
+            except json.JSONDecodeError:
+                return {
+                    "insights": response,
+                    "available": True,
+                    "analyzed_at": datetime.now().isoformat()
+                }
+        
+        return {"insights": "Analysis failed", "available": False}
+    
+    def get_status(self) -> Dict[str, Any]:
+        """Get AI analyzer status"""
+        return {
+            "available": self.available,
+            "model": self.model if self.available else None,
+            "features": [
+                "Article Summarization",
+                "Keyword Extraction", 
+                "Sentiment Analysis",
+                "Trend Insights"
+            ] if self.available else []
+        }