# DS_TASK_AI_VIEWS/backend/news_fetcher.py

"""AI Analysis module for DS Task AI News using Groq LLM"""
import os
from typing import Dict, List, Any, Optional
import json
from datetime import datetime

try:
    from groq import Groq
    GROQ_AVAILABLE = True
except ImportError:
    GROQ_AVAILABLE = False
    print("⚠️  Groq not available - install with: pip install groq")

from config import settings

class AIAnalyzer:
    """AI-powered article analysis using Groq LLM.

    Every public method degrades gracefully: when the Groq client is not
    available, or a request fails, a placeholder result is returned instead
    of an exception being raised.
    """

    # NOTE(review): confirm this model id is still served by Groq; hosted
    # model ids get deprecated over time.
    DEFAULT_MODEL = "llama3-8b-8192"  # Fast Groq model

    # Upper bound on keywords returned (the prompt asks for 5-8).
    _MAX_KEYWORDS = 8
    # Only the first few articles are sent to the model for trend insights.
    _MAX_INSIGHT_ARTICLES = 5

    def __init__(self, model: str = DEFAULT_MODEL):
        """Create the Groq client if the library and an API key are present.

        Args:
            model: Groq model id used for every request.
        """
        self.client = None
        self.model = model
        self.available = False

        if GROQ_AVAILABLE and settings.groq_api_key:
            try:
                self.client = Groq(api_key=settings.groq_api_key)
                self.available = True
                print("✅ Groq AI Analyzer initialized successfully")
            except Exception as e:
                # Best effort: a broken client must not prevent start-up.
                print(f"❌ Groq initialization failed: {e}")
        else:
            print("⚠️  Groq AI Analyzer not available (missing API key or library)")

    @staticmethod
    def _text_field(article: Dict[str, Any], key: str) -> str:
        """Return ``article[key]`` as a string, mapping missing/``None`` to ``""``."""
        value = article.get(key)
        return "" if value is None else str(value)

    @staticmethod
    def _parse_json_reply(text: str, expected_type: type) -> Optional[Any]:
        """Extract a JSON value of ``expected_type`` (``dict`` or ``list``) from a reply.

        The whole reply is tried first. If that fails, or yields a different
        type, every candidate opening bracket is tried in turn, so JSON that is
        wrapped in markdown fences or surrounded by prose is still found.

        Returns:
            The decoded value, or ``None`` when no matching JSON is present.
        """
        try:
            value = json.loads(text)
        except json.JSONDecodeError:
            pass
        else:
            if isinstance(value, expected_type):
                return value

        opener = "{" if expected_type is dict else "["
        decoder = json.JSONDecoder()
        start = text.find(opener)
        while start != -1:
            try:
                # raw_decode tolerates trailing text after the JSON value.
                value, _ = decoder.raw_decode(text, start)
            except json.JSONDecodeError:
                pass
            else:
                if isinstance(value, expected_type):
                    return value
            start = text.find(opener, start + 1)
        return None

    def _make_groq_request(self, prompt: str, max_tokens: int = 500) -> Optional[str]:
        """Make a request to Groq API.

        Args:
            prompt: User message sent to the model.
            max_tokens: Completion token limit for the reply.

        Returns:
            The stripped reply text, or ``None`` when the analyzer is
            unavailable, the request fails, or the reply has no content.
        """
        if not self.available or self.client is None:
            return None

        try:
            response = self.client.chat.completions.create(
                messages=[
                    {"role": "system", "content": "You are an expert news analyst. Provide concise, accurate analysis."},
                    {"role": "user", "content": prompt}
                ],
                model=self.model,
                max_tokens=max_tokens,
                temperature=0.3
            )
            content = response.choices[0].message.content
        except Exception as e:
            # Boundary with a network service: report and degrade, never raise.
            print(f"❌ Groq API error: {e}")
            return None

        return content.strip() if content else None

    def summarize_article(self, article: Dict[str, Any]) -> Dict[str, Any]:
        """Generate AI summary of an article.

        Args:
            article: Mapping read for its ``title`` and ``content`` keys;
                missing or ``None`` values are treated as empty text.

        Returns:
            The model's JSON object (the prompt requests ``summary``,
            ``key_points`` and ``category``) plus ``available`` and
            ``analyzed_at``. If no JSON object can be found in the reply, the
            raw reply is returned under ``summary``. On failure,
            ``available`` is ``False``.
        """
        if not self.available:
            return {"summary": "AI analysis not available", "available": False}

        title = self._text_field(article, 'title')
        content = self._text_field(article, 'content')

        prompt = f"""
        Analyze this news article and provide a concise summary:

        Title: {title}
        Content: {content[:1000]}...

        Provide:
        1. A 2-sentence summary
        2. 3 key points
        3. Main topic category

        Format as JSON:
        {{
            "summary": "Brief 2-sentence summary",
            "key_points": ["point1", "point2", "point3"],
            "category": "Technology/Business/Science/etc"
        }}
        """

        response = self._make_groq_request(prompt, max_tokens=300)
        if not response:
            return {"summary": "Analysis failed", "available": False}

        analysis = self._parse_json_reply(response, dict)
        if analysis is None:
            # No usable JSON object: surface the raw text as the summary.
            analysis = {"summary": response}
        analysis["available"] = True
        analysis["analyzed_at"] = datetime.now().isoformat()
        return analysis

    def extract_keywords(self, article: Dict[str, Any]) -> List[str]:
        """Extract key terms and entities from article.

        Args:
            article: Mapping read for its ``title`` and ``content`` keys.

        Returns:
            Up to eight non-empty keyword strings; an empty list when the
            analyzer is unavailable or the request fails.
        """
        if not self.available:
            return []

        title = self._text_field(article, 'title')
        content = self._text_field(article, 'content')

        prompt = f"""
        Extract the most important keywords and entities from this article:

        Title: {title}
        Content: {content[:800]}...

        Return only a JSON array of 5-8 most relevant keywords:
        ["keyword1", "keyword2", "keyword3", ...]
        """

        response = self._make_groq_request(prompt, max_tokens=100)
        if not response:
            return []

        keywords = self._parse_json_reply(response, list)
        if keywords is None:
            # Fallback: treat the reply as a comma-separated list (this also
            # covers a JSON array cut off by the token limit).
            stripped = response.replace('[', '').replace(']', '').replace('"', '')
            keywords = stripped.split(',')

        cleaned = [kw.strip() for kw in keywords if isinstance(kw, str) and kw.strip()]
        return cleaned[:self._MAX_KEYWORDS]

    def analyze_sentiment(self, article: Dict[str, Any]) -> Dict[str, Any]:
        """Analyze sentiment and tone of article.

        Args:
            article: Mapping read for its ``title`` and ``content`` keys.

        Returns:
            The model's JSON object (the prompt requests ``sentiment``,
            ``confidence``, ``tone`` and ``reasoning``) plus ``available``.
            If no JSON object can be found, a neutral result with the raw
            reply as ``reasoning``. On failure, a neutral result with
            ``available`` set to ``False``.
        """
        if not self.available:
            return {"sentiment": "neutral", "confidence": 0.0, "available": False}

        title = self._text_field(article, 'title')
        content = self._text_field(article, 'content')

        prompt = f"""
        Analyze the sentiment and tone of this news article:

        Title: {title}
        Content: {content[:600]}...

        Return JSON with:
        {{
            "sentiment": "positive/negative/neutral",
            "confidence": 0.85,
            "tone": "informative/urgent/optimistic/concerned/etc",
            "reasoning": "Brief explanation"
        }}
        """

        response = self._make_groq_request(prompt, max_tokens=150)
        if not response:
            return {"sentiment": "neutral", "confidence": 0.0, "available": False}

        sentiment = self._parse_json_reply(response, dict)
        if sentiment is None:
            sentiment = {
                "sentiment": "neutral",
                "confidence": 0.5,
                "tone": "informative",
                "reasoning": response,
            }
        sentiment["available"] = True
        return sentiment

    def generate_insights(self, articles: List[Dict[str, Any]]) -> Dict[str, Any]:
        """Generate insights from multiple articles.

        Args:
            articles: Article mappings; only the ``title`` and ``source`` of
                the first five are sent to the model.

        Returns:
            The model's JSON object (the prompt requests ``trends``,
            ``key_developments`` and ``implications``) plus ``available``,
            ``analyzed_at`` and ``article_count``. If no JSON object can be
            found, the raw reply under ``insights``. On failure or empty
            input, ``available`` is ``False``.
        """
        if not self.available or not articles:
            return {"insights": "AI insights not available", "available": False}

        # Prepare one numbered line per article, limited to the first few.
        article_summaries = [
            f"{i}. {self._text_field(a, 'title')} (Source: {self._text_field(a, 'source')})"
            for i, a in enumerate(articles[:self._MAX_INSIGHT_ARTICLES], start=1)
        ]
        article_lines = "\n".join(article_summaries)

        prompt = f"""
        Analyze these recent news articles and provide insights:

        Articles:
        {article_lines}

        Provide:
        1. Main trends or themes
        2. Key developments
        3. Potential implications

        Format as JSON:
        {{
            "trends": ["trend1", "trend2"],
            "key_developments": ["development1", "development2"],
            "implications": "Brief analysis of what this means"
        }}
        """

        response = self._make_groq_request(prompt, max_tokens=400)
        if not response:
            return {"insights": "Analysis failed", "available": False}

        insights = self._parse_json_reply(response, dict)
        if insights is None:
            return {
                "insights": response,
                "available": True,
                "analyzed_at": datetime.now().isoformat()
            }

        insights["available"] = True
        insights["analyzed_at"] = datetime.now().isoformat()
        # Counts every article passed in, not only those sent to the model.
        insights["article_count"] = len(articles)
        return insights

    def get_status(self) -> Dict[str, Any]:
        """Get AI analyzer status.

        Returns:
            A dict with ``available``, the ``model`` id (``None`` when
            unavailable) and the list of supported ``features`` (empty when
            unavailable).
        """
        return {
            "available": self.available,
            "model": self.model if self.available else None,
            "features": [
                "Article Summarization",
                "Keyword Extraction",
                "Sentiment Analysis",
                "Trend Insights"
            ] if self.available else []
        }