508270e732
- Added proper User-Agent headers to avoid blocking by RSS servers
- Implemented fallback mechanism: HTTP request with headers -> direct feedparser
- Extended timeout to 15 seconds for better reliability
- Enhanced error logging with detailed feed parsing information
- Improved handling of 'bozo' (malformed) feeds with better reporting
- Added informative messages for feeds with no new content

This resolves RSS fetching issues and improves news aggregation reliability.
230 lines
7.8 KiB
Python
230 lines
7.8 KiB
Python
"""AI Analysis module for DS Task AI News using Groq LLM"""
|
|
import os
|
|
from typing import Dict, List, Any, Optional
|
|
import json
|
|
from datetime import datetime
|
|
|
|
# Optional dependency: the Groq SDK. The module must stay importable without
# it, so a failed import is recorded in GROQ_AVAILABLE instead of propagating.
try:
    from groq import Groq
    GROQ_AVAILABLE = True
except ImportError:
    # Bind the name anyway so later references fail predictably (None) rather
    # than with a NameError when the SDK is not installed.
    Groq = None
    GROQ_AVAILABLE = False
    print("⚠️ Groq not available - install with: pip install groq")
|
|
|
|
from config import settings
|
|
|
|
class AIAnalyzer:
    """AI-powered article analysis using Groq LLM.

    Every public method degrades gracefully: when the analyzer is not
    available (SDK missing, no API key, or client construction failed) or a
    request fails, a placeholder result is returned instead of raising.
    """

    # Only the first few article titles are sent to the model for insights.
    _MAX_INSIGHT_ARTICLES = 5

    def __init__(self):
        """Create the Groq client when the SDK and an API key are present."""
        self.client = None
        # NOTE(review): hosted model IDs get retired over time - confirm this
        # one is still served by Groq before relying on it.
        self.model = "llama3-8b-8192"  # Fast Groq model
        self.available = False

        if GROQ_AVAILABLE and settings.groq_api_key:
            # Broad catch on purpose: any failure while building the client
            # just leaves the analyzer disabled.
            try:
                self.client = Groq(api_key=settings.groq_api_key)
                self.available = True
                print("✅ Groq AI Analyzer initialized successfully")
            except Exception as e:
                print(f"❌ Groq initialization failed: {e}")
        else:
            print("⚠️ Groq AI Analyzer not available (missing API key or library)")

    @staticmethod
    def _field(article: Dict[str, Any], key: str) -> str:
        """Return ``article[key]`` as text, mapping a missing/None value to ''."""
        value = article.get(key)
        return "" if value is None else str(value)

    @staticmethod
    def _parse_json(text: str) -> Any:
        """Decode the JSON payload contained in a model reply.

        The reply is first decoded as-is. If that fails, the outermost
        ``{...}`` and ``[...]`` spans are tried, earliest-starting first, so
        that a payload wrapped in a Markdown code fence or surrounded by
        explanatory text is still recognised.

        Raises:
            ValueError: if no candidate decodes as JSON.
        """
        try:
            return json.loads(text)
        except json.JSONDecodeError:
            pass

        spans = []
        for opener, closer in (("{", "}"), ("[", "]")):
            start = text.find(opener)
            stop = text.rfind(closer)
            if 0 <= start < stop:
                spans.append((start, stop + 1))

        # Earliest opener first, so the outermost structure wins over a
        # structure nested inside it.
        for start, stop in sorted(spans):
            try:
                return json.loads(text[start:stop])
            except json.JSONDecodeError:
                continue
        raise ValueError("no JSON payload found in model reply")

    def _make_groq_request(self, prompt: str, max_tokens: int = 500) -> Optional[str]:
        """Send ``prompt`` to the chat-completions endpoint.

        Args:
            prompt: User message sent after the fixed system message.
            max_tokens: Completion length limit passed through to the API.

        Returns:
            The stripped text of the first choice, or ``None`` when the
            analyzer is unavailable, the reply carries no text, or the call
            raises.
        """
        if not self.available or self.client is None:
            return None

        # Broad catch on purpose: any failure is reported and turned into
        # ``None`` so callers can fall back to their placeholder result.
        try:
            response = self.client.chat.completions.create(
                messages=[
                    {"role": "system", "content": "You are an expert news analyst. Provide concise, accurate analysis."},
                    {"role": "user", "content": prompt},
                ],
                model=self.model,
                max_tokens=max_tokens,
                temperature=0.3,
            )
            text = response.choices[0].message.content
            if text is None:
                return None
            return text.strip()
        except Exception as e:
            print(f"❌ Groq API error: {e}")
            return None

    def summarize_article(self, article: Dict[str, Any]) -> Dict[str, Any]:
        """Generate AI summary of an article.

        Args:
            article: Mapping read for its ``'title'`` and ``'content'`` keys;
                only the first 1000 characters of the content are sent.

        Returns:
            The model's JSON object extended with ``available`` and
            ``analyzed_at``. If the reply is not a JSON object, the raw reply
            is returned under ``summary``. On failure a placeholder with
            ``available`` set to ``False`` is returned.
        """
        if not self.available:
            return {"summary": "AI analysis not available", "available": False}

        title = self._field(article, 'title')
        content = self._field(article, 'content')

        prompt = (
            "\n"
            "Analyze this news article and provide a concise summary:\n"
            "\n"
            f"Title: {title}\n"
            f"Content: {content[:1000]}...\n"
            "\n"
            "Provide:\n"
            "1. A 2-sentence summary\n"
            "2. 3 key points\n"
            "3. Main topic category\n"
            "\n"
            "Format as JSON:\n"
            "{\n"
            '"summary": "Brief 2-sentence summary",\n'
            '"key_points": ["point1", "point2", "point3"],\n'
            '"category": "Technology/Business/Science/etc"\n'
            "}\n"
        )

        response = self._make_groq_request(prompt, max_tokens=300)
        if not response:
            return {"summary": "Analysis failed", "available": False}

        try:
            parsed = self._parse_json(response)
        except ValueError:
            parsed = None

        # Anything other than a JSON object is treated as free text.
        analysis = parsed if isinstance(parsed, dict) else {"summary": response}
        analysis["available"] = True
        analysis["analyzed_at"] = datetime.now().isoformat()
        return analysis

    def extract_keywords(self, article: Dict[str, Any]) -> List[str]:
        """Extract key terms and entities from article.

        Args:
            article: Mapping read for its ``'title'`` and ``'content'`` keys;
                only the first 800 characters of the content are sent.

        Returns:
            The non-empty string items of the model's JSON array. If the reply
            holds no JSON at all, up to 8 comma-separated terms taken from the
            raw reply. An empty list when unavailable, when the request fails,
            or when the reply is JSON but not an array.
        """
        if not self.available:
            return []

        title = self._field(article, 'title')
        content = self._field(article, 'content')

        prompt = (
            "\n"
            "Extract the most important keywords and entities from this article:\n"
            "\n"
            f"Title: {title}\n"
            f"Content: {content[:800]}...\n"
            "\n"
            "Return only a JSON array of 5-8 most relevant keywords:\n"
            '["keyword1", "keyword2", "keyword3", ...]\n'
        )

        response = self._make_groq_request(prompt, max_tokens=100)
        if not response:
            return []

        try:
            parsed = self._parse_json(response)
        except ValueError:
            # Fallback: extract from response text
            flattened = response.replace('[', '').replace(']', '').replace('"', '')
            terms = [term.strip() for term in flattened.split(',')]
            return [term for term in terms if term][:8]

        if not isinstance(parsed, list):
            return []
        return [item.strip() for item in parsed if isinstance(item, str) and item.strip()]

    def analyze_sentiment(self, article: Dict[str, Any]) -> Dict[str, Any]:
        """Analyze sentiment and tone of article.

        Args:
            article: Mapping read for its ``'title'`` and ``'content'`` keys;
                only the first 600 characters of the content are sent.

        Returns:
            The model's JSON object extended with ``available``. If the reply
            is not a JSON object, a neutral result carrying the raw reply as
            ``reasoning``. On failure a neutral placeholder with ``available``
            set to ``False``.
        """
        if not self.available:
            return {"sentiment": "neutral", "confidence": 0.0, "available": False}

        title = self._field(article, 'title')
        content = self._field(article, 'content')

        prompt = (
            "\n"
            "Analyze the sentiment and tone of this news article:\n"
            "\n"
            f"Title: {title}\n"
            f"Content: {content[:600]}...\n"
            "\n"
            "Return JSON with:\n"
            "{\n"
            '"sentiment": "positive/negative/neutral",\n'
            '"confidence": 0.85,\n'
            '"tone": "informative/urgent/optimistic/concerned/etc",\n'
            '"reasoning": "Brief explanation"\n'
            "}\n"
        )

        response = self._make_groq_request(prompt, max_tokens=150)
        if not response:
            return {"sentiment": "neutral", "confidence": 0.0, "available": False}

        try:
            parsed = self._parse_json(response)
        except ValueError:
            parsed = None

        if isinstance(parsed, dict):
            parsed["available"] = True
            return parsed

        return {
            "sentiment": "neutral",
            "confidence": 0.5,
            "tone": "informative",
            "reasoning": response,
            "available": True,
        }

    def generate_insights(self, articles: List[Dict[str, Any]]) -> Dict[str, Any]:
        """Generate insights from multiple articles.

        Args:
            articles: Article mappings read for ``'title'`` and ``'source'``;
                only the first five are listed in the prompt.

        Returns:
            The model's JSON object extended with ``available``,
            ``analyzed_at`` and ``article_count``. If the reply is not a JSON
            object, the raw reply is returned under ``insights``. On failure
            or empty input a placeholder with ``available`` set to ``False``.
        """
        if not self.available or not articles:
            return {"insights": "AI insights not available", "available": False}

        # Prepare article summaries
        article_summaries = [
            f"{position}. {self._field(article, 'title')} "
            f"(Source: {self._field(article, 'source')})"
            for position, article in enumerate(articles[:self._MAX_INSIGHT_ARTICLES], start=1)
        ]
        listing = "\n".join(article_summaries)

        prompt = (
            "\n"
            "Analyze these recent news articles and provide insights:\n"
            "\n"
            "Articles:\n"
            f"{listing}\n"
            "\n"
            "Provide:\n"
            "1. Main trends or themes\n"
            "2. Key developments\n"
            "3. Potential implications\n"
            "\n"
            "Format as JSON:\n"
            "{\n"
            '"trends": ["trend1", "trend2"],\n'
            '"key_developments": ["development1", "development2"],\n'
            '"implications": "Brief analysis of what this means"\n'
            "}\n"
        )

        response = self._make_groq_request(prompt, max_tokens=400)
        if not response:
            return {"insights": "Analysis failed", "available": False}

        try:
            parsed = self._parse_json(response)
        except ValueError:
            parsed = None

        if isinstance(parsed, dict):
            parsed["available"] = True
            parsed["analyzed_at"] = datetime.now().isoformat()
            # Counts every article supplied, not just the ones in the prompt.
            parsed["article_count"] = len(articles)
            return parsed

        return {
            "insights": response,
            "available": True,
            "analyzed_at": datetime.now().isoformat(),
        }

    def get_status(self) -> Dict[str, Any]:
        """Get AI analyzer status.

        Returns:
            A dict with ``available``, the ``model`` name (``None`` when
            unavailable) and the list of ``features`` (empty when
            unavailable).
        """
        if not self.available:
            return {"available": self.available, "model": None, "features": []}

        return {
            "available": self.available,
            "model": self.model,
            "features": [
                "Article Summarization",
                "Keyword Extraction",
                "Sentiment Analysis",
                "Trend Insights",
            ],
        }