2025-07-24 16:35:04 +01:00
|
|
|
from typing import List, Dict, Optional
|
2025-07-08 19:57:35 +01:00
|
|
|
from .embeddings import get_embeddings, get_query_embedding, rerank_results
|
|
|
|
|
from .vector_store import VectorDB
|
|
|
|
|
import groq
|
|
|
|
|
from .config import Config
|
2025-07-07 22:08:02 +01:00
|
|
|
|
2025-07-08 19:57:35 +01:00
|
|
|
# Shared Groq client, authenticated with the API key taken from Config;
# analyze_article_with_groq() sends its chat-completion requests through it.
groq_client = groq.Groq(api_key=Config.GROQ_API_KEY)
|
2025-07-07 22:08:02 +01:00
|
|
|
|
2025-07-08 19:57:35 +01:00
|
|
|
# Module-wide VectorDB instance: it backs the NewsRecommender created in this
# module and is used directly by process_articles_for_vector_db() and
# recommend_similar().
vector_db = VectorDB()
|
|
|
|
|
|
|
|
|
|
|
2025-07-24 16:35:04 +01:00
|
|
|
class NewsRecommender:
    """News recommendation system using vector similarity search.

    Candidate articles come from the injected vector store; their order is
    then refined with ``rerank_results`` whenever that call returns a usable
    ranking.
    """

    def __init__(self, vector_db: VectorDB):
        """Keep a reference to the vector database used for searches."""
        self.vector_db = vector_db

    def recommend_by_text(
        self, text_description: str, top_n: int = 3
    ) -> List[Dict]:
        """Recommend articles similar to a free-text description.

        Args:
            text_description: Text description to find similar articles for.
            top_n: Number of results requested from the vector store.

        Returns:
            The articles found by the vector search, in reranked order when
            the reranker produced at least one valid entry and in search
            order otherwise. An empty list is returned when ``top_n`` is not
            positive or when no query embedding could be obtained.
        """
        # Nothing can be recommended for a non-positive request size, so
        # avoid handing such a value to the vector store at all.
        if top_n <= 0:
            return []

        query_embedding = get_query_embedding(text_description)
        if not query_embedding:
            return []

        # Search for similar articles
        similar_articles = self.vector_db.search(query_embedding, k=top_n)
        if not similar_articles:
            return similar_articles

        # Re-rank results for better relevance
        documents = [
            f"{art['title']} {art['content']}" for art in similar_articles
        ]
        reranked = rerank_results(text_description, documents)
        if not reranked:
            return similar_articles

        candidate_count = len(similar_articles)
        reordered = [
            similar_articles[result.index]
            for result in reranked
            # Explicit lower bound: a negative index would otherwise wrap
            # around and silently pick an article from the end of the list.
            if 0 <= result.index < candidate_count
        ]

        # If no reranked entry pointed at a valid candidate, keep the
        # vector-search order instead of discarding every search hit.
        return reordered or similar_articles

    def get_personalized_recommendations(
        self, user_interests: str, top_n: int = 5
    ) -> List[Dict]:
        """Get personalized recommendations based on user interests.

        Delegates to :meth:`recommend_by_text`, using the interests string
        as the query text.
        """
        return self.recommend_by_text(user_interests, top_n)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Module-level recommender bound to the shared vector_db; the helper
# functions in this module delegate their similarity searches to it.
news_recommender = NewsRecommender(vector_db)
|
|
|
|
|
|
|
|
|
|
|
2025-07-08 19:57:35 +01:00
|
|
|
def process_articles_for_vector_db(articles):
    """Embed the given articles and add them to the vector database.

    Nothing happens when ``articles`` is empty or when the embedding call
    returns no embeddings.
    """
    if not articles:
        return

    # One "<title> <content>" string per article is what gets embedded
    texts = [f"{item['title']} {item['content']}" for item in articles]

    embeddings = get_embeddings(texts)
    if not embeddings:
        return

    # Store the embeddings together with the articles they came from
    vector_db.add_vectors(embeddings, articles)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def recommend_similar(article_id, top_n=3):
    """Recommend articles similar to the one identified by ``article_id``.

    Returns an empty list when the article cannot be looked up; otherwise
    the recommender's results without the source article, truncated to
    ``top_n`` entries.
    """
    source = vector_db.get_article_by_id(article_id)
    if not source:
        return []

    # The article's own title and content serve as the query text
    query_text = f"{source['title']} {source['content']}"

    # Request one extra result because the source article may be among the
    # matches and is removed below
    candidates = news_recommender.recommend_by_text(query_text, top_n + 1)

    # Drop every candidate whose slug equals the requested id
    others = [
        candidate
        for candidate in candidates
        if candidate.get('slug') != article_id
    ]
    return others[:top_n]
|
2025-07-08 19:57:35 +01:00
|
|
|
|
|
|
|
|
|
|
|
|
|
def analyze_article_with_groq(article_text):
    """Ask the configured Groq chat model to analyze a news article.

    Returns the message content of the first completion choice. If anything
    raises along the way, the error is printed and the string
    "Analysis unavailable" is returned instead.
    """
    try:
        system_prompt = (
            "You are an AI news analyst. Provide insights, "
            "key points, and sentiment analysis for the given article."
        )
        completion = groq_client.chat.completions.create(
            model=Config.GROQ_MODEL,
            messages=[
                {"role": "system", "content": system_prompt},
                {
                    "role": "user",
                    "content": f"Analyze this news article: {article_text}",
                },
            ],
            max_tokens=500,
            temperature=0.3,
        )
        first_choice = completion.choices[0]
        return first_choice.message.content
    except Exception as err:
        # Best-effort helper: report the problem and hand back a placeholder
        print(f"Error analyzing article with Groq: {err}")
        return "Analysis unavailable"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def get_personalized_recommendations(user_interests, top_n=5):
    """Return recommendations matching the given user interests.

    Thin module-level wrapper that forwards both arguments to the shared
    ``news_recommender`` instance.
    """
    recommender = news_recommender
    return recommender.get_personalized_recommendations(user_interests, top_n)
|