Files
DS_Task_AI_News/backend/recommender.py
T

98 lines
3.0 KiB
Python
Raw Normal View History

2025-07-08 19:57:35 +01:00
from .embeddings import get_embeddings, get_query_embedding, rerank_results
from .vector_store import VectorDB
import groq
from .config import Config
2025-07-07 22:08:02 +01:00
2025-07-08 19:57:35 +01:00
# Module-level Groq client, created once at import time with the API key
# read from Config.GROQ_API_KEY.
groq_client = groq.Groq(api_key=Config.GROQ_API_KEY)
2025-07-07 22:08:02 +01:00
2025-07-08 19:57:35 +01:00
# Module-level vector database instance used by the indexing and
# recommendation functions in this module.
vector_db = VectorDB()
def process_articles_for_vector_db(articles):
    """Embed the given articles and store them in the vector database.

    Each article's ``'title'`` and ``'content'`` values are joined with a
    single space to form the text that is embedded. Nothing is stored when
    ``articles`` is empty or when no embeddings come back.

    Args:
        articles: Sequence of article mappings, each providing ``'title'``
            and ``'content'`` keys.

    Returns:
        None.
    """
    if not articles:
        return

    # One combined text per article, in the same order as ``articles``.
    combined_texts = [f"{item['title']} {item['content']}" for item in articles]

    vectors = get_embeddings(combined_texts)
    if not vectors:
        return

    vector_db.add_vectors(vectors, articles)
def recommend_similar(article_id, top_n=3):
    """Return up to ``top_n`` stored articles similar to the given article.

    The article is looked up via ``vector_db.get_article_by_id``; its title
    and content are embedded as a query and the vector store is searched for
    ``top_n + 1`` neighbours so the source article can be dropped from the
    results without leaving the list short.

    Args:
        article_id: Identifier passed to ``vector_db.get_article_by_id``.
            It is also compared with each result's ``'slug'`` value to
            exclude the source article from the recommendations.
        top_n: Maximum number of recommendations to return.

    Returns:
        A list of at most ``top_n`` articles. The list is empty when
        ``top_n`` is not positive, the article is not found, or no query
        embedding could be produced.
    """
    # A non-positive request can never yield results; bail out before doing
    # a lookup, an embedding call, or a search with a meaningless ``k``.
    if top_n <= 0:
        return []

    article = vector_db.get_article_by_id(article_id)
    if not article:
        return []

    # Embed the same "title content" text shape used when indexing.
    article_text = f"{article['title']} {article['content']}"
    query_embedding = get_query_embedding(article_text)
    if not query_embedding:
        return []

    # Ask for one extra hit because the source article is expected to match
    # itself and is filtered out below.
    similar_articles = vector_db.search(query_embedding, k=top_n + 1)
    recommendations = [
        art for art in similar_articles if art.get('slug') != article_id
    ]
    return recommendations[:top_n]
def analyze_article_with_groq(article_text):
    """Ask the Groq LLM for insights, key points and sentiment of an article.

    The request uses the model named by ``Config.GROQ_MODEL`` with a
    500-token completion limit and a temperature of 0.3.

    Args:
        article_text: The article text to analyze.

    Returns:
        The model's analysis as a string, or ``"Analysis unavailable"`` when
        the text is empty/blank, the request fails, or the model returns no
        content. This function never raises.
    """
    fallback = "Analysis unavailable"

    # Nothing to analyze: skip the API round trip entirely.
    if not article_text or not str(article_text).strip():
        return fallback

    try:
        response = groq_client.chat.completions.create(
            model=Config.GROQ_MODEL,
            messages=[
                {
                    "role": "system",
                    "content": "You are an AI news analyst. Provide insights, key points, and sentiment analysis for the given article."
                },
                {
                    "role": "user",
                    "content": f"Analyze this news article: {article_text}"
                }
            ],
            max_tokens=500,
            temperature=0.3
        )
        content = response.choices[0].message.content
    except Exception as e:
        # Deliberate best-effort boundary: any client or response-shape
        # failure degrades to the fallback text instead of propagating.
        print(f"Error analyzing article with Groq: {e}")
        return fallback

    # The message content may be absent; keep the return type a string.
    return fallback if content is None else content
def get_personalized_recommendations(user_interests, top_n=5):
    """Return stored articles matching a free-text description of interests.

    The interests text is embedded and used to search the vector store for
    ``top_n`` candidates, which are then passed to ``rerank_results``. When
    the reranker returns results, the candidates are returned in the
    reranker's order; otherwise the raw search results are returned.

    Args:
        user_interests: Text describing what the user is interested in.
        top_n: Number of candidates to request from the vector store.

    Returns:
        A list of articles. Empty when ``user_interests`` is empty/blank,
        ``top_n`` is not positive, or no query embedding could be produced.
    """
    # Nothing meaningful to search for: avoid the embedding and search calls.
    if top_n <= 0 or not user_interests or not str(user_interests).strip():
        return []

    query_embedding = get_query_embedding(user_interests)
    if not query_embedding:
        return []

    recommendations = vector_db.search(query_embedding, k=top_n)
    if not recommendations:
        return recommendations

    # Re-rank the candidates against the original query text.
    documents = [f"{art['title']} {art['content']}" for art in recommendations]
    reranked = rerank_results(user_interests, documents)
    if not reranked:
        return recommendations

    # Each rerank result refers to a candidate by position in ``documents``.
    # Skip positions outside the candidate list; the lower bound matters
    # because a negative index would otherwise silently wrap around.
    candidate_count = len(recommendations)
    return [
        recommendations[result.index]
        for result in reranked
        if 0 <= result.index < candidate_count
    ]