update recommender and news_fetcher
This commit is contained in:
+59
-29
@@ -1,3 +1,4 @@
|
||||
from typing import List, Dict, Optional
|
||||
from .embeddings import get_embeddings, get_query_embedding, rerank_results
|
||||
from .vector_store import VectorDB
|
||||
import groq
|
||||
@@ -10,6 +11,56 @@ groq_client = groq.Groq(api_key=Config.GROQ_API_KEY)
|
||||
vector_db = VectorDB()
|
||||
|
||||
|
||||
class NewsRecommender:
    """Recommend news articles via vector similarity search and re-ranking."""

    def __init__(self, vector_db: VectorDB):
        # Every lookup made by this recommender goes through this store.
        self.vector_db = vector_db

    def recommend_by_text(
        self, text_description: str, top_n: int = 3
    ) -> List[Dict]:
        """
        Find the stored articles that best match a free-text description.

        Args:
            text_description: Text that is embedded and used as the query.
            top_n: Passed to the vector store as ``k``, the number of
                neighbours to request.

        Returns:
            An empty list when no query embedding could be produced.
            Otherwise the search hits, reordered by the re-ranker when it
            yields results, or in vector-search order when it does not.
        """
        embedding = get_query_embedding(text_description)
        if not embedding:
            return []

        candidates = self.vector_db.search(embedding, k=top_n)
        if not candidates:
            # Nothing to re-rank; hand back the (empty) search result as-is.
            return candidates

        # The re-ranker scores title + content of each candidate.
        texts = [f"{item['title']} {item['content']}" for item in candidates]
        ranking = rerank_results(text_description, texts)
        if not ranking:
            return candidates

        # Each ranking entry points back at a candidate through ``.index``;
        # entries pointing past the end of the candidate list are dropped.
        return [
            candidates[hit.index]
            for hit in ranking
            if hit.index < len(candidates)
        ]

    def get_personalized_recommendations(
        self, user_interests: str, top_n: int = 5
    ) -> List[Dict]:
        """Return recommendations for a textual description of interests."""
        return self.recommend_by_text(user_interests, top_n)
|
||||
|
||||
|
||||
# Initialize recommender instance
|
||||
news_recommender = NewsRecommender(vector_db)
|
||||
|
||||
|
||||
def process_articles_for_vector_db(articles):
|
||||
"""Process articles and add to vector database"""
|
||||
if not articles:
|
||||
@@ -35,18 +86,14 @@ def recommend_similar(article_id, top_n=3):
|
||||
|
||||
# Get embedding for the article
|
||||
article_text = f"{article['title']} {article['content']}"
|
||||
query_embedding = get_query_embedding(article_text)
|
||||
|
||||
if not query_embedding:
|
||||
return []
|
||||
|
||||
# Search for similar articles
|
||||
similar_articles = vector_db.search(query_embedding, k=top_n + 1)
|
||||
# Use the new recommender with text description
|
||||
recommendations = news_recommender.recommend_by_text(article_text, top_n + 1)
|
||||
|
||||
# Filter out the original article
|
||||
recommendations = [art for art in similar_articles if art.get('slug') != article_id]
|
||||
filtered_recommendations = [art for art in recommendations if art.get('slug') != article_id]
|
||||
|
||||
return recommendations[:top_n]
|
||||
return filtered_recommendations[:top_n]
|
||||
|
||||
|
||||
def analyze_article_with_groq(article_text):
|
||||
@@ -57,7 +104,8 @@ def analyze_article_with_groq(article_text):
|
||||
messages=[
|
||||
{
|
||||
"role": "system",
|
||||
"content": "You are an AI news analyst. Provide insights, key points, and sentiment analysis for the given article."
|
||||
"content": "You are an AI news analyst. Provide insights, "
|
||||
"key points, and sentiment analysis for the given article."
|
||||
},
|
||||
{
|
||||
"role": "user",
|
||||
@@ -75,23 +123,5 @@ def analyze_article_with_groq(article_text):
|
||||
|
||||
def get_personalized_recommendations(user_interests, top_n=5):
    """Get personalized recommendations based on user interests.

    Thin module-level wrapper kept so existing callers of this function keep
    working. The embed / search / re-rank pipeline lives in
    ``NewsRecommender.recommend_by_text``; this function forwards to the
    shared ``news_recommender`` instance instead of repeating that logic.

    Args:
        user_interests: Text describing what the user is interested in.
        top_n: Number of recommendations to request (defaults to 5).

    Returns:
        Whatever ``news_recommender.get_personalized_recommendations``
        returns for the given arguments.
    """
    return news_recommender.get_personalized_recommendations(
        user_interests, top_n
    )
|
||||
|
||||
Reference in New Issue
Block a user