Add backend functionality for news fetching, processing, and recommendations
- Implemented NewsFetcher class to fetch articles from RSS feeds and clean HTML content.
- Added EmbeddingGenerator for generating embeddings using Cohere API.
- Created VectorStore for storing and retrieving articles using Pinecone.
- Developed NewsRecommender for analyzing articles and generating insights with Groq.
- Set up FastAPI application with endpoints for fetching news and providing recommendations.
- Configured logging for better traceability and debugging.
- Updated .gitignore to include environment variables and data directories.
- Added requirements.txt for project dependencies.
This commit is contained in:
@@ -0,0 +1,50 @@
|
||||
import logging
from typing import Any, Dict, List

import cohere

from config import COHERE_API_KEY
|
||||
|
||||
logger = logging.getLogger(__name__)


class EmbeddingGenerator:
    """Generate text embeddings for articles and search queries via Cohere.

    Every request uses the model named by ``MODEL``. Failures of the embed
    call are logged and reported to the caller as an empty list instead of
    being raised.
    """

    # Single model name shared by the document and query embedding calls.
    MODEL = "embed-english-v3.0"

    def __init__(self):
        """Create the Cohere client from ``COHERE_API_KEY``."""
        self.client = cohere.Client(COHERE_API_KEY)

    def _embed(self, texts: List[str], input_type: str) -> List[List[float]]:
        """Send ``texts`` to the embed endpoint and return ``response.embeddings``.

        Args:
            texts: Strings to embed.
            input_type: Value forwarded as the ``input_type`` argument
                (``"search_document"`` or ``"search_query"`` in this class).

        Raises:
            Exception: Whatever the client raises; callers decide how to
                handle it.
        """
        response = self.client.embed(
            texts=texts,
            model=self.MODEL,
            input_type=input_type,
        )
        return response.embeddings

    def generate_embeddings(self, texts: List[str]) -> List[List[float]]:
        """Generate embeddings for a list of texts using Cohere.

        Args:
            texts: Document texts to embed.

        Returns:
            The embeddings returned by the client, or ``[]`` when ``texts``
            is empty or the request fails.
        """
        # Nothing to embed: skip the network round trip entirely.
        if not texts:
            return []

        try:
            return self._embed(texts, "search_document")
        except Exception as e:
            # Best-effort boundary: callers rely on getting [] rather than an
            # exception, so log with traceback and keep that contract.
            logger.exception("Error generating embeddings: %s", e)
            return []

    def process_articles(self, articles: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
        """Process articles and add embeddings to them.

        Each article's ``title`` and ``content`` are joined with a space and
        embedded; the result is stored in place under the ``"embedding"`` key.

        Args:
            articles: Article dicts, each with ``"title"`` and ``"content"``.

        Returns:
            The same ``articles`` list. If embeddings could not be obtained
            for every article, a warning is logged and no article is given an
            ``"embedding"`` key.

        Raises:
            KeyError: If an article lacks ``"title"`` or ``"content"``.
        """
        if not articles:
            return articles

        # Prepare texts for embedding
        texts = [
            f"{article['title']} {article['content']}"
            for article in articles
        ]

        embeddings = self.generate_embeddings(texts)

        # zip() would silently truncate on a count mismatch (e.g. [] after a
        # failed request), so make the failure visible instead of pairing
        # articles with possibly misaligned vectors.
        if len(embeddings) != len(articles):
            logger.warning(
                "Expected %d embeddings but received %d; "
                "articles returned without embeddings",
                len(articles),
                len(embeddings),
            )
            return articles

        # Add embeddings to articles
        for article, embedding in zip(articles, embeddings):
            article["embedding"] = embedding

        return articles

    def get_query_embedding(self, query: str) -> List[float]:
        """Generate embedding for a search query.

        Args:
            query: The search query text.

        Returns:
            The first embedding returned for ``query``, or ``[]`` when the
            request fails or returns no embeddings.
        """
        try:
            return self._embed([query], "search_query")[0]
        except Exception as e:
            # Also covers an empty embeddings list (IndexError), matching the
            # original best-effort behavior.
            logger.exception("Error generating query embedding: %s", e)
            return []
|
||||
|
||||
Reference in New Issue
Block a user