Files
ds_task_ai_news_bolade/backend/embeddings.py
T
boladeE e3d00bb4dc Add backend functionality for news fetching, processing, and recommendations
- Implemented NewsFetcher class to fetch articles from RSS feeds and clean HTML content.
- Added EmbeddingGenerator for generating embeddings using Cohere API.
- Created VectorStore for storing and retrieving articles using Pinecone.
- Developed NewsRecommender for analyzing articles and generating insights with Groq.
- Set up FastAPI application with endpoints for fetching news and providing recommendations.
- Configured logging for better traceability and debugging.
- Updated .gitignore to include environment variables and data directories.
- Added requirements.txt for project dependencies.
2025-04-14 21:44:43 +01:00

51 lines
1.7 KiB
Python

import logging
from typing import List, Dict, Any

import cohere

from config import COHERE_API_KEY
_logger = logging.getLogger(__name__)


class EmbeddingGenerator:
    """Generate Cohere text embeddings for articles and search queries.

    All requests go through ``self.client`` (a ``cohere.Client`` built from
    ``COHERE_API_KEY``). API failures are logged and reported to the caller
    as an empty list rather than raised.
    """

    # Model name sent with every embed request.
    MODEL = "embed-english-v3.0"

    # Upper bound on texts sent in one embed request. Cohere's embed endpoint
    # documents a maximum of 96 texts per call; larger inputs are chunked.
    MAX_TEXTS_PER_REQUEST = 96

    def __init__(self):
        self.client = cohere.Client(COHERE_API_KEY)

    def generate_embeddings(self, texts: List[str]) -> List[List[float]]:
        """Generate document embeddings for a list of texts using Cohere.

        Args:
            texts: Texts to embed. Sent with ``input_type="search_document"``.

        Returns:
            One embedding per input text, in input order. Returns ``[]`` when
            ``texts`` is empty or when any request fails (all-or-nothing, so
            a non-empty result always lines up with ``texts``).
        """
        texts = list(texts)
        if not texts:
            # Nothing to embed; avoid a pointless (and failing) API call.
            return []

        batch_size = max(1, self.MAX_TEXTS_PER_REQUEST)
        embeddings: List[List[float]] = []
        try:
            for start in range(0, len(texts), batch_size):
                response = self.client.embed(
                    texts=texts[start:start + batch_size],
                    model=self.MODEL,
                    input_type="search_document",
                )
                # NOTE(review): assumes response.embeddings holds one vector
                # per submitted text, in order - confirm against the SDK.
                embeddings.extend(response.embeddings)
        except Exception:
            # Best-effort contract: log with traceback, signal failure as [].
            _logger.exception("Error generating embeddings")
            return []
        return embeddings

    def process_articles(self, articles: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
        """Process articles and add embeddings to them.

        Each article is embedded as ``"<title> <content>"``; a missing or
        ``None`` title/content is treated as an empty string.

        Args:
            articles: Article dicts. They are modified in place: on success
                each one gains an ``"embedding"`` key.

        Returns:
            The same ``articles`` list. If embedding fails, or the number of
            embeddings does not match the number of articles, a warning is
            logged and the articles are returned without ``"embedding"``.
        """
        if not articles:
            return articles

        # Prepare texts for embedding
        texts = [
            f"{article.get('title') or ''} {article.get('content') or ''}"
            for article in articles
        ]

        # Generate embeddings
        embeddings = self.generate_embeddings(texts)
        if len(embeddings) != len(articles):
            # A count mismatch means alignment cannot be trusted; attach none.
            _logger.warning(
                "Expected %d embeddings but got %d; articles left without embeddings",
                len(articles),
                len(embeddings),
            )
            return articles

        # Add embeddings to articles
        for article, embedding in zip(articles, embeddings):
            article["embedding"] = embedding
        return articles

    def get_query_embedding(self, query: str) -> List[float]:
        """Generate embedding for a search query.

        Args:
            query: Query text. Sent with ``input_type="search_query"``.

        Returns:
            The embedding for ``query``, or ``[]`` if the request fails.
        """
        try:
            response = self.client.embed(
                texts=[query],
                model=self.MODEL,
                input_type="search_query",
            )
            return response.embeddings[0]
        except Exception:
            _logger.exception("Error generating query embedding")
            return []