Files
ds_task_ai_news_bolade/backend/vector_store.py
T
boladeE 82fe3608d2 Refactor backend configuration and enhance news fetching functionality
- Introduced a Config dataclass in config.py to manage API keys, RSS feeds, and directory paths more effectively.
- Updated the NewsFetcher class to include retry logic for fetching articles from RSS feeds.
- Modified the EmbeddingGenerator and NewsRecommender classes to utilize the new configuration structure.
- Enhanced main.py to implement API token verification for secure access to news fetching and recommendations.
2025-04-16 17:55:36 +01:00

87 lines
3.2 KiB
Python

from pinecone import Pinecone, ServerlessSpec
from typing import List, Dict, Any, Optional
from config import config
class VectorStore:
    """Wrapper around one Pinecone index that stores article embeddings.

    On construction the index named by ``config.pinecone_index_name`` is
    created if it is not already listed, and a handle to it is kept on
    ``self.index``. All data methods are best-effort: failures are printed
    and reported through the return value instead of being raised.
    """

    # Article keys copied into each vector's metadata.
    _METADATA_FIELDS = (
        "title",
        "content",
        "link",
        "published",
        "source",
        "categories",
    )

    # Number of vectors sent per upsert request, so that large article sets
    # are not pushed in a single oversized request.
    _UPSERT_BATCH_SIZE = 100

    def __init__(self, pinecone_client: Optional["Pinecone"] = None):
        """Connect to Pinecone and make sure the configured index exists.

        Args:
            pinecone_client: Client to use. When omitted (or falsy), a new
                ``Pinecone`` client is built from ``config.pinecone_api_key``.
        """
        self.pinecone = pinecone_client or Pinecone(api_key=config.pinecone_api_key)
        self.index_name = config.pinecone_index_name
        self._ensure_index()

    def _ensure_index(self):
        """Create the index if it is not listed, then bind ``self.index``."""
        if self.index_name not in self.pinecone.list_indexes().names():
            self.pinecone.create_index(
                name=self.index_name,
                dimension=config.vector_dimension,
                metric="cosine",
                spec=ServerlessSpec(cloud="aws", region="us-east-1"),
            )
            print(f"Created new index '{self.index_name}' with dimension {config.vector_dimension}")

        # Bound in both cases: whether the index already existed or was just created.
        self.index = self.pinecone.Index(self.index_name)

    def upsert_articles(self, articles: List[Dict[str, Any]]) -> bool:
        """Upsert articles to the vector store.

        Articles without an ``"embedding"`` key are skipped. Metadata fields
        that are missing or ``None`` are left out of that article's metadata
        rather than failing the whole call. Vectors are sent in batches of
        ``_UPSERT_BATCH_SIZE``.

        Args:
            articles: Article dicts. Each stored article must have ``"id"``
                and ``"embedding"``; the keys in ``_METADATA_FIELDS`` are
                optional.

        Returns:
            True if every batch was sent (also when there was nothing to
            send); False if any error occurred. When a later batch fails,
            earlier batches have already been sent.
        """
        try:
            vectors = [
                {
                    "id": article["id"],
                    "values": article["embedding"],
                    "metadata": {
                        field: article[field]
                        for field in self._METADATA_FIELDS
                        if article.get(field) is not None
                    },
                }
                for article in articles
                if "embedding" in article
            ]

            batch_size = self._UPSERT_BATCH_SIZE
            for start in range(0, len(vectors), batch_size):
                self.index.upsert(vectors=vectors[start:start + batch_size])
            return True
        except Exception as e:
            print(f"Error upserting articles: {str(e)}")
            return False

    def search_similar(
        self, query_embedding: List[float], top_k: Optional[int] = None
    ) -> List[Dict[str, Any]]:
        """Search for similar articles using the query embedding.

        Args:
            query_embedding: Vector to search with.
            top_k: Maximum number of matches to request. When omitted (or
                falsy), ``config.top_k_results`` is used.

        Returns:
            One dict per match containing ``"id"``, ``"score"`` and the
            match's metadata entries. Returns an empty list on any error.
        """
        try:
            results = self.index.query(
                vector=query_embedding,
                top_k=top_k or config.top_k_results,
                include_metadata=True,
            )
            return [
                {
                    "id": match.id,
                    "score": match.score,
                    # A match may carry no metadata; treat that as empty
                    # instead of letting the unpacking raise and discard
                    # every result.
                    **(match.metadata or {}),
                }
                for match in results.matches
            ]
        except Exception as e:
            print(f"Error searching similar articles: {str(e)}")
            return []

    def delete_article(self, article_id: str) -> bool:
        """Delete an article from the vector store.

        Args:
            article_id: ID of the vector to delete.

        Returns:
            True if the delete call completed; False if it raised.
        """
        try:
            self.index.delete(ids=[article_id])
            return True
        except Exception as e:
            print(f"Error deleting article: {str(e)}")
            return False