Files
ds_task_ai_news_bolade/backend/vector_store.py
T
boladeE bc485b44b8 Update README and backend functionality for improved news application
- Enhanced README.md with a clearer project overview, features, technologies used, and installation instructions.
- Updated vector dimension in config.py from 4096 to 1024 for Cohere embeddings.
- Modified main.py to serve HTML responses for the home page, news fetching, and recommendations.
- Improved error handling and ensured articles have links in the responses.
- Cleaned up news_fetcher.py by removing unnecessary print statements.
- Updated recommender.py to refine insights generation and summary extraction.
- Added Jinja2 for templating and improved the project structure for better organization.
- Included API documentation for better understanding of endpoints and usage.
2025-04-15 11:59:39 +01:00

92 lines
3.2 KiB
Python

from pinecone import Pinecone, ServerlessSpec
from typing import List, Dict, Any
from config import (
PINECONE_API_KEY,
PINECONE_INDEX_NAME,
VECTOR_DIMENSION,
TOP_K_RESULTS
)
class VectorStore:
    """Wrapper around a Pinecone index that stores article embeddings.

    On construction the index named by ``PINECONE_INDEX_NAME`` is created if
    it is missing, and a handle to it is kept in ``self.index``. The public
    methods never raise on client errors: they print the error and return a
    failure value (``False`` or ``[]``) instead.
    """

    # Article fields copied into each vector's metadata on upsert.
    _METADATA_FIELDS = (
        "title",
        "content",
        "link",
        "published",
        "source",
        "categories",
    )

    def __init__(self):
        """Create the Pinecone client and make sure the index is available."""
        self.pinecone = Pinecone(api_key=PINECONE_API_KEY)
        self.index_name = PINECONE_INDEX_NAME
        self._ensure_index()

    def _ensure_index(self):
        """Ensure the Pinecone index exists, creating it if necessary.

        A missing index is created as a serverless cosine index on AWS
        ``us-east-1`` with ``VECTOR_DIMENSION`` dimensions. In both cases
        ``self.index`` is bound to the index afterwards.
        """
        existing_names = self.pinecone.list_indexes().names()
        if self.index_name not in existing_names:
            self.pinecone.create_index(
                name=self.index_name,
                dimension=VECTOR_DIMENSION,
                metric="cosine",
                spec=ServerlessSpec(cloud="aws", region="us-east-1"),
            )
            print(f"Created new index '{self.index_name}' with dimension {VECTOR_DIMENSION}")

        self.index = self.pinecone.Index(self.index_name)

    def _build_vector(self, article: Dict[str, Any]) -> Dict[str, Any]:
        """Convert one article dict into a Pinecone vector record.

        Metadata fields that are absent or ``None`` are left out rather than
        sent: Pinecone's metadata format does not accept null values, and a
        single incomplete article should not fail the whole request.
        """
        metadata = {
            field: article[field]
            for field in self._METADATA_FIELDS
            if article.get(field) is not None
        }
        return {
            "id": article["id"],
            "values": article["embedding"],
            "metadata": metadata,
        }

    def upsert_articles(self, articles: List[Dict[str, Any]], batch_size: int = 100) -> bool:
        """Upsert articles to the vector store.

        Args:
            articles: Article dicts. Each needs ``"id"`` and ``"embedding"``;
                articles missing either (or carrying ``None`` for them) are
                skipped. The keys listed in ``_METADATA_FIELDS`` are stored
                as metadata when present.
            batch_size: Maximum number of vectors sent per upsert request,
                so large article lists stay within request-size limits.

        Returns:
            ``True`` if every batch was written (or there was nothing to
            write), ``False`` if the client raised. Batches written before a
            failure are not rolled back.

        Raises:
            ValueError: If ``batch_size`` is smaller than 1.
        """
        if batch_size < 1:
            raise ValueError("batch_size must be at least 1")

        try:
            vectors = [
                self._build_vector(article)
                for article in articles
                if article.get("embedding") is not None
                and article.get("id") is not None
            ]

            for start in range(0, len(vectors), batch_size):
                self.index.upsert(vectors=vectors[start:start + batch_size])

            return True
        except Exception as e:
            # Boundary with the third-party client: report and signal failure
            # instead of propagating, matching the other public methods.
            print(f"Error upserting articles: {str(e)}")
            return False

    def search_similar(self, query_embedding: List[float], top_k: int = TOP_K_RESULTS) -> List[Dict[str, Any]]:
        """Search for similar articles using the query embedding.

        Args:
            query_embedding: Embedding vector to query the index with.
            top_k: Maximum number of matches to request.

        Returns:
            One dict per match containing ``"id"``, ``"score"`` and the
            match's stored metadata keys; an empty list if the query fails.
        """
        try:
            results = self.index.query(
                vector=query_embedding,
                top_k=top_k,
                include_metadata=True,
            )

            # A match without metadata yields just id/score instead of
            # raising on ``**None`` and discarding every other result.
            return [
                {
                    "id": match.id,
                    "score": match.score,
                    **(match.metadata or {}),
                }
                for match in results.matches
            ]
        except Exception as e:
            print(f"Error searching similar articles: {str(e)}")
            return []

    def delete_article(self, article_id: str) -> bool:
        """Delete an article from the vector store.

        Args:
            article_id: ID of the vector to delete.

        Returns:
            ``True`` if the delete call completed, ``False`` if it raised.
        """
        try:
            self.index.delete(ids=[article_id])
            return True
        except Exception as e:
            print(f"Error deleting article: {str(e)}")
            return False