feat: Implement Pinecone vector store integration

- Update config.py with Pinecone settings and model configurations
- Implement VectorStore class with Pinecone backend
- Add comprehensive vector operations (add, search, delete)
- Set up proper error handling and metadata management
- Add .gitignore for Python project
This commit is contained in:
boladeE
2025-04-16 23:09:52 +01:00
commit 859c17aad8
27 changed files with 2820 additions and 0 deletions
+55
View File
@@ -0,0 +1,55 @@
import cohere
import numpy as np
from typing import List, Union
import os
from dotenv import load_dotenv
load_dotenv()
class CohereEmbeddings:
    """Thin wrapper around the Cohere client for generating text embeddings.

    The API key is read from the ``COHERE_API_KEY`` environment variable when
    the object is constructed.
    """

    # Defaults used when the caller does not override them; these are the
    # values the embed call has always been made with.
    DEFAULT_MODEL = 'embed-english-v3.0'
    DEFAULT_INPUT_TYPE = 'search_document'

    def __init__(self):
        """
        Create the Cohere client from the ``COHERE_API_KEY`` environment variable.

        Raises:
            ValueError: If ``COHERE_API_KEY`` is unset or empty.
        """
        self.api_key = os.getenv('COHERE_API_KEY')
        if not self.api_key:
            raise ValueError("COHERE_API_KEY environment variable is not set")
        self.client = cohere.Client(self.api_key)

    def generate(
        self,
        text: Union[str, List[str]],
        *,
        input_type: str = DEFAULT_INPUT_TYPE,
        model: str = DEFAULT_MODEL,
    ) -> np.ndarray:
        """
        Generate embeddings for the given text using Cohere.

        Args:
            text: Single text string or list of texts
            input_type: Value forwarded as ``input_type`` to the embed call.
                Defaults to 'search_document'; pass e.g. 'search_query' when
                embedding a query rather than a stored document.
            model: Name of the Cohere embedding model to use.

        Returns:
            numpy array built from ``response.embeddings`` (presumably one row
            per input text -- confirm against the Cohere SDK version in use)
        """
        # Normalise a single string to a one-element list so the API always
        # receives a list of texts.
        if isinstance(text, str):
            text = [text]

        response = self.client.embed(
            texts=text,
            model=model,
            input_type=input_type,
        )
        return np.array(response.embeddings)

    def generate_batch(
        self,
        texts: List[str],
        batch_size: int = 96,
        *,
        input_type: str = DEFAULT_INPUT_TYPE,
        model: str = DEFAULT_MODEL,
    ) -> List[np.ndarray]:
        """
        Generate embeddings for a large batch of texts.

        The texts are sent to ``generate`` in consecutive slices of at most
        ``batch_size`` items, and the per-slice results are flattened into a
        single list.

        Args:
            texts: List of texts to generate embeddings for
            batch_size: Size of each batch; must be at least 1.
                NOTE(review): the default of 96 appears to match Cohere's
                per-request text limit -- confirm against the embed API docs.
            input_type: Forwarded to ``generate`` for every batch.
            model: Forwarded to ``generate`` for every batch.

        Returns:
            List of numpy arrays containing embeddings (one entry per row of
            each batch's result array); empty if ``texts`` is empty.

        Raises:
            ValueError: If ``batch_size`` is less than 1.
        """
        # A negative step would make range() yield nothing and silently drop
        # every text; a zero step raises an opaque range() error. Fail loudly
        # and clearly for both.
        if batch_size < 1:
            raise ValueError(f"batch_size must be at least 1, got {batch_size}")

        all_embeddings = []
        for start in range(0, len(texts), batch_size):
            batch = texts[start:start + batch_size]
            embeddings = self.generate(batch, input_type=input_type, model=model)
            # Iterating the returned array yields its rows, so extend() adds
            # one array per embedded text.
            all_embeddings.extend(embeddings)
        return all_embeddings