feat: Implement Pinecone vector store integration

- Update config.py with Pinecone settings and model configurations - Implement VectorStore class with Pinecone backend - Add comprehensive vector operations (add, search, delete) - Set up proper error handling and metadata management - Add .gitignore for Python project
2025-04-16 23:09:52 +01:00
commit 859c17aad8
27 changed files with 2820 additions and 0 deletions
@@ -0,0 +1,55 @@
+import cohere
+import numpy as np
+from typing import List, Union
+import os
+from dotenv import load_dotenv
+
+load_dotenv()
+
+class CohereEmbeddings:
+    def __init__(self):
+        self.api_key = os.getenv('COHERE_API_KEY')
+        if not self.api_key:
+            raise ValueError("COHERE_API_KEY environment variable is not set")
+        self.client = cohere.Client(self.api_key)
+    
+    def generate(self, text: Union[str, List[str]]) -> np.ndarray:
+        """
+        Generate embeddings for the given text using Cohere.
+        
+        Args:
+            text: Single text string or list of texts
+            
+        Returns:
+            numpy array of embeddings
+        """
+        if isinstance(text, str):
+            text = [text]
+        
+        response = self.client.embed(
+            texts=text,
+            model='embed-english-v3.0',
+            input_type='search_document'
+        )
+        
+        return np.array(response.embeddings)
+    
+    def generate_batch(self, texts: List[str], batch_size: int = 96) -> List[np.ndarray]:
+        """
+        Generate embeddings for a large batch of texts.
+        
+        Args:
+            texts: List of texts to generate embeddings for
+            batch_size: Size of each batch
+            
+        Returns:
+            List of numpy arrays containing embeddings
+        """
+        all_embeddings = []
+        
+        for i in range(0, len(texts), batch_size):
+            batch = texts[i:i + batch_size]
+            embeddings = self.generate(batch)
+            all_embeddings.extend(embeddings)
+        
+        return all_embeddings