feat: Implement Pinecone vector store integration
- Update config.py with Pinecone settings and model configurations
- Implement VectorStore class with Pinecone backend
- Add comprehensive vector operations (add, search, delete)
- Set up proper error handling and metadata management
- Add .gitignore for Python project
This commit is contained in:
@@ -0,0 +1,113 @@
|
||||
from typing import Dict, List
|
||||
import json
|
||||
import os
|
||||
from transformers import pipeline
|
||||
import numpy as np
|
||||
|
||||
class BrandStyleChecker:
    """Score how well a piece of content matches the brand's style.

    The overall score blends three signals: sentiment (from a Hugging Face
    text-classification pipeline), coverage of the configured tone keywords,
    and a small set of tone heuristics.
    """

    def __init__(self):
        self.style_guidelines = self._load_style_guidelines()
        self.classifier = pipeline(
            "text-classification",
            model="distilbert-base-uncased-finetuned-sst-2-english",
            device=-1
        )
        self.tone_keywords = self._load_tone_keywords()

    @staticmethod
    def _load_json_or_default(path: str, default: Dict) -> Dict:
        """Return the parsed JSON file at *path*, or *default* if it is absent."""
        if os.path.exists(path):
            # JSON is UTF-8 by specification; do not depend on the locale.
            with open(path, 'r', encoding='utf-8') as f:
                return json.load(f)
        return default

    @staticmethod
    def _has_term(text: str, term: str, whole_word: bool = False) -> bool:
        """Tell whether *term* occurs in *text* starting at a word boundary.

        Args:
            text: Lower-cased text to search.
            term: Lower-cased word, stem or phrase to look for.
            whole_word: When True the match must also end at a word
                boundary ("us" will not match "use"); when False the term
                acts as a stem ("expert" matches "experts").

        Returns:
            True if the term is present, False otherwise (including for an
            empty term).
        """
        import re  # local import: the module header does not import re

        if not term:
            return False
        # The look-behind stops "leading" matching inside "misleading" and
        # "us" matching inside "business".
        pattern = r"(?<!\w)" + re.escape(term)
        if whole_word:
            pattern += r"(?!\w)"
        return re.search(pattern, text) is not None

    def _load_style_guidelines(self) -> Dict:
        """Load brand style guidelines from file, falling back to defaults."""
        return self._load_json_or_default(
            'data/style_guidelines/brand_guidelines.json',
            {
                "tone": "professional yet approachable",
                "voice": "confident and authoritative",
                "key_phrases": [],
                "avoided_phrases": [],
                "brand_values": []
            },
        )

    def _load_tone_keywords(self) -> Dict[str, List[str]]:
        """Load tone keywords for analysis, falling back to defaults."""
        return self._load_json_or_default(
            'data/style_guidelines/tone_keywords.json',
            {
                "professional": ["expert", "professional", "industry", "experience"],
                "approachable": ["friendly", "helpful", "understand", "support"],
                "confident": ["guaranteed", "proven", "success", "expertise"],
                "authoritative": ["leading", "best", "premier", "trusted"]
            },
        )

    def check_alignment(self, content: str) -> float:
        """
        Check how well the content aligns with the brand style.

        Args:
            content: The content to check

        Returns:
            Alignment score between 0 and 1. Blank content scores 0.0
            without running the sentiment model.
        """
        if not content or not content.strip():
            # Nothing to evaluate; the classifier's verdict on an empty
            # string would be arbitrary.
            return 0.0

        scores = [
            self._check_sentiment(content),
            self._check_keywords(content),
            self._check_tone_consistency(content),
        ]
        # Weighted average of sentiment, keywords and tone, in that order.
        weights = [0.3, 0.4, 0.3]  # Adjust weights based on importance
        return float(np.average(scores, weights=weights))

    def _check_sentiment(self, content: str) -> float:
        """Return the probability that the content's sentiment is positive."""
        # truncation=True keeps long copy within the model's input limit
        # instead of raising inside the pipeline.
        result = self.classifier(content, truncation=True)[0]
        # Assuming positive sentiment (score > 0.5) is desired
        if result['label'] == 'POSITIVE':
            return result['score']
        return 1 - result['score']

    def _check_keywords(self, content: str) -> float:
        """Return the fraction of configured tone keywords found in the content.

        Keywords are matched case-insensitively as stems at a word boundary.
        """
        content_lower = content.lower()
        all_keywords = [
            keyword
            for keywords in self.tone_keywords.values()
            for keyword in keywords
        ]
        if not all_keywords:
            return 0.0
        found_keywords = sum(
            1 for keyword in all_keywords
            if self._has_term(content_lower, keyword.lower())
        )
        return found_keywords / len(all_keywords)

    def _check_tone_consistency(self, content: str) -> float:
        """Return the fraction of tone heuristics the content satisfies.

        This is a simplified version - in practice, you might want to use
        more sophisticated NLP techniques or a fine-tuned model.
        """
        content_lower = content.lower()
        # Each check is (whole words, stems). Pronouns must match as whole
        # words; the other terms may carry suffixes ("guaranteed", "helps").
        tone_checks = [
            (("we", "our", "us"), ()),                       # professional
            (("you", "your"), ("help", "support")),          # approachable
            ((), ("guarantee", "proven", "expert")),         # confident
        ]
        tone_matches = 0
        for whole_words, stems in tone_checks:
            matched = any(
                self._has_term(content_lower, word, whole_word=True)
                for word in whole_words
            ) or any(self._has_term(content_lower, stem) for stem in stems)
            if matched:
                tone_matches += 1
        return tone_matches / len(tone_checks)
|
||||
@@ -0,0 +1,47 @@
|
||||
from pydantic_settings import BaseSettings
|
||||
from typing import Optional
|
||||
import os
|
||||
from dotenv import load_dotenv
|
||||
|
||||
load_dotenv()
|
||||
|
||||
class Settings(BaseSettings):
    """Central application configuration.

    Each field below carries its fallback value. The settings marked with
    ``os.getenv`` take their default from the process environment, which
    ``load_dotenv()`` has already populated from ``.env`` by the time this
    class is defined.
    """

    # API Keys
    COHERE_API_KEY: str = os.getenv('COHERE_API_KEY', '')
    PINECONE_API_KEY: str = os.getenv('PINECONE_API_KEY', '')

    # Model Settings
    MODEL_NAME: str = "facebook/opt-350m"  # Using the finetuned model
    EMBEDDING_MODEL: str = "embed-english-v3.0"

    # Vector Store Settings
    # embed-english-v3.0 produces 1024-dimensional vectors; the Pinecone
    # index must be created with the same dimension or upserts/queries fail.
    VECTOR_DIMENSION: int = 1024
    MAX_SEARCH_RESULTS: int = 5

    # Pinecone Settings
    PINECONE_ENVIRONMENT: str = os.getenv('PINECONE_ENVIRONMENT', 'us-west1-gcp')
    PINECONE_INDEX_NAME: str = os.getenv('PINECONE_INDEX_NAME', 'marketing-assistant')

    # Content Generation Settings
    MAX_CONTENT_LENGTH: int = 500
    TEMPERATURE: float = 0.7
    TOP_P: float = 0.9

    # Brand Style Settings
    BRAND_GUIDELINES_PATH: str = "data/style_guidelines/brand_guidelines.json"
    TONE_KEYWORDS_PATH: str = "data/style_guidelines/tone_keywords.json"

    # Storage Settings
    VECTOR_STORE_PATH: str = "data/vector_store"
    PAST_CAMPAIGNS_PATH: str = "data/past_campaigns"
    USER_QUERIES_PATH: str = "data/user_queries"

    # Finetuned Model Settings
    FINETUNED_MODEL_PATH: str = "../finetuned_model"

    class Config:
        env_file = ".env"
        case_sensitive = True


# Create global settings instance
settings = Settings()
|
||||
@@ -0,0 +1,98 @@
|
||||
from transformers import pipeline
|
||||
from typing import List, Optional
|
||||
import torch
|
||||
from finetuned_model import finetuned_model
|
||||
|
||||
class MarketingCopywriter:
    """Generate marketing copy with the shared finetuned language model."""

    def __init__(self):
        # Use the finetuned model instead of the default GPT-2
        self.model = finetuned_model

    def generate(
        self,
        prompt: str,
        content_type: str,
        similar_content: List[str],
        tone: Optional[str] = None,
        target_audience: Optional[str] = None,
    ) -> str:
        """Generate one piece of marketing copy.

        Args:
            prompt: The main prompt for content generation
            content_type: Type of content (email, social media, etc.)
            similar_content: List of similar content for context
            tone: Optional tone specification
            target_audience: Optional target audience specification

        Returns:
            The first generated text after post-processing, or an empty
            string if the model returned nothing.
        """
        # target_audience is accepted and forwarded because
        # generate_marketing_copy() passes it; without the parameter that
        # call raised TypeError.
        generated_texts = self.model.generate_with_context(
            prompt=prompt,
            content_type=content_type,
            similar_content=similar_content,
            tone=tone,
            target_audience=target_audience,
            max_length=500,
            num_return_sequences=1,
            temperature=0.7,
            top_p=0.9
        )

        if not generated_texts:
            return ""
        # Only the first sequence is requested and returned.
        return self._post_process(generated_texts[0])

    def _build_context(
        self,
        prompt: str,
        content_type: str,
        similar_content: List[str],
        tone: Optional[str],
        target_audience: Optional[str]
    ) -> str:
        """Assemble the textual context block describing the request.

        Only the first three entries of *similar_content* are included.
        """
        lines = [f"Content Type: {content_type}"]
        if tone:
            lines.append(f"Tone: {tone}")
        if target_audience:
            lines.append(f"Target Audience: {target_audience}")

        lines.append("")
        lines.append("Similar Content Examples:")
        # Use top 3 similar content pieces
        lines.extend(f"- {content}" for content in similar_content[:3])

        lines.append("")
        lines.append(f"Generate marketing copy for: {prompt}")
        return "\n".join(lines) + "\n"

    def _post_process(self, text: str) -> str:
        """Clean up generated text (currently just trims whitespace)."""
        text = text.strip()
        # Add any additional post-processing steps here
        return text
|
||||
|
||||
# Module-level copywriter instance used by generate_marketing_copy() below.
# It is created at import time and holds a reference to finetuned_model.
copywriter = MarketingCopywriter()
|
||||
|
||||
def generate_marketing_copy(
    prompt: str,
    content_type: str,
    similar_content: List[str],
    tone: Optional[str] = None,
    target_audience: Optional[str] = None
) -> str:
    """
    Produce marketing copy by delegating to the module-level copywriter.

    Args:
        prompt: What the copy should be about
        content_type: Kind of content to write (email, social media, etc.)
        similar_content: Existing pieces of content to use as context
        tone: Desired tone, if any
        target_audience: Intended audience, if any

    Returns:
        The generated marketing copy
    """
    # Collect the arguments once and forward them all by keyword.
    request = {
        "prompt": prompt,
        "content_type": content_type,
        "similar_content": similar_content,
        "tone": tone,
        "target_audience": target_audience,
    }
    return copywriter.generate(**request)
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Manual smoke test. Guarded so that importing this module (for example
    # `from copywriter import generate_marketing_copy`) does not run a full
    # text generation as an import side effect.
    print(
        generate_marketing_copy(
            prompt="Help me write a blog post about the benefits of using our product",
            content_type="blog post",
            similar_content=[],
            tone="",
            target_audience=""
        )
    )
|
||||
@@ -0,0 +1,55 @@
|
||||
import cohere
|
||||
import numpy as np
|
||||
from typing import List, Union
|
||||
import os
|
||||
from dotenv import load_dotenv
|
||||
|
||||
load_dotenv()
|
||||
|
||||
class CohereEmbeddings:
    """Generate text embeddings through the Cohere API."""

    def __init__(self):
        self.api_key = os.getenv('COHERE_API_KEY')
        if not self.api_key:
            raise ValueError("COHERE_API_KEY environment variable is not set")
        self.client = cohere.Client(self.api_key)

    def generate(
        self,
        text: Union[str, List[str]],
        input_type: str = 'search_document',
    ) -> np.ndarray:
        """
        Generate embeddings for the given text using Cohere.

        Args:
            text: Single text string or list of texts
            input_type: Cohere input type. Keep the default
                ``'search_document'`` for content being stored; pass
                ``'search_query'`` when embedding a search query.

        Returns:
            numpy array of embeddings with one row per input text (a single
            string yields one row). Empty input returns an empty array
            without calling the API.
        """
        texts = [text] if isinstance(text, str) else list(text)
        if not texts:
            return np.empty((0, 0))

        response = self.client.embed(
            texts=texts,
            model='embed-english-v3.0',
            input_type=input_type
        )

        return np.array(response.embeddings)

    def generate_batch(
        self,
        texts: List[str],
        batch_size: int = 96,
        input_type: str = 'search_document',
    ) -> List[np.ndarray]:
        """
        Generate embeddings for a large batch of texts.

        Args:
            texts: List of texts to generate embeddings for
            batch_size: Size of each batch
            input_type: Cohere input type forwarded to :meth:`generate`

        Returns:
            List of numpy arrays, one embedding per input text, in order

        Raises:
            ValueError: If ``batch_size`` is smaller than 1.
        """
        if batch_size < 1:
            raise ValueError("batch_size must be at least 1")

        all_embeddings = []
        for start in range(0, len(texts), batch_size):
            batch = texts[start:start + batch_size]
            # extend() iterates the 2-D result row by row, so the output
            # holds one vector per text.
            all_embeddings.extend(self.generate(batch, input_type=input_type))

        return all_embeddings
|
||||
@@ -0,0 +1,158 @@
|
||||
import os
|
||||
import torch
|
||||
from transformers import AutoModelForCausalLM, AutoTokenizer
|
||||
from typing import List, Optional, Dict, Any
|
||||
|
||||
class FinetunedModel:
    """Wrapper around a locally stored finetuned causal language model."""

    def __init__(self, model_path: str = "../finetuned_model"):
        """
        Initialize the finetuned model.

        Args:
            model_path: Path to the finetuned model directory
        """
        self.model_path = model_path
        self.device = "cuda" if torch.cuda.is_available() else "cpu"

        print(f"Loading finetuned model from {model_path}")
        self.tokenizer = AutoTokenizer.from_pretrained(model_path)
        self.model = AutoModelForCausalLM.from_pretrained(model_path)
        self.model.to(self.device)

        # Set pad token if not set
        if self.tokenizer.pad_token is None:
            self.tokenizer.pad_token = self.tokenizer.eos_token

    def generate(
        self,
        prompt: str,
        max_length: int = 200,
        num_return_sequences: int = 1,
        temperature: float = 0.7,
        top_p: float = 0.9,
        **kwargs
    ) -> List[str]:
        """
        Generate text using the finetuned model.

        Args:
            prompt: The prompt to generate text from
            max_length: Maximum length of the generated text, in newly
                generated tokens (the prompt does not count against it)
            num_return_sequences: Number of sequences to generate
            temperature: Sampling temperature (higher = more random)
            top_p: Nucleus sampling parameter
            **kwargs: Additional arguments to pass to the model; these
                override the defaults set here (e.g. ``do_sample=False``)

        Returns:
            List of generated text sequences
        """
        # Format the prompt
        formatted_prompt = f"Prompt: {prompt}\nCompletion:"

        # Tokenize the prompt
        inputs = self.tokenizer(formatted_prompt, return_tensors="pt")
        inputs = {k: v.to(self.device) for k, v in inputs.items()}
        prompt_token_count = inputs["input_ids"].shape[-1]

        # max_new_tokens budgets only the completion. With max_length the
        # prompt tokens counted too, so a long context prompt could leave
        # no room to generate anything.
        generation_args = {
            "max_new_tokens": max_length,
            "num_return_sequences": num_return_sequences,
            "temperature": temperature,
            "top_p": top_p,
            "do_sample": True,
            "pad_token_id": self.tokenizer.eos_token_id,
        }
        # Merging avoids "multiple values for keyword argument" when a
        # caller overrides one of the defaults above.
        generation_args.update(kwargs)

        # Generate text
        with torch.no_grad():
            outputs = self.model.generate(**inputs, **generation_args)

        # A causal LM echoes the prompt at the start of every sequence, so
        # slice it off by token count instead of searching the decoded text
        # for "Completion:".
        return [
            self.tokenizer.decode(
                output[prompt_token_count:], skip_special_tokens=True
            ).strip()
            for output in outputs
        ]

    def generate_with_context(
        self,
        prompt: str,
        content_type: str,
        similar_content: List[str],
        tone: Optional[str] = None,
        target_audience: Optional[str] = None,
        max_length: int = 200,
        num_return_sequences: int = 1,
        temperature: float = 0.7,
        top_p: float = 0.9,
        **kwargs
    ) -> List[str]:
        """
        Generate text with additional context.

        Args:
            prompt: The main prompt for content generation
            content_type: Type of content (email, social media, etc.)
            similar_content: List of similar content for context
            tone: Optional tone specification
            target_audience: Optional target audience specification
            max_length: Maximum length of the generated text
            num_return_sequences: Number of sequences to generate
            temperature: Sampling temperature (higher = more random)
            top_p: Nucleus sampling parameter
            **kwargs: Additional arguments to pass to the model

        Returns:
            List of generated text sequences
        """
        # Build the context
        context = self._build_context(prompt, content_type, similar_content, tone, target_audience)

        # Generate text
        return self.generate(
            prompt=context,
            max_length=max_length,
            num_return_sequences=num_return_sequences,
            temperature=temperature,
            top_p=top_p,
            **kwargs
        )

    def _build_context(
        self,
        prompt: str,
        content_type: str,
        similar_content: List[str],
        tone: Optional[str],
        target_audience: Optional[str]
    ) -> str:
        """
        Build a context string for the model.

        Args:
            prompt: The main prompt for content generation
            content_type: Type of content (email, social media, etc.)
            similar_content: List of similar content for context. Entries
                may be plain strings or dicts with a ``'content'`` key;
                ``None`` is treated as an empty list.
            tone: Optional tone specification
            target_audience: Optional target audience specification

        Returns:
            Context string for the model
        """
        lines = [f"Content Type: {content_type}"]
        if tone:
            lines.append(f"Tone: {tone}")
        if target_audience:
            lines.append(f"Target Audience: {target_audience}")

        lines.append("")
        lines.append("Similar Content Examples:")
        # Use top 3 similar content pieces
        for item in (similar_content or [])[:3]:
            # Accept dict-shaped search results by taking their text,
            # rather than formatting the whole dict into the prompt.
            text = item.get("content", "") if isinstance(item, dict) else item
            lines.append(f"- {text}")

        lines.append("")
        lines.append(f"Generate marketing copy for: {prompt}")
        return "\n".join(lines) + "\n"
|
||||
|
||||
# Module-level model instance, created at import time with the default
# model_path; constructing it loads the tokenizer and model weights.
finetuned_model = FinetunedModel()
|
||||
@@ -0,0 +1,93 @@
|
||||
from fastapi import FastAPI, HTTPException
|
||||
from pydantic import BaseModel
|
||||
from typing import Optional, List
|
||||
import uvicorn
|
||||
from copywriter import generate_marketing_copy
|
||||
from vector_store import VectorStore
|
||||
from embeddings import CohereEmbeddings
|
||||
from brand_style import BrandStyleChecker
|
||||
from config import Settings
|
||||
from finetuned_model import finetuned_model
|
||||
|
||||
# FastAPI application that the route decorators below register handlers on.
app = FastAPI(title="Marketing Assistant AI")
# Module-level service objects, constructed once at import time and used by
# the request handlers.
settings = Settings()
vector_store = VectorStore()
embeddings = CohereEmbeddings()
brand_checker = BrandStyleChecker()
|
||||
|
||||
class CopyRequest(BaseModel):
    """Request body for POST /generate-copy."""

    # What the copy should be about; also embedded to look up similar content.
    prompt: str
    # Kind of content to produce.
    content_type: str
    # Optional style hints; omitted or null means "not specified".
    tone: Optional[str] = None
    target_audience: Optional[str] = None
|
||||
|
||||
class CopyResponse(BaseModel):
    """Response body for POST /generate-copy."""

    # The generated marketing copy.
    content: str
    # NOTE: the /generate-copy handler currently fills this with a fixed
    # placeholder value rather than a computed confidence.
    confidence_score: float
    # Value returned by BrandStyleChecker.check_alignment for the content.
    brand_alignment_score: float
|
||||
|
||||
class DirectModelRequest(BaseModel):
    """Request body for POST /direct-model.

    The field names mirror the keyword arguments the handler passes to
    ``finetuned_model.generate``.
    """

    # Raw prompt passed to the model.
    prompt: str
    # Generation parameters with their defaults.
    max_length: Optional[int] = 200
    num_return_sequences: Optional[int] = 1
    temperature: Optional[float] = 0.7
    top_p: Optional[float] = 0.9
|
||||
|
||||
class DirectModelResponse(BaseModel):
    """Response body for POST /direct-model."""

    # The list of texts returned by finetuned_model.generate.
    generated_texts: List[str]
|
||||
|
||||
@app.post("/generate-copy", response_model=CopyResponse)
async def create_marketing_copy(request: CopyRequest):
    """Generate marketing copy for the request and score its brand alignment.

    Steps: embed the prompt, retrieve similar stored content, generate the
    copy with that content as context, then score the result.

    Raises:
        HTTPException: With status 500 if any step fails.
    """
    try:
        # Generate embeddings for the prompt. generate() returns a 2-D array
        # with one row per input text; the vector store expects a flat list
        # of floats, so take the single row.
        prompt_embedding = embeddings.generate(request.prompt)
        query_vector = prompt_embedding[0].tolist()

        # Retrieve similar content from vector store
        matches = vector_store.search(query_vector)
        # search() returns dicts (id, content, score, metadata); the
        # copywriter expects plain strings.
        similar_content = [match["content"] for match in matches]

        # Generate marketing copy
        content = generate_marketing_copy(
            prompt=request.prompt,
            content_type=request.content_type,
            similar_content=similar_content,
            tone=request.tone,
            target_audience=request.target_audience
        )

        # Check brand alignment
        brand_alignment = brand_checker.check_alignment(content)

        return CopyResponse(
            content=content,
            confidence_score=0.85,  # This should be calculated based on model confidence
            brand_alignment_score=brand_alignment
        )
    except HTTPException:
        # Do not re-wrap an HTTP error that already has a status and detail.
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e)) from e
|
||||
|
||||
@app.post("/direct-model", response_model=DirectModelResponse)
async def direct_model_inference(request: DirectModelRequest):
    """
    Direct inference using the finetuned model without using the vector store or other components.
    This endpoint is useful for testing the model directly.

    Raises:
        HTTPException: With status 500 if generation fails.
    """
    try:
        # The generation fields are Optional, so a client may send an
        # explicit null. Dropping None values lets generate() fall back to
        # its own defaults instead of receiving None.
        params = request.model_dump(exclude_none=True)

        # Generate text using the finetuned model
        generated_texts = finetuned_model.generate(**params)

        return DirectModelResponse(generated_texts=generated_texts)
    except HTTPException:
        # Do not re-wrap an HTTP error that already has a status and detail.
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e)) from e
|
||||
|
||||
@app.get("/health")
async def health_check():
    """Liveness probe: returns a fixed healthy status without checking any dependency."""
    return {"status": "healthy"}
|
||||
|
||||
if __name__ == "__main__":
    # Start a uvicorn server for this app when the file is run as a script.
    # It binds to localhost:8000 with reload enabled.
    uvicorn.run("main:app", host="localhost", port=8000, reload=True)
|
||||
@@ -0,0 +1,12 @@
|
||||
fastapi==0.104.1
|
||||
uvicorn==0.24.0
|
||||
cohere==4.37
|
||||
faiss-cpu==1.7.4
|
||||
python-dotenv==1.0.0
|
||||
pydantic==2.4.2
|
||||
numpy==1.24.3
|
||||
transformers==4.35.2
|
||||
torch==2.1.1
|
||||
python-multipart==0.0.6
|
||||
PyPDF2==3.0.1
|
||||
pycryptodome==3.17.1
|
||||
@@ -0,0 +1,97 @@
|
||||
import pinecone
|
||||
from typing import List, Dict, Any, Optional
|
||||
import uuid
|
||||
from config import settings
|
||||
|
||||
class VectorStore:
    """Pinecone-backed store for marketing content and its metadata.

    All records live in the ``"content"`` namespace of the index named by
    ``settings.PINECONE_INDEX_NAME``; the text itself is kept in each
    record's metadata under the ``'content'`` key.
    """

    # Pinecone caps top_k at this value when metadata is returned.
    _MAX_TOP_K_WITH_METADATA = 1000

    def __init__(self):
        # Initialize Pinecone
        pinecone.init(
            api_key=settings.PINECONE_API_KEY,
            environment=settings.PINECONE_ENVIRONMENT
        )

        # Create or get the index
        if settings.PINECONE_INDEX_NAME not in pinecone.list_indexes():
            pinecone.create_index(
                name=settings.PINECONE_INDEX_NAME,
                dimension=settings.VECTOR_DIMENSION,
                metric="cosine"
            )

        self.index = pinecone.Index(settings.PINECONE_INDEX_NAME)

    @staticmethod
    def _as_flat_list(vector: Any) -> List[float]:
        """Convert a vector (list, tuple or numpy array) to a flat list.

        A batch holding exactly one row, such as a ``(1, dim)`` array, is
        unwrapped to that row.
        """
        if hasattr(vector, "tolist"):
            vector = vector.tolist()
        vector = list(vector)
        if len(vector) == 1 and isinstance(vector[0], (list, tuple)):
            vector = list(vector[0])
        return vector

    @staticmethod
    def _match_to_dict(match: Any, include_score: bool) -> Dict[str, Any]:
        """Flatten a Pinecone match into a plain dict.

        Metadata keys are merged last, so they win over 'id', 'content'
        and 'score' on a name clash.
        """
        extra = dict(match.metadata or {})
        # A record stored without 'content' yields '' instead of failing
        # the whole result set with KeyError.
        entry = {'id': match.id, 'content': extra.pop('content', '')}
        if include_score:
            entry['score'] = match.score
        entry.update(extra)
        return entry

    def add_content(
        self,
        content: str,
        metadata: Optional[Dict[str, Any]] = None,
        vector: Optional[List[float]] = None,
    ) -> str:
        """
        Add content to the vector store with optional metadata.

        Args:
            content: Text to store.
            metadata: Extra metadata to store with the text. The dict is
                copied, never modified.
            vector: Embedding of *content*. Callers should pass it so the
                record can be found by similarity search.

        Returns the ID of the added content.
        """
        content_id = str(uuid.uuid4())

        # Prepare metadata on a copy so the caller's dict is left untouched.
        record = dict(metadata) if metadata else {}
        record['content'] = content

        if vector is None:
            # NOTE(review): all-zero placeholder kept for callers that do
            # not pass an embedding yet. It carries no similarity
            # information and the index may reject it.
            values = [0] * settings.VECTOR_DIMENSION
        else:
            values = self._as_flat_list(vector)

        # Upsert the vector with metadata
        self.index.upsert(
            vectors=[(content_id, values, record)],
            namespace="content"
        )

        return content_id

    def search(self, query_vector: List[float], top_k: int = settings.MAX_SEARCH_RESULTS) -> List[Dict[str, Any]]:
        """
        Search for similar content using a query vector.

        Args:
            query_vector: Query embedding; a list, a numpy array, or a
                single-row batch of either.
            top_k: Maximum number of matches to return.

        Returns a list of dictionaries containing content and metadata.
        """
        results = self.index.query(
            vector=self._as_flat_list(query_vector),
            top_k=top_k,
            include_metadata=True,
            namespace="content"
        )

        return [
            self._match_to_dict(match, include_score=True)
            for match in results.matches
        ]

    def get_all_content(self, limit: int = 1000) -> List[Dict[str, Any]]:
        """
        Retrieve stored content from the vector store.

        Args:
            limit: Maximum number of records to return; clamped to the
                range 1-1000 because of the metadata query limit.

        Returns a list of dictionaries containing content and metadata.
        """
        # NOTE(review): this lists records through a similarity query
        # against a zero vector; confirm that a cosine index accepts it.
        results = self.index.query(
            vector=[0] * settings.VECTOR_DIMENSION,
            top_k=max(1, min(limit, self._MAX_TOP_K_WITH_METADATA)),
            include_metadata=True,
            namespace="content"
        )

        return [
            self._match_to_dict(match, include_score=False)
            for match in results.matches
        ]

    def delete_content(self, content_id: str) -> bool:
        """
        Delete content from the vector store by ID.

        Returns True if successful, False otherwise.
        """
        try:
            self.index.delete(ids=[content_id], namespace="content")
            return True
        except Exception as e:
            # Best effort: report the failure and signal it via the return value.
            print(f"Error deleting content: {e}")
            return False
|
||||
Reference in New Issue
Block a user