feat(feedback): Add content improvement feedback system

Frontend (frontend/app.js):

- Add textarea for improvement feedback

- Add submit button with loading state

- Handle API response and display improved content

Backend (backend/copywriter.py):

- Add improve_copy() method using Cohere API

- Integrate retry mechanism for API calls

Backend (backend/main.py):

- Add /improve-content POST endpoint

- Implement error handling and return improved content with metadata

Testing:

- Verified feedback submission flow

- Confirmed improved content generation

- Tested error scenarios and loading states
This commit is contained in:
Michael Ikehi
2025-04-21 17:32:33 +01:00
parent 6d07556b85
commit c1a894ad50
11 changed files with 1001 additions and 1471 deletions
+39 -35
View File
@@ -13,7 +13,7 @@ import config
class BrandStyleManager: class BrandStyleManager:
"""Manages brand style guidelines and ensures content consistency.""" """Manages brand style guidelines and ensures content consistency."""
def __init__(self): def __init__(self):
"""Initialize the BrandStyleManager with default or stored style guidelines.""" """Initialize the BrandStyleManager with default or stored style guidelines."""
self.style_path = Path(config.DATA_DIR) / "style_guidelines" / "brand_style.json" self.style_path = Path(config.DATA_DIR) / "style_guidelines" / "brand_style.json"
@@ -117,7 +117,7 @@ class BrandStyleManager:
""" """
} }
logger.info("BrandStyleManager initialized successfully") logger.info("BrandStyleManager initialized successfully")
def _load_or_create_style(self) -> Dict[str, Any]: def _load_or_create_style(self) -> Dict[str, Any]:
"""Load existing style guidelines or create new ones with defaults.""" """Load existing style guidelines or create new ones with defaults."""
try: try:
@@ -129,37 +129,37 @@ class BrandStyleManager:
else: else:
# Create directory if it doesn't exist # Create directory if it doesn't exist
self.style_path.parent.mkdir(exist_ok=True) self.style_path.parent.mkdir(exist_ok=True)
# Use default style guidelines # Use default style guidelines
style = config.DEFAULT_BRAND_STYLE style = config.DEFAULT_BRAND_STYLE
# Save default style # Save default style
with open(self.style_path, 'w') as f: with open(self.style_path, 'w') as f:
json.dump(style, f, indent=2) json.dump(style, f, indent=2)
logger.info("Created default brand style guidelines") logger.info("Created default brand style guidelines")
return style return style
except Exception as e: except Exception as e:
logger.error(f"Error loading or creating style guidelines: {str(e)}") logger.error(f"Error loading or creating style guidelines: {str(e)}")
# Fall back to default style # Fall back to default style
return config.DEFAULT_BRAND_STYLE return config.DEFAULT_BRAND_STYLE
def get_style_guidelines(self) -> Dict[str, Any]: def get_style_guidelines(self) -> Dict[str, Any]:
""" """
Get current brand style guidelines. Get current brand style guidelines.
Returns: Returns:
Dictionary of style guidelines Dictionary of style guidelines
""" """
return self.style_guidelines return self.style_guidelines
def update_style_guidelines(self, new_style: Dict[str, Any]) -> Dict[str, Any]: def update_style_guidelines(self, new_style: Dict[str, Any]) -> Dict[str, Any]:
""" """
Update brand style guidelines. Update brand style guidelines.
Args: Args:
new_style: Dictionary with new style guidelines new_style: Dictionary with new style guidelines
Returns: Returns:
Updated style guidelines dictionary Updated style guidelines dictionary
""" """
@@ -167,37 +167,41 @@ class BrandStyleManager:
# Merge new style with existing # Merge new style with existing
for key, value in new_style.items(): for key, value in new_style.items():
self.style_guidelines[key] = value self.style_guidelines[key] = value
# Ensure brand name is preserved # Ensure brand name is preserved
self.style_guidelines['brand_name'] = config.BRAND_NAME self.style_guidelines['brand_name'] = config.BRAND_NAME
# Save updated style # Save updated style
with open(self.style_path, 'w') as f: with open(self.style_path, 'w') as f:
json.dump(self.style_guidelines, f, indent=2) json.dump(self.style_guidelines, f, indent=2)
logger.info("Updated brand style guidelines") logger.info("Updated brand style guidelines")
return self.style_guidelines return self.style_guidelines
except Exception as e: except Exception as e:
logger.error(f"Error updating style guidelines: {str(e)}") logger.error(f"Error updating style guidelines: {str(e)}")
raise raise
def format_prompt_with_brand_style(self, user_prompt: str, content_type: Optional[str] = None) -> str: def format_prompt_with_brand_style(self, user_prompt: str, content_type: Optional[str] = None) -> str:
"""Format user prompt to match the established writing style.""" """Format user prompt to match the distinctive communication style."""
style_instructions = [ style_instructions = [
"Follow these writing style guidelines:", "Follow these distinctive communication style guidelines:",
"- Use direct commands that empower the reader", "- Use empowering, assertive language that inspires action",
"- Address the reader directly using 'you' and 'your'", "- Address the reader directly using 'you' and 'your' with conviction",
"- Create rhythmic, repetitive patterns in key messages", "- Create rhythmic, repetitive patterns in key messages for emphasis",
"- Maintain a clear, confident, and authoritative tone", "- Maintain a clear, confident, and conversational teaching tone",
"- Use simple, practical language without jargon", "- Use simple, practical language that communicates profound ideas",
"- Acknowledge challenges while focusing on solutions", "- Use embedded commands (e.g., 'Decide now to change your thinking')",
"- Include empowering phrases that emphasize reader's control and choice" "- Include cause-effect statements (e.g., 'Because you understand this, you will now take action')",
"- Speak with conviction and clarity rather than hesitation",
"- Replace tentative phrases with confident declarations",
"- Use a motivational coach-like clarity in all communications",
"- IMPORTANT: Do not mention any specific person's name in the content"
] ]
# Content type specific formatting # Content type specific formatting
content_format = self._get_content_format(content_type) if content_type else "" content_format = self._get_content_format(content_type) if content_type else ""
return "\n".join([ return "\n".join([
f"Generate content based on this request:", f"Generate content based on this request:",
f"\"{user_prompt}\"", f"\"{user_prompt}\"",
@@ -205,37 +209,37 @@ class BrandStyleManager:
"\n".join(style_instructions), "\n".join(style_instructions),
content_format content_format
]) ])
def check_content_alignment(self, content: str) -> Dict[str, Any]: def check_content_alignment(self, content: str) -> Dict[str, Any]:
""" """
Check if generated content aligns with brand style guidelines. Check if generated content aligns with brand style guidelines.
Args: Args:
content: Generated marketing content content: Generated marketing content
Returns: Returns:
Dictionary with alignment metrics and suggestions Dictionary with alignment metrics and suggestions
""" """
style = self.style_guidelines style = self.style_guidelines
taboo_words = style.get('taboo_words', []) taboo_words = style.get('taboo_words', [])
preferred_terms = style.get('preferred_terms', {}) preferred_terms = style.get('preferred_terms', {})
# Check for taboo words # Check for taboo words
found_taboo_words = [] found_taboo_words = []
for word in taboo_words: for word in taboo_words:
if word.lower() in content.lower(): if word.lower() in content.lower():
found_taboo_words.append(word) found_taboo_words.append(word)
# Check for preferred terminology # Check for preferred terminology
terminology_issues = [] terminology_issues = []
for avoid, use in preferred_terms.items(): for avoid, use in preferred_terms.items():
if avoid.lower() in content.lower(): if avoid.lower() in content.lower():
terminology_issues.append(f"Found '{avoid}', should use '{use}' instead") terminology_issues.append(f"Found '{avoid}', should use '{use}' instead")
# Calculate an overall alignment score (simple implementation) # Calculate an overall alignment score (simple implementation)
issues_count = len(found_taboo_words) + len(terminology_issues) issues_count = len(found_taboo_words) + len(terminology_issues)
alignment_score = max(0, 100 - (issues_count * 10)) # Reduce score for each issue alignment_score = max(0, 100 - (issues_count * 10)) # Reduce score for each issue
return { return {
'alignment_score': alignment_score, 'alignment_score': alignment_score,
'taboo_words_found': found_taboo_words, 'taboo_words_found': found_taboo_words,
@@ -246,16 +250,16 @@ class BrandStyleManager:
def _get_content_format(self, content_type: str) -> str: def _get_content_format(self, content_type: str) -> str:
""" """
Get formatting instructions for specific content type. Get formatting instructions for specific content type.
Args: Args:
content_type: Type of content to generate content_type: Type of content to generate
Returns: Returns:
Formatting instructions as string Formatting instructions as string
""" """
if not content_type: if not content_type:
return "" return ""
format_instructions = self.content_formats.get(content_type, "") format_instructions = self.content_formats.get(content_type, "")
if format_instructions: if format_instructions:
return f"\nContent type specific instructions:\n{format_instructions.strip()}" return f"\nContent type specific instructions:\n{format_instructions.strip()}"
+13 -10
View File
@@ -52,12 +52,12 @@ CONTENT_TYPES = [
"newsletter" "newsletter"
] ]
# Tone options - simplified to match the core style # Tone options - specifically matching Adriana James' communication style
TONE_OPTIONS = [ TONE_OPTIONS = [
"direct",
"empowering", "empowering",
"confident", "assertive",
"practical" "inspirational",
"direct"
] ]
# Content length options # Content length options
@@ -67,19 +67,22 @@ LENGTH_OPTIONS = [
"long", # > 300 words "long", # > 300 words
] ]
# Default brand style guidelines # Default brand style guidelines - fixed to match Adriana James' distinct communication style
DEFAULT_BRAND_STYLE = { DEFAULT_BRAND_STYLE = {
"tone": ["direct", "empowering", "confident", "practical"], "tone": ["empowering", "assertive", "inspirational", "direct"],
"voice_characteristics": ["clear", "authoritative", "steady", "rhythmic"], "voice_characteristics": ["clear", "confident", "conversational", "teaching"],
"writing_patterns": ["direct commands", "personal pronouns", "repetitive rhythms"], "writing_patterns": ["direct commands", "personal pronouns", "repetitive rhythms", "embedded commands", "cause-effect statements"],
"taboo_words": ["cheap", "discount", "bargain", "failure", "impossible", "difficult"], "taboo_words": ["cheap", "discount", "bargain", "failure", "impossible", "difficult", "might", "try", "consider"],
"preferred_terms": { "preferred_terms": {
"problems": "challenges", "problems": "challenges",
"try": "take action", "try": "take action",
"difficult": "ready for growth", "difficult": "ready for growth",
"failure": "learning opportunity", "failure": "learning opportunity",
"hope": "know", "hope": "know",
"maybe": "will" "maybe": "will",
"might help you": "you can do this",
"consider doing this": "decide now to change your thinking",
"this could work": "this works because"
} }
} }
+97 -42
View File
@@ -16,13 +16,13 @@ from vector_store import vector_store
class Copywriter: class Copywriter:
"""Generates marketing copy using a fine-tuned LLM.""" """Generates marketing copy using a fine-tuned LLM."""
def __init__(self): def __init__(self):
"""Initialize the Copywriter with Cohere LLM client.""" """Initialize the Copywriter with Cohere LLM client."""
self.model = "command" # Cohere's generation model self.model = "command" # Cohere's generation model
self.api_key = config.COHERE_API_KEY self.api_key = config.COHERE_API_KEY
logger.info("Copywriter initialized with Cohere API successfully") logger.info("Copywriter initialized with Cohere API successfully")
@retry(stop=stop_after_attempt(3), wait=wait_exponential(multiplier=1, min=4, max=10)) @retry(stop=stop_after_attempt(3), wait=wait_exponential(multiplier=1, min=4, max=10))
async def generate_copy( async def generate_copy(
self, self,
@@ -40,34 +40,43 @@ class Copywriter:
try: try:
# Step 1: Format prompt with brand style guidelines # Step 1: Format prompt with brand style guidelines
branded_prompt = brand_style_manager.format_prompt_with_brand_style(prompt, content_type) branded_prompt = brand_style_manager.format_prompt_with_brand_style(prompt, content_type)
# Step 2: Find similar content for reference (if enabled) # Step 2: Find similar content for reference (if enabled)
reference_content = [] reference_content = []
if reference_similar_content: if reference_similar_content:
logger.info(f"Searching for similar content to reference for prompt: {prompt[:50]}...")
search_results = await vector_store.search(prompt, top_k=3) search_results = await vector_store.search(prompt, top_k=3)
if search_results: if search_results:
reference_content = [result['text'] for result in search_results] reference_content = [result['text'] for result in search_results]
logger.info(f"Found {len(reference_content)} similar content items to reference")
for i, content in enumerate(reference_content):
logger.debug(f"Reference content {i+1}: {content[:100]}...")
else:
logger.warning("No similar content found in vector store for reference")
# Step 3: Add length and CTA instructions if needed # Step 3: Add length and CTA instructions if needed
if length: if length:
branded_prompt += f"\n- Generate {length} content" branded_prompt += f"\n- Generate {length} content"
if include_cta: if include_cta:
branded_prompt += "\n- Include a direct, empowering call to action" branded_prompt += "\n- Include a direct, empowering call to action"
# Step 4: Add reference content if available # Step 4: Add reference content if available
if reference_content: if reference_content:
branded_prompt += "\n\nReference these successful examples for tone and style:\n" branded_prompt += "\n\nReference these successful examples for tone and style:\n"
branded_prompt += "\n---\n".join(reference_content) branded_prompt += "\n---\n".join(reference_content)
# Step 5: Generate content using the LLM # Step 5: Generate content using the LLM
generated_content = await self._call_llm_api(branded_prompt, max_tokens) generated_content = await self._call_llm_api(branded_prompt, max_tokens)
# Step 6: Check content alignment with brand style # Step 6: Post-process to remove any mentions of Adriana James
generated_content = self._remove_name_mentions(generated_content)
# Step 7: Check content alignment with brand style
alignment_check = brand_style_manager.check_content_alignment(generated_content) alignment_check = brand_style_manager.check_content_alignment(generated_content)
# Step 7: Generate alternative headline suggestions # Step 7: Generate alternative headline suggestions
headline_suggestions = await self._generate_headline_suggestions(prompt, generated_content) headline_suggestions = await self._generate_headline_suggestions(prompt, generated_content)
# Step 8: Return the generated content with metadata # Step 8: Return the generated content with metadata
result = { result = {
"content": generated_content, "content": generated_content,
@@ -79,21 +88,21 @@ class Copywriter:
"generated_at": None # Will be added by the API "generated_at": None # Will be added by the API
} }
} }
# Add alignment issues if any # Add alignment issues if any
if alignment_check['taboo_words_found'] or alignment_check['terminology_issues']: if alignment_check['taboo_words_found'] or alignment_check['terminology_issues']:
result["alignment_issues"] = { result["alignment_issues"] = {
"taboo_words_found": alignment_check['taboo_words_found'], "taboo_words_found": alignment_check['taboo_words_found'],
"terminology_issues": alignment_check['terminology_issues'] "terminology_issues": alignment_check['terminology_issues']
} }
logger.info(f"Generated content with {len(generated_content)} characters") logger.info(f"Generated content with {len(generated_content)} characters")
return result return result
except Exception as e: except Exception as e:
logger.error(f"Error generating copy: {str(e)}") logger.error(f"Error generating copy: {str(e)}")
raise raise
@retry(stop=stop_after_attempt(3), wait=wait_exponential(multiplier=1, min=4, max=10)) @retry(stop=stop_after_attempt(3), wait=wait_exponential(multiplier=1, min=4, max=10))
async def _call_llm_api(self, prompt: str, max_tokens: int = 1000) -> str: async def _call_llm_api(self, prompt: str, max_tokens: int = 1000) -> str:
""" """
@@ -102,12 +111,11 @@ class Copywriter:
Args: Args:
prompt: The formatted prompt for the LLM prompt: The formatted prompt for the LLM
max_tokens: Maximum tokens for the generated response max_tokens: Maximum tokens for the generated response
Returns: Returns:
Generated content as a string Generated content as a string with preserved formatting
""" """
try: try:
# Use Cohere's generate API with the API key from config
cohere_api_key = config.COHERE_API_KEY cohere_api_key = config.COHERE_API_KEY
async with httpx.AsyncClient() as client: async with httpx.AsyncClient() as client:
@@ -118,19 +126,30 @@ class Copywriter:
"Content-Type": "application/json" "Content-Type": "application/json"
}, },
json={ json={
"model": "command", # Cohere's generation model "model": "command",
"prompt": prompt, "prompt": f"{prompt}\n\nNote: Please preserve formatting with proper paragraphs, line breaks, and bullet points where appropriate.",
"max_tokens": max_tokens, "max_tokens": max_tokens,
"temperature": 0.7, "temperature": 0.7,
"k": 0, "k": 0,
"p": 0.75 "p": 0.75,
"return_likelihoods": "NONE"
}, },
timeout=30.0 timeout=30.0
) )
if response.status_code == 200: if response.status_code == 200:
result = response.json() result = response.json()
return result["generations"][0]["text"].strip() generated_text = result["generations"][0]["text"].strip()
# Preserve paragraph breaks and formatting
formatted_text = (
generated_text
.replace("\n\n", "<paragraph-break>") # Preserve paragraph breaks
.replace("\n- ", "\n") # Convert hyphens to bullets
.replace("<paragraph-break>", "\n\n") # Restore paragraph breaks
)
return formatted_text
else: else:
logger.error(f"Cohere API error: {response.status_code}, {response.text}") logger.error(f"Cohere API error: {response.status_code}, {response.text}")
raise Exception(f"Cohere API error: {response.status_code}") raise Exception(f"Cohere API error: {response.status_code}")
@@ -138,24 +157,25 @@ class Copywriter:
except Exception as e: except Exception as e:
logger.error(f"Error calling Cohere API: {str(e)}") logger.error(f"Error calling Cohere API: {str(e)}")
raise raise
async def _generate_headline_suggestions(self, original_prompt: str, generated_content: str) -> List[str]: async def _generate_headline_suggestions(self, original_prompt: str, generated_content: str) -> List[str]:
""" """
Generate alternative headline suggestions based on the content. Generate alternative headline suggestions based on the content.
Args: Args:
original_prompt: The original user prompt original_prompt: The original user prompt
generated_content: The generated marketing content generated_content: The generated marketing content
Returns: Returns:
List of headline suggestions List of headline suggestions
""" """
try: try:
# Create a prompt for headline generation # Create a prompt for headline generation
headline_prompt = f""" headline_prompt = f"""
Generate 3 alternative marketing headlines for the following content. Generate 3 alternative marketing headlines for the following content.
Make headlines compelling, concise, and aligned with the content's message. Make headlines compelling, concise, and aligned with the content's message.
Each headline should be unique and capture attention. Each headline should be unique and capture attention.
IMPORTANT: Do not mention any specific person's name in the headlines.
ORIGINAL PROMPT: ORIGINAL PROMPT:
{original_prompt} {original_prompt}
@@ -179,6 +199,9 @@ class Copywriter:
if headline.strip() and not headline.lower().startswith(('headline', 'title', '-', '*', '')) if headline.strip() and not headline.lower().startswith(('headline', 'title', '-', '*', ''))
] ]
# Remove any mentions of Adriana James from headlines
headlines = [self._remove_name_mentions(headline) for headline in headlines]
# Ensure we have exactly 3 headlines # Ensure we have exactly 3 headlines
if len(headlines) > 3: if len(headlines) > 3:
headlines = headlines[:3] headlines = headlines[:3]
@@ -192,15 +215,15 @@ class Copywriter:
logger.error(f"Error generating headline suggestions: {str(e)}") logger.error(f"Error generating headline suggestions: {str(e)}")
# Return empty list instead of mock response on error # Return empty list instead of mock response on error
return [] return []
async def improve_copy(self, content: str, feedback: str) -> str: async def improve_copy(self, content: str, feedback: str) -> str:
""" """
Improve content based on user feedback. Improve content based on user feedback.
Args: Args:
content: Original generated content content: Original generated content
feedback: User feedback for improvement feedback: User feedback for improvement
Returns: Returns:
Improved content Improved content
""" """
@@ -208,53 +231,57 @@ class Copywriter:
# Format prompt for improvement # Format prompt for improvement
improve_prompt = f""" improve_prompt = f"""
Please improve the following marketing content based on the feedback provided: Please improve the following marketing content based on the feedback provided:
IMPORTANT: Do not mention any specific person's name in the content.
ORIGINAL CONTENT: ORIGINAL CONTENT:
{content} {content}
FEEDBACK: FEEDBACK:
{feedback} {feedback}
IMPROVED CONTENT: IMPROVED CONTENT:
""" """
# Call LLM to improve content # Call LLM to improve content
improved_content = await self._call_llm_api(improve_prompt, max_tokens=1200) improved_content = await self._call_llm_api(improve_prompt, max_tokens=1200)
# Remove any mentions of Adriana James from improved content
improved_content = self._remove_name_mentions(improved_content)
logger.info(f"Improved content based on feedback") logger.info(f"Improved content based on feedback")
return improved_content return improved_content
except Exception as e: except Exception as e:
logger.error(f"Error improving content: {str(e)}") logger.error(f"Error improving content: {str(e)}")
raise raise
async def analyze_content_performance(self, content: str) -> Dict[str, Any]: async def analyze_content_performance(self, content: str) -> Dict[str, Any]:
""" """
Analyze marketing content for performance prediction. Analyze marketing content for performance prediction.
Args: Args:
content: Marketing content to analyze content: Marketing content to analyze
Returns: Returns:
Dictionary with analysis results Dictionary with analysis results
""" """
try: try:
# This would be enhanced with actual ML models in production # This would be enhanced with actual ML models in production
# Simplified mock response for demonstration # Simplified mock response for demonstration
# Very basic analysis using length and keyword presence # Very basic analysis using length and keyword presence
word_count = len(content.split()) word_count = len(content.split())
has_cta = any(phrase in content.lower() for phrase in ["call", "contact", "get started", "try", "buy", "sign up"]) has_cta = any(phrase in content.lower() for phrase in ["call", "contact", "get started", "try", "buy", "sign up"])
sentence_count = len([s for s in content.split(".") if s.strip()]) sentence_count = len([s for s in content.split(".") if s.strip()])
avg_words_per_sentence = word_count / max(1, sentence_count) avg_words_per_sentence = word_count / max(1, sentence_count)
# Simple scoring system # Simple scoring system
readability_score = 100 - min(100, max(0, abs(avg_words_per_sentence - 15) * 5)) readability_score = 100 - min(100, max(0, abs(avg_words_per_sentence - 15) * 5))
cta_score = 90 if has_cta else 60 cta_score = 90 if has_cta else 60
length_score = min(100, max(0, word_count / 3)) length_score = min(100, max(0, word_count / 3))
overall_score = (readability_score + cta_score + length_score) / 3 overall_score = (readability_score + cta_score + length_score) / 3
return { return {
"overall_score": round(overall_score, 1), "overall_score": round(overall_score, 1),
"readability_score": round(readability_score, 1), "readability_score": round(readability_score, 1),
@@ -272,10 +299,38 @@ class Copywriter:
"Consider adding more content for better engagement" if word_count < 100 else "Your content length is appropriate" "Consider adding more content for better engagement" if word_count < 100 else "Your content length is appropriate"
] ]
} }
except Exception as e: except Exception as e:
logger.error(f"Error analyzing content: {str(e)}") logger.error(f"Error analyzing content: {str(e)}")
raise raise
def _remove_name_mentions(self, content: str) -> str:
"""
Remove any mentions of specific names from the generated content.
Args:
content: The generated content to process
Returns:
Content with name mentions removed
"""
try:
# Remove any mentions of "Adriana James" (case insensitive)
import re
pattern = re.compile(r'\bAdriana\s+James\b', re.IGNORECASE)
content = pattern.sub('', content)
# Clean up any double spaces that might result from the removal
content = re.sub(r'\s+', ' ', content)
# Clean up any lines that might now be empty
content = '\n'.join([line for line in content.split('\n') if line.strip()])
logger.info("Removed any name mentions from generated content")
return content
except Exception as e:
logger.error(f"Error removing name mentions: {str(e)}")
return content
# Create a singleton instance # Create a singleton instance
copywriter = Copywriter() copywriter = Copywriter()
+143 -70
View File
@@ -18,22 +18,27 @@ from embeddings import embeddings_manager
class VectorStore: class VectorStore:
"""Manages vector database operations for content retrieval.""" """Manages vector database operations for content retrieval."""
def __init__(self): def __init__(self):
"""Initialize the VectorStore with FAISS index.""" """Initialize the VectorStore with FAISS index."""
self.store_path = Path(config.VECTOR_DB_PATH) self.store_path = Path(config.VECTOR_DB_PATH)
self.store_path.mkdir(exist_ok=True) self.store_path.mkdir(exist_ok=True)
self.index_path = self.store_path / "faiss_index.bin" self.index_path = self.store_path / "faiss_index.bin"
self.metadata_path = self.store_path / "metadata.pkl" self.metadata_path = self.store_path / "metadata.pkl"
self.dimension = None self.dimension = None
self.index = None self.index = None
self.metadata = [] self.metadata = []
self._load_or_create_index() self._load_or_create_index()
logger.info("VectorStore initialized successfully") logger.info("VectorStore initialized successfully")
# Check if the index is empty and load sample data if needed
if self.index.ntotal == 0:
logger.warning("Vector store is empty. Loading sample data...")
self._load_sample_data()
def _load_or_create_index(self) -> None: def _load_or_create_index(self) -> None:
"""Load existing index or create new one if it doesn't exist.""" """Load existing index or create new one if it doesn't exist."""
try: try:
@@ -46,17 +51,17 @@ class VectorStore:
logger.info(f"Loaded existing vector index with {self.index.ntotal} vectors") logger.info(f"Loaded existing vector index with {self.index.ntotal} vectors")
else: else:
# Default dimension for Cohere embeddings # Default dimension for Cohere embeddings
self.dimension = 1024 self.dimension = 1024
self.index = faiss.IndexFlatL2(self.dimension) self.index = faiss.IndexFlatL2(self.dimension)
self.metadata = [] self.metadata = []
logger.info(f"Created new vector index with dimension {self.dimension}") logger.info(f"Created new vector index with dimension {self.dimension}")
# Save the empty index and metadata # Save the empty index and metadata
self._save_index() self._save_index()
except Exception as e: except Exception as e:
logger.error(f"Error loading or creating index: {str(e)}") logger.error(f"Error loading or creating index: {str(e)}")
raise raise
def _save_index(self) -> None: def _save_index(self) -> None:
"""Save the index and metadata to disk.""" """Save the index and metadata to disk."""
try: try:
@@ -67,19 +72,19 @@ class VectorStore:
except Exception as e: except Exception as e:
logger.error(f"Error saving index: {str(e)}") logger.error(f"Error saving index: {str(e)}")
raise raise
async def add_documents( async def add_documents(
self, self,
texts: List[str], texts: List[str],
metadata_list: Optional[List[Dict[str, Any]]] = None metadata_list: Optional[List[Dict[str, Any]]] = None
) -> List[int]: ) -> List[int]:
""" """
Add documents to the vector store. Add documents to the vector store.
Args: Args:
texts: List of text documents to add texts: List of text documents to add
metadata_list: List of metadata dictionaries for each document metadata_list: List of metadata dictionaries for each document
Returns: Returns:
List of document IDs (vector indices) List of document IDs (vector indices)
""" """
@@ -87,16 +92,16 @@ class VectorStore:
if not texts: if not texts:
logger.warning("No texts provided to add to vector store") logger.warning("No texts provided to add to vector store")
return [] return []
if metadata_list is None: if metadata_list is None:
metadata_list = [{} for _ in texts] metadata_list = [{} for _ in texts]
if len(texts) != len(metadata_list): if len(texts) != len(metadata_list):
raise ValueError("Number of texts and metadata entries must match") raise ValueError("Number of texts and metadata entries must match")
# Generate embeddings # Generate embeddings
embeddings = await embeddings_manager.get_embeddings(texts) embeddings = await embeddings_manager.get_embeddings(texts)
# Check if embeddings match our dimension # Check if embeddings match our dimension
if embeddings.shape[1] != self.dimension: if embeddings.shape[1] != self.dimension:
logger.warning(f"Embedding dimension mismatch: expected {self.dimension}, got {embeddings.shape[1]}") logger.warning(f"Embedding dimension mismatch: expected {self.dimension}, got {embeddings.shape[1]}")
@@ -107,93 +112,97 @@ class VectorStore:
logger.info(f"Adapted to new dimension: {self.dimension}") logger.info(f"Adapted to new dimension: {self.dimension}")
else: else:
raise ValueError(f"Embedding dimension mismatch: expected {self.dimension}, got {embeddings.shape[1]}") raise ValueError(f"Embedding dimension mismatch: expected {self.dimension}, got {embeddings.shape[1]}")
# Add timestamp to metadata # Add timestamp to metadata
timestamp = datetime.now().isoformat() timestamp = datetime.now().isoformat()
for meta in metadata_list: for meta in metadata_list:
meta['timestamp'] = timestamp meta['timestamp'] = timestamp
meta['document_id'] = len(self.metadata) + len(metadata_list) meta['document_id'] = len(self.metadata) + len(metadata_list)
# Store texts in metadata # Store texts in metadata
for i, (text, meta) in enumerate(zip(texts, metadata_list)): for i, (text, meta) in enumerate(zip(texts, metadata_list)):
meta['text'] = text meta['text'] = text
# Add vectors to index # Add vectors to index
start_idx = self.index.ntotal start_idx = self.index.ntotal
self.index.add(embeddings.astype(np.float32)) self.index.add(embeddings.astype(np.float32))
self.metadata.extend(metadata_list) self.metadata.extend(metadata_list)
# Save updated index # Save updated index
self._save_index() self._save_index()
# Return document IDs # Return document IDs
doc_ids = list(range(start_idx, start_idx + len(texts))) doc_ids = list(range(start_idx, start_idx + len(texts)))
logger.info(f"Added {len(texts)} documents to vector store") logger.info(f"Added {len(texts)} documents to vector store")
return doc_ids return doc_ids
except Exception as e: except Exception as e:
logger.error(f"Error adding documents to vector store: {str(e)}") logger.error(f"Error adding documents to vector store: {str(e)}")
raise raise
async def search( async def search(
self, self,
query: str, query: str,
top_k: int = 5, top_k: int = 5,
filters: Optional[Dict[str, Any]] = None, filters: Optional[Dict[str, Any]] = None,
rerank: bool = True rerank: bool = True
) -> List[Dict[str, Any]]: ) -> List[Dict[str, Any]]:
""" """
Search for similar documents. Search for similar documents.
Args: Args:
query: The search query query: The search query
top_k: Number of results to return top_k: Number of results to return
filters: Dictionary of metadata filters filters: Dictionary of metadata filters
rerank: Whether to use Cohere's reranking rerank: Whether to use Cohere's reranking
Returns: Returns:
List of result dictionaries with document content and metadata List of result dictionaries with document content and metadata
""" """
try: try:
logger.info(f"Searching vector store with query: {query[:50]}... (top_k={top_k})")
if self.index.ntotal == 0: if self.index.ntotal == 0:
logger.warning("Empty vector store, no results to return") logger.warning("Empty vector store, no results to return")
return [] return []
logger.info(f"Vector store contains {self.index.ntotal} documents")
# Generate query embedding # Generate query embedding
query_embedding = await embeddings_manager.get_query_embedding(query) query_embedding = await embeddings_manager.get_query_embedding(query)
query_embedding = query_embedding.reshape(1, -1).astype(np.float32) query_embedding = query_embedding.reshape(1, -1).astype(np.float32)
# First pass: find more candidates than needed for reranking # First pass: find more candidates than needed for reranking
search_k = top_k * 3 if rerank else top_k search_k = top_k * 3 if rerank else top_k
search_k = min(search_k, self.index.ntotal) # Don't request more than we have search_k = min(search_k, self.index.ntotal) # Don't request more than we have
distances, indices = self.index.search(query_embedding, search_k) distances, indices = self.index.search(query_embedding, search_k)
# Get metadata and texts for matching indices # Get metadata and texts for matching indices
results = [] results = []
for i, idx in enumerate(indices[0]): for i, idx in enumerate(indices[0]):
if idx < 0 or idx >= len(self.metadata): if idx < 0 or idx >= len(self.metadata):
continue # Skip invalid indices continue # Skip invalid indices
metadata = self.metadata[idx] metadata = self.metadata[idx]
text = metadata.get('text', '') text = metadata.get('text', '')
# Apply filters if any # Apply filters if any
if filters and not self._matches_filters(metadata, filters): if filters and not self._matches_filters(metadata, filters):
continue continue
results.append({ results.append({
'document_id': idx, 'document_id': idx,
'text': text, 'text': text,
'metadata': {k: v for k, v in metadata.items() if k != 'text'}, 'metadata': {k: v for k, v in metadata.items() if k != 'text'},
'distance': float(distances[0][i]) 'distance': float(distances[0][i])
}) })
# Apply reranking if requested # Apply reranking if requested
if rerank and results: if rerank and results:
texts = [r['text'] for r in results] texts = [r['text'] for r in results]
reranked = await embeddings_manager.rerank_results(query, texts, top_n=top_k) reranked = await embeddings_manager.rerank_results(query, texts, top_n=top_k)
# Map reranked results back to our original results # Map reranked results back to our original results
reranked_results = [] reranked_results = []
for item in reranked: for item in reranked:
@@ -203,41 +212,41 @@ class VectorStore:
**results[orig_idx], **results[orig_idx],
'relevance_score': item['relevance_score'] 'relevance_score': item['relevance_score']
}) })
results = reranked_results results = reranked_results
else: else:
# Just take the top_k results # Just take the top_k results
results = results[:top_k] results = results[:top_k]
logger.info(f"Found {len(results)} matching documents for query") logger.info(f"Found {len(results)} matching documents for query")
return results return results
except Exception as e: except Exception as e:
logger.error(f"Error searching vector store: {str(e)}") logger.error(f"Error searching vector store: {str(e)}")
raise raise
def _matches_filters(self, metadata: Dict[str, Any], filters: Dict[str, Any]) -> bool:
    """Return True when ``metadata`` satisfies every entry of ``filters``.

    Args:
        metadata: Metadata dictionary of a stored document.
        filters: Mapping of metadata key to the required value. A list value
            means "any of these"; any other value must compare equal.

    Returns:
        False as soon as one filter key is missing from ``metadata`` or its
        value does not match; True otherwise (including for empty filters).
    """
    def _satisfied(field: str, expected: Any) -> bool:
        # A filter on a key the document does not carry can never match.
        if field not in metadata:
            return False
        actual = metadata[field]
        if isinstance(expected, list):
            # List filter: the document value must be one of the options.
            return actual in expected
        return not actual != expected

    return all(_satisfied(field, expected) for field, expected in filters.items())
async def delete_document(self, document_id: int) -> bool: async def delete_document(self, document_id: int) -> bool:
""" """
Delete a document from the vector store. Delete a document from the vector store.
Args: Args:
document_id: ID of the document to delete document_id: ID of the document to delete
Returns: Returns:
Boolean indicating success Boolean indicating success
""" """
@@ -245,28 +254,28 @@ class VectorStore:
if document_id < 0 or document_id >= len(self.metadata): if document_id < 0 or document_id >= len(self.metadata):
logger.warning(f"Invalid document ID: {document_id}") logger.warning(f"Invalid document ID: {document_id}")
return False return False
# FAISS doesn't support direct deletion, so we need to rebuild the index # FAISS doesn't support direct deletion, so we need to rebuild the index
# Mark the document as deleted in metadata # Mark the document as deleted in metadata
self.metadata[document_id]['deleted'] = True self.metadata[document_id]['deleted'] = True
# Save updated metadata # Save updated metadata
self._save_index() self._save_index()
logger.info(f"Marked document {document_id} as deleted") logger.info(f"Marked document {document_id} as deleted")
return True return True
except Exception as e: except Exception as e:
logger.error(f"Error deleting document: {str(e)}") logger.error(f"Error deleting document: {str(e)}")
raise raise
async def get_document(self, document_id: int) -> Optional[Dict[str, Any]]: async def get_document(self, document_id: int) -> Optional[Dict[str, Any]]:
""" """
Retrieve a document by ID. Retrieve a document by ID.
Args: Args:
document_id: ID of the document to retrieve document_id: ID of the document to retrieve
Returns: Returns:
Document with metadata or None if not found Document with metadata or None if not found
""" """
@@ -274,35 +283,35 @@ class VectorStore:
if document_id < 0 or document_id >= len(self.metadata): if document_id < 0 or document_id >= len(self.metadata):
logger.warning(f"Invalid document ID: {document_id}") logger.warning(f"Invalid document ID: {document_id}")
return None return None
metadata = self.metadata[document_id] metadata = self.metadata[document_id]
# Check if document is marked as deleted # Check if document is marked as deleted
if metadata.get('deleted', False): if metadata.get('deleted', False):
logger.warning(f"Document {document_id} is marked as deleted") logger.warning(f"Document {document_id} is marked as deleted")
return None return None
text = metadata.get('text', '') text = metadata.get('text', '')
return { return {
'document_id': document_id, 'document_id': document_id,
'text': text, 'text': text,
'metadata': {k: v for k, v in metadata.items() if k != 'text' and k != 'deleted'} 'metadata': {k: v for k, v in metadata.items() if k != 'text' and k != 'deleted'}
} }
except Exception as e: except Exception as e:
logger.error(f"Error retrieving document: {str(e)}") logger.error(f"Error retrieving document: {str(e)}")
raise raise
async def update_document(self, document_id: int, text: str, metadata: Optional[Dict[str, Any]] = None) -> bool: async def update_document(self, document_id: int, text: str, metadata: Optional[Dict[str, Any]] = None) -> bool:
""" """
Update a document in the vector store. Update a document in the vector store.
Args: Args:
document_id: ID of the document to update document_id: ID of the document to update
text: New document text text: New document text
metadata: New metadata (will be merged with existing) metadata: New metadata (will be merged with existing)
Returns: Returns:
Boolean indicating success Boolean indicating success
""" """
@@ -310,38 +319,102 @@ class VectorStore:
if document_id < 0 or document_id >= len(self.metadata): if document_id < 0 or document_id >= len(self.metadata):
logger.warning(f"Invalid document ID: {document_id}") logger.warning(f"Invalid document ID: {document_id}")
return False return False
# Get existing metadata # Get existing metadata
existing_metadata = self.metadata[document_id] existing_metadata = self.metadata[document_id]
# Check if document is marked as deleted # Check if document is marked as deleted
if existing_metadata.get('deleted', False): if existing_metadata.get('deleted', False):
logger.warning(f"Cannot update deleted document {document_id}") logger.warning(f"Cannot update deleted document {document_id}")
return False return False
# Generate new embedding # Generate new embedding
embeddings = await embeddings_manager.get_embeddings([text]) embeddings = await embeddings_manager.get_embeddings([text])
# Update the vector in the index # Update the vector in the index
faiss.IndexFlatL2_update_vectors(self.index, embeddings.astype(np.float32), np.array([document_id], dtype=np.int64)) faiss.IndexFlatL2_update_vectors(self.index, embeddings.astype(np.float32), np.array([document_id], dtype=np.int64))
# Update metadata # Update metadata
if metadata: if metadata:
for key, value in metadata.items(): for key, value in metadata.items():
existing_metadata[key] = value existing_metadata[key] = value
existing_metadata['text'] = text existing_metadata['text'] = text
existing_metadata['updated_at'] = datetime.now().isoformat() existing_metadata['updated_at'] = datetime.now().isoformat()
# Save updated index # Save updated index
self._save_index() self._save_index()
logger.info(f"Updated document {document_id}") logger.info(f"Updated document {document_id}")
return True return True
except Exception as e: except Exception as e:
logger.error(f"Error updating document: {str(e)}") logger.error(f"Error updating document: {str(e)}")
raise raise
def _load_sample_data(self) -> None:
    """Load sample data from past campaigns into the vector store.

    Reads every ``*.json`` file in ``<config.DATA_DIR>/past_campaigns``. Each
    file that parses and contains a ``content`` key contributes one document;
    its metadata records the content type, campaign name, source, file path
    and, when present, the performance metrics. The collected documents are
    then added through ``add_documents``.

    This is a best-effort loader: per-file failures are logged and the file is
    skipped, and any other failure is logged rather than raised.
    """
    import asyncio

    try:
        # Path to past campaigns directory
        campaigns_dir = Path(config.DATA_DIR) / "past_campaigns"
        if not campaigns_dir.exists() or not campaigns_dir.is_dir():
            logger.warning(f"Past campaigns directory not found: {campaigns_dir}")
            return

        # Sort so documents are added in a stable order across runs
        # (glob order is filesystem-dependent).
        campaign_files = sorted(campaigns_dir.glob("*.json"))
        if not campaign_files:
            logger.warning("No campaign files found in past_campaigns directory")
            return

        texts = []
        metadata_list = []

        for file_path in campaign_files:
            try:
                with open(file_path, 'r', encoding='utf-8') as f:
                    campaign_data = json.load(f)

                if 'content' not in campaign_data:
                    continue

                # Build the metadata completely BEFORE touching the output
                # lists: if anything here raises, neither list is modified and
                # texts/metadata_list stay index-aligned.
                campaign_meta = campaign_data.get('metadata', {})
                metadata = {
                    'content_type': campaign_data.get('content_type', 'unknown'),
                    'campaign_name': campaign_meta.get('campaign_name', file_path.stem),
                    'source': 'past_campaign',
                    'file_path': str(file_path)
                }

                # Add performance metrics if available
                if 'performance_metrics' in campaign_meta:
                    metadata['performance_metrics'] = campaign_meta['performance_metrics']

                texts.append(campaign_data['content'])
                metadata_list.append(metadata)
                logger.debug(f"Loaded campaign from {file_path.name}")
            except Exception as e:
                logger.error(f"Error loading campaign file {file_path}: {str(e)}")
                continue

        if not texts:
            logger.warning("No valid campaign content found in files")
            return

        # add_documents is a coroutine and this method is synchronous.
        # asyncio.run cannot be used while a loop is already running in this
        # thread, so detect that case up front instead of creating a coroutine
        # that would never be awaited.
        try:
            asyncio.get_running_loop()
        except RuntimeError:
            pass  # No running loop: safe to drive the coroutine ourselves.
        else:
            logger.error("Error loading sample data: an event loop is already running")
            return

        # asyncio.run creates a private loop and tears it down afterwards, so
        # no closed loop is left installed as the thread's current event loop.
        doc_ids = asyncio.run(self.add_documents(texts, metadata_list))
        logger.info(f"Added {len(doc_ids)} past campaigns to vector store")
    except Exception as e:
        logger.error(f"Error loading sample data: {str(e)}")
# Create a singleton instance # Create a singleton instance
vector_store = VectorStore() vector_store = VectorStore()
Binary file not shown.
Binary file not shown.
Binary file not shown.
+189 -304
View File
File diff suppressed because it is too large Load Diff
+20 -23
View File
@@ -71,7 +71,7 @@
<option value="newsletter">Newsletter</option> <option value="newsletter">Newsletter</option>
</select> </select>
</div> </div>
<div class="form-group"> <div class="form-group">
<label for="length">Length</label> <label for="length">Length</label>
<select id="length"> <select id="length">
@@ -152,7 +152,7 @@
<h2>Content Templates</h2> <h2>Content Templates</h2>
<p>Use pre-built templates for faster content creation.</p> <p>Use pre-built templates for faster content creation.</p>
</div> </div>
<div class="templates-grid"> <div class="templates-grid">
<div class="template-card"> <div class="template-card">
<div class="template-icon"><i class="fas fa-envelope"></i></div> <div class="template-icon"><i class="fas fa-envelope"></i></div>
@@ -193,7 +193,7 @@
<h2>Content History</h2> <h2>Content History</h2>
<p>View and reuse your previously generated content.</p> <p>View and reuse your previously generated content.</p>
</div> </div>
<div class="history-filters"> <div class="history-filters">
<div class="form-group"> <div class="form-group">
<select id="history-filter-type"> <select id="history-filter-type">
@@ -227,38 +227,36 @@
<section id="brand-style-page" class="page"> <section id="brand-style-page" class="page">
<div class="page-header"> <div class="page-header">
<h2>Brand Style Guidelines</h2> <h2>Brand Style Guidelines</h2>
<p>Customize the AI to match Adriana James' brand voice and tone.</p> <p>Adriana James' brand voice and tone guidelines are fixed to maintain consistency.</p>
<div class="alert alert-info">
<i class="fas fa-info-circle"></i>
<span>The brand style is locked to Adriana James' distinct communication style—both in her written and spoken tone. This ensures all content maintains her authentic voice.</span>
</div>
</div> </div>
<div class="brand-style-form"> <div class="brand-style-form">
<div class="form-section"> <div class="form-section">
<h3>Brand Tone</h3> <h3>Brand Tone</h3>
<p>Select the tone options that best represent the brand.</p> <p>Adriana James' distinctive tone is characterized by:</p>
<div class="tag-selector" id="tone-selector"> <div class="tag-selector read-only" id="tone-selector">
<span class="tag selected">professional</span>
<span class="tag selected">friendly</span>
<span class="tag selected">inspirational</span>
<span class="tag selected">empowering</span> <span class="tag selected">empowering</span>
<span class="tag">excited</span> <span class="tag selected">assertive</span>
<span class="tag">authoritative</span> <span class="tag selected">inspirational</span>
<span class="tag">casual</span> <span class="tag selected">direct</span>
<span class="tag">humorous</span>
</div> </div>
<p class="style-description">Her tone carries a motivational coach-like clarity, using embedded commands and cause-effect statements that inspire action.</p>
</div> </div>
<div class="form-section"> <div class="form-section">
<h3>Voice Characteristics</h3> <h3>Voice Characteristics</h3>
<p>Define the key characteristics of the brand voice.</p> <p>Adriana James speaks with these distinctive characteristics:</p>
<div class="tag-selector" id="voice-selector"> <div class="tag-selector read-only" id="voice-selector">
<span class="tag selected">clear</span> <span class="tag selected">clear</span>
<span class="tag selected">direct</span>
<span class="tag selected">empowering</span>
<span class="tag selected">confident</span> <span class="tag selected">confident</span>
<span class="tag selected">authentic</span> <span class="tag selected">conversational</span>
<span class="tag">innovative</span> <span class="tag selected">teaching</span>
<span class="tag">visionary</span>
<span class="tag">approachable</span>
</div> </div>
<p class="style-description">She speaks with conviction and clarity, using simple language to communicate profound ideas. Instead of saying "This might help you," she would say "You can do this—because your unconscious mind already knows how."</p>
</div> </div>
<div class="form-section"> <div class="form-section">
@@ -339,8 +337,7 @@
</div> </div>
<div class="form-actions"> <div class="form-actions">
<button id="save-brand-style" class="btn btn-primary">Save Brand Style</button> <p class="style-note"><i class="fas fa-lock"></i> Brand style settings are locked to maintain Adriana James' authentic voice across all content.</p>
<button id="reset-brand-style" class="btn btn-outline">Reset to Defaults</button>
</div> </div>
</div> </div>
</section> </section>
+80 -17
View File
@@ -446,9 +446,30 @@ header {
} }
.result-content { .result-content {
padding: 25px;
white-space: pre-wrap; white-space: pre-wrap;
font-family: var(--font-family);
line-height: 1.6; line-height: 1.6;
padding: 20px;
background: white;
border-radius: 8px;
box-shadow: var(--shadow-sm);
}
.result-content ul,
.result-content ol {
padding-left: 20px;
margin: 1em 0;
}
.result-content p {
margin: 1em 0;
}
/* Style bullet points: target the list items themselves. Applying
   display: list-item to .result-content would turn the whole result
   container into a single bulleted, indented item. */
.result-content ul li {
    margin-left: 1em;
    display: list-item;
    list-style-type: disc;
}
.metadata-panel { .metadata-panel {
@@ -745,6 +766,48 @@ header {
margin-top: 10px; margin-top: 10px;
} }
.tag-selector.read-only .tag {
cursor: default;
}
.style-description {
margin-top: 15px;
font-style: italic;
color: var(--grey-600);
line-height: 1.6;
}
.style-note {
display: flex;
align-items: center;
color: var(--grey-600);
font-style: italic;
}
.style-note i {
margin-right: 8px;
color: var(--grey-500);
}
.alert {
padding: 15px;
border-radius: var(--radius-md);
margin-top: 15px;
display: flex;
align-items: center;
}
.alert i {
margin-right: 10px;
font-size: 18px;
}
.alert-info {
background-color: rgba(98, 54, 255, 0.1);
color: var(--primary-dark);
border-left: 4px solid var(--primary-color);
}
.tag { .tag {
display: inline-flex; display: inline-flex;
align-items: center; align-items: center;
@@ -885,40 +948,40 @@ header {
width: 80px; width: 80px;
padding: 15px 0; padding: 15px 0;
} }
.logo h2 { .logo h2 {
width: 50px; width: 50px;
height: 50px; height: 50px;
font-size: 20px; font-size: 20px;
} }
.menu li { .menu li {
justify-content: center; justify-content: center;
padding: 12px; padding: 12px;
} }
.menu li i { .menu li i {
margin-right: 0; margin-right: 0;
font-size: 20px; font-size: 20px;
} }
.menu li span { .menu li span {
display: none; display: none;
} }
.user-info { .user-info {
justify-content: center; justify-content: center;
padding: 10px; padding: 10px;
} }
.user-avatar { .user-avatar {
margin-right: 0; margin-right: 0;
} }
.user-name { .user-name {
display: none; display: none;
} }
.content { .content {
margin-left: 80px; margin-left: 80px;
max-width: calc(100vw - 80px); max-width: calc(100vw - 80px);
@@ -930,35 +993,35 @@ header {
flex-direction: column; flex-direction: column;
gap: 10px; gap: 10px;
} }
.templates-grid { .templates-grid {
grid-template-columns: 1fr; grid-template-columns: 1fr;
} }
.history-item, .history-item,
.training-item { .training-item {
flex-direction: column; flex-direction: column;
align-items: flex-start; align-items: flex-start;
} }
.history-item-type, .history-item-type,
.training-item-type { .training-item-type {
margin-bottom: 10px; margin-bottom: 10px;
} }
.history-item-content, .history-item-content,
.training-item-content { .training-item-content {
padding: 0; padding: 0;
margin-bottom: 10px; margin-bottom: 10px;
} }
.history-item-date { .history-item-date {
text-align: left; text-align: left;
margin-bottom: 10px; margin-bottom: 10px;
} }
.checkbox-group { .checkbox-group {
flex-direction: column; flex-direction: column;
gap: 10px; gap: 10px;
} }
} }
+420 -970
View File
File diff suppressed because it is too large Load Diff