# backend/copywriter.py

"""
Copywriter module for the Marketing Assistant AI.
Core AI-powered content generation using a fine-tuned LLM.
"""

import json
import os
import re
from typing import Any, Dict, List, Optional, Tuple

import httpx
from loguru import logger
from tenacity import (
    RetryError,
    retry,
    retry_if_not_exception_type,
    stop_after_attempt,
    wait_exponential,
)

import config
from brand_style import brand_style_manager
from vector_store import vector_store

class Copywriter:
    """Generate, refine and score marketing copy via Cohere's generate API.

    The class combines three collaborators visible in this module:
    ``brand_style_manager`` (prompt formatting and alignment checks),
    ``vector_store`` (lookup of similar past content) and the Cohere HTTP
    endpoint called through ``httpx``.
    """

    # Endpoint of Cohere's text-generation API.
    _API_URL = "https://api.cohere.ai/v1/generate"
    # Seconds to wait for a single Cohere request before giving up.
    _REQUEST_TIMEOUT = 30.0
    # Reference snippets longer than this many characters are truncated
    # before being embedded in the prompt.
    _REFERENCE_PREVIEW_CHARS = 300

    # Prompt instruction emitted for each supported ``length`` option.
    _LENGTH_INSTRUCTIONS = {
        "short": "Keep the content brief and to the point (under 100 words).",
        "medium": "Write a moderate amount of content (100-300 words).",
        "long": "Create comprehensive content with depth (over 300 words).",
    }

    # Call-to-action phrases matched as whole words, so that e.g. "country",
    # "industry" or "basically" are not mistaken for "try" / "call".
    _CTA_PATTERN = re.compile(
        r"\b(?:call|contact|get\s+started|try|buy|sign\s+up)\b",
        re.IGNORECASE,
    )
    # Any run of '.', '!' or '?' ends a sentence.
    _SENTENCE_END_PATTERN = re.compile(r"[.!?]+")

    def __init__(self):
        """Store the Cohere model name and API key read from ``config``."""
        self.model = "command"  # Cohere's generation model
        self.api_key = config.COHERE_API_KEY
        if not self.api_key:
            # Surface a missing key early instead of at the first API call.
            logger.warning("COHERE_API_KEY is empty; Cohere API calls will fail")
        logger.info("Copywriter initialized with Cohere API successfully")

    @classmethod
    def _build_full_prompt(
        cls,
        branded_prompt: str,
        tone: Optional[str],
        length: Optional[str],
        include_cta: bool,
        reference_content: List[str],
    ) -> str:
        """Append tone/length/CTA instructions and reference snippets to the prompt."""
        parts = [branded_prompt]

        if tone:
            parts.append(f"\n- Use a {tone} tone")

        if length:
            instruction = cls._LENGTH_INSTRUCTIONS.get(length)
            if instruction:
                parts.append(f"\n- {instruction}")
            else:
                # An unknown value used to add an empty "- " bullet to the prompt.
                logger.warning(f"Ignoring unsupported length option: {length!r}")

        if include_cta:
            parts.append("\n- Include a strong call to action at the end")

        if reference_content:
            parts.append(
                "\n\nFor reference, here are some similar pieces of content "
                "that have performed well in the past:"
            )
            limit = cls._REFERENCE_PREVIEW_CHARS
            for i, content in enumerate(reference_content, 1):
                # Truncate reference content if it's too long
                preview = content[:limit] + "..." if len(content) > limit else content
                parts.append(f"\n\nReference {i}:\n{preview}")
            parts.append(
                "\n\nUse these references for inspiration, but create original content."
            )

        return "".join(parts)

    # The LLM call below already retries on its own. Skipping RetryError here
    # keeps a failing Cohere request from being attempted 3 x 3 times while
    # still retrying failures of the other steps, and callers see the same
    # exception types as before.
    @retry(
        retry=retry_if_not_exception_type(RetryError),
        stop=stop_after_attempt(3),
        wait=wait_exponential(multiplier=1, min=4, max=10),
    )
    async def generate_copy(
        self,
        prompt: str,
        content_type: Optional[str] = None,
        tone: Optional[str] = None,
        length: Optional[str] = None,
        include_cta: bool = False,
        reference_similar_content: bool = True,
        max_tokens: int = 1000
    ) -> Dict[str, Any]:
        """
        Generate marketing copy based on the user prompt and parameters.

        Args:
            prompt: User prompt for content generation
            content_type: Type of content to generate
            tone: Desired tone of the content
            length: Desired length; one of "short", "medium" or "long".
                Any other value is ignored with a warning.
            include_cta: Whether to include a call to action
            reference_similar_content: Whether to fetch and reference similar content
            max_tokens: Maximum tokens for the generated response

        Returns:
            Dictionary with keys ``content``, ``suggestions`` and ``metadata``
            (``content_type``, ``tone``, ``alignment_score``, ``generated_at``),
            plus ``alignment_issues`` when the alignment check reports taboo
            words or terminology issues.

        Raises:
            tenacity.RetryError: When a step keeps failing after the retry
                budget is exhausted.
        """
        try:
            # Step 1: Format prompt with brand style guidelines
            branded_prompt = brand_style_manager.format_prompt_with_brand_style(prompt, content_type)

            # Step 2: Find similar content for reference (if enabled)
            reference_content: List[str] = []
            if reference_similar_content:
                search_results = await vector_store.search(prompt, top_k=3)
                if search_results:
                    reference_content = [result['text'] for result in search_results]

            # Steps 3-4: Add parameter-driven instructions and reference content
            full_prompt = self._build_full_prompt(
                branded_prompt, tone, length, include_cta, reference_content
            )

            # Step 5: Generate content using the LLM
            generated_content = await self._call_llm_api(full_prompt, max_tokens)

            # Step 6: Check content alignment with brand style
            alignment_check = brand_style_manager.check_content_alignment(generated_content)

            # Step 7: Generate alternative headline suggestions
            headline_suggestions = await self._generate_headline_suggestions(prompt, generated_content)

            # Step 8: Return the generated content with metadata
            result = {
                "content": generated_content,
                "suggestions": headline_suggestions,
                "metadata": {
                    "content_type": content_type,
                    "tone": tone,
                    "alignment_score": alignment_check['alignment_score'],
                    "generated_at": None  # Will be added by the API
                }
            }

            # Add alignment issues if any
            taboo_words = alignment_check['taboo_words_found']
            terminology_issues = alignment_check['terminology_issues']
            if taboo_words or terminology_issues:
                result["alignment_issues"] = {
                    "taboo_words_found": taboo_words,
                    "terminology_issues": terminology_issues
                }

            logger.info(f"Generated content with {len(generated_content)} characters")
            return result

        except Exception as e:
            logger.error(f"Error generating copy: {str(e)}")
            raise

    @retry(stop=stop_after_attempt(3), wait=wait_exponential(multiplier=1, min=4, max=10))
    async def _call_llm_api(self, prompt: str, max_tokens: int = 1000) -> str:
        """
        Call the Cohere API to generate content.

        Uses the model name and API key stored on the instance by ``__init__``.

        Args:
            prompt: The formatted prompt for the LLM
            max_tokens: Maximum tokens for the generated response

        Returns:
            Text of the first generation, stripped of surrounding whitespace

        Raises:
            tenacity.RetryError: When every attempt fails (non-200 status,
                transport error, or a response without generations).
        """
        try:
            async with httpx.AsyncClient() as client:
                response = await client.post(
                    self._API_URL,
                    headers={
                        "Authorization": f"Bearer {self.api_key}",
                        "Content-Type": "application/json"
                    },
                    json={
                        "model": self.model,
                        "prompt": prompt,
                        "max_tokens": max_tokens,
                        "temperature": 0.7,
                        "k": 0,
                        "p": 0.75
                    },
                    timeout=self._REQUEST_TIMEOUT
                )

                if response.status_code != 200:
                    logger.error(f"Cohere API error: {response.status_code}, {response.text}")
                    raise Exception(f"Cohere API error: {response.status_code}")

                generations = response.json().get("generations") or []
                if not generations:
                    # Fail with a clear message instead of an opaque IndexError.
                    raise Exception("Cohere API returned no generations")
                return generations[0]["text"].strip()

        except Exception as e:
            logger.error(f"Error calling Cohere API: {str(e)}")
            raise

    async def _generate_headline_suggestions(self, original_prompt: str, generated_content: str) -> List[str]:
        """
        Generate alternative headline suggestions based on the content.

        Currently a mock: both arguments are accepted but unused, and a
        fixed list of headlines is returned.

        Args:
            original_prompt: The original user prompt
            generated_content: The generated marketing content

        Returns:
            List of headline suggestions
        """
        # This would call the LLM to generate headlines
        # Simplified mock response for demonstration
        return [
            "Alternative Headline 1: Discover the Power of Adriana James' Solutions",
            "Alternative Headline 2: Transform Your Results with Adriana James",
            "Alternative Headline 3: The Adriana James Approach: Excellence Redefined"
        ]

    async def improve_copy(self, content: str, feedback: str) -> str:
        """
        Improve content based on user feedback.

        Args:
            content: Original generated content
            feedback: User feedback for improvement

        Returns:
            Improved content as returned by the LLM

        Raises:
            tenacity.RetryError: When the underlying LLM call keeps failing.
        """
        try:
            # Format prompt for improvement. The literal's indentation is part
            # of the text sent to the model and is kept as-is.
            improve_prompt = f"""
            Please improve the following marketing content based on the feedback provided:
            
            ORIGINAL CONTENT:
            {content}
            
            FEEDBACK:
            {feedback}
            
            IMPROVED CONTENT:
            """

            # Call LLM to improve content
            improved_content = await self._call_llm_api(improve_prompt, max_tokens=1200)

            logger.info("Improved content based on feedback")
            return improved_content

        except Exception as e:
            logger.error(f"Error improving content: {str(e)}")
            raise

    async def analyze_content_performance(self, content: str) -> Dict[str, Any]:
        """
        Analyze marketing content for performance prediction.

        This is a heuristic based on word count, sentence length and the
        presence of a call-to-action phrase; no model is involved.

        Args:
            content: Marketing content to analyze

        Returns:
            Dictionary with ``overall_score``, ``readability_score``,
            ``cta_effectiveness``, ``length_appropriateness``, a ``metrics``
            sub-dictionary and three ``improvement_suggestions``.
        """
        try:
            # This would be enhanced with actual ML models in production
            word_count = len(content.split())

            # Whole-word match: "industry" or "country" must not count as "try".
            has_cta = self._CTA_PATTERN.search(content) is not None

            # Count '!' and '?' as sentence ends too, not only '.'.
            sentences = [s for s in self._SENTENCE_END_PATTERN.split(content) if s.strip()]
            sentence_count = len(sentences)
            avg_words_per_sentence = word_count / max(1, sentence_count)

            # Simple scoring system: readability peaks at 15 words per
            # sentence and drops 5 points per word of deviation.
            readability_score = 100 - min(100, max(0, abs(avg_words_per_sentence - 15) * 5))
            cta_score = 90 if has_cta else 60
            # Full marks from 300 words upwards.
            length_score = min(100, max(0, word_count / 3))

            overall_score = (readability_score + cta_score + length_score) / 3

            return {
                "overall_score": round(overall_score, 1),
                "readability_score": round(readability_score, 1),
                "cta_effectiveness": round(cta_score, 1),
                "length_appropriateness": round(length_score, 1),
                "metrics": {
                    "word_count": word_count,
                    "sentence_count": sentence_count,
                    "avg_words_per_sentence": round(avg_words_per_sentence, 1),
                    "has_cta": has_cta
                },
                "improvement_suggestions": [
                    "Consider adding a stronger call to action" if cta_score < 80 else "Your call to action is effective",
                    "Try to use shorter sentences for better readability" if avg_words_per_sentence > 20 else "Your sentence length is good for readability",
                    "Consider adding more content for better engagement" if word_count < 100 else "Your content length is appropriate"
                ]
            }

        except Exception as e:
            logger.error(f"Error analyzing content: {str(e)}")
            raise

# Module-level shared instance. It is constructed when this module is first
# imported, so Copywriter.__init__ reads config.COHERE_API_KEY at import time.
copywriter = Copywriter()
feat: Initial implementation of Marketing Assistant AI for Adriana James 2025-04-17 08:50:12 +01:00			`"""`
			`Copywriter module for the Marketing Assistant AI.`
			`Core AI-powered content generation using a fine-tuned LLM.`
			`"""`

			`import os`
			`import json`
			`import httpx`
			`from typing import Dict, List, Any, Optional, Tuple`
			`from loguru import logger`
			`from tenacity import retry, stop_after_attempt, wait_exponential`

			`import config`
			`from brand_style import brand_style_manager`
			`from vector_store import vector_store`

			`class Copywriter:`
			`"""Generates marketing copy using a fine-tuned LLM."""`

			`def __init__(self):`
			`"""Initialize the Copywriter with Cohere LLM client."""`
			`self.model = "command" # Cohere's generation model`
			`self.api_key = config.COHERE_API_KEY`
			`logger.info("Copywriter initialized with Cohere API successfully")`

			`@retry(stop=stop_after_attempt(3), wait=wait_exponential(multiplier=1, min=4, max=10))`
			`async def generate_copy(`
			`self,`
			`prompt: str,`
			`content_type: Optional[str] = None,`
			`tone: Optional[str] = None,`
			`length: Optional[str] = None,`
			`include_cta: bool = False,`
			`reference_similar_content: bool = True,`
			`max_tokens: int = 1000`
			`) -> Dict[str, Any]:`
			`"""`
			`Generate marketing copy based on the user prompt and parameters.`

			`Args:`
			`prompt: User prompt for content generation`
			`content_type: Type of content to generate`
			`tone: Desired tone of the content`
			`length: Desired length of the content`
			`include_cta: Whether to include a call to action`
			`reference_similar_content: Whether to fetch and reference similar content`
			`max_tokens: Maximum tokens for the generated response`

			`Returns:`
			`Dictionary with generated content and metadata`
			`"""`
			`try:`
			`# Step 1: Format prompt with brand style guidelines`
			`branded_prompt = brand_style_manager.format_prompt_with_brand_style(prompt, content_type)`

			`# Step 2: Find similar content for reference (if enabled)`
			`reference_content = []`
			`if reference_similar_content:`
			`search_results = await vector_store.search(prompt, top_k=3)`
			`if search_results:`
			`reference_content = [result['text'] for result in search_results]`

			`# Step 3: Add additional instructions based on parameters`
			`full_prompt = branded_prompt`

			`if tone:`
			`full_prompt += f"\n- Use a {tone} tone"`

			`if length:`
			`length_instructions = {`
			`"short": "Keep the content brief and to the point (under 100 words).",`
			`"medium": "Write a moderate amount of content (100-300 words).",`
			`"long": "Create comprehensive content with depth (over 300 words)."`
			`}`
			`full_prompt += f"\n- {length_instructions.get(length, '')}"`

			`if include_cta:`
			`full_prompt += "\n- Include a strong call to action at the end"`

			`# Step 4: Add reference content if available`
			`if reference_content:`
			`full_prompt += "\n\nFor reference, here are some similar pieces of content that have performed well in the past:"`
			`for i, content in enumerate(reference_content, 1):`
			`# Truncate reference content if it's too long`
			`preview = content[:300] + "..." if len(content) > 300 else content`
			`full_prompt += f"\n\nReference {i}:\n{preview}"`

			`full_prompt += "\n\nUse these references for inspiration, but create original content."`

			`# Step 5: Generate content using the LLM`
			`generated_content = await self._call_llm_api(full_prompt, max_tokens)`

			`# Step 6: Check content alignment with brand style`
			`alignment_check = brand_style_manager.check_content_alignment(generated_content)`

			`# Step 7: Generate alternative headline suggestions`
			`headline_suggestions = await self._generate_headline_suggestions(prompt, generated_content)`

			`# Step 8: Return the generated content with metadata`
			`result = {`
			`"content": generated_content,`
			`"suggestions": headline_suggestions,`
			`"metadata": {`
			`"content_type": content_type,`
			`"tone": tone,`
			`"alignment_score": alignment_check['alignment_score'],`
			`"generated_at": None # Will be added by the API`
			`}`
			`}`

			`# Add alignment issues if any`
			`if alignment_check['taboo_words_found'] or alignment_check['terminology_issues']:`
			`result["alignment_issues"] = {`
			`"taboo_words_found": alignment_check['taboo_words_found'],`
			`"terminology_issues": alignment_check['terminology_issues']`
			`}`

			`logger.info(f"Generated content with {len(generated_content)} characters")`
			`return result`

			`except Exception as e:`
			`logger.error(f"Error generating copy: {str(e)}")`
			`raise`

			`@retry(stop=stop_after_attempt(3), wait=wait_exponential(multiplier=1, min=4, max=10))`
			`async def _call_llm_api(self, prompt: str, max_tokens: int = 1000) -> str:`
			`"""`
			`Call the Cohere API to generate content.`

			`Args:`
			`prompt: The formatted prompt for the LLM`
			`max_tokens: Maximum tokens for the generated response`

			`Returns:`
			`Generated content as a string`
			`"""`
			`try:`
			`# Use Cohere's generate API with the API key from config`
			`cohere_api_key = config.COHERE_API_KEY`

			`async with httpx.AsyncClient() as client:`
			`response = await client.post(`
			`"https://api.cohere.ai/v1/generate",`
			`headers={`
			`"Authorization": f"Bearer {cohere_api_key}",`
			`"Content-Type": "application/json"`
			`},`
			`json={`
			`"model": "command", # Cohere's generation model`
			`"prompt": prompt,`
			`"max_tokens": max_tokens,`
			`"temperature": 0.7,`
			`"k": 0,`
			`"p": 0.75`
			`},`
			`timeout=30.0`
			`)`

			`if response.status_code == 200:`
			`result = response.json()`
			`return result["generations"][0]["text"].strip()`
			`else:`
			`logger.error(f"Cohere API error: {response.status_code}, {response.text}")`
			`raise Exception(f"Cohere API error: {response.status_code}")`

			`except Exception as e:`
			`logger.error(f"Error calling Cohere API: {str(e)}")`
			`raise`

			`async def _generate_headline_suggestions(self, original_prompt: str, generated_content: str) -> List[str]:`
			`"""`
			`Generate alternative headline suggestions based on the content.`

			`Args:`
			`original_prompt: The original user prompt`
			`generated_content: The generated marketing content`

			`Returns:`
			`List of headline suggestions`
			`"""`
			`try:`
			`# This would call the LLM to generate headlines`
			`# Simplified mock response for demonstration`
			`return [`
			`"Alternative Headline 1: Discover the Power of Adriana James' Solutions",`
			`"Alternative Headline 2: Transform Your Results with Adriana James",`
			`"Alternative Headline 3: The Adriana James Approach: Excellence Redefined"`
			`]`
			`except Exception as e:`
			`logger.error(f"Error generating headline suggestions: {str(e)}")`
			`return []`

			`async def improve_copy(self, content: str, feedback: str) -> str:`
			`"""`
			`Improve content based on user feedback.`

			`Args:`
			`content: Original generated content`
			`feedback: User feedback for improvement`

			`Returns:`
			`Improved content`
			`"""`
			`try:`
			`# Format prompt for improvement`
			`improve_prompt = f"""`
			`Please improve the following marketing content based on the feedback provided:`

			`ORIGINAL CONTENT:`
			`{content}`

			`FEEDBACK:`
			`{feedback}`

			`IMPROVED CONTENT:`
			`"""`

			`# Call LLM to improve content`
			`improved_content = await self._call_llm_api(improve_prompt, max_tokens=1200)`

			`logger.info(f"Improved content based on feedback")`
			`return improved_content`

			`except Exception as e:`
			`logger.error(f"Error improving content: {str(e)}")`
			`raise`

			`async def analyze_content_performance(self, content: str) -> Dict[str, Any]:`
			`"""`
			`Analyze marketing content for performance prediction.`

			`Args:`
			`content: Marketing content to analyze`

			`Returns:`
			`Dictionary with analysis results`
			`"""`
			`try:`
			`# This would be enhanced with actual ML models in production`
			`# Simplified mock response for demonstration`

			`# Very basic analysis using length and keyword presence`
			`word_count = len(content.split())`
			`has_cta = any(phrase in content.lower() for phrase in ["call", "contact", "get started", "try", "buy", "sign up"])`
			`sentence_count = len([s for s in content.split(".") if s.strip()])`
			`avg_words_per_sentence = word_count / max(1, sentence_count)`

			`# Simple scoring system`
			`readability_score = 100 - min(100, max(0, abs(avg_words_per_sentence - 15) * 5))`
			`cta_score = 90 if has_cta else 60`
			`length_score = min(100, max(0, word_count / 3))`

			`overall_score = (readability_score + cta_score + length_score) / 3`

			`return {`
			`"overall_score": round(overall_score, 1),`
			`"readability_score": round(readability_score, 1),`
			`"cta_effectiveness": round(cta_score, 1),`
			`"length_appropriateness": round(length_score, 1),`
			`"metrics": {`
			`"word_count": word_count,`
			`"sentence_count": sentence_count,`
			`"avg_words_per_sentence": round(avg_words_per_sentence, 1),`
			`"has_cta": has_cta`
			`},`
			`"improvement_suggestions": [`
			`"Consider adding a stronger call to action" if cta_score < 80 else "Your call to action is effective",`
			`"Try to use shorter sentences for better readability" if avg_words_per_sentence > 20 else "Your sentence length is good for readability",`
			`"Consider adding more content for better engagement" if word_count < 100 else "Your content length is appropriate"`
			`]`
			`}`

			`except Exception as e:`
			`logger.error(f"Error analyzing content: {str(e)}")`
			`raise`

			`# Create a singleton instance`
			`copywriter = Copywriter()`