Initial commit of Marketing Assistant AI project, including backend setup with FastAPI, brand style management, and marketing copy generation features. Added .gitignore, README, and various data files for brand voice, past campaigns, and book excerpts. Implemented vector store for content retrieval and embeddings using Cohere API. Included HTML template for user interface.

This commit is contained in:
boladeE
2025-04-17 22:24:53 +01:00
commit e80ba5c0d7
18 changed files with 840 additions and 0 deletions
+154
View File
@@ -0,0 +1,154 @@
import os
import json
from typing import List, Dict, Any
import numpy as np
from PyPDF2 import PdfReader
from embeddings import CohereEmbeddings
from vector_store import VectorStore
from config import settings
class BrandStyleManager:
    """Load brand assets and retrieve book-excerpt context for prompts.

    On construction this loads the brand-voice JSON and the sample
    campaigns JSON, and creates the embedding client and vector store.
    Book PDFs are only indexed when ``_load_book_excerpts`` or
    ``update_book_excerpt`` is called; ``__init__`` does not index them.
    """

    # Paragraphs must be longer than this many characters to be embedded.
    MIN_CHUNK_LENGTH = 50

    def __init__(self):
        self.settings = settings
        self.embeddings = CohereEmbeddings()
        self.vector_store = VectorStore()
        self.brand_voice = self._load_brand_voice()
        self.sample_campaigns = self._load_sample_campaigns()

    def _load_brand_voice(self) -> Dict[str, Any]:
        """Load brand voice guidelines from JSON.

        Returns:
            The parsed JSON content, or an empty dict when the file is absent.
        """
        # NOTE: path is relative to the current working directory.
        file_path = "data/style_guidelines/brand_voice.json"
        if not os.path.exists(file_path):
            return {}
        with open(file_path, 'r', encoding='utf-8') as f:
            return json.load(f)

    def _load_sample_campaigns(self) -> List[Dict[str, Any]]:
        """Load sample campaigns from JSON.

        Returns:
            The list stored under the ``"campaigns"`` key, or an empty list
            when the file or the key is absent.
        """
        # NOTE: path is relative to the current working directory.
        file_path = "data/past_campaigns/sample_campaigns.json"
        if not os.path.exists(file_path):
            return []
        with open(file_path, 'r', encoding='utf-8') as f:
            data = json.load(f)
        return data.get("campaigns", [])

    def _extract_text_from_pdf(self, pdf_path: str) -> str:
        """Extract text from a PDF file.

        Pages are separated by a blank line. Extraction is best-effort: on
        any error the problem is printed and whatever text was gathered so
        far (possibly an empty string) is returned.
        """
        pages = []
        try:
            reader = PdfReader(pdf_path)
            for page in reader.pages:
                page_text = page.extract_text()
                if page_text:
                    pages.append(page_text + "\n\n")
        except Exception as e:
            print(f"Error extracting text from PDF: {e}")
        return "".join(pages)

    def _embed_chunks(self, content: str):
        """Split text into paragraph chunks and embed each sufficiently long one.

        Args:
            content: Raw text whose paragraphs are separated by blank lines.

        Returns:
            A ``(texts, embeddings)`` pair of equal-length lists.
        """
        texts = []
        vectors = []
        for raw_chunk in content.split('\n\n'):
            chunk = raw_chunk.strip()
            # Only process chunks with sufficient content.
            if len(chunk) > self.MIN_CHUNK_LENGTH:
                texts.append(chunk)
                vectors.append(self.embeddings.generate_embedding(chunk))
        return texts, vectors

    def _load_book_excerpts(self):
        """Load and index book excerpts from PDF files in the data directory.

        The data directory is ``../data`` relative to this module's file.
        A missing directory is reported and treated as "nothing to index".
        """
        book_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "data"))
        if not os.path.isdir(book_dir):
            print(f"Book directory not found: {book_dir}")
            return

        all_texts = []
        all_embeddings = []
        # Sorted so documents are indexed in a reproducible order.
        for filename in sorted(os.listdir(book_dir)):
            # Match the extension case-insensitively so "Book.PDF" is found too.
            if not filename.lower().endswith(".pdf"):
                continue
            file_path = os.path.join(book_dir, filename)
            print(f"Processing PDF file: {file_path}")
            content = self._extract_text_from_pdf(file_path)
            if not content:
                print(f"No text extracted from {file_path}")
                continue
            texts, vectors = self._embed_chunks(content)
            all_texts.extend(texts)
            all_embeddings.extend(vectors)

        # Add all content to the vector store in a single call.
        if all_texts and all_embeddings:
            print(f"Adding {len(all_texts)} chunks to vector store")
            self.vector_store.add_documents(all_texts, all_embeddings)
        else:
            print("No content found to add to vector store")

    def get_relevant_context(self, prompt: str, k: int = 5) -> List[Dict]:
        """Get relevant context for a given prompt from book excerpts.

        Args:
            prompt: The user's request text.
            k: Maximum number of excerpts to retrieve and keep after reranking.

        Returns:
            A list of ``{"text": ...}`` dicts in reranked order, or an empty
            list when the vector store returns no matches.
        """
        prompt_embedding = self.embeddings.generate_embedding(prompt)
        results = self.vector_store.search(prompt_embedding, k=k)
        if not results:
            return []
        texts = [result["text"] for result in results]
        reranked = self.embeddings.rerank_results(prompt, texts, top_n=k)
        # Convert reranked results to the expected format.
        return [{"text": text} for text in reranked]

    def get_brand_voice(self) -> Dict[str, Any]:
        """Get brand voice guidelines."""
        return self.brand_voice

    def get_sample_campaigns(self) -> List[Dict[str, Any]]:
        """Get sample campaigns."""
        return self.sample_campaigns

    def update_book_excerpt(self, pdf_path: str):
        """Add new book excerpt from PDF to the vector store.

        Args:
            pdf_path: Path of the PDF file to index.

        Raises:
            FileNotFoundError: If ``pdf_path`` does not exist.
            ValueError: If no text could be extracted from the PDF.
        """
        if not os.path.exists(pdf_path):
            raise FileNotFoundError(f"PDF file not found: {pdf_path}")
        content = self._extract_text_from_pdf(pdf_path)
        if not content:
            raise ValueError(f"No text extracted from PDF: {pdf_path}")

        all_texts, all_embeddings = self._embed_chunks(content)
        if all_texts and all_embeddings:
            self.vector_store.add_documents(all_texts, all_embeddings)
            print(f"Added {len(all_texts)} chunks from {pdf_path} to vector store")
        else:
            print(f"No content extracted from {pdf_path}")
# # Example usage
# if __name__ == "__main__":
# brand_style_manager = BrandStyleManager()
# # Example: Get relevant context for a marketing prompt
# prompt = "Generate a marketing campaign for an Umbrella company"
# context = brand_style_manager.get_relevant_context(prompt)
# # Print the context in a readable format
# print(f"Relevant context for prompt: '{prompt}'")
# for i, item in enumerate(context):
# print(f"\nReference {i+1}:")
# print(item["text"])
+41
View File
@@ -0,0 +1,41 @@
from dataclasses import dataclass
import os
from dotenv import load_dotenv
# Load environment variables from .env file
load_dotenv()
@dataclass
class Settings:
    """Application configuration, normally built from environment variables.

    The field defaults below are the single source of truth: ``from_env``
    falls back to them instead of repeating the literals.
    """

    # API Keys
    COHERE_API_KEY: str
    DEEPSEEK_API_KEY: str
    # Vector Store Settings
    VECTOR_DIMENSION: int = 1024  # Cohere's embed-english-v3.0 model dimension
    INDEX_PATH: str = "data/vector_store/index.faiss"
    # Content Settings
    MAX_CONTEXT_LENGTH: int = 2000
    DEFAULT_MODEL: str = "deepseek-chat"
    # Brand Settings
    BRAND_TONE: str = "professional and empathetic"
    BRAND_VOICE: str = "Adriana James"

    @classmethod
    def from_env(cls):
        """Create a Settings instance from environment variables.

        Unset variables fall back to the dataclass defaults (API keys fall
        back to an empty string). Integer variables that are set but blank
        are treated as unset rather than crashing.

        Raises:
            ValueError: If an integer variable holds a non-numeric value.
        """

        def env_str(name: str) -> str:
            # Dataclass defaults are exposed as class attributes.
            return os.getenv(name, getattr(cls, name))

        def env_int(name: str) -> int:
            raw = os.getenv(name)
            if raw is None or not raw.strip():
                return getattr(cls, name)
            try:
                return int(raw)
            except ValueError:
                raise ValueError(
                    f"Environment variable {name} must be an integer, got {raw!r}"
                ) from None

        return cls(
            COHERE_API_KEY=os.getenv("COHERE_API_KEY", ""),
            DEEPSEEK_API_KEY=os.getenv("DEEPSEEK_API_KEY", ""),
            VECTOR_DIMENSION=env_int("VECTOR_DIMENSION"),
            INDEX_PATH=env_str("INDEX_PATH"),
            MAX_CONTEXT_LENGTH=env_int("MAX_CONTEXT_LENGTH"),
            DEFAULT_MODEL=env_str("DEFAULT_MODEL"),
            BRAND_TONE=env_str("BRAND_TONE"),
            BRAND_VOICE=env_str("BRAND_VOICE"),
        )
# Global settings instance, built once at import time from the environment
# (after load_dotenv() above has read any .env file). Other modules import
# this object rather than constructing their own Settings.
settings = Settings.from_env()
+89
View File
@@ -0,0 +1,89 @@
from typing import List, Dict, Any
import requests
import json
from config import settings
from brand_style import BrandStyleManager
# Module-level brand style manager shared by generate_marketing_copy().
# NOTE: constructing it at import time creates the Cohere client and the
# vector store and reads the brand JSON files, so importing this module
# already performs file I/O.
brand_style_manager = BrandStyleManager()
class MarketingCopywriter:
    """Build prompts and call the DeepSeek chat-completions API for copy."""

    # Seconds to wait for DeepSeek. ``requests`` applies no timeout by
    # default, so without this a stalled connection would block forever.
    REQUEST_TIMEOUT_SECONDS = 60

    def __init__(self):
        self.settings = settings
        self.api_key = self.settings.DEEPSEEK_API_KEY
        self.api_url = "https://api.deepseek.com/v1/chat/completions"

    def _build_prompt(self, prompt: str, context: List[Dict], content_type: str, tone: str,
                      brand_voice: Dict[str, Any], sample_campaigns: List[Dict[str, Any]]) -> str:
        """Build a prompt for the LLM using context and parameters.

        Args:
            prompt: The user's request.
            context: Retrieved excerpts; each item must have a ``"text"`` key.
            content_type: Kind of content to produce (e.g. ``"email"``).
            tone: Tone the copy should maintain.
            brand_voice: Brand guidelines, embedded as indented JSON.
            sample_campaigns: Past campaigns; ``title``, ``subject`` and
                ``content`` keys are used and default to empty strings.

        Returns:
            The full prompt text to send as the user message.
        """
        # Format context from book excerpts.
        context_text = "\n".join(
            f"Reference {number}: {ctx['text']}"
            for number, ctx in enumerate(context, start=1)
        )
        # Format brand voice guidelines.
        brand_voice_text = json.dumps(brand_voice, indent=2)
        # Format sample campaigns.
        sample_campaigns_text = "".join(
            f"\nExample Campaign {number}:\n"
            f"Title: {campaign.get('title', '')}\n"
            f"Subject: {campaign.get('subject', '')}\n"
            f"Content:\n{campaign.get('content', '')}\n"
            for number, campaign in enumerate(sample_campaigns, start=1)
        )
        return f"""You are a professional marketing copywriter for {self.settings.BRAND_VOICE}.
Your task is to create {content_type} content that matches the following request: {prompt}
BRAND VOICE GUIDELINES:
{brand_voice_text}
SAMPLE CAMPAIGNS:
{sample_campaigns_text}
RELEVANT BOOK EXCERPTS:
{context_text}
Guidelines:
1. Maintain a {tone} tone throughout
2. Follow {self.settings.BRAND_VOICE}'s brand voice guidelines
3. Be persuasive and engaging
4. Include a clear call-to-action
5. Keep the content concise and impactful
Generate the marketing copy:"""

    def generate_copy(self, prompt: str, context: List[Dict], content_type: str, tone: str,
                      brand_voice: Dict[str, Any], sample_campaigns: List[Dict[str, Any]]) -> str:
        """Generate marketing copy using DeepSeek.

        Returns:
            The stripped text of the first completion choice.

        Raises:
            requests.exceptions.RequestException: On connection problems,
                timeouts, or a non-2xx HTTP status.
        """
        full_prompt = self._build_prompt(prompt, context, content_type, tone, brand_voice, sample_campaigns)
        headers = {
            "Authorization": f"Bearer {self.api_key}",
            "Content-Type": "application/json"
        }
        data = {
            "model": self.settings.DEFAULT_MODEL,
            "messages": [
                {"role": "system", "content": "You are a professional marketing copywriter."},
                {"role": "user", "content": full_prompt}
            ],
            "temperature": 0.7,
            "max_tokens": 1000
        }
        response = requests.post(
            self.api_url,
            headers=headers,
            json=data,
            timeout=self.REQUEST_TIMEOUT_SECONDS,
        )
        response.raise_for_status()
        return response.json()["choices"][0]["message"]["content"].strip()
def generate_marketing_copy(prompt: str, content_type: str = "email",
                            tone: str = "professional and empathetic") -> str:
    """Helper function to generate marketing copy.

    Retrieves relevant book excerpts, the brand voice and the sample
    campaigns from the module-level brand style manager and passes them to
    a fresh ``MarketingCopywriter``.

    Args:
        prompt: The user's request.
        content_type: Kind of content to produce; defaults to ``"email"``.
        tone: Tone to maintain; defaults to ``"professional and empathetic"``.

    Returns:
        The generated marketing copy.
    """
    copywriter = MarketingCopywriter()
    context = brand_style_manager.get_relevant_context(prompt)
    brand_voice = brand_style_manager.get_brand_voice()
    sample_campaigns = brand_style_manager.get_sample_campaigns()
    return copywriter.generate_copy(prompt, context, content_type, tone, brand_voice, sample_campaigns)
if __name__ == "__main__":
    # Manual smoke test. Guarded so that importing this module (as main.py
    # does) no longer triggers a real embedding + LLM request at import time.
    print(generate_marketing_copy("Generate a marketing campaign for our new comers"))
+37
View File
@@ -0,0 +1,37 @@
import cohere
from typing import List
import numpy as np
from config import settings
class CohereEmbeddings:
    """Thin wrapper around the Cohere client for embedding and reranking."""

    EMBED_MODEL = "embed-english-v3.0"
    RERANK_MODEL = "rerank-english-v2.0"

    def __init__(self):
        self.settings = settings
        self.client = cohere.Client(self.settings.COHERE_API_KEY)

    def generate_embedding(self, text: str, input_type: str = "search_document") -> np.ndarray:
        """Generate embeddings for a single text using Cohere.

        Args:
            text: The text to embed.
            input_type: Value forwarded as Cohere's ``input_type``. Defaults
                to ``"search_document"`` (the previous hard-coded value);
                callers embedding a search query can pass ``"search_query"``.

        Returns:
            The embedding of ``text`` as a 1-D NumPy array.
        """
        response = self.client.embed(
            texts=[text],
            model=self.EMBED_MODEL,
            input_type=input_type,
        )
        return np.array(response.embeddings[0])

    def rerank_results(self, query: str, documents: List[str], top_n: int = 5) -> List[str]:
        """Rerank documents based on relevance to the query.

        Args:
            query: The search query.
            documents: Candidate texts to reorder.
            top_n: Maximum number of results requested from the API.

        Returns:
            The documents in the order returned by the rerank API; an empty
            list when ``documents`` is empty (no API call is made).
        """
        if not documents:
            return []
        results = self.client.rerank(
            query=query,
            documents=documents,
            top_n=top_n,
            model=self.RERANK_MODEL,
        )
        # Map each returned index back to its document, skipping any index
        # that falls outside the input list.
        return [
            documents[result.index]
            for result in results.results
            if 0 <= result.index < len(documents)
        ]
+60
View File
@@ -0,0 +1,60 @@
from typing import Optional, List, Dict, Any

import uvicorn
from fastapi import FastAPI, HTTPException
from fastapi import Form
from fastapi import Request
from fastapi.middleware.cors import CORSMiddleware
from fastapi.templating import Jinja2Templates
from pydantic import BaseModel

from copywriter import generate_marketing_copy
from brand_style import BrandStyleManager
from config import settings
class CopyRequest(BaseModel):
    """Pydantic schema with a single ``prompt`` string.

    NOTE(review): not referenced by any route visible in this module —
    confirm whether it is still needed.
    """
    prompt: str
# The ASGI application served by uvicorn.
app = FastAPI(title="Marketing Assistant AI")
# Add CORS middleware
# NOTE(review): a wildcard origin combined with allow_credentials=True is
# very permissive; consider listing explicit origins before deploying.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"], # Allows all origins
    allow_credentials=True,
    allow_methods=["*"], # Allows all methods
    allow_headers=["*"], # Allows all headers
)
# Initialize templates
# NOTE: the directory is relative to the current working directory, so the
# server presumably has to be started from the repository root — confirm.
templates = Jinja2Templates(directory="backend/templates")
# Routes
@app.get("/")
def root(request: Request):
    """Render the landing page that contains the prompt form."""
    page_context = {"request": request}
    return templates.TemplateResponse("index.html", page_context)
@app.post("/generate-copy")
def create_marketing_copy(request: Request, prompt: str = Form(...)):
    """Generate marketing copy for the submitted prompt and render it.

    Registered for POST because the form in ``index.html`` submits with
    ``method="post"``; the previous GET registration answered that form
    with 405 and never read the prompt.

    Args:
        request: The incoming request (required by the template response).
        prompt: The ``prompt`` form field.

    Returns:
        The ``index.html`` page with ``generated_copy`` in its context.

    Raises:
        HTTPException: 400 when the prompt is blank, 500 when generation fails.
    """
    cleaned_prompt = prompt.strip()
    if not cleaned_prompt:
        raise HTTPException(status_code=400, detail="Prompt must not be empty")
    try:
        generated_copy = generate_marketing_copy(cleaned_prompt)
    except Exception as e:
        print(f"Error generating copy: {str(e)}")
        raise HTTPException(status_code=500, detail=str(e)) from e
    return templates.TemplateResponse(
        "index.html",
        {"request": request, "generated_copy": generated_copy},
    )
# Development entry point: serve the app on localhost:8000 with auto-reload.
# "main:app" is an import string, so this file is expected to be importable
# as the module `main`.
if __name__ == "__main__":
    uvicorn.run("main:app", host="localhost", port=8000, reload=True)
+11
View File
@@ -0,0 +1,11 @@
fastapi==0.104.1
uvicorn==0.24.0
python-dotenv==1.0.0
cohere==4.37
faiss-cpu==1.7.4
numpy==1.24.3
pydantic==2.4.2
python-multipart==0.0.6
deepseek-ai==0.1.0
requests==2.31.0
PyPDF2==3.0.1
+89
View File
@@ -0,0 +1,89 @@
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Marketing Assistant AI</title>
<style>
body {
font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
max-width: 800px;
margin: 0 auto;
padding: 20px;
background-color: #f5f5f5;
}
h1 {
color: #333;
text-align: center;
}
.container {
background-color: white;
border-radius: 8px;
padding: 20px;
box-shadow: 0 2px 10px rgba(0, 0, 0, 0.1);
}
.form-group {
margin-bottom: 20px;
}
label {
display: block;
margin-bottom: 5px;
font-weight: bold;
}
textarea {
width: 100%;
padding: 10px;
border: 1px solid #ddd;
border-radius: 4px;
font-size: 16px;
min-height: 100px;
resize: vertical;
}
button {
background-color: #4CAF50;
color: white;
border: none;
padding: 12px 20px;
border-radius: 4px;
cursor: pointer;
font-size: 16px;
width: 100%;
}
button:hover {
background-color: #45a049;
}
.response {
margin-top: 20px;
padding: 15px;
border: 1px solid #ddd;
border-radius: 4px;
background-color: #f9f9f9;
}
.response h3 {
margin-top: 0;
color: #333;
}
</style>
</head>
<body>
<h1>Marketing Assistant AI</h1>
<div class="container">
<form action="/generate-copy" method="post">
<div class="form-group">
<label for="prompt">Enter your marketing prompt:</label>
<textarea id="prompt" name="prompt" placeholder="Example: Generate a marketing campaign for an Umbrella company" required></textarea>
</div>
<button type="submit">Generate Marketing Copy</button>
</form>
{# The copy is model output derived from user input, so it is rendered
   escaped (no "safe" filter); pre-wrap keeps its line breaks visible. #}
{% if generated_copy %}
<div class="response">
<h3>Generated Marketing Copy:</h3>
<div style="white-space: pre-wrap;">{{ generated_copy }}</div>
</div>
{% endif %}
</div>
</body>
</html>
+70
View File
@@ -0,0 +1,70 @@
import faiss
import numpy as np
from typing import List, Dict
import json
import os
from config import settings
class VectorStore:
    """FAISS-backed store of text chunks and their embeddings.

    The FAISS index lives at ``settings.INDEX_PATH``; the matching list of
    ``{"text": ...}`` documents is kept in a JSON file next to it. Position
    ``i`` in the document list corresponds to vector ``i`` in the index.
    """

    def __init__(self):
        self.settings = settings
        self.index = None
        self.documents = []
        self._load_or_create_index()

    def _metadata_path(self) -> str:
        """Return the path of the JSON metadata file for the index.

        Built from the index path without its extension, so it can never
        equal the index path itself (``index.faiss`` ->
        ``index_metadata.json``).
        """
        base, _extension = os.path.splitext(self.settings.INDEX_PATH)
        return base + "_metadata.json"

    def _ensure_index_dir(self):
        """Create the directory that holds the index, if the path has one."""
        index_dir = os.path.dirname(self.settings.INDEX_PATH)
        # A bare filename has no directory part; makedirs("") would raise.
        if index_dir:
            os.makedirs(index_dir, exist_ok=True)

    def _load_or_create_index(self):
        """Load existing index or create a new one."""
        self._ensure_index_dir()
        if os.path.exists(self.settings.INDEX_PATH):
            self.index = faiss.read_index(self.settings.INDEX_PATH)
            # Load documents metadata
            metadata_path = self._metadata_path()
            if os.path.exists(metadata_path):
                with open(metadata_path, 'r', encoding='utf-8') as f:
                    self.documents = json.load(f)
        else:
            self.index = faiss.IndexFlatL2(self.settings.VECTOR_DIMENSION)

    def add_documents(self, texts: List[str], embeddings: List[np.ndarray]):
        """Add new documents to the vector store.

        Args:
            texts: Document texts.
            embeddings: One embedding per text, in the same order.

        Raises:
            ValueError: If ``texts`` and ``embeddings`` differ in length.
        """
        if len(texts) != len(embeddings):
            raise ValueError("Number of texts and embeddings must match")
        if not texts:
            # Nothing to add; avoid handing FAISS an empty, shapeless array.
            return
        # FAISS works on float32 matrices of shape (n_vectors, dimension).
        vectors = np.array(embeddings, dtype='float32')
        self.index.add(vectors)
        # Update documents list
        self.documents.extend({"text": text} for text in texts)
        # Save index and metadata
        self._save_index()

    def search(self, query_embedding: np.ndarray, k: int = 5) -> List[Dict]:
        """Search for similar documents.

        Args:
            query_embedding: Embedding of the query.
            k: Maximum number of results.

        Returns:
            Up to ``k`` dicts with ``"text"`` and ``"score"`` (the distance
            reported by the index), in the order the index returns them.
        """
        if k <= 0:
            return []
        query = np.asarray(query_embedding).reshape(1, -1).astype('float32')
        distances, indices = self.index.search(query, k)
        results = []
        for idx, distance in zip(indices[0], distances[0]):
            position = int(idx)
            # FAISS pads missing hits with -1; a plain "< len" check would
            # let -1 through and silently select the last document.
            if 0 <= position < len(self.documents):
                results.append({
                    "text": self.documents[position]["text"],
                    "score": float(distance)
                })
        return results

    def _save_index(self):
        """Save the index and metadata to disk."""
        self._ensure_index_dir()
        faiss.write_index(self.index, self.settings.INDEX_PATH)
        # Save metadata
        with open(self._metadata_path(), 'w', encoding='utf-8') as f:
            json.dump(self.documents, f)