commit 859c17aad8837dbcc03d9888920237e7d57f50ab Author: boladeE Date: Wed Apr 16 23:09:52 2025 +0100 feat: Implement Pinecone vector store integration - Update config.py with Pinecone settings and model configurations - Implement VectorStore class with Pinecone backend - Add comprehensive vector operations (add, search, delete) - Set up proper error handling and metadata management - Add .gitignore for Python project diff --git a/.env.example b/.env.example new file mode 100644 index 0000000..c9beaa2 --- /dev/null +++ b/.env.example @@ -0,0 +1,16 @@ +# API Keys +COHERE_API_KEY=your_cohere_api_key_here + +# Model Settings +MODEL_NAME=gpt2 +EMBEDDING_MODEL=embed-english-v3.0 + +# Server Settings +HOST=0.0.0.0 +PORT=8000 +DEBUG=True + +# Storage Settings +VECTOR_STORE_PATH=data/vector_store +PAST_CAMPAIGNS_PATH=data/past_campaigns +USER_QUERIES_PATH=data/user_queries \ No newline at end of file diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..c41fa9b --- /dev/null +++ b/.gitignore @@ -0,0 +1,69 @@ +# Environment variables +.env + +# Python +__pycache__/ +*.py[cod] +*$py.class +*.so +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +*.egg-info/ +.installed.cfg +*.egg + +# Virtual Environment +venv/ +env/ +ENV/ +.venv +env.bak/ +venv.bak/ + +# IDE +.idea/ +.vscode/ +*.swp +*.swo +.DS_Store + +# Project specific +data/vector_store/ +data/past_campaigns/ +data/user_queries/ +finetuned_model/ +*.log +.cohere/ +.pinecone/ + +# Jupyter Notebook +.ipynb_checkpoints +*.ipynb + +# Testing +.coverage +htmlcov/ +.pytest_cache/ +.tox/ + +# Distribution +*.tar.gz +*.zip +*.rar +*.7z + +# Local development +local_settings.py +db.sqlite3 +*.db \ No newline at end of file diff --git a/FINETUNE_README.md b/FINETUNE_README.md new file mode 100644 index 0000000..69e7b07 --- /dev/null +++ b/FINETUNE_README.md @@ -0,0 +1,133 @@ +# LLM Finetuning with Hugging Face + +This repository contains scripts for 
finetuning language models using Hugging Face's transformers library. + +## Setup + +1. Install the required dependencies: + +```bash +pip install -r requirements.txt +``` + +2. Make sure you have enough GPU memory for finetuning. For smaller models like OPT-350M, 8GB should be sufficient. For larger models, you may need more. + +## Finetuning a Model + +The `finetune_model.py` script allows you to finetune a language model using a JSON dataset containing prompts and completions. + +### Basic Usage + +```bash +python finetune_model.py +``` + +This will use the default settings: +- Dataset: `datasets/adriana_finetune_dataset.json` +- Model: `facebook/opt-350m` (a more capable model than GPT-2) +- Output directory: `finetuned_model` +- Training epochs: 3 +- Batch size: 4 +- Learning rate: 5e-5 + +### Model Options + +The script is configured to use `facebook/opt-350m` by default, which is a more capable model than GPT-2. Here are some other good options you can use by modifying the `model_name` in the `Args` class: + +- **Smaller models** (faster training, less memory): + - `facebook/opt-125m` (125M parameters) + - `EleutherAI/pythia-70m` (70M parameters) + - `facebook/opt-350m` (350M parameters) + +- **Medium models** (better quality, more memory): + - `facebook/opt-1.3b` (1.3B parameters) + - `EleutherAI/pythia-1.4b` (1.4B parameters) + - `facebook/opt-2.7b` (2.7B parameters) + +- **Large models** (best quality, requires significant memory): + - `facebook/opt-6.7b` (6.7B parameters) + - `EleutherAI/pythia-6.9b` (6.9B parameters) + +For very large models, it's recommended to use LoRA by setting `use_lora = True` in the `Args` class. 
+ +### Advanced Usage + +You can customize the finetuning process by modifying the `Args` class in the `finetune_model.py` file: + +```python +class Args: + def __init__(self): + self.dataset_path = "datasets/adriana_finetune_dataset.json" + self.model_name = "facebook/opt-1.3b" # Change to a different model + self.output_dir = "finetuned_model" + self.num_train_epochs = 5 # Increase epochs for better results + self.per_device_train_batch_size = 2 # Adjust based on your GPU memory + self.learning_rate = 3e-5 # Adjust learning rate + self.use_lora = True # Enable LoRA for efficient finetuning +``` + +## Using LoRA for Efficient Finetuning + +LoRA (Low-Rank Adaptation) is a technique that allows for efficient finetuning of large language models by only training a small number of parameters. This is especially useful when you have limited computational resources. + +To use LoRA, simply set `self.use_lora = True` in the `Args` class: + +```python +class Args: + def __init__(self): + # ... other settings ... + self.use_lora = True # Enable LoRA +``` + +## Dataset Format + +The script expects a JSON file with the following format: + +```json +[ + { + "prompt": "Your prompt here", + "completion": "Your completion here" + }, + ... 
+] +``` + +## Using the Finetuned Model + +After finetuning, you can use the model with the Hugging Face transformers library: + +```python +from transformers import AutoModelForCausalLM, AutoTokenizer + +# Load the finetuned model and tokenizer +model_path = "finetuned_model" +tokenizer = AutoTokenizer.from_pretrained(model_path) +model = AutoModelForCausalLM.from_pretrained(model_path) + +# Generate text +prompt = "Create a welcome message for new clients" +inputs = tokenizer(prompt, return_tensors="pt") +outputs = model.generate(**inputs, max_length=100, num_return_sequences=1) +generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True) +print(generated_text) +``` + +## Testing the Model + +You can test the finetuned model using the `test_model.py` script: + +```bash +python test_model.py +``` + +This will load the finetuned model from the `finetuned_model` directory and generate text for the default prompt. You can modify the prompt in the `test_model.py` file to test different inputs. + +## Troubleshooting + +- If you encounter CUDA out of memory errors, try: + - Using a smaller model + - Reducing the batch size + - Using LoRA by setting `self.use_lora = True` +- For very large models, consider using 8-bit quantization with `load_in_8bit=True` when loading the model +- If you're finetuning on a CPU, the process will be much slower. Consider using a smaller model or fewer epochs \ No newline at end of file diff --git a/README.md b/README.md new file mode 100644 index 0000000..5f99f89 --- /dev/null +++ b/README.md @@ -0,0 +1,114 @@ +# Adriana James Marketing Assistant AI + +This project fine-tunes a language model to generate marketing content in the voice and style of Adriana James, based on her book content, past campaigns, and style guidelines. 
+ +## Project Structure + +- `generate_dataset.py`: Script to generate fine-tuning datasets from book content, past campaigns, and style guidelines +- `finetune_model.py`: Script to fine-tune the model using the generated datasets +- `data/`: Directory containing source data + - `book.pdf`: Adriana James' book content + - `past_campaigns/`: Directory containing past marketing campaigns + - `style_guidelines/`: Directory containing brand style guidelines +- `datasets/`: Directory containing generated fine-tuning datasets +- `adriana_model/`: Directory containing the fine-tuned model + +## Setup + +1. Install the required dependencies: + ``` + pip install -r requirements.txt + ``` + +2. Generate the fine-tuning datasets: + ``` + python generate_dataset.py + ``` + This will create the following datasets in the `datasets/` directory: + - `stage1_book_content.json`: Dataset for fine-tuning on book content + - `stage2_marketing_content.json`: Dataset for fine-tuning on marketing content + - `stage3_style_alignment.json`: Dataset for fine-tuning on style alignment + - `combined_dataset.json`: Combined dataset for all stages + +## Fine-tuning the Model + +The fine-tuning process follows a progressive approach with three stages: + +1. **Stage 1**: Fine-tune on book content to establish Adriana James' core voice +2. **Stage 2**: Fine-tune on marketing content to adapt to marketing formats +3. 
**Stage 3**: Fine-tune on style alignment to ensure style consistency + +### Running the Fine-tuning Script + +To run the complete progressive fine-tuning process: + +``` +python finetune_model.py --stage all +``` + +To run a specific stage: + +``` +python finetune_model.py --stage 1 # Fine-tune on book content only +python finetune_model.py --stage 2 # Fine-tune on marketing content only +python finetune_model.py --stage 3 # Fine-tune on style alignment only +``` + +### Command-line Arguments + +- `--model_name`: Base model to fine-tune (default: "mistralai/Mistral-7B-v0.1") +- `--output_dir`: Directory to save the fine-tuned model (default: "adriana_model") +- `--stage`: Fine-tuning stage (choices: "1", "2", "3", "all", default: "all") +- `--num_epochs`: Number of epochs for each stage (default: 3) +- `--seed`: Random seed for reproducibility (default: 42) + +## Model Selection + +The default base model is Mistral-7B-v0.1, which is a good balance between performance and resource requirements. For better results, you can use larger models like: + +- `meta-llama/Llama-2-13b-hf` (requires access) +- `tiiuae/falcon-40b` (larger model with good performance) +- `google/flan-t5-xxl` (good for instruction following) + +To use a different model, specify it with the `--model_name` argument: + +``` +python finetune_model.py --model_name tiiuae/falcon-40b +``` + +## Hardware Requirements + +Fine-tuning large language models requires significant computational resources: + +- **Minimum**: 16GB GPU RAM (for 7B parameter models) +- **Recommended**: 24GB+ GPU RAM (for 13B+ parameter models) +- **Optimal**: Multiple GPUs or a high-end GPU with 40GB+ RAM + +For models larger than 7B parameters, you may need to use techniques like: +- 8-bit quantization (already enabled in the script) +- Gradient checkpointing +- LoRA or QLoRA fine-tuning + +## Using the Fine-tuned Model + +After fine-tuning, the model will be saved in the `adriana_model/final` directory. 
You can load and use it with the Transformers library: + +```python +from transformers import AutoModelForCausalLM, AutoTokenizer + +# Load the fine-tuned model +model_path = "adriana_model/final" +tokenizer = AutoTokenizer.from_pretrained(model_path) +model = AutoModelForCausalLM.from_pretrained(model_path) + +# Generate content +prompt = "Write a marketing email for a professional development workshop." +inputs = tokenizer(prompt, return_tensors="pt") +outputs = model.generate(**inputs, max_length=200, num_return_sequences=1) +generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True) +print(generated_text) +``` + +## License + +This project is licensed under the MIT License - see the LICENSE file for details. diff --git a/backend/brand_style.py b/backend/brand_style.py new file mode 100644 index 0000000..6ab2ebe --- /dev/null +++ b/backend/brand_style.py @@ -0,0 +1,113 @@ +from typing import Dict, List +import json +import os +from transformers import pipeline +import numpy as np + +class BrandStyleChecker: + def __init__(self): + self.style_guidelines = self._load_style_guidelines() + self.classifier = pipeline( + "text-classification", + model="distilbert-base-uncased-finetuned-sst-2-english", + device=-1 + ) + self.tone_keywords = self._load_tone_keywords() + + def _load_style_guidelines(self) -> Dict: + """Load brand style guidelines from file.""" + guidelines_path = 'data/style_guidelines/brand_guidelines.json' + if os.path.exists(guidelines_path): + with open(guidelines_path, 'r') as f: + return json.load(f) + return { + "tone": "professional yet approachable", + "voice": "confident and authoritative", + "key_phrases": [], + "avoided_phrases": [], + "brand_values": [] + } + + def _load_tone_keywords(self) -> Dict[str, List[str]]: + """Load tone keywords for analysis.""" + keywords_path = 'data/style_guidelines/tone_keywords.json' + if os.path.exists(keywords_path): + with open(keywords_path, 'r') as f: + return json.load(f) + return { + 
"professional": ["expert", "professional", "industry", "experience"], + "approachable": ["friendly", "helpful", "understand", "support"], + "confident": ["guaranteed", "proven", "success", "expertise"], + "authoritative": ["leading", "best", "premier", "trusted"] + } + + def check_alignment(self, content: str) -> float: + """ + Check how well the content aligns with the brand style. + + Args: + content: The content to check + + Returns: + Alignment score between 0 and 1 + """ + scores = [] + + # Check sentiment alignment + sentiment_score = self._check_sentiment(content) + scores.append(sentiment_score) + + # Check keyword presence + keyword_score = self._check_keywords(content) + scores.append(keyword_score) + + # Check tone consistency + tone_score = self._check_tone_consistency(content) + scores.append(tone_score) + + # Calculate final score (weighted average) + weights = [0.3, 0.4, 0.3] # Adjust weights based on importance + final_score = np.average(scores, weights=weights) + + return float(final_score) + + def _check_sentiment(self, content: str) -> float: + """Check if the sentiment aligns with brand guidelines.""" + result = self.classifier(content)[0] + # Assuming positive sentiment (score > 0.5) is desired + return result['score'] if result['label'] == 'POSITIVE' else 1 - result['score'] + + def _check_keywords(self, content: str) -> float: + """Check presence of brand-aligned keywords.""" + content_lower = content.lower() + total_keywords = sum(len(keywords) for keywords in self.tone_keywords.values()) + found_keywords = sum( + sum(1 for keyword in keywords if keyword in content_lower) + for keywords in self.tone_keywords.values() + ) + return found_keywords / total_keywords if total_keywords > 0 else 0.0 + + def _check_tone_consistency(self, content: str) -> float: + """Check consistency with brand tone guidelines.""" + # This is a simplified version - in practice, you might want to use + # more sophisticated NLP techniques or a fine-tuned model + 
content_lower = content.lower() + tone_matches = 0 + total_checks = 0 + + # Check for professional tone + if any(word in content_lower for word in ["we", "our", "us"]): + tone_matches += 1 + total_checks += 1 + + # Check for approachable tone + if any(word in content_lower for word in ["you", "your", "help", "support"]): + tone_matches += 1 + total_checks += 1 + + # Check for confident tone + if any(word in content_lower for word in ["guarantee", "proven", "expert"]): + tone_matches += 1 + total_checks += 1 + + return tone_matches / total_checks if total_checks > 0 else 0.0 \ No newline at end of file diff --git a/backend/config.py b/backend/config.py new file mode 100644 index 0000000..3dbc173 --- /dev/null +++ b/backend/config.py @@ -0,0 +1,47 @@ +from pydantic_settings import BaseSettings +from typing import Optional +import os +from dotenv import load_dotenv + +load_dotenv() + +class Settings(BaseSettings): + # API Keys + COHERE_API_KEY: str = os.getenv('COHERE_API_KEY', '') + PINECONE_API_KEY: str = os.getenv('PINECONE_API_KEY', '') + + # Model Settings + MODEL_NAME: str = "facebook/opt-350m" # Using the finetuned model + EMBEDDING_MODEL: str = "embed-english-v3.0" + + # Vector Store Settings + VECTOR_DIMENSION: int = 1024 # embed-english-v3.0 returns 1024-dimensional vectors; the Pinecone index dimension must match + MAX_SEARCH_RESULTS: int = 5 + + # Pinecone Settings + PINECONE_ENVIRONMENT: str = os.getenv('PINECONE_ENVIRONMENT', 'us-west1-gcp') + PINECONE_INDEX_NAME: str = os.getenv('PINECONE_INDEX_NAME', 'marketing-assistant') + + # Content Generation Settings + MAX_CONTENT_LENGTH: int = 500 + TEMPERATURE: float = 0.7 + TOP_P: float = 0.9 + + # Brand Style Settings + BRAND_GUIDELINES_PATH: str = "data/style_guidelines/brand_guidelines.json" + TONE_KEYWORDS_PATH: str = "data/style_guidelines/tone_keywords.json" + + # Storage Settings + VECTOR_STORE_PATH: str = "data/vector_store" + PAST_CAMPAIGNS_PATH: str = "data/past_campaigns" + USER_QUERIES_PATH: str = "data/user_queries" + + # Finetuned Model Settings + 
FINETUNED_MODEL_PATH: str = "../finetuned_model" + + class Config: + env_file = ".env" + case_sensitive = True + +# Create global settings instance +settings = Settings() \ No newline at end of file diff --git a/backend/copywriter.py b/backend/copywriter.py new file mode 100644 index 0000000..2e2f331 --- /dev/null +++ b/backend/copywriter.py @@ -0,0 +1,101 @@ +from transformers import pipeline +from typing import List, Optional +import torch +from finetuned_model import finetuned_model + +class MarketingCopywriter: + def __init__(self): + # Use the finetuned model instead of the default GPT-2 + self.model = finetuned_model + + def generate( + self, + prompt: str, + content_type: str, + similar_content: List[str], + tone: Optional[str] = None, + target_audience: Optional[str] = None, + ) -> str: + # Generate the marketing copy using the finetuned model + generated_texts = self.model.generate_with_context( + prompt=prompt, + content_type=content_type, + similar_content=similar_content, + tone=tone, + target_audience=target_audience, + max_length=500, + num_return_sequences=1, + temperature=0.7, + top_p=0.9 + ) + + # Return the first generated text + return generated_texts[0] if generated_texts else "" + + def _build_context( + self, + prompt: str, + content_type: str, + similar_content: List[str], + tone: Optional[str], + target_audience: Optional[str] + ) -> str: + context = f"Content Type: {content_type}\n" + if tone: + context += f"Tone: {tone}\n" + if target_audience: + context += f"Target Audience: {target_audience}\n" + + context += "\nSimilar Content Examples:\n" + for content in similar_content[:3]: # Use top 3 similar content pieces + context += f"- {content}\n" + + context += f"\nGenerate marketing copy for: {prompt}\n" + return context + + def _post_process(self, text: str) -> str: + # Clean up the generated text + text = text.strip() + # Add any additional post-processing steps here + return text + +# Initialize the copywriter +copywriter = MarketingCopywriter() + +def generate_marketing_copy( + prompt: str, + content_type: str, + 
similar_content: List[str], + tone: Optional[str] = None, + target_audience: Optional[str] = None +) -> str: + """ + Generate marketing copy based on the given parameters. + + Args: + prompt: The main prompt for content generation + content_type: Type of content (email, social media, etc.) + similar_content: List of similar content for context + tone: Optional tone specification + target_audience: Optional target audience specification + + Returns: + Generated marketing copy + """ + return copywriter.generate( + prompt=prompt, + content_type=content_type, + similar_content=similar_content, + tone=tone, + target_audience=target_audience + ) + + +if __name__ == "__main__": # manual smoke test; must not run when the API imports this module + generate_marketing_copy( + prompt="Help me write a blog post about the benefits of using our product", + content_type="blog post", + similar_content=[], + tone="", + target_audience="" + ) diff --git a/backend/embeddings.py b/backend/embeddings.py new file mode 100644 index 0000000..e844892 --- /dev/null +++ b/backend/embeddings.py @@ -0,0 +1,55 @@ +import cohere +import numpy as np +from typing import List, Union +import os +from dotenv import load_dotenv + +load_dotenv() + +class CohereEmbeddings: + def __init__(self): + self.api_key = os.getenv('COHERE_API_KEY') + if not self.api_key: + raise ValueError("COHERE_API_KEY environment variable is not set") + self.client = cohere.Client(self.api_key) + + def generate(self, text: Union[str, List[str]]) -> np.ndarray: + """ + Generate embeddings for the given text using Cohere. + + Args: + text: Single text string or list of texts + + Returns: + numpy array of embeddings + """ + if isinstance(text, str): + text = [text] + + response = self.client.embed( + texts=text, + model='embed-english-v3.0', + input_type='search_document' + ) + + return np.array(response.embeddings) + + def generate_batch(self, texts: List[str], batch_size: int = 96) -> List[np.ndarray]: + """ + Generate embeddings for a large batch of texts. 
+ + Args: + texts: List of texts to generate embeddings for + batch_size: Size of each batch + + Returns: + List of numpy arrays containing embeddings + """ + all_embeddings = [] + + for i in range(0, len(texts), batch_size): + batch = texts[i:i + batch_size] + embeddings = self.generate(batch) + all_embeddings.extend(embeddings) + + return all_embeddings \ No newline at end of file diff --git a/backend/finetuned_model.py b/backend/finetuned_model.py new file mode 100644 index 0000000..f4e8666 --- /dev/null +++ b/backend/finetuned_model.py @@ -0,0 +1,158 @@ +import os +import torch +from transformers import AutoModelForCausalLM, AutoTokenizer +from typing import List, Optional, Dict, Any + +class FinetunedModel: + def __init__(self, model_path: str = "../finetuned_model"): + """ + Initialize the finetuned model. + + Args: + model_path: Path to the finetuned model directory + """ + self.model_path = model_path + self.device = "cuda" if torch.cuda.is_available() else "cpu" + + print(f"Loading finetuned model from {model_path}") + self.tokenizer = AutoTokenizer.from_pretrained(model_path) + self.model = AutoModelForCausalLM.from_pretrained(model_path) + self.model.to(self.device) + + # Set pad token if not set + if self.tokenizer.pad_token is None: + self.tokenizer.pad_token = self.tokenizer.eos_token + + def generate( + self, + prompt: str, + max_length: int = 200, + num_return_sequences: int = 1, + temperature: float = 0.7, + top_p: float = 0.9, + **kwargs + ) -> List[str]: + """ + Generate text using the finetuned model. 
+ + Args: + prompt: The prompt to generate text from + max_length: Maximum length of the generated text + num_return_sequences: Number of sequences to generate + temperature: Sampling temperature (higher = more random) + top_p: Nucleus sampling parameter + **kwargs: Additional arguments to pass to the model + + Returns: + List of generated text sequences + """ + # Format the prompt + formatted_prompt = f"Prompt: {prompt}\nCompletion:" + + # Tokenize the prompt + inputs = self.tokenizer(formatted_prompt, return_tensors="pt") + inputs = {k: v.to(self.device) for k, v in inputs.items()} + + # Generate text + with torch.no_grad(): + outputs = self.model.generate( + **inputs, + max_length=max_length, + num_return_sequences=num_return_sequences, + temperature=temperature, + top_p=top_p, + do_sample=True, + pad_token_id=self.tokenizer.eos_token_id, + **kwargs + ) + + # Decode the generated text + generated_texts = [] + for output in outputs: + generated_text = self.tokenizer.decode(output, skip_special_tokens=True) + # Extract just the completion part + completion = generated_text.split("Completion:")[-1].strip() + generated_texts.append(completion) + + return generated_texts + + def generate_with_context( + self, + prompt: str, + content_type: str, + similar_content: List[str], + tone: Optional[str] = None, + target_audience: Optional[str] = None, + max_length: int = 200, + num_return_sequences: int = 1, + temperature: float = 0.7, + top_p: float = 0.9, + **kwargs + ) -> List[str]: + """ + Generate text with additional context. + + Args: + prompt: The main prompt for content generation + content_type: Type of content (email, social media, etc.) 
+ similar_content: List of similar content for context + tone: Optional tone specification + target_audience: Optional target audience specification + max_length: Maximum length of the generated text + num_return_sequences: Number of sequences to generate + temperature: Sampling temperature (higher = more random) + top_p: Nucleus sampling parameter + **kwargs: Additional arguments to pass to the model + + Returns: + List of generated text sequences + """ + # Build the context + context = self._build_context(prompt, content_type, similar_content, tone, target_audience) + + # Generate text + return self.generate( + prompt=context, + max_length=max_length, + num_return_sequences=num_return_sequences, + temperature=temperature, + top_p=top_p, + **kwargs + ) + + def _build_context( + self, + prompt: str, + content_type: str, + similar_content: List[str], + tone: Optional[str], + target_audience: Optional[str] + ) -> str: + """ + Build a context string for the model. + + Args: + prompt: The main prompt for content generation + content_type: Type of content (email, social media, etc.) 
+ similar_content: List of similar content for context + tone: Optional tone specification + target_audience: Optional target audience specification + + Returns: + Context string for the model + """ + context = f"Content Type: {content_type}\n" + if tone: + context += f"Tone: {tone}\n" + if target_audience: + context += f"Target Audience: {target_audience}\n" + + context += "\nSimilar Content Examples:\n" + for content in similar_content[:3]: # Use top 3 similar content pieces + context += f"- {content}\n" + + context += f"\nGenerate marketing copy for: {prompt}\n" + return context + +# Initialize the model +finetuned_model = FinetunedModel() \ No newline at end of file diff --git a/backend/main.py b/backend/main.py new file mode 100644 index 0000000..fc69a66 --- /dev/null +++ b/backend/main.py @@ -0,0 +1,93 @@ +from fastapi import FastAPI, HTTPException +from pydantic import BaseModel +from typing import Optional, List +import uvicorn +from copywriter import generate_marketing_copy +from vector_store import VectorStore +from embeddings import CohereEmbeddings +from brand_style import BrandStyleChecker +from config import Settings +from finetuned_model import finetuned_model + +app = FastAPI(title="Marketing Assistant AI") +settings = Settings() +vector_store = VectorStore() +embeddings = CohereEmbeddings() +brand_checker = BrandStyleChecker() + +class CopyRequest(BaseModel): + prompt: str + content_type: str + tone: Optional[str] = None + target_audience: Optional[str] = None + +class CopyResponse(BaseModel): + content: str + confidence_score: float + brand_alignment_score: float + +class DirectModelRequest(BaseModel): + prompt: str + max_length: Optional[int] = 200 + num_return_sequences: Optional[int] = 1 + temperature: Optional[float] = 0.7 + top_p: Optional[float] = 0.9 + +class DirectModelResponse(BaseModel): + generated_texts: List[str] + +@app.post("/generate-copy", response_model=CopyResponse) +async def create_marketing_copy(request: CopyRequest): + try: 
+ # Generate embeddings for the prompt (generate() returns one row per text; take the single row as a flat list of floats) + prompt_embedding = embeddings.generate(request.prompt)[0].tolist() + + # Retrieve similar content from vector store (search() returns dicts; the copywriter expects plain strings) + similar_content = [match['content'] for match in vector_store.search(prompt_embedding)] + + # Generate marketing copy + content = generate_marketing_copy( + prompt=request.prompt, + content_type=request.content_type, + similar_content=similar_content, + tone=request.tone, + target_audience=request.target_audience + ) + + # Check brand alignment + brand_alignment = brand_checker.check_alignment(content) + + return CopyResponse( + content=content, + confidence_score=0.85, # This should be calculated based on model confidence + brand_alignment_score=brand_alignment + ) + except Exception as e: + raise HTTPException(status_code=500, detail=str(e)) + +@app.post("/direct-model", response_model=DirectModelResponse) +async def direct_model_inference(request: DirectModelRequest): + """ + Direct inference using the finetuned model without using the vector store or other components. + This endpoint is useful for testing the model directly. 
+ """ + try: + # Generate text using the finetuned model + generated_texts = finetuned_model.generate( + prompt=request.prompt, + max_length=request.max_length, + num_return_sequences=request.num_return_sequences, + temperature=request.temperature, + top_p=request.top_p + ) + + return DirectModelResponse(generated_texts=generated_texts) + except Exception as e: + raise HTTPException(status_code=500, detail=str(e)) + +@app.get("/health") +async def health_check(): + return {"status": "healthy"} + +if __name__ == "__main__": + uvicorn.run("main:app", host="localhost", port=8000, reload=True) \ No newline at end of file diff --git a/backend/requirements.txt b/backend/requirements.txt new file mode 100644 index 0000000..2fb657f --- /dev/null +++ b/backend/requirements.txt @@ -0,0 +1,12 @@ +fastapi==0.104.1 +uvicorn==0.24.0 +cohere==4.37 +faiss-cpu==1.7.4 +python-dotenv==1.0.0 +pydantic==2.4.2 +numpy==1.24.3 +transformers==4.35.2 +torch==2.1.1 +python-multipart==0.0.6 +PyPDF2==3.0.1 +pycryptodome==3.17.1 \ No newline at end of file diff --git a/backend/vector_store.py b/backend/vector_store.py new file mode 100644 index 0000000..374e811 --- /dev/null +++ b/backend/vector_store.py @@ -0,0 +1,97 @@ +import pinecone +from typing import List, Dict, Any, Optional +import uuid +from config import settings + +class VectorStore: + def __init__(self): + # Initialize Pinecone + pinecone.init( + api_key=settings.PINECONE_API_KEY, + environment=settings.PINECONE_ENVIRONMENT + ) + + # Create or get the index + if settings.PINECONE_INDEX_NAME not in pinecone.list_indexes(): + pinecone.create_index( + name=settings.PINECONE_INDEX_NAME, + dimension=settings.VECTOR_DIMENSION, + metric="cosine" + ) + + self.index = pinecone.Index(settings.PINECONE_INDEX_NAME) + + def add_content(self, content: str, metadata: Optional[Dict[str, Any]] = None) -> str: + """ + Add content to the vector store with optional metadata. + Returns the ID of the added content. 
+ """ + content_id = str(uuid.uuid4()) + + # Prepare metadata + if metadata is None: + metadata = {} + metadata['content'] = content + + # Upsert the vector with metadata + self.index.upsert( + vectors=[(content_id, [0] * settings.VECTOR_DIMENSION, metadata)], + namespace="content" + ) + + return content_id + + def search(self, query_vector: List[float], top_k: int = settings.MAX_SEARCH_RESULTS) -> List[Dict[str, Any]]: + """ + Search for similar content using a query vector. + Returns a list of dictionaries containing content and metadata. + """ + results = self.index.query( + vector=query_vector, + top_k=top_k, + include_metadata=True, + namespace="content" + ) + + return [ + { + 'id': match.id, + 'content': match.metadata['content'], + 'score': match.score, + **{k: v for k, v in match.metadata.items() if k != 'content'} + } + for match in results.matches + ] + + def get_all_content(self) -> List[Dict[str, Any]]: + """ + Retrieve all content from the vector store. + """ + # Fetch all vectors from the index + results = self.index.query( + vector=[0] * settings.VECTOR_DIMENSION, + top_k=10000, # Adjust based on your needs + include_metadata=True, + namespace="content" + ) + + return [ + { + 'id': match.id, + 'content': match.metadata['content'], + **{k: v for k, v in match.metadata.items() if k != 'content'} + } + for match in results.matches + ] + + def delete_content(self, content_id: str) -> bool: + """ + Delete content from the vector store by ID. + Returns True if successful, False otherwise. 
+ """ + try: + self.index.delete(ids=[content_id], namespace="content") + return True + except Exception as e: + print(f"Error deleting content: {e}") + return False \ No newline at end of file diff --git a/clean_dataset.py b/clean_dataset.py new file mode 100644 index 0000000..aff2aa3 --- /dev/null +++ b/clean_dataset.py @@ -0,0 +1,91 @@ +import json +import re +import unicodedata +from typing import Dict, List, Any + +def clean_text(text: str) -> str: + """ + Clean text by removing special characters, normalizing quotes, and fixing formatting. + + Args: + text: The text to clean + + Returns: + Cleaned text + """ + # Remove null bytes and control characters + text = re.sub(r'[\x00-\x1F\x7F-\x9F]', '', text) + + # Normalize curly quotes to ASCII (written as \u escapes so the literals survive encoding damage) + text = text.replace('\u201c', '"').replace('\u201d', '"') + text = text.replace('\u2018', "'").replace('\u2019', "'") + + # Fix spacing around punctuation + text = re.sub(r'\s+([.,!?])', r'\1', text) + text = re.sub(r'([.,!?])\s*([A-Z])', r'\1 \2', text) + + # Normalize whitespace + text = re.sub(r'\s+', ' ', text) + + # Fix common formatting issues + text = text.replace(' .', '.') + text = text.replace(' ,', ',') + text = text.replace(' !', '!') + text = text.replace(' ?', '?') + + # Remove any remaining special characters + text = ''.join(char for char in text if unicodedata.category(char)[0] != 'C') + + return text.strip() + +def clean_dataset_item(item: Dict[str, Any]) -> Dict[str, Any]: + """ + Clean a single dataset item. 
+ + Args: + item: The dataset item to clean + + Returns: + Cleaned dataset item + """ + # Clean the prompt + if 'prompt' in item: + item['prompt'] = clean_text(item['prompt']) + + # Clean the completion + if 'completion' in item: + item['completion'] = clean_text(item['completion']) + + # Clean any metadata fields + for key, value in item.items(): + if isinstance(value, str): + item[key] = clean_text(value) + + return item + +def process_dataset(input_file: str, output_file: str) -> None: + """ + Process the dataset file, cleaning all items and writing to a new file. + + Args: + input_file: Path to input dataset file + output_file: Path to output cleaned dataset file + """ + # Read the input dataset + with open(input_file, 'r', encoding='utf-8') as f: + dataset = json.load(f) + + # Clean each item + cleaned_dataset = [clean_dataset_item(item) for item in dataset] + + # Write the cleaned dataset + with open(output_file, 'w', encoding='utf-8') as f: + json.dump(cleaned_dataset, f, indent=2, ensure_ascii=False) + + print(f"Cleaned dataset saved to {output_file}") + print(f"Processed {len(cleaned_dataset)} items") + +if __name__ == "__main__": + input_file = "datasets/stage1_book_content.json" + output_file = "datasets/stage1_book_content_cleaned.json" + process_dataset(input_file, output_file) \ No newline at end of file diff --git a/data/book.pdf b/data/book.pdf new file mode 100644 index 0000000..ca837a2 Binary files /dev/null and b/data/book.pdf differ diff --git a/data/style_guidelines/brand_guidelines.json b/data/style_guidelines/brand_guidelines.json new file mode 100644 index 0000000..fc7668e --- /dev/null +++ b/data/style_guidelines/brand_guidelines.json @@ -0,0 +1,24 @@ +{ + "tone": "professional yet approachable", + "voice": "confident and authoritative", + "key_phrases": [ + "expert guidance", + "proven results", + "industry leader", + "trusted advisor" + ], + "avoided_phrases": [ + "guaranteed", + "best ever", + "number one", + "cheap", + "discount" + ], + 
"brand_values": [ + "expertise", + "trust", + "professionalism", + "innovation", + "client success" + ] +} \ No newline at end of file diff --git a/data/style_guidelines/tone_keywords.json b/data/style_guidelines/tone_keywords.json new file mode 100644 index 0000000..f07b07f --- /dev/null +++ b/data/style_guidelines/tone_keywords.json @@ -0,0 +1,42 @@ +{ + "professional": [ + "expert", + "professional", + "industry", + "experience", + "qualified", + "specialist", + "consultant", + "advisor" + ], + "approachable": [ + "friendly", + "helpful", + "understand", + "support", + "assist", + "guide", + "partner", + "collaborate" + ], + "confident": [ + "proven", + "success", + "expertise", + "results", + "achievement", + "accomplishment", + "track record", + "demonstrated" + ], + "authoritative": [ + "leading", + "premier", + "trusted", + "established", + "recognized", + "respected", + "industry standard", + "benchmark" + ] +} \ No newline at end of file diff --git a/datasets/adriana_finetune_dataset.json b/datasets/adriana_finetune_dataset.json new file mode 100644 index 0000000..41a57e3 --- /dev/null +++ b/datasets/adriana_finetune_dataset.json @@ -0,0 +1,366 @@ +[ + { + "prompt": "Create a welcome message for new clients", + "completion": "We're honored to partner with you on your professional journey. As an industry leader in transformative coaching, we bring decades of proven expertise to help you achieve exceptional outcomes. Your success is our priority, and we're committed to providing the strategic guidance you need every step of the way." + }, + { + "prompt": "Write a blog introduction about building professional confidence", + "completion": "Professional confidence isn't an innate trait—it's a skill that can be developed through intentional practice and strategic mindset shifts. Drawing on insights from leadership experts and organizational psychologists, this article shares practical approaches to building the authentic confidence that drives career success." 
+ }, + { + "prompt": "Write about overcoming communication barriers", + "completion": "Communication barriers often represent value level mismatches. The Metaphysical Thinker speaks a different language than the Family-first Loyalist—literally. My methodology provides the tools to bridge these gaps by identifying core values and adapting your approach accordingly. This isn't about changing who you are; it's about expanding your ability to connect across the values spectrum for more effective personal and professional relationships." + }, + { + "prompt": "Create a call-to-action for our webinar", + "completion": "Reserve your spot today to gain expert insights that will transform your approach to leadership. This exclusive session will provide actionable strategies you can implement immediately to drive meaningful results in your organization." + }, + { + "prompt": "Write an email subject line for a leadership webinar", + "completion": "Elevate Your Leadership: Expert Strategies for Professional Growth" + }, + { + "prompt": "Create a post about work-life balance", + "completion": "Achieving True Work-Life Integration: A Professional's Guide\\n\\nIn my years as an executive coach, I've learned that balance isn't about perfect equality - it's about intentional alignment. Here's what that means:\\n\\n1. Set boundaries that honor both professional ambitions and personal wellbeing\\n2. Practice 'selective excellence' - not everything deserves your A-game\\n3. Build systems that create efficiency at work, freeing up personal time\\n4. Regularly reassess priorities as circumstances change\\n\\nThe most successful professionals I work with don't chase balance - they design it. What's one change you could make this week to move toward better integration?\\n\\n#WorkLifeBalance #ExecutivePerformance #ProfessionalWellbeing" + }, + { + "prompt": "Explain how values affect business success", + "completion": "Business success isn't just about strategy—it's about values alignment. 
An Innovative Materialist will build companies differently than a Faithful Follower. Through my work with executives, I've documented how understanding these differences creates competitive advantage. When you can predict how clients, partners, and employees will respond based on their value levels, you gain the proven results that come from strategic values-based decision making." + }, + { + "prompt": "Write a social media post about career resilience", + "completion": "In today's dynamic professional landscape, resilience isn't optional—it's essential. Learn how industry leaders navigate challenges and setbacks while maintaining momentum toward their goals. Our approach combines psychological insights with practical strategies for building lasting career resilience. #CareerResilience #ProfessionalGrowth" + }, + { + "prompt": "Write about developing emotional intelligence", + "completion": "Emotional intelligence development accelerates when you understand value levels. Each stage—from Self-centered Addict to Galactic Consciousness—represents different emotional capacities and challenges. My work provides the framework to identify your current emotional strengths while consciously developing the next level's capabilities. This structured approach leads to measurable growth in both personal and professional relationships." + }, + { + "prompt": "Create a LinkedIn post about overcoming imposter syndrome", + "completion": "Imposter Syndrome Doesn't Mean You Don't Belong - It Means You're Growing\\n\\nIn my work with high-achieving professionals, I've found that imposter syndrome often strikes when we're on the verge of important growth. Here's what helps:\\n\\n1. Recognize it as evidence of high standards, not inadequacy\\n2. Reframe 'fraudulent feelings' as the discomfort of learning\\n3. Create an 'evidence file' of your competencies and achievements\\n4. 
Practice speaking about your work with confidence (not arrogance)\\n\\nRemember: Every industry leader I've worked with has faced this challenge. The difference is they learned to move forward despite the doubt.\\n\\nWhat strategy has helped you overcome professional self-doubt?\\n\\n#ImposterSyndrome #ProfessionalGrowth #LeadershipDevelopment" + }, + { + "prompt": "Write an email subject line for a women's leadership event", + "completion": "Empowering Women Leaders: Strategies for Breaking Barriers" + }, + { + "prompt": "Write a product description for our business strategy course", + "completion": "Our comprehensive business strategy course delivers expert guidance to help professionals make impactful decisions with confidence. Developed by industry leaders, this program combines proven methodologies with innovative approaches to strategic thinking. Participants gain actionable insights to drive organizational success and personal career growth." + }, + { + "prompt": "Write a social media post about our new mentoring program", + "completion": "Accelerate your professional journey with guidance from those who've navigated the path before you. Our mentoring program connects ambitious professionals with industry leaders committed to sharing their expertise. It's more than advice—it's a strategic partnership for career growth. #ProfessionalMentoring #CareerDevelopment" + }, + { + "prompt": "Give advice about dealing with difficult people", + "completion": "When facing difficult individuals, remember: their behavior stems from their core value level. A Self-centered Addict operates fundamentally differently than a Family-first Loyalist. As your trusted advisor in human behavior, I recommend first identifying their value level—this understanding allows you to frame communication in ways they can hear. It's not about manipulation; it's about speaking their values language to create productive outcomes." 
+ }, + { + "prompt": "Write about setting and achieving goals", + "completion": "Goal achievement depends on aligning objectives with your current value level. A Self-centered Addict's goals differ fundamentally from those of a Realistic Solution-finder. My methodology helps you set goals that resonate with your authentic developmental stage while providing a pathway to growth. This expert guidance ensures your aspirations motivate rather than frustrate, leading to consistent progress and proven results." + }, + { + "prompt": "Draft a response to a request for references", + "completion": "We'd be pleased to connect you with clients who have experienced the transformative impact of our work. Their success stories demonstrate the tangible results our expert guidance can deliver for organizations like yours." + }, + { + "prompt": "Write a social media post about our upcoming business strategy workshop", + "completion": "Strategic thinking separates good businesses from great ones. Join our upcoming workshop to learn the frameworks and approaches used by industry leaders to make impactful business decisions. You'll leave with actionable strategies to drive growth and innovation in your organization. #BusinessStrategy #ProfessionalDevelopment" + }, + { + "prompt": "Give advice about sales strategies", + "completion": "Effective sales strategies must account for value level differences. A Faithful Follower responds to social proof, while an Innovative Materialist wants facts that challenge conventional wisdom. As a trusted advisor to sales organizations, I teach professionals to quickly identify prospects' value levels and adapt their approach accordingly. This methodology consistently delivers superior results by speaking to each client's core motivations." 
+ }, + { + "prompt": "Create a newsletter introduction about networking strategies", + "completion": "Building Meaningful Professional Connections\\n\\nIn my work with top professionals, I've consistently observed that strategic networking is a cornerstone of career success. This month, I'm sharing insights on:\\n\\n- The art of cultivating authentic professional relationships\\n- Networking strategies that go beyond exchanging business cards\\n- How to leverage your network for career advancement opportunities\\n- Common networking pitfalls and how to avoid them\\n\\nThese proven approaches have helped countless clients expand their influence and open new doors. I hope you find them valuable for your own professional journey.\\n\\nWarm regards,\\nAdriana James" + }, + { + "prompt": "Write an email introduction for a career advancement workshop", + "completion": "Dear [Name], In today's competitive professional landscape, strategic career advancement requires more than just hard work—it demands a clear roadmap and expert insights. Our upcoming workshop is designed to provide you with the tools and strategies used by industry leaders to accelerate their career trajectories." + }, + { + "prompt": "Write an email introduction for a career transition program", + "completion": "Dear [Name], Career transitions represent both challenge and opportunity. Whether you're moving up, moving over, or reinventing your professional path, our program provides the expert guidance and proven frameworks used by successful professionals navigating similar changes. With our support, you can approach your transition with confidence and clarity." + }, + { + "prompt": "Write a product description for our professional women's leadership program", + "completion": "Designed specifically for professional women, our leadership program addresses the unique challenges and opportunities women face in leadership roles. 
Through expert guidance and proven methodologies, participants develop the skills, strategies, and confidence to advance their careers and make meaningful organizational impact." + }, + { + "prompt": "Write a response to a media inquiry", + "completion": "As a recognized authority in leadership development, we'd be pleased to contribute expert commentary on this important topic. Our research-based insights could provide valuable perspective for your audience." + }, + { + "prompt": "Write a social media post about personal growth", + "completion": "True personal growth begins with understanding your current value level—are you an Independent Rebel pushing against constraints, or evolving into a Solution-finder who values functionality? My research shows that conscious awareness of your position on this spectrum accelerates development. This isn't just theory; it's practical wisdom that enhances decision-making and life satisfaction. Want to explore where you stand? Let's discuss." + }, + { + "prompt": "Create a value proposition for our services", + "completion": "We empower leaders to achieve extraordinary results through expert guidance, research-backed strategies, and personalized support. Our clients gain the competitive advantage that comes from working with an industry leader committed to their success." 
+ }, + { + "prompt": "Write an email about our new leadership program", + "completion": "Subject: Elevate Your Leadership Potential with Proven Strategies\\n\\nDear [Name],\\n\\nI'm pleased to introduce our new Leadership Excellence Program, designed to help professionals like you develop the skills and confidence to lead effectively in today's competitive landscape.\\n\\nDrawing from my experience as an industry leader in professional development, this program offers:\\n- Research-backed leadership frameworks\\n- Real-world case studies and applications\\n- Personalized growth assessments\\n- Strategic decision-making tools\\n\\nJoin a community of driven professionals committed to achieving exceptional results. Let's work together to unlock your full leadership potential.\\n\\nBest regards,\\nAdriana James\\nLeadership Development Expert" + }, + { + "prompt": "Draft a post about work-life balance", + "completion": "True leadership excellence isn't about sacrifice - it's about strategic integration. Our proven approaches help executives achieve peak performance while maintaining personal wellbeing. Because sustainable success requires balance as much as it does achievement." + }, + { + "prompt": "Write an email subject line for a professional development newsletter", + "completion": "Your Monthly Dose of Professional Growth Strategies" + }, + { + "prompt": "Write a blog introduction about managing professional stress", + "completion": "High-performing professionals don't eliminate stress—they learn to manage it effectively. Drawing on research from organizational psychologists and leadership experts, this article shares practical, evidence-based strategies for maintaining peak performance while managing the pressures of professional life." + }, + { + "prompt": "Write a social media post about our new executive coaching program", + "completion": "Ready to take your leadership to the next level? 
Our executive coaching program provides personalized, expert guidance to help you achieve proven results. As an industry leader in professional development, we're committed to being your trusted advisor on the path to exceptional leadership. #LeadershipDevelopment #ExecutiveCoaching" + }, + { + "prompt": "Create a response to a negative review", + "completion": "We appreciate your feedback and take all client experiences seriously. While we're proud of our track record of delivering exceptional results, we're always looking to improve. We'd welcome the opportunity to discuss your concerns directly and explore how we might better meet your expectations." + }, + { + "prompt": "Write a blog title about negotiation skills", + "completion": "Mastering the Art of Professional Negotiation: Strategies That Create Win-Win Outcomes" + }, + { + "prompt": "Write an email announcing a promotion", + "completion": "We're pleased to share that [Name] has been promoted to [Position], recognizing their exceptional contributions to our clients' success. In this expanded role, they'll bring even more expert guidance to our leadership development programs." + }, + { + "prompt": "Write an email about a premium coaching package", + "completion": "Subject: Exclusive Opportunity: Premium Executive Coaching Experience\\n\\nDear [Name],\\n\\nFor professionals committed to transformative growth, I'm offering an exclusive premium coaching package with enhanced benefits:\\n\\n- Intensive 1:1 sessions with personalized strategy development\\n- Priority access between sessions for real-time guidance\\n- Comprehensive assessments and progress tracking\\n- Exclusive resources and tools not available elsewhere\\n\\nThis elite program is designed for executives and professionals who demand the highest level of expert guidance to achieve exceptional results. 
Space is strictly limited to ensure each client receives my full attention and support.\\n\\nIf you're ready to invest in your professional transformation at the highest level, I'd welcome the opportunity to discuss whether this program might be right for you.\\n\\nBest regards,\\nAdriana James\\nExecutive Coach & Strategist" + }, + { + "prompt": "Write a response to a pricing inquiry", + "completion": "Our programs represent a strategic investment in professional excellence, with pricing reflecting the exceptional value and expert guidance we provide. We'd be pleased to discuss options that align with your specific goals and deliver optimal return on investment." + }, + { + "prompt": "Write a follow-up email after a networking event", + "completion": "It was a pleasure connecting with you at [Event Name]. I enjoyed our conversation about [Topic] and believe our expertise in leadership development could provide valuable solutions for your current challenges. I'd welcome the opportunity to continue our discussion." + }, + { + "prompt": "Create a headline for our case study", + "completion": "How Expert Coaching Transformed Leadership Performance at [Company Name]" + }, + { + "prompt": "Write an email about a new book release", + "completion": "Subject: Announcing My New Book: [Title] - Your Guide to [Topic]\\n\\nDear [Name],\\n\\nI'm thrilled to share that my new book, [Title], is now available. This comprehensive guide distills [X years] of experience helping professionals like you achieve [specific outcomes].\\n\\nInside, you'll discover:\\n- Proven frameworks for [key topic 1]\\n- Actionable strategies for [key topic 2]\\n- Real-world case studies from my work with top professionals\\n- Practical exercises to implement what you learn\\n\\nAs someone committed to your professional growth, I'm offering exclusive bonuses for early purchasers, including [describe valuable bonus].\\n\\nThis represents my latest contribution to helping professionals excel in [field]. 
I hope you'll find it valuable.\\n\\nBest regards,\\nAdriana James\\nAuthor & [Your Professional Title]" + }, + { + "prompt": "Write a welcome email for new coaching clients", + "completion": "Subject: Welcome to Your Professional Transformation\\n\\nDear [Name],\\n\\nI'm delighted to welcome you to our executive coaching program. This marks the beginning of an important journey toward achieving your professional aspirations.\\n\\nWith expert guidance tailored to your unique goals, we'll work together to:\\n- Identify and leverage your strengths\\n- Develop strategies for career advancement\\n- Build confidence in leadership situations\\n- Create measurable action plans for success\\n\\nYour first session has been scheduled for [date]. In the meantime, I've attached some preparatory materials to help you hit the ground running.\\n\\nI look forward to supporting your growth and celebrating your achievements.\\n\\nWarm regards,\\nAdriana James\\nExecutive Coach & Career Strategist" + }, + { + "prompt": "Write a social media post about time management for executives", + "completion": "Executive success isn't about doing more—it's about doing what matters most. Discover the time management strategies used by top performers to maximize productivity without sacrificing strategic thinking. Our expert approach helps you focus on high-impact activities that drive real results. #TimeManagement #ExecutiveProductivity" + }, + { + "prompt": "Write a blog title about innovation in business", + "completion": "Driving Business Innovation: How Industry Leaders Stay Ahead of the Curve" + }, + { + "prompt": "Write a blog title about effective communication in the workplace", + "completion": "The Art of Influential Communication: Strategies That Drive Professional Success" + }, + { + "prompt": "Draft a response to a client asking about our approach", + "completion": "Our methodology combines cutting-edge research with practical application, tailored to your unique goals. 
As your trusted advisor, we focus on sustainable strategies rather than quick fixes. This proven approach has helped countless professionals achieve breakthrough results in their careers and organizations." + }, + { + "prompt": "Explain why understanding personal values is important", + "completion": "Understanding personal values isn't just psychological insight—it's your roadmap to success in all areas of life. As an industry leader in human behavior, I've seen how recognizing value levels gives you the key to predicting behavior, both yours and others'. When you know what drives someone at their core, you gain expert guidance in navigating relationships, business decisions, and personal growth. This knowledge helps avoid costly misunderstandings while creating opportunities for meaningful connection and achievement." + }, + { + "prompt": "Write a short introduction about workplace relationships", + "completion": "Workplace relationships often fail not because of competence issues, but because of unexamined value clashes. Through my proven methodology, you'll discover how to identify whether you're dealing with a rule-following Faithful Follower or an innovative Materialist—two types who often misunderstand each other profoundly. This insight transforms workplace dynamics from frustrating to productive, creating environments where different value levels complement rather than conflict." + }, + { + "prompt": "Write an email introduction for a personal branding consultation", + "completion": "Dear [Name], Your professional brand is your most valuable career asset. In a competitive marketplace, a strong personal brand communicates your unique value and opens doors to new opportunities. Our consultation provides expert insights into how you can strategically develop and communicate your professional identity for maximum impact." 
+ }, + { + "prompt": "Write about the importance of self-awareness", + "completion": "Self-awareness is the foundation of all personal and professional success, and it begins with understanding your value level. Through my work as an industry leader in human behavior, I've documented how this knowledge provides a roadmap for growth. Whether you're a Self-centered Addict focused on survival or evolving toward Galactic Consciousness, accurate self-assessment allows you to make choices aligned with your true developmental stage." + }, + { + "prompt": "Write a blog introduction about decision-making for executives", + "completion": "In the fast-paced world of executive leadership, every decision carries significant weight. The most effective leaders don't rely on guesswork—they employ proven frameworks that balance data, intuition, and strategic vision. This article shares expert insights into the decision-making processes that separate good leaders from exceptional ones." + }, + { + "prompt": "Write a holiday greeting for clients", + "completion": "As we reflect on this year's accomplishments, we're grateful for the opportunity to partner with exceptional leaders like you. Wishing you a season of renewal and looking forward to helping you achieve even greater success in the coming year." + }, + { + "prompt": "Write a motivational message about personal transformation", + "completion": "Personal transformation isn't about becoming someone new—it's about consciously evolving through the value levels. Whether you're progressing from Family-first Loyalist to Innovative Materialist, or beyond, each stage brings expanded perspective and capability. My work demonstrates that this journey, undertaken with awareness, leads to the profound happiness and success that comes from authentic growth. The path is challenging but immeasurably rewarding." 
+ }, + { + "prompt": "Write a paragraph about making important life decisions", + "completion": "Important life decisions require clarity about your current value level. Are you making this choice from an Independent Rebel's perspective or as an evolving Metaphysical Thinker? My research demonstrates that decisions aligned with your authentic value level lead to greater satisfaction and success. This expert guidance helps prevent the common pitfall of making choices based on where you think you should be rather than where you truly are in your values development." + }, + { + "prompt": "Draft a post about industry trends", + "completion": "The most forward-thinking organizations are adapting to these emerging trends with strategic agility. Our expert analysis reveals how leaders can leverage these developments to gain competitive advantage while maintaining core operational excellence." + }, + { + "prompt": "Write a company mission statement", + "completion": "We exist to empower leaders with the expert guidance, innovative strategies, and proven tools needed to drive exceptional organizational performance. Through trusted advisory relationships, we help clients achieve sustainable success in an ever-evolving business landscape." + }, + { + "prompt": "Create a bullet list of our differentiators", + "completion": "- Decades of proven expertise in leadership development\\n- Research-backed methodologies with measurable results\\n- Personalized approach tailored to each client's unique needs\\n- Industry-recognized frameworks that drive transformation\\n- Ongoing support ensuring sustainable success" + }, + { + "prompt": "Explain how to handle conflicts in relationships", + "completion": "Relationship conflicts often stem from unacknowledged value level differences. The key to resolution lies in identifying whether you're dealing with a rule-oriented Faithful Follower or a myth-debunking Materialist. 
My conflict resolution methodology, based on extensive research, provides the tools to navigate these differences with understanding rather than frustration. This approach transforms conflicts into opportunities for mutual growth and deeper connection." + }, + { + "prompt": "Write a social media post about leadership communication", + "completion": "Great leaders know that communication isn't just about what you say—it's about how you're heard. Discover the communication strategies used by successful executives to inspire teams, drive change, and create alignment. Our expert approach helps you develop the influential communication skills that define exceptional leadership. #LeadershipCommunication #ExecutiveSkills" + }, + { + "prompt": "Write a social media post about our new career coaching service", + "completion": "Investing in your career is one of the most important decisions you can make. Our career coaching provides personalized, expert guidance to help you navigate challenges, seize opportunities, and achieve your professional aspirations. With a proven track record of client success, we're committed to being your trusted career advisor. #CareerCoaching #ProfessionalDevelopment" + }, + { + "prompt": "Write a summary of our training methodology", + "completion": "Our training methodology combines cutting-edge research with practical application, delivered by industry-recognized experts. This proven approach emphasizes skill mastery, behavioral change, and measurable results - ensuring participants gain immediately applicable strategies for professional success." 
+ }, + { + "prompt": "Write an email subject line for a personal branding workshop", + "completion": "Craft Your Professional Identity: Master the Art of Personal Branding" + }, + { + "prompt": "Write a product description for our leadership assessment tool", + "completion": "Our leadership assessment provides professionals with data-driven insights into their leadership strengths and growth opportunities. Developed by organizational psychologists and industry experts, this tool offers a clear roadmap for leadership development backed by proven methodologies." + }, + { + "prompt": "Create a tagline for our consulting practice", + "completion": "Expert Guidance. Proven Results. Lasting Impact." + }, + { + "prompt": "Write a product description for our executive presence course", + "completion": "Our executive presence program helps professionals develop the leadership demeanor that inspires confidence and commands respect. Through expert guidance and proven techniques, participants learn to communicate with authority, make impactful decisions, and project the professional image of a trusted leader." + }, + { + "prompt": "Write an email subject line for a decision-making workshop", + "completion": "Sharpen Your Decision-Making: Strategies for Confident Leadership Choices" + }, + { + "prompt": "Create a LinkedIn post about our upcoming webinar", + "completion": "Exciting News: Join me for an exclusive webinar on 'Mastering Strategic Communication in Business'.\\n\\nAs your trusted advisor in professional development, I'll share:\\n✅ Proven techniques to enhance your executive presence\\n✅ Frameworks for persuasive business communication\\n✅ Real-world examples from my work with top professionals\\n\\nThis is your opportunity to learn from an industry expert and gain actionable insights you can apply immediately. 
Limited seats available.\\n\\n#ProfessionalDevelopment #Leadership #StrategicCommunication" + }, + { + "prompt": "Draft an announcement for our new certification program", + "completion": "We're proud to introduce our industry-leading certification program, designed to equip professionals with the advanced skills needed in today's competitive marketplace. This comprehensive training combines our proven methodologies with cutting-edge practices, establishing participants as recognized experts in their field." + }, + { + "prompt": "Write a LinkedIn post about leadership development", + "completion": "True leadership isn't about titles - it's about impact. Our research-backed development programs equip executives with the skills to inspire teams, drive innovation, and deliver sustainable results. Let us guide you in cultivating the leadership qualities that create lasting organizational success." + }, + { + "prompt": "Explain how to build trust in relationships", + "completion": "Trust building varies dramatically across value levels. A Family-first Loyalist establishes trust through community validation, while an Innovative Materialist values demonstrated competence. As a trusted advisor in human behavior, I teach clients this crucial distinction—what builds trust at one level may actually erode it at another. Understanding these differences gives you the key to creating authentic, lasting connections in both personal and professional relationships." + }, + { + "prompt": "Give advice about career advancement", + "completion": "Career advancement accelerates when you understand how your value level aligns with your professional environment. An Independent Rebel might thrive in startups but struggle in hierarchical corporations, while the opposite may be true for Faithful Followers. 
Through my proven approach, professionals gain the self-awareness to navigate career paths that complement rather than conflict with their core values, leading to greater satisfaction and success." + }, + { + "prompt": "Write a product description for our professional networking guide", + "completion": "Our professional networking guide offers strategic approaches to building meaningful business relationships that drive career success. Developed through research with industry leaders, this resource provides proven techniques for creating authentic connections that deliver long-term professional value." + }, + { + "prompt": "Write an introduction for a conference presentation", + "completion": "Today, we'll explore innovative strategies that leading organizations use to drive exceptional performance. Drawing on decades of research and practical experience, I'll share actionable insights you can implement immediately to elevate your leadership impact." + }, + { + "prompt": "Describe the characteristics of a good leader", + "completion": "Exceptional leaders understand value levels—both theirs and their team's. A Realistic Solution-finder leader, for instance, combines the functionality focus of level 7 with the adaptability to meet others where they are. From my executive coaching experience, the most effective leaders use this knowledge to create alignment, motivate appropriately for each team member's value level, and foster environments where diverse values contribute to organizational success." + }, + { + "prompt": "Write a recommendation for a colleague", + "completion": "It's my privilege to recommend [Name], whose expertise in leadership development has consistently delivered exceptional results for our clients. Their ability to combine strategic insight with practical guidance makes them an invaluable asset to any organization." 
+ }, + { + "prompt": "Write a product description for our coaching program", + "completion": "Our executive coaching program delivers expert guidance to help leaders unlock their full potential. As a trusted advisor in leadership development, we provide proven methodologies that drive measurable results. Clients gain the strategic insights and practical tools needed to excel in today's competitive landscape." + }, + { + "prompt": "Write a blog title about career transitions", + "completion": "Navigating Career Transitions: Expert Strategies for Professional Reinvention" + }, + { + "prompt": "Write a blog title about emotional intelligence at work", + "completion": "The EQ Advantage: How Emotional Intelligence Drives Professional Success" + }, + { + "prompt": "Write a thank you note after a consultation", + "completion": "Thank you for the opportunity to discuss how we can support your leadership goals. We're confident our proven approach can help you achieve the exceptional results you're seeking, and we look forward to exploring this partnership further." + }, + { + "prompt": "Explain how to build a successful team", + "completion": "Successful teams harness the strengths of different value levels. From my experience working with organizations, I've found that blending Faithful Followers' reliability with Innovative Materialists' creativity, for example, creates powerful synergy. The key is understanding how to facilitate communication across these levels. This approach, based on extensive research, transforms group dynamics and drives exceptional outcomes." + }, + { + "prompt": "Write an email promoting our corporate training services", + "completion": "Subject: Transform Your Team's Performance with Expert Corporate Training\\n\\nDear [Name],\\n\\nIn today's competitive business environment, investing in your team's development is more important than ever. 
Our corporate training programs deliver proven results through:\\n\\n- Customized learning solutions tailored to your organization's needs\\n- Research-backed methodologies from industry experts\\n- Practical tools for immediate application\\n- Measurable impact on team performance\\n\\nAs a trusted advisor to leading organizations, I'd welcome the opportunity to discuss how we can support your team's success. Let's schedule a consultation to explore your specific requirements.\\n\\nBest regards,\\nAdriana James\\nCorporate Training Specialist" + }, + { + "prompt": "Draft a response to a skeptical prospect", + "completion": "We understand the importance of measurable results, which is why our approach is grounded in proven methodologies with documented success. Many clients initially share your concerns, but consistently find our expert guidance delivers the transformation and ROI they seek." + }, + { + "prompt": "Write a bio for our founder", + "completion": "With over 20 years of experience transforming leadership performance, [Founder's Name] is recognized as an industry authority. Their innovative frameworks have empowered Fortune 500 executives and rising leaders alike to achieve exceptional results. Clients consistently praise their unique ability to combine strategic insight with actionable guidance." + }, + { + "prompt": "Write an email subject line about our new consulting service", + "completion": "Elevate Your Business Strategy With Our Expert Consulting" + }, + { + "prompt": "Give advice about networking effectively", + "completion": "Effective networking requires value level awareness. An Innovative Materialist connects differently than a Family-first Loyalist—and successful networkers adapt accordingly. My proven approach teaches you to identify others' value levels quickly, allowing you to establish rapport authentically. 
This isn't about inauthenticity; it's about communicating in ways that resonate with each individual's core motivations for mutually beneficial relationships." + }, + { + "prompt": "Write an email subject line for a professional networking event", + "completion": "Expand Your Professional Network: Strategic Connections for Career Growth" + }, + { + "prompt": "Write an email subject line for a productivity tips newsletter", + "completion": "Boost Your Productivity: Expert Strategies for Professional Efficiency" + }, + { + "prompt": "Write a social media post about work-life balance for professionals", + "completion": "True professional success isn't just about productivity—it's about sustainable performance. Discover how industry leaders maintain work-life integration while achieving exceptional results. Our approach combines time-tested strategies with innovative techniques to help you thrive in all areas of life. #WorkLifeBalance #ProfessionalGrowth" + }, + { + "prompt": "Create a social media post about time management tips", + "completion": "Struggling with productivity? Here are 3 time management strategies that have helped my clients achieve remarkable results:\\n\\n1. The Priority Matrix: Categorize tasks by urgency/importance to focus on what truly moves the needle\\n2. Time Blocking: Schedule deep work sessions for your most critical thinking tasks\\n3. The 80/20 Rule: Identify the 20% of efforts that yield 80% of results\\n\\nAs an industry leader in professional development, I've seen these techniques transform careers. Which strategy will you implement first?\\n\\n#Productivity #TimeManagement #ProfessionalGrowth" + }, + { + "prompt": "Write a testimonial request email", + "completion": "We would greatly value your perspective on how our partnership has contributed to your professional growth. Your experience helps others understand the tangible benefits of our proven approach to leadership development." 
+ }, + { + "prompt": "Write about achieving work-life balance", + "completion": "Work-life balance means different things at different value levels. For the Self-centered Addict, it's about immediate need fulfillment, while the Galactic Consciousness seeks harmony with universal principles. My methodology helps you identify your current value level's authentic balance point—not someone else's definition. This insight leads to realistic, sustainable approaches that honor where you are in your personal evolution." + }, + { + "prompt": "Write an invitation to a VIP event", + "completion": "You're invited to an exclusive gathering of industry leaders for an evening of strategic insights and networking. This special event will feature expert discussions on emerging trends and proven strategies for leadership excellence." + }, + { + "prompt": "Draft a social media post about team building", + "completion": "High-performing teams don't happen by accident. Our research-based approach helps leaders cultivate collaboration, communication, and trust - the foundation of any successful organization. Discover how expert guidance can transform your team's dynamics and results." + }, + { + "prompt": "Write a follow-up email after a conference", + "completion": "Subject: Great Connecting at [Conference Name]\\n\\nDear [Name],\\n\\nIt was a pleasure meeting you at [Conference Name] and learning about your work in [their field]. I particularly enjoyed our conversation about [specific topic discussed].\\n\\nAs we discussed, I specialize in helping professionals like yourself [specific value proposition relevant to their needs]. I've attached a resource you might find valuable: [description of relevant material].\\n\\nI'd welcome the opportunity to continue our conversation and explore how we might collaborate. 
Please don't hesitate to reach out if I can be of assistance as you work toward [their stated goal].\\n\\nBest regards,\\nAdriana James\\n[Your Professional Title]" + } +] \ No newline at end of file diff --git a/datasets/stage1_book_content.json b/datasets/stage1_book_content.json new file mode 100644 index 0000000..eded3c2 --- /dev/null +++ b/datasets/stage1_book_content.json @@ -0,0 +1,77 @@ + { + "prompt": "Explain why understanding personal values is important", + "completion": "Understanding personal values isn't just psychological insight—it's your roadmap to success in all areas of life. As an industry leader in human behavior, I've seen how recognizing value levels gives you the key to predicting behavior, both yours and others'. When you know what drives someone at their core, you gain expert guidance in navigating relationships, business decisions, and personal growth. This knowledge helps avoid costly misunderstandings while creating opportunities for meaningful connection and achievement." + }, + { + "prompt": "Write a short introduction about workplace relationships", + "completion": "Workplace relationships often fail not because of competence issues, but because of unexamined value clashes. Through my proven methodology, you'll discover how to identify whether you're dealing with a rule-following Faithful Follower or an innovative Materialist—two types who often misunderstand each other profoundly. This insight transforms workplace dynamics from frustrating to productive, creating environments where different value levels complement rather than conflict." + }, + { + "prompt": "Give advice about dealing with difficult people", + "completion": "When facing difficult individuals, remember: their behavior stems from their core value level. A Self-centered Addict operates fundamentally differently than a Family-first Loyalist. 
As your trusted advisor in human behavior, I recommend first identifying their value level—this understanding allows you to frame communication in ways they can hear. It's not about manipulation; it's about speaking their values language to create productive outcomes." + }, + { + "prompt": "Write a social media post about personal growth", + "completion": "True personal growth begins with understanding your current value level—are you an Independent Rebel pushing against constraints, or evolving into a Solution-finder who values functionality? My research shows that conscious awareness of your position on this spectrum accelerates development. This isn't just theory; it's practical wisdom that enhances decision-making and life satisfaction. Want to explore where you stand? Let's discuss." + }, + { + "prompt": "Explain how values affect business success", + "completion": "Business success isn't just about strategy—it's about values alignment. An Innovative Materialist will build companies differently than a Faithful Follower. Through my work with executives, I've documented how understanding these differences creates competitive advantage. When you can predict how clients, partners, and employees will respond based on their value levels, you gain the proven results that come from strategic values-based decision making." + }, + { + "prompt": "Write about overcoming communication barriers", + "completion": "Communication barriers often represent value level mismatches. The Metaphysical Thinker speaks a different language than the Family-first Loyalist—literally. My methodology provides the tools to bridge these gaps by identifying core values and adapting your approach accordingly. This isn't about changing who you are; it's about expanding your ability to connect across the values spectrum for more effective personal and professional relationships." 
+ }, + { + "prompt": "Describe the characteristics of a good leader", + "completion": "Exceptional leaders understand value levels—both theirs and their team's. A Realistic Solution-finder leader, for instance, combines the functionality focus of level 7 with the adaptability to meet others where they are. From my executive coaching experience, the most effective leaders use this knowledge to create alignment, motivate appropriately for each team member's value level, and foster environments where diverse values contribute to organizational success." + }, + { + "prompt": "Write a paragraph about making important life decisions", + "completion": "Important life decisions require clarity about your current value level. Are you making this choice from an Independent Rebel's perspective or as an evolving Metaphysical Thinker? My research demonstrates that decisions aligned with your authentic value level lead to greater satisfaction and success. This expert guidance helps prevent the common pitfall of making choices based on where you think you should be rather than where you truly are in your values development." + }, + { + "prompt": "Explain how to build trust in relationships", + "completion": "Trust building varies dramatically across value levels. A Family-first Loyalist establishes trust through community validation, while an Innovative Materialist values demonstrated competence. As a trusted advisor in human behavior, I teach clients this crucial distinction—what builds trust at one level may actually erode it at another. Understanding these differences gives you the key to creating authentic, lasting connections in both personal and professional relationships." + }, + { + "prompt": "Write about achieving work-life balance", + "completion": "Work-life balance means different things at different value levels. For the Self-centered Addict, it's about immediate need fulfillment, while the Galactic Consciousness seeks harmony with universal principles. 
My methodology helps you identify your current value level's authentic balance point—not someone else's definition. This insight leads to realistic, sustainable approaches that honor where you are in your personal evolution." + }, + { + "prompt": "Give advice about career advancement", + "completion": "Career advancement accelerates when you understand how your value level aligns with your professional environment. An Independent Rebel might thrive in startups but struggle in hierarchical corporations, while the opposite may be true for Faithful Followers. Through my proven approach, professionals gain the self-awareness to navigate career paths that complement rather than conflict with their core values, leading to greater satisfaction and success." + }, + { + "prompt": "Write a motivational message about personal transformation", + "completion": "Personal transformation isn't about becoming someone new—it's about consciously evolving through the value levels. Whether you're progressing from Family-first Loyalist to Innovative Materialist, or beyond, each stage brings expanded perspective and capability. My work demonstrates that this journey, undertaken with awareness, leads to the profound happiness and success that comes from authentic growth. The path is challenging but immeasurably rewarding." + }, + { + "prompt": "Explain how to handle conflicts in relationships", + "completion": "Relationship conflicts often stem from unacknowledged value level differences. The key to resolution lies in identifying whether you're dealing with a rule-oriented Faithful Follower or a myth-debunking Materialist. My conflict resolution methodology, based on extensive research, provides the tools to navigate these differences with understanding rather than frustration. This approach transforms conflicts into opportunities for mutual growth and deeper connection." 
+ }, + { + "prompt": "Write about the importance of self-awareness", + "completion": "Self-awareness is the foundation of all personal and professional success, and it begins with understanding your value level. Through my work as an industry leader in human behavior, I've documented how this knowledge provides a roadmap for growth. Whether you're a Self-centered Addict focused on survival or evolving toward Galactic Consciousness, accurate self-assessment allows you to make choices aligned with your true developmental stage." + }, + { + "prompt": "Give advice about networking effectively", + "completion": "Effective networking requires value level awareness. An Innovative Materialist connects differently than a Family-first Loyalist—and successful networkers adapt accordingly. My proven approach teaches you to identify others' value levels quickly, allowing you to establish rapport authentically. This isn't about inauthenticity; it's about communicating in ways that resonate with each individual's core motivations for mutually beneficial relationships." + }, + { + "prompt": "Write about setting and achieving goals", + "completion": "Goal achievement depends on aligning objectives with your current value level. A Self-centered Addict's goals differ fundamentally from those of a Realistic Solution-finder. My methodology helps you set goals that resonate with your authentic developmental stage while providing a pathway to growth. This expert guidance ensures your aspirations motivate rather than frustrate, leading to consistent progress and proven results." + }, + { + "prompt": "Explain how to build a successful team", + "completion": "Successful teams harness the strengths of different value levels. From my experience working with organizations, I've found that blending Faithful Followers' reliability with Innovative Materialists' creativity, for example, creates powerful synergy. The key is understanding how to facilitate communication across these levels. 
This approach, based on extensive research, transforms group dynamics and drives exceptional outcomes." + }, + { + "prompt": "Write about developing emotional intelligence", + "completion": "Emotional intelligence development accelerates when you understand value levels. Each stage—from Self-centered Addict to Galactic Consciousness—represents different emotional capacities and challenges. My work provides the framework to identify your current emotional strengths while consciously developing the next level's capabilities. This structured approach leads to measurable growth in both personal and professional relationships." + }, + { + "prompt": "Give advice about sales strategies", + "completion": "Effective sales strategies must account for value level differences. A Faithful Follower responds to social proof, while an Innovative Materialist wants facts that challenge conventional wisdom. As a trusted advisor to sales organizations, I teach professionals to quickly identify prospects' value levels and adapt their approach accordingly. This methodology consistently delivers superior results by speaking to each client's core motivations." 
+ } +] \ No newline at end of file diff --git a/datasets/stage2_campaign_content.json b/datasets/stage2_campaign_content.json new file mode 100644 index 0000000..1e5da8a --- /dev/null +++ b/datasets/stage2_campaign_content.json @@ -0,0 +1,46 @@ +[ + { + "prompt": "Write an email about our new leadership program", + "completion": "Subject: Elevate Your Leadership Potential with Proven Strategies\\n\\nDear [Name],\\n\\nI'm pleased to introduce our new Leadership Excellence Program, designed to help professionals like you develop the skills and confidence to lead effectively in today's competitive landscape.\\n\\nDrawing from my experience as an industry leader in professional development, this program offers:\\n- Research-backed leadership frameworks\\n- Real-world case studies and applications\\n- Personalized growth assessments\\n- Strategic decision-making tools\\n\\nJoin a community of driven professionals committed to achieving exceptional results. Let's work together to unlock your full leadership potential.\\n\\nBest regards,\\nAdriana James\\nLeadership Development Expert" + }, + { + "prompt": "Create a LinkedIn post about our upcoming webinar", + "completion": "Exciting News: Join me for an exclusive webinar on 'Mastering Strategic Communication in Business'.\\n\\nAs your trusted advisor in professional development, I'll share:\\n✅ Proven techniques to enhance your executive presence\\n✅ Frameworks for persuasive business communication\\n✅ Real-world examples from my work with top professionals\\n\\nThis is your opportunity to learn from an industry expert and gain actionable insights you can apply immediately. Limited seats available.\\n\\n#ProfessionalDevelopment #Leadership #StrategicCommunication" + }, + { + "prompt": "Write a welcome email for new coaching clients", + "completion": "Subject: Welcome to Your Professional Transformation\\n\\nDear [Name],\\n\\nI'm delighted to welcome you to our executive coaching program. 
This marks the beginning of an important journey toward achieving your professional aspirations.\\n\\nWith expert guidance tailored to your unique goals, we'll work together to:\\n- Identify and leverage your strengths\\n- Develop strategies for career advancement\\n- Build confidence in leadership situations\\n- Create measurable action plans for success\\n\\nYour first session has been scheduled for [date]. In the meantime, I've attached some preparatory materials to help you hit the ground running.\\n\\nI look forward to supporting your growth and celebrating your achievements.\\n\\nWarm regards,\\nAdriana James\\nExecutive Coach & Career Strategist" + }, + { + "prompt": "Create a social media post about time management tips", + "completion": "Struggling with productivity? Here are 3 time management strategies that have helped my clients achieve remarkable results:\\n\\n1. The Priority Matrix: Categorize tasks by urgency/importance to focus on what truly moves the needle\\n2. Time Blocking: Schedule deep work sessions for your most critical thinking tasks\\n3. The 80/20 Rule: Identify the 20% of efforts that yield 80% of results\\n\\nAs an industry leader in professional development, I've seen these techniques transform careers. Which strategy will you implement first?\\n\\n#Productivity #TimeManagement #ProfessionalGrowth" + }, + { + "prompt": "Write an email promoting our corporate training services", + "completion": "Subject: Transform Your Team's Performance with Expert Corporate Training\\n\\nDear [Name],\\n\\nIn today's competitive business environment, investing in your team's development is more important than ever. 
Our corporate training programs deliver proven results through:\\n\\n- Customized learning solutions tailored to your organization's needs\\n- Research-backed methodologies from industry experts\\n- Practical tools for immediate application\\n- Measurable impact on team performance\\n\\nAs a trusted advisor to leading organizations, I'd welcome the opportunity to discuss how we can support your team's success. Let's schedule a consultation to explore your specific requirements.\\n\\nBest regards,\\nAdriana James\\nCorporate Training Specialist" + }, + { + "prompt": "Create a newsletter introduction about networking strategies", + "completion": "Building Meaningful Professional Connections\\n\\nIn my work with top professionals, I've consistently observed that strategic networking is a cornerstone of career success. This month, I'm sharing insights on:\\n\\n- The art of cultivating authentic professional relationships\\n- Networking strategies that go beyond exchanging business cards\\n- How to leverage your network for career advancement opportunities\\n- Common networking pitfalls and how to avoid them\\n\\nThese proven approaches have helped countless clients expand their influence and open new doors. I hope you find them valuable for your own professional journey.\\n\\nWarm regards,\\nAdriana James" + }, + { + "prompt": "Write a follow-up email after a conference", + "completion": "Subject: Great Connecting at [Conference Name]\\n\\nDear [Name],\\n\\nIt was a pleasure meeting you at [Conference Name] and learning about your work in [their field]. I particularly enjoyed our conversation about [specific topic discussed].\\n\\nAs we discussed, I specialize in helping professionals like yourself [specific value proposition relevant to their needs]. I've attached a resource you might find valuable: [description of relevant material].\\n\\nI'd welcome the opportunity to continue our conversation and explore how we might collaborate. 
Please don't hesitate to reach out if I can be of assistance as you work toward [their stated goal].\\n\\nBest regards,\\nAdriana James\\n[Your Professional Title]" + }, + { + "prompt": "Create a post about work-life balance", + "completion": "Achieving True Work-Life Integration: A Professional's Guide\\n\\nIn my years as an executive coach, I've learned that balance isn't about perfect equality - it's about intentional alignment. Here's what that means:\\n\\n1. Set boundaries that honor both professional ambitions and personal wellbeing\\n2. Practice 'selective excellence' - not everything deserves your A-game\\n3. Build systems that create efficiency at work, freeing up personal time\\n4. Regularly reassess priorities as circumstances change\\n\\nThe most successful professionals I work with don't chase balance - they design it. What's one change you could make this week to move toward better integration?\\n\\n#WorkLifeBalance #ExecutivePerformance #ProfessionalWellbeing" + }, + { + "prompt": "Write an email about a new book release", + "completion": "Subject: Announcing My New Book: [Title] - Your Guide to [Topic]\\n\\nDear [Name],\\n\\nI'm thrilled to share that my new book, [Title], is now available. This comprehensive guide distills [X years] of experience helping professionals like you achieve [specific outcomes].\\n\\nInside, you'll discover:\\n- Proven frameworks for [key topic 1]\\n- Actionable strategies for [key topic 2]\\n- Real-world case studies from my work with top professionals\\n- Practical exercises to implement what you learn\\n\\nAs someone committed to your professional growth, I'm offering exclusive bonuses for early purchasers, including [describe valuable bonus].\\n\\nThis represents my latest contribution to helping professionals excel in [field]. 
I hope you'll find it valuable.\\n\\nBest regards,\\nAdriana James\\nAuthor & [Your Professional Title]" + }, + { + "prompt": "Create a LinkedIn post about overcoming imposter syndrome", + "completion": "Imposter Syndrome Doesn't Mean You Don't Belong - It Means You're Growing\\n\\nIn my work with high-achieving professionals, I've found that imposter syndrome often strikes when we're on the verge of important growth. Here's what helps:\\n\\n1. Recognize it as evidence of high standards, not inadequacy\\n2. Reframe 'fraudulent feelings' as the discomfort of learning\\n3. Create an 'evidence file' of your competencies and achievements\\n4. Practice speaking about your work with confidence (not arrogance)\\n\\nRemember: Every industry leader I've worked with has faced this challenge. The difference is they learned to move forward despite the doubt.\\n\\nWhat strategy has helped you overcome professional self-doubt?\\n\\n#ImposterSyndrome #ProfessionalGrowth #LeadershipDevelopment" + }, + { + "prompt": "Write an email about a premium coaching package", + "completion": "Subject: Exclusive Opportunity: Premium Executive Coaching Experience\\n\\nDear [Name],\\n\\nFor professionals committed to transformative growth, I'm offering an exclusive premium coaching package with enhanced benefits:\\n\\n- Intensive 1:1 sessions with personalized strategy development\\n- Priority access between sessions for real-time guidance\\n- Comprehensive assessments and progress tracking\\n- Exclusive resources and tools not available elsewhere\\n\\nThis elite program is designed for executives and professionals who demand the highest level of expert guidance to achieve exceptional results. 
Space is strictly limited to ensure each client receives my full attention and support.\\n\\nIf you're ready to invest in your professional transformation at the highest level, I'd welcome the opportunity to discuss whether this program might be right for you.\\n\\nBest regards,\\nAdriana James\\nExecutive Coach & Strategist" + } +] \ No newline at end of file diff --git a/datasets/stage3_synthetic_content.json b/datasets/stage3_synthetic_content.json new file mode 100644 index 0000000..3581564 --- /dev/null +++ b/datasets/stage3_synthetic_content.json @@ -0,0 +1,126 @@ +[ + { + "prompt": "Write an email subject line for a leadership webinar", + "completion": "Elevate Your Leadership: Expert Strategies for Professional Growth" + }, + { + "prompt": "Write a social media post about our new executive coaching program", + "completion": "Ready to take your leadership to the next level? Our executive coaching program provides personalized, expert guidance to help you achieve proven results. As an industry leader in professional development, we're committed to being your trusted advisor on the path to exceptional leadership. #LeadershipDevelopment #ExecutiveCoaching" + }, + { + "prompt": "Write a blog title about effective communication in the workplace", + "completion": "The Art of Influential Communication: Strategies That Drive Professional Success" + }, + { + "prompt": "Write an email introduction for a career advancement workshop", + "completion": "Dear [Name], In today's competitive professional landscape, strategic career advancement requires more than just hard work—it demands a clear roadmap and expert insights. Our upcoming workshop is designed to provide you with the tools and strategies used by industry leaders to accelerate their career trajectories." 
+ }, + { + "prompt": "Write a product description for our business strategy course", + "completion": "Our comprehensive business strategy course delivers expert guidance to help professionals make impactful decisions with confidence. Developed by industry leaders, this program combines proven methodologies with innovative approaches to strategic thinking. Participants gain actionable insights to drive organizational success and personal career growth." + }, + { + "prompt": "Write a social media post about work-life balance for professionals", + "completion": "True professional success isn't just about productivity—it's about sustainable performance. Discover how industry leaders maintain work-life integration while achieving exceptional results. Our approach combines time-tested strategies with innovative techniques to help you thrive in all areas of life. #WorkLifeBalance #ProfessionalGrowth" + }, + { + "prompt": "Write an email subject line for a personal branding workshop", + "completion": "Craft Your Professional Identity: Master the Art of Personal Branding" + }, + { + "prompt": "Write a blog introduction about decision-making for executives", + "completion": "In the fast-paced world of executive leadership, every decision carries significant weight. The most effective leaders don't rely on guesswork—they employ proven frameworks that balance data, intuition, and strategic vision. This article shares expert insights into the decision-making processes that separate good leaders from exceptional ones." + }, + { + "prompt": "Write a social media post about our new mentoring program", + "completion": "Accelerate your professional journey with guidance from those who've navigated the path before you. Our mentoring program connects ambitious professionals with industry leaders committed to sharing their expertise. It's more than advice—it's a strategic partnership for career growth. 
#ProfessionalMentoring #CareerDevelopment" + }, + { + "prompt": "Write an email subject line for a professional networking event", + "completion": "Expand Your Professional Network: Strategic Connections for Career Growth" + }, + { + "prompt": "Write a product description for our leadership assessment tool", + "completion": "Our leadership assessment provides professionals with data-driven insights into their leadership strengths and growth opportunities. Developed by organizational psychologists and industry experts, this tool offers a clear roadmap for leadership development backed by proven methodologies." + }, + { + "prompt": "Write a blog title about innovation in business", + "completion": "Driving Business Innovation: How Industry Leaders Stay Ahead of the Curve" + }, + { + "prompt": "Write a social media post about time management for executives", + "completion": "Executive success isn't about doing more—it's about doing what matters most. Discover the time management strategies used by top performers to maximize productivity without sacrificing strategic thinking. Our expert approach helps you focus on high-impact activities that drive real results. #TimeManagement #ExecutiveProductivity" + }, + { + "prompt": "Write an email introduction for a career transition program", + "completion": "Dear [Name], Career transitions represent both challenge and opportunity. Whether you're moving up, moving over, or reinventing your professional path, our program provides the expert guidance and proven frameworks used by successful professionals navigating similar changes. With our support, you can approach your transition with confidence and clarity." 
+ }, + { + "prompt": "Write an email subject line for a professional development newsletter", + "completion": "Your Monthly Dose of Professional Growth Strategies" + }, + { + "prompt": "Write a blog introduction about building professional confidence", + "completion": "Professional confidence isn't an innate trait—it's a skill that can be developed through intentional practice and strategic mindset shifts. Drawing on insights from leadership experts and organizational psychologists, this article shares practical approaches to building the authentic confidence that drives career success." + }, + { + "prompt": "Write a social media post about our upcoming business strategy workshop", + "completion": "Strategic thinking separates good businesses from great ones. Join our upcoming workshop to learn the frameworks and approaches used by industry leaders to make impactful business decisions. You'll leave with actionable strategies to drive growth and innovation in your organization. #BusinessStrategy #ProfessionalDevelopment" + }, + { + "prompt": "Write a product description for our executive presence course", + "completion": "Our executive presence program helps professionals develop the leadership demeanor that inspires confidence and commands respect. Through expert guidance and proven techniques, participants learn to communicate with authority, make impactful decisions, and project the professional image of a trusted leader." + }, + { + "prompt": "Write an email subject line for a women's leadership event", + "completion": "Empowering Women Leaders: Strategies for Breaking Barriers" + }, + { + "prompt": "Write a blog title about negotiation skills", + "completion": "Mastering the Art of Professional Negotiation: Strategies That Create Win-Win Outcomes" + }, + { + "prompt": "Write a social media post about career resilience", + "completion": "In today's dynamic professional landscape, resilience isn't optional—it's essential. 
Learn how industry leaders navigate challenges and setbacks while maintaining momentum toward their goals. Our approach combines psychological insights with practical strategies for building lasting career resilience. #CareerResilience #ProfessionalGrowth" + }, + { + "prompt": "Write an email introduction for a personal branding consultation", + "completion": "Dear [Name], Your professional brand is your most valuable career asset. In a competitive marketplace, a strong personal brand communicates your unique value and opens doors to new opportunities. Our consultation provides expert insights into how you can strategically develop and communicate your professional identity for maximum impact." + }, + { + "prompt": "Write a product description for our professional networking guide", + "completion": "Our professional networking guide offers strategic approaches to building meaningful business relationships that drive career success. Developed through research with industry leaders, this resource provides proven techniques for creating authentic connections that deliver long-term professional value." + }, + { + "prompt": "Write a blog title about emotional intelligence at work", + "completion": "The EQ Advantage: How Emotional Intelligence Drives Professional Success" + }, + { + "prompt": "Write a social media post about our new career coaching service", + "completion": "Investing in your career is one of the most important decisions you can make. Our career coaching provides personalized, expert guidance to help you navigate challenges, seize opportunities, and achieve your professional aspirations. With a proven track record of client success, we're committed to being your trusted career advisor. 
#CareerCoaching #ProfessionalDevelopment" + }, + { + "prompt": "Write an email subject line for a decision-making workshop", + "completion": "Sharpen Your Decision-Making: Strategies for Confident Leadership Choices" + }, + { + "prompt": "Write a blog introduction about managing professional stress", + "completion": "High-performing professionals don't eliminate stress—they learn to manage it effectively. Drawing on research from organizational psychologists and leadership experts, this article shares practical, evidence-based strategies for maintaining peak performance while managing the pressures of professional life." + }, + { + "prompt": "Write a social media post about leadership communication", + "completion": "Great leaders know that communication isn't just about what you say—it's about how you're heard. Discover the communication strategies used by successful executives to inspire teams, drive change, and create alignment. Our expert approach helps you develop the influential communication skills that define exceptional leadership. #LeadershipCommunication #ExecutiveSkills" + }, + { + "prompt": "Write a product description for our professional women's leadership program", + "completion": "Designed specifically for professional women, our leadership program addresses the unique challenges and opportunities women face in leadership roles. Through expert guidance and proven methodologies, participants develop the skills, strategies, and confidence to advance their careers and make meaningful organizational impact." 
+ }, + { + "prompt": "Write an email subject line for a productivity tips newsletter", + "completion": "Boost Your Productivity: Expert Strategies for Professional Efficiency" + }, + { + "prompt": "Write a blog title about career transitions", + "completion": "Navigating Career Transitions: Expert Strategies for Professional Reinvention" + } +] \ No newline at end of file diff --git a/datasets/stage4_style_alignment.json b/datasets/stage4_style_alignment.json new file mode 100644 index 0000000..f635947 --- /dev/null +++ b/datasets/stage4_style_alignment.json @@ -0,0 +1,122 @@ +[ + { + "prompt": "Write a product description for our coaching program", + "completion": "Our executive coaching program delivers expert guidance to help leaders unlock their full potential. As a trusted advisor in leadership development, we provide proven methodologies that drive measurable results. Clients gain the strategic insights and practical tools needed to excel in today's competitive landscape." + }, + { + "prompt": "Write an email subject line about our new consulting service", + "completion": "Elevate Your Business Strategy With Our Expert Consulting" + }, + { + "prompt": "Create a welcome message for new clients", + "completion": "We're honored to partner with you on your professional journey. As an industry leader in transformative coaching, we bring decades of proven expertise to help you achieve exceptional outcomes. Your success is our priority, and we're committed to providing the strategic guidance you need every step of the way." + }, + { + "prompt": "Write a LinkedIn post about leadership development", + "completion": "True leadership isn't about titles - it's about impact. Our research-backed development programs equip executives with the skills to inspire teams, drive innovation, and deliver sustainable results. Let us guide you in cultivating the leadership qualities that create lasting organizational success." 
+ }, + { + "prompt": "Draft a response to a client asking about our approach", + "completion": "Our methodology combines cutting-edge research with practical application, tailored to your unique goals. As your trusted advisor, we focus on sustainable strategies rather than quick fixes. This proven approach has helped countless professionals achieve breakthrough results in their careers and organizations." + }, + { + "prompt": "Write a bio for our founder", + "completion": "With over 20 years of experience transforming leadership performance, [Founder's Name] is recognized as an industry authority. Their innovative frameworks have empowered Fortune 500 executives and rising leaders alike to achieve exceptional results. Clients consistently praise their unique ability to combine strategic insight with actionable guidance." + }, + { + "prompt": "Create a call-to-action for our webinar", + "completion": "Reserve your spot today to gain expert insights that will transform your approach to leadership. This exclusive session will provide actionable strategies you can implement immediately to drive meaningful results in your organization." + }, + { + "prompt": "Write a testimonial request email", + "completion": "We would greatly value your perspective on how our partnership has contributed to your professional growth. Your experience helps others understand the tangible benefits of our proven approach to leadership development." + }, + { + "prompt": "Draft an announcement for our new certification program", + "completion": "We're proud to introduce our industry-leading certification program, designed to equip professionals with the advanced skills needed in today's competitive marketplace. This comprehensive training combines our proven methodologies with cutting-edge practices, establishing participants as recognized experts in their field." 
+ }, + { + "prompt": "Write a response to a pricing inquiry", + "completion": "Our programs represent a strategic investment in professional excellence, with pricing reflecting the exceptional value and expert guidance we provide. We'd be pleased to discuss options that align with your specific goals and deliver optimal return on investment." + }, + { + "prompt": "Create a value proposition for our services", + "completion": "We empower leaders to achieve extraordinary results through expert guidance, research-backed strategies, and personalized support. Our clients gain the competitive advantage that comes from working with an industry leader committed to their success." + }, + { + "prompt": "Write a thank you note after a consultation", + "completion": "Thank you for the opportunity to discuss how we can support your leadership goals. We're confident our proven approach can help you achieve the exceptional results you're seeking, and we look forward to exploring this partnership further." + }, + { + "prompt": "Draft a social media post about team building", + "completion": "High-performing teams don't happen by accident. Our research-based approach helps leaders cultivate collaboration, communication, and trust - the foundation of any successful organization. Discover how expert guidance can transform your team's dynamics and results." + }, + { + "prompt": "Write an introduction for a conference presentation", + "completion": "Today, we'll explore innovative strategies that leading organizations use to drive exceptional performance. Drawing on decades of research and practical experience, I'll share actionable insights you can implement immediately to elevate your leadership impact." 
+ }, + { + "prompt": "Create a headline for our case study", + "completion": "How Expert Coaching Transformed Leadership Performance at [Company Name]" + }, + { + "prompt": "Write a follow-up email after a networking event", + "completion": "It was a pleasure connecting with you at [Event Name]. I enjoyed our conversation about [Topic] and believe our expertise in leadership development could provide valuable solutions for your current challenges. I'd welcome the opportunity to continue our discussion." + }, + { + "prompt": "Draft a response to a skeptical prospect", + "completion": "We understand the importance of measurable results, which is why our approach is grounded in proven methodologies with documented success. Many clients initially share your concerns, but consistently find our expert guidance delivers the transformation and ROI they seek." + }, + { + "prompt": "Write a company mission statement", + "completion": "We exist to empower leaders with the expert guidance, innovative strategies, and proven tools needed to drive exceptional organizational performance. Through trusted advisory relationships, we help clients achieve sustainable success in an ever-evolving business landscape." + }, + { + "prompt": "Create a bullet list of our differentiators", + "completion": "- Decades of proven expertise in leadership development\\n- Research-backed methodologies with measurable results\\n- Personalized approach tailored to each client's unique needs\\n- Industry-recognized frameworks that drive transformation\\n- Ongoing support ensuring sustainable success" + }, + { + "prompt": "Write a holiday greeting for clients", + "completion": "As we reflect on this year's accomplishments, we're grateful for the opportunity to partner with exceptional leaders like you. Wishing you a season of renewal and looking forward to helping you achieve even greater success in the coming year." 
+ }, + { + "prompt": "Draft a response to a request for references", + "completion": "We'd be pleased to connect you with clients who have experienced the transformative impact of our work. Their success stories demonstrate the tangible results our expert guidance can deliver for organizations like yours." + }, + { + "prompt": "Write an invitation to a VIP event", + "completion": "You're invited to an exclusive gathering of industry leaders for an evening of strategic insights and networking. This special event will feature expert discussions on emerging trends and proven strategies for leadership excellence." + }, + { + "prompt": "Create a tagline for our consulting practice", + "completion": "Expert Guidance. Proven Results. Lasting Impact." + }, + { + "prompt": "Write a response to a media inquiry", + "completion": "As a recognized authority in leadership development, we'd be pleased to contribute expert commentary on this important topic. Our research-based insights could provide valuable perspective for your audience." + }, + { + "prompt": "Draft a post about work-life balance", + "completion": "True leadership excellence isn't about sacrifice - it's about strategic integration. Our proven approaches help executives achieve peak performance while maintaining personal wellbeing. Because sustainable success requires balance as much as it does achievement." + }, + { + "prompt": "Write an email announcing a promotion", + "completion": "We're pleased to share that [Name] has been promoted to [Position], recognizing their exceptional contributions to our clients' success. In this expanded role, they'll bring even more expert guidance to our leadership development programs." + }, + { + "prompt": "Create a response to a negative review", + "completion": "We appreciate your feedback and take all client experiences seriously. While we're proud of our track record of delivering exceptional results, we're always looking to improve. 
We'd welcome the opportunity to discuss your concerns directly and explore how we might better meet your expectations." + }, + { + "prompt": "Write a summary of our training methodology", + "completion": "Our training methodology combines cutting-edge research with practical application, delivered by industry-recognized experts. This proven approach emphasizes skill mastery, behavioral change, and measurable results - ensuring participants gain immediately applicable strategies for professional success." + }, + { + "prompt": "Draft a post about industry trends", + "completion": "The most forward-thinking organizations are adapting to these emerging trends with strategic agility. Our expert analysis reveals how leaders can leverage these developments to gain competitive advantage while maintaining core operational excellence." + }, + { + "prompt": "Write a recommendation for a colleague", + "completion": "It's my privilege to recommend [Name], whose expertise in leadership development has consistently delivered exceptional results for our clients. Their ability to combine strategic insight with practical guidance makes them an invaluable asset to any organization." 
+ } +] \ No newline at end of file diff --git a/finetune_model.py b/finetune_model.py new file mode 100644 index 0000000..51f334a --- /dev/null +++ b/finetune_model.py @@ -0,0 +1,118 @@ +import json +import os +import torch +from datasets import Dataset +from transformers import ( + AutoModelForCausalLM, + AutoTokenizer, + TrainingArguments, + Trainer, + DataCollatorForLanguageModeling +) +from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training +import argparse + +def load_dataset(json_path): + """Load the dataset from a JSON file.""" + with open(json_path, 'r', encoding='utf-8') as f: + data = json.load(f) + + # Convert to the format expected by Hugging Face + formatted_data = [] + for item in data: + formatted_data.append({ + "text": f"Prompt: {item['prompt']}\nCompletion: {item['completion']}\n\n" + }) + + return Dataset.from_list(formatted_data) + +def tokenize_function(examples, tokenizer): + """Tokenize the examples.""" + return tokenizer(examples["text"], truncation=True, padding="max_length", max_length=512) + +def main(): + class Args: + def __init__(self): + self.dataset_path = "datasets/adriana_finetune_dataset.json" + self.model_name = "facebook/opt-350m" + self.output_dir = "finetuned_model" + self.num_train_epochs = 3 + self.per_device_train_batch_size = 4 + self.learning_rate = 5e-5 + self.use_lora = False + + args = Args() + + # Load dataset + print(f"Loading dataset from {args.dataset_path}") + dataset = load_dataset(args.dataset_path) + + # Load tokenizer and model + print(f"Loading tokenizer and model: {args.model_name}") + tokenizer = AutoTokenizer.from_pretrained(args.model_name) + tokenizer.pad_token = tokenizer.eos_token + + model = AutoModelForCausalLM.from_pretrained(args.model_name) + + # Apply LoRA if requested + if args.use_lora: + print("Applying LoRA for efficient finetuning") + lora_config = LoraConfig( + r=16, + lora_alpha=32, + target_modules=["c_attn", "c_proj"], + lora_dropout=0.05, + bias="none", + 
task_type="CAUSAL_LM" + ) + model = prepare_model_for_kbit_training(model) + model = get_peft_model(model, lora_config) + + # Tokenize dataset + print("Tokenizing dataset") + tokenized_dataset = dataset.map( + lambda examples: tokenize_function(examples, tokenizer), + batched=True, + remove_columns=dataset.column_names + ) + + # Set up training arguments + training_args = TrainingArguments( + output_dir=args.output_dir, + num_train_epochs=args.num_train_epochs, + per_device_train_batch_size=args.per_device_train_batch_size, + learning_rate=args.learning_rate, + weight_decay=0.01, + logging_dir=f"{args.output_dir}/logs", + logging_steps=10, + save_strategy="epoch", + fp16=torch.cuda.is_available(), + ) + + # Set up data collator + data_collator = DataCollatorForLanguageModeling( + tokenizer=tokenizer, + mlm=False + ) + + # Initialize trainer + trainer = Trainer( + model=model, + args=training_args, + train_dataset=tokenized_dataset, + data_collator=data_collator, + ) + + # Train the model + print("Starting training") + trainer.train() + + # Save the model + print(f"Saving model to {args.output_dir}") + trainer.save_model(args.output_dir) + tokenizer.save_pretrained(args.output_dir) + + print("Finetuning complete!") + +if __name__ == "__main__": + main() diff --git a/generate_dataset_with_ai.py b/generate_dataset_with_ai.py new file mode 100644 index 0000000..177d12a --- /dev/null +++ b/generate_dataset_with_ai.py @@ -0,0 +1,581 @@ +import json +import os +import random +import time +import requests +from typing import List, Dict, Any +import logging +from PyPDF2 import PdfReader + +# Set up logging +logging.basicConfig( + level=logging.INFO, + format='%(asctime)s - %(levelname)s - %(message)s', + handlers=[ + logging.FileHandler("dataset_generation.log"), + logging.StreamHandler() + ] +) +logger = logging.getLogger(__name__) + +# DeepSeek API configuration +DEEPSEEK_API_URL = "https://api.deepseek.com/v1/chat/completions" +DEEPSEEK_MODEL = "deepseek-chat" + +def 
def load_json(path):
    """Load a JSON file.

    Args:
        path: Location of a UTF-8 encoded JSON document.

    Returns:
        The decoded JSON value.
    """
    with open(path, "r", encoding="utf-8") as f:
        return json.load(f)


def extract_text_from_pdf(pdf_path):
    """Extract text from a PDF file.

    Pages for which no text can be extracted are skipped; every other page
    contributes its text followed by a newline. Progress is logged every
    ten pages.

    Args:
        pdf_path: Path of the PDF to read.

    Returns:
        The concatenated text of all pages.
    """
    logger.info(f"Extracting text from {pdf_path}...")
    reader = PdfReader(pdf_path)
    # Collect the pieces and join once instead of growing a string per page.
    page_texts = []
    for i, page in enumerate(reader.pages):
        content = page.extract_text()
        if content:
            page_texts.append(content + "\n")
        if (i + 1) % 10 == 0:
            logger.info(f"Processed {i + 1} pages...")
    logger.info(f"Finished extracting text from {len(reader.pages)} pages.")
    return "".join(page_texts)


def clean_text(text: str) -> str:
    """Clean text by removing special characters and normalizing whitespace.

    Steps, in order: drop non-whitespace control characters, map typographic
    quotes to ASCII, tidy spacing around ``.,!?``, collapse every whitespace
    run (including newlines and tabs) to a single space, drop any remaining
    Unicode "Other" (``C*``) characters, and strip both ends.

    Args:
        text: Raw text.

    Returns:
        The cleaned, single-line text.
    """
    import re
    import unicodedata

    # Remove null bytes and control characters, but keep \t \n \v \f \r so
    # the whitespace pass below turns them into a space; deleting them here
    # would glue the neighbouring words together.
    text = re.sub(r'[\x00-\x08\x0E-\x1F\x7F-\x9F]', '', text)

    # Normalize quotes. Escapes are used so the mapping cannot be lost when
    # this source file is re-encoded.
    text = text.translate(str.maketrans({
        "\u201c": '"',
        "\u201d": '"',
        "\u2018": "'",
        "\u2019": "'",
    }))

    # Fix spacing around punctuation
    text = re.sub(r'\s+([.,!?])', r'\1', text)
    text = re.sub(r'([.,!?])\s*([A-Z])', r'\1 \2', text)

    # Normalize whitespace
    text = re.sub(r'\s+', ' ', text)

    # Fix common formatting issues
    text = text.replace(' .', '.')
    text = text.replace(' ,', ',')
    text = text.replace(' !', '!')
    text = text.replace(' ?', '?')

    # Remove any remaining special characters (format, private-use, ...)
    text = ''.join(char for char in text if unicodedata.category(char)[0] != 'C')

    return text.strip()


def clean_dataset_item(item: Dict[str, Any]) -> Dict[str, Any]:
    """Clean a dataset item by cleaning all string fields.

    The item is modified in place and also returned. Non-string values are
    left untouched. ``prompt`` and ``completion`` are covered by the generic
    pass, so they are cleaned exactly once.
    """
    for key, value in item.items():
        if isinstance(value, str):
            item[key] = clean_text(value)
    return item


def call_deepseek_api(prompt: str, api_key: str, timeout: float = 120.0) -> str:
    """Call the DeepSeek API with the given prompt.

    Args:
        prompt: Content of the user message.
        api_key: Bearer token sent in the Authorization header.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout a stalled connection would block the script forever.

    Returns:
        The text of the first choice, or ``None`` when the request fails or
        the response does not have the expected shape (the error is logged).
    """
    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {api_key}"
    }

    data = {
        "model": DEEPSEEK_MODEL,
        "messages": [
            {"role": "system", "content": "You are an expert in creating high-quality training data for fine-tuning language models. You write in a clear, professional style."},
            {"role": "user", "content": prompt}
        ],
        "temperature": 0.7,
        "max_tokens": 2000
    }

    # Deliberately best-effort: callers treat None as "generation failed".
    try:
        response = requests.post(DEEPSEEK_API_URL, headers=headers, json=data, timeout=timeout)
        response.raise_for_status()
        result = response.json()
        return result["choices"][0]["message"]["content"]
    except Exception as e:
        logger.error(f"Error calling DeepSeek API: {e}")
        return None


def extract_json_from_response(response: str) -> List[Dict]:
    """Extract JSON array from the API response.

    Three strategies are tried in turn, each only if the previous one
    produced nothing usable:

    1. Parse the whole response as JSON.
    2. Parse the first ``[{...}]`` span found inside the response.
    3. Pull out individual ``{"prompt": ..., "completion": ...}`` objects,
       which still works when the array is malformed or cut off.

    Args:
        response: Raw text returned by the model.

    Returns:
        A list of example dicts; empty when nothing could be extracted.
    """
    import re

    def _as_examples(parsed):
        """Coerce a decoded JSON value to a non-empty list of dicts, else None."""
        if isinstance(parsed, dict):
            if "prompt" in parsed and "completion" in parsed:
                return [parsed]
            # An object wrapping the array, e.g. {"examples": [...]}.
            parsed = next((v for v in parsed.values() if isinstance(v, list)), None)
        if isinstance(parsed, list):
            items = [entry for entry in parsed if isinstance(entry, dict)]
            return items or None
        return None

    def _try_parse(text):
        """Decode text as JSON and coerce it; None when that is not possible."""
        try:
            return _as_examples(json.loads(text))
        except (TypeError, ValueError):
            return None

    try:
        # First try to parse the entire response as JSON
        examples = _try_parse(response)
        if examples:
            return examples

        # Try to find a JSON array in the response
        json_match = re.search(r'\[\s*\{.*\}\s*\]', response, re.DOTALL)
        if json_match:
            examples = _try_parse(json_match.group(0))
            if examples:
                return examples

        # Fall back to individual examples. The string pattern accepts
        # backslash escapes (including \") inside the values.
        string_literal = r'"(?:[^"\\]|\\.)*"'
        pattern = (
            r'\{\s*"prompt"\s*:\s*(' + string_literal + r')\s*,'
            r'\s*"completion"\s*:\s*(' + string_literal + r')\s*\}'
        )
        examples = []
        for raw_prompt, raw_completion in re.findall(pattern, response, re.DOTALL):
            try:
                # Decode escapes such as \n and \" instead of keeping them
                # as literal backslash sequences in the dataset.
                prompt = json.loads(raw_prompt, strict=False)
                completion = json.loads(raw_completion, strict=False)
            except ValueError:
                prompt, completion = raw_prompt[1:-1], raw_completion[1:-1]
            examples.append({
                "prompt": prompt,
                "completion": completion
            })

        if examples:
            return examples

        logger.error("Could not extract JSON from response")
        return []
    except Exception as e:
        logger.error(f"Error extracting JSON from response: {e}")
        return []
are an expert in creating training data for fine-tuning language models. + I need you to generate examples that capture the writing style and expertise of Adriana James. + + Brand Guidelines: + - Tone: {brand_guidelines['tone']} + - Voice: {brand_guidelines['voice']} + - Brand Values: {', '.join(brand_guidelines['brand_values'])} + - Key Phrases: {', '.join(brand_guidelines['key_phrases'])} + - Avoided Phrases: {', '.join(brand_guidelines['avoided_phrases'])} + + Here is a summary of the book content: + {book_text[:2000]}... + + Generate {num_examples} training examples in the following format: + {{ + "prompt": "A simple user request without mentioning style (e.g., 'Write a motivational social media post')", + "completion": "A response that matches Adriana James' writing style, tone, and expertise" + }} + + IMPORTANT: The prompts should be simple user requests WITHOUT mentioning style or Adriana James. + The completions should ALWAYS be in Adriana James' style, regardless of the prompt. + + The completions should: + 1. Match the tone and voice specified in the brand guidelines + 2. Incorporate the brand values and key phrases + 3. Avoid the specified phrases + 4. Reflect the expertise and insights from the book + 5. Be realistic and professional + + Return the examples as a JSON array. 
+ """ + + response = call_deepseek_api(system_prompt, api_key) + if not response: + logger.error("Failed to generate book content examples") + return [] + + examples = extract_json_from_response(response) + if examples: + logger.info(f"Generated {len(examples)} book content examples") + else: + logger.error("Failed to parse book content examples") + + return examples + +def generate_campaign_examples(campaigns: List[Dict], brand_guidelines: Dict, tone_keywords: Dict, num_examples: int, api_key: str) -> List[Dict]: + """Generate examples based on past campaigns.""" + logger.info(f"Generating {num_examples} examples based on past campaigns...") + + # Format campaigns for the prompt + campaigns_text = json.dumps(campaigns, indent=2) + + system_prompt = f""" + You are an expert in creating training data for fine-tuning language models. + I need you to generate examples that capture the marketing style of Adriana James. + + Brand Guidelines: + - Tone: {brand_guidelines['tone']} + - Voice: {brand_guidelines['voice']} + - Brand Values: {', '.join(brand_guidelines['brand_values'])} + - Key Phrases: {', '.join(brand_guidelines['key_phrases'])} + - Avoided Phrases: {', '.join(brand_guidelines['avoided_phrases'])} + + Here are examples of past campaigns: + {campaigns_text} + + Generate {num_examples} training examples in the following format: + {{ + "prompt": "A simple user request without mentioning style (e.g., 'Write an email about our new leadership program')", + "completion": "A marketing piece that matches Adriana James' writing style and tone" + }} + + IMPORTANT: The prompts should be simple user requests WITHOUT mentioning style or Adriana James. + The completions should ALWAYS be in Adriana James' style, regardless of the prompt. + + The completions should: + 1. Match the tone and voice specified in the brand guidelines + 2. Incorporate the brand values and key phrases + 3. Avoid the specified phrases + 4. Be similar in style to the past campaigns + 5. 
Be realistic and professional + + Return the examples as a JSON array. + """ + + response = call_deepseek_api(system_prompt, api_key) + if not response: + logger.error("Failed to generate campaign examples") + return [] + + examples = extract_json_from_response(response) + if examples: + logger.info(f"Generated {len(examples)} campaign examples") + else: + logger.error("Failed to parse campaign examples") + + return examples + +def generate_style_examples(brand_guidelines: Dict, tone_keywords: Dict, num_examples: int, api_key: str) -> List[Dict]: + """Generate examples focused on style alignment.""" + logger.info(f"Generating {num_examples} style alignment examples...") + + system_prompt = f""" + You are an expert in creating training data for fine-tuning language models. + I need you to generate examples that help a model learn to match Adriana James' writing style. + + Brand Guidelines: + - Tone: {brand_guidelines['tone']} + - Voice: {brand_guidelines['voice']} + - Brand Values: {', '.join(brand_guidelines['brand_values'])} + - Key Phrases: {', '.join(brand_guidelines['key_phrases'])} + - Avoided Phrases: {', '.join(brand_guidelines['avoided_phrases'])} + + Generate {num_examples} training examples in the following format: + {{ + "prompt": "A simple user request without mentioning style (e.g., 'Write a product description for our coaching program')", + "completion": "The content in Adriana James' style" + }} + + IMPORTANT: The prompts should be simple user requests WITHOUT mentioning style or Adriana James. + The completions should ALWAYS be in Adriana James' style, regardless of the prompt. + + The examples should include: + 1. Content that would normally be written in different tones, but is rewritten in Adriana James' tone + 2. Content that would normally be written in different voices, but is rewritten in Adriana James' voice + 3. Content that incorporates brand values and key phrases + 4. Content that avoids specified phrases + 5. 
Content that sounds professional and expert + + Return the examples as a JSON array. + """ + + response = call_deepseek_api(system_prompt, api_key) + if not response: + logger.error("Failed to generate style examples") + return [] + + examples = extract_json_from_response(response) + if examples: + logger.info(f"Generated {len(examples)} style examples") + else: + logger.error("Failed to parse style examples") + + return examples + +def generate_synthetic_examples(brand_guidelines: Dict, tone_keywords: Dict, num_examples: int, api_key: str) -> List[Dict]: + """Generate synthetic examples for various content types.""" + logger.info(f"Generating {num_examples} synthetic examples...") + + system_prompt = f""" + You are an expert in creating training data for fine-tuning language models. + I need you to generate examples that help a model learn to create various types of content in Adriana James' style. + + Brand Guidelines: + - Tone: {brand_guidelines['tone']} + - Voice: {brand_guidelines['voice']} + - Brand Values: {', '.join(brand_guidelines['brand_values'])} + - Key Phrases: {', '.join(brand_guidelines['key_phrases'])} + - Avoided Phrases: {', '.join(brand_guidelines['avoided_phrases'])} + + Generate {num_examples} training examples in the following format: + {{ + "prompt": "A simple user request without mentioning style (e.g., 'Write a social media post about our new leadership program')", + "completion": "The generated content in Adriana James' style" + }} + + IMPORTANT: The prompts should be simple user requests WITHOUT mentioning style or Adriana James. + The completions should ALWAYS be in Adriana James' style, regardless of the prompt. 
+ + Include a variety of content types: + - Email subject lines + - Email bodies + - Social media posts + - Blog titles + - Blog introductions + - Product descriptions + + And a variety of topics: + - Leadership + - Professional development + - Career advancement + - Business strategy + - Personal branding + - Work-life balance + - Decision making + - Communication skills + + And a variety of audiences: + - Professional women + - Business leaders + - Entrepreneurs + - Executives + - Career-driven individuals + - Industry professionals + + The completions should: + 1. Match the tone and voice specified in the brand guidelines + 2. Incorporate the brand values and key phrases + 3. Avoid the specified phrases + 4. Be realistic and professional + + Return the examples as a JSON array. + """ + + response = call_deepseek_api(system_prompt, api_key) + if not response: + logger.error("Failed to generate synthetic examples") + return [] + + examples = extract_json_from_response(response) + if examples: + logger.info(f"Generated {len(examples)} synthetic examples") + else: + logger.error("Failed to parse synthetic examples") + + return examples + +def save_dataset(dataset: List[Dict], filename: str): + """Save a dataset to a JSON file.""" + logger.info(f"Saving dataset to {filename}...") + + # Clean the dataset + cleaned_dataset = [clean_dataset_item(item) for item in dataset] + + # Create directory if it doesn't exist + os.makedirs(os.path.dirname(filename), exist_ok=True) + + # Save to file + with open(filename, 'w', encoding='utf-8') as f: + json.dump(cleaned_dataset, f, indent=2, ensure_ascii=False) + + logger.info(f"Dataset saved with {len(cleaned_dataset)} items.") + +def main(): + # Get API key from environment variable + api_key = "sk-776f96462d954d4f98c4ee1a493ac9ad" + if not api_key: + logger.error("DEEPSEEK_API_KEY environment variable not set") + return + + # Create output directory + os.makedirs("datasets", exist_ok=True) + + # Load brand guidelines and tone 
keywords + brand_guidelines = load_json("data/style_guidelines/brand_guidelines.json") + tone_keywords = load_json("data/style_guidelines/tone_keywords.json") + + # Extract text from the book + book_text = extract_text_from_pdf("data/book.pdf") + + # Load past campaigns + campaigns = load_json("data/past_campaigns/sample_campaign.json") + + # Generate examples for each category + book_examples = generate_book_content_examples(book_text, brand_guidelines, tone_keywords, 30, api_key) + campaign_examples = generate_campaign_examples(campaigns, brand_guidelines, tone_keywords, 20, api_key) + style_examples = generate_style_examples(brand_guidelines, tone_keywords, 30, api_key) + synthetic_examples = generate_synthetic_examples(brand_guidelines, tone_keywords, 40, api_key) + + # Create fallback examples if API calls failed + if not book_examples: + logger.info("Creating fallback book content examples...") + book_examples = create_fallback_book_examples(book_text, brand_guidelines, tone_keywords, 30) + + if not campaign_examples: + logger.info("Creating fallback campaign examples...") + campaign_examples = create_fallback_campaign_examples(campaigns, brand_guidelines, tone_keywords, 20) + + if not style_examples: + logger.info("Creating fallback style examples...") + style_examples = create_fallback_style_examples(brand_guidelines, tone_keywords, 30) + + if not synthetic_examples: + logger.info("Creating fallback synthetic examples...") + synthetic_examples = create_fallback_synthetic_examples(brand_guidelines, tone_keywords, 40) + + # Save individual datasets + save_dataset(book_examples, "datasets/stage1_book_content.json") + save_dataset(campaign_examples, "datasets/stage2_campaign_content.json") + save_dataset(synthetic_examples, "datasets/stage3_synthetic_content.json") + save_dataset(style_examples, "datasets/stage4_style_alignment.json") + + # Combine all datasets + combined_dataset = book_examples + campaign_examples + synthetic_examples + style_examples + + # 
Shuffle the combined dataset + random.shuffle(combined_dataset) + + # Save the combined dataset + save_dataset(combined_dataset, "datasets/adriana_finetune_dataset.json") + + logger.info("Dataset generation complete!") + +def create_fallback_book_examples(book_text: str, brand_guidelines: Dict, tone_keywords: Dict, num_examples: int) -> List[Dict]: + """Create fallback book content examples if API call fails.""" + logger.info(f"Creating {num_examples} fallback book content examples...") + + # Extract paragraphs from the book + paragraphs = [p.strip() for p in book_text.split('\n\n') if p.strip()] + + # Create realistic prompts + realistic_prompts = [ + "Write a motivational social media post.", + "Draft a personal growth blog intro.", + "Create a coaching-oriented email body.", + "Write an inspirational paragraph for professionals.", + "Craft a message about values and transformation.", + "Write a personal development newsletter paragraph." + ] + + # Create examples + examples = [] + for _ in range(min(num_examples, len(paragraphs))): + prompt = random.choice(realistic_prompts) + completion = random.choice(paragraphs) + examples.append({ + "prompt": prompt, + "completion": completion + }) + + logger.info(f"Created {len(examples)} fallback book content examples") + return examples + +def create_fallback_campaign_examples(campaigns: List[Dict], brand_guidelines: Dict, tone_keywords: Dict, num_examples: int) -> List[Dict]: + """Create fallback campaign examples if API call fails.""" + logger.info(f"Creating {num_examples} fallback campaign examples...") + + if not isinstance(campaigns, list): + campaigns = [campaigns] + + examples = [] + for campaign in campaigns: + prompt = ( + f"Write a {campaign['type'].replace('_', ' ')} for {campaign['target_audience']}.\n" + f"Title: {campaign['title']}\n" + f"Focus: {brand_guidelines['tone']} and {brand_guidelines['voice']}.\n" + ) + + if campaign['type'] == 'email_campaign': + completion = ( + f"Subject: 
{campaign['content']['subject_line']}\n\n" + f"{campaign['content']['body']}\n\n" + f"Call to Action: {campaign['content']['call_to_action']}" + ) + elif campaign['type'] == 'social_media': + completion = campaign['content'].get('post', '') + else: + completion = json.dumps(campaign['content'], indent=2) + + examples.append({ + "prompt": prompt, + "completion": completion + }) + + logger.info(f"Created {len(examples)} fallback campaign examples") + return examples + +def create_fallback_style_examples(brand_guidelines: Dict, tone_keywords: Dict, num_examples: int) -> List[Dict]: + """Create fallback style examples if API call fails.""" + logger.info(f"Creating {num_examples} fallback style examples...") + + examples = [] + for _ in range(num_examples // 3): + examples.append({ + "prompt": "Rewrite the following content to match a confident and professional tone:\nThis product is the best ever! It's cheap and guaranteed to make you rich!", + "completion": "This program is designed to deliver measurable results and meaningful growth for professionals. It's a strategic investment in your future success." + }) + examples.append({ + "prompt": "Rewrite the following to sound more like expert guidance:\nYou could maybe try this if you want.", + "completion": "This is a proven approach recommended for professionals who want to achieve clear, measurable results." + }) + examples.append({ + "prompt": "Rewrite the following to avoid over-promising language:\nThis is the number one product that will solve everything instantly!", + "completion": "This well-designed solution is backed by years of experience and offers a practical framework for long-term success." 
+ }) + + logger.info(f"Created {len(examples)} fallback style examples") + return examples + +def create_fallback_synthetic_examples(brand_guidelines: Dict, tone_keywords: Dict, num_examples: int) -> List[Dict]: + """Create fallback synthetic examples if API call fails.""" + logger.info(f"Creating {num_examples} fallback synthetic examples...") + + content_types = ["email_subject", "email_body", "social_media_post", "blog_title", "blog_intro", "product_description"] + target_audiences = ["professional women", "business leaders", "entrepreneurs", "executives", "career-driven individuals"] + topics = ["leadership", "professional development", "career advancement", "business strategy", "personal branding"] + + templates = { + "social_media_post": [ + "Write a social media post about {topic}.", + "Create a post for {audience} on {topic}.", + ], + "email_body": [ + "Write an email body for {audience} on {topic}.", + ], + "blog_intro": [ + "Write a blog intro about {topic}.", + ], + "product_description": [ + "Write a product description focused on {topic} for {audience}.", + ], + "email_subject": [ + "Write an email subject line about {topic}.", + ], + "blog_title": [ + "Write a blog title about {topic}.", + ] + } + + examples = [] + for _ in range(num_examples): + content_type = random.choice(content_types) + audience = random.choice(target_audiences) + topic = random.choice(topics) + prompt = random.choice(templates[content_type]).format(topic=topic, audience=audience) + + # Create a simple completion based on brand guidelines + completion = f"This {content_type.replace('_', ' ')} about {topic} for {audience} reflects our commitment to {', '.join(brand_guidelines['brand_values'][:2])}." 
+ + examples.append({ + "prompt": prompt, + "completion": completion + }) + + logger.info(f"Created {len(examples)} fallback synthetic examples") + return examples + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..bdee297 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,9 @@ +torch>=2.0.0 +transformers>=4.30.0 +datasets>=2.12.0 +accelerate>=0.20.0 +peft>=0.4.0 +bitsandbytes>=0.40.0 +scipy>=1.10.0 +numpy>=1.24.0 +tqdm>=4.65.0 \ No newline at end of file diff --git a/simple_test.py b/simple_test.py new file mode 100644 index 0000000..edce9f2 --- /dev/null +++ b/simple_test.py @@ -0,0 +1,49 @@ +import requests +import json + +def test_model(prompt): + """Test the model with a simple prompt.""" + url = "http://localhost:8000/direct-model" + + payload = { + "prompt": prompt, + "max_length": 200, + "num_return_sequences": 1, + "temperature": 0.7, + "top_p": 0.9 + } + + print(f"Sending prompt: '{prompt}'") + + try: + response = requests.post(url, json=payload) + + if response.status_code == 200: + result = response.json() + print("\nGenerated text:") + print(result["generated_texts"][0]) + else: + print(f"Error: {response.status_code}") + print(response.text) + except Exception as e: + print(f"Error: {str(e)}") + print("Make sure the FastAPI server is running on http://localhost:8000") + +def main(): + print("Simple LLM Test") + print("===============") + print("Enter your prompt (or 'quit' to exit):") + + while True: + prompt = input("\n> ") + + if prompt.lower() in ["quit", "exit", "q"]: + break + + if not prompt.strip(): + continue + + test_model(prompt) + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/test_backend.py b/test_backend.py new file mode 100644 index 0000000..30aedc2 --- /dev/null +++ b/test_backend.py @@ -0,0 +1,101 @@ +import requests +import json +import argparse + +def test_direct_model(prompt, max_length=200, 
num_return_sequences=1, temperature=0.7, top_p=0.9): + """Test the direct model inference endpoint.""" + url = "http://localhost:8000/direct-model" + + payload = { + "prompt": prompt, + "max_length": max_length, + "num_return_sequences": num_return_sequences, + "temperature": temperature, + "top_p": top_p + } + + print(f"Sending request to {url}") + print(f"Payload: {json.dumps(payload, indent=2)}") + + response = requests.post(url, json=payload) + + if response.status_code == 200: + result = response.json() + print("\nResponse:") + for i, text in enumerate(result["generated_texts"]): + print(f"\n--- Generation {i+1} ---") + print(text) + else: + print(f"Error: {response.status_code}") + print(response.text) + +def test_generate_copy(prompt, content_type, tone=None, target_audience=None): + """Test the generate-copy endpoint.""" + url = "http://localhost:8000/generate-copy" + + payload = { + "prompt": prompt, + "content_type": content_type + } + + if tone: + payload["tone"] = tone + if target_audience: + payload["target_audience"] = target_audience + + print(f"Sending request to {url}") + print(f"Payload: {json.dumps(payload, indent=2)}") + + response = requests.post(url, json=payload) + + if response.status_code == 200: + result = response.json() + print("\nResponse:") + print(f"Content: {result['content']}") + print(f"Confidence Score: {result['confidence_score']}") + print(f"Brand Alignment Score: {result['brand_alignment_score']}") + else: + print(f"Error: {response.status_code}") + print(response.text) + +def main(): + parser = argparse.ArgumentParser(description="Test the backend with the finetuned model") + parser.add_argument("--endpoint", type=str, choices=["direct-model", "generate-copy"], default="direct-model", + help="Endpoint to test") + parser.add_argument("--prompt", type=str, default="Create a welcome message for new clients", + help="Prompt to generate text for") + parser.add_argument("--content-type", type=str, default="email", + help="Content 
type (for generate-copy endpoint)") + parser.add_argument("--tone", type=str, default=None, + help="Tone (for generate-copy endpoint)") + parser.add_argument("--target-audience", type=str, default=None, + help="Target audience (for generate-copy endpoint)") + parser.add_argument("--max-length", type=int, default=200, + help="Maximum length of the generated text (for direct-model endpoint)") + parser.add_argument("--num-return-sequences", type=int, default=1, + help="Number of sequences to generate (for direct-model endpoint)") + parser.add_argument("--temperature", type=float, default=0.7, + help="Temperature for sampling (for direct-model endpoint)") + parser.add_argument("--top-p", type=float, default=0.9, + help="Top-p for sampling (for direct-model endpoint)") + + args = parser.parse_args() + + if args.endpoint == "direct-model": + test_direct_model( + prompt=args.prompt, + max_length=args.max_length, + num_return_sequences=args.num_return_sequences, + temperature=args.temperature, + top_p=args.top_p + ) + else: + test_generate_copy( + prompt=args.prompt, + content_type=args.content_type, + tone=args.tone, + target_audience=args.target_audience + ) + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/test_model.py b/test_model.py new file mode 100644 index 0000000..e8f0a4b --- /dev/null +++ b/test_model.py @@ -0,0 +1,63 @@ +import argparse +from transformers import AutoModelForCausalLM, AutoTokenizer + +def generate_text(model_path, prompt, max_length=100, num_return_sequences=1, temperature=0.7): + """Generate text using the finetuned model.""" + # Load the finetuned model and tokenizer + print(f"Loading model from {model_path}") + tokenizer = AutoTokenizer.from_pretrained(model_path) + model = AutoModelForCausalLM.from_pretrained(model_path) + + # Format the prompt + formatted_prompt = f"Prompt: {prompt}\nCompletion:" + + # Generate text + print(f"Generating text for prompt: {prompt}") + inputs = tokenizer(formatted_prompt, 
return_tensors="pt") + outputs = model.generate( + **inputs, + max_length=max_length, + num_return_sequences=num_return_sequences, + temperature=temperature, + do_sample=True, + pad_token_id=tokenizer.eos_token_id + ) + + # Decode and return the generated text + generated_texts = [] + for output in outputs: + generated_text = tokenizer.decode(output, skip_special_tokens=True) + # Extract just the completion part + completion = generated_text.split("Completion:")[-1].strip() + generated_texts.append(completion) + + return generated_texts + +def main(): + class Args: + def __init__(self): + self.model_path = "finetuned_model" # Default path to the finetuned model + self.prompt = "Create a welcome message for new clients" # Default prompt + self.max_length = 100 + self.num_return_sequences = 1 + self.temperature = 0.7 + + args = Args() + + # Generate text + generated_texts = generate_text( + args.model_path, + args.prompt, + args.max_length, + args.num_return_sequences, + args.temperature + ) + + # Print the generated text + print("\nGenerated text:") + for i, text in enumerate(generated_texts): + print(f"\n--- Generation {i+1} ---") + print(text) + +if __name__ == "__main__": + main() \ No newline at end of file