# Files
# ds_tjc/src/marketing_assistant_ai/rag.py
# T

# 346 lines
# 17 KiB
# Python
# Raw Blame History

# This file contains ambiguous Unicode characters
# This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
from typing import List, Dict
import requests
from langchain_groq import ChatGroq
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser
from chromadb.api.types import Documents, EmbeddingFunction
from config import (
MODEL_NAME, RERANKER_NAME, API_KEY,
SERVER_URL, GROQ_API_KEY, GROQ_MODEL
)
class CustomEmbeddingFunction(EmbeddingFunction):
    """Embedding function that delegates to a remote ``/embeddings`` HTTP endpoint.

    Requests are POSTed to ``{SERVER_URL}/embeddings`` with a bearer token
    taken from ``API_KEY``; both values come from ``config``.
    """

    def __init__(self, model_name: str, timeout: float = 60.0):
        """Store the model name and connection settings.

        Args:
            model_name: Value sent as the ``model`` field of every request.
            timeout: Seconds ``requests`` waits for the server before giving
                up. Without a timeout a stalled server would block the
                caller indefinitely.
        """
        self.model_name = model_name
        self._api_key = API_KEY
        self._server_url = SERVER_URL
        self._timeout = timeout

    # NOTE(review): the parameter is named `input` (shadowing the builtin)
    # presumably because the chromadb interface expects that name — confirm
    # before renaming.
    def __call__(self, input: Documents) -> List[List[float]]:
        """Embed ``input`` and return one vector per entry of the response.

        Args:
            input: Documents to embed. An empty value short-circuits to ``[]``
                without contacting the server.

        Returns:
            The ``embedding`` field of each item in the response's ``data`` list.

        Raises:
            Exception: Any error from the HTTP call or from reading the
                response is printed and then re-raised unchanged.
        """
        if not input:
            return []
        headers = {
            "Authorization": f"Bearer {self._api_key}",
            "Content-Type": "application/json"
        }
        payload = {
            "model": self.model_name,
            "input": input
        }
        try:
            response = requests.post(
                f"{self._server_url}/embeddings",
                json=payload,
                headers=headers,
                timeout=self._timeout,
            )
            response.raise_for_status()
            return [item['embedding'] for item in response.json()['data']]
        except Exception as e:
            # Log for visibility, then propagate: callers cannot proceed
            # without embeddings, so there is no sensible fallback here.
            print(f"Error in embedding: {str(e)}")
            raise
class CustomReranker:
    """Client for a remote ``/rerank`` HTTP endpoint.

    Requests are POSTed to ``{SERVER_URL}/rerank`` with a bearer token taken
    from ``API_KEY``; both values come from ``config``.
    """

    def __init__(self, model_name: str, timeout: float = 60.0):
        """Store the model name and connection settings.

        Args:
            model_name: Value sent as the ``model`` field of every request.
            timeout: Seconds ``requests`` waits for the server before giving
                up. Without a timeout a stalled server would block the
                caller indefinitely.
        """
        self.model_name = model_name
        self._api_key = API_KEY
        self._server_url = SERVER_URL
        self._timeout = timeout

    def rerank(self, query: str, documents: List[Dict], top_k: int = 5) -> List[Dict]:
        """Rerank ``documents`` against ``query`` and keep at most ``top_k``.

        Args:
            query: The search query sent to the reranking model.
            documents: Candidate documents; each must have a ``'content'`` key,
                which is the text sent to the server.
            top_k: Maximum number of documents to return.

        Returns:
            Shallow copies of the selected documents, each with an added
            ``'relevance_score'`` key, in the order the server listed them.
            If anything goes wrong (network error, bad status, unexpected
            response shape) the error is printed and the first ``top_k``
            documents are returned in their original order, without scores.
            An empty ``documents`` yields ``[]`` without contacting the server.
        """
        if not documents:
            return []
        headers = {
            "Authorization": f"Bearer {self._api_key}",
            "Content-Type": "application/json"
        }
        payload = {
            "model": self.model_name,
            "query": query,
            "documents": [doc['content'] for doc in documents]
        }
        try:
            response = requests.post(
                f"{self._server_url}/rerank",
                json=payload,
                headers=headers,
                timeout=self._timeout,
            )
            response.raise_for_status()
            # Get reranked results
            reranked_results = response.json()['results']
            # No sorting happens here: the server's result order is used
            # as-is (presumably already best-first — verify against the API).
            reranked_docs = []
            for result in reranked_results[:top_k]:
                # Copy so the caller's dicts are not mutated by the score.
                doc = documents[result['index']].copy()
                doc['relevance_score'] = result['relevance_score']
                reranked_docs.append(doc)
            return reranked_docs
        except Exception as e:
            print(f"Error in reranking: {str(e)}")
            # Best-effort fallback: keep the original ordering, but still
            # honour top_k so both paths return the same maximum size.
            return documents[:top_k]
# Initialize global instances
# All three objects are constructed once, at import time, from values in
# `config`. LLM is the model piped into the chain built by
# generate_marketing_response; EMBED_FUNCTION and RERANKER are not referenced
# again in the visible part of this file — presumably they are imported by
# other modules (verify against callers).
EMBED_FUNCTION = CustomEmbeddingFunction(model_name=MODEL_NAME)
RERANKER = CustomReranker(model_name=RERANKER_NAME)
# NOTE(review): temperature=0.01 presumably aims for near-deterministic output — confirm intent.
LLM = ChatGroq(temperature=0.01, groq_api_key=GROQ_API_KEY, model_name=GROQ_MODEL)
def format_context(documents: List[Dict]) -> str:
    """Format retrieved documents into a single context string.

    Each document becomes ``"[CATEGORY]\\n<content>\\n"`` and the pieces are
    joined with a newline, so consecutive documents are separated by a blank
    line.

    Args:
        documents: Dicts with a required ``'content'`` key and an optional
            ``'metadata'`` dict. The category label is read from
            ``metadata['category']``.

    Returns:
        The concatenated context, or ``""`` when ``documents`` is empty.

    Raises:
        KeyError: If a document has no ``'content'`` key.
    """
    context_parts = []
    for doc in documents:
        # Tolerate documents whose metadata is absent or None instead of
        # failing the whole response on one incomplete record.
        metadata = doc.get('metadata') or {}
        # A missing, None or empty category falls back to 'unknown'; str()
        # guards against non-string values, which have no .upper().
        category = str(metadata.get('category') or 'unknown')
        content = doc['content']
        context_parts.append(f"[{category.upper()}]\n{content}\n")
    return "\n".join(context_parts)
# COMPLIANCE_PROMPT = """If the user asks about Time Line Therapy®(TLT) or NLP techniques, ensure the following:
# Important compliance requirements for Time Line Therapy® content:
# 1. Always use proper trademark symbol ® when mentioning Time Line Therapy®
# 2. Maintain correct spacing in "Time Line Therapy®"
# 3. Accurately attribute creation to Tad James in 1985
# 4. Distinguish from general NLP "Time Line" techniques
# 5. Acknowledge the relationship with NLP as an advanced application
# 6. Reference specific techniques like the Anxiety Model correctly
# Generate content that strictly adheres to these requirements.
# """
# COMPLIANCE_PROMPT = """If the user asks about Time Line Therapy®(TLT) or NLP techniques, ensure the following:
# Important compliance requirements for Time Line Therapy® content:
# 1. Always use proper trademark symbol ® when mentioning Time Line Therapy®
# 2. Maintain correct spacing in "Time Line Therapy®"
# 3. Accurately attribute creation to Tad James in 1985
# 4. Distinguish from general NLP "Time Line" techniques
# 5. Acknowledge the relationship with NLP as an advanced application
# 6. Reference specific techniques correctly
# AVOID:
# 1. Claims about specific timeframes for results
# 2. Claims about medical or physiological effects
# 3. Guarantees of success or "inevitable" results
# 4. Unverified professional endorsements
# 5. Terms like "eradicate" or "remove" emotions
# 6. Claims about the "Anxiety Model" as a specific technique
# Generate content that strictly adheres to these requirements.
# """
# # Template for marketing copy
# TEMPLATE = """
# Act like you are Adriana James, write marketing copy in her signature style. Just mimic her style and provide the answer to the user's query. Make sure that you are Adriana James, and you are providing the answer to the user's query.
# {COMPLIANCE_PROMPT}
# Query: {question}
# Adriana James Resource Context: {context}
# Note: Don't provide anything extra. Just give me the response no extra words nothing at all.
# """
# Compliance rules injected into TEMPLATE's {COMPLIANCE_PROMPT} slot at
# invoke time. Earlier drafts that were assigned to this name and then
# immediately overwritten have been dropped: only this final assignment was
# ever in effect, and its text is unchanged.
COMPLIANCE_PROMPT = """
If the user asks about Time Line Therapy® (TLT) or NLP techniques, ensure the following:
1. Always use the proper trademark symbol ® when mentioning Time Line Therapy®.
2. Maintain the correct spacing in "Time Line Therapy®".
3. Accurately attribute its creation to Dr. Tad James in 1985.
4. Clearly distinguish Time Line Therapy® as a distinct, proprietary modality evolved from NLP principles, not to be confused with general NLP techniques.
5. Extract and incorporate key concepts from source files accurately.
6. Verify proper attribution of trademarks and authors.
7. Validate content against external knowledge sources.
8. Prevent inclusion of unrelated, redundant, or confidential information.
9. Ensure proper formatting for different outputs (e.g., bullet-point lists, structured FAQs) as required in the response.
10. Maintain a client-specific tone and style that is warm, engaging, and professional, avoiding overuse of AI-generated language.
11. Address key content questions directly without repeating points.
AVOID:
1. Claims about specific timeframes for results, such as suggesting changes occur “in minutes” or “instant” outcomes.
2. Claims related to medical or physiological effects.
3. Guarantees of success or language implying inevitable results.
4. Unverified professional endorsements.
5. Language that suggests the complete eradication or removal of emotions; instead, use language that emphasizes the release or reframing of negative emotions.
6. Presenting the "Anxiety Model" as a recognized or core technique, since it is not officially part of Time Line Therapy®.
7. Describing Time Line Therapy® merely as an "NLP technique" or stating it was "born out of NLP"; instead, acknowledge it as a distinct modality with an advanced application of some NLP principles.
8. Overly technical jargon or redundant explanations unrelated to key content.
"""
# Prompt template with three placeholders: {COMPLIANCE_PROMPT}, {question}
# and {context}. As with COMPLIANCE_PROMPT, superseded drafts were removed
# and the effective text is unchanged.
TEMPLATE = """
Act like you are Adriana James, writing marketing copy in her signature style. Just mimic her style and provide the answer to the user's query. Make sure that you present yourself as Adriana James, responding directly to the user's query.
- If the question is about specific techniques or principles of Time Line Therapy®, do provide the answer.
- Accurately extract and highlight the core principles and techniques of Time Line Therapy®.
- Provide correct attribution by stating that Time Line Therapy® was developed by Dr. Tad James in 1985.
- Distinguish Time Line Therapy® from general NLP techniques.
- Validate and incorporate content using external knowledge sources where applicable.
- Exclude unrelated, redundant, or confidential information.
- Format responses appropriately (using bullet-point lists, structured FAQs, or conversational text) based on the query, keeping the client's specific tone and style intact.
Here is the Compliance Prompt:
{COMPLIANCE_PROMPT}
Query: {question}
Adriana James Resource Context: {context}
Note: Provide only the direct response as output—no extra words or commentary.
"""
# COMPLIANCE_PROMPT = """
# 📌 Key Requirements:
# Accurate Information Extraction
# Clearly explain that emotions and memories are stored linearly in the unconscious mind.
# Describe how negative emotions can be released without re-experiencing trauma.
# Highlight that limiting decisions and beliefs can be removed at their root cause.
# Show how the future can be consciously shaped by eliminating past emotional barriers.
# Emphasize that TLT works at the unconscious level for lasting change.
# Proper Attribution & Trademark Compliance
# Always use Time Line Therapy® (with the registered trademark symbol ®).
# Maintain the correct spacing in "Time Line Therapy®".
# Accurately attribute its creation to Dr. Tad James in 1985.
# Differentiate Time Line Therapy® from NLP—do not call it an "NLP technique" but acknowledge it as a distinct modality that expands upon NLP principles.
# Content Validation & Relevance
# Extract and incorporate key concepts from the source files without redundancy.
# Validate content against external knowledge sources where applicable.
# Ensure the response is relevant, structured, and engaging while maintaining the client's preferred tone.
# Client-Specific Formatting & Style
# Use bullet points, FAQs, or structured sections for readability.
# Mimic the engaging, warm, and clear style of Adriana James, ensuring the tone is warm and professional.
# Avoid excessive AI-generated phrasing—keep it natural and conversational.
# 🚨 Strict Exclusions DO NOT INCLUDE:
# ❌ Instant results claims (e.g., "works in minutes").
# ❌ Medical or physiological claims.
# ❌ Guarantees of success (e.g., "this will 100% work").
# ❌ Statements implying emotions are completely "removed" (use "released" or "reframed" instead).
# ❌ Confusing TLT with general NLP or calling it "born out of NLP".
# ❌ Overuse of technical jargon, speculative theories, or unrelated concepts.
# ❌ Presenting the "Anxiety Model" as an official part of Time Line Therapy®.
# """
# PROMPT_TEMPLATE = """
# You are an AI expert tasked with generating accurate, engaging, and compliant content on Time Line Therapy® (TLT), ensuring proper attribution, clarity, and trademark compliance. Your response must strictly follow these guidelines:
# {COMPLIANCE_PROMPT}
# Query: {question}
# Adriana James Resource Context: {context}
# 🌟 Expected Output Example:
# ✔️ Time Line Therapy® helps individuals release negative emotions and limiting beliefs stored in the unconscious mind.
# ✔️ By guiding individuals through their time line, practitioners help them reframe past experiences and remove emotional barriers.
# ✔️ Developed by Dr. Tad James in 1985, TLT expands on NLP principles and hypnotherapy to facilitate emotional healing and transformation.
# ✔️ Unlike traditional NLP, which focuses on cognitive restructuring, TLT works directly with stored emotional experiences for deep and lasting change.
# ✔️ The process helps clients release emotional baggage, overcome self-doubt, and create a compelling future.
# """
# Build the chat prompt once, at import time, from the TEMPLATE value in
# effect at this point in the module. Its {COMPLIANCE_PROMPT}, {question} and
# {context} placeholders are filled in by generate_marketing_response.
PROMPT = ChatPromptTemplate.from_template(TEMPLATE)
def generate_marketing_response(query: str, context: str) -> str:
    """Run the prompt -> LLM -> string-parser pipeline for a single query.

    Args:
        query: The user's question; fills the template's ``question`` slot.
        context: Retrieved source text; fills the template's ``context`` slot.

    Returns:
        The string produced by the output parser at the end of the chain.
    """
    # The pipeline is assembled on each call from the module-level PROMPT
    # and LLM objects.
    pipeline = PROMPT | LLM | StrOutputParser()
    prompt_inputs = {
        "question": query,
        "context": context,
        "COMPLIANCE_PROMPT": COMPLIANCE_PROMPT,
    }
    return pipeline.invoke(prompt_inputs)