Refactor file paths and update content handling in ChromaManager; enhance compliance prompts in MarketingAssistant

2025-03-17 22:31:51 +00:00
parent daf09de530
commit 8ca2feacf9
7 changed files with 424 additions and 39 deletions
@@ -202,7 +202,7 @@ class ChromaManager:
        print("The filename is: ", filename)
        print("The category is: ", category)
        try:
-            full_path = f"/home/kowshik/work/ds_tjc/marketing_data/{category}/{filename}"
+            full_path = f"/root/ds_tjc/marketing_data/{category}/{filename}"
            print("The full path is: ", full_path)
            
            # Get all documents with matching source and category
@@ -233,32 +233,27 @@ class ChromaManager:
            print(f"Error in delete_file: {str(e)}")
            raise

-    def update_file_content(self, filename: str, category: str, new_content: str) -> bool:
+    def update_file_content(self, filename: str, category: str, splits: List[Document]) -> bool:
        """Update all chunks associated with a specific file"""
        try:
            # First delete existing chunks
            if not self.delete_file(filename, category):
                return False
            
-            # Create new chunks
-            text_splitter = RecursiveCharacterTextSplitter(
-                chunk_size=1000,
-                chunk_overlap=200
-            )
-            chunks = text_splitter.split_text(new_content)
-            
-            # Add new chunks
-            doc_ids = [str(uuid.uuid4()) for _ in chunks]
+            # Add new chunks from the splits
+            doc_ids = [str(uuid.uuid4()) for _ in splits]
+            documents = [split.page_content for split in splits]
            metadatas = [{
                'filename': filename,
                'category': category,
                'doc_id': doc_id,
                'upload_date': datetime.utcnow().isoformat(),
-                'chunk_index': idx
+                'chunk_index': idx,
+                'source': f"/root/ds_tjc/marketing_data/{category}/{filename}"
            } for idx, doc_id in enumerate(doc_ids)]
            
            self.collection.add(
-                documents=chunks,
+                documents=documents,
                metadatas=metadatas,
                ids=doc_ids
            )
@@ -267,4 +262,4 @@ class ChromaManager:
            
        except Exception as e:
            print(f"Error updating file: {str(e)}")
-            return False
+            raise
@@ -3,7 +3,7 @@ from pathlib import Path

 # Base directory configuration
 #BASE_DIR = Path(__file__).resolve().parent
-BASE_DIR = Path("/home/kowshik/work/ds_tjc")
+BASE_DIR = Path("/root/ds_tjc")

 # Data directories
 UPLOAD_DIR = BASE_DIR / "marketing_data"
@@ -65,6 +65,8 @@ async def upload_document(
        
        # Add to vector store
        doc_ids = chroma_manager.add_documents(splits, category.value)
+
+        print("Document IDs: ", doc_ids)
        
        return DocumentResponse(
            document_id=doc_ids[0],
@@ -104,7 +106,7 @@ async def query_documents(request: QueryRequest):
        assistant = MarketingAssistant()
        content_type = assistant.classify_query(request.query)
        print(f"Query classified as: {content_type}")
-        category = content_type
+        category = "books"
        initial_results = chroma_manager.query_documents(
            query=request.query,
            category=category if category else None,
@@ -179,6 +181,7 @@ async def delete_file(
    filename: str
 ):
    """Delete a specific file and all its chunks"""
+    # filename example: /path/to/file.txt
    try:
        # # URL decode the filename
        # decoded_filename = unquote(filename)
@@ -214,15 +217,50 @@ async def delete_file(
 async def update_file(
    category: CategoryEnum,
    filename: str,
-    content: str
+    file: UploadFile = File(...)
 ):
    """Update content for a specific file"""
+    if not file.filename.lower().endswith(('.pdf', '.txt')):
+        raise HTTPException(400, "Only PDF and TXT files are supported")
    try:
-        success = chroma_manager.update_file_content(filename, category.value, content)
-        if not success:
-            raise HTTPException(404, f"File {filename} not found in category {category}")
-        return {"status": "success", "message": f"File {filename} updated successfully"}
+        # URL decode the filename if needed
+        decoded_filename = unquote(filename)
+        
+        # Ensure category directory exists
+        category_dir = UPLOAD_DIR / category.value
+        category_dir.mkdir(parents=True, exist_ok=True)
+        
+        # Create temp file path
+        temp_path = category_dir / f"temp_{decoded_filename}"
+        
+        # Save uploaded file
+        content = await file.read()
+        with open(temp_path, "wb") as f:
+            f.write(content)
+
+        try:
+            # Process the file
+            splits = load_and_split_documents(temp_path, category_dir)
+            if not splits:
+                raise HTTPException(400, "No content could be extracted from the file")
+
+            # Update in Chroma
+            success = chroma_manager.update_file_content(decoded_filename, category.value, splits)
+            if not success:
+                raise HTTPException(404, f"File {decoded_filename} not found in category {category}")
+
+        finally:
+            # Clean up temp file
+            if temp_path.exists():
+                os.remove(temp_path)
+
+        return {
+            "status": "success", 
+            "message": f"File {decoded_filename} updated successfully"
+        }
+
    except Exception as e:
+        print(f"Error in update_file: {str(e)}")
        raise HTTPException(500, f"Error updating file: {str(e)}")

 # Modify the upload endpoint to include filename in metadata
@@ -232,33 +270,37 @@ async def upload_document(
    file: UploadFile = File(...)
 ):
    """Upload and process a document for a specific category"""
-    if not file.filename.lower().endswith(('.pdf', '.txt')):
+    if not file.filename.lower().endswith(('.pdf', '.txt', '.docx', '.pptx', '.png', '.jpg', '.jpeg')):
        raise HTTPException(400, "Only PDF and TXT files are supported")
    
    try:
+        # Generate a unique document ID
+        doc_id = str(uuid.uuid4())
+        
        # Save file temporarily
        file_path = save_upload_file(file, category)
        
        # Process and split document
-        splits = load_and_split_documents(file_path)
+        splits = load_and_split_documents(file_path, folder_path=UPLOAD_DIR / category.value)
        
        if not splits:
            raise HTTPException(400, "No content could be extracted from the file")
        
-        # Add metadata to splits
+        # Add metadata to each split
+        current_time = datetime.utcnow().isoformat()
        for split in splits:
            split.metadata.update({
                'filename': file.filename,
                'category': category.value,
-                'upload_date': datetime.utcnow().isoformat(),
-                'doc_id': str(uuid.uuid4())
+                'upload_date': current_time,
+                'doc_id': doc_id  # Use the same doc_id for all splits of the same document
            })
        
        # Add to vector store
        doc_ids = chroma_manager.add_documents(splits, category.value)
        
        return DocumentResponse(
-            document_id=doc_ids[0],
+            document_id=doc_id,
            category=category,
            filename=file.filename,
            status="success"
@@ -267,4 +309,4 @@ async def upload_document(
        raise HTTPException(500, f"Error processing document: {str(e)}")

 if __name__ == "__main__":
-    uvicorn.run("main:app", host="0.0.0.0", port=8000, reload=True)
+    uvicorn.run("main:app", host="0.0.0.0", port=5302, reload=True)
@@ -1,4 +1,3 @@
-# models/marketing_assistant.py
 from langchain.chains.llm import LLMChain
 from langchain_core.prompts import PromptTemplate
 from langchain_community.chat_models.openai import ChatOpenAI
@@ -9,6 +8,7 @@ import os

 load_dotenv()

+
 class MarketingAssistant:
    def __init__(self):
        self.templates = self._load_templates()
@@ -17,25 +17,43 @@ class MarketingAssistant:
            groq_api_key=os.getenv("GROQ_API_KEY"),
            model_name=os.getenv("GROQ_MODEL_NAME")
        )
+
+    def _base_prompt(self):
+        COMPLIANCE_PROMPT = """
+        If the user asks about Time Line Therapy®(TLT) or NLP techniques, ensure the following:
+        Important compliance requirements for Time Line Therapy® content:
+
+        1. Always use proper trademark symbol ® when mentioning Time Line Therapy®
+        2. Maintain correct spacing in "Time Line Therapy®"
+        3. Accurately attribute creation to Tad James in 1985
+        4. Distinguish from general NLP "Time Line" techniques
+        5. Acknowledge the relationship with NLP as an advanced application
+        6. Reference specific techniques like the Anxiety Model correctly
+
+        Generate content that strictly adheres to these requirements.
+        """
+        return COMPLIANCE_PROMPT
+
    
    def _load_templates(self) -> Dict[str, PromptTemplate]:
        """Load prompt templates for different content types"""
+        compliance_prompt = self._base_prompt()
        return {
            'email': PromptTemplate(
                input_variables=["query", "topic", "style"],
-                template="Act like you are Adriana James (Adriana James is a woman of force and character, beauty and charm and an expert leader in the field of Neuro-Linguistic Programming (NLP), NLP Coaching and Time Line Therapy®.), write marketing copy in her signature style. Write an email newsletter about {topic} in {style} style. The query is: {query}"
+                template=f"Act like you are Adriana James (Adriana James is a woman of force and character, beauty and charm and an expert leader in the field of Neuro-Linguistic Programming (NLP), NLP Coaching and Time Line Therapy®.), write marketing copy in her signature style. Write an email newsletter about {{topic}} in {{style}} style. {compliance_prompt} The query is: {{query}}"
            ),
            'social': PromptTemplate(
                input_variables=["query","topic", "platform"],
-                template="Act like you are Adriana James (Adriana James is a woman of force and character, beauty and charm and an expert leader in the field of Neuro-Linguistic Programming (NLP), NLP Coaching and Time Line Therapy®.), write marketing copy in her signature style. Create a {platform} post about {topic}... The query is: {query}"
+                template=f"Act like you are Adriana James (Adriana James is a woman of force and character, beauty and charm and an expert leader in the field of Neuro-Linguistic Programming (NLP), NLP Coaching and Time Line Therapy®.), write marketing copy in her signature style. Create a {{platform}} post about {{topic}}... {compliance_prompt} The query is: {{query}}"
            ),
            'book': PromptTemplate(
                input_variables=["query","topic", "style"],
-                template="Act like you are Adriana James (Adriana James is a woman of force and character, beauty and charm and an expert leader in the field of Neuro-Linguistic Programming (NLP), NLP Coaching and Time Line Therapy®.), write marketing copy in her signature style. Write a book blurb about {topic} in {style} style... The query is: {query}"
+                template=f"Act like you are Adriana James (Adriana James is a woman of force and character, beauty and charm and an expert leader in the field of Neuro-Linguistic Programming (NLP), NLP Coaching and Time Line Therapy®.), write marketing copy in her signature style. Write a book blurb about {{topic}} in {{style}} style... {compliance_prompt} The query is: {{query}}"
            ),
            'article': PromptTemplate(
                input_variables=["query","topic", "style"],
-                template="Act like you are Adriana James (Adriana James is a woman of force and character, beauty and charm and an expert leader in the field of Neuro-Linguistic Programming (NLP), NLP Coaching and Time Line Therapy®.), write marketing copy in her signature style. Write an article about {topic} in {style} style... The query is: {query}"
+                template=f"Act like you are Adriana James (Adriana James is a woman of force and character, beauty and charm and an expert leader in the field of Neuro-Linguistic Programming (NLP), NLP Coaching and Time Line Therapy®.), write marketing copy in her signature style. Write an article about {{topic}} in {{style}} style... {compliance_prompt} The query is: {{query}}"
            ),
        }
    
@@ -106,15 +106,225 @@ def format_context(documents: List[Dict]) -> str:
    
    return "\n".join(context_parts)

-# Template for marketing copy
+
+# COMPLIANCE_PROMPT = """If the user asks about Time Line Therapy®(TLT) or NLP techniques, ensure the following:
+#     Important compliance requirements for Time Line Therapy® content:
+#     1. Always use proper trademark symbol ® when mentioning Time Line Therapy®
+#     2. Maintain correct spacing in "Time Line Therapy®"
+#     3. Accurately attribute creation to Tad James in 1985
+#     4. Distinguish from general NLP "Time Line" techniques
+#     5. Acknowledge the relationship with NLP as an advanced application
+#     6. Reference specific techniques like the Anxiety Model correctly
+
+#     Generate content that strictly adheres to these requirements.
+#     """
+
+# COMPLIANCE_PROMPT = """If the user asks about Time Line Therapy®(TLT) or NLP techniques, ensure the following:
+#     Important compliance requirements for Time Line Therapy® content:
+#     1. Always use proper trademark symbol ® when mentioning Time Line Therapy®
+#     2. Maintain correct spacing in "Time Line Therapy®"
+#     3. Accurately attribute creation to Tad James in 1985
+#     4. Distinguish from general NLP "Time Line" techniques
+#     5. Acknowledge the relationship with NLP as an advanced application
+#     6. Reference specific techniques correctly
+
+#     AVOID:
+#     1. Claims about specific timeframes for results
+#     2. Claims about medical or physiological effects
+#     3. Guarantees of success or "inevitable" results
+#     4. Unverified professional endorsements
+#     5. Terms like "eradicate" or "remove" emotions
+#     6. Claims about the "Anxiety Model" as a specific technique
+
+#     Generate content that strictly adheres to these requirements.
+#     """
+# # Template for marketing copy
+# TEMPLATE = """
+# Act like you are Adriana James, write marketing copy in her signature style. Just mimic her style and provide the answer to the user's query. Make sure that you are Adriana James, and you are providing the answer to the user's query.
+
+# {COMPLIANCE_PROMPT}
+
+# Query: {question}
+# Adriana James Resource Context: {context}
+
+# Note: Don't provide anything extra. Just give me the response no extra words nothing at all.
+# """
+
+COMPLIANCE_PROMPT = """If the user asks about Time Line Therapy® (TLT) or NLP techniques, ensure the following:
+1. Always use the proper trademark symbol ® when mentioning Time Line Therapy®.
+2. Maintain the correct spacing in "Time Line Therapy®".
+3. Accurately attribute the creation to Tad James in 1985.
+4. Clearly distinguish Time Line Therapy® from general NLP "Time Line" techniques.
+5. Acknowledge its relationship with NLP as an advanced application.
+6. Reference specific techniques correctly.
+
+AVOID:
+1. Claims about specific timeframes for results (such as suggesting changes occur “in minutes” or “instant” outcomes).
+2. Claims about medical or physiological effects.
+3. Guarantees of success or statements that imply results are inevitable.
+4. Unverified professional endorsements.
+5. Language that suggests "eradication" or "removal" of emotions—instead, use language that emphasizes reframing or releasing negative emotions.
+6. Presenting the "Anxiety Model" as a recognized, distinct, or core technique, since it is not officially part of Time Line Therapy®.
+"""
+COMPLIANCE_PROMPT = """If the user asks about Time Line Therapy® (TLT) or NLP techniques, ensure the following:
+1. Always use the proper trademark symbol ® when mentioning Time Line Therapy®.
+2. Maintain the correct spacing in "Time Line Therapy®".
+3. Accurately attribute the creation to Tad James in 1985.
+4. Clearly distinguish Time Line Therapy® from general NLP "Time Line" techniques.
+5. Acknowledge its relationship with NLP as an advanced application.
+6. Reference specific techniques correctly.
+
+AVOID:
+1. Claims about specific timeframes for results (such as suggesting changes occur “in minutes” or “instant” outcomes).
+2. Claims about medical or physiological effects.
+3. Guarantees of success or statements that imply results are inevitable.
+4. Unverified professional endorsements.
+5. Language that suggests "eradication" or "removal" of emotions—instead, use language that emphasizes reframing or releasing negative emotions.
+6. Presenting the "Anxiety Model" as a recognized, distinct, or core technique, since it is not officially part of Time Line Therapy®.
+7. Describing Time Line Therapy® as an "NLP technique" or as being "born out of NLP" – it is a distinct, proprietary modality that evolved from NLP principles.
+"""
+
+COMPLIANCE_PROMPT = """
+If the user asks about Time Line Therapy® (TLT) or NLP techniques, ensure the following:
+1. Always use the proper trademark symbol ® when mentioning Time Line Therapy®.
+2. Maintain the correct spacing in "Time Line Therapy®".
+3. Accurately attribute the creation to Tad James in 1985.
+4. Clearly distinguish Time Line Therapy® from general NLP "Time Line" techniques.
+5. Acknowledge its relationship with NLP as an advanced application.
+6. Reference specific techniques correctly.
+
+AVOID:
+1. Claims about specific timeframes for results (such as suggesting changes occur “in minutes” or “instant” outcomes).
+2. Claims about medical or physiological effects.
+3. Guarantees of success or statements that imply results are inevitable.
+4. Unverified professional endorsements.
+5. Language that suggests "eradication" or "removal" of emotions—instead, use language that emphasizes releasing negative emotions.
+6. Presenting the "Anxiety Model" as a recognized, distinct, or core technique, since it is not officially part of Time Line Therapy®.
+7. Describing Time Line Therapy® as an "NLP technique" or as being "born out of NLP" – it is a distinct, proprietary modality that evolved from NLP principles.
+"""
+
 TEMPLATE = """
-Act like you are Adriana James, write marketing copy in her signature style. Just mimic her style and provide the answer to the user's query. Make sure that you are Adriana James, and you are providing the answer to the user's query.
+Act like you are Adriana James, writing marketing copy in your signature style. Just mimic her style and provide the answer to the user's query. Make sure that you present yourself as Adriana James, responding directly to the user's query.
+
+{COMPLIANCE_PROMPT}

 Query: {question}
 Adriana James Resource Context: {context}

-Note: Don't provide anything extra. Just give me the response no extra words nothing at all.
+Note: Don't provide anything extra. Just give me the response—no extra words, nothing at all.
 """
+TEMPLATE = """
+Act like you are Adriana James, writing marketing copy in your signature style. Just mimic her style and provide the answer to the user's query. Make sure that you present yourself as Adriana James, responding directly to the user's query.
+
+{COMPLIANCE_PROMPT}
+
+Query: {question}
+Adriana James Resource Context: {context}
+
+Note: Don't provide anything extra. Just give me the response—no extra words, nothing at all.
+"""
+
+TEMPLATE = """
+Give answer From the perspective of Adriana james do not act like her, just writing marketing copy in her signature style. Just mimic her writing style and provide the answer to the user's query as she has written in the books. Make sure you follow Adriana James writing style, responding directly to the user's query.
+{COMPLIANCE_PROMPT}
+
+Query: {question}
+Adriana James Resource Context: {context}
+
+Note: Don't provide anything extra. Just give me the response—no extra words, nothing at all.
+"""
+
+COMPLIANCE_PROMPT = """
+If the user asks about Time Line Therapy® (TLT) or NLP techniques, ensure the following:
+1. Always use the proper trademark symbol ® when mentioning Time Line Therapy®.
+2. Maintain the correct spacing in "Time Line Therapy®".
+3. Accurately attribute its creation to Dr. Tad James in 1985.
+4. Clearly distinguish Time Line Therapy® as a distinct, proprietary modality evolved from NLP principles, not to be confused with general NLP techniques.
+5. Extract and incorporate key concepts from source files accurately.
+6. Verify proper attribution of trademarks and authors.
+7. Validate content against external knowledge sources.
+8. Prevent inclusion of unrelated, redundant, or confidential information.
+9. Ensure proper formatting for different outputs (e.g., bullet-point lists, structured FAQs) as required in the response.
+10. Maintain a client-specific tone and style that is warm, engaging, and professional, avoiding overuse of AI-generated language.
+11. Address key content questions directly without repeating points.
+    
+AVOID:
+1. Claims about specific timeframes for results, such as suggesting changes occur “in minutes” or “instant” outcomes.
+2. Claims related to medical or physiological effects.
+3. Guarantees of success or language implying inevitable results.
+4. Unverified professional endorsements.
+5. Language that suggests the complete eradication or removal of emotions; instead, use language that emphasizes the release or reframing of negative emotions.
+6. Presenting the "Anxiety Model" as a recognized or core technique, since it is not officially part of Time Line Therapy®.
+7. Describing Time Line Therapy® merely as an "NLP technique" or stating it was "born out of NLP"; instead, acknowledge it as a distinct modality with an advanced application of some NLP principles.
+8. Overly technical jargon or redundant explanations unrelated to key content.
+"""
+
+TEMPLATE = """
+Act like you are Adriana James, writing marketing copy in your signature style. Just mimic her style and provide the answer to the user's query. Make sure that you present yourself as Adriana James, responding directly to the user's query.
+- Accurately extract and highlight the core principles and techniques of Time Line Therapy®.
+- Provide correct attribution by stating that Time Line Therapy® was developed by Dr. Tad James in 1985.
+- Distinguish Time Line Therapy® from general NLP techniques.
+- Validate and incorporate content using external knowledge sources where applicable.
+- Exclude unrelated, redundant, or confidential information.
+- Format responses appropriately (using bullet-point lists, structured FAQs, or conversational text) based on the query, keeping the client's specific tone and style intact.
+
+{COMPLIANCE_PROMPT}
+
+Query: {question}
+Adriana James Resource Context: {context}
+
+Note: Provide only the direct response as output—no extra words or commentary.
+"""
+
+# COMPLIANCE_PROMPT = """
+# 📌 Key Requirements:
+# Accurate Information Extraction
+
+# Clearly explain that emotions and memories are stored linearly in the unconscious mind.
+# Describe how negative emotions can be released without re-experiencing trauma.
+# Highlight that limiting decisions and beliefs can be removed at their root cause.
+# Show how the future can be consciously shaped by eliminating past emotional barriers.
+# Emphasize that TLT works at the unconscious level for lasting change.
+# Proper Attribution & Trademark Compliance
+
+# Always use Time Line Therapy® (with the registered trademark symbol ®).
+# Maintain the correct spacing in "Time Line Therapy®".
+# Accurately attribute its creation to Dr. Tad James in 1985.
+# Differentiate Time Line Therapy® from NLP—do not call it an "NLP technique" but acknowledge it as a distinct modality that expands upon NLP principles.
+# Content Validation & Relevance
+
+# Extract and incorporate key concepts from the source files without redundancy.
+# Validate content against external knowledge sources where applicable.
+# Ensure the response is relevant, structured, and engaging while maintaining the client’s preferred tone.
+# Client-Specific Formatting & Style
+
+# Use bullet points, FAQs, or structured sections for readability.
+# Mimic the engaging, warm, and clear style of Adriana James, ensuring the tone is warm and professional.
+# Avoid excessive AI-generated phrasing—keep it natural and conversational.
+# 🚨 Strict Exclusions – DO NOT INCLUDE:
+# ❌ Instant results claims (e.g., "works in minutes").
+# ❌ Medical or physiological claims.
+# ❌ Guarantees of success (e.g., "this will 100% work").
+# ❌ Statements implying emotions are completely "removed" (use "released" or "reframed" instead).
+# ❌ Confusing TLT with general NLP or calling it "born out of NLP".
+# ❌ Overuse of technical jargon, speculative theories, or unrelated concepts.
+# ❌ Presenting the "Anxiety Model" as an official part of Time Line Therapy®.
+# """
+
+# PROMPT_TEMPLATE = """
+# You are an AI expert tasked with generating accurate, engaging, and compliant content on Time Line Therapy® (TLT), ensuring proper attribution, clarity, and trademark compliance. Your response must strictly follow these guidelines:
+
+# {COMPLIANCE_PROMPT}
+# Query: {question}
+# Adriana James Resource Context: {context}
+# 🌟 Expected Output Example:
+# ✔️ Time Line Therapy® helps individuals release negative emotions and limiting beliefs stored in the unconscious mind.
+# ✔️ By guiding individuals through their time line, practitioners help them reframe past experiences and remove emotional barriers.
+# ✔️ Developed by Dr. Tad James in 1985, TLT expands on NLP principles and hypnotherapy to facilitate emotional healing and transformation.
+# ✔️ Unlike traditional NLP, which focuses on cognitive restructuring, TLT works directly with stored emotional experiences for deep and lasting change.
+# ✔️ The process helps clients release emotional baggage, overcome self-doubt, and create a compelling future.
+# """
+

 PROMPT = ChatPromptTemplate.from_template(TEMPLATE)

@@ -128,5 +338,6 @@ def generate_marketing_response(query: str, context: str) -> str:
    
    return chain.invoke({
        "question": query,
-        "context": context
-    })
+        "context": context,
+        "COMPLIANCE_PROMPT": COMPLIANCE_PROMPT
+    })
@@ -0,0 +1,120 @@
+from marketing_assistant import MarketingAssistant
+from chroma_manager import ChromaManager
+from rag import generate_marketing_response, format_context, RERANKER
+import time
+import os
+
+"""
+Time Claims Questions
+
+1. "How quickly can Time Line Therapy® help with emotional change?"
+2. "What's the typical timeframe for seeing results with Time Line Therapy®?"
+3. "Can Time Line Therapy® produce instant changes?"
+
+Technique-Related Questions
+
+1. "What are the core techniques of Time Line Therapy®?"
+2. "How does Time Line Therapy® work with emotions?"
+3. "Can you explain the relationship between Time Line Therapy® and NLP?"
+
+Results and Effects Questions
+
+1. "How does Time Line Therapy® help with emotional patterns?"
+2. "What can I expect from a Time Line Therapy® session?"
+3. "How does Time Line Therapy® approach emotional transformation?"
+
+Professional Application Questions
+
+1. "Who typically uses Time Line Therapy®?"
+2. "What training is required for Time Line Therapy®?"
+3. "How is Time Line Therapy® used in professional practice?"
+
+Historical Context Questions
+
+1. "When was Time Line Therapy® developed and by whom?"
+2. "What's the origin of Time Line Therapy®?"
+3. "How has Time Line Therapy® evolved since its creation?"
+
+Distinctive Features Questions
+
+1. "What makes Time Line Therapy® unique?"
+2. "How is Time Line Therapy® different from other approaches?"
+3. "What are the key principles of Time Line Therapy®?"
+"""
+
+# queries = [
+#     "How quickly can Time Line Therapy® help with emotional change?",
+#     "What's the typical timeframe for seeing results with Time Line Therapy®?",
+#     "Can Time Line Therapy® produce instant changes?",
+#     "What are the core techniques of Time Line Therapy®?",
+#     "How does Time Line Therapy® work with emotions?",
+#     "Can you explain the relationship between Time Line Therapy® and NLP?",
+#     "How does Time Line Therapy® help with emotional patterns?",
+#     "What can I expect from a Time Line Therapy® session?",
+#     "How does Time Line Therapy® approach emotional transformation?",
+#     "Who typically uses Time Line Therapy®?",
+#     "What training is required for Time Line Therapy®?",
+#     "How is Time Line Therapy® used in professional practice?",
+#     "When was Time Line Therapy® developed and by whom?",
+#     "What's the origin of Time Line Therapy®?",
+#     "How has Time Line Therapy® evolved since its creation?",
+#     "What makes Time Line Therapy® unique?",
+#     "How is Time Line Therapy® different from other approaches?",
+#     "What are the key principles of Time Line Therapy®?",
+# ]
+queries = ['List the key principles of Time Line Therapy®',
+    'Summarize the core techniques of Time Line Therapy®',
+    'Who developed Time Line Therapy®, and when was it created?',
+    'Provide a brief history of Time Line Therapy®, ensuring correct attribution of its creator and name.',
+    'Compare the main objectives of Time Line Therapy® with those of traditional NLP techniques.',
+    'Explain how Time Line Therapy® approaches emotional transformation and how it aligns with widely accepted psychological principles.',
+    'Describe the Time Line Therapy® Anxiety Model and its function.',
+    'List the key benefits of Time Line Therapy® without including general NLP or unrelated psychological concepts.',
+    'Explain how Time Line Therapy® helps individuals overcome limiting beliefs in a concise manner.',
+    'Provide a high-level overview of Time Line Therapy® without repeating any points within the response.',
+    'Format the benefits of Time Line Therapy® into a bullet-point list suitable for a website.',
+    'Convert the explanation of Time Line Therapy® into a structured FAQ format with clear headings.',
+    'Rewrite the description of Time Line Therapy® in a warm and engaging tone suitable for a life coaching audience.',
+    'Adapt the explanation of Time Line Therapy® into a professional yet approachable style for a corporate setting.',
+    'Emphasize the emotional transformation aspect of Time Line Therapy® in a way that resonates with personal development clients.',
+    'Adjust the explanation of Time Line Therapy® to focus on practical applications for business and leadership coaching.',
+    'Rewrite the explanation of Time Line Therapy® in a way that feels more conversational and natural.',
+    'Rephrase the key benefits of Time Line Therapy® to sound more human-like and engaging.',
+    'What makes Time Line Therapy® different from other NLP-based techniques?',
+    'How does Time Line Therapy® help individuals release negative emotions?'
+]
+
+chroma_manager = ChromaManager()
+assistant = MarketingAssistant()
+
+def test_query():
+    # Open the responses file in append mode so answers from earlier runs are preserved
+    with open("/root/ds_tjc/tests/response_query_test.txt", "a") as f:
+        for query in queries:
+            content_type = assistant.classify_query(query)
+            print(f"Query classified as: {content_type}")
+            category = content_type
+            initial_results = chroma_manager.query_documents(
+                query=query,
+                category=category if category else None,
+                top_k=10  # Retrieve more documents initially for reranking
+            )
+            reranked_results = RERANKER.rerank(
+                query=query,
+                documents=initial_results,
+                top_k=5  # Keep top 5 most relevant documents after reranking
+            )
+            context = format_context(reranked_results)
+            response = generate_marketing_response(query, context)
+            print(f"Response: {response}")
+            print(f"Context: {context}")
+            # Write the response to the file with a separator
+            f.write("-----------------------------------------------------\n")
+            f.write(f"Query: {query}\n")
+            f.write(f"Answer: {response}\n")
+            # sleep for 2 seconds to avoid rate limiting
+            time.sleep(2)
+
+
+if __name__ == "__main__":
+    test_query()
@@ -10,7 +10,6 @@ from llama_index.core import SimpleDirectoryReader
 from PyPDF2 import PdfReader
 from langchain_core.documents import Document
 from PIL import Image
-import pytesseract
 import easyocr

 def save_upload_file(file, category) -> Path: