added bot prediction for assessments

2024-09-14 01:50:41 +00:00
parent 45bc62c745
commit cd8f499f97
14 changed files with 698 additions and 22 deletions
@@ -4,6 +4,7 @@ from werkzeug.utils import secure_filename
 from src.services.chatbot import Chatbot
 from src.utils.utils import delete_all_files_in_directory
 from src.utils.document_loader import load_document  
+from src.services.chatbot import Chatbot


 # Initialize the Blueprint
@@ -59,3 +60,64 @@ def validate_worker_document():

    except Exception as e:
        return jsonify({"error": "Internal Server Error", "message": str(e)}), 500
+
+
+@bot.route('/predict_next_n_assessments', methods=['POST'])
+def predict_next_n_assessments():
+    """Predict open items and red flags for a company's next N assessments.
+
+    Expects a JSON object with ``company_info``, ``companyid`` and ``N``.
+
+    Returns:
+        200 with ``{"predictions": ...}`` when the chatbot produced a result,
+        400 when the body is missing/invalid or no prediction came back,
+        500 on any unexpected error.
+    """
+    try:
+        # silent=True yields None instead of raising on a missing or malformed
+        # JSON body, so a bad request is answered with 400 rather than falling
+        # into the generic 500 handler below.
+        data = request.get_json(silent=True)
+        if not isinstance(data, dict):
+            return jsonify({"error": "Missing data", "message": "Company info, company ID, or N value not provided."}), 400
+
+        company_info = data.get('company_info')
+        companyid = data.get('companyid')
+        N = data.get('N')
+
+        if not company_info or not companyid or N is None:
+            return jsonify({"error": "Missing data", "message": "Company info, company ID, or N value not provided."}), 400
+
+        # N is used as a loop count by the chatbot service, so it has to be a
+        # positive whole number. bool is rejected explicitly because it is a
+        # subclass of int.
+        if isinstance(N, bool) or not isinstance(N, int) or N < 1:
+            return jsonify({"error": "Invalid data", "message": "N must be a positive integer."}), 400
+
+        # Instantiate the chatbot service
+        chatbot = Chatbot()
+
+        # Call the prediction method
+        response = chatbot.predict_next_n_assessment(
+            company_info=company_info,
+            companyid=companyid,
+            N=N
+        )
+        # The service returns None (or nothing usable) when prediction fails.
+        if not response:
+            return jsonify({"error": "No predictions available", "message": "Prediction process failed."}), 400
+
+        return jsonify({"predictions": response}), 200
+
+    except Exception as e:
+        return jsonify({"error": "Internal Server Error", "message": str(e)}), 500
+
+
+
+@bot.route('/use_bot_predict_assessments', methods=['POST'])
+def use_bot_predict_assessments():
+    """Answer a free-form question about a company's past assessments.
+
+    Expects a JSON object with ``company_info``, ``companyid`` and ``query``.
+
+    Returns:
+        200 with ``{"predictions": ...}`` when the chatbot produced a result,
+        400 when the body is missing/invalid or no prediction came back,
+        500 on any unexpected error.
+    """
+    try:
+        # silent=True yields None instead of raising on a missing or malformed
+        # JSON body, so a bad request is answered with 400 rather than falling
+        # into the generic 500 handler below.
+        data = request.get_json(silent=True)
+        if not isinstance(data, dict):
+            return jsonify({"error": "Missing data", "message": "Company info, company ID, or query value not provided."}), 400
+
+        company_info = data.get('company_info')
+        companyid = data.get('companyid')
+        query = data.get('query')
+
+        # A blank or non-text query gives the model nothing to answer, so it is
+        # treated the same as a missing one.
+        query_missing = not isinstance(query, str) or not query.strip()
+        if not company_info or not companyid or query_missing:
+            return jsonify({"error": "Missing data", "message": "Company info, company ID, or query value not provided."}), 400
+
+        # Instantiate the chatbot service
+        chatbot = Chatbot()
+
+        # Call the prediction method
+        response = chatbot.predict_based_on_past_assessment(
+            company_info=company_info,
+            companyid=companyid,
+            query=query
+        )
+        # The service returns None (or nothing usable) when prediction fails.
+        if not response:
+            return jsonify({"error": "No predictions available", "message": "Prediction process failed."}), 400
+
+        return jsonify({"predictions": response}), 200
+
+    except Exception as e:
+        return jsonify({"error": "Internal Server Error", "message": str(e)}), 500
@@ -4,4 +4,21 @@ from typing import List, Dict

 class ValidateWorker(BaseModel):
    result:str
-    
+    
+# Structured-output schema with a single text field; passed as
+# `response_format` by Chatbot.predict_based_on_past_assessment.
+class Result(BaseModel):
+    # Text of the model's answer (presumably the prediction/recommendation).
+    response:str
+
+# Pair of integer counts predicted for one assessment at one cadence.
+class Cases(BaseModel):
+    # Predicted number of open items.
+    open_items: int
+    # Predicted number of red flags.
+    red_flags: int
+
+# One `Cases` prediction per assessment cadence.
+class AssessmentsFrequency(BaseModel):
+    weekly: Cases
+    biweekly: Cases
+    quarterly: Cases
+
+# Wrapper for a single predicted assessment.
+class AssessmentPrediction(BaseModel):
+    # The literal key "AssessmentN" is part of the JSON contract:
+    # Chatbot.predict_next_n_assessment reads item['AssessmentN'].
+    AssessmentN: AssessmentsFrequency
+
+# Top-level structured-output schema passed as `response_format` by
+# Chatbot.predict_next_n_assessment.
+class AssessmentPredictionsResponse(BaseModel):
+    # Predicted assessments; the consumer indexes this list by position.
+    predictions: List[AssessmentPrediction]
@@ -4,11 +4,6 @@ import logging
 from logging.handlers import RotatingFileHandler


-handler = RotatingFileHandler('/root/ds_erp_ai/logs/prediction_pipeline.log', maxBytes=100000, backupCount=3)
-logger = logging.getLogger(__name__)
-logger.setLevel(logging.INFO)
-logger.addHandler(handler)
-
 class DataPreprocessor:
    def __init__(self, input_path, company_id):
        self.input_path = input_path
@@ -33,7 +33,8 @@ class ModelTrainer:
        y = self.df[['open_items', 'red_flags']]  # Multi-target for open items and red flags

        # Split into training and test sets with 10% as test size
-        X_train, self.X_test, y_train, self.y_test = train_test_split(X, y, test_size=0.1, random_state=42)
+        X_train, self.X_test, y_train, self.y_test = train_test_split(X, y, test_size=0.1, random_state=42, shuffle=False)
+

        # Train the model
        self.model.fit(X_train, y_train)
@@ -1,3 +1,5 @@
+
+
 def validate_worker_prompt() -> str:
   return """
    You are a worker in the company "Validate" where you are asked a specific yes or no question:
@@ -15,4 +17,133 @@ def validate_worker_prompt() -> str:
      result:"validated" 
    }
    """
-    
+def _as_percent(distribution, label):
+    """Return the share stored under ``label`` as a percentage string ("0" when absent)."""
+    # round() hides binary-float artefacts such as 0.07 * 100 == 7.000000000000001.
+    return f"{round(distribution.get(label, 0) * 100, 2)}"
+
+
+def predict_based_past_assessment_prompt(query,company_info, summary_stats):
+    """Build the system prompt for answering a query from past assessment data.
+
+    :param query: The user's question; embedded verbatim in the prompt.
+    :param company_info: Mapping with ``company_name``, ``company_size`` and
+        ``departments`` (a single name or an iterable of names).
+    :param summary_stats: Nested mapping of assessment statistics; every section
+        and key read below must be present.
+    :return: The fully rendered prompt text.
+    :raises KeyError: If a required key is missing from either mapping.
+    """
+    # Extract company information from the dictionary
+    company_name = company_info['company_name']
+    company_size = company_info['company_size']
+    departments = company_info['departments']
+
+    # A bare string would otherwise be joined character by character.
+    if isinstance(departments, str):
+        departments = [departments]
+    department_list = ', '.join(str(department) for department in departments)
+
+    # Pull each statistics section out once to keep the template readable.
+    flags = summary_stats['Open Items and Red Flags']
+    frequency = summary_stats['Assessment Frequency']
+    type_breakdown = frequency['Assessment Type Breakdown']
+    dates = summary_stats['Assessment Start and End Dates']
+    areas = summary_stats['Assessment Areas']
+    status_distribution = summary_stats['Assessment Status']['Assessment Status Distribution']
+    admins = summary_stats['Assessment Admin']
+
+    # Create the prompt with the provided company info and summary statistics
+    prompt = f"""
+    **Prompt for the Chatbot:**
+
+    **Context:**
+    You are an AI assistant working for {company_name}, and your primary responsibility is to provide **insights**, **predictions**, and **recommendations** based on the company's past assessment data and organizational structure. You are not allowed to respond to any queries outside of this domain.
+
+    **General Company Information:**
+    - **Company Name**: {company_name}
+    - **Company Size**: {company_size} (e.g., Small, Medium, Large)
+    - **Departments**: 
+      {department_list}
+
+    **Assessment Summary**:
+    The following is a detailed summary of past assessments at {company_name}. Use this information to provide predictions and recommendations based on trends and data points.
+    
+    - **Open Items and Red Flags**:
+      - Total Open Items: {flags['Total Open Items']}
+      - Average Open Items per Assessment: {flags['Average Open Items per Assessment']}
+      - Total Red Flags: {flags['Total Red Flags']}
+      - Average Red Flags per Assessment: {flags['Average Red Flags per Assessment']}
+      - Max Red Flags in a Single Assessment: {flags['Max Red Flags in a Single Assessment']}
+      - Most Common Area with Red Flags: {flags['Most Common Area with Red Flags']}
+    
+    - **Assessment Frequency**:
+      - Weekly: {_as_percent(type_breakdown, 'Weekly')}%
+      - Bi-Weekly: {_as_percent(type_breakdown, 'Bi-Weekly')}%
+      - Quarterly: {_as_percent(type_breakdown, 'Quarterly')}%
+      - Average Time Between Assessments: {frequency['Average Time Between Assessments']} days
+      - Average Assessment Duration: {frequency['Average Assessment Duration']} days
+
+    - **Assessment Start and End Dates**:
+      - Longest Assessment Duration: {dates['Longest Assessment Duration (days)']} days
+      - Shortest Assessment Duration: {dates['Shortest Assessment Duration (days)']} days
+
+    - **Assessment Areas**:
+      - Most Assessed Area: {areas['Most Assessed Area']}
+      - Most Open Items in Area: {areas['Most Open Items in Area']}
+      - Area with Most Red Flags: {areas['Area with Most Red Flags']}
+
+    - **Assessment Status**:
+      - Completed: {_as_percent(status_distribution, 'Completed')}%
+      - In Progress: {_as_percent(status_distribution, 'In Progress')}%
+      - Incomplete: {_as_percent(status_distribution, 'Incomplete')}%
+
+    - **Assessment Admin**:
+      - Most Frequent Admin: {admins['Most Frequent Admin']}
+      - Admin with Fewest Red Flags: {admins['Admin with Fewest Red Flags']}
+      - Admin with Most Open Items: {admins['Admin with Most Open Items']}
+
+    **Instructions:**
+    Use the above information to answer user queries. You should:
+    - Analyze historical data to identify trends and problem areas.
+    - Predict potential outcomes for future assessments based on past performance (e.g., meeting deadlines, reducing red flags).
+    - Provide **actionable recommendations** that can help improve performance in future assessments.
+
+    **User Query**:
+    "{query}"
+
+    **Your Response**:
+    Predict and provide recommendations based on the company’s historical data, focusing on the areas most relevant to the query. Ensure the response is based on past trends and performance issues.
+
+    **Examples of Insightful Responses**:
+    - "To improve your performance in the next assessment, you should focus on reducing red flags in the Communication department, as it has had the most issues."
+    - "Based on the company's past performance, there is a 70% chance that you will meet the deadline for the next weekly assessment. To ensure success, focus on completing open items in the IT department."
+    - "The data indicates that quarterly assessments have the highest rate of incomplete tasks. I recommend prioritizing quarterly assessment tasks to avoid falling behind."
+    """
+
+    return prompt
+
+
+
+def predict_next_n_assessments_prompt():
+    """Return the static system prompt for predicting the next N assessments.
+
+    The text is deliberately a plain (non-f) string: the company info, the
+    summary statistics and the value of N are supplied separately in the user
+    messages, so nothing is interpolated here and the braces of the JSON
+    example are literal.
+
+    The example mirrors the ``predictions`` / ``AssessmentN`` structure that
+    the caller reads back from the model's reply.
+
+    :return: The prompt text.
+    """
+    prompt = """
+    **Prompt for the Chatbot:**
+
+    **Context:**
+    You are an AI assistant responsible for analyzing the past assessment data of a company, and your primary responsibility is to provide **predictions** for the next N assessments.
+    These assessments can occur on a **weekly**, **bi-weekly**, or **quarterly** basis. Use the company's past performance to predict the following for each of the next N assessments:
+    - **Number of Open Items**.
+    - **Number of Red Flags**.
+    - **Predictions for Weekly, Bi-Weekly, and Quarterly assessments**.
+
+    **Input (provided in the user messages):**
+    - Company basic info
+    - Past assessment statistics
+    - N - the number of next assessments to be predicted
+
+    **General Company Information:**
+    Provided in the user messages.
+
+    **Assessment Summary (Past Data)**:
+    Detailed information on past assessments will be provided in the user messages. Use this information to make predictions for the next N assessments.
+
+    **Instructions**:
+    - Predict the number of open items and red flags for the next N assessments if they are conducted on a weekly, bi-weekly, or quarterly basis.
+    - Use the historical summary statistics provided in the user messages to guide your predictions.
+    - Return the response in the following JSON format:
+
+    **Response Format** (example for N = 2; X and Y stand for integers):
+
+    {
+      "predictions": [
+        {
+          "AssessmentN": {
+            "weekly": {"open_items": X, "red_flags": Y},
+            "biweekly": {"open_items": X, "red_flags": Y},
+            "quarterly": {"open_items": X, "red_flags": Y}
+          }
+        },
+        {
+          "AssessmentN": {
+            "weekly": {"open_items": X, "red_flags": Y},
+            "biweekly": {"open_items": X, "red_flags": Y},
+            "quarterly": {"open_items": X, "red_flags": Y}
+          }
+        }
+      ]
+    }
+
+    The "predictions" list must contain exactly N entries, ordered from the next assessment onwards.
+    Ensure each assessment is provided with three predictions: one for Weekly, one for Bi-Weekly, and one for Quarterly assessments.
+    """
+
+    return prompt
@@ -7,6 +7,7 @@ from src.prompts.sops import *
 from src.prompts.chatbot import *
 from src.models.sop_response_schemas import *
 from src.models.bot_response_schema import *
+from scripts.assessment_data import generate_summary_stats_v2
 from dotenv import load_dotenv
 load_dotenv()

@@ -52,7 +53,7 @@ class Chatbot:
                    }
                ],
                response_format=ValidateWorker,
-                max_tokens=4096,
+                max_tokens=1024,
                temperature=0.1
            )

@@ -64,3 +65,121 @@ class Chatbot:
        except Exception as e:
            print(f"An error occurred: {e}")
            return None
+        
+    
+
+
+    def predict_based_on_past_assessment(self, query, company_info, companyid) -> "dict | None":
+        """
+        Answer a free-form query using a company's past assessment data.
+
+        Loads summary statistics from the company's raw assessment CSV, renders the
+        system prompt from them, and asks the configured model for a structured reply.
+
+        :param query: The question or query asked by the user.
+        :param company_info: General information about the company (name, size, departments, etc.).
+        :param companyid: Unique identifier of the company; used to locate ``<companyid>_raw_data.csv``.
+        :return: The model's JSON reply decoded into a dict (constrained by the ``Result`` schema),
+                 or None if any step fails.
+        """
+        try:
+            # companyid is caller-supplied and ends up inside a file name, so
+            # refuse anything that could step outside the data directory.
+            company_key = str(companyid)
+            if not company_key or '/' in company_key or '\\' in company_key:
+                raise ValueError(f"Invalid company id: {companyid!r}")
+
+            # Define the path to the company's assessment data (stored as a CSV)
+            data_path = os.path.join('data', 'raw', 'erp_company_assessment', f'{company_key}_raw_data.csv')
+
+            # Generate summary statistics from the company's assessment data
+            summary_stats = generate_summary_stats_v2(file_path=data_path)
+
+            # Generate the prompt using the company info and the summary statistics
+            prompt = predict_based_past_assessment_prompt(
+                query=query,
+                company_info=company_info,
+                summary_stats=summary_stats
+            )
+
+            # Ask the model for a reply constrained to the Result schema
+            response = self.client.beta.chat.completions.parse(
+                model=self.model,
+                messages=[
+                    {
+                        "role": "system",
+                        "content": f"{prompt}"
+                    },
+                    {
+                        "role": "user",
+                        "content": f"{query}",
+                    }
+                ],
+                response_format=Result,
+                max_tokens=1024,
+                temperature=0.1
+            )
+
+            # The reply content is a JSON string; decode it into a plain dict
+            return json.loads(response.choices[0].message.content)
+
+        except Exception as e:
+            # Best-effort contract: callers treat None as "no prediction".
+            print(f"An error occurred: {e}")
+            return None
+
+    
+    def predict_next_n_assessment(self, company_info, companyid, N) -> "dict | None":
+        """
+        Predict open items and red flags for a company's next N assessments.
+
+        Loads summary statistics from the company's raw assessment CSV and sends them,
+        together with the company info and N, to the configured model. The structured
+        reply is re-keyed as ``assessment_1`` ... ``assessment_N``.
+
+        :param company_info: General information about the company (name, size, departments, etc.).
+        :param companyid: Unique identifier of the company; used to locate ``<companyid>_raw_data.csv``.
+        :param N: Number of assessments to predict; must convert to an integer >= 1.
+        :return: Dict mapping ``assessment_<i>`` to that assessment's weekly/biweekly/quarterly
+                 predictions, or None if any step fails. Holds fewer than N entries when the
+                 model returned fewer predictions.
+        """
+        try:
+            # N sizes the result, so normalise it up front instead of failing
+            # later inside the loop.
+            count = int(N)
+            if count < 1:
+                raise ValueError(f"N must be a positive integer, got {N!r}")
+
+            # companyid is caller-supplied and ends up inside a file name, so
+            # refuse anything that could step outside the data directory.
+            company_key = str(companyid)
+            if not company_key or '/' in company_key or '\\' in company_key:
+                raise ValueError(f"Invalid company id: {companyid!r}")
+
+            # Define the path to the company's assessment data (stored as a CSV)
+            data_path = os.path.join('data', 'raw', 'erp_company_assessment', f'{company_key}_raw_data.csv')
+
+            # Generate summary statistics from the company's assessment data
+            summary_stats = generate_summary_stats_v2(file_path=data_path)
+
+            # The system prompt is static; the data travels in the user messages
+            prompt = predict_next_n_assessments_prompt()
+
+            # Ask the model for a reply constrained to AssessmentPredictionsResponse
+            response = self.client.beta.chat.completions.parse(
+                model=self.model,
+                messages=[
+                    {
+                        "role": "system",
+                        "content": f"{prompt}"
+                    },
+                    {
+                        "role": "user",
+                        "content": f"company info: {company_info}--> N-value is {N} ",
+                    },
+                    {
+                        "role": "user",
+                        "content": f"Summary stats: {summary_stats}",
+                    }
+                ],
+                response_format=AssessmentPredictionsResponse,
+                max_tokens=1024,
+                temperature=0.1
+            )
+
+            # The reply content is a JSON string; decode it into a plain dict
+            extracted_text = json.loads(response.choices[0].message.content)
+
+            # Re-key by position. Slicing instead of indexing range(N) keeps the
+            # predictions that did arrive when the model returns fewer than N.
+            return {
+                f"assessment_{position}": item['AssessmentN']
+                for position, item in enumerate(extracted_text["predictions"][:count], start=1)
+            }
+
+        except Exception as e:
+            # Best-effort contract: callers treat None as "no prediction".
+            print(f"An error occurred: {e}")
+            return None