added bot prediction for assessments

2024-09-14 01:50:41 +00:00
parent 45bc62c745
commit cd8f499f97
14 changed files with 698 additions and 22 deletions
@@ -0,0 +1,11 @@
 Assessment_ID,Open_Items,Red_Flags,Assessment_Frequency,Assessment_Start_Date,Assessment_End_Date,Assessment_Area,Assessment_Status,Assessment_Admin
 1,3,1,Weekly,2023-01-01,2023-01-07,Deployment,Completed,Admin A
 2,4,2,Bi-Weekly,2023-01-16,2023-01-22,Communication,Completed,Admin B
 3,2,0,Weekly,2023-01-31,2023-02-06,Deployment,Completed,Admin A
 4,5,1,Quarterly,2023-02-15,2023-02-21,Communication,In Progress,Admin B
 5,1,0,Bi-Weekly,2023-03-02,2023-03-08,Deployment,Completed,Admin A
 6,3,3,Weekly,2023-03-17,2023-03-23,Deployment,Completed,Admin A
 7,2,2,Quarterly,2023-04-01,2023-04-07,Communication,Incomplete,Admin B
 8,4,1,Bi-Weekly,2023-04-16,2023-04-22,Deployment,Completed,Admin A
 9,5,1,Weekly,2023-05-01,2023-05-07,Communication,In Progress,Admin B
 10,3,2,Quarterly,2023-05-16,2023-05-22,Deployment,Completed,Admin A
@@ -0,0 +1,11 @@
 Assessment_ID,Open_Items,Red_Flags,Assessment_Frequency,Assessment_Start_Date,Assessment_End_Date,Assessment_Area,Assessment_Status,Assessment_Admin,Department
 1,3,1,Weekly,2023-01-01,2023-01-07,Deployment,Completed,Admin A,IT
 2,4,2,Bi-Weekly,2023-01-16,2023-01-22,Communication,Completed,Admin B,HR
 3,2,0,Weekly,2023-01-31,2023-02-06,Deployment,Completed,Admin A,Finance
 4,5,1,Quarterly,2023-02-15,2023-02-21,Communication,In Progress,Admin B,IT
 5,1,0,Bi-Weekly,2023-03-02,2023-03-08,Deployment,Completed,Admin A,HR
 6,3,3,Weekly,2023-03-17,2023-03-23,Deployment,Completed,Admin A,Finance
 7,2,2,Quarterly,2023-04-01,2023-04-07,Communication,Incomplete,Admin B,IT
 8,4,1,Bi-Weekly,2023-04-16,2023-04-22,Deployment,Completed,Admin A,HR
 9,5,1,Weekly,2023-05-01,2023-05-07,Communication,In Progress,Admin B,Finance
 10,3,2,Quarterly,2023-05-16,2023-05-22,Deployment,Completed,Admin A,IT
@@ -0,0 +1,11 @@
 Assessment_ID,Open_Items,Red_Flags,Assessment_Frequency,Assessment_Start_Date,Assessment_End_Date,Assessment_Area,Assessment_Status,Assessment_Admin
 1,3,1,Weekly,2023-01-01,2023-01-07,Deployment,Completed,Admin A
 2,4,2,Bi-Weekly,2023-01-16,2023-01-22,Communication,Completed,Admin B
 3,2,0,Weekly,2023-01-31,2023-02-06,Deployment,Completed,Admin A
 4,5,1,Quarterly,2023-02-15,2023-02-21,Communication,In Progress,Admin B
 5,1,0,Bi-Weekly,2023-03-02,2023-03-08,Deployment,Completed,Admin A
 6,3,3,Weekly,2023-03-17,2023-03-23,Deployment,Completed,Admin A
 7,2,2,Quarterly,2023-04-01,2023-04-07,Communication,Incomplete,Admin B
 8,4,1,Bi-Weekly,2023-04-16,2023-04-22,Deployment,Completed,Admin A
 9,5,1,Weekly,2023-05-01,2023-05-07,Communication,In Progress,Admin B
 10,3,2,Quarterly,2023-05-16,2023-05-22,Deployment,Completed,Admin A
@@ -2,22 +2,204 @@
 "cells": [
  {
   "cell_type": "code",
-   "execution_count": 8,
+   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Create a dummy dataset with past 5 assessments\n",
    "import pandas as pd\n",
    "\n",
    "data_dummy = {\n",
-    "    'start_date': pd.date_range(start='2023-01-01', periods=5, freq='7D'),\n",
+    "    'start_date': pd.date_range(start='2023-01-01', periods=12, freq='7D'),\n",
-    "    'end_date': pd.date_range(start='2023-01-02', periods=5, freq='7D'),\n",
+    "    'end_date': pd.date_range(start='2023-01-02', periods=12, freq='7D'),\n",
-    "    'open_items': [10, 12, 11, 9, 13],\n",
+    "    'open_items': [10, 12, 11, 9, 13, 14, 15, 16, 12, 11, 10, 9],\n",
-    "    'red_flags': [2, 1, 3, 1, 4],\n",
+    "    'red_flags': [2, 1, 3, 1, 4, 2, 1, 3, 2, 1, 4, 3],\n",
-    "    'num_employees': [30, 25, 28, 30, 27],\n",
+    "    'num_employees': [30, 25, 28, 30, 27, 26, 31, 29, 25, 30, 27, 26],\n",
-    "    'assessment_type': ['weekly', 'biweekly', 'quarterly', 'weekly', 'biweekly']\n",
+    "    'assessment_type': ['weekly', 'biweekly', 'quarterly', 'weekly', 'biweekly', \n",
    "                        'weekly', 'quarterly', 'biweekly', 'weekly', 'quarterly', 'weekly', 'biweekly']\n",
    "}\n",
    "\n",
-    "df_dummy = pd.DataFrame(data_dummy)"
+    "df_dummy = pd.DataFrame(data_dummy)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>start_date</th>\n",
       "      <th>end_date</th>\n",
       "      <th>open_items</th>\n",
       "      <th>red_flags</th>\n",
       "      <th>num_employees</th>\n",
       "      <th>assessment_type</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>2023-01-01</td>\n",
       "      <td>2023-01-02</td>\n",
       "      <td>10</td>\n",
       "      <td>2</td>\n",
       "      <td>30</td>\n",
       "      <td>weekly</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2023-01-08</td>\n",
       "      <td>2023-01-09</td>\n",
       "      <td>12</td>\n",
       "      <td>1</td>\n",
       "      <td>25</td>\n",
       "      <td>biweekly</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>2023-01-15</td>\n",
       "      <td>2023-01-16</td>\n",
       "      <td>11</td>\n",
       "      <td>3</td>\n",
       "      <td>28</td>\n",
       "      <td>quarterly</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>2023-01-22</td>\n",
       "      <td>2023-01-23</td>\n",
       "      <td>9</td>\n",
       "      <td>1</td>\n",
       "      <td>30</td>\n",
       "      <td>weekly</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>2023-01-29</td>\n",
       "      <td>2023-01-30</td>\n",
       "      <td>13</td>\n",
       "      <td>4</td>\n",
       "      <td>27</td>\n",
       "      <td>biweekly</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>2023-02-05</td>\n",
       "      <td>2023-02-06</td>\n",
       "      <td>14</td>\n",
       "      <td>2</td>\n",
       "      <td>26</td>\n",
       "      <td>weekly</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>2023-02-12</td>\n",
       "      <td>2023-02-13</td>\n",
       "      <td>15</td>\n",
       "      <td>1</td>\n",
       "      <td>31</td>\n",
       "      <td>quarterly</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>2023-02-19</td>\n",
       "      <td>2023-02-20</td>\n",
       "      <td>16</td>\n",
       "      <td>3</td>\n",
       "      <td>29</td>\n",
       "      <td>biweekly</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>2023-02-26</td>\n",
       "      <td>2023-02-27</td>\n",
       "      <td>12</td>\n",
       "      <td>2</td>\n",
       "      <td>25</td>\n",
       "      <td>weekly</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>2023-03-05</td>\n",
       "      <td>2023-03-06</td>\n",
       "      <td>11</td>\n",
       "      <td>1</td>\n",
       "      <td>30</td>\n",
       "      <td>quarterly</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>2023-03-12</td>\n",
       "      <td>2023-03-13</td>\n",
       "      <td>10</td>\n",
       "      <td>4</td>\n",
       "      <td>27</td>\n",
       "      <td>weekly</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <td>2023-03-19</td>\n",
       "      <td>2023-03-20</td>\n",
       "      <td>9</td>\n",
       "      <td>3</td>\n",
       "      <td>26</td>\n",
       "      <td>biweekly</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   start_date   end_date  open_items  red_flags  num_employees assessment_type\n",
       "0  2023-01-01 2023-01-02          10          2             30          weekly\n",
       "1  2023-01-08 2023-01-09          12          1             25        biweekly\n",
       "2  2023-01-15 2023-01-16          11          3             28       quarterly\n",
       "3  2023-01-22 2023-01-23           9          1             30          weekly\n",
       "4  2023-01-29 2023-01-30          13          4             27        biweekly\n",
       "5  2023-02-05 2023-02-06          14          2             26          weekly\n",
       "6  2023-02-12 2023-02-13          15          1             31       quarterly\n",
       "7  2023-02-19 2023-02-20          16          3             29        biweekly\n",
       "8  2023-02-26 2023-02-27          12          2             25          weekly\n",
       "9  2023-03-05 2023-03-06          11          1             30       quarterly\n",
       "10 2023-03-12 2023-03-13          10          4             27          weekly\n",
       "11 2023-03-19 2023-03-20           9          3             26        biweekly"
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_dummy"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "df_dummy.to_csv(\"test_data.csv\",index=False)"
   ]
  },
  {
@@ -1399,6 +1581,45 @@
   "metadata": {},
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Dummy assessment data has been saved as dummy_company_asseement_data.csv.\n"
     ]
    }
   ],
   "source": [
    "import pandas as pd\n",
    "\n",
    "# Create dummy assessment data\n",
    "data = {\n",
    "    'Assessment_ID': range(1, 11),\n",
    "    'Open_Items': [3, 4, 2, 5, 1, 3, 2, 4, 5, 3],\n",
    "    'Red_Flags': [1, 2, 0, 1, 0, 3, 2, 1, 1, 2],\n",
    "    'Assessment_Frequency': ['Weekly', 'Bi-Weekly', 'Weekly', 'Quarterly', 'Bi-Weekly', 'Weekly', 'Quarterly', 'Bi-Weekly', 'Weekly', 'Quarterly'],\n",
    "    'Assessment_Start_Date': pd.date_range(start='2023-01-01', periods=10, freq='15D'),\n",
    "    'Assessment_End_Date': pd.date_range(start='2023-01-07', periods=10, freq='15D'),\n",
    "    'Assessment_Area': ['Deployment', 'Communication', 'Deployment', 'Communication', 'Deployment', 'Deployment', 'Communication', 'Deployment', 'Communication', 'Deployment'],\n",
    "    'Assessment_Status': ['Completed', 'Completed', 'Completed', 'In Progress', 'Completed', 'Completed', 'Incomplete', 'Completed', 'In Progress', 'Completed'],\n",
    "    'Assessment_Admin': ['Admin A', 'Admin B', 'Admin A', 'Admin B', 'Admin A', 'Admin A', 'Admin B', 'Admin A', 'Admin B', 'Admin A']\n",
    "}\n",
    "\n",
    "# Create DataFrame\n",
    "df = pd.DataFrame(data)\n",
    "\n",
    "# Save DataFrame to CSV\n",
    "csv_file_path = 'dummy_company_asseement_data.csv'\n",
    "df.to_csv(csv_file_path, index=False)\n",
    "\n",
    "print(f\"Dummy assessment data has been saved as {csv_file_path}.\")\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
@@ -4,4 +4,4 @@ app = create_app()
 if __name__ == '__main__':
-    app.run(debug=True, port=5401)
+    app.run(debug=True, port=5402)
@@ -0,0 +1,70 @@
 import pandas as pd
 def generate_summary_stats_v2(file_path):
    """Summarise a company's past assessments stored in a CSV file.

    The CSV is expected to provide the columns ``Open_Items``, ``Red_Flags``,
    ``Assessment_Frequency``, ``Assessment_Start_Date``, ``Assessment_End_Date``,
    ``Assessment_Area``, ``Assessment_Status`` and ``Assessment_Admin``.

    :param file_path: Location of the CSV (anything ``pandas.read_csv`` accepts).
    :return: Nested dict keyed by section ('Open Items and Red Flags',
        'Assessment Frequency', 'Assessment Start and End Dates',
        'Assessment Areas', 'Assessment Status', 'Assessment Admin'). Ratios and
        averages are rounded to 2 decimals. 'Most Common Area with Red Flags' is
        ``None`` when no assessment has a red flag, and 'Average Time Between
        Assessments' is NaN when fewer than two assessments exist.
    """
    df = pd.read_csv(file_path)

    # Parse the date columns so they support timedelta arithmetic
    start_dates = pd.to_datetime(df['Assessment_Start_Date'])
    end_dates = pd.to_datetime(df['Assessment_End_Date'])
    df['Assessment_Start_Date'] = start_dates
    df['Assessment_End_Date'] = end_dates

    # Length of each assessment in days
    duration_days = (end_dates - start_dates).dt.days
    # Days between consecutive assessment starts; this is the cadence, which is
    # a different quantity from the duration of a single assessment
    gap_days = start_dates.sort_values().diff().dt.days.dropna()

    assessments_per_frequency = df.groupby('Assessment_Frequency').size()

    def _rate_by_frequency(status):
        # Share of assessments with `status` inside each frequency; frequencies
        # without a matching row come out of the division as NaN and become 0
        matching = df[df['Assessment_Status'] == status]
        counts = matching.groupby('Assessment_Frequency').size()
        return (counts / assessments_per_frequency).fillna(0).round(2).to_dict()

    # mode() yields an empty Series when no assessment has a red flag
    flagged_area_modes = df.loc[df['Red_Flags'] > 0, 'Assessment_Area'].mode()
    top_flagged_area = flagged_area_modes.iloc[0] if not flagged_area_modes.empty else None

    open_items = df['Open_Items']
    red_flags = df['Red_Flags']
    statuses = df['Assessment_Status']

    summary_stats = {
        'Open Items and Red Flags': {
            'Total Open Items': round(open_items.sum(), 2),
            'Average Open Items per Assessment': round(open_items.mean(), 2),
            'Total Red Flags': round(red_flags.sum(), 2),
            'Average Red Flags per Assessment': round(red_flags.mean(), 2),
            'Max Red Flags in a Single Assessment': round(red_flags.max(), 2),
            'Most Common Area with Red Flags': top_flagged_area
        },
        'Assessment Frequency': {
            'Assessment Type Breakdown': df['Assessment_Frequency'].value_counts(normalize=True).round(2).to_dict(),
            'Average Time Between Assessments': round(gap_days.mean(), 2),
            'Average Assessment Duration': round(duration_days.mean(), 2),
            'Completion Rate by Frequency': _rate_by_frequency('Completed'),
            'In Progress Rate by Frequency': _rate_by_frequency('In Progress'),
            'Incomplete Rate by Frequency': _rate_by_frequency('Incomplete')
        },
        'Assessment Start and End Dates': {
            'Longest Assessment Duration (days)': round(duration_days.max(), 2),
            'Shortest Assessment Duration (days)': round(duration_days.min(), 2),
        },
        'Assessment Areas': {
            'Most Assessed Area': df['Assessment_Area'].value_counts().idxmax(),
            'Most Open Items in Area': df.groupby('Assessment_Area')['Open_Items'].sum().idxmax(),
            'Area with Most Red Flags': df.groupby('Assessment_Area')['Red_Flags'].sum().idxmax()
        },
        'Assessment Status': {
            'Assessment Status Distribution': statuses.value_counts(normalize=True).round(2).to_dict(),
            'Incomplete Assessments': int((statuses == 'Incomplete').sum()),
            'In Progress Assessments': int((statuses == 'In Progress').sum())
        },
        'Assessment Admin': {
            'Most Frequent Admin': df['Assessment_Admin'].mode()[0],
            'Admin with Fewest Red Flags': df.groupby('Assessment_Admin')['Red_Flags'].sum().idxmin(),
            # Totals (not per-assessment means) so this agrees with the label
            # and with the other "most/fewest" aggregates in this summary
            'Admin with Most Open Items': df.groupby('Assessment_Admin')['Open_Items'].sum().idxmax()
        }
    }
    return summary_stats
@@ -4,6 +4,7 @@ from werkzeug.utils import secure_filename
 from src.services.chatbot import Chatbot
 from src.utils.utils import delete_all_files_in_directory
 from src.utils.document_loader import load_document  
 from src.services.chatbot import Chatbot
 # Initialize the Blueprint
@@ -59,3 +60,64 @@ def validate_worker_document():
    except Exception as e:
        return jsonify({"error": "Internal Server Error", "message": str(e)}), 500
@bot.route('/predict_next_n_assessments', methods=['POST'])
 def predict_next_n_assessments():
    """Handle POST /predict_next_n_assessments.

    Expects a JSON object with ``company_info``, ``companyid`` and ``N`` (a
    positive integer: how many upcoming assessments to predict).

    Returns ``{"predictions": ...}`` with 200 on success, 400 when the payload
    is missing, malformed or invalid (or when the service yields nothing), and
    500 on unexpected errors.
    """
    try:
        # silent=True: a missing, non-JSON or malformed body becomes None here
        # rather than an exception that would be reported as a 500 below
        payload = request.get_json(silent=True)
        if not isinstance(payload, dict):
            payload = {}
        company_info = payload.get('company_info')
        companyid = payload.get('companyid')
        N = payload.get('N')
        if not company_info or not companyid or N is None:
            return jsonify({"error": "Missing data", "message": "Company info, company ID, or N value not provided."}), 400
        # bool is a subclass of int, so exclude it explicitly; the service
        # uses N to bound the number of predictions it returns
        if isinstance(N, bool) or not isinstance(N, int) or N < 1:
            return jsonify({"error": "Invalid data", "message": "N must be a positive integer."}), 400
        # Instantiate the chatbot service
        chatbot = Chatbot()
        # Call the prediction method
        response = chatbot.predict_next_n_assessment(
            company_info=company_info,
            companyid=companyid,
            N=N
        )
        # The service returns None (or an empty result) when prediction fails
        if not response:
            return jsonify({"error": "No predictions available", "message": "Prediction process failed."}), 400
        return jsonify({"predictions": response}), 200
    except Exception as e:
        return jsonify({"error": "Internal Server Error", "message": str(e)}), 500
@bot.route('/use_bot_predict_assessments', methods=['POST'])
 def use_bot_predict_assessments():
    """Handle POST /use_bot_predict_assessments.

    Expects a JSON object with ``company_info``, ``companyid`` and ``query``
    (the user's question about the company's assessments).

    Returns ``{"predictions": ...}`` with 200 on success, 400 when the payload
    is missing or malformed (or when the service yields nothing), and 500 on
    unexpected errors.
    """
    try:
        # silent=True: a missing, non-JSON or malformed body becomes None here
        # rather than an exception that would be reported as a 500 below
        payload = request.get_json(silent=True)
        if not isinstance(payload, dict):
            payload = {}
        company_info = payload.get('company_info')
        companyid = payload.get('companyid')
        query = payload.get('query')
        if not company_info or not companyid or query is None:
            return jsonify({"error": "Missing data", "message": "Company info, company ID, or query value not provided."}), 400
        # Instantiate the chatbot service
        chatbot = Chatbot()
        # Call the prediction method
        response = chatbot.predict_based_on_past_assessment(
            company_info=company_info,
            companyid=companyid,
            query=query
        )
        # The service returns None when prediction fails
        if not response:
            return jsonify({"error": "No predictions available", "message": "Prediction process failed."}), 400
        return jsonify({"predictions": response}), 200
    except Exception as e:
        return jsonify({"error": "Internal Server Error", "message": str(e)}), 500
@@ -5,3 +5,20 @@ from typing import List, Dict
 # Response schema with a single string field, `result`.
 class ValidateWorker(BaseModel):
    result:str
 # Response schema with a single string field, `response`.
 class Result(BaseModel):
    response:str
 # Pair of integer counts: open items and red flags.
 class Cases(BaseModel):
    open_items: int
    red_flags: int
 # One `Cases` entry per assessment cadence: weekly, biweekly and quarterly.
 class AssessmentsFrequency(BaseModel):
    weekly: Cases
    biweekly: Cases
    quarterly: Cases
 # A single predicted assessment; its per-cadence counts live under the
 # literal field name `AssessmentN`.
 class AssessmentPrediction(BaseModel):
    AssessmentN: AssessmentsFrequency
 # Top-level response schema: a list of `AssessmentPrediction` entries under `predictions`.
 class AssessmentPredictionsResponse(BaseModel):
    predictions: List[AssessmentPrediction]
@@ -4,11 +4,6 @@ import logging
 from logging.handlers import RotatingFileHandler
 handler = RotatingFileHandler('/root/ds_erp_ai/logs/prediction_pipeline.log', maxBytes=100000, backupCount=3)
 logger = logging.getLogger(__name__)
 logger.setLevel(logging.INFO)
 logger.addHandler(handler)
 class DataPreprocessor:
    def __init__(self, input_path, company_id):
        self.input_path = input_path
@@ -33,7 +33,8 @@ class ModelTrainer:
        y = self.df[['open_items', 'red_flags']]  # Multi-target for open items and red flags
        # Split into training and test sets with 10% as test size
-        X_train, self.X_test, y_train, self.y_test = train_test_split(X, y, test_size=0.1, random_state=42)
+        X_train, self.X_test, y_train, self.y_test = train_test_split(X, y, test_size=0.1, random_state=42, shuffle=False)
        # Train the model
        self.model.fit(X_train, y_train)
@@ -1,3 +1,5 @@
 def validate_worker_prompt() -> str:
   return """
    You are a worker in the company "Validate" where you are asked a specific yes or no question:
@@ -15,4 +17,133 @@ def validate_worker_prompt() -> str:
      result:"validated" 
    }
    """
 def predict_based_past_assessment_prompt(query, company_info, summary_stats):
    """Build the chatbot prompt for answering a query from past assessment data.

    :param query: The user's question; embedded verbatim in the prompt.
    :param company_info: Dict with the keys 'company_name', 'company_size' and
        'departments' (an iterable of strings).
    :param summary_stats: Nested summary dict with the sections 'Open Items and
        Red Flags', 'Assessment Frequency', 'Assessment Start and End Dates',
        'Assessment Areas', 'Assessment Status' and 'Assessment Admin'.
    :return: The fully rendered prompt string.
    :raises KeyError: If a required key is missing from either dict.
    """
    # Extract company information from the dictionary
    company_name = company_info['company_name']
    company_size = company_info['company_size']
    departments = company_info['departments']

    # Name each section once so the template below stays readable
    flags = summary_stats['Open Items and Red Flags']
    frequency = summary_stats['Assessment Frequency']
    dates = summary_stats['Assessment Start and End Dates']
    areas = summary_stats['Assessment Areas']
    status = summary_stats['Assessment Status']
    admins = summary_stats['Assessment Admin']
    type_share = frequency['Assessment Type Breakdown']
    status_share = status['Assessment Status Distribution']

    def _percent(shares, label):
        # Rounding strips binary floating-point noise: 0.29 * 100 would
        # otherwise render as 28.999999999999996
        return round(shares.get(label, 0) * 100, 2)

    # Create the prompt with the provided company info and summary statistics
    prompt = f"""
    **Prompt for the Chatbot:**
    **Context:**
    You are an AI assistant working for {company_name}, and your primary responsibility is to provide **insights**, **predictions**, and **recommendations** based on the company's past assessment data and organizational structure. You are not allowed to respond to any queries outside of this domain.
    **General Company Information:**
    - **Company Name**: {company_name}
    - **Company Size**: {company_size} (e.g., Small, Medium, Large)
    - **Departments**: 
      {', '.join(departments)}
    **Assessment Summary**:
    The following is a detailed summary of past assessments at {company_name}. Use this information to provide predictions and recommendations based on trends and data points.
    - **Open Items and Red Flags**:
      - Total Open Items: {flags['Total Open Items']}
      - Average Open Items per Assessment: {flags['Average Open Items per Assessment']}
      - Total Red Flags: {flags['Total Red Flags']}
      - Average Red Flags per Assessment: {flags['Average Red Flags per Assessment']}
      - Max Red Flags in a Single Assessment: {flags['Max Red Flags in a Single Assessment']}
      - Most Common Area with Red Flags: {flags['Most Common Area with Red Flags']}
    - **Assessment Frequency**:
      - Weekly: {_percent(type_share, 'Weekly')}%
      - Bi-Weekly: {_percent(type_share, 'Bi-Weekly')}%
      - Quarterly: {_percent(type_share, 'Quarterly')}%
      - Average Time Between Assessments: {frequency['Average Time Between Assessments']} days
      - Average Assessment Duration: {frequency['Average Assessment Duration']} days
    - **Assessment Start and End Dates**:
      - Longest Assessment Duration: {dates['Longest Assessment Duration (days)']} days
      - Shortest Assessment Duration: {dates['Shortest Assessment Duration (days)']} days
    - **Assessment Areas**:
      - Most Assessed Area: {areas['Most Assessed Area']}
      - Most Open Items in Area: {areas['Most Open Items in Area']}
      - Area with Most Red Flags: {areas['Area with Most Red Flags']}
    - **Assessment Status**:
      - Completed: {_percent(status_share, 'Completed')}%
      - In Progress: {_percent(status_share, 'In Progress')}%
      - Incomplete: {_percent(status_share, 'Incomplete')}%
    - **Assessment Admin**:
      - Most Frequent Admin: {admins['Most Frequent Admin']}
      - Admin with Fewest Red Flags: {admins['Admin with Fewest Red Flags']}
      - Admin with Most Open Items: {admins['Admin with Most Open Items']}
    **Instructions:**
    Use the above information to answer user queries. You should:
    - Analyze historical data to identify trends and problem areas.
    - Predict potential outcomes for future assessments based on past performance (e.g., meeting deadlines, reducing red flags).
    - Provide **actionable recommendations** that can help improve performance in future assessments.
    **User Query**:
    "{query}"
    **Your Response**:
    Predict and provide recommendations based on the company’s historical data, focusing on the areas most relevant to the query. Ensure the response is based on past trends and performance issues.
    **Examples of Insightful Responses**:
    - "To improve your performance in the next assessment, you should focus on reducing red flags in the Communication department, as it has had the most issues."
    - "Based on the company's past performance, there is a 70% chance that you will meet the deadline for the next weekly assessment. To ensure success, focus on completing open items in the IT department."
    - "The data indicates that quarterly assessments have the highest rate of incomplete tasks. I recommend prioritizing quarterly assessment tasks to avoid falling behind."
    """
    return prompt
 def predict_next_n_assessments_prompt():
    """Return the static system prompt for predicting the next N assessments.

    The prompt is a plain (non-formatted) string, so it refers to the count
    symbolically as "N"; the caller supplies the company info, the summary
    statistics and the actual N value in separate user messages. The response
    format shown to the model mirrors the ``AssessmentPredictionsResponse``
    schema (a ``predictions`` list whose entries wrap their data in an
    ``AssessmentN`` object), which is what the caller parses.

    :return: The prompt string.
    """
    prompt = """
    **Prompt for the Chatbot:**
    **Context:**
    You are an AI assistant responsible for analyzing the past assessment data of a company, and your primary responsibility is to provide **predictions** for the next N assessments. 
    These assessments can occur on a **weekly**, **bi-weekly**, or **quarterly** basis. Use the company's past performance to predict the following for each of the next N assessments:
    - **Number of Open Items**.
    - **Number of Red Flags**.
    - **Predictions for Weekly, Bi-Weekly, and Quarterly assessments**.
    Input:
    - company basic info
    - past assessment statistics
    - N - number of next assessments to be predicted
    **General Company Information:**
    The company's basic information will be provided in the user messages.
    **Assessment Summary (Past Data)**:
    Detailed information on past assessments will be provided. Use this information to make predictions for the next N assessments.
    **Instructions**:
    - Predict the number of open items and red flags for the next N assessments if they are conducted on a weekly, bi-weekly, or quarterly basis.
    - Use the historical summary statistics provided to guide your predictions.
    - Return the response in the following JSON format:
    **Response Format** (example assuming N is 2):
    {
      "predictions": [
        {
          "AssessmentN": {
            "weekly": {"open_items": X, "red_flags": Y},
            "biweekly": {"open_items": X, "red_flags": Y},
            "quarterly": {"open_items": X, "red_flags": Y}
          }
        },
        {
          "AssessmentN": {
            "weekly": {"open_items": X, "red_flags": Y},
            "biweekly": {"open_items": X, "red_flags": Y},
            "quarterly": {"open_items": X, "red_flags": Y}
          }
        }
      ]
    }
    The "predictions" list must contain exactly N entries, ordered from the next assessment onwards.
    Ensure each assessment is provided with three predictions: one for Weekly, one for Bi-Weekly, and one for Quarterly assessments.
    """
    return prompt
@@ -7,6 +7,7 @@ from src.prompts.sops import *
 from src.prompts.chatbot import *
 from src.models.sop_response_schemas import *
 from src.models.bot_response_schema import *
 from scripts.assessment_data import generate_summary_stats_v2
 from dotenv import load_dotenv
 load_dotenv()
@@ -52,7 +53,7 @@ class Chatbot:
                    }
                ],
                response_format=ValidateWorker,
-                max_tokens=4096,
+                max_tokens=1024,
                temperature=0.1
            )
@@ -64,3 +65,121 @@ class Chatbot:
        except Exception as e:
            print(f"An error occurred: {e}")
            return None
    def predict_based_on_past_assessment(self, query, company_info, companyid) -> Result:
        """
        Answer a user query using the company's past assessment data.

        Loads the company's assessment CSV, condenses it into summary statistics, renders the
        prompt and asks the configured model (``self.model`` via ``self.client``) for a
        structured ``Result`` response.

        :param query: The question or query asked by the user.
        :param company_info: General information about the company (name, size, departments, etc.).
        :param companyid: Unique identifier of the company; selects ``<companyid>_raw_data.csv``.
        :return: The model response decoded from JSON (a dict), or None if any error occurs
                 (including an invalid ``companyid``).
        """
        try:
            # companyid can originate from a request body: accept only a plain file-name
            # component so it cannot point outside the assessment data directory
            company_key = str(companyid)
            if (
                not company_key
                or company_key in ('.', '..')
                or '/' in company_key
                or '\\' in company_key
                or company_key != os.path.basename(company_key)
            ):
                raise ValueError(f"Invalid company id: {companyid!r}")

            # Define the path to the company's assessment data (stored as a CSV)
            data_path = os.path.join('data', 'raw', 'erp_company_assessment', f'{company_key}_raw_data.csv')
            # Generate summary statistics from the company's assessment data
            summary_stats = generate_summary_stats_v2(file_path=data_path)
            # Generate the prompt using the company info and the summary statistics
            prompt = predict_based_past_assessment_prompt(
                query=query,
                company_info=company_info,
                summary_stats=summary_stats
            )
            # Ask the model for a response constrained to the Result schema
            response = self.client.beta.chat.completions.parse(
                model=self.model,
                messages=[
                    {
                        "role": "system",
                        "content": f"{prompt}"
                    },
                    {
                        "role": "user",
                        "content": f"{query}",
                    }
                ],
                response_format=Result,
                max_tokens=1024,
                temperature=0.1
            )
            # Decode the JSON payload of the first choice and hand it back
            extracted_text = json.loads(response.choices[0].message.content)
            return extracted_text
        except Exception as e:
            # Best-effort contract: callers treat None as "prediction failed"
            print(f"An error occurred: {e}")
            return None
    def predict_next_n_assessment(self, company_info, companyid, N) -> AssessmentPredictionsResponse:
        """
        Predict open items and red flags for the company's next N assessments.

        Loads the company's assessment CSV, condenses it into summary statistics and asks the
        configured model (``self.model`` via ``self.client``) for a structured
        ``AssessmentPredictionsResponse``.

        :param company_info: General information about the company (name, size, departments, etc.).
        :param companyid: Unique identifier of the company; selects ``<companyid>_raw_data.csv``.
        :param N: Number of assessments to predict.
        :return: Dict mapping ``assessment_1`` .. ``assessment_k`` (k <= N) to the per-frequency
                 predictions, or None if any error occurs (including an invalid ``companyid``).
        """
        try:
            # companyid can originate from a request body: accept only a plain file-name
            # component so it cannot point outside the assessment data directory
            company_key = str(companyid)
            if (
                not company_key
                or company_key in ('.', '..')
                or '/' in company_key
                or '\\' in company_key
                or company_key != os.path.basename(company_key)
            ):
                raise ValueError(f"Invalid company id: {companyid!r}")

            # Define the path to the company's assessment data (stored as a CSV)
            data_path = os.path.join('data', 'raw', 'erp_company_assessment', f'{company_key}_raw_data.csv')
            # Generate summary statistics from the company's assessment data
            summary_stats = generate_summary_stats_v2(file_path=data_path)
            # The system prompt is static; the inputs travel in the user messages below
            prompt = predict_next_n_assessments_prompt()
            # Ask the model for a response constrained to the predictions schema
            response = self.client.beta.chat.completions.parse(
                model=self.model,
                messages=[
                    {
                        "role": "system",
                        "content": f"{prompt}"
                    },
                    {
                        "role": "user",
                        "content": f"company info: {company_info}--> N-value is {N} ",
                    },
                    {
                        "role": "user",
                        "content": f"Summary stats: {summary_stats}",
                    }
                ],
                response_format=AssessmentPredictionsResponse,
                max_tokens=1024,
                temperature=0.1
            )
            # Decode the JSON payload of the first choice
            extracted_text = json.loads(response.choices[0].message.content)
            # Keep at most N entries; iterating over what was actually returned means a
            # short answer from the model no longer discards the predictions it did give
            returned = extracted_text["predictions"][:N]
            if len(returned) < N:
                print(f"Model returned {len(returned)} of {N} requested predictions")
            # Rename the entries to assessment_1, assessment_2, ...
            predictions = {
                f"assessment_{position}": entry['AssessmentN']
                for position, entry in enumerate(returned, start=1)
            }
            return predictions
        except Exception as e:
            # Best-effort contract: callers treat None as "prediction failed"
            print(f"An error occurred: {e}")
            return None
@@ -6,9 +6,7 @@ input_base_path = '/root/ds_erp_ai/data/raw/erp_assessment_prediction'  # The ba
 pipeline = CompanyModelPipeline(company_ids=company_ids, input_base_path=input_base_path)
 pipeline.run_pipeline()'''
-from src.pipeline.inference import AssessmentInference
+'''from src.pipeline.inference import AssessmentInference
 inference = AssessmentInference(
    company_id="testid",num_assessments=2
@@ -18,3 +16,32 @@ result = inference.run()
 print(result)
 '''
 '''
 response2 = bot.predict_next_n_assessment(
    company_info=company_info,
    companyid="testid",
    N=3
 )
 print(f"Predictions {response2}")
 '''
 from src.services.chatbot import Chatbot
 # Manual smoke test: ask the chatbot a question about a sample company.
 # Sample company profile passed to the chatbot as `company_info`.
 company_info = {
    'company_name': "ABC Corp",
    'company_size': "Medium",  # Can be "Small", "Medium", or "Large"
    'departments': ["Sales", "Marketing", "IT", "Finance", "HR", "Logistics"]
 }
 bot = Chatbot()
 # The chatbot loads the assessment CSV belonging to `companyid`
 # ("testid" here) and returns None if anything fails.
 response = bot.predict_based_on_past_assessment(
    query="Should i make my next assessment weekly or biweekly to meet up to deadline?",
    company_info=company_info,
    companyid="testid"
 )
 print(f"Result: {response}")
`@@ -4,4 +4,4 @@ app = create_app()`


	`if __name__ == '__main__':`	`if __name__ == '__main__':`
	`app.run(debug=True, port=5401)`	`app.run(debug=True, port=5402)`