diff --git a/data/raw/erp_company_assessment/testid_raw_data.2csv b/data/raw/erp_company_assessment/testid_raw_data.2csv
new file mode 100644
index 0000000..1cba78e
--- /dev/null
+++ b/data/raw/erp_company_assessment/testid_raw_data.2csv
@@ -0,0 +1,11 @@
+Assessment_ID,Open_Items,Red_Flags,Assessment_Frequency,Assessment_Start_Date,Assessment_End_Date,Assessment_Area,Assessment_Status,Assessment_Admin
+1,3,1,Weekly,2023-01-01,2023-01-07,Deployment,Completed,Admin A
+2,4,2,Bi-Weekly,2023-01-16,2023-01-22,Communication,Completed,Admin B
+3,2,0,Weekly,2023-01-31,2023-02-06,Deployment,Completed,Admin A
+4,5,1,Quarterly,2023-02-15,2023-02-21,Communication,In Progress,Admin B
+5,1,0,Bi-Weekly,2023-03-02,2023-03-08,Deployment,Completed,Admin A
+6,3,3,Weekly,2023-03-17,2023-03-23,Deployment,Completed,Admin A
+7,2,2,Quarterly,2023-04-01,2023-04-07,Communication,Incomplete,Admin B
+8,4,1,Bi-Weekly,2023-04-16,2023-04-22,Deployment,Completed,Admin A
+9,5,1,Weekly,2023-05-01,2023-05-07,Communication,In Progress,Admin B
+10,3,2,Quarterly,2023-05-16,2023-05-22,Deployment,Completed,Admin A
diff --git a/data/raw/erp_company_assessment/testid_raw_data.csv b/data/raw/erp_company_assessment/testid_raw_data.csv
new file mode 100644
index 0000000..77da1b3
--- /dev/null
+++ b/data/raw/erp_company_assessment/testid_raw_data.csv
@@ -0,0 +1,11 @@
+Assessment_ID,Open_Items,Red_Flags,Assessment_Frequency,Assessment_Start_Date,Assessment_End_Date,Assessment_Area,Assessment_Status,Assessment_Admin,Department
+1,3,1,Weekly,2023-01-01,2023-01-07,Deployment,Completed,Admin A,IT
+2,4,2,Bi-Weekly,2023-01-16,2023-01-22,Communication,Completed,Admin B,HR
+3,2,0,Weekly,2023-01-31,2023-02-06,Deployment,Completed,Admin A,Finance
+4,5,1,Quarterly,2023-02-15,2023-02-21,Communication,In Progress,Admin B,IT
+5,1,0,Bi-Weekly,2023-03-02,2023-03-08,Deployment,Completed,Admin A,HR
+6,3,3,Weekly,2023-03-17,2023-03-23,Deployment,Completed,Admin A,Finance
+7,2,2,Quarterly,2023-04-01,2023-04-07,Communication,Incomplete,Admin B,IT
+8,4,1,Bi-Weekly,2023-04-16,2023-04-22,Deployment,Completed,Admin A,HR
+9,5,1,Weekly,2023-05-01,2023-05-07,Communication,In Progress,Admin B,Finance
+10,3,2,Quarterly,2023-05-16,2023-05-22,Deployment,Completed,Admin A,IT
diff --git a/notebooks/dummy_assessment_data.csv b/notebooks/dummy_assessment_data.csv
new file mode 100644
index 0000000..1cba78e
--- /dev/null
+++ b/notebooks/dummy_assessment_data.csv
@@ -0,0 +1,11 @@
+Assessment_ID,Open_Items,Red_Flags,Assessment_Frequency,Assessment_Start_Date,Assessment_End_Date,Assessment_Area,Assessment_Status,Assessment_Admin
+1,3,1,Weekly,2023-01-01,2023-01-07,Deployment,Completed,Admin A
+2,4,2,Bi-Weekly,2023-01-16,2023-01-22,Communication,Completed,Admin B
+3,2,0,Weekly,2023-01-31,2023-02-06,Deployment,Completed,Admin A
+4,5,1,Quarterly,2023-02-15,2023-02-21,Communication,In Progress,Admin B
+5,1,0,Bi-Weekly,2023-03-02,2023-03-08,Deployment,Completed,Admin A
+6,3,3,Weekly,2023-03-17,2023-03-23,Deployment,Completed,Admin A
+7,2,2,Quarterly,2023-04-01,2023-04-07,Communication,Incomplete,Admin B
+8,4,1,Bi-Weekly,2023-04-16,2023-04-22,Deployment,Completed,Admin A
+9,5,1,Weekly,2023-05-01,2023-05-07,Communication,In Progress,Admin B
+10,3,2,Quarterly,2023-05-16,2023-05-22,Deployment,Completed,Admin A
diff --git a/notebooks/test_prediction_pipeline.ipynb b/notebooks/test_prediction_pipeline.ipynb
index 50a7fab..01c4c39 100644
--- a/notebooks/test_prediction_pipeline.ipynb
+++ b/notebooks/test_prediction_pipeline.ipynb
@@ -2,22 +2,204 @@
"cells": [
{
"cell_type": "code",
- "execution_count": 8,
+ "execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
- "# Create a dummy dataset with past 5 assessments\n",
"import pandas as pd\n",
+ "\n",
"data_dummy = {\n",
- " 'start_date': pd.date_range(start='2023-01-01', periods=5, freq='7D'),\n",
- " 'end_date': pd.date_range(start='2023-01-02', periods=5, freq='7D'),\n",
- " 'open_items': [10, 12, 11, 9, 13],\n",
- " 'red_flags': [2, 1, 3, 1, 4],\n",
- " 'num_employees': [30, 25, 28, 30, 27],\n",
- " 'assessment_type': ['weekly', 'biweekly', 'quarterly', 'weekly', 'biweekly']\n",
+ " 'start_date': pd.date_range(start='2023-01-01', periods=12, freq='7D'),\n",
+ " 'end_date': pd.date_range(start='2023-01-02', periods=12, freq='7D'),\n",
+ " 'open_items': [10, 12, 11, 9, 13, 14, 15, 16, 12, 11, 10, 9],\n",
+ " 'red_flags': [2, 1, 3, 1, 4, 2, 1, 3, 2, 1, 4, 3],\n",
+ " 'num_employees': [30, 25, 28, 30, 27, 26, 31, 29, 25, 30, 27, 26],\n",
+ " 'assessment_type': ['weekly', 'biweekly', 'quarterly', 'weekly', 'biweekly', \n",
+ " 'weekly', 'quarterly', 'biweekly', 'weekly', 'quarterly', 'weekly', 'biweekly']\n",
"}\n",
"\n",
- "df_dummy = pd.DataFrame(data_dummy)"
+ "df_dummy = pd.DataFrame(data_dummy)\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " start_date | \n",
+ " end_date | \n",
+ " open_items | \n",
+ " red_flags | \n",
+ " num_employees | \n",
+ " assessment_type | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " 2023-01-01 | \n",
+ " 2023-01-02 | \n",
+ " 10 | \n",
+ " 2 | \n",
+ " 30 | \n",
+ " weekly | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " 2023-01-08 | \n",
+ " 2023-01-09 | \n",
+ " 12 | \n",
+ " 1 | \n",
+ " 25 | \n",
+ " biweekly | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " 2023-01-15 | \n",
+ " 2023-01-16 | \n",
+ " 11 | \n",
+ " 3 | \n",
+ " 28 | \n",
+ " quarterly | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " 2023-01-22 | \n",
+ " 2023-01-23 | \n",
+ " 9 | \n",
+ " 1 | \n",
+ " 30 | \n",
+ " weekly | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " 2023-01-29 | \n",
+ " 2023-01-30 | \n",
+ " 13 | \n",
+ " 4 | \n",
+ " 27 | \n",
+ " biweekly | \n",
+ "
\n",
+ " \n",
+ " | 5 | \n",
+ " 2023-02-05 | \n",
+ " 2023-02-06 | \n",
+ " 14 | \n",
+ " 2 | \n",
+ " 26 | \n",
+ " weekly | \n",
+ "
\n",
+ " \n",
+ " | 6 | \n",
+ " 2023-02-12 | \n",
+ " 2023-02-13 | \n",
+ " 15 | \n",
+ " 1 | \n",
+ " 31 | \n",
+ " quarterly | \n",
+ "
\n",
+ " \n",
+ " | 7 | \n",
+ " 2023-02-19 | \n",
+ " 2023-02-20 | \n",
+ " 16 | \n",
+ " 3 | \n",
+ " 29 | \n",
+ " biweekly | \n",
+ "
\n",
+ " \n",
+ " | 8 | \n",
+ " 2023-02-26 | \n",
+ " 2023-02-27 | \n",
+ " 12 | \n",
+ " 2 | \n",
+ " 25 | \n",
+ " weekly | \n",
+ "
\n",
+ " \n",
+ " | 9 | \n",
+ " 2023-03-05 | \n",
+ " 2023-03-06 | \n",
+ " 11 | \n",
+ " 1 | \n",
+ " 30 | \n",
+ " quarterly | \n",
+ "
\n",
+ " \n",
+ " | 10 | \n",
+ " 2023-03-12 | \n",
+ " 2023-03-13 | \n",
+ " 10 | \n",
+ " 4 | \n",
+ " 27 | \n",
+ " weekly | \n",
+ "
\n",
+ " \n",
+ " | 11 | \n",
+ " 2023-03-19 | \n",
+ " 2023-03-20 | \n",
+ " 9 | \n",
+ " 3 | \n",
+ " 26 | \n",
+ " biweekly | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " start_date end_date open_items red_flags num_employees assessment_type\n",
+ "0 2023-01-01 2023-01-02 10 2 30 weekly\n",
+ "1 2023-01-08 2023-01-09 12 1 25 biweekly\n",
+ "2 2023-01-15 2023-01-16 11 3 28 quarterly\n",
+ "3 2023-01-22 2023-01-23 9 1 30 weekly\n",
+ "4 2023-01-29 2023-01-30 13 4 27 biweekly\n",
+ "5 2023-02-05 2023-02-06 14 2 26 weekly\n",
+ "6 2023-02-12 2023-02-13 15 1 31 quarterly\n",
+ "7 2023-02-19 2023-02-20 16 3 29 biweekly\n",
+ "8 2023-02-26 2023-02-27 12 2 25 weekly\n",
+ "9 2023-03-05 2023-03-06 11 1 30 quarterly\n",
+ "10 2023-03-12 2023-03-13 10 4 27 weekly\n",
+ "11 2023-03-19 2023-03-20 9 3 26 biweekly"
+ ]
+ },
+ "execution_count": 2,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df_dummy"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "df_dummy.to_csv(\"test_data.csv\",index=False)"
]
},
{
@@ -1399,6 +1581,45 @@
"metadata": {},
"source": []
},
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Dummy assessment data has been saved as dummy_company_asseement_data.csv.\n"
+ ]
+ }
+ ],
+ "source": [
+ "import pandas as pd\n",
+ "\n",
+ "# Create dummy assessment data\n",
+ "data = {\n",
+ " 'Assessment_ID': range(1, 11),\n",
+ " 'Open_Items': [3, 4, 2, 5, 1, 3, 2, 4, 5, 3],\n",
+ " 'Red_Flags': [1, 2, 0, 1, 0, 3, 2, 1, 1, 2],\n",
+ " 'Assessment_Frequency': ['Weekly', 'Bi-Weekly', 'Weekly', 'Quarterly', 'Bi-Weekly', 'Weekly', 'Quarterly', 'Bi-Weekly', 'Weekly', 'Quarterly'],\n",
+ " 'Assessment_Start_Date': pd.date_range(start='2023-01-01', periods=10, freq='15D'),\n",
+ " 'Assessment_End_Date': pd.date_range(start='2023-01-07', periods=10, freq='15D'),\n",
+ " 'Assessment_Area': ['Deployment', 'Communication', 'Deployment', 'Communication', 'Deployment', 'Deployment', 'Communication', 'Deployment', 'Communication', 'Deployment'],\n",
+ " 'Assessment_Status': ['Completed', 'Completed', 'Completed', 'In Progress', 'Completed', 'Completed', 'Incomplete', 'Completed', 'In Progress', 'Completed'],\n",
+ " 'Assessment_Admin': ['Admin A', 'Admin B', 'Admin A', 'Admin B', 'Admin A', 'Admin A', 'Admin B', 'Admin A', 'Admin B', 'Admin A']\n",
+ "}\n",
+ "\n",
+ "# Create DataFrame\n",
+ "df = pd.DataFrame(data)\n",
+ "\n",
+ "# Save DataFrame to CSV\n",
+ "csv_file_path = 'dummy_company_asseement_data.csv'\n",
+ "df.to_csv(csv_file_path, index=False)\n",
+ "\n",
+ "print(f\"Dummy assessment data has been saved as {csv_file_path}.\")\n"
+ ]
+ },
{
"cell_type": "code",
"execution_count": null,
diff --git a/run.py b/run.py
index 7d7cd4d..9ca3ee2 100644
--- a/run.py
+++ b/run.py
@@ -4,4 +4,4 @@ app = create_app()
if __name__ == '__main__':
- app.run(debug=True, port=5401)
+ app.run(debug=True, port=5402)
diff --git a/scripts/assessment_data.py b/scripts/assessment_data.py
new file mode 100644
index 0000000..104bc77
--- /dev/null
+++ b/scripts/assessment_data.py
@@ -0,0 +1,70 @@
+import pandas as pd
+
+def _to_native(value):
+    """Return the built-in Python equivalent of a NumPy/pandas scalar."""
+    return value.item() if hasattr(value, 'item') else value
+
+
+def _status_rate_by_frequency(df, status):
+    """Map each assessment frequency to the share of its rows in `status`."""
+    has_status = df['Assessment_Status'] == status
+    rates = has_status.groupby(df['Assessment_Frequency']).mean().round(2)
+    return {frequency: float(rate) for frequency, rate in rates.items()}
+
+
+def generate_summary_stats_v2(file_path):
+    """Summarise a company's assessment history stored as a CSV file.
+
+    The CSV must provide the columns Open_Items, Red_Flags,
+    Assessment_Frequency, Assessment_Start_Date, Assessment_End_Date,
+    Assessment_Area, Assessment_Status and Assessment_Admin.
+
+    Args:
+        file_path: Path of the CSV file to read.
+
+    Returns:
+        A nested dict keyed by section ('Open Items and Red Flags',
+        'Assessment Frequency', 'Assessment Start and End Dates',
+        'Assessment Areas', 'Assessment Status', 'Assessment Admin').
+        Numeric values are rounded to 2 decimals and returned as built-in
+        Python numbers so the dict prints cleanly when embedded in text.
+
+    Raises:
+        KeyError: If a required column is missing.
+        ValueError: If the file has no rows (idxmax/idxmin on empty data).
+    """
+    df = pd.read_csv(file_path)
+
+    start_dates = pd.to_datetime(df['Assessment_Start_Date'])
+    end_dates = pd.to_datetime(df['Assessment_End_Date'])
+    duration_days = (end_dates - start_dates).dt.days
+
+    # Time *between* assessments is the gap from one start date to the next,
+    # not the start-to-end duration of a single assessment. With fewer than
+    # two assessments there is no gap, so report 0.0.
+    start_gaps = start_dates.sort_values().diff().dt.days.dropna()
+    avg_gap_days = round(float(start_gaps.mean()), 2) if len(start_gaps) else 0.0
+
+    # mode() is empty when no assessment has a red flag; report None then
+    # instead of failing on the [0] lookup.
+    flagged_areas = df.loc[df['Red_Flags'] > 0, 'Assessment_Area'].mode()
+    most_flagged_area = flagged_areas.iloc[0] if len(flagged_areas) else None
+
+    status = df['Assessment_Status']
+    open_items = df['Open_Items']
+    red_flags = df['Red_Flags']
+
+    summary_stats = {
+        'Open Items and Red Flags': {
+            'Total Open Items': _to_native(round(open_items.sum(), 2)),
+            'Average Open Items per Assessment': _to_native(round(open_items.mean(), 2)),
+            'Total Red Flags': _to_native(round(red_flags.sum(), 2)),
+            'Average Red Flags per Assessment': _to_native(round(red_flags.mean(), 2)),
+            'Max Red Flags in a Single Assessment': _to_native(round(red_flags.max(), 2)),
+            'Most Common Area with Red Flags': most_flagged_area
+        },
+        'Assessment Frequency': {
+            'Assessment Type Breakdown': df['Assessment_Frequency'].value_counts(normalize=True).round(2).to_dict(),
+            'Average Time Between Assessments': avg_gap_days,
+            'Average Assessment Duration': _to_native(round(duration_days.mean(), 2)),
+            'Completion Rate by Frequency': _status_rate_by_frequency(df, 'Completed'),
+            'In Progress Rate by Frequency': _status_rate_by_frequency(df, 'In Progress'),
+            'Incomplete Rate by Frequency': _status_rate_by_frequency(df, 'Incomplete')
+        },
+        'Assessment Start and End Dates': {
+            'Longest Assessment Duration (days)': _to_native(round(duration_days.max(), 2)),
+            'Shortest Assessment Duration (days)': _to_native(round(duration_days.min(), 2)),
+        },
+        'Assessment Areas': {
+            'Most Assessed Area': df['Assessment_Area'].value_counts().idxmax(),
+            'Most Open Items in Area': df.groupby('Assessment_Area')['Open_Items'].sum().idxmax(),
+            'Area with Most Red Flags': df.groupby('Assessment_Area')['Red_Flags'].sum().idxmax()
+        },
+        'Assessment Status': {
+            'Assessment Status Distribution': status.value_counts(normalize=True).round(2).to_dict(),
+            'Incomplete Assessments': int((status == 'Incomplete').sum()),
+            'In Progress Assessments': int((status == 'In Progress').sum())
+        },
+        'Assessment Admin': {
+            'Most Frequent Admin': df['Assessment_Admin'].mode()[0],
+            'Admin with Fewest Red Flags': df.groupby('Assessment_Admin')['Red_Flags'].sum().idxmin(),
+            # NOTE(review): ranked by *mean* open items per assessment, while the
+            # area statistics above rank by sum - confirm which one is intended.
+            'Admin with Most Open Items': df.groupby('Assessment_Admin')['Open_Items'].mean().idxmax()
+        }
+    }
+
+    return summary_stats
diff --git a/setup.py b/setup.py
deleted file mode 100644
index e69de29..0000000
diff --git a/src/api/routes/chatbot.py b/src/api/routes/chatbot.py
index 56b35d1..ab4c7d8 100644
--- a/src/api/routes/chatbot.py
+++ b/src/api/routes/chatbot.py
@@ -4,6 +4,7 @@ from werkzeug.utils import secure_filename
from src.services.chatbot import Chatbot
from src.utils.utils import delete_all_files_in_directory
from src.utils.document_loader import load_document
+from src.services.chatbot import Chatbot
# Initialize the Blueprint
@@ -59,3 +60,64 @@ def validate_worker_document():
except Exception as e:
return jsonify({"error": "Internal Server Error", "message": str(e)}), 500
+
+
+@bot.route('/predict_next_n_assessments', methods=['POST'])
+def predict_next_n_assessments():
+    """Predict open items and red flags for a company's next N assessments.
+
+    Reads `company_info`, `companyid` and `N` from the JSON request body and
+    delegates to `Chatbot.predict_next_n_assessment`.
+
+    Returns:
+        200 with {"predictions": ...} on success; 400 when the body is
+        missing, incomplete or invalid, or when the service yields no
+        predictions; 500 on any unexpected error.
+    """
+    try:
+        # silent=True returns None instead of raising on a missing or
+        # malformed JSON body, so bad input gets a 400 rather than the 500
+        # produced by the generic handler below.
+        payload = request.get_json(silent=True)
+        data = payload if isinstance(payload, dict) else {}
+        company_info = data.get('company_info')
+        companyid = data.get('companyid')
+        N = data.get('N')
+
+        if not company_info or not companyid or N is None:
+            return jsonify({"error": "Missing data", "message": "Company info, company ID, or N value not provided."}), 400
+
+        # bool is a subclass of int, so it has to be rejected explicitly.
+        if isinstance(N, bool) or not isinstance(N, int) or N < 1:
+            return jsonify({"error": "Invalid data", "message": "N must be a positive integer."}), 400
+
+        # Instantiate the chatbot service
+        chatbot = Chatbot()
+
+        # Call the prediction method
+        response = chatbot.predict_next_n_assessment(
+            company_info=company_info,
+            companyid=companyid,
+            N=N
+        )
+        if not response:
+            return jsonify({"error": "No predictions available", "message": "Prediction process failed."}), 400
+
+        return jsonify({"predictions": response}), 200
+
+    except Exception as e:
+        return jsonify({"error": "Internal Server Error", "message": str(e)}), 500
+
+
+
+@bot.route('/use_bot_predict_assessments', methods=['POST'])
+def use_bot_predict_assessments():
+    """Answer a free-form question from a company's past assessment data.
+
+    Reads `company_info`, `companyid` and `query` from the JSON request body
+    and delegates to `Chatbot.predict_based_on_past_assessment`.
+
+    Returns:
+        200 with {"predictions": ...} on success; 400 when the body is
+        missing or incomplete, or when the service yields no answer; 500 on
+        any unexpected error.
+    """
+    try:
+        # silent=True returns None instead of raising on a missing or
+        # malformed JSON body, so bad input gets a 400 rather than the 500
+        # produced by the generic handler below.
+        payload = request.get_json(silent=True)
+        data = payload if isinstance(payload, dict) else {}
+        company_info = data.get('company_info')
+        companyid = data.get('companyid')
+        query = data.get('query')
+
+        if not company_info or not companyid or query is None:
+            return jsonify({"error": "Missing data", "message": "Company info, company ID, or query value not provided."}), 400
+
+        # Instantiate the chatbot service
+        chatbot = Chatbot()
+
+        # Call the prediction method
+        response = chatbot.predict_based_on_past_assessment(
+            company_info=company_info,
+            companyid=companyid,
+            query=query
+        )
+        if not response:
+            return jsonify({"error": "No predictions available", "message": "Prediction process failed."}), 400
+
+        return jsonify({"predictions": response}), 200
+
+    except Exception as e:
+        return jsonify({"error": "Internal Server Error", "message": str(e)}), 500
\ No newline at end of file
diff --git a/src/models/bot_response_schema.py b/src/models/bot_response_schema.py
index 22b0560..44feea6 100644
--- a/src/models/bot_response_schema.py
+++ b/src/models/bot_response_schema.py
@@ -4,4 +4,21 @@ from typing import List, Dict
class ValidateWorker(BaseModel):
result:str
-
\ No newline at end of file
+
+class Result(BaseModel):
+    """Structured reply carrying a single free-text answer."""
+    # The answer text.
+    response:str
+
+class Cases(BaseModel):
+    """Pair of integer counts: open items and red flags."""
+    # Number of open items.
+    open_items: int
+    # Number of red flags.
+    red_flags: int
+
+class AssessmentsFrequency(BaseModel):
+    """One `Cases` entry per assessment cadence: weekly, biweekly, quarterly."""
+    weekly: Cases
+    biweekly: Cases
+    quarterly: Cases
+
+class AssessmentPrediction(BaseModel):
+    """Wrapper for a single assessment's per-cadence figures."""
+    # Per-cadence counts for this assessment.
+    AssessmentN: AssessmentsFrequency
+
+class AssessmentPredictionsResponse(BaseModel):
+    """Top-level structured reply: a list of `AssessmentPrediction` items."""
+    # One entry per assessment.
+    predictions: List[AssessmentPrediction]
diff --git a/src/pipeline/data_preprocessor.py b/src/pipeline/data_preprocessor.py
index adccf94..c895ba5 100644
--- a/src/pipeline/data_preprocessor.py
+++ b/src/pipeline/data_preprocessor.py
@@ -4,11 +4,6 @@ import logging
from logging.handlers import RotatingFileHandler
-handler = RotatingFileHandler('/root/ds_erp_ai/logs/prediction_pipeline.log', maxBytes=100000, backupCount=3)
-logger = logging.getLogger(__name__)
-logger.setLevel(logging.INFO)
-logger.addHandler(handler)
-
class DataPreprocessor:
def __init__(self, input_path, company_id):
self.input_path = input_path
diff --git a/src/pipeline/model_trainer.py b/src/pipeline/model_trainer.py
index dd7b769..c7f70cc 100644
--- a/src/pipeline/model_trainer.py
+++ b/src/pipeline/model_trainer.py
@@ -33,7 +33,8 @@ class ModelTrainer:
y = self.df[['open_items', 'red_flags']] # Multi-target for open items and red flags
# Split into training and test sets with 10% as test size
- X_train, self.X_test, y_train, self.y_test = train_test_split(X, y, test_size=0.1, random_state=42)
+ X_train, self.X_test, y_train, self.y_test = train_test_split(X, y, test_size=0.1, random_state=42, shuffle=False)
+
# Train the model
self.model.fit(X_train, y_train)
diff --git a/src/prompts/chatbot.py b/src/prompts/chatbot.py
index 847f3ee..b0fed5e 100644
--- a/src/prompts/chatbot.py
+++ b/src/prompts/chatbot.py
@@ -1,3 +1,5 @@
+
+
def validate_worker_prompt() -> str:
return """
You are a worker in the company "Validate" where you are asked a specific yes or no question:
@@ -15,4 +17,133 @@ def validate_worker_prompt() -> str:
result:"validated"
}
"""
-
\ No newline at end of file
+def predict_based_past_assessment_prompt(query, company_info, summary_stats):
+    """Build the system prompt for answering a query from past assessment data.
+
+    Args:
+        query: The user's question; embedded verbatim in the prompt.
+        company_info: Mapping with 'company_name', 'company_size' and
+            'departments' (an iterable of names, or a single name string).
+        summary_stats: Nested statistics dict using the section and key names
+            read below. The distribution entries are expected to be fractions
+            (0-1) keyed by label.
+
+    Returns:
+        The fully formatted prompt string.
+
+    Raises:
+        KeyError: If a required key is missing from either mapping.
+    """
+    # Extract company information from the dictionary
+    company_name = company_info['company_name']
+    company_size = company_info['company_size']
+    departments = company_info['departments']
+    # A bare string would otherwise be joined character by character.
+    if isinstance(departments, str):
+        departments = [departments]
+    department_list = ', '.join(str(name) for name in departments)
+
+    flags = summary_stats['Open Items and Red Flags']
+    frequency = summary_stats['Assessment Frequency']
+    type_mix = frequency['Assessment Type Breakdown']
+    durations = summary_stats['Assessment Start and End Dates']
+    areas = summary_stats['Assessment Areas']
+    status_mix = summary_stats['Assessment Status']['Assessment Status Distribution']
+    admins = summary_stats['Assessment Admin']
+
+    def pct(distribution, label):
+        # Format as a whole percentage: a bare `share * 100` leaks float noise
+        # into the prompt (0.3 * 100 == 30.000000000000004).
+        return f"{distribution.get(label, 0) * 100:.0f}"
+
+    # Create the prompt with the provided company info and summary statistics
+    prompt = f"""
+    **Prompt for the Chatbot:**
+
+    **Context:**
+    You are an AI assistant working for {company_name}, and your primary responsibility is to provide **insights**, **predictions**, and **recommendations** based on the company's past assessment data and organizational structure. You are not allowed to respond to any queries outside of this domain.
+
+    **General Company Information:**
+    - **Company Name**: {company_name}
+    - **Company Size**: {company_size} (e.g., Small, Medium, Large)
+    - **Departments**:
+    {department_list}
+
+    **Assessment Summary**:
+    The following is a detailed summary of past assessments at {company_name}. Use this information to provide predictions and recommendations based on trends and data points.
+
+    - **Open Items and Red Flags**:
+    - Total Open Items: {flags['Total Open Items']}
+    - Average Open Items per Assessment: {flags['Average Open Items per Assessment']}
+    - Total Red Flags: {flags['Total Red Flags']}
+    - Average Red Flags per Assessment: {flags['Average Red Flags per Assessment']}
+    - Max Red Flags in a Single Assessment: {flags['Max Red Flags in a Single Assessment']}
+    - Most Common Area with Red Flags: {flags['Most Common Area with Red Flags']}
+
+    - **Assessment Frequency**:
+    - Weekly: {pct(type_mix, 'Weekly')}%
+    - Bi-Weekly: {pct(type_mix, 'Bi-Weekly')}%
+    - Quarterly: {pct(type_mix, 'Quarterly')}%
+    - Average Time Between Assessments: {frequency['Average Time Between Assessments']} days
+    - Average Assessment Duration: {frequency['Average Assessment Duration']} days
+
+    - **Assessment Start and End Dates**:
+    - Longest Assessment Duration: {durations['Longest Assessment Duration (days)']} days
+    - Shortest Assessment Duration: {durations['Shortest Assessment Duration (days)']} days
+
+    - **Assessment Areas**:
+    - Most Assessed Area: {areas['Most Assessed Area']}
+    - Most Open Items in Area: {areas['Most Open Items in Area']}
+    - Area with Most Red Flags: {areas['Area with Most Red Flags']}
+
+    - **Assessment Status**:
+    - Completed: {pct(status_mix, 'Completed')}%
+    - In Progress: {pct(status_mix, 'In Progress')}%
+    - Incomplete: {pct(status_mix, 'Incomplete')}%
+
+    - **Assessment Admin**:
+    - Most Frequent Admin: {admins['Most Frequent Admin']}
+    - Admin with Fewest Red Flags: {admins['Admin with Fewest Red Flags']}
+    - Admin with Most Open Items: {admins['Admin with Most Open Items']}
+
+    **Instructions:**
+    Use the above information to answer user queries. You should:
+    - Analyze historical data to identify trends and problem areas.
+    - Predict potential outcomes for future assessments based on past performance (e.g., meeting deadlines, reducing red flags).
+    - Provide **actionable recommendations** that can help improve performance in future assessments.
+
+    **User Query**:
+    "{query}"
+
+    **Your Response**:
+    Predict and provide recommendations based on the company’s historical data, focusing on the areas most relevant to the query. Ensure the response is based on past trends and performance issues.
+
+    **Examples of Insightful Responses**:
+    - "To improve your performance in the next assessment, you should focus on reducing red flags in the Communication department, as it has had the most issues."
+    - "Based on the company's past performance, there is a 70% chance that you will meet the deadline for the next weekly assessment. To ensure success, focus on completing open items in the IT department."
+    - "The data indicates that quarterly assessments have the highest rate of incomplete tasks. I recommend prioritizing quarterly assessment tasks to avoid falling behind."
+    """
+
+    return prompt
+
+
+
+def predict_next_n_assessments_prompt():
+    """Return the static system prompt for predicting the next N assessments.
+
+    The text is a plain string with no substitution: it refers to the company
+    information, the past-assessment statistics and the value of N
+    generically, and expects them to be supplied alongside this prompt. The
+    response example uses the `predictions` / `AssessmentN` layout so it
+    agrees with the `AssessmentPredictionsResponse` schema.
+
+    Returns:
+        The prompt string.
+    """
+    prompt = """
+    **Prompt for the Chatbot:**
+
+    **Context:**
+    You are an AI assistant responsible for analyzing the past assessment data of a company, and your primary responsibility is to provide **predictions** for the next N assessments.
+    These assessments can occur on a **weekly**, **bi-weekly**, or **quarterly** basis. Use the company's past performance to predict the following for each of the next N assessments:
+    - **Number of Open Items**.
+    - **Number of Red Flags**.
+    - **Predictions for Weekly, Bi-Weekly, and Quarterly assessments**.
+
+    **Input**:
+    - Company basic info
+    - Past assessment statistics
+    - N - the number of next assessments to be predicted
+
+    **Assessment Summary (Past Data)**:
+    Detailed information on past assessments will be provided. Use this information to make predictions for the next N assessments.
+
+    **Instructions**:
+    - Predict the number of open items and red flags for the next N assessments if they are conducted on a weekly, bi-weekly, or quarterly basis.
+    - Use the historical summary statistics provided to guide your predictions.
+    - Return the response in the following JSON format, with exactly N entries in "predictions", ordered from the next assessment onwards.
+
+    **Response Format** (example for N = 2; X and Y are integers):
+
+    {
+        "predictions": [
+            {
+                "AssessmentN": {
+                    "weekly": {"open_items": X, "red_flags": Y},
+                    "biweekly": {"open_items": X, "red_flags": Y},
+                    "quarterly": {"open_items": X, "red_flags": Y}
+                }
+            },
+            {
+                "AssessmentN": {
+                    "weekly": {"open_items": X, "red_flags": Y},
+                    "biweekly": {"open_items": X, "red_flags": Y},
+                    "quarterly": {"open_items": X, "red_flags": Y}
+                }
+            }
+        ]
+    }
+
+    Ensure each assessment is provided with three predictions: one for Weekly, one for Bi-Weekly, and one for Quarterly assessments.
+    """
+
+    return prompt
diff --git a/src/services/chatbot.py b/src/services/chatbot.py
index d9cffbd..493dba8 100644
--- a/src/services/chatbot.py
+++ b/src/services/chatbot.py
@@ -7,6 +7,7 @@ from src.prompts.sops import *
from src.prompts.chatbot import *
from src.models.sop_response_schemas import *
from src.models.bot_response_schema import *
+from scripts.assessment_data import generate_summary_stats_v2
from dotenv import load_dotenv
load_dotenv()
@@ -52,7 +53,7 @@ class Chatbot:
}
],
response_format=ValidateWorker,
- max_tokens=4096,
+ max_tokens=1024,
temperature=0.1
)
@@ -64,3 +65,121 @@ class Chatbot:
except Exception as e:
print(f"An error occurred: {e}")
return None
+
+
+
+
+    def predict_based_on_past_assessment(self, query, company_info, companyid) -> "dict | None":
+        """Answer a free-form query using a company's past assessment statistics.
+
+        Reads data/raw/erp_company_assessment/<companyid>_raw_data.csv,
+        summarises it with generate_summary_stats_v2, builds the system prompt
+        and asks the configured model (self.model) for a reply constrained to
+        the Result schema.
+
+        :param query: The question or query asked by the user.
+        :param company_info: General information about the company (name, size, departments, etc.).
+        :param companyid: Unique identifier of the company; must be a plain name with no path separators.
+        :return: The parsed JSON reply as a dict (Result layout), or None if any step fails.
+        """
+        try:
+            # companyid becomes part of a file name, so refuse anything that
+            # could point outside the assessment data directory.
+            company_key = str(companyid)
+            if company_key in ('', '.', '..') or os.path.basename(company_key) != company_key:
+                raise ValueError(f"Invalid company id: {companyid!r}")
+
+            # Define the path to the company's assessment data (stored as a CSV)
+            data_path = os.path.join('data', 'raw', 'erp_company_assessment', f'{company_key}_raw_data.csv')
+
+            # Generate summary statistics from the company's assessment data
+            summary_stats = generate_summary_stats_v2(file_path=data_path)
+
+            # Generate the prompt using the company info and the summary statistics
+            prompt = predict_based_past_assessment_prompt(
+                query=query,
+                company_info=company_info,
+                summary_stats=summary_stats
+            )
+
+            # Ask the model for a reply that follows the Result schema
+            response = self.client.beta.chat.completions.parse(
+                model=self.model,
+                messages=[
+                    {
+                        "role": "system",
+                        "content": f"{prompt}"
+                    },
+                    {
+                        "role": "user",
+                        "content": f"{query}",
+                    }
+                ],
+                response_format=Result,
+                max_tokens=1024,
+                temperature=0.1
+            )
+
+            # The message content is a JSON document; return it as a dict
+            extracted_text = json.loads(response.choices[0].message.content)
+
+            return extracted_text
+
+        except Exception as e:
+            # Best-effort contract: report the failure and signal it with None
+            print(f"An error occurred: {e}")
+            return None
+
+
+    def predict_next_n_assessment(self, company_info, companyid, N) -> "dict | None":
+        """Predict open items and red flags for a company's next N assessments.
+
+        Reads data/raw/erp_company_assessment/<companyid>_raw_data.csv,
+        summarises it with generate_summary_stats_v2 and asks the configured
+        model (self.model) for a reply constrained to the
+        AssessmentPredictionsResponse schema.
+
+        :param company_info: General information about the company (name, size, departments, etc.).
+        :param companyid: Unique identifier of the company; must be a plain name with no path separators.
+        :param N: Number of assessments to predict; must convert to a positive integer.
+        :return: Dict mapping "assessment_1" .. "assessment_k" to the per-cadence
+            predictions, where k is N or fewer if the model returned fewer
+            items; None if any step fails.
+        """
+        try:
+            # Validate before spending a model call on a request that cannot succeed.
+            count = int(N)
+            if count < 1:
+                raise ValueError(f"N must be a positive integer, got {N!r}")
+
+            # companyid becomes part of a file name, so refuse anything that
+            # could point outside the assessment data directory.
+            company_key = str(companyid)
+            if company_key in ('', '.', '..') or os.path.basename(company_key) != company_key:
+                raise ValueError(f"Invalid company id: {companyid!r}")
+
+            # Define the path to the company's assessment data (stored as a CSV)
+            data_path = os.path.join('data', 'raw', 'erp_company_assessment', f'{company_key}_raw_data.csv')
+
+            # Generate summary statistics from the company's assessment data
+            summary_stats = generate_summary_stats_v2(file_path=data_path)
+
+            # Static system prompt; company info, N and the statistics are
+            # passed as separate user messages below
+            prompt = predict_next_n_assessments_prompt()
+
+            response = self.client.beta.chat.completions.parse(
+                model=self.model,
+                messages=[
+                    {
+                        "role": "system",
+                        "content": f"{prompt}"
+                    },
+                    {
+                        "role": "user",
+                        "content": f"company info: {company_info}--> N-value is {N} ",
+                    },
+                    {
+                        "role": "user",
+                        "content": f"Summary stats: {summary_stats}",
+                    }
+                ],
+                response_format=AssessmentPredictionsResponse,
+                max_tokens=1024,
+                temperature=0.1
+            )
+
+            # The message content is a JSON document following the schema
+            extracted_text = json.loads(response.choices[0].message.content)
+
+            # Number the returned items from 1. Slicing (rather than indexing
+            # range(N)) keeps whatever was returned if the model produced
+            # fewer than N items instead of discarding everything.
+            returned = extracted_text["predictions"][:count]
+            predictions = {
+                f"assessment_{position}": item['AssessmentN']
+                for position, item in enumerate(returned, start=1)
+            }
+
+            return predictions
+
+        except Exception as e:
+            # Best-effort contract: report the failure and signal it with None
+            print(f"An error occurred: {e}")
+            return None
diff --git a/test.py b/test.py
index 11fbc78..09ca595 100644
--- a/test.py
+++ b/test.py
@@ -6,9 +6,7 @@ input_base_path = '/root/ds_erp_ai/data/raw/erp_assessment_prediction' # The ba
pipeline = CompanyModelPipeline(company_ids=company_ids, input_base_path=input_base_path)
pipeline.run_pipeline()'''
-from src.pipeline.inference import AssessmentInference
-
-
+'''from src.pipeline.inference import AssessmentInference
inference = AssessmentInference(
company_id="testid",num_assessments=2
@@ -18,3 +16,32 @@ result = inference.run()
print(result)
+'''
+'''
+response2 = bot.predict_next_n_assessment(
+ company_info=company_info,
+ companyid="testid",
+ N=3
+)
+
+print(f"Predictions {response2}")
+'''
+
+from src.services.chatbot import Chatbot
+# Manual smoke run of the chatbot's query-answering path.
+# Sample company profile passed to the chatbot as `company_info`.
+company_info = {
+    'company_name': "ABC Corp",
+    'company_size': "Medium", # Can be "Small", "Medium", or "Large"
+    'departments': ["Sales", "Marketing", "IT", "Finance", "HR", "Logistics"]
+}
+bot = Chatbot()
+# NOTE(review): presumably needs the "testid" raw-data CSV and the chatbot's
+# API credentials to be available - confirm before running.
+response = bot.predict_based_on_past_assessment(
+    query="Should i make my next assessment weekly or biweekly to meet up to deadline?",
+    company_info=company_info,
+    companyid="testid"
+)
+
+# Prints the returned value as-is.
+print(f"Result: {response}")
+
+
+
+