added bot prediction for assessments
This commit is contained in:
@@ -0,0 +1,11 @@
|
|||||||
|
Assessment_ID,Open_Items,Red_Flags,Assessment_Frequency,Assessment_Start_Date,Assessment_End_Date,Assessment_Area,Assessment_Status,Assessment_Admin
|
||||||
|
1,3,1,Weekly,2023-01-01,2023-01-07,Deployment,Completed,Admin A
|
||||||
|
2,4,2,Bi-Weekly,2023-01-16,2023-01-22,Communication,Completed,Admin B
|
||||||
|
3,2,0,Weekly,2023-01-31,2023-02-06,Deployment,Completed,Admin A
|
||||||
|
4,5,1,Quarterly,2023-02-15,2023-02-21,Communication,In Progress,Admin B
|
||||||
|
5,1,0,Bi-Weekly,2023-03-02,2023-03-08,Deployment,Completed,Admin A
|
||||||
|
6,3,3,Weekly,2023-03-17,2023-03-23,Deployment,Completed,Admin A
|
||||||
|
7,2,2,Quarterly,2023-04-01,2023-04-07,Communication,Incomplete,Admin B
|
||||||
|
8,4,1,Bi-Weekly,2023-04-16,2023-04-22,Deployment,Completed,Admin A
|
||||||
|
9,5,1,Weekly,2023-05-01,2023-05-07,Communication,In Progress,Admin B
|
||||||
|
10,3,2,Quarterly,2023-05-16,2023-05-22,Deployment,Completed,Admin A
|
||||||
@@ -0,0 +1,11 @@
|
|||||||
|
Assessment_ID,Open_Items,Red_Flags,Assessment_Frequency,Assessment_Start_Date,Assessment_End_Date,Assessment_Area,Assessment_Status,Assessment_Admin,Department
|
||||||
|
1,3,1,Weekly,2023-01-01,2023-01-07,Deployment,Completed,Admin A,IT
|
||||||
|
2,4,2,Bi-Weekly,2023-01-16,2023-01-22,Communication,Completed,Admin B,HR
|
||||||
|
3,2,0,Weekly,2023-01-31,2023-02-06,Deployment,Completed,Admin A,Finance
|
||||||
|
4,5,1,Quarterly,2023-02-15,2023-02-21,Communication,In Progress,Admin B,IT
|
||||||
|
5,1,0,Bi-Weekly,2023-03-02,2023-03-08,Deployment,Completed,Admin A,HR
|
||||||
|
6,3,3,Weekly,2023-03-17,2023-03-23,Deployment,Completed,Admin A,Finance
|
||||||
|
7,2,2,Quarterly,2023-04-01,2023-04-07,Communication,Incomplete,Admin B,IT
|
||||||
|
8,4,1,Bi-Weekly,2023-04-16,2023-04-22,Deployment,Completed,Admin A,HR
|
||||||
|
9,5,1,Weekly,2023-05-01,2023-05-07,Communication,In Progress,Admin B,Finance
|
||||||
|
10,3,2,Quarterly,2023-05-16,2023-05-22,Deployment,Completed,Admin A,IT
|
||||||
|
@@ -0,0 +1,11 @@
|
|||||||
|
Assessment_ID,Open_Items,Red_Flags,Assessment_Frequency,Assessment_Start_Date,Assessment_End_Date,Assessment_Area,Assessment_Status,Assessment_Admin
|
||||||
|
1,3,1,Weekly,2023-01-01,2023-01-07,Deployment,Completed,Admin A
|
||||||
|
2,4,2,Bi-Weekly,2023-01-16,2023-01-22,Communication,Completed,Admin B
|
||||||
|
3,2,0,Weekly,2023-01-31,2023-02-06,Deployment,Completed,Admin A
|
||||||
|
4,5,1,Quarterly,2023-02-15,2023-02-21,Communication,In Progress,Admin B
|
||||||
|
5,1,0,Bi-Weekly,2023-03-02,2023-03-08,Deployment,Completed,Admin A
|
||||||
|
6,3,3,Weekly,2023-03-17,2023-03-23,Deployment,Completed,Admin A
|
||||||
|
7,2,2,Quarterly,2023-04-01,2023-04-07,Communication,Incomplete,Admin B
|
||||||
|
8,4,1,Bi-Weekly,2023-04-16,2023-04-22,Deployment,Completed,Admin A
|
||||||
|
9,5,1,Weekly,2023-05-01,2023-05-07,Communication,In Progress,Admin B
|
||||||
|
10,3,2,Quarterly,2023-05-16,2023-05-22,Deployment,Completed,Admin A
|
||||||
|
@@ -2,22 +2,204 @@
|
|||||||
"cells": [
|
"cells": [
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 8,
|
"execution_count": 1,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"# Create a dummy dataset with past 5 assessments\n",
|
|
||||||
"import pandas as pd\n",
|
"import pandas as pd\n",
|
||||||
|
"\n",
|
||||||
"data_dummy = {\n",
|
"data_dummy = {\n",
|
||||||
" 'start_date': pd.date_range(start='2023-01-01', periods=5, freq='7D'),\n",
|
" 'start_date': pd.date_range(start='2023-01-01', periods=12, freq='7D'),\n",
|
||||||
" 'end_date': pd.date_range(start='2023-01-02', periods=5, freq='7D'),\n",
|
" 'end_date': pd.date_range(start='2023-01-02', periods=12, freq='7D'),\n",
|
||||||
" 'open_items': [10, 12, 11, 9, 13],\n",
|
" 'open_items': [10, 12, 11, 9, 13, 14, 15, 16, 12, 11, 10, 9],\n",
|
||||||
" 'red_flags': [2, 1, 3, 1, 4],\n",
|
" 'red_flags': [2, 1, 3, 1, 4, 2, 1, 3, 2, 1, 4, 3],\n",
|
||||||
" 'num_employees': [30, 25, 28, 30, 27],\n",
|
" 'num_employees': [30, 25, 28, 30, 27, 26, 31, 29, 25, 30, 27, 26],\n",
|
||||||
" 'assessment_type': ['weekly', 'biweekly', 'quarterly', 'weekly', 'biweekly']\n",
|
" 'assessment_type': ['weekly', 'biweekly', 'quarterly', 'weekly', 'biweekly', \n",
|
||||||
|
" 'weekly', 'quarterly', 'biweekly', 'weekly', 'quarterly', 'weekly', 'biweekly']\n",
|
||||||
"}\n",
|
"}\n",
|
||||||
"\n",
|
"\n",
|
||||||
"df_dummy = pd.DataFrame(data_dummy)"
|
"df_dummy = pd.DataFrame(data_dummy)\n"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 2,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"data": {
|
||||||
|
"text/html": [
|
||||||
|
"<div>\n",
|
||||||
|
"<style scoped>\n",
|
||||||
|
" .dataframe tbody tr th:only-of-type {\n",
|
||||||
|
" vertical-align: middle;\n",
|
||||||
|
" }\n",
|
||||||
|
"\n",
|
||||||
|
" .dataframe tbody tr th {\n",
|
||||||
|
" vertical-align: top;\n",
|
||||||
|
" }\n",
|
||||||
|
"\n",
|
||||||
|
" .dataframe thead th {\n",
|
||||||
|
" text-align: right;\n",
|
||||||
|
" }\n",
|
||||||
|
"</style>\n",
|
||||||
|
"<table border=\"1\" class=\"dataframe\">\n",
|
||||||
|
" <thead>\n",
|
||||||
|
" <tr style=\"text-align: right;\">\n",
|
||||||
|
" <th></th>\n",
|
||||||
|
" <th>start_date</th>\n",
|
||||||
|
" <th>end_date</th>\n",
|
||||||
|
" <th>open_items</th>\n",
|
||||||
|
" <th>red_flags</th>\n",
|
||||||
|
" <th>num_employees</th>\n",
|
||||||
|
" <th>assessment_type</th>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" </thead>\n",
|
||||||
|
" <tbody>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>0</th>\n",
|
||||||
|
" <td>2023-01-01</td>\n",
|
||||||
|
" <td>2023-01-02</td>\n",
|
||||||
|
" <td>10</td>\n",
|
||||||
|
" <td>2</td>\n",
|
||||||
|
" <td>30</td>\n",
|
||||||
|
" <td>weekly</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>1</th>\n",
|
||||||
|
" <td>2023-01-08</td>\n",
|
||||||
|
" <td>2023-01-09</td>\n",
|
||||||
|
" <td>12</td>\n",
|
||||||
|
" <td>1</td>\n",
|
||||||
|
" <td>25</td>\n",
|
||||||
|
" <td>biweekly</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>2</th>\n",
|
||||||
|
" <td>2023-01-15</td>\n",
|
||||||
|
" <td>2023-01-16</td>\n",
|
||||||
|
" <td>11</td>\n",
|
||||||
|
" <td>3</td>\n",
|
||||||
|
" <td>28</td>\n",
|
||||||
|
" <td>quarterly</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>3</th>\n",
|
||||||
|
" <td>2023-01-22</td>\n",
|
||||||
|
" <td>2023-01-23</td>\n",
|
||||||
|
" <td>9</td>\n",
|
||||||
|
" <td>1</td>\n",
|
||||||
|
" <td>30</td>\n",
|
||||||
|
" <td>weekly</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>4</th>\n",
|
||||||
|
" <td>2023-01-29</td>\n",
|
||||||
|
" <td>2023-01-30</td>\n",
|
||||||
|
" <td>13</td>\n",
|
||||||
|
" <td>4</td>\n",
|
||||||
|
" <td>27</td>\n",
|
||||||
|
" <td>biweekly</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>5</th>\n",
|
||||||
|
" <td>2023-02-05</td>\n",
|
||||||
|
" <td>2023-02-06</td>\n",
|
||||||
|
" <td>14</td>\n",
|
||||||
|
" <td>2</td>\n",
|
||||||
|
" <td>26</td>\n",
|
||||||
|
" <td>weekly</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>6</th>\n",
|
||||||
|
" <td>2023-02-12</td>\n",
|
||||||
|
" <td>2023-02-13</td>\n",
|
||||||
|
" <td>15</td>\n",
|
||||||
|
" <td>1</td>\n",
|
||||||
|
" <td>31</td>\n",
|
||||||
|
" <td>quarterly</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>7</th>\n",
|
||||||
|
" <td>2023-02-19</td>\n",
|
||||||
|
" <td>2023-02-20</td>\n",
|
||||||
|
" <td>16</td>\n",
|
||||||
|
" <td>3</td>\n",
|
||||||
|
" <td>29</td>\n",
|
||||||
|
" <td>biweekly</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>8</th>\n",
|
||||||
|
" <td>2023-02-26</td>\n",
|
||||||
|
" <td>2023-02-27</td>\n",
|
||||||
|
" <td>12</td>\n",
|
||||||
|
" <td>2</td>\n",
|
||||||
|
" <td>25</td>\n",
|
||||||
|
" <td>weekly</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>9</th>\n",
|
||||||
|
" <td>2023-03-05</td>\n",
|
||||||
|
" <td>2023-03-06</td>\n",
|
||||||
|
" <td>11</td>\n",
|
||||||
|
" <td>1</td>\n",
|
||||||
|
" <td>30</td>\n",
|
||||||
|
" <td>quarterly</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>10</th>\n",
|
||||||
|
" <td>2023-03-12</td>\n",
|
||||||
|
" <td>2023-03-13</td>\n",
|
||||||
|
" <td>10</td>\n",
|
||||||
|
" <td>4</td>\n",
|
||||||
|
" <td>27</td>\n",
|
||||||
|
" <td>weekly</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>11</th>\n",
|
||||||
|
" <td>2023-03-19</td>\n",
|
||||||
|
" <td>2023-03-20</td>\n",
|
||||||
|
" <td>9</td>\n",
|
||||||
|
" <td>3</td>\n",
|
||||||
|
" <td>26</td>\n",
|
||||||
|
" <td>biweekly</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" </tbody>\n",
|
||||||
|
"</table>\n",
|
||||||
|
"</div>"
|
||||||
|
],
|
||||||
|
"text/plain": [
|
||||||
|
" start_date end_date open_items red_flags num_employees assessment_type\n",
|
||||||
|
"0 2023-01-01 2023-01-02 10 2 30 weekly\n",
|
||||||
|
"1 2023-01-08 2023-01-09 12 1 25 biweekly\n",
|
||||||
|
"2 2023-01-15 2023-01-16 11 3 28 quarterly\n",
|
||||||
|
"3 2023-01-22 2023-01-23 9 1 30 weekly\n",
|
||||||
|
"4 2023-01-29 2023-01-30 13 4 27 biweekly\n",
|
||||||
|
"5 2023-02-05 2023-02-06 14 2 26 weekly\n",
|
||||||
|
"6 2023-02-12 2023-02-13 15 1 31 quarterly\n",
|
||||||
|
"7 2023-02-19 2023-02-20 16 3 29 biweekly\n",
|
||||||
|
"8 2023-02-26 2023-02-27 12 2 25 weekly\n",
|
||||||
|
"9 2023-03-05 2023-03-06 11 1 30 quarterly\n",
|
||||||
|
"10 2023-03-12 2023-03-13 10 4 27 weekly\n",
|
||||||
|
"11 2023-03-19 2023-03-20 9 3 26 biweekly"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"execution_count": 2,
|
||||||
|
"metadata": {},
|
||||||
|
"output_type": "execute_result"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"df_dummy"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 4,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"df_dummy.to_csv(\"test_data.csv\",index=False)"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@@ -1399,6 +1581,45 @@
|
|||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": []
|
"source": []
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 2,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"name": "stdout",
|
||||||
|
"output_type": "stream",
|
||||||
|
"text": [
|
||||||
|
"Dummy assessment data has been saved as dummy_company_asseement_data.csv.\n"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"import pandas as pd\n",
|
||||||
|
"\n",
|
||||||
|
"# Create dummy assessment data\n",
|
||||||
|
"data = {\n",
|
||||||
|
" 'Assessment_ID': range(1, 11),\n",
|
||||||
|
" 'Open_Items': [3, 4, 2, 5, 1, 3, 2, 4, 5, 3],\n",
|
||||||
|
" 'Red_Flags': [1, 2, 0, 1, 0, 3, 2, 1, 1, 2],\n",
|
||||||
|
" 'Assessment_Frequency': ['Weekly', 'Bi-Weekly', 'Weekly', 'Quarterly', 'Bi-Weekly', 'Weekly', 'Quarterly', 'Bi-Weekly', 'Weekly', 'Quarterly'],\n",
|
||||||
|
" 'Assessment_Start_Date': pd.date_range(start='2023-01-01', periods=10, freq='15D'),\n",
|
||||||
|
" 'Assessment_End_Date': pd.date_range(start='2023-01-07', periods=10, freq='15D'),\n",
|
||||||
|
" 'Assessment_Area': ['Deployment', 'Communication', 'Deployment', 'Communication', 'Deployment', 'Deployment', 'Communication', 'Deployment', 'Communication', 'Deployment'],\n",
|
||||||
|
" 'Assessment_Status': ['Completed', 'Completed', 'Completed', 'In Progress', 'Completed', 'Completed', 'Incomplete', 'Completed', 'In Progress', 'Completed'],\n",
|
||||||
|
" 'Assessment_Admin': ['Admin A', 'Admin B', 'Admin A', 'Admin B', 'Admin A', 'Admin A', 'Admin B', 'Admin A', 'Admin B', 'Admin A']\n",
|
||||||
|
"}\n",
|
||||||
|
"\n",
|
||||||
|
"# Create DataFrame\n",
|
||||||
|
"df = pd.DataFrame(data)\n",
|
||||||
|
"\n",
|
||||||
|
"# Save DataFrame to CSV\n",
|
||||||
|
"csv_file_path = 'dummy_company_asseement_data.csv'\n",
|
||||||
|
"df.to_csv(csv_file_path, index=False)\n",
|
||||||
|
"\n",
|
||||||
|
"print(f\"Dummy assessment data has been saved as {csv_file_path}.\")\n"
|
||||||
|
]
|
||||||
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": null,
|
"execution_count": null,
|
||||||
|
|||||||
@@ -4,4 +4,4 @@ app = create_app()
|
|||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
app.run(debug=True, port=5401)
|
app.run(debug=True, port=5402)
|
||||||
|
|||||||
@@ -0,0 +1,70 @@
|
|||||||
|
import pandas as pd
|
||||||
|
|
||||||
|
def _status_rate_by_frequency(df, status):
    """Return {frequency: share of assessments whose status equals ``status``}, rounded to 2 dp."""
    # The mean of a boolean mask grouped by frequency is the per-frequency rate.
    # Frequencies without any matching row come out as 0.0, so no NaN filling is needed.
    has_status = df['Assessment_Status'].eq(status)
    return has_status.groupby(df['Assessment_Frequency']).mean().round(2).to_dict()


def generate_summary_stats_v2(file_path):
    """Build a nested dictionary of summary statistics from an assessment CSV.

    The CSV must provide the columns ``Open_Items``, ``Red_Flags``,
    ``Assessment_Frequency``, ``Assessment_Start_Date``, ``Assessment_End_Date``,
    ``Assessment_Area``, ``Assessment_Status`` and ``Assessment_Admin``.

    :param file_path: Path (or file-like object) handed to ``pandas.read_csv``.
    :return: Dict with the sections 'Open Items and Red Flags',
        'Assessment Frequency', 'Assessment Start and End Dates',
        'Assessment Areas', 'Assessment Status' and 'Assessment Admin'.
        'Most Common Area with Red Flags' is None when no assessment has a red
        flag; 'Average Time Between Assessments' is None when fewer than two
        assessments exist.
    :raises ValueError: If the file contains no assessment rows.
    """
    df = pd.read_csv(file_path)
    if df.empty:
        raise ValueError("Assessment data contains no rows; cannot compute summary statistics.")

    # Ensure date columns are real datetimes before doing date arithmetic
    df['Assessment_Start_Date'] = pd.to_datetime(df['Assessment_Start_Date'])
    df['Assessment_End_Date'] = pd.to_datetime(df['Assessment_End_Date'])

    # Length of each assessment in whole days (computed once, reused below)
    durations = (df['Assessment_End_Date'] - df['Assessment_Start_Date']).dt.days

    # Gap between consecutive assessment start dates. This is distinct from the
    # duration of a single assessment, which is reported separately.
    start_gaps = df['Assessment_Start_Date'].sort_values().diff().dt.days.dropna()
    average_gap = round(start_gaps.mean(), 2) if not start_gaps.empty else None

    # mode() is empty when no assessment has a red flag, so guard the lookup
    red_flag_areas = df.loc[df['Red_Flags'] > 0, 'Assessment_Area'].mode()
    most_common_red_flag_area = red_flag_areas.iloc[0] if not red_flag_areas.empty else None

    status = df['Assessment_Status']
    by_area = df.groupby('Assessment_Area')
    by_admin = df.groupby('Assessment_Admin')

    summary_stats = {
        'Open Items and Red Flags': {
            'Total Open Items': round(df['Open_Items'].sum(), 2),
            'Average Open Items per Assessment': round(df['Open_Items'].mean(), 2),
            'Total Red Flags': round(df['Red_Flags'].sum(), 2),
            'Average Red Flags per Assessment': round(df['Red_Flags'].mean(), 2),
            'Max Red Flags in a Single Assessment': round(df['Red_Flags'].max(), 2),
            'Most Common Area with Red Flags': most_common_red_flag_area
        },
        'Assessment Frequency': {
            'Assessment Type Breakdown': df['Assessment_Frequency'].value_counts(normalize=True).round(2).to_dict(),
            'Average Time Between Assessments': average_gap,
            'Average Assessment Duration': round(durations.mean(), 2),
            'Completion Rate by Frequency': _status_rate_by_frequency(df, 'Completed'),
            'In Progress Rate by Frequency': _status_rate_by_frequency(df, 'In Progress'),
            'Incomplete Rate by Frequency': _status_rate_by_frequency(df, 'Incomplete')
        },
        'Assessment Start and End Dates': {
            'Longest Assessment Duration (days)': durations.max(),
            'Shortest Assessment Duration (days)': durations.min(),
        },
        'Assessment Areas': {
            'Most Assessed Area': df['Assessment_Area'].value_counts().idxmax(),
            'Most Open Items in Area': by_area['Open_Items'].sum().idxmax(),
            'Area with Most Red Flags': by_area['Red_Flags'].sum().idxmax()
        },
        'Assessment Status': {
            'Assessment Status Distribution': status.value_counts(normalize=True).round(2).to_dict(),
            'Incomplete Assessments': int(status.eq('Incomplete').sum()),
            'In Progress Assessments': int(status.eq('In Progress').sum())
        },
        'Assessment Admin': {
            'Most Frequent Admin': df['Assessment_Admin'].mode()[0],
            'Admin with Fewest Red Flags': by_admin['Red_Flags'].sum().idxmin(),
            # NOTE: ranked by the *average* open items per assessment, not the total
            'Admin with Most Open Items': by_admin['Open_Items'].mean().idxmax()
        }
    }

    return summary_stats
|
||||||
@@ -4,6 +4,7 @@ from werkzeug.utils import secure_filename
|
|||||||
from src.services.chatbot import Chatbot
|
from src.services.chatbot import Chatbot
|
||||||
from src.utils.utils import delete_all_files_in_directory
|
from src.utils.utils import delete_all_files_in_directory
|
||||||
from src.utils.document_loader import load_document
|
from src.utils.document_loader import load_document
|
||||||
|
from src.services.chatbot import Chatbot
|
||||||
|
|
||||||
|
|
||||||
# Initialize the Blueprint
|
# Initialize the Blueprint
|
||||||
@@ -59,3 +60,64 @@ def validate_worker_document():
|
|||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
return jsonify({"error": "Internal Server Error", "message": str(e)}), 500
|
return jsonify({"error": "Internal Server Error", "message": str(e)}), 500
|
||||||
|
|
||||||
|
|
||||||
|
@bot.route('/predict_next_n_assessments', methods=['POST'])
def predict_next_n_assessments():
    """Predict open items and red flags for a company's next N assessments.

    Expects a JSON object with the keys ``company_info``, ``companyid`` and ``N``.

    Returns:
        200 with ``{"predictions": ...}`` on success,
        400 when the body is not a JSON object, a required key is missing,
        or the chatbot returns nothing,
        500 on any unexpected error.
    """
    try:
        # silent=True makes a missing or malformed JSON body yield None instead of
        # raising, so it is reported as a client error (400) below rather than
        # being swallowed by the generic 500 handler.
        data = request.get_json(silent=True)
        if not isinstance(data, dict):
            return jsonify({"error": "Missing data", "message": "Request body must be a JSON object."}), 400

        company_info = data.get('company_info')
        companyid = data.get('companyid')
        N = data.get('N')

        if not company_info or not companyid or N is None:
            return jsonify({"error": "Missing data", "message": "Company info, company ID, or N value not provided."}), 400

        # Instantiate the chatbot service
        chatbot = Chatbot()

        # Call the prediction method
        response = chatbot.predict_next_n_assessment(
            company_info=company_info,
            companyid=companyid,
            N=N
        )
        if not response:
            return jsonify({"error": "No predictions available", "message": "Prediction process failed."}), 400

        return jsonify({"predictions": response}), 200

    except Exception as e:
        return jsonify({"error": "Internal Server Error", "message": str(e)}), 500
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
@bot.route('/use_bot_predict_assessments', methods=['POST'])
def use_bot_predict_assessments():
    """Answer a free-text query using a company's past assessment data.

    Expects a JSON object with the keys ``company_info``, ``companyid`` and ``query``.

    Returns:
        200 with ``{"predictions": ...}`` on success,
        400 when the body is not a JSON object, a required key is missing,
        or the chatbot returns nothing,
        500 on any unexpected error.
    """
    try:
        # silent=True makes a missing or malformed JSON body yield None instead of
        # raising, so it is reported as a client error (400) below rather than
        # being swallowed by the generic 500 handler.
        data = request.get_json(silent=True)
        if not isinstance(data, dict):
            return jsonify({"error": "Missing data", "message": "Request body must be a JSON object."}), 400

        company_info = data.get('company_info')
        companyid = data.get('companyid')
        query = data.get('query')

        if not company_info or not companyid or query is None:
            return jsonify({"error": "Missing data", "message": "Company info, company ID, or query value not provided."}), 400

        # Instantiate the chatbot service
        chatbot = Chatbot()

        # Call the prediction method
        response = chatbot.predict_based_on_past_assessment(
            company_info=company_info,
            companyid=companyid,
            query=query
        )
        if not response:
            return jsonify({"error": "No predictions available", "message": "Prediction process failed."}), 400

        return jsonify({"predictions": response}), 200

    except Exception as e:
        return jsonify({"error": "Internal Server Error", "message": str(e)}), 500
|
||||||
@@ -5,3 +5,20 @@ from typing import List, Dict
|
|||||||
class ValidateWorker(BaseModel):
|
class ValidateWorker(BaseModel):
|
||||||
result:str
|
result:str
|
||||||
|
|
||||||
|
class Result(BaseModel):
    # Structured-output schema with a single text field holding the bot's answer.
    response: str
|
||||||
|
|
||||||
|
class Cases(BaseModel):
    # Pair of predicted counts for a single assessment.
    open_items: int  # number of open items
    red_flags: int  # number of red flags
|
||||||
|
|
||||||
|
class AssessmentsFrequency(BaseModel):
    # One Cases entry per assessment cadence.
    weekly: Cases
    biweekly: Cases
    quarterly: Cases
|
||||||
|
|
||||||
|
class AssessmentPrediction(BaseModel):
    # Prediction for one upcoming assessment, broken down by cadence.
    AssessmentN: AssessmentsFrequency
|
||||||
|
|
||||||
|
class AssessmentPredictionsResponse(BaseModel):
    # Top-level schema: a list with one AssessmentPrediction per predicted assessment.
    predictions: List[AssessmentPrediction]
|
||||||
|
|||||||
@@ -4,11 +4,6 @@ import logging
|
|||||||
from logging.handlers import RotatingFileHandler
|
from logging.handlers import RotatingFileHandler
|
||||||
|
|
||||||
|
|
||||||
handler = RotatingFileHandler('/root/ds_erp_ai/logs/prediction_pipeline.log', maxBytes=100000, backupCount=3)
|
|
||||||
logger = logging.getLogger(__name__)
|
|
||||||
logger.setLevel(logging.INFO)
|
|
||||||
logger.addHandler(handler)
|
|
||||||
|
|
||||||
class DataPreprocessor:
|
class DataPreprocessor:
|
||||||
def __init__(self, input_path, company_id):
|
def __init__(self, input_path, company_id):
|
||||||
self.input_path = input_path
|
self.input_path = input_path
|
||||||
|
|||||||
@@ -33,7 +33,8 @@ class ModelTrainer:
|
|||||||
y = self.df[['open_items', 'red_flags']] # Multi-target for open items and red flags
|
y = self.df[['open_items', 'red_flags']] # Multi-target for open items and red flags
|
||||||
|
|
||||||
# Split into training and test sets with 10% as test size
|
# Split into training and test sets with 10% as test size
|
||||||
X_train, self.X_test, y_train, self.y_test = train_test_split(X, y, test_size=0.1, random_state=42)
|
X_train, self.X_test, y_train, self.y_test = train_test_split(X, y, test_size=0.1, random_state=42, shuffle=False)
|
||||||
|
|
||||||
|
|
||||||
# Train the model
|
# Train the model
|
||||||
self.model.fit(X_train, y_train)
|
self.model.fit(X_train, y_train)
|
||||||
|
|||||||
@@ -1,3 +1,5 @@
|
|||||||
|
|
||||||
|
|
||||||
def validate_worker_prompt() -> str:
|
def validate_worker_prompt() -> str:
|
||||||
return """
|
return """
|
||||||
You are a worker in the company "Validate" where you are asked a specific yes or no question:
|
You are a worker in the company "Validate" where you are asked a specific yes or no question:
|
||||||
@@ -15,4 +17,133 @@ def validate_worker_prompt() -> str:
|
|||||||
result:"validated"
|
result:"validated"
|
||||||
}
|
}
|
||||||
"""
|
"""
|
||||||
|
def _format_percent(share):
    """Render a 0-1 share as a percentage number without float noise (0.3 -> '30')."""
    # 0.3 * 100 evaluates to 30.000000000000004; round first, then drop trailing zeros.
    return f"{round(share * 100, 2):g}"


def predict_based_past_assessment_prompt(query,company_info, summary_stats):
    """Build the system prompt for answering a query from past assessment data.

    :param query: The user's question; embedded verbatim in the prompt.
    :param company_info: Dict with the keys 'company_name', 'company_size' and
        'departments' (an iterable of names, or a single name as a string).
    :param summary_stats: Nested dict as produced by ``generate_summary_stats_v2``.
    :return: The fully rendered prompt string.
    :raises KeyError: If a required key is missing from ``company_info`` or
        ``summary_stats``.
    """
    # Extract company information from the dictionary
    company_name = company_info['company_name']
    company_size = company_info['company_size']
    departments = company_info['departments']

    # A bare string would otherwise be joined character by character
    if isinstance(departments, str):
        departments = [departments]
    department_list = ', '.join(str(department) for department in departments)

    # Pull the sections out once so the template below stays readable
    flags = summary_stats['Open Items and Red Flags']
    frequency = summary_stats['Assessment Frequency']
    dates = summary_stats['Assessment Start and End Dates']
    areas = summary_stats['Assessment Areas']
    admins = summary_stats['Assessment Admin']
    type_breakdown = frequency['Assessment Type Breakdown']
    status_distribution = summary_stats['Assessment Status']['Assessment Status Distribution']

    # Create the prompt with the provided company info and summary statistics
    prompt = f"""
**Prompt for the Chatbot:**

**Context:**
You are an AI assistant working for {company_name}, and your primary responsibility is to provide **insights**, **predictions**, and **recommendations** based on the company's past assessment data and organizational structure. You are not allowed to respond to any queries outside of this domain.

**General Company Information:**
- **Company Name**: {company_name}
- **Company Size**: {company_size} (e.g., Small, Medium, Large)
- **Departments**:
{department_list}

**Assessment Summary**:
The following is a detailed summary of past assessments at {company_name}. Use this information to provide predictions and recommendations based on trends and data points.

- **Open Items and Red Flags**:
  - Total Open Items: {flags['Total Open Items']}
  - Average Open Items per Assessment: {flags['Average Open Items per Assessment']}
  - Total Red Flags: {flags['Total Red Flags']}
  - Average Red Flags per Assessment: {flags['Average Red Flags per Assessment']}
  - Max Red Flags in a Single Assessment: {flags['Max Red Flags in a Single Assessment']}
  - Most Common Area with Red Flags: {flags['Most Common Area with Red Flags']}

- **Assessment Frequency**:
  - Weekly: {_format_percent(type_breakdown.get('Weekly', 0))}%
  - Bi-Weekly: {_format_percent(type_breakdown.get('Bi-Weekly', 0))}%
  - Quarterly: {_format_percent(type_breakdown.get('Quarterly', 0))}%
  - Average Time Between Assessments: {frequency['Average Time Between Assessments']} days
  - Average Assessment Duration: {frequency['Average Assessment Duration']} days

- **Assessment Start and End Dates**:
  - Longest Assessment Duration: {dates['Longest Assessment Duration (days)']} days
  - Shortest Assessment Duration: {dates['Shortest Assessment Duration (days)']} days

- **Assessment Areas**:
  - Most Assessed Area: {areas['Most Assessed Area']}
  - Most Open Items in Area: {areas['Most Open Items in Area']}
  - Area with Most Red Flags: {areas['Area with Most Red Flags']}

- **Assessment Status**:
  - Completed: {_format_percent(status_distribution.get('Completed', 0))}%
  - In Progress: {_format_percent(status_distribution.get('In Progress', 0))}%
  - Incomplete: {_format_percent(status_distribution.get('Incomplete', 0))}%

- **Assessment Admin**:
  - Most Frequent Admin: {admins['Most Frequent Admin']}
  - Admin with Fewest Red Flags: {admins['Admin with Fewest Red Flags']}
  - Admin with Most Open Items: {admins['Admin with Most Open Items']}

**Instructions:**
Use the above information to answer user queries. You should:
- Analyze historical data to identify trends and problem areas.
- Predict potential outcomes for future assessments based on past performance (e.g., meeting deadlines, reducing red flags).
- Provide **actionable recommendations** that can help improve performance in future assessments.

**User Query**:
"{query}"

**Your Response**:
Predict and provide recommendations based on the company’s historical data, focusing on the areas most relevant to the query. Ensure the response is based on past trends and performance issues.

**Examples of Insightful Responses**:
- "To improve your performance in the next assessment, you should focus on reducing red flags in the Communication department, as it has had the most issues."
- "Based on the company's past performance, there is a 70% chance that you will meet the deadline for the next weekly assessment. To ensure success, focus on completing open items in the IT department."
- "The data indicates that quarterly assessments have the highest rate of incomplete tasks. I recommend prioritizing quarterly assessment tasks to avoid falling behind."
"""

    return prompt
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
def predict_next_n_assessments_prompt(n=None):
    """Return the system prompt for predicting the next N assessments.

    :param n: Optional number of assessments to predict. When given, every
        ``{n}`` placeholder in the prompt is replaced by it. When omitted, the
        literal ``{n}`` placeholders are left in place (the historical
        behaviour), so callers that substitute them themselves keep working.
    :return: The prompt text.
    """
    # Plain (non f-) string on purpose: the JSON example below uses literal braces.
    prompt = """
**Prompt for the Chatbot:**

**Context:**
You are an AI assistant responsible for analyzing the past assessment data of a company, and your primary responsibility is to provide **predictions** for the next {n} assessments.
These assessments can occur on a **weekly**, **bi-weekly**, or **quarterly** basis. Use the company's past performance to predict the following for each of the next {n} assessments:
- **Number of Open Items**.
- **Number of Red Flags**.
- **Predictions for Weekly, Bi-Weekly, and Quarterly assessments**.

**Input**:
- company basic info
- past assessment statistics
- N - number of next assessments to be predicted

**General Company Information:**
The company's basic information will be provided.

**Assessment Summary (Past Data)**:
The detailed information on past assessments will be provided. Use this information to make predictions for the next {n} assessments.

**Instructions**:
- Predict the number of open items and red flags for the next {n} assessments if they are conducted on a weekly, bi-weekly, or quarterly basis.
- Use the historical summary statistics provided above to guide your predictions.
- Return the response in the following JSON format:

**Response Format** (example shown for N = 2):

```json
{
  "assessment 1": [
    {
      "weekly": {"open_items": X, "red_flags": Y},
      "biweekly": {"open_items": X, "red_flags": Y},
      "quarterly": {"open_items": X, "red_flags": Y}
    }
  ],
  "assessment 2": [
    {
      "weekly": {"open_items": X, "red_flags": Y},
      "biweekly": {"open_items": X, "red_flags": Y},
      "quarterly": {"open_items": X, "red_flags": Y}
    }
  ]
}
```
Ensure each assessment is provided with three predictions: one for Weekly, one for Bi-Weekly, and one for Quarterly assessments.
"""

    if n is not None:
        prompt = prompt.replace("{n}", str(n))

    return prompt
|
||||||
|
|||||||
+120
-1
@@ -7,6 +7,7 @@ from src.prompts.sops import *
|
|||||||
from src.prompts.chatbot import *
|
from src.prompts.chatbot import *
|
||||||
from src.models.sop_response_schemas import *
|
from src.models.sop_response_schemas import *
|
||||||
from src.models.bot_response_schema import *
|
from src.models.bot_response_schema import *
|
||||||
|
from scripts.assessment_data import generate_summary_stats_v2
|
||||||
from dotenv import load_dotenv
|
from dotenv import load_dotenv
|
||||||
load_dotenv()
|
load_dotenv()
|
||||||
|
|
||||||
@@ -52,7 +53,7 @@ class Chatbot:
|
|||||||
}
|
}
|
||||||
],
|
],
|
||||||
response_format=ValidateWorker,
|
response_format=ValidateWorker,
|
||||||
max_tokens=4096,
|
max_tokens=1024,
|
||||||
temperature=0.1
|
temperature=0.1
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -64,3 +65,121 @@ class Chatbot:
|
|||||||
except Exception as e:
|
except Exception as e:
|
||||||
print(f"An error occurred: {e}")
|
print(f"An error occurred: {e}")
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
def predict_based_on_past_assessment(self, query, company_info, companyid) -> Result:
    """
    Answer a user question using a company's historical assessment data.

    Summary statistics are computed from the company's raw assessment CSV,
    combined with the company info into a system prompt, and sent to the
    chat model together with the user's query.

    :param query: The question asked by the user.
    :param company_info: General information about the company; forwarded to the prompt builder.
    :param companyid: Identifier used to locate ``<companyid>_raw_data.csv``.
    :return: The model reply decoded from JSON, or None if any step raises.
    """
    try:
        # Location of the company's raw assessment export (CSV)
        csv_path = os.path.join(
            'data', 'raw', 'erp_company_assessment', f'{companyid}_raw_data.csv'
        )

        # Condense the raw rows into summary statistics for the prompt
        stats = generate_summary_stats_v2(file_path=csv_path)

        system_prompt = predict_based_past_assessment_prompt(
            query=query,
            company_info=company_info,
            summary_stats=stats,
        )

        conversation = [
            {"role": "system", "content": f"{system_prompt}"},
            {"role": "user", "content": f"{query}"},
        ]

        # Ask the model for a reply constrained to the Result schema
        completion = self.client.beta.chat.completions.parse(
            model=self.model,
            messages=conversation,
            response_format=Result,
            max_tokens=1024,
            temperature=0.1,
        )

        # The message content is a JSON document; hand it back decoded
        return json.loads(completion.choices[0].message.content)

    except Exception as e:
        print(f"An error occurred: {e}")
        return None
|
||||||
|
|
||||||
|
|
||||||
|
def predict_next_n_assessment(self, company_info, companyid, N) -> "dict | None":
    """
    Predict open items and red flags for a company's next N assessments.

    Summary statistics are computed from the company's raw assessment CSV and
    sent to the chat model, together with the company info and N, under the
    fixed "next N assessments" system prompt. The structured reply is then
    re-keyed as ``assessment_1`` .. ``assessment_N``.

    :param company_info: General information about the company (name, size, departments, etc.).
    :param companyid: Identifier used to locate ``<companyid>_raw_data.csv``.
    :param N: Number of assessments to predict.
    :return: Dict mapping ``"assessment_<k>"`` to the ``AssessmentN`` entry of the
        k-th prediction returned by the model (at most N entries; empty when
        N is not positive), or None if any step raises.
    """
    try:
        # Nothing to predict: skip the file read and the model call entirely.
        if N <= 0:
            return {}

        # Location of the company's raw assessment export (CSV)
        data_path = os.path.join('data', 'raw', 'erp_company_assessment', f'{companyid}_raw_data.csv')

        # Generate summary statistics from the company's assessment data
        summary_stats = generate_summary_stats_v2(file_path=data_path)

        # The system prompt is static; company info, N and the statistics
        # travel in the user messages below.
        prompt = predict_next_n_assessments_prompt()

        response = self.client.beta.chat.completions.parse(
            model=self.model,
            messages=[
                {
                    "role": "system",
                    "content": f"{prompt}"
                },
                {
                    "role": "user",
                    "content": f"company info: {company_info}--> N-value is {N} ",
                },
                {
                    "role": "user",
                    "content": f"Summary stats: {summary_stats}",
                }
            ],
            response_format=AssessmentPredictionsResponse,
            max_tokens=1024,
            temperature=0.1
        )

        # The message content is a JSON document matching the response schema
        extracted_text = json.loads(response.choices[0].message.content)

        # Use what the model actually returned, capped at N, rather than
        # indexing range(N): a short reply would otherwise raise IndexError
        # and throw away every usable prediction.
        returned = extracted_text["predictions"][:N]
        if len(returned) < N:
            print(f"Expected {N} predictions but received {len(returned)}")

        # Re-key the predictions as assessment_1, assessment_2, ...
        return {
            f"assessment_{position}": item['AssessmentN']
            for position, item in enumerate(returned, start=1)
        }

    except Exception as e:
        print(f"An error occurred: {e}")
        return None
|
||||||
|
|||||||
@@ -6,9 +6,7 @@ input_base_path = '/root/ds_erp_ai/data/raw/erp_assessment_prediction' # The ba
|
|||||||
pipeline = CompanyModelPipeline(company_ids=company_ids, input_base_path=input_base_path)
|
pipeline = CompanyModelPipeline(company_ids=company_ids, input_base_path=input_base_path)
|
||||||
pipeline.run_pipeline()'''
|
pipeline.run_pipeline()'''
|
||||||
|
|
||||||
from src.pipeline.inference import AssessmentInference
|
'''from src.pipeline.inference import AssessmentInference
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
inference = AssessmentInference(
|
inference = AssessmentInference(
|
||||||
company_id="testid",num_assessments=2
|
company_id="testid",num_assessments=2
|
||||||
@@ -18,3 +16,32 @@ result = inference.run()
|
|||||||
|
|
||||||
|
|
||||||
print(result)
|
print(result)
|
||||||
|
'''
|
||||||
|
'''
|
||||||
|
response2 = bot.predict_next_n_assessment(
|
||||||
|
company_info=company_info,
|
||||||
|
companyid="testid",
|
||||||
|
N=3
|
||||||
|
)
|
||||||
|
|
||||||
|
print(f"Predictions {response2}")
|
||||||
|
'''
|
||||||
|
|
||||||
|
from src.services.chatbot import Chatbot
|
||||||
|
# Sample company profile used for the manual smoke run below.
company_info = dict(
    company_name="ABC Corp",
    company_size="Medium",  # One of "Small", "Medium", or "Large"
    departments=["Sales", "Marketing", "IT", "Finance", "HR", "Logistics"],
)

bot = Chatbot()

# Ask the bot a scheduling question grounded in the test company's history.
user_question = "Should i make my next assessment weekly or biweekly to meet up to deadline?"
response = bot.predict_based_on_past_assessment(
    query=user_question,
    company_info=company_info,
    companyid="testid",
)

print(f"Result: {response}")
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user