assessment predictions pipelines added

This commit is contained in:
2024-09-12 21:36:02 +00:00
parent 24823432b3
commit 45bc62c745
10 changed files with 1583 additions and 11 deletions
@@ -1,6 +1,13 @@
start_date,end_date,open_items,red_flags,num_employees,assessment_type_biweekly,assessment_type_quarterly,assessment_type_weekly,open_items_assessment_type_weekly_lag_1,open_items_assessment_type_biweekly_lag_1,open_items_assessment_type_quarterly_lag_1,open_items_weekly_ma_3,open_items_biweekly_ma_3,open_items_quarterly_ma_3,time_since_last_event,percentage_change_open_items
2023-01-01,2023-01-02,10,2,30,0,0,1,0.0,0.0,0.0,10.0,0.0,0.0,0.0,0.0
2023-01-08,2023-01-09,12,1,25,1,0,0,10.0,0.0,0.0,10.0,12.0,0.0,7.0,19.999999999999996
2023-01-15,2023-01-16,11,3,28,0,1,0,0.0,12.0,0.0,10.0,12.0,11.0,7.0,-8.333333333333337
2023-01-22,2023-01-23,9,1,30,0,0,1,0.0,0.0,11.0,9.0,12.0,11.0,7.0,-18.181818181818176
2023-01-29,2023-01-30,13,4,27,1,0,0,9.0,0.0,0.0,9.0,13.0,11.0,7.0,44.44444444444444
open_items,red_flags,num_employees,duration,assessment_type_biweekly,assessment_type_quarterly,assessment_type_weekly,open_items_assessment_type_weekly_lag_1,open_items_assessment_type_biweekly_lag_1,open_items_assessment_type_quarterly_lag_1,open_items_weekly_ma_3,open_items_biweekly_ma_3,open_items_quarterly_ma_3,percentage_change_open_items
10,2,30,1,0,0,1,0.0,0.0,0.0,10.0,0.0,0.0,0.0
12,1,25,1,1,0,0,10.0,0.0,0.0,10.0,12.0,0.0,19.999999999999996
11,3,28,1,0,1,0,0.0,12.0,0.0,10.0,12.0,11.0,-8.333333333333337
9,1,30,1,0,0,1,0.0,0.0,11.0,9.0,12.0,11.0,-18.181818181818176
13,4,27,1,1,0,0,9.0,0.0,0.0,9.0,13.0,11.0,44.44444444444444
14,2,26,1,0,0,1,0.0,13.0,0.0,11.5,13.0,0.0,7.692307692307687
15,1,31,1,0,1,0,14.0,0.0,0.0,14.0,13.0,15.0,7.14285714285714
16,3,29,1,1,0,0,0.0,0.0,15.0,14.0,16.0,15.0,6.666666666666665
12,2,25,1,0,0,1,0.0,16.0,0.0,12.0,16.0,15.0,-25.0
11,1,30,1,0,1,0,12.0,0.0,0.0,12.0,16.0,11.0,-8.333333333333337
10,4,27,1,0,0,1,0.0,0.0,11.0,11.0,0.0,11.0,-9.090909090909093
9,3,26,1,1,0,0,10.0,0.0,0.0,10.0,9.0,11.0,-9.999999999999998
1 start_date open_items end_date red_flags num_employees duration assessment_type_biweekly assessment_type_quarterly assessment_type_weekly open_items_assessment_type_weekly_lag_1 open_items_assessment_type_biweekly_lag_1 open_items_assessment_type_quarterly_lag_1 open_items_weekly_ma_3 open_items_biweekly_ma_3 open_items_quarterly_ma_3 percentage_change_open_items time_since_last_event
2 2023-01-01 10 2023-01-02 2 30 1 0 0 1 0.0 0.0 0.0 10.0 0.0 0.0 0.0 0.0
3 2023-01-08 12 2023-01-09 1 25 1 1 0 0 10.0 0.0 0.0 10.0 12.0 0.0 19.999999999999996 7.0
4 2023-01-15 11 2023-01-16 3 28 1 0 1 0 0.0 12.0 0.0 10.0 12.0 11.0 -8.333333333333337 7.0
5 2023-01-22 9 2023-01-23 1 30 1 0 0 1 0.0 0.0 11.0 9.0 12.0 11.0 -18.181818181818176 7.0
6 2023-01-29 13 2023-01-30 4 27 1 1 0 0 9.0 0.0 0.0 9.0 13.0 11.0 44.44444444444444 7.0
7 14 2 26 1 0 0 1 0.0 13.0 0.0 11.5 13.0 0.0 7.692307692307687
8 15 1 31 1 0 1 0 14.0 0.0 0.0 14.0 13.0 15.0 7.14285714285714
9 16 3 29 1 1 0 0 0.0 0.0 15.0 14.0 16.0 15.0 6.666666666666665
10 12 2 25 1 0 0 1 0.0 16.0 0.0 12.0 16.0 15.0 -25.0
11 11 1 30 1 0 1 0 12.0 0.0 0.0 12.0 16.0 11.0 -8.333333333333337
12 10 4 27 1 0 0 1 0.0 0.0 11.0 11.0 0.0 11.0 -9.090909090909093
13 9 3 26 1 1 0 0 10.0 0.0 0.0 10.0 9.0 11.0 -9.999999999999998
@@ -0,0 +1,13 @@
start_date,end_date,open_items,red_flags,num_employees,assessment_type
2023-01-01,2023-01-02,10,2,30,weekly
2023-01-08,2023-01-09,12,1,25,biweekly
2023-01-15,2023-01-16,11,3,28,quarterly
2023-01-22,2023-01-23,9,1,30,weekly
2023-01-29,2023-01-30,13,4,27,biweekly
2023-02-05,2023-02-06,14,2,26,weekly
2023-02-12,2023-02-13,15,1,31,quarterly
2023-02-19,2023-02-20,16,3,29,biweekly
2023-02-26,2023-02-27,12,2,25,weekly
2023-03-05,2023-03-06,11,1,30,quarterly
2023-03-12,2023-03-13,10,4,27,weekly
2023-03-19,2023-03-20,9,3,26,biweekly
1 start_date end_date open_items red_flags num_employees assessment_type
2 2023-01-01 2023-01-02 10 2 30 weekly
3 2023-01-08 2023-01-09 12 1 25 biweekly
4 2023-01-15 2023-01-16 11 3 28 quarterly
5 2023-01-22 2023-01-23 9 1 30 weekly
6 2023-01-29 2023-01-30 13 4 27 biweekly
7 2023-02-05 2023-02-06 14 2 26 weekly
8 2023-02-12 2023-02-13 15 1 31 quarterly
9 2023-02-19 2023-02-20 16 3 29 biweekly
10 2023-02-26 2023-02-27 12 2 25 weekly
11 2023-03-05 2023-03-06 11 1 30 quarterly
12 2023-03-12 2023-03-13 10 4 27 weekly
13 2023-03-19 2023-03-20 9 3 26 biweekly
@@ -0,0 +1,2 @@
open_items,red_flags,num_employees,duration,assessment_type_biweekly,assessment_type_quarterly,assessment_type_weekly,open_items_assessment_type_weekly_lag_1,open_items_assessment_type_biweekly_lag_1,open_items_assessment_type_quarterly_lag_1,open_items_weekly_ma_3,open_items_biweekly_ma_3,open_items_quarterly_ma_3,percentage_change_open_items
9,3,26,1,1,0,0,10.0,0.0,0.0,10.0,9.0,11.0,-9.999999999999998
1 open_items red_flags num_employees duration assessment_type_biweekly assessment_type_quarterly assessment_type_weekly open_items_assessment_type_weekly_lag_1 open_items_assessment_type_biweekly_lag_1 open_items_assessment_type_quarterly_lag_1 open_items_weekly_ma_3 open_items_biweekly_ma_3 open_items_quarterly_ma_3 percentage_change_open_items
2 9 3 26 1 1 0 0 10.0 0.0 0.0 10.0 9.0 11.0 -9.999999999999998
Binary file not shown.
+13
View File
@@ -0,0 +1,13 @@
,start_date,end_date,open_items,red_flags,num_employees,assessment_type
0,2023-01-01,2023-01-02,10,2,30,weekly
1,2023-01-08,2023-01-09,12,1,25,biweekly
2,2023-01-15,2023-01-16,11,3,28,quarterly
3,2023-01-22,2023-01-23,9,1,30,weekly
4,2023-01-29,2023-01-30,13,4,27,biweekly
5,2023-02-05,2023-02-06,14,2,26,weekly
6,2023-02-12,2023-02-13,15,1,31,quarterly
7,2023-02-19,2023-02-20,16,3,29,biweekly
8,2023-02-26,2023-02-27,12,2,25,weekly
9,2023-03-05,2023-03-06,11,1,30,quarterly
10,2023-03-12,2023-03-13,10,4,27,weekly
11,2023-03-19,2023-03-20,9,3,26,biweekly
File diff suppressed because it is too large Load Diff
+8
View File
@@ -1,5 +1,13 @@
import pandas as pd
import os
import logging
from logging.handlers import RotatingFileHandler
handler = RotatingFileHandler('/root/ds_erp_ai/logs/prediction_pipeline.log', maxBytes=100000, backupCount=3)
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)
logger.addHandler(handler)
class DataPreprocessor:
def __init__(self, input_path, company_id):
+85
View File
@@ -0,0 +1,85 @@
import pandas as pd
import joblib
import os
class AssessmentInference:
def __init__(self, company_id, num_assessments, model_dir='models'):
self.company_id = company_id
self.num_assessments = num_assessments
self.model_dir = model_dir
self.model = None
self.latest_data = None
def load_model(self):
# Load the trained model
model_path = os.path.join(self.model_dir, 'assessment_prediction', self.company_id, f'{self.company_id}_model.pkl')
self.model = joblib.load(model_path)
print(f"Model loaded from {model_path}")
def load_latest_data(self):
# Load the latest assessment data
latest_data_path = os.path.join(self.model_dir, 'assessment_prediction', self.company_id, f'{self.company_id}_latest_data.csv')
self.latest_data = pd.read_csv(latest_data_path)
print(f"Latest data loaded from {latest_data_path}")
def predict_next_assessment(self, current_data, assessment_type):
# Update assessment type (weekly, biweekly, quarterly) in the data for prediction
current_data['assessment_type_weekly'] = 1 if assessment_type == 'weekly' else 0
current_data['assessment_type_biweekly'] = 1 if assessment_type == 'biweekly' else 0
current_data['assessment_type_quarterly'] = 1 if assessment_type == 'quarterly' else 0
# Exclude target variables (open_items, red_flags) from the feature set
features = current_data.drop(columns=['open_items', 'red_flags'])
# Predict the next open items and red flags
prediction = self.model.predict(features)
open_items_pred, red_flags_pred = prediction[0]
# Ensure the predictions are integers by rounding
open_items_pred = int(round(open_items_pred))
red_flags_pred = int(round(red_flags_pred))
return {
'assessment_type': assessment_type,
'open_items': open_items_pred,
'red_flags': red_flags_pred
}
def predict_next_assessments(self):
predictions = []
current_data = self.latest_data.copy()
# Iteratively forecast the next assessments
for i in range(self.num_assessments):
print(f"\nForecasting assessment {i + 1}/{self.num_assessments}")
# Predict for weekly, biweekly, and quarterly for the same forecast step
weekly_prediction = self.predict_next_assessment(current_data, 'weekly')
biweekly_prediction = self.predict_next_assessment(current_data, 'biweekly')
quarterly_prediction = self.predict_next_assessment(current_data, 'quarterly')
# Append predictions for all types in one forecast step
predictions.append({
'forecast_step': i + 1,
'weekly': weekly_prediction,
'biweekly': biweekly_prediction,
'quarterly': quarterly_prediction
})
# Update the current data with the weekly prediction (or any of the predictions) for the next step
current_data['open_items'] = weekly_prediction['open_items']
current_data['red_flags'] = weekly_prediction['red_flags']
return predictions
def run(self):
self.load_model()
self.load_latest_data()
predictions = self.predict_next_assessments()
return predictions
# Example usage
#inference = AssessmentInference(company_id='testid', num_assessments=5)
#predictions = inference.run()
#print(predictions)
+17 -4
View File
@@ -1,7 +1,20 @@
# Example usage
from scripts.run_assessment_prediction_trainer import CompanyModelPipeline
company_ids = ['company_123', 'company_456', 'company_789']
input_base_path = '/root/ds_erp_ai/data/raw/dummy_assessment_data.csv' # The base path where the raw data for each company is stored
'''from scripts.run_assessment_prediction_trainer import CompanyModelPipeline
company_ids = ['testid']
input_base_path = '/root/ds_erp_ai/data/raw/erp_assessment_prediction' # The base path where the raw data for each company is stored
pipeline = CompanyModelPipeline(company_ids=company_ids, input_base_path=input_base_path)
pipeline.run_pipeline()
pipeline.run_pipeline()'''
from src.pipeline.inference import AssessmentInference
inference = AssessmentInference(
company_id="testid",num_assessments=2
)
result = inference.run()
print(result)