assessment predictions pipelines added

2024-09-12 21:36:02 +00:00
parent 24823432b3
commit 45bc62c745
10 changed files with 1583 additions and 11 deletions
@@ -1,6 +1,13 @@
-start_date,end_date,open_items,red_flags,num_employees,assessment_type_biweekly,assessment_type_quarterly,assessment_type_weekly,open_items_assessment_type_weekly_lag_1,open_items_assessment_type_biweekly_lag_1,open_items_assessment_type_quarterly_lag_1,open_items_weekly_ma_3,open_items_biweekly_ma_3,open_items_quarterly_ma_3,time_since_last_event,percentage_change_open_items
-2023-01-01,2023-01-02,10,2,30,0,0,1,0.0,0.0,0.0,10.0,0.0,0.0,0.0,0.0
-2023-01-08,2023-01-09,12,1,25,1,0,0,10.0,0.0,0.0,10.0,12.0,0.0,7.0,19.999999999999996
-2023-01-15,2023-01-16,11,3,28,0,1,0,0.0,12.0,0.0,10.0,12.0,11.0,7.0,-8.333333333333337
-2023-01-22,2023-01-23,9,1,30,0,0,1,0.0,0.0,11.0,9.0,12.0,11.0,7.0,-18.181818181818176
-2023-01-29,2023-01-30,13,4,27,1,0,0,9.0,0.0,0.0,9.0,13.0,11.0,7.0,44.44444444444444
+open_items,red_flags,num_employees,duration,assessment_type_biweekly,assessment_type_quarterly,assessment_type_weekly,open_items_assessment_type_weekly_lag_1,open_items_assessment_type_biweekly_lag_1,open_items_assessment_type_quarterly_lag_1,open_items_weekly_ma_3,open_items_biweekly_ma_3,open_items_quarterly_ma_3,percentage_change_open_items
+10,2,30,1,0,0,1,0.0,0.0,0.0,10.0,0.0,0.0,0.0
+12,1,25,1,1,0,0,10.0,0.0,0.0,10.0,12.0,0.0,19.999999999999996
+11,3,28,1,0,1,0,0.0,12.0,0.0,10.0,12.0,11.0,-8.333333333333337
+9,1,30,1,0,0,1,0.0,0.0,11.0,9.0,12.0,11.0,-18.181818181818176
+13,4,27,1,1,0,0,9.0,0.0,0.0,9.0,13.0,11.0,44.44444444444444
+14,2,26,1,0,0,1,0.0,13.0,0.0,11.5,13.0,0.0,7.692307692307687
+15,1,31,1,0,1,0,14.0,0.0,0.0,14.0,13.0,15.0,7.14285714285714
+16,3,29,1,1,0,0,0.0,0.0,15.0,14.0,16.0,15.0,6.666666666666665
+12,2,25,1,0,0,1,0.0,16.0,0.0,12.0,16.0,15.0,-25.0
+11,1,30,1,0,1,0,12.0,0.0,0.0,12.0,16.0,11.0,-8.333333333333337
+10,4,27,1,0,0,1,0.0,0.0,11.0,11.0,0.0,11.0,-9.090909090909093
+9,3,26,1,1,0,0,10.0,0.0,0.0,10.0,9.0,11.0,-9.999999999999998
@@ -0,0 +1,13 @@
+start_date,end_date,open_items,red_flags,num_employees,assessment_type
+2023-01-01,2023-01-02,10,2,30,weekly
+2023-01-08,2023-01-09,12,1,25,biweekly
+2023-01-15,2023-01-16,11,3,28,quarterly
+2023-01-22,2023-01-23,9,1,30,weekly
+2023-01-29,2023-01-30,13,4,27,biweekly
+2023-02-05,2023-02-06,14,2,26,weekly
+2023-02-12,2023-02-13,15,1,31,quarterly
+2023-02-19,2023-02-20,16,3,29,biweekly
+2023-02-26,2023-02-27,12,2,25,weekly
+2023-03-05,2023-03-06,11,1,30,quarterly
+2023-03-12,2023-03-13,10,4,27,weekly
+2023-03-19,2023-03-20,9,3,26,biweekly
@@ -0,0 +1,2 @@
+open_items,red_flags,num_employees,duration,assessment_type_biweekly,assessment_type_quarterly,assessment_type_weekly,open_items_assessment_type_weekly_lag_1,open_items_assessment_type_biweekly_lag_1,open_items_assessment_type_quarterly_lag_1,open_items_weekly_ma_3,open_items_biweekly_ma_3,open_items_quarterly_ma_3,percentage_change_open_items
+9,3,26,1,1,0,0,10.0,0.0,0.0,10.0,9.0,11.0,-9.999999999999998
@@ -0,0 +1,13 @@
+,start_date,end_date,open_items,red_flags,num_employees,assessment_type
+0,2023-01-01,2023-01-02,10,2,30,weekly
+1,2023-01-08,2023-01-09,12,1,25,biweekly
+2,2023-01-15,2023-01-16,11,3,28,quarterly
+3,2023-01-22,2023-01-23,9,1,30,weekly
+4,2023-01-29,2023-01-30,13,4,27,biweekly
+5,2023-02-05,2023-02-06,14,2,26,weekly
+6,2023-02-12,2023-02-13,15,1,31,quarterly
+7,2023-02-19,2023-02-20,16,3,29,biweekly
+8,2023-02-26,2023-02-27,12,2,25,weekly
+9,2023-03-05,2023-03-06,11,1,30,quarterly
+10,2023-03-12,2023-03-13,10,4,27,weekly
+11,2023-03-19,2023-03-20,9,3,26,biweekly
@@ -26,7 +26,7 @@ class CompanyModelPipeline:

                logger.info(f"Starting preprocessing for company {company_id}.")
                
-                # Step 1: Preprocess the data
+                # Step 1 : Preprocess the data
                preprocessor = DataPreprocessor(input_path=input_path, company_id=company_id)
                processed_data_path = preprocessor.run()
                logger.info(f"Data preprocessing completed for company {company_id}. Processed data saved to {processed_data_path}.")
@@ -1,5 +1,13 @@
 import pandas as pd
 import os
+import logging
+from logging.handlers import RotatingFileHandler
+
+
+handler = RotatingFileHandler('/root/ds_erp_ai/logs/prediction_pipeline.log', maxBytes=100000, backupCount=3)
+logger = logging.getLogger(__name__)
+logger.setLevel(logging.INFO)
+logger.addHandler(handler)

 class DataPreprocessor:
    def __init__(self, input_path, company_id):
@@ -0,0 +1,85 @@
+import pandas as pd
+import joblib
+import os
+
+class AssessmentInference:
+    def __init__(self, company_id, num_assessments, model_dir='models'):
+        self.company_id = company_id
+        self.num_assessments = num_assessments
+        self.model_dir = model_dir
+        self.model = None
+        self.latest_data = None
+
+    def load_model(self):
+        # Load the trained model
+        model_path = os.path.join(self.model_dir, 'assessment_prediction', self.company_id, f'{self.company_id}_model.pkl')
+        self.model = joblib.load(model_path)
+        print(f"Model loaded from {model_path}")
+
+    def load_latest_data(self):
+        # Load the latest assessment data
+        latest_data_path = os.path.join(self.model_dir, 'assessment_prediction', self.company_id, f'{self.company_id}_latest_data.csv')
+        self.latest_data = pd.read_csv(latest_data_path)
+        print(f"Latest data loaded from {latest_data_path}")
+
+    def predict_next_assessment(self, current_data, assessment_type):
+        # Update assessment type (weekly, biweekly, quarterly) in the data for prediction
+        current_data['assessment_type_weekly'] = 1 if assessment_type == 'weekly' else 0
+        current_data['assessment_type_biweekly'] = 1 if assessment_type == 'biweekly' else 0
+        current_data['assessment_type_quarterly'] = 1 if assessment_type == 'quarterly' else 0
+
+        # Exclude target variables (open_items, red_flags) from the feature set
+        features = current_data.drop(columns=['open_items', 'red_flags'])
+
+        # Predict the next open items and red flags
+        prediction = self.model.predict(features)
+        open_items_pred, red_flags_pred = prediction[0]
+
+        # Ensure the predictions are integers by rounding
+        open_items_pred = int(round(open_items_pred))
+        red_flags_pred = int(round(red_flags_pred))
+
+        return {
+            'assessment_type': assessment_type,
+            'open_items': open_items_pred,
+            'red_flags': red_flags_pred
+        }
+
+    def predict_next_assessments(self):
+        predictions = []
+        current_data = self.latest_data.copy()
+
+        # Iteratively forecast the next assessments
+        for i in range(self.num_assessments):
+            print(f"\nForecasting assessment {i + 1}/{self.num_assessments}")
+
+            # Predict for weekly, biweekly, and quarterly for the same forecast step
+            weekly_prediction = self.predict_next_assessment(current_data, 'weekly')
+            biweekly_prediction = self.predict_next_assessment(current_data, 'biweekly')
+            quarterly_prediction = self.predict_next_assessment(current_data, 'quarterly')
+
+            # Append predictions for all types in one forecast step
+            predictions.append({
+                'forecast_step': i + 1,
+                'weekly': weekly_prediction,
+                'biweekly': biweekly_prediction,
+                'quarterly': quarterly_prediction
+            })
+
+            # Update the current data with the weekly prediction (or any of the predictions) for the next step
+            current_data['open_items'] = weekly_prediction['open_items']
+            current_data['red_flags'] = weekly_prediction['red_flags']
+
+        return predictions
+
+    def run(self):
+        self.load_model()
+        self.load_latest_data()
+        predictions = self.predict_next_assessments()
+        return predictions
+
+
+# Example usage
+#inference = AssessmentInference(company_id='testid', num_assessments=5)
+#predictions = inference.run()
+#print(predictions)
@@ -1,7 +1,20 @@
 # Example usage
-from scripts.run_assessment_prediction_trainer import CompanyModelPipeline
-company_ids = ['company_123', 'company_456', 'company_789']
-input_base_path = '/root/ds_erp_ai/data/raw/dummy_assessment_data.csv'  # The base path where the raw data for each company is stored
+'''from scripts.run_assessment_prediction_trainer import CompanyModelPipeline
+company_ids = ['testid']
+input_base_path = '/root/ds_erp_ai/data/raw/erp_assessment_prediction'  # The base path where the raw data for each company is stored

 pipeline = CompanyModelPipeline(company_ids=company_ids, input_base_path=input_base_path)
-pipeline.run_pipeline()
+pipeline.run_pipeline()'''
+
+from src.pipeline.inference import AssessmentInference
+
+
+
+inference = AssessmentInference(
+    company_id="testid",num_assessments=2
+)
+
+result = inference.run()
+
+
+print(result)