assessment predictions pipelines added

This commit is contained in:
2024-09-12 21:36:02 +00:00
parent 24823432b3
commit 45bc62c745
10 changed files with 1583 additions and 11 deletions
+8
View File
@@ -1,5 +1,13 @@
import pandas as pd
import os
import logging
from logging.handlers import RotatingFileHandler
# Module-wide logger for the prediction pipeline.  Records at INFO and above
# go to a rotating file: rollover is triggered around 100,000 bytes and up to
# three rotated backups are kept next to the active log file.
handler = RotatingFileHandler(
    filename='/root/ds_erp_ai/logs/prediction_pipeline.log',
    maxBytes=100_000,
    backupCount=3,
)
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)
logger.addHandler(handler)
class DataPreprocessor:
def __init__(self, input_path, company_id):
+85
View File
@@ -0,0 +1,85 @@
import pandas as pd
import joblib
import os
class AssessmentInference:
    """Forecast upcoming assessment outcomes for one company with a saved model.

    The trained model and the latest assessment snapshot are both read from
    ``<model_dir>/assessment_prediction/<company_id>/``.  ``run()`` loads them
    and returns ``num_assessments`` forecast steps, each holding a weekly,
    biweekly and quarterly prediction of ``open_items`` and ``red_flags``.
    """

    # Assessment cadences; each one is fed to the model as a one-hot
    # ``assessment_type_<name>`` feature column.
    ASSESSMENT_TYPES = ('weekly', 'biweekly', 'quarterly')
    # Target columns, removed from the frame before it is handed to the model.
    TARGET_COLUMNS = ('open_items', 'red_flags')

    def __init__(self, company_id, num_assessments: int, model_dir: str = 'models'):
        """Store the configuration; nothing is read from disk until ``run()``.

        Args:
            company_id: Identifier used for the per-company sub-directory and
                as the file-name prefix of the model and data files.
            num_assessments: Number of forecast steps to produce.
            model_dir: Root directory that contains ``assessment_prediction``.
        """
        self.company_id = company_id
        self.num_assessments = num_assessments
        self.model_dir = model_dir
        self.model = None
        self.latest_data = None

    def _artifact_path(self, suffix: str) -> str:
        """Return the path of the per-company file ``<company_id>_<suffix>``."""
        # str() keeps os.path.join working when company_id is not a string
        # (e.g. an integer id); the f-string file name already tolerated that.
        company = str(self.company_id)
        return os.path.join(
            self.model_dir, 'assessment_prediction', company, f'{company}_{suffix}'
        )

    def load_model(self) -> None:
        """Load the trained model from ``<company_id>_model.pkl`` into ``self.model``."""
        model_path = self._artifact_path('model.pkl')
        # NOTE: joblib.load unpickles the file, which can execute arbitrary
        # code; only point model_dir at trusted locations.
        self.model = joblib.load(model_path)
        print(f"Model loaded from {model_path}")

    def load_latest_data(self) -> None:
        """Load ``<company_id>_latest_data.csv`` into ``self.latest_data``."""
        latest_data_path = self._artifact_path('latest_data.csv')
        self.latest_data = pd.read_csv(latest_data_path)
        print(f"Latest data loaded from {latest_data_path}")

    def predict_next_assessment(self, current_data, assessment_type) -> dict:
        """Predict open items and red flags for one assessment type.

        Args:
            current_data: DataFrame holding the feature columns plus the
                ``open_items`` and ``red_flags`` target columns.  It is not
                modified.
            assessment_type: ``'weekly'``, ``'biweekly'`` or ``'quarterly'``.
                Any other value leaves all three indicator columns at 0.

        Returns:
            A dict with keys ``assessment_type``, ``open_items`` and
            ``red_flags``; both predictions are rounded to integers.

        Raises:
            KeyError: If ``current_data`` lacks one of the target columns.
        """
        # drop() returns a new frame, so the one-hot columns below are written
        # to that copy and the caller's DataFrame is left untouched.
        features = current_data.drop(columns=list(self.TARGET_COLUMNS))
        for kind in self.ASSESSMENT_TYPES:
            features[f'assessment_type_{kind}'] = int(assessment_type == kind)

        # Only the first row of the model output is used.
        prediction = self.model.predict(features)
        open_items_pred, red_flags_pred = prediction[0]

        return {
            'assessment_type': assessment_type,
            'open_items': int(round(open_items_pred)),
            'red_flags': int(round(red_flags_pred)),
        }

    def predict_next_assessments(self) -> list:
        """Produce ``num_assessments`` forecast steps from ``self.latest_data``.

        Returns:
            A list with one dict per step, containing ``forecast_step``
            (1-based) and one prediction dict under each of the keys
            ``weekly``, ``biweekly`` and ``quarterly``.
        """
        predictions = []
        current_data = self.latest_data.copy()

        for step in range(1, self.num_assessments + 1):
            print(f"\nForecasting assessment {step}/{self.num_assessments}")

            # One prediction per assessment type for the same forecast step.
            by_type = {
                kind: self.predict_next_assessment(current_data, kind)
                for kind in self.ASSESSMENT_TYPES
            }
            predictions.append({'forecast_step': step, **by_type})

            # Carry the weekly prediction forward as the new target values.
            # NOTE(review): both columns are dropped again before the next
            # model call, so as written every step sees the same features;
            # confirm whether lag/derived features should be refreshed here.
            current_data['open_items'] = by_type['weekly']['open_items']
            current_data['red_flags'] = by_type['weekly']['red_flags']

        return predictions

    def run(self) -> list:
        """Load the model and latest data, then return all forecast steps."""
        self.load_model()
        self.load_latest_data()
        return self.predict_next_assessments()
# Example usage:
# inference = AssessmentInference(company_id='testid', num_assessments=5)
# predictions = inference.run()
# print(predictions)