added bot prediction for assessments

This commit is contained in:
2024-09-14 01:50:41 +00:00
parent 45bc62c745
commit cd8f499f97
14 changed files with 698 additions and 22 deletions
+62
View File
@@ -4,6 +4,7 @@ from werkzeug.utils import secure_filename
from src.services.chatbot import Chatbot
from src.utils.utils import delete_all_files_in_directory
from src.utils.document_loader import load_document
from src.services.chatbot import Chatbot
# Initialize the Blueprint
@@ -59,3 +60,64 @@ def validate_worker_document():
except Exception as e:
return jsonify({"error": "Internal Server Error", "message": str(e)}), 500
@bot.route('/predict_next_n_assessments', methods=['POST'])
def predict_next_n_assessments():
    """Predict open items and red flags for a company's next N assessments.

    Expects a JSON object body with the keys ``company_info``, ``companyid``
    and ``N``.

    Returns:
        A ``(response, status)`` tuple:
        * 200 with ``{"predictions": ...}`` on success;
        * 400 when the body is not a JSON object, a required field is
          missing, ``N`` is not a positive integer, or the chatbot returned
          no prediction;
        * 500 on any unexpected error.
    """
    try:
        # silent=True makes get_json return None for a missing or malformed
        # JSON body instead of raising; otherwise the client error would be
        # swallowed by the generic handler below and reported as a 500.
        data = request.get_json(silent=True)
        if not isinstance(data, dict):
            return jsonify({"error": "Missing data", "message": "Request body must be a JSON object."}), 400

        company_info = data.get('company_info')
        companyid = data.get('companyid')
        N = data.get('N')

        if not company_info or not companyid or N is None:
            return jsonify({"error": "Missing data", "message": "Company info, company ID, or N value not provided."}), 400

        # Reject bad counts before spending a model call on them. bool is a
        # subclass of int, so it has to be excluded explicitly.
        if isinstance(N, bool) or not isinstance(N, int) or N <= 0:
            return jsonify({"error": "Invalid data", "message": "N must be a positive integer."}), 400

        # Instantiate the chatbot service and run the prediction
        chatbot = Chatbot()
        response = chatbot.predict_next_n_assessment(
            company_info=company_info,
            companyid=companyid,
            N=N
        )

        # The service signals failure by returning None (or an empty result)
        if not response:
            return jsonify({"error": "No predictions available", "message": "Prediction process failed."}), 400

        return jsonify({"predictions": response}), 200

    except Exception as e:
        return jsonify({"error": "Internal Server Error", "message": str(e)}), 500
@bot.route('/use_bot_predict_assessments', methods=['POST'])
def use_bot_predict_assessments():
    """Answer a free-form query using a company's past assessment data.

    Expects a JSON object body with the keys ``company_info``, ``companyid``
    and ``query``.

    Returns:
        A ``(response, status)`` tuple:
        * 200 with ``{"predictions": ...}`` on success;
        * 400 when the body is not a JSON object, a required field is
          missing, or the chatbot returned no prediction;
        * 500 on any unexpected error.
    """
    try:
        # silent=True makes get_json return None for a missing or malformed
        # JSON body instead of raising; otherwise the client error would be
        # swallowed by the generic handler below and reported as a 500.
        data = request.get_json(silent=True)
        if not isinstance(data, dict):
            return jsonify({"error": "Missing data", "message": "Request body must be a JSON object."}), 400

        company_info = data.get('company_info')
        companyid = data.get('companyid')
        query = data.get('query')

        if not company_info or not companyid or query is None:
            return jsonify({"error": "Missing data", "message": "Company info, company ID, or query value not provided."}), 400

        # Instantiate the chatbot service and run the prediction
        chatbot = Chatbot()
        response = chatbot.predict_based_on_past_assessment(
            company_info=company_info,
            companyid=companyid,
            query=query
        )

        # The service signals failure by returning None (or an empty result)
        if not response:
            return jsonify({"error": "No predictions available", "message": "Prediction process failed."}), 400

        return jsonify({"predictions": response}), 200

    except Exception as e:
        return jsonify({"error": "Internal Server Error", "message": str(e)}), 500
+18 -1
View File
@@ -4,4 +4,21 @@ from typing import List, Dict
# Response schema with a single string field.
# NOTE(review): presumably the structured-output schema for the worker
# validation call (the prompt's example shows result:"validated") - confirm.
class ValidateWorker(BaseModel):
    result:str
# Response schema carrying one free-text answer in `response`.
class Result(BaseModel):
    response:str
# A pair of integer counts: open items and red flags.
class Cases(BaseModel):
    open_items: int
    red_flags: int
# One `Cases` value per assessment cadence (weekly, biweekly, quarterly).
class AssessmentsFrequency(BaseModel):
    weekly: Cases
    biweekly: Cases
    quarterly: Cases
# Wrapper for a single assessment's per-cadence figures, stored under the
# fixed field name `AssessmentN`.
class AssessmentPrediction(BaseModel):
    AssessmentN: AssessmentsFrequency
# Top-level response schema: a list of `AssessmentPrediction` entries under
# the `predictions` key.
class AssessmentPredictionsResponse(BaseModel):
    predictions: List[AssessmentPrediction]
-5
View File
@@ -4,11 +4,6 @@ import logging
from logging.handlers import RotatingFileHandler
# Module-level logging setup: a size-rotated log file (100000 bytes per file,
# 3 backups) attached to this module's logger at INFO level.
# NOTE(review): the log path is a hard-coded absolute location; the "-5" hunk
# header suggests this commit removes these lines - confirm against the diff.
handler = RotatingFileHandler('/root/ds_erp_ai/logs/prediction_pipeline.log', maxBytes=100000, backupCount=3)
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)
logger.addHandler(handler)
class DataPreprocessor:
def __init__(self, input_path, company_id):
self.input_path = input_path
+2 -1
View File
@@ -33,7 +33,8 @@ class ModelTrainer:
y = self.df[['open_items', 'red_flags']] # Multi-target for open items and red flags
# Split into training and test sets with 10% as test size
X_train, self.X_test, y_train, self.y_test = train_test_split(X, y, test_size=0.1, random_state=42)
X_train, self.X_test, y_train, self.y_test = train_test_split(X, y, test_size=0.1, random_state=42, shuffle=False)
# Train the model
self.model.fit(X_train, y_train)
+132 -1
View File
@@ -1,3 +1,5 @@
def validate_worker_prompt() -> str:
return """
You are a worker in the company "Validate" where you are asked a specific yes or no question:
@@ -15,4 +17,133 @@ def validate_worker_prompt() -> str:
result:"validated"
}
"""
def predict_based_past_assessment_prompt(query,company_info, summary_stats):
    """Build the system prompt for answering a query from past assessment data.

    Args:
        query: The user's question; embedded verbatim in the prompt.
        company_info: Mapping with the keys ``company_name``,
            ``company_size`` and ``departments`` (either a single string or
            an iterable of department names).
        summary_stats: Nested mapping of assessment statistics, keyed by the
            section and metric labels read below.

    Returns:
        The fully rendered prompt string.

    Raises:
        KeyError: If a required key is missing from ``company_info`` or
            ``summary_stats``.
    """
    def _pct(distribution, label):
        # Shares are scaled by 100 for display (presumably they are 0-1
        # fractions). Rounding keeps binary float noise such as
        # 0.07 * 100 == 7.000000000000001 out of the prompt.
        return round(distribution.get(label, 0) * 100, 2)

    # Extract company information from the dictionary
    company_name = company_info['company_name']
    company_size = company_info['company_size']
    departments = company_info['departments']
    if isinstance(departments, str):
        # Joining a bare string would split it into single characters.
        department_list = departments
    else:
        department_list = ', '.join(str(department) for department in departments)

    # Pull each statistics section once so the template below stays readable
    flags = summary_stats['Open Items and Red Flags']
    frequency = summary_stats['Assessment Frequency']
    type_breakdown = frequency['Assessment Type Breakdown']
    durations = summary_stats['Assessment Start and End Dates']
    areas = summary_stats['Assessment Areas']
    status = summary_stats['Assessment Status']['Assessment Status Distribution']
    admins = summary_stats['Assessment Admin']

    # Create the prompt with the provided company info and summary statistics
    prompt = f"""
    **Prompt for the Chatbot:**

    **Context:**
    You are an AI assistant working for {company_name}, and your primary responsibility is to provide **insights**, **predictions**, and **recommendations** based on the company's past assessment data and organizational structure. You are not allowed to respond to any queries outside of this domain.

    **General Company Information:**
    - **Company Name**: {company_name}
    - **Company Size**: {company_size} (e.g., Small, Medium, Large)
    - **Departments**:
    {department_list}

    **Assessment Summary**:
    The following is a detailed summary of past assessments at {company_name}. Use this information to provide predictions and recommendations based on trends and data points.

    - **Open Items and Red Flags**:
        - Total Open Items: {flags['Total Open Items']}
        - Average Open Items per Assessment: {flags['Average Open Items per Assessment']}
        - Total Red Flags: {flags['Total Red Flags']}
        - Average Red Flags per Assessment: {flags['Average Red Flags per Assessment']}
        - Max Red Flags in a Single Assessment: {flags['Max Red Flags in a Single Assessment']}
        - Most Common Area with Red Flags: {flags['Most Common Area with Red Flags']}

    - **Assessment Frequency**:
        - Weekly: {_pct(type_breakdown, 'Weekly')}%
        - Bi-Weekly: {_pct(type_breakdown, 'Bi-Weekly')}%
        - Quarterly: {_pct(type_breakdown, 'Quarterly')}%
        - Average Time Between Assessments: {frequency['Average Time Between Assessments']} days
        - Average Assessment Duration: {frequency['Average Assessment Duration']} days

    - **Assessment Start and End Dates**:
        - Longest Assessment Duration: {durations['Longest Assessment Duration (days)']} days
        - Shortest Assessment Duration: {durations['Shortest Assessment Duration (days)']} days

    - **Assessment Areas**:
        - Most Assessed Area: {areas['Most Assessed Area']}
        - Most Open Items in Area: {areas['Most Open Items in Area']}
        - Area with Most Red Flags: {areas['Area with Most Red Flags']}

    - **Assessment Status**:
        - Completed: {_pct(status, 'Completed')}%
        - In Progress: {_pct(status, 'In Progress')}%
        - Incomplete: {_pct(status, 'Incomplete')}%

    - **Assessment Admin**:
        - Most Frequent Admin: {admins['Most Frequent Admin']}
        - Admin with Fewest Red Flags: {admins['Admin with Fewest Red Flags']}
        - Admin with Most Open Items: {admins['Admin with Most Open Items']}

    **Instructions:**
    Use the above information to answer user queries. You should:
    - Analyze historical data to identify trends and problem areas.
    - Predict potential outcomes for future assessments based on past performance (e.g., meeting deadlines, reducing red flags).
    - Provide **actionable recommendations** that can help improve performance in future assessments.

    **User Query**:
    "{query}"

    **Your Response**:
    Predict and provide recommendations based on the company's historical data, focusing on the areas most relevant to the query. Ensure the response is based on past trends and performance issues.

    **Examples of Insightful Responses**:
    - "To improve your performance in the next assessment, you should focus on reducing red flags in the Communication department, as it has had the most issues."
    - "Based on the company's past performance, there is a 70% chance that you will meet the deadline for the next weekly assessment. To ensure success, focus on completing open items in the IT department."
    - "The data indicates that quarterly assessments have the highest rate of incomplete tasks. I recommend prioritizing quarterly assessment tasks to avoid falling behind."
    """
    return prompt
def predict_next_n_assessments_prompt():
    """Return the static system prompt for predicting the next N assessments.

    The prompt is a plain string with no interpolation, so it refers to the
    assessment count as "N" rather than through a format placeholder. The
    company info, the summary statistics and the value of N are expected to
    arrive as separate messages in the same conversation.

    The response example mirrors the ``AssessmentPredictionsResponse``
    schema: a ``predictions`` list whose entries each hold an
    ``AssessmentN`` object with ``weekly``, ``biweekly`` and ``quarterly``
    figures.

    Returns:
        The prompt string.
    """
    prompt = """
    **Prompt for the Chatbot:**

    **Context:**
    You are an AI assistant responsible for analyzing the past assessment data of a company, and your primary responsibility is to provide **predictions** for the next N assessments.

    These assessments can occur on a **weekly**, **bi-weekly**, or **quarterly** basis. Use the company's past performance to predict the following for each of the next N assessments:
    - **Number of Open Items**.
    - **Number of Red Flags**.
    - **Predictions for Weekly, Bi-Weekly, and Quarterly assessments**.

    **Input:**
    - Company basic info
    - Past assessment statistics
    - N - number of next assessments to be predicted

    **General Company Information:**
    The company's basic information will be provided in the conversation.

    **Assessment Summary (Past Data)**:
    Detailed information on past assessments will be provided. Use this information to make predictions for the next N assessments.

    **Instructions**:
    - Predict the number of open items and red flags for the next N assessments if they are conducted on a weekly, bi-weekly, or quarterly basis.
    - Use the historical summary statistics provided to guide your predictions.
    - Return the response in the following JSON format:

    **Response Format** (example for N = 2):
    {
      "predictions": [
        {
          "AssessmentN": {
            "weekly": {"open_items": X, "red_flags": Y},
            "biweekly": {"open_items": X, "red_flags": Y},
            "quarterly": {"open_items": X, "red_flags": Y}
          }
        },
        {
          "AssessmentN": {
            "weekly": {"open_items": X, "red_flags": Y},
            "biweekly": {"open_items": X, "red_flags": Y},
            "quarterly": {"open_items": X, "red_flags": Y}
          }
        }
      ]
    }

    The "predictions" list must contain exactly N entries; the i-th entry is the prediction for the i-th upcoming assessment.
    Ensure each assessment is provided with three predictions: one for Weekly, one for Bi-Weekly, and one for Quarterly assessments.
    """
    return prompt
+120 -1
View File
@@ -7,6 +7,7 @@ from src.prompts.sops import *
from src.prompts.chatbot import *
from src.models.sop_response_schemas import *
from src.models.bot_response_schema import *
from scripts.assessment_data import generate_summary_stats_v2
from dotenv import load_dotenv
load_dotenv()
@@ -52,7 +53,7 @@ class Chatbot:
}
],
response_format=ValidateWorker,
max_tokens=4096,
max_tokens=1024,
temperature=0.1
)
@@ -64,3 +65,121 @@ class Chatbot:
except Exception as e:
print(f"An error occurred: {e}")
return None
def predict_based_on_past_assessment(self, query, company_info, companyid) -> Result:
    """
    Answer a user query from a company's past assessment data.

    Loads summary statistics from the company's raw assessment CSV, renders
    them into a system prompt, and asks the configured model for a response
    constrained to the ``Result`` schema.

    :param query: The question or query asked by the user.
    :param company_info: General information about the company (name, size, departments, etc.).
    :param companyid: Unique identifier of the company; selects ``<companyid>_raw_data.csv``.
    :return: The model's JSON reply decoded into a dict (despite the
        ``Result`` annotation), or None if any step fails. Failures are
        printed, not raised.
    """
    try:
        # companyid is interpolated into a file path and may come straight
        # from a request payload, so refuse anything that could step outside
        # the data directory.
        company_key = str(companyid)
        if not company_key or any(ch in company_key for ch in ('/', '\\', '\0')):
            raise ValueError(f"Invalid company id: {companyid!r}")

        # Define the path to the company's assessment data (stored as a CSV)
        data_path = os.path.join('data', 'raw', 'erp_company_assessment', f'{company_key}_raw_data.csv')

        # Generate summary statistics from the company's assessment data
        summary_stats = generate_summary_stats_v2(file_path=data_path)

        # Generate the prompt using the company info and the summary statistics
        prompt = predict_based_past_assessment_prompt(
            query=query,
            company_info=company_info,
            summary_stats=summary_stats
        )

        # Ask the model for a reply that conforms to the Result schema
        response = self.client.beta.chat.completions.parse(
            model=self.model,
            messages=[
                {
                    "role": "system",
                    "content": f"{prompt}"
                },
                {
                    "role": "user",
                    "content": f"{query}",
                }
            ],
            response_format=Result,
            max_tokens=1024,
            temperature=0.1
        )

        # The message content is the raw JSON text; decode it for the caller
        extracted_text = json.loads(response.choices[0].message.content)
        return extracted_text

    except Exception as e:
        print(f"An error occurred: {e}")
        return None
def predict_next_n_assessment(self, company_info, companyid, N) -> AssessmentPredictionsResponse:
    """
    Predict open items and red flags for a company's next N assessments.

    Loads summary statistics from the company's raw assessment CSV, sends
    them with the company info and N to the configured model, and re-keys
    the returned predictions as ``assessment_1`` .. ``assessment_N``.

    :param company_info: General information about the company (name, size, departments, etc.).
    :param companyid: Unique identifier of the company; selects ``<companyid>_raw_data.csv``.
    :param N: Number of assessments to predict (an int or int-like value).
    :return: Dict mapping ``assessment_<i>`` to that assessment's
        weekly/biweekly/quarterly figures (despite the
        ``AssessmentPredictionsResponse`` annotation), or None if any step
        fails. If the model returns fewer than N entries, only those entries
        are included. Failures are printed, not raised.
    """
    try:
        # Normalise N up front so a bad value fails before the model call.
        requested = max(int(N), 0)

        # companyid is interpolated into a file path and may come straight
        # from a request payload, so refuse anything that could step outside
        # the data directory.
        company_key = str(companyid)
        if not company_key or any(ch in company_key for ch in ('/', '\\', '\0')):
            raise ValueError(f"Invalid company id: {companyid!r}")

        # Define the path to the company's assessment data (stored as a CSV)
        data_path = os.path.join('data', 'raw', 'erp_company_assessment', f'{company_key}_raw_data.csv')

        # Generate summary statistics from the company's assessment data
        summary_stats = generate_summary_stats_v2(file_path=data_path)

        # The system prompt is static; the inputs travel as user messages
        prompt = predict_next_n_assessments_prompt()

        # Ask the model for a reply that conforms to the response schema
        response = self.client.beta.chat.completions.parse(
            model=self.model,
            messages=[
                {
                    "role": "system",
                    "content": f"{prompt}"
                },
                {
                    "role": "user",
                    "content": f"company info: {company_info}--> N-value is {N} ",
                },
                {
                    "role": "user",
                    "content": f"Summary stats: {summary_stats}",
                }
            ],
            response_format=AssessmentPredictionsResponse,
            max_tokens=1024,
            temperature=0.1
        )

        # The message content is the raw JSON text; decode it
        extracted_text = json.loads(response.choices[0].message.content)

        # Re-key the entries by position. Slicing instead of indexing by
        # range(N) keeps whatever was returned when the model produced fewer
        # than N entries, rather than discarding everything on an IndexError.
        returned = extracted_text["predictions"][:requested]
        predictions = {
            f"assessment_{position}": entry['AssessmentN']
            for position, entry in enumerate(returned, start=1)
        }

        return predictions

    except Exception as e:
        print(f"An error occurred: {e}")
        return None