updated questions generator

This commit is contained in:
2024-10-15 01:56:05 +00:00
parent cd10cdaf7d
commit 8ce331b023
10 changed files with 427 additions and 105 deletions
+33 -4
View File
@@ -136,14 +136,15 @@ def use_bot_suggest_frequencies():
# Check if 'sops' is present in the request data
sops = data.get('sops')
if not sops:
return jsonify({"error": "Invalid data", "message": "The 'sops' field is missing or not formatted correctly."}), 400
options = data.get('options')
if not sops and not options :
return jsonify({"error": "Invalid data", "message": "The 'sops' or 'options' field is missing or not formatted correctly."}), 400
# Instantiate the chatbot service
chatbot = Chatbot()
# Call the prediction method
response = chatbot.recommend_assessment_frequencies(sops)
response = chatbot.recommend_assessment_frequencies(sops,options)
if not response:
return jsonify({"error": "Prediction Failed", "message": "The prediction process returned no results."}), 400
@@ -186,4 +187,32 @@ def predict_goal_achievement():
return jsonify({"predictions": response}), 200
except Exception as e:
return jsonify({"error": "Internal Server Error", "message": str(e)}), 500
return jsonify({"error": "Internal Server Error", "message": str(e)}), 500
@bot.route('/suggest_more_areas', methods=['POST'])
@auth_check
def suggest_more_areas():
    """Suggest additional work areas for a position.

    Expects a JSON object body with non-empty ``position`` and
    ``existing_areas`` fields and forwards both to
    ``Chatbot.suggest_more_areas``.

    Returns:
        A ``(response, status)`` tuple: 200 with ``{"areas": ...}`` on
        success, 400 when the body is unusable or the chatbot yields nothing,
        and 500 on any unexpected exception.
    """
    try:
        # silent=True yields None for a missing, malformed or non-JSON body
        # instead of raising, so bad client input is answered with the 400
        # below rather than falling through to the generic 500 handler.
        data = request.get_json(silent=True)

        # A JSON array/scalar has no .get(); treat it like missing data.
        if not isinstance(data, dict):
            return jsonify({"error": "Missing data", "message": "Position or existing areas not provided."}), 400

        position = data.get('position')
        existing_areas = data.get('existing_areas')

        if not position or not existing_areas:
            return jsonify({"error": "Missing data", "message": "Position or existing areas not provided."}), 400

        # Instantiate the chatbot service
        chatbot = Chatbot()

        # Call the prediction method
        response = chatbot.suggest_more_areas(
            position=position,
            existing_areas=existing_areas
        )

        # The service returns a falsy value when the model call failed.
        if not response:
            return jsonify({"error": "No predictions available", "message": "Prediction process failed."}), 400

        return jsonify({"areas": response}), 200

    except Exception as e:
        return jsonify({"error": "Internal Server Error", "message": str(e)}), 500
+16 -26
View File
@@ -4,7 +4,7 @@ from flask import Blueprint, request, jsonify, current_app
from werkzeug.utils import secure_filename
from src.services.sop_generator import (SopPersonalAssessment,SopGeneratorExecutive)
from src.services.sop_document_parser import DocumentParser
from src.services.questions_generator import QuestionsGenerator
from src.services.questions_generator import QuestionsGenerator,QuestionsGeneratorV2
from src.utils.utils import delete_all_files_in_directory
from src.utils.document_loader import load_document
@@ -24,51 +24,41 @@ def allowed_file(filename):
@qs_b.route('/generate_questions_from_sop', methods=['POST'])
@auth_check
def generate_questions_from_sop():
# Check if the request contains data
def generate_questions_from_sop_v2():
if not request.is_json:
return jsonify({"error": "Invalid input", "message": "Input data must be in JSON format."}), 400
# Parse the incoming JSON data
input_data = request.get_json()
# Validate required fields
required_fields = ['sops', 'assessment_type', 'frequency_type', 'duration',"roles_data"]
required_fields = ['sops', 'assessment_type', 'duration']
for field in required_fields:
if field not in input_data:
return jsonify({"error": "Missing data", "message": f"'{field}' is required."}), 400
try:
# Extract fields from input_data
sops = input_data['sops']
assessment_type = input_data['assessment_type']
frequency_type = input_data['frequency_type']
duration = input_data['duration']
roles_data = input_data["roles_data"]
frequency_number=input_data["frequency_number"]
# Prepare the data for the generator
generator_input = {
"sops": json.dumps(sops), # Convert SOPs to JSON string
"assessment_type": assessment_type,
"frequency_type": frequency_type,
"duration": duration,
"roles_data":roles_data,
"frequency_number":frequency_number
"sops": input_data['sops'],
"assessment_type": input_data['assessment_type'],
"duration": input_data['duration']
}
# Call the generator to create questions
generator = QuestionsGenerator()
questions_response = generator.generate_questions_v2(generator_input)
# Generate questions using the QuestionGenerator
generator = QuestionsGeneratorV2()
questions_response = generator.generate_questions(generator_input)
# Check if the response is valid
if not questions_response:
return jsonify({"error": "Question generation failed", "message": "Could not generate questions from the provided data."}), 500
# Return the generated questions
return jsonify({"questions": questions_response}), 200
# Convert the Pydantic model to a dictionary and return as JSON
return jsonify({"questions": questions_response.dict()}), 200
except ValidationError as ve:
return jsonify({"error": "Validation Error", "message": str(ve)}), 400
except Exception as e:
return jsonify({"error": "Internal Server Error", "message": str(e)}), 500
+3 -3
View File
@@ -71,12 +71,12 @@ def get_roles_questionnaire():
questionnaire_data = request.json
# Validate the required fields in the questionnaire data
if not questionnaire_data.get('vision') or not questionnaire_data.get('roles') or not questionnaire_data.get('responsibilities'):
if not questionnaire_data.get('questionnaire_response'):
return jsonify({
"error": "Missing required fields",
"message": "Please provide 'vision', 'roles', and 'responsibilities' in the request body."
"message": "Please provide questionnaire_data in the request body"
}), 400
generator = SopPersonalAssessment()
roles = generator.generate_roles_from_questionnaire(questionnaire_data)
+4 -1
View File
@@ -26,7 +26,7 @@ class AssessmentPredictionsResponse(BaseModel):
class AssessmentSuggestion(BaseModel):
assessment_type:str
frequency:str
duration:int
Justification:str
@@ -41,3 +41,6 @@ class AchievementPrediction(BaseModel):
goal_achievement:GoalAchivementPrediction
goal_achievement_new_assessment:str
class Areas(BaseModel):
    """Response schema carrying a list of area names."""

    areas: List[str]
+22 -2
View File
@@ -1,4 +1,4 @@
from pydantic import BaseModel
'''from pydantic import BaseModel
from typing import List, Dict
class Question(BaseModel):
@@ -9,5 +9,25 @@ class Question(BaseModel):
class AssementQuestion(BaseModel):
number: int
questions: List[Question]
questions: List[Question]'''
from pydantic import BaseModel
from typing import List, Dict
class Question(BaseModel):
    """A single generated assessment question together with its ID tags."""

    # Numeric ID of the topic area the question belongs to.
    area_tag: int
    # Numeric ID of the person in charge of answering.
    assigned_to: int
    # Text of the question (the field name is plural, the value is one string).
    questions: str
    # Numeric ID of the role the question targets.
    role: int
class FrequencyQuestions(BaseModel):
    """All questions generated for one frequency number (1, 2, 3, ...)."""

    frequency_number: int
    items: List[Question]
class Questions(BaseModel):
    """Collection of per-frequency question groups."""

    questions: List[FrequencyQuestions]
class AssessmentQuestions(BaseModel):
    """Top-level wrapper: a ``Questions`` collection under the ``questions`` key."""

    questions: Questions
+44 -12
View File
@@ -148,27 +148,30 @@ def predict_next_n_assessments_prompt():
return prompt
def recommend_assessment_frequency_prompt():
return '''
You are provided with the Standard Operating Procedures (SOPs) for various roles within a company. Your task is to collectively recommend the best assessment type and frequency (either weekly, biweekly, or quarterly) for all employees based on the overall nature of the SOPs.
You are provided with the Standard Operating Procedures (SOPs) for various roles within a company, along with options (e.g., ['weekly', 'monthly', 'biweekly']) for how frequently assessments should be performed. Your task is to recommend the best assessment type and frequency (weekly, biweekly, or quarterly) for all employees, based on the overall nature of the SOPs.
Consider the following when making your recommendation:
- The overall complexity and criticality of the tasks outlined in the SOPs.
- The frequency of updates or changes needed to the roles or procedures.
When making your recommendation, consider the following:
- The complexity and criticality of the tasks outlined in the SOPs.
- The frequency of updates or changes to the roles or procedures.
- The urgency of the responsibilities and their potential impact on the company's performance.
- The balance between the need for regular evaluations and avoiding employee overload.
- The need for continuous monitoring for key goals such as compliance, safety, or mission-critical objectives.
Provide a concise and collective assessment recommendation:
- Assessment Type: [Weekly/Biweekly/Quarterly]
- Frequency: [X weeks/biweeks/quarters]
- Justification: Explain the rationale for choosing the recommended assessment type and frequency based on the overall SOP content.
- The need for continuous monitoring of key goals, such as compliance, safety, or mission-critical objectives.
Provide a concise and collective assessment recommendation including:
- Assessment Type: Select from the provided options (e.g., if the options are ['weekly', 'monthly'], choose the most appropriate based on your analysis of the SOP).
- Duration: An integer indicating how long the assessment cycle should last (e.g., if you recommend weekly assessments for two weeks, duration = 2).
- Justification: Explain the rationale for choosing the recommended assessment type and frequency, based on the overall content of the SOPs.
Note: Predict the duration properly based the assessment type pick and the sop analysis
Example Response:
"Based on the complexity and critical nature of the tasks described in the SOPs, it is recommended that assessments be conducted biweekly for 6 biweeks. This frequency will allow for regular progress reviews without overwhelming employees, while ensuring that critical objectives such as compliance and security are met."
"Based on the complexity and critical nature of the tasks described in the SOPs, it is recommended that assessments be conducted biweekly for 6 weeks. This frequency will allow for regular progress reviews without overwhelming employees, while ensuring that critical objectives such as compliance and security are met."
'''
def predict_goal_achievement_probability_prompt():
return '''
@@ -191,3 +194,32 @@ def predict_goal_achievement_probability_prompt():
Be dynamic in your response and you do not need to write as exactly as it is the example response , just keep the format but your response after the analysis should be dynamic and professional
'''
def suggest_more_areas_prompt():
    """Return the system prompt used to ask the model for extra work areas.

    The prompt requests between 5 and 10 additional areas for a given
    position and list of existing areas. The worked example lists five
    suggestions so that it agrees with the requested 5 - 10 range.
    """
    return '''
    You are provided a company position and a set of areas where the employee will be performing their tasks.
    Your role is to analyze the position and suggest additional areas in the range 5 - 10 that the employee should be working on to improve their performance.

    Example:
    Position: Data Scientist
    Existing areas: Development, Testing, Communication
    Suggested areas: [
    "Data Exploration",
    "Machine Learning",
    "Data Visualization",
    "Reporting",
    "Statistical Modeling"
    ]

    Your task is to fill in the suggested areas list with relevant areas based on the position and existing areas.

    Please provide the suggested areas in the following format:
    Suggested_areas: [
    "Area 1",
    "Area 2",
    ...,
    "Area N"
    ]
    '''
+73 -26
View File
@@ -65,39 +65,86 @@ def get_questions_prompt_v2():
prompt = """
You are tasked with generating assessment questions for workers based on their SOPs. These questions should vary by assessment type (daily, weekly, biweekly), frequency number (e.g., day 2, week 3), and total duration (e.g., 6 weeks).
You are generating a variable number of assessment questions for different assessment types, which can be daily, weekly, biweekly, etc. The assessment frequency and the specific frequency number should be taken into account when generating the questions.
For a particular assessment, based on the type (e.g., daily, weekly, biweekly) and total duration (e.g., if it is weekly and the total duration is 6 weeks, generate at least 20 questions weekly for up to six weeks).
Each question will be based on the SOPs of specific workers in different departments, and the questions should vary depending on the assessment frequency type and frequency number.
The goal of the assessment is to focus on the progress of the tasks outlined in each worker's SOPs.
Make sure each question is relevant to the worker's SOP, and attach an area tag ID, aissgned_to ID, role ID, beacuse each role sop will the provided with area tags and member for that role, so questions should be generated based on that area
The questions should become more detailed or challenging as the assessment progresses over time and the question is
Guidelines:
1. Generate yes/no questions relevant to each worker's SOP based on their role.
2. Use frequency numbers as 1, 2, 3, etc., regardless of the type.
3. Tag each question with its topic area (e.g., communication, timeline).
3. Tag each question with the provided topic area ID (e.g., 1, 2, 3, 4, 5).
4. Tag each question with the provided role ID (e.g., 1, 2, 3, 4, 5).
5. Tag each question with the provided assigned_to ID (the ID of the person in charge) (e.g., 1, 2, 3, 4, 5).
4. The questions should evolve in detail as assessments progress over time.
6. For example if ans assement_type is weekly and the duration is 4, this means we generate questions for 4 weeks , frequency_number 1 to top 4 using the sop and all data
7 For each frequency_number, generate at least 15 - 20 questions.
Example response:
{
{
"questions": {
"questions": [
{
"frequency_number": "2",
"questions": [
{
"assigned_to": "John",
"role": "Data Analyst",
"question": "Is the dataset being cleaned according to the SOP?",
"area_tag": "data quality"
}
]
},
{
"frequency_number": "3",
"questions": [
{
"assigned_to": "Jane",
"role": "Data Scientist",
"question": "Are predictive models tested for accuracy before deployment?",
"area_tag": "model validation"
}
]
}
]
}
{
frequency_number: 1,
items: [
{
"area_tag": 5,
"assigned_to": 8,
"questions": "Has the content calendar been developed and shared with the team?",
"role": 4
},
{
}
]
},
{
     }
        ]
"""
return prompt
def get_questions_prompt_v3():
    """Return the v3 system prompt for SOP-based assessment questions.

    The prompt asks for yes/no questions tagged with numeric area, role and
    assigned_to IDs, grouped per frequency number, and shows the expected
    JSON layout.
    """
    return """
You are tasked with generating assessment questions for workers based on their SOPs. These questions should vary by assessment type (daily, weekly, biweekly), frequency number (e.g., day 2, week 3), and total duration (e.g., 6 weeks).
Guidelines:
1. Generate yes/no questions relevant to each worker's SOP based on their role.
2. Use frequency numbers as 1, 2, 3, etc., regardless of the type.
3. Tag each question with the provided topic area ID (e.g., 1, 2, 3, 4, 5).
4. Tag each question with the provided role ID (e.g., 1, 2, 3, 4, 5).
5. Tag each question with the provided assigned_to ID (the ID of the person in charge) (e.g., 1, 2, 3, 4, 5).
6. The questions should evolve in detail as assessments progress over time.
7. For each frequency_number, generate at least 15 - 20 questions.
Provide the response in the following JSON format:
{
"questions": {
"questions": [
{
"frequency_number": 1,
"items": [
{
"area_tag": 5,
"assigned_to": 8,
"questions": "Has the content calendar been developed and shared with the team?",
"role": 4
},
...
]
},
...
]
}
}
"""
+49 -3
View File
@@ -66,7 +66,49 @@ class Chatbot:
print(f"An error occurred: {e}")
return None
def suggest_more_areas(self, position, existing_areas) -> VisionMissionResponse:
    """
    Ask the configured chat model for additional work areas.

    The system prompt comes from suggest_more_areas_prompt(); the position
    and the existing areas are sent as two separate user messages, and the
    reply is constrained to the Areas schema.

    :param position: The worker's position.
    :param existing_areas: The areas the worker already covers.
    :return: The model's reply decoded from JSON, or None if any step
             raises. Note that this decoded JSON value (not a model
             instance) is what the code returns.
    """
    try:
        # The trailing space after the prompt is part of the sent text.
        system_text = f"{suggest_more_areas_prompt()} "
        conversation = [
            {"role": "system", "content": system_text},
            {"role": "user", "content": f"position :{position}"},
            {"role": "user", "content": f"existing areas :{existing_areas}"},
        ]

        completion = self.client.beta.chat.completions.parse(
            model=self.model,
            messages=conversation,
            response_format=Areas,
            max_tokens=1024,
            temperature=0.1,
        )

        # Decode the raw JSON text of the first choice and hand it back.
        return json.loads(completion.choices[0].message.content)

    except Exception as e:
        print(f"An error occurred: {e}")
        return None
def predict_based_on_past_assessment(self, query, company_info, companyid) -> Result:
@@ -185,7 +227,7 @@ class Chatbot:
return None
def recommend_assessment_frequencies(self,sops) -> AssessmentSuggestion:
def recommend_assessment_frequencies(self,sops,options) -> AssessmentSuggestion:
"""
Process the SOPs data and return assessment frequencies.
@@ -211,7 +253,8 @@ class Chatbot:
model=self.model,
messages=[
{"role": "system", "content": prompt},
{"role": "user", "content": docs}
{"role": "user", "content": f'provided sops {docs}'},
{"role": "user", "content": f'options: {options}'}
],
response_format=AssessmentSuggestion, # Use the updated response schema
max_tokens=4096,
@@ -274,3 +317,6 @@ class Chatbot:
except Exception as e:
print(f"An error occurred: {e}")
return None
+106 -26
View File
@@ -6,67 +6,63 @@ from typing import List, Dict, Optional
from src.prompts.sops import *
from src.models.questions_response import *
from src.services.sop_document_parser import DocumentParser
from src.prompts.questions import get_questions_prompt
from src.prompts.questions import *
from dotenv import load_dotenv
load_dotenv()
class QuestionsGenerator:
def __init__(self):
self.api_key = os.getenv("OPENAI_API_KEY")
self.client = OpenAI(api_key=self.api_key)
self.model = "gpt-4o-mini"
def generate_questions(self, input_data):
def generate_questions(self, input_data: Dict) -> AssessmentQuestions:
try:
sops = input_data['sops']
assessment_type = input_data['assessment_type']
frequency_type = input_data['frequency_type']
total_duration = input_data['duration']
roles_data = input_data["roles_data"]
# Chunk the SOPs into smaller pieces
chunk_size = 1000 # Define your chunk size
chunk_size = 1000
docs_text = [sops[i:i + chunk_size] for i in range(0, len(sops), chunk_size)]
# Create a list of documents
docs = [{"type": "text", "text": text} for text in docs_text]
prompt = get_questions_prompt() # Get the questions prompt for the SOP
prompt = get_questions_prompt_v3()
all_questions = []
# Iterate through each frequency number (e.g., week 1, week 2, etc.)
for frequency_number in range(1, total_duration + 1):
frequency_label = f"{frequency_type} {frequency_number}" # e.g., week 1, daily 3
frequency_label = f"{assessment_type} number : {frequency_number}"
# Generate questions for the current frequency number
response = self.client.beta.chat.completions.parse(
model=self.model,
messages=[
{"role": "system", "content": prompt},
{"role": "user", "content": f"The SOPs are provided below."},
{"role": "user", "content": docs}, # Use the chunked documents
{"role": "user", "content": json.dumps(docs)},
{"role": "user", "content": f"Assessment Type: {assessment_type}"},
{"role": "user", "content": f"Frequency Type: {frequency_type}"},
{"role": "user", "content": f"Current Frequency Number to generate : {frequency_label}"},
{"role": "user", "content": f"Duration: {total_duration}"},
{"role": "user", "content": f"roles data : {roles_data}"}
{"role": "user", "content": f"Current Frequency Number to generate: {frequency_label}"},
{"role": "user", "content": f"Duration: {total_duration}"}
],
response_format=AssementQuestion, # Use the updated response schema
max_tokens=4096,
temperature=0.1
temperature=0.1,
response_format=FrequencyQuestions, # Ensure you specify the correct format
max_tokens=10000
)
questions = json.loads(response.choices[0].message.content)
questions_json = json.loads(response.choices[0].message.content)
all_questions.append({
"frequency_number": frequency_label,
"questions": questions
})
all_questions.append(questions_json)
return all_questions
return AssessmentQuestions(questions=Questions(questions=all_questions))
except json.JSONDecodeError:
return False
except Exception as e:
print(f"An error occurred: {e}")
return None
def generate_questions_v2(self, input_data):
@@ -116,4 +112,88 @@ class QuestionsGenerator:
}
except json.JSONDecodeError:
return False
import os
import json
import threading
from openai import OpenAI
from pydantic import BaseModel, Field
from typing import List, Dict, Optional
from src.prompts.sops import *
from src.models.questions_response import *
from src.services.sop_document_parser import DocumentParser
from src.prompts.questions import *
from dotenv import load_dotenv
load_dotenv()
class QuestionsGeneratorV2:
    """Generate assessment questions for every frequency number concurrently.

    One chat-completion request is issued per frequency number
    (1..duration). Requests run on a bounded thread pool and the results are
    returned in ascending frequency order.
    """

    # Upper bound on simultaneous requests. ``duration`` comes from the
    # caller, so starting one thread per frequency would be unbounded.
    MAX_WORKERS = 8

    def __init__(self):
        self.api_key = os.getenv("OPENAI_API_KEY")
        self.client = OpenAI(api_key=self.api_key)
        self.model = "gpt-4o-mini"

    def _fetch_questions(self, frequency_number: int, assessment_type: str, docs: List[Dict], total_duration: int):
        """Request the questions for one frequency number.

        Returns the decoded JSON payload, or None when the request or the
        decoding fails (the error is printed, matching the rest of the class).
        """
        try:
            frequency_label = f"{assessment_type} number : {frequency_number}"

            response = self.client.beta.chat.completions.parse(
                model=self.model,
                messages=[
                    {"role": "system", "content": get_questions_prompt_v3()},
                    {"role": "user", "content": "The SOPs are provided below."},
                    {"role": "user", "content": json.dumps(docs)},
                    {"role": "user", "content": f"Assessment Type: {assessment_type}"},
                    {"role": "user", "content": f"Current Frequency Number to generate: {frequency_label}"},
                    {"role": "user", "content": f"Duration: {total_duration}"}
                ],
                temperature=0.1,
                response_format=FrequencyQuestions,
                max_tokens=10000
            )

            return json.loads(response.choices[0].message.content)

        except Exception as e:
            print(f"An error occurred while generating questions for frequency {frequency_number}: {e}")
            return None

    def _generate_questions_for_frequency(self, frequency_number: int, assessment_type: str, docs: List[Dict], total_duration: int, all_questions: List[Dict]):
        """Legacy helper: fetch one frequency and append it to ``all_questions``.

        Kept with its original signature for existing callers. Nothing is
        appended when the request fails. Also returns the payload (or None).
        """
        questions_json = self._fetch_questions(frequency_number, assessment_type, docs, total_duration)
        if questions_json is not None:
            all_questions.append(questions_json)
        return questions_json

    def generate_questions(self, input_data: Dict) -> AssessmentQuestions:
        """Generate questions for frequency numbers 1..duration.

        :param input_data: Mapping with ``sops``, ``assessment_type`` and
            ``duration`` keys.
        :return: An ``AssessmentQuestions`` built from every successful
            per-frequency payload, in ascending frequency order, or None if
            an unexpected error occurs.
        """
        # Imported locally so this class needs no new module-level import.
        from concurrent.futures import ThreadPoolExecutor

        try:
            sops = input_data['sops']
            assessment_type = input_data['assessment_type']
            # Accept numeric strings such as "4" as well as ints.
            total_duration = int(input_data['duration'])

            # Chunk the SOPs into smaller pieces
            chunk_size = 1000
            docs = [
                {"type": "text", "text": sops[start:start + chunk_size]}
                for start in range(0, len(sops), chunk_size)
            ]

            frequency_numbers = range(1, total_duration + 1)
            all_questions = []

            # An empty range means nothing to request; the executor also
            # rejects max_workers=0, so skip it entirely in that case.
            if frequency_numbers:
                worker_count = min(len(frequency_numbers), self.MAX_WORKERS)
                with ThreadPoolExecutor(max_workers=worker_count) as pool:
                    # map() yields results in submission order, so the
                    # frequencies stay sorted however the requests finish.
                    results = pool.map(
                        lambda number: self._fetch_questions(number, assessment_type, docs, total_duration),
                        frequency_numbers,
                    )
                    all_questions = [payload for payload in results if payload is not None]

            return AssessmentQuestions(questions=Questions(questions=all_questions))

        except Exception as e:
            print(f"An error occurred: {e}")
            return None
+77 -2
View File
@@ -1,3 +1,78 @@
import os
API_KEY = "erp_" + os.urandom(16).hex()
print(API_KEY)
import json
import asyncio
from openai import AsyncOpenAI
from pydantic import BaseModel, Field
from typing import List, Dict, Optional
from src.prompts.sops import *
from src.models.questions_response import *
from src.services.sop_document_parser import DocumentParser
from src.prompts.questions import *
from dotenv import load_dotenv
load_dotenv()
class QuestionsGenerator:
    """Async question generator: one chat completion per frequency number."""

    def __init__(self):
        self.api_key = os.getenv("OPENAI_API_KEY")
        self.client = AsyncOpenAI(api_key=self.api_key)
        self.model = "gpt-4o-mini"

    async def generate_single_frequency_questions(self, docs, assessment_type, frequency_number, total_duration):
        """Request the questions for one frequency number and return the decoded JSON reply."""
        frequency_label = f"{assessment_type} number : {frequency_number}"
        conversation = [
            {"role": "system", "content": get_questions_prompt_v3()},
            {"role": "user", "content": "The SOPs are provided below."},
            {"role": "user", "content": json.dumps(docs)},
            {"role": "user", "content": f"Assessment Type: {assessment_type}"},
            {"role": "user", "content": f"Current Frequency Number to generate: {frequency_label}"},
            {"role": "user", "content": f"Duration: {total_duration}"},
        ]

        completion = await self.client.chat.completions.create(
            model=self.model,
            messages=conversation,
            temperature=0.1,
            response_format={"type": "json_object"},
            max_tokens=10000,
        )

        return json.loads(completion.choices[0].message.content)

    async def generate_questions(self, input_data: Dict) -> AssessmentQuestions:
        """Generate questions for frequency numbers 1..duration concurrently.

        Reads ``sops``, ``assessment_type`` and ``duration`` from
        ``input_data``. Returns an ``AssessmentQuestions`` built from the
        gathered replies, or None (after printing the error) if anything
        raises.
        """
        try:
            sops = input_data['sops']
            assessment_type = input_data['assessment_type']
            total_duration = input_data['duration']

            # Split the SOP text into 1000-character pieces.
            chunk_size = 1000
            docs = [
                {"type": "text", "text": sops[offset:offset + chunk_size]}
                for offset in range(0, len(sops), chunk_size)
            ]

            pending = [
                self.generate_single_frequency_questions(docs, assessment_type, number, total_duration)
                for number in range(1, total_duration + 1)
            ]
            all_questions = await asyncio.gather(*pending)

            return AssessmentQuestions(questions=Questions(questions=all_questions))

        except Exception as e:
            print(f"An error occurred: {e}")
            return None
# Usage
async def main():
    """Run the generator once with a sample request and print the result."""
    generator = QuestionsGenerator()
    sample_request = dict(
        sops="Your SOP text here",
        assessment_type="weekly",
        duration=4,
    )
    print(await generator.generate_questions(sample_request))
if __name__ == "__main__":
asyncio.run(main())