added goal achievement predictions

2024-09-17 22:39:07 +00:00
parent 47a274741f
commit 1bfc773782
10 changed files with 325 additions and 20 deletions
@@ -121,3 +121,63 @@ def use_bot_predict_assessments():
    except Exception as e:
        return jsonify({"error": "Internal Server Error", "message": str(e)}), 500
@bot.route('/suggest_assessment_frequencies', methods=['POST'])
def use_bot_suggest_frequencies():
    """Recommend an assessment type and frequency for the posted SOPs.

    Expects a JSON object body with a non-empty ``sops`` field, which is
    passed to ``Chatbot.recommend_assessment_frequencies``.

    Returns:
        A ``(response, status)`` tuple:
        * 200 with ``{"response": ...}`` on success;
        * 400 when the body is not a JSON object, ``sops`` is missing or
          empty, or the chatbot returned a falsy result;
        * 500 when an unexpected exception is raised.
    """
    try:
        # silent=True makes a missing, mistyped or malformed JSON body come
        # back as None instead of raising, so it is reported as a client
        # error (400) below rather than being swallowed into the 500 branch.
        data = request.get_json(silent=True)
        sops = data.get('sops') if isinstance(data, dict) else None
        if not sops:
            return jsonify({"error": "Invalid data", "message": "The 'sops' field is missing or not formatted correctly."}), 400

        # Delegate the recommendation to the chatbot service
        chatbot = Chatbot()
        response = chatbot.recommend_assessment_frequencies(sops)
        if not response:
            return jsonify({"error": "Prediction Failed", "message": "The prediction process returned no results."}), 400

        return jsonify({"response": response}), 200
    except Exception as e:
        # Top-level boundary of the endpoint: report the failure and answer 500
        print(f"Error in /suggest_assessment_frequencies: {e}")
        return jsonify({"error": "Internal Server Error", "message": str(e)}), 500
@bot.route('/predict_goal_achievment_proba', methods=['POST'])
def predict_goal_achievement():
    """Predict the probability that a company reaches its goals on time.

    Expects a JSON object body with non-empty ``company_info`` and
    ``companyid`` fields, which are passed to
    ``Chatbot.predict_goal_achievement_probability``.

    Returns:
        A ``(response, status)`` tuple:
        * 200 with ``{"predictions": ...}`` on success;
        * 400 when the body is not a JSON object, a required field is
          missing or empty, or the chatbot returned a falsy result;
        * 500 when an unexpected exception is raised.
    """
    try:
        # silent=True makes a missing, mistyped or malformed JSON body come
        # back as None instead of raising, so it is reported as a client
        # error (400) below rather than being swallowed into the 500 branch.
        data = request.get_json(silent=True)
        if not isinstance(data, dict):
            data = {}
        company_info = data.get('company_info')
        companyid = data.get('companyid')
        if not company_info or not companyid:
            # This endpoint only reads these two fields; the message names exactly them.
            return jsonify({"error": "Missing data", "message": "Company info or company ID not provided."}), 400

        # Delegate the prediction to the chatbot service
        chatbot = Chatbot()
        response = chatbot.predict_goal_achievement_probability(
            company_info=company_info,
            companyid=companyid
        )
        if not response:
            return jsonify({"error": "No predictions available", "message": "Prediction process failed."}), 400

        return jsonify({"predictions": response}), 200
    except Exception as e:
        # Top-level boundary of the endpoint: answer 500 with the error text
        return jsonify({"error": "Internal Server Error", "message": str(e)}), 500
@@ -44,6 +44,7 @@ def generate_questions_from_sop():
        frequency_type = input_data['frequency_type']
        duration = input_data['duration']
        roles_data = input_data["roles_data"]
        frequency_number=input_data["frequency_number"]
        # Prepare the data for the generator
        generator_input = {
@@ -51,12 +52,13 @@ def generate_questions_from_sop():
            "assessment_type": assessment_type,
            "frequency_type": frequency_type,
            "duration": duration,
-            "roles_data":roles_data
+            "roles_data":roles_data,
            "frequency_number":frequency_number
        }
        # Call the generator to create questions
        generator = QuestionsGenerator()
-        questions_response = generator.generate_questions(generator_input)
+        questions_response = generator.generate_questions_v2(generator_input)
        # Check if the response is valid
        if not questions_response:
@@ -100,6 +100,16 @@ def generate_sops():
    if file.filename == '':
        return jsonify({"error": "No selected file", "message": "A file was not selected for upload. Please select a valid file."}), 400
    if 'workers_info' not in request.form:
        return jsonify({"error": "No roles provided", "message": "Please provide roles as a JSON array."}), 400
    try:
        roles_data = request.form.get('workers_info')
        # Manually load roles from the string to JSON
        roles_data = json.loads(roles_data)
    except json.JSONDecodeError:
        return jsonify({"error": "Invalid JSON", "message": "The roles must be a valid JSON array."}), 400
    if file and allowed_file(file.filename):
        filename = secure_filename(file.filename)
        upload_folder = current_app.config['UPLOAD_FOLDER']
@@ -114,7 +124,7 @@ def generate_sops():
            # Generate SOPs based on the roles provided
            sop_generator = DocumentParser()
-            sops = sop_generator.extract_sops_from_doc(docs)
+            sops = sop_generator.extract_sops_from_doc(docs,roles_data)
            # Cleanup: Delete all files in the upload directory after processing
            delete_all_files_in_directory(upload_folder)
            if not sops:
@@ -301,8 +311,12 @@ def generate_sop_managers_doc():
        try:
            docs = load_document(file_path)
-            sop_generator = SopGeneratorExecutive()
+            # Load department managers from form data as a JSON string
-            result = sop_generator.generate_sops_for_department_managers(docs)
+            department_managers_json = request.form.get('department_managers', '[]')
            department_managers = json.loads(department_managers_json)
            sop_generator = DocumentParser()
            result = sop_generator.extract_sops_for_managers_by_department(docs, department_managers)
            delete_all_files_in_directory(upload_folder)
@@ -319,8 +333,6 @@ def generate_sop_managers_doc():
@sops_bp.route('/executive/generate_sops_from_questionnaire', methods=['POST'])
 def generate_sops_from_questionnaire():
    try:
@@ -488,3 +500,4 @@ def generate_sop_workers_doc():
    except Exception as e:
        delete_all_files_in_directory(upload_folder)
        return jsonify({"error": "Processing error", "message": f"An error occurred while processing the document: {str(e)}"}), 500
@@ -22,3 +22,22 @@ class AssessmentPrediction(BaseModel):
 # Response envelope: a list of AssessmentPrediction entries under "predictions".
 class AssessmentPredictionsResponse(BaseModel):
    predictions: List[AssessmentPrediction]
 # Structured recommendation of an assessment cadence; all three fields are plain strings.
 class AssessmentSuggestion(BaseModel):
    # Recommended kind of assessment
    assessment_type:str
    # Recommended frequency
    frequency:str
    # Rationale for the recommendation.
    # NOTE(review): capitalised unlike the sibling fields; the name is also the
    # serialized key, so renaming it would change the payload consumers see.
    Justification:str
 # A single goal-achievement prediction together with the reasoning behind it.
 # NOTE(review): the class name is spelled "Achivement"; it is referenced by
 # AchievementPrediction, so renaming needs a coordinated change.
 class GoalAchivementPrediction(BaseModel):
    # The prediction text
    prediction:str
    # Explanation supporting the prediction
    reasoning:str
 # Top-level goal-achievement result: a nested prediction for the company's
 # goals plus a free-text prediction for a new assessment.
 class AchievementPrediction(BaseModel):
    # Prediction and reasoning for the company goals
    goal_achievement:GoalAchivementPrediction
    # Free-text prediction concerning a new assessment
    goal_achievement_new_assessment:str
@@ -147,3 +147,47 @@ def predict_next_n_assessments_prompt():
    """
    return prompt
 def recommend_assessment_frequency_prompt():
    """Return the system prompt asking for one collective assessment type and frequency recommendation based on the SOPs."""
    frequency_prompt = '''
    You are provided with the Standard Operating Procedures (SOPs) for various roles within a company. Your task is to collectively recommend the best assessment type and frequency (either weekly, biweekly, or quarterly) for all employees based on the overall nature of the SOPs.
    Consider the following when making your recommendation:
    - The overall complexity and criticality of the tasks outlined in the SOPs.
    - The frequency of updates or changes needed to the roles or procedures.
    - The urgency of the responsibilities and their potential impact on the company's performance.
    - The balance between the need for regular evaluations and avoiding employee overload.
    - The need for continuous monitoring for key goals such as compliance, safety, or mission-critical objectives.
    Provide a concise and collective assessment recommendation:
    - Assessment Type: [Weekly/Biweekly/Quarterly]
    - Frequency: [X weeks/biweeks/quarters]
    - Justification: Explain the rationale for choosing the recommended assessment type and frequency based on the overall SOP content.
    Example Response:
    "Based on the complexity and critical nature of the tasks described in the SOPs, it is recommended that assessments be conducted biweekly for 6 biweeks. This frequency will allow for regular progress reviews without overwhelming employees, while ensuring that critical objectives such as compliance and security are met."
    '''
    return frequency_prompt
 def predict_goal_achievement_probability_prompt():
    """Return the system prompt for the goal-achievement prediction.

    The prompt asks for two predictions (company goals and a new assessment)
    and shows an example JSON reply. The example's keys mirror the response
    schema: "goal_achievement" with "prediction" and "reasoning", plus
    "goal_achievement_new_assessment".
    """
    # The example key is "prediction" (singular) so that it agrees with the
    # schema field the reply is parsed into; the example previously showed
    # "predictions", contradicting the schema.
    return '''
    You are provided with summary stats of past assessments and general basic information about the company.
    Your goal is to predict two things after analyzing the summary statistics and the company info:
    1. Goal achievement probability: Predict if the company can meet its goals and vision within the original deadline and the reasoning behind it.
    2. Goal achievement probability for new assessment: Predict whether a worker will meet the deadline, and if not, specify the extension needed in weeks or days to complete the assessments.
    Example response:  --> 
    {
    "goal_achievement": {
        "prediction": "Based on the analysis of previous assessments and current data, it is anticipated that the company may not meet its goals by the original deadline.",
        "reasoning": "The data indicates that workers are currently falling behind on completing their assessments. With the current rate of progress, it is expected that an extension of approximately 2 weeks beyond the original due date may be required to complete all assessments."
    },
    "goal_achievement_new_assessment": "Based on previous assessments, it is anticipated that the workers will not be able to complete the assessments on time. They may require an extension of approximately 2 weeks beyond the due date to complete the assessment."
    }
    Be dynamic in your response and you do not need to write as exactly as it is the example response , just keep the format but your response after the analysis should be dynamic and professional
    '''
@@ -1,16 +1,17 @@
 def get_sop_extraction_from_doc():
-    return '''Your task is to extract the "Vision", "Mission", and role-specific Standard Operating Procedures (SOPs) from the provided document.
+    return '''Your task is to generate Standard Operating Procedures (SOPs) for the verified workers based on the provided document and their positions.
    You must extract and categorize the SOPs into three categories: "must", "shall", and "will."
    Instructions:
    1. **Vision**: Extract the vision of the company or organization.
    2. **Mission**: Extract the mission of the company or organization. If not explicitly mentioned, consider the mission as the company's goals.
-    3. **Role-specific SOPs**: 
+    3. **Position-specific SOPs**: 
-        - Categorize the questions under three categories: "must," "shall," and "will."
+        - Categorize the SOPs under three categories: "must," "shall," and "will."
-        - The questions should be directly addressed to the person in the role. Do not reference the role itself in the question.
+        - The SOPs should be directly relevant to the verified workers' positions and responsibilities, ensuring that the position names remain exactly the same as provided.
-        - If SOPs for the role are not explicitly stated, infer them from the context, but only if there is clear evidence within the document. Do not generate or assume SOPs that are not directly supported by the document.
+        - If multiple workers share the same position, generate a single set of SOPs for that position., Do not repeat it for the same position
-        - If no SOPs are found for the role, return an empty list for each category.
+        - If SOPs for the position are not explicitly stated, infer them from the context, but only if there is clear evidence within the document. Do not generate or assume SOPs that are not directly supported by the document.
-    Provide the extracted sections exactly as they appear in the document.'''
+        - If no SOPs are found for the position, return an empty list for each category.
    Use the provided workers' information to ensure the SOPs are tailored to the verified workers.'''
 def get_roles_extraction_from_questionnaire():
@@ -345,3 +346,5 @@ def get_sop_for_department_workers():
        ]
    }
    '''
@@ -183,3 +183,94 @@ class Chatbot:
        except Exception as e:
            print(f"An error occurred: {e}")
            return None
    def recommend_assessment_frequencies(self, sops) -> AssessmentSuggestion:
        """
        Ask the chat model for a collective assessment type/frequency recommendation.

        Every item found in the values of ``sops`` is stringified, joined with
        newlines, split into 1000-character pieces and sent as the user message
        next to the recommendation system prompt.

        :param sops: Mapping whose values are iterables of SOP entries.
        :return: The model's JSON reply decoded with ``json.loads``, or None if
                 any step raises (the error is printed).
        """
        try:
            max_chars = 1000  # size of each text piece sent to the model

            # Flatten every SOP entry into one newline-separated string
            flattened = []
            for entries in sops.values():
                for entry in entries:
                    flattened.append(str(entry))
            combined_text = '\n'.join(flattened)

            # Wrap each fixed-size slice as a text content part
            content_parts = []
            for start in range(0, len(combined_text), max_chars):
                content_parts.append({"type": "text", "text": combined_text[start:start + max_chars]})

            system_prompt = recommend_assessment_frequency_prompt()
            conversation = [
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": content_parts},
            ]
            completion = self.client.beta.chat.completions.parse(
                model=self.model,
                messages=conversation,
                response_format=AssessmentSuggestion,  # schema the reply must follow
                max_tokens=4096,
                temperature=0.1,
            )
            raw_reply = completion.choices[0].message.content
            return json.loads(raw_reply)
        except Exception as e:
            print(f"An error occurred: {e}")
            return None
    def predict_goal_achievement_probability(self, company_info, companyid) -> AchievementPrediction:
        """
        Predict whether a company will reach its goals, based on its past assessment data.

        Summary statistics are computed from the CSV file
        ``data/raw/erp_company_assessment/<companyid>_raw_data.csv`` and sent,
        together with ``company_info``, to the configured chat model.

        :param company_info: General information about the company (name, size, departments, etc.).
        :param companyid: Identifier of the company; it is used to build the CSV
                          file name and therefore must not contain path separators.
        :return: The model's JSON reply decoded with ``json.loads``, or None if
                 any step raises (the error is printed).
        """
        try:
            # companyid may originate from a request body, so refuse anything
            # that could make the file name escape the assessment-data folder.
            file_name = f'{companyid}_raw_data.csv'
            if file_name != os.path.basename(file_name) or '\\' in file_name or '\x00' in file_name:
                raise ValueError(f"Invalid company id: {companyid!r}")

            # Path to the company's assessment data (stored as a CSV)
            data_path = os.path.join('data', 'raw', 'erp_company_assessment', file_name)

            # Summary statistics derived from the company's assessment data
            summary_stats = generate_summary_stats_v2(file_path=data_path)

            # System prompt describing the two predictions to produce
            prompt = predict_goal_achievement_probability_prompt()

            # Ask the chat model for a reply constrained to the response schema
            response = self.client.beta.chat.completions.parse(
                model=self.model,
                messages=[
                    {
                        "role": "system",
                        "content": prompt
                    },
                    {
                        "role": "user",
                        "content": f"company info: {company_info}",
                    },
                    {
                        "role": "user",
                        "content": f"Summary stats: {summary_stats}",
                    }
                ],
                response_format=AchievementPrediction,
                max_tokens=1024,
                temperature=0.1
            )

            # Decode the JSON text of the first choice
            predictions = json.loads(response.choices[0].message.content)
            return predictions

        except Exception as e:
            print(f"An error occurred: {e}")
            return None
@@ -57,7 +57,7 @@ class QuestionsGenerator:
                )
                questions = json.loads(response.choices[0].message.content)
-                print(f"Generated questions: {questions}")
+                
                all_questions.append({
                    "frequency_number": frequency_label,
                    "questions": questions
@@ -67,3 +67,53 @@ class QuestionsGenerator:
        except json.JSONDecodeError:
            return False
    def generate_questions_v2(self, input_data):
        """
        Generate assessment questions for a single frequency slot.

        Reads 'sops', 'assessment_type', 'frequency_type', 'frequency_number',
        'duration' and 'roles_data' from ``input_data`` (a missing key raises
        KeyError), sends the SOPs in 1000-item slices plus the assessment
        settings to the chat model, and returns
        ``{"frequency_number": "<frequency_type> <frequency_number>", "questions": <decoded reply>}``.
        Returns False when the reply is not valid JSON.
        """
        try:
            required_keys = (
                'sops',
                'assessment_type',
                'frequency_type',
                'frequency_number',  # the specific slot (e.g. week) to generate questions for
                'duration',
                'roles_data',
            )
            # Looked up lazily and in order, so the first missing key is the one reported
            (sops, assessment_type, frequency_type,
             frequency_number, total_duration, roles_data) = (input_data[key] for key in required_keys)

            # Slice the SOPs into fixed-size pieces and wrap each as a text part
            slice_len = 1000
            parts = []
            for offset in range(0, len(sops), slice_len):
                parts.append({"type": "text", "text": sops[offset:offset + slice_len]})

            system_prompt = get_questions_prompt()
            slot_label = f"{frequency_type} {frequency_number}"  # e.g., week 2

            user_texts = [
                "The SOPs are provided below.",
                json.dumps(parts),
                f"Assessment Type: {assessment_type}",
                f"Frequency Type: {frequency_type}",
                f"Current Frequency Number to generate: {slot_label}",
                f"Duration: {total_duration}",
                f"Roles Data: {roles_data}",
            ]
            conversation = [{"role": "system", "content": system_prompt}]
            conversation.extend({"role": "user", "content": text} for text in user_texts)

            # Only the requested slot is generated
            completion = self.client.beta.chat.completions.parse(
                model=self.model,
                messages=conversation,
                response_format=AssementQuestion,
                max_tokens=4096,
                temperature=0.1,
            )

            decoded = json.loads(completion.choices[0].message.content)
            return {"frequency_number": slot_label, "questions": decoded}
        except json.JSONDecodeError:
            return False
@@ -21,7 +21,7 @@ class DocumentParser:
        return [doc.page_content for doc in docs]
    # Existing methods...
-    def extract_sops_from_doc(self, docs) -> VisionMissionResponse:
+    def extract_sops_from_doc(self, docs,roles_data) -> SOPsResponse:
        """
        Extracts Vision, Mission, and SOPs categorized into 'must,' 'shall,' and 'will' from the document.
@@ -39,6 +39,10 @@ class DocumentParser:
                            "role": "system",
                            "content": f'''{prompt}'''
                        },
                        {
                             "role":"user",
                             "content":f"Verfied workers data:{roles_data}"
                        },
                        {
                            "role": "user",
                            "content": [{"type": "text", "text": text} for text in docs_text],
@@ -322,7 +326,7 @@ class DocumentParser:
                return False
-    def extract_sops_for_managers_by_department(self, docs,depts_wmanagers):
+    def extract_sops_for_managers_by_department(self, docs,depts_managers):
            """
            Extract sops for managers from the document.
@@ -337,7 +341,7 @@ class DocumentParser:
                    model=self.model,
                    messages=[
                        {"role": "system", "content": prompt},
-                        {"role": "user", "content": f"Mangers information: {depts_wmanagers}"},
+                        {"role": "user", "content": f"Mangers information: {depts_managers}"},
                        {"role": "user", "content": [{"type": "text", "text": text} for text in docs_text]}
                    ],
                    response_format=ManagerWithSOPs,  # Use the updated response schema
@@ -40,7 +40,7 @@ response = bot.predict_based_on_past_assessment(
    companyid="testid"
 )
-print(f"Result: {response}")'''
+print(f"Result: {response}")
 from src.services.sop_document_parser import DocumentParser
@@ -59,4 +59,23 @@ res = parser.extract_sops_for_workers_by_department(
    depts_workers=workers_department
 )
-print(res)
+print(res)'''
 from src.services.chatbot import Chatbot
 # Manual smoke run: call the goal-achievement prediction with sample data and print the result.
 bot = Chatbot(
 )
 # Sample company description passed to the prediction call
 company_info =  {
        "company_name": "Example Co",
        "company_size": "Medium",
        "departments": ["HR", "Finance", "IT"]
    }
 # "testid" is the company id used for this run
 response = bot.predict_goal_achievement_probability(
    company_info=company_info,
    companyid="testid"
 )
 print(response)