added questions generator

2024-09-10 02:51:31 +01:00
parent 286ff0e61e
commit 348c871abc
9 changed files with 271 additions and 39 deletions
@@ -1,12 +1,14 @@
 import os
 from flask import Flask
 from src.api.routes.sops import sops_bp
+from src.api.routes.questions import qs_b

 def create_app():
    app = Flask(__name__)

    # Register the blueprint with the desired prefix
    app.register_blueprint(sops_bp, url_prefix='/api/v1/sop')
+    app.register_blueprint(qs_b,url_prefix='/api/v1/qs')

    # Set up the upload folder configuration inside the src directory
    UPLOAD_FOLDER = os.path.join(os.path.dirname(os.path.abspath(__file__)), '../../uploads')
@@ -0,0 +1,67 @@
+
+import os
+from flask import Blueprint, request, jsonify, current_app
+from werkzeug.utils import secure_filename
+from src.services.sop_generator import (SopPersonalAssessment,SopGeneratorExecutive)
+from src.services.sop_document_parser import DocumentParser
+from src.services.questions_generator import QuestionsGenerator
+
+from src.utils.utils import delete_all_files_in_directory
+from src.utils.document_loader import load_document  
+import json
+# Blueprint grouping the question-generation endpoints; create_app() registers
+# it under the '/api/v1/qs' URL prefix.
+qs_b = Blueprint('questions', __name__)
+
+# NOTE(review): no SOP generator is initialized in this module; the route below
+# builds a QuestionsGenerator per request.
+
+
+# Lower-case file extensions accepted by allowed_file().
+ALLOWED_EXTENSIONS = {'pdf', 'doc', 'docx'}
+
+def allowed_file(filename):
+    """Tell whether *filename* carries one of the ALLOWED_EXTENSIONS.
+
+    The extension is the text after the last dot, compared
+    case-insensitively. A name without any dot is rejected.
+    """
+    if '.' not in filename:
+        return False
+    extension = filename.rsplit('.', 1)[-1]
+    return extension.lower() in ALLOWED_EXTENSIONS
+
+
+@qs_b.route('/generate_questions_from_sop', methods=['POST'])
+def generate_questions_from_sop():
+    """Generate assessment questions for the SOPs posted as JSON.
+
+    The request body must be a JSON object with the keys 'sops',
+    'assessment_type', 'frequency_type' and 'duration'. 'duration' is the
+    number of frequency cycles and must be a positive integer, because the
+    generator iterates over it with range().
+
+    Returns:
+        200 with {"questions": [...]} on success;
+        400 when the body is not a JSON object, a required field is missing
+        or 'duration' is not a positive integer;
+        500 when question generation fails.
+    """
+    if not request.is_json:
+        return jsonify({"error": "Invalid input", "message": "Input data must be in JSON format."}), 400
+
+    # silent=True turns an unparsable body into None, so it is answered with the
+    # same JSON error shape as every other validation failure.
+    input_data = request.get_json(silent=True)
+    if not isinstance(input_data, dict):
+        return jsonify({"error": "Invalid input", "message": "Input data must be a JSON object."}), 400
+
+    # Validate required fields
+    required_fields = ('sops', 'assessment_type', 'frequency_type', 'duration')
+    for field in required_fields:
+        if field not in input_data:
+            return jsonify({"error": "Missing data", "message": f"'{field}' is required."}), 400
+
+    duration = input_data['duration']
+    # bool is a subclass of int, so it has to be rejected explicitly.
+    if isinstance(duration, bool) or not isinstance(duration, int) or duration < 1:
+        return jsonify({"error": "Invalid input", "message": "'duration' must be a positive integer."}), 400
+
+    # Prepare the data for the generator
+    generator_input = {
+        "sops": json.dumps(input_data['sops']),  # the generator chunks the SOPs as one JSON string
+        "assessment_type": input_data['assessment_type'],
+        "frequency_type": input_data['frequency_type'],
+        "duration": duration
+    }
+
+    try:
+        # Constructing the generator stays inside the try: it creates the OpenAI client.
+        questions_response = QuestionsGenerator().generate_questions(generator_input)
+    except Exception:
+        # Keep the details in the server log; raw exception text is not sent to the client.
+        current_app.logger.exception("Question generation raised an unexpected error")
+        return jsonify({"error": "Internal Server Error", "message": "An unexpected error occurred while generating questions."}), 500
+
+    # The generator returns a falsy value when it could not produce questions
+    if not questions_response:
+        return jsonify({"error": "Question generation failed", "message": "Could not generate questions from the provided data."}), 500
+
+    # Return the generated questions
+    return jsonify({"questions": questions_response}), 200
@@ -0,0 +1,12 @@
+from pydantic import BaseModel
+from typing import List, Dict
+
+# One generated question addressed to a single worker.
+# NOTE(review): the example response in the questions prompt uses the key
+# "question", whereas this schema names the field "questions" — confirm which
+# spelling is intended.
+class Question(BaseModel):
+    assigned_to: str  # who the question is directed at (name or role, per the prompt)
+    role: str
+    questions: str  # a single string despite the plural name
+
+# Response schema for one model call; QuestionsGenerator passes it as
+# response_format.
+class AssementQuestion(BaseModel):
+    number: int  # presumably the frequency/cycle number — TODO confirm
+    questions: List[Question]
+
@@ -0,0 +1,53 @@
+def get_questions_prompt():
+    """Return the system prompt used to generate assessment questions.
+
+    The example response mirrors the AssementQuestion schema that the
+    generator enforces through response_format: one object per request with
+    an integer "number" and a "questions" list whose items carry
+    "assigned_to", "role" and "questions".
+    """
+    # Header part of the prompt, explaining the context
+    prompt = """
+    You are generating a variable number of assessment questions for different assessment types, which can be daily, weekly, biweekly, etc. The assessment frequency and the specific frequency number should be taken into account when generating the questions.
+    For a particular assessment, based on the type (e.g., daily, weekly, biweekly) and total duration (e.g., if it is weekly and the total duration is 6 weeks, generate at least 20 questions weekly for up to six weeks).
+    Each question will be based on the SOPs of specific workers in different departments, and the questions should vary depending on the assessment frequency type and frequency number.
+    The goal of the assessment is to focus on the progress of the tasks outlined in each worker's SOPs.
+    Make sure each question is relevant to the worker's SOP, and attach a tag to each question indicating the topic area (e.g., communication, timeline, development).
+    The questions should become more detailed or challenging as the assessment progresses over time.
+    If either the name or role of the assigned person is available in the SOP, use it to formulate the questions.
+
+    Input:
+    assessment type: (e.g., daily, weekly, biweekly)
+    frequency type: (e.g., daily, weekly, biweekly)
+    frequency number: (e.g., day 3, week 2, biweekly 1)
+    total duration: (e.g., 6 weeks, 12 days)
+    SOPs of the assessment:
+
+    Instructions:
+
+    1. Review the SOPs of the assessment and generate questions for the workers based on the frequency type, frequency number, and topic areas.
+    2. Regardless of the assessment type, always use 1,2,3 for the frequency numbering, nothing else.
+    3. Generate questions only for the current frequency number given in the request.
+
+    Example response (a single object for the requested frequency number):
+
+    {
+        "number": 2,
+        "questions": [
+            {
+                "assigned_to": "person name or role",
+                "role": "person role or name",
+                "questions": "e.g., Is the internal project team being followed according to the SOP?"
+            },
+            {
+                "assigned_to": "person name or role",
+                "role": "person role or name",
+                "questions": "e.g., Have communication protocols been followed for the task at hand?"
+            }
+        ]
+    }
+    """
+    return prompt
@@ -0,0 +1,66 @@
+import os
+import json
+from openai import OpenAI
+from pydantic import BaseModel, Field
+from typing import List, Dict, Optional
+from src.prompts.sops import *
+from src.models.questions_response import *
+from src.services.sop_document_parser import DocumentParser
+from src.prompts.questions import get_questions_prompt
+from dotenv import load_dotenv
+load_dotenv()
+
+class QuestionsGenerator:
+    """Generate assessment questions from SOPs, one model call per frequency cycle."""
+
+    def __init__(self):
+        """Create the OpenAI client from the OPENAI_API_KEY environment variable."""
+        self.api_key = os.getenv("OPENAI_API_KEY")
+        self.client = OpenAI(api_key=self.api_key)
+        self.model = "gpt-4o-mini"
+
+    @staticmethod
+    def _parse_duration(duration):
+        """Return *duration* as an int, or raise ValueError if it is not a whole number."""
+        # bool is a subclass of int; True/False is never a meaningful cycle count.
+        if isinstance(duration, bool):
+            raise ValueError("'duration' must be a whole number of cycles.")
+        if isinstance(duration, int):
+            return duration
+        if isinstance(duration, float) and duration.is_integer():
+            return int(duration)
+        if isinstance(duration, str) and duration.strip().isdigit():
+            return int(duration.strip())
+        raise ValueError("'duration' must be a whole number of cycles.")
+
+    def generate_questions(self, input_data):
+        """Generate questions for every cycle from 1 to the requested duration.
+
+        Args:
+            input_data: mapping with the keys 'sops', 'assessment_type',
+                'frequency_type' and 'duration'. 'sops' is normally a JSON
+                string; any other value is serialised with json.dumps.
+                'duration' is the number of cycles, given as an int or a
+                digit-only string.
+
+        Returns:
+            A list with one {"frequency_number": label, "questions": parsed}
+            entry per cycle (empty when the duration is below 1), or False
+            when a model response is missing or is not valid JSON.
+
+        Raises:
+            KeyError: a required key is missing from input_data.
+            ValueError: 'duration' is not a whole number.
+        """
+        sops = input_data['sops']
+        assessment_type = input_data['assessment_type']
+        frequency_type = input_data['frequency_type']
+        total_duration = self._parse_duration(input_data['duration'])
+
+        # The chunking below slices a string, so serialise structured SOPs first.
+        if not isinstance(sops, str):
+            sops = json.dumps(sops)
+
+        # Nothing to generate: skip building the prompt and calling the model.
+        if total_duration < 1:
+            return []
+
+        # Chunk the SOPs into smaller text parts of one user message
+        chunk_size = 1000
+        docs = [
+            {"type": "text", "text": sops[start:start + chunk_size]}
+            for start in range(0, len(sops), chunk_size)
+        ]
+
+        prompt = get_questions_prompt()  # Get the questions prompt for the SOP
+
+        # Only the frequency label changes between cycles, so the surrounding
+        # messages are built once.
+        leading_messages = [
+            {"role": "system", "content": prompt},
+            {"role": "user", "content": "The SOPs are provided below."},
+            {"role": "user", "content": docs},
+            {"role": "user", "content": f"Assessment Type: {assessment_type}"},
+            {"role": "user", "content": f"Frequency Type: {frequency_type}"},
+        ]
+        duration_message = {"role": "user", "content": f"Duration: {total_duration}"}
+
+        all_questions = []
+        try:
+            # Iterate through each frequency number (e.g., week 1, week 2, etc.)
+            for frequency_number in range(1, total_duration + 1):
+                frequency_label = f"{frequency_type} {frequency_number}"  # e.g., weekly 1, daily 3
+
+                response = self.client.beta.chat.completions.parse(
+                    model=self.model,
+                    messages=[
+                        *leading_messages,
+                        {"role": "user", "content": f"Current Frequency Number to generate : {frequency_label}"},
+                        duration_message,
+                    ],
+                    response_format=AssementQuestion,
+                    max_tokens=4096,
+                    temperature=0.1
+                )
+
+                content = response.choices[0].message.content
+                # NOTE(review): content appears to be None when the model returns
+                # no text (e.g. a refusal); treat it like an unparsable response.
+                if content is None:
+                    return False
+
+                all_questions.append({
+                    "frequency_number": frequency_label,
+                    "questions": json.loads(content)
+                })
+        except json.JSONDecodeError:
+            return False
+
+        return all_questions
@@ -4,7 +4,7 @@ from openai import OpenAI
 from pydantic import BaseModel, Field
 from typing import List, Dict, Optional
 from src.prompts.sops import *
-from src.models.response_schemas import *
+from src.models.sop_response_schemas import *
 from dotenv import load_dotenv
 load_dotenv()

@@ -4,7 +4,7 @@ from openai import OpenAI
 from pydantic import BaseModel, Field
 from typing import List, Dict, Optional
 from src.prompts.sops import *
-from src.models.response_schemas import *
+from src.models.sop_response_schemas import *
 from src.services.sop_document_parser import DocumentParser
 from dotenv import load_dotenv
 load_dotenv()
@@ -266,3 +266,5 @@ class SopGeneratorManager:



+
+         
@@ -1,42 +1,72 @@
-from src.services.sop_generator import  SopGeneratorExecutive
-from src.utils.document_loader import load_document
-from src.services.document_parser import DocumentParser
-from src.services.sop_generator import SopGeneratorExecutive
-file_path = r"C:\Users\User\Desktop\Blessing_AI\MKD\test_erp_ai\erp_ai\test\erp_ai\data\raw\document.doc"
-docs = load_document(file_path)
+import json
+from src.services.questions_generator import QuestionsGenerator

-
-if __name__ == "__main__":
-    SOP = DocumentParser()
-    so = SopGeneratorExecutive()
-    referencs_roles = ["AR Director "]
-    workers_list = [
+# Mock SOPs payload: one "Accounts Receivable" department whose workers each carry must/shall/will SOP lists
+mock_sops_input = {
+    "sops": {
+        "departments": [
            {
-      
-        "position": "AR dIRECTOR ",
-        "role": "Developer",
-        "department": "IT"
+                "name": "Accounts Receivable",
+                "workers": [
+                    {
+                        "name": "Angela Lewis",
+                        "position": "AR Director",
+                        "sops": {
+                            "must": [],
+                            "shall": [],
+                            "will": []
+                        }
                    },
                    {
-        "name": "Jane Smith",
-        "position": "Project Manager",
-        "role": "Manager",
-        "department": "IT"
+                        "name": "Jamie Vega",
+                        "position": "AR Supervisor",
+                        "sops": {
+                            "must": [
+                                "Provide escalation information to the AR Director if additional escalations are needed."
+                            ],
+                            "shall": [],
+                            "will": []
+                        }
+                    },
+                    {
+                        "name": "Jessica Merzougui",
+                        "position": "AR Supervisor",
+                        "sops": {
+                            "must": [
+                                "Provide escalation information to the AR Director if additional escalations are needed."
+                            ],
+                            "shall": [],
+                            "will": []
+                        }
+                    },
+                    {
+                        "name": "Deniece Santos",
+                        "position": "AR Analyst",
+                        "sops": {
+                            "must": [],
+                            "shall": [],
+                            "will": []
+                        }
                    }
                ]
-
-    departments_and_roles = SOP.extract_sops_for_workers_by_department(docs,workers_list)
-    # Prepare extracted roles (only managers)
-    '''extracted_managers = []
-    for department in departments_and_roles['departments']:
-        extracted_managers.extend([
-            {
-                'name': manager['name'],
-                'position': manager.get('position', 'Unknown Position'),
-                'role': manager.get('role', 'Unknown Role')  # PRP or SRP classification
            }
-            for manager in department['managers']
-        ])'''
+        ]
+    }
+}

-    print(departments_and_roles)
+# Full generator input: the mock SOPs plus the assessment parameters
+input_data = {
+    "sops": json.dumps(mock_sops_input),  # Convert SOPs to JSON string format
+    "assessment_type": "progress",        # Set the assessment type
+    "frequency_type": "weekly",           # Set the frequency type (daily, weekly, etc.)
+    "duration": 4                         # Set the duration (number of frequency cycles, e.g., 4 weeks)
+}

+# Run the live generation only when executed as a script, so importing this
+# module does not create an OpenAI client or call the API.
+if __name__ == "__main__":
+    # Initialize the QuestionsGenerator
+    generator = QuestionsGenerator()
+
+    # Invoke the generate_questions method with the mock input
+    questions_response = generator.generate_questions(input_data)
+
+    # Print the generated questions response
+    print(json.dumps(questions_response, indent=4))