From 67d30fbc6af781ba41ab656352ae6700a1db8bc0 Mon Sep 17 00:00:00 2001
From: kowshik <kowshik@mkd.com>
Date: Thu, 5 Sep 2024 02:59:01 +0000
Subject: [PATCH] added sops generation for personal assessment

---
 data/raw/document.pdf          | Bin 70797 -> 70797 bytes
 src/api/routes/sops.py         | 143 +++++++++++++++++++------
 src/models/response_schemas.py |  28 +++++
 src/prompts/sops.py            |  46 ++++++++
 src/services/sop_generator.py  | 186 ++++++++++++++++++++++++++++-----
 test.py                        |  36 +++++--
 6 files changed, 368 insertions(+), 71 deletions(-)
 create mode 100644 src/models/response_schemas.py
 create mode 100644 src/prompts/sops.py

diff --git a/data/raw/document.pdf b/data/raw/document.pdf
index 7527313126d76dbe615e9a3f5814830893686e59..3b0233957549ee42dac1573c95476bb6f2ece22f 100644
GIT binary patch
delta 99
zcmeBO$<n)$Wkawqr=@|Zfsv7=iBWTm@bnmAMkOa>XA2isb8|CiBSUjH11ED!a}yU=
ZQxgM23u8wMX9Ht98$v3k9}s3d3jpe77{mYo

delta 99
zcmeBO$<n)$Wkawqr-iYhfq|iciA8ga@bnmAMkObph?ALxtEH)llZ&O3xs$7tqmhM)
Yp|h!(qot{tv74O@Ar;dP2s54q0PqMI3;+NC

diff --git a/src/api/routes/sops.py b/src/api/routes/sops.py
index fb96ba1..e9939bb 100644
--- a/src/api/routes/sops.py
+++ b/src/api/routes/sops.py
@@ -2,7 +2,9 @@ import os
 from flask import Blueprint, request, jsonify, current_app
 from werkzeug.utils import secure_filename
 
-from src.services.sop_generator import SopGenerator  
+from src.services.sop_generator import (SopGenerator,SopGeneratorDocument,
+                                        SopPersonalAssessment)
+
 from src.utils.utils import delete_all_files_in_directory
 from src.utils.document_loader import load_document  
 import json
@@ -59,8 +61,8 @@ def get_roles():
 
 
 
-@sops_bp.route('/generate_sops_from_doc', methods=['POST'])
-def generate_sops():
+@sops_bp.route('/generate_questions_from_doc', methods=['POST'])
+def generate_questions_from_sop():
     # Check if the POST request has the file part
     if 'document' not in request.files:
         return jsonify({"error": "No file part", "message": "Please upload a file with the key 'document'."}), 400
@@ -101,7 +103,8 @@ def generate_sops():
                 return jsonify({"error": "Document cannot extract SOPs", "message": status_check["message"]}), 400
             
             # Generate SOPs based on the roles provided
-            sops = sop_generator.generate_sops_from_doc(roles, docs)
+            sop_generator = SopGeneratorDocument()
+            sops = sop_generator.extract_sops_from_doc(docs)  # the only document-level method SopGeneratorDocument defines
             
             # Cleanup: Delete all files in the upload directory after processing
             delete_all_files_in_directory(upload_folder)
@@ -117,44 +120,120 @@ def generate_sops():
 
 
 
-@sops_bp.route('/generate_sops_from_info', methods=['POST'])
-def generate_sops_from_info():
+@sops_bp.route('/generate_sops_from_doc', methods=['POST'])
+def generate_sops():
+    """Extract the vision, mission and role-specific SOPs from a document uploaded under the key 'document'."""
+    # Check if the POST request has the file part
+    if 'document' not in request.files:
+        return jsonify({"error": "No file part", "message": "Please upload a file with the key 'document'."}), 400
+
+    file = request.files['document']
+
+    # If the user does not select a file, the browser may also submit an empty part without a filename
+    if file.filename == '':
+        return jsonify({"error": "No selected file", "message": "A file was not selected for upload. Please select a valid file."}), 400
+
+    if file and allowed_file(file.filename):
+        filename = secure_filename(file.filename)
+        upload_folder = current_app.config['UPLOAD_FOLDER']
+        file_path = os.path.join(upload_folder, filename)
+
+        # Save the file to the upload folder
+        file.save(file_path)
+
+        try:
+            # Use the utility function to generate docs from the file
+            docs = load_document(file_path)
+
+            # Extract the vision, mission and role-specific SOPs from the document
+            sop_generator = SopGeneratorDocument()
+            sops = sop_generator.extract_sops_from_doc(docs)
+            # Cleanup: Delete all files in the upload directory after processing
+            delete_all_files_in_directory(upload_folder)
+            if not sops:
+                # The extractor returns a falsy value on failure; report it as a server error, not 200
+                return jsonify({"error":"Error in generating sops"}), 500
+
+            return jsonify({"sops": sops, "message": "SOPs successfully generated for the roles from the document."}), 200
+
+        except Exception as e:
+            # Cleanup: Delete all files in the upload directory if an error occurs
+            delete_all_files_in_directory(upload_folder)
+            return jsonify({"error": "Processing error", "message": f"An error occurred while processing the document: {str(e)}"}), 500
+
+    return jsonify({"error": "File type not allowed", "message": "The uploaded file type is not allowed. Please upload a PDF, DOC, or DOCX file."}), 400
+
+
+
+
+@sops_bp.route('/personal_assessment/generate_sops_from_questionnaire', methods=['POST'])
+def generate_sops_from_questionnaire():
     """
-    Generate SOPs based on role information provided in the request body.
+    Generate SOPs based on the questionnaire data provided in the request body.
+    The request body is expected to contain plain-text information for vision, roles, responsibilities, and project details.
     """
     try:
-        # Get role information from the request body
-        roles_info = request.json.get('roles_info')
-        if not roles_info:
-            return jsonify({"error": "No role information provided", "message": "Please provide role information in the 'roles_info' field."}), 400
-        
-        # Generate SOPs based on the provided role information
-        sops_response = sop_generator.generate_sops_from_info(roles=roles_info)
-        
-        return jsonify({"sops": sops_response, "message": "SOPs successfully generated based on the provided role information."}), 200
+        # Get the questionnaire data; silent=True yields None instead of raising on a missing/invalid JSON body
+        questionnaire_data = request.get_json(silent=True)
+
+        # Validate the payload shape and the required fields in the questionnaire data
+        if not isinstance(questionnaire_data, dict) or not all(questionnaire_data.get(key) for key in ('vision', 'roles', 'responsibilities')):
+            return jsonify({
+                "error": "Missing required fields",
+                "message": "Please provide 'vision', 'roles', and 'responsibilities' in the request body."
+            }), 400
+
+        # Step 1: Call the function from the sop_generator
+        sop_generator = SopPersonalAssessment()
+        sops_response = sop_generator.extract_sops_from_questionnaire(questionnaire_data)
+
+        # Step 2: Return the SOPs if the extraction is successful
+        if not sops_response:
+            return jsonify({
+                "error": "SOP generation failed",
+                "message": "Failed to generate SOPs based on the provided questionnaire data."
+            }), 500
+
+        return jsonify({
+            "sops": sops_response,
+            "message": "SOPs successfully generated based on the provided questionnaire data."
+        }), 200
 
     except Exception as e:
-        return jsonify({"error": "Processing error", "message": f"An error occurred while generating SOPs: {str(e)}"}), 500
+        return jsonify({
+            "error": "Processing error",
+            "message": f"An error occurred while generating SOPs: {str(e)}"
+        }), 500
 
 
-@sops_bp.route('/generate_sops_by_role_and_area', methods=['POST'])
-def generate_sops_by_role_and_area():
+
+@sops_bp.route('/personal_assessment/generate_sops_by_roles_and_areas', methods=['POST'])
+def generate_sops_by_roles_and_areas():
+    """
+    Generate SOPs based on the roles, SOP types (will, shall, must), and areas provided in the request body.
+    """
     try:
-        # Get role and area from the request body
-        role = request.json.get('role')
-        area = request.json.get('area')
-        
-        if not role or not area:
-            return jsonify({"error": "Missing parameters", "message": "Both 'role' and 'area' fields are required."}), 400
-        
-        # Generate SOPs based on the provided role and area
-        sops_response = sop_generator.generate_sops_by_role_and_area(role=role, area=area)
-        
-        return jsonify({"sops": sops_response, "message": f"SOPs successfully generated for role '{role}' in area '{area}'."}), 200
+        # Read the payload; silent=True yields None instead of raising on a missing/invalid JSON body
+        payload = request.get_json(silent=True)
+        roles = payload.get('roles') if isinstance(payload, dict) else None
+        # Validate the presence of roles data before constructing the generator
+        if not roles or not isinstance(roles, list):
+            return jsonify({"error": "Invalid input", "message": "The 'roles' field should be a non-empty list."}), 400
+
+        # Generate SOPs for all roles at once; the generator returns False when any role fails
+        sops_response = SopPersonalAssessment().generate_sops_by_role_and_area(roles=roles)
+        if not sops_response:
+            return jsonify({"error": "SOP generation failed", "message": "Failed to generate SOPs for the provided roles."}), 500
+        return jsonify({
+            "sops": sops_response,
+            "message": "SOPs successfully generated for all provided roles."
+        }), 200
 
     except Exception as e:
-        return jsonify({"error": "Processing error", "message": f"An error occurred while generating SOPs: {str(e)}"}), 500
-
+        return jsonify({
+            "error": "Processing error",
+            "message": f"An error occurred while generating SOPs: {str(e)}"
+        }), 500
 
 
 @sops_bp.route('/executive/generate_sops_from_questionnaire', methods=['POST'])
diff --git a/src/models/response_schemas.py b/src/models/response_schemas.py
new file mode 100644
index 0000000..92c6a1e
--- /dev/null
+++ b/src/models/response_schemas.py
@@ -0,0 +1,28 @@
+from pydantic import BaseModel, Field
+from typing import List, Optional
+
+class RoleSops(BaseModel):  # SOP lists for one role, keyed by obligation level
+    role:str
+    must: Optional[List[str]] = Field(default_factory=list)
+    shall: Optional[List[str]] = Field(default_factory=list)
+    will: Optional[List[str]] = Field(default_factory=list)
+
+# NOTE: no RoleSOPs wrapper (a single `sops` field) is defined in this module;
+# RoleSops above carries the role name together with its must/shall/will lists.
+
+class SOPsFound(BaseModel):  # a boolean status with an accompanying message
+    message: str
+    status: bool
+
+class RolesResponse(BaseModel):  # a plain list of role names
+    roles: List[str]
+
+class SOPsResponse(BaseModel):  # per-role SOPs plus mission and vision statements
+    roles_sops: List[RoleSops]
+    mission: List[str]
+    vission:List[str]  # NOTE(review): looks like a misspelling of "vision"; renaming changes the response key — confirm with consumers
+
+class VisionMissionResponse(BaseModel):  # vision/mission may be None; message is always required
+    vision: Optional[str]
+    mission: Optional[str]
+    message: str
\ No newline at end of file
diff --git a/src/prompts/sops.py b/src/prompts/sops.py
new file mode 100644
index 0000000..ceb3bd7
--- /dev/null
+++ b/src/prompts/sops.py
@@ -0,0 +1,46 @@
+def get_sop_extraction_from_doc():
+    """Return the system prompt for extracting vision, mission and role-specific SOPs from a document."""
+    return '''Your task is to extract the "Vision", "Mission", and role-specific Standard Operating Procedures (SOPs) from the provided document.
+    You must extract and categorize the SOPs into three categories: "must", "shall", and "will."
+    Instructions:
+    1. **Vision**: Extract the vision of the company or organization.
+    2. **Mission**: Extract the mission of the company or organization. If not explicitly mentioned, consider the mission as the company's goals.
+    3. **Role-specific SOPs**: 
+        - Categorize the SOPs under three categories: "must," "shall," and "will."
+        - The SOPs should be directly addressed to the person in the role. Do not reference the role itself in the SOP.
+        - If SOPs for the role are not explicitly stated, infer them from the context, but only if there is clear evidence within the document. Do not generate or assume SOPs that are not directly supported by the document.
+        - If no SOPs are found for the role, return an empty list for each category.
+    Provide the extracted sections exactly as they appear in the document.'''
+
+
+
+def get_sop_personalassessment_from_questionnaire():
+    """Return the system prompt for generating role-specific SOPs from free-text questionnaire answers."""
+    return '''Your task is to generate Standard Operating Procedures (SOPs) based on the responses to the questionnaire provided.
+    You must extract and categorize the SOPs into three categories: "must," "shall," and "will."
+
+    Instructions:
+    1. **Vision or Strategic Direction**: Use the provided project vision or strategic direction to align SOPs with the overall goals of the project.
+    2. **Roles**: Generate SOPs for the specific roles required for the project.
+    3. **Responsibilities**: For each role, use the outlined key responsibilities to generate SOPs.
+    4. **Project Details**: Incorporate any additional project information to ensure the SOPs reflect the full scope of the project.
+    5. **Vision and Mission output**: Return the provided vision as the vision. Return a mission only if the responses state one; otherwise return an empty list.
+
+    Role-specific SOPs:
+    - Categorize the SOPs under three categories: "must," "shall," and "will."
+    - The SOPs should be directly aligned with the role and responsibilities provided in the questionnaire.
+    - If SOPs for the role are not explicitly mentioned, infer them from the context of the responsibilities, vision, and project details, but only if there is clear evidence. Do not generate or assume SOPs that are not directly supported by the information provided.
+    - If no SOPs are found for the role, return an empty list for each category.
+    Provide the generated SOPs based on the questionnaire responses.'''
+
+def get_sop_personalassessment_from_area_role(role,areas,sop_types):
+    """Return the system prompt for generating SOPs of the selected types for one role; areas and sop_types may be a string or a list."""
+    areas = areas if isinstance(areas, str) else ", ".join(map(str, areas))
+    sop_types = sop_types if isinstance(sop_types, str) else ", ".join(map(str, sop_types))
+    return f"""Your job is to generate Standard Operating Procedures (SOPs) for the role of "{role}" with a focus on the areas "{areas}" based on the following instructions:
+            Instructions:
+            Categorization: Organize the SOPs only under the selected categories ({sop_types}), chosen from "must", "shall" and "will".
+            Direct Instructions: The SOPs should directly address responsibilities, objectives, and challenges related to the area of "{areas}" for the role of "{role}".
+            Contextual Inference: If SOPs for the area are not explicitly stated, infer them from the role and area context provided.
+            Empty Lists: If no SOPs are generated for a category, or the category was not selected, return an empty list for it.
+            Format: The SOPs should be direct and concise."""
\ No newline at end of file
diff --git a/src/services/sop_generator.py b/src/services/sop_generator.py
index 0c7abb4..ba34235 100644
--- a/src/services/sop_generator.py
+++ b/src/services/sop_generator.py
@@ -3,29 +3,10 @@ import json
 from openai import OpenAI
 from pydantic import BaseModel, Field
 from typing import List, Dict, Optional
+from src.prompts.sops import *
+from src.models.response_schemas import *
 
-class SOPs(BaseModel):
-    must: Optional[List[str]] = Field(default_factory=list)
-    shall: Optional[List[str]] = Field(default_factory=list)
-    will: Optional[List[str]] = Field(default_factory=list)
 
-class RoleSOPs(BaseModel):
-    sops: SOPs
-
-class SOPsFound(BaseModel):
-    message: str
-    status: bool
-
-class RolesResponse(BaseModel):
-    roles: List[str]
-
-class SOPsResponse(BaseModel):
-    roles_sops: Dict[str, SOPs] = Field(default_factory=dict)
-
-class VisionMissionResponse(BaseModel):
-    vision: Optional[str]
-    mission: Optional[str]
-    message: str
 
 
 class SopGenerator:
@@ -171,19 +152,19 @@ class SopGenerator:
 
 
 
-    def generate_sops_by_role_and_area(self, role: str, area: str) -> RoleSOPs:
+    def generate_sops_by_role_and_area(self, role: str, areas: str) -> RoleSops:
        
         response = self.client.beta.chat.completions.parse(
             model=self.model,
             messages=[
                 {
                     "role": "system",
-                    "content": f'''Your job is to generate Standard Operating Procedures (SOPs) for the role of "{role}" with a focus on the area "{area}" based on the following instructions:
+                    "content": f'''Your job is to generate Standard Operating Procedures (SOPs) for the role of "{role}" with a focus on the areas "{areas}" based on the following instructions:
 
                                    Instructions:
 
-                                   Categorization: Organize the SOPs under three categories: "must," "shall," and "will."
-                                   Direct Instructions: The SOPs should directly address responsibilities, objectives, and challenges related to the area of "{area}" for the role of "{role}".
+                                   Categorization: Organize the SOPs under the selected categories: a checkboxex of the three categories "must" , "shall" and "will"
+                                   Direct Instructions: The SOPs should directly address responsibilities, objectives, and challenges related to the area of "{areas}" for the role of "{role}".
                                    Contextual Inference: If SOPs for the area are not explicitly stated, infer them from the role and area context provided.
                                    Empty Lists: If no SOPs are generated, return an empty list for each category.
                                    Format: The SOPs should be direct and concise.
@@ -197,7 +178,7 @@ class SopGenerator:
         return json.loads(response.choices[0].message.content)
 
 
-    def generate_executive_sops_from_questionnaire(self, data: dict) -> RoleSOPs:
+    def generate_executive_sops_from_questionnaire(self, data: dict) -> RoleSops:
         """
         Generate SOPs based on the answers from an executive questionnaire.
         
@@ -242,7 +223,7 @@ class SopGenerator:
                                    ''',
                 }
             ],
-            response_format=RoleSOPs,
+            response_format=RoleSops,
             max_tokens=1024,
             temperature=0.1
         )
@@ -325,8 +306,6 @@ class SopGenerator:
         # Parse the response from the LLM
         extracted_text = json.loads(response.choices[0].message.content)
 
-
-        print(F"extracted text:{extracted_text}")
         # Assuming the response contains fields for 'vision' and 'mission' (or 'goals')
         vision_section = extracted_text["vision"]
         mission_section = extracted_text["mission"]
@@ -334,3 +313,152 @@ class SopGenerator:
         return vision_section, mission_section
 
 
+
+
+class SopGeneratorDocument:
+    """Extracts vision, mission and role-specific SOPs from loaded documents via the OpenAI API."""
+
+    def __init__(self):
+        self.api_key = os.getenv("OPENAI_API_KEY")
+        self.client = OpenAI(api_key=self.api_key)
+        self.model = "gpt-4o-2024-08-06"
+
+    def _extract_text_from_docs(self, docs):
+        """Extract text content from document objects."""
+        return [doc.page_content for doc in docs]
+
+    def extract_sops_from_doc(self, docs) -> "dict | bool":
+        """
+        Extracts Vision, Mission, and SOPs categorized into 'must,' 'shall,' and 'will' from the document.
+
+        :param docs: The document(s) from which to extract information.
+        :return: The parsed SOPsResponse payload as a dict, or False if the request or parsing fails.
+        """
+        try:
+            docs_text = self._extract_text_from_docs(docs)
+            response = self.client.beta.chat.completions.parse(
+                model=self.model,
+                messages=[
+                    {
+                        "role": "system",
+                        "content": get_sop_extraction_from_doc(),
+                    },
+                    {
+                        "role": "user",
+                        # One text part per document object
+                        "content": [{"type": "text", "text": text} for text in docs_text],
+                    },
+                ],
+                response_format=SOPsResponse,
+                max_tokens=4096,
+                temperature=0.1,
+            )
+
+            # The structured-output reply is a JSON string; hand it back as a plain dict
+            return json.loads(response.choices[0].message.content)
+
+        except Exception as e:
+            # Callers treat a falsy result as failure, so report the cause and return False
+            print(f"Error occurred: {str(e)}")
+            return False
+
+
+
+class SopPersonalAssessment:
+    """Generates personal-assessment SOPs via OpenAI structured-output chat completions."""
+
+    def __init__(self):
+        self.api_key = os.getenv("OPENAI_API_KEY")
+        self.client = OpenAI(api_key=self.api_key)
+        self.model = "gpt-4o-mini"
+
+    def extract_sops_from_questionnaire(self, questionnaire_data: dict) -> "dict | bool":
+        """
+        Extracts SOPs categorized into 'must,' 'shall,' and 'will' based on free-text questionnaire information.
+
+        :param questionnaire_data: A dictionary containing plain text responses to the questionnaire.
+        :return: The parsed SOPsResponse payload as a dict, or False if the request or parsing fails.
+        """
+        try:
+            # Extract the plain text answers from the questionnaire
+            vision = questionnaire_data.get("vision", "No vision provided")
+            roles = questionnaire_data.get("roles", "No roles provided")  # This will be a plain text string
+            responsibilities = questionnaire_data.get("responsibilities", "No responsibilities provided")  # Plain text string
+            project_details = questionnaire_data.get("project_details", "No additional project details provided")
+
+            # Get the appropriate prompt for the questionnaire scenario
+            prompt = get_sop_personalassessment_from_questionnaire()
+
+            # Combine the plain text data into a string to pass to the LLM
+            user_content = f'''
+            Vision: {vision}
+            Roles: {roles}
+            Responsibilities: {responsibilities}
+            Project Details: {project_details}
+            '''
+
+            # Send the combined plain text content to the LLM
+            response = self.client.beta.chat.completions.parse(
+                model=self.model,
+                messages=[
+                    {
+                        "role": "system",
+                        "content": prompt,
+                    },
+                    {
+                        "role": "user",
+                        "content": user_content,
+                    },
+                ],
+                response_format=SOPsResponse,
+                max_tokens=2048,
+                temperature=0.1,
+            )
+
+            # The structured-output reply is a JSON string; hand it back as a plain dict
+            return json.loads(response.choices[0].message.content)
+
+        except Exception as e:
+            # Callers treat a falsy result as failure, so report the cause and return False
+            print(f"Error occurred: {str(e)}")
+            return False
+
+    def generate_sops_by_role_and_area(self, roles: List[dict]) -> "List[dict] | bool":
+        """
+        Generates SOPs for each requested role, limited to its selected SOP types and areas.
+
+        :param roles: A list of dicts, each providing the keys 'role', 'sop_types' and 'areas'.
+        :return: A list with one parsed RoleSops dict per entry in roles (same order),
+                 or False if any entry lacks a key or any request fails.
+        """
+        try:
+            sops_by_role = []
+            for role_info in roles:
+                role = role_info['role']
+                sop_types = role_info['sop_types']  # List of SOP types: ["will", "shall", "must"]
+                areas = role_info['areas']  # List of areas: ["communication", "development", etc.]
+
+                # Build the role-specific system prompt
+                prompt = get_sop_personalassessment_from_area_role(role, areas, sop_types)
+                response = self.client.beta.chat.completions.parse(
+                    model=self.model,
+                    messages=[
+                        {
+                            "role": "system",
+                            "content": prompt,
+                        }
+                    ],
+                    response_format=RoleSops,
+                    max_tokens=1024,
+                    temperature=0.1,
+                )
+                sops_by_role.append(json.loads(response.choices[0].message.content))
+
+            return sops_by_role
+
+        except Exception as e:
+            # Catch Exception (not a bare except) so KeyboardInterrupt/SystemExit still propagate
+            print(f"Error occurred: {str(e)}")
+            return False
+        
+
diff --git a/test.py b/test.py
index 62ab249..8301929 100644
--- a/test.py
+++ b/test.py
@@ -1,21 +1,37 @@
 from src.services.sop_generator import SopGenerator
 from src.utils.document_loader import load_document
+from src.services.sop_generator import SopPersonalAssessment
 
 
-file_path = "/root/ds_erp_ai/data/raw/test_sop.pdf"
+file_path = "/root/ds_erp_ai/data/raw/document.doc"
 docs = load_document(file_path)
-sop = SopGenerator()
+from src.services.sop_generator import SopGeneratorDocument
+sop = SopPersonalAssessment()
 
 
 if __name__ == "__main__":
     # Assuming 'sop' is an instance of SopGenerator and 'docs' is the loaded document content.
-    
     # Step 1: Get the roles from the document
-    roles = sop.get_roles(docs)["roles"]
-    print(f"Roles {roles}")
- 
-    sop_status = sop.check_role_sop(roles=roles, docs=docs)
-    print(sop_status)
-    roles = ["cloud engineer"]
-    sops = sop.generate_sops(roles=roles,docs=docs)
+    
+    roles =  [
+        {
+            "role": "Content Marketing Specialist",
+            "sop_types": ["will", "shall"],
+            "areas": ["communication", "development"]
+        },
+        {
+            "role": "Digital Marketing Specialist",
+            "sop_types": ["must"],
+            "areas": ["finance", "project management"]
+        },
+        {
+            "role": "Information Technology Officer",
+            "sop_types": ["shall", "must"],
+            "areas": ["development", "communication", "operations"]
+        }
+    ]
+
+    # One request is sent per entry above; the result is a list of per-role SOP dicts,
+    # or False if any entry is malformed or any request fails.
+    sops = sop.generate_sops_by_role_and_area(roles)
     print(f"sops:{sops}")