From 594f0eadb3336891159754c7a47574d6fcb2c442 Mon Sep 17 00:00:00 2001
From: OwusuBlessing <owususammy509@gmail.com>
Date: Tue, 10 Sep 2024 21:22:52 +0100
Subject: [PATCH] added adjustments to roles and questions

---
 src/api/routes/sops.py              |  62 +-----
 src/models/sop_response_schemas.py  |  18 +-
 src/prompts/sops.py                 |   5 +-
 src/services/document_parser.py     | 294 ++++++++++++++++++++++++++++
 src/services/sop_document_parser.py |  31 ++-
 5 files changed, 339 insertions(+), 71 deletions(-)
 create mode 100644 src/services/document_parser.py

diff --git a/src/api/routes/sops.py b/src/api/routes/sops.py
index e98fad6..19d6201 100644
--- a/src/api/routes/sops.py
+++ b/src/api/routes/sops.py
@@ -19,7 +19,7 @@ def allowed_file(filename):
     """Check if the file has an allowed extension."""
     return '.' in filename and filename.rsplit('.', 1)[1].lower() in ALLOWED_EXTENSIONS
 
-@sops_bp.route('/get_roles', methods=['POST'])
+@sops_bp.route('/personal_assessment/get_roles', methods=['POST'])
 def get_roles():
     # Check if the post request has the file part
     if 'document' not in request.files:
@@ -44,7 +44,8 @@ def get_roles():
             docs = load_document(file_path)
             
             # Generate roles from the docs
-            roles = sop_generator.get_roles(docs)["roles"]
+            parser = DocumentParser()
+            roles = parser.get_roles(docs)["roles"]
             
             # Cleanup: Delete all files in the upload directory after processing
             delete_all_files_in_directory(upload_folder)
@@ -60,63 +61,6 @@ def get_roles():
 
 
 
-@sops_bp.route('/generate_questions_from_doc', methods=['POST'])
-def generate_questions_from_sop():
-    # Check if the POST request has the file part
-    if 'document' not in request.files:
-        return jsonify({"error": "No file part", "message": "Please upload a file with the key 'document'."}), 400
-
-    print("Running................")
-
-    file = request.files['document']
-    roles_json = request.form.get('roles')  # Get the roles as a JSON string
-    if not roles_json:
-        return jsonify({"error": "No roles provided", "message": "Please provide a list of roles in the 'roles' field."}), 400
-
-    try:
-        roles = json.loads(roles_json)  # Parse the roles from JSON string to a list
-        print(f"Roles are:{roles}")
-    except json.JSONDecodeError:
-        return jsonify({"error": "Invalid JSON", "message": "The 'roles' field contains invalid JSON."}), 400
-
-    # If the user does not select a file, the browser may also submit an empty part without a filename
-    if file.filename == '':
-        return jsonify({"error": "No selected file", "message": "A file was not selected for upload. Please select a valid file."}), 400
-
-    if file and allowed_file(file.filename):
-        filename = secure_filename(file.filename)
-        upload_folder = current_app.config['UPLOAD_FOLDER']
-        file_path = os.path.join(upload_folder, filename)
-        
-        # Save the file to the upload folder
-        file.save(file_path)
-
-        try:
-            # Use the utility function to generate docs from the file
-            docs = load_document(file_path)
-            
-            # Check if the document can generate SOPs for the roles
-            status_check = sop_generator.check_role_sop(roles=roles, docs=docs)
-
-            if not status_check["status"]:
-                return jsonify({"error": "Document cannot extract SOPs", "message": status_check["message"]}), 400
-            
-            # Generate SOPs based on the roles provided
-            sop_generator = DocumentParser()
-            sops = sop_generator.generate_sops_from_doc(docs)
-            
-            # Cleanup: Delete all files in the upload directory after processing
-            delete_all_files_in_directory(upload_folder)
-            
-            return jsonify({"sops": sops, "message": "SOPs successfully generated for the roles from the document."}), 200
-
-        except Exception as e:
-            # Cleanup: Delete all files in the upload directory if an error occurs
-            delete_all_files_in_directory(upload_folder)
-            return jsonify({"error": "Processing error", "message": f"An error occurred while processing the document: {str(e)}"}), 500
-
-    return jsonify({"error": "File type not allowed", "message": "The uploaded file type is not allowed. Please upload a PDF, DOC, or DOCX file."}), 400
-
 
 
 @sops_bp.route('/personal_assessment/generate_sops_from_doc', methods=['POST'])
diff --git a/src/models/sop_response_schemas.py b/src/models/sop_response_schemas.py
index decc9da..213abfb 100644
--- a/src/models/sop_response_schemas.py
+++ b/src/models/sop_response_schemas.py
@@ -1,21 +1,26 @@
 from pydantic import BaseModel, Field
 from typing import List, Optional
 
-class RoleSops(BaseModel):
-    role:str
+
+class Categories(BaseModel):  # SOP statements split into must/shall/will lists; each list defaults to empty
     must: Optional[List[str]] = Field(default_factory=list)
     shall: Optional[List[str]] = Field(default_factory=list)
     will: Optional[List[str]] = Field(default_factory=list)
 
+class RoleSops(BaseModel):  # one role paired with its categorised SOP statements
+    role:str  # role name
+    sops:Categories  # the role's SOPs, nested under a "sops" key
+
 #class RoleSOPs(BaseModel):
 #    sops: SOPs
+class Roles_response(BaseModel):
+    # Role names extracted from a document; typing.List matches the rest of this module.
+    roles: List[str]
 
 class SOPsFound(BaseModel):
     message: str
     status: bool
 
-class RolesResponse(BaseModel):
-    roles: List[str]
 
 class SOPsResponse(BaseModel):
     roles_sops: List[RoleSops]
@@ -26,11 +31,6 @@ class VisionMissionResponse(BaseModel):
     vision: List[str]
     mission: List[str]
    
-
-class Categories(BaseModel):
-    must: Optional[List[str]] = Field(default_factory=list)
-    shall: Optional[List[str]] = Field(default_factory=list)
-    will: Optional[List[str]] = Field(default_factory=list)
     
 class ExecutivesSops(BaseModel):
     executive_sops: List[RoleSops]
diff --git a/src/prompts/sops.py b/src/prompts/sops.py
index f1a797e..dd62892 100644
--- a/src/prompts/sops.py
+++ b/src/prompts/sops.py
@@ -275,7 +275,7 @@ def get_sop_for_department_workers():
     5. Use the provided document and the workers and department information to generate the SOP.
     6. If the provided document cannot provide SOPs for a specific worker stated, then return an empty list for the SOP for that worker.
     
-    Example format:
+    Example forma
     {
         "departments": [
             {
@@ -287,8 +287,9 @@ def get_sop_for_department_workers():
                         "shall": ["Submit monthly reports"],
                         "will": ["Improve efficiency"]
                     }
+
                 ]
             }
         ]
-    }s
+    }
     '''
diff --git a/src/services/document_parser.py b/src/services/document_parser.py
new file mode 100644
index 0000000..f3c22b8
--- /dev/null
+++ b/src/services/document_parser.py
@@ -0,0 +1,294 @@
+import os
+import json
+from openai import OpenAI
+from pydantic import BaseModel, Field
+from typing import List, Dict, Optional
+from src.prompts.sops import *
+from src.models.sop_response_schemas import *
+from dotenv import load_dotenv
+load_dotenv()
+
+
+#SopGeneratorDocument
+class DocumentParser:
+    def __init__(self):
+        """Read OPENAI_API_KEY from the environment and build the OpenAI client."""
+        self.api_key, self.model = os.getenv("OPENAI_API_KEY"), "gpt-4o-2024-08-06"
+        self.client = OpenAI(api_key=self.api_key)
+
+    def _extract_text_from_docs(self, docs):
+        """Return the ``page_content`` of each item in ``docs`` as a list, in order."""
+        texts = [chunk.page_content for chunk in docs]
+        return texts
+
+    def extract_sops_from_doc(self, docs):
+        """
+        Extract role-specific SOPs from the document via the chat model.
+
+        The text of every chunk is sent together with the system prompt from
+        get_sop_extraction_from_doc(); the reply format is constrained to SOPsResponse.
+
+        :param docs: Iterable of document chunks exposing a ``page_content`` attribute.
+        :return: The model's JSON reply decoded with json.loads (presumably a dict shaped
+            like SOPsResponse), or False when any step raises.
+        """
+        try:
+            docs_text = self._extract_text_from_docs(docs)
+            prompt = get_sop_extraction_from_doc()
+            response = self.client.beta.chat.completions.parse(
+                model=self.model,
+                messages=[
+                    {"role": "system", "content": f'''{prompt}'''},
+                    {
+                        "role": "user",
+                        # One text part per chunk so the whole document is included.
+                        "content": [{"type": "text", "text": text} for text in docs_text],
+                    },
+                ],
+                response_format=SOPsResponse,
+                max_tokens=4096,
+                temperature=0.1,
+            )
+            # The reply is a JSON string carried by the first choice's message.
+            return json.loads(response.choices[0].message.content)
+        except Exception as e:
+            # Exception (not a bare except) so KeyboardInterrupt/SystemExit still propagate;
+            # callers keep receiving False on failure, and the cause is now printed.
+            print(f"Error occurred: {e}")
+            return False
+
+    def extract_vision_mission(self, docs):
+        """
+        Extract the vision and mission statements from the document via the chat model.
+
+        The text of every chunk is sent together with the system prompt from
+        get_vision_mission_extraction_from_doc(); the reply format is VisionMissionResponse.
+
+        :param docs: Iterable of document chunks exposing a ``page_content`` attribute.
+        :return: The model's JSON reply decoded with json.loads (presumably a dict with
+            ``vision`` and ``mission`` lists), or False when any step raises.
+        """
+        try:
+            docs_text = self._extract_text_from_docs(docs)
+            prompt = get_vision_mission_extraction_from_doc()
+            response = self.client.beta.chat.completions.parse(
+                model=self.model,
+                messages=[
+                    {"role": "system", "content": f'''{prompt}'''},
+                    {
+                        "role": "user",
+                        # One text part per chunk so the whole document is included.
+                        "content": [{"type": "text", "text": text} for text in docs_text],
+                    },
+                ],
+                response_format=VisionMissionResponse,
+                max_tokens=4096,
+                temperature=0.1,
+            )
+            # The reply is a JSON string carried by the first choice's message.
+            return json.loads(response.choices[0].message.content)
+        except Exception as e:
+            # Exception (not a bare except) so KeyboardInterrupt/SystemExit still propagate;
+            # callers keep receiving False on failure, and the cause is now printed.
+            print(f"Error occurred: {e}")
+            return False
+    
+    '''def extract_departments_and_managers(self, docs):
+        """
+        Extract departments and managerial roles from the document.
+        
+        :param docs: List of document chunks
+        :return: Dictionary containing departments and their managerial roles
+        """
+        try:
+            docs_text = self._extract_text_from_docs(docs)
+            prompt = get_departments_and_roles_extraction_prompt()
+                
+            response = self.client.beta.chat.completions.parse(
+                    model=self.model,
+                    messages=[
+                        {"role": "system", "content": prompt},
+                        {"role": "user", "content": [{"type": "text", "text": text} for text in docs_text]}
+                    ],
+                    response_format=DepartmentsAndRolesResponse,
+                    max_tokens=4096,
+                    temperature=0.1
+                )
+                
+                
+            return json.loads(response.choices[0].message.content)
+        except json.JSONDecodeError:
+            return False'''
+    
+
+    def extract_departments_and_managers_workers(self, docs):
+        """Ask the model for the document's departments with their managers and workers.
+
+        :param docs: Document chunks; each chunk's ``page_content`` is sent to the model.
+        :return: The reply decoded from JSON (format: DepartmentsAndWorkersResponse),
+            or False when a json.JSONDecodeError is raised.
+        """
+        try:
+            user_parts = [
+                {"type": "text", "text": chunk_text}
+                for chunk_text in self._extract_text_from_docs(docs)
+            ]
+            system_prompt = get_departments_managers_workers_extraction_prompt()
+            completion = self.client.beta.chat.completions.parse(
+                model=self.model,
+                messages=[
+                    {"role": "system", "content": system_prompt},
+                    {"role": "user", "content": user_parts},
+                ],
+                response_format=DepartmentsAndWorkersResponse,
+                max_tokens=4096,
+                temperature=0.1,
+            )
+            return json.loads(completion.choices[0].message.content)
+        except json.JSONDecodeError:
+            return False
+
+    
+    
+    def extract_roles_with_reference_managers(self, docs, reference_roles):
+        """
+        Compare the managers extracted from the document with a set of reference roles.
+
+        The managers of every department returned by
+        extract_departments_and_managers_workers() are collected and handed to the model
+        together with ``reference_roles``; the reply format is RolesComparisonResponse.
+
+        :param docs: Document chunks to extract departments and managers from.
+        :param reference_roles: Reference roles, interpolated as-is into the system message.
+        :return: The comparison decoded from JSON, or False if extraction or comparison fails.
+        """
+        try:
+            # Reuse this instance (and its client) rather than building a second parser.
+            departments_and_roles = self.extract_departments_and_managers_workers(docs)
+            if not departments_and_roles:
+                print("Error occurred: no departments could be extracted from the document")
+                return False
+
+            # Flatten the managers of all departments into one list.
+            extracted_managers = [
+                {
+                    'name': manager['name'],
+                    'position': manager.get('position', 'Unknown Position'),
+                    'role': manager.get('role', 'Unknown Role'),  # PRP or SRP classification
+                }
+                for department in departments_and_roles['departments']
+                for manager in department['managers']
+            ]
+            response = self.client.beta.chat.completions.parse(
+                model=self.model,
+                messages=[
+                    {"role": "system", "content": f"The reference roles are{reference_roles} while the extracted roles are {extracted_managers}"},
+                    {"role": "user", "content": get_roles_reference_comparison()},
+                ],
+                max_tokens=1024,
+                temperature=0.1,
+                response_format=RolesComparisonResponse,
+            )
+            return json.loads(response.choices[0].message.content)
+        except Exception as e:
+            print(f"Error occurred: {e}")
+            return False
+
+
+    def extract_roles_with_reference_workers(self, docs, reference_roles):
+        """
+        Compare the workers extracted from the document with a set of reference roles.
+
+        :param docs: Document chunks to extract departments and workers from.
+        :param reference_roles: Reference roles, interpolated as-is into the system message.
+        :return: The comparison decoded from JSON, or False if extraction or comparison fails.
+        """
+        try:
+            # Reuse this instance (and its client) rather than building a second parser.
+            departments_and_roles = self.extract_departments_and_managers_workers(docs)
+            if not departments_and_roles:
+                print("Error occurred: no departments could be extracted from the document")
+                return False
+
+            # Flatten the workers of all departments; each entry gets the fixed role "worker".
+            extracted_workers = [
+                {
+                    'name': worker['name'],
+                    'position': worker.get('position', 'Unknown Position'),
+                    'role': "worker",
+                }
+                for department in departments_and_roles['departments']
+                for worker in department['workers']
+            ]
+
+            response = self.client.beta.chat.completions.parse(
+                model=self.model,
+                messages=[
+                    {"role": "system", "content": f"The reference roles are{reference_roles} while the extracted roles are {extracted_workers}"},
+                    {"role": "user", "content": get_roles_reference_comparison()},
+                ],
+                max_tokens=1024,
+                temperature=0.1,
+                response_format=RolesComparisonResponse,
+            )
+            return json.loads(response.choices[0].message.content)
+        except Exception as e:
+            print(f"Error occurred: {e}")
+            return False
+        
+
+    def extract_managers_workers(self, docs):
+        """Run the managers/workers extraction prompt over the document and decode the reply.
+
+        :param docs: Document chunks; each chunk's ``page_content`` is sent to the model.
+        :return: The reply decoded from JSON (format: DepartmentMembers),
+            or False when a json.JSONDecodeError is raised.
+        """
+        try:
+            user_parts = [
+                {"type": "text", "text": chunk_text}
+                for chunk_text in self._extract_text_from_docs(docs)
+            ]
+            system_prompt = get_managers_workers_extraction_prompt()
+            completion = self.client.beta.chat.completions.parse(
+                model=self.model,
+                messages=[
+                    {"role": "system", "content": system_prompt},
+                    {"role": "user", "content": user_parts},
+                ],
+                response_format=DepartmentMembers,
+                max_tokens=4096,
+                temperature=0.1,
+            )
+            return json.loads(completion.choices[0].message.content)
+        except json.JSONDecodeError:
+            return False
+    
+    def extract_sops_for_workers_by_department(self, docs, depts_workers):
+        """Generate SOPs for the given workers, by department, from the document.
+
+        :param docs: Document chunks; each chunk's ``page_content`` is sent to the model.
+        :param depts_workers: Department/worker information; it is interpolated into a
+            user message as ``Workers information: {depts_workers}``.
+        :return: The reply decoded from JSON (format: WorkerSOPsResponse), or False
+            when a json.JSONDecodeError is raised.
+        """
+        try:
+            docs_text = self._extract_text_from_docs(docs)
+            prompt = get_sop_for_department_workers()
+            response = self.client.beta.chat.completions.parse(
+                model=self.model,
+                messages=[
+                    {"role": "system", "content": prompt},
+                    # The worker information goes first, then one text part per document chunk.
+                    {"role": "user", "content": f"Workers information: {depts_workers}"},
+                    {"role": "user", "content": [{"type": "text", "text": text} for text in docs_text]},
+                ],
+                response_format=WorkerSOPsResponse,
+                max_tokens=4096,
+                temperature=0.1,
+            )
+            return json.loads(response.choices[0].message.content)
+        except json.JSONDecodeError:
+            return False
+          
\ No newline at end of file
diff --git a/src/services/sop_document_parser.py b/src/services/sop_document_parser.py
index f3c22b8..22c237b 100644
--- a/src/services/sop_document_parser.py
+++ b/src/services/sop_document_parser.py
@@ -14,7 +14,7 @@ class DocumentParser:
     def __init__(self):
         self.api_key = os.getenv("OPENAI_API_KEY")
         self.client = OpenAI(api_key=self.api_key)
-        self.model = "gpt-4o-2024-08-06"
+        self.model = "gpt-4o-mini"
 
     def _extract_text_from_docs(self, docs):
         """Extract text content from document objects."""
@@ -93,6 +93,35 @@ class DocumentParser:
         except:
             return False
     
+    def get_roles(self, docs):
+        """
+        Ask the model to list the roles/positions mentioned in the document.
+
+        :param docs: Document chunks; each chunk's ``page_content`` is sent to the model.
+        :return: The reply decoded from JSON (format: Roles_response, i.e. a ``roles`` list).
+        :raises Exception: API and JSON errors are not caught here and reach the caller.
+        """
+        # Same helper the other extraction methods use, instead of a duplicate comprehension.
+        docs_text = self._extract_text_from_docs(docs)
+        response = self.client.beta.chat.completions.parse(
+            model=self.model,
+            messages=[
+                {
+                    "role": "system",
+                    # NOTE(review): prompt kept verbatim; its typos and malformed example list need a separate fix.
+                    "content": '''Suppose you are a role/postion extractor from a company document ,
+                 you extract the roles as a list e.g["finacial analyist,"data scientist]... etc
+                 if no roles are found return and empty list''',
+                },
+                {"role": "user", "content": [{"type": "text", "text": text} for text in docs_text]},
+            ],
+            response_format=Roles_response,
+            max_tokens=1024,
+            temperature=0.1,
+        )
+
+        return json.loads(response.choices[0].message.content)
+    
     '''def extract_departments_and_managers(self, docs):
         """
         Extract departments and managerial roles from the document.