From 594f0eadb3336891159754c7a47574d6fcb2c442 Mon Sep 17 00:00:00 2001 From: OwusuBlessing Date: Tue, 10 Sep 2024 21:22:52 +0100 Subject: [PATCH] added adjustmensts to roles and questions --- src/api/routes/sops.py | 62 +----- src/models/sop_response_schemas.py | 18 +- src/prompts/sops.py | 5 +- src/services/document_parser.py | 294 ++++++++++++++++++++++++++++ src/services/sop_document_parser.py | 31 ++- 5 files changed, 339 insertions(+), 71 deletions(-) create mode 100644 src/services/document_parser.py diff --git a/src/api/routes/sops.py b/src/api/routes/sops.py index e98fad6..19d6201 100644 --- a/src/api/routes/sops.py +++ b/src/api/routes/sops.py @@ -19,7 +19,7 @@ def allowed_file(filename): """Check if the file has an allowed extension.""" return '.' in filename and filename.rsplit('.', 1)[1].lower() in ALLOWED_EXTENSIONS -@sops_bp.route('/get_roles', methods=['POST']) +@sops_bp.route('/personal_assessment/get_roles', methods=['POST']) def get_roles(): # Check if the post request has the file part if 'document' not in request.files: @@ -44,7 +44,8 @@ def get_roles(): docs = load_document(file_path) # Generate roles from the docs - roles = sop_generator.get_roles(docs)["roles"] + parser = DocumentParser() + roles = parser.get_roles(docs)["roles"] # Cleanup: Delete all files in the upload directory after processing delete_all_files_in_directory(upload_folder) @@ -60,63 +61,6 @@ def get_roles(): -@sops_bp.route('/generate_questions_from_doc', methods=['POST']) -def generate_questions_from_sop(): - # Check if the POST request has the file part - if 'document' not in request.files: - return jsonify({"error": "No file part", "message": "Please upload a file with the key 'document'."}), 400 - - print("Running................") - - file = request.files['document'] - roles_json = request.form.get('roles') # Get the roles as a JSON string - if not roles_json: - return jsonify({"error": "No roles provided", "message": "Please provide a list of roles in the 'roles' field."}), 400 - - try: - roles = json.loads(roles_json) # Parse the roles from JSON string to a list - print(f"Roles are:{roles}") - except json.JSONDecodeError: - return jsonify({"error": "Invalid JSON", "message": "The 'roles' field contains invalid JSON."}), 400 - - # If the user does not select a file, the browser may also submit an empty part without a filename - if file.filename == '': - return jsonify({"error": "No selected file", "message": "A file was not selected for upload. Please select a valid file."}), 400 - - if file and allowed_file(file.filename): - filename = secure_filename(file.filename) - upload_folder = current_app.config['UPLOAD_FOLDER'] - file_path = os.path.join(upload_folder, filename) - - # Save the file to the upload folder - file.save(file_path) - - try: - # Use the utility function to generate docs from the file - docs = load_document(file_path) - - # Check if the document can generate SOPs for the roles - status_check = sop_generator.check_role_sop(roles=roles, docs=docs) - - if not status_check["status"]: - return jsonify({"error": "Document cannot extract SOPs", "message": status_check["message"]}), 400 - - # Generate SOPs based on the roles provided - sop_generator = DocumentParser() - sops = sop_generator.generate_sops_from_doc(docs) - - # Cleanup: Delete all files in the upload directory after processing - delete_all_files_in_directory(upload_folder) - - return jsonify({"sops": sops, "message": "SOPs successfully generated for the roles from the document."}), 200 - - except Exception as e: - # Cleanup: Delete all files in the upload directory if an error occurs - delete_all_files_in_directory(upload_folder) - return jsonify({"error": "Processing error", "message": f"An error occurred while processing the document: {str(e)}"}), 500 - - return jsonify({"error": "File type not allowed", "message": "The uploaded file type is not allowed. Please upload a PDF, DOC, or DOCX file."}), 400 - @sops_bp.route('/personal_assessment/generate_sops_from_doc', methods=['POST']) diff --git a/src/models/sop_response_schemas.py b/src/models/sop_response_schemas.py index decc9da..213abfb 100644 --- a/src/models/sop_response_schemas.py +++ b/src/models/sop_response_schemas.py @@ -1,21 +1,26 @@ from pydantic import BaseModel, Field from typing import List, Optional -class RoleSops(BaseModel): - role:str + +class Categories(BaseModel): must: Optional[List[str]] = Field(default_factory=list) shall: Optional[List[str]] = Field(default_factory=list) will: Optional[List[str]] = Field(default_factory=list) +class RoleSops(BaseModel): + role:str + sops:Categories + #class RoleSOPs(BaseModel): # sops: SOPs +class Roles_response(BaseModel): + roles: list[str] + class SOPsFound(BaseModel): message: str status: bool -class RolesResponse(BaseModel): - roles: List[str] class SOPsResponse(BaseModel): roles_sops: List[RoleSops] @@ -26,11 +31,6 @@ class VisionMissionResponse(BaseModel): vision: List[str] mission: List[str] - -class Categories(BaseModel): - must: Optional[List[str]] = Field(default_factory=list) - shall: Optional[List[str]] = Field(default_factory=list) - will: Optional[List[str]] = Field(default_factory=list) class ExecutivesSops(BaseModel): executive_sops: List[RoleSops] diff --git a/src/prompts/sops.py b/src/prompts/sops.py index f1a797e..dd62892 100644 --- a/src/prompts/sops.py +++ b/src/prompts/sops.py @@ -275,7 +275,7 @@ def get_sop_for_department_workers(): 5. Use the provided document and the workers and department information to generate the SOP. 6. If the provided document cannot provide SOPs for a specific worker stated, then return an empty list for the SOP for that worker. - Example format: + Example forma { "departments": [ { @@ -287,8 +287,9 @@ def get_sop_for_department_workers(): "shall": ["Submit monthly reports"], "will": ["Improve efficiency"] } + ] } ] - }s + } ''' diff --git a/src/services/document_parser.py b/src/services/document_parser.py new file mode 100644 index 0000000..f3c22b8 --- /dev/null +++ b/src/services/document_parser.py @@ -0,0 +1,294 @@ +import os +import json +from openai import OpenAI +from pydantic import BaseModel, Field +from typing import List, Dict, Optional +from src.prompts.sops import * +from src.models.sop_response_schemas import * +from dotenv import load_dotenv +load_dotenv() + + +#SopGeneratorDocument +class DocumentParser: + def __init__(self): + self.api_key = os.getenv("OPENAI_API_KEY") + self.client = OpenAI(api_key=self.api_key) + self.model = "gpt-4o-2024-08-06" + + def _extract_text_from_docs(self, docs): + """Extract text content from document objects.""" + return [doc.page_content for doc in docs] + # Existing methods... + + def extract_sops_from_doc(self, docs) -> VisionMissionResponse: + """ + Extracts Vision, Mission, and SOPs categorized into 'must,' 'shall,' and 'will' from the document. + + :param docs: The document(s) from which to extract information. + :return: VisionMissionResponse containing the vision, mission, and role-specific SOPs. + """ + + try: + docs_text = self._extract_text_from_docs(docs) + prompt = get_sop_extraction_from_doc() + response = self.client.beta.chat.completions.parse( + model=self.model, + messages=[ + { + "role": "system", + "content": f'''{prompt}''' + }, + { + "role": "user", + "content": [{"type": "text", "text": text} for text in docs_text], + } + ], + response_format=SOPsResponse, + max_tokens=4096, + temperature=0.1 + ) + + # Parse the response from the LLM + extracted_text = json.loads(response.choices[0].message.content) + + return extracted_text + + except: + return False + + def extract_vision_mission(self, docs) -> VisionMissionResponse: + """ + Extracts Vision, Mission, and SOPs categorized into 'must,' 'shall,' and 'will' from the document. + + :param docs: The document(s) from which to extract information. + :return: VisionMissionResponse containing the vision, mission, and role-specific SOPs. + """ + + try: + docs_text = self._extract_text_from_docs(docs) + prompt = get_vision_mission_extraction_from_doc() + response = self.client.beta.chat.completions.parse( + model=self.model, + messages=[ + { + "role": "system", + "content": f'''{prompt}''' + }, + { + "role": "user", + "content": [{"type": "text", "text": text} for text in docs_text], + } + ], + response_format=VisionMissionResponse, + max_tokens=4096, + temperature=0.1 + ) + + # Parse the response from the LLM + extracted_text = json.loads(response.choices[0].message.content) + + return extracted_text + + except: + return False + + '''def extract_departments_and_managers(self, docs): + """ + Extract departments and managerial roles from the document. + + :param docs: List of document chunks + :return: Dictionary containing departments and their managerial roles + """ + try: + docs_text = self._extract_text_from_docs(docs) + prompt = get_departments_and_roles_extraction_prompt() + + response = self.client.beta.chat.completions.parse( + model=self.model, + messages=[ + {"role": "system", "content": prompt}, + {"role": "user", "content": [{"type": "text", "text": text} for text in docs_text]} + ], + response_format=DepartmentsAndRolesResponse, + max_tokens=4096, + temperature=0.1 + ) + + + return json.loads(response.choices[0].message.content) + except json.JSONDecodeError: + return False''' + + + def extract_departments_and_managers_workers(self, docs): + """ + Extract departments, managers, and workers from the document. + + :param docs: List of document chunks + :return: Dictionary containing departments, their managers, and workers. + """ + try: + docs_text = self._extract_text_from_docs(docs) + prompt = get_departments_managers_workers_extraction_prompt() # Update your prompt to handle managers and workers + + response = self.client.beta.chat.completions.parse( + model=self.model, + messages=[ + {"role": "system", "content": prompt}, + {"role": "user", "content": [{"type": "text", "text": text} for text in docs_text]} + ], + response_format=DepartmentsAndWorkersResponse, # Use the updated response schema + max_tokens=4096, + temperature=0.1 + ) + + return json.loads(response.choices[0].message.content) + + except json.JSONDecodeError: + return False + + + + def extract_roles_with_reference_managers(self, docs, reference_roles): + try: + # Extract departments and managers from the document + sop_doc = DocumentParser() + departments_and_roles = sop_doc.extract_departments_and_managers_workers(docs) + + # Prepare extracted roles (only managers) + extracted_managers = [] + for department in departments_and_roles['departments']: + extracted_managers.extend([ + { + 'name': manager['name'], + 'position': manager.get('position', 'Unknown Position'), + 'role': manager.get('role', 'Unknown Role') # PRP or SRP classification + } + for manager in department['managers'] + ]) + + # Generate prompt for the LLM to compare reference roles with extracted roles + prompt = get_roles_reference_comparison() + + + # Send prompt to the LLM for comparison + response = self.client.beta.chat.completions.parse( + model=self.model, + messages=[ + {"role": "system", "content": f"The reference roles are{reference_roles} while the extracted roles are {extracted_managers}"}, + {"role": "user", "content": prompt} + ], + max_tokens=1024, + temperature=0.1, + response_format=RolesComparisonResponse + ) + + + + comparison_result = json.loads(response.choices[0].message.content) + + # Return the result as a JSON response + return comparison_result + except Exception as e: + print(f"Error occurred: {e}") + return False + + + def extract_roles_with_reference_workers(self, docs, reference_roles): + try: + # Extract departments and managers from the document + sop_doc = DocumentParser() + departments_and_roles = sop_doc.extract_departments_and_managers_workers(docs) + + # Prepare extracted roles (only managers) + extracted_workers = [] + for department in departments_and_roles['departments']: + extracted_workers.extend([ + { + 'name': worker['name'], + 'position': worker.get('position', 'Unknown Position'), + 'role': "worker" # PRP or SRP classification + } + for worker in department['workers'] + ]) + + # Generate prompt for the LLM to compare reference roles with extracted roles + prompt = get_roles_reference_comparison() + + + # Send prompt to the LLM for comparison + response = self.client.beta.chat.completions.parse( + model=self.model, + messages=[ + {"role": "system", "content": f"The reference roles are{reference_roles} while the extracted roles are {extracted_workers}"}, + {"role": "user", "content": prompt} + ], + max_tokens=1024, + temperature=0.1, + response_format=RolesComparisonResponse + ) + comparison_result = json.loads(response.choices[0].message.content) + + # Return the result as a JSON response + return comparison_result + except Exception as e: + print(f"Error occurred: {e}") + return False + + + def extract_managers_workers(self, docs): + """ + Extract departments, managers, and workers from the document. + + :param docs: List of document chunks + :return: Dictionary containing departments, their managers, and workers. + """ + try: + docs_text = self._extract_text_from_docs(docs) + prompt = get_managers_workers_extraction_prompt() # Update your prompt to handle managers and workers + + response = self.client.beta.chat.completions.parse( + model=self.model, + messages=[ + {"role": "system", "content": prompt}, + {"role": "user", "content": [{"type": "text", "text": text} for text in docs_text]} + ], + response_format=DepartmentMembers, # Use the updated response schema + max_tokens=4096, + temperature=0.1 + ) + + return json.loads(response.choices[0].message.content) + + except json.JSONDecodeError: + return False + + def extract_sops_for_workers_by_department(self, docs,depts_workers): + """ + Extract departments, managers, and workers from the document. + + :param docs: List of document chunks + :return: Dictionary containing departments, their managers, and workers. + """ + try: + docs_text = self._extract_text_from_docs(docs) + prompt = get_sop_for_department_workers() # Update your prompt to handle managers and workers + + response = self.client.beta.chat.completions.parse( + model=self.model, + messages=[ + {"role": "system", "content": prompt}, + {"role": "user", "content": f"Workers information: {depts_workers}"}, + {"role": "user", "content": [{"type": "text", "text": text} for text in docs_text]} + ], + response_format=WorkerSOPsResponse, # Use the updated response schema + max_tokens=4096, + temperature=0.1 + ) + + return json.loads(response.choices[0].message.content) + + except json.JSONDecodeError: + return False + \ No newline at end of file diff --git a/src/services/sop_document_parser.py b/src/services/sop_document_parser.py index f3c22b8..22c237b 100644 --- a/src/services/sop_document_parser.py +++ b/src/services/sop_document_parser.py @@ -14,7 +14,7 @@ class DocumentParser: def __init__(self): self.api_key = os.getenv("OPENAI_API_KEY") self.client = OpenAI(api_key=self.api_key) - self.model = "gpt-4o-2024-08-06" + self.model = "gpt-4o-mini" def _extract_text_from_docs(self, docs): """Extract text content from document objects.""" @@ -93,6 +93,35 @@ class DocumentParser: except: return False + def get_roles(self, docs): + # Extract the text content from the Document objects + docs_text = [doc.page_content for doc in docs] + response = self.client.beta.chat.completions.parse( + model=self.model, + messages=[ + { + "role": "system", + "content": '''Suppose you are a role/postion extractor from a company document , + you extract the roles as a list e.g["finacial analyist,"data scientist]... etc + if no roles are found return and empty list''', + }, + { + "role": "user", + "content": [ + { + "type": "text", # Changed from "document chunk" to "text" + "text": text + } for text in docs_text + ] + } + ], + response_format=Roles_response, + max_tokens=1024, + temperature=0.1 + ) + + return json.loads(response.choices[0].message.content) + '''def extract_departments_and_managers(self, docs): """ Extract departments and managerial roles from the document.