2024-08-31 01:29:39 +00:00
import os
import json
from openai import OpenAI
from pydantic import BaseModel , Field
from typing import List , Dict , Optional
class SOPs ( BaseModel ) :
must : Optional [ List [ str ] ] = Field ( default_factory = list )
shall : Optional [ List [ str ] ] = Field ( default_factory = list )
will : Optional [ List [ str ] ] = Field ( default_factory = list )
class RoleSOPs ( BaseModel ) :
sops : SOPs
class SOPsFound ( BaseModel ) :
message : str
status : bool
class RolesResponse ( BaseModel ) :
roles : List [ str ]
class SOPsResponse ( BaseModel ) :
roles_sops : Dict [ str , SOPs ] = Field ( default_factory = dict )
2024-09-04 01:22:36 +00:00
class VisionMissionResponse ( BaseModel ) :
vision : Optional [ str ]
mission : Optional [ str ]
message : str
2024-08-31 01:29:39 +00:00
class SopGenerator :
def __init__ ( self ) :
self . api_key = os . getenv ( " OPENAI_API_KEY " )
self . client = OpenAI ( api_key = self . api_key )
self . model = " gpt-4o-mini "
def _extract_text_from_docs ( self , docs ) :
""" Extract text content from document objects. """
return [ doc . page_content for doc in docs ]
def get_roles ( self , docs ) - > RolesResponse :
docs_text = self . _extract_text_from_docs ( docs )
response = self . client . beta . chat . completions . parse (
model = self . model ,
messages = [
{
" role " : " system " ,
" content " : ''' Suppose you are a role/position extractor from a company document.
You extract the roles as a list, e.g., [ " financial analyst " , " data scientist " , etc.].
If no roles are found, return an empty list. ''' ,
} ,
{
" role " : " user " ,
" content " : [ { " type " : " text " , " text " : text } for text in docs_text ] ,
}
] ,
response_format = RolesResponse ,
max_tokens = 1024 ,
temperature = 0.1
)
return json . loads ( response . choices [ 0 ] . message . content )
def check_role_sop ( self , roles : str , docs ) - > SOPsFound :
docs_text = self . _extract_text_from_docs ( docs )
response = self . client . beta . chat . completions . parse (
model = self . model ,
messages = [
{
" role " : " system " ,
" content " : f ''' Your role is to check if the SOPs for the provided roles " { roles } " are found in the document.
You are validating if the document can provide the SOPs.
Return status=True with a proper message if found, and status=False with a proper message if not.
Keep the message short, e.g., " SOPs found for the role: { roles } " or " SOPs not found for the role: { roles } " . '''
} ,
{
" role " : " user " ,
" content " : [ { " type " : " text " , " text " : text } for text in docs_text ] ,
}
] ,
response_format = SOPsFound ,
max_tokens = 1024 ,
temperature = 0.1
)
return json . loads ( response . choices [ 0 ] . message . content )
2024-09-04 01:22:36 +00:00
def generate_sops_from_doc ( self , roles : List [ str ] , docs ) - > SOPsResponse :
2024-08-31 01:29:39 +00:00
roles_sops_all = { }
docs_text = self . _extract_text_from_docs ( docs )
for role in roles :
response = self . client . beta . chat . completions . parse (
model = self . model ,
messages = [
{
" role " : " system " ,
2024-09-04 01:22:36 +00:00
" content " : f ''' Your job is to extract Standard Operating Procedure (SOP) questions specifically for the role of " { role } " from the provided text.
Instructions:
Categorization: Organize the questions under three categories: " must, " " shall, " and " will. "
Direct Questions: The questions should be directly addressed to the person in the role. Do not reference the role itself in the question.
Contextual Inference: If SOPs for the role are not explicitly stated, infer them from the context, but only if there is clear evidence within the text. Do not generate or assume SOPs that are not directly supported by the document.
Empty Lists: If no SOPs are found for the role, return an empty list for each category.
Format: The questions should be direct and concise, e.g., " Have you completed all the required reports? "
Example:
Category: Must
Have you completed all the required reports?
Category: Shall
Are you ensuring that all team members follow the safety protocols?
Category: Will
Are you planning to review the budget next week? ''' ,
2024-08-31 01:29:39 +00:00
} ,
{
" role " : " user " ,
" content " : [ { " type " : " text " , " text " : text } for text in docs_text ] ,
}
] ,
response_format = RoleSOPs ,
max_tokens = 1024 ,
temperature = 0.1
)
role_sop = json . loads ( response . choices [ 0 ] . message . content )
roles_sops_all [ role ] = role_sop
2024-09-04 01:22:36 +00:00
return roles_sops_all
def generate_sops_from_info ( self , roles : List [ Dict [ str , str ] ] ) :
roles_sops_all = { }
for role_info in roles :
role_title = role_info . get ( " title " , " Unknown Role " )
print ( f " Role title : { role_title } " )
response = self . client . beta . chat . completions . parse (
model = self . model ,
messages = [
{
" role " : " system " ,
" content " : f ''' Your job is to generate Standard Operating Procedures (SOPs) for the role of " { role_title } " based on the following information provided:
Responsibilities: { role_info . get ( " responsibilities " , " Not provided " ) }
Objectives: { role_info . get ( " objectives " , " Not provided " ) }
Tools: { role_info . get ( " tools " , " Not provided " ) }
Challenges: { role_info . get ( " challenges " , " Not provided " ) }
Instructions:
Categorization: Organize the SOPs under three categories: " must, " " shall, " and " will. "
Direct Instructions: The SOPs should directly address the responsibilities, objectives, and challenges.
Contextual Inference: If SOPs for the role are not explicitly stated, infer them from the context provided.
Empty Lists: If no SOPs are generated, return an empty list for each category.
Format: The SOPs should be direct and concise.
''' ,
}
] ,
response_format = RoleSOPs ,
max_tokens = 1024 ,
temperature = 0.1
)
role_sop = json . loads ( response . choices [ 0 ] . message . content )
roles_sops_all [ role_title ] = role_sop
2024-08-31 01:29:39 +00:00
return roles_sops_all
2024-09-04 01:22:36 +00:00
def generate_sops_by_role_and_area ( self , role : str , area : str ) - > RoleSOPs :
response = self . client . beta . chat . completions . parse (
model = self . model ,
messages = [
{
" role " : " system " ,
" content " : f ''' Your job is to generate Standard Operating Procedures (SOPs) for the role of " { role } " with a focus on the area " { area } " based on the following instructions:
Instructions:
Categorization: Organize the SOPs under three categories: " must, " " shall, " and " will. "
Direct Instructions: The SOPs should directly address responsibilities, objectives, and challenges related to the area of " { area } " for the role of " { role } " .
Contextual Inference: If SOPs for the area are not explicitly stated, infer them from the role and area context provided.
Empty Lists: If no SOPs are generated, return an empty list for each category.
Format: The SOPs should be direct and concise.
''' ,
}
] ,
response_format = RoleSOPs ,
max_tokens = 1024 ,
temperature = 0.1
)
return json . loads ( response . choices [ 0 ] . message . content )
def generate_executive_sops_from_questionnaire ( self , data : dict ) - > RoleSOPs :
"""
Generate SOPs based on the answers from an executive questionnaire.
:param data: A dictionary containing the vision, strategic goals, and department goals.
:return: SOPs categorized by " must " , " shall " , and " will " .
"""
vision_list = data . get ( " organization vision " , [ ] )
strategic_goals = data . get ( " organization strategic goals " , [ ] )
department_goals = data . get ( " department goals " , [ ] )
# Format vision and goals as text
formatted_vision = " \n " . join ( [ f " - { vision } " for vision in vision_list ] )
formatted_goals = " \n " . join ( [ f " - { goal } " for goal in strategic_goals ] )
formatted_department_goals = " \n " . join ( [
f " { dept } : " + " , " . join ( [ f " { goal } " for goal in goals ] )
for dept_dict in department_goals
for dept , goals in dept_dict . items ( )
] )
response = self . client . beta . chat . completions . parse (
model = self . model ,
messages = [
{
" role " : " system " ,
" content " : f ''' Generate Standard Operating Procedures (SOPs) for an executive role based on the following information:
Organizational Vision:
{ formatted_vision }
Organizational Strategic Goals:
{ formatted_goals }
Departmental Strategic Goals:
{ formatted_department_goals }
Instructions:
Categorization: Organize the SOPs under three categories: " must, " " shall, " and " will. "
Direct Instructions: The SOPs should address leadership responsibilities for achieving the vision, strategic contribution, and goals outlined.
Empty Lists: If no SOPs are generated, return an empty list for each category.
Format: SOPs should be direct and concise.
''' ,
}
] ,
response_format = RoleSOPs ,
max_tokens = 1024 ,
temperature = 0.1
)
return json . loads ( response . choices [ 0 ] . message . content )
def generate_executive_sops_from_doc ( self , docs ) - > SOPsResponse :
docs_text = self . _extract_text_from_docs ( docs )
response = self . client . beta . chat . completions . parse (
model = self . model ,
messages = [
{
" role " : " system " ,
" content " : f ''' Your job is to extract Standard Operating Procedure (SOP) questions specifically for the role of " { role } " from the provided text.
Instructions:
Categorization: Organize the questions under three categories: " must, " " shall, " and " will. "
Direct Questions: The questions should be directly addressed to the person in the role. Do not reference the role itself in the question.
Contextual Inference: If SOPs for the role are not explicitly stated, infer them from the context, but only if there is clear evidence within the text. Do not generate or assume SOPs that are not directly supported by the document.
Empty Lists: If no SOPs are found for the role, return an empty list for each category.
Format: The questions should be direct and concise, e.g., " Have you completed all the required reports? "
Example:
Category: Must
Have you completed all the required reports?
Category: Shall
Are you ensuring that all team members follow the safety protocols?
Category: Will
Are you planning to review the budget next week? ''' ,
} ,
{
" role " : " user " ,
" content " : [ { " type " : " text " , " text " : text } for text in docs_text ] ,
}
] ,
response_format = VisionMissionResponse ,
max_tokens = 1024 ,
temperature = 0.1
)
return response
def extract_vision_and_mission ( self , docs : str ) :
"""
Use LLM to extract Vision and Mission from the document text.
:param document_text: The text content of the document.
:return: (vision_section, mission_section)
"""
docs_text = self . _extract_text_from_docs ( docs )
response = self . client . beta . chat . completions . parse (
model = self . model ,
messages = [
{
" role " : " system " ,
" content " : ''' You are a helpful assistant that extracts specific sections from business documents.
Your task is to extract the " Vision " and " Mission " sections (or " Goals " if " Mission " is not found).
mission is basically same as goals just mission as goals if not found
Provide the sections exactly as they appear in the document. '''
} ,
{
" role " : " user " ,
" content " : [ { " type " : " text " , " text " : text } for text in docs_text ] ,
}
] ,
max_tokens = 1024 ,
temperature = 0.1 ,
response_format = VisionMissionResponse ,
)
# Parse the response from the LLM
extracted_text = json . loads ( response . choices [ 0 ] . message . content )
print ( F " extracted text: { extracted_text } " )
# Assuming the response contains fields for 'vision' and 'mission' (or 'goals')
vision_section = extracted_text [ " vision " ]
mission_section = extracted_text [ " mission " ]
return vision_section , mission_section