# src/services/sop_generator.py

import json
import logging
import os
from typing import List, Dict, Optional

from openai import OpenAI
from pydantic import BaseModel, Field

# Schema for SOP statements grouped into the three categories the prompts in
# this file ask for: "must", "shall", and "will".
# Documented with comments rather than a docstring on purpose: this model is
# nested in RoleSOPs, which is passed to the API as a response_format.
class SOPs(BaseModel):
    # Each category is an optional list of strings; when a field is omitted,
    # default_factory supplies a fresh empty list per instance.
    must: Optional[List[str]] = Field(default_factory=list)
    shall: Optional[List[str]] = Field(default_factory=list)
    will: Optional[List[str]] = Field(default_factory=list)

# Structured-output envelope for one role's SOPs. Passed as `response_format`
# by the SOP generation/extraction methods of SopGenerator.
class RoleSOPs(BaseModel):
    # Required: the categorized SOP lists.
    sops: SOPs

# Structured-output schema for the "are SOPs for this role present in the
# document?" check; passed as `response_format` by SopGenerator.check_role_sop.
class SOPsFound(BaseModel):
    # Short human-readable explanation of the outcome.
    message: str
    # True when the prompt's check succeeds (SOPs found), False otherwise.
    status: bool

# Structured-output schema for role extraction; passed as `response_format`
# by SopGenerator.get_roles.
class RolesResponse(BaseModel):
    # Role/position names; the prompt asks for an empty list when none are found.
    roles: List[str]

# Mapping of role name to that role's categorized SOPs.
# NOTE(review): this model is never passed as a `response_format` in this
# file -- confirm whether other modules rely on it before changing it.
class SOPsResponse(BaseModel):
    # Defaults to a fresh empty dict per instance.
    roles_sops: Dict[str, SOPs] = Field(default_factory=dict)

# Structured-output schema for vision/mission extraction; passed as
# `response_format` by SopGenerator.extract_vision_and_mission.
class VisionMissionResponse(BaseModel):
    # Both sections may be null, but neither has a default, so the keys must
    # still be present in the payload.
    vision: Optional[str]
    mission: Optional[str]
    # Free-text message accompanying the extraction.
    message: str


class SopGenerator:
    """Extract and generate SOPs, roles, and vision/mission text with OpenAI.

    Every public method builds a prompt, sends it through the structured
    output ``parse`` endpoint of the chat-completions API, and returns the
    JSON payload decoded into plain Python objects (dicts, lists, strings),
    not pydantic model instances.
    """

    # Sampling settings shared by every request issued by this class.
    _MAX_TOKENS = 1024
    _TEMPERATURE = 0.1

    _logger = logging.getLogger(__name__)

    def __init__(self):
        """Create the OpenAI client from the ``OPENAI_API_KEY`` environment variable."""
        self.api_key = os.getenv("OPENAI_API_KEY")
        self.client = OpenAI(api_key=self.api_key)
        self.model = "gpt-4o-mini"

    def _extract_text_from_docs(self, docs):
        """Return the ``page_content`` of every document, in order."""
        return [doc.page_content for doc in docs]

    def _user_message(self, docs) -> dict:
        """Build one user message carrying each document's text as a text part."""
        return {
            "role": "user",
            "content": [
                {"type": "text", "text": text}
                for text in self._extract_text_from_docs(docs)
            ],
        }

    def _request_structured(self, messages, response_format) -> dict:
        """Send *messages* and return the decoded JSON payload of the first choice.

        :param messages: Chat messages to send.
        :param response_format: Pydantic model describing the expected JSON.
        :return: The message content parsed with ``json.loads``.
        :raises ValueError: If the model returned no content (for example a
            refusal), or if the content is not valid JSON
            (``json.JSONDecodeError`` is a ``ValueError`` subclass).
        """
        response = self.client.beta.chat.completions.parse(
            model=self.model,
            messages=messages,
            response_format=response_format,
            max_tokens=self._MAX_TOKENS,
            temperature=self._TEMPERATURE,
        )
        message = response.choices[0].message
        if message.content is None:
            # Without this guard json.loads(None) fails with an opaque TypeError.
            refusal = getattr(message, "refusal", None)
            raise ValueError(
                f"Model returned no content (refusal: {refusal!r})"
            )
        return json.loads(message.content)

    def get_roles(self, docs) -> dict:
        """Extract the roles/positions mentioned in the documents.

        :param docs: Iterable of objects exposing ``page_content``.
        :return: Decoded ``RolesResponse`` payload, i.e. ``{"roles": [...]}``.
        """
        return self._request_structured(
            messages=[
                {
                    "role": "system",
                    "content": '''Suppose you are a role/position extractor from a company document. 
                                  You extract the roles as a list, e.g., ["financial analyst", "data scientist", etc.].
                                  If no roles are found, return an empty list.''',
                },
                self._user_message(docs),
            ],
            response_format=RolesResponse,
        )

    def check_role_sop(self, roles: str, docs) -> dict:
        """Check whether the documents contain SOPs for the given role(s).

        :param roles: Role name(s) interpolated into the prompt.
        :param docs: Iterable of objects exposing ``page_content``.
        :return: Decoded ``SOPsFound`` payload, i.e.
            ``{"message": ..., "status": ...}``.
        """
        return self._request_structured(
            messages=[
                {
                    "role": "system",
                    "content": f'''Your role is to check if the SOPs for the provided roles "{roles}" are found in the document.
                                   You are validating if the document can provide the SOPs.
                                   Return status=True with a proper message if found, and status=False with a proper message if not.
                                   Keep the message short, e.g., "SOPs found for the role: {roles}" or "SOPs not found for the role: {roles}".''',
                },
                self._user_message(docs),
            ],
            response_format=SOPsFound,
        )

    def generate_sops_from_doc(self, roles: List[str], docs) -> dict:
        """Extract SOP questions for each role from the documents.

        One request is issued per role.

        :param roles: Role names to extract SOPs for.
        :param docs: Iterable of objects exposing ``page_content``.
        :return: Mapping of role name to its decoded ``RoleSOPs`` payload.
        """
        roles_sops_all = {}

        # The document text is identical for every role, so build it once.
        user_message = self._user_message(docs)

        for role in roles:
            roles_sops_all[role] = self._request_structured(
                messages=[
                    {
                        "role": "system",
                        "content": f'''Your job is to extract Standard Operating Procedure (SOP) questions specifically for the role of "{role}" from the provided text.

                                            Instructions:

                                            Categorization: Organize the questions under three categories: "must," "shall," and "will."
                                            Direct Questions: The questions should be directly addressed to the person in the role. Do not reference the role itself in the question.
                                            Contextual Inference: If SOPs for the role are not explicitly stated, infer them from the context, but only if there is clear evidence within the text. Do not generate or assume SOPs that are not directly supported by the document.
                                            Empty Lists: If no SOPs are found for the role, return an empty list for each category.
                                            Format: The questions should be direct and concise, e.g., "Have you completed all the required reports?"
                                            Example:

                                            Category: Must

                                            Have you completed all the required reports?
                                            Category: Shall

                                            Are you ensuring that all team members follow the safety protocols?
                                            Category: Will

                                            Are you planning to review the budget next week?''',
                    },
                    user_message,
                ],
                response_format=RoleSOPs,
            )

        return roles_sops_all

    def generate_sops_from_info(self, roles: List[Dict[str, str]]) -> dict:
        """Generate SOPs for each role from structured role information.

        :param roles: One dict per role. The keys read are ``title``,
            ``responsibilities``, ``objectives``, ``tools`` and ``challenges``;
            missing keys fall back to placeholder text.
        :return: Mapping of role title to its decoded ``RoleSOPs`` payload.
            Entries sharing a title overwrite each other.
        """
        roles_sops_all = {}

        for role_info in roles:
            role_title = role_info.get("title", "Unknown Role")
            self._logger.debug("Role title: %s", role_title)
            roles_sops_all[role_title] = self._request_structured(
                messages=[
                    {
                        "role": "system",
                        "content": f'''Your job is to generate Standard Operating Procedures (SOPs) for the role of "{role_title}" based on the following information provided:
                                       
                                       Responsibilities: {role_info.get("responsibilities", "Not provided")}
                                       Objectives: {role_info.get("objectives", "Not provided")}
                                       Tools: {role_info.get("tools", "Not provided")}
                                       Challenges: {role_info.get("challenges", "Not provided")}

                                       Instructions:

                                       Categorization: Organize the SOPs under three categories: "must," "shall," and "will."
                                       Direct Instructions: The SOPs should directly address the responsibilities, objectives, and challenges.
                                       Contextual Inference: If SOPs for the role are not explicitly stated, infer them from the context provided.
                                       Empty Lists: If no SOPs are generated, return an empty list for each category.
                                       Format: The SOPs should be direct and concise.
                                       ''',
                    }
                ],
                response_format=RoleSOPs,
            )

        return roles_sops_all

    def generate_sops_by_role_and_area(self, role: str, area: str) -> dict:
        """Generate SOPs for a role, focused on one area.

        :param role: Role name interpolated into the prompt.
        :param area: Focus area interpolated into the prompt.
        :return: Decoded ``RoleSOPs`` payload.
        """
        return self._request_structured(
            messages=[
                {
                    "role": "system",
                    "content": f'''Your job is to generate Standard Operating Procedures (SOPs) for the role of "{role}" with a focus on the area "{area}" based on the following instructions:

                                   Instructions:

                                   Categorization: Organize the SOPs under three categories: "must," "shall," and "will."
                                   Direct Instructions: The SOPs should directly address responsibilities, objectives, and challenges related to the area of "{area}" for the role of "{role}".
                                   Contextual Inference: If SOPs for the area are not explicitly stated, infer them from the role and area context provided.
                                   Empty Lists: If no SOPs are generated, return an empty list for each category.
                                   Format: The SOPs should be direct and concise.
                                   ''',
                }
            ],
            response_format=RoleSOPs,
        )

    def generate_executive_sops_from_questionnaire(self, data: dict) -> dict:
        """Generate executive SOPs from the answers to an executive questionnaire.

        :param data: Questionnaire answers. The keys read are
            ``"organization vision"`` and ``"organization strategic goals"``
            (iterables of items) and ``"department goals"`` (an iterable of
            dicts mapping a department to an iterable of goals). Missing keys
            are treated as empty.
        :return: Decoded ``RoleSOPs`` payload.
        """
        vision_list = data.get("organization vision", [])
        strategic_goals = data.get("organization strategic goals", [])
        department_goals = data.get("department goals", [])

        # Render the answers as bullet lists / "dept: goal, goal" lines.
        formatted_vision = "\n".join(f"- {vision}" for vision in vision_list)
        formatted_goals = "\n".join(f"- {goal}" for goal in strategic_goals)
        formatted_department_goals = "\n".join(
            f"{dept}: " + ", ".join(f"{goal}" for goal in goals)
            for dept_dict in department_goals
            for dept, goals in dept_dict.items()
        )

        return self._request_structured(
            messages=[
                {
                    "role": "system",
                    "content": f'''Generate Standard Operating Procedures (SOPs) for an executive role based on the following information:

                                   Organizational Vision:
                                   {formatted_vision}

                                   Organizational Strategic Goals:
                                   {formatted_goals}

                                   Departmental Strategic Goals:
                                   {formatted_department_goals}

                                   Instructions:

                                   Categorization: Organize the SOPs under three categories: "must," "shall," and "will."
                                   Direct Instructions: The SOPs should address leadership responsibilities for achieving the vision, strategic contribution, and goals outlined.
                                   Empty Lists: If no SOPs are generated, return an empty list for each category.
                                   Format: SOPs should be direct and concise.
                                   ''',
                }
            ],
            response_format=RoleSOPs,
        )

    def generate_executive_sops_from_doc(self, docs, role: str = "executive") -> dict:
        """Extract SOP questions for an executive role from the documents.

        The previous implementation interpolated an undefined ``role`` name
        (raising ``NameError`` on every call), requested the vision/mission
        schema for an SOP prompt, and returned the raw API response. It now
        matches the other SOP methods.

        :param docs: Iterable of objects exposing ``page_content``.
        :param role: Role name interpolated into the prompt.
        :return: Decoded ``RoleSOPs`` payload.
        """
        return self._request_structured(
            messages=[
                {
                    "role": "system",
                    "content": f'''Your job is to extract Standard Operating Procedure (SOP) questions specifically for the role of "{role}" from the provided text.

                                        Instructions:

                                        Categorization: Organize the questions under three categories: "must," "shall," and "will."
                                        Direct Questions: The questions should be directly addressed to the person in the role. Do not reference the role itself in the question.
                                        Contextual Inference: If SOPs for the role are not explicitly stated, infer them from the context, but only if there is clear evidence within the text. Do not generate or assume SOPs that are not directly supported by the document.
                                        Empty Lists: If no SOPs are found for the role, return an empty list for each category.
                                        Format: The questions should be direct and concise, e.g., "Have you completed all the required reports?"
                                        Example:

                                        Category: Must

                                        Have you completed all the required reports?
                                        Category: Shall

                                        Are you ensuring that all team members follow the safety protocols?
                                        Category: Will

                                        Are you planning to review the budget next week?''',
                },
                self._user_message(docs),
            ],
            response_format=RoleSOPs,
        )

    def extract_vision_and_mission(self, docs) -> tuple:
        """Extract the Vision and Mission (or Goals) sections from the documents.

        :param docs: Iterable of objects exposing ``page_content``.
        :return: ``(vision_section, mission_section)`` taken from the
            ``"vision"`` and ``"mission"`` keys of the decoded
            ``VisionMissionResponse`` payload.
        """
        extracted_text = self._request_structured(
            messages=[
                {
                    "role": "system",
                    "content": '''You are a helpful assistant that extracts specific sections from business documents.
                                  Your task is to extract the "Vision" and "Mission" sections (or "Goals" if "Mission" is not found).
                                  mission is basically same as goals just mission as goals if not found
                                  Provide the sections exactly as they appear in the document.''',
                },
                self._user_message(docs),
            ],
            response_format=VisionMissionResponse,
        )

        # Debug-level only: the payload contains document content.
        self._logger.debug("Extracted vision/mission payload: %s", extracted_text)

        return extracted_text["vision"], extracted_text["mission"]
role extraction and sop generation added 2024-08-31 01:29:39 +00:00			`import os`
			`import json`
			`from openai import OpenAI`
			`from pydantic import BaseModel, Field`
			`from typing import List, Dict, Optional`

			`class SOPs(BaseModel):`
			`must: Optional[List[str]] = Field(default_factory=list)`
			`shall: Optional[List[str]] = Field(default_factory=list)`
			`will: Optional[List[str]] = Field(default_factory=list)`

			`class RoleSOPs(BaseModel):`
			`sops: SOPs`

			`class SOPsFound(BaseModel):`
			`message: str`
			`status: bool`

			`class RolesResponse(BaseModel):`
			`roles: List[str]`

			`class SOPsResponse(BaseModel):`
			`roles_sops: Dict[str, SOPs] = Field(default_factory=dict)`

sop for executive generator added 2024-09-04 01:22:36 +00:00			`class VisionMissionResponse(BaseModel):`
			`vision: Optional[str]`
			`mission: Optional[str]`
			`message: str`


role extraction and sop generation added 2024-08-31 01:29:39 +00:00			`class SopGenerator:`
			`def __init__(self):`
			`self.api_key = os.getenv("OPENAI_API_KEY")`
			`self.client = OpenAI(api_key=self.api_key)`
			`self.model = "gpt-4o-mini"`

			`def _extract_text_from_docs(self, docs):`
			`"""Extract text content from document objects."""`
			`return [doc.page_content for doc in docs]`

			`def get_roles(self, docs) -> RolesResponse:`
			`docs_text = self._extract_text_from_docs(docs)`
			`response = self.client.beta.chat.completions.parse(`
			`model=self.model,`
			`messages=[`
			`{`
			`"role": "system",`
			`"content": '''Suppose you are a role/position extractor from a company document.`
			`You extract the roles as a list, e.g., ["financial analyst", "data scientist", etc.].`
			`If no roles are found, return an empty list.''',`
			`},`
			`{`
			`"role": "user",`
			`"content": [{"type": "text", "text": text} for text in docs_text],`
			`}`
			`],`
			`response_format=RolesResponse,`
			`max_tokens=1024,`
			`temperature=0.1`
			`)`
			`return json.loads(response.choices[0].message.content)`

			`def check_role_sop(self, roles: str, docs) -> SOPsFound:`
			`docs_text = self._extract_text_from_docs(docs)`
			`response = self.client.beta.chat.completions.parse(`
			`model=self.model,`
			`messages=[`
			`{`
			`"role": "system",`
			`"content": f'''Your role is to check if the SOPs for the provided roles "{roles}" are found in the document.`
			`You are validating if the document can provide the SOPs.`
			`Return status=True with a proper message if found, and status=False with a proper message if not.`
			`Keep the message short, e.g., "SOPs found for the role: {roles}" or "SOPs not found for the role: {roles}".'''`
			`},`
			`{`
			`"role": "user",`
			`"content": [{"type": "text", "text": text} for text in docs_text],`
			`}`
			`],`
			`response_format=SOPsFound,`
			`max_tokens=1024,`
			`temperature=0.1`
			`)`
			`return json.loads(response.choices[0].message.content)`

sop for executive generator added 2024-09-04 01:22:36 +00:00			`def generate_sops_from_doc(self, roles: List[str], docs) -> SOPsResponse:`
role extraction and sop generation added 2024-08-31 01:29:39 +00:00			`roles_sops_all = {}`

			`docs_text = self._extract_text_from_docs(docs)`

			`for role in roles:`
			`response = self.client.beta.chat.completions.parse(`
			`model=self.model,`
			`messages=[`
			`{`
			`"role": "system",`
sop for executive generator added 2024-09-04 01:22:36 +00:00			`"content": f'''Your job is to extract Standard Operating Procedure (SOP) questions specifically for the role of "{role}" from the provided text.`

			`Instructions:`

			`Categorization: Organize the questions under three categories: "must," "shall," and "will."`
			`Direct Questions: The questions should be directly addressed to the person in the role. Do not reference the role itself in the question.`
			`Contextual Inference: If SOPs for the role are not explicitly stated, infer them from the context, but only if there is clear evidence within the text. Do not generate or assume SOPs that are not directly supported by the document.`
			`Empty Lists: If no SOPs are found for the role, return an empty list for each category.`
			`Format: The questions should be direct and concise, e.g., "Have you completed all the required reports?"`
			`Example:`

			`Category: Must`

			`Have you completed all the required reports?`
			`Category: Shall`

			`Are you ensuring that all team members follow the safety protocols?`
			`Category: Will`

			`Are you planning to review the budget next week?''',`
role extraction and sop generation added 2024-08-31 01:29:39 +00:00			`},`
			`{`
			`"role": "user",`
			`"content": [{"type": "text", "text": text} for text in docs_text],`
			`}`
			`],`
			`response_format=RoleSOPs,`
			`max_tokens=1024,`
			`temperature=0.1`
			`)`
			`role_sop = json.loads(response.choices[0].message.content)`
			`roles_sops_all[role] = role_sop`
sop for executive generator added 2024-09-04 01:22:36 +00:00

			`return roles_sops_all`

			`def generate_sops_from_info(self, roles: List[Dict[str, str]]):`

			`roles_sops_all = {}`

			`for role_info in roles:`
			`role_title = role_info.get("title", "Unknown Role")`
			`print(f"Role title : {role_title}")`
			`response = self.client.beta.chat.completions.parse(`
			`model=self.model,`
			`messages=[`
			`{`
			`"role": "system",`
			`"content": f'''Your job is to generate Standard Operating Procedures (SOPs) for the role of "{role_title}" based on the following information provided:`

			`Responsibilities: {role_info.get("responsibilities", "Not provided")}`
			`Objectives: {role_info.get("objectives", "Not provided")}`
			`Tools: {role_info.get("tools", "Not provided")}`
			`Challenges: {role_info.get("challenges", "Not provided")}`

			`Instructions:`

			`Categorization: Organize the SOPs under three categories: "must," "shall," and "will."`
			`Direct Instructions: The SOPs should directly address the responsibilities, objectives, and challenges.`
			`Contextual Inference: If SOPs for the role are not explicitly stated, infer them from the context provided.`
			`Empty Lists: If no SOPs are generated, return an empty list for each category.`
			`Format: The SOPs should be direct and concise.`
			`''',`
			`}`
			`],`
			`response_format=RoleSOPs,`
			`max_tokens=1024,`
			`temperature=0.1`
			`)`
			`role_sop = json.loads(response.choices[0].message.content)`
			`roles_sops_all[role_title] = role_sop`
role extraction and sop generation added 2024-08-31 01:29:39 +00:00
			`return roles_sops_all`

sop for executive generator added 2024-09-04 01:22:36 +00:00


			`def generate_sops_by_role_and_area(self, role: str, area: str) -> RoleSOPs:`

			`response = self.client.beta.chat.completions.parse(`
			`model=self.model,`
			`messages=[`
			`{`
			`"role": "system",`
			`"content": f'''Your job is to generate Standard Operating Procedures (SOPs) for the role of "{role}" with a focus on the area "{area}" based on the following instructions:`

			`Instructions:`

			`Categorization: Organize the SOPs under three categories: "must," "shall," and "will."`
			`Direct Instructions: The SOPs should directly address responsibilities, objectives, and challenges related to the area of "{area}" for the role of "{role}".`
			`Contextual Inference: If SOPs for the area are not explicitly stated, infer them from the role and area context provided.`
			`Empty Lists: If no SOPs are generated, return an empty list for each category.`
			`Format: The SOPs should be direct and concise.`
			`''',`
			`}`
			`],`
			`response_format=RoleSOPs,`
			`max_tokens=1024,`
			`temperature=0.1`
			`)`
			`return json.loads(response.choices[0].message.content)`


			`def generate_executive_sops_from_questionnaire(self, data: dict) -> RoleSOPs:`
			`"""`
			`Generate SOPs based on the answers from an executive questionnaire.`

			`:param data: A dictionary containing the vision, strategic goals, and department goals.`
			`:return: SOPs categorized by "must", "shall", and "will".`
			`"""`
			`vision_list = data.get("organization vision", [])`
			`strategic_goals = data.get("organization strategic goals", [])`
			`department_goals = data.get("department goals", [])`

			`# Format vision and goals as text`
			`formatted_vision = "\n".join([f"- {vision}" for vision in vision_list])`
			`formatted_goals = "\n".join([f"- {goal}" for goal in strategic_goals])`
			`formatted_department_goals = "\n".join([`
			`f"{dept}: " + ", ".join([f"{goal}" for goal in goals])`
			`for dept_dict in department_goals`
			`for dept, goals in dept_dict.items()`
			`])`

			`response = self.client.beta.chat.completions.parse(`
			`model=self.model,`
			`messages=[`
			`{`
			`"role": "system",`
			`"content": f'''Generate Standard Operating Procedures (SOPs) for an executive role based on the following information:`

			`Organizational Vision:`
			`{formatted_vision}`

			`Organizational Strategic Goals:`
			`{formatted_goals}`

			`Departmental Strategic Goals:`
			`{formatted_department_goals}`

			`Instructions:`

			`Categorization: Organize the SOPs under three categories: "must," "shall," and "will."`
			`Direct Instructions: The SOPs should address leadership responsibilities for achieving the vision, strategic contribution, and goals outlined.`
			`Empty Lists: If no SOPs are generated, return an empty list for each category.`
			`Format: SOPs should be direct and concise.`
			`''',`
			`}`
			`],`
			`response_format=RoleSOPs,`
			`max_tokens=1024,`
			`temperature=0.1`
			`)`
			`return json.loads(response.choices[0].message.content)`




			`def generate_executive_sops_from_doc(self,docs) -> SOPsResponse:`


			`docs_text = self._extract_text_from_docs(docs)`

			`response = self.client.beta.chat.completions.parse(`
			`model=self.model,`
			`messages=[`
			`{`
			`"role": "system",`
			`"content": f'''Your job is to extract Standard Operating Procedure (SOP) questions specifically for the role of "{role}" from the provided text.`

			`Instructions:`

			`Categorization: Organize the questions under three categories: "must," "shall," and "will."`
			`Direct Questions: The questions should be directly addressed to the person in the role. Do not reference the role itself in the question.`
			`Contextual Inference: If SOPs for the role are not explicitly stated, infer them from the context, but only if there is clear evidence within the text. Do not generate or assume SOPs that are not directly supported by the document.`
			`Empty Lists: If no SOPs are found for the role, return an empty list for each category.`
			`Format: The questions should be direct and concise, e.g., "Have you completed all the required reports?"`
			`Example:`

			`Category: Must`

			`Have you completed all the required reports?`
			`Category: Shall`

			`Are you ensuring that all team members follow the safety protocols?`
			`Category: Will`

			`Are you planning to review the budget next week?''',`
			`},`
			`{`
			`"role": "user",`
			`"content": [{"type": "text", "text": text} for text in docs_text],`
			`}`
			`],`
			`response_format=VisionMissionResponse,`
			`max_tokens=1024,`
			`temperature=0.1`
			`)`
			`return response`

			`def extract_vision_and_mission(self, docs: str):`
			`"""`
			`Use LLM to extract Vision and Mission from the document text.`

			`:param document_text: The text content of the document.`
			`:return: (vision_section, mission_section)`
			`"""`
			`docs_text = self._extract_text_from_docs(docs)`
			`response = self.client.beta.chat.completions.parse(`
			`model=self.model,`
			`messages=[`
			`{`
			`"role": "system",`
			`"content": '''You are a helpful assistant that extracts specific sections from business documents.`
			`Your task is to extract the "Vision" and "Mission" sections (or "Goals" if "Mission" is not found).`
			`mission is basically same as goals just mission as goals if not found`
			`Provide the sections exactly as they appear in the document.'''`
			`},`

			`{`
			`"role": "user",`
			`"content": [{"type": "text", "text": text} for text in docs_text],`
			`}`
			`],`
			`max_tokens=1024,`
			`temperature=0.1,`
			`response_format=VisionMissionResponse,`
			`)`

			`# Parse the response from the LLM`
			`extracted_text = json.loads(response.choices[0].message.content)`


			`print(F"extracted text:{extracted_text}")`
			`# Assuming the response contains fields for 'vision' and 'mission' (or 'goals')`
			`vision_section = extracted_text["vision"]`
			`mission_section = extracted_text["mission"]`

			`return vision_section, mission_section`