Files
erp_ai/src/services/sop_generator.py
T

114 lines
4.7 KiB
Python
Raw Normal View History

2024-08-31 01:29:39 +00:00
import os
import json
from openai import OpenAI
from pydantic import BaseModel, Field
from typing import List, Dict, Optional
class SOPs(BaseModel):
    # SOP statements grouped under the three categories "must", "shall" and
    # "will". Each field is an optional list of strings that defaults to a
    # fresh empty list.
    # NOTE: documented with comments rather than a docstring because pydantic
    # copies a model docstring into the generated JSON schema description.
    must: Optional[List[str]] = Field(default_factory=list)
    shall: Optional[List[str]] = Field(default_factory=list)
    will: Optional[List[str]] = Field(default_factory=list)
class RoleSOPs(BaseModel):
    # Wrapper holding the SOPs of one role under the single key "sops".
    # NOTE: documented with comments rather than a docstring because pydantic
    # copies a model docstring into the generated JSON schema description.
    sops: SOPs
class SOPsFound(BaseModel):
    # Outcome of an SOP availability check: a short human-readable message
    # plus a boolean status flag.
    # NOTE: documented with comments rather than a docstring because pydantic
    # copies a model docstring into the generated JSON schema description.
    message: str
    status: bool
class RolesResponse(BaseModel):
    # List of role/position names as strings.
    # NOTE: documented with comments rather than a docstring because pydantic
    # copies a model docstring into the generated JSON schema description.
    roles: List[str]
class SOPsResponse(BaseModel):
    # Mapping from a string key (presumably the role name - confirm against
    # callers) to that key's SOPs; defaults to a fresh empty dict.
    roles_sops: Dict[str, SOPs] = Field(default_factory=dict)
class SopGenerator:
    """Extract roles and role-specific SOPs from documents via the OpenAI API.

    Each public method sends the text of the supplied documents to the chat
    model together with a task-specific system prompt, requests a structured
    response, and returns the decoded JSON payload as plain Python data
    (dicts/lists), not as pydantic model instances.
    """

    def __init__(self):
        # os.getenv yields None when OPENAI_API_KEY is not set.
        self.api_key = os.getenv("OPENAI_API_KEY")
        self.client = OpenAI(api_key=self.api_key)
        self.model = "gpt-4o-mini"

    def _extract_text_from_docs(self, docs):
        """Extract text content from document objects."""
        return [doc.page_content for doc in docs]

    def _build_user_message(self, docs):
        """Return the user message holding one text part per document."""
        return {
            "role": "user",
            "content": [
                {"type": "text", "text": text}
                for text in self._extract_text_from_docs(docs)
            ],
        }

    def _request_json(self, system_prompt, user_message, response_format):
        """Run one structured completion and return its decoded JSON content."""
        response = self.client.beta.chat.completions.parse(
            model=self.model,
            messages=[
                {"role": "system", "content": system_prompt},
                user_message,
            ],
            response_format=response_format,
            max_tokens=1024,
            temperature=0.1,
        )
        # NOTE(review): the content is decoded by hand, so callers receive a
        # plain dict. If the model returns no content (e.g. a refusal),
        # json.loads raises TypeError - confirm whether callers need a
        # friendlier error for that case.
        return json.loads(response.choices[0].message.content)

    def get_roles(self, docs) -> dict:
        """Ask the model for the roles/positions mentioned in the documents.

        Args:
            docs: Iterable of objects exposing a ``page_content`` attribute.

        Returns:
            The decoded JSON response, requested in the ``RolesResponse``
            format (a ``"roles"`` list of strings).
        """
        system_prompt = (
            "Suppose you are a role/position extractor from a company document.\n"
            'You extract the roles as a list, e.g., ["financial analyst", "data scientist", etc.].\n'
            "If no roles are found, return an empty list."
        )
        return self._request_json(
            system_prompt, self._build_user_message(docs), RolesResponse
        )

    def check_role_sop(self, roles: str, docs) -> dict:
        """Ask the model whether the documents contain SOPs for ``roles``.

        Args:
            roles: Role description interpolated verbatim into the prompt.
            docs: Iterable of objects exposing a ``page_content`` attribute.

        Returns:
            The decoded JSON response, requested in the ``SOPsFound`` format
            (a ``"message"`` string and a ``"status"`` boolean).
        """
        system_prompt = (
            f'Your role is to check if the SOPs for the provided roles "{roles}" are found in the document.\n'
            "You are validating if the document can provide the SOPs.\n"
            "Return status=True with a proper message if found, and status=False with a proper message if not.\n"
            f'Keep the message short, e.g., "SOPs found for the role: {roles}" or "SOPs not found for the role: {roles}".'
        )
        return self._request_json(
            system_prompt, self._build_user_message(docs), SOPsFound
        )

    def generate_sops(self, roles: List[str], docs) -> dict:
        """Extract the SOPs of every role in ``roles`` from the documents.

        One completion request is issued per role.

        Args:
            roles: Role names to look up; an empty list issues no requests.
            docs: Iterable of objects exposing a ``page_content`` attribute.

        Returns:
            A dict mapping each role to its decoded JSON response, requested
            in the ``RoleSOPs`` format (a ``"sops"`` object).
        """
        # The document payload is identical for every role, so build it once.
        user_message = self._build_user_message(docs)
        roles_sops_all = {}
        for role in roles:
            system_prompt = (
                "You are a Standard Operating Procedure (SOP) extractor.\n"
                f'Your task is to find SOPs for the role "{role}" in the provided text.\n'
                'SOPs should be categorized under "must", "shall", and "will".\n'
                "If the SOPs for the role are not explicitly stated, you are required to infer them from the context provided in the document,\n"
                "but only if there is clear evidence within the text.\n"
                "Do not generate or assume SOPs that are not directly supported by the document.\n"
                "Your extraction should strictly adhere to the content of the document, ensuring that no information is fabricated or inferred beyond what is present.\n"
                "If no SOPs are found for the role, return an empty list for each category."
            )
            roles_sops_all[role] = self._request_json(
                system_prompt, user_message, RoleSOPs
            )
        return roles_sops_all