Role extraction and SOP generation added
This commit is contained in:
@@ -0,0 +1,113 @@
|
||||
import os
|
||||
import json
|
||||
from openai import OpenAI
|
||||
from pydantic import BaseModel, Field
|
||||
from typing import List, Dict, Optional
|
||||
|
||||
class SOPs(BaseModel):
    # Procedures for a single role, split into the three categories the
    # extraction prompt asks for: "must", "shall" and "will".
    # Documented with comments instead of a docstring so that no description
    # text is added to the JSON schema generated from this model.

    # Every category falls back to a fresh empty list when it is not supplied.
    must: Optional[List[str]] = Field(default_factory=list)
    shall: Optional[List[str]] = Field(default_factory=list)
    will: Optional[List[str]] = Field(default_factory=list)
|
||||
|
||||
class RoleSOPs(BaseModel):
    # Response format requested by SopGenerator.generate_sops for one role.
    # Documented with comments instead of a docstring so that no description
    # text is added to the JSON schema generated from this model.

    # The categorized procedures; required (no default is declared).
    sops: SOPs
|
||||
|
||||
class SOPsFound(BaseModel):
    # Response format requested by SopGenerator.check_role_sop.
    # Documented with comments instead of a docstring so that no description
    # text is added to the JSON schema generated from this model.

    # Short human-readable verdict text.
    message: str
    # True when the prompt's "found" condition holds, False otherwise.
    status: bool
|
||||
|
||||
class RolesResponse(BaseModel):
    # Response format requested by SopGenerator.get_roles.
    # Documented with comments instead of a docstring so that no description
    # text is added to the JSON schema generated from this model.

    # Role/position names; the prompt asks for an empty list when none exist.
    roles: List[str]
|
||||
|
||||
class SOPsResponse(BaseModel):
    # Container associating a role name with that role's SOPs.
    # Documented with comments instead of a docstring so that no description
    # text is added to the JSON schema generated from this model.

    # Starts out as a fresh empty dict when no value is supplied.
    roles_sops: Dict[str, SOPs] = Field(default_factory=dict)
|
||||
|
||||
class SopGenerator:
    """Extract roles and per-role SOPs from documents through the OpenAI chat API.

    Each public method sends the text of the supplied documents to the
    configured model together with a structured ``response_format`` and
    returns the JSON-decoded reply as plain Python data (dicts and lists),
    not as pydantic model instances.
    """

    def __init__(self, model: str = "gpt-4o-mini", api_key: Optional[str] = None):
        """Create the OpenAI client used by every request.

        Args:
            model: Name of the chat model to query.
            api_key: API key handed to the client. When omitted, the value of
                the ``OPENAI_API_KEY`` environment variable is used.
        """
        self.api_key = api_key if api_key is not None else os.getenv("OPENAI_API_KEY")
        self.client = OpenAI(api_key=self.api_key)
        self.model = model

    def _extract_text_from_docs(self, docs):
        """Return the ``page_content`` of every document, in order."""
        return [doc.page_content for doc in docs]

    def _build_user_content(self, docs):
        """Turn the documents into the list of text parts sent as the user message."""
        return [
            {"type": "text", "text": text}
            for text in self._extract_text_from_docs(docs)
        ]

    def _request_json(self, system_prompt, user_content, response_format):
        """Run one structured chat completion and return its JSON-decoded content.

        Args:
            system_prompt: Instruction text for the system message.
            user_content: Text parts produced by ``_build_user_content``.
            response_format: Model class describing the expected reply.

        Returns:
            The result of ``json.loads`` on the first choice's message content.
        """
        response = self.client.beta.chat.completions.parse(
            model=self.model,
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": user_content},
            ],
            response_format=response_format,
            max_tokens=1024,
            temperature=0.1,
        )
        return json.loads(response.choices[0].message.content)

    def get_roles(self, docs) -> Dict[str, List[str]]:
        """Ask the model which roles/positions the documents mention.

        Args:
            docs: Iterable of objects exposing a ``page_content`` attribute.

        Returns:
            The decoded reply, requested in the ``RolesResponse`` format
            (a ``"roles"`` list). This is a plain dict, not a model instance.
        """
        system_prompt = (
            'Suppose you are a role/position extractor from a company document.\n'
            'You extract the roles as a list, e.g., ["financial analyst", "data scientist", etc.].\n'
            'If no roles are found, return an empty list.'
        )
        return self._request_json(
            system_prompt, self._build_user_content(docs), RolesResponse
        )

    def check_role_sop(self, roles: str, docs) -> Dict[str, object]:
        """Ask the model whether the documents contain SOPs for ``roles``.

        Args:
            roles: Role text interpolated verbatim into the prompt.
            docs: Iterable of objects exposing a ``page_content`` attribute.

        Returns:
            The decoded reply, requested in the ``SOPsFound`` format
            (``"message"`` and ``"status"``). This is a plain dict.
        """
        system_prompt = (
            f'Your role is to check if the SOPs for the provided roles "{roles}" are found in the document.\n'
            'You are validating if the document can provide the SOPs.\n'
            'Return status=True with a proper message if found, and status=False with a proper message if not.\n'
            f'Keep the message short, e.g., "SOPs found for the role: {roles}" or "SOPs not found for the role: {roles}".'
        )
        return self._request_json(
            system_prompt, self._build_user_content(docs), SOPsFound
        )

    def generate_sops(self, roles: List[str], docs) -> Dict[str, dict]:
        """Extract SOPs for each role, issuing one request per role.

        Args:
            roles: Role names to look up; a repeated name is queried again and
                its later reply replaces the earlier one.
            docs: Iterable of objects exposing a ``page_content`` attribute.

        Returns:
            A dict mapping each role to its decoded reply, requested in the
            ``RoleSOPs`` format (a ``"sops"`` object). Empty when ``roles`` is
            empty.
        """
        # The document payload is identical for every role, so build it once.
        user_content = self._build_user_content(docs)

        roles_sops_all = {}
        for role in roles:
            system_prompt = (
                'You are a Standard Operating Procedure (SOP) extractor.\n'
                f'Your task is to find SOPs for the role "{role}" in the provided text.\n'
                'SOPs should be categorized under "must", "shall", and "will".\n'
                'If the SOPs for the role are not explicitly stated, you are required to infer them from the context provided in the document,\n'
                'but only if there is clear evidence within the text.\n'
                'Do not generate or assume SOPs that are not directly supported by the document.\n'
                'Your extraction should strictly adhere to the content of the document, ensuring that no information is fabricated or inferred beyond what is present.\n'
                'If no SOPs are found for the role, return an empty list for each category.'
            )
            roles_sops_all[role] = self._request_json(
                system_prompt, user_content, RoleSOPs
            )

        return roles_sops_all
|
||||
|
||||
Reference in New Issue
Block a user