{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# SOP extraction from a PDF\n",
    "\n",
    "Loads a PDF with LangChain and uses OpenAI structured outputs to:\n",
    "\n",
    "1. list the roles mentioned in the document,\n",
    "2. check whether SOPs for given roles are present, and\n",
    "3. extract the *must* / *shall* / *will* SOPs for each role.\n",
    "\n",
    "Requires `OPENAI_API_KEY` in the environment. Run the cells top to bottom on a fresh kernel."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import json\n",
    "import os\n",
    "from typing import Any, Dict, List, Optional\n",
    "\n",
    "from langchain_community.document_loaders import PyPDFLoader\n",
    "from openai import OpenAI\n",
    "from pydantic import BaseModel, Field"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Path of the PDF to analyse; override it with the SOP_PDF_PATH environment variable.\n",
    "PDF_PATH = os.getenv(\"SOP_PDF_PATH\", \"/content/Example SOP (1) (1).pdf\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Load the document"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "loader = PyPDFLoader(PDF_PATH)\n",
    "docs = loader.load()\n",
    "len(docs)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Response schemas\n",
    "\n",
    "Pydantic models passed as `response_format` to the OpenAI client."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# NOTE: the models are documented with comments rather than docstrings on purpose:\n",
    "# a class docstring would be emitted as a schema description and change the request.\n",
    "\n",
    "# SOP statements for one role, grouped by obligation keyword.\n",
    "class SOPs(BaseModel):\n",
    "    must: Optional[List[str]] = Field(default_factory=list)\n",
    "    shall: Optional[List[str]] = Field(default_factory=list)\n",
    "    will: Optional[List[str]] = Field(default_factory=list)\n",
    "\n",
    "\n",
    "# Schema of the reply requested by SopGenerator.generate_sops for a single role.\n",
    "class RoleSOPs(BaseModel):\n",
    "    sops: SOPs\n",
    "\n",
    "\n",
    "# Schema of the reply requested by SopGenerator.check_role_sop.\n",
    "class SOPsFound(BaseModel):\n",
    "    message: str\n",
    "    status: bool\n",
    "\n",
    "\n",
    "# Schema of the reply requested by SopGenerator.get_roles.\n",
    "class Roles_response(BaseModel):\n",
    "    roles: list[str]\n",
    "\n",
    "\n",
    "# Mapping of role name to its SOPs (not used by SopGenerator below).\n",
    "class SOPsResponse(BaseModel):\n",
    "    roles_sops: Dict[str, SOPs] = Field(default_factory=dict)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## SOP generator"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "class SopGenerator:\n",
    "    \"\"\"Extract roles and SOPs from loaded documents via OpenAI structured outputs.\"\"\"\n",
    "\n",
    "    def __init__(self, api_key=None, model=\"gpt-4o-mini\", max_tokens=1024, temperature=0.1):\n",
    "        \"\"\"Create the OpenAI client and store the request settings.\n",
    "\n",
    "        Args:\n",
    "            api_key: OpenAI API key; falls back to the OPENAI_API_KEY environment variable.\n",
    "            model: Chat model used for every request.\n",
    "            max_tokens: Completion token limit sent with every request.\n",
    "            temperature: Sampling temperature sent with every request.\n",
    "        \"\"\"\n",
    "        self.api_key = api_key or os.getenv(\"OPENAI_API_KEY\")\n",
    "        self.client = OpenAI(api_key=self.api_key)\n",
    "        self.model = model\n",
    "        self.max_tokens = max_tokens\n",
    "        self.temperature = temperature\n",
    "\n",
    "    @staticmethod\n",
    "    def _page_texts(docs):\n",
    "        \"\"\"Return one text per item; items may be Document-like objects or plain strings.\"\"\"\n",
    "        return [getattr(doc, \"page_content\", doc) for doc in docs]\n",
    "\n",
    "    def _ask(self, system_prompt, docs, response_format):\n",
    "        \"\"\"Send the document texts with a system prompt and return the JSON reply as a dict.\n",
    "\n",
    "        Raises:\n",
    "            ValueError: If ``docs`` yields no text at all.\n",
    "        \"\"\"\n",
    "        texts = self._page_texts(docs)\n",
    "        if not texts:\n",
    "            # Fail fast with a clear message instead of sending an empty request.\n",
    "            raise ValueError(\"No document text was provided.\")\n",
    "\n",
    "        response = self.client.beta.chat.completions.parse(\n",
    "            model=self.model,\n",
    "            messages=[\n",
    "                {\"role\": \"system\", \"content\": system_prompt},\n",
    "                {\n",
    "                    \"role\": \"user\",\n",
    "                    # One text part per page/chunk of the document.\n",
    "                    \"content\": [{\"type\": \"text\", \"text\": text} for text in texts],\n",
    "                },\n",
    "            ],\n",
    "            response_format=response_format,\n",
    "            max_tokens=self.max_tokens,\n",
    "            temperature=self.temperature,\n",
    "        )\n",
    "        return json.loads(response.choices[0].message.content)\n",
    "\n",
    "    def get_roles(self, docs):\n",
    "        \"\"\"Extract the roles/positions mentioned in the documents.\n",
    "\n",
    "        Args:\n",
    "            docs: Loaded documents (objects with ``page_content``) or plain strings.\n",
    "\n",
    "        Returns:\n",
    "            The decoded JSON reply, requested in the ``Roles_response`` format.\n",
    "        \"\"\"\n",
    "        system_prompt = (\n",
    "            \"You are a role/position extractor for company documents. \"\n",
    "            'Extract the roles as a list, e.g. [\"financial analyst\", \"data scientist\"]. '\n",
    "            \"If no roles are found, return an empty list.\"\n",
    "        )\n",
    "        return self._ask(system_prompt, docs, Roles_response)\n",
    "\n",
    "    def check_role_sop(self, role, docs):\n",
    "        \"\"\"Check whether the documents contain SOPs for the given role(s).\n",
    "\n",
    "        Args:\n",
    "            role: A role name or a list of role names; it is interpolated into the prompt.\n",
    "            docs: Loaded documents (objects with ``page_content``) or plain strings.\n",
    "\n",
    "        Returns:\n",
    "            The decoded JSON reply, requested in the ``SOPsFound`` format.\n",
    "        \"\"\"\n",
    "        # The prompt uses the `role` argument; the previous version read a global `roles`.\n",
    "        system_prompt = (\n",
    "            f\"Your role is to check whether SOPs for the provided roles: {role} \"\n",
    "            \"are found in the document, i.e. you are validating that the document \"\n",
    "            \"can provide the SOPs. Return status=True and a proper message if yes, \"\n",
    "            \"and status=False and a proper message if no. Keep the message short, \"\n",
    "            'e.g. \"SOPs found for the roles: ...\".'\n",
    "        )\n",
    "        return self._ask(system_prompt, docs, SOPsFound)\n",
    "\n",
    "    def generate_sops(self, roles, docs_text) -> Dict[str, Any]:\n",
    "        \"\"\"Extract the SOPs for every role in ``roles``.\n",
    "\n",
    "        Args:\n",
    "            roles: Iterable of role names; one request is sent per role.\n",
    "            docs_text: Document texts, or objects exposing ``page_content``.\n",
    "\n",
    "        Returns:\n",
    "            A dict mapping each role to the decoded JSON reply requested in the\n",
    "            ``RoleSOPs`` format, i.e. ``{role: {'sops': {...}}}``.\n",
    "        \"\"\"\n",
    "        # Normalise once, outside the loop; the previous version re-read a global `docs`.\n",
    "        texts = self._page_texts(docs_text)\n",
    "\n",
    "        roles_sops_all = {}\n",
    "        for role in roles:\n",
    "            system_prompt = (\n",
    "                \"You are a Standard Operating Procedure (SOP) extractor. \"\n",
    "                f'Your task is to find SOPs for the role \"{role}\" in the provided text. '\n",
    "                'SOPs should be categorized under \"must\", \"shall\", and \"will\". '\n",
    "                \"If no SOPs are found for the role, return an empty list for each category.\"\n",
    "            )\n",
    "            roles_sops_all[role] = self._ask(system_prompt, texts, RoleSOPs)\n",
    "\n",
    "        return roles_sops_all"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Example usage"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "service = SopGenerator()\n",
    "roles = [\"Devops engineers\"]\n",
    "sops_response = service.check_role_sop(roles, docs)\n",
    "sops_response"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "erp",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.13"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}