From ccb0db21d646ad8048987e20d4111f4a5b7e42c2 Mon Sep 17 00:00:00 2001 From: kowshik Date: Fri, 30 Aug 2024 02:08:08 +0000 Subject: [PATCH] test --- .gitignore | 127 +++++++++++++++ README.md | 0 environment.yml | 0 notebooks/test_sop_generator.ipynb | 184 ++++++++++++++++++++++ requirements.txt | 6 + setup.py | 0 src/__init__.py | 0 src/api/__init__.py | 0 src/data/__init__.py | 0 src/features/__init__.py | 0 src/models/__init__.py | 0 src/services/__init__.py | 0 src/services/background_tasks/__init__.py | 0 src/utils/__init__.py | 0 template.py | 58 +++++++ tests/__init__.py | 0 tests/test_services/__init__.py | 0 17 files changed, 375 insertions(+) create mode 100644 .gitignore create mode 100644 README.md create mode 100644 environment.yml create mode 100644 notebooks/test_sop_generator.ipynb create mode 100644 requirements.txt create mode 100644 setup.py create mode 100644 src/__init__.py create mode 100644 src/api/__init__.py create mode 100644 src/data/__init__.py create mode 100644 src/features/__init__.py create mode 100644 src/models/__init__.py create mode 100644 src/services/__init__.py create mode 100644 src/services/background_tasks/__init__.py create mode 100644 src/utils/__init__.py create mode 100644 template.py create mode 100644 tests/__init__.py create mode 100644 tests/test_services/__init__.py diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..7da9d0e --- /dev/null +++ b/.gitignore @@ -0,0 +1,127 @@ +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +.hypothesis/ +.pytest_cache/ +cover/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +.python-version + +# pipenv +Pipfile.lock + +# poetry +poetry.lock +.poetry/ + +# PEP 582; used by e.g. github.com/David-OConnor/pyflow +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +erp/ +env.bak/ +venv.bak/ +.spyproject +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# pyright type checker +.pyright/ + +# VS Code +.vscode/ diff --git a/README.md b/README.md new file mode 100644 index 0000000..e69de29 diff --git a/environment.yml b/environment.yml new file mode 100644 index 0000000..e69de29 diff --git a/notebooks/test_sop_generator.ipynb b/notebooks/test_sop_generator.ipynb new file mode 100644 index 0000000..23cd4fe --- /dev/null +++ b/notebooks/test_sop_generator.ipynb @@ -0,0 +1,184 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from langchain_community.document_loaders import PyPDFLoader\n", + "loader = PyPDFLoader(\"/content/Example SOP (1) (1).pdf\")\n", + "docs = loader.load()" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "ename": "NameError", + "evalue": "name 'SOPsResponse' is not defined", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[1], line 22\u001b[0m\n\u001b[1;32m 19\u001b[0m \u001b[38;5;28;01mclass\u001b[39;00m \u001b[38;5;21;01mRoles_response\u001b[39;00m(BaseModel):\n\u001b[1;32m 20\u001b[0m roles: \u001b[38;5;28mlist\u001b[39m[\u001b[38;5;28mstr\u001b[39m]\n\u001b[0;32m---> 22\u001b[0m \u001b[38;5;28;01mclass\u001b[39;00m \u001b[38;5;21;01mSopGenerator\u001b[39;00m:\n\u001b[1;32m 23\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m__init__\u001b[39m(\u001b[38;5;28mself\u001b[39m):\n\u001b[1;32m 24\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mapi_key \u001b[38;5;241m=\u001b[39m os\u001b[38;5;241m.\u001b[39mgetenv(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mOPENAI_API_KEY\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n", + "Cell \u001b[0;32mIn[1], line 89\u001b[0m, in \u001b[0;36mSopGenerator\u001b[0;34m()\u001b[0m\n\u001b[1;32m 60\u001b[0m response \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mclient\u001b[38;5;241m.\u001b[39mbeta\u001b[38;5;241m.\u001b[39mchat\u001b[38;5;241m.\u001b[39mcompletions\u001b[38;5;241m.\u001b[39mparse(\n\u001b[1;32m 61\u001b[0m model\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mmodel,\n\u001b[1;32m 62\u001b[0m messages\u001b[38;5;241m=\u001b[39m[\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 83\u001b[0m temperature\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m0.1\u001b[39m\n\u001b[1;32m 84\u001b[0m )\n\u001b[1;32m 86\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m json\u001b[38;5;241m.\u001b[39mloads(response\u001b[38;5;241m.\u001b[39mchoices[\u001b[38;5;241m0\u001b[39m]\u001b[38;5;241m.\u001b[39mmessage\u001b[38;5;241m.\u001b[39mcontent)\n\u001b[0;32m---> 89\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mgenerate_sops\u001b[39m(\u001b[38;5;28mself\u001b[39m, roles, docs_text) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m \u001b[43mSOPsResponse\u001b[49m:\n\u001b[1;32m 90\u001b[0m roles_sops_all \u001b[38;5;241m=\u001b[39m {}\n\u001b[1;32m 92\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m role \u001b[38;5;129;01min\u001b[39;00m roles:\n", + "\u001b[0;31mNameError\u001b[0m: name 'SOPsResponse' is not defined" + ] + } + ], + "source": [ + "import os\n", + "import json\n", + "from openai import OpenAI\n", + "from pydantic import BaseModel, Field\n", + "from typing import List, Dict, Optional\n", + "\n", + "class SOPs(BaseModel):\n", + " must: Optional[List[str]] = Field(default_factory=list)\n", + " shall: Optional[List[str]] = Field(default_factory=list)\n", + " will: Optional[List[str]] = Field(default_factory=list)\n", + "\n", + "class RoleSOPs(BaseModel):\n", + " sops: SOPs\n", + "\n", + "class SOPsFound(BaseModel):\n", + " message:str\n", + " status:bool\n", + "\n", + "class Roles_response(BaseModel):\n", + " roles: list[str]\n", + " \n", + "class SOPsResponse(BaseModel):\n", + " roles_sops: Dict[str, SOPs] = Field(default_factory=dict)\n", + "class SopGenerator:\n", + " def __init__(self):\n", + " self.api_key = os.getenv(\"OPENAI_API_KEY\")\n", + " self.client = OpenAI(api_key=self.api_key)\n", + " self.model = \"gpt-4o-mini\"\n", + "\n", + " def get_roles(self, docs):\n", + " # Extract the text content from the Document objects\n", + " docs_text = [doc.page_content for doc in docs] \n", + " response = self.client.beta.chat.completions.parse(\n", + " model=self.model,\n", + " messages=[\n", + " {\n", + " \"role\": \"system\",\n", + " \"content\": '''Suppose you are a role/postion extractor from a company document ,\n", + " you extract the roles as a list e.g[\"finacial analyist,\"data scientist]... etc\n", + " if no roles are found return and empty list''',\n", + " },\n", + " {\n", + " \"role\": \"user\",\n", + " \"content\": [\n", + " {\n", + " \"type\": \"text\", # Changed from \"document chunk\" to \"text\"\n", + " \"text\": text\n", + " } for text in docs_text\n", + " ]\n", + " }\n", + " ],\n", + " response_format=Roles_response,\n", + " max_tokens=1024,\n", + " temperature=0.1\n", + " )\n", + " \n", + " return json.loads(response.choices[0].message.content)\n", + " \n", + " def check_role_sop(self, role, docs):\n", + "\n", + " docs_text = [doc.page_content for doc in docs] \n", + " response = self.client.beta.chat.completions.parse(\n", + " model=self.model,\n", + " messages=[\n", + " {\n", + " \"role\": \"system\",\n", + " \"content\": f'''Tour roles is to check if the sops for the provided roles: {roles} are found in the document\n", + " i.e You are validating the document can provide the sops \n", + " you retur status = True if yes and a proper message and status=False and a proper message if no\n", + " keep the message short e.g \"sops found for the roles: roles..\n", + " '''\n", + " },\n", + " {\n", + " \"role\": \"user\",\n", + " \"content\": [\n", + " {\n", + " \"type\": \"text\", # Changed from \"document chunk\" to \"text\"\n", + " \"text\": text\n", + " } for text in docs_text\n", + " ]\n", + " }\n", + " ],\n", + " response_format=SOPsFound,\n", + " max_tokens=1024,\n", + " temperature=0.1\n", + " )\n", + " \n", + " return json.loads(response.choices[0].message.content)\n", + "\n", + "\n", + " def generate_sops(self, roles, docs_text) -> SOPsResponse:\n", + " roles_sops_all = {}\n", + "\n", + " for role in roles:\n", + " docs_text = [doc.page_content for doc in docs] \n", + " response = self.client.beta.chat.completions.parse(\n", + " model=self.model,\n", + " messages=[\n", + " {\n", + " \"role\": \"system\",\n", + " \"content\": f'''You are a Standard Operating Procedure (SOP) extractor. Your task is to find SOPs for the role \"{role}\" in the provided text. '\n", + " 'SOPs should be categorized under \"must\", \"shall\", and \"will\". '\n", + " 'If no SOPs are found for the role, return an empty list for each category'''\n", + " },\n", + " {\n", + " \"role\": \"user\",\n", + " \"content\": [\n", + " {\n", + " \"type\": \"text\", # Changed from \"document chunk\" to \"text\"\n", + " \"text\": text\n", + " } for text in docs_text\n", + " ]\n", + " }\n", + " ],\n", + " response_format=RoleSOPs,\n", + " max_tokens=1024,\n", + " temperature=0.1\n", + " )\n", + " role_sop = json.loads(response.choices[0].message.content)\n", + " roles_sops_all[role] = role_sop\n", + " \n", + " return roles_sops_all\n", + " \n", + "# Example usage:\n", + "service = SopGenerator()\n", + "roles = [\"Devops engineers\"]\n", + "sops_response = service.check_role_sop(roles,docs)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "erp", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.13" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..488c514 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,6 @@ +openai +langchain-community +langchain-openai +pydantic +flask +python-dotenv \ No newline at end of file diff --git a/setup.py b/setup.py new file mode 100644 index 0000000..e69de29 diff --git a/src/__init__.py b/src/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/api/__init__.py b/src/api/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/data/__init__.py b/src/data/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/features/__init__.py b/src/features/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/models/__init__.py b/src/models/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/services/__init__.py b/src/services/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/services/background_tasks/__init__.py b/src/services/background_tasks/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/utils/__init__.py b/src/utils/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/template.py b/template.py new file mode 100644 index 0000000..c4f7585 --- /dev/null +++ b/template.py @@ -0,0 +1,58 @@ +import os + +def create_folders(base_dir): + folders = [ + "data/raw", + "data/processed", + "data/external", + "data/interim", + "notebooks", + "src/data", + "src/features", + "src/models", + "src/api", + "src/services", + "src/services/background_tasks", + "src/utils", + "tests/test_services", + "scripts", + "models", + "docs", + "config" + ] + + # Create all the folders + for folder in folders: + folder_path = os.path.join(base_dir, folder) + os.makedirs(folder_path, exist_ok=True) + print(f"Created folder: {folder_path}") + + # Create some essential files + essential_files = [ + "requirements.txt", + "environment.yml", + ".gitignore", + "README.md", + "setup.py", + "src/__init__.py", + "src/data/__init__.py", + "src/features/__init__.py", + "src/models/__init__.py", + "src/api/__init__.py", + "src/services/__init__.py", + "src/services/background_tasks/__init__.py", + "src/utils/__init__.py", + "tests/__init__.py", + "tests/test_services/__init__.py" + ] + + for file in essential_files: + file_path = os.path.join(base_dir, file) + if not os.path.exists(file_path): + with open(file_path, 'w') as f: + f.write("") # Create an empty file + print(f"Created file: {file_path}") + +if __name__ == "__main__": + project_root = "." # Use the current directory as the project root + create_folders(project_root) diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/test_services/__init__.py b/tests/test_services/__init__.py new file mode 100644 index 0000000..e69de29