From ccb0db21d646ad8048987e20d4111f4a5b7e42c2 Mon Sep 17 00:00:00 2001
From: kowshik <kowshik@mkd.com>
Date: Fri, 30 Aug 2024 02:08:08 +0000
Subject: [PATCH] Add project scaffold, template script and SOP generator notebook

---
 .gitignore                                | 127 +++++++++++++++
 README.md                                 |   0
 environment.yml                           |   0
 notebooks/test_sop_generator.ipynb        | 184 ++++++++++++++++++++++
 requirements.txt                          |   6 +
 setup.py                                  |   0
 src/__init__.py                           |   0
 src/api/__init__.py                       |   0
 src/data/__init__.py                      |   0
 src/features/__init__.py                  |   0
 src/models/__init__.py                    |   0
 src/services/__init__.py                  |   0
 src/services/background_tasks/__init__.py |   0
 src/utils/__init__.py                     |   0
 template.py                               |  58 +++++++
 tests/__init__.py                         |   0
 tests/test_services/__init__.py           |   0
 17 files changed, 375 insertions(+)
 create mode 100644 .gitignore
 create mode 100644 README.md
 create mode 100644 environment.yml
 create mode 100644 notebooks/test_sop_generator.ipynb
 create mode 100644 requirements.txt
 create mode 100644 setup.py
 create mode 100644 src/__init__.py
 create mode 100644 src/api/__init__.py
 create mode 100644 src/data/__init__.py
 create mode 100644 src/features/__init__.py
 create mode 100644 src/models/__init__.py
 create mode 100644 src/services/__init__.py
 create mode 100644 src/services/background_tasks/__init__.py
 create mode 100644 src/utils/__init__.py
 create mode 100644 template.py
 create mode 100644 tests/__init__.py
 create mode 100644 tests/test_services/__init__.py

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..7da9d0e
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,127 @@
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+
+# PyInstaller
+# Usually these files are written by a python script from a template
+# before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+.hypothesis/
+.pytest_cache/
+cover/
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+db.sqlite3-journal
+
+# Flask stuff:
+instance/
+.webassets-cache
+
+# Scrapy stuff:
+.scrapy
+
+# Sphinx documentation
+docs/_build/
+
+# Jupyter Notebook
+.ipynb_checkpoints
+
+# IPython
+profile_default/
+ipython_config.py
+
+# pyenv
+.python-version
+
+# pipenv
+Pipfile.lock
+
+# poetry
+poetry.lock
+.poetry/
+
+# PEP 582; used by e.g. github.com/David-OConnor/pyflow
+__pypackages__/
+
+# Celery stuff
+celerybeat-schedule
+celerybeat.pid
+
+# SageMath parsed files
+*.sage.py
+
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+erp/
+env.bak/
+venv.bak/
+.spyproject
+.ropeproject
+
+# mkdocs documentation
+/site
+
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+
+# Pyre type checker
+.pyre/
+
+# pyright type checker
+.pyright/
+
+# VS Code
+.vscode/
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..e69de29
diff --git a/environment.yml b/environment.yml
new file mode 100644
index 0000000..e69de29
diff --git a/notebooks/test_sop_generator.ipynb b/notebooks/test_sop_generator.ipynb
new file mode 100644
index 0000000..23cd4fe
--- /dev/null
+++ b/notebooks/test_sop_generator.ipynb
@@ -0,0 +1,184 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from langchain_community.document_loaders import PyPDFLoader\n",
+    "loader = PyPDFLoader(\"/content/Example SOP (1) (1).pdf\")  # NOTE(review): hard-coded absolute path - adjust per environment\n",
+    "docs = loader.load()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [
+    {
+     "ename": "NameError",
+     "evalue": "name 'SOPsResponse' is not defined",
+     "output_type": "error",
+     "traceback": [
+      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
+      "\u001b[0;31mNameError\u001b[0m                                 Traceback (most recent call last)",
+      "Cell \u001b[0;32mIn[1], line 22\u001b[0m\n\u001b[1;32m     19\u001b[0m \u001b[38;5;28;01mclass\u001b[39;00m \u001b[38;5;21;01mRoles_response\u001b[39;00m(BaseModel):\n\u001b[1;32m     20\u001b[0m     roles: \u001b[38;5;28mlist\u001b[39m[\u001b[38;5;28mstr\u001b[39m]\n\u001b[0;32m---> 22\u001b[0m \u001b[38;5;28;01mclass\u001b[39;00m \u001b[38;5;21;01mSopGenerator\u001b[39;00m:\n\u001b[1;32m     23\u001b[0m     \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m__init__\u001b[39m(\u001b[38;5;28mself\u001b[39m):\n\u001b[1;32m     24\u001b[0m         \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mapi_key \u001b[38;5;241m=\u001b[39m os\u001b[38;5;241m.\u001b[39mgetenv(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mOPENAI_API_KEY\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n",
+      "Cell \u001b[0;32mIn[1], line 89\u001b[0m, in \u001b[0;36mSopGenerator\u001b[0;34m()\u001b[0m\n\u001b[1;32m     60\u001b[0m   response \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mclient\u001b[38;5;241m.\u001b[39mbeta\u001b[38;5;241m.\u001b[39mchat\u001b[38;5;241m.\u001b[39mcompletions\u001b[38;5;241m.\u001b[39mparse(\n\u001b[1;32m     61\u001b[0m             model\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mmodel,\n\u001b[1;32m     62\u001b[0m             messages\u001b[38;5;241m=\u001b[39m[\n\u001b[0;32m   (...)\u001b[0m\n\u001b[1;32m     83\u001b[0m     temperature\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m0.1\u001b[39m\n\u001b[1;32m     84\u001b[0m     )\n\u001b[1;32m     86\u001b[0m   \u001b[38;5;28;01mreturn\u001b[39;00m json\u001b[38;5;241m.\u001b[39mloads(response\u001b[38;5;241m.\u001b[39mchoices[\u001b[38;5;241m0\u001b[39m]\u001b[38;5;241m.\u001b[39mmessage\u001b[38;5;241m.\u001b[39mcontent)\n\u001b[0;32m---> 89\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mgenerate_sops\u001b[39m(\u001b[38;5;28mself\u001b[39m, roles, docs_text) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m \u001b[43mSOPsResponse\u001b[49m:\n\u001b[1;32m     90\u001b[0m     roles_sops_all \u001b[38;5;241m=\u001b[39m {}\n\u001b[1;32m     92\u001b[0m     \u001b[38;5;28;01mfor\u001b[39;00m role \u001b[38;5;129;01min\u001b[39;00m roles:\n",
+      "\u001b[0;31mNameError\u001b[0m: name 'SOPsResponse' is not defined"
+     ]
+    }
+   ],
+   "source": [
+    "import os\n",
+    "import json\n",
+    "from openai import OpenAI\n",
+    "from pydantic import BaseModel, Field\n",
+    "from typing import List, Dict, Optional\n",
+    "\n",
+    "class SOPs(BaseModel):  # SOP statements bucketed under must / shall / will\n",
+    "    must: Optional[List[str]] = Field(default_factory=list)\n",
+    "    shall: Optional[List[str]] = Field(default_factory=list)\n",
+    "    will: Optional[List[str]] = Field(default_factory=list)\n",
+    "\n",
+    "class RoleSOPs(BaseModel):  # response_format passed by SopGenerator.generate_sops\n",
+    "    sops: SOPs\n",
+    "\n",
+    "class SOPsFound(BaseModel):  # response_format passed by SopGenerator.check_role_sop\n",
+    "    message:str\n",
+    "    status:bool\n",
+    "\n",
+    "class Roles_response(BaseModel):  # response_format passed by SopGenerator.get_roles\n",
+    "    roles: list[str]\n",
+    "    \n",
+    "class SOPsResponse(BaseModel):  # per-role SOP container; keys are presumably role names - TODO confirm\n",
+    "    roles_sops: Dict[str, SOPs] = Field(default_factory=dict)\n",
+    "class SopGenerator:\n",
+    "    \"\"\"Extract roles and role-specific SOPs from documents with OpenAI chat completions.\n",
+    "\n",
+    "    Each public method sends the document text together with a task-specific\n",
+    "    system prompt and a pydantic response format, then returns the reply\n",
+    "    decoded from JSON into plain Python objects.\n",
+    "    \"\"\"\n",
+    "\n",
+    "    def __init__(self):\n",
+    "        \"\"\"Create the OpenAI client from the OPENAI_API_KEY environment variable.\"\"\"\n",
+    "        self.api_key = os.getenv(\"OPENAI_API_KEY\")\n",
+    "        self.client = OpenAI(api_key=self.api_key)\n",
+    "        self.model = \"gpt-4o-mini\"\n",
+    "\n",
+    "    def _ask(self, system_prompt, docs_text, response_format):\n",
+    "        \"\"\"Run one chat-completion request and return the decoded JSON reply.\n",
+    "\n",
+    "        Args:\n",
+    "            system_prompt: Instruction text sent as the system message.\n",
+    "            docs_text: Iterable of strings; each becomes one text part of the user message.\n",
+    "            response_format: Pydantic model class describing the expected reply.\n",
+    "\n",
+    "        Returns:\n",
+    "            The first choice's message content parsed with json.loads.\n",
+    "        \"\"\"\n",
+    "        response = self.client.beta.chat.completions.parse(\n",
+    "            model=self.model,\n",
+    "            messages=[\n",
+    "                {\"role\": \"system\", \"content\": system_prompt},\n",
+    "                {\n",
+    "                    \"role\": \"user\",\n",
+    "                    # One text part per entry of docs_text.\n",
+    "                    \"content\": [\n",
+    "                        {\"type\": \"text\", \"text\": text} for text in docs_text\n",
+    "                    ],\n",
+    "                },\n",
+    "            ],\n",
+    "            response_format=response_format,\n",
+    "            max_tokens=1024,\n",
+    "            temperature=0.1,\n",
+    "        )\n",
+    "        return json.loads(response.choices[0].message.content)\n",
+    "\n",
+    "    def get_roles(self, docs):\n",
+    "        \"\"\"Return the roles/positions the model finds in docs.\n",
+    "\n",
+    "        Args:\n",
+    "            docs: Document objects exposing a page_content string.\n",
+    "\n",
+    "        Returns:\n",
+    "            The decoded Roles_response reply (expected shape: a dict with a roles list).\n",
+    "        \"\"\"\n",
+    "        system_prompt = '''Suppose you are a role/position extractor from a company document ,\n",
+    "                 you extract the roles as a list e.g [\"financial analyst\", \"data scientist\"]... etc\n",
+    "                 if no roles are found return an empty list'''\n",
+    "        docs_text = [doc.page_content for doc in docs]\n",
+    "        return self._ask(system_prompt, docs_text, Roles_response)\n",
+    "\n",
+    "    def check_role_sop(self, role, docs):\n",
+    "        \"\"\"Ask the model whether docs contain SOPs for the given role(s).\n",
+    "\n",
+    "        Args:\n",
+    "            role: Role name, or list of role names, interpolated into the prompt.\n",
+    "            docs: Document objects exposing a page_content string.\n",
+    "\n",
+    "        Returns:\n",
+    "            The decoded SOPsFound reply (expected keys: message and status).\n",
+    "        \"\"\"\n",
+    "        # The prompt interpolates the role argument, not a notebook-global roles variable.\n",
+    "        system_prompt = f'''Your role is to check if the sops for the provided roles: {role} are found in the document\n",
+    "                        i.e You are validating the document can provide the sops \n",
+    "                        you return status = True if yes and a proper message and status=False and a proper message if no\n",
+    "                        keep the message short e.g \"sops found for the roles: roles..\n",
+    "                        '''\n",
+    "        docs_text = [doc.page_content for doc in docs]\n",
+    "        return self._ask(system_prompt, docs_text, SOPsFound)\n",
+    "\n",
+    "    def generate_sops(self, roles, docs_text) -> Dict[str, dict]:\n",
+    "        \"\"\"Extract the must / shall / will SOPs for every role in roles.\n",
+    "\n",
+    "        Args:\n",
+    "            roles: Iterable of role names; one request is sent per role.\n",
+    "            docs_text: Iterable of document text strings sent with every request.\n",
+    "\n",
+    "        Returns:\n",
+    "            Dict mapping each role name to the decoded RoleSOPs reply for that role.\n",
+    "        \"\"\"\n",
+    "        roles_sops_all = {}\n",
+    "\n",
+    "        for role in roles:\n",
+    "            # Send the caller-supplied docs_text; nothing here depends on a notebook-global docs.\n",
+    "            system_prompt = (\n",
+    "                f'You are a Standard Operating Procedure (SOP) extractor. Your task is to find SOPs for the role \"{role}\" in the provided text. '\n",
+    "                'SOPs should be categorized under \"must\", \"shall\", and \"will\". '\n",
+    "                'If no SOPs are found for the role, return an empty list for each category'\n",
+    "            )\n",
+    "            roles_sops_all[role] = self._ask(system_prompt, docs_text, RoleSOPs)\n",
+    "\n",
+    "        return roles_sops_all\n",
+    "           \n",
+    "# Example usage: needs `docs` from the PDF-loader cell above and OPENAI_API_KEY in the environment.\n",
+    "service = SopGenerator()\n",
+    "roles = [\"Devops engineers\"]\n",
+    "sops_response = service.check_role_sop(roles,docs)"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "erp",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.10.13"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..488c514
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,6 @@
+openai
+langchain-community
+langchain-openai
+pydantic
+flask
+python-dotenv
\ No newline at end of file
diff --git a/setup.py b/setup.py
new file mode 100644
index 0000000..e69de29
diff --git a/src/__init__.py b/src/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/src/api/__init__.py b/src/api/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/src/data/__init__.py b/src/data/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/src/features/__init__.py b/src/features/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/src/models/__init__.py b/src/models/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/src/services/__init__.py b/src/services/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/src/services/background_tasks/__init__.py b/src/services/background_tasks/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/src/utils/__init__.py b/src/utils/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/template.py b/template.py
new file mode 100644
index 0000000..c4f7585
--- /dev/null
+++ b/template.py
@@ -0,0 +1,58 @@
+import os
+
+def create_folders(base_dir):
+    """Scaffold the project folders and empty placeholder files under *base_dir*.
+
+    Existing folders and files are left untouched; only paths that are
+    newly created are reported on stdout.
+
+    Args:
+        base_dir: Directory under which the layout is created.
+    """
+    folders = [
+        "data/raw", "data/processed", "data/external", "data/interim",
+        "notebooks",
+        "src/data", "src/features", "src/models", "src/api",
+        "src/services", "src/services/background_tasks", "src/utils",
+        "tests/test_services",
+        "scripts", "models", "docs", "config",
+    ]
+    # makedirs also creates missing intermediate folders such as "src" and "tests".
+    for folder in folders:
+        folder_path = os.path.join(base_dir, folder)
+        already_there = os.path.isdir(folder_path)
+        os.makedirs(folder_path, exist_ok=True)
+        if not already_there:
+            print(f"Created folder: {folder_path}")
+
+    # Parent folders of every file below are created by the loop above.
+    essential_files = [
+        "requirements.txt",
+        "environment.yml",
+        ".gitignore",
+        "README.md",
+        "setup.py",
+        "src/__init__.py",
+        "src/data/__init__.py",
+        "src/features/__init__.py",
+        "src/models/__init__.py",
+        "src/api/__init__.py",
+        "src/services/__init__.py",
+        "src/services/background_tasks/__init__.py",
+        "src/utils/__init__.py",
+        "tests/__init__.py",
+        "tests/test_services/__init__.py",
+    ]
+    for rel_path in essential_files:
+        file_path = os.path.join(base_dir, rel_path)
+        try:
+            # Mode "x" raises instead of truncating when the file already exists.
+            with open(file_path, "x", encoding="utf-8"):
+                pass
+        except FileExistsError:
+            continue
+        print(f"Created file: {file_path}")
+
+if __name__ == "__main__":
+    # Scaffold relative to the directory the script is launched from.
+    create_folders(os.curdir)
diff --git a/tests/__init__.py b/tests/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/tests/test_services/__init__.py b/tests/test_services/__init__.py
new file mode 100644
index 0000000..e69de29