test
This commit is contained in:
+127
@@ -0,0 +1,127 @@
|
|||||||
|
# Byte-compiled / optimized / DLL files
|
||||||
|
__pycache__/
|
||||||
|
*.py[cod]
|
||||||
|
*$py.class
|
||||||
|
|
||||||
|
# C extensions
|
||||||
|
*.so
|
||||||
|
|
||||||
|
# Distribution / packaging
|
||||||
|
.Python
|
||||||
|
build/
|
||||||
|
develop-eggs/
|
||||||
|
dist/
|
||||||
|
downloads/
|
||||||
|
eggs/
|
||||||
|
.eggs/
|
||||||
|
lib/
|
||||||
|
lib64/
|
||||||
|
parts/
|
||||||
|
sdist/
|
||||||
|
var/
|
||||||
|
wheels/
|
||||||
|
share/python-wheels/
|
||||||
|
*.egg-info/
|
||||||
|
.installed.cfg
|
||||||
|
*.egg
|
||||||
|
MANIFEST
|
||||||
|
|
||||||
|
# PyInstaller
|
||||||
|
# Usually these files are written by a python script from a template
|
||||||
|
# before PyInstaller builds the exe, so as to inject date/other infos into it.
|
||||||
|
*.manifest
|
||||||
|
*.spec
|
||||||
|
|
||||||
|
# Installer logs
|
||||||
|
pip-log.txt
|
||||||
|
pip-delete-this-directory.txt
|
||||||
|
|
||||||
|
# Unit test / coverage reports
|
||||||
|
htmlcov/
|
||||||
|
.tox/
|
||||||
|
.nox/
|
||||||
|
.coverage
|
||||||
|
.coverage.*
|
||||||
|
.cache
|
||||||
|
nosetests.xml
|
||||||
|
coverage.xml
|
||||||
|
*.cover
|
||||||
|
.hypothesis/
|
||||||
|
.pytest_cache/
|
||||||
|
cover/
|
||||||
|
|
||||||
|
# Translations
|
||||||
|
*.mo
|
||||||
|
*.pot
|
||||||
|
|
||||||
|
# Django stuff:
|
||||||
|
*.log
|
||||||
|
local_settings.py
|
||||||
|
db.sqlite3
|
||||||
|
db.sqlite3-journal
|
||||||
|
|
||||||
|
# Flask stuff:
|
||||||
|
instance/
|
||||||
|
.webassets-cache
|
||||||
|
|
||||||
|
# Scrapy stuff:
|
||||||
|
.scrapy
|
||||||
|
|
||||||
|
# Sphinx documentation
|
||||||
|
docs/_build/
|
||||||
|
|
||||||
|
# Jupyter Notebook
|
||||||
|
.ipynb_checkpoints
|
||||||
|
|
||||||
|
# IPython
|
||||||
|
profile_default/
|
||||||
|
ipython_config.py
|
||||||
|
|
||||||
|
# pyenv
|
||||||
|
.python-version
|
||||||
|
|
||||||
|
# pipenv
|
||||||
|
Pipfile.lock
|
||||||
|
|
||||||
|
# poetry
|
||||||
|
poetry.lock
|
||||||
|
.poetry/
|
||||||
|
|
||||||
|
# PEP 582; used by e.g. github.com/David-OConnor/pyflow
|
||||||
|
__pypackages__/
|
||||||
|
|
||||||
|
# Celery stuff
|
||||||
|
celerybeat-schedule
|
||||||
|
celerybeat.pid
|
||||||
|
|
||||||
|
# SageMath parsed files
|
||||||
|
*.sage.py
|
||||||
|
|
||||||
|
# Environments
|
||||||
|
.env
|
||||||
|
.venv
|
||||||
|
env/
|
||||||
|
venv/
|
||||||
|
ENV/
|
||||||
|
erp/
|
||||||
|
env.bak/
|
||||||
|
venv.bak/
|
||||||
|
.spyproject
|
||||||
|
.ropeproject
|
||||||
|
|
||||||
|
# mkdocs documentation
|
||||||
|
/site
|
||||||
|
|
||||||
|
# mypy
|
||||||
|
.mypy_cache/
|
||||||
|
.dmypy.json
|
||||||
|
dmypy.json
|
||||||
|
|
||||||
|
# Pyre type checker
|
||||||
|
.pyre/
|
||||||
|
|
||||||
|
# pyright type checker
|
||||||
|
.pyright/
|
||||||
|
|
||||||
|
# VS Code
|
||||||
|
.vscode/
|
||||||
@@ -0,0 +1,184 @@
|
|||||||
|
{
|
||||||
|
"cells": [
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"from langchain_community.document_loaders import PyPDFLoader\n",
|
||||||
|
"loader = PyPDFLoader(\"/content/Example SOP (1) (1).pdf\")\n",
|
||||||
|
"docs = loader.load()"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 1,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"ename": "NameError",
|
||||||
|
"evalue": "name 'SOPsResponse' is not defined",
|
||||||
|
"output_type": "error",
|
||||||
|
"traceback": [
|
||||||
|
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
|
||||||
|
"\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)",
|
||||||
|
"Cell \u001b[0;32mIn[1], line 22\u001b[0m\n\u001b[1;32m 19\u001b[0m \u001b[38;5;28;01mclass\u001b[39;00m \u001b[38;5;21;01mRoles_response\u001b[39;00m(BaseModel):\n\u001b[1;32m 20\u001b[0m roles: \u001b[38;5;28mlist\u001b[39m[\u001b[38;5;28mstr\u001b[39m]\n\u001b[0;32m---> 22\u001b[0m \u001b[38;5;28;01mclass\u001b[39;00m \u001b[38;5;21;01mSopGenerator\u001b[39;00m:\n\u001b[1;32m 23\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m__init__\u001b[39m(\u001b[38;5;28mself\u001b[39m):\n\u001b[1;32m 24\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mapi_key \u001b[38;5;241m=\u001b[39m os\u001b[38;5;241m.\u001b[39mgetenv(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mOPENAI_API_KEY\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n",
|
||||||
|
"Cell \u001b[0;32mIn[1], line 89\u001b[0m, in \u001b[0;36mSopGenerator\u001b[0;34m()\u001b[0m\n\u001b[1;32m 60\u001b[0m response \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mclient\u001b[38;5;241m.\u001b[39mbeta\u001b[38;5;241m.\u001b[39mchat\u001b[38;5;241m.\u001b[39mcompletions\u001b[38;5;241m.\u001b[39mparse(\n\u001b[1;32m 61\u001b[0m model\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mmodel,\n\u001b[1;32m 62\u001b[0m messages\u001b[38;5;241m=\u001b[39m[\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 83\u001b[0m temperature\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m0.1\u001b[39m\n\u001b[1;32m 84\u001b[0m )\n\u001b[1;32m 86\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m json\u001b[38;5;241m.\u001b[39mloads(response\u001b[38;5;241m.\u001b[39mchoices[\u001b[38;5;241m0\u001b[39m]\u001b[38;5;241m.\u001b[39mmessage\u001b[38;5;241m.\u001b[39mcontent)\n\u001b[0;32m---> 89\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mgenerate_sops\u001b[39m(\u001b[38;5;28mself\u001b[39m, roles, docs_text) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m \u001b[43mSOPsResponse\u001b[49m:\n\u001b[1;32m 90\u001b[0m roles_sops_all \u001b[38;5;241m=\u001b[39m {}\n\u001b[1;32m 92\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m role \u001b[38;5;129;01min\u001b[39;00m roles:\n",
|
||||||
|
"\u001b[0;31mNameError\u001b[0m: name 'SOPsResponse' is not defined"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"import os\n",
|
||||||
|
"import json\n",
|
||||||
|
"from openai import OpenAI\n",
|
||||||
|
"from pydantic import BaseModel, Field\n",
|
||||||
|
"from typing import List, Dict, Optional\n",
|
||||||
|
"\n",
|
||||||
|
"class SOPs(BaseModel):\n",
|
||||||
|
" must: Optional[List[str]] = Field(default_factory=list)\n",
|
||||||
|
" shall: Optional[List[str]] = Field(default_factory=list)\n",
|
||||||
|
" will: Optional[List[str]] = Field(default_factory=list)\n",
|
||||||
|
"\n",
|
||||||
|
"class RoleSOPs(BaseModel):\n",
|
||||||
|
" sops: SOPs\n",
|
||||||
|
"\n",
|
||||||
|
"class SOPsFound(BaseModel):\n",
|
||||||
|
" message:str\n",
|
||||||
|
" status:bool\n",
|
||||||
|
"\n",
|
||||||
|
"class Roles_response(BaseModel):\n",
|
||||||
|
" roles: list[str]\n",
|
||||||
|
" \n",
|
||||||
|
"class SOPsResponse(BaseModel):\n",
|
||||||
|
" roles_sops: Dict[str, SOPs] = Field(default_factory=dict)\n",
|
||||||
|
"class SopGenerator:\n",
|
||||||
|
" def __init__(self):\n",
|
||||||
|
" self.api_key = os.getenv(\"OPENAI_API_KEY\")\n",
|
||||||
|
" self.client = OpenAI(api_key=self.api_key)\n",
|
||||||
|
" self.model = \"gpt-4o-mini\"\n",
|
||||||
|
"\n",
|
||||||
|
" def get_roles(self, docs):\n",
|
||||||
|
" # Extract the text content from the Document objects\n",
|
||||||
|
" docs_text = [doc.page_content for doc in docs] \n",
|
||||||
|
" response = self.client.beta.chat.completions.parse(\n",
|
||||||
|
" model=self.model,\n",
|
||||||
|
" messages=[\n",
|
||||||
|
" {\n",
|
||||||
|
" \"role\": \"system\",\n",
|
||||||
|
" \"content\": '''Suppose you are a role/position extractor from a company document,\n",
|
||||||
|
" you extract the roles as a list, e.g. [\"financial analyst\", \"data scientist\"], etc.\n",
|
||||||
|
" if no roles are found, return an empty list''',\n",
|
||||||
|
" },\n",
|
||||||
|
" {\n",
|
||||||
|
" \"role\": \"user\",\n",
|
||||||
|
" \"content\": [\n",
|
||||||
|
" {\n",
|
||||||
|
" \"type\": \"text\", # Changed from \"document chunk\" to \"text\"\n",
|
||||||
|
" \"text\": text\n",
|
||||||
|
" } for text in docs_text\n",
|
||||||
|
" ]\n",
|
||||||
|
" }\n",
|
||||||
|
" ],\n",
|
||||||
|
" response_format=Roles_response,\n",
|
||||||
|
" max_tokens=1024,\n",
|
||||||
|
" temperature=0.1\n",
|
||||||
|
" )\n",
|
||||||
|
" \n",
|
||||||
|
" return json.loads(response.choices[0].message.content)\n",
|
||||||
|
" \n",
|
||||||
|
" def check_role_sop(self, role, docs):\n",
|
||||||
|
"\n",
|
||||||
|
" docs_text = [doc.page_content for doc in docs] \n",
|
||||||
|
" response = self.client.beta.chat.completions.parse(\n",
|
||||||
|
" model=self.model,\n",
|
||||||
|
" messages=[\n",
|
||||||
|
" {\n",
|
||||||
|
" \"role\": \"system\",\n",
|
||||||
|
" \"content\": f'''Your role is to check if the sops for the provided roles: {roles} are found in the document\n",
|
||||||
|
" i.e. you are validating that the document can provide the sops \n",
|
||||||
|
" you return status = True if yes and a proper message and status=False and a proper message if no\n",
|
||||||
|
" keep the message short e.g \"sops found for the roles: roles..\n",
|
||||||
|
" '''\n",
|
||||||
|
" },\n",
|
||||||
|
" {\n",
|
||||||
|
" \"role\": \"user\",\n",
|
||||||
|
" \"content\": [\n",
|
||||||
|
" {\n",
|
||||||
|
" \"type\": \"text\", # Changed from \"document chunk\" to \"text\"\n",
|
||||||
|
" \"text\": text\n",
|
||||||
|
" } for text in docs_text\n",
|
||||||
|
" ]\n",
|
||||||
|
" }\n",
|
||||||
|
" ],\n",
|
||||||
|
" response_format=SOPsFound,\n",
|
||||||
|
" max_tokens=1024,\n",
|
||||||
|
" temperature=0.1\n",
|
||||||
|
" )\n",
|
||||||
|
" \n",
|
||||||
|
" return json.loads(response.choices[0].message.content)\n",
|
||||||
|
"\n",
|
||||||
|
"\n",
|
||||||
|
" def generate_sops(self, roles, docs_text) -> SOPsResponse:\n",
|
||||||
|
" roles_sops_all = {}\n",
|
||||||
|
"\n",
|
||||||
|
" for role in roles:\n",
|
||||||
|
" docs_text = [doc.page_content for doc in docs] \n",
|
||||||
|
" response = self.client.beta.chat.completions.parse(\n",
|
||||||
|
" model=self.model,\n",
|
||||||
|
" messages=[\n",
|
||||||
|
" {\n",
|
||||||
|
" \"role\": \"system\",\n",
|
||||||
|
" \"content\": f'''You are a Standard Operating Procedure (SOP) extractor. Your task is to find SOPs for the role \"{role}\" in the provided text. '\n",
|
||||||
|
" 'SOPs should be categorized under \"must\", \"shall\", and \"will\". '\n",
|
||||||
|
" 'If no SOPs are found for the role, return an empty list for each category'''\n",
|
||||||
|
" },\n",
|
||||||
|
" {\n",
|
||||||
|
" \"role\": \"user\",\n",
|
||||||
|
" \"content\": [\n",
|
||||||
|
" {\n",
|
||||||
|
" \"type\": \"text\", # Changed from \"document chunk\" to \"text\"\n",
|
||||||
|
" \"text\": text\n",
|
||||||
|
" } for text in docs_text\n",
|
||||||
|
" ]\n",
|
||||||
|
" }\n",
|
||||||
|
" ],\n",
|
||||||
|
" response_format=RoleSOPs,\n",
|
||||||
|
" max_tokens=1024,\n",
|
||||||
|
" temperature=0.1\n",
|
||||||
|
" )\n",
|
||||||
|
" role_sop = json.loads(response.choices[0].message.content)\n",
|
||||||
|
" roles_sops_all[role] = role_sop\n",
|
||||||
|
" \n",
|
||||||
|
" return roles_sops_all\n",
|
||||||
|
" \n",
|
||||||
|
"# Example usage:\n",
|
||||||
|
"service = SopGenerator()\n",
|
||||||
|
"roles = [\"Devops engineers\"]\n",
|
||||||
|
"sops_response = service.check_role_sop(roles,docs)"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"kernelspec": {
|
||||||
|
"display_name": "erp",
|
||||||
|
"language": "python",
|
||||||
|
"name": "python3"
|
||||||
|
},
|
||||||
|
"language_info": {
|
||||||
|
"codemirror_mode": {
|
||||||
|
"name": "ipython",
|
||||||
|
"version": 3
|
||||||
|
},
|
||||||
|
"file_extension": ".py",
|
||||||
|
"mimetype": "text/x-python",
|
||||||
|
"name": "python",
|
||||||
|
"nbconvert_exporter": "python",
|
||||||
|
"pygments_lexer": "ipython3",
|
||||||
|
"version": "3.10.13"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"nbformat": 4,
|
||||||
|
"nbformat_minor": 2
|
||||||
|
}
|
||||||
@@ -0,0 +1,6 @@
|
|||||||
|
openai
|
||||||
|
langchain-community
|
||||||
|
langchain-openai
|
||||||
|
pydantic
|
||||||
|
flask
|
||||||
|
python-dotenv
|
||||||
+58
@@ -0,0 +1,58 @@
|
|||||||
|
import os
|
||||||
|
|
||||||
|
def create_folders(base_dir):
    """Scaffold the project directory layout and placeholder files.

    Every directory of the layout is created under ``base_dir`` (missing
    intermediate directories included), followed by an empty placeholder
    for each essential file. Paths that already exist are left untouched,
    so the function can be re-run without overwriting anything.

    Args:
        base_dir: Path of the project root in which the layout is created.
    """
    folders = [
        "data/raw",
        "data/processed",
        "data/external",
        "data/interim",
        "notebooks",
        "src/data",
        "src/features",
        "src/models",
        "src/api",
        "src/services",
        "src/services/background_tasks",
        "src/utils",
        "tests/test_services",
        "scripts",
        "models",
        "docs",
        "config",
    ]

    # Create all the folders; exist_ok keeps repeated runs from failing.
    for folder in folders:
        folder_path = os.path.join(base_dir, folder)
        os.makedirs(folder_path, exist_ok=True)
        print(f"Created folder: {folder_path}")

    # Create some essential files
    essential_files = [
        "requirements.txt",
        "environment.yml",
        ".gitignore",
        "README.md",
        "setup.py",
        "src/__init__.py",
        "src/data/__init__.py",
        "src/features/__init__.py",
        "src/models/__init__.py",
        "src/api/__init__.py",
        "src/services/__init__.py",
        "src/services/background_tasks/__init__.py",
        "src/utils/__init__.py",
        "tests/__init__.py",
        "tests/test_services/__init__.py",
    ]

    for relative_path in essential_files:
        file_path = os.path.join(base_dir, relative_path)
        if os.path.exists(file_path):
            # Never truncate a file that is already there.
            continue

        # Do not rely on the folder list above having created the parent
        # (e.g. "tests/" only exists because "tests/test_services" does).
        parent_dir = os.path.dirname(file_path)
        if parent_dir:
            os.makedirs(parent_dir, exist_ok=True)

        with open(file_path, "w") as f:
            f.write("")  # Create an empty file
        print(f"Created file: {file_path}")
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Run as a script: scaffold the layout in the current working directory.
    project_root = os.curdir
    create_folders(project_root)
|
||||||
Reference in New Issue
Block a user