Files
erp_ai/src/services/document_parser.py
T

197 lines
7.5 KiB
Python
Raw Normal View History

2024-09-09 14:03:14 +01:00
import json
import logging
import os
from typing import Dict, List, Optional, Union

from dotenv import load_dotenv
from openai import OpenAI
from pydantic import BaseModel, Field

# Star imports are kept in their original relative order so that any name
# defined by both modules still resolves to the same object as before.
from src.prompts.sops import *
from src.models.response_schemas import *
# Read key/value pairs from a .env file (if one is found) into the process
# environment at import time, before any os.getenv lookups below run.
load_dotenv()
#SopGeneratorDocument
class DocumentParser:
    """Extract structured organisational data from documents via an OpenAI chat model.

    Each extraction method sends the documents' text to the model together with
    a task-specific system prompt and a response schema, then returns the
    model's JSON reply decoded into plain Python objects.
    """

    # Shared logger: failures that the public methods convert into a ``False``
    # return value are recorded here instead of being dropped silently.
    _logger = logging.getLogger(__name__)

    def __init__(self):
        """Create the OpenAI client from the ``OPENAI_API_KEY`` environment variable."""
        self.api_key = os.getenv("OPENAI_API_KEY")
        self.client = OpenAI(api_key=self.api_key)
        self.model = "gpt-4o-2024-08-06"

    def _extract_text_from_docs(self, docs):
        """Return the ``page_content`` of every document object in *docs*."""
        return [doc.page_content for doc in docs]

    @staticmethod
    def _build_messages(system_prompt, docs_text):
        """Build the chat messages: one system prompt plus one user message.

        The user message carries one ``{"type": "text"}`` part per document text.
        """
        return [
            {"role": "system", "content": system_prompt},
            {
                "role": "user",
                "content": [{"type": "text", "text": text} for text in docs_text],
            },
        ]

    @staticmethod
    def _decode_json_content(content):
        """Decode the model's reply text as JSON.

        :raises json.JSONDecodeError: if the reply is not valid JSON, or if
            there is no reply text at all (*content* is ``None``). Mapping the
            ``None`` case onto the same exception lets callers handle "no
            usable JSON" in one place instead of leaking a ``TypeError``.
        """
        if content is None:
            raise json.JSONDecodeError("model reply has no content", "", 0)
        return json.loads(content)

    def _request_structured_json(self, system_prompt, docs, response_format):
        """Send *docs* to the model with *system_prompt* and decode the JSON reply.

        :param system_prompt: Text used as the system message.
        :param docs: Document objects exposing ``page_content``.
        :param response_format: Schema passed to the SDK as ``response_format``.
        :return: The decoded JSON reply.
        :raises json.JSONDecodeError: if the reply cannot be decoded.
        """
        docs_text = self._extract_text_from_docs(docs)
        response = self.client.beta.chat.completions.parse(
            model=self.model,
            messages=self._build_messages(system_prompt, docs_text),
            response_format=response_format,
            max_tokens=4096,
            temperature=0.1,
        )
        return self._decode_json_content(response.choices[0].message.content)

    @staticmethod
    def _collect_managers(departments_and_roles):
        """Flatten the managerial roles of every department into one list.

        Expects ``departments_and_roles['departments']`` to be a list of dicts
        that each hold a ``'managerial_roles'`` list. A missing ``title`` or
        ``classification`` falls back to a placeholder string.
        """
        return [
            {
                'name': manager['name'],
                'position': manager.get('title', 'Unknown Position'),
                'role': manager.get('classification', 'Unknown Role'),  # PRP or SRP classification
            }
            for department in departments_and_roles['departments']
            for manager in department['managerial_roles']
        ]

    def extract_sops_from_doc(self, docs) -> Union[Dict, bool]:
        """
        Extract SOPs from the document(s) using the SOP extraction prompt.

        :param docs: The document(s) from which to extract information.
        :return: The model's reply decoded from JSON (requested against the
            ``SOPsResponse`` schema), or ``False`` if anything goes wrong.
        """
        try:
            return self._request_structured_json(
                get_sop_extraction_from_doc(), docs, SOPsResponse
            )
        except Exception:
            # Best-effort contract: callers receive False on any failure, but
            # the cause is logged and KeyboardInterrupt/SystemExit propagate.
            self._logger.exception("SOP extraction failed")
            return False

    def extract_vision_mission(self, docs) -> Union[Dict, bool]:
        """
        Extract the vision and mission from the document(s).

        :param docs: The document(s) from which to extract information.
        :return: The model's reply decoded from JSON (requested against the
            ``VisionMissionResponse`` schema), or ``False`` if anything goes wrong.
        """
        try:
            return self._request_structured_json(
                get_vision_mission_extraction_from_doc(), docs, VisionMissionResponse
            )
        except Exception:
            # Same best-effort contract as extract_sops_from_doc.
            self._logger.exception("Vision/mission extraction failed")
            return False

    def extract_departments_and_managers(self, docs):
        """
        Extract departments and managerial roles from the document.

        Restored from a disabled (string-quoted) definition because
        ``generate_sops_`` calls it.
        NOTE(review): assumes ``get_departments_and_roles_extraction_prompt``
        and ``DepartmentsAndRolesResponse`` are still exported by the star
        imports - confirm.

        :param docs: List of document chunks
        :return: Dictionary containing departments and their managerial roles,
            or ``False`` if the reply is not decodable JSON.
        """
        try:
            return self._request_structured_json(
                get_departments_and_roles_extraction_prompt(),
                docs,
                DepartmentsAndRolesResponse,
            )
        except json.JSONDecodeError:
            self._logger.exception("Department/manager reply was not valid JSON")
            return False

    def extract_departments_and_managers_workers(self, docs):
        """
        Extract departments, managers, and workers from the document.

        Only an undecodable reply is mapped to ``False``; API errors propagate
        to the caller.

        :param docs: List of document chunks
        :return: Dictionary containing departments, their managers, and workers,
            or ``False`` if the reply is not decodable JSON.
        """
        try:
            return self._request_structured_json(
                get_departments_managers_workers_extraction_prompt(),
                docs,
                DepartmentsAndWorkersResponse,
            )
        except json.JSONDecodeError:
            self._logger.exception("Department/manager/worker reply was not valid JSON")
            return False

    def generate_sops_(self, docs, reference_roles):
        """
        Compare reference roles against the managers extracted from *docs*.

        NOTE(review): ``jsonify`` is not imported in the visible part of this
        file; presumably it is Flask's ``jsonify`` and must be provided by an
        import elsewhere - confirm, otherwise both return paths raise NameError.

        :param docs: List of document chunks
        :param reference_roles: Roles handed to ``generate_llm_comparison_prompt``.
        :return: ``(jsonify(result), 200)`` on success, or
            ``(jsonify(error payload), 500)`` on any failure.
        """
        try:
            # Use this instance rather than constructing a second parser (and a
            # second OpenAI client) for every call.
            departments_and_roles = self.extract_departments_and_managers(docs)
            if not departments_and_roles:
                raise ValueError("no departments could be extracted from the document")

            # Prepare extracted roles (only managers)
            extracted_managers = self._collect_managers(departments_and_roles)

            # Ask the model to compare reference roles with the extracted ones.
            prompt = generate_llm_comparison_prompt(reference_roles, extracted_managers)
            response = self.client.beta.chat.completions.parse(
                model=self.model,
                messages=[
                    {"role": "system", "content": "You are a role comparison assistant."},
                    {"role": "user", "content": prompt},
                ],
                max_tokens=1024,
                temperature=0.1,
            )

            # The reply is expected to be JSON; no response schema is enforced here.
            comparison_result = self._decode_json_content(
                response.choices[0].message.content
            )
            return jsonify(comparison_result), 200
        except Exception as e:
            self._logger.exception("Role comparison failed")
            return jsonify({"error": "Processing error", "message": f"An error occurred: {str(e)}"}), 500