120 lines
5.3 KiB
Python
120 lines
5.3 KiB
Python
import os
|
|
import json
|
|
from openai import OpenAI
|
|
from pydantic import BaseModel, Field
|
|
from typing import List, Dict, Optional
|
|
from src.prompts.sops import *
|
|
from src.models.questions_response import *
|
|
from src.services.sop_document_parser import DocumentParser
|
|
from src.prompts.questions import get_questions_prompt
|
|
from dotenv import load_dotenv
|
|
load_dotenv()
|
|
|
|
class QuestionsGenerator:
    """Generate assessment questions from SOP text with the OpenAI chat API.

    Both public methods send the SOP text, the assessment settings and the
    roles data to the model and return the decoded JSON reply, or ``False``
    when the reply cannot be decoded as JSON.
    """

    # Size of each SOP slice sent to the model (characters, assuming the SOPs
    # arrive as a single string -- TODO confirm against the caller).
    _CHUNK_SIZE = 1000
    # Sampling temperature shared by every request.
    _TEMPERATURE = 0.1

    def __init__(self):
        """Read ``OPENAI_API_KEY`` from the environment and build the client."""
        self.api_key = os.getenv("OPENAI_API_KEY")
        self.client = OpenAI(api_key=self.api_key)
        self.model = "gpt-4o-mini"

    @staticmethod
    def _chunk_sops(sops, chunk_size=_CHUNK_SIZE):
        """Split *sops* into consecutive slices wrapped as text content parts.

        Returns a list of ``{"type": "text", "text": <slice>}`` dicts; an
        empty *sops* yields an empty list.
        """
        return [
            {"type": "text", "text": sops[start:start + chunk_size]}
            for start in range(0, len(sops), chunk_size)
        ]

    @staticmethod
    def _decode_questions(raw_content):
        """Decode the model's message content as JSON.

        Raises:
            json.JSONDecodeError: if *raw_content* is ``None`` or is not
                valid JSON.
        """
        if raw_content is None:
            # json.loads(None) raises TypeError, which would slip past the
            # callers' JSONDecodeError handling. Content is presumably None
            # when the model refuses to answer, so report it through the
            # same failure path as malformed JSON.
            raise json.JSONDecodeError("model response has no content", "", 0)
        return json.loads(raw_content)

    def _request_questions(self, messages, max_tokens):
        """Send *messages* to the model and return the decoded JSON reply."""
        response = self.client.beta.chat.completions.parse(
            model=self.model,
            messages=messages,
            response_format=AssementQuestion,
            max_tokens=max_tokens,
            temperature=self._TEMPERATURE,
        )
        return self._decode_questions(response.choices[0].message.content)

    def generate_questions(self, input_data):
        """Generate questions for every period from 1 through ``duration``.

        One request is made per period (e.g. "week 1", "week 2", ...).

        Args:
            input_data: Mapping with the keys ``sops``, ``assessment_type``,
                ``frequency_type``, ``duration`` and ``roles_data``.

        Returns:
            A list with one ``{"frequency_number": label, "questions": ...}``
            dict per period, or ``False`` if any reply cannot be decoded as
            JSON (results from earlier periods are discarded in that case).

        Raises:
            KeyError: if *input_data* is a dict lacking a required key.
        """
        try:
            sops = input_data['sops']
            assessment_type = input_data['assessment_type']
            frequency_type = input_data['frequency_type']
            total_duration = input_data['duration']
            roles_data = input_data["roles_data"]

            # Sent as a list of text content parts (not serialized to a string).
            docs = self._chunk_sops(sops)
            prompt = get_questions_prompt()

            all_questions = []
            for frequency_number in range(1, total_duration + 1):
                frequency_label = f"{frequency_type} {frequency_number}"
                messages = [
                    {"role": "system", "content": prompt},
                    {"role": "user", "content": "The SOPs are provided below."},
                    {"role": "user", "content": docs},
                    {"role": "user", "content": f"Assessment Type: {assessment_type}"},
                    {"role": "user", "content": f"Frequency Type: {frequency_type}"},
                    {"role": "user", "content": f"Current Frequency Number to generate : {frequency_label}"},
                    {"role": "user", "content": f"Duration: {total_duration}"},
                    {"role": "user", "content": f"roles data : {roles_data}"},
                ]
                questions = self._request_questions(messages, max_tokens=4096)
                all_questions.append({
                    "frequency_number": frequency_label,
                    "questions": questions,
                })

            return all_questions

        except json.JSONDecodeError:
            # Only undecodable replies map to False; other errors propagate.
            return False

    def generate_questions_v2(self, input_data):
        """Generate questions for the single period ``frequency_number``.

        Args:
            input_data: Mapping with the keys ``sops``, ``assessment_type``,
                ``frequency_type``, ``frequency_number``, ``duration`` and
                ``roles_data``.

        Returns:
            A ``{"frequency_number": label, "questions": ...}`` dict, or
            ``False`` if the reply cannot be decoded as JSON.

        Raises:
            KeyError: if *input_data* is a dict lacking a required key.
        """
        try:
            sops = input_data['sops']
            assessment_type = input_data['assessment_type']
            frequency_type = input_data['frequency_type']
            frequency_number = input_data['frequency_number']
            total_duration = input_data['duration']
            roles_data = input_data["roles_data"]

            docs = self._chunk_sops(sops)
            prompt = get_questions_prompt()
            frequency_label = f"{frequency_type} {frequency_number}"

            messages = [
                {"role": "system", "content": prompt},
                {"role": "user", "content": "The SOPs are provided below."},
                # Unlike generate_questions, the chunks are sent as one JSON string.
                {"role": "user", "content": json.dumps(docs)},
                {"role": "user", "content": f"Assessment Type: {assessment_type}"},
                {"role": "user", "content": f"Frequency Type: {frequency_type}"},
                {"role": "user", "content": f"Current Frequency Number to generate: {frequency_label}"},
                {"role": "user", "content": f"Duration: {total_duration}"},
                {"role": "user", "content": f"Roles Data: {roles_data}"},
            ]
            questions = self._request_questions(messages, max_tokens=10000)

            return {
                "frequency_number": frequency_label,
                "questions": questions,
            }

        except json.JSONDecodeError:
            # Only undecodable replies map to False; other errors propagate.
            return False
|