Files
erp_ai/src/services/questions_generator.py
T

120 lines
5.3 KiB
Python

import os
import json
from openai import OpenAI
from pydantic import BaseModel, Field
from typing import List, Dict, Optional
from src.prompts.sops import *
from src.models.questions_response import *
from src.services.sop_document_parser import DocumentParser
from src.prompts.questions import get_questions_prompt
from dotenv import load_dotenv
load_dotenv()
class QuestionsGenerator:
    """Generate assessment questions from SOP text via the OpenAI chat API.

    Every request sends the questions prompt as the system message, followed
    by the SOP text (split into fixed-size chunks) and the assessment
    parameters as user messages, and asks for a reply shaped by the
    ``AssementQuestion`` response schema.
    """

    # Number of characters of SOP text placed in each chunk.
    _CHUNK_SIZE = 1000

    def __init__(self):
        """Build the OpenAI client from the ``OPENAI_API_KEY`` environment variable."""
        self.api_key = os.getenv("OPENAI_API_KEY")
        self.client = OpenAI(api_key=self.api_key)
        self.model = "gpt-4o-mini"

    @classmethod
    def _chunk_sops(cls, sops):
        """Split ``sops`` into ``{"type": "text", "text": ...}`` parts of ``_CHUNK_SIZE`` items."""
        # NOTE(review): slicing implies ``sops`` is a str -- confirm with callers.
        size = cls._CHUNK_SIZE
        return [
            {"type": "text", "text": sops[start:start + size]}
            for start in range(0, len(sops), size)
        ]

    def _complete(self, messages, max_tokens):
        """Send ``messages`` to the model and return the first choice's raw content.

        The returned value is ``None`` when the reply carries no content
        (for example when the model refuses to answer).
        """
        response = self.client.beta.chat.completions.parse(
            model=self.model,
            messages=messages,
            response_format=AssementQuestion,
            max_tokens=max_tokens,
            temperature=0.1,
        )
        return response.choices[0].message.content

    def generate_questions(self, input_data):
        """Generate questions for every frequency number from 1 to ``duration``.

        Args:
            input_data: Mapping with the keys ``sops``, ``assessment_type``,
                ``frequency_type``, ``duration`` (an int; one request is made
                per value in ``1..duration``) and ``roles_data``.

        Returns:
            A list with one ``{"frequency_number": label, "questions": data}``
            dict per frequency number, where ``label`` is
            ``"<frequency_type> <n>"`` and ``data`` is the decoded JSON reply.
            ``False`` if any reply has no content or is not valid JSON.

        Raises:
            KeyError: If a required key is missing from ``input_data``.
            Errors raised by the OpenAI client are not caught and propagate.
        """
        sops = input_data['sops']
        assessment_type = input_data['assessment_type']
        frequency_type = input_data['frequency_type']
        total_duration = input_data['duration']
        roles_data = input_data["roles_data"]

        docs = self._chunk_sops(sops)
        prompt = get_questions_prompt()

        # Only the frequency label changes between requests, so the messages
        # around it are built once instead of on every iteration.
        leading_messages = [
            {"role": "system", "content": prompt},
            {"role": "user", "content": "The SOPs are provided below."},
            # This variant sends the chunks as a list of text content parts.
            {"role": "user", "content": docs},
            {"role": "user", "content": f"Assessment Type: {assessment_type}"},
            {"role": "user", "content": f"Frequency Type: {frequency_type}"},
        ]
        trailing_messages = [
            {"role": "user", "content": f"Duration: {total_duration}"},
            {"role": "user", "content": f"roles data : {roles_data}"},
        ]

        all_questions = []
        try:
            for frequency_number in range(1, total_duration + 1):
                # e.g. "week 1", "daily 3"
                frequency_label = f"{frequency_type} {frequency_number}"
                messages = [
                    *leading_messages,
                    {
                        "role": "user",
                        "content": f"Current Frequency Number to generate : {frequency_label}",
                    },
                    *trailing_messages,
                ]
                content = self._complete(messages, max_tokens=4096)
                if content is None:
                    # No content (e.g. a refusal): json.loads(None) would raise
                    # TypeError, so report it like any other unusable reply.
                    return False
                all_questions.append({
                    "frequency_number": frequency_label,
                    "questions": json.loads(content),
                })
        except json.JSONDecodeError:
            return False
        return all_questions

    def generate_questions_v2(self, input_data):
        """Generate questions for one specific frequency number.

        Args:
            input_data: Mapping with the keys ``sops``, ``assessment_type``,
                ``frequency_type``, ``frequency_number`` (the single period
                to generate for), ``duration`` and ``roles_data``.

        Returns:
            A ``{"frequency_number": label, "questions": data}`` dict, where
            ``label`` is ``"<frequency_type> <frequency_number>"`` and
            ``data`` is the decoded JSON reply. ``False`` if the reply has no
            content or is not valid JSON.

        Raises:
            KeyError: If a required key is missing from ``input_data``.
            Errors raised by the OpenAI client are not caught and propagate.
        """
        sops = input_data['sops']
        assessment_type = input_data['assessment_type']
        frequency_type = input_data['frequency_type']
        frequency_number = input_data['frequency_number']
        total_duration = input_data['duration']
        roles_data = input_data["roles_data"]

        docs = self._chunk_sops(sops)
        prompt = get_questions_prompt()

        # e.g. "week 2"
        frequency_label = f"{frequency_type} {frequency_number}"
        messages = [
            {"role": "system", "content": prompt},
            {"role": "user", "content": "The SOPs are provided below."},
            # This variant sends the chunks serialized as one JSON string.
            {"role": "user", "content": json.dumps(docs)},
            {"role": "user", "content": f"Assessment Type: {assessment_type}"},
            {"role": "user", "content": f"Frequency Type: {frequency_type}"},
            {
                "role": "user",
                "content": f"Current Frequency Number to generate: {frequency_label}",
            },
            {"role": "user", "content": f"Duration: {total_duration}"},
            {"role": "user", "content": f"Roles Data: {roles_data}"},
        ]

        try:
            content = self._complete(messages, max_tokens=10000)
            if content is None:
                # No content (e.g. a refusal): json.loads(None) would raise
                # TypeError, so report it like any other unusable reply.
                return False
            questions = json.loads(content)
        except json.JSONDecodeError:
            return False
        return {
            "frequency_number": frequency_label,
            "questions": questions,
        }