import os
import json
from openai import OpenAI
from pydantic import BaseModel, Field
from typing import List, Dict, Optional
from src.prompts.sops import *
from src.models.questions_response import *
from src.services.sop_document_parser import DocumentParser
from src.prompts.questions import get_questions_prompt
from dotenv import load_dotenv

load_dotenv()


class QuestionsGenerator:
    """Generate assessment questions from SOP content via the OpenAI chat API.

    One structured-output request is issued per frequency period (for
    example "week 1", "week 2", ...) and the decoded JSON payloads are
    collected into a list.
    """

    # Number of items of ``sops`` placed in each text content part.
    SOP_CHUNK_SIZE = 1000

    def __init__(self):
        """Create the OpenAI client from the ``OPENAI_API_KEY`` env variable."""
        self.api_key = os.getenv("OPENAI_API_KEY")
        self.client = OpenAI(api_key=self.api_key)
        self.model = "gpt-4o-mini"

    def generate_questions(self, input_data):
        """Generate questions for every frequency period in the assessment.

        Args:
            input_data: Mapping that must contain the keys ``'sops'``,
                ``'assessment_type'``, ``'frequency_type'``, ``'duration'``
                and ``'roles_data'``. ``sops`` is sliced into chunks of
                ``SOP_CHUNK_SIZE`` (presumably it is a string -- confirm
                against the caller); ``duration`` is used as the number of
                periods and therefore has to be an ``int``.

        Returns:
            A list with one dict per period, each holding
            ``"frequency_number"`` (the label, e.g. ``"week 1"``) and
            ``"questions"`` (the decoded JSON response). ``False`` is
            returned when a response carries no content or its content is
            not valid JSON; results of earlier periods are discarded in
            that case.

        Raises:
            KeyError: If a required key is missing from ``input_data``.
        """
        try:
            sops = input_data['sops']
            assessment_type = input_data['assessment_type']
            frequency_type = input_data['frequency_type']
            total_duration = input_data['duration']
            roles_data = input_data["roles_data"]

            # Split the SOPs into fixed-size pieces, one text content part each.
            chunk_size = self.SOP_CHUNK_SIZE
            docs = [
                {"type": "text", "text": sops[start:start + chunk_size]}
                for start in range(0, len(sops), chunk_size)
            ]

            prompt = get_questions_prompt()

            # Only the "current frequency" message differs between periods,
            # so the surrounding messages are built once, outside the loop.
            leading_messages = [
                {"role": "system", "content": prompt},
                {"role": "user", "content": "The SOPs are provided below."},
                {"role": "user", "content": docs},
                {"role": "user", "content": f"Assessment Type: {assessment_type}"},
                {"role": "user", "content": f"Frequency Type: {frequency_type}"},
            ]
            trailing_messages = [
                {"role": "user", "content": f"Duration: {total_duration}"},
                {"role": "user", "content": f"roles data : {roles_data}"},
            ]

            all_questions = []
            for frequency_number in range(1, total_duration + 1):
                # e.g. "week 1", "daily 3"
                frequency_label = f"{frequency_type} {frequency_number}"

                response = self.client.beta.chat.completions.parse(
                    model=self.model,
                    messages=[
                        *leading_messages,
                        {
                            "role": "user",
                            "content": f"Current Frequency Number to generate : {frequency_label}",
                        },
                        *trailing_messages,
                    ],
                    response_format=AssementQuestion,
                    max_tokens=4096,
                    temperature=0.1,
                )

                content = response.choices[0].message.content
                if content is None:
                    # A message may come back without content (e.g. a model
                    # refusal). json.loads(None) would raise TypeError and
                    # bypass the handler below, so report it as the same
                    # failure result as undecodable JSON.
                    return False

                questions = json.loads(content)
                print(f"Generated questions: {questions}")

                all_questions.append({
                    "frequency_number": frequency_label,
                    "questions": questions
                })

            return all_questions

        except json.JSONDecodeError:
            return False