From ec8ec6f1900060305ac577a48f33479348d83ce6 Mon Sep 17 00:00:00 2001 From: teslim Date: Wed, 16 Jul 2025 12:46:14 +0100 Subject: [PATCH] enrichment of questions --- ENRICH_QUESTIONS_API.md | 301 ++++++++++++++++++++++++++++ src/api/app.py | 2 + src/api/routes/common.py | 53 +++++ src/services/question_enrichment.py | 271 +++++++++++++++++++++++++ src/utils/auth.py | 2 +- 5 files changed, 628 insertions(+), 1 deletion(-) create mode 100644 ENRICH_QUESTIONS_API.md create mode 100644 src/api/routes/common.py create mode 100644 src/services/question_enrichment.py diff --git a/ENRICH_QUESTIONS_API.md b/ENRICH_QUESTIONS_API.md new file mode 100644 index 0000000..2563763 --- /dev/null +++ b/ENRICH_QUESTIONS_API.md @@ -0,0 +1,301 @@ +# Enrich Questions API Documentation + +## Overview + +The `enrich-questions` endpoint is a reverse API that takes existing questions and assigns them to specific areas and members. This endpoint returns the exact same response structure as `generate_questions_from_sop_v3`. Each question is assigned to the most relevant area_tag using OpenAI analysis and to the first member in its `members` list. + +## Endpoint + +``` +POST /api/v1/common/enrich-questions +``` + +## Authentication + +Requires Bearer token authentication: + +``` +Authorization: Bearer <your-api-key> +``` + +## Request Format + +### Headers + +``` +Content-Type: application/json +Authorization: Bearer <your-api-key> +``` + +### Request Body + +The request body should be a JSON array of question objects. 
Each question object must contain: + +- `question` (string): The question text +- `role` (string): The role associated with the question +- `position_id` (integer): The position ID (used as role ID in response) +- `area_tags` (array): Array of area tag objects with `name` and `id` (OpenAI selects the most relevant one) +- `members` (array): Array of member objects with `id` (algorithm selects the most appropriate one) + +### Example Request + +```json +[ + { + "question": "Is the system monitoring working properly?", + "role": "IT Expert", + "position_id": 522, + "area_tags": [ + { + "name": "IT Operations", + "id": 1276 + }, + { + "name": "Communication & Coordination", + "id": 1426 + }, + { + "name": "Quality Assurance", + "id": 1427 + } + ], + "members": [ + { + "id": 159 + } + ] + }, + { + "question": "Are safety protocols being followed?", + "role": "IT Expert", + "position_id": 522, + "area_tags": [ + { + "name": "IT Operations", + "id": 1276 + }, + { + "name": "Safety Protocols", + "id": 1436 + } + ], + "members": [ + { + "id": 159 + } + ] + } +] +``` + +## Response Format + +### Success Response (200 OK) + +The response structure is identical to `generate_questions_from_sop_v3`. 
Each question is assigned to ONE area_tag and ONE member: + +```json +{ + "questions": { + "items": [ + { + "area_tag": 1276, + "area_name": "IT Operations", + "assigned_to": 159, + "questions": "Is the system monitoring working properly?", + "role": 522 + }, + { + "area_tag": 1436, + "area_name": "Safety Protocols", + "assigned_to": 159, + "questions": "Are safety protocols being followed?", + "role": 522 + } + ] + } +} +``` + +### Response Structure Explanation + +- Each question creates exactly ONE item in the response +- OpenAI analyzes the question content and selects the most relevant `area_tag` from available options +- The algorithm selects the most appropriate `member` from the available members +- `area_tag`: The OpenAI-selected area tag ID +- `area_name`: The OpenAI-selected area tag name +- `assigned_to`: The selected member ID +- `questions`: The question text +- `role`: The position_id from the request (used as role identifier) + +## AI-Powered Assignment Algorithm + +### OpenAI Area Tag Selection + +The system uses OpenAI's GPT-4o-mini model to intelligently analyze each question and select the most relevant area tag: + +1. **Content Analysis**: OpenAI analyzes the question content, context, and meaning +2. **Domain Matching**: Determines which area/domain the question is actually testing or assessing +3. **Relevance Scoring**: Considers the purpose and intent of the question +4. **Smart Selection**: Chooses the most specific and primary area tag from available options +5. 
**Fallback**: If OpenAI analysis fails, defaults to the first available area tag + +**OpenAI Prompt Guidelines:** + +- Analyze question content and context +- Match questions to appropriate area tags based on meaning and purpose +- Consider what domain/area the question is actually testing +- Choose only ONE area tag per question - the most relevant one +- If multiple areas seem relevant, choose the most specific or primary one + +### Member Selection + +Currently uses a simple selection algorithm (first member), but can be enhanced to consider: + +- Member skills and expertise +- Current workload distribution +- Availability and capacity +- Historical performance + +### Error Responses + +#### 400 Bad Request - Invalid Input Format + +```json +{ + "error": "Invalid input", + "message": "Input data must be in JSON format." +} +``` + +#### 400 Bad Request - Missing Required Fields + +```json +{ + "error": "Invalid data", + "message": "Question object at index 0 is missing required field 'question'." +} +``` + +#### 400 Bad Request - Invalid Array Structure + +```json +{ + "error": "Invalid input", + "message": "Input data must be an array of question objects." +} +``` + +#### 401 Unauthorized + +```json +{ + "error": "Unauthorized", + "message": "API key is missing or invalid." +} +``` + +#### 500 Internal Server Error + +```json +{ + "error": "Internal Server Error", + "message": "An unexpected error occurred." 
+} +``` + +## Usage Examples + +### Basic Usage + +```bash +curl -X POST "http://localhost:5402/api/v1/common/enrich-questions" \ + -H "Authorization: Bearer your-api-key" \ + -H "Content-Type: application/json" \ + -d '[ + { + "question": "Is the system performance being monitored?", + "role": "Developer", + "position_id": 123, + "area_tags": [ + {"name": "Development", "id": 1}, + {"name": "Performance Monitoring", "id": 2} + ], + "members": [ + {"id": 456} + ] + } + ]' +``` + +### Python Example + +```python +import requests +import json + +url = "http://localhost:5402/api/v1/common/enrich-questions" +headers = { + "Authorization": "Bearer your-api-key", + "Content-Type": "application/json" +} + +payload = [ + { + "question": "Is the system performance being monitored?", + "role": "Developer", + "position_id": 123, + "area_tags": [ + {"name": "Development", "id": 1}, + {"name": "Performance Monitoring", "id": 2} + ], + "members": [ + {"id": 456} + ] + } +] + +response = requests.post(url, json=payload, headers=headers) +result = response.json() +print(result) +``` + +## Validation Rules + +1. **Input must be a JSON array** of question objects +2. **Each question object must contain all required fields**: + - `question`: Non-empty string + - `role`: Non-empty string + - `position_id`: Integer + - `area_tags`: Array of objects with `name` and `id` + - `members`: Array of objects with `id` +3. **Area tags must be valid objects** with both `name` (string) and `id` (integer/string) +4. **Members must be valid objects** with `id` (integer/string) +5. 
**Arrays can be empty** but must be present; a question whose `area_tags` or `members` array is empty cannot be assigned and is omitted from the response + +## Response Logic + +The endpoint uses AI to intelligently assign each question to the most relevant area and member: + +- **Input**: 2 questions with multiple area_tags and members each +- **Output**: 2 items (one per question) with the best area_tag and member selected for each +- **AI Analysis**: OpenAI analyzes question content and meaning to find the most relevant area_tag +- **Smart Assignment**: Uses natural language understanding to make intelligent assignments +- **No Cartesian Product**: Each question gets exactly one area assignment and one member assignment + +## Performance Considerations + +- **Batch Processing**: OpenAI analysis for a request is performed in a single batched call rather than one call per question +- **Caching**: Consider implementing caching for frequently assigned questions +- **Fallback**: If OpenAI analysis fails or returns an unusable answer, each question falls back to its first available area tag +- **Error Handling**: Comprehensive error handling for OpenAI API failures + +## Integration with Existing System + +This endpoint complements the existing question generation APIs: + +- `POST /api/v1/qs/generate_questions_from_sop` - Generates questions from SOPs +- `POST /api/v1/qs/generate_questions_from_sop-latest` - Enhanced question generation +- `POST /api/v1/common/enrich-questions` - Enriches existing questions (NEW) + +The enrich-questions endpoint returns the **exact same structure** as `generate_questions_from_sop_v3`, with AI-powered intelligent assignment of questions to the most relevant areas and members, making it seamlessly interchangeable in your application workflow. 
diff --git a/src/api/app.py b/src/api/app.py
index 8d580b6..003e39b 100644
--- a/src/api/app.py
+++ b/src/api/app.py
@@ -3,6 +3,7 @@ from flask import Flask
 from src.api.routes.sops import sops_bp
 from src.api.routes.questions import qs_b
 from src.api.routes.chatbot import bot
+from src.api.routes.common import common_bp
 
 def create_app():
     app = Flask(__name__)
@@ -11,6 +12,7 @@ def create_app():
     app.register_blueprint(sops_bp, url_prefix='/api/v1/sop')
     app.register_blueprint(qs_b,url_prefix='/api/v1/qs')
     app.register_blueprint(bot,url_prefix='/api/v1/bot')
+    app.register_blueprint(common_bp, url_prefix='/api/v1/common')
 
     # Set up the upload folder configuration inside the src directory
     UPLOAD_FOLDER = os.path.join(os.path.dirname(os.path.abspath(__file__)), '../../uploads')
diff --git a/src/api/routes/common.py b/src/api/routes/common.py
new file mode 100644
index 0000000..d93179f
--- /dev/null
+++ b/src/api/routes/common.py
@@ -0,0 +1,69 @@
+import json
+import logging
+import os
+
+from flask import Blueprint, request, jsonify
+
+from src.services.question_enrichment import QuestionEnrichmentService
+from src.utils.auth import auth_check
+
+logger = logging.getLogger(__name__)
+
+# Initialize the Blueprint
+common_bp = Blueprint('common', __name__)
+
+
+@common_bp.route('/enrich-questions', methods=['POST'])
+@auth_check
+def enrich_questions():
+    """
+    Reverse API endpoint that takes questions and assigns them to areas and members.
+    Returns the exact same structure as generate_questions_from_sop_v3.
+    Expected payload: Array of question objects with question, role, position_id, area_tags, and members.
+
+    Example payload:
+    [
+        {
+            "question": "Is the system monitoring working properly?",
+            "role": "IT Expert",
+            "position_id": 522,
+            "area_tags": [
+                {"name": "IT Operations", "id": 1276},
+                {"name": "Communication & Coordination", "id": 1426}
+            ],
+            "members": [
+                {"id": 159}
+            ]
+        }
+    ]
+
+    Returns:
+        200 with {"questions": {"items": [...]}} on success,
+        400 when the body is not a JSON array or a question object is invalid,
+        500 with a generic message on unexpected failures.
+    """
+    if not request.is_json:
+        return jsonify({"error": "Invalid input", "message": "Input data must be in JSON format."}), 400
+
+    # silent=True yields None for a malformed body instead of raising, so the
+    # client always receives a JSON error rather than Flask's default HTML page.
+    input_data = request.get_json(silent=True)
+    if not isinstance(input_data, list):
+        return jsonify({"error": "Invalid input", "message": "Input data must be an array of question objects."}), 400
+
+    try:
+        # Initialize the question enrichment service
+        enrichment_service = QuestionEnrichmentService()
+
+        # Enrich the questions
+        result = enrichment_service.enrich_questions(input_data)
+    except Exception:
+        # Log the details server-side; never echo internal errors to the client.
+        logger.exception("Unexpected error while enriching questions")
+        return jsonify({"error": "Internal Server Error", "message": "An unexpected error occurred."}), 500
+
+    if not result['success']:
+        return jsonify({"error": "Invalid data", "message": result['error']}), 400
+
+    # Return the exact same structure as generate_questions_from_sop_v3
+    return jsonify({"questions": result['questions']}), 200
diff --git a/src/services/question_enrichment.py b/src/services/question_enrichment.py
new file mode 100644
index 0000000..0b123c8
--- /dev/null
+++ b/src/services/question_enrichment.py
@@ -0,0 +1,321 @@
+import os
+from typing import List, Dict, Any, Optional
+from datetime import datetime
+import json
+import logging
+import random
+from openai import OpenAI
+from dotenv import load_dotenv
+
+load_dotenv()
+
+logger = logging.getLogger(__name__)
+
+
+class QuestionEnrichmentService:
+    """
+    Service class to handle question enrichment with area and member assignments.
+    This is the reverse of question generation - it takes existing questions and assigns them to areas and members.
+    """
+
+    def __init__(self):
+        self.api_key = os.getenv("OPENAI_API_KEY")
+        self.client = OpenAI(api_key=self.api_key)
+        self.model = "gpt-4o-mini"
+
+    def validate_question_object(self, question_obj: Dict[str, Any], index: int) -> Dict[str, Any]:
+        """
+        Validate a single question object structure.
+
+        Args:
+            question_obj: The question object to validate
+            index: The index of the question object in the array (for error messages)
+
+        Returns:
+            Dict with 'valid' boolean and 'error' message if invalid
+        """
+        # Reject non-object elements (strings, numbers, null) up front; the
+        # membership tests below would otherwise raise or silently misbehave.
+        if not isinstance(question_obj, dict):
+            return {
+                'valid': False,
+                'error': f"Question object at index {index} must be a JSON object."
+            }
+
+        required_fields = ['question', 'role', 'position_id', 'area_tags', 'members']
+
+        for field in required_fields:
+            if field not in question_obj:
+                return {
+                    'valid': False,
+                    'error': f"Question object at index {index} is missing required field '{field}'."
+                }
+
+        # The question text is later used as a dictionary key, so it must be a usable string.
+        question_text = question_obj['question']
+        if not isinstance(question_text, str) or not question_text.strip():
+            return {
+                'valid': False,
+                'error': f"Question object at index {index}: 'question' must be a non-empty string."
+            }
+
+        # Validate area_tags structure
+        if not isinstance(question_obj['area_tags'], list):
+            return {
+                'valid': False,
+                'error': f"Question object at index {index}: 'area_tags' must be an array."
+            }
+
+        for area_idx, area_tag in enumerate(question_obj['area_tags']):
+            if not isinstance(area_tag, dict) or 'name' not in area_tag or 'id' not in area_tag:
+                return {
+                    'valid': False,
+                    'error': f"Question object at index {index}: area_tag at index {area_idx} must have 'name' and 'id' fields."
+                }
+
+        # Validate members structure
+        if not isinstance(question_obj['members'], list):
+            return {
+                'valid': False,
+                'error': f"Question object at index {index}: 'members' must be an array."
+            }
+
+        for member_idx, member in enumerate(question_obj['members']):
+            if not isinstance(member, dict) or 'id' not in member:
+                return {
+                    'valid': False,
+                    'error': f"Question object at index {index}: member at index {member_idx} must have 'id' field."
+                }
+
+        return {'valid': True}
+
+    def _get_question_area_assignment_prompt(self):
+        """
+        Get the prompt for OpenAI to assign questions to the most relevant area tags.
+        """
+        return """
+        You are an expert at analyzing questions and determining which area/domain they belong to.
+
+        Your task is to analyze each question and assign it to the most relevant area tag from the provided list.
+
+        Guidelines:
+        1. Analyze the question content and context
+        2. Match the question to the most appropriate area tag based on its meaning and purpose
+        3. Consider what domain/area the question is actually testing or assessing
+        4. Choose only ONE area tag per question - the most relevant one
+        5. If multiple areas seem relevant, choose the most specific or primary one
+
+        Return your response as a JSON object with the question text as key and the selected area tag ID as value.
+
+        Example format:
+        {
+            "Is the system monitoring working properly?": 1276,
+            "Are safety protocols being followed?": 1436
+        }
+        """
+
+    @staticmethod
+    def _extract_json_object(text: Optional[str]) -> Dict[str, Any]:
+        """
+        Extract the outermost JSON object embedded in a model reply.
+
+        Args:
+            text: Raw reply text; may be None, empty, or wrapped in prose/markdown fences
+
+        Returns:
+            The parsed JSON object, or an empty dict when no valid object is present
+        """
+        if not text:
+            return {}
+
+        start_idx = text.find('{')
+        end_idx = text.rfind('}')
+        # rfind() returns -1 when '}' is absent, so test it before adding 1 for the slice.
+        if start_idx == -1 or end_idx < start_idx:
+            return {}
+
+        try:
+            parsed = json.loads(text[start_idx:end_idx + 1])
+        except ValueError:
+            # json.JSONDecodeError is a ValueError subclass.
+            return {}
+
+        return parsed if isinstance(parsed, dict) else {}
+
+    def _use_openai_for_area_assignment(self, questions_data: List[Dict[str, Any]]) -> Dict[str, Any]:
+        """
+        Use OpenAI to intelligently assign questions to the most relevant area tags.
+
+        Args:
+            questions_data: List of question objects
+
+        Returns:
+            Dict mapping question text to selected area tag ID; empty when the
+            request fails or the reply contains no usable JSON object
+        """
+        try:
+            # Prepare the data for OpenAI
+            questions_info = []
+            all_area_tags = {}
+
+            for question_obj in questions_data:
+                question_text = question_obj['question']
+                area_tags = question_obj['area_tags']
+
+                questions_info.append({
+                    "question": question_text,
+                    "available_area_tags": area_tags
+                })
+
+                # Collect all unique area tags
+                for area_tag in area_tags:
+                    all_area_tags[area_tag['id']] = area_tag['name']
+
+            # Create the prompt content
+            prompt_content = f"""
+            Questions to analyze and assign:
+            {json.dumps(questions_info, indent=2)}
+
+            Available area tags:
+            {json.dumps(all_area_tags, indent=2)}
+
+            For each question, select the most relevant area tag ID from its available_area_tags list.
+            """
+
+            response = self.client.chat.completions.create(
+                model=self.model,
+                messages=[
+                    {"role": "system", "content": self._get_question_area_assignment_prompt()},
+                    {"role": "user", "content": prompt_content}
+                ],
+                temperature=0.1,
+                max_tokens=1000
+            )
+
+            assignments = self._extract_json_object(response.choices[0].message.content)
+            if not assignments:
+                logger.warning("OpenAI reply contained no usable area assignments; using fallback")
+            return assignments
+
+        except Exception:
+            # Best effort by design: any failure falls back to the first area tag.
+            logger.exception("Error in OpenAI area assignment")
+            return {}
+
+    def _find_best_area_tag_for_question(self, question_text: str, area_tags: List[Dict], openai_assignments: Dict[str, Any]) -> Optional[Dict]:
+        """
+        Find the most relevant area tag for a given question using OpenAI assignments.
+
+        Args:
+            question_text: The question text to match
+            area_tags: List of available area tags
+            openai_assignments: OpenAI assignments from batch processing
+
+        Returns:
+            The most relevant area tag, or None when area_tags is empty
+        """
+        # First try to use OpenAI assignment
+        selected_area_id = openai_assignments.get(question_text)
+        if selected_area_id is not None:
+            # The model may return the ID as a string even when the request used
+            # an integer (or vice versa), so also compare the string forms.
+            selected_as_text = str(selected_area_id)
+            for area_tag in area_tags:
+                if area_tag['id'] == selected_area_id or str(area_tag['id']) == selected_as_text:
+                    return area_tag
+
+        # Fallback to first area tag if OpenAI assignment not found
+        return area_tags[0] if area_tags else None
+
+    def _select_member_for_question(self, question_text: str, members: List[Dict]) -> Optional[Dict]:
+        """
+        Select the most appropriate member for a given question.
+        For now, this is a simple selection, but could be enhanced with more logic.
+
+        Args:
+            question_text: The question text
+            members: List of available members
+
+        Returns:
+            Selected member, or None when members is empty
+        """
+        # For now, just select the first member
+        # In a real implementation, this could consider member skills, workload, etc.
+        return members[0] if members else None
+
+    def enrich_questions(self, questions_data: List[Dict[str, Any]]) -> Dict[str, Any]:
+        """
+        Enrich multiple questions with area and member assignments.
+        Each question gets assigned to ONE area_tag and ONE member based on OpenAI analysis.
+        Returns the exact same structure as generate_questions_from_sop_v3.
+        A question whose area_tags or members list is empty cannot be assigned and is skipped.
+
+        Args:
+            questions_data: List of question objects to enrich
+
+        Returns:
+            Dict with 'success' True and 'questions' in the same format as the AllQuestions model,
+            or 'success' False and an 'error' message when validation fails
+        """
+        # Validate input is a list
+        if not isinstance(questions_data, list):
+            return {
+                'success': False,
+                'error': "Input data must be an array of question objects."
+            }
+
+        # Validate each question object
+        for idx, question_obj in enumerate(questions_data):
+            validation_result = self.validate_question_object(question_obj, idx)
+            if not validation_result['valid']:
+                return {
+                    'success': False,
+                    'error': validation_result['error']
+                }
+
+        # Only questions with more than one candidate tag need the model; when the
+        # choice is forced (or impossible) the network call is skipped entirely.
+        ambiguous_questions = [q for q in questions_data if len(q['area_tags']) > 1]
+        openai_assignments = (
+            self._use_openai_for_area_assignment(ambiguous_questions)
+            if ambiguous_questions else {}
+        )
+
+        # Process the enriched questions - each question gets ONE area_tag and ONE member
+        enriched_items = []
+
+        for idx, question_obj in enumerate(questions_data):
+            # Find the best area tag for this question using OpenAI
+            best_area_tag = self._find_best_area_tag_for_question(
+                question_obj['question'],
+                question_obj['area_tags'],
+                openai_assignments
+            )
+
+            # Select the best member for this question
+            selected_member = self._select_member_for_question(
+                question_obj['question'],
+                question_obj['members']
+            )
+
+            # Without an area tag or a member there is nothing to assign; make the skip visible.
+            if not best_area_tag or not selected_member:
+                logger.warning("Skipping question at index %d: no area tag or member available", idx)
+                continue
+
+            # Create a single item for this question
+            enriched_items.append({
+                "area_tag": best_area_tag['id'],
+                "area_name": best_area_tag['name'],
+                "assigned_to": selected_member['id'],
+                "questions": question_obj['question'],
+                "role": question_obj['position_id']  # Using position_id as role ID
+            })
+
+        # Return in the exact same format as generate_questions_from_sop_v3
+        return {
+            'success': True,
+            'questions': {
+                'items': enriched_items
+            }
+        }