updated
This commit is contained in:
@@ -1,78 +1,140 @@
|
||||
import os
|
||||
import json
|
||||
import asyncio
|
||||
from openai import AsyncOpenAI
|
||||
from pydantic import BaseModel, Field
|
||||
from typing import List, Dict, Optional
|
||||
from src.prompts.sops import *
|
||||
from src.models.questions_response import *
|
||||
from src.services.sop_document_parser import DocumentParser
|
||||
from src.prompts.questions import *
|
||||
import requests
|
||||
import pandas as pd
|
||||
from dotenv import load_dotenv
|
||||
|
||||
load_dotenv()
|
||||
import os
|
||||
|
||||
# Project key for the data API, read from the AI_DATA_KEY environment variable.
DATA_KEY = os.getenv("AI_DATA_KEY")

# Constants for API requests
# NOTE(review): the path contains a double slash ("//v3"). It is kept verbatim;
# confirm the server expects it before normalizing.
URL = "https://erpai.mkdlabs.com//v3/api/custom/erpai/common/get-data-ai"
HEADERS = {
    "x-project": DATA_KEY,  # None when AI_DATA_KEY is not set
}


class QuestionsGenerator:
    """Generate assessment questions from SOP text through the OpenAI chat API."""

    def __init__(self):
        """Create the async OpenAI client from the OPENAI_API_KEY environment variable."""
        self.api_key = os.getenv("OPENAI_API_KEY")
        self.client = AsyncOpenAI(api_key=self.api_key)
        self.model = "gpt-4o-mini"

    async def generate_single_frequency_questions(
        self, docs, assessment_type, frequency_number, total_duration
    ):
        """Request the questions for one frequency slot and return the parsed reply.

        Args:
            docs: JSON-serializable SOP payload; it is sent as ``json.dumps(docs)``.
            assessment_type: Assessment type label, e.g. ``"weekly"``.
            frequency_number: Number of the slot to generate questions for.
            total_duration: Value sent to the model in the ``Duration`` message.

        Returns:
            The object decoded from the JSON text of the first completion choice.

        Raises:
            json.JSONDecodeError: If the reply text is not valid JSON.
            TypeError: If the reply carries no text content (``json.loads(None)``).
        """
        frequency_label = f"{assessment_type} number : {frequency_number}"
        messages = [
            {"role": "system", "content": get_questions_prompt_v3()},
            {"role": "user", "content": "The SOPs are provided below."},
            {"role": "user", "content": json.dumps(docs)},
            {"role": "user", "content": f"Assessment Type: {assessment_type}"},
            {"role": "user", "content": f"Current Frequency Number to generate: {frequency_label}"},
            {"role": "user", "content": f"Duration: {total_duration}"},
        ]

        response = await self.client.chat.completions.create(
            model=self.model,
            messages=messages,
            temperature=0.1,
            response_format={"type": "json_object"},
            max_tokens=10000,
        )
        return json.loads(response.choices[0].message.content)

    async def generate_questions(self, input_data: Dict) -> "Optional[AssessmentQuestions]":
        """Generate questions for every frequency slot concurrently.

        Args:
            input_data: Mapping with the keys ``'sops'`` (SOP text),
                ``'assessment_type'`` and ``'duration'`` (number of slots; it is
                used as a ``range`` bound, so it must be an integer).

        Returns:
            An ``AssessmentQuestions`` wrapping one result per slot, or ``None``
            when any step fails. The error is printed, not raised.
        """
        try:
            sops = input_data['sops']
            assessment_type = input_data['assessment_type']
            total_duration = input_data['duration']

            # Split the SOP text into fixed-size pieces for the message payload.
            chunk_size = 1000
            docs = [
                {"type": "text", "text": sops[start:start + chunk_size]}
                for start in range(0, len(sops), chunk_size)
            ]

            # One request per slot, numbered from 1, all awaited together.
            tasks = [
                self.generate_single_frequency_questions(
                    docs, assessment_type, frequency_number, total_duration
                )
                for frequency_number in range(1, total_duration + 1)
            ]
            all_questions = await asyncio.gather(*tasks)

            return AssessmentQuestions(questions=Questions(questions=all_questions))

        except Exception as e:
            # Best-effort contract kept from the original: report and return None.
            print(f"An error occurred: {e}")
            return None
|
||||
|
||||
# Usage
async def main():
    """Run a sample question generation and print the result."""
    generator = QuestionsGenerator()
    input_data = {
        "sops": "Your SOP text here",
        "assessment_type": "weekly",
        "duration": 4,
    }
    result = await generator.generate_questions(input_data)
    print(result)


# JSON bodies for API requests
def create_json_body(area_type, company_id):
    """Build the request payload for the data API.

    Args:
        area_type: Value stored under the ``"type"`` key.
        company_id: Value stored under ``"options" -> "company_id"``.

    Returns:
        A new ``{"type": ..., "options": {"company_id": ...}}`` dict.
    """
    return {
        "type": area_type,
        "options": {
            "company_id": company_id,
        },
    }
|
||||
|
||||
# Function to fetch data from the API
|
||||
def fetch_data(json_body, timeout=60):
    """POST ``json_body`` to the data API and return the decoded JSON reply.

    Args:
        json_body: Request payload. Its ``"options"`` mapping, if present, is
            copied, so the caller's dict is never modified.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` can block indefinitely.

    Returns:
        The response body decoded by ``response.json()``.

    Raises:
        requests.HTTPError: For 4xx/5xx responses (from ``raise_for_status``).
        requests.Timeout: If the server does not answer within ``timeout``.
    """
    options = dict(json_body.get("options") or {})
    # The payload always carries a company_id key (None when none was given).
    options.setdefault("company_id", None)
    payload = {**json_body, "options": options}

    response = requests.post(URL, headers=HEADERS, json=payload, timeout=timeout)
    response.raise_for_status()  # Raise an error for bad responses
    return response.json()
|
||||
|
||||
|
||||
|
||||
def convert_assessment_data_to_dataframe(assessment_data):
    """Flatten the nested assessment payload into one row per assessment/user/area.

    Args:
        assessment_data: Mapping whose ``"data"`` entry is a list of assessments.
            Each assessment carries ``assessment_id``, ``assessment_name``,
            ``start_date``, ``open_items``, ``completed_items``,
            ``total_assigned_items``, ``red_flags`` and an optional
            ``user_details`` list. Each user carries ``name``,
            ``total_assigned_items``, ``completed_items`` and an optional
            ``area_list``.

    Returns:
        A DataFrame with a fixed column set. A missing or ``None`` list is
        treated as empty, and the columns are present even when there are no
        rows. Users without areas, and assessments without users, produce no
        rows.

    Raises:
        KeyError: If an assessment or user lacks one of the required keys.
    """
    columns = [
        "assessment_id",
        "assessment_name",
        "start_date",
        "open_items_overall",
        "completed_items_overall",
        "total_assigned_items_overall",
        "user_name",
        "user_total_assigned_items",
        "user_completed_items",
        "area",
        "red_flags",
    ]

    rows = []
    for assessment in assessment_data.get("data") or []:
        assessment_fields = {
            "assessment_id": assessment["assessment_id"],
            "assessment_name": assessment["assessment_name"],
            "start_date": assessment["start_date"],
            "open_items_overall": assessment["open_items"],
            "completed_items_overall": assessment["completed_items"],
            "total_assigned_items_overall": assessment["total_assigned_items"],
        }
        red_flags = assessment["red_flags"]

        for user in assessment.get("user_details") or []:
            user_fields = {
                "user_name": user["name"],
                "user_total_assigned_items": user["total_assigned_items"],
                "user_completed_items": user["completed_items"],
            }
            for area in user.get("area_list") or []:
                rows.append(
                    {**assessment_fields, **user_fields, "area": area, "red_flags": red_flags}
                )

    # Passing the columns explicitly keeps the schema stable for an empty result.
    return pd.DataFrame(rows, columns=columns)
|
||||
|
||||
# Convert to DataFrame
|
||||
|
||||
|
||||
# Summary statistics for overall assessment level
|
||||
def generate_summary_statistics(df):
    """Compute assessment-level and user-level summary figures.

    Args:
        df: Flattened frame with the columns ``assessment_id``,
            ``open_items_overall``, ``completed_items_overall``,
            ``total_assigned_items_overall``, ``red_flags``, ``user_name``,
            ``user_total_assigned_items``, ``user_completed_items`` and ``area``.

    Returns:
        A dict of summary values; ``area_summary`` maps each area to its row count.
    """
    per_assessment = df.groupby('assessment_id')
    per_user = df.groupby('user_name')

    # NOTE(review): these sums run over every row, so a user's figures count
    # once per row the user appears in. Confirm that weighting is intended.
    assigned_total = df['user_total_assigned_items'].sum()
    if assigned_total > 0:
        completion_rate_per_user = (df['user_completed_items'].sum() / assigned_total) * 100
    else:
        completion_rate_per_user = 0

    return {
        "total_assessments": df['assessment_id'].nunique(),
        "avg_open_items_per_assessment": per_assessment['open_items_overall'].mean().mean(),
        "avg_completed_items_per_assessment": per_assessment['completed_items_overall'].mean().mean(),
        "avg_total_assigned_items_per_assessment": per_assessment['total_assigned_items_overall'].mean().mean(),
        # Collapsed per assessment first, like the three metrics above, so an
        # assessment with more rows does not dominate the average.
        "avg_red_flags": per_assessment['red_flags'].mean().mean(),
        "total_users": df['user_name'].nunique(),
        "avg_user_total_assigned_items": per_user['user_total_assigned_items'].mean().mean(),
        "avg_user_completed_items": per_user['user_completed_items'].mean().mean(),
        "completion_rate_per_user": completion_rate_per_user,
        "area_summary": df['area'].value_counts().to_dict(),
    }
|
||||
|
||||
# Additional statistics for efficiency and areas
|
||||
def generate_extended_statistics(df):
    """Rank users by completion rate and areas by uncompleted items.

    Args:
        df: Frame with the columns ``user_name``, ``user_total_assigned_items``,
            ``user_completed_items`` and ``area``. It is not modified.

    Returns:
        A dict with three mappings, each limited to five entries:
        ``top_5_efficient_users`` and ``bottom_5_least_efficient_users`` map a
        user to the mean completion rate in percent, and
        ``areas_with_most_uncompleted_items`` maps an area to its summed
        uncompleted items.
    """
    assigned = df['user_total_assigned_items']
    completed = df['user_completed_items']

    # Mask zero totals before dividing so both 0/0 and x/0 become 0 rather than inf.
    completion_rate = (completed / assigned.where(assigned != 0)).fillna(0) * 100
    rate_by_user = completion_rate.groupby(df['user_name']).mean()

    uncompleted = assigned - completed
    uncompleted_by_area = uncompleted.groupby(df['area']).sum()

    return {
        "top_5_efficient_users": rate_by_user.nlargest(5).to_dict(),
        "bottom_5_least_efficient_users": rate_by_user.nsmallest(5).to_dict(),
        "areas_with_most_uncompleted_items": uncompleted_by_area.nlargest(5).to_dict(),
    }
|
||||
|
||||
# Generate statistics for problematic areas
|
||||
def generate_problematic_area_statistics(df):
    """Total the open items and red flags per area name, worst areas first.

    Args:
        df: Frame with the columns ``name``, ``open_items`` and ``red_flags``.
            An empty frame is accepted.

    Returns:
        A DataFrame indexed by ``name`` with the columns ``total_open_items``
        and ``total_red_flags``, sorted by open items and then red flags,
        both descending.
    """
    renamed = {"open_items": "total_open_items", "red_flags": "total_red_flags"}

    if df.empty:
        # Nothing to group; keep the output schema so callers can still use it.
        return pd.DataFrame(columns=list(renamed.values()))

    # A single aggregation keeps both totals on one index, so the sort order
    # survives instead of being lost when two separately sorted Series are aligned.
    totals = df.groupby('name')[list(renamed)].sum().rename(columns=renamed)
    return totals.sort_values(list(renamed.values()), ascending=False).fillna(0)
|
||||
|
||||
def generate_summary_stats(assessment_data, area_data):
    """Combine the assessment, user and area statistics into a single dict.

    Args:
        assessment_data: Assessment payload passed to
            ``convert_assessment_data_to_dataframe``.
        area_data: Mapping whose ``"data"`` entry holds the area records.

    Returns:
        The summary statistics dict, extended with the keys
        ``"users(Workers) based stats"`` and ``"Area based stats"``.
    """
    flat_assessments = convert_assessment_data_to_dataframe(assessment_data)
    area_frame = pd.DataFrame(area_data.get("data", []))

    combined = generate_summary_statistics(flat_assessments)
    combined["users(Workers) based stats"] = generate_extended_statistics(flat_assessments)

    # Area totals are keyed by the area frame's index, one entry per area.
    area_totals = generate_problematic_area_statistics(area_frame)
    combined["Area based stats"] = area_totals.to_dict(orient='index')

    return combined
|
||||
|
||||
|
||||
# Script entry point.
# NOTE(review): indentation was lost in this view. Every statement below is
# assumed to belong to the guard; confirm against the real file.
if __name__ == "__main__":
    # Run the sample question-generation coroutine to completion.
    asyncio.run(main())
    from src.services.chatbot import Chatbot
    bot = Chatbot()
    # Hard-coded sample call: company 12, N=3.
    res = bot.predict_next_n_assessment(companyid=12,N=3)

    print(res)
|
||||
Reference in New Issue
Block a user