71 lines
4.2 KiB
Python
71 lines
4.2 KiB
Python
import pandas as pd
|
|
|
|
def generate_summary_stats_v2(file_path):
|
|
# Load the DataFrame from the provided file path
|
|
df = pd.read_csv(file_path)
|
|
|
|
# Ensure date columns are correctly parsed
|
|
df['Assessment_Start_Date'] = pd.to_datetime(df['Assessment_Start_Date'])
|
|
df['Assessment_End_Date'] = pd.to_datetime(df['Assessment_End_Date'])
|
|
|
|
# Add completion rate calculation
|
|
completed_status = df['Assessment_Status'] == 'Completed'
|
|
completion_rate_by_frequency = df[completed_status].groupby('Assessment_Frequency').size() / df.groupby('Assessment_Frequency').size()
|
|
|
|
in_progress_status = df['Assessment_Status'] == 'In Progress'
|
|
incomplete_status = df['Assessment_Status'] == 'Incomplete'
|
|
|
|
# Calculate in-progress and incomplete rates by frequency
|
|
in_progress_rate_by_frequency = df[in_progress_status].groupby('Assessment_Frequency').size() / df.groupby('Assessment_Frequency').size()
|
|
incomplete_rate_by_frequency = df[incomplete_status].groupby('Assessment_Frequency').size() / df.groupby('Assessment_Frequency').size()
|
|
|
|
# Fill NaN values (where no assessments are in-progress or incomplete for certain frequencies)
|
|
completion_rate_by_frequency = completion_rate_by_frequency.fillna(0)
|
|
in_progress_rate_by_frequency = in_progress_rate_by_frequency.fillna(0)
|
|
incomplete_rate_by_frequency = incomplete_rate_by_frequency.fillna(0)
|
|
|
|
# Round all numerical values to 2 decimal places
|
|
completion_rate_by_frequency = completion_rate_by_frequency.round(2)
|
|
in_progress_rate_by_frequency = in_progress_rate_by_frequency.round(2)
|
|
incomplete_rate_by_frequency = incomplete_rate_by_frequency.round(2)
|
|
|
|
summary_stats = {
|
|
'Open Items and Red Flags': {
|
|
'Total Open Items': round(df['Open_Items'].sum(), 2),
|
|
'Average Open Items per Assessment': round(df['Open_Items'].mean(), 2),
|
|
'Total Red Flags': round(df['Red_Flags'].sum(), 2),
|
|
'Average Red Flags per Assessment': round(df['Red_Flags'].mean(), 2),
|
|
'Max Red Flags in a Single Assessment': round(df['Red_Flags'].max(), 2),
|
|
'Most Common Area with Red Flags': df[df['Red_Flags'] > 0]['Assessment_Area'].mode()[0]
|
|
},
|
|
'Assessment Frequency': {
|
|
'Assessment Type Breakdown': df['Assessment_Frequency'].value_counts(normalize=True).round(2).to_dict(),
|
|
'Average Time Between Assessments': round((df['Assessment_End_Date'] - df['Assessment_Start_Date']).dt.days.mean(), 2),
|
|
'Average Assessment Duration': round(df['Assessment_End_Date'].sub(df['Assessment_Start_Date']).dt.days.mean(), 2),
|
|
'Completion Rate by Frequency': completion_rate_by_frequency.to_dict(),
|
|
'In Progress Rate by Frequency': in_progress_rate_by_frequency.to_dict(),
|
|
'Incomplete Rate by Frequency': incomplete_rate_by_frequency.to_dict()
|
|
},
|
|
'Assessment Start and End Dates': {
|
|
'Longest Assessment Duration (days)': round(df['Assessment_End_Date'].sub(df['Assessment_Start_Date']).dt.days.max(), 2),
|
|
'Shortest Assessment Duration (days)': round(df['Assessment_End_Date'].sub(df['Assessment_Start_Date']).dt.days.min(), 2),
|
|
},
|
|
'Assessment Areas': {
|
|
'Most Assessed Area': df['Assessment_Area'].value_counts().idxmax(),
|
|
'Most Open Items in Area': df.groupby('Assessment_Area')['Open_Items'].sum().idxmax(),
|
|
'Area with Most Red Flags': df.groupby('Assessment_Area')['Red_Flags'].sum().idxmax()
|
|
},
|
|
'Assessment Status': {
|
|
'Assessment Status Distribution': df['Assessment_Status'].value_counts(normalize=True).round(2).to_dict(),
|
|
'Incomplete Assessments': round(df[df['Assessment_Status'] == 'Incomplete'].shape[0], 2),
|
|
'In Progress Assessments': round(df[df['Assessment_Status'] == 'In Progress'].shape[0], 2)
|
|
},
|
|
'Assessment Admin': {
|
|
'Most Frequent Admin': df['Assessment_Admin'].mode()[0],
|
|
'Admin with Fewest Red Flags': df.groupby('Assessment_Admin')['Red_Flags'].sum().idxmin(),
|
|
'Admin with Most Open Items': df.groupby('Assessment_Admin')['Open_Items'].mean().idxmax()
|
|
}
|
|
}
|
|
|
|
return summary_stats
|