Files

71 lines
4.2 KiB
Python
Raw Permalink Normal View History

2024-09-14 01:50:41 +00:00
import pandas as pd
def generate_summary_stats_v2(file_path):
# Load the DataFrame from the provided file path
df = pd.read_csv(file_path)
# Ensure date columns are correctly parsed
df['Assessment_Start_Date'] = pd.to_datetime(df['Assessment_Start_Date'])
df['Assessment_End_Date'] = pd.to_datetime(df['Assessment_End_Date'])
# Add completion rate calculation
completed_status = df['Assessment_Status'] == 'Completed'
completion_rate_by_frequency = df[completed_status].groupby('Assessment_Frequency').size() / df.groupby('Assessment_Frequency').size()
in_progress_status = df['Assessment_Status'] == 'In Progress'
incomplete_status = df['Assessment_Status'] == 'Incomplete'
# Calculate in-progress and incomplete rates by frequency
in_progress_rate_by_frequency = df[in_progress_status].groupby('Assessment_Frequency').size() / df.groupby('Assessment_Frequency').size()
incomplete_rate_by_frequency = df[incomplete_status].groupby('Assessment_Frequency').size() / df.groupby('Assessment_Frequency').size()
# Fill NaN values (where no assessments are in-progress or incomplete for certain frequencies)
completion_rate_by_frequency = completion_rate_by_frequency.fillna(0)
in_progress_rate_by_frequency = in_progress_rate_by_frequency.fillna(0)
incomplete_rate_by_frequency = incomplete_rate_by_frequency.fillna(0)
# Round all numerical values to 2 decimal places
completion_rate_by_frequency = completion_rate_by_frequency.round(2)
in_progress_rate_by_frequency = in_progress_rate_by_frequency.round(2)
incomplete_rate_by_frequency = incomplete_rate_by_frequency.round(2)
summary_stats = {
'Open Items and Red Flags': {
'Total Open Items': round(df['Open_Items'].sum(), 2),
'Average Open Items per Assessment': round(df['Open_Items'].mean(), 2),
'Total Red Flags': round(df['Red_Flags'].sum(), 2),
'Average Red Flags per Assessment': round(df['Red_Flags'].mean(), 2),
'Max Red Flags in a Single Assessment': round(df['Red_Flags'].max(), 2),
'Most Common Area with Red Flags': df[df['Red_Flags'] > 0]['Assessment_Area'].mode()[0]
},
'Assessment Frequency': {
'Assessment Type Breakdown': df['Assessment_Frequency'].value_counts(normalize=True).round(2).to_dict(),
'Average Time Between Assessments': round((df['Assessment_End_Date'] - df['Assessment_Start_Date']).dt.days.mean(), 2),
'Average Assessment Duration': round(df['Assessment_End_Date'].sub(df['Assessment_Start_Date']).dt.days.mean(), 2),
'Completion Rate by Frequency': completion_rate_by_frequency.to_dict(),
'In Progress Rate by Frequency': in_progress_rate_by_frequency.to_dict(),
'Incomplete Rate by Frequency': incomplete_rate_by_frequency.to_dict()
},
'Assessment Start and End Dates': {
'Longest Assessment Duration (days)': round(df['Assessment_End_Date'].sub(df['Assessment_Start_Date']).dt.days.max(), 2),
'Shortest Assessment Duration (days)': round(df['Assessment_End_Date'].sub(df['Assessment_Start_Date']).dt.days.min(), 2),
},
'Assessment Areas': {
'Most Assessed Area': df['Assessment_Area'].value_counts().idxmax(),
'Most Open Items in Area': df.groupby('Assessment_Area')['Open_Items'].sum().idxmax(),
'Area with Most Red Flags': df.groupby('Assessment_Area')['Red_Flags'].sum().idxmax()
},
'Assessment Status': {
'Assessment Status Distribution': df['Assessment_Status'].value_counts(normalize=True).round(2).to_dict(),
'Incomplete Assessments': round(df[df['Assessment_Status'] == 'Incomplete'].shape[0], 2),
'In Progress Assessments': round(df[df['Assessment_Status'] == 'In Progress'].shape[0], 2)
},
'Assessment Admin': {
'Most Frequent Admin': df['Assessment_Admin'].mode()[0],
'Admin with Fewest Red Flags': df.groupby('Assessment_Admin')['Red_Flags'].sum().idxmin(),
'Admin with Most Open Items': df.groupby('Assessment_Admin')['Open_Items'].mean().idxmax()
}
}
return summary_stats