""" Context Generator Service This service processes all data files and generates context dictionaries for each page of the medical report. It performs analysis on Pnoe, Spirometry, and SECA data. """ from datetime import datetime from typing import Dict, List, Optional, Tuple import pandas as pd class ContextGenerator: """Generate context data for report pages""" def __init__(self): self.pnoe_df = None self.spirometry_df = None self.seca_df = None self.patient_info = {} def load_data( self, pnoe_path: str, spirometry_path: str, seca_path: str, ): """Load all required datasets""" self.pnoe_df = pd.read_csv(pnoe_path, delimiter=";") self.spirometry_df = pd.read_csv(spirometry_path) self.seca_df = pd.read_excel(seca_path) self._preprocess_pnoe_data() def _preprocess_pnoe_data(self): """Apply preprocessing steps to Pnoe data""" # Convert numeric columns for col in self.pnoe_df.columns: try: self.pnoe_df[col] = pd.to_numeric(self.pnoe_df[col]) except (ValueError, TypeError): pass self.pnoe_df["VO2 Pulse"] = ( self.pnoe_df["VO2(ml/min)"] / self.pnoe_df["HR(bpm)"] ) self.pnoe_df["VO2 Breath"] = ( self.pnoe_df["VO2(ml/min)"] / self.pnoe_df["BF(bpm)"] ) self.pnoe_df["CHO"] = ( self.pnoe_df["EE(kcal/min)"] * self.pnoe_df["CARBS(%)"] / 100 ) self.pnoe_df["FAT"] = ( self.pnoe_df["EE(kcal/min)"] * self.pnoe_df["FAT(%)"] / 100 ) window_size = 10 columns_to_smooth = [ "VO2(ml/min)", "VCO2(ml/min)", "HR(bpm)", "VT(l)", "BF(bpm)", "VE(l/min)", "VO2 Pulse", "VO2 Breath", "CHO", "FAT", ] for col in columns_to_smooth: if col in self.pnoe_df.columns: self.pnoe_df[f"{col}_smoothed"] = ( self.pnoe_df[col].rolling(window=window_size, min_periods=1).mean() ) def extract_patient_info(self, patient_name: str) -> Dict: """Extract patient information from SECA dataset""" if self.seca_df is not None: patient_data = self.seca_df[ self.seca_df["LastName"].str.contains( patient_name, case=False, na=False ) ] if not patient_data.empty: row = patient_data.iloc[0] weight_kg = float(row.get("Weight", 0)) fat_pct = float(row.get("Adult_FMP", 0)) self.patient_info = { "name": f"{row.get('FirstName', '')} {row.get('LastName', '')}", "first_name": row.get("FirstName", ""), "last_name": row.get("LastName", ""), "age": int(row.get("Age", 0)), "height": f"{row.get('Height', '')}", "weight": weight_kg, "gender": row.get("Gender", "").lower(), "fat_percentage": fat_pct, "fat_mass_lbs": weight_kg * fat_pct / 100 * 2.20462, "lean_mass_lbs": weight_kg * (1 - fat_pct / 100) * 2.20462, } return self.patient_info def calculate_spirometry_metrics(self) -> Dict: """Calculate spirometry-related metrics""" metrics = {} for param in ["FVC", "FEV1", "FEV1/FVC%"]: row = self.spirometry_df.loc[ self.spirometry_df["Parameters"].str.strip() == param ] if not row.empty: param_key = param.lower().replace("/", "_").replace("%", "_pct") metrics[f"{param_key}_best"] = row["Best"].values[0] metrics[f"{param_key}_pred"] = row["%Pred."].values[0] return metrics def calculate_pnoe_metrics(self) -> Dict: """Calculate all Pnoe-derived metrics""" metrics = {} metrics["vo2_max"] = self.pnoe_df["VO2(ml/min)_smoothed"].max() metrics["vo2_max_per_kg"] = metrics["vo2_max"] / self.patient_info["weight"] peak_vt_idx = self.pnoe_df["VT(l)_smoothed"].idxmax() peak_vt_row = self.pnoe_df.loc[peak_vt_idx] metrics["peak_vt"] = peak_vt_row["VT(l)_smoothed"] metrics["peak_vt_hr"] = peak_vt_row["HR(bpm)_smoothed"] fat_max_idx = self.pnoe_df["FAT_smoothed"].idxmax() fat_max_row = self.pnoe_df.loc[fat_max_idx] metrics["fat_max_value"] = fat_max_row["FAT_smoothed"] metrics["fat_max_hr"] = fat_max_row["HR(bpm)_smoothed"] vt1, vt2 = self._detect_thresholds() metrics["vt1"] = vt1 metrics["vt2"] = vt2 zones = self._calculate_hr_zones(vt1, vt2, fat_max_row) metrics.update(zones) return metrics def _detect_thresholds(self) -> Tuple[Optional[Dict], Optional[Dict]]: """Detect VT1 and VT2 thresholds""" condition = self.pnoe_df["CHO_smoothed"] > self.pnoe_df["FAT_smoothed"] crossover_indices = condition[condition].index vt1 = None if len(crossover_indices) > 0: vt1_idx = crossover_indices[0] vt1_row = self.pnoe_df.loc[vt1_idx] vt1 = { "HeartRate": vt1_row["HR(bpm)_smoothed"], "Speed": vt1_row["Speed"], "Time": vt1_row["T(sec)"], } ve_slope = self.pnoe_df["VE(l/min)_smoothed"].diff() second_derivative = ve_slope.diff() vt2_idx = second_derivative.idxmax() vt2 = None if pd.notna(vt2_idx): vt2_row = self.pnoe_df.loc[vt2_idx] vt2 = { "HeartRate": vt2_row["HR(bpm)_smoothed"], "Speed": vt2_row["Speed"], "Time": vt2_row["T(sec)"], } return vt1, vt2 def _calculate_hr_zones( self, vt1: Optional[Dict], vt2: Optional[Dict], fat_max_row: pd.Series ) -> Dict: """Calculate heart rate zones based on thresholds""" zones = {} if vt1 and vt2: zone_1_start = fat_max_row["HR(bpm)_smoothed"] - 15 zone_2_start = fat_max_row["HR(bpm)_smoothed"] zone_3_start = vt1["HeartRate"] zone_4_start = vt2["HeartRate"] - 10 zone_5_start = vt2["HeartRate"] + 10 zones["zone1_bpm"] = f"{int(zone_1_start)}-{int(zone_2_start)}bpm" zones["zone2_bpm"] = f"{int(zone_2_start)}-{int(vt1['HeartRate'])}bpm" zones["zone3_bpm"] = f"{int(zone_3_start)}-{int(zone_4_start)}bpm" zones["zone4_bpm"] = f"{int(zone_4_start)}-{int(zone_5_start)}bpm" zones["zone5_bpm"] = f"{int(zone_5_start)}+bpm" else: max_hr = 220 - self.patient_info["age"] zones["zone1_bpm"] = f"{int(max_hr * 0.55)}-{int(max_hr * 0.65)}bpm" zones["zone2_bpm"] = f"{int(max_hr * 0.65)}-{int(max_hr * 0.75)}bpm" zones["zone3_bpm"] = f"{int(max_hr * 0.75)}-{int(max_hr * 0.85)}bpm" zones["zone4_bpm"] = f"{int(max_hr * 0.85)}-{int(max_hr * 0.95)}bpm" zones["zone5_bpm"] = f"{int(max_hr * 0.95)}+bpm" return zones def generate_all_contexts( self, patient_name: str, graphs: Dict[str, str] ) -> List[Dict]: """Main method to generate all page contexts""" self.extract_patient_info(patient_name) spirometry_metrics = self.calculate_spirometry_metrics() pnoe_metrics = self.calculate_pnoe_metrics() contexts = [] contexts.append( { "name": self.patient_info["name"], "surname": self.patient_info["last_name"], "date": datetime.now().strftime("%B %d, %Y"), } ) contexts.append( { "patient_name": self.patient_info["name"], "test_date": datetime.now().strftime("%B %d, %Y"), } ) for i in range(4): contexts.append( {"patient_name": self.patient_info["name"], "page_number": i + 3} ) fev1_percentage = 0 if spirometry_metrics.get("fvc_best"): fev1_percentage = ( pnoe_metrics["peak_vt"] / spirometry_metrics["fvc_best"] ) * 100 contexts.append( { "peak_vt": f"{pnoe_metrics['peak_vt']:.2f}", "peak_vt_bpm": f"{int(pnoe_metrics['peak_vt_hr'])}", "fev1_percentage": f"{fev1_percentage:.1f}", "lung_analysis_chart": graphs.get("spirometry_chart", ""), "respiratory_analysis_chart": graphs.get("respiratory", ""), } ) contexts.append( { "vo2_max_value": f"{pnoe_metrics['vo2_max_per_kg']:.1f}", "age_range": f"{self.patient_info['age'] // 10 * 10}-{self.patient_info['age'] // 10 * 10 + 9}", "zone1_bpm": pnoe_metrics.get("zone1_bpm", ""), "zone2_bpm": pnoe_metrics.get("zone2_bpm", ""), "zone3_bpm": pnoe_metrics.get("zone3_bpm", ""), "zone4_bpm": pnoe_metrics.get("zone4_bpm", ""), "zone5_bpm": pnoe_metrics.get("zone5_bpm", ""), "vo2_pulse_chart": graphs.get("vo2_pulse", ""), } ) contexts.append( { "fat_max_value": f"{pnoe_metrics['fat_max_value']:.2f}", "fat_max_hr": f"{int(pnoe_metrics['fat_max_hr'])}", "fuel_utilization_chart": graphs.get("fuel_utilization", ""), "fat_metabolism_chart": graphs.get("fat_metabolism", ""), } ) contexts.append( { "fat_percentage": f"{self.patient_info['fat_percentage']:.1f}", "fat_mass_lbs": f"{self.patient_info['fat_mass_lbs']:.1f}", "lean_mass_lbs": f"{self.patient_info['lean_mass_lbs']:.1f}", "body_composition_chart": graphs.get("body_composition", ""), "body_fat_percent_chart": graphs.get("body_fat_percent", ""), } ) for i in range(9): contexts.append( { "patient_name": self.patient_info["name"], "page_number": i + 11, "vo2_breath_chart": graphs.get("vo2_breath", ""), "recovery_chart": graphs.get("recovery", ""), } ) return contexts