Files
bio-performx/app/services/context_generator.py
T

356 lines
14 KiB
Python
Raw Normal View History

"""
Context Generator Service
This service processes all data files and generates context dictionaries for each page
of the medical report. It performs analysis on Pnoe, Spirometry, and SECA data.
"""
from datetime import datetime
from typing import Dict, List, Optional, Tuple
import pandas as pd
class ContextGenerator:
"""Generate context data for report pages"""
def __init__(self):
self.pnoe_df = None
self.spirometry_df = None
self.seca_df = None
self.patient_info = {}
def load_data(
self,
pnoe_path: str,
spirometry_path: str,
seca_path: str,
):
"""Load all required datasets"""
self.pnoe_df = pd.read_csv(pnoe_path, delimiter=";")
self.spirometry_df = pd.read_csv(spirometry_path)
self.seca_df = pd.read_excel(seca_path)
self._preprocess_pnoe_data()
def _preprocess_pnoe_data(self):
"""Apply preprocessing steps to Pnoe data"""
# Convert numeric columns
for col in self.pnoe_df.columns:
try:
self.pnoe_df[col] = pd.to_numeric(self.pnoe_df[col])
except (ValueError, TypeError):
pass
self.pnoe_df["VO2 Pulse"] = (
self.pnoe_df["VO2(ml/min)"] / self.pnoe_df["HR(bpm)"]
)
self.pnoe_df["VO2 Breath"] = (
self.pnoe_df["VO2(ml/min)"] / self.pnoe_df["BF(bpm)"]
)
self.pnoe_df["CHO"] = (
self.pnoe_df["EE(kcal/min)"] * self.pnoe_df["CARBS(%)"] / 100
)
self.pnoe_df["FAT"] = (
self.pnoe_df["EE(kcal/min)"] * self.pnoe_df["FAT(%)"] / 100
)
window_size = 10
columns_to_smooth = [
"VO2(ml/min)",
"VCO2(ml/min)",
"HR(bpm)",
"VT(l)",
"BF(bpm)",
"VE(l/min)",
"VO2 Pulse",
"VO2 Breath",
"CHO",
"FAT",
]
for col in columns_to_smooth:
if col in self.pnoe_df.columns:
self.pnoe_df[f"{col}_smoothed"] = (
self.pnoe_df[col].rolling(window=window_size, min_periods=1).mean()
)
def extract_patient_info(self, patient_name: str) -> Dict:
"""Extract patient information from SECA dataset"""
if self.seca_df is not None:
patient_data = self.seca_df[
self.seca_df["LastName"].str.contains(
patient_name, case=False, na=False
)
]
if not patient_data.empty:
row = patient_data.iloc[0]
weight_kg = float(row.get("Weight", 0))
fat_pct = float(row.get("Adult_FMP", 0))
self.patient_info = {
"name": f"{row.get('FirstName', '')} {row.get('LastName', '')}",
"first_name": row.get("FirstName", ""),
"last_name": row.get("LastName", ""),
"age": int(row.get("Age", 0)),
"height": f"{row.get('Height', '')}",
"weight": weight_kg,
"gender": row.get("Gender", "").lower(),
"fat_percentage": fat_pct,
"fat_mass_lbs": weight_kg * fat_pct / 100 * 2.20462,
"lean_mass_lbs": weight_kg * (1 - fat_pct / 100) * 2.20462,
}
return self.patient_info
def calculate_spirometry_metrics(self, metric_overrides: Optional[Dict] = None) -> Dict:
"""Calculate spirometry-related metrics"""
if metric_overrides is None:
metric_overrides = {}
metrics = {}
for param in ["FVC", "FEV1", "FEV1/FVC%"]:
param_key = param.lower().replace("/", "_").replace("%", "_pct")
if f"{param_key}_best" in metric_overrides:
metrics[f"{param_key}_best"] = float(metric_overrides[f"{param_key}_best"])
else:
row = self.spirometry_df.loc[
self.spirometry_df["Parameters"].str.strip() == param
]
if not row.empty:
metrics[f"{param_key}_best"] = row["Best"].values[0]
if f"{param_key}_pred" in metric_overrides:
metrics[f"{param_key}_pred"] = float(metric_overrides[f"{param_key}_pred"])
else:
row = self.spirometry_df.loc[
self.spirometry_df["Parameters"].str.strip() == param
]
if not row.empty:
metrics[f"{param_key}_pred"] = row["%Pred."].values[0]
return metrics
def calculate_pnoe_metrics(self, metric_overrides: Optional[Dict] = None) -> Dict:
"""Calculate all Pnoe-derived metrics"""
if metric_overrides is None:
metric_overrides = {}
metrics = {}
# VO2 Max metrics
if "vo2_max" in metric_overrides:
metrics["vo2_max"] = float(metric_overrides["vo2_max"])
else:
metrics["vo2_max"] = self.pnoe_df["VO2(ml/min)_smoothed"].max()
if "vo2_max_per_kg" in metric_overrides:
metrics["vo2_max_per_kg"] = float(metric_overrides["vo2_max_per_kg"])
else:
metrics["vo2_max_per_kg"] = metrics["vo2_max"] / self.patient_info["weight"]
# Peak VT metrics
if "peak_vt" in metric_overrides:
metrics["peak_vt"] = float(metric_overrides["peak_vt"])
# Need to get HR from override or calculate
if "peak_vt_hr" in metric_overrides:
metrics["peak_vt_hr"] = float(metric_overrides["peak_vt_hr"])
else:
peak_vt_idx = self.pnoe_df["VT(l)_smoothed"].idxmax()
peak_vt_row = self.pnoe_df.loc[peak_vt_idx]
metrics["peak_vt_hr"] = peak_vt_row["HR(bpm)_smoothed"]
else:
peak_vt_idx = self.pnoe_df["VT(l)_smoothed"].idxmax()
peak_vt_row = self.pnoe_df.loc[peak_vt_idx]
metrics["peak_vt"] = peak_vt_row["VT(l)_smoothed"]
metrics["peak_vt_hr"] = peak_vt_row["HR(bpm)_smoothed"]
# Fat Max metrics
if "fat_max_value" in metric_overrides:
metrics["fat_max_value"] = float(metric_overrides["fat_max_value"])
if "fat_max_hr" in metric_overrides:
metrics["fat_max_hr"] = float(metric_overrides["fat_max_hr"])
else:
fat_max_idx = self.pnoe_df["FAT_smoothed"].idxmax()
fat_max_row = self.pnoe_df.loc[fat_max_idx]
metrics["fat_max_hr"] = fat_max_row["HR(bpm)_smoothed"]
else:
fat_max_idx = self.pnoe_df["FAT_smoothed"].idxmax()
fat_max_row = self.pnoe_df.loc[fat_max_idx]
metrics["fat_max_value"] = fat_max_row["FAT_smoothed"]
metrics["fat_max_hr"] = fat_max_row["HR(bpm)_smoothed"]
# VT1 and VT2 thresholds
if "vt1" in metric_overrides:
metrics["vt1"] = metric_overrides["vt1"]
else:
vt1, _ = self._detect_thresholds()
metrics["vt1"] = vt1
if "vt2" in metric_overrides:
metrics["vt2"] = metric_overrides["vt2"]
else:
_, vt2 = self._detect_thresholds()
metrics["vt2"] = vt2
# Heart rate zones
if any(f"zone{i}_bpm" in metric_overrides for i in range(1, 6)):
for i in range(1, 6):
zone_key = f"zone{i}_bpm"
if zone_key in metric_overrides:
metrics[zone_key] = metric_overrides[zone_key]
else:
fat_max_idx = self.pnoe_df["FAT_smoothed"].idxmax()
fat_max_row = self.pnoe_df.loc[fat_max_idx]
zones = self._calculate_hr_zones(metrics["vt1"], metrics["vt2"], fat_max_row)
metrics.update(zones)
return metrics
def _detect_thresholds(self) -> Tuple[Optional[Dict], Optional[Dict]]:
"""Detect VT1 and VT2 thresholds"""
condition = self.pnoe_df["CHO_smoothed"] > self.pnoe_df["FAT_smoothed"]
crossover_indices = condition[condition].index
vt1 = None
if len(crossover_indices) > 0:
vt1_idx = crossover_indices[0]
vt1_row = self.pnoe_df.loc[vt1_idx]
vt1 = {
"HeartRate": vt1_row["HR(bpm)_smoothed"],
"Speed": vt1_row["Speed"],
"Time": vt1_row["T(sec)"],
}
ve_slope = self.pnoe_df["VE(l/min)_smoothed"].diff()
second_derivative = ve_slope.diff()
vt2_idx = second_derivative.idxmax()
vt2 = None
if pd.notna(vt2_idx):
vt2_row = self.pnoe_df.loc[vt2_idx]
vt2 = {
"HeartRate": vt2_row["HR(bpm)_smoothed"],
"Speed": vt2_row["Speed"],
"Time": vt2_row["T(sec)"],
}
return vt1, vt2
def _calculate_hr_zones(
self, vt1: Optional[Dict], vt2: Optional[Dict], fat_max_row: pd.Series
) -> Dict:
"""Calculate heart rate zones based on thresholds"""
zones = {}
if vt1 and vt2:
zone_1_start = fat_max_row["HR(bpm)_smoothed"] - 15
zone_2_start = fat_max_row["HR(bpm)_smoothed"]
zone_3_start = vt1["HeartRate"]
zone_4_start = vt2["HeartRate"] - 10
zone_5_start = vt2["HeartRate"] + 10
zones["zone1_bpm"] = f"{int(zone_1_start)}-{int(zone_2_start)}bpm"
zones["zone2_bpm"] = f"{int(zone_2_start)}-{int(vt1['HeartRate'])}bpm"
zones["zone3_bpm"] = f"{int(zone_3_start)}-{int(zone_4_start)}bpm"
zones["zone4_bpm"] = f"{int(zone_4_start)}-{int(zone_5_start)}bpm"
zones["zone5_bpm"] = f"{int(zone_5_start)}+bpm"
else:
max_hr = 220 - self.patient_info["age"]
zones["zone1_bpm"] = f"{int(max_hr * 0.55)}-{int(max_hr * 0.65)}bpm"
zones["zone2_bpm"] = f"{int(max_hr * 0.65)}-{int(max_hr * 0.75)}bpm"
zones["zone3_bpm"] = f"{int(max_hr * 0.75)}-{int(max_hr * 0.85)}bpm"
zones["zone4_bpm"] = f"{int(max_hr * 0.85)}-{int(max_hr * 0.95)}bpm"
zones["zone5_bpm"] = f"{int(max_hr * 0.95)}+bpm"
return zones
def generate_all_contexts(
self, patient_name: str, graphs: Dict[str, str], metric_overrides: Optional[Dict] = None
) -> List[Dict]:
"""Main method to generate all page contexts"""
if metric_overrides is None:
metric_overrides = {}
self.extract_patient_info(patient_name)
# Extract metric overrides for spirometry and pnoe
spirometry_overrides = metric_overrides.get("spirometry", {})
pnoe_overrides = metric_overrides.get("pnoe", {})
spirometry_metrics = self.calculate_spirometry_metrics(spirometry_overrides)
pnoe_metrics = self.calculate_pnoe_metrics(pnoe_overrides)
contexts = []
contexts.append(
{
"name": self.patient_info["name"],
"surname": self.patient_info["last_name"],
"date": datetime.now().strftime("%B %d, %Y"),
}
)
contexts.append(
{
"patient_name": self.patient_info["name"],
"test_date": datetime.now().strftime("%B %d, %Y"),
}
)
for i in range(4):
contexts.append(
{"patient_name": self.patient_info["name"], "page_number": i + 3}
)
fev1_percentage = 0
if spirometry_metrics.get("fvc_best"):
fev1_percentage = (
pnoe_metrics["peak_vt"] / spirometry_metrics["fvc_best"]
) * 100
contexts.append(
{
"peak_vt": f"{pnoe_metrics['peak_vt']:.2f}",
"peak_vt_bpm": f"{int(pnoe_metrics['peak_vt_hr'])}",
"fev1_percentage": f"{fev1_percentage:.1f}",
"lung_analysis_chart": graphs.get("spirometry_chart", ""),
"respiratory_analysis_chart": graphs.get("respiratory", ""),
}
)
contexts.append(
{
"vo2_max_value": f"{pnoe_metrics['vo2_max_per_kg']:.1f}",
"age_range": f"{self.patient_info['age'] // 10 * 10}-{self.patient_info['age'] // 10 * 10 + 9}",
"zone1_bpm": pnoe_metrics.get("zone1_bpm", ""),
"zone2_bpm": pnoe_metrics.get("zone2_bpm", ""),
"zone3_bpm": pnoe_metrics.get("zone3_bpm", ""),
"zone4_bpm": pnoe_metrics.get("zone4_bpm", ""),
"zone5_bpm": pnoe_metrics.get("zone5_bpm", ""),
"vo2_pulse_chart": graphs.get("vo2_pulse", ""),
}
)
contexts.append(
{
"fat_max_value": f"{pnoe_metrics['fat_max_value']:.2f}",
"fat_max_hr": f"{int(pnoe_metrics['fat_max_hr'])}",
"fuel_utilization_chart": graphs.get("fuel_utilization", ""),
"fat_metabolism_chart": graphs.get("fat_metabolism", ""),
}
)
contexts.append(
{
"fat_percentage": f"{self.patient_info['fat_percentage']:.1f}",
"fat_mass_lbs": f"{self.patient_info['fat_mass_lbs']:.1f}",
"lean_mass_lbs": f"{self.patient_info['lean_mass_lbs']:.1f}",
"body_composition_chart": graphs.get("body_composition", ""),
"body_fat_percent_chart": graphs.get("body_fat_percent", ""),
}
)
for i in range(9):
contexts.append(
{
"patient_name": self.patient_info["name"],
"page_number": i + 11,
"vo2_breath_chart": graphs.get("vo2_breath", ""),
"recovery_chart": graphs.get("recovery", ""),
}
)
return contexts