feat: Enhance context generation and report generation services with improved data handling and structure

This commit is contained in:
bolade
2025-10-04 10:25:10 +01:00
parent d66f3fd18b
commit 358898b7db
7 changed files with 163 additions and 48 deletions
+6
View File
@@ -3,3 +3,9 @@
data/ data/
.env .env
/graphs
/data
/reports
+124 -24
View File
@@ -6,7 +6,6 @@ of the medical report. It performs analysis on Pnoe, Spirometry, and SECA data.
""" """
from datetime import datetime from datetime import datetime
from pathlib import Path
from typing import Dict, List, Optional, Tuple from typing import Dict, List, Optional, Tuple
import pandas as pd import pandas as pd
@@ -35,23 +34,54 @@ class ContextGenerator:
def _preprocess_pnoe_data(self): def _preprocess_pnoe_data(self):
"""Apply preprocessing steps to Pnoe data""" """Apply preprocessing steps to Pnoe data"""
self.pnoe_df = self.pnoe_df.apply(pd.to_numeric, errors="ignore") # Convert numeric columns
self.pnoe_df["VO2 Pulse"] = self.pnoe_df["VO2(ml/min)"] / self.pnoe_df["HR(bpm)"] for col in self.pnoe_df.columns:
self.pnoe_df["VO2 Breath"] = self.pnoe_df["VO2(ml/min)"] / self.pnoe_df["BF(bpm)"] try:
self.pnoe_df["CHO"] = self.pnoe_df["EE(kcal/min)"] * self.pnoe_df["CARBS(%)"] / 100 self.pnoe_df[col] = pd.to_numeric(self.pnoe_df[col])
self.pnoe_df["FAT"] = self.pnoe_df["EE(kcal/min)"] * self.pnoe_df["FAT(%)"] / 100 except (ValueError, TypeError):
pass
self.pnoe_df["VO2 Pulse"] = (
self.pnoe_df["VO2(ml/min)"] / self.pnoe_df["HR(bpm)"]
)
self.pnoe_df["VO2 Breath"] = (
self.pnoe_df["VO2(ml/min)"] / self.pnoe_df["BF(bpm)"]
)
self.pnoe_df["CHO"] = (
self.pnoe_df["EE(kcal/min)"] * self.pnoe_df["CARBS(%)"] / 100
)
self.pnoe_df["FAT"] = (
self.pnoe_df["EE(kcal/min)"] * self.pnoe_df["FAT(%)"] / 100
)
window_size = 10 window_size = 10
columns_to_smooth = ["VO2(ml/min)", "VCO2(ml/min)", "HR(bpm)", "VT(l)", "BF(bpm)", "VE(l/min)", "VO2 Pulse", "VO2 Breath", "CHO", "FAT"] columns_to_smooth = [
"VO2(ml/min)",
"VCO2(ml/min)",
"HR(bpm)",
"VT(l)",
"BF(bpm)",
"VE(l/min)",
"VO2 Pulse",
"VO2 Breath",
"CHO",
"FAT",
]
for col in columns_to_smooth: for col in columns_to_smooth:
if col in self.pnoe_df.columns: if col in self.pnoe_df.columns:
self.pnoe_df[f"{col}_smoothed"] = self.pnoe_df[col].rolling(window=window_size, min_periods=1).mean() self.pnoe_df[f"{col}_smoothed"] = (
self.pnoe_df[col].rolling(window=window_size, min_periods=1).mean()
)
def extract_patient_info(self, patient_name: str) -> Dict: def extract_patient_info(self, patient_name: str) -> Dict:
"""Extract patient information from SECA dataset""" """Extract patient information from SECA dataset"""
if self.seca_df is not None: if self.seca_df is not None:
patient_data = self.seca_df[self.seca_df["LastName"].str.contains(patient_name, case=False, na=False)] patient_data = self.seca_df[
self.seca_df["LastName"].str.contains(
patient_name, case=False, na=False
)
]
if not patient_data.empty: if not patient_data.empty:
row = patient_data.iloc[0] row = patient_data.iloc[0]
weight_kg = float(row.get("Weight", 0)) weight_kg = float(row.get("Weight", 0))
@@ -75,9 +105,11 @@ class ContextGenerator:
"""Calculate spirometry-related metrics""" """Calculate spirometry-related metrics"""
metrics = {} metrics = {}
for param in ["FVC", "FEV1", "FEV1/FVC%"]: for param in ["FVC", "FEV1", "FEV1/FVC%"]:
row = self.spirometry_df.loc[self.spirometry_df["Parameters"].str.strip() == param] row = self.spirometry_df.loc[
self.spirometry_df["Parameters"].str.strip() == param
]
if not row.empty: if not row.empty:
param_key = param.lower().replace('/', '_').replace('%', '_pct') param_key = param.lower().replace("/", "_").replace("%", "_pct")
metrics[f"{param_key}_best"] = row["Best"].values[0] metrics[f"{param_key}_best"] = row["Best"].values[0]
metrics[f"{param_key}_pred"] = row["%Pred."].values[0] metrics[f"{param_key}_pred"] = row["%Pred."].values[0]
return metrics return metrics
@@ -115,7 +147,11 @@ class ContextGenerator:
if len(crossover_indices) > 0: if len(crossover_indices) > 0:
vt1_idx = crossover_indices[0] vt1_idx = crossover_indices[0]
vt1_row = self.pnoe_df.loc[vt1_idx] vt1_row = self.pnoe_df.loc[vt1_idx]
vt1 = {"HeartRate": vt1_row["HR(bpm)_smoothed"], "Speed": vt1_row["Speed"], "Time": vt1_row["T(sec)"]} vt1 = {
"HeartRate": vt1_row["HR(bpm)_smoothed"],
"Speed": vt1_row["Speed"],
"Time": vt1_row["T(sec)"],
}
ve_slope = self.pnoe_df["VE(l/min)_smoothed"].diff() ve_slope = self.pnoe_df["VE(l/min)_smoothed"].diff()
second_derivative = ve_slope.diff() second_derivative = ve_slope.diff()
@@ -124,11 +160,17 @@ class ContextGenerator:
vt2 = None vt2 = None
if pd.notna(vt2_idx): if pd.notna(vt2_idx):
vt2_row = self.pnoe_df.loc[vt2_idx] vt2_row = self.pnoe_df.loc[vt2_idx]
vt2 = {"HeartRate": vt2_row["HR(bpm)_smoothed"], "Speed": vt2_row["Speed"], "Time": vt2_row["T(sec)"]} vt2 = {
"HeartRate": vt2_row["HR(bpm)_smoothed"],
"Speed": vt2_row["Speed"],
"Time": vt2_row["T(sec)"],
}
return vt1, vt2 return vt1, vt2
def _calculate_hr_zones(self, vt1: Optional[Dict], vt2: Optional[Dict], fat_max_row: pd.Series) -> Dict: def _calculate_hr_zones(
self, vt1: Optional[Dict], vt2: Optional[Dict], fat_max_row: pd.Series
) -> Dict:
"""Calculate heart rate zones based on thresholds""" """Calculate heart rate zones based on thresholds"""
zones = {} zones = {}
if vt1 and vt2: if vt1 and vt2:
@@ -152,29 +194,87 @@ class ContextGenerator:
zones["zone5_bpm"] = f"{int(max_hr * 0.95)}+bpm" zones["zone5_bpm"] = f"{int(max_hr * 0.95)}+bpm"
return zones return zones
def generate_all_contexts(self, patient_name: str, graphs: Dict[str, str]) -> List[Dict]: def generate_all_contexts(
self, patient_name: str, graphs: Dict[str, str]
) -> List[Dict]:
"""Main method to generate all page contexts""" """Main method to generate all page contexts"""
self.extract_patient_info(patient_name) self.extract_patient_info(patient_name)
spirometry_metrics = self.calculate_spirometry_metrics() spirometry_metrics = self.calculate_spirometry_metrics()
pnoe_metrics = self.calculate_pnoe_metrics() pnoe_metrics = self.calculate_pnoe_metrics()
contexts = [] contexts = []
contexts.append({"name": self.patient_info["name"], "surname": self.patient_info["last_name"], "date": datetime.now().strftime("%B %d, %Y")}) contexts.append(
contexts.append({"patient_name": self.patient_info["name"], "test_date": datetime.now().strftime("%B %d, %Y")}) {
"name": self.patient_info["name"],
"surname": self.patient_info["last_name"],
"date": datetime.now().strftime("%B %d, %Y"),
}
)
contexts.append(
{
"patient_name": self.patient_info["name"],
"test_date": datetime.now().strftime("%B %d, %Y"),
}
)
for i in range(4): for i in range(4):
contexts.append({"patient_name": self.patient_info["name"], "page_number": i + 3}) contexts.append(
{"patient_name": self.patient_info["name"], "page_number": i + 3}
)
fev1_percentage = 0 fev1_percentage = 0
if spirometry_metrics.get("fvc_best"): if spirometry_metrics.get("fvc_best"):
fev1_percentage = (pnoe_metrics["peak_vt"] / spirometry_metrics["fvc_best"]) * 100 fev1_percentage = (
pnoe_metrics["peak_vt"] / spirometry_metrics["fvc_best"]
) * 100
contexts.append({"peak_vt": f"{pnoe_metrics['peak_vt']:.2f}", "peak_vt_bpm": f"{int(pnoe_metrics['peak_vt_hr'])}", "fev1_percentage": f"{fev1_percentage:.1f}", "lung_analysis_chart": graphs.get("spirometry_chart", ""), "respiratory_analysis_chart": graphs.get("respiratory", "")}) contexts.append(
contexts.append({"vo2_max_value": f"{pnoe_metrics['vo2_max_per_kg']:.1f}", "age_range": f"{self.patient_info['age'] // 10 * 10}-{self.patient_info['age'] // 10 * 10 + 9}", "zone1_bpm": pnoe_metrics.get("zone1_bpm", ""), "zone2_bpm": pnoe_metrics.get("zone2_bpm", ""), "zone3_bpm": pnoe_metrics.get("zone3_bpm", ""), "zone4_bpm": pnoe_metrics.get("zone4_bpm", ""), "zone5_bpm": pnoe_metrics.get("zone5_bpm", ""), "vo2_pulse_chart": graphs.get("vo2_pulse", "")}) {
contexts.append({"fat_max_value": f"{pnoe_metrics['fat_max_value']:.2f}", "fat_max_hr": f"{int(pnoe_metrics['fat_max_hr'])}", "fuel_utilization_chart": graphs.get("fuel_utilization", ""), "fat_metabolism_chart": graphs.get("fat_metabolism", "")}) "peak_vt": f"{pnoe_metrics['peak_vt']:.2f}",
contexts.append({"fat_percentage": f"{self.patient_info['fat_percentage']:.1f}", "fat_mass_lbs": f"{self.patient_info['fat_mass_lbs']:.1f}", "lean_mass_lbs": f"{self.patient_info['lean_mass_lbs']:.1f}", "body_composition_chart": graphs.get("body_composition", ""), "body_fat_percent_chart": graphs.get("body_fat_percent", "")}) "peak_vt_bpm": f"{int(pnoe_metrics['peak_vt_hr'])}",
"fev1_percentage": f"{fev1_percentage:.1f}",
"lung_analysis_chart": graphs.get("spirometry_chart", ""),
"respiratory_analysis_chart": graphs.get("respiratory", ""),
}
)
contexts.append(
{
"vo2_max_value": f"{pnoe_metrics['vo2_max_per_kg']:.1f}",
"age_range": f"{self.patient_info['age'] // 10 * 10}-{self.patient_info['age'] // 10 * 10 + 9}",
"zone1_bpm": pnoe_metrics.get("zone1_bpm", ""),
"zone2_bpm": pnoe_metrics.get("zone2_bpm", ""),
"zone3_bpm": pnoe_metrics.get("zone3_bpm", ""),
"zone4_bpm": pnoe_metrics.get("zone4_bpm", ""),
"zone5_bpm": pnoe_metrics.get("zone5_bpm", ""),
"vo2_pulse_chart": graphs.get("vo2_pulse", ""),
}
)
contexts.append(
{
"fat_max_value": f"{pnoe_metrics['fat_max_value']:.2f}",
"fat_max_hr": f"{int(pnoe_metrics['fat_max_hr'])}",
"fuel_utilization_chart": graphs.get("fuel_utilization", ""),
"fat_metabolism_chart": graphs.get("fat_metabolism", ""),
}
)
contexts.append(
{
"fat_percentage": f"{self.patient_info['fat_percentage']:.1f}",
"fat_mass_lbs": f"{self.patient_info['fat_mass_lbs']:.1f}",
"lean_mass_lbs": f"{self.patient_info['lean_mass_lbs']:.1f}",
"body_composition_chart": graphs.get("body_composition", ""),
"body_fat_percent_chart": graphs.get("body_fat_percent", ""),
}
)
for i in range(9): for i in range(9):
contexts.append({"patient_name": self.patient_info["name"], "page_number": i + 11, "vo2_breath_chart": graphs.get("vo2_breath", ""), "recovery_chart": graphs.get("recovery", "")}) contexts.append(
{
"patient_name": self.patient_info["name"],
"page_number": i + 11,
"vo2_breath_chart": graphs.get("vo2_breath", ""),
"recovery_chart": graphs.get("recovery", ""),
}
)
return contexts return contexts
+3
View File
@@ -8,6 +8,9 @@ Based on the analysis notebooks in services_dfdf/.
import base64 import base64
from pathlib import Path from pathlib import Path
import matplotlib
matplotlib.use("Agg") # Use non-interactive backend
import matplotlib.pyplot as plt import matplotlib.pyplot as plt
import matplotlib.transforms as mtransforms import matplotlib.transforms as mtransforms
import numpy as np import numpy as np
+11 -5
View File
@@ -11,10 +11,9 @@ from typing import Any, Dict, List
import pandas as pd import pandas as pd
from jinja2 import Environment, FileSystemLoader from jinja2 import Environment, FileSystemLoader
from playwright.sync_api import sync_playwright from playwright.sync_api import sync_playwright
from services.context_generator import ContextGenerator
from app.services.context_generator import ContextGenerator from services.graph_generator import GraphGenerator
from app.services.graph_generator import GraphGenerator from services.spirometry_table_extractor import extract_spirometry_table_from_pdf
from app.services.spirometry_table_extractor import extract_spirometry_table_from_pdf
class ReportGeneratorService: class ReportGeneratorService:
@@ -61,7 +60,13 @@ class ReportGeneratorService:
""" """
# Load data # Load data
df = pd.read_csv(pnoe_csv_path, delimiter=";") df = pd.read_csv(pnoe_csv_path, delimiter=";")
df = df.apply(pd.to_numeric, errors="ignore")
# Convert numeric columns (updated approach)
for col in df.columns:
try:
df[col] = pd.to_numeric(df[col])
except (ValueError, TypeError):
pass # Keep as-is if not numeric
# Calculate derived columns # Calculate derived columns
df["VO2 Pulse"] = df["VO2(ml/min)"] / df["HR(bpm)"] df["VO2 Pulse"] = df["VO2(ml/min)"] / df["HR(bpm)"]
@@ -395,6 +400,7 @@ class ReportGeneratorService:
) )
report_path = self.reports_dir / output_filename report_path = self.reports_dir / output_filename
print(f"Generating PDF report at {report_path}")
self.html_to_pdf(html_content, str(report_path)) self.html_to_pdf(html_content, str(report_path))
return { return {