feat: Enhance context generation and report generation services with improved data handling and structure

2025-10-04 10:25:10 +01:00
parent d66f3fd18b
commit 358898b7db
7 changed files with 163 additions and 48 deletions
@@ -3,3 +3,9 @@
 data/
 .env
 /graphs
 /data
 /reports
@@ -6,7 +6,6 @@ of the medical report. It performs analysis on Pnoe, Spirometry, and SECA data.
 """
 from datetime import datetime
 from pathlib import Path
 from typing import Dict, List, Optional, Tuple
 import pandas as pd
@@ -35,23 +34,54 @@ class ContextGenerator:
    def _preprocess_pnoe_data(self):
        """Apply preprocessing steps to Pnoe data"""
-        self.pnoe_df = self.pnoe_df.apply(pd.to_numeric, errors="ignore")
+        # Convert each column to numeric; columns that fail to parse are left unchanged
-        self.pnoe_df["VO2 Pulse"] = self.pnoe_df["VO2(ml/min)"] / self.pnoe_df["HR(bpm)"]
+        for col in self.pnoe_df.columns:
-        self.pnoe_df["VO2 Breath"] = self.pnoe_df["VO2(ml/min)"] / self.pnoe_df["BF(bpm)"]
+            try:
-        self.pnoe_df["CHO"] = self.pnoe_df["EE(kcal/min)"] * self.pnoe_df["CARBS(%)"] / 100
+                self.pnoe_df[col] = pd.to_numeric(self.pnoe_df[col])
-        self.pnoe_df["FAT"] = self.pnoe_df["EE(kcal/min)"] * self.pnoe_df["FAT(%)"] / 100
+            except (ValueError, TypeError):
                pass
        self.pnoe_df["VO2 Pulse"] = (
            self.pnoe_df["VO2(ml/min)"] / self.pnoe_df["HR(bpm)"]
        )
        self.pnoe_df["VO2 Breath"] = (
            self.pnoe_df["VO2(ml/min)"] / self.pnoe_df["BF(bpm)"]
        )
        self.pnoe_df["CHO"] = (
            self.pnoe_df["EE(kcal/min)"] * self.pnoe_df["CARBS(%)"] / 100
        )
        self.pnoe_df["FAT"] = (
            self.pnoe_df["EE(kcal/min)"] * self.pnoe_df["FAT(%)"] / 100
        )
        window_size = 10
-        columns_to_smooth = ["VO2(ml/min)", "VCO2(ml/min)", "HR(bpm)", "VT(l)", "BF(bpm)", "VE(l/min)", "VO2 Pulse", "VO2 Breath", "CHO", "FAT"]
+        columns_to_smooth = [
            "VO2(ml/min)",
            "VCO2(ml/min)",
            "HR(bpm)",
            "VT(l)",
            "BF(bpm)",
            "VE(l/min)",
            "VO2 Pulse",
            "VO2 Breath",
            "CHO",
            "FAT",
        ]
        for col in columns_to_smooth:
            if col in self.pnoe_df.columns:
-                self.pnoe_df[f"{col}_smoothed"] = self.pnoe_df[col].rolling(window=window_size, min_periods=1).mean()
+                self.pnoe_df[f"{col}_smoothed"] = (
                    self.pnoe_df[col].rolling(window=window_size, min_periods=1).mean()
                )
    def extract_patient_info(self, patient_name: str) -> Dict:
        """Extract patient information from SECA dataset"""
        if self.seca_df is not None:
-            patient_data = self.seca_df[self.seca_df["LastName"].str.contains(patient_name, case=False, na=False)]
+            patient_data = self.seca_df[
                self.seca_df["LastName"].str.contains(
                    patient_name, case=False, na=False
                )
            ]
            if not patient_data.empty:
                row = patient_data.iloc[0]
                weight_kg = float(row.get("Weight", 0))
@@ -75,9 +105,11 @@ class ContextGenerator:
        """Calculate spirometry-related metrics"""
        metrics = {}
        for param in ["FVC", "FEV1", "FEV1/FVC%"]:
-            row = self.spirometry_df.loc[self.spirometry_df["Parameters"].str.strip() == param]
+            row = self.spirometry_df.loc[
                self.spirometry_df["Parameters"].str.strip() == param
            ]
            if not row.empty:
-                param_key = param.lower().replace('/', '_').replace('%', '_pct')
+                param_key = param.lower().replace("/", "_").replace("%", "_pct")
                metrics[f"{param_key}_best"] = row["Best"].values[0]
                metrics[f"{param_key}_pred"] = row["%Pred."].values[0]
        return metrics
@@ -115,7 +147,11 @@ class ContextGenerator:
        if len(crossover_indices) > 0:
            vt1_idx = crossover_indices[0]
            vt1_row = self.pnoe_df.loc[vt1_idx]
-            vt1 = {"HeartRate": vt1_row["HR(bpm)_smoothed"], "Speed": vt1_row["Speed"], "Time": vt1_row["T(sec)"]}
+            vt1 = {
                "HeartRate": vt1_row["HR(bpm)_smoothed"],
                "Speed": vt1_row["Speed"],
                "Time": vt1_row["T(sec)"],
            }
        ve_slope = self.pnoe_df["VE(l/min)_smoothed"].diff()
        second_derivative = ve_slope.diff()
@@ -124,11 +160,17 @@ class ContextGenerator:
        vt2 = None
        if pd.notna(vt2_idx):
            vt2_row = self.pnoe_df.loc[vt2_idx]
-            vt2 = {"HeartRate": vt2_row["HR(bpm)_smoothed"], "Speed": vt2_row["Speed"], "Time": vt2_row["T(sec)"]}
+            vt2 = {
                "HeartRate": vt2_row["HR(bpm)_smoothed"],
                "Speed": vt2_row["Speed"],
                "Time": vt2_row["T(sec)"],
            }
        return vt1, vt2
-    def _calculate_hr_zones(self, vt1: Optional[Dict], vt2: Optional[Dict], fat_max_row: pd.Series) -> Dict:
+    def _calculate_hr_zones(
        self, vt1: Optional[Dict], vt2: Optional[Dict], fat_max_row: pd.Series
    ) -> Dict:
        """Calculate heart rate zones based on thresholds"""
        zones = {}
        if vt1 and vt2:
@@ -152,29 +194,87 @@ class ContextGenerator:
            zones["zone5_bpm"] = f"{int(max_hr * 0.95)}+bpm"
        return zones
-    def generate_all_contexts(self, patient_name: str, graphs: Dict[str, str]) -> List[Dict]:
+    def generate_all_contexts(
        self, patient_name: str, graphs: Dict[str, str]
    ) -> List[Dict]:
        """Main method to generate all page contexts"""
        self.extract_patient_info(patient_name)
        spirometry_metrics = self.calculate_spirometry_metrics()
        pnoe_metrics = self.calculate_pnoe_metrics()
        contexts = []
-        contexts.append({"name": self.patient_info["name"], "surname": self.patient_info["last_name"], "date": datetime.now().strftime("%B %d, %Y")})
+        contexts.append(
-        contexts.append({"patient_name": self.patient_info["name"], "test_date": datetime.now().strftime("%B %d, %Y")})
+            {
                "name": self.patient_info["name"],
                "surname": self.patient_info["last_name"],
                "date": datetime.now().strftime("%B %d, %Y"),
            }
        )
        contexts.append(
            {
                "patient_name": self.patient_info["name"],
                "test_date": datetime.now().strftime("%B %d, %Y"),
            }
        )
        for i in range(4):
-            contexts.append({"patient_name": self.patient_info["name"], "page_number": i + 3})
+            contexts.append(
                {"patient_name": self.patient_info["name"], "page_number": i + 3}
            )
        fev1_percentage = 0
        if spirometry_metrics.get("fvc_best"):
-            fev1_percentage = (pnoe_metrics["peak_vt"] / spirometry_metrics["fvc_best"]) * 100
+            fev1_percentage = (
                pnoe_metrics["peak_vt"] / spirometry_metrics["fvc_best"]
            ) * 100
-        contexts.append({"peak_vt": f"{pnoe_metrics['peak_vt']:.2f}", "peak_vt_bpm": f"{int(pnoe_metrics['peak_vt_hr'])}", "fev1_percentage": f"{fev1_percentage:.1f}", "lung_analysis_chart": graphs.get("spirometry_chart", ""), "respiratory_analysis_chart": graphs.get("respiratory", "")})
+        contexts.append(
-        contexts.append({"vo2_max_value": f"{pnoe_metrics['vo2_max_per_kg']:.1f}", "age_range": f"{self.patient_info['age'] // 10 * 10}-{self.patient_info['age'] // 10 * 10 + 9}", "zone1_bpm": pnoe_metrics.get("zone1_bpm", ""), "zone2_bpm": pnoe_metrics.get("zone2_bpm", ""), "zone3_bpm": pnoe_metrics.get("zone3_bpm", ""), "zone4_bpm": pnoe_metrics.get("zone4_bpm", ""), "zone5_bpm": pnoe_metrics.get("zone5_bpm", ""), "vo2_pulse_chart": graphs.get("vo2_pulse", "")})
+            {
-        contexts.append({"fat_max_value": f"{pnoe_metrics['fat_max_value']:.2f}", "fat_max_hr": f"{int(pnoe_metrics['fat_max_hr'])}", "fuel_utilization_chart": graphs.get("fuel_utilization", ""), "fat_metabolism_chart": graphs.get("fat_metabolism", "")})
+                "peak_vt": f"{pnoe_metrics['peak_vt']:.2f}",
-        contexts.append({"fat_percentage": f"{self.patient_info['fat_percentage']:.1f}", "fat_mass_lbs": f"{self.patient_info['fat_mass_lbs']:.1f}", "lean_mass_lbs": f"{self.patient_info['lean_mass_lbs']:.1f}", "body_composition_chart": graphs.get("body_composition", ""), "body_fat_percent_chart": graphs.get("body_fat_percent", "")})
+                "peak_vt_bpm": f"{int(pnoe_metrics['peak_vt_hr'])}",
                "fev1_percentage": f"{fev1_percentage:.1f}",
                "lung_analysis_chart": graphs.get("spirometry_chart", ""),
                "respiratory_analysis_chart": graphs.get("respiratory", ""),
            }
        )
        contexts.append(
            {
                "vo2_max_value": f"{pnoe_metrics['vo2_max_per_kg']:.1f}",
                "age_range": f"{self.patient_info['age'] // 10 * 10}-{self.patient_info['age'] // 10 * 10 + 9}",
                "zone1_bpm": pnoe_metrics.get("zone1_bpm", ""),
                "zone2_bpm": pnoe_metrics.get("zone2_bpm", ""),
                "zone3_bpm": pnoe_metrics.get("zone3_bpm", ""),
                "zone4_bpm": pnoe_metrics.get("zone4_bpm", ""),
                "zone5_bpm": pnoe_metrics.get("zone5_bpm", ""),
                "vo2_pulse_chart": graphs.get("vo2_pulse", ""),
            }
        )
        contexts.append(
            {
                "fat_max_value": f"{pnoe_metrics['fat_max_value']:.2f}",
                "fat_max_hr": f"{int(pnoe_metrics['fat_max_hr'])}",
                "fuel_utilization_chart": graphs.get("fuel_utilization", ""),
                "fat_metabolism_chart": graphs.get("fat_metabolism", ""),
            }
        )
        contexts.append(
            {
                "fat_percentage": f"{self.patient_info['fat_percentage']:.1f}",
                "fat_mass_lbs": f"{self.patient_info['fat_mass_lbs']:.1f}",
                "lean_mass_lbs": f"{self.patient_info['lean_mass_lbs']:.1f}",
                "body_composition_chart": graphs.get("body_composition", ""),
                "body_fat_percent_chart": graphs.get("body_fat_percent", ""),
            }
        )
        for i in range(9):
-            contexts.append({"patient_name": self.patient_info["name"], "page_number": i + 11, "vo2_breath_chart": graphs.get("vo2_breath", ""), "recovery_chart": graphs.get("recovery", "")})
+            contexts.append(
                {
                    "patient_name": self.patient_info["name"],
                    "page_number": i + 11,
                    "vo2_breath_chart": graphs.get("vo2_breath", ""),
                    "recovery_chart": graphs.get("recovery", ""),
                }
            )
        return contexts
@@ -8,6 +8,9 @@ Based on the analysis notebooks in services_dfdf/.
 import base64
 from pathlib import Path
 import matplotlib
 matplotlib.use("Agg")  # Use non-interactive backend
 import matplotlib.pyplot as plt
 import matplotlib.transforms as mtransforms
 import numpy as np
@@ -11,10 +11,9 @@ from typing import Any, Dict, List
 import pandas as pd
 from jinja2 import Environment, FileSystemLoader
 from playwright.sync_api import sync_playwright
-
+from services.context_generator import ContextGenerator
-from app.services.context_generator import ContextGenerator
+from services.graph_generator import GraphGenerator
-from app.services.graph_generator import GraphGenerator
+from services.spirometry_table_extractor import extract_spirometry_table_from_pdf
 from app.services.spirometry_table_extractor import extract_spirometry_table_from_pdf
 class ReportGeneratorService:
@@ -61,7 +60,13 @@ class ReportGeneratorService:
        """
        # Load data
        df = pd.read_csv(pnoe_csv_path, delimiter=";")
-        df = df.apply(pd.to_numeric, errors="ignore")
+
        # Convert each column to numeric individually (replaces the errors="ignore" call)
        for col in df.columns:
            try:
                df[col] = pd.to_numeric(df[col])
            except (ValueError, TypeError):
                pass  # Keep as-is if not numeric
        # Calculate derived columns
        df["VO2 Pulse"] = df["VO2(ml/min)"] / df["HR(bpm)"]
@@ -395,6 +400,7 @@ class ReportGeneratorService:
            )
        report_path = self.reports_dir / output_filename
        print(f"Generating PDF report at {report_path}")
        self.html_to_pdf(html_content, str(report_path))
        return {