diff --git a/.gitignore b/.gitignore index 78abf35..0f3167c 100644 --- a/.gitignore +++ b/.gitignore @@ -2,4 +2,10 @@ data/ -.env \ No newline at end of file +.env + +/graphs + +/data + +/reports \ No newline at end of file diff --git a/app/services/__pycache__/context_generator.cpython-312.pyc b/app/services/__pycache__/context_generator.cpython-312.pyc index e1de321..c3983ae 100644 Binary files a/app/services/__pycache__/context_generator.cpython-312.pyc and b/app/services/__pycache__/context_generator.cpython-312.pyc differ diff --git a/app/services/__pycache__/graph_generator.cpython-312.pyc b/app/services/__pycache__/graph_generator.cpython-312.pyc index 45c8c74..86f932d 100644 Binary files a/app/services/__pycache__/graph_generator.cpython-312.pyc and b/app/services/__pycache__/graph_generator.cpython-312.pyc differ diff --git a/app/services/__pycache__/report_generator.cpython-312.pyc b/app/services/__pycache__/report_generator.cpython-312.pyc index dfc0467..00b4f35 100644 Binary files a/app/services/__pycache__/report_generator.cpython-312.pyc and b/app/services/__pycache__/report_generator.cpython-312.pyc differ diff --git a/app/services/context_generator.py b/app/services/context_generator.py index f84fb05..2f33ef9 100644 --- a/app/services/context_generator.py +++ b/app/services/context_generator.py @@ -6,7 +6,6 @@ of the medical report. It performs analysis on Pnoe, Spirometry, and SECA data. """ from datetime import datetime -from pathlib import Path from typing import Dict, List, Optional, Tuple import pandas as pd @@ -35,28 +34,59 @@ class ContextGenerator: def _preprocess_pnoe_data(self): """Apply preprocessing steps to Pnoe data""" - self.pnoe_df = self.pnoe_df.apply(pd.to_numeric, errors="ignore") - self.pnoe_df["VO2 Pulse"] = self.pnoe_df["VO2(ml/min)"] / self.pnoe_df["HR(bpm)"] - self.pnoe_df["VO2 Breath"] = self.pnoe_df["VO2(ml/min)"] / self.pnoe_df["BF(bpm)"] - self.pnoe_df["CHO"] = self.pnoe_df["EE(kcal/min)"] * self.pnoe_df["CARBS(%)"] / 100 - self.pnoe_df["FAT"] = self.pnoe_df["EE(kcal/min)"] * self.pnoe_df["FAT(%)"] / 100 - + # Convert numeric columns + for col in self.pnoe_df.columns: + try: + self.pnoe_df[col] = pd.to_numeric(self.pnoe_df[col]) + except (ValueError, TypeError): + pass + + self.pnoe_df["VO2 Pulse"] = ( + self.pnoe_df["VO2(ml/min)"] / self.pnoe_df["HR(bpm)"] + ) + self.pnoe_df["VO2 Breath"] = ( + self.pnoe_df["VO2(ml/min)"] / self.pnoe_df["BF(bpm)"] + ) + self.pnoe_df["CHO"] = ( + self.pnoe_df["EE(kcal/min)"] * self.pnoe_df["CARBS(%)"] / 100 + ) + self.pnoe_df["FAT"] = ( + self.pnoe_df["EE(kcal/min)"] * self.pnoe_df["FAT(%)"] / 100 + ) + window_size = 10 - columns_to_smooth = ["VO2(ml/min)", "VCO2(ml/min)", "HR(bpm)", "VT(l)", "BF(bpm)", "VE(l/min)", "VO2 Pulse", "VO2 Breath", "CHO", "FAT"] - + columns_to_smooth = [ + "VO2(ml/min)", + "VCO2(ml/min)", + "HR(bpm)", + "VT(l)", + "BF(bpm)", + "VE(l/min)", + "VO2 Pulse", + "VO2 Breath", + "CHO", + "FAT", + ] + for col in columns_to_smooth: if col in self.pnoe_df.columns: - self.pnoe_df[f"{col}_smoothed"] = self.pnoe_df[col].rolling(window=window_size, min_periods=1).mean() + self.pnoe_df[f"{col}_smoothed"] = ( + self.pnoe_df[col].rolling(window=window_size, min_periods=1).mean() + ) def extract_patient_info(self, patient_name: str) -> Dict: """Extract patient information from SECA dataset""" if self.seca_df is not None: - patient_data = self.seca_df[self.seca_df["LastName"].str.contains(patient_name, case=False, na=False)] + patient_data = self.seca_df[ + self.seca_df["LastName"].str.contains( + patient_name, case=False, na=False + ) + ] if not patient_data.empty: row = patient_data.iloc[0] weight_kg = float(row.get("Weight", 0)) fat_pct = float(row.get("Adult_FMP", 0)) - + self.patient_info = { "name": f"{row.get('FirstName', '')} {row.get('LastName', '')}", "first_name": row.get("FirstName", ""), @@ -75,9 +105,11 @@ class ContextGenerator: """Calculate spirometry-related metrics""" metrics = {} for param in ["FVC", "FEV1", "FEV1/FVC%"]: - row = self.spirometry_df.loc[self.spirometry_df["Parameters"].str.strip() == param] + row = self.spirometry_df.loc[ + self.spirometry_df["Parameters"].str.strip() == param + ] if not row.empty: - param_key = param.lower().replace('/', '_').replace('%', '_pct') + param_key = param.lower().replace("/", "_").replace("%", "_pct") metrics[f"{param_key}_best"] = row["Best"].values[0] metrics[f"{param_key}_pred"] = row["%Pred."].values[0] return metrics @@ -87,21 +119,21 @@ class ContextGenerator: metrics = {} metrics["vo2_max"] = self.pnoe_df["VO2(ml/min)_smoothed"].max() metrics["vo2_max_per_kg"] = metrics["vo2_max"] / self.patient_info["weight"] - + peak_vt_idx = self.pnoe_df["VT(l)_smoothed"].idxmax() peak_vt_row = self.pnoe_df.loc[peak_vt_idx] metrics["peak_vt"] = peak_vt_row["VT(l)_smoothed"] metrics["peak_vt_hr"] = peak_vt_row["HR(bpm)_smoothed"] - + fat_max_idx = self.pnoe_df["FAT_smoothed"].idxmax() fat_max_row = self.pnoe_df.loc[fat_max_idx] metrics["fat_max_value"] = fat_max_row["FAT_smoothed"] metrics["fat_max_hr"] = fat_max_row["HR(bpm)_smoothed"] - + vt1, vt2 = self._detect_thresholds() metrics["vt1"] = vt1 metrics["vt2"] = vt2 - + zones = self._calculate_hr_zones(vt1, vt2, fat_max_row) metrics.update(zones) return metrics @@ -110,25 +142,35 @@ class ContextGenerator: """Detect VT1 and VT2 thresholds""" condition = self.pnoe_df["CHO_smoothed"] > self.pnoe_df["FAT_smoothed"] crossover_indices = condition[condition].index - + vt1 = None if len(crossover_indices) > 0: vt1_idx = crossover_indices[0] vt1_row = self.pnoe_df.loc[vt1_idx] - vt1 = {"HeartRate": vt1_row["HR(bpm)_smoothed"], "Speed": vt1_row["Speed"], "Time": vt1_row["T(sec)"]} - + vt1 = { + "HeartRate": vt1_row["HR(bpm)_smoothed"], + "Speed": vt1_row["Speed"], + "Time": vt1_row["T(sec)"], + } + ve_slope = self.pnoe_df["VE(l/min)_smoothed"].diff() second_derivative = ve_slope.diff() vt2_idx = second_derivative.idxmax() - + vt2 = None if pd.notna(vt2_idx): vt2_row = self.pnoe_df.loc[vt2_idx] - vt2 = {"HeartRate": vt2_row["HR(bpm)_smoothed"], "Speed": vt2_row["Speed"], "Time": vt2_row["T(sec)"]} - + vt2 = { + "HeartRate": vt2_row["HR(bpm)_smoothed"], + "Speed": vt2_row["Speed"], + "Time": vt2_row["T(sec)"], + } + return vt1, vt2 - def _calculate_hr_zones(self, vt1: Optional[Dict], vt2: Optional[Dict], fat_max_row: pd.Series) -> Dict: + def _calculate_hr_zones( + self, vt1: Optional[Dict], vt2: Optional[Dict], fat_max_row: pd.Series + ) -> Dict: """Calculate heart rate zones based on thresholds""" zones = {} if vt1 and vt2: @@ -137,7 +179,7 @@ class ContextGenerator: zone_3_start = vt1["HeartRate"] zone_4_start = vt2["HeartRate"] - 10 zone_5_start = vt2["HeartRate"] + 10 - + zones["zone1_bpm"] = f"{int(zone_1_start)}-{int(zone_2_start)}bpm" zones["zone2_bpm"] = f"{int(zone_2_start)}-{int(vt1['HeartRate'])}bpm" zones["zone3_bpm"] = f"{int(zone_3_start)}-{int(zone_4_start)}bpm" @@ -152,29 +194,87 @@ class ContextGenerator: zones["zone5_bpm"] = f"{int(max_hr * 0.95)}+bpm" return zones - def generate_all_contexts(self, patient_name: str, graphs: Dict[str, str]) -> List[Dict]: + def generate_all_contexts( + self, patient_name: str, graphs: Dict[str, str] + ) -> List[Dict]: """Main method to generate all page contexts""" self.extract_patient_info(patient_name) spirometry_metrics = self.calculate_spirometry_metrics() pnoe_metrics = self.calculate_pnoe_metrics() - + contexts = [] - contexts.append({"name": self.patient_info["name"], "surname": self.patient_info["last_name"], "date": datetime.now().strftime("%B %d, %Y")}) - contexts.append({"patient_name": self.patient_info["name"], "test_date": datetime.now().strftime("%B %d, %Y")}) - + contexts.append( + { + "name": self.patient_info["name"], + "surname": self.patient_info["last_name"], + "date": datetime.now().strftime("%B %d, %Y"), + } + ) + contexts.append( + { + "patient_name": self.patient_info["name"], + "test_date": datetime.now().strftime("%B %d, %Y"), + } + ) + for i in range(4): - contexts.append({"patient_name": self.patient_info["name"], "page_number": i + 3}) - + contexts.append( + {"patient_name": self.patient_info["name"], "page_number": i + 3} + ) + fev1_percentage = 0 if spirometry_metrics.get("fvc_best"): - fev1_percentage = (pnoe_metrics["peak_vt"] / spirometry_metrics["fvc_best"]) * 100 - - contexts.append({"peak_vt": f"{pnoe_metrics['peak_vt']:.2f}", "peak_vt_bpm": f"{int(pnoe_metrics['peak_vt_hr'])}", "fev1_percentage": f"{fev1_percentage:.1f}", "lung_analysis_chart": graphs.get("spirometry_chart", ""), "respiratory_analysis_chart": graphs.get("respiratory", "")}) - contexts.append({"vo2_max_value": f"{pnoe_metrics['vo2_max_per_kg']:.1f}", "age_range": f"{self.patient_info['age'] // 10 * 10}-{self.patient_info['age'] // 10 * 10 + 9}", "zone1_bpm": pnoe_metrics.get("zone1_bpm", ""), "zone2_bpm": pnoe_metrics.get("zone2_bpm", ""), "zone3_bpm": pnoe_metrics.get("zone3_bpm", ""), "zone4_bpm": pnoe_metrics.get("zone4_bpm", ""), "zone5_bpm": pnoe_metrics.get("zone5_bpm", ""), "vo2_pulse_chart": graphs.get("vo2_pulse", "")}) - contexts.append({"fat_max_value": f"{pnoe_metrics['fat_max_value']:.2f}", "fat_max_hr": f"{int(pnoe_metrics['fat_max_hr'])}", "fuel_utilization_chart": graphs.get("fuel_utilization", ""), "fat_metabolism_chart": graphs.get("fat_metabolism", "")}) - contexts.append({"fat_percentage": f"{self.patient_info['fat_percentage']:.1f}", "fat_mass_lbs": f"{self.patient_info['fat_mass_lbs']:.1f}", "lean_mass_lbs": f"{self.patient_info['lean_mass_lbs']:.1f}", "body_composition_chart": graphs.get("body_composition", ""), "body_fat_percent_chart": graphs.get("body_fat_percent", "")}) - + fev1_percentage = ( + pnoe_metrics["peak_vt"] / spirometry_metrics["fvc_best"] + ) * 100 + + contexts.append( + { + "peak_vt": f"{pnoe_metrics['peak_vt']:.2f}", + "peak_vt_bpm": f"{int(pnoe_metrics['peak_vt_hr'])}", + "fev1_percentage": f"{fev1_percentage:.1f}", + "lung_analysis_chart": graphs.get("spirometry_chart", ""), + "respiratory_analysis_chart": graphs.get("respiratory", ""), + } + ) + contexts.append( + { + "vo2_max_value": f"{pnoe_metrics['vo2_max_per_kg']:.1f}", + "age_range": f"{self.patient_info['age'] // 10 * 10}-{self.patient_info['age'] // 10 * 10 + 9}", + "zone1_bpm": pnoe_metrics.get("zone1_bpm", ""), + "zone2_bpm": pnoe_metrics.get("zone2_bpm", ""), + "zone3_bpm": pnoe_metrics.get("zone3_bpm", ""), + "zone4_bpm": pnoe_metrics.get("zone4_bpm", ""), + "zone5_bpm": pnoe_metrics.get("zone5_bpm", ""), + "vo2_pulse_chart": graphs.get("vo2_pulse", ""), + } + ) + contexts.append( + { + "fat_max_value": f"{pnoe_metrics['fat_max_value']:.2f}", + "fat_max_hr": f"{int(pnoe_metrics['fat_max_hr'])}", + "fuel_utilization_chart": graphs.get("fuel_utilization", ""), + "fat_metabolism_chart": graphs.get("fat_metabolism", ""), + } + ) + contexts.append( + { + "fat_percentage": f"{self.patient_info['fat_percentage']:.1f}", + "fat_mass_lbs": f"{self.patient_info['fat_mass_lbs']:.1f}", + "lean_mass_lbs": f"{self.patient_info['lean_mass_lbs']:.1f}", + "body_composition_chart": graphs.get("body_composition", ""), + "body_fat_percent_chart": graphs.get("body_fat_percent", ""), + } + ) + for i in range(9): - contexts.append({"patient_name": self.patient_info["name"], "page_number": i + 11, "vo2_breath_chart": graphs.get("vo2_breath", ""), "recovery_chart": graphs.get("recovery", "")}) - + contexts.append( + { + "patient_name": self.patient_info["name"], + "page_number": i + 11, + "vo2_breath_chart": graphs.get("vo2_breath", ""), + "recovery_chart": graphs.get("recovery", ""), + } + ) + return contexts diff --git a/app/services/graph_generator.py b/app/services/graph_generator.py index 24f7f2f..82e3454 100644 --- a/app/services/graph_generator.py +++ b/app/services/graph_generator.py @@ -8,6 +8,9 @@ Based on the analysis notebooks in services_dfdf/. import base64 from pathlib import Path +import matplotlib + +matplotlib.use("Agg") # Use non-interactive backend import matplotlib.pyplot as plt import matplotlib.transforms as mtransforms import numpy as np diff --git a/app/services/report_generator.py b/app/services/report_generator.py index 5add2be..044c6f5 100644 --- a/app/services/report_generator.py +++ b/app/services/report_generator.py @@ -11,10 +11,9 @@ from typing import Any, Dict, List import pandas as pd from jinja2 import Environment, FileSystemLoader from playwright.sync_api import sync_playwright - -from app.services.context_generator import ContextGenerator -from app.services.graph_generator import GraphGenerator -from app.services.spirometry_table_extractor import extract_spirometry_table_from_pdf +from services.context_generator import ContextGenerator +from services.graph_generator import GraphGenerator +from services.spirometry_table_extractor import extract_spirometry_table_from_pdf class ReportGeneratorService: @@ -61,7 +60,13 @@ class ReportGeneratorService: """ # Load data df = pd.read_csv(pnoe_csv_path, delimiter=";") - df = df.apply(pd.to_numeric, errors="ignore") + + # Convert numeric columns (updated approach) + for col in df.columns: + try: + df[col] = pd.to_numeric(df[col]) + except (ValueError, TypeError): + pass # Keep as-is if not numeric # Calculate derived columns df["VO2 Pulse"] = df["VO2(ml/min)"] / df["HR(bpm)"] @@ -395,6 +400,7 @@ class ReportGeneratorService: ) report_path = self.reports_dir / output_filename + print(f"Generating PDF report at {report_path}") self.html_to_pdf(html_content, str(report_path)) return {