feat: Implement report generator service for medical reports

- Added ReportGeneratorService to handle generation of medical reports from uploaded files.
- Implemented methods for processing Pnoe CSV data, generating graphs, and calculating analysis metrics.
- Integrated Jinja2 for HTML report generation with customizable templates.
- Added functionality to convert HTML content to PDF using Playwright.
- Ensured proper directory structure for saving generated graphs and reports.
This commit is contained in:
bolade
2025-10-03 21:41:00 +01:00
parent 1d8136d6ad
commit 11ee6b192f
13 changed files with 896 additions and 658 deletions
-807
View File
@@ -1,807 +0,0 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 2,
"id": "b18c1027",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"{'id': 'gen-1759135172-DIhs7TMuaaVY0h3T2ibV', 'provider': 'Google', 'model': 'google/gemini-2.5-flash-lite', 'object': 'chat.completion', 'created': 1759135172, 'choices': [{'logprobs': None, 'finish_reason': 'stop', 'native_finish_reason': 'STOP', 'index': 0, 'message': {'role': 'assistant', 'content': 'Parameters,Best,LLN,Pred.,%Pred.,ZScore,PRE#1,PRE#2,PRE#3\\nFVC,L,4.24,3.03,3.79,112.0,0.95,4.24,4.17,4.15\\nFEV1,L,3.26,2.53,3.16,103.3,0.28,3.26,3.21,3.14\\nFEV1/FVC%,76.89,72.47,83.78,91.8,-1.05,76.9,77.0,75.7\\nPEF,L/m,684,222,384,178.7,-,444,438,684\\nFEF2575,L/s,2.74,2.15,3.42,80.2,-0.84,2.74,2.68,2.48\\nFEF25,L/s,6.08,-,-,-,6.08,6.0,5.53\\nFEF50,L/s,3.06,-,-,-,3.06,3.1,2.77\\nFEF75,L/s,1.06,0.71,1.41,75.1,-0.72,1.06,1.12,0.94\\nPEFTime,ms,-,-,79,-,79,49,39\\nEvol,mL,-,-,78.0,-,78.0,77.0,197.0\\nFEV6,L,4.22,3.03,3.79,111.4,-,4.22,4.17,4.13', 'refusal': None, 'reasoning': None}}], 'usage': {'prompt_tokens': 1350, 'completion_tokens': 454, 'total_tokens': 1804, 'prompt_tokens_details': {'cached_tokens': 0}, 'completion_tokens_details': {'reasoning_tokens': 0, 'image_tokens': 0}}}\n",
"Content saved to extracted_table.csv\n"
]
}
],
"source": [
"\n",
"import requests\n",
"import json\n",
"import base64\n",
"from pathlib import Path\n",
"\n",
"API_KEY_REF = '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'\n",
"def encode_pdf_to_base64(pdf_path):\n",
" with open(pdf_path, \"rb\") as pdf_file:\n",
" return base64.b64encode(pdf_file.read()).decode('utf-8')\n",
"\n",
"url = \"https://openrouter.ai/api/v1/chat/completions\"\n",
"headers = {\n",
" \"Authorization\": f\"Bearer {API_KEY_REF}\",\n",
" \"Content-Type\": \"application/json\"\n",
"}\n",
"\n",
"# Read and encode the PDF\n",
"pdf_path = \"data/~Moran~K~19910201~Spirometry Exam~20250729~20250729032843.pdf\"\n",
"base64_pdf = encode_pdf_to_base64(pdf_path)\n",
"data_url = f\"data:application/pdf;base64,{base64_pdf}\"\n",
"\n",
"messages = [\n",
" {\n",
" \"role\": \"user\",\n",
" \"content\": [\n",
" {\n",
" \"type\": \"text\",\n",
" \"text\": \"Please extract the Spirometry table from the pdf and return the values in csv format, \"\n",
" \"note that it is the unit of parameter that is beside it and it should not be a column. \"\n",
" \"The '-' Should be treated as empty values.\"\n",
" \"do not add 'csv' at the start or end of the response\"\n",
" },\n",
" {\n",
" \"type\": \"file\",\n",
" \"file\": {\n",
" \"filename\": \"document.pdf\",\n",
" \"file_data\": data_url\n",
" }\n",
" },\n",
" ]\n",
" }\n",
"]\n",
"\n",
"# Optional: Configure PDF processing engine\n",
"# PDF parsing will still work even if the plugin is not explicitly set\n",
"plugins = [\n",
" {\n",
" \"id\": \"file-parser\",\n",
" \"pdf\": {\n",
" \"engine\": \"pdf-text\" # defaults to \"mistral-ocr\". See Pricing above\n",
" }\n",
" }\n",
"]\n",
"\n",
"payload = {\n",
" \"model\": \"google/gemini-2.5-flash-lite\",\n",
" \"messages\": messages,\n",
"}\n",
"\n",
"response = requests.post(url, headers=headers, json=payload)\n",
"# Get the response content\n",
"response_data = response.json()\n",
"print(response_data)\n",
"\n",
"# Extract the content from the response\n",
"if 'choices' in response_data and len(response_data['choices']) > 0:\n",
" content = response_data['choices'][0]['message']['content']\n",
" \n",
" # Save to a CSV file\n",
" output_file = \"extracted_table.csv\"\n",
" with open(output_file, 'w', encoding='utf-8') as f:\n",
" f.write(content)\n",
" \n",
" print(f\"Content saved to {output_file}\")\n",
"else:\n",
" print(\"No content found in response\")"
]
},
{
"cell_type": "code",
"execution_count": 12,
"id": "56a9d655",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"FVC Best: 4.24, FVC Pred: 112.0\n",
"FEV1 Best: 3.26, FEV1 Pred: 103.3\n",
"FEV1/FVC% Best: 76.89, FEV1/FVC% Pred: 91.8\n"
]
}
],
"source": [
"import pandas as pd\n",
"spirometry_df = pd.read_csv(\"extracted_table.csv\")\n",
"\n",
"fvc_best = spirometry_df.loc[spirometry_df['Parameters'] == 'FVC', 'Best'].values[0]\n",
"fvc_pred = spirometry_df.loc[spirometry_df['Parameters'] == 'FVC', '%Pred.'].values[0]\n",
"\n",
"fev1_best = spirometry_df.loc[spirometry_df['Parameters'] == 'FEV1', 'Best'].values[0]\n",
"fev1_pred = spirometry_df.loc[spirometry_df['Parameters'] == 'FEV1', '%Pred.'].values[0]\n",
"\n",
"fev1_fevc_best = spirometry_df.loc[spirometry_df['Parameters'] == 'FEV1/FVC%', 'Best'].values[0]\n",
"fev1_fevc_pred = spirometry_df.loc[spirometry_df['Parameters'] == 'FEV1/FVC%', '%Pred.'].values[0]\n",
"\n",
"print(f\"FVC Best: {fvc_best}, FVC Pred: {fvc_pred}\")\n",
"print(f\"FEV1 Best: {fev1_best}, FEV1 Pred: {fev1_pred}\")\n",
"print(f\"FEV1/FVC% Best: {fev1_fevc_best}, FEV1/FVC% Pred: {fev1_fevc_pred}\")"
]
},
{
"cell_type": "code",
"execution_count": 16,
"id": "990f4b4f",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Peak VT: 2.75\n",
"HR at Peak VT: 155.0\n"
]
}
],
"source": [
"df = pd.read_csv('data/Pnoe_20250729_1550-Moran_Keirstyn.csv', delimiter=';')\n",
"peak_vt = df['VT(l)'].max()\n",
"max_vt_row = df.loc[df['VT(l)'].idxmax()]\n",
"print(f\"Peak VT: {peak_vt}\")\n",
"hr = max_vt_row['HR(bpm)']\n",
"print(f\"HR at Peak VT: {hr}\")"
]
},
{
"cell_type": "code",
"execution_count": 18,
"id": "041cbc3d",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Peak VT: 2.3770000000000002\n",
"HR at Peak VT: 171.525\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/tmp/ipykernel_301535/4157056299.py:3: FutureWarning: errors='ignore' is deprecated and will raise in a future version. Use to_numeric without passing `errors` and catch exceptions explicitly instead\n",
" df = df.apply(pd.to_numeric, errors='ignore')\n"
]
}
],
"source": [
"df = pd.read_csv('data/Pnoe_20250729_1550-Moran_Keirstyn.csv', delimiter=';')\n",
"# Convert all columns to numeric where possible, coercing errors to NaN\n",
"df = df.apply(pd.to_numeric, errors='ignore')\n",
"df['VO2 Pulse'] = df['VO2(ml/min)'] / df['HR(bpm)'] # VO2 Pulse in mL/beat\n",
"df['VO2 Breath'] = df['VO2(ml/min)'] / df['BF(bpm)'] # VO2 per Breath in mL/breath\n",
"df['CHO'] = df['EE(kcal/min)'] * df['CARBS(%)']/100\n",
"df['FAT'] = df['EE(kcal/min)'] * df['FAT(%)']/100\n",
"# Smooth key columns using rolling window\n",
"window_size = 10\n",
"\n",
"# List of columns to smooth\n",
"columns_to_smooth = ['VO2(ml/min)', 'VCO2(ml/min)', 'HR(bpm)', 'VT(l)', 'BF(bpm)', 'VE(l/min)', 'VO2 Pulse', 'VO2 Breath', 'CHO', 'FAT']\n",
"\n",
"# Apply smoothing to each column\n",
"for col in columns_to_smooth:\n",
" if col in df.columns:\n",
" df[f'{col}_smoothed'] = df[col].rolling(window=window_size).mean()\n",
" \n",
"peak_vt = df['VT(l)_smoothed'].max()\n",
"max_vt_row = df.loc[df['VT(l)_smoothed'].idxmax()]\n",
"print(f\"Peak VT: {peak_vt}\")\n",
"hr = max_vt_row['HR(bpm)_smoothed']\n",
"print(f\"HR at Peak VT: {hr}\")"
]
},
{
"cell_type": "code",
"execution_count": 20,
"id": "de7cadd1",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Percent FEV: 72.91411042944786\n"
]
}
],
"source": [
"percent_fev = (peak_vt / fev1_best) * 100\n",
"print(f\"Percent FEV: {percent_fev}\")"
]
},
{
"cell_type": "code",
"execution_count": 24,
"id": "cb972ed3",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>MeasurementDate</th>\n",
" <th>Comment</th>\n",
" <th>ExternalDeviceId</th>\n",
" <th>ExternalPatientId</th>\n",
" <th>FirstName</th>\n",
" <th>LastName</th>\n",
" <th>BirthDate</th>\n",
" <th>Age</th>\n",
" <th>Ethnicity</th>\n",
" <th>Gender</th>\n",
" <th>...</th>\n",
" <th>Child_XC</th>\n",
" <th>Child_XC_Unit</th>\n",
" <th>Child_BIVA_ZRh</th>\n",
" <th>Child_BIVA_ZXcH</th>\n",
" <th>Child_PhA</th>\n",
" <th>Child_PhA_Unit</th>\n",
" <th>Child_REE_Kcal</th>\n",
" <th>Child_REE_MJ</th>\n",
" <th>Child_TEE_Kcal</th>\n",
" <th>Child_TEE_MJ</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>13</th>\n",
" <td>2025-07-29T18:58:54.0000000Z</td>\n",
" <td>NaN</td>\n",
" <td>10000001583275_0055003f5631501320313557</td>\n",
" <td>KM6479696509</td>\n",
" <td>Keirstyn</td>\n",
" <td>Moran</td>\n",
" <td>1991-02-01T00:00:00.0000000Z</td>\n",
" <td>34</td>\n",
" <td>Caucasian</td>\n",
" <td>Female</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>1 rows × 147 columns</p>\n",
"</div>"
],
"text/plain": [
" MeasurementDate Comment \\\n",
"13 2025-07-29T18:58:54.0000000Z NaN \n",
"\n",
" ExternalDeviceId ExternalPatientId FirstName \\\n",
"13 10000001583275_0055003f5631501320313557 KM6479696509 Keirstyn \n",
"\n",
" LastName BirthDate Age Ethnicity Gender ... \\\n",
"13 Moran 1991-02-01T00:00:00.0000000Z 34 Caucasian Female ... \n",
"\n",
" Child_XC Child_XC_Unit Child_BIVA_ZRh Child_BIVA_ZXcH Child_PhA \\\n",
"13 NaN NaN NaN NaN NaN \n",
"\n",
" Child_PhA_Unit Child_REE_Kcal Child_REE_MJ Child_TEE_Kcal Child_TEE_MJ \n",
"13 NaN NaN NaN NaN NaN \n",
"\n",
"[1 rows x 147 columns]"
]
},
"execution_count": 24,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"personal_df = pd.read_excel('data/SECA body comp for all patients.xlsx')\n",
"\n",
"keirstyn_data = personal_df[personal_df['LastName'].str.contains('Moran', case=False, na=False)]\n",
"keirstyn_data"
]
},
{
"cell_type": "code",
"execution_count": 26,
"id": "98d9295a",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"VO2 Max: 47.906290322580645\n"
]
}
],
"source": [
"v02_max = df['VO2(ml/min)_smoothed'].max()\n",
"weight = keirstyn_data['Weight'].iloc[0]\n",
"print(f\"VO2 Max: {v02_max/weight}\")"
]
},
{
"cell_type": "code",
"execution_count": 32,
"id": "cdfeb309",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"==================================================\n",
"Optimal Fat Burning Zone (highest fat:carb ratio):\n",
"Time: 164.0 seconds\n",
"Fat burn rate: 3.894 kcal/min\n",
"Carb burn rate: 1.575 kcal/min\n",
"Fat:Carb ratio: 2.47\n",
"Heart Rate: 96.7 bpm\n",
"VO2: 1147.9 ml/min\n"
]
}
],
"source": [
"# Find the point where fat burning is highest and carb burning is lowest\n",
"# Using the smoothed data for more stable results\n",
"fat_burn_max_idx = df['FAT_smoothed'].idxmax()\n",
"carb_burn_min_idx = df['CHO_smoothed'].idxmin()\n",
"\n",
"# # Get the data at maximum fat burning point\n",
"# max_fat_row = df.loc[fat_burn_max_idx]\n",
"# print(f\"Maximum Fat Burning Point:\")\n",
"# print(f\"Time: {max_fat_row['T(sec)']} seconds\")\n",
"# print(f\"Fat burn rate: {max_fat_row['FAT_smoothed']:.3f} kcal/min\")\n",
"# print(f\"Carb burn rate: {max_fat_row['CHO_smoothed']:.3f} kcal/min\")\n",
"# print(f\"Heart Rate: {max_fat_row['HR(bpm)_smoothed']:.1f} bpm\")\n",
"# print(f\"VO2: {max_fat_row['VO2(ml/min)_smoothed']:.1f} ml/min\")\n",
"\n",
"# print(\"\\n\" + \"=\"*50)\n",
"\n",
"# # Get the data at minimum carb burning point\n",
"# min_carb_row = df.loc[carb_burn_min_idx]\n",
"# print(f\"Minimum Carbohydrate Burning Point:\")\n",
"# print(f\"Time: {min_carb_row['T(sec)']} seconds\")\n",
"# print(f\"Fat burn rate: {min_carb_row['FAT_smoothed']:.3f} kcal/min\")\n",
"# print(f\"Carb burn rate: {min_carb_row['CHO_smoothed']:.3f} kcal/min\")\n",
"# print(f\"Heart Rate: {min_carb_row['HR(bpm)_smoothed']:.1f} bpm\")\n",
"# print(f\"VO2: {min_carb_row['VO2(ml/min)_smoothed']:.1f} ml/min\")\n",
"\n",
"print(\"\\n\" + \"=\"*50)\n",
"\n",
"# Find the optimal fat burning zone (highest fat:carb ratio)\n",
"df['fat_carb_ratio'] = df['FAT_smoothed'] / (df['CHO_smoothed'] + 0.00000001) # Add small value to avoid division by zero\n",
"optimal_fat_idx = df['fat_carb_ratio'].idxmax()\n",
"optimal_row = df.loc[optimal_fat_idx]\n",
"\n",
"print(f\"Optimal Fat Burning Zone (highest fat:carb ratio):\")\n",
"print(f\"Time: {optimal_row['T(sec)']} seconds\")\n",
"print(f\"Fat burn rate: {optimal_row['FAT_smoothed']:.3f} kcal/min\")\n",
"print(f\"Carb burn rate: {optimal_row['CHO_smoothed']:.3f} kcal/min\")\n",
"print(f\"Fat:Carb ratio: {optimal_row['fat_carb_ratio']:.2f}\")\n",
"print(f\"Heart Rate: {optimal_row['HR(bpm)_smoothed']:.1f} bpm\")\n",
"print(f\"VO2: {optimal_row['VO2(ml/min)_smoothed']:.1f} ml/min\")"
]
},
{
"cell_type": "code",
"execution_count": 33,
"id": "4420cfea",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Found 2 intersections at indices: [18, 47]\n",
"\n",
"Last intersection at index 47:\n",
"Time: 251.0 seconds\n",
"Fat burn rate: 3.040 kcal/min\n",
"Carb burn rate: 3.166 kcal/min\n",
"Heart Rate: 100.5 bpm\n",
"VO2: 1283.0 ml/min\n"
]
}
],
"source": [
"# Find intersections where FAT_smoothed and CHO_smoothed cross each other\n",
"intersections = []\n",
"\n",
"for i in range(1, len(df)):\n",
" # Check if there's a crossover between consecutive points\n",
" prev_fat = df.iloc[i-1]['FAT_smoothed']\n",
" prev_cho = df.iloc[i-1]['CHO_smoothed']\n",
" curr_fat = df.iloc[i]['FAT_smoothed']\n",
" curr_cho = df.iloc[i]['CHO_smoothed']\n",
" \n",
" # Skip if any values are NaN\n",
" if pd.isna(prev_fat) or pd.isna(prev_cho) or pd.isna(curr_fat) or pd.isna(curr_cho):\n",
" continue\n",
" \n",
" # Check if lines cross (fat was above/below cho and now it's below/above)\n",
" if ((prev_fat > prev_cho and curr_fat < curr_cho) or \n",
" (prev_fat < prev_cho and curr_fat > curr_cho)):\n",
" intersections.append(i)\n",
"\n",
"print(f\"Found {len(intersections)} intersections at indices: {intersections}\")\n",
"\n",
"if intersections:\n",
" # Get the last intersection\n",
" last_intersection_idx = intersections[-1]\n",
" last_intersection_row = df.iloc[last_intersection_idx]\n",
" \n",
" print(f\"\\nLast intersection at index {last_intersection_idx}:\")\n",
" print(f\"Time: {last_intersection_row['T(sec)']} seconds\")\n",
" print(f\"Fat burn rate: {last_intersection_row['FAT_smoothed']:.3f} kcal/min\")\n",
" print(f\"Carb burn rate: {last_intersection_row['CHO_smoothed']:.3f} kcal/min\")\n",
" print(f\"Heart Rate: {last_intersection_row['HR(bpm)_smoothed']:.1f} bpm\")\n",
" print(f\"VO2: {last_intersection_row['VO2(ml/min)_smoothed']:.1f} ml/min\")\n",
"else:\n",
" print(\"No intersections found between FAT and CHO curves\")"
]
},
{
"cell_type": "code",
"execution_count": 37,
"id": "62803668",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"VT1: {'HeartRate': 100.5, 'Speed': 4.0, 'Time': 251.0}\n",
"VT2: {'HeartRate': 189.71300000000002, 'Speed': 7.5, 'Time': 1524.0}\n"
]
}
],
"source": [
"def detect_vt1(df, fat_col=\"FAT_smoothed\", carb_col=\"CHO_smoothed\"):\n",
" \"\"\"\n",
" Detect VT1 as the first index where carb burn > fat burn and remains higher.\n",
" \"\"\"\n",
" condition = df[carb_col] > df[fat_col]\n",
" crossover_indices = condition[condition].index\n",
"\n",
" if len(crossover_indices) == 0:\n",
" return None # No crossover found\n",
" \n",
" # Find first crossover where carbs remain higher for the rest\n",
" for idx in crossover_indices:\n",
" if all(df.loc[idx:][carb_col] > df.loc[idx:][fat_col]):\n",
" return idx\n",
" return None\n",
"\n",
"\n",
"def detect_vt2(df, vent_col=\"VE(l/min)_smoothed\", bf_col=\"BF(bpm)_smoothed\", smooth_window=5):\n",
" \"\"\"\n",
" Detect VT2 using slope/inflection method.\n",
" Works with either Ventilation (VE) or Breathing Frequency (Bf).\n",
" \"\"\"\n",
" col = vent_col if vent_col in df.columns else bf_col\n",
" \n",
" # Use already smoothed data\n",
" smoothed_col = col\n",
" \n",
" # Compute slope (first derivative)\n",
" df[\"slope\"] = df[smoothed_col].diff()\n",
" \n",
" # Detect inflection: largest change in slope (second derivative peak)\n",
" df[\"second_derivative\"] = df[\"slope\"].diff()\n",
" inflection_idx = df[\"second_derivative\"].idxmax()\n",
" \n",
" return inflection_idx\n",
"\n",
"\n",
"def analyze_thresholds(df_input):\n",
" # Use the existing dataframe\n",
" df_copy = df_input.copy()\n",
" \n",
" # --- Detect VT1 ---\n",
" vt1_idx = detect_vt1(df_copy)\n",
" vt1 = None\n",
" if vt1_idx is not None:\n",
" vt1 = {\n",
" \"HeartRate\": df_copy.loc[vt1_idx, \"HR(bpm)_smoothed\"],\n",
" \"Speed\": df_copy.loc[vt1_idx, \"Speed\"],\n",
" \"Time\": df_copy.loc[vt1_idx, \"T(sec)\"]\n",
" }\n",
" \n",
" # --- Detect VT2 ---\n",
" vt2_idx = detect_vt2(df_copy)\n",
" vt2 = None\n",
" if vt2_idx is not None:\n",
" vt2 = {\n",
" \"HeartRate\": df_copy.loc[vt2_idx, \"HR(bpm)_smoothed\"],\n",
" \"Speed\": df_copy.loc[vt2_idx, \"Speed\"],\n",
" \"Time\": df_copy.loc[vt2_idx, \"T(sec)\"]\n",
" }\n",
" \n",
" return vt1, vt2\n",
"\n",
"\n",
"vt1, vt2 = analyze_thresholds(df)\n",
"print(\"VT1:\", vt1)\n",
"print(\"VT2:\", vt2)\n"
]
},
{
"cell_type": "code",
"execution_count": 40,
"id": "07593b56",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Zone 1 (Active Recovery): 81.7 - 96.7 bpm\n",
"Zone 2 (Aerobic Base): 96.7 - 100.5 bpm\n",
"Zone 3 (Aerobic): 100.5 - 179.7 bpm\n",
"Zone 4 (Lactate Threshold): 179.7 - 199.7 bpm\n",
"Zone 5 (VO2 Max): 199.7+ bpm\n"
]
}
],
"source": [
"zone_1_start = optimal_row['HR(bpm)_smoothed'] - 15\n",
"zone_2_start = optimal_row['HR(bpm)_smoothed']\n",
"zone_3_start = vt1\n",
"zone_4_start = vt2['HeartRate'] - 10\n",
"zone_5_start = vt2['HeartRate'] + 10\n",
"\n",
"zone_1_end = zone_2_start\n",
"zone_2_end = vt1['HeartRate']\n",
"zone_3_end = zone_4_start\n",
"zone_4_end = zone_5_start\n",
"\n",
"print(f\"Zone 1 (Active Recovery): {zone_1_start:.1f} - {zone_1_end:.1f} bpm\")\n",
"print(f\"Zone 2 (Aerobic Base): {zone_2_start:.1f} - {zone_2_end:.1f} bpm\")\n",
"print(f\"Zone 3 (Aerobic): {zone_3_start['HeartRate']:.1f} - {zone_3_end:.1f} bpm\")\n",
"print(f\"Zone 4 (Lactate Threshold): {zone_4_start:.1f} - {zone_4_end:.1f} bpm\")\n",
"print(f\"Zone 5 (VO2 Max): {zone_5_start:.1f}+ bpm\")"
]
},
{
"cell_type": "code",
"execution_count": 60,
"id": "c90415b2",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"VO2 Max detected at index 202:\n",
"Time: 985.0 seconds\n",
"VO2 Breath: 58.2 ml/breath\n",
"VO2: 2167.8 ml/min\n",
"VO2 per kg: 38.8 ml/kg/min\n",
"Heart Rate: 170.5 bpm\n",
"Speed: 6.0 km/h\n",
"VO2 Breath Slope: -0.02\n"
]
}
],
"source": [
"# Calculate the slope of VO2 Breath (first derivative)\n",
"df['vo2_breath_slope'] = df['VO2 Breath_smoothed'].diff()\n",
"\n",
"# Find points where slope is consistently zero or negative\n",
"# We'll use a rolling window to check for consistent negative/zero slope\n",
"window = len(df) // 3 # Number of consecutive points to check\n",
"\n",
"# Calculate rolling mean of slope to smooth out noise\n",
"df['vo2_breath_slope_smoothed'] = df['vo2_breath_slope'].rolling(window=window).mean()\n",
"\n",
"# Find where slope becomes consistently zero or negative\n",
"mask = df['vo2_breath_slope_smoothed'] <= 0\n",
"consistent_negative_indices = mask[mask].index\n",
"\n",
"if len(consistent_negative_indices) > 0:\n",
" # Find the first point where slope becomes consistently negative/zero\n",
" vo2_max_idx = consistent_negative_indices[0]\n",
" vo2_max_row = df.loc[vo2_max_idx]\n",
" \n",
" print(f\"VO2 Max detected at index {vo2_max_idx}:\")\n",
" print(f\"Time: {vo2_max_row['T(sec)']} seconds\")\n",
" print(f\"VO2 Breath: {vo2_max_row['VO2 Breath_smoothed']:.1f} ml/breath\")\n",
" print(f\"VO2: {vo2_max_row['VO2(ml/min)_smoothed']:.1f} ml/min\")\n",
" print(f\"VO2 per kg: {vo2_max_row['VO2(ml/min)_smoothed']/weight:.1f} ml/kg/min\")\n",
" print(f\"Heart Rate: {vo2_max_row['HR(bpm)_smoothed']:.1f} bpm\")\n",
" print(f\"Speed: {vo2_max_row['Speed']} km/h\")\n",
" print(f\"VO2 Breath Slope: {vo2_max_row['vo2_breath_slope_smoothed']:.2f}\")\n",
"else:\n",
" # If no consistent negative slope found, use the maximum VO2 Breath value\n",
" vo2_max_idx = df['VO2 Breath_smoothed'].idxmax()\n",
" vo2_max_row = df.loc[vo2_max_idx]\n",
" \n",
" print(f\"No consistent negative slope found. Using peak VO2 Breath at index {vo2_max_idx}:\")\n",
" print(f\"Time: {vo2_max_row['T(sec)']} seconds\")\n",
" print(f\"VO2 Breath: {vo2_max_row['VO2 Breath_smoothed']:.1f} ml/breath\")\n",
" print(f\"VO2: {vo2_max_row['VO2(ml/min)_smoothed']:.1f} ml/min\")\n",
" print(f\"VO2 per kg: {vo2_max_row['VO2(ml/min)_smoothed']/weight:.1f} ml/kg/min\")\n",
" print(f\"Heart Rate: {vo2_max_row['HR(bpm)_smoothed']:.1f} bpm\")\n",
" print(f\"Speed: {vo2_max_row['Speed']} km/h\")"
]
},
{
"cell_type": "code",
"execution_count": 66,
"id": "c3b2cc59",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"VO2 Pulse and HR slopes diverge consistently starting at index 89:\n",
"Time: 485.0 seconds\n",
"VO2 Pulse (smoothed): 13.91\n",
"Heart Rate (smoothed): 136.2 bpm\n",
"VO2 Pulse Slope: 0.672\n",
"HR Slope: 1.000\n",
"Slope Difference: 1.006\n",
"VO2: 1897.8 ml/min\n",
"Speed: 4.5 km/h\n",
"Threshold used: 0.615\n"
]
}
],
"source": [
"# Calculate slopes for both VO2 Pulse and HR\n",
"df['vo2_pulse_slope'] = df['VO2 Pulse_smoothed'].diff()\n",
"df['hr_slope'] = df['HR(bpm)_smoothed'].diff()\n",
"\n",
"# Calculate the difference between the slopes\n",
"df['slope_difference'] = abs(df['vo2_pulse_slope'] - df['hr_slope'])\n",
"\n",
"# Find where the slope difference becomes consistently large (slopes diverge)\n",
"# Use a rolling window to smooth out noise\n",
"window_size = len(df) // 5 # Adjust window size as needed\n",
"df['slope_difference_smoothed'] = df['slope_difference'].rolling(window=window_size).mean()\n",
"\n",
"# Find the threshold - we'll use the 75th percentile of slope differences as threshold\n",
"threshold = df['slope_difference_smoothed'].quantile(0.75)\n",
"\n",
"# Find points where slope difference exceeds threshold\n",
"divergence_mask = df['slope_difference_smoothed'] > threshold\n",
"divergence_indices = divergence_mask[divergence_mask].index\n",
"\n",
"if len(divergence_indices) > 0:\n",
" # Find the first sustained divergence point\n",
" min_consecutive_points = 5\n",
" consistent_divergence_idx = None\n",
" \n",
" for start_idx in divergence_indices:\n",
" # Check if divergence is sustained for consecutive points\n",
" consecutive_count = 0\n",
" for j in range(start_idx, min(start_idx + min_consecutive_points, len(df))):\n",
" if j in divergence_indices:\n",
" consecutive_count += 1\n",
" else:\n",
" break\n",
" \n",
" if consecutive_count >= min_consecutive_points:\n",
" consistent_divergence_idx = start_idx\n",
" break\n",
" \n",
" if consistent_divergence_idx is not None:\n",
" divergence_row = df.iloc[consistent_divergence_idx]\n",
" \n",
" print(f\"VO2 Pulse and HR slopes diverge consistently starting at index {consistent_divergence_idx}:\")\n",
" print(f\"Time: {divergence_row['T(sec)']} seconds\")\n",
" print(f\"VO2 Pulse (smoothed): {divergence_row['VO2 Pulse_smoothed']:.2f}\")\n",
" print(f\"Heart Rate (smoothed): {divergence_row['HR(bpm)_smoothed']:.1f} bpm\")\n",
" print(f\"VO2 Pulse Slope: {divergence_row['vo2_pulse_slope']:.3f}\")\n",
" print(f\"HR Slope: {divergence_row['hr_slope']:.3f}\")\n",
" print(f\"Slope Difference: {divergence_row['slope_difference_smoothed']:.3f}\")\n",
" print(f\"VO2: {divergence_row['VO2(ml/min)_smoothed']:.1f} ml/min\")\n",
" print(f\"Speed: {divergence_row['Speed']} km/h\")\n",
" print(f\"Threshold used: {threshold:.3f}\")\n",
" else:\n",
" print(f\"No sustained divergence found. Threshold: {threshold:.3f}\")\n",
" # Show the point with maximum slope difference instead\n",
" max_diff_idx = df['slope_difference_smoothed'].idxmax()\n",
" max_diff_row = df.iloc[max_diff_idx]\n",
" \n",
" print(f\"\\nPoint with maximum slope difference at index {max_diff_idx}:\")\n",
" print(f\"Time: {max_diff_row['T(sec)']} seconds\")\n",
" print(f\"VO2 Pulse (smoothed): {max_diff_row['VO2 Pulse_smoothed']:.2f}\")\n",
" print(f\"Heart Rate (smoothed): {max_diff_row['HR(bpm)_smoothed']:.1f} bpm\")\n",
" print(f\"Slope Difference: {max_diff_row['slope_difference_smoothed']:.3f}\")\n",
"else:\n",
" print(\"No significant slope divergence found between VO2 Pulse and HR\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "672d68f3",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Maximum FAT_smoothed occurs at index 30:\n",
"Heart Rate (smoothed): 96.7 bpm\n",
"FAT (smoothed): 3.894 kcal/min\n"
]
}
],
"source": [
"max_fat_smoothed_idx = df['FAT_smoothed'].idxmax()\n",
"max_fat_smoothed_row = df.loc[max_fat_smoothed_idx]\n",
"max_heart_rate = 220 - keirstyn_data['Age'].iloc[0]\n",
"\n",
"print(f\"Maximum FAT_smoothed occurs at index {max_fat_smoothed_idx}:\")\n",
"print(f\"Heart Rate (smoothed): {max_fat_smoothed_row['HR(bpm)_smoothed']:.1f} bpm\")\n",
"print(f\"FAT (smoothed): {max_fat_smoothed_row['FAT_smoothed']:.3f} kcal/min\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "fe3b7605",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "report_generation",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.3"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
+942
View File
@@ -0,0 +1,942 @@
import base64
from pathlib import Path
from typing import Dict
import matplotlib.pyplot as plt
import matplotlib.transforms as mtransforms
import numpy as np
import pandas as pd
import seaborn as sns
from matplotlib.patches import FancyBboxPatch
class GraphGenerator:
def __init__(self, charts_dir: str = "graphs"):
    """Initialize the GraphGenerator and ensure the chart directory exists.

    Args:
        charts_dir: Directory that rendered chart images are written to.
            It is created on construction, including any missing parent
            directories; an already existing directory is left untouched.
    """
    self.charts_dir = Path(charts_dir)
    # parents=True so that a nested location such as "output/graphs" does
    # not raise FileNotFoundError when "output" has not been created yet.
    self.charts_dir.mkdir(parents=True, exist_ok=True)
def _image_to_base64(self, image_path: Path) -> str:
    """Read a file and return its contents base64-encoded as UTF-8 text.

    Returns an empty string when the file does not exist.
    """
    try:
        raw_bytes = Path(image_path).read_bytes()
    except FileNotFoundError:
        return ""
    encoded = base64.b64encode(raw_bytes)
    return encoded.decode("utf-8")
def generate_respiratory_chart(
    self, df: pd.DataFrame, save_as_base64: bool = False
) -> str:
    """Generate the respiratory chart: smoothed VT and Speed over time.

    Args:
        df: Data containing the "T(sec)", "VT(l)_smoothed", "Speed" and
            "PHASE" columns read by this method.
        save_as_base64: When True, return the saved image base64-encoded
            instead of returning its path.

    Returns:
        The path of "respiratory.png" inside ``self.charts_dir`` as a
        string, or the base64 text of that file when ``save_as_base64``
        is True.
    """
    # Time stamp of the first row of each distinct PHASE value; these are
    # the boundaries of the shaded background regions.
    phase_times = df.drop_duplicates(subset="PHASE")["T(sec)"].tolist()
    last_time = df["T(sec)"].max()
    chart_path = self.charts_dir / "respiratory.png"

    fig = plt.figure(figsize=(18, 5))
    try:
        ax1 = fig.add_subplot(111)

        # Smoothed tidal volume as a regular line on the primary axis.
        sns.lineplot(
            data=df, x="T(sec)", y="VT(l)_smoothed", label="VT (L)", ax=ax1
        )
        ax1.set_xlabel("Time (sec)")
        ax1.set_ylabel("VT (L)")
        ax1.grid(True, alpha=0.1)
        # Upper limit follows the data but is capped at 8.
        ax1.set_ylim(0, min(8, df["VT(l)_smoothed"].max()))
        ax1.set_xticks(np.arange(0, last_time + 200, 200))

        # Speed as a step function on a secondary y-axis.
        ax2 = ax1.twinx()
        sns.lineplot(
            data=df,
            x="T(sec)",
            y="Speed",
            color="green",
            ax=ax2,
            drawstyle="steps-post",
            linewidth=2,
            label="Speed",
        )
        ax2.set_ylabel("Speed")
        ax2.set_ylim(0, min(30, df["Speed"].max()) + 1)

        # Drop the per-axis legends before building a combined one.
        # get_legend() returns None when no legend was created, so guard it.
        for axis in (ax1, ax2):
            legend = axis.get_legend()
            if legend is not None:
                legend.remove()

        lines1, labels1 = ax1.get_legend_handles_labels()
        lines2, labels2 = ax2.get_legend_handles_labels()
        ax1.legend(lines1 + lines2, labels1 + labels2, loc="upper left")

        # Shade four background regions; only done when at least four
        # phase start times are available.
        if len(phase_times) >= 4:
            ax1.axvspan(0, phase_times[1], alpha=0.2, color="lightblue")
            ax1.axvspan(phase_times[1], phase_times[2], alpha=0.2, color="purple")
            ax1.axvspan(
                phase_times[2], phase_times[3], alpha=0.2, color="lightgreen"
            )
            ax1.axvspan(phase_times[3], last_time, alpha=0.2, color="blue")

        fig.savefig(chart_path, dpi=300, bbox_inches="tight")
    finally:
        # Always release the figure, even when plotting or saving fails.
        plt.close(fig)

    return self._image_to_base64(chart_path) if save_as_base64 else str(chart_path)
def generate_fuel_utilization_chart(
    self, df: pd.DataFrame, save_as_base64: bool = False
) -> str:
    """Generate the fuel utilization chart: stacked fat/carbs bars plus HR.

    Rows are averaged per "Speed" value; the first and last speed groups
    are dropped and only speeds from 3.5 to 7.5 (inclusive) are charted,
    one bar ("Stage N") per remaining speed.

    Args:
        df: Data containing the "Speed", "EE(kcal/min)", "FAT(%)",
            "CARBS(%)" and "HR(bpm)" columns read by this method.
        save_as_base64: When True, return the saved image base64-encoded
            instead of returning its path.

    Returns:
        The path of "fuel_utilization_chart.png" inside ``self.charts_dir``
        as a string, or the base64 text of that file when
        ``save_as_base64`` is True.
    """
    # Mean of every numeric column per speed, rounded to one decimal.
    speed_groups = df.groupby("Speed").mean(numeric_only=True).round(1)
    speed_groups = speed_groups.iloc[1:-1]
    filtered_data = speed_groups[
        (speed_groups.index >= 3.5) & (speed_groups.index <= 7.5)
    ]
    chart_path = self.charts_dir / "fuel_utilization_chart.png"

    fig = plt.figure(figsize=(15, 8))
    try:
        plt.style.use("default")

        # Stage labels and bar positions.
        stage_labels = [f"Stage {i}" for i in range(1, len(filtered_data) + 1)]
        x_positions = np.arange(len(filtered_data))

        # Split total energy expenditure into fat and carbs by percentage.
        total_ee = filtered_data["EE(kcal/min)"]
        fat_ee = total_ee * filtered_data["FAT(%)"] / 100
        carbs_ee = total_ee * filtered_data["CARBS(%)"] / 100

        # Primary axis: stacked bars, carbs on top of fat.
        ax1 = fig.gca()
        ax1.bar(
            x_positions, fat_ee, color="#1f77b4", alpha=0.8, width=0.6, label="Fat"
        )
        ax1.bar(
            x_positions,
            carbs_ee,
            bottom=fat_ee,
            color="#ff7f0e",
            alpha=0.8,
            width=0.6,
            label="Carbs",
        )
        ax1.set_xlabel("", fontsize=12)
        ax1.set_ylabel("Fuel (kcal/min)", fontsize=12)
        ax1.set_ylim(0, 20)

        # Value labels: one per segment (skipped for segments of 0.3 or
        # less) and the total above each bar.
        for i, (fat_val, carb_val, total_val) in enumerate(
            zip(fat_ee, carbs_ee, total_ee)
        ):
            if fat_val > 0.3:
                ax1.text(
                    i,
                    fat_val / 2,
                    f"{fat_val:.1f}",
                    ha="center",
                    va="center",
                    fontsize=9,
                    fontweight="bold",
                    color="white",
                )
            if carb_val > 0.3:
                ax1.text(
                    i,
                    fat_val + carb_val / 2,
                    f"{carb_val:.1f}",
                    ha="center",
                    va="center",
                    fontsize=9,
                    fontweight="bold",
                    color="white",
                )
            ax1.text(
                i,
                total_val + 0.5,
                f"{total_val:.1f} kcal",
                ha="center",
                va="bottom",
                fontsize=10,
                fontweight="bold",
                color="black",
            )

        # Speed labels below the x-axis.
        # NOTE(review): assumes "Speed" is recorded in mph - confirm.
        for i, speed in enumerate(filtered_data.index):
            ax1.text(i, -1.5, f"{speed:.1f} mph", ha="center", va="top", fontsize=9)
            # speed * 1.609 converts mph to km/h, so the unit shown is km/h
            # (it was previously mislabelled as the pace unit "min/km").
            ax1.text(
                i,
                -2.8,
                f"{speed * 1.609:.1f} km/h",
                ha="center",
                va="top",
                fontsize=8,
                color="gray",
            )

        # Secondary axis: mean heart rate per stage.
        ax2 = ax1.twinx()
        ax2.plot(
            x_positions,
            filtered_data["HR(bpm)"],
            marker="o",
            linewidth=3,
            markersize=8,
            color="red",
            label="Heart Rate",
        )
        ax2.set_ylabel("Heart Rate (bpm)", fontsize=12, color="red")
        ax2.tick_params(axis="y", labelcolor="red")
        ax2.set_ylim(0, 220)

        # Heart-rate value above each point.
        for i, hr in enumerate(filtered_data["HR(bpm)"]):
            if pd.isna(hr):
                # int(NaN) raises ValueError; a stage without heart-rate
                # data simply gets no label.
                continue
            ax2.text(
                i,
                hr + 10,
                f"{int(hr)}bpm",
                ha="center",
                va="bottom",
                fontsize=10,
                fontweight="bold",
                color="red",
            )

        ax1.set_xticks(x_positions)
        ax1.set_xticklabels(stage_labels, fontsize=11)

        # One combined legend for bars and heart-rate line.
        lines1, labels1 = ax1.get_legend_handles_labels()
        lines2, labels2 = ax2.get_legend_handles_labels()
        ax1.legend(
            lines1 + lines2,
            labels1 + labels2,
            loc="upper left",
            frameon=True,
            fancybox=True,
            shadow=True,
        )

        ax1.grid(True, alpha=0.3, linestyle="-", linewidth=0.5)
        ax1.set_axisbelow(True)

        fig.tight_layout()
        fig.subplots_adjust(bottom=0.1, top=0.9)
        fig.savefig(chart_path, dpi=300)
    finally:
        # Always release the figure, even when plotting or saving fails.
        plt.close(fig)

    return self._image_to_base64(chart_path) if save_as_base64 else str(chart_path)
def generate_vo2_pulse_chart(
    self, df: pd.DataFrame, save_as_base64: bool = False
) -> str:
    """Plot smoothed VO2 pulse, heart rate and speed against time.

    Three y-axes share one time axis: VO2 pulse (left), heart rate (right)
    and speed (a second right-hand spine, drawn as a step line).  When the
    frame contains at least four distinct ``PHASE`` values, four background
    bands are shaded using the first ``T(sec)`` of phases 2-4 as boundaries.

    Args:
        df: Frame providing the ``T(sec)``, ``PHASE``, ``Speed``,
            ``VO2 Pulse_smoothed`` and ``HR(bpm)_smoothed`` columns.
        save_as_base64: When true, return ``self._image_to_base64(path)``
            for the saved PNG instead of the path string.

    Returns:
        The saved chart path as a string, or the base64 helper's result.
    """
    # The first row of each distinct PHASE value marks where that phase starts.
    phase_times = df.drop_duplicates(subset="PHASE")["T(sec)"].tolist()
    t_max = df["T(sec)"].max()

    fig = plt.figure(figsize=(18, 5))
    try:
        ax1 = plt.subplot()

        # Left axis: VO2 pulse.
        sns.lineplot(
            data=df,
            x="T(sec)",
            y="VO2 Pulse_smoothed",
            label="VO2 Pulse (mL/beat)",
            color="blue",
            ax=ax1,
        )
        ax1.set_xlabel("Time (sec)")
        ax1.set_ylabel("VO2 Pulse (mL/beat)")
        ax1.set_ylim(0, df["VO2 Pulse_smoothed"].max())
        ax1.grid(True, alpha=0.1)

        # First right axis: heart rate.
        ax2 = ax1.twinx()
        sns.lineplot(
            data=df,
            x="T(sec)",
            y="HR(bpm)_smoothed",
            color="red",
            ax=ax2,
            linewidth=2,
            label="Heart Rate (bpm)",
        )
        ax2.set_ylabel("Heart Rate (bpm)", color="red")
        ax2.tick_params(axis="y", labelcolor="red")
        ax2.set_ylim(0, df["HR(bpm)_smoothed"].max() + 1)

        # Second right axis: speed, pushed outward so its spine does not
        # overlap the heart-rate spine.
        ax3 = ax1.twinx()
        ax3.spines["right"].set_position(("outward", 60))
        sns.lineplot(
            data=df,
            x="T(sec)",
            y="Speed",
            color="green",
            ax=ax3,
            drawstyle="steps-post",
            linewidth=2,
            label="Speed",
        )
        ax3.set_ylabel("Speed", color="green")
        ax3.tick_params(axis="y", labelcolor="green")
        ax3.set_ylim(0, df["Speed"].max() + 1)

        ax1.set_xticks(np.arange(0, t_max + 200, 200))

        # Drop the per-axis legends, then build one combined legend on ax1.
        handles, labels = [], []
        for ax in (ax1, ax2, ax3):
            legend = ax.get_legend()
            if legend is not None:
                legend.remove()
            axis_handles, axis_labels = ax.get_legend_handles_labels()
            handles += axis_handles
            labels += axis_labels
        ax1.legend(handles, labels, loc="upper left")

        # Shade the four phase bands; skipped when fewer than four phases exist.
        if len(phase_times) >= 4:
            bounds = [0, phase_times[1], phase_times[2], phase_times[3], t_max]
            band_colors = ["lightblue", "purple", "lightgreen", "blue"]
            for start, end, color in zip(bounds, bounds[1:], band_colors):
                ax1.axvspan(start, end, alpha=0.2, color=color)

        chart_path = self.charts_dir / "vo2_pulse_chart.png"
        plt.savefig(chart_path, bbox_inches="tight", dpi=300)
    finally:
        # pyplot keeps every open figure registered; close it even when
        # plotting or saving fails so repeated failures do not pile up figures.
        plt.close(fig)
    return self._image_to_base64(chart_path) if save_as_base64 else str(chart_path)
def generate_vo2_breath_chart(
    self, df: pd.DataFrame, save_as_base64: bool = False
) -> str:
    """Plot smoothed VO2 per breath with speed on a secondary axis.

    When the frame contains at least four distinct ``PHASE`` values, four
    background bands are shaded using the first ``T(sec)`` of phases 2-4 as
    boundaries.

    Args:
        df: Frame providing the ``T(sec)``, ``PHASE``, ``Speed`` and
            ``VO2 Breath_smoothed`` columns.
        save_as_base64: When true, return ``self._image_to_base64(path)``
            for the saved PNG instead of the path string.

    Returns:
        The saved chart path as a string, or the base64 helper's result.
    """
    # The first row of each distinct PHASE value marks where that phase starts.
    phase_times = df.drop_duplicates(subset="PHASE")["T(sec)"].tolist()
    t_max = df["T(sec)"].max()

    fig = plt.figure(figsize=(18, 5))
    try:
        ax1 = plt.subplot()

        # Left axis: VO2 per breath.
        sns.lineplot(
            data=df,
            x="T(sec)",
            y="VO2 Breath_smoothed",
            label="VO2 per Breath (mL/breath)",
            ax=ax1,
        )
        ax1.set_xlabel("Time (sec)")
        ax1.set_ylabel("VO2 per Breath (mL/breath)")
        ax1.set_ylim(0, df["VO2 Breath_smoothed"].max() + 1)
        ax1.grid(True, alpha=0.1)

        # Right axis: speed as a step function.
        ax2 = ax1.twinx()
        ax1.set_xticks(np.arange(0, t_max + 200, 200))
        sns.lineplot(
            data=df,
            x="T(sec)",
            y="Speed",
            color="green",
            ax=ax2,
            drawstyle="steps-post",
            linewidth=2,
            label="Speed",
        )
        ax2.set_ylim(0, df["Speed"].max() + 1)
        ax2.set_ylabel("Speed")

        # Drop the per-axis legends (guarding against an axis that has none),
        # then build one combined legend in the top left.
        handles, labels = [], []
        for ax in (ax1, ax2):
            legend = ax.get_legend()
            if legend is not None:
                legend.remove()
            axis_handles, axis_labels = ax.get_legend_handles_labels()
            handles += axis_handles
            labels += axis_labels
        ax1.legend(handles, labels, loc="upper left")

        # Shade the four phase bands; skipped when fewer than four phases exist.
        if len(phase_times) >= 4:
            bounds = [0, phase_times[1], phase_times[2], phase_times[3], t_max]
            band_colors = ["lightblue", "purple", "lightgreen", "blue"]
            for start, end, color in zip(bounds, bounds[1:], band_colors):
                ax1.axvspan(start, end, alpha=0.2, color=color)

        chart_path = self.charts_dir / "vo2_breath_chart.png"
        plt.savefig(chart_path, bbox_inches="tight", dpi=300)
    finally:
        # pyplot keeps every open figure registered; close it even when
        # plotting or saving fails so repeated failures do not pile up figures.
        plt.close(fig)
    return self._image_to_base64(chart_path) if save_as_base64 else str(chart_path)
def generate_fat_metabolism_chart(
    self, df: pd.DataFrame, save_as_base64: bool = False
) -> str:
    """Plot smoothed carbohydrate (CHO) and fat energy use against time.

    CHO is drawn on the left axis and FAT on the right axis, whose range is
    fixed to 0-15.  When the frame contains at least four distinct ``PHASE``
    values, four background bands are shaded using the first ``T(sec)`` of
    phases 2-4 as boundaries.

    Args:
        df: Frame providing the ``T(sec)``, ``PHASE``, ``CHO_smoothed`` and
            ``FAT_smoothed`` columns.
        save_as_base64: When true, return ``self._image_to_base64(path)``
            for the saved PNG instead of the path string.

    Returns:
        The saved chart path as a string, or the base64 helper's result.
    """
    # The first row of each distinct PHASE value marks where that phase starts.
    phase_times = df.drop_duplicates(subset="PHASE")["T(sec)"].tolist()
    t_max = df["T(sec)"].max()

    fig = plt.figure(figsize=(18, 5))
    try:
        ax1 = plt.subplot()

        # Left axis: carbohydrate energy expenditure.
        sns.lineplot(
            data=df, x="T(sec)", y="CHO_smoothed", label="CHO (kcal/min)", ax=ax1
        )
        ax1.set_xlabel("Time (sec)")
        ax1.set_ylabel("CHO (kcal/min)")
        ax1.grid(True, alpha=0.1)

        # Right axis: fat energy expenditure.
        ax2 = ax1.twinx()
        ax1.set_xticks(np.arange(0, t_max + 200, 200))
        sns.lineplot(
            data=df,
            x="T(sec)",
            y="FAT_smoothed",
            color="green",
            ax=ax2,
            label="FAT (kcal/min)",
        )
        ax2.set_ylabel("FAT (kcal/min)")
        ax2.set_ylim(0, 15)

        # Drop the per-axis legends (guarding against an axis that has none),
        # then build one combined legend in the top left.
        handles, labels = [], []
        for ax in (ax1, ax2):
            legend = ax.get_legend()
            if legend is not None:
                legend.remove()
            axis_handles, axis_labels = ax.get_legend_handles_labels()
            handles += axis_handles
            labels += axis_labels
        ax1.legend(handles, labels, loc="upper left")

        # Shade the four phase bands; skipped when fewer than four phases exist.
        if len(phase_times) >= 4:
            bounds = [0, phase_times[1], phase_times[2], phase_times[3], t_max]
            band_colors = ["lightblue", "purple", "lightgreen", "blue"]
            for start, end, color in zip(bounds, bounds[1:], band_colors):
                ax1.axvspan(start, end, alpha=0.2, color=color)

        chart_path = self.charts_dir / "fat_metabolism_chart.png"
        plt.savefig(chart_path, bbox_inches="tight", dpi=300)
    finally:
        # pyplot keeps every open figure registered; close it even when
        # plotting or saving fails so repeated failures do not pile up figures.
        plt.close(fig)
    return self._image_to_base64(chart_path) if save_as_base64 else str(chart_path)
def generate_recovery_chart(
    self, df: pd.DataFrame, save_as_base64: bool = False
) -> str:
    """Plot smoothed VCO2, heart rate and breathing frequency against time.

    Three y-axes share one time axis: VCO2 (left), heart rate (right) and
    breathing frequency (a second right-hand spine).  When the frame contains
    at least four distinct ``PHASE`` values, four background bands are shaded
    using the first ``T(sec)`` of phases 2-4 as boundaries.

    Args:
        df: Frame providing the ``T(sec)``, ``PHASE``, ``VCO2(ml/min)``,
            ``VCO2(ml/min)_smoothed``, ``HR(bpm)_smoothed`` and
            ``BF(bpm)_smoothed`` columns.
        save_as_base64: When true, return ``self._image_to_base64(path)``
            for the saved PNG instead of the path string.

    Returns:
        The saved chart path as a string, or the base64 helper's result.
    """
    # The first row of each distinct PHASE value marks where that phase starts.
    phase_times = df.drop_duplicates(subset="PHASE")["T(sec)"].tolist()
    t_max = df["T(sec)"].max()

    fig = plt.figure(figsize=(18, 5))
    try:
        ax1 = plt.subplot()

        # Left axis: VCO2.
        sns.lineplot(
            data=df,
            x="T(sec)",
            y="VCO2(ml/min)_smoothed",
            label="VCO2 (ml/min)",
            color="blue",
            ax=ax1,
        )
        ax1.set_xlabel("Time (sec)")
        ax1.set_ylabel("VCO2 (ml/min)")
        # NOTE(review): the limit uses the raw column while the line plots the
        # smoothed one - confirm this is intentional.
        ax1.set_ylim(0, df["VCO2(ml/min)"].max())
        ax1.grid(True, alpha=0.1)

        # First right axis: heart rate, scaled from its own minimum.
        ax2 = ax1.twinx()
        sns.lineplot(
            data=df,
            x="T(sec)",
            y="HR(bpm)_smoothed",
            color="red",
            ax=ax2,
            linewidth=2,
            label="Heart Rate (bpm)",
        )
        ax2.set_ylabel("Heart Rate (bpm)", color="red")
        ax2.set_ylim(df["HR(bpm)_smoothed"].min(), df["HR(bpm)_smoothed"].max() + 1)
        ax2.tick_params(axis="y", labelcolor="red")

        # Second right axis: breathing frequency, pushed outward so its spine
        # does not overlap the heart-rate spine.
        ax3 = ax1.twinx()
        ax3.spines["right"].set_position(("outward", 60))
        sns.lineplot(
            data=df,
            x="T(sec)",
            y="BF(bpm)_smoothed",
            color="green",
            ax=ax3,
            linewidth=2,
            label="BF (bpm)",
        )
        ax3.set_ylabel("BF (bpm)", color="green")
        ax3.tick_params(axis="y", labelcolor="green")
        ax3.set_ylim(0, df["BF(bpm)_smoothed"].max() + 1)

        ax1.set_xticks(np.arange(0, t_max + 200, 200))

        # Drop the per-axis legends, then build one combined legend on ax1.
        handles, labels = [], []
        for ax in (ax1, ax2, ax3):
            legend = ax.get_legend()
            if legend is not None:
                legend.remove()
            axis_handles, axis_labels = ax.get_legend_handles_labels()
            handles += axis_handles
            labels += axis_labels
        ax1.legend(handles, labels, loc="upper left")

        # Shade the four phase bands; skipped when fewer than four phases exist.
        if len(phase_times) >= 4:
            bounds = [0, phase_times[1], phase_times[2], phase_times[3], t_max]
            band_colors = ["lightblue", "purple", "lightgreen", "blue"]
            for start, end, color in zip(bounds, bounds[1:], band_colors):
                ax1.axvspan(start, end, alpha=0.2, color=color)

        chart_path = self.charts_dir / "recovery_chart.png"
        plt.savefig(chart_path, bbox_inches="tight", dpi=300)
    finally:
        # pyplot keeps every open figure registered; close it even when
        # plotting or saving fails so repeated failures do not pile up figures.
        plt.close(fig)
    return self._image_to_base64(chart_path) if save_as_base64 else str(chart_path)
def generate_body_fat_percentage_chart(
    self,
    gender: str,
    age: int,
    body_fat_percentage: float,
    save_as_base64: bool = False,
) -> str:
    """Draw a 0-50 % segmented bar with a marker at the given body-fat value.

    The coloured bands are fixed; ``gender`` and ``age`` only feed the label
    printed to the left of the bar (age bracket plus the gender initial).

    Args:
        gender: Gender name; its first non-blank character is shown in
            upper case.  An empty or blank value omits the initial.
        age: Age in years; ages outside 20-79 are labelled ``N/A``.
        body_fat_percentage: Position of the triangle marker on the bar.
        save_as_base64: When true, return ``self._image_to_base64(path)``
            for the saved PNG instead of the path string.

    Returns:
        The saved chart path as a string, or the base64 helper's result.
    """
    # (colour, start %, width %) for each band.
    segments = [
        ("#F8A8A8", 0, 15),  # Muted Red/Salmon: 0% to 15%
        ("#FFEECC", 15, 5),  # Pale Yellow/Cream: 15% to 20%
        ("#D0F0C0", 20, 15),  # Pale Green/Mint: 20% to 35%
        ("#FFEECC", 35, 5),  # Pale Yellow/Cream: 35% to 40%
        ("#F8A8A8", 40, 10),  # Muted Red/Salmon: 40% to 50%
    ]

    if 20 <= age <= 39:
        age_group = "20-39"
    elif 40 <= age <= 59:
        age_group = "40-59"
    elif 60 <= age <= 79:
        age_group = "60-79"
    else:
        age_group = "N/A"

    # Slicing (rather than indexing) keeps an empty gender from raising.
    gender_initial = gender.strip()[:1].upper() if gender else ""
    demographic = f"{age_group}\n({gender_initial})" if gender_initial else age_group

    tick_positions = range(0, 51, 5)

    fig, ax = plt.subplots(figsize=(10, 2))
    try:
        # Segmented background bar.
        for color, start, length in segments:
            ax.barh(
                y=0,
                width=length,
                left=start,
                height=1,
                color=color,
                edgecolor="black",
                linewidth=0.5,
            )

        # Downward triangle just above the bar: x in data units, y in axes
        # fraction.
        ax.plot(
            body_fat_percentage,
            1.05,
            marker="v",
            color="black",
            markersize=10,
            clip_on=False,
            transform=ax.get_xaxis_transform(),
        )

        # Axis range, percentage tick labels and the demographic label.
        ax.set_xlim(0, 50)
        ax.set_xticks(tick_positions)
        ax.set_xticklabels([f"{t}%" for t in tick_positions])
        ax.set_yticks([])
        ax.text(
            -0.05,
            0,
            demographic,
            transform=ax.get_yaxis_transform(),
            va="center",
            ha="right",
            fontsize=12,
        )

        # Keep only the bottom spine and add short tick marks under the bar.
        ax.spines["right"].set_visible(False)
        ax.spines["top"].set_visible(False)
        ax.spines["left"].set_visible(False)
        ax.spines["bottom"].set_visible(True)
        for x in tick_positions:
            ax.plot(
                [x, x],
                [-0.05, -0.01],
                color="black",
                transform=ax.get_xaxis_transform(),
                clip_on=False,
            )

        plt.tight_layout()
        chart_path = self.charts_dir / "body_fat_percentage_chart.png"
        plt.savefig(chart_path, bbox_inches="tight", dpi=300)
    finally:
        # pyplot keeps every open figure registered; close it even when
        # plotting or saving fails so repeated failures do not pile up figures.
        plt.close(fig)
    return self._image_to_base64(chart_path) if save_as_base64 else str(chart_path)
def generate_body_composition_chart(
    self, fat_mass_lbs: float, lean_mass_lbs: float, save_as_base64: bool = False
) -> str:
    """Draw a donut chart of fat mass versus lean mass.

    Args:
        fat_mass_lbs: Fat mass in pounds.
        lean_mass_lbs: Lean mass in pounds.
        save_as_base64: When true, return ``self._image_to_base64(path)``
            for the saved PNG instead of the path string.

    Returns:
        The saved chart path as a string, or the base64 helper's result.

    Raises:
        ValueError: If the two masses do not sum to a positive number, since
            the percentage split would be undefined.
    """
    total_weight = fat_mass_lbs + lean_mass_lbs
    if total_weight <= 0:
        raise ValueError("fat_mass_lbs + lean_mass_lbs must be greater than zero")

    fat_percentage = (fat_mass_lbs / total_weight) * 100
    lean_percentage = (lean_mass_lbs / total_weight) * 100

    sizes = [fat_percentage, lean_percentage]
    colors = ["#fde3ac", "#ff9966"]  # Light yellow/tan and orange

    fig = plt.figure(figsize=(8, 8))
    try:
        # Donut with no built-in labels or percentages; the captions are
        # placed by hand below.
        plt.pie(
            sizes,
            startangle=90,
            wedgeprops=dict(width=0.5, edgecolor="w"),
            colors=colors,
            labels=["", ""],
        )

        # Captions at fixed positions: fat top-left, lean bottom-right.
        plt.text(
            -1,
            1,
            f"Fat Mass ({fat_mass_lbs:.1f}lbs)\n{fat_percentage:.1f}%",
            fontsize=14,
            fontweight="bold",
            ha="center",
            va="center",
            bbox=dict(boxstyle="round,pad=0.3", facecolor="white", alpha=0.8),
        )
        plt.text(
            1,
            -1,
            f"Lean Mass ({lean_mass_lbs:.1f}lbs)\n{lean_percentage:.1f}%",
            fontsize=14,
            fontweight="bold",
            ha="center",
            va="center",
            bbox=dict(boxstyle="round,pad=0.3", facecolor="white", alpha=0.8),
        )

        plt.axis("equal")  # Equal aspect ratio ensures that pie is drawn as a circle
        chart_path = self.charts_dir / "body_composition_chart.png"
        plt.savefig(chart_path, bbox_inches="tight", dpi=600)
    finally:
        # pyplot keeps every open figure registered; close it even when
        # plotting or saving fails so repeated failures do not pile up figures.
        plt.close(fig)
    return self._image_to_base64(chart_path) if save_as_base64 else str(chart_path)
def generate_spirometry_chart(
    self, spirometry_df: pd.DataFrame, save_as_base64: bool = False
) -> str:
    """Render Z-score bars for FVC, FEV1 and FEV1/FVC with summary boxes.

    Each parameter found in the ``Parameters`` column gets one horizontal
    panel spanning Z = -5 to 3 with coloured bands and a triangle at the
    parameter's Z-score, plus a box on the right showing its best value and
    percent of predicted.  Parameters missing from the table are left out
    and their panel is hidden.

    Args:
        spirometry_df: Table with a ``Parameters`` column and the ``Best``,
            ``%Pred.`` and ``ZScore`` columns.  The frame is not modified.
        save_as_base64: When true, return ``self._image_to_base64(path)``
            for the saved PNG instead of the path string.

    Returns:
        The saved chart path as a string, or the base64 helper's result.
    """
    # Coerce on a copy so the caller's frame is not rewritten in place.
    spirometry_df = spirometry_df.copy()
    for col in ["Best", "LLN", "Pred.", "%Pred.", "ZScore"]:
        if col in spirometry_df.columns:
            spirometry_df[col] = pd.to_numeric(spirometry_df[col], errors="coerce")

    # Panel title -> parameter name; insertion order is the display order.
    rows_map = {
        "Lung Volume": "FVC",
        "Lung Power": "FEV1",
        "Power/Volume": "FEV1/FVC%",
    }
    parameter_names = spirometry_df["Parameters"].str.strip()
    records = []
    for label, param in rows_map.items():
        matches = spirometry_df.loc[parameter_names == param]
        if matches.empty:
            continue
        row = matches.iloc[0]
        records.append(
            {
                "label": label,
                "param": param,
                "best": row["Best"],
                "pct": row["%Pred."],
                "z": row["ZScore"],
            }
        )

    x_min, x_max = -5, 3
    # Segment colors: red -> orange -> yellow -> green
    segments = [
        (-5, -4, "#f4a7a7"),  # red-ish
        (-4, -3, "#f7c49a"),  # orange-ish
        (-3, -1.7, "#f6e3a3"),  # yellow-ish
        (-1.7, 3, "#c9f0cc"),  # green-ish
    ]
    ticks = np.arange(x_min, x_max + 1, 1)
    labels = [str(i) for i in ticks]

    fig, axes = plt.subplots(
        nrows=3,
        ncols=1,
        figsize=(11.5, 3.6),
        sharex=True,
        gridspec_kw={"hspace": 0.65},
    )
    try:
        for ax, rec in zip(axes, records):
            # Background segments
            for a, b, color in segments:
                ax.barh(
                    0, width=b - a, left=a, height=0.6, color=color, edgecolor="none"
                )
            # Reference line at Z = 0 (labelled "Predicted" on the top panel);
            # the "LLN" label sits at the -1.7 yellow/green boundary.
            ax.axvline(0, color="black", lw=1)
            # Z-score pointer: x in data units, y in axes fraction so it sits
            # just above the panel.
            if pd.notna(rec["z"]):
                trans = mtransforms.blended_transform_factory(
                    ax.transData, ax.transAxes
                )
                ax.plot(
                    float(rec["z"]),
                    1.2,
                    marker="v",
                    markersize=12,
                    color="dimgray",
                    transform=trans,
                    clip_on=False,
                )
            ax.set_title(
                rec["label"], loc="left", fontsize=11, fontweight="bold", pad=2
            )
            ax.set_xlim(x_min, x_max)
            ax.set_yticks([])
            ax.set_xticks(ticks)
            ax.set_xticklabels(labels, fontsize=8)
            ax.set_xlabel("")

        # Panels without a matching parameter would otherwise show as empty
        # default axes.
        for unused_ax in axes[len(records):]:
            unused_ax.set_visible(False)

        # Top annotations
        axes[0].text(-1.7, 0.45, "LLN", ha="center", va="bottom", fontsize=9)
        axes[0].text(0, 0.45, "Predicted", ha="center", va="bottom", fontsize=9)

        # Right-side summary boxes live on their own invisible axes.
        fig.subplots_adjust(right=0.78)
        box_ax = fig.add_axes(
            [0.805, 0.06, 0.18, 0.90]
        )  # [left, bottom, width, height]
        box_ax.axis("off")

        def pill(ax, xy, text):
            """Draw a rounded box centred on *xy* holding *text* and a caption."""
            x, y = xy
            bbox = FancyBboxPatch(
                (x - 0.48, y - 0.09),
                0.96,
                0.18,
                boxstyle="round,pad=0.02,rounding_size=0.08",
                ec="#dddddd",
                fc="#f3f3f3",
                linewidth=1.0,
            )
            ax.add_patch(bbox)
            ax.text(
                x,
                y + 0.025,
                text,
                ha="center",
                va="center",
                fontsize=11,
                fontweight="bold",
            )
            ax.text(
                x,
                y - 0.055,
                "of predicted",
                ha="center",
                va="center",
                fontsize=9,
                color="#555555",
            )

        box_ax.set_xlim(0, 1)
        box_ax.set_ylim(0, 1)

        # One summary box per available record.  ``records`` already follows
        # the FVC, FEV1, FEV1/FVC order, so a missing parameter simply yields
        # fewer boxes instead of a failed lookup.
        right_items = []
        for rec in records:
            name = rec["param"] if rec["param"] in ("FVC", "FEV1") else "FEV1/FVC"
            unit = "L" if rec["param"] in ("FVC", "FEV1") else "%"
            value_fmt = f"{rec['best']:.2f}{unit}"
            pct_fmt = f"{rec['pct']:.1f}%"
            right_items.append((name, value_fmt, pct_fmt))

        ys = [0.82, 0.48, 0.15]
        for (name, value_fmt, pct_fmt), y in zip(right_items, ys):
            main_line = f"{name}\n{value_fmt}{pct_fmt}"
            pill(box_ax, (0.5, y), main_line)

        chart_path = self.charts_dir / "spirometry_chart.png"
        plt.savefig(chart_path, dpi=300, bbox_inches="tight")
    finally:
        # pyplot keeps every open figure registered; close it even when
        # plotting or saving fails so repeated failures do not pile up figures.
        plt.close(fig)
    return self._image_to_base64(chart_path) if save_as_base64 else str(chart_path)
def generate_all_charts(
    self,
    pnoe_df: pd.DataFrame,
    spirometry_df: pd.DataFrame,
    patient_data: Dict,
    save_as_base64: bool = False,
) -> Dict[str, str]:
    """Build every available chart and collect the results by name.

    The six Pnoe charts and the spirometry chart are always produced.  The
    body-fat bar needs ``gender``, ``age`` and ``fat_percentage`` in
    ``patient_data``; the composition donut needs ``fat_mass_lbs`` and
    ``lean_mass_lbs``.  Either is skipped when a key is absent.

    Args:
        pnoe_df: Frame handed to each Pnoe chart method.
        spirometry_df: Frame handed to the spirometry chart method.
        patient_data: Optional body-composition inputs (see above).
        save_as_base64: Forwarded unchanged to every chart method.

    Returns:
        Mapping of chart key to whatever the chart method returned.
    """
    # (result key, method name) for the charts driven by the Pnoe frame.
    # Methods are looked up one at a time, right before they are called.
    pnoe_charts = (
        ("respiratory", "generate_respiratory_chart"),
        ("fuel_utilization_chart", "generate_fuel_utilization_chart"),
        ("vo2_pulse_chart", "generate_vo2_pulse_chart"),
        ("vo2_breath_chart", "generate_vo2_breath_chart"),
        ("fat_metabolism_chart", "generate_fat_metabolism_chart"),
        ("recovery_chart", "generate_recovery_chart"),
    )
    results = {}
    for key, method_name in pnoe_charts:
        results[key] = getattr(self, method_name)(pnoe_df, save_as_base64)

    # Body-composition charts depend on optional patient fields.
    body_fat_keys = ("gender", "age", "fat_percentage")
    if all(field in patient_data for field in body_fat_keys):
        results["body_fat_percentage_chart"] = self.generate_body_fat_percentage_chart(
            patient_data["gender"],
            patient_data["age"],
            patient_data["fat_percentage"],
            save_as_base64,
        )

    composition_keys = ("fat_mass_lbs", "lean_mass_lbs")
    if all(field in patient_data for field in composition_keys):
        results["body_composition_chart"] = self.generate_body_composition_chart(
            patient_data["fat_mass_lbs"],
            patient_data["lean_mass_lbs"],
            save_as_base64,
        )

    results["spirometry_chart"] = self.generate_spirometry_chart(
        spirometry_df, save_as_base64
    )
    return results
# Example usage: build every chart from sample files when run as a script.
if __name__ == "__main__":

    def _to_numeric_or_keep(column):
        """Return *column* converted to numbers, or unchanged if it cannot be."""
        # Explicit replacement for the deprecated errors="ignore" option.
        try:
            return pd.to_numeric(column)
        except (ValueError, TypeError):
            return column

    # Initialize graph generator
    generator = GraphGenerator()

    # Load sample data (you would pass your actual dataframes)
    pnoe_df = pd.read_csv("data/Pnoe_20250729_1550-Moran_Keirstyn.csv", delimiter=";")
    spirometry_df = pd.read_csv("data/spirometry_data.csv")

    # Convert every column that parses as numeric; leave the others as read.
    pnoe_df = pnoe_df.apply(_to_numeric_or_keep)

    # Derived columns used by the charts.
    pnoe_df["VO2 Pulse"] = pnoe_df["VO2(ml/min)"] / pnoe_df["HR(bpm)"]
    pnoe_df["VO2 Breath"] = pnoe_df["VO2(ml/min)"] / pnoe_df["BF(bpm)"]
    pnoe_df["CHO"] = pnoe_df["EE(kcal/min)"] * pnoe_df["CARBS(%)"] / 100
    pnoe_df["FAT"] = pnoe_df["EE(kcal/min)"] * pnoe_df["FAT(%)"] / 100

    # Rolling-mean smoothing; min_periods=1 keeps the first rows populated.
    window_size = 10
    columns_to_smooth = [
        "VO2(ml/min)",
        "VCO2(ml/min)",
        "HR(bpm)",
        "VT(l)",
        "BF(bpm)",
        "VE(l/min)",
        "VO2 Pulse",
        "VO2 Breath",
        "CHO",
        "FAT",
    ]
    for col in columns_to_smooth:
        if col in pnoe_df.columns:
            pnoe_df[f"{col}_smoothed"] = (
                pnoe_df[col].rolling(window=window_size, min_periods=1).mean()
            )

    # Patient data
    patient_data = {
        "gender": "female",
        "age": 25,
        "fat_percentage": 22.4,
        "fat_mass_lbs": 27.6,
        "lean_mass_lbs": 95.4,
    }

    # Generate all charts
    charts = generator.generate_all_charts(
        pnoe_df, spirometry_df, patient_data, save_as_base64=True
    )
    print(f"Generated {len(charts)} charts:")
    for chart_name in charts:
        print(f"- {chart_name}")
-124
View File
@@ -1,124 +0,0 @@
from jinja2 import Environment, FileSystemLoader
from playwright.sync_api import sync_playwright
from context import context_list
# Templates are loaded from the "report_gen" directory.
env = Environment(loader=FileSystemLoader("report_gen"))
html_pages = []

# Values rendered into the shared page header.
header_context = {
    "patient_name": "Keirstyn Moran",
    "age": 34,
    "height": "5'4\"",
    "weight": "123lbs",
    "focus": "Endurance",
}

# One footer context per page so each footer carries its own page number.
footer_context = [
    {
        # No trailing whitespace: the value is rendered verbatim.
        "contact_email": "info@ishplabs.com",
        "website": "www.ishplabs.com",
        "social": "@ishplabs",
        "page_number": i + 1,
    }
    for i in range(len(context_list))
]
header_html = env.get_template("header.html").render(header_context)
footer_html_list = [
    env.get_template("footer.html").render(context) for context in footer_context
]

# Render page_1.html, page_2.html, ... with the matching context.  The first
# two pages are used as rendered; later pages are wrapped with the shared
# header and their own footer.
for i, context in enumerate(context_list):
    template = env.get_template(f"page_{i + 1}.html").render(context)
    if (i + 1) > 2:
        full_html = f"""
<div class="page flex flex-col justify-between">
<div>
{header_html}
</div>
<main class="flex-grow p-4">
{template}
</main>
<div class="border-t text-center text-sm text-gray-600">
{footer_html_list[i]}
</div>
</div>
"""
        html_pages.append(full_html)
    else:
        html_pages.append(template)

# Combine with page breaks
final_html = "<div class='page-break'></div>".join(html_pages)

# Wrap in full HTML document
html_doc = f"""
<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8">
<link href="https://cdn.jsdelivr.net/npm/tailwindcss/dist/tailwind.min.css" rel="stylesheet">
<style>
html, body {{
height: 100%;
margin: 0;
padding: 0;
}}
.page-break {{ page-break-after: always; }}
.page {{
height: 100vh;
min-height: 100vh;
display: flex;
flex-direction: column;
}}
.page main {{
flex: 1;
overflow: hidden;
}}
/* Reset margins and padding everywhere */
* {{
margin: 0;
padding: 0;
box-sizing: border-box;
}}
/* Prevent images from being too large */
img {{
max-height: 300px;
}}
/* Larger images for specific charts */
.chart-large {{
max-height: 500px !important;
}}
</style>
</head>
<body class="m-0 p-0">
{final_html}
</body>
</html>
"""
# Generate PDF
def html_string_to_pdf(html_content, pdf_path):
    """Render an HTML string to an A4 PDF using headless Chromium.

    Args:
        html_content: Complete HTML document to load into the page.
        pdf_path: Destination path of the PDF file.
    """
    with sync_playwright() as p:
        browser = p.chromium.launch()
        try:
            page = browser.new_page()
            # Set the HTML directly
            page.set_content(html_content)
            # print_background=True keeps CSS background colours in the PDF.
            page.pdf(path=pdf_path, format="A4", print_background=True)
        finally:
            # Close the browser even when loading or printing fails.
            browser.close()
# Write the assembled document to disk and report success.
html_string_to_pdf(html_content=html_doc, pdf_path="multi_page_report.pdf")
print("✅ PDF generated: multi_page_report.pdf")
File diff suppressed because one or more lines are too long
+318
View File
@@ -0,0 +1,318 @@
"""
Report Generator Service
This service handles the generation of medical reports from uploaded files.
It processes data, generates graphs, and creates PDF reports.
"""
from pathlib import Path
from typing import Any, Dict, List
import pandas as pd
from jinja2 import Environment, FileSystemLoader
from playwright.sync_api import sync_playwright
from app.services.context import context_list
from app.services.graph_generator import GraphGenerator
class ReportGeneratorService:
    """Service for generating medical performance reports.

    The service loads a Pnoe CSV export, derives and smooths the metrics the
    charts need, renders the charts through ``GraphGenerator``, renders the
    Jinja2 page templates into a single HTML document, and prints that
    document to PDF with Playwright.
    """

    def __init__(
        self,
        template_dir: str = "app/report_gen",
        graphs_dir: str = "graphs",
        reports_dir: str = "reports",
    ):
        """
        Initialize the report generator service.

        Args:
            template_dir: Directory containing Jinja2 templates
            graphs_dir: Directory to save generated graphs
            reports_dir: Directory to save generated reports
        """
        self.template_dir = template_dir
        self.graphs_dir = Path(graphs_dir)
        self.reports_dir = Path(reports_dir)

        # Create the output directories first, including missing parents, so
        # nested paths such as "out/graphs" work.
        self.graphs_dir.mkdir(parents=True, exist_ok=True)
        self.reports_dir.mkdir(parents=True, exist_ok=True)

        self.graph_generator = GraphGenerator(charts_dir=str(graphs_dir))
        self.env = Environment(loader=FileSystemLoader(template_dir))

    @staticmethod
    def _to_numeric_or_keep(column: pd.Series) -> pd.Series:
        """Return *column* converted to numbers, or unchanged if it cannot be."""
        # Explicit replacement for the deprecated errors="ignore" option.
        try:
            return pd.to_numeric(column)
        except (ValueError, TypeError):
            return column

    @staticmethod
    def _safe_filename_part(value: Any) -> str:
        """Return *value* as text with filename-unsafe characters replaced by "_"."""
        # Letters, digits, "-", "_" and "." survive; path separators and
        # spaces do not, so the result cannot point outside the target folder.
        return "".join(
            ch if ch.isalnum() or ch in "-_." else "_" for ch in str(value)
        )

    def process_pnoe_data(self, pnoe_csv_path: str) -> pd.DataFrame:
        """
        Load and process Pnoe CSV data.

        Adds the derived ``VO2 Pulse``, ``VO2 Breath``, ``CHO`` and ``FAT``
        columns plus a ``<name>_smoothed`` rolling mean for each known
        metric column that is present.

        Args:
            pnoe_csv_path: Path to the semicolon-delimited Pnoe CSV file

        Returns:
            Processed DataFrame with smoothed columns
        """
        df = pd.read_csv(pnoe_csv_path, delimiter=";")
        # Convert every column that parses as numeric; leave the rest as read.
        df = df.apply(self._to_numeric_or_keep)

        # Treat a zero heart rate or breathing frequency as missing so the
        # ratios are NaN rather than infinite; an infinite value would carry
        # into the smoothed columns and their maxima.
        heart_rate = df["HR(bpm)"].mask(df["HR(bpm)"] == 0)
        breath_freq = df["BF(bpm)"].mask(df["BF(bpm)"] == 0)
        df["VO2 Pulse"] = df["VO2(ml/min)"] / heart_rate
        df["VO2 Breath"] = df["VO2(ml/min)"] / breath_freq
        df["CHO"] = df["EE(kcal/min)"] * df["CARBS(%)"] / 100
        df["FAT"] = df["EE(kcal/min)"] * df["FAT(%)"] / 100

        # Rolling-mean smoothing; min_periods=1 keeps the first rows populated.
        window_size = 10
        columns_to_smooth = [
            "VO2(ml/min)",
            "VCO2(ml/min)",
            "HR(bpm)",
            "VT(l)",
            "BF(bpm)",
            "VE(l/min)",
            "VO2 Pulse",
            "VO2 Breath",
            "CHO",
            "FAT",
        ]
        for col in columns_to_smooth:
            if col in df.columns:
                df[f"{col}_smoothed"] = (
                    df[col].rolling(window=window_size, min_periods=1).mean()
                )
        return df

    def generate_graphs(self, df: pd.DataFrame) -> List[Dict[str, str]]:
        """
        Generate all required graphs from processed data.

        Chart generation is best-effort: a chart that fails is reported on
        stdout and skipped, and the remaining charts are still attempted.

        Args:
            df: Processed DataFrame with smoothed columns

        Returns:
            List of dictionaries containing graph names and paths
        """
        graph_methods = [
            ("respiratory", self.graph_generator.generate_respiratory_chart),
            ("fuel_utilization", self.graph_generator.generate_fuel_utilization_chart),
            ("vo2_pulse", self.graph_generator.generate_vo2_pulse_chart),
            ("vo2_breath", self.graph_generator.generate_vo2_breath_chart),
            ("fat_metabolism", self.graph_generator.generate_fat_metabolism_chart),
            ("recovery", self.graph_generator.generate_recovery_chart),
        ]
        graphs_generated = []
        for name, method in graph_methods:
            try:
                path = method(df, save_as_base64=False)
            except Exception as e:
                # Deliberately broad: one broken chart must not abort the report.
                print(f"Warning: Could not generate {name} chart: {e}")
            else:
                graphs_generated.append({"name": name, "path": str(path)})
        return graphs_generated

    def calculate_analysis_metrics(self, df: pd.DataFrame) -> Dict[str, Any]:
        """
        Calculate basic analysis metrics from processed data.

        Each metric is the maximum of a smoothed column, or 0 when that
        column is absent.

        Args:
            df: Processed DataFrame with smoothed columns

        Returns:
            Dictionary with ``vo2_max``, ``peak_vt`` and ``max_hr``
        """
        metric_columns = {
            "vo2_max": "VO2(ml/min)_smoothed",
            "peak_vt": "VT(l)_smoothed",
            "max_hr": "HR(bpm)_smoothed",
        }
        return {
            metric: float(df[column].max()) if column in df.columns else 0
            for metric, column in metric_columns.items()
        }

    def generate_html(self, patient_info: Dict[str, Any]) -> str:
        """
        Generate HTML content for the report.

        Page contents come from the imported ``context_list``: entry ``i`` is
        rendered with ``page_{i + 1}.html``.  The first two pages are used as
        rendered; later pages are wrapped with the shared header and their
        own numbered footer.

        Args:
            patient_info: Dictionary containing patient information
                (patient_name, age, height, weight, focus)

        Returns:
            Complete HTML document as string
        """
        html_pages = []

        header_context = {
            "patient_name": patient_info.get("patient_name", ""),
            "age": patient_info.get("age", ""),
            "height": patient_info.get("height", ""),
            "weight": patient_info.get("weight", ""),
            "focus": patient_info.get("focus", "Endurance"),
        }

        # One footer context per page so each footer carries its page number.
        footer_context = [
            {
                "contact_email": "info@ishplabs.com",
                "website": "www.ishplabs.com",
                "social": "@ishplabs",
                "page_number": i + 1,
            }
            for i in range(len(context_list))
        ]

        header_html = self.env.get_template("header.html").render(header_context)
        footer_html_list = [
            self.env.get_template("footer.html").render(context)
            for context in footer_context
        ]

        for i, context in enumerate(context_list):
            template = self.env.get_template(f"page_{i + 1}.html").render(context)
            if (i + 1) > 2:
                full_html = f"""
<div class="page flex flex-col justify-between">
<div>
{header_html}
</div>
<main class="flex-grow p-4">
{template}
</main>
<div class="border-t text-center text-sm text-gray-600">
{footer_html_list[i]}
</div>
</div>
"""
                html_pages.append(full_html)
            else:
                html_pages.append(template)

        # Combine with page breaks
        final_html = "<div class='page-break'></div>".join(html_pages)

        # Wrap in full HTML document
        html_doc = f"""
<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8">
<link href="https://cdn.jsdelivr.net/npm/tailwindcss/dist/tailwind.min.css" rel="stylesheet">
<style>
html, body {{
height: 100%;
margin: 0;
padding: 0;
}}
.page-break {{ page-break-after: always; }}
.page {{
height: 100vh;
min-height: 100vh;
display: flex;
flex-direction: column;
}}
.page main {{
flex: 1;
overflow: hidden;
}}
* {{
margin: 0;
padding: 0;
box-sizing: border-box;
}}
img {{
max-height: 300px;
}}
.chart-large {{
max-height: 500px !important;
}}
</style>
</head>
<body class="m-0 p-0">
{final_html}
</body>
</html>
"""
        return html_doc

    def html_to_pdf(self, html_content: str, pdf_path: str) -> None:
        """
        Convert HTML content to PDF file.

        Args:
            html_content: HTML content as string
            pdf_path: Path where PDF should be saved
        """
        with sync_playwright() as p:
            browser = p.chromium.launch()
            try:
                page = browser.new_page()
                page.set_content(html_content)
                page.pdf(path=pdf_path, format="A4", print_background=True)
            finally:
                # Close the browser even when loading or printing fails.
                browser.close()

    def generate_report(
        self,
        spirometry_pdf_path: str,
        pnoe_csv_path: str,
        seca_excel_path: str,
        patient_info: Dict[str, Any],
        output_filename: str = None,
    ) -> Dict[str, Any]:
        """
        Generate complete medical report from uploaded files.

        Only the Pnoe CSV is read here; ``spirometry_pdf_path`` and
        ``seca_excel_path`` are accepted but not used by this method.

        Args:
            spirometry_pdf_path: Path to Spirometry PDF file (currently unused)
            pnoe_csv_path: Path to Pnoe CSV file
            seca_excel_path: Path to SECA Excel file (currently unused)
            patient_info: Dictionary containing patient information; the
                ``patient_name`` and ``session_id`` entries feed the default
                output filename
            output_filename: Optional custom output filename

        Returns:
            Dictionary containing report path, graphs generated, and analysis data
        """
        df = self.process_pnoe_data(pnoe_csv_path)
        graphs_generated = self.generate_graphs(df)

        analysis_data = self.calculate_analysis_metrics(df)
        analysis_data["graphs_count"] = len(graphs_generated)

        html_content = self.generate_html(patient_info)

        if output_filename is None:
            # Both parts come from request data, so strip anything that could
            # act as a path separator before building the filename.
            patient_name = self._safe_filename_part(
                patient_info.get("patient_name") or "Unknown"
            )
            session_id = self._safe_filename_part(
                patient_info.get("session_id", "default")
            )
            output_filename = f"report_{patient_name}_{session_id}.pdf"

        report_path = self.reports_dir / output_filename
        self.html_to_pdf(html_content, str(report_path))

        return {
            "report_path": str(report_path),
            "graphs_generated": graphs_generated,
            "analysis_data": analysis_data,
        }
@@ -0,0 +1,64 @@
import base64
import os
import requests
from dotenv import load_dotenv
# Load settings from a .env file, then read the OpenRouter API key from the
# environment (None when the variable is not set).
load_dotenv()
API_KEY_REF = os.environ.get("OPENROUTER_API_KEY")
def encode_pdf_to_base64(pdf_path):
    """Return the contents of the file at *pdf_path* as a base64 text string."""
    with open(pdf_path, "rb") as handle:
        raw_bytes = handle.read()
    encoded = base64.b64encode(raw_bytes)
    return str(encoded, "utf-8")
def extract_spirometry_table_from_pdf(
    pdf_path, timeout=120, output_file="extracted_spirometry_table.csv"
):
    """Ask an OpenRouter-hosted model to extract the spirometry table as CSV.

    The PDF is sent inline as a base64 data URL; the model's reply is written
    verbatim to *output_file*.

    NOTE(review): this uploads the whole PDF to a third-party API - confirm
    that is acceptable for the documents passed in.

    Args:
        pdf_path: Path of the PDF to read.
        timeout: Seconds to wait for the HTTP request before giving up.
        output_file: Path of the CSV file to write.

    Returns:
        A status message: either where the table was saved, or that the
        response contained no content.
    """
    url = "https://openrouter.ai/api/v1/chat/completions"
    headers = {
        "Authorization": f"Bearer {API_KEY_REF}",
        "Content-Type": "application/json",
    }

    # Read and encode the PDF
    base64_pdf = encode_pdf_to_base64(pdf_path)
    data_url = f"data:application/pdf;base64,{base64_pdf}"

    messages = [
        {
            "role": "user",
            "content": [
                {
                    "type": "text",
                    # Adjacent literals are concatenated, so every sentence
                    # except the last ends with a space.
                    "text": "Please extract the Spirometry table from the pdf and return the values in csv format, "
                    "note that it is the unit of parameter that is beside it and it should not be a column. "
                    "The '-' Should be treated as empty values. "
                    "do not add 'csv' at the start or end of the response",
                },
                {
                    "type": "file",
                    "file": {"filename": "document.pdf", "file_data": data_url},
                },
            ],
        }
    ]
    payload = {
        "model": "google/gemini-2.5-flash-lite",
        "messages": messages,
    }

    # Without a timeout the call can block indefinitely on a stalled connection.
    response = requests.post(url, headers=headers, json=payload, timeout=timeout)
    response_data = response.json()

    choices = response_data.get("choices")
    if not choices:
        return "No content found in response"

    # A missing or empty message is reported instead of failing on write.
    content = choices[0].get("message", {}).get("content")
    if not content:
        return "No content found in response"

    with open(output_file, "w", encoding="utf-8") as f:
        f.write(content)
    return f"Extracted table saved to {output_file}"