Files
bio-performx/analysis.ipynb
T

800 lines
29 KiB
Plaintext
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
{
"cells": [
{
"cell_type": "code",
"execution_count": 6,
"id": "b18c1027",
"metadata": {},
"outputs": [],
"source": [
"\n",
"# import requests\n",
"# import json\n",
"# import base64\n",
"# from pathlib import Path\n",
"\n",
"# API_KEY_REF = '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'\n",
"# def encode_pdf_to_base64(pdf_path):\n",
"# with open(pdf_path, \"rb\") as pdf_file:\n",
"# return base64.b64encode(pdf_file.read()).decode('utf-8')\n",
"\n",
"# url = \"https://openrouter.ai/api/v1/chat/completions\"\n",
"# headers = {\n",
"# \"Authorization\": f\"Bearer {API_KEY_REF}\",\n",
"# \"Content-Type\": \"application/json\"\n",
"# }\n",
"\n",
"# # Read and encode the PDF\n",
"# pdf_path = \"data/~Moran~K~19910201~Spirometry Exam~20250729~20250729032843.pdf\"\n",
"# base64_pdf = encode_pdf_to_base64(pdf_path)\n",
"# data_url = f\"data:application/pdf;base64,{base64_pdf}\"\n",
"\n",
"# messages = [\n",
"# {\n",
"# \"role\": \"user\",\n",
"# \"content\": [\n",
"# {\n",
"# \"type\": \"text\",\n",
"# \"text\": \"Please extract the Spirometry table from the pdf and return the values in csv format, \"\n",
"# \"note that it is the unit of parameter that is beside it and it should not be a column. \"\n",
"# \"The '-' Should be treated as empty values.\"\n",
"# \"do not add 'csv' at the start or end of the response\"\n",
"# },\n",
"# {\n",
"# \"type\": \"file\",\n",
"# \"file\": {\n",
"# \"filename\": \"document.pdf\",\n",
"# \"file_data\": data_url\n",
"# }\n",
"# },\n",
"# ]\n",
"# }\n",
"# ]\n",
"\n",
"# # Optional: Configure PDF processing engine\n",
"# # PDF parsing will still work even if the plugin is not explicitly set\n",
"# plugins = [\n",
"# {\n",
"# \"id\": \"file-parser\",\n",
"# \"pdf\": {\n",
"# \"engine\": \"pdf-text\" # defaults to \"mistral-ocr\". See Pricing above\n",
"# }\n",
"# }\n",
"# ]\n",
"\n",
"# payload = {\n",
"# \"model\": \"google/gemini-2.5-flash-lite\",\n",
"# \"messages\": messages,\n",
"# }\n",
"\n",
"# response = requests.post(url, headers=headers, json=payload)\n",
"# # Get the response content\n",
"# response_data = response.json()\n",
"# print(response_data)\n",
"\n",
"# # Extract the content from the response\n",
"# if 'choices' in response_data and len(response_data['choices']) > 0:\n",
"# content = response_data['choices'][0]['message']['content']\n",
" \n",
"# # Save to a CSV file\n",
"# output_file = \"extracted_table.csv\"\n",
"# with open(output_file, 'w', encoding='utf-8') as f:\n",
"# f.write(content)\n",
" \n",
"# print(f\"Content saved to {output_file}\")\n",
"# else:\n",
"# print(\"No content found in response\")"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "56a9d655",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"FVC Best: 4.24, FVC Pred: 112.0\n",
"FEV1 Best: 3.26, FEV1 Pred: 103.3\n",
"FEV1/FVC% Best: 76.9, FEV1/FVC% Pred: 91.8\n"
]
}
],
"source": [
"import pandas as pd\n",
"spirometry_df = pd.read_csv(\"data/spirometry_data.csv\")\n",
"\n",
"fvc_best = spirometry_df.loc[spirometry_df['Parameters'] == 'FVC', 'Best'].values[0]\n",
"fvc_pred = spirometry_df.loc[spirometry_df['Parameters'] == 'FVC', '%Pred.'].values[0]\n",
"\n",
"fev1_best = spirometry_df.loc[spirometry_df['Parameters'] == 'FEV1', 'Best'].values[0]\n",
"fev1_pred = spirometry_df.loc[spirometry_df['Parameters'] == 'FEV1', '%Pred.'].values[0]\n",
"\n",
"fev1_fevc_best = spirometry_df.loc[spirometry_df['Parameters'] == 'FEV1/FVC%', 'Best'].values[0]\n",
"fev1_fevc_pred = spirometry_df.loc[spirometry_df['Parameters'] == 'FEV1/FVC%', '%Pred.'].values[0]\n",
"\n",
"print(f\"FVC Best: {fvc_best}, FVC Pred: {fvc_pred}\")\n",
"print(f\"FEV1 Best: {fev1_best}, FEV1 Pred: {fev1_pred}\")\n",
"print(f\"FEV1/FVC% Best: {fev1_fevc_best}, FEV1/FVC% Pred: {fev1_fevc_pred}\")"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "990f4b4f",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Peak VT: 2.75\n",
"HR at Peak VT: 155.0\n"
]
}
],
"source": [
"df = pd.read_csv('data/Pnoe_20250729_1550-Moran_Keirstyn.csv', delimiter=';')\n",
"peak_vt = df['VT(l)'].max()\n",
"max_vt_row = df.loc[df['VT(l)'].idxmax()]\n",
"print(f\"Peak VT: {peak_vt}\")\n",
"hr = max_vt_row['HR(bpm)']\n",
"print(f\"HR at Peak VT: {hr}\")"
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "041cbc3d",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Peak VT: 2.3770000000000002\n",
"HR at Peak VT: 171.525\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/tmp/ipykernel_69398/4157056299.py:3: FutureWarning: errors='ignore' is deprecated and will raise in a future version. Use to_numeric without passing `errors` and catch exceptions explicitly instead\n",
" df = df.apply(pd.to_numeric, errors='ignore')\n"
]
}
],
"source": [
"df = pd.read_csv('data/Pnoe_20250729_1550-Moran_Keirstyn.csv', delimiter=';')\n",
"# Convert all columns to numeric where possible, coercing errors to NaN\n",
"df = df.apply(pd.to_numeric, errors='ignore')\n",
"df['VO2 Pulse'] = df['VO2(ml/min)'] / df['HR(bpm)'] # VO2 Pulse in mL/beat\n",
"df['VO2 Breath'] = df['VO2(ml/min)'] / df['BF(bpm)'] # VO2 per Breath in mL/breath\n",
"df['CHO'] = df['EE(kcal/min)'] * df['CARBS(%)']/100\n",
"df['FAT'] = df['EE(kcal/min)'] * df['FAT(%)']/100\n",
"# Smooth key columns using rolling window\n",
"window_size = 10\n",
"\n",
"# List of columns to smooth\n",
"columns_to_smooth = ['VO2(ml/min)', 'VCO2(ml/min)', 'HR(bpm)', 'VT(l)', 'BF(bpm)', 'VE(l/min)', 'VO2 Pulse', 'VO2 Breath', 'CHO', 'FAT']\n",
"\n",
"# Apply smoothing to each column\n",
"for col in columns_to_smooth:\n",
" if col in df.columns:\n",
" df[f'{col}_smoothed'] = df[col].rolling(window=window_size).mean()\n",
" \n",
"peak_vt = df['VT(l)_smoothed'].max()\n",
"max_vt_row = df.loc[df['VT(l)_smoothed'].idxmax()]\n",
"print(f\"Peak VT: {peak_vt}\")\n",
"hr = max_vt_row['HR(bpm)_smoothed']\n",
"print(f\"HR at Peak VT: {hr}\")"
]
},
{
"cell_type": "code",
"execution_count": 10,
"id": "de7cadd1",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Percent FEV: 72.91411042944786\n"
]
}
],
"source": [
"percent_fev = (peak_vt / fev1_best) * 100\n",
"print(f\"Percent FEV: {percent_fev}\")"
]
},
{
"cell_type": "code",
"execution_count": 11,
"id": "cb972ed3",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>MeasurementDate</th>\n",
" <th>Comment</th>\n",
" <th>ExternalDeviceId</th>\n",
" <th>ExternalPatientId</th>\n",
" <th>FirstName</th>\n",
" <th>LastName</th>\n",
" <th>BirthDate</th>\n",
" <th>Age</th>\n",
" <th>Ethnicity</th>\n",
" <th>Gender</th>\n",
" <th>...</th>\n",
" <th>Child_XC</th>\n",
" <th>Child_XC_Unit</th>\n",
" <th>Child_BIVA_ZRh</th>\n",
" <th>Child_BIVA_ZXcH</th>\n",
" <th>Child_PhA</th>\n",
" <th>Child_PhA_Unit</th>\n",
" <th>Child_REE_Kcal</th>\n",
" <th>Child_REE_MJ</th>\n",
" <th>Child_TEE_Kcal</th>\n",
" <th>Child_TEE_MJ</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>13</th>\n",
" <td>2025-07-29T18:58:54.0000000Z</td>\n",
" <td>NaN</td>\n",
" <td>10000001583275_0055003f5631501320313557</td>\n",
" <td>KM6479696509</td>\n",
" <td>Keirstyn</td>\n",
" <td>Moran</td>\n",
" <td>1991-02-01T00:00:00.0000000Z</td>\n",
" <td>34</td>\n",
" <td>Caucasian</td>\n",
" <td>Female</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>1 rows × 147 columns</p>\n",
"</div>"
],
"text/plain": [
" MeasurementDate Comment \\\n",
"13 2025-07-29T18:58:54.0000000Z NaN \n",
"\n",
" ExternalDeviceId ExternalPatientId FirstName \\\n",
"13 10000001583275_0055003f5631501320313557 KM6479696509 Keirstyn \n",
"\n",
" LastName BirthDate Age Ethnicity Gender ... \\\n",
"13 Moran 1991-02-01T00:00:00.0000000Z 34 Caucasian Female ... \n",
"\n",
" Child_XC Child_XC_Unit Child_BIVA_ZRh Child_BIVA_ZXcH Child_PhA \\\n",
"13 NaN NaN NaN NaN NaN \n",
"\n",
" Child_PhA_Unit Child_REE_Kcal Child_REE_MJ Child_TEE_Kcal Child_TEE_MJ \n",
"13 NaN NaN NaN NaN NaN \n",
"\n",
"[1 rows x 147 columns]"
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"personal_df = pd.read_excel('data/SECA body comp for all patients.xlsx')\n",
"\n",
"keirstyn_data = personal_df[personal_df['LastName'].str.contains('Moran', case=False, na=False)]\n",
"keirstyn_data"
]
},
{
"cell_type": "code",
"execution_count": 12,
"id": "98d9295a",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"VO2 Max: 47.906290322580645\n"
]
}
],
"source": [
"v02_max = df['VO2(ml/min)_smoothed'].max()\n",
"weight = keirstyn_data['Weight'].iloc[0]\n",
"print(f\"VO2 Max: {v02_max/weight}\")"
]
},
{
"cell_type": "code",
"execution_count": 13,
"id": "cdfeb309",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"==================================================\n",
"Optimal Fat Burning Zone (highest fat:carb ratio):\n",
"Time: 164.0 seconds\n",
"Fat burn rate: 3.894 kcal/min\n",
"Carb burn rate: 1.575 kcal/min\n",
"Fat:Carb ratio: 2.47\n",
"Heart Rate: 96.7 bpm\n",
"VO2: 1147.9 ml/min\n"
]
}
],
"source": [
"# Find the point where fat burning is highest and carb burning is lowest\n",
"# Using the smoothed data for more stable results\n",
"fat_burn_max_idx = df['FAT_smoothed'].idxmax()\n",
"carb_burn_min_idx = df['CHO_smoothed'].idxmin()\n",
"\n",
"# # Get the data at maximum fat burning point\n",
"# max_fat_row = df.loc[fat_burn_max_idx]\n",
"# print(f\"Maximum Fat Burning Point:\")\n",
"# print(f\"Time: {max_fat_row['T(sec)']} seconds\")\n",
"# print(f\"Fat burn rate: {max_fat_row['FAT_smoothed']:.3f} kcal/min\")\n",
"# print(f\"Carb burn rate: {max_fat_row['CHO_smoothed']:.3f} kcal/min\")\n",
"# print(f\"Heart Rate: {max_fat_row['HR(bpm)_smoothed']:.1f} bpm\")\n",
"# print(f\"VO2: {max_fat_row['VO2(ml/min)_smoothed']:.1f} ml/min\")\n",
"\n",
"# print(\"\\n\" + \"=\"*50)\n",
"\n",
"# # Get the data at minimum carb burning point\n",
"# min_carb_row = df.loc[carb_burn_min_idx]\n",
"# print(f\"Minimum Carbohydrate Burning Point:\")\n",
"# print(f\"Time: {min_carb_row['T(sec)']} seconds\")\n",
"# print(f\"Fat burn rate: {min_carb_row['FAT_smoothed']:.3f} kcal/min\")\n",
"# print(f\"Carb burn rate: {min_carb_row['CHO_smoothed']:.3f} kcal/min\")\n",
"# print(f\"Heart Rate: {min_carb_row['HR(bpm)_smoothed']:.1f} bpm\")\n",
"# print(f\"VO2: {min_carb_row['VO2(ml/min)_smoothed']:.1f} ml/min\")\n",
"\n",
"print(\"\\n\" + \"=\"*50)\n",
"\n",
"# Find the optimal fat burning zone (highest fat:carb ratio)\n",
"df['fat_carb_ratio'] = df['FAT_smoothed'] / (df['CHO_smoothed'] + 0.00000001) # Add small value to avoid division by zero\n",
"optimal_fat_idx = df['fat_carb_ratio'].idxmax()\n",
"optimal_row = df.loc[optimal_fat_idx]\n",
"\n",
"print(f\"Optimal Fat Burning Zone (highest fat:carb ratio):\")\n",
"print(f\"Time: {optimal_row['T(sec)']} seconds\")\n",
"print(f\"Fat burn rate: {optimal_row['FAT_smoothed']:.3f} kcal/min\")\n",
"print(f\"Carb burn rate: {optimal_row['CHO_smoothed']:.3f} kcal/min\")\n",
"print(f\"Fat:Carb ratio: {optimal_row['fat_carb_ratio']:.2f}\")\n",
"print(f\"Heart Rate: {optimal_row['HR(bpm)_smoothed']:.1f} bpm\")\n",
"print(f\"VO2: {optimal_row['VO2(ml/min)_smoothed']:.1f} ml/min\")"
]
},
{
"cell_type": "code",
"execution_count": 14,
"id": "4420cfea",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Found 2 intersections at indices: [18, 47]\n",
"\n",
"Last intersection at index 47:\n",
"Time: 251.0 seconds\n",
"Fat burn rate: 3.040 kcal/min\n",
"Carb burn rate: 3.166 kcal/min\n",
"Heart Rate: 100.5 bpm\n",
"VO2: 1283.0 ml/min\n"
]
}
],
"source": [
"# Find intersections where FAT_smoothed and CHO_smoothed cross each other\n",
"intersections = []\n",
"\n",
"for i in range(1, len(df)):\n",
" # Check if there's a crossover between consecutive points\n",
" prev_fat = df.iloc[i-1]['FAT_smoothed']\n",
" prev_cho = df.iloc[i-1]['CHO_smoothed']\n",
" curr_fat = df.iloc[i]['FAT_smoothed']\n",
" curr_cho = df.iloc[i]['CHO_smoothed']\n",
" \n",
" # Skip if any values are NaN\n",
" if pd.isna(prev_fat) or pd.isna(prev_cho) or pd.isna(curr_fat) or pd.isna(curr_cho):\n",
" continue\n",
" \n",
" # Check if lines cross (fat was above/below cho and now it's below/above)\n",
" if ((prev_fat > prev_cho and curr_fat < curr_cho) or \n",
" (prev_fat < prev_cho and curr_fat > curr_cho)):\n",
" intersections.append(i)\n",
"\n",
"print(f\"Found {len(intersections)} intersections at indices: {intersections}\")\n",
"\n",
"if intersections:\n",
" # Get the last intersection\n",
" last_intersection_idx = intersections[-1]\n",
" last_intersection_row = df.iloc[last_intersection_idx]\n",
" \n",
" print(f\"\\nLast intersection at index {last_intersection_idx}:\")\n",
" print(f\"Time: {last_intersection_row['T(sec)']} seconds\")\n",
" print(f\"Fat burn rate: {last_intersection_row['FAT_smoothed']:.3f} kcal/min\")\n",
" print(f\"Carb burn rate: {last_intersection_row['CHO_smoothed']:.3f} kcal/min\")\n",
" print(f\"Heart Rate: {last_intersection_row['HR(bpm)_smoothed']:.1f} bpm\")\n",
" print(f\"VO2: {last_intersection_row['VO2(ml/min)_smoothed']:.1f} ml/min\")\n",
"else:\n",
" print(\"No intersections found between FAT and CHO curves\")"
]
},
{
"cell_type": "code",
"execution_count": 21,
"id": "62803668",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"VT1: {'HeartRate': 100.5, 'Speed': 4.0, 'Time': 251.0}\n",
"VT2: {'HeartRate': 189.71300000000002, 'Speed': 7.5, 'Time': 1524.0}\n"
]
}
],
"source": [
"def detect_vt1(df, fat_col=\"FAT_smoothed\", carb_col=\"CHO_smoothed\"):\n",
" \"\"\"\n",
" Detect VT1 as the first index where carb burn > fat burn and remains higher.\n",
" \"\"\"\n",
" condition = df[carb_col] > df[fat_col]\n",
" crossover_indices = condition[condition].index\n",
"\n",
" if len(crossover_indices) == 0:\n",
" return None # No crossover found\n",
" \n",
" # Find first crossover where carbs remain higher for the rest\n",
" for idx in crossover_indices:\n",
" if all(df.loc[idx:][carb_col] > df.loc[idx:][fat_col]):\n",
" return idx\n",
" return None\n",
"\n",
"\n",
"def detect_vt2(df, vent_col=\"VE(l/min)_smoothed\", bf_col=\"BF(bpm)_smoothed\", smooth_window=5):\n",
" \"\"\"\n",
" Detect VT2 using slope/inflection method.\n",
" Works with either Ventilation (VE) or Breathing Frequency (Bf).\n",
" \"\"\"\n",
" col = vent_col if vent_col in df.columns else bf_col\n",
" \n",
" # Use already smoothed data\n",
" smoothed_col = col\n",
" \n",
" # Compute slope (first derivative)\n",
" df[\"slope\"] = df[smoothed_col].diff()\n",
" \n",
" # Detect inflection: largest change in slope (second derivative peak)\n",
" df[\"second_derivative\"] = df[\"slope\"].diff()\n",
" inflection_idx = df[\"second_derivative\"].idxmax()\n",
" \n",
" return inflection_idx\n",
"\n",
"\n",
"def analyze_thresholds(df_input):\n",
" # Use the existing dataframe\n",
" df_copy = df_input.copy()\n",
" \n",
" # --- Detect VT1 ---\n",
" vt1_idx = detect_vt1(df_copy)\n",
" vt1 = None\n",
" if vt1_idx is not None:\n",
" vt1 = {\n",
" \"HeartRate\": df_copy.loc[vt1_idx, \"HR(bpm)_smoothed\"],\n",
" \"Speed\": df_copy.loc[vt1_idx, \"Speed\"],\n",
" \"Time\": df_copy.loc[vt1_idx, \"T(sec)\"]\n",
" }\n",
" \n",
" # --- Detect VT2 ---\n",
" vt2_idx = detect_vt2(df_copy)\n",
" vt2 = None\n",
" if vt2_idx is not None:\n",
" vt2 = {\n",
" \"HeartRate\": df_copy.loc[vt2_idx, \"HR(bpm)_smoothed\"],\n",
" \"Speed\": df_copy.loc[vt2_idx, \"Speed\"],\n",
" \"Time\": df_copy.loc[vt2_idx, \"T(sec)\"]\n",
" }\n",
" \n",
" return vt1, vt2\n",
"\n",
"\n",
"vt1, vt2 = analyze_thresholds(df)\n",
"print(\"VT1:\", vt1)\n",
"print(\"VT2:\", vt2)\n"
]
},
{
"cell_type": "code",
"execution_count": 16,
"id": "07593b56",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Zone 1 (Active Recovery): 81.7 - 96.7 bpm\n",
"Zone 2 (Aerobic Base): 96.7 - 100.5 bpm\n",
"Zone 3 (Aerobic): 100.5 - 179.7 bpm\n",
"Zone 4 (Lactate Threshold): 179.7 - 189.7 bpm\n",
"Zone 5 (VO2 Max): 189.7 - 199.7 bpm\n"
]
}
],
"source": [
"zone_1_start = optimal_row['HR(bpm)_smoothed'] - 15\n",
"zone_2_start = optimal_row['HR(bpm)_smoothed']\n",
"zone_3_start = vt1\n",
"zone_4_start = vt2['HeartRate'] - 10\n",
"zone_5_start = vt2['HeartRate']\n",
"zone_5_end = vt2['HeartRate'] + 10\n",
"\n",
"zone_1_end = zone_2_start\n",
"zone_2_end = vt1['HeartRate']\n",
"zone_3_end = zone_4_start\n",
"zone_4_end = zone_5_start\n",
"\n",
"print(f\"Zone 1 (Active Recovery): {zone_1_start:.1f} - {zone_1_end:.1f} bpm\")\n",
"print(f\"Zone 2 (Aerobic Base): {zone_2_start:.1f} - {zone_2_end:.1f} bpm\")\n",
"print(f\"Zone 3 (Aerobic): {zone_3_start['HeartRate']:.1f} - {zone_3_end:.1f} bpm\")\n",
"print(f\"Zone 4 (Lactate Threshold): {zone_4_start:.1f} - {zone_4_end:.1f} bpm\")\n",
"print(f\"Zone 5 (VO2 Max): {zone_5_start:.1f} - {zone_5_end:.1f} bpm\")"
]
},
{
"cell_type": "code",
"execution_count": 17,
"id": "c90415b2",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"VO2 Max detected at index 202:\n",
"Time: 985.0 seconds\n",
"VO2 Breath: 58.2 ml/breath\n",
"VO2: 2167.8 ml/min\n",
"VO2 per kg: 38.8 ml/kg/min\n",
"Heart Rate: 170.5 bpm\n",
"Speed: 6.0 km/h\n",
"VO2 Breath Slope: -0.02\n"
]
}
],
"source": [
"# Calculate the slope of VO2 Breath (first derivative)\n",
"df['vo2_breath_slope'] = df['VO2 Breath_smoothed'].diff()\n",
"\n",
"# Find points where slope is consistently zero or negative\n",
"# We'll use a rolling window to check for consistent negative/zero slope\n",
"window = len(df) // 3 # Number of consecutive points to check\n",
"\n",
"# Calculate rolling mean of slope to smooth out noise\n",
"df['vo2_breath_slope_smoothed'] = df['vo2_breath_slope'].rolling(window=window).mean()\n",
"\n",
"# Find where slope becomes consistently zero or negative\n",
"mask = df['vo2_breath_slope_smoothed'] <= 0\n",
"consistent_negative_indices = mask[mask].index\n",
"\n",
"if len(consistent_negative_indices) > 0:\n",
" # Find the first point where slope becomes consistently negative/zero\n",
" vo2_max_idx = consistent_negative_indices[0]\n",
" vo2_max_row = df.loc[vo2_max_idx]\n",
" \n",
" print(f\"VO2 Max detected at index {vo2_max_idx}:\")\n",
" print(f\"Time: {vo2_max_row['T(sec)']} seconds\")\n",
" print(f\"VO2 Breath: {vo2_max_row['VO2 Breath_smoothed']:.1f} ml/breath\")\n",
" print(f\"VO2: {vo2_max_row['VO2(ml/min)_smoothed']:.1f} ml/min\")\n",
" print(f\"VO2 per kg: {vo2_max_row['VO2(ml/min)_smoothed']/weight:.1f} ml/kg/min\")\n",
" print(f\"Heart Rate: {vo2_max_row['HR(bpm)_smoothed']:.1f} bpm\")\n",
" print(f\"Speed: {vo2_max_row['Speed']} km/h\")\n",
" print(f\"VO2 Breath Slope: {vo2_max_row['vo2_breath_slope_smoothed']:.2f}\")\n",
"else:\n",
" # If no consistent negative slope found, use the maximum VO2 Breath value\n",
" vo2_max_idx = df['VO2 Breath_smoothed'].idxmax()\n",
" vo2_max_row = df.loc[vo2_max_idx]\n",
" \n",
" print(f\"No consistent negative slope found. Using peak VO2 Breath at index {vo2_max_idx}:\")\n",
" print(f\"Time: {vo2_max_row['T(sec)']} seconds\")\n",
" print(f\"VO2 Breath: {vo2_max_row['VO2 Breath_smoothed']:.1f} ml/breath\")\n",
" print(f\"VO2: {vo2_max_row['VO2(ml/min)_smoothed']:.1f} ml/min\")\n",
" print(f\"VO2 per kg: {vo2_max_row['VO2(ml/min)_smoothed']/weight:.1f} ml/kg/min\")\n",
" print(f\"Heart Rate: {vo2_max_row['HR(bpm)_smoothed']:.1f} bpm\")\n",
" print(f\"Speed: {vo2_max_row['Speed']} km/h\")"
]
},
{
"cell_type": "code",
"execution_count": 18,
"id": "c3b2cc59",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"VO2 Pulse and HR slopes diverge consistently starting at index 89:\n",
"Time: 485.0 seconds\n",
"VO2 Pulse (smoothed): 13.91\n",
"Heart Rate (smoothed): 136.2 bpm\n",
"VO2 Pulse Slope: 0.672\n",
"HR Slope: 1.000\n",
"Slope Difference: 1.006\n",
"VO2: 1897.8 ml/min\n",
"Speed: 4.5 km/h\n",
"Threshold used: 0.615\n"
]
}
],
"source": [
"# Calculate slopes for both VO2 Pulse and HR\n",
"df['vo2_pulse_slope'] = df['VO2 Pulse_smoothed'].diff()\n",
"df['hr_slope'] = df['HR(bpm)_smoothed'].diff()\n",
"\n",
"# Calculate the difference between the slopes\n",
"df['slope_difference'] = abs(df['vo2_pulse_slope'] - df['hr_slope'])\n",
"\n",
"# Find where the slope difference becomes consistently large (slopes diverge)\n",
"# Use a rolling window to smooth out noise\n",
"window_size = len(df) // 5 # Adjust window size as needed\n",
"df['slope_difference_smoothed'] = df['slope_difference'].rolling(window=window_size).mean()\n",
"\n",
"# Find the threshold - we'll use the 75th percentile of slope differences as threshold\n",
"threshold = df['slope_difference_smoothed'].quantile(0.75)\n",
"\n",
"# Find points where slope difference exceeds threshold\n",
"divergence_mask = df['slope_difference_smoothed'] > threshold\n",
"divergence_indices = divergence_mask[divergence_mask].index\n",
"\n",
"if len(divergence_indices) > 0:\n",
" # Find the first sustained divergence point\n",
" min_consecutive_points = 5\n",
" consistent_divergence_idx = None\n",
" \n",
" for start_idx in divergence_indices:\n",
" # Check if divergence is sustained for consecutive points\n",
" consecutive_count = 0\n",
" for j in range(start_idx, min(start_idx + min_consecutive_points, len(df))):\n",
" if j in divergence_indices:\n",
" consecutive_count += 1\n",
" else:\n",
" break\n",
" \n",
" if consecutive_count >= min_consecutive_points:\n",
" consistent_divergence_idx = start_idx\n",
" break\n",
" \n",
" if consistent_divergence_idx is not None:\n",
" divergence_row = df.iloc[consistent_divergence_idx]\n",
" \n",
" print(f\"VO2 Pulse and HR slopes diverge consistently starting at index {consistent_divergence_idx}:\")\n",
" print(f\"Time: {divergence_row['T(sec)']} seconds\")\n",
" print(f\"VO2 Pulse (smoothed): {divergence_row['VO2 Pulse_smoothed']:.2f}\")\n",
" print(f\"Heart Rate (smoothed): {divergence_row['HR(bpm)_smoothed']:.1f} bpm\")\n",
" print(f\"VO2 Pulse Slope: {divergence_row['vo2_pulse_slope']:.3f}\")\n",
" print(f\"HR Slope: {divergence_row['hr_slope']:.3f}\")\n",
" print(f\"Slope Difference: {divergence_row['slope_difference_smoothed']:.3f}\")\n",
" print(f\"VO2: {divergence_row['VO2(ml/min)_smoothed']:.1f} ml/min\")\n",
" print(f\"Speed: {divergence_row['Speed']} km/h\")\n",
" print(f\"Threshold used: {threshold:.3f}\")\n",
" else:\n",
" print(f\"No sustained divergence found. Threshold: {threshold:.3f}\")\n",
" # Show the point with maximum slope difference instead\n",
" max_diff_idx = df['slope_difference_smoothed'].idxmax()\n",
" max_diff_row = df.iloc[max_diff_idx]\n",
" \n",
" print(f\"\\nPoint with maximum slope difference at index {max_diff_idx}:\")\n",
" print(f\"Time: {max_diff_row['T(sec)']} seconds\")\n",
" print(f\"VO2 Pulse (smoothed): {max_diff_row['VO2 Pulse_smoothed']:.2f}\")\n",
" print(f\"Heart Rate (smoothed): {max_diff_row['HR(bpm)_smoothed']:.1f} bpm\")\n",
" print(f\"Slope Difference: {max_diff_row['slope_difference_smoothed']:.3f}\")\n",
"else:\n",
" print(\"No significant slope divergence found between VO2 Pulse and HR\")"
]
},
{
"cell_type": "code",
"execution_count": 19,
"id": "672d68f3",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Maximum FAT_smoothed occurs at index 30:\n",
"Heart Rate (smoothed): 96.7 bpm\n",
"FAT (smoothed): 3.894 kcal/min\n"
]
}
],
"source": [
"max_fat_smoothed_idx = df['FAT_smoothed'].idxmax()\n",
"max_fat_smoothed_row = df.loc[max_fat_smoothed_idx]\n",
"max_heart_rate = 220 - keirstyn_data['Age'].iloc[0]\n",
"\n",
"print(f\"Maximum FAT_smoothed occurs at index {max_fat_smoothed_idx}:\")\n",
"print(f\"Heart Rate (smoothed): {max_fat_smoothed_row['HR(bpm)_smoothed']:.1f} bpm\")\n",
"print(f\"FAT (smoothed): {max_fat_smoothed_row['FAT_smoothed']:.3f} kcal/min\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "fe3b7605",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "report_generation",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.3"
}
},
"nbformat": 4,
"nbformat_minor": 5
}