{ "cells": [ { "cell_type": "code", "execution_count": 6, "id": "b18c1027", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "{'id': 'gen-1758708788-9UUhU8KfktBmyteT4BUC', 'provider': 'Google', 'model': 'google/gemini-2.5-flash-lite', 'object': 'chat.completion', 'created': 1758708788, 'choices': [{'logprobs': None, 'finish_reason': 'stop', 'native_finish_reason': 'STOP', 'index': 0, 'message': {'role': 'assistant', 'content': 'Parameters,Best,LLN,Pred.,%Pred.,ZScore,PRE#1,PRE#2,PRE#3\\nFVC,4.24,3.03,3.79,112.0,0.95,4.24,4.17,4.15\\nFEV1,3.26,2.53,3.16,103.3,0.28,3.26,3.21,3.14\\nFEV1/FVC%,76.89,72.47,83.78,91.8,-1.05,76.9,77.0,75.7\\nPEF,684,222,384,178.7,-,444,438,684\\nFEF2575,2.74,2.15,3.42,80.2,-0.84,2.74,2.68,2.48\\nFEF25,6.08,,,0.0,-,6.08,6.0,5.53\\nFEF50,3.06,,,0.0,-,3.06,3.1,2.77\\nFEF75,1.06,0.71,1.41,75.1,-0.72,1.06,1.12,0.94\\nPEFTime,79,,,49,-,79,40,39\\nEVol,78.0,,,77.0,-,78.0,77.0,197.0\\nFEV6,4.22,3.03,3.79,111.4,-,4.22,4.17,4.13', 'refusal': None, 'reasoning': None}}], 'usage': {'prompt_tokens': 1348, 'completion_tokens': 434, 'total_tokens': 1782, 'prompt_tokens_details': {'cached_tokens': 0}, 'completion_tokens_details': {'reasoning_tokens': 0, 'image_tokens': 0}}}\n", "Content saved to extracted_table.csv\n" ] } ], "source": [ "\n", "import requests\n", "import json\n", "import base64\n", "from pathlib import Path\n", "\n", "API_KEY_REF = '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'\n", "def encode_pdf_to_base64(pdf_path):\n", " with open(pdf_path, \"rb\") as pdf_file:\n", " return base64.b64encode(pdf_file.read()).decode('utf-8')\n", "\n", "url = \"https://openrouter.ai/api/v1/chat/completions\"\n", "headers = {\n", " \"Authorization\": f\"Bearer {API_KEY_REF}\",\n", " \"Content-Type\": \"application/json\"\n", "}\n", "\n", "# Read and encode the PDF\n", "pdf_path = \"data/~Moran~K~19910201~Spirometry Exam~20250729~20250729032843.pdf\"\n", "base64_pdf = encode_pdf_to_base64(pdf_path)\n", "data_url = f\"data:application/pdf;base64,{base64_pdf}\"\n", "\n", "messages = [\n", " {\n", " \"role\": \"user\",\n", " \"content\": [\n", " {\n", " \"type\": \"text\",\n", " \"text\": \"Please extract the table from the pdf and return the values in csv format, \"\n", " \"note that it is the unit of parameter that is beside it and it should not be a column. \"\n", " \"The '-' Should be treated as empty values.\"\n", " \"do not add 'csv' at the start or end of the response\"\n", " },\n", " {\n", " \"type\": \"file\",\n", " \"file\": {\n", " \"filename\": \"document.pdf\",\n", " \"file_data\": data_url\n", " }\n", " },\n", " ]\n", " }\n", "]\n", "\n", "# Optional: Configure PDF processing engine\n", "# PDF parsing will still work even if the plugin is not explicitly set\n", "plugins = [\n", " {\n", " \"id\": \"file-parser\",\n", " \"pdf\": {\n", " \"engine\": \"pdf-text\" # defaults to \"mistral-ocr\". See Pricing above\n", " }\n", " }\n", "]\n", "\n", "payload = {\n", " \"model\": \"google/gemini-2.5-flash-lite\",\n", " \"messages\": messages,\n", "}\n", "\n", "response = requests.post(url, headers=headers, json=payload)\n", "# Get the response content\n", "response_data = response.json()\n", "print(response_data)\n", "\n", "# Extract the content from the response\n", "if 'choices' in response_data and len(response_data['choices']) > 0:\n", " content = response_data['choices'][0]['message']['content']\n", " \n", " # Save to a CSV file\n", " output_file = \"extracted_table.csv\"\n", " with open(output_file, 'w', encoding='utf-8') as f:\n", " f.write(content)\n", " \n", " print(f\"Content saved to {output_file}\")\n", "else:\n", " print(\"No content found in response\")" ] }, { "cell_type": "code", "execution_count": 12, "id": "56a9d655", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "FVC Best: 4.24, FVC Pred: 112.0\n", "FEV1 Best: 3.26, FEV1 Pred: 103.3\n", "FEV1/FVC% Best: 76.89, FEV1/FVC% Pred: 91.8\n" ] } ], "source": [ "import pandas as pd\n", "spirometry_df = pd.read_csv(\"extracted_table.csv\")\n", "\n", "fvc_best = spirometry_df.loc[spirometry_df['Parameters'] == 'FVC', 'Best'].values[0]\n", "fvc_pred = spirometry_df.loc[spirometry_df['Parameters'] == 'FVC', '%Pred.'].values[0]\n", "\n", "fev1_best = spirometry_df.loc[spirometry_df['Parameters'] == 'FEV1', 'Best'].values[0]\n", "fev1_pred = spirometry_df.loc[spirometry_df['Parameters'] == 'FEV1', '%Pred.'].values[0]\n", "\n", "fev1_fevc_best = spirometry_df.loc[spirometry_df['Parameters'] == 'FEV1/FVC%', 'Best'].values[0]\n", "fev1_fevc_pred = spirometry_df.loc[spirometry_df['Parameters'] == 'FEV1/FVC%', '%Pred.'].values[0]\n", "\n", "print(f\"FVC Best: {fvc_best}, FVC Pred: {fvc_pred}\")\n", "print(f\"FEV1 Best: {fev1_best}, FEV1 Pred: {fev1_pred}\")\n", "print(f\"FEV1/FVC% Best: {fev1_fevc_best}, FEV1/FVC% Pred: {fev1_fevc_pred}\")" ] }, { "cell_type": "code", "execution_count": 16, "id": "990f4b4f", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Peak VT: 2.75\n", "HR at Peak VT: 155.0\n" ] } ], "source": [ "df = pd.read_csv('data/Pnoe_20250729_1550-Moran_Keirstyn.csv', delimiter=';')\n", "peak_vt = df['VT(l)'].max()\n", "max_vt_row = df.loc[df['VT(l)'].idxmax()]\n", "print(f\"Peak VT: {peak_vt}\")\n", "hr = max_vt_row['HR(bpm)']\n", "print(f\"HR at Peak VT: {hr}\")" ] }, { "cell_type": "code", "execution_count": 18, "id": "041cbc3d", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Peak VT: 2.3770000000000002\n", "HR at Peak VT: 171.525\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "/tmp/ipykernel_301535/4157056299.py:3: FutureWarning: errors='ignore' is deprecated and will raise in a future version. Use to_numeric without passing `errors` and catch exceptions explicitly instead\n", " df = df.apply(pd.to_numeric, errors='ignore')\n" ] } ], "source": [ "df = pd.read_csv('data/Pnoe_20250729_1550-Moran_Keirstyn.csv', delimiter=';')\n", "# Convert all columns to numeric where possible, coercing errors to NaN\n", "df = df.apply(pd.to_numeric, errors='ignore')\n", "df['VO2 Pulse'] = df['VO2(ml/min)'] / df['HR(bpm)'] # VO2 Pulse in mL/beat\n", "df['VO2 Breath'] = df['VO2(ml/min)'] / df['BF(bpm)'] # VO2 per Breath in mL/breath\n", "df['CHO'] = df['EE(kcal/min)'] * df['CARBS(%)']/100\n", "df['FAT'] = df['EE(kcal/min)'] * df['FAT(%)']/100\n", "# Smooth key columns using rolling window\n", "window_size = 10\n", "\n", "# List of columns to smooth\n", "columns_to_smooth = ['VO2(ml/min)', 'VCO2(ml/min)', 'HR(bpm)', 'VT(l)', 'BF(bpm)', 'VE(l/min)', 'VO2 Pulse', 'VO2 Breath', 'CHO', 'FAT']\n", "\n", "# Apply smoothing to each column\n", "for col in columns_to_smooth:\n", " if col in df.columns:\n", " df[f'{col}_smoothed'] = df[col].rolling(window=window_size).mean()\n", " \n", "peak_vt = df['VT(l)_smoothed'].max()\n", "max_vt_row = df.loc[df['VT(l)_smoothed'].idxmax()]\n", "print(f\"Peak VT: {peak_vt}\")\n", "hr = max_vt_row['HR(bpm)_smoothed']\n", "print(f\"HR at Peak VT: {hr}\")" ] }, { "cell_type": "code", "execution_count": 20, "id": "de7cadd1", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Percent FEV: 72.91411042944786\n" ] } ], "source": [ "percent_fev = (peak_vt / fev1_best) * 100\n", "print(f\"Percent FEV: {percent_fev}\")" ] }, { "cell_type": "code", "execution_count": 24, "id": "cb972ed3", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
MeasurementDateCommentExternalDeviceIdExternalPatientIdFirstNameLastNameBirthDateAgeEthnicityGender...Child_XCChild_XC_UnitChild_BIVA_ZRhChild_BIVA_ZXcHChild_PhAChild_PhA_UnitChild_REE_KcalChild_REE_MJChild_TEE_KcalChild_TEE_MJ
132025-07-29T18:58:54.0000000ZNaN10000001583275_0055003f5631501320313557KM6479696509KeirstynMoran1991-02-01T00:00:00.0000000Z34CaucasianFemale...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
\n", "

1 rows × 147 columns

\n", "
" ], "text/plain": [ " MeasurementDate Comment \\\n", "13 2025-07-29T18:58:54.0000000Z NaN \n", "\n", " ExternalDeviceId ExternalPatientId FirstName \\\n", "13 10000001583275_0055003f5631501320313557 KM6479696509 Keirstyn \n", "\n", " LastName BirthDate Age Ethnicity Gender ... \\\n", "13 Moran 1991-02-01T00:00:00.0000000Z 34 Caucasian Female ... \n", "\n", " Child_XC Child_XC_Unit Child_BIVA_ZRh Child_BIVA_ZXcH Child_PhA \\\n", "13 NaN NaN NaN NaN NaN \n", "\n", " Child_PhA_Unit Child_REE_Kcal Child_REE_MJ Child_TEE_Kcal Child_TEE_MJ \n", "13 NaN NaN NaN NaN NaN \n", "\n", "[1 rows x 147 columns]" ] }, "execution_count": 24, "metadata": {}, "output_type": "execute_result" } ], "source": [ "personal_df = pd.read_excel('data/SECA body comp for all patients.xlsx')\n", "\n", "keirstyn_data = personal_df[personal_df['LastName'].str.contains('Moran', case=False, na=False)]\n", "keirstyn_data" ] }, { "cell_type": "code", "execution_count": 26, "id": "98d9295a", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "VO2 Max: 47.906290322580645\n" ] } ], "source": [ "v02_max = df['VO2(ml/min)_smoothed'].max()\n", "weight = keirstyn_data['Weight'].iloc[0]\n", "print(f\"VO2 Max: {v02_max/weight}\")" ] }, { "cell_type": "code", "execution_count": 32, "id": "cdfeb309", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", "==================================================\n", "Optimal Fat Burning Zone (highest fat:carb ratio):\n", "Time: 164.0 seconds\n", "Fat burn rate: 3.894 kcal/min\n", "Carb burn rate: 1.575 kcal/min\n", "Fat:Carb ratio: 2.47\n", "Heart Rate: 96.7 bpm\n", "VO2: 1147.9 ml/min\n" ] } ], "source": [ "# Find the point where fat burning is highest and carb burning is lowest\n", "# Using the smoothed data for more stable results\n", "fat_burn_max_idx = df['FAT_smoothed'].idxmax()\n", "carb_burn_min_idx = df['CHO_smoothed'].idxmin()\n", "\n", "# # Get the data at maximum fat burning point\n", "# max_fat_row = df.loc[fat_burn_max_idx]\n", "# print(f\"Maximum Fat Burning Point:\")\n", "# print(f\"Time: {max_fat_row['T(sec)']} seconds\")\n", "# print(f\"Fat burn rate: {max_fat_row['FAT_smoothed']:.3f} kcal/min\")\n", "# print(f\"Carb burn rate: {max_fat_row['CHO_smoothed']:.3f} kcal/min\")\n", "# print(f\"Heart Rate: {max_fat_row['HR(bpm)_smoothed']:.1f} bpm\")\n", "# print(f\"VO2: {max_fat_row['VO2(ml/min)_smoothed']:.1f} ml/min\")\n", "\n", "# print(\"\\n\" + \"=\"*50)\n", "\n", "# # Get the data at minimum carb burning point\n", "# min_carb_row = df.loc[carb_burn_min_idx]\n", "# print(f\"Minimum Carbohydrate Burning Point:\")\n", "# print(f\"Time: {min_carb_row['T(sec)']} seconds\")\n", "# print(f\"Fat burn rate: {min_carb_row['FAT_smoothed']:.3f} kcal/min\")\n", "# print(f\"Carb burn rate: {min_carb_row['CHO_smoothed']:.3f} kcal/min\")\n", "# print(f\"Heart Rate: {min_carb_row['HR(bpm)_smoothed']:.1f} bpm\")\n", "# print(f\"VO2: {min_carb_row['VO2(ml/min)_smoothed']:.1f} ml/min\")\n", "\n", "print(\"\\n\" + \"=\"*50)\n", "\n", "# Find the optimal fat burning zone (highest fat:carb ratio)\n", "df['fat_carb_ratio'] = df['FAT_smoothed'] / (df['CHO_smoothed'] + 0.00000001) # Add small value to avoid division by zero\n", "optimal_fat_idx = df['fat_carb_ratio'].idxmax()\n", "optimal_row = df.loc[optimal_fat_idx]\n", "\n", "print(f\"Optimal Fat Burning Zone (highest fat:carb ratio):\")\n", "print(f\"Time: {optimal_row['T(sec)']} seconds\")\n", "print(f\"Fat burn rate: {optimal_row['FAT_smoothed']:.3f} kcal/min\")\n", "print(f\"Carb burn rate: {optimal_row['CHO_smoothed']:.3f} kcal/min\")\n", "print(f\"Fat:Carb ratio: {optimal_row['fat_carb_ratio']:.2f}\")\n", "print(f\"Heart Rate: {optimal_row['HR(bpm)_smoothed']:.1f} bpm\")\n", "print(f\"VO2: {optimal_row['VO2(ml/min)_smoothed']:.1f} ml/min\")" ] }, { "cell_type": "code", "execution_count": 33, "id": "4420cfea", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Found 2 intersections at indices: [18, 47]\n", "\n", "Last intersection at index 47:\n", "Time: 251.0 seconds\n", "Fat burn rate: 3.040 kcal/min\n", "Carb burn rate: 3.166 kcal/min\n", "Heart Rate: 100.5 bpm\n", "VO2: 1283.0 ml/min\n" ] } ], "source": [ "# Find intersections where FAT_smoothed and CHO_smoothed cross each other\n", "intersections = []\n", "\n", "for i in range(1, len(df)):\n", " # Check if there's a crossover between consecutive points\n", " prev_fat = df.iloc[i-1]['FAT_smoothed']\n", " prev_cho = df.iloc[i-1]['CHO_smoothed']\n", " curr_fat = df.iloc[i]['FAT_smoothed']\n", " curr_cho = df.iloc[i]['CHO_smoothed']\n", " \n", " # Skip if any values are NaN\n", " if pd.isna(prev_fat) or pd.isna(prev_cho) or pd.isna(curr_fat) or pd.isna(curr_cho):\n", " continue\n", " \n", " # Check if lines cross (fat was above/below cho and now it's below/above)\n", " if ((prev_fat > prev_cho and curr_fat < curr_cho) or \n", " (prev_fat < prev_cho and curr_fat > curr_cho)):\n", " intersections.append(i)\n", "\n", "print(f\"Found {len(intersections)} intersections at indices: {intersections}\")\n", "\n", "if intersections:\n", " # Get the last intersection\n", " last_intersection_idx = intersections[-1]\n", " last_intersection_row = df.iloc[last_intersection_idx]\n", " \n", " print(f\"\\nLast intersection at index {last_intersection_idx}:\")\n", " print(f\"Time: {last_intersection_row['T(sec)']} seconds\")\n", " print(f\"Fat burn rate: {last_intersection_row['FAT_smoothed']:.3f} kcal/min\")\n", " print(f\"Carb burn rate: {last_intersection_row['CHO_smoothed']:.3f} kcal/min\")\n", " print(f\"Heart Rate: {last_intersection_row['HR(bpm)_smoothed']:.1f} bpm\")\n", " print(f\"VO2: {last_intersection_row['VO2(ml/min)_smoothed']:.1f} ml/min\")\n", "else:\n", " print(\"No intersections found between FAT and CHO curves\")" ] }, { "cell_type": "code", "execution_count": 37, "id": "62803668", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "VT1: {'HeartRate': 100.5, 'Speed': 4.0, 'Time': 251.0}\n", "VT2: {'HeartRate': 189.71300000000002, 'Speed': 7.5, 'Time': 1524.0}\n" ] } ], "source": [ "def detect_vt1(df, fat_col=\"FAT_smoothed\", carb_col=\"CHO_smoothed\"):\n", " \"\"\"\n", " Detect VT1 as the first index where carb burn > fat burn and remains higher.\n", " \"\"\"\n", " condition = df[carb_col] > df[fat_col]\n", " crossover_indices = condition[condition].index\n", "\n", " if len(crossover_indices) == 0:\n", " return None # No crossover found\n", " \n", " # Find first crossover where carbs remain higher for the rest\n", " for idx in crossover_indices:\n", " if all(df.loc[idx:][carb_col] > df.loc[idx:][fat_col]):\n", " return idx\n", " return None\n", "\n", "\n", "def detect_vt2(df, vent_col=\"VE(l/min)_smoothed\", bf_col=\"BF(bpm)_smoothed\", smooth_window=5):\n", " \"\"\"\n", " Detect VT2 using slope/inflection method.\n", " Works with either Ventilation (VE) or Breathing Frequency (Bf).\n", " \"\"\"\n", " col = vent_col if vent_col in df.columns else bf_col\n", " \n", " # Use already smoothed data\n", " smoothed_col = col\n", " \n", " # Compute slope (first derivative)\n", " df[\"slope\"] = df[smoothed_col].diff()\n", " \n", " # Detect inflection: largest change in slope (second derivative peak)\n", " df[\"second_derivative\"] = df[\"slope\"].diff()\n", " inflection_idx = df[\"second_derivative\"].idxmax()\n", " \n", " return inflection_idx\n", "\n", "\n", "def analyze_thresholds(df_input):\n", " # Use the existing dataframe\n", " df_copy = df_input.copy()\n", " \n", " # --- Detect VT1 ---\n", " vt1_idx = detect_vt1(df_copy)\n", " vt1 = None\n", " if vt1_idx is not None:\n", " vt1 = {\n", " \"HeartRate\": df_copy.loc[vt1_idx, \"HR(bpm)_smoothed\"],\n", " \"Speed\": df_copy.loc[vt1_idx, \"Speed\"],\n", " \"Time\": df_copy.loc[vt1_idx, \"T(sec)\"]\n", " }\n", " \n", " # --- Detect VT2 ---\n", " vt2_idx = detect_vt2(df_copy)\n", " vt2 = None\n", " if vt2_idx is not None:\n", " vt2 = {\n", " \"HeartRate\": df_copy.loc[vt2_idx, \"HR(bpm)_smoothed\"],\n", " \"Speed\": df_copy.loc[vt2_idx, \"Speed\"],\n", " \"Time\": df_copy.loc[vt2_idx, \"T(sec)\"]\n", " }\n", " \n", " return vt1, vt2\n", "\n", "\n", "vt1, vt2 = analyze_thresholds(df)\n", "print(\"VT1:\", vt1)\n", "print(\"VT2:\", vt2)\n" ] }, { "cell_type": "code", "execution_count": 40, "id": "07593b56", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Zone 1 (Active Recovery): 81.7 - 96.7 bpm\n", "Zone 2 (Aerobic Base): 96.7 - 100.5 bpm\n", "Zone 3 (Aerobic): 100.5 - 179.7 bpm\n", "Zone 4 (Lactate Threshold): 179.7 - 199.7 bpm\n", "Zone 5 (VO2 Max): 199.7+ bpm\n" ] } ], "source": [ "zone_1_start = optimal_row['HR(bpm)_smoothed'] - 15\n", "zone_2_start = optimal_row['HR(bpm)_smoothed']\n", "zone_3_start = vt1\n", "zone_4_start = vt2['HeartRate'] - 10\n", "zone_5_start = vt2['HeartRate'] + 10\n", "\n", "zone_1_end = zone_2_start\n", "zone_2_end = vt1['HeartRate']\n", "zone_3_end = zone_4_start\n", "zone_4_end = zone_5_start\n", "\n", "print(f\"Zone 1 (Active Recovery): {zone_1_start:.1f} - {zone_1_end:.1f} bpm\")\n", "print(f\"Zone 2 (Aerobic Base): {zone_2_start:.1f} - {zone_2_end:.1f} bpm\")\n", "print(f\"Zone 3 (Aerobic): {zone_3_start['HeartRate']:.1f} - {zone_3_end:.1f} bpm\")\n", "print(f\"Zone 4 (Lactate Threshold): {zone_4_start:.1f} - {zone_4_end:.1f} bpm\")\n", "print(f\"Zone 5 (VO2 Max): {zone_5_start:.1f}+ bpm\")" ] }, { "cell_type": "code", "execution_count": 60, "id": "c90415b2", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "VO2 Max detected at index 202:\n", "Time: 985.0 seconds\n", "VO2 Breath: 58.2 ml/breath\n", "VO2: 2167.8 ml/min\n", "VO2 per kg: 38.8 ml/kg/min\n", "Heart Rate: 170.5 bpm\n", "Speed: 6.0 km/h\n", "VO2 Breath Slope: -0.02\n" ] } ], "source": [ "# Calculate the slope of VO2 Breath (first derivative)\n", "df['vo2_breath_slope'] = df['VO2 Breath_smoothed'].diff()\n", "\n", "# Find points where slope is consistently zero or negative\n", "# We'll use a rolling window to check for consistent negative/zero slope\n", "window = len(df) // 3 # Number of consecutive points to check\n", "\n", "# Calculate rolling mean of slope to smooth out noise\n", "df['vo2_breath_slope_smoothed'] = df['vo2_breath_slope'].rolling(window=window).mean()\n", "\n", "# Find where slope becomes consistently zero or negative\n", "mask = df['vo2_breath_slope_smoothed'] <= 0\n", "consistent_negative_indices = mask[mask].index\n", "\n", "if len(consistent_negative_indices) > 0:\n", " # Find the first point where slope becomes consistently negative/zero\n", " vo2_max_idx = consistent_negative_indices[0]\n", " vo2_max_row = df.loc[vo2_max_idx]\n", " \n", " print(f\"VO2 Max detected at index {vo2_max_idx}:\")\n", " print(f\"Time: {vo2_max_row['T(sec)']} seconds\")\n", " print(f\"VO2 Breath: {vo2_max_row['VO2 Breath_smoothed']:.1f} ml/breath\")\n", " print(f\"VO2: {vo2_max_row['VO2(ml/min)_smoothed']:.1f} ml/min\")\n", " print(f\"VO2 per kg: {vo2_max_row['VO2(ml/min)_smoothed']/weight:.1f} ml/kg/min\")\n", " print(f\"Heart Rate: {vo2_max_row['HR(bpm)_smoothed']:.1f} bpm\")\n", " print(f\"Speed: {vo2_max_row['Speed']} km/h\")\n", " print(f\"VO2 Breath Slope: {vo2_max_row['vo2_breath_slope_smoothed']:.2f}\")\n", "else:\n", " # If no consistent negative slope found, use the maximum VO2 Breath value\n", " vo2_max_idx = df['VO2 Breath_smoothed'].idxmax()\n", " vo2_max_row = df.loc[vo2_max_idx]\n", " \n", " print(f\"No consistent negative slope found. Using peak VO2 Breath at index {vo2_max_idx}:\")\n", " print(f\"Time: {vo2_max_row['T(sec)']} seconds\")\n", " print(f\"VO2 Breath: {vo2_max_row['VO2 Breath_smoothed']:.1f} ml/breath\")\n", " print(f\"VO2: {vo2_max_row['VO2(ml/min)_smoothed']:.1f} ml/min\")\n", " print(f\"VO2 per kg: {vo2_max_row['VO2(ml/min)_smoothed']/weight:.1f} ml/kg/min\")\n", " print(f\"Heart Rate: {vo2_max_row['HR(bpm)_smoothed']:.1f} bpm\")\n", " print(f\"Speed: {vo2_max_row['Speed']} km/h\")" ] }, { "cell_type": "code", "execution_count": 66, "id": "c3b2cc59", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "VO2 Pulse and HR slopes diverge consistently starting at index 89:\n", "Time: 485.0 seconds\n", "VO2 Pulse (smoothed): 13.91\n", "Heart Rate (smoothed): 136.2 bpm\n", "VO2 Pulse Slope: 0.672\n", "HR Slope: 1.000\n", "Slope Difference: 1.006\n", "VO2: 1897.8 ml/min\n", "Speed: 4.5 km/h\n", "Threshold used: 0.615\n" ] } ], "source": [ "# Calculate slopes for both VO2 Pulse and HR\n", "df['vo2_pulse_slope'] = df['VO2 Pulse_smoothed'].diff()\n", "df['hr_slope'] = df['HR(bpm)_smoothed'].diff()\n", "\n", "# Calculate the difference between the slopes\n", "df['slope_difference'] = abs(df['vo2_pulse_slope'] - df['hr_slope'])\n", "\n", "# Find where the slope difference becomes consistently large (slopes diverge)\n", "# Use a rolling window to smooth out noise\n", "window_size = len(df) // 5 # Adjust window size as needed\n", "df['slope_difference_smoothed'] = df['slope_difference'].rolling(window=window_size).mean()\n", "\n", "# Find the threshold - we'll use the 75th percentile of slope differences as threshold\n", "threshold = df['slope_difference_smoothed'].quantile(0.75)\n", "\n", "# Find points where slope difference exceeds threshold\n", "divergence_mask = df['slope_difference_smoothed'] > threshold\n", "divergence_indices = divergence_mask[divergence_mask].index\n", "\n", "if len(divergence_indices) > 0:\n", " # Find the first sustained divergence point\n", " min_consecutive_points = 5\n", " consistent_divergence_idx = None\n", " \n", " for start_idx in divergence_indices:\n", " # Check if divergence is sustained for consecutive points\n", " consecutive_count = 0\n", " for j in range(start_idx, min(start_idx + min_consecutive_points, len(df))):\n", " if j in divergence_indices:\n", " consecutive_count += 1\n", " else:\n", " break\n", " \n", " if consecutive_count >= min_consecutive_points:\n", " consistent_divergence_idx = start_idx\n", " break\n", " \n", " if consistent_divergence_idx is not None:\n", " divergence_row = df.iloc[consistent_divergence_idx]\n", " \n", " print(f\"VO2 Pulse and HR slopes diverge consistently starting at index {consistent_divergence_idx}:\")\n", " print(f\"Time: {divergence_row['T(sec)']} seconds\")\n", " print(f\"VO2 Pulse (smoothed): {divergence_row['VO2 Pulse_smoothed']:.2f}\")\n", " print(f\"Heart Rate (smoothed): {divergence_row['HR(bpm)_smoothed']:.1f} bpm\")\n", " print(f\"VO2 Pulse Slope: {divergence_row['vo2_pulse_slope']:.3f}\")\n", " print(f\"HR Slope: {divergence_row['hr_slope']:.3f}\")\n", " print(f\"Slope Difference: {divergence_row['slope_difference_smoothed']:.3f}\")\n", " print(f\"VO2: {divergence_row['VO2(ml/min)_smoothed']:.1f} ml/min\")\n", " print(f\"Speed: {divergence_row['Speed']} km/h\")\n", " print(f\"Threshold used: {threshold:.3f}\")\n", " else:\n", " print(f\"No sustained divergence found. Threshold: {threshold:.3f}\")\n", " # Show the point with maximum slope difference instead\n", " max_diff_idx = df['slope_difference_smoothed'].idxmax()\n", " max_diff_row = df.iloc[max_diff_idx]\n", " \n", " print(f\"\\nPoint with maximum slope difference at index {max_diff_idx}:\")\n", " print(f\"Time: {max_diff_row['T(sec)']} seconds\")\n", " print(f\"VO2 Pulse (smoothed): {max_diff_row['VO2 Pulse_smoothed']:.2f}\")\n", " print(f\"Heart Rate (smoothed): {max_diff_row['HR(bpm)_smoothed']:.1f} bpm\")\n", " print(f\"Slope Difference: {max_diff_row['slope_difference_smoothed']:.3f}\")\n", "else:\n", " print(\"No significant slope divergence found between VO2 Pulse and HR\")" ] }, { "cell_type": "code", "execution_count": null, "id": "672d68f3", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Maximum FAT_smoothed occurs at index 30:\n", "Heart Rate (smoothed): 96.7 bpm\n", "FAT (smoothed): 3.894 kcal/min\n" ] } ], "source": [ "max_fat_smoothed_idx = df['FAT_smoothed'].idxmax()\n", "max_fat_smoothed_row = df.loc[max_fat_smoothed_idx]\n", "max_heart_rate = 220 - keirstyn_data['Age'].iloc[0]\n", "\n", "print(f\"Maximum FAT_smoothed occurs at index {max_fat_smoothed_idx}:\")\n", "print(f\"Heart Rate (smoothed): {max_fat_smoothed_row['HR(bpm)_smoothed']:.1f} bpm\")\n", "print(f\"FAT (smoothed): {max_fat_smoothed_row['FAT_smoothed']:.3f} kcal/min\")" ] }, { "cell_type": "code", "execution_count": null, "id": "fe3b7605", "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "report_generation", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.12.3" } }, "nbformat": 4, "nbformat_minor": 5 }