{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "b18c1027",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Disabled one-off step: upload the spirometry PDF to OpenRouter and save the\n",
    "# model's reply as extracted_table.csv. Every line is commented out, so running\n",
    "# this cell does nothing.\n",
    "#\n",
    "# SECURITY: the API key is read from the OPENROUTER_API_KEY environment variable.\n",
    "# A literal key used to be committed in this cell; treat it as leaked and rotate it.\n",
    "\n",
    "# import os\n",
    "# import requests\n",
    "# import json\n",
    "# import base64\n",
    "# from pathlib import Path\n",
    "\n",
    "# API_KEY_REF = os.environ['OPENROUTER_API_KEY']\n",
    "\n",
    "# def encode_pdf_to_base64(pdf_path):\n",
    "#     with open(pdf_path, \"rb\") as pdf_file:\n",
    "#         return base64.b64encode(pdf_file.read()).decode('utf-8')\n",
    "\n",
    "# url = \"https://openrouter.ai/api/v1/chat/completions\"\n",
    "# headers = {\n",
    "#     \"Authorization\": f\"Bearer {API_KEY_REF}\",\n",
    "#     \"Content-Type\": \"application/json\"\n",
    "# }\n",
    "\n",
    "# # Read and encode the PDF\n",
    "# pdf_path = \"data/~Moran~K~19910201~Spirometry Exam~20250729~20250729032843.pdf\"\n",
    "# base64_pdf = encode_pdf_to_base64(pdf_path)\n",
    "# data_url = f\"data:application/pdf;base64,{base64_pdf}\"\n",
    "\n",
    "# messages = [\n",
    "#     {\n",
    "#         \"role\": \"user\",\n",
    "#         \"content\": [\n",
    "#             {\n",
    "#                 \"type\": \"text\",\n",
    "#                 \"text\": \"Please extract the Spirometry table from the pdf and return the values in csv format, \"\n",
    "#                         \"note that it is the unit of parameter that is beside it and it should not be a column. \"\n",
    "#                         \"The '-' Should be treated as empty values. \"\n",
    "#                         \"do not add 'csv' at the start or end of the response\"\n",
    "#             },\n",
    "#             {\n",
    "#                 \"type\": \"file\",\n",
    "#                 \"file\": {\n",
    "#                     \"filename\": \"document.pdf\",\n",
    "#                     \"file_data\": data_url\n",
    "#                 }\n",
    "#             },\n",
    "#         ]\n",
    "#     }\n",
    "# ]\n",
    "\n",
    "# # Optional: Configure PDF processing engine\n",
    "# # PDF parsing will still work even if the plugin is not explicitly set\n",
    "# # NOTE(review): `plugins` is never added to `payload` below, so this setting is currently unused.\n",
    "# plugins = [\n",
    "#     {\n",
    "#         \"id\": \"file-parser\",\n",
    "#         \"pdf\": {\n",
    "#             \"engine\": \"pdf-text\"  # defaults to \"mistral-ocr\". See Pricing above\n",
    "#         }\n",
    "#     }\n",
    "# ]\n",
    "\n",
    "# payload = {\n",
    "#     \"model\": \"google/gemini-2.5-flash-lite\",\n",
    "#     \"messages\": messages,\n",
    "# }\n",
    "\n",
    "# response = requests.post(url, headers=headers, json=payload)\n",
    "# # Get the response content\n",
    "# response_data = response.json()\n",
    "# print(response_data)\n",
    "\n",
    "# # Extract the content from the response\n",
    "# if 'choices' in response_data and len(response_data['choices']) > 0:\n",
    "#     content = response_data['choices'][0]['message']['content']\n",
    "\n",
    "#     # Save to a CSV file\n",
    "#     output_file = \"extracted_table.csv\"\n",
    "#     with open(output_file, 'w', encoding='utf-8') as f:\n",
    "#         f.write(content)\n",
    "\n",
    "#     print(f\"Content saved to {output_file}\")\n",
    "# else:\n",
    "#     print(\"No content found in response\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "56a9d655",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "FVC Best: 4.24, FVC Pred: 112.0\n",
      "FEV1 Best: 3.26, FEV1 Pred: 103.3\n",
      "FEV1/FVC% Best: 76.9, FEV1/FVC% Pred: 91.8\n"
     ]
    }
   ],
   "source": [
    "import pandas as pd\n",
    "\n",
    "\n",
    "def spirometry_value(table, parameter, column):\n",
    "    \"\"\"Return the first `column` value of the row whose 'Parameters' equals `parameter`.\n",
    "\n",
    "    Args:\n",
    "        table: DataFrame with a 'Parameters' column and the requested `column`.\n",
    "        parameter: exact label to look up in 'Parameters' (e.g. 'FVC').\n",
    "        column: name of the column to read (e.g. 'Best' or '%Pred.').\n",
    "\n",
    "    Raises:\n",
    "        KeyError: if no row matches `parameter`, or `column` does not exist.\n",
    "    \"\"\"\n",
    "    matches = table.loc[table['Parameters'] == parameter, column]\n",
    "    if matches.empty:\n",
    "        # A bare `.values[0]` would fail here with an opaque IndexError.\n",
    "        raise KeyError(f\"Parameter {parameter!r} not found in the 'Parameters' column\")\n",
    "    return matches.iloc[0]\n",
    "\n",
    "\n",
    "spirometry_df = pd.read_csv(\"data/spirometry_data.csv\")\n",
    "\n",
    "fvc_best = spirometry_value(spirometry_df, 'FVC', 'Best')\n",
    "fvc_pred = spirometry_value(spirometry_df, 'FVC', '%Pred.')\n",
    "\n",
    "fev1_best = spirometry_value(spirometry_df, 'FEV1', 'Best')\n",
    "fev1_pred = spirometry_value(spirometry_df, 'FEV1', '%Pred.')\n",
    "\n",
    "# Names keep their original spelling so any later cell that reads them still works.\n",
    "fev1_fevc_best = spirometry_value(spirometry_df, 'FEV1/FVC%', 'Best')\n",
    "fev1_fevc_pred = spirometry_value(spirometry_df, 'FEV1/FVC%', '%Pred.')\n",
    "\n",
    "print(f\"FVC Best: {fvc_best}, FVC Pred: {fvc_pred}\")\n",
    "print(f\"FEV1 Best: {fev1_best}, FEV1 Pred: {fev1_pred}\")\n",
    "print(f\"FEV1/FVC% Best: {fev1_fevc_best}, FEV1/FVC% Pred: {fev1_fevc_pred}\")"
] }, { "cell_type": "code", "execution_count": 8, "id": "990f4b4f", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Peak VT: 2.75\n", "HR at Peak VT: 155.0\n" ] } ], "source": [ "df = pd.read_csv('data/Pnoe_20250729_1550-Moran_Keirstyn.csv', delimiter=';')\n", "peak_vt = df['VT(l)'].max()\n", "max_vt_row = df.loc[df['VT(l)'].idxmax()]\n", "print(f\"Peak VT: {peak_vt}\")\n", "hr = max_vt_row['HR(bpm)']\n", "print(f\"HR at Peak VT: {hr}\")" ] }, { "cell_type": "code", "execution_count": 9, "id": "041cbc3d", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Peak VT: 2.3770000000000002\n", "HR at Peak VT: 171.525\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "/tmp/ipykernel_69398/4157056299.py:3: FutureWarning: errors='ignore' is deprecated and will raise in a future version. Use to_numeric without passing `errors` and catch exceptions explicitly instead\n", " df = df.apply(pd.to_numeric, errors='ignore')\n" ] } ], "source": [ "df = pd.read_csv('data/Pnoe_20250729_1550-Moran_Keirstyn.csv', delimiter=';')\n", "# Convert all columns to numeric where possible, coercing errors to NaN\n", "df = df.apply(pd.to_numeric, errors='ignore')\n", "df['VO2 Pulse'] = df['VO2(ml/min)'] / df['HR(bpm)'] # VO2 Pulse in mL/beat\n", "df['VO2 Breath'] = df['VO2(ml/min)'] / df['BF(bpm)'] # VO2 per Breath in mL/breath\n", "df['CHO'] = df['EE(kcal/min)'] * df['CARBS(%)']/100\n", "df['FAT'] = df['EE(kcal/min)'] * df['FAT(%)']/100\n", "# Smooth key columns using rolling window\n", "window_size = 10\n", "\n", "# List of columns to smooth\n", "columns_to_smooth = ['VO2(ml/min)', 'VCO2(ml/min)', 'HR(bpm)', 'VT(l)', 'BF(bpm)', 'VE(l/min)', 'VO2 Pulse', 'VO2 Breath', 'CHO', 'FAT']\n", "\n", "# Apply smoothing to each column\n", "for col in columns_to_smooth:\n", " if col in df.columns:\n", " df[f'{col}_smoothed'] = df[col].rolling(window=window_size).mean()\n", " \n", "peak_vt = df['VT(l)_smoothed'].max()\n", 
"max_vt_row = df.loc[df['VT(l)_smoothed'].idxmax()]\n", "print(f\"Peak VT: {peak_vt}\")\n", "hr = max_vt_row['HR(bpm)_smoothed']\n", "print(f\"HR at Peak VT: {hr}\")" ] }, { "cell_type": "code", "execution_count": 10, "id": "de7cadd1", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Percent FEV: 72.91411042944786\n" ] } ], "source": [ "percent_fev = (peak_vt / fev1_best) * 100\n", "print(f\"Percent FEV: {percent_fev}\")" ] }, { "cell_type": "code", "execution_count": 11, "id": "cb972ed3", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
| \n", " | MeasurementDate | \n", "Comment | \n", "ExternalDeviceId | \n", "ExternalPatientId | \n", "FirstName | \n", "LastName | \n", "BirthDate | \n", "Age | \n", "Ethnicity | \n", "Gender | \n", "... | \n", "Child_XC | \n", "Child_XC_Unit | \n", "Child_BIVA_ZRh | \n", "Child_BIVA_ZXcH | \n", "Child_PhA | \n", "Child_PhA_Unit | \n", "Child_REE_Kcal | \n", "Child_REE_MJ | \n", "Child_TEE_Kcal | \n", "Child_TEE_MJ | \n", "
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 13 | \n", "2025-07-29T18:58:54.0000000Z | \n", "NaN | \n", "10000001583275_0055003f5631501320313557 | \n", "KM6479696509 | \n", "Keirstyn | \n", "Moran | \n", "1991-02-01T00:00:00.0000000Z | \n", "34 | \n", "Caucasian | \n", "Female | \n", "... | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "
1 rows × 147 columns
\n", "