Refactor analysis notebook: comment out API calls and update CSV file handling; modify page 2 of report for content and structure adjustments

This commit is contained in:
bolade
2025-10-22 15:45:57 +01:00
parent 1d5625b61a
commit 85ea73ade8
3 changed files with 135 additions and 140 deletions
+91 -110
View File
@@ -2,102 +2,93 @@
"cells": [ "cells": [
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 2, "execution_count": 6,
"id": "b18c1027", "id": "b18c1027",
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [],
{
"name": "stdout",
"output_type": "stream",
"text": [
"{'id': 'gen-1759135172-DIhs7TMuaaVY0h3T2ibV', 'provider': 'Google', 'model': 'google/gemini-2.5-flash-lite', 'object': 'chat.completion', 'created': 1759135172, 'choices': [{'logprobs': None, 'finish_reason': 'stop', 'native_finish_reason': 'STOP', 'index': 0, 'message': {'role': 'assistant', 'content': 'Parameters,Best,LLN,Pred.,%Pred.,ZScore,PRE#1,PRE#2,PRE#3\\nFVC,L,4.24,3.03,3.79,112.0,0.95,4.24,4.17,4.15\\nFEV1,L,3.26,2.53,3.16,103.3,0.28,3.26,3.21,3.14\\nFEV1/FVC%,76.89,72.47,83.78,91.8,-1.05,76.9,77.0,75.7\\nPEF,L/m,684,222,384,178.7,-,444,438,684\\nFEF2575,L/s,2.74,2.15,3.42,80.2,-0.84,2.74,2.68,2.48\\nFEF25,L/s,6.08,-,-,-,6.08,6.0,5.53\\nFEF50,L/s,3.06,-,-,-,3.06,3.1,2.77\\nFEF75,L/s,1.06,0.71,1.41,75.1,-0.72,1.06,1.12,0.94\\nPEFTime,ms,-,-,79,-,79,49,39\\nEvol,mL,-,-,78.0,-,78.0,77.0,197.0\\nFEV6,L,4.22,3.03,3.79,111.4,-,4.22,4.17,4.13', 'refusal': None, 'reasoning': None}}], 'usage': {'prompt_tokens': 1350, 'completion_tokens': 454, 'total_tokens': 1804, 'prompt_tokens_details': {'cached_tokens': 0}, 'completion_tokens_details': {'reasoning_tokens': 0, 'image_tokens': 0}}}\n",
"Content saved to extracted_table.csv\n"
]
}
],
"source": [ "source": [
"\n", "\n",
"import requests\n", "# import requests\n",
"import json\n", "# import json\n",
"import base64\n", "# import base64\n",
"from pathlib import Path\n", "# from pathlib import Path\n",
"\n", "\n",
"API_KEY_REF = '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'\n", "# API_KEY_REF = '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'\n",
"def encode_pdf_to_base64(pdf_path):\n", "# def encode_pdf_to_base64(pdf_path):\n",
" with open(pdf_path, \"rb\") as pdf_file:\n", "# with open(pdf_path, \"rb\") as pdf_file:\n",
" return base64.b64encode(pdf_file.read()).decode('utf-8')\n", "# return base64.b64encode(pdf_file.read()).decode('utf-8')\n",
"\n", "\n",
"url = \"https://openrouter.ai/api/v1/chat/completions\"\n", "# url = \"https://openrouter.ai/api/v1/chat/completions\"\n",
"headers = {\n", "# headers = {\n",
" \"Authorization\": f\"Bearer {API_KEY_REF}\",\n", "# \"Authorization\": f\"Bearer {API_KEY_REF}\",\n",
" \"Content-Type\": \"application/json\"\n", "# \"Content-Type\": \"application/json\"\n",
"}\n", "# }\n",
"\n", "\n",
"# Read and encode the PDF\n", "# # Read and encode the PDF\n",
"pdf_path = \"data/~Moran~K~19910201~Spirometry Exam~20250729~20250729032843.pdf\"\n", "# pdf_path = \"data/~Moran~K~19910201~Spirometry Exam~20250729~20250729032843.pdf\"\n",
"base64_pdf = encode_pdf_to_base64(pdf_path)\n", "# base64_pdf = encode_pdf_to_base64(pdf_path)\n",
"data_url = f\"data:application/pdf;base64,{base64_pdf}\"\n", "# data_url = f\"data:application/pdf;base64,{base64_pdf}\"\n",
"\n", "\n",
"messages = [\n", "# messages = [\n",
" {\n", "# {\n",
" \"role\": \"user\",\n", "# \"role\": \"user\",\n",
" \"content\": [\n", "# \"content\": [\n",
" {\n", "# {\n",
" \"type\": \"text\",\n", "# \"type\": \"text\",\n",
" \"text\": \"Please extract the Spirometry table from the pdf and return the values in csv format, \"\n", "# \"text\": \"Please extract the Spirometry table from the pdf and return the values in csv format, \"\n",
" \"note that it is the unit of parameter that is beside it and it should not be a column. \"\n", "# \"note that it is the unit of parameter that is beside it and it should not be a column. \"\n",
" \"The '-' Should be treated as empty values.\"\n", "# \"The '-' Should be treated as empty values.\"\n",
" \"do not add 'csv' at the start or end of the response\"\n", "# \"do not add 'csv' at the start or end of the response\"\n",
" },\n", "# },\n",
" {\n", "# {\n",
" \"type\": \"file\",\n", "# \"type\": \"file\",\n",
" \"file\": {\n", "# \"file\": {\n",
" \"filename\": \"document.pdf\",\n", "# \"filename\": \"document.pdf\",\n",
" \"file_data\": data_url\n", "# \"file_data\": data_url\n",
" }\n", "# }\n",
" },\n", "# },\n",
" ]\n", "# ]\n",
" }\n", "# }\n",
"]\n", "# ]\n",
"\n", "\n",
"# Optional: Configure PDF processing engine\n", "# # Optional: Configure PDF processing engine\n",
"# PDF parsing will still work even if the plugin is not explicitly set\n", "# # PDF parsing will still work even if the plugin is not explicitly set\n",
"plugins = [\n", "# plugins = [\n",
" {\n", "# {\n",
" \"id\": \"file-parser\",\n", "# \"id\": \"file-parser\",\n",
" \"pdf\": {\n", "# \"pdf\": {\n",
" \"engine\": \"pdf-text\" # defaults to \"mistral-ocr\". See Pricing above\n", "# \"engine\": \"pdf-text\" # defaults to \"mistral-ocr\". See Pricing above\n",
" }\n", "# }\n",
" }\n", "# }\n",
"]\n", "# ]\n",
"\n", "\n",
"payload = {\n", "# payload = {\n",
" \"model\": \"google/gemini-2.5-flash-lite\",\n", "# \"model\": \"google/gemini-2.5-flash-lite\",\n",
" \"messages\": messages,\n", "# \"messages\": messages,\n",
"}\n", "# }\n",
"\n", "\n",
"response = requests.post(url, headers=headers, json=payload)\n", "# response = requests.post(url, headers=headers, json=payload)\n",
"# Get the response content\n", "# # Get the response content\n",
"response_data = response.json()\n", "# response_data = response.json()\n",
"print(response_data)\n", "# print(response_data)\n",
"\n", "\n",
"# Extract the content from the response\n", "# # Extract the content from the response\n",
"if 'choices' in response_data and len(response_data['choices']) > 0:\n", "# if 'choices' in response_data and len(response_data['choices']) > 0:\n",
" content = response_data['choices'][0]['message']['content']\n", "# content = response_data['choices'][0]['message']['content']\n",
" \n", " \n",
" # Save to a CSV file\n", "# # Save to a CSV file\n",
" output_file = \"extracted_table.csv\"\n", "# output_file = \"extracted_table.csv\"\n",
" with open(output_file, 'w', encoding='utf-8') as f:\n", "# with open(output_file, 'w', encoding='utf-8') as f:\n",
" f.write(content)\n", "# f.write(content)\n",
" \n", " \n",
" print(f\"Content saved to {output_file}\")\n", "# print(f\"Content saved to {output_file}\")\n",
"else:\n", "# else:\n",
" print(\"No content found in response\")" "# print(\"No content found in response\")"
] ]
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 12, "execution_count": 7,
"id": "56a9d655", "id": "56a9d655",
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
@@ -107,13 +98,13 @@
"text": [ "text": [
"FVC Best: 4.24, FVC Pred: 112.0\n", "FVC Best: 4.24, FVC Pred: 112.0\n",
"FEV1 Best: 3.26, FEV1 Pred: 103.3\n", "FEV1 Best: 3.26, FEV1 Pred: 103.3\n",
"FEV1/FVC% Best: 76.89, FEV1/FVC% Pred: 91.8\n" "FEV1/FVC% Best: 76.9, FEV1/FVC% Pred: 91.8\n"
] ]
} }
], ],
"source": [ "source": [
"import pandas as pd\n", "import pandas as pd\n",
"spirometry_df = pd.read_csv(\"extracted_table.csv\")\n", "spirometry_df = pd.read_csv(\"data/spirometry_data.csv\")\n",
"\n", "\n",
"fvc_best = spirometry_df.loc[spirometry_df['Parameters'] == 'FVC', 'Best'].values[0]\n", "fvc_best = spirometry_df.loc[spirometry_df['Parameters'] == 'FVC', 'Best'].values[0]\n",
"fvc_pred = spirometry_df.loc[spirometry_df['Parameters'] == 'FVC', '%Pred.'].values[0]\n", "fvc_pred = spirometry_df.loc[spirometry_df['Parameters'] == 'FVC', '%Pred.'].values[0]\n",
@@ -131,7 +122,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 16, "execution_count": 8,
"id": "990f4b4f", "id": "990f4b4f",
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
@@ -155,7 +146,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 18, "execution_count": 9,
"id": "041cbc3d", "id": "041cbc3d",
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
@@ -171,7 +162,7 @@
"name": "stderr", "name": "stderr",
"output_type": "stream", "output_type": "stream",
"text": [ "text": [
"/tmp/ipykernel_301535/4157056299.py:3: FutureWarning: errors='ignore' is deprecated and will raise in a future version. Use to_numeric without passing `errors` and catch exceptions explicitly instead\n", "/tmp/ipykernel_69398/4157056299.py:3: FutureWarning: errors='ignore' is deprecated and will raise in a future version. Use to_numeric without passing `errors` and catch exceptions explicitly instead\n",
" df = df.apply(pd.to_numeric, errors='ignore')\n" " df = df.apply(pd.to_numeric, errors='ignore')\n"
] ]
} }
@@ -204,7 +195,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 20, "execution_count": 10,
"id": "de7cadd1", "id": "de7cadd1",
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
@@ -223,7 +214,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 24, "execution_count": 11,
"id": "cb972ed3", "id": "cb972ed3",
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
@@ -320,7 +311,7 @@
"[1 rows x 147 columns]" "[1 rows x 147 columns]"
] ]
}, },
"execution_count": 24, "execution_count": 11,
"metadata": {}, "metadata": {},
"output_type": "execute_result" "output_type": "execute_result"
} }
@@ -334,7 +325,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 26, "execution_count": 12,
"id": "98d9295a", "id": "98d9295a",
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
@@ -354,7 +345,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 32, "execution_count": 13,
"id": "cdfeb309", "id": "cdfeb309",
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
@@ -418,7 +409,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 33, "execution_count": 14,
"id": "4420cfea", "id": "4420cfea",
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
@@ -476,7 +467,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 37, "execution_count": 15,
"id": "62803668", "id": "62803668",
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
@@ -561,7 +552,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 40, "execution_count": 16,
"id": "07593b56", "id": "07593b56",
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
@@ -572,8 +563,8 @@
"Zone 1 (Active Recovery): 81.7 - 96.7 bpm\n", "Zone 1 (Active Recovery): 81.7 - 96.7 bpm\n",
"Zone 2 (Aerobic Base): 96.7 - 100.5 bpm\n", "Zone 2 (Aerobic Base): 96.7 - 100.5 bpm\n",
"Zone 3 (Aerobic): 100.5 - 179.7 bpm\n", "Zone 3 (Aerobic): 100.5 - 179.7 bpm\n",
"Zone 4 (Lactate Threshold): 179.7 - 199.7 bpm\n", "Zone 4 (Lactate Threshold): 179.7 - 189.7 bpm\n",
"Zone 5 (VO2 Max): 199.7+ bpm\n" "Zone 5 (VO2 Max): 189.7 - 199.7 bpm\n"
] ]
} }
], ],
@@ -582,7 +573,8 @@
"zone_2_start = optimal_row['HR(bpm)_smoothed']\n", "zone_2_start = optimal_row['HR(bpm)_smoothed']\n",
"zone_3_start = vt1\n", "zone_3_start = vt1\n",
"zone_4_start = vt2['HeartRate'] - 10\n", "zone_4_start = vt2['HeartRate'] - 10\n",
"zone_5_start = vt2['HeartRate'] + 10\n", "zone_5_start = vt2['HeartRate']\n",
"zone_5_end = vt2['HeartRate'] + 10\n",
"\n", "\n",
"zone_1_end = zone_2_start\n", "zone_1_end = zone_2_start\n",
"zone_2_end = vt1['HeartRate']\n", "zone_2_end = vt1['HeartRate']\n",
@@ -593,12 +585,12 @@
"print(f\"Zone 2 (Aerobic Base): {zone_2_start:.1f} - {zone_2_end:.1f} bpm\")\n", "print(f\"Zone 2 (Aerobic Base): {zone_2_start:.1f} - {zone_2_end:.1f} bpm\")\n",
"print(f\"Zone 3 (Aerobic): {zone_3_start['HeartRate']:.1f} - {zone_3_end:.1f} bpm\")\n", "print(f\"Zone 3 (Aerobic): {zone_3_start['HeartRate']:.1f} - {zone_3_end:.1f} bpm\")\n",
"print(f\"Zone 4 (Lactate Threshold): {zone_4_start:.1f} - {zone_4_end:.1f} bpm\")\n", "print(f\"Zone 4 (Lactate Threshold): {zone_4_start:.1f} - {zone_4_end:.1f} bpm\")\n",
"print(f\"Zone 5 (VO2 Max): {zone_5_start:.1f}+ bpm\")" "print(f\"Zone 5 (VO2 Max): {zone_5_start:.1f} - {zone_5_end:.1f} bpm\")"
] ]
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 60, "execution_count": 17,
"id": "c90415b2", "id": "c90415b2",
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
@@ -661,7 +653,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 66, "execution_count": 18,
"id": "c3b2cc59", "id": "c3b2cc59",
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
@@ -750,7 +742,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": 19,
"id": "672d68f3", "id": "672d68f3",
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
@@ -779,18 +771,7 @@
"execution_count": null, "execution_count": null,
"id": "fe3b7605", "id": "fe3b7605",
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [],
{
"data": {
"text/plain": [
"4"
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [] "source": []
} }
], ],
Binary file not shown.
+44 -30
View File
@@ -1,7 +1,7 @@
<div class="bg-white w-full page m-0 px-10"> <div class="bg-white w-full page m-0 px-10">
<div class="px-16 py-10"> <div class="px-16 pt-10">
<!-- Table of Contents Header --> <!-- Table of Contents Header -->
<div class="mb-8"> <div class="mb-2">
<h1 <h1
class="text-5xl font-bold text-black mb-6 tracking-wide border-b-4 border-blue-500 pb-2 text-center" class="text-5xl font-bold text-black mb-6 tracking-wide border-b-4 border-blue-500 pb-2 text-center"
> >
@@ -12,12 +12,46 @@
<!-- Table of Contents Items --> <!-- Table of Contents Items -->
<div class="flex flex-col justify-between space-y-6 py-6"> <div class="flex flex-col justify-between space-y-6 py-6">
<!-- Nutrition Guidelines -->
<div class="flex items-start bg-gray-200 h-24">
<div
class="bg-black text-white text-2xl font-bold w-16 h-full flex items-center justify-center mr-8 flex-shrink-0"
>
4
</div>
<div class="flex flex-col flex-1 py-1 justify-center h-full">
<h2 class="text-2xl font-semibold text-black">
Nutrition Guidelines
</h2>
<p class="text-gray-600 text-base">
Ultrasound & Body Composition Assessment
</p>
<p class="text-gray-600 text-base">
Resting Metabolic Rate Assessment
</p>
</div>
</div>
<!-- Nutrition Recommendations -->
<div class="flex items-start bg-gray-200 h-24">
<div
class="bg-black text-white text-2xl font-bold w-16 h-full flex items-center justify-center mr-8 flex-shrink-0"
>
6
</div>
<div class="flex flex-col py-1 flex-1 justify-center h-full">
<h2 class="text-2xl font-semibold text-black">
Nutrition Recommendations
</h2>
</div>
</div>
<!-- Lung Analysis --> <!-- Lung Analysis -->
<div class="flex items-start bg-gray-200 h-24"> <div class="flex items-start bg-gray-200 h-24">
<div <div
class="bg-black text-white text-2xl font-bold w-16 h-full flex items-center justify-center mr-8 flex-shrink-0" class="bg-black text-white text-2xl font-bold w-16 h-full flex items-center justify-center mr-8 flex-shrink-0"
> >
3 7
</div> </div>
<div class="flex flex-col flex-1 py-1 justify-center h-full"> <div class="flex flex-col flex-1 py-1 justify-center h-full">
<h2 class="text-2xl font-semibold text-black"> <h2 class="text-2xl font-semibold text-black">
@@ -37,10 +71,10 @@
<div <div
class="bg-black text-white text-2xl font-bold w-16 h-full flex items-center justify-center mr-8 flex-shrink-0" class="bg-black text-white text-2xl font-bold w-16 h-full flex items-center justify-center mr-8 flex-shrink-0"
> >
4 8
</div> </div>
<div class="flex flex-col py-1 flex-1 justify-center h-full"> <div class="flex flex-col py-1 flex-1 justify-center h-full">
<h2 class="text-2xl font-semibold text-black mb-3"> <h2 class="text-2xl font-semibold text-black">
Cardio Metrics Cardio Metrics
</h2> </h2>
<p class="text-gray-600 text-base"> <p class="text-gray-600 text-base">
@@ -49,26 +83,12 @@
</div> </div>
</div> </div>
<!-- Fuel Utilization -->
<div class="flex items-start bg-gray-200 h-24">
<div
class="bg-black text-white text-2xl font-bold w-16 h-full flex items-center justify-center mr-8 flex-shrink-0"
>
5
</div>
<div class="flex flex-col py-1 flex-1 justify-center flex-1 h-full">
<h2 class="text-2xl font-semibold text-black">
Fuel Utilization
</h2>
</div>
</div>
<!-- Local Muscle Activity --> <!-- Local Muscle Activity -->
<div class="flex items-start bg-gray-200 h-24"> <div class="flex items-start bg-gray-200 h-24">
<div <div
class="bg-black text-white text-2xl font-bold w-16 h-full flex items-center justify-center mr-8 flex-shrink-0" class="bg-black text-white text-2xl font-bold w-16 h-full flex items-center justify-center mr-8 flex-shrink-0"
> >
9 11
</div> </div>
<div class="flex flex-col justify-center h-full flex-1"> <div class="flex flex-col justify-center h-full flex-1">
<h2 class="text-2xl font-semibold text-black"> <h2 class="text-2xl font-semibold text-black">
@@ -82,7 +102,7 @@
<div <div
class="bg-black text-white text-2xl font-bold w-16 h-full flex items-center justify-center mr-8 flex-shrink-0" class="bg-black text-white text-2xl font-bold w-16 h-full flex items-center justify-center mr-8 flex-shrink-0"
> >
10 12
</div> </div>
<div class="flex flex-col h-full justify-center flex-1"> <div class="flex flex-col h-full justify-center flex-1">
<h2 class="text-2xl font-semibold text-black"> <h2 class="text-2xl font-semibold text-black">
@@ -96,15 +116,12 @@
<div <div
class="bg-black text-white text-2xl font-bold w-16 h-full flex items-center justify-center mr-8 flex-shrink-0" class="bg-black text-white text-2xl font-bold w-16 h-full flex items-center justify-center mr-8 flex-shrink-0"
> >
12 14
</div> </div>
<div class="flex flex-col h-full justify-center flex-1"> <div class="flex flex-col h-full justify-center flex-1">
<h2 class="text-2xl font-semibold text-black"> <h2 class="text-2xl font-semibold text-black">
Next Steps Next Steps
</h2> </h2>
<div class="space-y-2">
<!-- No sub-items -->
</div>
</div> </div>
</div> </div>
@@ -113,15 +130,12 @@
<div <div
class="bg-black text-white text-2xl font-bold w-16 h-full flex items-center justify-center mr-8 flex-shrink-0" class="bg-black text-white text-2xl font-bold w-16 h-full flex items-center justify-center mr-8 flex-shrink-0"
> >
13 15
</div> </div>
<div class="flex flex-col h-full justify-center flex-1"> <div class="flex flex-col h-full justify-center flex-1">
<h2 class="text-2xl font-semibold text-black"> <h2 class="text-2xl font-semibold text-black">
Glossary Glossary
</h2> </h2>
<div class="space-y-2">
<!-- No sub-items -->
</div>
</div> </div>
</div> </div>
</div> </div>