Refactor analysis notebook: comment out API calls and update CSV file handling; modify page 2 of report for content and structure adjustments

This commit is contained in:
bolade
2025-10-22 15:45:57 +01:00
parent 1d5625b61a
commit 85ea73ade8
3 changed files with 135 additions and 140 deletions
+91 -110
View File
@@ -2,102 +2,93 @@
"cells": [
{
"cell_type": "code",
"execution_count": 2,
"execution_count": 6,
"id": "b18c1027",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"{'id': 'gen-1759135172-DIhs7TMuaaVY0h3T2ibV', 'provider': 'Google', 'model': 'google/gemini-2.5-flash-lite', 'object': 'chat.completion', 'created': 1759135172, 'choices': [{'logprobs': None, 'finish_reason': 'stop', 'native_finish_reason': 'STOP', 'index': 0, 'message': {'role': 'assistant', 'content': 'Parameters,Best,LLN,Pred.,%Pred.,ZScore,PRE#1,PRE#2,PRE#3\\nFVC,L,4.24,3.03,3.79,112.0,0.95,4.24,4.17,4.15\\nFEV1,L,3.26,2.53,3.16,103.3,0.28,3.26,3.21,3.14\\nFEV1/FVC%,76.89,72.47,83.78,91.8,-1.05,76.9,77.0,75.7\\nPEF,L/m,684,222,384,178.7,-,444,438,684\\nFEF2575,L/s,2.74,2.15,3.42,80.2,-0.84,2.74,2.68,2.48\\nFEF25,L/s,6.08,-,-,-,6.08,6.0,5.53\\nFEF50,L/s,3.06,-,-,-,3.06,3.1,2.77\\nFEF75,L/s,1.06,0.71,1.41,75.1,-0.72,1.06,1.12,0.94\\nPEFTime,ms,-,-,79,-,79,49,39\\nEvol,mL,-,-,78.0,-,78.0,77.0,197.0\\nFEV6,L,4.22,3.03,3.79,111.4,-,4.22,4.17,4.13', 'refusal': None, 'reasoning': None}}], 'usage': {'prompt_tokens': 1350, 'completion_tokens': 454, 'total_tokens': 1804, 'prompt_tokens_details': {'cached_tokens': 0}, 'completion_tokens_details': {'reasoning_tokens': 0, 'image_tokens': 0}}}\n",
"Content saved to extracted_table.csv\n"
]
}
],
"outputs": [],
"source": [
"\n",
"import requests\n",
"import json\n",
"import base64\n",
"from pathlib import Path\n",
"# import requests\n",
"# import json\n",
"# import base64\n",
"# from pathlib import Path\n",
"\n",
"API_KEY_REF = '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'\n",
"def encode_pdf_to_base64(pdf_path):\n",
" with open(pdf_path, \"rb\") as pdf_file:\n",
" return base64.b64encode(pdf_file.read()).decode('utf-8')\n",
"# API_KEY_REF = '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'\n",
"# def encode_pdf_to_base64(pdf_path):\n",
"# with open(pdf_path, \"rb\") as pdf_file:\n",
"# return base64.b64encode(pdf_file.read()).decode('utf-8')\n",
"\n",
"url = \"https://openrouter.ai/api/v1/chat/completions\"\n",
"headers = {\n",
" \"Authorization\": f\"Bearer {API_KEY_REF}\",\n",
" \"Content-Type\": \"application/json\"\n",
"}\n",
"# url = \"https://openrouter.ai/api/v1/chat/completions\"\n",
"# headers = {\n",
"# \"Authorization\": f\"Bearer {API_KEY_REF}\",\n",
"# \"Content-Type\": \"application/json\"\n",
"# }\n",
"\n",
"# Read and encode the PDF\n",
"pdf_path = \"data/~Moran~K~19910201~Spirometry Exam~20250729~20250729032843.pdf\"\n",
"base64_pdf = encode_pdf_to_base64(pdf_path)\n",
"data_url = f\"data:application/pdf;base64,{base64_pdf}\"\n",
"# # Read and encode the PDF\n",
"# pdf_path = \"data/~Moran~K~19910201~Spirometry Exam~20250729~20250729032843.pdf\"\n",
"# base64_pdf = encode_pdf_to_base64(pdf_path)\n",
"# data_url = f\"data:application/pdf;base64,{base64_pdf}\"\n",
"\n",
"messages = [\n",
" {\n",
" \"role\": \"user\",\n",
" \"content\": [\n",
" {\n",
" \"type\": \"text\",\n",
" \"text\": \"Please extract the Spirometry table from the pdf and return the values in csv format, \"\n",
" \"note that it is the unit of parameter that is beside it and it should not be a column. \"\n",
" \"The '-' Should be treated as empty values.\"\n",
" \"do not add 'csv' at the start or end of the response\"\n",
" },\n",
" {\n",
" \"type\": \"file\",\n",
" \"file\": {\n",
" \"filename\": \"document.pdf\",\n",
" \"file_data\": data_url\n",
" }\n",
" },\n",
" ]\n",
" }\n",
"]\n",
"# messages = [\n",
"# {\n",
"# \"role\": \"user\",\n",
"# \"content\": [\n",
"# {\n",
"# \"type\": \"text\",\n",
"# \"text\": \"Please extract the Spirometry table from the pdf and return the values in csv format, \"\n",
"# \"note that it is the unit of parameter that is beside it and it should not be a column. \"\n",
"# \"The '-' Should be treated as empty values.\"\n",
"# \"do not add 'csv' at the start or end of the response\"\n",
"# },\n",
"# {\n",
"# \"type\": \"file\",\n",
"# \"file\": {\n",
"# \"filename\": \"document.pdf\",\n",
"# \"file_data\": data_url\n",
"# }\n",
"# },\n",
"# ]\n",
"# }\n",
"# ]\n",
"\n",
"# Optional: Configure PDF processing engine\n",
"# PDF parsing will still work even if the plugin is not explicitly set\n",
"plugins = [\n",
" {\n",
" \"id\": \"file-parser\",\n",
" \"pdf\": {\n",
" \"engine\": \"pdf-text\" # defaults to \"mistral-ocr\". See Pricing above\n",
" }\n",
" }\n",
"]\n",
"# # Optional: Configure PDF processing engine\n",
"# # PDF parsing will still work even if the plugin is not explicitly set\n",
"# plugins = [\n",
"# {\n",
"# \"id\": \"file-parser\",\n",
"# \"pdf\": {\n",
"# \"engine\": \"pdf-text\" # defaults to \"mistral-ocr\". See Pricing above\n",
"# }\n",
"# }\n",
"# ]\n",
"\n",
"payload = {\n",
" \"model\": \"google/gemini-2.5-flash-lite\",\n",
" \"messages\": messages,\n",
"}\n",
"# payload = {\n",
"# \"model\": \"google/gemini-2.5-flash-lite\",\n",
"# \"messages\": messages,\n",
"# }\n",
"\n",
"response = requests.post(url, headers=headers, json=payload)\n",
"# Get the response content\n",
"response_data = response.json()\n",
"print(response_data)\n",
"# response = requests.post(url, headers=headers, json=payload)\n",
"# # Get the response content\n",
"# response_data = response.json()\n",
"# print(response_data)\n",
"\n",
"# Extract the content from the response\n",
"if 'choices' in response_data and len(response_data['choices']) > 0:\n",
" content = response_data['choices'][0]['message']['content']\n",
"# # Extract the content from the response\n",
"# if 'choices' in response_data and len(response_data['choices']) > 0:\n",
"# content = response_data['choices'][0]['message']['content']\n",
" \n",
" # Save to a CSV file\n",
" output_file = \"extracted_table.csv\"\n",
" with open(output_file, 'w', encoding='utf-8') as f:\n",
" f.write(content)\n",
"# # Save to a CSV file\n",
"# output_file = \"extracted_table.csv\"\n",
"# with open(output_file, 'w', encoding='utf-8') as f:\n",
"# f.write(content)\n",
" \n",
" print(f\"Content saved to {output_file}\")\n",
"else:\n",
" print(\"No content found in response\")"
"# print(f\"Content saved to {output_file}\")\n",
"# else:\n",
"# print(\"No content found in response\")"
]
},
{
"cell_type": "code",
"execution_count": 12,
"execution_count": 7,
"id": "56a9d655",
"metadata": {},
"outputs": [
@@ -107,13 +98,13 @@
"text": [
"FVC Best: 4.24, FVC Pred: 112.0\n",
"FEV1 Best: 3.26, FEV1 Pred: 103.3\n",
"FEV1/FVC% Best: 76.89, FEV1/FVC% Pred: 91.8\n"
"FEV1/FVC% Best: 76.9, FEV1/FVC% Pred: 91.8\n"
]
}
],
"source": [
"import pandas as pd\n",
"spirometry_df = pd.read_csv(\"extracted_table.csv\")\n",
"spirometry_df = pd.read_csv(\"data/spirometry_data.csv\")\n",
"\n",
"fvc_best = spirometry_df.loc[spirometry_df['Parameters'] == 'FVC', 'Best'].values[0]\n",
"fvc_pred = spirometry_df.loc[spirometry_df['Parameters'] == 'FVC', '%Pred.'].values[0]\n",
@@ -131,7 +122,7 @@
},
{
"cell_type": "code",
"execution_count": 16,
"execution_count": 8,
"id": "990f4b4f",
"metadata": {},
"outputs": [
@@ -155,7 +146,7 @@
},
{
"cell_type": "code",
"execution_count": 18,
"execution_count": 9,
"id": "041cbc3d",
"metadata": {},
"outputs": [
@@ -171,7 +162,7 @@
"name": "stderr",
"output_type": "stream",
"text": [
"/tmp/ipykernel_301535/4157056299.py:3: FutureWarning: errors='ignore' is deprecated and will raise in a future version. Use to_numeric without passing `errors` and catch exceptions explicitly instead\n",
"/tmp/ipykernel_69398/4157056299.py:3: FutureWarning: errors='ignore' is deprecated and will raise in a future version. Use to_numeric without passing `errors` and catch exceptions explicitly instead\n",
" df = df.apply(pd.to_numeric, errors='ignore')\n"
]
}
@@ -204,7 +195,7 @@
},
{
"cell_type": "code",
"execution_count": 20,
"execution_count": 10,
"id": "de7cadd1",
"metadata": {},
"outputs": [
@@ -223,7 +214,7 @@
},
{
"cell_type": "code",
"execution_count": 24,
"execution_count": 11,
"id": "cb972ed3",
"metadata": {},
"outputs": [
@@ -320,7 +311,7 @@
"[1 rows x 147 columns]"
]
},
"execution_count": 24,
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
@@ -334,7 +325,7 @@
},
{
"cell_type": "code",
"execution_count": 26,
"execution_count": 12,
"id": "98d9295a",
"metadata": {},
"outputs": [
@@ -354,7 +345,7 @@
},
{
"cell_type": "code",
"execution_count": 32,
"execution_count": 13,
"id": "cdfeb309",
"metadata": {},
"outputs": [
@@ -418,7 +409,7 @@
},
{
"cell_type": "code",
"execution_count": 33,
"execution_count": 14,
"id": "4420cfea",
"metadata": {},
"outputs": [
@@ -476,7 +467,7 @@
},
{
"cell_type": "code",
"execution_count": 37,
"execution_count": 15,
"id": "62803668",
"metadata": {},
"outputs": [
@@ -561,7 +552,7 @@
},
{
"cell_type": "code",
"execution_count": 40,
"execution_count": 16,
"id": "07593b56",
"metadata": {},
"outputs": [
@@ -572,8 +563,8 @@
"Zone 1 (Active Recovery): 81.7 - 96.7 bpm\n",
"Zone 2 (Aerobic Base): 96.7 - 100.5 bpm\n",
"Zone 3 (Aerobic): 100.5 - 179.7 bpm\n",
"Zone 4 (Lactate Threshold): 179.7 - 199.7 bpm\n",
"Zone 5 (VO2 Max): 199.7+ bpm\n"
"Zone 4 (Lactate Threshold): 179.7 - 189.7 bpm\n",
"Zone 5 (VO2 Max): 189.7 - 199.7 bpm\n"
]
}
],
@@ -582,7 +573,8 @@
"zone_2_start = optimal_row['HR(bpm)_smoothed']\n",
"zone_3_start = vt1\n",
"zone_4_start = vt2['HeartRate'] - 10\n",
"zone_5_start = vt2['HeartRate'] + 10\n",
"zone_5_start = vt2['HeartRate']\n",
"zone_5_end = vt2['HeartRate'] + 10\n",
"\n",
"zone_1_end = zone_2_start\n",
"zone_2_end = vt1['HeartRate']\n",
@@ -593,12 +585,12 @@
"print(f\"Zone 2 (Aerobic Base): {zone_2_start:.1f} - {zone_2_end:.1f} bpm\")\n",
"print(f\"Zone 3 (Aerobic): {zone_3_start['HeartRate']:.1f} - {zone_3_end:.1f} bpm\")\n",
"print(f\"Zone 4 (Lactate Threshold): {zone_4_start:.1f} - {zone_4_end:.1f} bpm\")\n",
"print(f\"Zone 5 (VO2 Max): {zone_5_start:.1f}+ bpm\")"
"print(f\"Zone 5 (VO2 Max): {zone_5_start:.1f} - {zone_5_end:.1f} bpm\")"
]
},
{
"cell_type": "code",
"execution_count": 60,
"execution_count": 17,
"id": "c90415b2",
"metadata": {},
"outputs": [
@@ -661,7 +653,7 @@
},
{
"cell_type": "code",
"execution_count": 66,
"execution_count": 18,
"id": "c3b2cc59",
"metadata": {},
"outputs": [
@@ -750,7 +742,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 19,
"id": "672d68f3",
"metadata": {},
"outputs": [
@@ -779,18 +771,7 @@
"execution_count": null,
"id": "fe3b7605",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"4"
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"outputs": [],
"source": []
}
],
Binary file not shown.
+44 -30
View File
@@ -1,7 +1,7 @@
<div class="bg-white w-full page m-0 px-10">
<div class="px-16 py-10">
<div class="px-16 pt-10">
<!-- Table of Contents Header -->
<div class="mb-8">
<div class="mb-2">
<h1
class="text-5xl font-bold text-black mb-6 tracking-wide border-b-4 border-blue-500 pb-2 text-center"
>
@@ -12,12 +12,46 @@
<!-- Table of Contents Items -->
<div class="flex flex-col justify-between space-y-6 py-6">
<!-- Nutrition Guidelines -->
<div class="flex items-start bg-gray-200 h-24">
<div
class="bg-black text-white text-2xl font-bold w-16 h-full flex items-center justify-center mr-8 flex-shrink-0"
>
4
</div>
<div class="flex flex-col flex-1 py-1 justify-center h-full">
<h2 class="text-2xl font-semibold text-black">
Nutrition Guidelines
</h2>
<p class="text-gray-600 text-base">
Ultrasound &amp; Body Composition Assessment
</p>
<p class="text-gray-600 text-base">
Resting Metabolic Rate Assessment
</p>
</div>
</div>
<!-- Nutrition Recommendations -->
<div class="flex items-start bg-gray-200 h-24">
<div
class="bg-black text-white text-2xl font-bold w-16 h-full flex items-center justify-center mr-8 flex-shrink-0"
>
6
</div>
<div class="flex flex-col py-1 flex-1 justify-center h-full">
<h2 class="text-2xl font-semibold text-black">
Nutrition Recommendations
</h2>
</div>
</div>
<!-- Lung Analysis -->
<div class="flex items-start bg-gray-200 h-24">
<div
class="bg-black text-white text-2xl font-bold w-16 h-full flex items-center justify-center mr-8 flex-shrink-0"
>
3
7
</div>
<div class="flex flex-col flex-1 py-1 justify-center h-full">
<h2 class="text-2xl font-semibold text-black">
@@ -37,10 +71,10 @@
<div
class="bg-black text-white text-2xl font-bold w-16 h-full flex items-center justify-center mr-8 flex-shrink-0"
>
4
8
</div>
<div class="flex flex-col py-1 flex-1 justify-center h-full">
<h2 class="text-2xl font-semibold text-black mb-3">
<div class="flex flex-col py-1 flex-1 justify-center h-full">
<h2 class="text-2xl font-semibold text-black">
Cardio Metrics
</h2>
<p class="text-gray-600 text-base">
@@ -49,26 +83,12 @@
</div>
</div>
<!-- Fuel Utilization -->
<div class="flex items-start bg-gray-200 h-24">
<div
class="bg-black text-white text-2xl font-bold w-16 h-full flex items-center justify-center mr-8 flex-shrink-0"
>
5
</div>
<div class="flex flex-col py-1 flex-1 justify-center flex-1 h-full">
<h2 class="text-2xl font-semibold text-black">
Fuel Utilization
</h2>
</div>
</div>
<!-- Local Muscle Activity -->
<div class="flex items-start bg-gray-200 h-24">
<div
class="bg-black text-white text-2xl font-bold w-16 h-full flex items-center justify-center mr-8 flex-shrink-0"
>
9
11
</div>
<div class="flex flex-col justify-center h-full flex-1">
<h2 class="text-2xl font-semibold text-black">
@@ -82,7 +102,7 @@
<div
class="bg-black text-white text-2xl font-bold w-16 h-full flex items-center justify-center mr-8 flex-shrink-0"
>
10
12
</div>
<div class="flex flex-col h-full justify-center flex-1">
<h2 class="text-2xl font-semibold text-black">
@@ -96,15 +116,12 @@
<div
class="bg-black text-white text-2xl font-bold w-16 h-full flex items-center justify-center mr-8 flex-shrink-0"
>
12
14
</div>
<div class="flex flex-col h-full justify-center flex-1">
<h2 class="text-2xl font-semibold text-black">
Next Steps
</h2>
<div class="space-y-2">
<!-- No sub-items -->
</div>
</div>
</div>
@@ -113,15 +130,12 @@
<div
class="bg-black text-white text-2xl font-bold w-16 h-full flex items-center justify-center mr-8 flex-shrink-0"
>
13
15
</div>
<div class="flex flex-col h-full justify-center flex-1">
<h2 class="text-2xl font-semibold text-black">
Glossary
</h2>
<div class="space-y-2">
<!-- No sub-items -->
</div>
</div>
</div>
</div>