diff --git a/analysis.ipynb b/analysis.ipynb index 9a63529..21fe407 100644 --- a/analysis.ipynb +++ b/analysis.ipynb @@ -2,102 +2,93 @@ "cells": [ { "cell_type": "code", - "execution_count": 2, + "execution_count": 6, "id": "b18c1027", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "{'id': 'gen-1759135172-DIhs7TMuaaVY0h3T2ibV', 'provider': 'Google', 'model': 'google/gemini-2.5-flash-lite', 'object': 'chat.completion', 'created': 1759135172, 'choices': [{'logprobs': None, 'finish_reason': 'stop', 'native_finish_reason': 'STOP', 'index': 0, 'message': {'role': 'assistant', 'content': 'Parameters,Best,LLN,Pred.,%Pred.,ZScore,PRE#1,PRE#2,PRE#3\\nFVC,L,4.24,3.03,3.79,112.0,0.95,4.24,4.17,4.15\\nFEV1,L,3.26,2.53,3.16,103.3,0.28,3.26,3.21,3.14\\nFEV1/FVC%,76.89,72.47,83.78,91.8,-1.05,76.9,77.0,75.7\\nPEF,L/m,684,222,384,178.7,-,444,438,684\\nFEF2575,L/s,2.74,2.15,3.42,80.2,-0.84,2.74,2.68,2.48\\nFEF25,L/s,6.08,-,-,-,6.08,6.0,5.53\\nFEF50,L/s,3.06,-,-,-,3.06,3.1,2.77\\nFEF75,L/s,1.06,0.71,1.41,75.1,-0.72,1.06,1.12,0.94\\nPEFTime,ms,-,-,79,-,79,49,39\\nEvol,mL,-,-,78.0,-,78.0,77.0,197.0\\nFEV6,L,4.22,3.03,3.79,111.4,-,4.22,4.17,4.13', 'refusal': None, 'reasoning': None}}], 'usage': {'prompt_tokens': 1350, 'completion_tokens': 454, 'total_tokens': 1804, 'prompt_tokens_details': {'cached_tokens': 0}, 'completion_tokens_details': {'reasoning_tokens': 0, 'image_tokens': 0}}}\n", - "Content saved to extracted_table.csv\n" - ] - } - ], + "outputs": [], "source": [ "\n", - "import requests\n", - "import json\n", - "import base64\n", - "from pathlib import Path\n", + "# import requests\n", + "# import json\n", + "# import base64\n", + "# from pathlib import Path\n", "\n", - "API_KEY_REF = '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'\n", - "def encode_pdf_to_base64(pdf_path):\n", - " with open(pdf_path, \"rb\") as pdf_file:\n", - " return base64.b64encode(pdf_file.read()).decode('utf-8')\n", + "# API_KEY_REF = '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'\n", + "# def encode_pdf_to_base64(pdf_path):\n", + "# with open(pdf_path, \"rb\") as pdf_file:\n", + "# return base64.b64encode(pdf_file.read()).decode('utf-8')\n", "\n", - "url = \"https://openrouter.ai/api/v1/chat/completions\"\n", - "headers = {\n", - " \"Authorization\": f\"Bearer {API_KEY_REF}\",\n", - " \"Content-Type\": \"application/json\"\n", - "}\n", + "# url = \"https://openrouter.ai/api/v1/chat/completions\"\n", + "# headers = {\n", + "# \"Authorization\": f\"Bearer {API_KEY_REF}\",\n", + "# \"Content-Type\": \"application/json\"\n", + "# }\n", "\n", - "# Read and encode the PDF\n", - "pdf_path = \"data/~Moran~K~19910201~Spirometry Exam~20250729~20250729032843.pdf\"\n", - "base64_pdf = encode_pdf_to_base64(pdf_path)\n", - "data_url = f\"data:application/pdf;base64,{base64_pdf}\"\n", + "# # Read and encode the PDF\n", + "# pdf_path = \"data/~Moran~K~19910201~Spirometry Exam~20250729~20250729032843.pdf\"\n", + "# base64_pdf = encode_pdf_to_base64(pdf_path)\n", + "# data_url = f\"data:application/pdf;base64,{base64_pdf}\"\n", "\n", - "messages = [\n", - " {\n", - " \"role\": \"user\",\n", - " \"content\": [\n", - " {\n", - " \"type\": \"text\",\n", - " \"text\": \"Please extract the Spirometry table from the pdf and return the values in csv format, \"\n", - " \"note that it is the unit of parameter that is beside it and it should not be a column. \"\n", - " \"The '-' Should be treated as empty values.\"\n", - " \"do not add 'csv' at the start or end of the response\"\n", - " },\n", - " {\n", - " \"type\": \"file\",\n", - " \"file\": {\n", - " \"filename\": \"document.pdf\",\n", - " \"file_data\": data_url\n", - " }\n", - " },\n", - " ]\n", - " }\n", - "]\n", + "# messages = [\n", + "# {\n", + "# \"role\": \"user\",\n", + "# \"content\": [\n", + "# {\n", + "# \"type\": \"text\",\n", + "# \"text\": \"Please extract the Spirometry table from the pdf and return the values in csv format, \"\n", + "# \"note that it is the unit of parameter that is beside it and it should not be a column. \"\n", + "# \"The '-' Should be treated as empty values.\"\n", + "# \"do not add 'csv' at the start or end of the response\"\n", + "# },\n", + "# {\n", + "# \"type\": \"file\",\n", + "# \"file\": {\n", + "# \"filename\": \"document.pdf\",\n", + "# \"file_data\": data_url\n", + "# }\n", + "# },\n", + "# ]\n", + "# }\n", + "# ]\n", "\n", - "# Optional: Configure PDF processing engine\n", - "# PDF parsing will still work even if the plugin is not explicitly set\n", - "plugins = [\n", - " {\n", - " \"id\": \"file-parser\",\n", - " \"pdf\": {\n", - " \"engine\": \"pdf-text\" # defaults to \"mistral-ocr\". See Pricing above\n", - " }\n", - " }\n", - "]\n", + "# # Optional: Configure PDF processing engine\n", + "# # PDF parsing will still work even if the plugin is not explicitly set\n", + "# plugins = [\n", + "# {\n", + "# \"id\": \"file-parser\",\n", + "# \"pdf\": {\n", + "# \"engine\": \"pdf-text\" # defaults to \"mistral-ocr\". See Pricing above\n", + "# }\n", + "# }\n", + "# ]\n", "\n", - "payload = {\n", - " \"model\": \"google/gemini-2.5-flash-lite\",\n", - " \"messages\": messages,\n", - "}\n", + "# payload = {\n", + "# \"model\": \"google/gemini-2.5-flash-lite\",\n", + "# \"messages\": messages,\n", + "# }\n", "\n", - "response = requests.post(url, headers=headers, json=payload)\n", - "# Get the response content\n", - "response_data = response.json()\n", - "print(response_data)\n", + "# response = requests.post(url, headers=headers, json=payload)\n", + "# # Get the response content\n", + "# response_data = response.json()\n", + "# print(response_data)\n", "\n", - "# Extract the content from the response\n", - "if 'choices' in response_data and len(response_data['choices']) > 0:\n", - " content = response_data['choices'][0]['message']['content']\n", + "# # Extract the content from the response\n", + "# if 'choices' in response_data and len(response_data['choices']) > 0:\n", + "# content = response_data['choices'][0]['message']['content']\n", " \n", - " # Save to a CSV file\n", - " output_file = \"extracted_table.csv\"\n", - " with open(output_file, 'w', encoding='utf-8') as f:\n", - " f.write(content)\n", + "# # Save to a CSV file\n", + "# output_file = \"extracted_table.csv\"\n", + "# with open(output_file, 'w', encoding='utf-8') as f:\n", + "# f.write(content)\n", " \n", - " print(f\"Content saved to {output_file}\")\n", - "else:\n", - " print(\"No content found in response\")" + "# print(f\"Content saved to {output_file}\")\n", + "# else:\n", + "# print(\"No content found in response\")" ] }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 7, "id": "56a9d655", "metadata": {}, "outputs": [ @@ -107,13 +98,13 @@ "text": [ "FVC Best: 4.24, FVC Pred: 112.0\n", "FEV1 Best: 3.26, FEV1 Pred: 103.3\n", - "FEV1/FVC% Best: 76.89, FEV1/FVC% Pred: 91.8\n" + "FEV1/FVC% Best: 76.9, FEV1/FVC% Pred: 91.8\n" ] } ], "source": [ "import pandas as pd\n", - "spirometry_df = pd.read_csv(\"extracted_table.csv\")\n", + "spirometry_df = pd.read_csv(\"data/spirometry_data.csv\")\n", "\n", "fvc_best = spirometry_df.loc[spirometry_df['Parameters'] == 'FVC', 'Best'].values[0]\n", "fvc_pred = spirometry_df.loc[spirometry_df['Parameters'] == 'FVC', '%Pred.'].values[0]\n", @@ -131,7 +122,7 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 8, "id": "990f4b4f", "metadata": {}, "outputs": [ @@ -155,7 +146,7 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 9, "id": "041cbc3d", "metadata": {}, "outputs": [ @@ -171,7 +162,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "/tmp/ipykernel_301535/4157056299.py:3: FutureWarning: errors='ignore' is deprecated and will raise in a future version. Use to_numeric without passing `errors` and catch exceptions explicitly instead\n", + "/tmp/ipykernel_69398/4157056299.py:3: FutureWarning: errors='ignore' is deprecated and will raise in a future version. Use to_numeric without passing `errors` and catch exceptions explicitly instead\n", " df = df.apply(pd.to_numeric, errors='ignore')\n" ] } @@ -204,7 +195,7 @@ }, { "cell_type": "code", - "execution_count": 20, + "execution_count": 10, "id": "de7cadd1", "metadata": {}, "outputs": [ @@ -223,7 +214,7 @@ }, { "cell_type": "code", - "execution_count": 24, + "execution_count": 11, "id": "cb972ed3", "metadata": {}, "outputs": [ @@ -320,7 +311,7 @@ "[1 rows x 147 columns]" ] }, - "execution_count": 24, + "execution_count": 11, "metadata": {}, "output_type": "execute_result" } @@ -334,7 +325,7 @@ }, { "cell_type": "code", - "execution_count": 26, + "execution_count": 12, "id": "98d9295a", "metadata": {}, "outputs": [ @@ -354,7 +345,7 @@ }, { "cell_type": "code", - "execution_count": 32, + "execution_count": 13, "id": "cdfeb309", "metadata": {}, "outputs": [ @@ -418,7 +409,7 @@ }, { "cell_type": "code", - "execution_count": 33, + "execution_count": 14, "id": "4420cfea", "metadata": {}, "outputs": [ @@ -476,7 +467,7 @@ }, { "cell_type": "code", - "execution_count": 37, + "execution_count": 15, "id": "62803668", "metadata": {}, "outputs": [ @@ -561,7 +552,7 @@ }, { "cell_type": "code", - "execution_count": 40, + "execution_count": 16, "id": "07593b56", "metadata": {}, "outputs": [ @@ -572,8 +563,8 @@ "Zone 1 (Active Recovery): 81.7 - 96.7 bpm\n", "Zone 2 (Aerobic Base): 96.7 - 100.5 bpm\n", "Zone 3 (Aerobic): 100.5 - 179.7 bpm\n", - "Zone 4 (Lactate Threshold): 179.7 - 199.7 bpm\n", - "Zone 5 (VO2 Max): 199.7+ bpm\n" + "Zone 4 (Lactate Threshold): 179.7 - 189.7 bpm\n", + "Zone 5 (VO2 Max): 189.7 - 199.7 bpm\n" ] } ], @@ -582,7 +573,8 @@ "zone_2_start = optimal_row['HR(bpm)_smoothed']\n", "zone_3_start = vt1\n", "zone_4_start = vt2['HeartRate'] - 10\n", - "zone_5_start = vt2['HeartRate'] + 10\n", + "zone_5_start = vt2['HeartRate']\n", + "zone_5_end = vt2['HeartRate'] + 10\n", "\n", "zone_1_end = zone_2_start\n", "zone_2_end = vt1['HeartRate']\n", @@ -593,12 +585,12 @@ "print(f\"Zone 2 (Aerobic Base): {zone_2_start:.1f} - {zone_2_end:.1f} bpm\")\n", "print(f\"Zone 3 (Aerobic): {zone_3_start['HeartRate']:.1f} - {zone_3_end:.1f} bpm\")\n", "print(f\"Zone 4 (Lactate Threshold): {zone_4_start:.1f} - {zone_4_end:.1f} bpm\")\n", - "print(f\"Zone 5 (VO2 Max): {zone_5_start:.1f}+ bpm\")" + "print(f\"Zone 5 (VO2 Max): {zone_5_start:.1f} - {zone_5_end:.1f} bpm\")" ] }, { "cell_type": "code", - "execution_count": 60, + "execution_count": 17, "id": "c90415b2", "metadata": {}, "outputs": [ @@ -661,7 +653,7 @@ }, { "cell_type": "code", - "execution_count": 66, + "execution_count": 18, "id": "c3b2cc59", "metadata": {}, "outputs": [ @@ -750,7 +742,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 19, "id": "672d68f3", "metadata": {}, "outputs": [ @@ -779,18 +771,7 @@ "execution_count": null, "id": "fe3b7605", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "4" - ] - }, - "execution_count": 2, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [] } ], diff --git a/multi_page_report.pdf b/multi_page_report.pdf index 8736fd6..8ef80b2 100644 Binary files a/multi_page_report.pdf and b/multi_page_report.pdf differ diff --git a/report_gen/page_2.html b/report_gen/page_2.html index 581b88f..cd6a407 100644 --- a/report_gen/page_2.html +++ b/report_gen/page_2.html @@ -1,7 +1,7 @@
+ Ultrasound & Body Composition Assessment +
++ Resting Metabolic Rate Assessment +
+@@ -49,26 +83,12 @@