feat: Implement report generator service for medical reports

- Added ReportGeneratorService to handle generation of medical reports from uploaded files.
- Implemented methods for processing Pnoe CSV data, generating graphs, and calculating analysis metrics.
- Integrated Jinja2 for HTML report generation with customizable templates.
- Added functionality to convert HTML content to PDF using Playwright.
- Ensured proper directory structure for saving generated graphs and reports.
2025-10-03 21:41:00 +01:00
parent 1d8136d6ad
commit 11ee6b192f
13 changed files with 896 additions and 658 deletions
@@ -0,0 +1,807 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "id": "b18c1027",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "{'id': 'gen-1759135172-DIhs7TMuaaVY0h3T2ibV', 'provider': 'Google', 'model': 'google/gemini-2.5-flash-lite', 'object': 'chat.completion', 'created': 1759135172, 'choices': [{'logprobs': None, 'finish_reason': 'stop', 'native_finish_reason': 'STOP', 'index': 0, 'message': {'role': 'assistant', 'content': 'Parameters,Best,LLN,Pred.,%Pred.,ZScore,PRE#1,PRE#2,PRE#3\\nFVC,L,4.24,3.03,3.79,112.0,0.95,4.24,4.17,4.15\\nFEV1,L,3.26,2.53,3.16,103.3,0.28,3.26,3.21,3.14\\nFEV1/FVC%,76.89,72.47,83.78,91.8,-1.05,76.9,77.0,75.7\\nPEF,L/m,684,222,384,178.7,-,444,438,684\\nFEF2575,L/s,2.74,2.15,3.42,80.2,-0.84,2.74,2.68,2.48\\nFEF25,L/s,6.08,-,-,-,6.08,6.0,5.53\\nFEF50,L/s,3.06,-,-,-,3.06,3.1,2.77\\nFEF75,L/s,1.06,0.71,1.41,75.1,-0.72,1.06,1.12,0.94\\nPEFTime,ms,-,-,79,-,79,49,39\\nEvol,mL,-,-,78.0,-,78.0,77.0,197.0\\nFEV6,L,4.22,3.03,3.79,111.4,-,4.22,4.17,4.13', 'refusal': None, 'reasoning': None}}], 'usage': {'prompt_tokens': 1350, 'completion_tokens': 454, 'total_tokens': 1804, 'prompt_tokens_details': {'cached_tokens': 0}, 'completion_tokens_details': {'reasoning_tokens': 0, 'image_tokens': 0}}}\n",
+      "Content saved to extracted_table.csv\n"
+     ]
+    }
+   ],
+   "source": [
+    "\n",
+    "import requests\n",
+    "import json\n",
+    "import base64\n",
+    "from pathlib import Path\n",
+    "\n",
+    "API_KEY_REF = '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'\n",
+    "def encode_pdf_to_base64(pdf_path):\n",
+    "    with open(pdf_path, \"rb\") as pdf_file:\n",
+    "        return base64.b64encode(pdf_file.read()).decode('utf-8')\n",
+    "\n",
+    "url = \"https://openrouter.ai/api/v1/chat/completions\"\n",
+    "headers = {\n",
+    "    \"Authorization\": f\"Bearer {API_KEY_REF}\",\n",
+    "    \"Content-Type\": \"application/json\"\n",
+    "}\n",
+    "\n",
+    "# Read and encode the PDF\n",
+    "pdf_path = \"data/~Moran~K~19910201~Spirometry Exam~20250729~20250729032843.pdf\"\n",
+    "base64_pdf = encode_pdf_to_base64(pdf_path)\n",
+    "data_url = f\"data:application/pdf;base64,{base64_pdf}\"\n",
+    "\n",
+    "messages = [\n",
+    "    {\n",
+    "        \"role\": \"user\",\n",
+    "        \"content\": [\n",
+    "            {\n",
+    "                \"type\": \"text\",\n",
+    "                \"text\": \"Please extract the Spirometry table from the pdf and return the values in csv format, \"\n",
+    "                \"note that it is the unit of parameter that is beside it and it should not be a column. \"\n",
+    "                \"The '-' Should be treated as empty values.\"\n",
+    "                \"do not add 'csv' at the start or end of the response\"\n",
+    "            },\n",
+    "            {\n",
+    "                \"type\": \"file\",\n",
+    "                \"file\": {\n",
+    "                    \"filename\": \"document.pdf\",\n",
+    "                    \"file_data\": data_url\n",
+    "                }\n",
+    "            },\n",
+    "        ]\n",
+    "    }\n",
+    "]\n",
+    "\n",
+    "# Optional: Configure PDF processing engine\n",
+    "# PDF parsing will still work even if the plugin is not explicitly set\n",
+    "plugins = [\n",
+    "    {\n",
+    "        \"id\": \"file-parser\",\n",
+    "        \"pdf\": {\n",
+    "            \"engine\": \"pdf-text\"  # defaults to \"mistral-ocr\". See Pricing above\n",
+    "        }\n",
+    "    }\n",
+    "]\n",
+    "\n",
+    "payload = {\n",
+    "    \"model\": \"google/gemini-2.5-flash-lite\",\n",
+    "    \"messages\": messages,\n",
+    "}\n",
+    "\n",
+    "response = requests.post(url, headers=headers, json=payload)\n",
+    "# Get the response content\n",
+    "response_data = response.json()\n",
+    "print(response_data)\n",
+    "\n",
+    "# Extract the content from the response\n",
+    "if 'choices' in response_data and len(response_data['choices']) > 0:\n",
+    "    content = response_data['choices'][0]['message']['content']\n",
+    "    \n",
+    "    # Save to a CSV file\n",
+    "    output_file = \"extracted_table.csv\"\n",
+    "    with open(output_file, 'w', encoding='utf-8') as f:\n",
+    "        f.write(content)\n",
+    "    \n",
+    "    print(f\"Content saved to {output_file}\")\n",
+    "else:\n",
+    "    print(\"No content found in response\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 12,
+   "id": "56a9d655",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "FVC Best: 4.24, FVC Pred: 112.0\n",
+      "FEV1 Best: 3.26, FEV1 Pred: 103.3\n",
+      "FEV1/FVC% Best: 76.89, FEV1/FVC% Pred: 91.8\n"
+     ]
+    }
+   ],
+   "source": [
+    "import pandas as pd\n",
+    "spirometry_df = pd.read_csv(\"extracted_table.csv\")\n",
+    "\n",
+    "fvc_best = spirometry_df.loc[spirometry_df['Parameters'] == 'FVC', 'Best'].values[0]\n",
+    "fvc_pred = spirometry_df.loc[spirometry_df['Parameters'] == 'FVC', '%Pred.'].values[0]\n",
+    "\n",
+    "fev1_best = spirometry_df.loc[spirometry_df['Parameters'] == 'FEV1', 'Best'].values[0]\n",
+    "fev1_pred = spirometry_df.loc[spirometry_df['Parameters'] == 'FEV1', '%Pred.'].values[0]\n",
+    "\n",
+    "fev1_fevc_best = spirometry_df.loc[spirometry_df['Parameters'] == 'FEV1/FVC%', 'Best'].values[0]\n",
+    "fev1_fevc_pred = spirometry_df.loc[spirometry_df['Parameters'] == 'FEV1/FVC%', '%Pred.'].values[0]\n",
+    "\n",
+    "print(f\"FVC Best: {fvc_best}, FVC Pred: {fvc_pred}\")\n",
+    "print(f\"FEV1 Best: {fev1_best}, FEV1 Pred: {fev1_pred}\")\n",
+    "print(f\"FEV1/FVC% Best: {fev1_fevc_best}, FEV1/FVC% Pred: {fev1_fevc_pred}\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 16,
+   "id": "990f4b4f",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Peak VT: 2.75\n",
+      "HR at Peak VT: 155.0\n"
+     ]
+    }
+   ],
+   "source": [
+    "df = pd.read_csv('data/Pnoe_20250729_1550-Moran_Keirstyn.csv', delimiter=';')\n",
+    "peak_vt = df['VT(l)'].max()\n",
+    "max_vt_row = df.loc[df['VT(l)'].idxmax()]\n",
+    "print(f\"Peak VT: {peak_vt}\")\n",
+    "hr = max_vt_row['HR(bpm)']\n",
+    "print(f\"HR at Peak VT: {hr}\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 18,
+   "id": "041cbc3d",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Peak VT: 2.3770000000000002\n",
+      "HR at Peak VT: 171.525\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "/tmp/ipykernel_301535/4157056299.py:3: FutureWarning: errors='ignore' is deprecated and will raise in a future version. Use to_numeric without passing `errors` and catch exceptions explicitly instead\n",
+      "  df = df.apply(pd.to_numeric, errors='ignore')\n"
+     ]
+    }
+   ],
+   "source": [
+    "df = pd.read_csv('data/Pnoe_20250729_1550-Moran_Keirstyn.csv', delimiter=';')\n",
+    "# Convert columns to numeric where possible; with errors='ignore', columns that cannot be parsed are left unchanged (not coerced to NaN)\n",
+    "df = df.apply(pd.to_numeric, errors='ignore')\n",
+    "df['VO2 Pulse'] = df['VO2(ml/min)'] / df['HR(bpm)']  # VO2 Pulse in mL/beat\n",
+    "df['VO2 Breath'] = df['VO2(ml/min)'] / df['BF(bpm)']  # VO2 per Breath in mL/breath\n",
+    "df['CHO'] = df['EE(kcal/min)'] * df['CARBS(%)']/100\n",
+    "df['FAT'] = df['EE(kcal/min)'] * df['FAT(%)']/100\n",
+    "# Smooth key columns using rolling window\n",
+    "window_size = 10\n",
+    "\n",
+    "# List of columns to smooth\n",
+    "columns_to_smooth = ['VO2(ml/min)', 'VCO2(ml/min)', 'HR(bpm)', 'VT(l)', 'BF(bpm)', 'VE(l/min)', 'VO2 Pulse', 'VO2 Breath', 'CHO', 'FAT']\n",
+    "\n",
+    "# Apply smoothing to each column\n",
+    "for col in columns_to_smooth:\n",
+    "    if col in df.columns:\n",
+    "        df[f'{col}_smoothed'] = df[col].rolling(window=window_size).mean()\n",
+    "        \n",
+    "peak_vt = df['VT(l)_smoothed'].max()\n",
+    "max_vt_row = df.loc[df['VT(l)_smoothed'].idxmax()]\n",
+    "print(f\"Peak VT: {peak_vt}\")\n",
+    "hr = max_vt_row['HR(bpm)_smoothed']\n",
+    "print(f\"HR at Peak VT: {hr}\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 20,
+   "id": "de7cadd1",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Percent FEV: 72.91411042944786\n"
+     ]
+    }
+   ],
+   "source": [
+    "percent_fev = (peak_vt / fev1_best) * 100\n",
+    "print(f\"Percent FEV: {percent_fev}\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 24,
+   "id": "cb972ed3",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>MeasurementDate</th>\n",
+       "      <th>Comment</th>\n",
+       "      <th>ExternalDeviceId</th>\n",
+       "      <th>ExternalPatientId</th>\n",
+       "      <th>FirstName</th>\n",
+       "      <th>LastName</th>\n",
+       "      <th>BirthDate</th>\n",
+       "      <th>Age</th>\n",
+       "      <th>Ethnicity</th>\n",
+       "      <th>Gender</th>\n",
+       "      <th>...</th>\n",
+       "      <th>Child_XC</th>\n",
+       "      <th>Child_XC_Unit</th>\n",
+       "      <th>Child_BIVA_ZRh</th>\n",
+       "      <th>Child_BIVA_ZXcH</th>\n",
+       "      <th>Child_PhA</th>\n",
+       "      <th>Child_PhA_Unit</th>\n",
+       "      <th>Child_REE_Kcal</th>\n",
+       "      <th>Child_REE_MJ</th>\n",
+       "      <th>Child_TEE_Kcal</th>\n",
+       "      <th>Child_TEE_MJ</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>13</th>\n",
+       "      <td>2025-07-29T18:58:54.0000000Z</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>10000001583275_0055003f5631501320313557</td>\n",
+       "      <td>KM6479696509</td>\n",
+       "      <td>Keirstyn</td>\n",
+       "      <td>Moran</td>\n",
+       "      <td>1991-02-01T00:00:00.0000000Z</td>\n",
+       "      <td>34</td>\n",
+       "      <td>Caucasian</td>\n",
+       "      <td>Female</td>\n",
+       "      <td>...</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "<p>1 rows × 147 columns</p>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "                 MeasurementDate  Comment  \\\n",
+       "13  2025-07-29T18:58:54.0000000Z      NaN   \n",
+       "\n",
+       "                           ExternalDeviceId ExternalPatientId FirstName  \\\n",
+       "13  10000001583275_0055003f5631501320313557      KM6479696509  Keirstyn   \n",
+       "\n",
+       "   LastName                     BirthDate  Age  Ethnicity  Gender  ...  \\\n",
+       "13    Moran  1991-02-01T00:00:00.0000000Z   34  Caucasian  Female  ...   \n",
+       "\n",
+       "    Child_XC Child_XC_Unit  Child_BIVA_ZRh Child_BIVA_ZXcH  Child_PhA  \\\n",
+       "13       NaN           NaN             NaN             NaN        NaN   \n",
+       "\n",
+       "   Child_PhA_Unit  Child_REE_Kcal  Child_REE_MJ Child_TEE_Kcal  Child_TEE_MJ  \n",
+       "13            NaN             NaN           NaN            NaN           NaN  \n",
+       "\n",
+       "[1 rows x 147 columns]"
+      ]
+     },
+     "execution_count": 24,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "personal_df = pd.read_excel('data/SECA body comp for all patients.xlsx')\n",
+    "\n",
+    "keirstyn_data = personal_df[personal_df['LastName'].str.contains('Moran', case=False, na=False)]\n",
+    "keirstyn_data"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 26,
+   "id": "98d9295a",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "VO2 Max: 47.906290322580645\n"
+     ]
+    }
+   ],
+   "source": [
+    "v02_max = df['VO2(ml/min)_smoothed'].max()\n",
+    "weight = keirstyn_data['Weight'].iloc[0]\n",
+    "print(f\"VO2 Max: {v02_max/weight}\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 32,
+   "id": "cdfeb309",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\n",
+      "==================================================\n",
+      "Optimal Fat Burning Zone (highest fat:carb ratio):\n",
+      "Time: 164.0 seconds\n",
+      "Fat burn rate: 3.894 kcal/min\n",
+      "Carb burn rate: 1.575 kcal/min\n",
+      "Fat:Carb ratio: 2.47\n",
+      "Heart Rate: 96.7 bpm\n",
+      "VO2: 1147.9 ml/min\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Find the point where fat burning is highest and carb burning is lowest\n",
+    "# Using the smoothed data for more stable results\n",
+    "fat_burn_max_idx = df['FAT_smoothed'].idxmax()\n",
+    "carb_burn_min_idx = df['CHO_smoothed'].idxmin()\n",
+    "\n",
+    "# # Get the data at maximum fat burning point\n",
+    "# max_fat_row = df.loc[fat_burn_max_idx]\n",
+    "# print(f\"Maximum Fat Burning Point:\")\n",
+    "# print(f\"Time: {max_fat_row['T(sec)']} seconds\")\n",
+    "# print(f\"Fat burn rate: {max_fat_row['FAT_smoothed']:.3f} kcal/min\")\n",
+    "# print(f\"Carb burn rate: {max_fat_row['CHO_smoothed']:.3f} kcal/min\")\n",
+    "# print(f\"Heart Rate: {max_fat_row['HR(bpm)_smoothed']:.1f} bpm\")\n",
+    "# print(f\"VO2: {max_fat_row['VO2(ml/min)_smoothed']:.1f} ml/min\")\n",
+    "\n",
+    "# print(\"\\n\" + \"=\"*50)\n",
+    "\n",
+    "# # Get the data at minimum carb burning point\n",
+    "# min_carb_row = df.loc[carb_burn_min_idx]\n",
+    "# print(f\"Minimum Carbohydrate Burning Point:\")\n",
+    "# print(f\"Time: {min_carb_row['T(sec)']} seconds\")\n",
+    "# print(f\"Fat burn rate: {min_carb_row['FAT_smoothed']:.3f} kcal/min\")\n",
+    "# print(f\"Carb burn rate: {min_carb_row['CHO_smoothed']:.3f} kcal/min\")\n",
+    "# print(f\"Heart Rate: {min_carb_row['HR(bpm)_smoothed']:.1f} bpm\")\n",
+    "# print(f\"VO2: {min_carb_row['VO2(ml/min)_smoothed']:.1f} ml/min\")\n",
+    "\n",
+    "print(\"\\n\" + \"=\"*50)\n",
+    "\n",
+    "# Find the optimal fat burning zone (highest fat:carb ratio)\n",
+    "df['fat_carb_ratio'] = df['FAT_smoothed'] / (df['CHO_smoothed'] + 0.00000001)  # Add small value to avoid division by zero\n",
+    "optimal_fat_idx = df['fat_carb_ratio'].idxmax()\n",
+    "optimal_row = df.loc[optimal_fat_idx]\n",
+    "\n",
+    "print(f\"Optimal Fat Burning Zone (highest fat:carb ratio):\")\n",
+    "print(f\"Time: {optimal_row['T(sec)']} seconds\")\n",
+    "print(f\"Fat burn rate: {optimal_row['FAT_smoothed']:.3f} kcal/min\")\n",
+    "print(f\"Carb burn rate: {optimal_row['CHO_smoothed']:.3f} kcal/min\")\n",
+    "print(f\"Fat:Carb ratio: {optimal_row['fat_carb_ratio']:.2f}\")\n",
+    "print(f\"Heart Rate: {optimal_row['HR(bpm)_smoothed']:.1f} bpm\")\n",
+    "print(f\"VO2: {optimal_row['VO2(ml/min)_smoothed']:.1f} ml/min\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 33,
+   "id": "4420cfea",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Found 2 intersections at indices: [18, 47]\n",
+      "\n",
+      "Last intersection at index 47:\n",
+      "Time: 251.0 seconds\n",
+      "Fat burn rate: 3.040 kcal/min\n",
+      "Carb burn rate: 3.166 kcal/min\n",
+      "Heart Rate: 100.5 bpm\n",
+      "VO2: 1283.0 ml/min\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Find intersections where FAT_smoothed and CHO_smoothed cross each other\n",
+    "intersections = []\n",
+    "\n",
+    "for i in range(1, len(df)):\n",
+    "    # Check if there's a crossover between consecutive points\n",
+    "    prev_fat = df.iloc[i-1]['FAT_smoothed']\n",
+    "    prev_cho = df.iloc[i-1]['CHO_smoothed']\n",
+    "    curr_fat = df.iloc[i]['FAT_smoothed']\n",
+    "    curr_cho = df.iloc[i]['CHO_smoothed']\n",
+    "    \n",
+    "    # Skip if any values are NaN\n",
+    "    if pd.isna(prev_fat) or pd.isna(prev_cho) or pd.isna(curr_fat) or pd.isna(curr_cho):\n",
+    "        continue\n",
+    "    \n",
+    "    # Check if lines cross (fat was above/below cho and now it's below/above)\n",
+    "    if ((prev_fat > prev_cho and curr_fat < curr_cho) or \n",
+    "        (prev_fat < prev_cho and curr_fat > curr_cho)):\n",
+    "        intersections.append(i)\n",
+    "\n",
+    "print(f\"Found {len(intersections)} intersections at indices: {intersections}\")\n",
+    "\n",
+    "if intersections:\n",
+    "    # Get the last intersection\n",
+    "    last_intersection_idx = intersections[-1]\n",
+    "    last_intersection_row = df.iloc[last_intersection_idx]\n",
+    "    \n",
+    "    print(f\"\\nLast intersection at index {last_intersection_idx}:\")\n",
+    "    print(f\"Time: {last_intersection_row['T(sec)']} seconds\")\n",
+    "    print(f\"Fat burn rate: {last_intersection_row['FAT_smoothed']:.3f} kcal/min\")\n",
+    "    print(f\"Carb burn rate: {last_intersection_row['CHO_smoothed']:.3f} kcal/min\")\n",
+    "    print(f\"Heart Rate: {last_intersection_row['HR(bpm)_smoothed']:.1f} bpm\")\n",
+    "    print(f\"VO2: {last_intersection_row['VO2(ml/min)_smoothed']:.1f} ml/min\")\n",
+    "else:\n",
+    "    print(\"No intersections found between FAT and CHO curves\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 37,
+   "id": "62803668",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "VT1: {'HeartRate': 100.5, 'Speed': 4.0, 'Time': 251.0}\n",
+      "VT2: {'HeartRate': 189.71300000000002, 'Speed': 7.5, 'Time': 1524.0}\n"
+     ]
+    }
+   ],
+   "source": [
+    "def detect_vt1(df, fat_col=\"FAT_smoothed\", carb_col=\"CHO_smoothed\"):\n",
+    "    \"\"\"\n",
+    "    Detect VT1 as the first index where carb burn > fat burn and remains higher.\n",
+    "    \"\"\"\n",
+    "    condition = df[carb_col] > df[fat_col]\n",
+    "    crossover_indices = condition[condition].index\n",
+    "\n",
+    "    if len(crossover_indices) == 0:\n",
+    "        return None  # No crossover found\n",
+    "    \n",
+    "    # Find first crossover where carbs remain higher for the rest\n",
+    "    for idx in crossover_indices:\n",
+    "        if all(df.loc[idx:][carb_col] > df.loc[idx:][fat_col]):\n",
+    "            return idx\n",
+    "    return None\n",
+    "\n",
+    "\n",
+    "def detect_vt2(df, vent_col=\"VE(l/min)_smoothed\", bf_col=\"BF(bpm)_smoothed\", smooth_window=5):\n",
+    "    \"\"\"\n",
+    "    Detect VT2 using slope/inflection method.\n",
+    "    Works with either Ventilation (VE) or Breathing Frequency (Bf).\n",
+    "    \"\"\"\n",
+    "    col = vent_col if vent_col in df.columns else bf_col\n",
+    "    \n",
+    "    # Use already smoothed data\n",
+    "    smoothed_col = col\n",
+    "    \n",
+    "    # Compute slope (first derivative)\n",
+    "    df[\"slope\"] = df[smoothed_col].diff()\n",
+    "    \n",
+    "    # Detect inflection: largest change in slope (second derivative peak)\n",
+    "    df[\"second_derivative\"] = df[\"slope\"].diff()\n",
+    "    inflection_idx = df[\"second_derivative\"].idxmax()\n",
+    "    \n",
+    "    return inflection_idx\n",
+    "\n",
+    "\n",
+    "def analyze_thresholds(df_input):\n",
+    "    # Work on a copy so the helper columns added by detect_vt2 (slope, second_derivative) do not leak into the caller's dataframe\n",
+    "    df_copy = df_input.copy()\n",
+    "    \n",
+    "    # --- Detect VT1 ---\n",
+    "    vt1_idx = detect_vt1(df_copy)\n",
+    "    vt1 = None\n",
+    "    if vt1_idx is not None:\n",
+    "        vt1 = {\n",
+    "            \"HeartRate\": df_copy.loc[vt1_idx, \"HR(bpm)_smoothed\"],\n",
+    "            \"Speed\": df_copy.loc[vt1_idx, \"Speed\"],\n",
+    "            \"Time\": df_copy.loc[vt1_idx, \"T(sec)\"]\n",
+    "        }\n",
+    "    \n",
+    "    # --- Detect VT2 ---\n",
+    "    vt2_idx = detect_vt2(df_copy)\n",
+    "    vt2 = None\n",
+    "    if vt2_idx is not None:\n",
+    "        vt2 = {\n",
+    "            \"HeartRate\": df_copy.loc[vt2_idx, \"HR(bpm)_smoothed\"],\n",
+    "            \"Speed\": df_copy.loc[vt2_idx, \"Speed\"],\n",
+    "            \"Time\": df_copy.loc[vt2_idx, \"T(sec)\"]\n",
+    "        }\n",
+    "    \n",
+    "    return vt1, vt2\n",
+    "\n",
+    "\n",
+    "vt1, vt2 = analyze_thresholds(df)\n",
+    "print(\"VT1:\", vt1)\n",
+    "print(\"VT2:\", vt2)\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 40,
+   "id": "07593b56",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Zone 1 (Active Recovery): 81.7 - 96.7 bpm\n",
+      "Zone 2 (Aerobic Base): 96.7 - 100.5 bpm\n",
+      "Zone 3 (Aerobic): 100.5 - 179.7 bpm\n",
+      "Zone 4 (Lactate Threshold): 179.7 - 199.7 bpm\n",
+      "Zone 5 (VO2 Max): 199.7+ bpm\n"
+     ]
+    }
+   ],
+   "source": [
+    "zone_1_start = optimal_row['HR(bpm)_smoothed'] - 15\n",
+    "zone_2_start = optimal_row['HR(bpm)_smoothed']\n",
+    "zone_3_start = vt1\n",
+    "zone_4_start = vt2['HeartRate'] - 10\n",
+    "zone_5_start = vt2['HeartRate'] + 10\n",
+    "\n",
+    "zone_1_end = zone_2_start\n",
+    "zone_2_end = vt1['HeartRate']\n",
+    "zone_3_end = zone_4_start\n",
+    "zone_4_end = zone_5_start\n",
+    "\n",
+    "print(f\"Zone 1 (Active Recovery): {zone_1_start:.1f} - {zone_1_end:.1f} bpm\")\n",
+    "print(f\"Zone 2 (Aerobic Base): {zone_2_start:.1f} - {zone_2_end:.1f} bpm\")\n",
+    "print(f\"Zone 3 (Aerobic): {zone_3_start['HeartRate']:.1f} - {zone_3_end:.1f} bpm\")\n",
+    "print(f\"Zone 4 (Lactate Threshold): {zone_4_start:.1f} - {zone_4_end:.1f} bpm\")\n",
+    "print(f\"Zone 5 (VO2 Max): {zone_5_start:.1f}+ bpm\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 60,
+   "id": "c90415b2",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "VO2 Max detected at index 202:\n",
+      "Time: 985.0 seconds\n",
+      "VO2 Breath: 58.2 ml/breath\n",
+      "VO2: 2167.8 ml/min\n",
+      "VO2 per kg: 38.8 ml/kg/min\n",
+      "Heart Rate: 170.5 bpm\n",
+      "Speed: 6.0 km/h\n",
+      "VO2 Breath Slope: -0.02\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Calculate the slope of VO2 Breath (first derivative)\n",
+    "df['vo2_breath_slope'] = df['VO2 Breath_smoothed'].diff()\n",
+    "\n",
+    "# Find points where slope is consistently zero or negative\n",
+    "# We'll use a rolling window to check for consistent negative/zero slope\n",
+    "window = len(df) // 3  # Number of consecutive points to check\n",
+    "\n",
+    "# Calculate rolling mean of slope to smooth out noise\n",
+    "df['vo2_breath_slope_smoothed'] = df['vo2_breath_slope'].rolling(window=window).mean()\n",
+    "\n",
+    "# Find where slope becomes consistently zero or negative\n",
+    "mask = df['vo2_breath_slope_smoothed'] <= 0\n",
+    "consistent_negative_indices = mask[mask].index\n",
+    "\n",
+    "if len(consistent_negative_indices) > 0:\n",
+    "    # Find the first point where slope becomes consistently negative/zero\n",
+    "    vo2_max_idx = consistent_negative_indices[0]\n",
+    "    vo2_max_row = df.loc[vo2_max_idx]\n",
+    "    \n",
+    "    print(f\"VO2 Max detected at index {vo2_max_idx}:\")\n",
+    "    print(f\"Time: {vo2_max_row['T(sec)']} seconds\")\n",
+    "    print(f\"VO2 Breath: {vo2_max_row['VO2 Breath_smoothed']:.1f} ml/breath\")\n",
+    "    print(f\"VO2: {vo2_max_row['VO2(ml/min)_smoothed']:.1f} ml/min\")\n",
+    "    print(f\"VO2 per kg: {vo2_max_row['VO2(ml/min)_smoothed']/weight:.1f} ml/kg/min\")\n",
+    "    print(f\"Heart Rate: {vo2_max_row['HR(bpm)_smoothed']:.1f} bpm\")\n",
+    "    print(f\"Speed: {vo2_max_row['Speed']} km/h\")\n",
+    "    print(f\"VO2 Breath Slope: {vo2_max_row['vo2_breath_slope_smoothed']:.2f}\")\n",
+    "else:\n",
+    "    # If no consistent negative slope found, use the maximum VO2 Breath value\n",
+    "    vo2_max_idx = df['VO2 Breath_smoothed'].idxmax()\n",
+    "    vo2_max_row = df.loc[vo2_max_idx]\n",
+    "    \n",
+    "    print(f\"No consistent negative slope found. Using peak VO2 Breath at index {vo2_max_idx}:\")\n",
+    "    print(f\"Time: {vo2_max_row['T(sec)']} seconds\")\n",
+    "    print(f\"VO2 Breath: {vo2_max_row['VO2 Breath_smoothed']:.1f} ml/breath\")\n",
+    "    print(f\"VO2: {vo2_max_row['VO2(ml/min)_smoothed']:.1f} ml/min\")\n",
+    "    print(f\"VO2 per kg: {vo2_max_row['VO2(ml/min)_smoothed']/weight:.1f} ml/kg/min\")\n",
+    "    print(f\"Heart Rate: {vo2_max_row['HR(bpm)_smoothed']:.1f} bpm\")\n",
+    "    print(f\"Speed: {vo2_max_row['Speed']} km/h\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 66,
+   "id": "c3b2cc59",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "VO2 Pulse and HR slopes diverge consistently starting at index 89:\n",
+      "Time: 485.0 seconds\n",
+      "VO2 Pulse (smoothed): 13.91\n",
+      "Heart Rate (smoothed): 136.2 bpm\n",
+      "VO2 Pulse Slope: 0.672\n",
+      "HR Slope: 1.000\n",
+      "Slope Difference: 1.006\n",
+      "VO2: 1897.8 ml/min\n",
+      "Speed: 4.5 km/h\n",
+      "Threshold used: 0.615\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Calculate slopes for both VO2 Pulse and HR\n",
+    "df['vo2_pulse_slope'] = df['VO2 Pulse_smoothed'].diff()\n",
+    "df['hr_slope'] = df['HR(bpm)_smoothed'].diff()\n",
+    "\n",
+    "# Calculate the difference between the slopes\n",
+    "df['slope_difference'] = abs(df['vo2_pulse_slope'] - df['hr_slope'])\n",
+    "\n",
+    "# Find where the slope difference becomes consistently large (slopes diverge)\n",
+    "# Use a rolling window to smooth out noise\n",
+    "window_size = len(df) // 5  # Adjust window size as needed\n",
+    "df['slope_difference_smoothed'] = df['slope_difference'].rolling(window=window_size).mean()\n",
+    "\n",
+    "# Find the threshold - we'll use the 75th percentile of slope differences as threshold\n",
+    "threshold = df['slope_difference_smoothed'].quantile(0.75)\n",
+    "\n",
+    "# Find points where slope difference exceeds threshold\n",
+    "divergence_mask = df['slope_difference_smoothed'] > threshold\n",
+    "divergence_indices = divergence_mask[divergence_mask].index\n",
+    "\n",
+    "if len(divergence_indices) > 0:\n",
+    "    # Find the first sustained divergence point\n",
+    "    min_consecutive_points = 5\n",
+    "    consistent_divergence_idx = None\n",
+    "    \n",
+    "    for start_idx in divergence_indices:\n",
+    "        # Check if divergence is sustained for consecutive points\n",
+    "        consecutive_count = 0\n",
+    "        for j in range(start_idx, min(start_idx + min_consecutive_points, len(df))):\n",
+    "            if j in divergence_indices:\n",
+    "                consecutive_count += 1\n",
+    "            else:\n",
+    "                break\n",
+    "        \n",
+    "        if consecutive_count >= min_consecutive_points:\n",
+    "            consistent_divergence_idx = start_idx\n",
+    "            break\n",
+    "    \n",
+    "    if consistent_divergence_idx is not None:\n",
+    "        divergence_row = df.iloc[consistent_divergence_idx]\n",
+    "        \n",
+    "        print(f\"VO2 Pulse and HR slopes diverge consistently starting at index {consistent_divergence_idx}:\")\n",
+    "        print(f\"Time: {divergence_row['T(sec)']} seconds\")\n",
+    "        print(f\"VO2 Pulse (smoothed): {divergence_row['VO2 Pulse_smoothed']:.2f}\")\n",
+    "        print(f\"Heart Rate (smoothed): {divergence_row['HR(bpm)_smoothed']:.1f} bpm\")\n",
+    "        print(f\"VO2 Pulse Slope: {divergence_row['vo2_pulse_slope']:.3f}\")\n",
+    "        print(f\"HR Slope: {divergence_row['hr_slope']:.3f}\")\n",
+    "        print(f\"Slope Difference: {divergence_row['slope_difference_smoothed']:.3f}\")\n",
+    "        print(f\"VO2: {divergence_row['VO2(ml/min)_smoothed']:.1f} ml/min\")\n",
+    "        print(f\"Speed: {divergence_row['Speed']} km/h\")\n",
+    "        print(f\"Threshold used: {threshold:.3f}\")\n",
+    "    else:\n",
+    "        print(f\"No sustained divergence found. Threshold: {threshold:.3f}\")\n",
+    "        # Show the point with maximum slope difference instead\n",
+    "        max_diff_idx = df['slope_difference_smoothed'].idxmax()\n",
+    "        max_diff_row = df.iloc[max_diff_idx]\n",
+    "        \n",
+    "        print(f\"\\nPoint with maximum slope difference at index {max_diff_idx}:\")\n",
+    "        print(f\"Time: {max_diff_row['T(sec)']} seconds\")\n",
+    "        print(f\"VO2 Pulse (smoothed): {max_diff_row['VO2 Pulse_smoothed']:.2f}\")\n",
+    "        print(f\"Heart Rate (smoothed): {max_diff_row['HR(bpm)_smoothed']:.1f} bpm\")\n",
+    "        print(f\"Slope Difference: {max_diff_row['slope_difference_smoothed']:.3f}\")\n",
+    "else:\n",
+    "    print(\"No significant slope divergence found between VO2 Pulse and HR\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "672d68f3",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Maximum FAT_smoothed occurs at index 30:\n",
+      "Heart Rate (smoothed): 96.7 bpm\n",
+      "FAT (smoothed): 3.894 kcal/min\n"
+     ]
+    }
+   ],
+   "source": [
+    "max_fat_smoothed_idx = df['FAT_smoothed'].idxmax()\n",
+    "max_fat_smoothed_row = df.loc[max_fat_smoothed_idx]\n",
+    "max_heart_rate = 220 - keirstyn_data['Age'].iloc[0]\n",
+    "\n",
+    "print(f\"Maximum FAT_smoothed occurs at index {max_fat_smoothed_idx}:\")\n",
+    "print(f\"Heart Rate (smoothed): {max_fat_smoothed_row['HR(bpm)_smoothed']:.1f} bpm\")\n",
+    "print(f\"FAT (smoothed): {max_fat_smoothed_row['FAT_smoothed']:.3f} kcal/min\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "fe3b7605",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "report_generation",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.12.3"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
@@ -0,0 +1,318 @@
+"""
+Report Generator Service
+
+This service handles the generation of medical reports from uploaded files.
+It processes data, generates graphs, and creates PDF reports.
+"""
+
+from pathlib import Path
+from typing import Any, Dict, List
+
+import pandas as pd
+from jinja2 import Environment, FileSystemLoader
+from playwright.sync_api import sync_playwright
+
+from app.services.context import context_list
+from app.services.graph_generator import GraphGenerator
+
+
+class ReportGeneratorService:
+    """Service for generating medical performance reports"""
+
+    def __init__(
+        self,
+        template_dir: str = "app/report_gen",
+        graphs_dir: str = "graphs",
+        reports_dir: str = "reports",
+    ):
+        """
+        Initialize the report generator service.
+
+        Args:
+            template_dir: Directory containing Jinja2 templates
+            graphs_dir: Directory to save generated graphs
+            reports_dir: Directory to save generated reports
+        """
+        self.template_dir = template_dir
+        self.graphs_dir = Path(graphs_dir)
+        self.reports_dir = Path(reports_dir)
+        self.graph_generator = GraphGenerator(charts_dir=str(graphs_dir))
+        self.env = Environment(loader=FileSystemLoader(template_dir))
+
+        # Ensure directories exist
+        self.graphs_dir.mkdir(exist_ok=True)
+        self.reports_dir.mkdir(exist_ok=True)
+
+    def process_pnoe_data(self, pnoe_csv_path: str) -> pd.DataFrame:
+        """
+        Load and process Pnoe CSV data.
+
+        Args:
+            pnoe_csv_path: Path to Pnoe CSV file
+
+        Returns:
+            Processed DataFrame with smoothed columns
+        """
+        # Load data
+        df = pd.read_csv(pnoe_csv_path, delimiter=";")
+        df = df.apply(pd.to_numeric, errors="ignore")
+
+        # Calculate derived columns
+        df["VO2 Pulse"] = df["VO2(ml/min)"] / df["HR(bpm)"]
+        df["VO2 Breath"] = df["VO2(ml/min)"] / df["BF(bpm)"]
+        df["CHO"] = df["EE(kcal/min)"] * df["CARBS(%)"] / 100
+        df["FAT"] = df["EE(kcal/min)"] * df["FAT(%)"] / 100
+
+        # Smooth columns
+        window_size = 10
+        columns_to_smooth = [
+            "VO2(ml/min)",
+            "VCO2(ml/min)",
+            "HR(bpm)",
+            "VT(l)",
+            "BF(bpm)",
+            "VE(l/min)",
+            "VO2 Pulse",
+            "VO2 Breath",
+            "CHO",
+            "FAT",
+        ]
+
+        for col in columns_to_smooth:
+            if col in df.columns:
+                df[f"{col}_smoothed"] = (
+                    df[col].rolling(window=window_size, min_periods=1).mean()
+                )
+
+        return df
+
+    def generate_graphs(self, df: pd.DataFrame) -> List[Dict[str, str]]:
+        """
+        Generate all required graphs from processed data.
+
+        Args:
+            df: Processed DataFrame with smoothed columns
+
+        Returns:
+            List of dictionaries containing graph names and paths
+        """
+        graphs_generated = []
+
+        # List of graphs to generate
+        graph_methods = [
+            ("respiratory", self.graph_generator.generate_respiratory_chart),
+            ("fuel_utilization", self.graph_generator.generate_fuel_utilization_chart),
+            ("vo2_pulse", self.graph_generator.generate_vo2_pulse_chart),
+            ("vo2_breath", self.graph_generator.generate_vo2_breath_chart),
+            ("fat_metabolism", self.graph_generator.generate_fat_metabolism_chart),
+            ("recovery", self.graph_generator.generate_recovery_chart),
+        ]
+
+        for name, method in graph_methods:
+            try:
+                path = method(df, save_as_base64=False)
+                graphs_generated.append({"name": name, "path": str(path)})
+            except Exception as e:
+                print(f"Warning: Could not generate {name} chart: {e}")
+
+        return graphs_generated
+
+    def calculate_analysis_metrics(self, df: pd.DataFrame) -> Dict[str, Any]:
+        """
+        Calculate basic analysis metrics from processed data.
+
+        Args:
+            df: Processed DataFrame with smoothed columns
+
+        Returns:
+            Dictionary containing analysis metrics
+        """
+        return {
+            "vo2_max": float(df["VO2(ml/min)_smoothed"].max())
+            if "VO2(ml/min)_smoothed" in df.columns
+            else 0,
+            "peak_vt": float(df["VT(l)_smoothed"].max())
+            if "VT(l)_smoothed" in df.columns
+            else 0,
+            "max_hr": float(df["HR(bpm)_smoothed"].max())
+            if "HR(bpm)_smoothed" in df.columns
+            else 0,
+        }
+
+    def generate_html(self, patient_info: Dict[str, Any]) -> str:
+        """
+        Generate HTML content for the report.
+
+        Args:
+            patient_info: Dictionary containing patient information
+                (patient_name, age, height, weight, focus)
+
+        Returns:
+            Complete HTML document as string
+        """
+        html_pages = []
+
+        # Header context
+        header_context = {
+            "patient_name": patient_info.get("patient_name", ""),
+            "age": patient_info.get("age", ""),
+            "height": patient_info.get("height", ""),
+            "weight": patient_info.get("weight", ""),
+            "focus": patient_info.get("focus", "Endurance"),
+        }
+
+        # Footer context
+        footer_context = [
+            {
+                "contact_email": "info@ishplabs.com",
+                "website": "www.ishplabs.com",
+                "social": "@ishplabs",
+                "page_number": i + 1,
+            }
+            for i in range(len(context_list))
+        ]
+
+        # Render header
+        header_html = self.env.get_template("header.html").render(header_context)
+
+        # Render footers
+        footer_html_list = [
+            self.env.get_template("footer.html").render(context)
+            for context in footer_context
+        ]
+
+        # Render pages
+        for i, context in enumerate(context_list):
+            template = self.env.get_template(f"page_{i + 1}.html").render(context)
+
+            if (i + 1) > 2:
+                full_html = f"""
+                <div class="page flex flex-col justify-between">
+                    <div>
+                        {header_html}
+                    </div>
+                    <main class="flex-grow p-4">
+                        {template}
+                    </main>
+                    <div class="border-t text-center text-sm text-gray-600">
+                        {footer_html_list[i]}
+                    </div>
+                </div>
+                """
+                html_pages.append(full_html)
+            else:
+                html_pages.append(template)
+
+        # Combine with page breaks
+        final_html = "<div class='page-break'></div>".join(html_pages)
+
+        # Wrap in full HTML document
+        html_doc = f"""
+        <!DOCTYPE html>
+        <html>
+        <head>
+          <meta charset="utf-8">
+          <link href="https://cdn.jsdelivr.net/npm/tailwindcss/dist/tailwind.min.css" rel="stylesheet">
+          <style>
+            html, body {{
+                height: 100%;
+                margin: 0;
+                padding: 0;
+            }}
+            .page-break {{ page-break-after: always; }}
+            .page {{
+              height: 100vh;
+              min-height: 100vh;
+              display: flex;
+              flex-direction: column;
+            }}
+            .page main {{
+              flex: 1;
+              overflow: hidden;
+            }}
+            * {{
+              margin: 0;
+              padding: 0;
+              box-sizing: border-box;
+            }}
+            img {{
+              max-height: 300px;
+            }}
+            .chart-large {{
+              max-height: 500px !important;
+            }}
+          </style>
+        </head>
+        <body class="m-0 p-0">
+          {final_html}
+        </body>
+        </html>
+        """
+
+        return html_doc
+
+    def html_to_pdf(self, html_content: str, pdf_path: str) -> None:
+        """
+        Convert HTML content to PDF file.
+
+        Args:
+            html_content: HTML content as string
+            pdf_path: Path where PDF should be saved
+        """
+        with sync_playwright() as p:
+            browser = p.chromium.launch()
+            page = browser.new_page()
+            page.set_content(html_content)
+            page.pdf(path=pdf_path, format="A4", print_background=True)
+            browser.close()
+
+    def generate_report(
+        self,
+        spirometry_pdf_path: str,
+        pnoe_csv_path: str,
+        seca_excel_path: str,
+        patient_info: Dict[str, Any],
+        output_filename: str = None,
+    ) -> Dict[str, Any]:
+        """
+        Generate complete medical report from uploaded files.
+
+        Args:
+            spirometry_pdf_path: Path to Spirometry PDF file
+            pnoe_csv_path: Path to Pnoe CSV file
+            seca_excel_path: Path to SECA Excel file
+            patient_info: Dictionary containing patient information
+            output_filename: Optional custom output filename
+
+        Returns:
+            Dictionary containing report path, graphs generated, and analysis data
+        """
+        # Process data
+        df = self.process_pnoe_data(pnoe_csv_path)
+
+        # Generate graphs
+        graphs_generated = self.generate_graphs(df)
+
+        # Calculate analysis metrics
+        analysis_data = self.calculate_analysis_metrics(df)
+        analysis_data["graphs_count"] = len(graphs_generated)
+
+        # Generate HTML
+        html_content = self.generate_html(patient_info)
+
+        # Generate PDF
+        if output_filename is None:
+            patient_name = patient_info.get("patient_name", "Unknown")
+            session_id = patient_info.get("session_id", "default")
+            output_filename = (
+                f"report_{patient_name.replace(' ', '_')}_{session_id}.pdf"
+            )
+
+        report_path = self.reports_dir / output_filename
+        self.html_to_pdf(html_content, str(report_path))
+
+        return {
+            "report_path": str(report_path),
+            "graphs_generated": graphs_generated,
+            "analysis_data": analysis_data,
+        }