feat: Refactor report generation to use async methods and improve error handling; enhance spirometry table extraction with better CSV formatting

2025-10-04 10:35:02 +01:00
parent 358898b7db
commit 0a735d88c8
5 changed files with 123 additions and 38 deletions
@@ -10,7 +10,7 @@ from typing import Any, Dict, List

 import pandas as pd
 from jinja2 import Environment, FileSystemLoader
-from playwright.sync_api import sync_playwright
+from playwright.async_api import async_playwright
 from services.context_generator import ContextGenerator
 from services.graph_generator import GraphGenerator
 from services.spirometry_table_extractor import extract_spirometry_table_from_pdf
@@ -265,7 +265,7 @@ class ReportGeneratorService:

        return html_doc

-    def html_to_pdf(self, html_content: str, pdf_path: str) -> None:
+    async def html_to_pdf(self, html_content: str, pdf_path: str) -> None:
        """
        Convert HTML content to PDF file.

@@ -273,14 +273,14 @@ class ReportGeneratorService:
            html_content: HTML content as string
            pdf_path: Path where PDF should be saved
        """
-        with sync_playwright() as p:
-            browser = p.chromium.launch()
-            page = browser.new_page()
-            page.set_content(html_content)
-            page.pdf(path=pdf_path, format="A4", print_background=True)
-            browser.close()
+        async with async_playwright() as p:
+            browser = await p.chromium.launch()
+            page = await browser.new_page()
+            await page.set_content(html_content)
+            await page.pdf(path=pdf_path, format="A4", print_background=True)
+            await browser.close()

-    def generate_report(
+    async def generate_report(
        self,
        spirometry_pdf_path: str,
        pnoe_csv_path: str,
@@ -309,19 +309,18 @@ class ReportGeneratorService:
            Dictionary containing report path, graphs generated, and analysis data
        """
        # Step 1: Extract spirometry table from PDF
-        spirometry_csv_path = self.data_dir / "extracted_spirometry_table.csv"
-        extract_spirometry_table_from_pdf(spirometry_pdf_path)
-
-        # The extraction saves to current directory, move it to data_dir
-        import shutil
-
-        if Path("extracted_spirometry_table.csv").exists():
-            shutil.move("extracted_spirometry_table.csv", spirometry_csv_path)
+        print("Step 1: Extracting spirometry data from PDF...")
+        spirometry_csv_path = extract_spirometry_table_from_pdf(
+            spirometry_pdf_path, output_dir=str(self.data_dir)
+        )
+        print(f"Spirometry data saved to: {spirometry_csv_path}")

        # Step 2: Process Pnoe data
+        print("Step 2: Processing Pnoe data...")
        df = self.process_pnoe_data(pnoe_csv_path)

        # Step 3: Generate all graphs
+        print("Step 3: Generating graphs...")
        graphs_generated = self.generate_graphs(df)

        # Create graph dictionary with base64 encoded images
@@ -370,13 +369,20 @@ class ReportGeneratorService:
            graphs_dict["body_fat_percent"] = body_fat_b64

        # Generate spirometry chart
-        spirometry_df = pd.read_csv(spirometry_csv_path)
-        spirometry_chart_b64 = self.graph_generator.generate_spirometry_chart(
-            spirometry_df, save_as_base64=True
-        )
-        graphs_dict["spirometry_chart"] = spirometry_chart_b64
+        print("Step 4: Generating spirometry chart...")
+        try:
+            spirometry_df = pd.read_csv(spirometry_csv_path)
+            print(f"Spirometry data loaded: {len(spirometry_df)} rows")
+            spirometry_chart_b64 = self.graph_generator.generate_spirometry_chart(
+                spirometry_df, save_as_base64=True
+            )
+            graphs_dict["spirometry_chart"] = spirometry_chart_b64
+        except Exception as e:
+            print(f"Warning: Could not generate spirometry chart: {e}")
+            graphs_dict["spirometry_chart"] = ""

-        # Step 4: Generate context for all pages
+        # Step 5: Generate context for all pages
+        print("Step 5: Generating page contexts...")
        self.context_generator.load_data(
            pnoe_csv_path, str(spirometry_csv_path), seca_excel_path
        )
@@ -401,7 +407,7 @@ class ReportGeneratorService:

        report_path = self.reports_dir / output_filename
        print(f"Generating PDF report at {report_path}")
-        self.html_to_pdf(html_content, str(report_path))
+        await self.html_to_pdf(html_content, str(report_path))

        return {
            "report_path": str(report_path),