Add compiled Python bytecode for report generator and spirometry table extractor services

- Generated bytecode for report_generator.py and spirometry_table_extractor.py
- These changes include the compiled .pyc files in the __pycache__ directory
- The report generator service handles the generation of medical reports from uploaded files
- The spirometry table extractor service extracts data from PDF files and processes it for further analysis
2025-10-04 10:07:40 +01:00
parent 14dc64234d
commit d66f3fd18b
15 changed files with 482 additions and 3751 deletions
@@ -12,8 +12,9 @@ import pandas as pd
 from jinja2 import Environment, FileSystemLoader
 from playwright.sync_api import sync_playwright

-from app.services.context import context_list
+from app.services.context_generator import ContextGenerator
 from app.services.graph_generator import GraphGenerator
+from app.services.spirometry_table_extractor import extract_spirometry_table_from_pdf


 class ReportGeneratorService:
@@ -24,6 +25,7 @@ class ReportGeneratorService:
        template_dir: str = "app/report_gen",
        graphs_dir: str = "graphs",
        reports_dir: str = "reports",
+        data_dir: str = "data",
    ):
        """
        Initialize the report generator service.
@@ -32,16 +34,20 @@ class ReportGeneratorService:
            template_dir: Directory containing Jinja2 templates
            graphs_dir: Directory to save generated graphs
            reports_dir: Directory to save generated reports
+            data_dir: Directory to store extracted/processed data
        """
        self.template_dir = template_dir
        self.graphs_dir = Path(graphs_dir)
        self.reports_dir = Path(reports_dir)
-        self.graph_generator = GraphGenerator(charts_dir=str(graphs_dir))
+        self.data_dir = Path(data_dir)
+        self.graph_generator = GraphGenerator(charts_dir=str(self.graphs_dir))
+        self.context_generator = ContextGenerator()
        self.env = Environment(loader=FileSystemLoader(template_dir))

        # Ensure directories exist
        self.graphs_dir.mkdir(exist_ok=True)
        self.reports_dir.mkdir(exist_ok=True)
+        self.data_dir.mkdir(exist_ok=True)

    def process_pnoe_data(self, pnoe_csv_path: str) -> pd.DataFrame:
        """
@@ -139,13 +145,16 @@ class ReportGeneratorService:
            else 0,
        }

-    def generate_html(self, patient_info: Dict[str, Any]) -> str:
+    def generate_html(
+        self, patient_info: Dict[str, Any], context_list: List[Dict[str, Any]]
+    ) -> str:
        """
        Generate HTML content for the report.

        Args:
            patient_info: Dictionary containing patient information
                (patient_name, age, height, weight, focus)
+            context_list: List of context dictionaries for each page

        Returns:
            Complete HTML document as string
@@ -277,35 +286,112 @@ class ReportGeneratorService:
        """
        Generate complete medical report from uploaded files.

+        This follows the complete workflow:
+        1. Extract spirometry data from PDF
+        2. Store all data in data directory
+        3. Generate all graphs
+        4. Generate context for each page
+        5. Generate final HTML and PDF report
+
        Args:
            spirometry_pdf_path: Path to Spirometry PDF file
            pnoe_csv_path: Path to Pnoe CSV file
            seca_excel_path: Path to SECA Excel file
            patient_info: Dictionary containing patient information
            output_filename: Optional custom output filename
-n
+
        Returns:
            Dictionary containing report path, graphs generated, and analysis data
        """
-        # Process data
+        # Step 1: Extract spirometry table from PDF
+        spirometry_csv_path = self.data_dir / "extracted_spirometry_table.csv"
+        extract_spirometry_table_from_pdf(spirometry_pdf_path)
+
+        # The extraction saves to current directory, move it to data_dir
+        import shutil
+
+        if Path("extracted_spirometry_table.csv").exists():
+            shutil.move("extracted_spirometry_table.csv", spirometry_csv_path)
+
+        # Step 2: Process Pnoe data
        df = self.process_pnoe_data(pnoe_csv_path)

-        # Generate graphs
+        # Step 3: Generate all graphs
        graphs_generated = self.generate_graphs(df)

-        # Calculate analysis metrics
+        # Create graph dictionary with base64 encoded images
+        graphs_dict = {}
+        for graph in graphs_generated:
+            # Read the graph file and convert to base64
+            graph_path = Path(graph["path"])
+            if graph_path.exists():
+                import base64
+
+                with open(graph_path, "rb") as f:
+                    graphs_dict[graph["name"]] = base64.b64encode(f.read()).decode(
+                        "utf-8"
+                    )
+
+        # Also generate body composition charts
+        # Extract patient data for these charts
+        patient_name = patient_info.get("patient_name", "").split()[-1]  # Get last name
+
+        # Load SECA data to get body composition info
+        seca_df = pd.read_excel(seca_excel_path)
+        patient_data = seca_df[
+            seca_df["LastName"].str.contains(patient_name, case=False, na=False)
+        ]
+
+        if not patient_data.empty:
+            row = patient_data.iloc[0]
+            weight_kg = float(row.get("Weight", 0))
+            fat_pct = float(row.get("Adult_FMP", 0))
+            age = int(row.get("Age", patient_info.get("age", 25)))
+            gender = row.get("Gender", "female").lower()
+
+            fat_mass_lbs = weight_kg * fat_pct / 100 * 2.20462
+            lean_mass_lbs = weight_kg * (1 - fat_pct / 100) * 2.20462
+
+            # Generate body composition chart
+            body_comp_b64 = self.graph_generator.generate_body_composition_chart(
+                fat_mass_lbs, lean_mass_lbs, save_as_base64=True
+            )
+            graphs_dict["body_composition"] = body_comp_b64
+
+            # Generate body fat percent chart
+            body_fat_b64 = self.graph_generator.generate_body_fat_percent_chart(
+                fat_pct, age, gender, save_as_base64=True
+            )
+            graphs_dict["body_fat_percent"] = body_fat_b64
+
+        # Generate spirometry chart
+        spirometry_df = pd.read_csv(spirometry_csv_path)
+        spirometry_chart_b64 = self.graph_generator.generate_spirometry_chart(
+            spirometry_df, save_as_base64=True
+        )
+        graphs_dict["spirometry_chart"] = spirometry_chart_b64
+
+        # Step 4: Generate context for all pages
+        self.context_generator.load_data(
+            pnoe_csv_path, str(spirometry_csv_path), seca_excel_path
+        )
+        context_list = self.context_generator.generate_all_contexts(
+            patient_name, graphs_dict
+        )
+
+        # Step 5: Calculate analysis metrics
        analysis_data = self.calculate_analysis_metrics(df)
        analysis_data["graphs_count"] = len(graphs_generated)

-        # Generate HTML
-        html_content = self.generate_html(patient_info)
+        # Step 6: Generate HTML
+        html_content = self.generate_html(patient_info, context_list)

-        # Generate PDF
+        # Step 7: Generate PDF
        if output_filename is None:
-            patient_name = patient_info.get("patient_name", "Unknown")
+            patient_name_full = patient_info.get("patient_name", "Unknown")
            session_id = patient_info.get("session_id", "default")
            output_filename = (
-                f"report_{patient_name.replace(' ', '_')}_{session_id}.pdf"
+                f"report_{patient_name_full.replace(' ', '_')}_{session_id}.pdf"
            )

        report_path = self.reports_dir / output_filename