# Anton_wireframe/app/services/report_gen.py

from pathlib import Path
from typing import Any, Dict, List, Optional

# Import database models and compatibility score service
from db.models import InvestorTable, ProjectTable
from jinja2 import Environment, FileSystemLoader
from playwright.async_api import async_playwright

from services.compatibility_score import calculate_project_investor_compatibility


class ReportGenerator:
    """Service for generating PDF reports from HTML templates.

    The report is rendered from ``report.html`` (looked up in the
    ``templates`` directory two levels above this file) and converted to PDF
    with a headless Chromium instance driven by Playwright.
    """

    # Contribution of each criterion to the fallback compatibility score, in
    # percentage points. The same numbers are shown in the match-criteria
    # table, so they are defined once here.
    _WEIGHTS = {
        "sector": 30,
        "stage": 30,
        "geography": 20,
        "check_size": 20,
    }

    def __init__(self):
        """Create the Jinja2 environment used to render report templates."""
        template_dir = Path(__file__).parent.parent / "templates"
        # Autoescaping is enabled because investor/project strings are
        # interpolated into HTML that is subsequently loaded by a browser.
        # None of the context values built by this class contain markup.
        self.env = Environment(
            loader=FileSystemLoader(str(template_dir)),
            autoescape=True,
        )

    async def generate_investor_report(
        self,
        investor_data: Dict[str, Any],
        project_data: Optional[Dict[str, Any]] = None,
        investor_model: Optional["InvestorTable"] = None,
        project_model: Optional["ProjectTable"] = None,
    ) -> bytes:
        """
        Generate a PDF report for an investor profile.

        Args:
            investor_data: Dictionary containing investor information
            project_data: Optional dictionary containing project information for compatibility analysis
            investor_model: Optional database model for investor (used for compatibility scoring)
            project_model: Optional database model for project (used for compatibility scoring)

        Returns:
            bytes: PDF file content
        """
        context = self._prepare_context(
            investor_data, project_data, investor_model, project_model
        )

        # Render the HTML first, then hand it to the browser for printing.
        template = self.env.get_template("report.html")
        html_content = template.render(**context)
        return await self._html_to_pdf(html_content)

    def _prepare_context(
        self,
        investor_data: Dict[str, Any],
        project_data: Optional[Dict[str, Any]] = None,
        investor_model: Optional["InvestorTable"] = None,
        project_model: Optional["ProjectTable"] = None,
    ) -> Dict[str, Any]:
        """Prepare the context dictionary for template rendering.

        Without ``project_data`` the compatibility fields keep their neutral
        defaults (score ``0``, no criteria, no recommendation). With it, the
        score comes from the compatibility-score service when both models are
        supplied, otherwise from the dictionary-based fallback calculation.

        Args:
            investor_data: Dictionary containing investor information
            project_data: Optional dictionary containing project information
            investor_model: Optional database model for investor
            project_model: Optional database model for project

        Returns:
            Context with the keys ``investor``, ``project``,
            ``compatibility_score``, ``match_criteria`` and ``recommendation``.
        """
        context: Dict[str, Any] = {
            "investor": investor_data,
            "project": project_data,
            "compatibility_score": 0,
            "match_criteria": [],
            "recommendation": None,
        }

        if not project_data:
            return context

        if investor_model and project_model:
            # The service returns a score between 0 and 1; the template
            # expects a whole percentage.
            score_decimal = calculate_project_investor_compatibility(
                project=project_model, investor=investor_model, use_funds=True
            )
            context["compatibility_score"] = self._to_percentage(score_decimal)
        else:
            # Fallback to the dictionary-based calculation if models are
            # not provided.
            context["compatibility_score"] = self._calculate_compatibility_score(
                investor_data, project_data
            )

        context["match_criteria"] = self._generate_match_criteria(
            investor_data, project_data
        )
        context["recommendation"] = self._generate_recommendation(
            context["compatibility_score"], context["match_criteria"]
        )
        return context

    @staticmethod
    def _to_percentage(score_decimal: float) -> int:
        """Convert a 0-1 score into a whole percentage clamped to 0-100.

        Rounding (rather than truncating) avoids losing a point to float
        representation, e.g. ``0.29 * 100 == 28.999999999999996``.
        """
        return max(0, min(100, int(round(score_decimal * 100))))

    @staticmethod
    def _normalize_stage(stage: str) -> str:
        """Return a stage name in the form used for case-insensitive comparison."""
        return stage.lower().replace("_", " ")

    @staticmethod
    def _aggregate_fund_data(investor_data: Dict[str, Any]):
        """Collect sectors, stages, geographies and check ranges across all funds.

        Keys that are missing or explicitly ``None`` are treated as empty.

        Returns:
            A tuple ``(sectors, stages, geographies, check_ranges)`` where
            ``sectors`` and ``stages`` are sets, ``geographies`` is a list of
            the funds' non-empty ``geographic_focus`` values, and
            ``check_ranges`` is a list of dicts with ``lower``, ``upper`` and
            ``fund_name`` for every fund that has both bounds set.
        """
        sectors = set(investor_data.get("sectors") or [])
        stages = set()
        geographies = []
        check_ranges = []

        for fund in investor_data.get("funds") or []:
            sectors.update(fund.get("sectors") or [])
            stages.update(fund.get("investment_stages") or [])

            focus = fund.get("geographic_focus")
            if focus:
                geographies.append(focus)

            lower = fund.get("check_size_lower")
            upper = fund.get("check_size_upper")
            if lower and upper:
                check_ranges.append(
                    {
                        "lower": lower,
                        "upper": upper,
                        "fund_name": fund.get("fund_name", "Unnamed Fund"),
                    }
                )

        return sectors, stages, geographies, check_ranges

    def _calculate_compatibility_score(
        self, investor_data: Dict[str, Any], project_data: Dict[str, Any]
    ) -> int:
        """Calculate overall compatibility score between investor and project.

        Each criterion is all-or-nothing and adds its weight from
        ``_WEIGHTS`` when it matches against the data aggregated from the
        investor and all of its funds.

        Args:
            investor_data: Dictionary containing investor information
            project_data: Dictionary containing project information

        Returns:
            Score between 0 and 100.
        """
        weights = self._WEIGHTS
        sectors, stages, geographies, check_ranges = self._aggregate_fund_data(
            investor_data
        )
        score = 0

        # Sector match: any shared sector counts.
        project_sectors = set(project_data.get("sectors") or [])
        if sectors & project_sectors:
            score += weights["sector"]

        # Stage match - case insensitive, underscores treated as spaces.
        project_stage = project_data.get("stage")
        if project_stage and stages:
            known_stages = {self._normalize_stage(stage) for stage in stages}
            if self._normalize_stage(project_stage) in known_stages:
                score += weights["stage"]

        # Geography match: a fund with a global focus always matches;
        # otherwise one location string must contain the other.
        project_geo = (project_data.get("location") or "").lower()
        for geo in geographies:
            geo_lower = geo.lower()
            is_global = "global" in geo_lower or "worldwide" in geo_lower
            overlaps = bool(project_geo) and (
                geo_lower in project_geo or project_geo in geo_lower
            )
            if is_global or overlaps:
                score += weights["geography"]
                break

        # Check size match: the valuation must fall inside any fund's range.
        project_valuation = project_data.get("valuation") or 0
        if project_valuation and any(
            cr["lower"] <= project_valuation <= cr["upper"] for cr in check_ranges
        ):
            score += weights["check_size"]

        return min(score, 100)

    def _generate_match_criteria(
        self, investor_data: Dict[str, Any], project_data: Dict[str, Any]
    ) -> List[Dict[str, str]]:
        """Generate detailed match criteria table.

        Args:
            investor_data: Dictionary containing investor information
            project_data: Dictionary containing project information

        Returns:
            One row each for Sector, Stage, Geography and Check Size, in that
            order. Every row has the keys ``name``, ``requirement`` (project
            side), ``evidence`` (investor side), ``match`` and ``weight``.
        """
        weights = self._WEIGHTS
        sectors, stages, geographies, check_ranges = self._aggregate_fund_data(
            investor_data
        )
        criteria = []

        # Sector criterion. Evidence is sorted so the report text is stable
        # between runs (set iteration order is not).
        project_sectors = list(project_data.get("sectors") or [])
        criteria.append(
            {
                "name": "Sector",
                "requirement": ", ".join(project_sectors) if project_sectors else "N/A",
                "evidence": ", ".join(sorted(sectors)[:3]) if sectors else "N/A",
                "match": "Perfect" if sectors & set(project_sectors) else "Mismatch",
                "weight": f"{weights['sector']}%",
            }
        )

        # Stage criterion - case insensitive comparison. A missing or None
        # stage is reported as "N/A" instead of being compared.
        project_stage = project_data.get("stage") or "N/A"
        if project_stage == "N/A":
            stage_match = "N/A"
        elif stages:
            known_stages = {self._normalize_stage(stage) for stage in stages}
            stage_match = (
                "Perfect"
                if self._normalize_stage(project_stage) in known_stages
                else "Mismatch"
            )
        else:
            stage_match = "Mismatch"

        criteria.append(
            {
                "name": "Stage",
                "requirement": str(project_stage),
                "evidence": ", ".join(sorted(stages)) if stages else "N/A",
                "match": stage_match,
                "weight": f"{weights['stage']}%",
            }
        )

        # Geography criterion. "N/A" is only reported when neither side has
        # any geography information.
        project_geo = project_data.get("location") or "N/A"
        geo_match = "Mismatch"
        if project_geo != "N/A" and geographies:
            project_geo_lower = project_geo.lower()
            for geo in geographies:
                geo_lower = geo.lower()
                # A global focus is a perfect match; a textual overlap
                # between the two locations is a strong one.
                if "global" in geo_lower or "worldwide" in geo_lower:
                    geo_match = "Perfect"
                    break
                if geo_lower in project_geo_lower or project_geo_lower in geo_lower:
                    geo_match = "Strong"
                    break
        elif not geographies and project_geo == "N/A":
            geo_match = "N/A"

        criteria.append(
            {
                "name": "Geography",
                "requirement": project_geo,
                "evidence": ", ".join(geographies) if geographies else "N/A",
                "match": geo_match,
                "weight": f"{weights['geography']}%",
            }
        )

        # Check Size criterion. A missing or None valuation counts as 0.
        project_val = project_data.get("valuation") or 0

        check_evidence = "N/A"
        if check_ranges:
            evidence_parts = []
            for cr in check_ranges[:3]:  # Show up to 3 funds
                range_str = (
                    f"€{cr['lower'] / 1000000:.0f}M - €{cr['upper'] / 1000000:.0f}M"
                )
                if cr["fund_name"]:
                    evidence_parts.append(f"{cr['fund_name']}: {range_str}")
                else:
                    evidence_parts.append(range_str)
            check_evidence = "; ".join(evidence_parts)

        check_match = "N/A"
        if project_val > 0 and check_ranges:
            in_range = any(
                cr["lower"] <= project_val <= cr["upper"] for cr in check_ranges
            )
            check_match = "Perfect" if in_range else "Mismatch"

        criteria.append(
            {
                "name": "Check Size",
                "requirement": f"€{project_val / 1000000:.0f}M"
                if project_val
                else "N/A",
                "evidence": check_evidence,
                "match": check_match,
                "weight": f"{weights['check_size']}%",
            }
        )

        return criteria

    def _generate_recommendation(
        self, score: int, criteria: List[Dict[str, str]]
    ) -> str:
        """Generate recommendation text based on score and criteria.

        Args:
            score: Compatibility score as a percentage.
            criteria: Match criteria rows. Currently not consulted; the text
                is chosen from the score alone.

        Returns:
            "High Priority" text for scores of 85 and above, "Medium
            Priority" for 70 and above, otherwise "Low Priority".
        """
        if score >= 85:
            return "High Priority. A strong target due to exceptional alignment on the most heavily-weighted criteria: Sector and Stage. The strong geographic fit further solidifies this recommendation."
        if score >= 70:
            return "Medium Priority. Good alignment on key criteria with some areas of strong fit. The geographic fit in the target region supports this recommendation."
        return "Low Priority. Limited alignment on key investment criteria. Consider for future evaluation if circumstances change."

    async def _html_to_pdf(self, html_content: str) -> bytes:
        """Convert HTML content to PDF using Playwright.

        Args:
            html_content: Fully rendered HTML document.

        Returns:
            bytes: A4 PDF with backgrounds printed and zero page margins.
        """
        async with async_playwright() as p:
            browser = await p.chromium.launch()
            try:
                page = await browser.new_page()

                # Set content and wait for any dynamic content to load
                await page.set_content(html_content, wait_until="networkidle")

                return await page.pdf(
                    format="A4",
                    print_background=True,
                    margin={"top": "0", "right": "0", "bottom": "0", "left": "0"},
                )
            finally:
                # Close the browser even when rendering fails.
                await browser.close()