From 9e1ec258f1513470ea8a356e858c68ede3300a90 Mon Sep 17 00:00:00 2001 From: bolade Date: Tue, 14 Oct 2025 12:02:23 +0100 Subject: [PATCH] feat: Implement report generation service and add report route for investor profiles --- app/__pycache__/main.cpython-312.pyc | Bin 5130 -> 5225 bytes app/main.py | 10 +- .../__pycache__/companies.cpython-312.pyc | Bin 11055 -> 11046 bytes .../__pycache__/investors.cpython-312.pyc | Bin 23602 -> 23593 bytes app/routers/report_route.py | 121 +++++++ .../__pycache__/llm_parser.cpython-312.pyc | Bin 37410 -> 37401 bytes .../__pycache__/querying.cpython-312.pyc | Bin 8324 -> 8315 bytes app/services/report_gen.py | 247 ++++++++++++++ app/templates/report.html | 319 ++++++++++++++++++ requirements.txt | 1 + 10 files changed, 697 insertions(+), 1 deletion(-) create mode 100644 app/routers/report_route.py create mode 100644 app/templates/report.html diff --git a/app/__pycache__/main.cpython-312.pyc b/app/__pycache__/main.cpython-312.pyc index 413ba49e71a5379ee1d0f71a46fab7343d36a4d0..a03bc929ab23772b706d998d4472ae29b145e415 100644 GIT binary patch delta 1200 zcmaJCw!GrOJ2NDu5Uc7sOD8cMW6E7b0O5(|xEg=3e$$b0X`@VVe_Pu$M|C$fI z3kI7IU!?GTAraaL#qsz?KV2!u)EGuO(*0&(Db>%Z8Pc;jQMVE6z}bc@4@|N# zoRE4+AG?Pyt>oquJyvTtPSh_Hheu9)^iGl>@dS_P@p=3zadd^=MKW_fJz0~FGvs-O zJ134Fnk2e7`*)OW`I-~|*0(Rtvgf|^f7p&w+3=}s*FjQ1ii4|6*w&me4_WT?u{X+@+aAXYK{Epi^a^30fE))` zq7JyNnpU(d+bKGRZBb1FA%@@cLXU_E$g+aJOYxHSuv`AI%bxIewW}YMK1wi~Q~T_Tif52^$nLWai%B)B1D;{wF{rQ$(nGMRddT0Ny^ zSj&VuHc*)2?0xp4?NQqhT;aB>REm~fq%g_`+np6?7qJ*EEs%1N4tc`@?a+lVCm|vf z5lUd$;I#~yW?A!!2Z#!${h0BtHmrFNwSyQo+!P=>>q_&)@ml7}6`MM)Y#JO(vTl?d zS28Te>w{OW8#1htS=CAX?SQM)%kl>7`m4MS8!T6Xs`bYte+0(Lwq7;KRVeaHG2iql xTMK8}4t##>J3s>35T3%@Tj60G{fMM}6y8PQ^|Ad(Za0$qfcn|zNS`d8z)!uz0XF~u delta 924 zcmaJ<&1(}u6ragvH)-p}WSex8Y(D8%YG}0<{Gyb8AQp>a^`I<54cSS%?Pe2aH-*xp z@t~gKi2p>r1w4E2K@SSDmwHtws24#mzS*F(q7Lkj_ulXK-pqS%Kb9VsBKxx3kKoj< z9B#apJCP&~cgCGXj0|Litf9K9tM#Ow^!>1vs;Bj|Z}(YRJ)>tZ3Lp~R;}zfl)89sV 
# app/routers/report_route.py
"""HTTP route that renders an investor profile as a downloadable PDF report."""

import re
from typing import Optional
from urllib.parse import quote

from db.db import get_db
from db.models import FundTable, InvestorTable, ProjectTable
from fastapi import APIRouter, Depends, HTTPException, Query
from fastapi.responses import Response
from services.report_gen import ReportGenerator
from sqlalchemy.orm import Session, selectinload

router = APIRouter(tags=["Report Generation"])


def _content_disposition(investor_name: Optional[str]) -> str:
    """Build the ``Content-Disposition`` header value for the report download.

    The investor name comes from the database and may contain quotes, control
    characters or non-ASCII text, none of which may be placed verbatim inside
    a quoted ``filename=`` parameter.  An ASCII-only fallback is emitted for
    old clients, plus an RFC 5987 ``filename*`` parameter carrying the real
    (percent-encoded, UTF-8) name.

    Args:
        investor_name: Display name of the investor; ``None`` or empty falls
            back to ``"Investor"``.

    Returns:
        The header value, e.g.
        ``attachment; filename="Acme_Report.pdf"; filename*=UTF-8''Acme_Report.pdf``.
    """
    base = (investor_name or "Investor").replace(" ", "_")
    full_name = f"{base}_Report.pdf"
    # Anything outside a conservative ASCII set is replaced so the quoted
    # parameter can never be terminated early or fail header encoding.
    ascii_name = re.sub(r"[^A-Za-z0-9._-]", "_", full_name)
    return (
        f'attachment; filename="{ascii_name}"; '
        f"filename*=UTF-8''{quote(full_name, safe='')}"
    )


@router.get("/report/investor/{investor_id}")
async def generate_investor_report(
    investor_id: int,
    project_id: Optional[int] = Query(
        None, description="Optional project ID for compatibility analysis"
    ),
    db: Session = Depends(get_db),
):
    """
    Generate a PDF report for an investor profile.

    Args:
        investor_id: The ID of the investor to generate a report for
        project_id: Optional project ID to include mandate match analysis
        db: Database session injected by FastAPI

    Returns:
        PDF file as a downloadable response

    Raises:
        HTTPException: 404 when the investor, or the requested project,
            does not exist.
    """
    # NOTE(review): the queries below are synchronous calls made from an
    # async handler; confirm that blocking here is acceptable.
    # Fetch investor data with all relationships
    investor = (
        db.query(InvestorTable)
        .options(
            selectinload(InvestorTable.portfolio_companies),
            selectinload(InvestorTable.team_members),
            selectinload(InvestorTable.sectors),
            selectinload(InvestorTable.funds).selectinload(FundTable.investment_stages),
            selectinload(InvestorTable.funds).selectinload(FundTable.sectors),
        )
        .filter(InvestorTable.id == investor_id)
        .first()
    )

    if not investor:
        raise HTTPException(status_code=404, detail="Investor not found")

    # Prepare investor data dictionary
    investor_data = {
        "name": investor.name,
        "description": investor.description,
        "website": investor.website,
        "headquarters": investor.headquarters,
        "aum": investor.aum,
        "geographic_focus": investor.geographic_focus,
        "portfolio_highlights": investor.portfolio_highlights or [],
        "investment_thesis": investor.investment_thesis or [],
        "sectors": [sector.name for sector in investor.sectors],
        "team_members": [
            {
                "name": member.name,
                "role": member.role,
                "title": member.title,
                "email": member.email,
            }
            for member in investor.team_members
        ],
        "check_size_lower": None,
        "check_size_upper": None,
        "investment_stages": [],
    }

    # Get check sizes and stages from funds
    if investor.funds:
        # Check sizes are taken from the first fund only.
        first_fund = investor.funds[0]
        investor_data["check_size_lower"] = first_fund.check_size_lower
        investor_data["check_size_upper"] = first_fund.check_size_upper

        # Stages are aggregated across all funds; sorting keeps the rendered
        # report stable between requests (set order is arbitrary).
        investor_data["investment_stages"] = sorted(
            {
                stage.name
                for fund in investor.funds
                for stage in fund.investment_stages
                if stage.name
            }
        )

    # Fetch project data if project_id is provided.  Compare against None so
    # that an id of 0 is still looked up instead of being silently ignored.
    project_data = None
    if project_id is not None:
        project = (
            db.query(ProjectTable)
            .options(selectinload(ProjectTable.sector))
            .filter(ProjectTable.id == project_id)
            .first()
        )

        if not project:
            raise HTTPException(status_code=404, detail="Project not found")

        project_data = {
            "name": project.name,
            "description": project.description,
            "location": project.location,
            "valuation": project.valuation,
            "stage": project.stage.name if project.stage else None,
            "sectors": [sector.name for sector in project.sector],
        }

    # Generate PDF report
    report_generator = ReportGenerator()
    pdf_bytes = await report_generator.generate_investor_report(
        investor_data, project_data
    )

    # Return PDF as downloadable file
    return Response(
        content=pdf_bytes,
        media_type="application/pdf",
        headers={"Content-Disposition": _content_disposition(investor.name)},
    )
# app/services/report_gen.py
from typing import Any, Dict, List, Optional


class ReportGenerator:
    """Render investor profile reports (optionally with a mandate match) to PDF.

    NOTE(review): the module-level imports and the constructor of this class
    are not visible in this excerpt.  The methods below read ``self.env`` and
    call ``self.env.get_template("report.html")`` on it (presumably a Jinja2
    ``Environment`` -- confirm), so a constructor that sets ``self.env`` has
    to be defined alongside them.
    """

    # Contribution of each criterion to the overall score (sums to 100).
    _WEIGHTS = {
        "sector": 30,
        "stage": 30,
        "geography": 20,
        "check_size": 15,
        "thesis": 5,
    }

    async def generate_investor_report(
        self,
        investor_data: Dict[str, Any],
        project_data: Optional[Dict[str, Any]] = None,
    ) -> bytes:
        """
        Generate a PDF report for an investor profile.

        Args:
            investor_data: Dictionary containing investor information
            project_data: Optional dictionary containing project information
                for compatibility analysis

        Returns:
            bytes: PDF file content
        """
        # Prepare template context
        context = self._prepare_context(investor_data, project_data)

        # Render HTML from template
        template = self.env.get_template("report.html")
        html_content = template.render(**context)

        # Convert HTML to PDF using Playwright
        return await self._html_to_pdf(html_content)

    def _prepare_context(
        self,
        investor_data: Dict[str, Any],
        project_data: Optional[Dict[str, Any]] = None,
    ) -> Dict[str, Any]:
        """Prepare the context dictionary for template rendering.

        Without project data the compatibility fields keep their neutral
        defaults (score ``0``, no criteria, no recommendation).
        """
        context = {
            "investor": investor_data,
            "project": project_data,
            "compatibility_score": 0,
            "match_criteria": [],
            "recommendation": None,
        }

        # If project data is provided, calculate compatibility
        if project_data:
            context["compatibility_score"] = self._calculate_compatibility_score(
                investor_data, project_data
            )
            context["match_criteria"] = self._generate_match_criteria(
                investor_data, project_data
            )
            context["recommendation"] = self._generate_recommendation(
                context["compatibility_score"], context["match_criteria"]
            )

        return context

    @staticmethod
    def _geo_matches(investor_geo: Any, project_geo: Any) -> bool:
        """Return True when one location text contains the other.

        Missing or empty values never match; the comparison is
        case-insensitive and works in both directions.
        """
        investor_text = str(investor_geo or "").strip().lower()
        project_text = str(project_geo or "").strip().lower()
        if not investor_text or not project_text:
            return False
        return investor_text in project_text or project_text in investor_text

    @staticmethod
    def _check_size_in_range(lower: Any, upper: Any, valuation: Any) -> bool:
        """Return True when a known valuation lies inside the known bounds.

        A missing valuation, or an investor with neither bound set, is
        treated as "no data" and never counts as a match.  A single missing
        bound is treated as open-ended.
        """
        if not valuation or not (lower or upper):
            return False
        return (lower or 0) <= valuation <= (upper or float("inf"))

    @staticmethod
    def _format_millions(amount: float) -> str:
        """Format an amount as whole millions of euros, e.g. ``€5M``."""
        return f"€{amount / 1000000:.0f}M"

    def _calculate_compatibility_score(
        self, investor_data: Dict[str, Any], project_data: Dict[str, Any]
    ) -> int:
        """Calculate overall compatibility score between investor and project.

        Keys that are absent or hold ``None`` are treated as "no data" and
        contribute nothing, instead of raising.

        Returns:
            Integer score between 0 and 100.
        """
        weights = self._WEIGHTS
        score = 0

        # Sector match: any shared sector name
        investor_sectors = set(investor_data.get("sectors") or [])
        project_sectors = set(project_data.get("sectors") or [])
        if investor_sectors & project_sectors:
            score += weights["sector"]

        # Stage match
        investor_stages = set(investor_data.get("investment_stages") or [])
        project_stage = project_data.get("stage")
        if project_stage and project_stage in investor_stages:
            score += weights["stage"]

        # Geography match (same rule as the criteria table)
        if self._geo_matches(
            investor_data.get("geographic_focus"), project_data.get("location")
        ):
            score += weights["geography"]

        # Check size match
        if self._check_size_in_range(
            investor_data.get("check_size_lower"),
            investor_data.get("check_size_upper"),
            project_data.get("valuation"),
        ):
            score += weights["check_size"]

        # Thesis alignment (simplified): always granted
        score += weights["thesis"]

        return min(score, 100)

    def _generate_match_criteria(
        self, investor_data: Dict[str, Any], project_data: Dict[str, Any]
    ) -> List[Dict[str, str]]:
        """Generate detailed match criteria table.

        Returns one row per criterion (Sector, Stage, Geography, Check Size,
        Thesis), each with ``name``, ``requirement``, ``evidence``, ``match``
        and ``weight`` keys.  The match labels follow the same rules as
        ``_calculate_compatibility_score`` so table and score agree.
        """
        weights = self._WEIGHTS
        criteria = []

        # Sector criterion: the requirement is the project's own sectors
        investor_sectors = list(investor_data.get("sectors") or [])
        project_sectors = list(project_data.get("sectors") or [])
        shared_sectors = set(investor_sectors) & set(project_sectors)
        criteria.append(
            {
                "name": "Sector",
                "requirement": ", ".join(project_sectors)
                if project_sectors
                else "N/A",
                "evidence": ", ".join(investor_sectors[:3])
                if investor_sectors
                else "N/A",
                "match": "Perfect" if shared_sectors else "Mismatch",
                "weight": f"{weights['sector']}%",
            }
        )

        # Stage criterion
        investor_stages = list(investor_data.get("investment_stages") or [])
        project_stage = project_data.get("stage")
        stage_found = bool(project_stage) and project_stage in investor_stages
        criteria.append(
            {
                "name": "Stage",
                "requirement": str(project_stage) if project_stage else "N/A",
                "evidence": ", ".join(investor_stages) if investor_stages else "N/A",
                "match": "Perfect" if stage_found else "Mismatch",
                "weight": f"{weights['stage']}%",
            }
        )

        # Geography criterion
        investor_geo = investor_data.get("geographic_focus")
        project_geo = project_data.get("location")
        criteria.append(
            {
                "name": "Geography",
                "requirement": str(project_geo) if project_geo else "N/A",
                "evidence": str(investor_geo) if investor_geo else "N/A",
                "match": "Strong"
                if self._geo_matches(investor_geo, project_geo)
                else "Mismatch",
                "weight": f"{weights['geography']}%",
            }
        )

        # Check Size criterion
        check_lower = investor_data.get("check_size_lower")
        check_upper = investor_data.get("check_size_upper")
        project_val = project_data.get("valuation")

        if check_lower and check_upper:
            check_evidence = (
                f"{self._format_millions(check_lower)} - "
                f"{self._format_millions(check_upper)}"
            )
        elif check_lower:
            check_evidence = f"{self._format_millions(check_lower)}+"
        elif check_upper:
            check_evidence = f"up to {self._format_millions(check_upper)}"
        else:
            check_evidence = "N/A"

        if self._check_size_in_range(check_lower, check_upper, project_val):
            check_match = "Perfect"
        elif not project_val or not (check_lower or check_upper):
            # Nothing to compare against: report "no data", not a verdict.
            check_match = "N/A"
        else:
            check_match = "Mismatch"
        criteria.append(
            {
                "name": "Check Size",
                "requirement": self._format_millions(project_val)
                if project_val
                else "N/A",
                "evidence": check_evidence,
                "match": check_match,
                "weight": f"{weights['check_size']}%",
            }
        )

        # Thesis criterion
        # NOTE(review): the requirement text and the fallback evidence are
        # fixed placeholders, and the match is always "Strong" to mirror the
        # unconditional thesis points in the score -- confirm intended.
        thesis = list(investor_data.get("investment_thesis") or [])
        criteria.append(
            {
                "name": "Thesis",
                "requirement": "Founder-led, ESG focus",
                "evidence": ", ".join(thesis[:2]) if thesis else "Entrepreneur-led",
                "match": "Strong",
                "weight": f"{weights['thesis']}%",
            }
        )

        return criteria

    def _generate_recommendation(
        self, score: int, criteria: List[Dict[str, str]]
    ) -> str:
        """Generate recommendation text based on score and criteria.

        Args:
            score: Overall compatibility score (0-100).
            criteria: Rows produced by ``_generate_match_criteria``; used to
                avoid praising the geographic fit in the medium tier when the
                Geography row explicitly reports no match.

        Returns:
            A "High", "Medium" or "Low Priority" recommendation sentence.
        """
        if score >= 85:
            return "High Priority. A strong target due to exceptional alignment on the most heavily-weighted criteria: Sector and Stage. The strong geographic fit further solidifies this recommendation."
        if score >= 70:
            geography_failed = any(
                row.get("name") == "Geography"
                and row.get("match") in ("Mismatch", "N/A")
                for row in (criteria or [])
            )
            text = "Medium Priority. Good alignment on key criteria with some areas of strong fit."
            if not geography_failed:
                text += " The geographic fit in the target region supports this recommendation."
            return text
        return "Low Priority. Limited alignment on key investment criteria. Consider for future evaluation if circumstances change."

    async def _html_to_pdf(self, html_content: str) -> bytes:
        """Convert HTML content to PDF using Playwright.

        Launches Chromium, loads the HTML, and prints it as an A4 page with
        backgrounds and zero margins.  The browser is closed even when
        rendering fails.
        """
        # Imported here so the block is self-contained; harmless if the
        # module already imports it at the top.
        from playwright.async_api import async_playwright

        async with async_playwright() as p:
            browser = await p.chromium.launch()
            try:
                page = await browser.new_page()

                # Set content and wait for any dynamic content to load
                await page.set_content(html_content, wait_until="networkidle")

                # Generate PDF with proper settings
                return await page.pdf(
                    format="A4",
                    print_background=True,
                    margin={"top": "0", "right": "0", "bottom": "0", "left": "0"},
                )
            finally:
                await browser.close()


# The patch continues below with the new template file
# app/templates/report.html ("Investor Profile Report").
+
+
+

Investor Profile

+

+ {{ investor.name }} +

+
+ +
+ +
+ +
+
+

+ Investor Description +

+

+ {{ investor.description or 'No description + available.' }} +

+
+ +
+

+ Portfolio Highlights +

+
+ {% if investor.portfolio_highlights %} {% for + company in investor.portfolio_highlights[:5] %} + {{ company }} + {% endfor %} {% else %} +

+ No portfolio highlights available +

+ {% endif %} +
+
+ +
+

+ Senior Leadership +

+ {% if investor.team_members %} {% for member in + investor.team_members[:2] %} +
+

+ {{ member.name }} +

+

+ {{ member.role or member.title or 'Team Member' + }} +

+ {% if member.email %} +

+ {{ member.email }} +

+ {% endif %} +
+ {% endfor %} {% else %} +

+ No team information available +

+ {% endif %} +
+
+ + +
+

+ Key Data +

+ +
+

Headquarters:

+

+ {{ investor.headquarters or 'N/A' }} +

+
+ +
+

Sectors:

+

+ {% if investor.sectors %} {{ investor.sectors | + join(', ') }} {% else %} N/A {% endif %} +

+
+ +
+

DACH Region:

+

+ {{ investor.geographic_focus or 'N/A' }} +

+
+ +
+

+ AUM: (EUR million) (as of Fund IX) +

+

+ {% if investor.aum %} €{{ + '{:,.0f}'.format(investor.aum / 1000000) }}M {% else + %} N/A {% endif %} +

+
+ +
+

+ Investment Stage: +

+

+ {% if investor.investment_stages %} {{ + investor.investment_stages | join(', ') }} {% else + %} N/A {% endif %} +

+
+ +
+

+ Est. Investment Size: +

+

+ {% if investor.check_size_lower and + investor.check_size_upper %} €{{ + '{:,.0f}'.format(investor.check_size_lower / + 1000000) }}M - €{{ + '{:,.0f}'.format(investor.check_size_upper / + 1000000) }}M {% elif investor.check_size_lower %} + €{{ '{:,.0f}'.format(investor.check_size_lower / + 1000000) }}M+ {% else %} N/A {% endif %} +

+
+ +
+

+ Select Deals, Series A, Series B: +

+

+ Growth +

+
+ +
+

Focus Areas:

+

+ {% if investor.investment_thesis %} {{ + investor.investment_thesis[:3] | join(', ') }} {% + else %} Disruptive Technologies, Entrepreneur-led, + Sustainability {% endif %} +

+
+
+
+ +
+ Page 3 +
+
+ + + {% if project %} +
+

+ {{ investor.name }}: Mandate Match Analysis +

+ + +
+
+

+ Overall Mandate Match +

+
+ {{ compatibility_score }}% +
+
+
+ + +
+

+ Mandate Alignment Analysis +

+ + + + + + + + + + + + {% for criterion in match_criteria %} + + + + + + + + {% endfor %} + +
+ Criterion + + Mandate Requirement + + Investor Evidence (from Database) + + Match Score + + Weight +
+ {{ criterion.name }} + + {{ criterion.requirement }} + + {{ criterion.evidence }} + + + {{ criterion.match }} + + + {{ criterion.weight }} +
+
+ + +
+

+ Final Recommendation & Rationale +

+

+ {{ recommendation or "High Priority. A strong target due to + exceptional alignment on the most heavily-weighted criteria: + Sector and Stage. The strong geographic fit in the DACH + region further solidifies this recommendation." }} +

+
+ +
+ Page 4 +
+
+ {% endif %} + + diff --git a/requirements.txt b/requirements.txt index cd581d8..98345ee 100644 --- a/requirements.txt +++ b/requirements.txt @@ -48,6 +48,7 @@ jsonpointer==3.0.0 jsonschema==4.25.1 jsonschema-specifications==2025.4.1 kubernetes==33.1.0 +playwright==1.48.0 langchain==0.3.27 langchain-community==0.3.29 langchain-core==0.3.75