# PDF report endpoints (app/routers/report_route.py).
#
# NOTE: the accompanying change to app/main.py adds `report_route` to the
# `from routers import (...)` list and registers this router with
# `app.include_router(report_route.router)`.
import re
from typing import Optional
from urllib.parse import quote

from db.db import get_db
from db.models import FundTable, InvestorTable, ProjectTable
from fastapi import APIRouter, Depends, HTTPException, Query
from fastapi.responses import Response
from services.report_gen import ReportGenerator
from sqlalchemy.orm import Session, selectinload

router = APIRouter(tags=["Report Generation"])


def _content_disposition(filename: str) -> str:
    """Build an ``attachment`` Content-Disposition value for any file name.

    The plain ``filename`` parameter only carries a conservative ASCII
    fallback (anything outside ``[A-Za-z0-9._-]`` becomes ``_``), so quotes,
    control characters or non-latin-1 text in the name cannot corrupt the
    header. The exact name is additionally sent percent-encoded through the
    ``filename*`` parameter.
    """
    ascii_fallback = re.sub(r"[^A-Za-z0-9._-]", "_", filename)
    encoded = quote(filename, safe="")
    return f"attachment; filename=\"{ascii_fallback}\"; filename*=UTF-8''{encoded}"


@router.get("/report/investor/{investor_id}")
async def generate_investor_report(
    investor_id: int,
    project_id: Optional[int] = Query(
        None, description="Optional project ID for compatibility analysis"
    ),
    db: Session = Depends(get_db),
):
    """
    Generate a PDF report for an investor profile.

    Args:
        investor_id: The ID of the investor to generate a report for
        project_id: Optional project ID to include mandate match analysis
        db: SQLAlchemy session provided by the ``get_db`` dependency

    Returns:
        PDF file as a downloadable response

    Raises:
        HTTPException: 404 when the investor, or the requested project,
            does not exist.
    """
    # NOTE(review): the queries below go through a synchronous Session inside
    # an async endpoint, which presumably blocks the event loop while they
    # run - confirm whether that is acceptable for this service.

    # Fetch the investor together with every relationship the report reads.
    investor = (
        db.query(InvestorTable)
        .options(
            selectinload(InvestorTable.portfolio_companies),
            selectinload(InvestorTable.team_members),
            selectinload(InvestorTable.sectors),
            selectinload(InvestorTable.funds).selectinload(FundTable.investment_stages),
            selectinload(InvestorTable.funds).selectinload(FundTable.sectors),
        )
        .filter(InvestorTable.id == investor_id)
        .first()
    )

    if not investor:
        raise HTTPException(status_code=404, detail="Investor not found")

    # Flatten the ORM object into the plain dictionary the generator expects.
    investor_data = {
        "name": investor.name,
        "description": investor.description,
        "website": investor.website,
        "headquarters": investor.headquarters,
        "aum": investor.aum,
        "geographic_focus": investor.geographic_focus,
        "portfolio_highlights": investor.portfolio_highlights or [],
        "investment_thesis": investor.investment_thesis or [],
        "sectors": [sector.name for sector in investor.sectors],
        "team_members": [
            {
                "name": member.name,
                "role": member.role,
                "title": member.title,
                "email": member.email,
            }
            for member in investor.team_members
        ],
        "check_size_lower": None,
        "check_size_upper": None,
        "investment_stages": [],
    }

    if investor.funds:
        # Check sizes are taken from the first fund only.
        first_fund = investor.funds[0]
        investor_data["check_size_lower"] = first_fund.check_size_lower
        investor_data["check_size_upper"] = first_fund.check_size_upper

        # Stages are aggregated over all funds; sorting keeps the rendered
        # report stable between requests (set order is not).
        investor_data["investment_stages"] = sorted(
            {
                stage.name
                for fund in investor.funds
                for stage in fund.investment_stages
            }
        )

    # Fetch project data if a project ID was supplied. Compare against None
    # so that an ID of 0 is still looked up instead of silently ignored.
    project_data = None
    if project_id is not None:
        project = (
            db.query(ProjectTable)
            .options(selectinload(ProjectTable.sector))
            .filter(ProjectTable.id == project_id)
            .first()
        )

        if not project:
            raise HTTPException(status_code=404, detail="Project not found")

        project_data = {
            "name": project.name,
            "description": project.description,
            "location": project.location,
            "valuation": project.valuation,
            "stage": project.stage.name if project.stage else None,
            "sectors": [sector.name for sector in project.sector],
        }

    # Generate PDF report
    report_generator = ReportGenerator()
    pdf_bytes = await report_generator.generate_investor_report(
        investor_data, project_data
    )

    # Return the PDF as a download; fall back to the ID when the name is empty.
    base_name = (investor.name or f"investor_{investor_id}").replace(" ", "_")
    return Response(
        content=pdf_bytes,
        media_type="application/pdf",
        headers={"Content-Disposition": _content_disposition(f"{base_name}_Report.pdf")},
    )
from pathlib import Path
from typing import Any, Dict, List, Optional


class ReportGenerator:
    """Service for generating PDF reports from HTML templates.

    Jinja2 and Playwright are imported inside the methods that need them, so
    the scoring helpers can be used without the rendering stack installed.
    """

    # Points each criterion contributes to the compatibility score. The same
    # numbers are rendered as the "weight" column of the criteria table.
    _WEIGHTS: Dict[str, int] = {
        "sector": 30,
        "stage": 30,
        "geography": 20,
        "check_size": 15,
        "thesis": 5,
    }

    def __init__(self):
        from jinja2 import Environment, FileSystemLoader, select_autoescape

        # Templates live in the "templates" directory next to this package.
        template_dir = Path(__file__).parent.parent / "templates"
        # Autoescape HTML templates: investor/project text comes from the
        # database and must not be interpreted as markup in the report.
        self.env = Environment(
            loader=FileSystemLoader(str(template_dir)),
            autoescape=select_autoescape(["html", "xml"]),
        )

    async def generate_investor_report(
        self,
        investor_data: Dict[str, Any],
        project_data: Optional[Dict[str, Any]] = None,
    ) -> bytes:
        """
        Generate a PDF report for an investor profile.

        Args:
            investor_data: Dictionary containing investor information
            project_data: Optional dictionary containing project information
                for compatibility analysis

        Returns:
            bytes: PDF file content
        """
        context = self._prepare_context(investor_data, project_data)
        html_content = self.env.get_template("report.html").render(**context)
        return await self._html_to_pdf(html_content)

    def _prepare_context(
        self,
        investor_data: Dict[str, Any],
        project_data: Optional[Dict[str, Any]] = None,
    ) -> Dict[str, Any]:
        """Prepare the context dictionary for template rendering.

        Without project data the match-related entries keep their neutral
        defaults (score 0, no criteria, no recommendation).
        """
        context: Dict[str, Any] = {
            "investor": investor_data,
            "project": project_data,
            "compatibility_score": 0,
            "match_criteria": [],
            "recommendation": None,
        }

        if project_data:
            context["compatibility_score"] = self._calculate_compatibility_score(
                investor_data, project_data
            )
            context["match_criteria"] = self._generate_match_criteria(
                investor_data, project_data
            )
            context["recommendation"] = self._generate_recommendation(
                context["compatibility_score"], context["match_criteria"]
            )

        return context

    @staticmethod
    def _geography_matches(investor_geo: Any, project_geo: Any) -> Optional[bool]:
        """Return whether the two locations overlap, or None if one is missing.

        The comparison is a case-insensitive substring test in either
        direction.
        """
        focus = (investor_geo or "").strip().lower()
        location = (project_geo or "").strip().lower()
        if not focus or not location:
            return None
        return focus in location or location in focus

    @staticmethod
    def _check_size_fits(lower: Any, upper: Any, valuation: Any) -> Optional[bool]:
        """Return whether the valuation lies in the check-size range.

        Returns None when the comparison cannot be made (no valuation, or
        neither bound known). A single missing bound is treated as open.
        """
        if not valuation or not (lower or upper):
            return None
        low = lower or 0
        high = upper or float("inf")
        return low <= valuation <= high

    def _calculate_compatibility_score(
        self, investor_data: Dict[str, Any], project_data: Dict[str, Any]
    ) -> int:
        """Calculate overall compatibility score between investor and project.

        Missing or None fields never raise; they simply earn no points.

        Returns:
            int: Sum of the weights of all matching criteria, capped at 100.
        """
        weights = self._WEIGHTS
        score = 0

        # Sector match: at least one sector in common.
        investor_sectors = set(investor_data.get("sectors") or [])
        project_sectors = set(project_data.get("sectors") or [])
        if investor_sectors & project_sectors:
            score += weights["sector"]

        # Stage match
        investor_stages = set(investor_data.get("investment_stages") or [])
        project_stage = project_data.get("stage")
        if project_stage and project_stage in investor_stages:
            score += weights["stage"]

        # Geography match (same rule as the criteria table)
        if self._geography_matches(
            investor_data.get("geographic_focus"), project_data.get("location")
        ):
            score += weights["geography"]

        # Check size match (same rule as the criteria table)
        if self._check_size_fits(
            investor_data.get("check_size_lower"),
            investor_data.get("check_size_upper"),
            project_data.get("valuation"),
        ):
            score += weights["check_size"]

        # Thesis alignment (simplified): always granted.
        score += weights["thesis"]

        return min(score, 100)

    def _generate_match_criteria(
        self, investor_data: Dict[str, Any], project_data: Dict[str, Any]
    ) -> List[Dict[str, str]]:
        """Generate detailed match criteria table.

        Returns:
            One dict per criterion (Sector, Stage, Geography, Check Size,
            Thesis) with the keys ``name``, ``requirement``, ``evidence``,
            ``match`` and ``weight``.
        """
        weights = self._WEIGHTS
        criteria: List[Dict[str, str]] = []

        # Sector criterion: the requirement column shows the project's sectors.
        investor_sectors = list(investor_data.get("sectors") or [])
        project_sectors = list(project_data.get("sectors") or [])
        criteria.append(
            {
                "name": "Sector",
                "requirement": ", ".join(project_sectors) if project_sectors else "N/A",
                "evidence": ", ".join(investor_sectors[:3])
                if investor_sectors
                else "N/A",
                "match": "Perfect"
                if set(investor_sectors) & set(project_sectors)
                else "Mismatch",
                "weight": f"{weights['sector']}%",
            }
        )

        # Stage criterion
        investor_stages = list(investor_data.get("investment_stages") or [])
        project_stage = project_data.get("stage")
        stage_ok = bool(project_stage) and project_stage in investor_stages
        criteria.append(
            {
                "name": "Stage",
                "requirement": str(project_stage) if project_stage else "N/A",
                "evidence": ", ".join(investor_stages) if investor_stages else "N/A",
                "match": "Perfect" if stage_ok else "Mismatch",
                "weight": f"{weights['stage']}%",
            }
        )

        # Geography criterion
        investor_geo = investor_data.get("geographic_focus")
        project_geo = project_data.get("location")
        geo_result = self._geography_matches(investor_geo, project_geo)
        if geo_result is None:
            geo_match = "N/A"
        else:
            geo_match = "Strong" if geo_result else "Mismatch"
        criteria.append(
            {
                "name": "Geography",
                "requirement": project_geo or "N/A",
                "evidence": investor_geo or "N/A",
                "match": geo_match,
                "weight": f"{weights['geography']}%",
            }
        )

        # Check Size criterion
        check_lower = investor_data.get("check_size_lower")
        check_upper = investor_data.get("check_size_upper")
        project_val = project_data.get("valuation")

        if check_lower and check_upper:
            check_evidence = (
                f"€{check_lower / 1000000:.0f}M - €{check_upper / 1000000:.0f}M"
            )
        elif check_lower:
            check_evidence = f"€{check_lower / 1000000:.0f}M+"
        elif check_upper:
            check_evidence = f"Up to €{check_upper / 1000000:.0f}M"
        else:
            check_evidence = "N/A"

        check_result = self._check_size_fits(check_lower, check_upper, project_val)
        if check_result is None:
            check_match = "N/A"
        else:
            check_match = "Perfect" if check_result else "Mismatch"
        criteria.append(
            {
                "name": "Check Size",
                "requirement": f"€{project_val / 1000000:.0f}M"
                if project_val
                else "N/A",
                "evidence": check_evidence,
                "match": check_match,
                "weight": f"{weights['check_size']}%",
            }
        )

        # Thesis criterion.
        # NOTE(review): requirement and match are static placeholders; the
        # project data carries no thesis information to compare against.
        thesis = list(investor_data.get("investment_thesis") or [])
        criteria.append(
            {
                "name": "Thesis",
                "requirement": "Founder-led, ESG focus",
                "evidence": ", ".join(thesis[:2]) if thesis else "Entrepreneur-led",
                "match": "Strong",
                "weight": f"{weights['thesis']}%",
            }
        )

        return criteria

    def _generate_recommendation(
        self, score: int, criteria: List[Dict[str, str]]
    ) -> str:
        """Generate recommendation text based on the score.

        ``criteria`` is accepted for interface compatibility but is not used;
        the text depends on the score band only (>= 85, >= 70, otherwise).
        """
        if score >= 85:
            return "High Priority. A strong target due to exceptional alignment on the most heavily-weighted criteria: Sector and Stage. The strong geographic fit further solidifies this recommendation."
        if score >= 70:
            return "Medium Priority. Good alignment on key criteria with some areas of strong fit. The geographic fit in the target region supports this recommendation."
        return "Low Priority. Limited alignment on key investment criteria. Consider for future evaluation if circumstances change."

    async def _html_to_pdf(self, html_content: str) -> bytes:
        """Convert HTML content to PDF using Playwright (headless Chromium)."""
        from playwright.async_api import async_playwright

        async with async_playwright() as p:
            browser = await p.chromium.launch()
            try:
                page = await browser.new_page()

                # Set content and wait for any dynamic content to load
                await page.set_content(html_content, wait_until="networkidle")

                # A4, backgrounds printed, no page margins.
                return await page.pdf(
                    format="A4",
                    print_background=True,
                    margin={"top": "0", "right": "0", "bottom": "0", "left": "0"},
                )
            finally:
                # Close the browser even when rendering fails.
                await browser.close()
+ + +Investor Profile
++ {{ investor.description or 'No description + available.' }} +
++ No portfolio highlights available +
+ {% endif %} ++ {{ member.name }} +
++ {{ member.role or member.title or 'Team Member' + }} +
+ {% if member.email %} ++ {{ member.email }} +
+ {% endif %} ++ No team information available +
+ {% endif %} +Headquarters:
++ {{ investor.headquarters or 'N/A' }} +
+Sectors:
++ {% if investor.sectors %} {{ investor.sectors | + join(', ') }} {% else %} N/A {% endif %} +
+Geographic Focus:
++ {{ investor.geographic_focus or 'N/A' }} +
++ AUM: (EUR million) (as of Fund IX) +
++ {% if investor.aum %} €{{ + '{:,.0f}'.format(investor.aum / 1000000) }}M {% else + %} N/A {% endif %} +
++ Investment Stage: +
++ {% if investor.investment_stages %} {{ + investor.investment_stages | join(', ') }} {% else + %} N/A {% endif %} +
++ Est. Investment Size: +
++ {% if investor.check_size_lower and + investor.check_size_upper %} €{{ + '{:,.0f}'.format(investor.check_size_lower / + 1000000) }}M - €{{ + '{:,.0f}'.format(investor.check_size_upper / + 1000000) }}M {% elif investor.check_size_lower %} + €{{ '{:,.0f}'.format(investor.check_size_lower / + 1000000) }}M+ {% else %} N/A {% endif %} +
++ Select Deals, Series A, Series B: +
++ Growth +
+Focus Areas:
++ {% if investor.investment_thesis %} {{ + investor.investment_thesis[:3] | join(', ') }} {% + else %} Disruptive Technologies, Entrepreneur-led, + Sustainability {% endif %} +
++ Overall Mandate Match +
+| + Criterion + | ++ Mandate Requirement + | ++ Investor Evidence (from Database) + | ++ Match Score + | ++ Weight + | +
|---|---|---|---|---|
| + {{ criterion.name }} + | ++ {{ criterion.requirement }} + | ++ {{ criterion.evidence }} + | ++ + {{ criterion.match }} + + | ++ {{ criterion.weight }} + | +
+ {{ recommendation or "High Priority. A strong target due to + exceptional alignment on the most heavily-weighted criteria: + Sector and Stage. The strong geographic fit in the DACH + region further solidifies this recommendation." }} +
+