2025-10-14 12:02:23 +01:00
from pathlib import Path
from typing import Any , Dict , List , Optional
from jinja2 import Environment , FileSystemLoader
from playwright . async_api import async_playwright
class ReportGenerator:
    """Service for generating PDF reports from HTML templates.

    The report context contains the raw investor/project dictionaries plus,
    when a project is supplied, a 0-100 compatibility score, a per-criterion
    match table and a recommendation sentence.
    """

    # Points awarded per criterion. Shared by the numeric score and the
    # "weight" column of the criteria table so the two cannot drift apart.
    _WEIGHTS: Dict[str, int] = {
        "sector": 30,
        "stage": 30,
        "geography": 20,
        "check_size": 15,
        "thesis": 5,
    }

    def __init__(self) -> None:
        """Create the Jinja2 environment that loads templates from ``../templates``."""
        template_dir = Path(__file__).parent.parent / "templates"
        # Autoescape so that investor/project text containing "<", ">" or "&"
        # is rendered as text instead of being interpreted as markup.
        self.env = Environment(
            loader=FileSystemLoader(str(template_dir)),
            autoescape=True,
        )

    async def generate_investor_report(
        self,
        investor_data: Dict[str, Any],
        project_data: Optional[Dict[str, Any]] = None,
    ) -> bytes:
        """Generate a PDF report for an investor profile.

        Args:
            investor_data: Dictionary containing investor information.
            project_data: Optional dictionary containing project information
                for compatibility analysis.

        Returns:
            The PDF file content.
        """
        context = self._prepare_context(investor_data, project_data)
        template = self.env.get_template("report.html")
        html_content = template.render(**context)
        return await self._html_to_pdf(html_content)

    def _prepare_context(
        self,
        investor_data: Dict[str, Any],
        project_data: Optional[Dict[str, Any]] = None,
    ) -> Dict[str, Any]:
        """Build the template context.

        Without project data the compatibility fields keep their neutral
        defaults (score 0, empty criteria list, no recommendation).
        """
        context: Dict[str, Any] = {
            "investor": investor_data,
            "project": project_data,
            "compatibility_score": 0,
            "match_criteria": [],
            "recommendation": None,
        }
        if project_data:
            context["compatibility_score"] = self._calculate_compatibility_score(
                investor_data, project_data
            )
            context["match_criteria"] = self._generate_match_criteria(
                investor_data, project_data
            )
            context["recommendation"] = self._generate_recommendation(
                context["compatibility_score"], context["match_criteria"]
            )
        return context

    @staticmethod
    def _geo_overlap(investor_geo: Optional[str], project_geo: Optional[str]) -> bool:
        """Return True when either location string contains the other (case-insensitive)."""
        investor_lower = (investor_geo or "").lower()
        project_lower = (project_geo or "").lower()
        if not investor_lower or not project_lower:
            return False
        return investor_lower in project_lower or project_lower in investor_lower

    @staticmethod
    def _check_size_fits(check_lower: Any, check_upper: Any, valuation: Any) -> bool:
        """Return True when the valuation lies inside the investor's check-size range.

        A lower bound is required; a missing upper bound is treated as
        open-ended. Missing (None) values never raise.
        """
        lower = check_lower or 0
        upper = check_upper or float("inf")
        value = valuation or 0
        return bool(lower) and lower <= value <= upper

    def _calculate_compatibility_score(
        self, investor_data: Dict[str, Any], project_data: Dict[str, Any]
    ) -> int:
        """Calculate the overall compatibility score between investor and project.

        Each matching criterion contributes its weight from ``_WEIGHTS``; the
        result is capped at 100. Keys that are absent or set to None are
        treated as "no data" and simply earn no points.
        """
        weights = self._WEIGHTS
        score = 0

        # Sector: any shared sector counts as a match.
        investor_sectors = set(investor_data.get("sectors") or [])
        project_sectors = set(project_data.get("sectors") or [])
        if investor_sectors & project_sectors:
            score += weights["sector"]

        # Stage: the project's stage must be one the investor lists.
        investor_stages = set(investor_data.get("investment_stages") or [])
        project_stage = project_data.get("stage")
        if project_stage and project_stage in investor_stages:
            score += weights["stage"]

        # Geography: same bidirectional substring test the criteria table uses.
        if self._geo_overlap(
            investor_data.get("geographic_focus"), project_data.get("location")
        ):
            score += weights["geography"]

        # Check size: valuation must fall inside the investor's range.
        if self._check_size_fits(
            investor_data.get("check_size_lower"),
            investor_data.get("check_size_upper"),
            project_data.get("valuation"),
        ):
            score += weights["check_size"]

        # Thesis alignment is not evaluated yet; the points are always awarded.
        score += weights["thesis"]

        return min(score, 100)

    def _generate_match_criteria(
        self, investor_data: Dict[str, Any], project_data: Dict[str, Any]
    ) -> List[Dict[str, str]]:
        """Generate the detailed match-criteria table.

        Returns one row per criterion (Sector, Stage, Geography, Check Size,
        Thesis), each with ``name``, ``requirement`` (project side),
        ``evidence`` (investor side), ``match`` and ``weight`` keys.
        """
        weights = self._WEIGHTS
        criteria: List[Dict[str, str]] = []

        # Sector criterion
        investor_sectors = list(investor_data.get("sectors") or [])
        project_sectors = list(project_data.get("sectors") or [])
        sector_match = (
            "Perfect" if set(investor_sectors) & set(project_sectors) else "Mismatch"
        )
        criteria.append(
            {
                "name": "Sector",
                # Show the project's own sectors rather than a fixed placeholder.
                "requirement": ", ".join(map(str, project_sectors[:3]))
                if project_sectors
                else "N/A",
                "evidence": ", ".join(map(str, investor_sectors[:3]))
                if investor_sectors
                else "N/A",
                "match": sector_match,
                "weight": f"{weights['sector']}%",
            }
        )

        # Stage criterion
        investor_stages = list(investor_data.get("investment_stages") or [])
        project_stage = project_data.get("stage") or "N/A"
        stage_match = "Perfect" if project_stage in investor_stages else "Mismatch"
        criteria.append(
            {
                "name": "Stage",
                "requirement": str(project_stage),
                "evidence": ", ".join(map(str, investor_stages))
                if investor_stages
                else "N/A",
                "match": stage_match,
                "weight": f"{weights['stage']}%",
            }
        )

        # Geography criterion
        investor_geo = investor_data.get("geographic_focus") or "N/A"
        project_geo = project_data.get("location") or "N/A"
        if investor_geo == "N/A" and project_geo == "N/A":
            geo_match = "N/A"
        elif investor_geo == "N/A" or project_geo == "N/A":
            # Only one side is known, so the fit cannot be confirmed.
            geo_match = "Mismatch"
        elif self._geo_overlap(investor_geo, project_geo):
            geo_match = "Strong"
        else:
            geo_match = "Mismatch"
        criteria.append(
            {
                "name": "Geography",
                "requirement": project_geo,
                "evidence": investor_geo,
                "match": geo_match,
                "weight": f"{weights['geography']}%",
            }
        )

        # Check Size criterion
        check_lower = investor_data.get("check_size_lower") or 0
        check_upper = investor_data.get("check_size_upper") or 0
        project_val = project_data.get("valuation") or 0

        if check_lower and check_upper:
            check_evidence = (
                f"€{check_lower / 1000000:.0f}M - €{check_upper / 1000000:.0f}M"
            )
        elif check_lower:
            check_evidence = f"€{check_lower / 1000000:.0f}M+"
        else:
            check_evidence = "N/A"

        if not check_lower or not project_val > 0:
            # Nothing to compare against on one side or the other.
            check_match = "N/A"
        elif not self._check_size_fits(check_lower, check_upper, project_val):
            check_match = "Mismatch"
        elif check_upper:
            check_match = "Perfect"
        else:
            # Above the lower bound of an open-ended range.
            check_match = "Strong"
        criteria.append(
            {
                "name": "Check Size",
                "requirement": f"€{project_val / 1000000:.0f}M"
                if project_val
                else "N/A",
                "evidence": check_evidence,
                "match": check_match,
                "weight": f"{weights['check_size']}%",
            }
        )

        # Thesis criterion
        # NOTE(review): requirement, fallback evidence and match are fixed
        # placeholders; thesis alignment is not actually evaluated.
        thesis = investor_data.get("investment_thesis") or []
        criteria.append(
            {
                "name": "Thesis",
                "requirement": "Founder-led, ESG focus",
                "evidence": ", ".join(thesis[:2]) if thesis else "Entrepreneur-led",
                "match": "Strong",
                "weight": f"{weights['thesis']}%",
            }
        )

        return criteria

    def _generate_recommendation(
        self, score: int, criteria: List[Dict[str, str]]
    ) -> str:
        """Generate recommendation text based on score and criteria.

        Args:
            score: Compatibility score; >= 85 is high priority, >= 70 medium,
                anything lower is low priority.
            criteria: Rows as produced by ``_generate_match_criteria``. If a
                "Geography" row is present and is not a match, the sentence
                praising the geographic fit is left out. When no such row is
                supplied the full text is returned.

        Returns:
            The recommendation sentence(s).
        """
        geo_row = next(
            (row for row in (criteria or []) if row.get("name") == "Geography"),
            None,
        )
        geo_fits = geo_row is None or geo_row.get("match") in ("Strong", "Perfect")

        if score >= 85:
            text = (
                "High Priority. A strong target due to exceptional alignment on "
                "the most heavily-weighted criteria: Sector and Stage."
            )
            if geo_fits:
                text += (
                    " The strong geographic fit further solidifies this"
                    " recommendation."
                )
            return text
        if score >= 70:
            text = (
                "Medium Priority. Good alignment on key criteria with some areas "
                "of strong fit."
            )
            if geo_fits:
                text += (
                    " The geographic fit in the target region supports this"
                    " recommendation."
                )
            return text
        return (
            "Low Priority. Limited alignment on key investment criteria. "
            "Consider for future evaluation if circumstances change."
        )

    async def _html_to_pdf(self, html_content: str) -> bytes:
        """Convert HTML content to an A4 PDF using headless Chromium via Playwright."""
        async with async_playwright() as p:
            browser = await p.chromium.launch()
            try:
                page = await browser.new_page()
                # Wait until the network is idle so referenced assets are loaded.
                await page.set_content(html_content, wait_until="networkidle")
                return await page.pdf(
                    format="A4",
                    print_background=True,
                    margin={"top": "0", "right": "0", "bottom": "0", "left": "0"},
                )
            finally:
                # Close the browser even when rendering fails.
                await browser.close()