2025-10-14 12:02:23 +01:00
from pathlib import Path
from typing import Any , Dict , List , Optional
2025-10-29 00:27:39 +01:00
# Import database models and compatibility score service
from db . models import InvestorTable , ProjectTable
2025-10-14 12:02:23 +01:00
from jinja2 import Environment , FileSystemLoader
from playwright . async_api import async_playwright
2025-10-27 19:15:47 +00:00
from services . compatibility_score import calculate_project_investor_compatibility
2025-10-14 12:02:23 +01:00
class ReportGenerator:
    """Service for generating PDF reports from HTML templates.

    A report is produced in three steps: build a template context from the
    investor/project data, render ``report.html`` with Jinja2, then print the
    resulting HTML to PDF with a headless Chromium driven by Playwright.
    """

    # Points awarded per criterion by the fallback scorer (they sum to 100).
    # The same table feeds the "weight" column of the match-criteria table so
    # the two can never drift apart.
    _WEIGHTS = {
        "sector": 30,
        "stage": 30,
        "geography": 20,
        "check_size": 20,
    }

    def __init__(self) -> None:
        """Create the Jinja2 environment rooted at ``<package>/templates``."""
        template_dir = Path(__file__).parent.parent / "templates"
        # NOTE(review): autoescaping is left at the Jinja2 default (disabled).
        # If investor/project fields can contain user-supplied text, confirm
        # the template escapes them or enable ``autoescape`` here.
        self.env = Environment(loader=FileSystemLoader(str(template_dir)))

    async def generate_investor_report(
        self,
        investor_data: Dict[str, Any],
        project_data: Optional[Dict[str, Any]] = None,
        investor_model: Optional["InvestorTable"] = None,
        project_model: Optional["ProjectTable"] = None,
    ) -> bytes:
        """Generate a PDF report for an investor profile.

        Args:
            investor_data: Dictionary containing investor information.
            project_data: Optional dictionary containing project information
                for compatibility analysis.
            investor_model: Optional database model for the investor (used
                for compatibility scoring).
            project_model: Optional database model for the project (used for
                compatibility scoring).

        Returns:
            bytes: PDF file content.
        """
        context = self._prepare_context(
            investor_data, project_data, investor_model, project_model
        )

        # Render HTML from the template, then print it to PDF.
        template = self.env.get_template("report.html")
        html_content = template.render(**context)
        return await self._html_to_pdf(html_content)

    def _prepare_context(
        self,
        investor_data: Dict[str, Any],
        project_data: Optional[Dict[str, Any]] = None,
        investor_model: Optional["InvestorTable"] = None,
        project_model: Optional["ProjectTable"] = None,
    ) -> Dict[str, Any]:
        """Prepare the context dictionary for template rendering.

        Without ``project_data`` the context carries a score of 0, an empty
        criteria list and no recommendation. With it, the score comes from
        the compatibility-score service when both models are supplied, and
        from the dictionary-based fallback otherwise.
        """
        context: Dict[str, Any] = {
            "investor": investor_data,
            "project": project_data,
            "compatibility_score": 0,
            "match_criteria": [],
            "recommendation": None,
        }
        if not project_data:
            return context

        if investor_model is not None and project_model is not None:
            # The service returns a score between 0 and 1.
            score_decimal = calculate_project_investor_compatibility(
                project=project_model, investor=investor_model, use_funds=True
            )
            context["compatibility_score"] = self._to_percentage(score_decimal)
        else:
            # Fallback to the dictionary-based calculation.
            context["compatibility_score"] = self._calculate_compatibility_score(
                investor_data, project_data
            )

        context["match_criteria"] = self._generate_match_criteria(
            investor_data, project_data
        )
        context["recommendation"] = self._generate_recommendation(
            context["compatibility_score"], context["match_criteria"]
        )
        return context

    @staticmethod
    def _to_percentage(score_decimal: float) -> int:
        """Convert a 0-1 score into a whole percentage.

        Rounds instead of truncating: ``int(0.29 * 100)`` is 28 because the
        product is 28.999999999999996 in binary floating point.
        """
        return round(score_decimal * 100)

    @staticmethod
    def _normalize_stage(stage: str) -> str:
        """Lower-case a stage name and replace underscores with spaces."""
        return stage.lower().replace("_", " ")

    @staticmethod
    def _aggregate_fund_data(investor_data: Dict[str, Any]):
        """Collect matching inputs from the investor and all of its funds.

        Returns:
            A ``(sectors, stages, geographies, check_ranges)`` tuple: a set of
            sectors (investor-level plus every fund), a set of investment
            stages, a list of non-empty geographic focuses in fund order, and
            a list of ``{"lower", "upper", "fund_name"}`` dicts.
        """
        sectors = set(investor_data.get("sectors") or [])
        stages = set()
        geographies = []
        check_ranges = []

        for fund in investor_data.get("funds") or []:
            sectors.update(fund.get("sectors") or [])
            stages.update(fund.get("investment_stages") or [])

            geo = fund.get("geographic_focus")
            if geo:
                geographies.append(geo)

            lower = fund.get("check_size_lower")
            upper = fund.get("check_size_upper")
            # A lower bound of 0 is a valid range start, so only a missing
            # lower bound (or a missing/zero upper bound) disqualifies a fund.
            if lower is not None and upper:
                check_ranges.append(
                    {
                        "lower": lower,
                        "upper": upper,
                        "fund_name": fund.get("fund_name", "Unnamed Fund"),
                    }
                )

        return sectors, stages, geographies, check_ranges

    @staticmethod
    def _match_geography(
        geographies: List[str], project_geo: str
    ) -> Optional[str]:
        """Classify the first fund geography that fits the project location.

        Returns ``"Perfect"`` when the geography mentions "global" or
        "worldwide", ``"Strong"`` when one of the two strings contains the
        other (case-insensitive), and ``None`` when no geography fits. An
        empty ``project_geo`` can only produce a global match.
        """
        project_lower = project_geo.lower()
        for geo in geographies:
            if not geo:
                continue
            geo_lower = geo.lower()
            if "global" in geo_lower or "worldwide" in geo_lower:
                return "Perfect"
            if project_lower and (
                geo_lower in project_lower or project_lower in geo_lower
            ):
                return "Strong"
        return None

    def _calculate_compatibility_score(
        self, investor_data: Dict[str, Any], project_data: Dict[str, Any]
    ) -> int:
        """Calculate overall compatibility score between investor and project.

        Each criterion in ``_WEIGHTS`` is all-or-nothing; the result is the
        sum of the weights of the criteria that match, capped at 100.
        """
        weights = self._WEIGHTS
        sectors, stages, geographies, check_ranges = self._aggregate_fund_data(
            investor_data
        )
        score = 0

        # Sector match: any overlap counts.
        if sectors & set(project_data.get("sectors") or []):
            score += weights["sector"]

        # Stage match, compared on normalized names.
        project_stage = project_data.get("stage")
        if project_stage and stages:
            normalized_stages = {self._normalize_stage(stage) for stage in stages}
            if self._normalize_stage(project_stage) in normalized_stages:
                score += weights["stage"]

        # Geography match: any fund that is global or overlaps the location.
        project_geo = project_data.get("location") or ""
        if self._match_geography(geographies, project_geo) is not None:
            score += weights["geography"]

        # Check size match: the valuation falls inside any fund's range.
        project_valuation = project_data.get("valuation") or 0
        if project_valuation and any(
            check_range["lower"] <= project_valuation <= check_range["upper"]
            for check_range in check_ranges
        ):
            score += weights["check_size"]

        return min(score, 100)

    def _generate_match_criteria(
        self, investor_data: Dict[str, Any], project_data: Dict[str, Any]
    ) -> List[Dict[str, str]]:
        """Generate the detailed match criteria table.

        Returns one row per criterion (Sector, Stage, Geography, Check Size),
        each with ``name``, ``requirement`` (project side), ``evidence``
        (investor side), ``match`` and ``weight`` keys.
        """
        sectors, stages, geographies, check_ranges = self._aggregate_fund_data(
            investor_data
        )
        weight_labels = {
            name: f"{points}%" for name, points in self._WEIGHTS.items()
        }
        criteria = []

        # Sector criterion. Sets are sorted so the rendered text is stable
        # from one run to the next.
        project_sectors = list(project_data.get("sectors") or [])
        criteria.append(
            {
                "name": "Sector",
                "requirement": ", ".join(project_sectors)
                if project_sectors
                else "N/A",
                "evidence": ", ".join(sorted(sectors)[:3]) if sectors else "N/A",
                "match": "Perfect"
                if sectors & set(project_sectors)
                else "Mismatch",
                "weight": weight_labels["sector"],
            }
        )

        # Stage criterion. A missing or None stage is reported as N/A rather
        # than being compared.
        project_stage = project_data.get("stage") or "N/A"
        if project_stage == "N/A":
            stage_match = "N/A"
        elif stages and self._normalize_stage(project_stage) in {
            self._normalize_stage(stage) for stage in stages
        }:
            stage_match = "Perfect"
        else:
            stage_match = "Mismatch"
        criteria.append(
            {
                "name": "Stage",
                "requirement": str(project_stage),
                "evidence": ", ".join(sorted(stages)) if stages else "N/A",
                "match": stage_match,
                "weight": weight_labels["stage"],
            }
        )

        # Geography criterion.
        project_geo = project_data.get("location") or "N/A"
        if project_geo != "N/A" and geographies:
            geo_match = (
                self._match_geography(geographies, project_geo) or "Mismatch"
            )
        elif not geographies and project_geo == "N/A":
            geo_match = "N/A"
        else:
            geo_match = "Mismatch"
        criteria.append(
            {
                "name": "Geography",
                "requirement": project_geo,
                "evidence": ", ".join(geographies) if geographies else "N/A",
                "match": geo_match,
                "weight": weight_labels["geography"],
            }
        )

        # Check Size criterion. ``or 0`` also covers an explicit None.
        project_val = project_data.get("valuation") or 0

        check_evidence = "N/A"
        if check_ranges:
            evidence_parts = []
            for cr in check_ranges[:3]:  # Show up to 3 funds
                range_str = (
                    f"€{cr['lower'] / 1000000:.0f}M - €{cr['upper'] / 1000000:.0f}M"
                )
                if cr["fund_name"]:
                    evidence_parts.append(f"{cr['fund_name']}: {range_str}")
                else:
                    evidence_parts.append(range_str)
            check_evidence = "; ".join(evidence_parts)

        check_match = "N/A"
        if project_val > 0 and check_ranges:
            match_found = any(
                cr["lower"] <= project_val <= cr["upper"] for cr in check_ranges
            )
            check_match = "Perfect" if match_found else "Mismatch"
        criteria.append(
            {
                "name": "Check Size",
                "requirement": f"€{project_val / 1000000:.0f}M"
                if project_val
                else "N/A",
                "evidence": check_evidence,
                "match": check_match,
                "weight": weight_labels["check_size"],
            }
        )

        return criteria

    def _generate_recommendation(
        self, score: int, criteria: List[Dict[str, str]]
    ) -> str:
        """Generate recommendation text based on the score.

        ``criteria`` is accepted for interface compatibility but is not
        consulted: the text is chosen from the score alone (>= 85 high,
        >= 70 medium, otherwise low).
        """
        if score >= 85:
            return "High Priority. A strong target due to exceptional alignment on the most heavily-weighted criteria: Sector and Stage. The strong geographic fit further solidifies this recommendation."
        if score >= 70:
            return "Medium Priority. Good alignment on key criteria with some areas of strong fit. The geographic fit in the target region supports this recommendation."
        return "Low Priority. Limited alignment on key investment criteria. Consider for future evaluation if circumstances change."

    async def _html_to_pdf(self, html_content: str) -> bytes:
        """Convert HTML content to an A4 PDF using Playwright's Chromium."""
        async with async_playwright() as p:
            browser = await p.chromium.launch()
            try:
                page = await browser.new_page()

                # Set content and wait for any dynamic content to load.
                await page.set_content(html_content, wait_until="networkidle")

                return await page.pdf(
                    format="A4",
                    print_background=True,
                    margin={"top": "0", "right": "0", "bottom": "0", "left": "0"},
                )
            finally:
                # Close the browser even when rendering fails.
                await browser.close()