feat: Update investor report generation and HTML template to include fund details and improve data handling

This commit is contained in:
bolade
2025-10-21 10:48:58 +01:00
parent 63d8e57e57
commit 483c2cc114
10 changed files with 289 additions and 135 deletions
+137 -61
View File
@@ -80,34 +80,70 @@ class ReportGenerator:
"thesis": 5,
}
# Aggregate data from all funds
all_sectors = set(investor_data.get("sectors", []))
all_stages = set()
all_geographies = []
check_ranges = []
for fund in investor_data.get("funds", []):
all_sectors.update(fund.get("sectors", []))
all_stages.update(fund.get("investment_stages", []))
if fund.get("geographic_focus"):
all_geographies.append(fund["geographic_focus"])
if fund.get("check_size_lower") and fund.get("check_size_upper"):
check_ranges.append(
{
"lower": fund["check_size_lower"],
"upper": fund["check_size_upper"],
}
)
# Sector match
investor_sectors = set(investor_data.get("sectors", []))
project_sectors = set(project_data.get("sectors", []))
if investor_sectors and project_sectors:
if investor_sectors & project_sectors:
if all_sectors and project_sectors:
if all_sectors & project_sectors:
score += weights["sector"]
# Stage match
investor_stages = set(investor_data.get("investment_stages", []))
# Stage match - case-insensitive comparison
project_stage = project_data.get("stage")
if project_stage and project_stage in investor_stages:
score += weights["stage"]
if project_stage and all_stages:
# Normalize stage names for comparison (case-insensitive)
normalized_stages = {
stage.lower().replace("_", " ") for stage in all_stages
}
project_stage_normalized = project_stage.lower().replace("_", " ")
if project_stage_normalized in normalized_stages:
score += weights["stage"]
# Geography match
investor_geo = (investor_data.get("geographic_focus") or "").lower()
# Geography match - check if any fund matches
project_geo = (project_data.get("location") or "").lower()
if investor_geo and project_geo and investor_geo in project_geo:
geo_match = False
if all_geographies:
for geo in all_geographies:
if geo:
geo_lower = geo.lower()
# Match if investor geography is "global" or if there's a location overlap
if "global" in geo_lower or "worldwide" in geo_lower:
geo_match = True
break
if project_geo and (
geo_lower in project_geo or project_geo in geo_lower
):
geo_match = True
break
if geo_match:
score += weights["geography"]
# Check size match
# Check size match - check if any fund's range matches
project_valuation = project_data.get("valuation", 0)
check_lower = investor_data.get("check_size_lower") or 0
check_upper = investor_data.get("check_size_upper") or float("inf")
if (
check_lower
and check_upper
and check_lower <= project_valuation <= check_upper
):
check_match = False
if project_valuation and check_ranges:
for check_range in check_ranges:
if check_range["lower"] <= project_valuation <= check_range["upper"]:
check_match = True
break
if check_match:
score += weights["check_size"]
# Thesis alignment (simplified)
@@ -121,86 +157,126 @@ class ReportGenerator:
"""Generate detailed match criteria table"""
criteria = []
# Aggregate data from all funds
all_sectors = set(investor_data.get("sectors", []))
all_stages = set()
all_geographies = []
check_ranges = []
for fund in investor_data.get("funds", []):
all_sectors.update(fund.get("sectors", []))
all_stages.update(fund.get("investment_stages", []))
if fund.get("geographic_focus"):
all_geographies.append(fund["geographic_focus"])
if fund.get("check_size_lower") and fund.get("check_size_upper"):
check_ranges.append(
{
"lower": fund["check_size_lower"],
"upper": fund["check_size_upper"],
"fund_name": fund.get("fund_name", "Unnamed Fund"),
}
)
# Sector criterion
investor_sectors = investor_data.get("sectors", [])
project_sectors = project_data.get("sectors", [])
sector_match = (
"Perfect" if set(investor_sectors) & set(project_sectors) else "Mismatch"
)
sector_match = "Perfect" if all_sectors & set(project_sectors) else "Mismatch"
criteria.append(
{
"name": "Sector",
"requirement": "Cybersecurity, B2B SaaS" if project_sectors else "N/A",
"evidence": ", ".join(investor_sectors[:3])
if investor_sectors
else "N/A",
"requirement": ", ".join(project_sectors) if project_sectors else "N/A",
"evidence": ", ".join(list(all_sectors)[:3]) if all_sectors else "N/A",
"match": sector_match,
"weight": "30%",
}
)
# Stage criterion
investor_stages = investor_data.get("investment_stages", [])
# Stage criterion - case-insensitive comparison
project_stage = project_data.get("stage", "N/A")
stage_match = "Perfect" if project_stage in investor_stages else "Mismatch"
stage_match = "Mismatch"
if project_stage != "N/A" and all_stages:
# Normalize stage names for comparison
normalized_stages = {
stage.lower().replace("_", " ") for stage in all_stages
}
project_stage_normalized = project_stage.lower().replace("_", " ")
stage_match = (
"Perfect"
if project_stage_normalized in normalized_stages
else "Mismatch"
)
elif project_stage == "N/A":
stage_match = "N/A"
criteria.append(
{
"name": "Stage",
"requirement": str(project_stage),
"evidence": ", ".join(investor_stages) if investor_stages else "N/A",
"evidence": ", ".join(all_stages) if all_stages else "N/A",
"match": stage_match,
"weight": "30%",
}
)
# Geography criterion
investor_geo = investor_data.get("geographic_focus") or "N/A"
project_geo = project_data.get("location") or "N/A"
investor_geo_display = ", ".join(all_geographies) if all_geographies else "N/A"
# Safe comparison handling None values and "Global" matches
geo_match = "Mismatch"
if project_geo != "N/A" and all_geographies:
for geo in all_geographies:
if geo:
geo_lower = geo.lower()
# Match if investor geography is "global" or if there's a location overlap
if "global" in geo_lower or "worldwide" in geo_lower:
geo_match = "Perfect"
break
if (
geo_lower in project_geo.lower()
or project_geo.lower() in geo_lower
):
geo_match = "Strong"
break
elif not all_geographies and project_geo == "N/A":
geo_match = "N/A"
# Safe comparison handling None values
if investor_geo == "N/A" or project_geo == "N/A":
geo_match = (
"N/A" if investor_geo == "N/A" and project_geo == "N/A" else "Mismatch"
)
else:
investor_geo_lower = investor_geo.lower()
project_geo_lower = project_geo.lower()
geo_match = (
"Strong"
if investor_geo_lower in project_geo_lower
or project_geo_lower in investor_geo_lower
else "Mismatch"
)
criteria.append(
{
"name": "Geography",
"requirement": project_geo,
"evidence": investor_geo,
"evidence": investor_geo_display,
"match": geo_match,
"weight": "20%",
}
)
# Check Size criterion
check_lower = investor_data.get("check_size_lower") or 0
check_upper = investor_data.get("check_size_upper") or 0
project_val = project_data.get("valuation", 0)
# Build evidence string from all fund ranges
check_evidence = "N/A"
if check_lower and check_upper:
check_evidence = (
f"{check_lower / 1000000:.0f}M - €{check_upper / 1000000:.0f}M"
)
elif check_lower:
check_evidence = f"{check_lower / 1000000:.0f}M+"
if check_ranges:
evidence_parts = []
for cr in check_ranges[:3]: # Show up to 3 funds
range_str = (
f"{cr['lower'] / 1000000:.0f}M - €{cr['upper'] / 1000000:.0f}M"
)
if cr["fund_name"]:
evidence_parts.append(f"{cr['fund_name']}: {range_str}")
else:
evidence_parts.append(range_str)
check_evidence = "; ".join(evidence_parts)
# Check if project valuation matches any fund
check_match = "N/A"
if project_val > 0 and check_ranges:
match_found = any(
cr["lower"] <= project_val <= cr["upper"] for cr in check_ranges
)
check_match = "Perfect" if match_found else "Mismatch"
elif project_val > 0:
check_match = "Strong"
check_match = (
"Perfect"
if check_lower and check_upper and check_lower <= project_val <= check_upper
else "Strong"
if project_val > 0
else "N/A"
)
criteria.append(
{
"name": "Check Size",