feat: Update investor report generation and HTML template to include fund details and improve data handling
This commit is contained in:
Binary file not shown.
+5
-3
@@ -61,16 +61,18 @@ async def parse_csv(
|
||||
- Handles AUM, fund sizes, and check sizes as integers
|
||||
|
||||
**For companies:**
|
||||
- Expected columns: Name, Website, Investor, Final Investor Profile (company profile)
|
||||
- Expected columns: Name, Website, Perplexity Gap Output (or Final Investor Profile)
|
||||
- 100% manual JSON parsing - no LLM needed
|
||||
- Extracts company details, executives, investors, and client categories
|
||||
- Automatically links companies to investors in database
|
||||
- **Only extracts:** founded_year and key_executives
|
||||
- **Only updates companies already in the database** (syncs with existing records)
|
||||
- Skips companies not found in the database
|
||||
|
||||
**Benefits:**
|
||||
- Fast processing (5-10s per record)
|
||||
- Low cost (minimal or no LLM usage)
|
||||
- Accurate data extraction
|
||||
- Automatic database persistence
|
||||
- Safe: won't create duplicate companies
|
||||
"""
|
||||
# Read uploaded CSV with pandas
|
||||
content = await file.read()
|
||||
|
||||
Binary file not shown.
+12
-15
@@ -52,7 +52,6 @@ async def generate_investor_report(
|
||||
"website": investor.website,
|
||||
"headquarters": investor.headquarters,
|
||||
"aum": investor.aum,
|
||||
"geographic_focus": investor.geographic_focus,
|
||||
"portfolio_highlights": investor.portfolio_highlights or [],
|
||||
"investment_thesis": investor.investment_thesis or [],
|
||||
"sectors": [sector.name for sector in investor.sectors],
|
||||
@@ -65,24 +64,22 @@ async def generate_investor_report(
|
||||
}
|
||||
for member in investor.team_members
|
||||
],
|
||||
"check_size_lower": None,
|
||||
"check_size_upper": None,
|
||||
"investment_stages": [],
|
||||
"funds": [],
|
||||
}
|
||||
|
||||
# Get check sizes and stages from funds
|
||||
# Get all funds with their data
|
||||
if investor.funds:
|
||||
# Use the first fund's data or aggregate
|
||||
fund = investor.funds[0]
|
||||
investor_data["check_size_lower"] = fund.check_size_lower
|
||||
investor_data["check_size_upper"] = fund.check_size_upper
|
||||
|
||||
# Aggregate all investment stages from all funds
|
||||
stages = set()
|
||||
for fund in investor.funds:
|
||||
for stage in fund.investment_stages:
|
||||
stages.add(stage.name)
|
||||
investor_data["investment_stages"] = list(stages)
|
||||
fund_data = {
|
||||
"fund_name": fund.fund_name,
|
||||
"fund_size": fund.fund_size,
|
||||
"check_size_lower": fund.check_size_lower,
|
||||
"check_size_upper": fund.check_size_upper,
|
||||
"geographic_focus": fund.geographic_focus,
|
||||
"investment_stages": [stage.name for stage in fund.investment_stages],
|
||||
"sectors": [sector.name for sector in fund.sectors],
|
||||
}
|
||||
investor_data["funds"].append(fund_data)
|
||||
|
||||
# Fetch project data if project_id is provided
|
||||
project_data = None
|
||||
|
||||
Binary file not shown.
Binary file not shown.
+97
-25
@@ -145,16 +145,74 @@ Return the lower and upper bounds in USD."""
|
||||
"""
|
||||
Manually parse the JSON profile from the CSV.
|
||||
Returns a cleaned dictionary with the investor profile data.
|
||||
Handles JSON wrapped in markdown code blocks (```json ... ```).
|
||||
Handles trailing quotes and extra data after JSON.
|
||||
"""
|
||||
if not json_str or pd.isna(json_str):
|
||||
return None
|
||||
|
||||
try:
|
||||
# Clean the JSON string
|
||||
cleaned_json = json_str.strip()
|
||||
|
||||
# Check if it's plain text (no JSON structure)
|
||||
if not cleaned_json.startswith(("{", "```", "'")):
|
||||
print(" ⚠️ No JSON structure found - skipping")
|
||||
return None
|
||||
|
||||
# Remove markdown code block markers if present
|
||||
if cleaned_json.startswith("```"):
|
||||
# Remove opening marker (```json or ```Json or ```)
|
||||
lines = cleaned_json.split("\n")
|
||||
if lines[0].startswith("```"):
|
||||
lines = lines[1:] # Remove first line
|
||||
# Remove closing marker (``` or ```')
|
||||
if lines and lines[-1].strip() in ("```", "```'", '```"'):
|
||||
lines = lines[:-1] # Remove last line
|
||||
cleaned_json = "\n".join(lines).strip()
|
||||
|
||||
# Remove trailing quotes that might be left over
|
||||
if cleaned_json.endswith(("'", '"')):
|
||||
cleaned_json = cleaned_json[:-1].strip()
|
||||
|
||||
# Try to find JSON boundaries if there's extra data
|
||||
# Look for the first { and the last }
|
||||
start_idx = cleaned_json.find("{")
|
||||
if start_idx == -1:
|
||||
print(" ⚠️ No opening brace found - not valid JSON")
|
||||
return None
|
||||
|
||||
# Find the matching closing brace
|
||||
# We need to count braces to find the actual end
|
||||
brace_count = 0
|
||||
end_idx = -1
|
||||
for i in range(start_idx, len(cleaned_json)):
|
||||
if cleaned_json[i] == "{":
|
||||
brace_count += 1
|
||||
elif cleaned_json[i] == "}":
|
||||
brace_count -= 1
|
||||
if brace_count == 0:
|
||||
end_idx = i + 1
|
||||
break
|
||||
|
||||
if end_idx == -1:
|
||||
print(" ⚠️ No matching closing brace found")
|
||||
return None
|
||||
|
||||
# Extract just the JSON part
|
||||
cleaned_json = cleaned_json[start_idx:end_idx]
|
||||
|
||||
# Parse JSON string
|
||||
profile = json.loads(json_str)
|
||||
profile = json.loads(cleaned_json)
|
||||
return profile
|
||||
except json.JSONDecodeError as e:
|
||||
print(f"Error parsing JSON: {e}")
|
||||
print(f" ❌ JSON parsing error: {e}")
|
||||
# Print first 200 chars for debugging
|
||||
preview = json_str[:200] if len(json_str) > 200 else json_str
|
||||
print(f" Preview: {preview}...")
|
||||
return None
|
||||
except Exception as e:
|
||||
print(f" ❌ Unexpected error: {e}")
|
||||
return None
|
||||
|
||||
async def process_investor_profile(
|
||||
@@ -338,34 +396,45 @@ Return the lower and upper bounds in USD."""
|
||||
if existing_company:
|
||||
# Update only founded_year on existing company
|
||||
company = existing_company
|
||||
updated_fields = []
|
||||
|
||||
if company_data.get("founded_year"):
|
||||
company.founded_year = company_data["founded_year"]
|
||||
updated_fields.append(
|
||||
f"founded_year: {company_data['founded_year']}"
|
||||
)
|
||||
|
||||
# Add/update company members (key executives)
|
||||
# First, remove existing members if updating
|
||||
db.query(CompanyMember).filter_by(company_id=company.id).delete()
|
||||
|
||||
exec_count = 0
|
||||
for exec_data in company_data.get("key_executives", []):
|
||||
member = CompanyMember(
|
||||
name=exec_data.get("name"),
|
||||
role=exec_data.get("title"),
|
||||
linkedin=exec_data.get(
|
||||
"source_url"
|
||||
), # Store source URL in linkedin field
|
||||
company_id=company.id,
|
||||
)
|
||||
db.add(member)
|
||||
exec_count += 1
|
||||
|
||||
if exec_count > 0:
|
||||
updated_fields.append(f"{exec_count} executives")
|
||||
|
||||
if updated_fields:
|
||||
print(f" 📝 Updated: {', '.join(updated_fields)}")
|
||||
|
||||
return company
|
||||
else:
|
||||
# Company should already be in base database, but if not found, skip
|
||||
print(
|
||||
f"⚠️ Company '{company_data['name']}' not found in base database - skipping"
|
||||
)
|
||||
# Company not found in base database, skip
|
||||
print(" ⚠️ Not in database - skipping")
|
||||
return None
|
||||
|
||||
# Add/update company members (key executives)
|
||||
# First, remove existing members if updating
|
||||
db.query(CompanyMember).filter_by(company_id=company.id).delete()
|
||||
|
||||
for exec_data in company_data.get("key_executives", []):
|
||||
member = CompanyMember(
|
||||
name=exec_data.get("name"),
|
||||
role=exec_data.get("title"),
|
||||
linkedin=exec_data.get(
|
||||
"source_url"
|
||||
), # Store source URL in linkedin field
|
||||
company_id=company.id,
|
||||
)
|
||||
db.add(member)
|
||||
|
||||
return company
|
||||
|
||||
except Exception as e:
|
||||
print(f"Error saving company to database: {e}")
|
||||
print(f" ❌ Error saving: {e}")
|
||||
db.rollback()
|
||||
return None
|
||||
|
||||
@@ -789,8 +858,11 @@ Return the lower and upper bounds in USD."""
|
||||
if pd.notna(row.get("Investor"))
|
||||
else None
|
||||
)
|
||||
# Try both column names for flexibility
|
||||
profile_json = (
|
||||
row.get("Final Investor Profile", "")
|
||||
row.get("Perplexity Gap Output", "")
|
||||
if pd.notna(row.get("Perplexity Gap Output"))
|
||||
else row.get("Final Investor Profile", "")
|
||||
if pd.notna(row.get("Final Investor Profile"))
|
||||
else None
|
||||
)
|
||||
|
||||
+137
-61
@@ -80,34 +80,70 @@ class ReportGenerator:
|
||||
"thesis": 5,
|
||||
}
|
||||
|
||||
# Aggregate data from all funds
|
||||
all_sectors = set(investor_data.get("sectors", []))
|
||||
all_stages = set()
|
||||
all_geographies = []
|
||||
check_ranges = []
|
||||
|
||||
for fund in investor_data.get("funds", []):
|
||||
all_sectors.update(fund.get("sectors", []))
|
||||
all_stages.update(fund.get("investment_stages", []))
|
||||
if fund.get("geographic_focus"):
|
||||
all_geographies.append(fund["geographic_focus"])
|
||||
if fund.get("check_size_lower") and fund.get("check_size_upper"):
|
||||
check_ranges.append(
|
||||
{
|
||||
"lower": fund["check_size_lower"],
|
||||
"upper": fund["check_size_upper"],
|
||||
}
|
||||
)
|
||||
|
||||
# Sector match
|
||||
investor_sectors = set(investor_data.get("sectors", []))
|
||||
project_sectors = set(project_data.get("sectors", []))
|
||||
if investor_sectors and project_sectors:
|
||||
if investor_sectors & project_sectors:
|
||||
if all_sectors and project_sectors:
|
||||
if all_sectors & project_sectors:
|
||||
score += weights["sector"]
|
||||
|
||||
# Stage match
|
||||
investor_stages = set(investor_data.get("investment_stages", []))
|
||||
# Stage match - case insensitive comparison
|
||||
project_stage = project_data.get("stage")
|
||||
if project_stage and project_stage in investor_stages:
|
||||
score += weights["stage"]
|
||||
if project_stage and all_stages:
|
||||
# Normalize stage names for comparison (case-insensitive)
|
||||
normalized_stages = {
|
||||
stage.lower().replace("_", " ") for stage in all_stages
|
||||
}
|
||||
project_stage_normalized = project_stage.lower().replace("_", " ")
|
||||
if project_stage_normalized in normalized_stages:
|
||||
score += weights["stage"]
|
||||
|
||||
# Geography match
|
||||
investor_geo = (investor_data.get("geographic_focus") or "").lower()
|
||||
# Geography match - check if any fund matches
|
||||
project_geo = (project_data.get("location") or "").lower()
|
||||
if investor_geo and project_geo and investor_geo in project_geo:
|
||||
geo_match = False
|
||||
if all_geographies:
|
||||
for geo in all_geographies:
|
||||
if geo:
|
||||
geo_lower = geo.lower()
|
||||
# Match if investor geography is "global" or if there's a location overlap
|
||||
if "global" in geo_lower or "worldwide" in geo_lower:
|
||||
geo_match = True
|
||||
break
|
||||
if project_geo and (
|
||||
geo_lower in project_geo or project_geo in geo_lower
|
||||
):
|
||||
geo_match = True
|
||||
break
|
||||
if geo_match:
|
||||
score += weights["geography"]
|
||||
|
||||
# Check size match
|
||||
# Check size match - check if any fund's range matches
|
||||
project_valuation = project_data.get("valuation", 0)
|
||||
check_lower = investor_data.get("check_size_lower") or 0
|
||||
check_upper = investor_data.get("check_size_upper") or float("inf")
|
||||
if (
|
||||
check_lower
|
||||
and check_upper
|
||||
and check_lower <= project_valuation <= check_upper
|
||||
):
|
||||
check_match = False
|
||||
if project_valuation and check_ranges:
|
||||
for check_range in check_ranges:
|
||||
if check_range["lower"] <= project_valuation <= check_range["upper"]:
|
||||
check_match = True
|
||||
break
|
||||
if check_match:
|
||||
score += weights["check_size"]
|
||||
|
||||
# Thesis alignment (simplified)
|
||||
@@ -121,86 +157,126 @@ class ReportGenerator:
|
||||
"""Generate detailed match criteria table"""
|
||||
criteria = []
|
||||
|
||||
# Aggregate data from all funds
|
||||
all_sectors = set(investor_data.get("sectors", []))
|
||||
all_stages = set()
|
||||
all_geographies = []
|
||||
check_ranges = []
|
||||
|
||||
for fund in investor_data.get("funds", []):
|
||||
all_sectors.update(fund.get("sectors", []))
|
||||
all_stages.update(fund.get("investment_stages", []))
|
||||
if fund.get("geographic_focus"):
|
||||
all_geographies.append(fund["geographic_focus"])
|
||||
if fund.get("check_size_lower") and fund.get("check_size_upper"):
|
||||
check_ranges.append(
|
||||
{
|
||||
"lower": fund["check_size_lower"],
|
||||
"upper": fund["check_size_upper"],
|
||||
"fund_name": fund.get("fund_name", "Unnamed Fund"),
|
||||
}
|
||||
)
|
||||
|
||||
# Sector criterion
|
||||
investor_sectors = investor_data.get("sectors", [])
|
||||
project_sectors = project_data.get("sectors", [])
|
||||
sector_match = (
|
||||
"Perfect" if set(investor_sectors) & set(project_sectors) else "Mismatch"
|
||||
)
|
||||
sector_match = "Perfect" if all_sectors & set(project_sectors) else "Mismatch"
|
||||
criteria.append(
|
||||
{
|
||||
"name": "Sector",
|
||||
"requirement": "Cybersecurity, B2B SaaS" if project_sectors else "N/A",
|
||||
"evidence": ", ".join(investor_sectors[:3])
|
||||
if investor_sectors
|
||||
else "N/A",
|
||||
"requirement": ", ".join(project_sectors) if project_sectors else "N/A",
|
||||
"evidence": ", ".join(list(all_sectors)[:3]) if all_sectors else "N/A",
|
||||
"match": sector_match,
|
||||
"weight": "30%",
|
||||
}
|
||||
)
|
||||
|
||||
# Stage criterion
|
||||
investor_stages = investor_data.get("investment_stages", [])
|
||||
# Stage criterion - case insensitive comparison
|
||||
project_stage = project_data.get("stage", "N/A")
|
||||
stage_match = "Perfect" if project_stage in investor_stages else "Mismatch"
|
||||
stage_match = "Mismatch"
|
||||
if project_stage != "N/A" and all_stages:
|
||||
# Normalize stage names for comparison
|
||||
normalized_stages = {
|
||||
stage.lower().replace("_", " ") for stage in all_stages
|
||||
}
|
||||
project_stage_normalized = project_stage.lower().replace("_", " ")
|
||||
stage_match = (
|
||||
"Perfect"
|
||||
if project_stage_normalized in normalized_stages
|
||||
else "Mismatch"
|
||||
)
|
||||
elif project_stage == "N/A":
|
||||
stage_match = "N/A"
|
||||
|
||||
criteria.append(
|
||||
{
|
||||
"name": "Stage",
|
||||
"requirement": str(project_stage),
|
||||
"evidence": ", ".join(investor_stages) if investor_stages else "N/A",
|
||||
"evidence": ", ".join(all_stages) if all_stages else "N/A",
|
||||
"match": stage_match,
|
||||
"weight": "30%",
|
||||
}
|
||||
)
|
||||
|
||||
# Geography criterion
|
||||
investor_geo = investor_data.get("geographic_focus") or "N/A"
|
||||
project_geo = project_data.get("location") or "N/A"
|
||||
investor_geo_display = ", ".join(all_geographies) if all_geographies else "N/A"
|
||||
|
||||
# Safe comparison handling None values and "Global" matches
|
||||
geo_match = "Mismatch"
|
||||
if project_geo != "N/A" and all_geographies:
|
||||
for geo in all_geographies:
|
||||
if geo:
|
||||
geo_lower = geo.lower()
|
||||
# Match if investor geography is "global" or if there's a location overlap
|
||||
if "global" in geo_lower or "worldwide" in geo_lower:
|
||||
geo_match = "Perfect"
|
||||
break
|
||||
if (
|
||||
geo_lower in project_geo.lower()
|
||||
or project_geo.lower() in geo_lower
|
||||
):
|
||||
geo_match = "Strong"
|
||||
break
|
||||
elif not all_geographies and project_geo == "N/A":
|
||||
geo_match = "N/A"
|
||||
|
||||
# Safe comparison handling None values
|
||||
if investor_geo == "N/A" or project_geo == "N/A":
|
||||
geo_match = (
|
||||
"N/A" if investor_geo == "N/A" and project_geo == "N/A" else "Mismatch"
|
||||
)
|
||||
else:
|
||||
investor_geo_lower = investor_geo.lower()
|
||||
project_geo_lower = project_geo.lower()
|
||||
geo_match = (
|
||||
"Strong"
|
||||
if investor_geo_lower in project_geo_lower
|
||||
or project_geo_lower in investor_geo_lower
|
||||
else "Mismatch"
|
||||
)
|
||||
criteria.append(
|
||||
{
|
||||
"name": "Geography",
|
||||
"requirement": project_geo,
|
||||
"evidence": investor_geo,
|
||||
"evidence": investor_geo_display,
|
||||
"match": geo_match,
|
||||
"weight": "20%",
|
||||
}
|
||||
)
|
||||
|
||||
# Check Size criterion
|
||||
check_lower = investor_data.get("check_size_lower") or 0
|
||||
check_upper = investor_data.get("check_size_upper") or 0
|
||||
project_val = project_data.get("valuation", 0)
|
||||
|
||||
# Build evidence string from all fund ranges
|
||||
check_evidence = "N/A"
|
||||
if check_lower and check_upper:
|
||||
check_evidence = (
|
||||
f"€{check_lower / 1000000:.0f}M - €{check_upper / 1000000:.0f}M"
|
||||
)
|
||||
elif check_lower:
|
||||
check_evidence = f"€{check_lower / 1000000:.0f}M+"
|
||||
if check_ranges:
|
||||
evidence_parts = []
|
||||
for cr in check_ranges[:3]: # Show up to 3 funds
|
||||
range_str = (
|
||||
f"€{cr['lower'] / 1000000:.0f}M - €{cr['upper'] / 1000000:.0f}M"
|
||||
)
|
||||
if cr["fund_name"]:
|
||||
evidence_parts.append(f"{cr['fund_name']}: {range_str}")
|
||||
else:
|
||||
evidence_parts.append(range_str)
|
||||
check_evidence = "; ".join(evidence_parts)
|
||||
|
||||
# Check if project valuation matches any fund
|
||||
check_match = "N/A"
|
||||
if project_val > 0 and check_ranges:
|
||||
match_found = any(
|
||||
cr["lower"] <= project_val <= cr["upper"] for cr in check_ranges
|
||||
)
|
||||
check_match = "Perfect" if match_found else "Mismatch"
|
||||
elif project_val > 0:
|
||||
check_match = "Strong"
|
||||
|
||||
check_match = (
|
||||
"Perfect"
|
||||
if check_lower and check_upper and check_lower <= project_val <= check_upper
|
||||
else "Strong"
|
||||
if project_val > 0
|
||||
else "N/A"
|
||||
)
|
||||
criteria.append(
|
||||
{
|
||||
"name": "Check Size",
|
||||
|
||||
+38
-31
@@ -161,13 +161,6 @@
|
||||
</p>
|
||||
</div>
|
||||
|
||||
<div>
|
||||
<p class="text-xs text-gray-600">DACH Region:</p>
|
||||
<p class="font-semibold text-gray-900">
|
||||
{{ investor.geographic_focus or 'N/A' }}
|
||||
</p>
|
||||
</div>
|
||||
|
||||
<div>
|
||||
<p class="text-xs text-gray-600">AUM (EUR million):</p>
|
||||
<p class="font-semibold text-gray-900">
|
||||
@@ -179,33 +172,47 @@
|
||||
</p>
|
||||
</div>
|
||||
|
||||
<div class="mb-4">
|
||||
<p class="text-xs text-gray-600 mb-1">
|
||||
Investment Stage:
|
||||
</p>
|
||||
<p class="text-sm font-semibold text-gray-900">
|
||||
{% if investor.investment_stages %} {{
|
||||
investor.investment_stages | join(', ') }} {% else
|
||||
%} N/A {% endif %}
|
||||
</p>
|
||||
</div>
|
||||
|
||||
<div class="mb-4">
|
||||
<p class="text-xs text-gray-600 mb-1">
|
||||
Est. Investment Size:
|
||||
</p>
|
||||
<p class="text-sm font-semibold text-gray-900">
|
||||
{% if investor.check_size_lower and
|
||||
investor.check_size_upper %} €{{
|
||||
'{:,.0f}'.format(investor.check_size_lower /
|
||||
1000000) }}M - €{{
|
||||
'{:,.0f}'.format(investor.check_size_upper /
|
||||
1000000) }}M {% elif investor.check_size_lower %}
|
||||
€{{ '{:,.0f}'.format(investor.check_size_lower /
|
||||
1000000) }}M+ {% else %} N/A {% endif %}
|
||||
<div>
|
||||
<p class="text-xs text-gray-600 mb-1">Number of Funds:</p>
|
||||
<p class="font-semibold text-gray-900">
|
||||
{{ investor.funds | length if investor.funds else 'N/A' }}
|
||||
</p>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="mt-4">
|
||||
<h3 class="text-xs font-bold text-gray-900 uppercase mb-2">
|
||||
Fund Details
|
||||
</h3>
|
||||
{% if investor.funds %}
|
||||
{% for fund in investor.funds %}
|
||||
<div class="mb-3 pb-3 border-b border-gray-200">
|
||||
<p class="text-sm font-semibold text-gray-900 mb-1">
|
||||
{{ fund.fund_name or 'Fund ' + loop.index|string }}
|
||||
</p>
|
||||
<div class="text-xs text-gray-700 space-y-1">
|
||||
{% if fund.fund_size %}
|
||||
<p>Fund Size: €{{ '{:,.0f}'.format(fund.fund_size / 1000000) }}M</p>
|
||||
{% endif %}
|
||||
{% if fund.check_size_lower and fund.check_size_upper %}
|
||||
<p>Check Size: €{{ '{:,.0f}'.format(fund.check_size_lower / 1000000) }}M - €{{ '{:,.0f}'.format(fund.check_size_upper / 1000000) }}M</p>
|
||||
{% endif %}
|
||||
{% if fund.geographic_focus %}
|
||||
<p>Geography: {{ fund.geographic_focus }}</p>
|
||||
{% endif %}
|
||||
{% if fund.investment_stages %}
|
||||
<p>Stages: {{ fund.investment_stages | join(', ') }}</p>
|
||||
{% endif %}
|
||||
{% if fund.sectors %}
|
||||
<p>Sectors: {{ fund.sectors[:3] | join(', ') }}</p>
|
||||
{% endif %}
|
||||
</div>
|
||||
</div>
|
||||
{% endfor %}
|
||||
{% else %}
|
||||
<p class="text-xs text-gray-500">No fund information available</p>
|
||||
{% endif %}
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
|
||||
Binary file not shown.
Reference in New Issue
Block a user