feat: Update investor report generation and HTML template to include fund details and improve data handling

This commit is contained in:
bolade
2025-10-21 10:48:58 +01:00
parent 63d8e57e57
commit 483c2cc114
10 changed files with 289 additions and 135 deletions
+97 -25
View File
@@ -145,16 +145,74 @@ Return the lower and upper bounds in USD."""
"""
Manually parse the JSON profile from the CSV.
Returns a cleaned dictionary with the investor profile data.
Handles JSON wrapped in markdown code blocks (```json ... ```).
Handles trailing quotes and extra data after JSON.
"""
if not json_str or pd.isna(json_str):
return None
try:
# Clean the JSON string
cleaned_json = json_str.strip()
# Check if it's plain text (no JSON structure)
if not cleaned_json.startswith(("{", "```", "'")):
print(" ⚠️ No JSON structure found - skipping")
return None
# Remove markdown code block markers if present
if cleaned_json.startswith("```"):
# Remove opening marker (```json or ```Json or ```)
lines = cleaned_json.split("\n")
if lines[0].startswith("```"):
lines = lines[1:] # Remove first line
# Remove closing marker (```, ```' or ```")
if lines and lines[-1].strip() in ("```", "```'", '```"'):
lines = lines[:-1] # Remove last line
cleaned_json = "\n".join(lines).strip()
# Remove trailing quotes that might be left over
if cleaned_json.endswith(("'", '"')):
cleaned_json = cleaned_json[:-1].strip()
# Try to find JSON boundaries if there's extra data
# Look for the first { and the } that closes it (matched by brace depth)
start_idx = cleaned_json.find("{")
if start_idx == -1:
print(" ⚠️ No opening brace found - not valid JSON")
return None
# Find the matching closing brace
# We need to count braces to find the actual end
brace_count = 0
end_idx = -1
for i in range(start_idx, len(cleaned_json)):
if cleaned_json[i] == "{":
brace_count += 1
elif cleaned_json[i] == "}":
brace_count -= 1
if brace_count == 0:
end_idx = i + 1
break
if end_idx == -1:
print(" ⚠️ No matching closing brace found")
return None
# Extract just the JSON part
cleaned_json = cleaned_json[start_idx:end_idx]
# Parse JSON string
profile = json.loads(json_str)
profile = json.loads(cleaned_json)
return profile
except json.JSONDecodeError as e:
print(f"Error parsing JSON: {e}")
print(f" ❌ JSON parsing error: {e}")
# Print first 200 chars for debugging
preview = json_str[:200] if len(json_str) > 200 else json_str
print(f" Preview: {preview}...")
return None
except Exception as e:
print(f" ❌ Unexpected error: {e}")
return None
async def process_investor_profile(
@@ -338,34 +396,45 @@ Return the lower and upper bounds in USD."""
if existing_company:
# Update only founded_year on existing company
company = existing_company
updated_fields = []
if company_data.get("founded_year"):
company.founded_year = company_data["founded_year"]
updated_fields.append(
f"founded_year: {company_data['founded_year']}"
)
# Add/update company members (key executives)
# First, remove existing members if updating
db.query(CompanyMember).filter_by(company_id=company.id).delete()
exec_count = 0
for exec_data in company_data.get("key_executives", []):
member = CompanyMember(
name=exec_data.get("name"),
role=exec_data.get("title"),
linkedin=exec_data.get(
"source_url"
), # Store source URL in linkedin field
company_id=company.id,
)
db.add(member)
exec_count += 1
if exec_count > 0:
updated_fields.append(f"{exec_count} executives")
if updated_fields:
print(f" 📝 Updated: {', '.join(updated_fields)}")
return company
else:
# Company should already be in base database, but if not found, skip
print(
f"⚠️ Company '{company_data['name']}' not found in base database - skipping"
)
# Company not found in base database, skip
print(" ⚠️ Not in database - skipping")
return None
# Add/update company members (key executives)
# First, remove existing members if updating
db.query(CompanyMember).filter_by(company_id=company.id).delete()
for exec_data in company_data.get("key_executives", []):
member = CompanyMember(
name=exec_data.get("name"),
role=exec_data.get("title"),
linkedin=exec_data.get(
"source_url"
), # Store source URL in linkedin field
company_id=company.id,
)
db.add(member)
return company
except Exception as e:
print(f"Error saving company to database: {e}")
print(f"Error saving: {e}")
db.rollback()
return None
@@ -789,8 +858,11 @@ Return the lower and upper bounds in USD."""
if pd.notna(row.get("Investor"))
else None
)
# Try both column names for flexibility
profile_json = (
row.get("Final Investor Profile", "")
row.get("Perplexity Gap Output", "")
if pd.notna(row.get("Perplexity Gap Output"))
else row.get("Final Investor Profile", "")
if pd.notna(row.get("Final Investor Profile"))
else None
)