feat: Update investor report generation and HTML template to include fund details and improve data handling
This commit is contained in:
+97
-25
@@ -145,16 +145,74 @@ Return the lower and upper bounds in USD."""
|
||||
"""
|
||||
Manually parse the JSON profile from the CSV.
|
||||
Returns a cleaned dictionary with the investor profile data.
|
||||
Handles JSON wrapped in markdown code blocks (```json ... ```).
|
||||
Handles trailing quotes and extra data after JSON.
|
||||
"""
|
||||
if not json_str or pd.isna(json_str):
|
||||
return None
|
||||
|
||||
try:
|
||||
# Clean the JSON string
|
||||
cleaned_json = json_str.strip()
|
||||
|
||||
# Check if it's plain text (no JSON structure)
|
||||
if not cleaned_json.startswith(("{", "```", "'")):
|
||||
print(" ⚠️ No JSON structure found - skipping")
|
||||
return None
|
||||
|
||||
# Remove markdown code block markers if present
|
||||
if cleaned_json.startswith("```"):
|
||||
# Remove opening marker (```json or ```Json or ```)
|
||||
lines = cleaned_json.split("\n")
|
||||
if lines[0].startswith("```"):
|
||||
lines = lines[1:] # Remove first line
|
||||
# Remove closing marker (```, ```' or ```")
|
||||
if lines and lines[-1].strip() in ("```", "```'", '```"'):
|
||||
lines = lines[:-1] # Remove last line
|
||||
cleaned_json = "\n".join(lines).strip()
|
||||
|
||||
# Remove trailing quotes that might be left over
|
||||
if cleaned_json.endswith(("'", '"')):
|
||||
cleaned_json = cleaned_json[:-1].strip()
|
||||
|
||||
# Try to find JSON boundaries if there's extra data
|
||||
# Look for the first { and the last }
|
||||
start_idx = cleaned_json.find("{")
|
||||
if start_idx == -1:
|
||||
print(" ⚠️ No opening brace found - not valid JSON")
|
||||
return None
|
||||
|
||||
# Find the matching closing brace
|
||||
# We need to count braces to find the actual end
|
||||
brace_count = 0
|
||||
end_idx = -1
|
||||
for i in range(start_idx, len(cleaned_json)):
|
||||
if cleaned_json[i] == "{":
|
||||
brace_count += 1
|
||||
elif cleaned_json[i] == "}":
|
||||
brace_count -= 1
|
||||
if brace_count == 0:
|
||||
end_idx = i + 1
|
||||
break
|
||||
|
||||
if end_idx == -1:
|
||||
print(" ⚠️ No matching closing brace found")
|
||||
return None
|
||||
|
||||
# Extract just the JSON part
|
||||
cleaned_json = cleaned_json[start_idx:end_idx]
|
||||
|
||||
# Parse JSON string
|
||||
profile = json.loads(json_str)
|
||||
profile = json.loads(cleaned_json)
|
||||
return profile
|
||||
except json.JSONDecodeError as e:
|
||||
print(f"Error parsing JSON: {e}")
|
||||
print(f" ❌ JSON parsing error: {e}")
|
||||
# Print first 200 chars for debugging
|
||||
preview = json_str[:200] if len(json_str) > 200 else json_str
|
||||
print(f" Preview: {preview}...")
|
||||
return None
|
||||
except Exception as e:
|
||||
print(f" ❌ Unexpected error: {e}")
|
||||
return None
|
||||
|
||||
async def process_investor_profile(
|
||||
@@ -338,34 +396,45 @@ Return the lower and upper bounds in USD."""
|
||||
if existing_company:
|
||||
# Update founded_year and replace key executives on the existing company
|
||||
company = existing_company
|
||||
updated_fields = []
|
||||
|
||||
if company_data.get("founded_year"):
|
||||
company.founded_year = company_data["founded_year"]
|
||||
updated_fields.append(
|
||||
f"founded_year: {company_data['founded_year']}"
|
||||
)
|
||||
|
||||
# Add/update company members (key executives)
|
||||
# First, remove existing members if updating
|
||||
db.query(CompanyMember).filter_by(company_id=company.id).delete()
|
||||
|
||||
exec_count = 0
|
||||
for exec_data in company_data.get("key_executives", []):
|
||||
member = CompanyMember(
|
||||
name=exec_data.get("name"),
|
||||
role=exec_data.get("title"),
|
||||
linkedin=exec_data.get(
|
||||
"source_url"
|
||||
), # Store source URL in linkedin field
|
||||
company_id=company.id,
|
||||
)
|
||||
db.add(member)
|
||||
exec_count += 1
|
||||
|
||||
if exec_count > 0:
|
||||
updated_fields.append(f"{exec_count} executives")
|
||||
|
||||
if updated_fields:
|
||||
print(f" 📝 Updated: {', '.join(updated_fields)}")
|
||||
|
||||
return company
|
||||
else:
|
||||
# Company should already be in base database, but if not found, skip
|
||||
print(
|
||||
f"⚠️ Company '{company_data['name']}' not found in base database - skipping"
|
||||
)
|
||||
# Company not found in base database, skip
|
||||
print(" ⚠️ Not in database - skipping")
|
||||
return None
|
||||
|
||||
# Add/update company members (key executives)
|
||||
# First, remove existing members if updating
|
||||
db.query(CompanyMember).filter_by(company_id=company.id).delete()
|
||||
|
||||
for exec_data in company_data.get("key_executives", []):
|
||||
member = CompanyMember(
|
||||
name=exec_data.get("name"),
|
||||
role=exec_data.get("title"),
|
||||
linkedin=exec_data.get(
|
||||
"source_url"
|
||||
), # Store source URL in linkedin field
|
||||
company_id=company.id,
|
||||
)
|
||||
db.add(member)
|
||||
|
||||
return company
|
||||
|
||||
except Exception as e:
|
||||
print(f"Error saving company to database: {e}")
|
||||
print(f" ❌ Error saving: {e}")
|
||||
db.rollback()
|
||||
return None
|
||||
|
||||
@@ -789,8 +858,11 @@ Return the lower and upper bounds in USD."""
|
||||
if pd.notna(row.get("Investor"))
|
||||
else None
|
||||
)
|
||||
# Try both column names for flexibility
|
||||
profile_json = (
|
||||
row.get("Final Investor Profile", "")
|
||||
row.get("Perplexity Gap Output", "")
|
||||
if pd.notna(row.get("Perplexity Gap Output"))
|
||||
else row.get("Final Investor Profile", "")
|
||||
if pd.notna(row.get("Final Investor Profile"))
|
||||
else None
|
||||
)
|
||||
|
||||
Reference in New Issue
Block a user