feat: Simplify company profile processing to only extract founded_year and key_executives

This commit is contained in:
bolade
2025-10-08 13:20:08 +01:00
parent 37e1ad01c4
commit be6fde9ba2
3 changed files with 12 additions and 55 deletions
Binary file not shown.
+11 -54
View File
@@ -265,37 +265,20 @@ Return the lower and upper bounds in USD."""
) -> Optional[dict]:
"""
Process company profile from CSV data.
Manually extracts fields without using LLM.
Only extracts founded_year and key_executives - rest is in base database.
"""
profile = self.parse_json_profile(profile_json)
if not profile:
return None
try:
# Extract basic info
# Only extract founded_year and key_executives
company_data = {
"name": name.strip() if name else None,
"website": website.strip() if website else None,
"description": profile.get("companyDescription"),
"location": profile.get("geographicFocus"),
"industry": profile.get("sectorDescription"),
"founded_year": None, # Not typically in the company JSON
"founded_year": None,
"key_executives": [],
"client_categories": profile.get("clientCategories", []),
"product_description": profile.get("productDescription"),
"linked_documents": profile.get("linkedDocuments", []),
"researcher_notes": profile.get("researcherNotes"),
"missing_important_fields": profile.get("missingImportantFields", []),
"sources": profile.get("sources", {}),
"investor_names": [],
}
# Parse investor names from the Investor column
if investor_names and pd.notna(investor_names):
# Split by comma and clean
investors = [inv.strip() for inv in str(investor_names).split(",")]
company_data["investor_names"] = [inv for inv in investors if inv]
# Process key executives/leadership
key_executives = profile.get("keyExecutives", [])
if not key_executives:
@@ -313,7 +296,7 @@ Return the lower and upper bounds in USD."""
)
# Try to extract founding year from description
description = company_data.get("description", "")
description = profile.get("companyDescription", "")
if description:
# Look for patterns like "founded in 2020", "Gegründet 2020", "founded 2020"
year_patterns = [
@@ -344,40 +327,27 @@ Return the lower and upper bounds in USD."""
def _save_parsed_company_to_db(
self, db: Session, company_data: dict
) -> Optional[CompanyTable]:
"""Save manually parsed company data to database"""
"""Save manually parsed company data to database - only updates founded_year and key_executives"""
try:
# Check if company already exists
# Check if company already exists (should exist in base database)
existing_company = (
db.query(CompanyTable).filter_by(name=company_data["name"]).first()
)
if existing_company:
# Update existing company
# Update only founded_year on existing company
company = existing_company
company.website = company_data.get("website") or company.website
company.location = company_data.get("location") or company.location
company.description = (
company_data.get("description") or company.description
)
company.industry = company_data.get("industry") or company.industry
if company_data.get("founded_year"):
company.founded_year = company_data["founded_year"]
else:
# Create new company
company = CompanyTable(
name=company_data["name"],
website=company_data.get("website"),
location=company_data.get("location"),
description=company_data.get("description"),
industry=company_data.get("industry"),
founded_year=company_data.get("founded_year"),
# Company should already be in base database, but if not found, skip
print(
f"⚠️ Company '{company_data['name']}' not found in base database - skipping"
)
db.add(company)
db.flush()
return None
# Add/update company members (key executives)
# First, remove existing members if updating
if existing_company:
db.query(CompanyMember).filter_by(company_id=company.id).delete()
for exec_data in company_data.get("key_executives", []):
@@ -391,19 +361,6 @@ Return the lower and upper bounds in USD."""
)
db.add(member)
# Link to investors if provided
for investor_name in company_data.get("investor_names", []):
# Find investor in database
investor = (
db.query(InvestorTable)
.filter_by(name=investor_name.strip())
.first()
)
if investor:
# Add company to investor's portfolio if not already there
if company not in investor.portfolio_companies:
investor.portfolio_companies.append(company)
return company
except Exception as e:
BIN
View File
Binary file not shown.