feat: Simplify company profile processing to only extract founded_year and key_executives
This commit is contained in:
Binary file not shown.
+11
-54
@@ -265,37 +265,20 @@ Return the lower and upper bounds in USD."""
|
||||
) -> Optional[dict]:
|
||||
"""
|
||||
Process company profile from CSV data.
|
||||
Manually extracts fields without using LLM.
|
||||
Only extracts founded_year and key_executives - rest is in base database.
|
||||
"""
|
||||
profile = self.parse_json_profile(profile_json)
|
||||
if not profile:
|
||||
return None
|
||||
|
||||
try:
|
||||
# Extract basic info
|
||||
# Only extract founded_year and key_executives
|
||||
company_data = {
|
||||
"name": name.strip() if name else None,
|
||||
"website": website.strip() if website else None,
|
||||
"description": profile.get("companyDescription"),
|
||||
"location": profile.get("geographicFocus"),
|
||||
"industry": profile.get("sectorDescription"),
|
||||
"founded_year": None, # Not typically in the company JSON
|
||||
"founded_year": None,
|
||||
"key_executives": [],
|
||||
"client_categories": profile.get("clientCategories", []),
|
||||
"product_description": profile.get("productDescription"),
|
||||
"linked_documents": profile.get("linkedDocuments", []),
|
||||
"researcher_notes": profile.get("researcherNotes"),
|
||||
"missing_important_fields": profile.get("missingImportantFields", []),
|
||||
"sources": profile.get("sources", {}),
|
||||
"investor_names": [],
|
||||
}
|
||||
|
||||
# Parse investor names from the Investor column
|
||||
if investor_names and pd.notna(investor_names):
|
||||
# Split by comma and clean
|
||||
investors = [inv.strip() for inv in str(investor_names).split(",")]
|
||||
company_data["investor_names"] = [inv for inv in investors if inv]
|
||||
|
||||
# Process key executives/leadership
|
||||
key_executives = profile.get("keyExecutives", [])
|
||||
if not key_executives:
|
||||
@@ -313,7 +296,7 @@ Return the lower and upper bounds in USD."""
|
||||
)
|
||||
|
||||
# Try to extract founding year from description
|
||||
description = company_data.get("description", "")
|
||||
description = profile.get("companyDescription", "")
|
||||
if description:
|
||||
# Look for patterns like "founded in 2020", "Gegründet 2020", "founded 2020"
|
||||
year_patterns = [
|
||||
@@ -344,40 +327,27 @@ Return the lower and upper bounds in USD."""
|
||||
def _save_parsed_company_to_db(
|
||||
self, db: Session, company_data: dict
|
||||
) -> Optional[CompanyTable]:
|
||||
"""Save manually parsed company data to database"""
|
||||
"""Save manually parsed company data to database - only updates founded_year and key_executives"""
|
||||
try:
|
||||
# Check if company already exists
|
||||
# Check if company already exists (should exist in base database)
|
||||
existing_company = (
|
||||
db.query(CompanyTable).filter_by(name=company_data["name"]).first()
|
||||
)
|
||||
|
||||
if existing_company:
|
||||
# Update existing company
|
||||
# Update only founded_year on existing company
|
||||
company = existing_company
|
||||
company.website = company_data.get("website") or company.website
|
||||
company.location = company_data.get("location") or company.location
|
||||
company.description = (
|
||||
company_data.get("description") or company.description
|
||||
)
|
||||
company.industry = company_data.get("industry") or company.industry
|
||||
if company_data.get("founded_year"):
|
||||
company.founded_year = company_data["founded_year"]
|
||||
else:
|
||||
# Create new company
|
||||
company = CompanyTable(
|
||||
name=company_data["name"],
|
||||
website=company_data.get("website"),
|
||||
location=company_data.get("location"),
|
||||
description=company_data.get("description"),
|
||||
industry=company_data.get("industry"),
|
||||
founded_year=company_data.get("founded_year"),
|
||||
# Company should already be in base database, but if not found, skip
|
||||
print(
|
||||
f"⚠️ Company '{company_data['name']}' not found in base database - skipping"
|
||||
)
|
||||
db.add(company)
|
||||
db.flush()
|
||||
return None
|
||||
|
||||
# Add/update company members (key executives)
|
||||
# First, remove existing members if updating
|
||||
if existing_company:
|
||||
db.query(CompanyMember).filter_by(company_id=company.id).delete()
|
||||
|
||||
for exec_data in company_data.get("key_executives", []):
|
||||
@@ -391,19 +361,6 @@ Return the lower and upper bounds in USD."""
|
||||
)
|
||||
db.add(member)
|
||||
|
||||
# Link to investors if provided
|
||||
for investor_name in company_data.get("investor_names", []):
|
||||
# Find investor in database
|
||||
investor = (
|
||||
db.query(InvestorTable)
|
||||
.filter_by(name=investor_name.strip())
|
||||
.first()
|
||||
)
|
||||
if investor:
|
||||
# Add company to investor's portfolio if not already there
|
||||
if company not in investor.portfolio_companies:
|
||||
investor.portfolio_companies.append(company)
|
||||
|
||||
return company
|
||||
|
||||
except Exception as e:
|
||||
|
||||
Binary file not shown.
Reference in New Issue
Block a user