feat: Simplify company profile processing to only extract founded_year and key_executives
This commit is contained in:
Binary file not shown.
+12
-55
@@ -265,37 +265,20 @@ Return the lower and upper bounds in USD."""
|
|||||||
) -> Optional[dict]:
|
) -> Optional[dict]:
|
||||||
"""
|
"""
|
||||||
Process company profile from CSV data.
|
Process company profile from CSV data.
|
||||||
Manually extracts fields without using LLM.
|
Only extracts founded_year and key_executives - rest is in base database.
|
||||||
"""
|
"""
|
||||||
profile = self.parse_json_profile(profile_json)
|
profile = self.parse_json_profile(profile_json)
|
||||||
if not profile:
|
if not profile:
|
||||||
return None
|
return None
|
||||||
|
|
||||||
try:
|
try:
|
||||||
# Extract basic info
|
# Only extract founded_year and key_executives
|
||||||
company_data = {
|
company_data = {
|
||||||
"name": name.strip() if name else None,
|
"name": name.strip() if name else None,
|
||||||
"website": website.strip() if website else None,
|
"founded_year": None,
|
||||||
"description": profile.get("companyDescription"),
|
|
||||||
"location": profile.get("geographicFocus"),
|
|
||||||
"industry": profile.get("sectorDescription"),
|
|
||||||
"founded_year": None, # Not typically in the company JSON
|
|
||||||
"key_executives": [],
|
"key_executives": [],
|
||||||
"client_categories": profile.get("clientCategories", []),
|
|
||||||
"product_description": profile.get("productDescription"),
|
|
||||||
"linked_documents": profile.get("linkedDocuments", []),
|
|
||||||
"researcher_notes": profile.get("researcherNotes"),
|
|
||||||
"missing_important_fields": profile.get("missingImportantFields", []),
|
|
||||||
"sources": profile.get("sources", {}),
|
|
||||||
"investor_names": [],
|
|
||||||
}
|
}
|
||||||
|
|
||||||
# Parse investor names from the Investor column
|
|
||||||
if investor_names and pd.notna(investor_names):
|
|
||||||
# Split by comma and clean
|
|
||||||
investors = [inv.strip() for inv in str(investor_names).split(",")]
|
|
||||||
company_data["investor_names"] = [inv for inv in investors if inv]
|
|
||||||
|
|
||||||
# Process key executives/leadership
|
# Process key executives/leadership
|
||||||
key_executives = profile.get("keyExecutives", [])
|
key_executives = profile.get("keyExecutives", [])
|
||||||
if not key_executives:
|
if not key_executives:
|
||||||
@@ -313,7 +296,7 @@ Return the lower and upper bounds in USD."""
|
|||||||
)
|
)
|
||||||
|
|
||||||
# Try to extract founding year from description
|
# Try to extract founding year from description
|
||||||
description = company_data.get("description", "")
|
description = profile.get("companyDescription", "")
|
||||||
if description:
|
if description:
|
||||||
# Look for patterns like "founded in 2020", "Gegründet 2020", "founded 2020"
|
# Look for patterns like "founded in 2020", "Gegründet 2020", "founded 2020"
|
||||||
year_patterns = [
|
year_patterns = [
|
||||||
@@ -344,41 +327,28 @@ Return the lower and upper bounds in USD."""
|
|||||||
def _save_parsed_company_to_db(
|
def _save_parsed_company_to_db(
|
||||||
self, db: Session, company_data: dict
|
self, db: Session, company_data: dict
|
||||||
) -> Optional[CompanyTable]:
|
) -> Optional[CompanyTable]:
|
||||||
"""Save manually parsed company data to database"""
|
"""Save manually parsed company data to database - only updates founded_year and key_executives"""
|
||||||
try:
|
try:
|
||||||
# Check if company already exists
|
# Check if company already exists (should exist in base database)
|
||||||
existing_company = (
|
existing_company = (
|
||||||
db.query(CompanyTable).filter_by(name=company_data["name"]).first()
|
db.query(CompanyTable).filter_by(name=company_data["name"]).first()
|
||||||
)
|
)
|
||||||
|
|
||||||
if existing_company:
|
if existing_company:
|
||||||
# Update existing company
|
# Update only founded_year on existing company
|
||||||
company = existing_company
|
company = existing_company
|
||||||
company.website = company_data.get("website") or company.website
|
|
||||||
company.location = company_data.get("location") or company.location
|
|
||||||
company.description = (
|
|
||||||
company_data.get("description") or company.description
|
|
||||||
)
|
|
||||||
company.industry = company_data.get("industry") or company.industry
|
|
||||||
if company_data.get("founded_year"):
|
if company_data.get("founded_year"):
|
||||||
company.founded_year = company_data["founded_year"]
|
company.founded_year = company_data["founded_year"]
|
||||||
else:
|
else:
|
||||||
# Create new company
|
# Company should already be in base database, but if not found, skip
|
||||||
company = CompanyTable(
|
print(
|
||||||
name=company_data["name"],
|
f"⚠️ Company '{company_data['name']}' not found in base database - skipping"
|
||||||
website=company_data.get("website"),
|
|
||||||
location=company_data.get("location"),
|
|
||||||
description=company_data.get("description"),
|
|
||||||
industry=company_data.get("industry"),
|
|
||||||
founded_year=company_data.get("founded_year"),
|
|
||||||
)
|
)
|
||||||
db.add(company)
|
return None
|
||||||
db.flush()
|
|
||||||
|
|
||||||
# Add/update company members (key executives)
|
# Add/update company members (key executives)
|
||||||
# First, remove existing members if updating
|
# First, remove existing members if updating
|
||||||
if existing_company:
|
db.query(CompanyMember).filter_by(company_id=company.id).delete()
|
||||||
db.query(CompanyMember).filter_by(company_id=company.id).delete()
|
|
||||||
|
|
||||||
for exec_data in company_data.get("key_executives", []):
|
for exec_data in company_data.get("key_executives", []):
|
||||||
member = CompanyMember(
|
member = CompanyMember(
|
||||||
@@ -391,19 +361,6 @@ Return the lower and upper bounds in USD."""
|
|||||||
)
|
)
|
||||||
db.add(member)
|
db.add(member)
|
||||||
|
|
||||||
# Link to investors if provided
|
|
||||||
for investor_name in company_data.get("investor_names", []):
|
|
||||||
# Find investor in database
|
|
||||||
investor = (
|
|
||||||
db.query(InvestorTable)
|
|
||||||
.filter_by(name=investor_name.strip())
|
|
||||||
.first()
|
|
||||||
)
|
|
||||||
if investor:
|
|
||||||
# Add company to investor's portfolio if not already there
|
|
||||||
if company not in investor.portfolio_companies:
|
|
||||||
investor.portfolio_companies.append(company)
|
|
||||||
|
|
||||||
return company
|
return company
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
|
|||||||
Binary file not shown.
Reference in New Issue
Block a user