diff --git a/app/services/__pycache__/llm_parser.cpython-312.pyc b/app/services/__pycache__/llm_parser.cpython-312.pyc index 04633e6..30635c3 100644 Binary files a/app/services/__pycache__/llm_parser.cpython-312.pyc and b/app/services/__pycache__/llm_parser.cpython-312.pyc differ diff --git a/app/services/llm_parser.py b/app/services/llm_parser.py index 146f100..34d0aa5 100644 --- a/app/services/llm_parser.py +++ b/app/services/llm_parser.py @@ -265,37 +265,20 @@ Return the lower and upper bounds in USD.""" ) -> Optional[dict]: """ Process company profile from CSV data. - Manually extracts fields without using LLM. + Only extracts founded_year and key_executives - rest is in base database. """ profile = self.parse_json_profile(profile_json) if not profile: return None try: - # Extract basic info + # Only extract founded_year and key_executives company_data = { "name": name.strip() if name else None, - "website": website.strip() if website else None, - "description": profile.get("companyDescription"), - "location": profile.get("geographicFocus"), - "industry": profile.get("sectorDescription"), - "founded_year": None, # Not typically in the company JSON + "founded_year": None, "key_executives": [], - "client_categories": profile.get("clientCategories", []), - "product_description": profile.get("productDescription"), - "linked_documents": profile.get("linkedDocuments", []), - "researcher_notes": profile.get("researcherNotes"), - "missing_important_fields": profile.get("missingImportantFields", []), - "sources": profile.get("sources", {}), - "investor_names": [], } - # Parse investor names from the Investor column - if investor_names and pd.notna(investor_names): - # Split by comma and clean - investors = [inv.strip() for inv in str(investor_names).split(",")] - company_data["investor_names"] = [inv for inv in investors if inv] - # Process key executives/leadership key_executives = profile.get("keyExecutives", []) if not key_executives: @@ -313,7 +296,7 @@ Return the lower and upper bounds in USD.""" ) # Try to extract founding year from description - description = company_data.get("description", "") + description = profile.get("companyDescription", "") if description: # Look for patterns like "founded in 2020", "Gegründet 2020", "founded 2020" year_patterns = [ @@ -344,41 +327,28 @@ Return the lower and upper bounds in USD.""" def _save_parsed_company_to_db( self, db: Session, company_data: dict ) -> Optional[CompanyTable]: - """Save manually parsed company data to database""" + """Save manually parsed company data to database - only updates founded_year and key_executives""" try: - # Check if company already exists + # Check if company already exists (should exist in base database) existing_company = ( db.query(CompanyTable).filter_by(name=company_data["name"]).first() ) if existing_company: - # Update existing company + # Update only founded_year on existing company company = existing_company - company.website = company_data.get("website") or company.website - company.location = company_data.get("location") or company.location - company.description = ( - company_data.get("description") or company.description - ) - company.industry = company_data.get("industry") or company.industry if company_data.get("founded_year"): company.founded_year = company_data["founded_year"] else: - # Create new company - company = CompanyTable( - name=company_data["name"], - website=company_data.get("website"), - location=company_data.get("location"), - description=company_data.get("description"), - industry=company_data.get("industry"), - founded_year=company_data.get("founded_year"), + # Company should already be in base database, but if not found, skip + print( + f"⚠️ Company '{company_data['name']}' not found in base database - skipping" ) - db.add(company) - db.flush() + return None # Add/update company members (key executives) # First, remove existing members if updating - if existing_company: - db.query(CompanyMember).filter_by(company_id=company.id).delete() + db.query(CompanyMember).filter_by(company_id=company.id).delete() for exec_data in company_data.get("key_executives", []): member = CompanyMember( @@ -391,19 +361,6 @@ Return the lower and upper bounds in USD.""" ) db.add(member) - # Link to investors if provided - for investor_name in company_data.get("investor_names", []): - # Find investor in database - investor = ( - db.query(InvestorTable) - .filter_by(name=investor_name.strip()) - .first() - ) - if investor: - # Add company to investor's portfolio if not already there - if company not in investor.portfolio_companies: - investor.portfolio_companies.append(company) - return company except Exception as e: diff --git a/investors.db b/investors.db index 721e448..e6f09a3 100644 Binary files a/investors.db and b/investors.db differ