#!/usr/bin/env python3
"""Quick verification script for the database."""

import random

from models import CompanyTable, FundTable, InvestorTable, SectorTable, get_db_session

# Horizontal rule used to frame the report.
_RULE = "=" * 60


def _count(session, model, *criteria):
    """Return the number of ``model`` rows that satisfy every criterion."""
    query = session.query(model)
    if criteria:
        query = query.filter(*criteria)
    return query.count()


def _percent(part, total):
    """Return ``part`` as a percentage of ``total`` (0.0 when ``total`` is 0)."""
    # An empty table must not crash the report with ZeroDivisionError.
    return part / total * 100 if total else 0.0


def verify_database():
    """Print a verification report for the database to stdout.

    The report covers record counts per table, how many investors have
    portfolio companies and sectors, how many investors have selected
    fields populated, a small random sample of investors, and a closing
    hint that depends on whether any fund rows exist.

    The session obtained from ``get_db_session()`` is closed even when a
    query raises; the exception itself is propagated to the caller.
    """
    session = get_db_session()
    try:
        print(_RULE)
        print("🔍 DATABASE VERIFICATION")
        print(_RULE)

        # Count records
        investor_count = _count(session, InvestorTable)
        company_count = _count(session, CompanyTable)
        sector_count = _count(session, SectorTable)
        fund_count = _count(session, FundTable)

        print("\n📊 Record Counts:")
        print(f" Investors: {investor_count:,}")
        print(f" Companies: {company_count:,}")
        print(f" Sectors: {sector_count:,}")
        print(f" Funds: {fund_count:,}")

        # Check relationships
        investors_with_companies = _count(
            session, InvestorTable, InvestorTable.portfolio_companies.any()
        )
        investors_with_sectors = _count(
            session, InvestorTable, InvestorTable.sectors.any()
        )

        print("\n🔗 Relationships:")
        print(f" Investors with portfolio companies: {investors_with_companies:,}")
        print(f" Investors with sectors: {investors_with_sectors:,}")

        # Sample data quality checks
        investors_with_website = _count(
            session, InvestorTable, InvestorTable.website.isnot(None)
        )
        investors_with_investments = _count(
            session,
            InvestorTable,
            InvestorTable.number_of_investments.isnot(None),
            InvestorTable.number_of_investments > 0,
        )
        website_pct = _percent(investors_with_website, investor_count)
        investments_pct = _percent(investors_with_investments, investor_count)

        print("\n✅ Data Quality:")
        print(
            f" Investors with website: {investors_with_website:,} ({website_pct:.1f}%)"
        )
        print(
            f" Investors with investment count: {investors_with_investments:,}"
            f" ({investments_pct:.1f}%)"
        )

        # Check for enrichment readiness
        investors_with_aum = _count(
            session, InvestorTable, InvestorTable.aum.isnot(None)
        )
        investors_with_headquarters = _count(
            session, InvestorTable, InvestorTable.headquarters.isnot(None)
        )
        investors_with_thesis = _count(
            session, InvestorTable, InvestorTable.investment_thesis.isnot(None)
        )

        print("\n🎯 Enrichment Status:")
        print(f" Investors with AUM: {investors_with_aum:,}")
        print(f" Investors with HQ: {investors_with_headquarters:,}")
        print(f" Investors with thesis: {investors_with_thesis:,}")
        # NOTE: this prints the total number of fund rows, not the number of
        # investors that have at least one fund.
        print(f" Investors with funds: {fund_count:,}")

        if fund_count == 0:
            print("\n⚠️ No funds found - enrichment needed!")

        # Show a random sample. Only the first 1000 rows returned by the query
        # are candidates, so this is not a uniform sample of a larger table.
        candidates = session.query(InvestorTable).limit(1000).all()
        sample = random.sample(candidates, min(3, len(candidates)))

        print("\n📋 Random Sample:")
        for inv in sample:
            print(f"\n {inv.name}")
            print(f" Website: {inv.website or 'N/A'}")
            print(f" Investments: {inv.number_of_investments or 'N/A'}")
            print(f" Portfolio: {len(inv.portfolio_companies)} companies")
            print(f" Sectors: {len(inv.sectors)} sectors")
            if inv.funds:
                print(f" Funds: {len(inv.funds)}")
    finally:
        # Release the session on both the success and the failure path.
        session.close()

    print("\n" + _RULE)
    if fund_count == 0:
        print("📝 Next step: Run enrichment script")
        print(" python enrich_investors.py enriched_investors.csv")
    else:
        print("✅ Database is enriched and ready!")
    print(_RULE)


if __name__ == "__main__":
    verify_database()