122 lines
3.6 KiB
Python
122 lines
3.6 KiB
Python
|
|
#!/usr/bin/env python3
|
||
|
|
"""
|
||
|
|
Quick verification script for the database
|
||
|
|
"""
|
||
|
|
|
||
|
|
from models import CompanyTable, FundTable, InvestorTable, SectorTable, get_db_session
|
||
|
|
|
||
|
|
|
||
|
|
def _percentage(part, total):
    """Return ``part`` as a percentage of ``total``; 0.0 when ``total`` is 0."""
    return part / total * 100 if total else 0.0


def verify_database():
    """Print a verification report for the investor database.

    Opens a session via ``get_db_session()`` and prints, in order:
    record counts for the four tables, relationship coverage, data-quality
    percentages, enrichment status, and a small random sample of investors.
    The session is always closed, even when a query raises.

    Returns:
        None. All output goes to stdout.
    """
    import random

    session = get_db_session()

    def count_investors(*criteria):
        """Count InvestorTable rows matching all ``criteria`` (all rows if none)."""
        return session.query(InvestorTable).filter(*criteria).count()

    try:
        print("=" * 60)
        print("🔍 DATABASE VERIFICATION")
        print("=" * 60)

        # Count records
        investor_count = count_investors()
        company_count = session.query(CompanyTable).count()
        sector_count = session.query(SectorTable).count()
        fund_count = session.query(FundTable).count()

        print("\n📊 Record Counts:")
        print(f" Investors: {investor_count:,}")
        print(f" Companies: {company_count:,}")
        print(f" Sectors: {sector_count:,}")
        print(f" Funds: {fund_count:,}")

        # Check relationships
        investors_with_companies = count_investors(
            InvestorTable.portfolio_companies.any()
        )
        investors_with_sectors = count_investors(InvestorTable.sectors.any())

        print("\n🔗 Relationships:")
        print(f" Investors with portfolio companies: {investors_with_companies:,}")
        print(f" Investors with sectors: {investors_with_sectors:,}")

        # Sample data quality checks
        investors_with_website = count_investors(InvestorTable.website.isnot(None))
        investors_with_investments = count_investors(
            InvestorTable.number_of_investments.isnot(None),
            InvestorTable.number_of_investments > 0,
        )

        # _percentage() guards against an empty investor table, which would
        # otherwise raise ZeroDivisionError here.
        website_pct = _percentage(investors_with_website, investor_count)
        investments_pct = _percentage(investors_with_investments, investor_count)

        print("\n✅ Data Quality:")
        print(
            f" Investors with website: {investors_with_website:,} ({website_pct:.1f}%)"
        )
        print(
            f" Investors with investment count: {investors_with_investments:,} ({investments_pct:.1f}%)"
        )

        # Check for enrichment readiness
        investors_with_aum = count_investors(InvestorTable.aum.isnot(None))
        investors_with_headquarters = count_investors(
            InvestorTable.headquarters.isnot(None)
        )
        investors_with_thesis = count_investors(
            InvestorTable.investment_thesis.isnot(None)
        )

        print("\n🎯 Enrichment Status:")
        print(f" Investors with AUM: {investors_with_aum:,}")
        print(f" Investors with HQ: {investors_with_headquarters:,}")
        print(f" Investors with thesis: {investors_with_thesis:,}")
        # NOTE(review): the label says "Investors with funds" but the value is
        # the total FundTable row count, not a count of investors — confirm
        # which one is intended.
        print(f" Investors with funds: {fund_count:,}")

        if fund_count == 0:
            print("\n⚠️ No funds found - enrichment needed!")

        # Show a random sample: up to 3 investors drawn from the first (at
        # most) 1000 rows the query returns, not from the whole table.
        sample_investors = session.query(InvestorTable).limit(1000).all()
        sample = random.sample(sample_investors, min(3, len(sample_investors)))

        print("\n📋 Random Sample:")
        for inv in sample:
            print(f"\n {inv.name}")
            print(f" Website: {inv.website or 'N/A'}")
            # A count of 0 is falsy and is therefore shown as 'N/A' as well.
            print(f" Investments: {inv.number_of_investments or 'N/A'}")
            print(f" Portfolio: {len(inv.portfolio_companies)} companies")
            print(f" Sectors: {len(inv.sectors)} sectors")
            if inv.funds:
                print(f" Funds: {len(inv.funds)}")
    finally:
        # Release the session even if one of the queries above failed.
        session.close()

    print("\n" + "=" * 60)

    if fund_count == 0:
        print("📝 Next step: Run enrichment script")
        print(" python enrich_investors.py enriched_investors.csv")
    else:
        print("✅ Database is enriched and ready!")

    print("=" * 60)
|
||
|
|
|
||
|
|
|
||
|
|
# Script entry point: run the verification report only when this file is
# executed directly, not when it is imported as a module.
if __name__ == "__main__":
    verify_database()
|