Refactor code structure for improved readability and maintainability
This commit is contained in:
@@ -0,0 +1,121 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
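Quick verification script for the database: prints record counts, relationship coverage, data-quality percentages, enrichment status, and a small random sample of investors.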
|
||||
"""
|
||||
|
||||
from models import CompanyTable, FundTable, InvestorTable, SectorTable, get_db_session
|
||||
|
||||
|
||||
def _percent_of(part, total):
    """Return ``part`` as a percentage of ``total``; 0.0 when ``total`` is 0."""
    # Guard against an empty table so the report never divides by zero.
    return part / total * 100 if total else 0.0


def verify_database() -> None:
    """Print a verification report for the database to stdout.

    The report covers, in order: row counts for the investor, company,
    sector and fund tables; how many investors have portfolio companies or
    sectors attached; how many investors have a website or a positive
    investment count (with percentages of all investors); how many have
    AUM, headquarters or an investment thesis set; and up to three
    investors sampled at random from the first 1000 rows returned.

    The session obtained from ``get_db_session()`` is always closed, even
    when a query raises. The closing banner suggests running the enrichment
    script when the fund table is empty.
    """
    import random  # function-local, as in the original script

    session = get_db_session()
    try:
        print("=" * 60)
        print("🔍 DATABASE VERIFICATION")
        print("=" * 60)

        # Count records
        investor_count = session.query(InvestorTable).count()
        company_count = session.query(CompanyTable).count()
        sector_count = session.query(SectorTable).count()
        fund_count = session.query(FundTable).count()

        print("\n📊 Record Counts:")
        print(f" Investors: {investor_count:,}")
        print(f" Companies: {company_count:,}")
        print(f" Sectors: {sector_count:,}")
        print(f" Funds: {fund_count:,}")

        # Check relationships
        investors_with_companies = (
            session.query(InvestorTable)
            .filter(InvestorTable.portfolio_companies.any())
            .count()
        )
        investors_with_sectors = (
            session.query(InvestorTable)
            .filter(InvestorTable.sectors.any())
            .count()
        )

        print("\n🔗 Relationships:")
        print(f" Investors with portfolio companies: {investors_with_companies:,}")
        print(f" Investors with sectors: {investors_with_sectors:,}")

        # Sample data quality checks
        investors_with_website = (
            session.query(InvestorTable)
            .filter(InvestorTable.website.isnot(None))
            .count()
        )
        investors_with_investments = (
            session.query(InvestorTable)
            .filter(
                InvestorTable.number_of_investments.isnot(None),
                InvestorTable.number_of_investments > 0,
            )
            .count()
        )

        # Percentages are computed through the helper so an empty investor
        # table reports 0.0% instead of raising ZeroDivisionError.
        website_pct = _percent_of(investors_with_website, investor_count)
        investments_pct = _percent_of(investors_with_investments, investor_count)

        print("\n✅ Data Quality:")
        print(
            f" Investors with website: {investors_with_website:,} ({website_pct:.1f}%)"
        )
        print(
            f" Investors with investment count: {investors_with_investments:,} ({investments_pct:.1f}%)"
        )

        # Check for enrichment readiness
        investors_with_aum = (
            session.query(InvestorTable)
            .filter(InvestorTable.aum.isnot(None))
            .count()
        )
        investors_with_headquarters = (
            session.query(InvestorTable)
            .filter(InvestorTable.headquarters.isnot(None))
            .count()
        )
        investors_with_thesis = (
            session.query(InvestorTable)
            .filter(InvestorTable.investment_thesis.isnot(None))
            .count()
        )

        print("\n🎯 Enrichment Status:")
        print(f" Investors with AUM: {investors_with_aum:,}")
        print(f" Investors with HQ: {investors_with_headquarters:,}")
        print(f" Investors with thesis: {investors_with_thesis:,}")
        # NOTE(review): this line prints the total number of fund rows, not
        # the number of investors that have funds — confirm the intent.
        print(f" Investors with funds: {fund_count:,}")

        if fund_count == 0:
            print("\n⚠️ No funds found - enrichment needed!")

        # Show a random sample, drawn from at most the first 1000 rows.
        sample_investors = session.query(InvestorTable).limit(1000).all()
        sample = random.sample(sample_investors, min(3, len(sample_investors)))

        print("\n📋 Random Sample:")
        for inv in sample:
            # The related collections are read while the session is still open.
            print(f"\n {inv.name}")
            print(f" Website: {inv.website or 'N/A'}")
            print(f" Investments: {inv.number_of_investments or 'N/A'}")
            print(f" Portfolio: {len(inv.portfolio_companies)} companies")
            print(f" Sectors: {len(inv.sectors)} sectors")
            if inv.funds:
                print(f" Funds: {len(inv.funds)}")
    finally:
        # Release the session even if one of the queries above failed.
        session.close()

    print("\n" + "=" * 60)

    if fund_count == 0:
        print("📝 Next step: Run enrichment script")
        print(" python enrich_investors.py enriched_investors.csv")
    else:
        print("✅ Database is enriched and ready!")

    print("=" * 60)
|
||||
|
||||
|
||||
# Script entry point: produce the verification report only when this file is
# executed directly, not when it is imported as a module.
if __name__ == "__main__":
    verify_database()
|
||||
Reference in New Issue
Block a user