#!/usr/bin/env python3
"""
Migration script to update fund table schema:
1. Change geographic_focus from JSON to STRING
2. Create investment_stages table and fund_investment_stages association table
3. Create fund_sectors association table for many-to-many with sectors
4. Remove investment_stage_focus and sector_focus JSON columns

The whole migration runs inside one explicit transaction, so a failure at any
step leaves the database exactly as it was before the script started.
"""

import json
import sqlite3
from pathlib import Path
from typing import List, Optional, Union

# Database file used when no explicit path is passed; resolved next to this script.
DEFAULT_DB_NAME = "version_two.db"

# Rows seeded into the investment_stages lookup table.
STANDARD_INVESTMENT_STAGES = (
    "Seed",
    "Pre-Seed",
    "Series A",
    "Series B",
    "Series C",
    "Series D+",
    "Growth",
    "Late Stage",
    "IPO",
    "Venture",
    "Early Stage",
)


def _geographic_focus_to_text(raw):
    """Convert a stored geographic_focus value to a plain string.

    Registered as a SQL function so the bulk ``INSERT ... SELECT`` can use it.

    - ``NULL`` and ``'[]'`` become ``NULL``.
    - A JSON array becomes its non-null items joined with ``", "``.
    - A JSON string becomes the bare string.
    - Anything that is not valid JSON (for example a value that was already
      migrated to plain text) only has a leading ``["`` / trailing ``"]``
      stripped, which is what the original SQL ``REPLACE`` expression did.
    """
    if raw is None or raw == "[]":
        return None
    if isinstance(raw, bytes):
        text = raw.decode("utf-8", "replace")
    else:
        text = raw if isinstance(raw, str) else str(raw)

    stripped = text.replace('["', "").replace('"]', "")
    try:
        parsed = json.loads(text)
    except ValueError:
        return stripped

    if isinstance(parsed, list):
        joined = ", ".join(str(item) for item in parsed if item is not None)
        return joined or None
    if isinstance(parsed, str):
        return parsed
    return stripped


def _parse_name_list(raw) -> Optional[List[str]]:
    """Return the names stored in a JSON array column, or None if malformed.

    A bare JSON string is treated as a one-element list; non-string items
    inside an array are ignored.
    """
    try:
        parsed = json.loads(raw)
    except (TypeError, ValueError):
        return None
    if isinstance(parsed, str):
        return [parsed]
    if not isinstance(parsed, list):
        return None
    return [item for item in parsed if isinstance(item, str)]


def _migrate_stage_links(cursor) -> int:
    """Copy funds.investment_stage_focus into fund_investment_stages.

    Returns the number of (fund, stage) relationships written. Stage names
    with no row in investment_stages are reported and skipped.
    """
    cursor.execute("""
        SELECT id, investment_stage_focus
        FROM funds
        WHERE investment_stage_focus IS NOT NULL
          AND investment_stage_focus != '[]'
    """)
    fund_rows = cursor.fetchall()

    # Load the lookup table once instead of querying it for every name.
    cursor.execute("SELECT name, id FROM investment_stages")
    stage_ids = dict(cursor.fetchall())

    migrated = 0
    unmatched = set()
    for fund_id, raw in fund_rows:
        if not raw:
            continue
        names = _parse_name_list(raw)
        if names is None:
            print(
                f" ⚠️ Skipping unparseable investment_stage_focus "
                f"for fund {fund_id}: {raw!r}"
            )
            continue
        for name in names:
            stage_id = stage_ids.get(name)
            if stage_id is None:
                unmatched.add(name)
                continue
            cursor.execute(
                """
                INSERT OR IGNORE INTO fund_investment_stages (fund_id, stage_id)
                VALUES (?, ?)
                """,
                (fund_id, stage_id),
            )
            migrated += 1

    if unmatched:
        print(f" ⚠️ Ignored unknown stage names: {sorted(unmatched)}")
    return migrated


def _migrate_sector_links(cursor) -> int:
    """Copy funds.sector_focus into fund_sectors, creating missing sectors.

    Returns the number of (fund, sector) relationships written. Database
    errors are deliberately not caught here: the caller rolls back so the
    old funds table is never dropped after a partial copy.
    """
    cursor.execute("""
        SELECT id, sector_focus
        FROM funds
        WHERE sector_focus IS NOT NULL
          AND sector_focus != '[]'
    """)
    fund_rows = cursor.fetchall()

    migrated = 0
    for fund_id, raw in fund_rows:
        if not raw:
            continue
        names = _parse_name_list(raw)
        if names is None:
            print(
                f" ⚠️ Skipping unparseable sector_focus "
                f"for fund {fund_id}: {raw!r}"
            )
            continue
        for name in names:
            cursor.execute("SELECT id FROM sectors WHERE name = ?", (name,))
            found = cursor.fetchone()
            if found:
                sector_id = found[0]
            else:
                cursor.execute("INSERT INTO sectors (name) VALUES (?)", (name,))
                sector_id = cursor.lastrowid
            cursor.execute(
                """
                INSERT OR IGNORE INTO fund_sectors (fund_id, sector_id)
                VALUES (?, ?)
                """,
                (fund_id, sector_id),
            )
            migrated += 1
    return migrated


def migrate_fund_relationships(db_path: Optional[Union[str, Path]] = None) -> None:
    """Run the fund relationship migration against a SQLite database.

    Args:
        db_path: Database file to migrate. Defaults to ``version_two.db``
            located next to this script.

    Raises:
        Exception: Any error raised during the migration is re-raised after
            the transaction has been rolled back.
    """
    if db_path is None:
        db_path = Path(__file__).parent / DEFAULT_DB_NAME

    conn = sqlite3.connect(str(db_path))
    stage_count = len(STANDARD_INVESTMENT_STAGES)

    print("🔄 Starting fund relationships migration...")

    try:
        conn.create_function(
            "geographic_focus_to_text", 1, _geographic_focus_to_text
        )
        cursor = conn.cursor()

        # sqlite3 only opens an implicit transaction before DML statements, so
        # without this the first DROP/CREATE would be committed immediately and
        # the rollback below could not undo it.
        cursor.execute("BEGIN")

        # Step 1: Drop and recreate investment_stages table with correct schema
        print("1️⃣ Recreating investment_stages table...")
        cursor.execute("DROP TABLE IF EXISTS investment_stages")
        cursor.execute("""
            CREATE TABLE investment_stages (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                name VARCHAR NOT NULL UNIQUE,
                created_at DATETIME DEFAULT CURRENT_TIMESTAMP,
                updated_at DATETIME
            )
        """)
        cursor.executemany(
            "INSERT OR IGNORE INTO investment_stages (name) VALUES (?)",
            [(stage,) for stage in STANDARD_INVESTMENT_STAGES],
        )
        print(f" ✅ Created investment_stages table with {stage_count} stages")

        # Step 2: Create fund_investment_stages association table
        print("2️⃣ Creating fund_investment_stages association table...")
        cursor.execute("""
            CREATE TABLE IF NOT EXISTS fund_investment_stages (
                fund_id INTEGER NOT NULL,
                stage_id INTEGER NOT NULL,
                PRIMARY KEY (fund_id, stage_id),
                FOREIGN KEY (fund_id) REFERENCES funds (id) ON DELETE CASCADE,
                FOREIGN KEY (stage_id) REFERENCES investment_stages (id) ON DELETE CASCADE
            )
        """)
        print(" ✅ Created fund_investment_stages association table")

        # Step 3: Create fund_sectors association table
        print("3️⃣ Creating fund_sectors association table...")
        cursor.execute("""
            CREATE TABLE IF NOT EXISTS fund_sectors (
                fund_id INTEGER NOT NULL,
                sector_id INTEGER NOT NULL,
                PRIMARY KEY (fund_id, sector_id),
                FOREIGN KEY (fund_id) REFERENCES funds (id) ON DELETE CASCADE,
                FOREIGN KEY (sector_id) REFERENCES sectors (id) ON DELETE CASCADE
            )
        """)
        print(" ✅ Created fund_sectors association table")

        # Step 4: Get current funds table columns
        cursor.execute("PRAGMA table_info(funds)")
        columns = {col[1]: col for col in cursor.fetchall()}
        print(f"\n📊 Current funds table has {len(columns)} columns")

        # Step 5: Create new funds table with updated schema
        print("4️⃣ Creating new funds table schema...")
        # A run of the earlier, non-atomic version may have left this behind.
        cursor.execute("DROP TABLE IF EXISTS funds_new")
        cursor.execute("""
            CREATE TABLE funds_new (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                investor_id INTEGER NOT NULL,
                fund_name VARCHAR,
                fund_size INTEGER,
                fund_size_source_url VARCHAR,
                check_size_lower INTEGER,
                check_size_upper INTEGER,
                source_url VARCHAR,
                source_provider VARCHAR,
                geographic_focus VARCHAR,
                created_at DATETIME DEFAULT CURRENT_TIMESTAMP,
                updated_at DATETIME,
                FOREIGN KEY (investor_id) REFERENCES investors (id)
            )
        """)

        # Step 6: Copy data from old table to new table
        print("5️⃣ Copying data from old funds table...")
        cursor.execute("""
            INSERT INTO funds_new (
                id, investor_id, fund_name, fund_size, fund_size_source_url,
                check_size_lower, check_size_upper, source_url, source_provider,
                geographic_focus, created_at, updated_at
            )
            SELECT
                id, investor_id, fund_name, fund_size, fund_size_source_url,
                check_size_lower, check_size_upper, source_url, source_provider,
                geographic_focus_to_text(geographic_focus) AS geographic_focus,
                created_at, updated_at
            FROM funds
        """)
        rows_copied = cursor.rowcount
        print(f" ✅ Copied {rows_copied} rows")

        # Step 7: Migrate investment_stage_focus data to association table
        print("6️⃣ Migrating investment stage focus data...")
        # The column is absent when the migration has already been applied.
        stage_migrations = (
            _migrate_stage_links(cursor)
            if "investment_stage_focus" in columns
            else 0
        )
        print(f" ✅ Migrated {stage_migrations} stage relationships")

        # Step 8: Migrate sector_focus data to association table
        print("7️⃣ Migrating sector focus data...")
        sector_migrations = (
            _migrate_sector_links(cursor) if "sector_focus" in columns else 0
        )
        print(f" ✅ Migrated {sector_migrations} sector relationships")

        # Step 9: Drop old funds table
        # NOTE(review): any indexes or triggers defined on the old funds table
        # are dropped with it and are not recreated here — confirm none exist.
        print("8️⃣ Dropping old funds table...")
        cursor.execute("DROP TABLE funds")

        # Step 10: Rename new table to funds
        print("9️⃣ Renaming funds_new to funds...")
        cursor.execute("ALTER TABLE funds_new RENAME TO funds")

        # Commit all changes
        conn.commit()

        print("\n✅ Migration completed successfully!")
        print("\n📝 Summary:")
        print(f" - Created investment_stages table with {stage_count} stages")
        print(" - Created fund_investment_stages association table")
        print(" - Created fund_sectors association table")
        print(f" - Migrated {rows_copied} fund records")
        print(f" - Migrated {stage_migrations} stage relationships")
        print(f" - Migrated {sector_migrations} sector relationships")
        print(" - geographic_focus: JSON → STRING")
        print(" - investment_stage_focus: REMOVED (now in fund_investment_stages)")
        print(" - sector_focus: REMOVED (now in fund_sectors)")

    except Exception as e:
        conn.rollback()
        print(f"\n❌ Migration failed: {e}")
        raise
    finally:
        conn.close()


if __name__ == "__main__":
    migrate_fund_relationships()