a9589e54f3
- Updated FundTable to replace JSON fields for investment stages and sectors with relationships. - Introduced InvestmentStageTable and fund_investment_stages association table. - Created fund_sectors association table for many-to-many relationship with sectors. - Changed geographic_focus from JSON array to a simple string. - Migrated existing data to new schema, ensuring data integrity and normalization. - Updated related schemas, routers, and services to reflect new structure. - Added migration script to handle data transformation and schema updates. - Implemented tests to verify new relationships and data integrity.
251 lines
9.5 KiB
Python
251 lines
9.5 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
Migration script to update fund table schema:
|
|
1. Change geographic_focus from JSON to STRING
|
|
2. Create investment_stages table and fund_investment_stages association table
|
|
3. Create fund_sectors association table for many-to-many with sectors
|
|
4. Remove investment_stage_focus and sector_focus JSON columns
|
|
"""
|
|
|
|
import json
import sqlite3
from pathlib import Path
|
|
|
|
|
|
def _parse_json_list(raw):
    """Return the non-empty string items of a JSON-array column value.

    A bare JSON string is treated as a one-item list (iterating it would
    otherwise yield single characters). NULL/empty values, malformed JSON,
    JSON ``null`` and non-list payloads all yield an empty list, so callers
    can skip the row without a catch-all ``except``.
    """
    if not raw:
        return []
    try:
        decoded = json.loads(raw)
    except (TypeError, ValueError):  # json.JSONDecodeError subclasses ValueError
        return []
    if isinstance(decoded, str):
        decoded = [decoded]
    if not isinstance(decoded, list):
        return []
    return [item for item in decoded if isinstance(item, str) and item]


def _geographic_focus_to_text(raw):
    """Flatten a stored geographic_focus value into a plain string, or None.

    JSON arrays are joined with ", " (a single-element array becomes just that
    element); empty arrays and JSON ``null`` become None. Values that are not
    JSON, or that decode to something other than a list/string, are returned
    unchanged because they are treated as already being plain text.
    """
    if raw is None:
        return None
    try:
        decoded = json.loads(raw)
    except (TypeError, ValueError):
        return raw
    if decoded is None:
        return None
    if isinstance(decoded, str):
        return decoded or None
    if isinstance(decoded, list):
        parts = [str(item) for item in decoded if item not in (None, "")]
        return ", ".join(parts) or None
    return raw


def migrate_fund_relationships(db_path=None) -> None:
    """Normalize the ``funds`` table of the SQLite database in one transaction.

    Steps performed:
      * recreate ``investment_stages`` and seed it with the standard stages;
      * create the ``fund_investment_stages`` and ``fund_sectors`` association
        tables if they do not exist;
      * rebuild ``funds`` without ``investment_stage_focus`` / ``sector_focus``
        and with ``geographic_focus`` flattened from a JSON array to text;
      * move the old JSON stage/sector lists into the association tables
        (unknown stage names are skipped and reported, missing sectors are
        created).

    NOTE: the rebuilt ``funds`` table is created from the fixed column list
    below; this function does not recreate any index or trigger that may have
    been attached to the old table.

    Args:
        db_path: Path of the SQLite file to migrate. Defaults to
            ``version_two.db`` located next to this script.

    Raises:
        sqlite3.Error: on any database failure. The transaction is rolled back
            before the exception is re-raised, leaving the schema untouched.
    """
    if db_path is None:
        db_path = Path(__file__).parent / "version_two.db"

    conn = sqlite3.connect(db_path)
    cursor = conn.cursor()

    print("🔄 Starting fund relationships migration...")

    # Kept under its own name: the summary at the end must report this count,
    # not the length of whichever fund's stage list happened to be parsed last.
    standard_stages = [
        "Seed",
        "Pre-Seed",
        "Series A",
        "Series B",
        "Series C",
        "Series D+",
        "Growth",
        "Late Stage",
        "IPO",
        "Venture",
        "Early Stage",
    ]

    try:
        # With foreign keys enforced, DROP TABLE funds would cascade-delete the
        # association rows written below. The pragma is a no-op inside a
        # transaction, so it has to be issued before BEGIN.
        cursor.execute("PRAGMA foreign_keys = OFF")
        # The sqlite3 module only opens its implicit transaction before DML, so
        # the leading DROP/CREATE would otherwise be committed immediately and
        # escape the rollback in the except branch.
        cursor.execute("BEGIN")

        # Step 1: Drop and recreate investment_stages table with correct schema
        print("1️⃣ Recreating investment_stages table...")
        cursor.execute("DROP TABLE IF EXISTS investment_stages")
        cursor.execute("""
            CREATE TABLE investment_stages (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                name VARCHAR NOT NULL UNIQUE,
                created_at DATETIME DEFAULT CURRENT_TIMESTAMP,
                updated_at DATETIME
            )
        """)

        # Insert standard investment stages
        cursor.executemany(
            "INSERT OR IGNORE INTO investment_stages (name) VALUES (?)",
            [(stage,) for stage in standard_stages],
        )
        print(f" ✅ Created investment_stages table with {len(standard_stages)} stages")

        # Step 2: Create fund_investment_stages association table
        print("2️⃣ Creating fund_investment_stages association table...")
        cursor.execute("""
            CREATE TABLE IF NOT EXISTS fund_investment_stages (
                fund_id INTEGER NOT NULL,
                stage_id INTEGER NOT NULL,
                PRIMARY KEY (fund_id, stage_id),
                FOREIGN KEY (fund_id) REFERENCES funds (id) ON DELETE CASCADE,
                FOREIGN KEY (stage_id) REFERENCES investment_stages (id) ON DELETE CASCADE
            )
        """)
        print(" ✅ Created fund_investment_stages association table")

        # Step 3: Create fund_sectors association table
        print("3️⃣ Creating fund_sectors association table...")
        cursor.execute("""
            CREATE TABLE IF NOT EXISTS fund_sectors (
                fund_id INTEGER NOT NULL,
                sector_id INTEGER NOT NULL,
                PRIMARY KEY (fund_id, sector_id),
                FOREIGN KEY (fund_id) REFERENCES funds (id) ON DELETE CASCADE,
                FOREIGN KEY (sector_id) REFERENCES sectors (id) ON DELETE CASCADE
            )
        """)
        print(" ✅ Created fund_sectors association table")

        # Step 4: Get current funds table columns (informational only)
        cursor.execute("PRAGMA table_info(funds)")
        column_count = len(cursor.fetchall())
        print(f"\n📊 Current funds table has {column_count} columns")

        # Step 5: Create new funds table with updated schema
        print("4️⃣ Creating new funds table schema...")
        cursor.execute("""
            CREATE TABLE funds_new (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                investor_id INTEGER NOT NULL,
                fund_name VARCHAR,
                fund_size INTEGER,
                fund_size_source_url VARCHAR,
                check_size_lower INTEGER,
                check_size_upper INTEGER,
                source_url VARCHAR,
                source_provider VARCHAR,
                geographic_focus VARCHAR,
                created_at DATETIME DEFAULT CURRENT_TIMESTAMP,
                updated_at DATETIME,
                FOREIGN KEY (investor_id) REFERENCES investors (id)
            )
        """)

        # Step 6: Copy data from old table to new table.
        # geographic_focus is flattened in Python: stripping '["' / '"]' in SQL
        # left quotes and commas behind for arrays with more than one element.
        print("5️⃣ Copying data from old funds table...")
        cursor.execute("""
            SELECT
                id, investor_id, fund_name, fund_size, fund_size_source_url,
                check_size_lower, check_size_upper, source_url, source_provider,
                geographic_focus, created_at, updated_at
            FROM funds
        """)
        fund_rows = [
            (*row[:9], _geographic_focus_to_text(row[9]), *row[10:])
            for row in cursor.fetchall()
        ]
        cursor.executemany(
            """
            INSERT INTO funds_new (
                id, investor_id, fund_name, fund_size, fund_size_source_url,
                check_size_lower, check_size_upper, source_url, source_provider,
                geographic_focus, created_at, updated_at
            )
            VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
            """,
            fund_rows,
        )
        rows_copied = len(fund_rows)
        print(f" ✅ Copied {rows_copied} rows")

        # Step 7: Migrate investment_stage_focus data to association table
        print("6️⃣ Migrating investment stage focus data...")
        # Stage names are UNIQUE, so a single name -> id map replaces one
        # SELECT per (fund, stage) pair.
        cursor.execute("SELECT name, id FROM investment_stages")
        stage_ids = dict(cursor.fetchall())

        cursor.execute("""
            SELECT id, investment_stage_focus FROM funds
            WHERE investment_stage_focus IS NOT NULL AND investment_stage_focus != '[]'
        """)
        stage_migrations = 0
        unparsed_stage_rows = 0
        unknown_stages = set()
        for fund_id, stages_json in cursor.fetchall():
            stage_names = _parse_json_list(stages_json)
            if not stage_names:
                unparsed_stage_rows += 1
                continue
            for stage_name in stage_names:
                stage_id = stage_ids.get(stage_name)
                if stage_id is None:
                    unknown_stages.add(stage_name)
                    continue
                cursor.execute(
                    """
                    INSERT OR IGNORE INTO fund_investment_stages (fund_id, stage_id)
                    VALUES (?, ?)
                    """,
                    (fund_id, stage_id),
                )
                # rowcount is 0 when the pair already existed and was ignored.
                stage_migrations += cursor.rowcount

        print(f" ✅ Migrated {stage_migrations} stage relationships")
        if unknown_stages:
            print(f" ⚠️ Skipped unknown stage names: {sorted(unknown_stages)}")
        if unparsed_stage_rows:
            print(f" ⚠️ Skipped {unparsed_stage_rows} funds with unreadable investment_stage_focus")

        # Step 8: Migrate sector_focus data to association table
        print("7️⃣ Migrating sector focus data...")
        cursor.execute("""
            SELECT id, sector_focus FROM funds
            WHERE sector_focus IS NOT NULL AND sector_focus != '[]'
        """)
        sector_migrations = 0
        unparsed_sector_rows = 0
        sector_ids = {}  # cache of sector name -> id resolved during this run
        for fund_id, sectors_json in cursor.fetchall():
            sector_names = _parse_json_list(sectors_json)
            if not sector_names:
                unparsed_sector_rows += 1
                continue
            for sector_name in sector_names:
                sector_id = sector_ids.get(sector_name)
                if sector_id is None:
                    # Find or create sector
                    cursor.execute(
                        "SELECT id FROM sectors WHERE name = ?",
                        (sector_name,),
                    )
                    found = cursor.fetchone()
                    if found:
                        sector_id = found[0]
                    else:
                        cursor.execute(
                            "INSERT INTO sectors (name) VALUES (?)",
                            (sector_name,),
                        )
                        sector_id = cursor.lastrowid
                    sector_ids[sector_name] = sector_id

                cursor.execute(
                    """
                    INSERT OR IGNORE INTO fund_sectors (fund_id, sector_id)
                    VALUES (?, ?)
                    """,
                    (fund_id, sector_id),
                )
                sector_migrations += cursor.rowcount

        print(f" ✅ Migrated {sector_migrations} sector relationships")
        if unparsed_sector_rows:
            print(f" ⚠️ Skipped {unparsed_sector_rows} funds with unreadable sector_focus")

        # Step 9: Drop old funds table
        print("8️⃣ Dropping old funds table...")
        cursor.execute("DROP TABLE funds")

        # Step 10: Rename new table to funds
        print("9️⃣ Renaming funds_new to funds...")
        cursor.execute("ALTER TABLE funds_new RENAME TO funds")

        # Commit all changes
        conn.commit()

        print("\n✅ Migration completed successfully!")
        print("\n📝 Summary:")
        print(f" - Created investment_stages table with {len(standard_stages)} stages")
        print(" - Created fund_investment_stages association table")
        print(" - Created fund_sectors association table")
        print(f" - Migrated {rows_copied} fund records")
        print(f" - Migrated {stage_migrations} stage relationships")
        print(f" - Migrated {sector_migrations} sector relationships")
        print(" - geographic_focus: JSON → STRING")
        print(" - investment_stage_focus: REMOVED (now in fund_investment_stages)")
        print(" - sector_focus: REMOVED (now in fund_sectors)")

    except Exception as e:
        conn.rollback()
        print(f"\n❌ Migration failed: {e}")
        raise
    finally:
        conn.close()
|
|
|
|
|
|
if __name__ == "__main__":
    # Script entry point: run the migration only on direct execution,
    # never as a side effect of importing this module.
    migrate_fund_relationships()
|