Files
Anton_wireframe/preprocessor/migrate_fund_relationships.py
T
bolade a9589e54f3 feat: Refactor Fund schema to use many-to-many relationships for investment stages and sectors
- Updated FundTable to replace JSON fields for investment stages and sectors with relationships.
- Introduced InvestmentStageTable and fund_investment_stages association table.
- Created fund_sectors association table for many-to-many relationship with sectors.
- Changed geographic_focus from JSON array to a simple string.
- Migrated existing data to new schema, ensuring data integrity and normalization.
- Updated related schemas, routers, and services to reflect new structure.
- Added migration script to handle data transformation and schema updates.
- Implemented tests to verify new relationships and data integrity.
2025-10-07 15:57:29 +01:00

251 lines
9.5 KiB
Python

#!/usr/bin/env python3
"""
Migration script to update fund table schema:
1. Change geographic_focus from JSON to STRING
2. Create investment_stages table and fund_investment_stages association table
3. Create fund_sectors association table for many-to-many with sectors
4. Remove investment_stage_focus and sector_focus JSON columns
"""
import json
import sqlite3
from pathlib import Path
# Canonical stage names seeded into the investment_stages lookup table.
_STANDARD_STAGES = (
    "Seed",
    "Pre-Seed",
    "Series A",
    "Series B",
    "Series C",
    "Series D+",
    "Growth",
    "Late Stage",
    "IPO",
    "Venture",
    "Early Stage",
)

# Columns copied verbatim from the old funds table into funds_new.
# geographic_focus is handled separately because its value is converted.
_CARRIED_FUND_COLUMNS = (
    "id",
    "investor_id",
    "fund_name",
    "fund_size",
    "fund_size_source_url",
    "check_size_lower",
    "check_size_upper",
    "source_url",
    "source_provider",
    "created_at",
    "updated_at",
)


def _decode_names(raw):
    """Decode a JSON-encoded column value into a list of trimmed, non-empty names.

    A JSON string is treated as a one-element list so it is never iterated
    character by character. Returns None when *raw* is not valid JSON or
    decodes to something other than a string or a list.
    """
    try:
        parsed = json.loads(raw)
    except (TypeError, ValueError):  # JSONDecodeError is a ValueError
        return None
    if isinstance(parsed, str):
        parsed = [parsed]
    if not isinstance(parsed, list):
        return None
    return [item.strip() for item in parsed if isinstance(item, str) and item.strip()]


def _geographic_focus_to_text(raw):
    """Flatten a JSON-array geographic_focus value into a comma-separated string.

    NULL and empty arrays become None; values that are not a JSON list or
    string are returned unchanged.
    """
    if raw is None:
        return None
    names = _decode_names(raw)
    if names is None:
        return raw
    return ", ".join(names) or None


def _migrate_stage_links(cursor):
    """Copy investment_stage_focus JSON values into fund_investment_stages.

    Returns (rows_inserted, unmatched_stage_names, skipped_fund_ids).
    """
    stage_ids = dict(cursor.execute("SELECT name, id FROM investment_stages").fetchall())
    cursor.execute("""
        SELECT id, investment_stage_focus FROM funds
        WHERE investment_stage_focus IS NOT NULL AND investment_stage_focus != '[]'
    """)
    inserted = 0
    unmatched = set()
    skipped = []
    for fund_id, raw in cursor.fetchall():
        names = _decode_names(raw)
        if names is None:
            skipped.append(fund_id)
            continue
        for name in names:
            stage_id = stage_ids.get(name)
            if stage_id is None:
                unmatched.add(name)
                continue
            cursor.execute(
                """
                INSERT OR IGNORE INTO fund_investment_stages (fund_id, stage_id)
                VALUES (?, ?)
                """,
                (fund_id, stage_id),
            )
            # rowcount is 0 when OR IGNORE skipped a duplicate pair.
            inserted += cursor.rowcount
    return inserted, sorted(unmatched), skipped


def _migrate_sector_links(cursor):
    """Copy sector_focus JSON values into fund_sectors, creating missing sectors.

    Returns (rows_inserted, skipped_fund_ids).
    """
    cursor.execute("""
        SELECT id, sector_focus FROM funds
        WHERE sector_focus IS NOT NULL AND sector_focus != '[]'
    """)
    inserted = 0
    skipped = []
    for fund_id, raw in cursor.fetchall():
        names = _decode_names(raw)
        if names is None:
            skipped.append(fund_id)
            continue
        for name in names:
            # Find or create the sector.
            cursor.execute("SELECT id FROM sectors WHERE name = ?", (name,))
            found = cursor.fetchone()
            if found:
                sector_id = found[0]
            else:
                cursor.execute("INSERT INTO sectors (name) VALUES (?)", (name,))
                sector_id = cursor.lastrowid
            cursor.execute(
                """
                INSERT OR IGNORE INTO fund_sectors (fund_id, sector_id)
                VALUES (?, ?)
                """,
                (fund_id, sector_id),
            )
            inserted += cursor.rowcount
    return inserted, skipped


def migrate_fund_relationships(db_path=None):
    """Migrate the funds table from JSON focus columns to relational tables.

    Recreates ``investment_stages``, creates the ``fund_investment_stages`` and
    ``fund_sectors`` association tables, rebuilds ``funds`` without the
    ``investment_stage_focus`` / ``sector_focus`` columns, and converts
    ``geographic_focus`` from a JSON array to a plain string. All schema and
    data changes run in one explicit transaction, so a failure leaves the
    database unchanged.

    Args:
        db_path: Path to the SQLite database. Defaults to ``version_two.db``
            next to this script.

    Raises:
        FileNotFoundError: If the database file does not exist.
        sqlite3.Error: If the ``funds`` table is missing or a statement fails;
            the transaction is rolled back before the error is re-raised.
    """
    if db_path is None:
        db_path = Path(__file__).parent / "version_two.db"
    db_path = Path(db_path)
    # sqlite3.connect() would silently create an empty database otherwise.
    if not db_path.exists():
        raise FileNotFoundError(f"Database file not found: {db_path}")

    conn = sqlite3.connect(db_path)
    cursor = conn.cursor()
    print("🔄 Starting fund relationships migration...")
    try:
        # With foreign keys enforced, DROP TABLE funds performs an implicit
        # DELETE that would cascade into the association rows written below.
        # The pragma is a no-op inside a transaction, so set it before BEGIN.
        cursor.execute("PRAGMA foreign_keys = OFF")

        cursor.execute("PRAGMA table_info(funds)")
        columns = {col[1]: col for col in cursor.fetchall()}
        if not columns:
            raise sqlite3.OperationalError("no such table: funds")
        if not {"investment_stage_focus", "sector_focus"}.intersection(columns):
            # Already migrated: re-running would only reset investment_stages.
            print("\n✅ funds table already uses the relationship schema; nothing to migrate.")
            return

        # sqlite3 opens a transaction implicitly only before INSERT/UPDATE/
        # DELETE/REPLACE; begin explicitly so the DDL below is rolled back too.
        cursor.execute("BEGIN")

        # Step 1: Drop and recreate investment_stages table with correct schema
        print("1️⃣ Recreating investment_stages table...")
        cursor.execute("DROP TABLE IF EXISTS investment_stages")
        cursor.execute("""
            CREATE TABLE investment_stages (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                name VARCHAR NOT NULL UNIQUE,
                created_at DATETIME DEFAULT CURRENT_TIMESTAMP,
                updated_at DATETIME
            )
        """)
        # Insert standard investment stages
        cursor.executemany(
            "INSERT OR IGNORE INTO investment_stages (name) VALUES (?)",
            [(stage,) for stage in _STANDARD_STAGES],
        )
        print(f" ✅ Created investment_stages table with {len(_STANDARD_STAGES)} stages")

        # Step 2: Create fund_investment_stages association table
        print("2️⃣ Creating fund_investment_stages association table...")
        cursor.execute("""
            CREATE TABLE IF NOT EXISTS fund_investment_stages (
                fund_id INTEGER NOT NULL,
                stage_id INTEGER NOT NULL,
                PRIMARY KEY (fund_id, stage_id),
                FOREIGN KEY (fund_id) REFERENCES funds (id) ON DELETE CASCADE,
                FOREIGN KEY (stage_id) REFERENCES investment_stages (id) ON DELETE CASCADE
            )
        """)
        print(" ✅ Created fund_investment_stages association table")

        # Step 3: Create fund_sectors association table
        print("3️⃣ Creating fund_sectors association table...")
        cursor.execute("""
            CREATE TABLE IF NOT EXISTS fund_sectors (
                fund_id INTEGER NOT NULL,
                sector_id INTEGER NOT NULL,
                PRIMARY KEY (fund_id, sector_id),
                FOREIGN KEY (fund_id) REFERENCES funds (id) ON DELETE CASCADE,
                FOREIGN KEY (sector_id) REFERENCES sectors (id) ON DELETE CASCADE
            )
        """)
        print(" ✅ Created fund_sectors association table")

        # Step 4: Report current funds table columns (read above)
        print(f"\n📊 Current funds table has {len(columns)} columns")

        # Step 5: Create new funds table with updated schema
        print("4️⃣ Creating new funds table schema...")
        cursor.execute("""
            CREATE TABLE funds_new (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                investor_id INTEGER NOT NULL,
                fund_name VARCHAR,
                fund_size INTEGER,
                fund_size_source_url VARCHAR,
                check_size_lower INTEGER,
                check_size_upper INTEGER,
                source_url VARCHAR,
                source_provider VARCHAR,
                geographic_focus VARCHAR,
                created_at DATETIME DEFAULT CURRENT_TIMESTAMP,
                updated_at DATETIME,
                FOREIGN KEY (investor_id) REFERENCES investors (id)
            )
        """)

        # Step 6: Copy data from old table to new table. geographic_focus is
        # decoded in Python so multi-element arrays become "A, B" rather than
        # keeping stray quote characters between elements.
        print("5️⃣ Copying data from old funds table...")
        carried = ", ".join(_CARRIED_FUND_COLUMNS)  # fixed identifiers, not user input
        cursor.execute(f"SELECT {carried}, geographic_focus FROM funds")
        converted_rows = [
            row[:-1] + (_geographic_focus_to_text(row[-1]),)
            for row in cursor.fetchall()
        ]
        placeholders = ", ".join("?" * (len(_CARRIED_FUND_COLUMNS) + 1))
        cursor.executemany(
            f"INSERT INTO funds_new ({carried}, geographic_focus) VALUES ({placeholders})",
            converted_rows,
        )
        rows_copied = len(converted_rows)
        print(f" ✅ Copied {rows_copied} rows")

        # Step 7: Migrate investment_stage_focus data to association table
        print("6️⃣ Migrating investment stage focus data...")
        stage_migrations, unmatched_stages, bad_stage_funds = _migrate_stage_links(cursor)
        print(f" ✅ Migrated {stage_migrations} stage relationships")
        if unmatched_stages:
            print(f" ⚠️ Stage names with no matching investment stage (not migrated): {', '.join(unmatched_stages)}")
        if bad_stage_funds:
            print(f" ⚠️ Skipped funds with unparseable investment_stage_focus: {bad_stage_funds}")

        # Step 8: Migrate sector_focus data to association table
        print("7️⃣ Migrating sector focus data...")
        sector_migrations, bad_sector_funds = _migrate_sector_links(cursor)
        print(f" ✅ Migrated {sector_migrations} sector relationships")
        if bad_sector_funds:
            print(f" ⚠️ Skipped funds with unparseable sector_focus: {bad_sector_funds}")

        # Step 9: Drop old funds table
        print("8️⃣ Dropping old funds table...")
        cursor.execute("DROP TABLE funds")

        # Step 10: Rename new table to funds
        print("9️⃣ Renaming funds_new to funds...")
        cursor.execute("ALTER TABLE funds_new RENAME TO funds")

        # Commit all changes
        conn.commit()
        print("\n✅ Migration completed successfully!")
        print("\n📝 Summary:")
        print(f" - Created investment_stages table with {len(_STANDARD_STAGES)} stages")
        print(" - Created fund_investment_stages association table")
        print(" - Created fund_sectors association table")
        print(f" - Migrated {rows_copied} fund records")
        print(f" - Migrated {stage_migrations} stage relationships")
        print(f" - Migrated {sector_migrations} sector relationships")
        print(" - geographic_focus: JSON → STRING")
        print(" - investment_stage_focus: REMOVED (now in fund_investment_stages)")
        print(" - sector_focus: REMOVED (now in fund_sectors)")
    except Exception as e:
        conn.rollback()
        print(f"\n❌ Migration failed: {e}")
        raise
    finally:
        conn.close()
# Run the migration only when this file is executed as a script, not on import.
if __name__ == "__main__":
    migrate_fund_relationships()