feat: Refactor Fund schema to use many-to-many relationships for investment stages and sectors
- Updated FundTable to replace the JSON fields for investment stages and sectors with relationships.
- Introduced InvestmentStageTable and the fund_investment_stages association table.
- Created the fund_sectors association table for the many-to-many relationship with sectors.
- Changed geographic_focus from a JSON array to a simple string.
- Migrated existing data to the new schema, ensuring data integrity and normalization.
- Updated related schemas, routers, and services to reflect the new structure.
- Added a migration script to handle the data transformation and schema updates.
- Implemented tests to verify the new relationships and data integrity.
This commit is contained in:
@@ -0,0 +1,250 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Migration script to update fund table schema:
|
||||
1. Change geographic_focus from JSON to STRING
|
||||
2. Create investment_stages table and fund_investment_stages association table
|
||||
3. Create fund_sectors association table for many-to-many with sectors
|
||||
4. Remove investment_stage_focus and sector_focus JSON columns
|
||||
"""
|
||||
|
||||
import json
import sqlite3
from pathlib import Path
|
||||
|
||||
|
||||
# Canonical stage names seeded into investment_stages. Only these names can be
# linked to a fund; anything else found in the legacy JSON is reported and skipped.
_STANDARD_INVESTMENT_STAGES = (
    "Seed",
    "Pre-Seed",
    "Series A",
    "Series B",
    "Series C",
    "Series D+",
    "Growth",
    "Late Stage",
    "IPO",
    "Venture",
    "Early Stage",
)


def _parse_name_list(raw):
    """Decode a legacy JSON column value into an ordered list of unique names.

    Returns ``[]`` for NULL / blank / JSON ``null`` / empty arrays, and ``None``
    when the value is not valid JSON or is not a string or array (so the caller
    can report it). A bare JSON string is treated as a one-element list instead
    of being iterated character by character. Non-string and blank array items
    are dropped.
    """
    if raw is None or (isinstance(raw, str) and not raw.strip()):
        return []
    try:
        parsed = json.loads(raw)
    except (TypeError, ValueError):  # json.JSONDecodeError subclasses ValueError
        return None
    if parsed is None:
        return []
    if isinstance(parsed, str):
        parsed = [parsed]
    if not isinstance(parsed, list):
        return None
    cleaned = (item.strip() for item in parsed if isinstance(item, str))
    # dict.fromkeys keeps first-seen order while removing duplicates.
    return list(dict.fromkeys(name for name in cleaned if name))


def _geographic_focus_to_text(raw):
    """Convert a legacy JSON ``geographic_focus`` value to a plain string.

    A JSON array becomes its non-empty items joined with ``", "`` (a one-element
    array yields just that element); empty arrays and JSON ``null`` become
    ``None``. Values that are not JSON, or that decode to something other than
    a string or array, are returned unchanged.
    """
    if raw is None:
        return None
    try:
        parsed = json.loads(raw)
    except (TypeError, ValueError):
        return raw  # already plain text
    if parsed is None:
        return None
    if isinstance(parsed, str):
        return parsed.strip() or None
    if isinstance(parsed, list):
        parts = [str(item).strip() for item in parsed if item is not None]
        return ", ".join(part for part in parts if part) or None
    return raw


def migrate_fund_relationships(db_path=None):
    """Migrate the ``funds`` table from JSON focus columns to association tables.

    Steps, all inside one explicit transaction:

    1. Recreate ``investment_stages`` and seed the standard stage names.
    2. Create ``fund_investment_stages`` and ``fund_sectors`` if missing.
    3. Rebuild ``funds`` without ``investment_stage_focus`` / ``sector_focus``
       and with ``geographic_focus`` flattened from a JSON array to text.
    4. Populate the association tables from the legacy JSON columns, creating
       missing ``sectors`` rows on the fly.

    Args:
        db_path: Path of the SQLite database to migrate. Defaults to
            ``version_two.db`` next to this script.

    Raises:
        sqlite3.Error: On any database failure. The transaction is rolled back
            before the exception is re-raised.
    """
    if db_path is None:
        db_path = Path(__file__).parent / "version_two.db"

    conn = sqlite3.connect(db_path)
    cursor = conn.cursor()

    print("🔄 Starting fund relationships migration...")

    try:
        # sqlite3 only opens its implicit transaction before INSERT/UPDATE/DELETE,
        # so without an explicit BEGIN the DROP/CREATE statements below would be
        # applied immediately and the rollback in the except branch could not
        # undo them.
        cursor.execute("BEGIN")

        # Step 1: Drop and recreate investment_stages table with correct schema
        # NOTE(review): this discards existing investment_stages rows; any other
        # table that stores stage ids (e.g. investor-stage links) will now point
        # at the reseeded ids - confirm that is intended.
        print("1️⃣ Recreating investment_stages table...")
        cursor.execute("DROP TABLE IF EXISTS investment_stages")
        cursor.execute("""
            CREATE TABLE investment_stages (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                name VARCHAR NOT NULL UNIQUE,
                created_at DATETIME DEFAULT CURRENT_TIMESTAMP,
                updated_at DATETIME
            )
        """)

        # Insert standard investment stages
        cursor.executemany(
            "INSERT OR IGNORE INTO investment_stages (name) VALUES (?)",
            [(name,) for name in _STANDARD_INVESTMENT_STAGES],
        )
        stage_count = len(_STANDARD_INVESTMENT_STAGES)
        print(f" ✅ Created investment_stages table with {stage_count} stages")

        # Step 2: Create fund_investment_stages association table
        print("2️⃣ Creating fund_investment_stages association table...")
        cursor.execute("""
            CREATE TABLE IF NOT EXISTS fund_investment_stages (
                fund_id INTEGER NOT NULL,
                stage_id INTEGER NOT NULL,
                PRIMARY KEY (fund_id, stage_id),
                FOREIGN KEY (fund_id) REFERENCES funds (id) ON DELETE CASCADE,
                FOREIGN KEY (stage_id) REFERENCES investment_stages (id) ON DELETE CASCADE
            )
        """)
        print(" ✅ Created fund_investment_stages association table")

        # Step 3: Create fund_sectors association table
        print("3️⃣ Creating fund_sectors association table...")
        cursor.execute("""
            CREATE TABLE IF NOT EXISTS fund_sectors (
                fund_id INTEGER NOT NULL,
                sector_id INTEGER NOT NULL,
                PRIMARY KEY (fund_id, sector_id),
                FOREIGN KEY (fund_id) REFERENCES funds (id) ON DELETE CASCADE,
                FOREIGN KEY (sector_id) REFERENCES sectors (id) ON DELETE CASCADE
            )
        """)
        print(" ✅ Created fund_sectors association table")

        # Step 4: Get current funds table columns
        cursor.execute("PRAGMA table_info(funds)")
        columns = {col[1]: col for col in cursor.fetchall()}
        print(f"\n📊 Current funds table has {len(columns)} columns")

        # Step 5: Create new funds table with updated schema
        print("4️⃣ Creating new funds table schema...")
        cursor.execute("""
            CREATE TABLE funds_new (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                investor_id INTEGER NOT NULL,
                fund_name VARCHAR,
                fund_size INTEGER,
                fund_size_source_url VARCHAR,
                check_size_lower INTEGER,
                check_size_upper INTEGER,
                source_url VARCHAR,
                source_provider VARCHAR,
                geographic_focus VARCHAR,
                created_at DATETIME DEFAULT CURRENT_TIMESTAMP,
                updated_at DATETIME,
                FOREIGN KEY (investor_id) REFERENCES investors (id)
            )
        """)

        # Step 6: Copy data from old table to new table
        # The geographic_focus conversion is done in Python: stripping '["' and
        # '"]' in SQL only works for one-element arrays and leaves stray quotes
        # in anything longer.
        print("5️⃣ Copying data from old funds table...")
        cursor.execute("""
            SELECT
                id, investor_id, fund_name, fund_size, fund_size_source_url,
                check_size_lower, check_size_upper, source_url, source_provider,
                geographic_focus, created_at, updated_at
            FROM funds
        """)
        fund_rows = [
            row[:9] + (_geographic_focus_to_text(row[9]),) + row[10:]
            for row in cursor.fetchall()
        ]
        cursor.executemany(
            """
            INSERT INTO funds_new (
                id, investor_id, fund_name, fund_size, fund_size_source_url,
                check_size_lower, check_size_upper, source_url, source_provider,
                geographic_focus, created_at, updated_at
            )
            VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
            """,
            fund_rows,
        )
        rows_copied = len(fund_rows)
        print(f" ✅ Copied {rows_copied} rows")

        # Step 7: Migrate investment_stage_focus data to association table
        print("6️⃣ Migrating investment stage focus data...")
        cursor.execute("SELECT name, id FROM investment_stages")
        stage_ids = dict(cursor.fetchall())

        cursor.execute("""
            SELECT id, investment_stage_focus FROM funds
            WHERE investment_stage_focus IS NOT NULL AND investment_stage_focus != '[]'
        """)
        funds_with_stages = cursor.fetchall()

        stage_migrations = 0
        for fund_id, stages_json in funds_with_stages:
            stage_names = _parse_name_list(stages_json)
            if stage_names is None:
                print(
                    f" ⚠️ Skipping unparseable investment_stage_focus "
                    f"for fund {fund_id}: {stages_json!r}"
                )
                continue
            for stage_name in stage_names:
                stage_id = stage_ids.get(stage_name)
                if stage_id is None:
                    print(f" ⚠️ Unknown stage {stage_name!r} for fund {fund_id}; not linked")
                    continue
                cursor.execute(
                    """
                    INSERT OR IGNORE INTO fund_investment_stages (fund_id, stage_id)
                    VALUES (?, ?)
                    """,
                    (fund_id, stage_id),
                )
                # rowcount is 0 when the pair already existed and was ignored.
                stage_migrations += cursor.rowcount

        print(f" ✅ Migrated {stage_migrations} stage relationships")

        # Step 8: Migrate sector_focus data to association table
        print("7️⃣ Migrating sector focus data...")
        cursor.execute("""
            SELECT id, sector_focus FROM funds
            WHERE sector_focus IS NOT NULL AND sector_focus != '[]'
        """)
        funds_with_sectors = cursor.fetchall()

        sector_migrations = 0
        for fund_id, sectors_json in funds_with_sectors:
            sector_names = _parse_name_list(sectors_json)
            if sector_names is None:
                print(
                    f" ⚠️ Skipping unparseable sector_focus "
                    f"for fund {fund_id}: {sectors_json!r}"
                )
                continue
            for sector_name in sector_names:
                # Find or create sector
                cursor.execute(
                    "SELECT id FROM sectors WHERE name = ?",
                    (sector_name,),
                )
                found = cursor.fetchone()
                if found:
                    sector_id = found[0]
                else:
                    cursor.execute(
                        "INSERT INTO sectors (name) VALUES (?)",
                        (sector_name,),
                    )
                    sector_id = cursor.lastrowid

                cursor.execute(
                    """
                    INSERT OR IGNORE INTO fund_sectors (fund_id, sector_id)
                    VALUES (?, ?)
                    """,
                    (fund_id, sector_id),
                )
                sector_migrations += cursor.rowcount

        print(f" ✅ Migrated {sector_migrations} sector relationships")

        # Step 9: Drop old funds table
        print("8️⃣ Dropping old funds table...")
        cursor.execute("DROP TABLE funds")

        # Step 10: Rename new table to funds
        print("9️⃣ Renaming funds_new to funds...")
        cursor.execute("ALTER TABLE funds_new RENAME TO funds")

        # Commit all changes
        conn.commit()

        print("\n✅ Migration completed successfully!")
        print("\n📝 Summary:")
        print(f" - Created investment_stages table with {stage_count} stages")
        print(" - Created fund_investment_stages association table")
        print(" - Created fund_sectors association table")
        print(f" - Migrated {rows_copied} fund records")
        print(f" - Migrated {stage_migrations} stage relationships")
        print(f" - Migrated {sector_migrations} sector relationships")
        print(" - geographic_focus: JSON → STRING")
        print(" - investment_stage_focus: REMOVED (now in fund_investment_stages)")
        print(" - sector_focus: REMOVED (now in fund_sectors)")

    except Exception as e:
        conn.rollback()
        print(f"\n❌ Migration failed: {e}")
        raise
    finally:
        conn.close()


if __name__ == "__main__":
    migrate_fund_relationships()
|
||||
+41
-9
@@ -126,6 +126,22 @@ investor_stage_association = Table(
|
||||
Column("stage_id", Integer, ForeignKey("investment_stages.id")),
|
||||
)
|
||||
|
||||
# Association table for fund-stage many-to-many
|
||||
fund_investment_stages_association = Table(
    "fund_investment_stages",
    Base.metadata,
    # The composite primary key and ON DELETE CASCADE mirror the DDL the
    # migration script issues for this table, so metadata-created and migrated
    # databases end up with the same constraints.
    Column(
        "fund_id",
        Integer,
        ForeignKey("funds.id", ondelete="CASCADE"),
        primary_key=True,
    ),
    Column(
        "stage_id",
        Integer,
        ForeignKey("investment_stages.id", ondelete="CASCADE"),
        primary_key=True,
    ),
)
|
||||
|
||||
# Association table for fund-sector many-to-many
|
||||
fund_sectors_association = Table(
    "fund_sectors",
    Base.metadata,
    # The composite primary key and ON DELETE CASCADE mirror the DDL the
    # migration script issues for this table, so metadata-created and migrated
    # databases end up with the same constraints.
    Column(
        "fund_id",
        Integer,
        ForeignKey("funds.id", ondelete="CASCADE"),
        primary_key=True,
    ),
    Column(
        "sector_id",
        Integer,
        ForeignKey("sectors.id", ondelete="CASCADE"),
        primary_key=True,
    ),
)
|
||||
|
||||
|
||||
class InvestorTable(Base, TimestampMixin):
|
||||
__tablename__ = "investors"
|
||||
@@ -235,27 +251,40 @@ class FundTable(Base, TimestampMixin):
|
||||
source_url = Column(String, nullable=True)
|
||||
source_provider = Column(String, nullable=True) # e.g., "Perplexity"
|
||||
|
||||
# JSON array fields
|
||||
geographic_focus = Column(JSON, nullable=True) # Array of regions/countries
|
||||
investment_stage_focus = Column(JSON, nullable=True) # Array of stages
|
||||
sector_focus = Column(JSON, nullable=True) # Array of sectors
|
||||
# Geographic focus as simple string
|
||||
geographic_focus = Column(String, nullable=True)
|
||||
|
||||
# Relationships
|
||||
investor = relationship("InvestorTable", back_populates="funds")
|
||||
investment_stages = relationship(
|
||||
"InvestmentStageTable",
|
||||
secondary=fund_investment_stages_association,
|
||||
back_populates="funds",
|
||||
)
|
||||
sectors = relationship(
|
||||
"SectorTable",
|
||||
secondary=fund_sectors_association,
|
||||
back_populates="funds",
|
||||
)
|
||||
|
||||
|
||||
class InvestmentStageTable(Base, TimestampMixin):
    """Lookup row for a named investment stage.

    Linked many-to-many to investors (via ``investor_stage_association``) and
    to funds (via ``fund_investment_stages_association``).
    """

    __tablename__ = "investment_stages"

    id = Column(Integer, primary_key=True, index=True)
    # A stage is identified by its unique name. The migrated investment_stages
    # table has no `stage` enum column, so none is mapped here.
    name = Column(String, nullable=False, unique=True)

    # Relationships
    investors = relationship(
        "InvestorTable",
        secondary=investor_stage_association,
        back_populates="investment_stages",
    )
    funds = relationship(
        "FundTable",
        secondary=fund_investment_stages_association,
        back_populates="investment_stages",
    )
|
||||
|
||||
|
||||
class CompanyTable(Base, TimestampMixin):
|
||||
@@ -307,20 +336,23 @@ class SectorTable(Base, TimestampMixin):
|
||||
id = Column(Integer, primary_key=True, index=True)
|
||||
name = Column(String, nullable=False)
|
||||
|
||||
# Add relationship back to investors
|
||||
# Relationships
|
||||
investors = relationship(
|
||||
"InvestorTable",
|
||||
secondary=investor_sector_association,
|
||||
back_populates="sectors",
|
||||
)
|
||||
|
||||
companies = relationship(
|
||||
"CompanyTable", secondary=company_sector_association, back_populates="sectors"
|
||||
)
|
||||
|
||||
projects = relationship(
|
||||
"ProjectTable", secondary=project_sector_association, back_populates="sector"
|
||||
)
|
||||
funds = relationship(
|
||||
"FundTable",
|
||||
secondary=fund_sectors_association,
|
||||
back_populates="sectors",
|
||||
)
|
||||
|
||||
|
||||
class ProjectTable(Base, TimestampMixin):
|
||||
|
||||
Binary file not shown.
Reference in New Issue
Block a user