Files
Anton_wireframe/preprocessor/migrate_fund_schema.py
T
bolade d341cacb9a Refactor investor and fund schemas to support new check size range
- Removed deprecated `stage_focus` column from `InvestorTable` and `InvestorSchema`.
- Updated `FundTable` to change `fund_size` from VARCHAR to INTEGER and added `check_size_lower` and `check_size_upper` columns.
- Modified API routes to return investor-fund combinations as separate entries.
- Created new `InvestorFundData` schema for combined investor-fund responses.
- Implemented LLM parsing for check size range from estimated investment size.
- Updated database migration script to reflect schema changes and ensure data integrity.
- Removed obsolete verification and test scripts related to the old schema.
2025-10-07 15:24:36 +01:00

160 lines
6.0 KiB
Python

"""
Migration script to update FundTable schema:
- Change fund_size from VARCHAR to INTEGER
- Remove estimated_investment_size column
- Add check_size_lower INTEGER column
- Add check_size_upper INTEGER column
"""
import sys
from pathlib import Path
# Add preprocessor to path
sys.path.insert(0, str(Path(__file__).parent))
from models import engine
from sqlalchemy import text
def migrate_fund_table():
    """
    Migrate the funds table to add check_size fields and update fund_size type.

    SQLite doesn't support ALTER COLUMN directly, so the table is rebuilt:

    1. Create ``funds_new`` with the target schema (dropping any stale copy
       left behind by an earlier failed run).
    2. Copy every row from ``funds``, casting ``fund_size`` to INTEGER and
       leaving ``check_size_lower`` / ``check_size_upper`` as NULL.
    3. Verify that the copied row count matches the source row count.
    4. Drop ``funds`` and rename ``funds_new`` to ``funds``.

    The function is a no-op when ``funds`` already has both
    ``check_size_lower`` and ``check_size_upper`` columns.

    Raises:
        RuntimeError: If the number of rows in ``funds_new`` differs from
            the number of rows in ``funds`` after the copy.
        Exception: Any error raised by the database layer is re-raised
            after the transaction has been rolled back.
    """
    print("🔄 Starting fund table migration...")

    with engine.connect() as conn:
        # Start transaction
        trans = conn.begin()
        try:
            # Check if migration is needed: map column name -> declared type.
            result = conn.execute(text("PRAGMA table_info(funds)"))
            columns = {row[1]: row[2] for row in result}

            if "check_size_lower" in columns and "check_size_upper" in columns:
                print("✅ Migration already applied - check_size columns exist")
                # Nothing was changed; end the transaction explicitly rather
                # than relying on connection close to discard it.
                trans.rollback()
                return

            print("📊 Current columns:", list(columns.keys()))

            # Create new table with updated schema
            print("\n1️⃣ Creating new funds table with updated schema...")
            # A previous failed run may have left funds_new behind (DDL is not
            # always rolled back by the SQLite driver). Reusing it could
            # duplicate rows or keep an outdated schema, so start clean.
            conn.execute(text("DROP TABLE IF EXISTS funds_new"))
            conn.execute(
                text("""
                    CREATE TABLE funds_new (
                        id INTEGER PRIMARY KEY,
                        investor_id INTEGER NOT NULL,
                        fund_name VARCHAR,
                        fund_size INTEGER,
                        fund_size_source_url VARCHAR,
                        check_size_lower INTEGER,
                        check_size_upper INTEGER,
                        source_url VARCHAR,
                        source_provider VARCHAR,
                        geographic_focus JSON,
                        investment_stage_focus JSON,
                        sector_focus JSON,
                        created_at DATETIME DEFAULT CURRENT_TIMESTAMP NOT NULL,
                        updated_at DATETIME,
                        FOREIGN KEY (investor_id) REFERENCES investors(id)
                    )
                """)
            )
            # NOTE(review): indexes or triggers defined on the old table are
            # not recreated here - confirm none are needed.

            # Copy data from old table to new table
            print("2️⃣ Copying data from old table...")
            # The copy is the same whether or not the old
            # estimated_investment_size column exists: that column is simply
            # not carried over, and the check_size fields are set to NULL for
            # now - they'll be repopulated when re-parsing.
            # NOTE(review): SQLite's CAST(... AS INTEGER) turns text without a
            # leading number (e.g. "", "$10M") into 0 rather than NULL -
            # confirm existing fund_size values are plain digits.
            conn.execute(
                text("""
                    INSERT INTO funds_new (
                        id, investor_id, fund_name, fund_size, fund_size_source_url,
                        check_size_lower, check_size_upper,
                        source_url, source_provider,
                        geographic_focus, investment_stage_focus, sector_focus,
                        created_at, updated_at
                    )
                    SELECT
                        id, investor_id, fund_name,
                        CAST(fund_size AS INTEGER) as fund_size,
                        fund_size_source_url,
                        NULL as check_size_lower,
                        NULL as check_size_upper,
                        source_url, source_provider,
                        geographic_focus, investment_stage_focus, sector_focus,
                        created_at, updated_at
                    FROM funds
                """)
            )

            rows_expected = conn.execute(
                text("SELECT COUNT(*) FROM funds")
            ).fetchone()[0]
            rows_copied = conn.execute(
                text("SELECT COUNT(*) FROM funds_new")
            ).fetchone()[0]
            # Never drop the original table unless every row made it across.
            if rows_copied != rows_expected:
                raise RuntimeError(
                    f"Row count mismatch: funds has {rows_expected} rows "
                    f"but funds_new has {rows_copied}"
                )
            print(f" ✅ Copied {rows_copied} rows")

            # Drop old table
            print("3️⃣ Dropping old funds table...")
            conn.execute(text("DROP TABLE funds"))

            # Rename new table
            print("4️⃣ Renaming funds_new to funds...")
            conn.execute(text("ALTER TABLE funds_new RENAME TO funds"))

            # Commit transaction
            trans.commit()

            print("\n✅ Migration completed successfully!")
            print("\n📝 Summary:")
            print(" - fund_size: VARCHAR → INTEGER")
            print(" - estimated_investment_size: REMOVED")
            print(" - check_size_lower: ADDED (INTEGER)")
            print(" - check_size_upper: ADDED (INTEGER)")
            print(f" - {rows_copied} fund records migrated")
            print(
                "\n⚠️ Note: check_size_lower and check_size_upper are NULL for existing records."
            )
            print(" Run the investor CSV parser again to populate these fields.")
        except Exception as e:
            # Undo any partial work, report, and let the caller see the error.
            trans.rollback()
            print(f"\n❌ Migration failed: {e}")
            raise
# Run the migration only when this file is executed as a script, not on import.
if __name__ == "__main__":
    migrate_fund_table()