d341cacb9a
- Removed deprecated `stage_focus` column from `InvestorTable` and `InvestorSchema`.
- Updated `FundTable` to change `fund_size` from VARCHAR to INTEGER and added `check_size_lower` and `check_size_upper` columns.
- Modified API routes to return investor-fund combinations as separate entries.
- Created new `InvestorFundData` schema for combined investor-fund responses.
- Implemented LLM parsing for check size range from estimated investment size.
- Updated database migration script to reflect schema changes and ensure data integrity.
- Removed obsolete verification and test scripts related to the old schema.
160 lines
6.0 KiB
Python
160 lines
6.0 KiB
Python
"""
|
|
Migration script to update FundTable schema:
|
|
- Change fund_size from VARCHAR to INTEGER
|
|
- Remove estimated_investment_size column
|
|
- Add check_size_lower INTEGER column
|
|
- Add check_size_upper INTEGER column
|
|
"""
|
|
|
|
import sys
|
|
from pathlib import Path
|
|
|
|
# Add preprocessor to path
|
|
sys.path.insert(0, str(Path(__file__).parent))
|
|
|
|
from models import engine
|
|
from sqlalchemy import text
|
|
|
|
|
|
def migrate_fund_table():
    """
    Migrate the funds table to add check_size fields and update fund_size type.

    SQLite doesn't support ALTER COLUMN directly, so the table is rebuilt:
    1. Create new table (funds_new) with correct schema
    2. Copy data from old table
    3. Drop old table
    4. Rename new table

    Behaviour:
    - Returns early, changing nothing, when both check_size_lower and
      check_size_upper already exist on the funds table.
    - check_size_lower / check_size_upper are NULL for every copied row.
    - fund_size is converted only when the stored text consists solely of
      digits; any other value (empty, "$50M", "1,000,000", ...) is stored as
      NULL. A plain CAST would keep just the leading integer prefix, or 0
      when there is none, and silently write wrong numbers.
    - estimated_investment_size, if present, is not copied.

    Raises:
        Exception: any error raised while migrating is re-raised after the
            transaction has been rolled back and the failure printed.
    """
    # SQL predicate that is true only for non-empty, digits-only fund_size text.
    # It is a fixed constant (no external input), so interpolating it is safe.
    digits_only = (
        "TRIM(fund_size) <> '' AND TRIM(fund_size) NOT GLOB '*[^0-9]*'"
    )

    print("🔄 Starting fund table migration...")

    with engine.connect() as conn:
        # Start transaction
        trans = conn.begin()

        try:
            # Check if migration is needed. PRAGMA table_info yields one row
            # per column as (cid, name, type, ...), so this maps each column
            # name to its declared type.
            result = conn.execute(text("PRAGMA table_info(funds)"))
            columns = {row[1]: row[2] for row in result}

            if "check_size_lower" in columns and "check_size_upper" in columns:
                print("✅ Migration already applied - check_size columns exist")
                return

            print("📊 Current columns:", list(columns.keys()))

            # Create new table with updated schema
            print("\n1️⃣ Creating new funds table with updated schema...")

            # Discard any funds_new left over from an earlier aborted run so a
            # stale schema or stale rows are never reused.
            # NOTE(review): with the default pysqlite driver, DDL issued before
            # the first DML statement may run outside the transaction, which is
            # how such a leftover can survive a rollback - confirm for the
            # driver in use.
            conn.execute(text("DROP TABLE IF EXISTS funds_new"))
            conn.execute(
                text("""
                    CREATE TABLE funds_new (
                        id INTEGER PRIMARY KEY,
                        investor_id INTEGER NOT NULL,
                        fund_name VARCHAR,
                        fund_size INTEGER,
                        fund_size_source_url VARCHAR,
                        check_size_lower INTEGER,
                        check_size_upper INTEGER,
                        source_url VARCHAR,
                        source_provider VARCHAR,
                        geographic_focus JSON,
                        investment_stage_focus JSON,
                        sector_focus JSON,
                        created_at DATETIME DEFAULT CURRENT_TIMESTAMP NOT NULL,
                        updated_at DATETIME,
                        FOREIGN KEY (investor_id) REFERENCES investors(id)
                    )
                """)
            )

            # Copy data from old table to new table
            print("2️⃣ Copying data from old table...")

            # Report how many fund_size values cannot be converted, since they
            # will be written as NULL rather than as a misleading number.
            unparsed = conn.execute(
                text(
                    "SELECT COUNT(*) FROM funds "
                    f"WHERE fund_size IS NOT NULL AND NOT ({digits_only})"
                )
            ).fetchone()[0]
            if unparsed:
                print(
                    f" ⚠️ {unparsed} non-numeric fund_size value(s) will be stored as NULL"
                )

            # The copy is the same whether or not the old
            # estimated_investment_size column exists: that column is never
            # selected, and the check_size fields start as NULL until the
            # data is re-parsed.
            conn.execute(
                text(f"""
                    INSERT INTO funds_new (
                        id, investor_id, fund_name, fund_size, fund_size_source_url,
                        check_size_lower, check_size_upper,
                        source_url, source_provider,
                        geographic_focus, investment_stage_focus, sector_focus,
                        created_at, updated_at
                    )
                    SELECT
                        id, investor_id, fund_name,
                        CASE
                            WHEN {digits_only}
                            THEN CAST(TRIM(fund_size) AS INTEGER)
                            ELSE NULL
                        END as fund_size,
                        fund_size_source_url,
                        NULL as check_size_lower,
                        NULL as check_size_upper,
                        source_url, source_provider,
                        geographic_focus, investment_stage_focus, sector_focus,
                        created_at, updated_at
                    FROM funds
                """)
            )

            rows_copied = conn.execute(
                text("SELECT COUNT(*) FROM funds_new")
            ).fetchone()[0]
            print(f" ✅ Copied {rows_copied} rows")

            # Drop old table
            print("3️⃣ Dropping old funds table...")
            conn.execute(text("DROP TABLE funds"))

            # Rename new table
            print("4️⃣ Renaming funds_new to funds...")
            conn.execute(text("ALTER TABLE funds_new RENAME TO funds"))

            # Commit transaction
            trans.commit()

            print("\n✅ Migration completed successfully!")
            print("\n📝 Summary:")
            print(" - fund_size: VARCHAR → INTEGER")
            print(" - estimated_investment_size: REMOVED")
            print(" - check_size_lower: ADDED (INTEGER)")
            print(" - check_size_upper: ADDED (INTEGER)")
            print(f" - {rows_copied} fund records migrated")

            print(
                "\n⚠️ Note: check_size_lower and check_size_upper are NULL for existing records."
            )
            print(" Run the investor CSV parser again to populate these fields.")

        except Exception as e:
            # Top-level boundary of the script: undo partial work, report the
            # failure, and let the exception propagate to the caller.
            trans.rollback()
            print(f"\n❌ Migration failed: {e}")
            raise
|
|
|
|
|
|
# Script entry point: run the migration only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    migrate_fund_table()
|