Refactor investor and fund schemas to support new check size range
- Removed deprecated `stage_focus` column from `InvestorTable` and `InvestorSchema`.
- Updated `FundTable` to change `fund_size` from VARCHAR to INTEGER and added `check_size_lower` and `check_size_upper` columns.
- Modified API routes to return investor-fund combinations as separate entries.
- Created new `InvestorFundData` schema for combined investor-fund responses.
- Implemented LLM parsing for check size range from estimated investment size.
- Updated database migration script to reflect schema changes and ensure data integrity.
- Removed obsolete verification and test scripts related to the old schema.
This commit is contained in:
@@ -0,0 +1,159 @@
|
||||
"""
|
||||
Migration script to update FundTable schema:
|
||||
- Change fund_size from VARCHAR to INTEGER
|
||||
- Remove estimated_investment_size column
|
||||
- Add check_size_lower INTEGER column
|
||||
- Add check_size_upper INTEGER column
|
||||
"""
|
||||
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
# Add preprocessor to path
|
||||
sys.path.insert(0, str(Path(__file__).parent))
|
||||
|
||||
from models import engine
|
||||
from sqlalchemy import text
|
||||
|
||||
|
||||
def migrate_fund_table():
    """
    Migrate the funds table to add check_size fields and update fund_size type.

    SQLite doesn't support ALTER COLUMN directly, so the table is rebuilt:
    1. Create new table (funds_new) with the target schema
    2. Copy data from the old table
    3. Drop old table
    4. Rename new table to funds

    The estimated_investment_size column is not carried over, and
    check_size_lower / check_size_upper are left NULL for every copied row.

    fund_size is converted to INTEGER only when the stored text consists
    solely of digits. Any other value (currency prefix, thousands separators,
    suffixes such as "100M", empty string) is stored as NULL, because a plain
    SQLite CAST would silently turn it into 0 or a truncated leading number.

    The function does nothing when both check_size columns already exist.

    Raises:
        Exception: any error raised while migrating is reported on stdout and
            re-raised; the surrounding transaction is rolled back.
    """

    print("🔄 Starting fund table migration...")

    try:
        # engine.begin() commits when the block exits normally and rolls back
        # when an exception escapes it.
        # NOTE(review): depending on the sqlite3 driver / SQLAlchemy version,
        # DDL may not take part in the transaction - confirm the engine
        # configuration if an atomic table swap is required.
        with engine.begin() as conn:
            # PRAGMA table_info rows: index 1 is the column name, 2 its type.
            table_info = conn.execute(text("PRAGMA table_info(funds)"))
            columns = {row[1]: row[2] for row in table_info}

            if "check_size_lower" in columns and "check_size_upper" in columns:
                print("✅ Migration already applied - check_size columns exist")
                return

            print("📊 Current columns:", list(columns.keys()))

            # Create new table with updated schema
            print("\n1️⃣ Creating new funds table with updated schema...")
            conn.execute(
                text("""
                    CREATE TABLE IF NOT EXISTS funds_new (
                        id INTEGER PRIMARY KEY,
                        investor_id INTEGER NOT NULL,
                        fund_name VARCHAR,
                        fund_size INTEGER,
                        fund_size_source_url VARCHAR,
                        check_size_lower INTEGER,
                        check_size_upper INTEGER,
                        source_url VARCHAR,
                        source_provider VARCHAR,
                        geographic_focus JSON,
                        investment_stage_focus JSON,
                        sector_focus JSON,
                        created_at DATETIME DEFAULT CURRENT_TIMESTAMP NOT NULL,
                        updated_at DATETIME,
                        FOREIGN KEY (investor_id) REFERENCES investors(id)
                    )
                """)
            )

            # Copy data from old table to new table
            print("2️⃣ Copying data from old table...")

            # Count the values that cannot be converted so the loss is visible
            # in the summary instead of being hidden behind a bogus number.
            unparsed_fund_sizes = conn.execute(
                text("""
                    SELECT COUNT(*) FROM funds
                    WHERE fund_size IS NOT NULL
                      AND (
                          TRIM(fund_size) = ''
                          OR TRIM(fund_size) GLOB '*[^0-9]*'
                      )
                """)
            ).fetchone()[0]

            # estimated_investment_size (if present) is a free-form string and
            # is intentionally dropped; the check_size fields stay NULL until
            # the records are re-parsed. The same statement therefore serves
            # both old and partially migrated schemas.
            conn.execute(
                text("""
                    INSERT INTO funds_new (
                        id, investor_id, fund_name, fund_size, fund_size_source_url,
                        check_size_lower, check_size_upper,
                        source_url, source_provider,
                        geographic_focus, investment_stage_focus, sector_focus,
                        created_at, updated_at
                    )
                    SELECT
                        id, investor_id, fund_name,
                        CASE
                            WHEN TRIM(fund_size) <> ''
                                 AND TRIM(fund_size) NOT GLOB '*[^0-9]*'
                            THEN CAST(TRIM(fund_size) AS INTEGER)
                            ELSE NULL
                        END AS fund_size,
                        fund_size_source_url,
                        NULL AS check_size_lower,
                        NULL AS check_size_upper,
                        source_url, source_provider,
                        geographic_focus, investment_stage_focus, sector_focus,
                        created_at, updated_at
                    FROM funds
                """)
            )

            rows_copied = conn.execute(
                text("SELECT COUNT(*) FROM funds_new")
            ).fetchone()[0]
            print(f" ✅ Copied {rows_copied} rows")

            # Drop old table
            print("3️⃣ Dropping old funds table...")
            conn.execute(text("DROP TABLE funds"))

            # Rename new table
            print("4️⃣ Renaming funds_new to funds...")
            conn.execute(text("ALTER TABLE funds_new RENAME TO funds"))
    except Exception as e:
        print(f"\n❌ Migration failed: {e}")
        raise

    # Reached only after the transaction has been committed, so a problem
    # while printing the summary is never reported as a failed migration.
    print("\n✅ Migration completed successfully!")
    print("\n📝 Summary:")
    print(" - fund_size: VARCHAR → INTEGER")
    print(" - estimated_investment_size: REMOVED")
    print(" - check_size_lower: ADDED (INTEGER)")
    print(" - check_size_upper: ADDED (INTEGER)")
    print(f" - {rows_copied} fund records migrated")

    if unparsed_fund_sizes:
        print(
            f" - {unparsed_fund_sizes} fund_size value(s) were not plain integers"
            " and were stored as NULL"
        )

    print(
        "\n⚠️ Note: check_size_lower and check_size_upper are NULL for existing records."
    )
    print(" Run the investor CSV parser again to populate these fields.")
|
||||
|
||||
|
||||
# Script entry point: run the migration only when this file is executed
# directly, not when it is imported.
if __name__ == "__main__":
    migrate_fund_table()
|
||||
@@ -223,11 +223,15 @@ class FundTable(Base, TimestampMixin):
|
||||
|
||||
# Fund details
|
||||
fund_name = Column(String, nullable=True)
|
||||
fund_size = Column(String, nullable=True) # Store as string to preserve currency
|
||||
fund_size = Column(
|
||||
Integer, nullable=True
|
||||
) # Store as integer for numerical filtering
|
||||
fund_size_source_url = Column(String, nullable=True)
|
||||
estimated_investment_size = Column(
|
||||
String, nullable=True
|
||||
) # e.g., "EUR 1,000 to 2,000"
|
||||
|
||||
# Check size range (parsed from estimated_investment_size by LLM)
|
||||
check_size_lower = Column(Integer, nullable=True)
|
||||
check_size_upper = Column(Integer, nullable=True)
|
||||
|
||||
source_url = Column(String, nullable=True)
|
||||
source_provider = Column(String, nullable=True) # e.g., "Perplexity"
|
||||
|
||||
|
||||
Binary file not shown.
Reference in New Issue
Block a user