Files
Anton_wireframe/preprocessor/migrate_database.py
T
2025-10-05 19:16:03 +01:00

132 lines
4.7 KiB
Python

"""
Migration script to update existing database schema
Converts AUM from INTEGER to TEXT and adds new columns
"""
import logging
import sqlite3
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)


def _table_columns(cursor, table):
    """Return ``{column_name: declared_type}`` for *table*.

    SQLite returns no rows for a table that does not exist, so the result is
    an empty dict in that case. *table* is interpolated into the statement
    (PRAGMA arguments cannot be bound), so only pass trusted, hard-coded names.
    """
    cursor.execute(f"PRAGMA table_info({table});")
    return {row[1]: row[2] for row in cursor.fetchall()}


def migrate_database(db_path="version_two.db"):
    """Migrate an existing SQLite database to the new schema.

    Steps, each guarded by a check of the current schema so that re-running
    the migration on an already migrated database changes nothing:

    1. Convert ``investors.aum`` from INTEGER to TEXT (rename the old column,
       add a TEXT column, copy the values, drop the old column).
    2. Add the new ``investors`` columns that are missing.
    3. Add ``title`` and ``source_url`` to ``investor_members`` if missing.
    4. Create the ``funds`` table if it does not exist.

    All schema and data changes run inside one explicit transaction, so a
    failure at any step rolls the database back to its pre-migration state.

    Args:
        db_path: Path to the SQLite database file.

    Raises:
        Exception: Any error raised while migrating is logged and re-raised
            after the transaction has been rolled back.
    """
    # isolation_level=None disables sqlite3's implicit transaction handling.
    # With the default setting a transaction is opened implicitly only before
    # INSERT/UPDATE/DELETE/REPLACE, so the leading ALTER TABLE statements would
    # be committed immediately and a later rollback could not undo them.
    conn = sqlite3.connect(db_path, isolation_level=None)
    cursor = conn.cursor()
    logger.info("Starting database migration...")
    try:
        # One explicit transaction for the whole migration (SQLite DDL is
        # transactional, so ALTER/CREATE are rolled back on failure too).
        cursor.execute("BEGIN;")

        # Check current schema
        columns = _table_columns(cursor, "investors")

        # 1. Convert AUM from INTEGER to TEXT
        # The declared type may be reported with the casing used in the
        # original CREATE TABLE, so compare case-insensitively.
        if (columns.get("aum") or "").upper() == "INTEGER":
            logger.info("Converting AUM from INTEGER to TEXT...")
            cursor.execute("ALTER TABLE investors RENAME COLUMN aum TO aum_old;")
            cursor.execute("ALTER TABLE investors ADD COLUMN aum TEXT;")
            cursor.execute(
                "UPDATE investors SET aum = CAST(aum_old AS TEXT) WHERE aum_old IS NOT NULL;"
            )
            cursor.execute("ALTER TABLE investors DROP COLUMN aum_old;")
            logger.info("✅ AUM converted to TEXT")

        # 2. Add new columns if they don't exist
        new_columns = {
            "headquarters": "TEXT",
            "aum_as_of_date": "TEXT",
            "aum_source_url": "TEXT",
            "investment_thesis": "JSON",
            "portfolio_highlights": "JSON",
            "linked_documents": "JSON",
            "researcher_notes": "TEXT",
            "missing_important_fields": "JSON",
            "sources": "JSON",
        }
        for col_name, col_type in new_columns.items():
            if col_name not in columns:
                logger.info(f"Adding column: {col_name} ({col_type})")
                # Names and types come from the hard-coded dict above, so
                # interpolating them into the statement is safe.
                cursor.execute(
                    f"ALTER TABLE investors ADD COLUMN {col_name} {col_type};"
                )

        # 3. Add new columns to investor_members if they don't exist
        member_columns = _table_columns(cursor, "investor_members")
        if "title" not in member_columns:
            logger.info("Adding 'title' to investor_members")
            cursor.execute("ALTER TABLE investor_members ADD COLUMN title TEXT;")
        if "source_url" not in member_columns:
            logger.info("Adding 'source_url' to investor_members")
            cursor.execute("ALTER TABLE investor_members ADD COLUMN source_url TEXT;")

        # 4. Check if funds table exists
        cursor.execute(
            "SELECT name FROM sqlite_master WHERE type='table' AND name='funds';"
        )
        if not cursor.fetchone():
            logger.info("Creating funds table...")
            cursor.execute("""
                CREATE TABLE funds (
                    id INTEGER NOT NULL PRIMARY KEY,
                    investor_id INTEGER NOT NULL,
                    fund_name VARCHAR,
                    fund_size VARCHAR,
                    fund_size_source_url VARCHAR,
                    estimated_investment_size VARCHAR,
                    source_url VARCHAR,
                    source_provider VARCHAR,
                    geographic_focus JSON,
                    investment_stage_focus JSON,
                    sector_focus JSON,
                    created_at DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP,
                    updated_at DATETIME,
                    FOREIGN KEY(investor_id) REFERENCES investors (id)
                );
            """)
            logger.info("✅ Funds table created")

        conn.commit()
        logger.info("\n🎉 Migration completed successfully!")

        # Show summary
        investor_cols = _table_columns(cursor, "investors")
        logger.info(f"\nInvestors table now has {len(investor_cols)} columns")
        cursor.execute("SELECT COUNT(*) FROM investors;")
        investor_count = cursor.fetchone()[0]
        logger.info(f"Investors in database: {investor_count}")
        cursor.execute("SELECT COUNT(*) FROM funds;")
        fund_count = cursor.fetchone()[0]
        logger.info(f"Funds in database: {fund_count}")
    except Exception as e:
        logger.error(f"Migration failed: {e}")
        # Undoes every change made since BEGIN; a no-op if nothing is pending.
        conn.rollback()
        raise
    finally:
        conn.close()
if __name__ == "__main__":
    import sys

    # The optional first command-line argument overrides the default database path.
    cli_args = sys.argv[1:]
    db_file = cli_args[0] if cli_args else "version_two.db"

    print(f"Migrating database: {db_file}")
    print("⚠️ This will modify your database. Make sure you have a backup!")

    # Only an explicit "yes"/"y" (any casing) proceeds; anything else cancels.
    answer = input("Continue? (yes/no): ").lower()
    if answer not in ("yes", "y"):
        print("Migration cancelled")
    else:
        migrate_database(db_file)