a9589e54f3
- Updated FundTable to replace JSON fields for investment stages and sectors with relationships. - Introduced InvestmentStageTable and fund_investment_stages association table. - Created fund_sectors association table for many-to-many relationship with sectors. - Changed geographic_focus from JSON array to a simple string. - Migrated existing data to new schema, ensuring data integrity and normalization. - Updated related schemas, routers, and services to reflect new structure. - Added migration script to handle data transformation and schema updates. - Implemented tests to verify new relationships and data integrity.
382 lines
11 KiB
Python
382 lines
11 KiB
Python
import enum
|
|
from typing import Annotated
|
|
|
|
from fastapi import Depends
|
|
from sqlalchemy import (
|
|
Column,
|
|
DateTime,
|
|
ForeignKey,
|
|
Integer,
|
|
String,
|
|
Table,
|
|
Text,
|
|
create_engine,
|
|
func,
|
|
)
|
|
from sqlalchemy.ext.declarative import declarative_base
|
|
from sqlalchemy.orm import Session, declarative_mixin, relationship, sessionmaker
|
|
from sqlalchemy.types import JSON, Enum
|
|
|
|
# Declarative base class that every ORM model in this module inherits from.
Base = declarative_base()

# Database configuration.
# NOTE(review): the URL is hard-coded; an environment override
# (os.getenv("DATABASE_URL", "sqlite:///./investors.db")) was left commented
# out here and is not active.
_DATABASE_URL = "sqlite:///./version_two.db"

# Engine for the local SQLite file, with SQL echo logging turned off.
engine = create_engine(_DATABASE_URL, echo=False)

# Session factory bound to the engine; autoflush and autocommit are disabled.
SessionLocal = sessionmaker(bind=engine, autoflush=False, autocommit=False)
|
|
|
|
|
|
def get_db():
    """Yield a session from ``SessionLocal`` and close it afterwards.

    Written as a generator so it can be wrapped by ``Depends``; the ``finally``
    clause closes the session even when the consumer raises.
    """
    session = SessionLocal()
    try:
        yield session
    finally:
        session.close()
|
|
|
|
|
|
# Annotated alias pairing the ``Session`` type with ``Depends(get_db)``, so a
# parameter annotated with it receives the session yielded by ``get_db``.
db_dependency = Annotated[Session, Depends(get_db)]
|
|
|
|
|
|
def init_database():
    """Create all tables registered on ``Base.metadata`` using the module engine."""
    metadata = Base.metadata
    metadata.create_all(bind=engine)
|
|
|
|
|
|
def get_session_sync() -> Session:
    """Return a new session from ``SessionLocal`` for synchronous operations.

    The session is not closed here; the caller is responsible for closing it.
    """
    session = SessionLocal()
    return session
|
|
|
|
|
|
def get_db_session():
    """Return a new session from ``SessionLocal`` for direct use.

    The session is not closed here; the caller is responsible for closing it.
    """
    session = SessionLocal()
    return session
|
|
|
|
|
|
@declarative_mixin
class TimestampMixin:
    """Mixin that adds ``created_at`` and ``updated_at`` columns to a model."""

    # Filled in by the database at insert time via the server-side default.
    created_at = Column(
        DateTime(timezone=True),
        nullable=False,
        server_default=func.now(),
    )

    # Only an ``onupdate`` value is configured (no insert default), so the
    # column is left unset on insert.
    updated_at = Column(
        DateTime(timezone=True),
        onupdate=func.now(),
    )
|
|
|
|
|
|
class InvestmentStage(enum.Enum):
    """Closed set of investment stages; each member's value equals its name."""

    SEED = "SEED"
    SERIES_A = "SERIES_A"
    SERIES_B = "SERIES_B"
    SERIES_C = "SERIES_C"
    GROWTH = "GROWTH"
    LATE_STAGE = "LATE_STAGE"
|
|
|
|
|
|
def _association_table(table_name, *links):
    """Build a link table registered on ``Base.metadata``.

    Each entry of *links* is a ``(column_name, "table.column")`` pair and
    becomes an ``Integer`` column carrying a ``ForeignKey`` to that target.
    Columns are created in the order given.
    """
    fk_columns = [
        Column(column_name, Integer, ForeignKey(target))
        for column_name, target in links
    ]
    return Table(table_name, Base.metadata, *fk_columns)


# NOTE(review): none of the link tables below declares a primary key or a
# unique constraint, so duplicate link rows are not prevented at the schema
# level - confirm whether that is intended.

# Investors <-> companies (many-to-many).
investor_company_association = _association_table(
    "investor_companies",
    ("investor_id", "investors.id"),
    ("company_id", "companies.id"),
)

# Investors <-> sectors (many-to-many).
investor_sector_association = _association_table(
    "investor_sectors",
    ("investor_id", "investors.id"),
    ("sector_id", "sectors.id"),
)

# Companies <-> sectors (many-to-many).
company_sector_association = _association_table(
    "company_sector",
    ("company_id", "companies.id"),
    ("sector_id", "sectors.id"),
)

# Projects <-> sectors (many-to-many).
project_sector_association = _association_table(
    "project_sector",
    ("project_id", "projects.id"),
    ("sector_id", "sectors.id"),
)

# Projects <-> investors (many-to-many).
project_investor_association = _association_table(
    "project_investors",
    ("project_id", "projects.id"),
    ("investor_id", "investors.id"),
)

# Projects <-> companies (many-to-many).
project_company_association = _association_table(
    "project_companies",
    ("project_id", "projects.id"),
    ("company_id", "companies.id"),
)

# Investors <-> investment stages (many-to-many).
investor_stage_association = _association_table(
    "investor_stages",
    ("investor_id", "investors.id"),
    ("stage_id", "investment_stages.id"),
)

# Funds <-> investment stages (many-to-many).
fund_investment_stages_association = _association_table(
    "fund_investment_stages",
    ("fund_id", "funds.id"),
    ("stage_id", "investment_stages.id"),
)

# Funds <-> sectors (many-to-many).
fund_sectors_association = _association_table(
    "fund_sectors",
    ("fund_id", "funds.id"),
    ("sector_id", "sectors.id"),
)
|
|
|
|
|
|
class InvestorTable(Base, TimestampMixin):
    """ORM model mapped to the ``investors`` table."""

    __tablename__ = "investors"

    id = Column(Integer, index=True, primary_key=True)
    name = Column(String, nullable=False)
    description = Column(Text, nullable=True)

    # --- Basic investor info ---
    website = Column(String, nullable=True)
    headquarters = Column(String, nullable=True)

    # --- Assets under management ---
    # Kept as an integer so it can be filtered numerically.
    aum = Column(Integer, nullable=True)
    aum_as_of_date = Column(String, nullable=True)
    aum_source_url = Column(String, nullable=True)

    # --- Check size ---
    # Deprecated in favor of fund-level data; kept for backward compatibility.
    check_size_lower = Column(Integer, nullable=True)
    check_size_upper = Column(Integer, nullable=True)

    # --- Geographic focus ---
    # Deprecated in favor of fund-level data; kept for backward compatibility.
    geographic_focus = Column(String, nullable=True)

    # --- Investment thesis and portfolio (JSON columns) ---
    # Array of thesis statements.
    investment_thesis = Column(JSON, nullable=True)
    # Array of portfolio company names.
    portfolio_highlights = Column(JSON, nullable=True)
    # Array of document URLs.
    linked_documents = Column(JSON, nullable=True)

    # --- Research metadata ---
    researcher_notes = Column(Text, nullable=True)
    # Array of missing field names.
    missing_important_fields = Column(JSON, nullable=True)
    # JSON object with source URLs.
    sources = Column(JSON, nullable=True)

    # --- Portfolio info ---
    number_of_investments = Column(Integer, nullable=True)

    # --- One-to-many children (deleted together with the investor) ---
    team_members = relationship(
        "InvestorMember",
        cascade="all, delete-orphan",
        back_populates="investor",
    )
    funds = relationship(
        "FundTable",
        cascade="all, delete-orphan",
        back_populates="investor",
    )

    # --- Many-to-many links, each through its association table ---
    investment_stages = relationship(
        "InvestmentStageTable",
        back_populates="investors",
        secondary=investor_stage_association,
    )
    portfolio_companies = relationship(
        "CompanyTable",
        back_populates="investors",
        secondary=investor_company_association,
    )
    sectors = relationship(
        "SectorTable",
        back_populates="investors",
        secondary=investor_sector_association,
    )
    projects = relationship(
        "ProjectTable",
        back_populates="investors",
        secondary=project_investor_association,
    )
|
|
|
|
|
|
class InvestorMember(Base, TimestampMixin):
    """ORM model mapped to the ``investor_members`` table."""

    __tablename__ = "investor_members"

    id = Column(Integer, index=True, primary_key=True)
    name = Column(String, nullable=False)
    role = Column(String, nullable=True)
    # Alternative to ``role``.
    title = Column(String, nullable=True)
    email = Column(String, nullable=True)
    # URL where the member info was found.
    source_url = Column(String, nullable=True)

    # Owning investor; the other side is ``InvestorTable.team_members``.
    investor_id = Column(Integer, ForeignKey("investors.id"))
    investor = relationship(
        "InvestorTable",
        back_populates="team_members",
    )
|
|
|
|
|
|
class FundTable(Base, TimestampMixin):
    """ORM model mapped to the ``funds`` table; each fund belongs to one investor."""

    __tablename__ = "funds"

    id = Column(Integer, index=True, primary_key=True)
    investor_id = Column(Integer, ForeignKey("investors.id"), nullable=False)

    # --- Fund details ---
    fund_name = Column(String, nullable=True)
    # Kept as an integer so it can be filtered numerically.
    fund_size = Column(Integer, nullable=True)
    fund_size_source_url = Column(String, nullable=True)

    # --- Check size range (parsed from estimated_investment_size by LLM) ---
    check_size_lower = Column(Integer, nullable=True)
    check_size_upper = Column(Integer, nullable=True)

    # --- Provenance ---
    source_url = Column(String, nullable=True)
    # e.g., "Perplexity"
    source_provider = Column(String, nullable=True)

    # Geographic focus stored as a simple string.
    geographic_focus = Column(String, nullable=True)

    # --- Relationships ---
    investor = relationship(
        "InvestorTable",
        back_populates="funds",
    )
    investment_stages = relationship(
        "InvestmentStageTable",
        back_populates="funds",
        secondary=fund_investment_stages_association,
    )
    sectors = relationship(
        "SectorTable",
        back_populates="funds",
        secondary=fund_sectors_association,
    )
|
|
|
|
|
|
class InvestmentStageTable(Base, TimestampMixin):
    """ORM model mapped to the ``investment_stages`` table (unique stage names)."""

    __tablename__ = "investment_stages"

    id = Column(Integer, index=True, primary_key=True)
    name = Column(String, unique=True, nullable=False)

    # --- Many-to-many links, each through its association table ---
    investors = relationship(
        "InvestorTable",
        back_populates="investment_stages",
        secondary=investor_stage_association,
    )
    funds = relationship(
        "FundTable",
        back_populates="investment_stages",
        secondary=fund_investment_stages_association,
    )
|
|
|
|
|
|
class CompanyTable(Base, TimestampMixin):
    """ORM model mapped to the ``companies`` table."""

    __tablename__ = "companies"

    id = Column(Integer, index=True, primary_key=True)
    name = Column(String, nullable=False)
    industry = Column(String, nullable=True)
    location = Column(String, nullable=True)
    description = Column(String, nullable=True)
    founded_year = Column(Integer, nullable=True)
    website = Column(String, nullable=True)

    # One-to-many children (deleted together with the company).
    members = relationship(
        "CompanyMember",
        cascade="all, delete-orphan",
        back_populates="company",
    )

    # Reverse side of ``InvestorTable.portfolio_companies``.
    investors = relationship(
        "InvestorTable",
        back_populates="portfolio_companies",
        secondary=investor_company_association,
    )

    sectors = relationship(
        "SectorTable",
        back_populates="companies",
        secondary=company_sector_association,
    )

    projects = relationship(
        "ProjectTable",
        back_populates="companies",
        secondary=project_company_association,
    )
|
|
|
|
|
|
class CompanyMember(Base, TimestampMixin):
    """ORM model mapped to the ``company_members`` table."""

    __tablename__ = "company_members"

    id = Column(Integer, primary_key=True)
    name = Column(String)
    linkedin = Column(String, nullable=True)
    role = Column(String, nullable=True)

    # Owning company (required); the other side is ``CompanyTable.members``.
    company_id = Column(Integer, ForeignKey("companies.id"), nullable=False)
    company = relationship(
        "CompanyTable",
        back_populates="members",
    )
|
|
|
|
|
|
class SectorTable(Base, TimestampMixin):
    """ORM model mapped to the ``sectors`` table."""

    __tablename__ = "sectors"

    id = Column(Integer, index=True, primary_key=True)
    name = Column(String, nullable=False)

    # --- Many-to-many links, each through its association table ---
    investors = relationship(
        "InvestorTable",
        back_populates="sectors",
        secondary=investor_sector_association,
    )
    companies = relationship(
        "CompanyTable",
        back_populates="sectors",
        secondary=company_sector_association,
    )
    # The attribute on the project side is named ``sector`` (singular).
    projects = relationship(
        "ProjectTable",
        back_populates="sector",
        secondary=project_sector_association,
    )
    funds = relationship(
        "FundTable",
        back_populates="sectors",
        secondary=fund_sectors_association,
    )
|
|
|
|
|
|
class ProjectTable(Base, TimestampMixin):
    """ORM model mapped to the ``projects`` table."""

    __tablename__ = "projects"

    id = Column(Integer, index=True, primary_key=True)
    name = Column(String, nullable=False)
    valuation = Column(Integer, nullable=True)

    # Stored through an ``Enum`` column type built from ``InvestmentStage``.
    stage = Column(Enum(InvestmentStage), nullable=True)
    location = Column(String, nullable=True)
    description = Column(Text, nullable=True)
    start_date = Column(DateTime, nullable=True)
    end_date = Column(DateTime, nullable=True)

    # --- Many-to-many links, each through its association table ---
    # Singular name, but it goes through an association table like the others.
    sector = relationship(
        "SectorTable",
        back_populates="projects",
        secondary=project_sector_association,
    )
    investors = relationship(
        "InvestorTable",
        back_populates="projects",
        secondary=project_investor_association,
    )
    companies = relationship(
        "CompanyTable",
        back_populates="projects",
        secondary=project_company_association,
    )
|