a9589e54f3
- Updated FundTable to replace JSON fields for investment stages and sectors with relationships. - Introduced InvestmentStageTable and fund_investment_stages association table. - Created fund_sectors association table for many-to-many relationship with sectors. - Changed geographic_focus from JSON array to a simple string. - Migrated existing data to new schema, ensuring data integrity and normalization. - Updated related schemas, routers, and services to reflect new structure. - Added migration script to handle data transformation and schema updates. - Implemented tests to verify new relationships and data integrity.
314 lines
9.6 KiB
Python
314 lines
9.6 KiB
Python
import enum
|
|
|
|
from sqlalchemy import Column, DateTime, ForeignKey, Integer, String, Table, Text, func
|
|
from sqlalchemy.orm import declarative_mixin, relationship
|
|
from sqlalchemy.types import JSON, Enum
|
|
|
|
from db.db import Base
|
|
|
|
|
|
@declarative_mixin
class TimestampMixin:
    """Mixin contributing ``created_at`` / ``updated_at`` audit columns."""

    # Populated by the database itself (server-side ``now()``) on INSERT.
    created_at = Column(
        DateTime(timezone=True),
        nullable=False,
        server_default=func.now(),
    )

    # No insert-time default is declared, so the value stays NULL until the
    # first UPDATE emitted through SQLAlchemy, when ``onupdate`` fills it in.
    updated_at = Column(
        DateTime(timezone=True),
        onupdate=func.now(),
    )
|
|
|
|
|
|
@enum.unique
class InvestmentStage(enum.Enum):
    """Closed set of investment stage identifiers.

    Every member's value is the same string as its name.  ``@enum.unique``
    makes an accidentally duplicated value raise ``ValueError`` when the
    module is imported instead of silently turning the second member into an
    alias of the first.
    """

    SEED = "SEED"
    SERIES_A = "SERIES_A"
    SERIES_B = "SERIES_B"
    SERIES_C = "SERIES_C"
    GROWTH = "GROWTH"
    LATE_STAGE = "LATE_STAGE"
|
|
|
|
|
|
# Link table backing the many-to-many relationship between investors and
# companies (one row per investor/company pair).
# NOTE(review): no primary key or unique constraint is declared, so duplicate
# pairs are not rejected by the schema — confirm this is intended.
investor_company_association = Table(
    "investor_companies",
    Base.metadata,
    Column("investor_id", Integer, ForeignKey("investors.id")),
    Column("company_id", Integer, ForeignKey("companies.id")),
)
|
|
|
|
|
|
# Link table backing the investor <-> sector many-to-many relationship.
# NOTE(review): no primary key or unique constraint is declared, so duplicate
# pairs are not rejected by the schema — confirm this is intended.
investor_sector_association = Table(
    "investor_sectors",
    Base.metadata,
    Column("investor_id", Integer, ForeignKey("investors.id")),
    Column("sector_id", Integer, ForeignKey("sectors.id")),
)
|
|
|
|
|
|
# Link table backing the company <-> sector many-to-many relationship.
# NOTE(review): no primary key or unique constraint is declared, so duplicate
# pairs are not rejected by the schema — confirm this is intended.
company_sector_association = Table(
    "company_sector",
    Base.metadata,
    Column("company_id", Integer, ForeignKey("companies.id")),
    Column("sector_id", Integer, ForeignKey("sectors.id")),
)
|
|
|
|
# Link table backing the project <-> sector many-to-many relationship.
# NOTE(review): no primary key or unique constraint is declared, so duplicate
# pairs are not rejected by the schema — confirm this is intended.
project_sector_association = Table(
    "project_sector",
    Base.metadata,
    Column("project_id", Integer, ForeignKey("projects.id")),
    Column("sector_id", Integer, ForeignKey("sectors.id")),
)
|
|
|
|
# Link table backing the project <-> investor many-to-many relationship.
# NOTE(review): no primary key or unique constraint is declared, so duplicate
# pairs are not rejected by the schema — confirm this is intended.
project_investor_association = Table(
    "project_investors",
    Base.metadata,
    Column("project_id", Integer, ForeignKey("projects.id")),
    Column("investor_id", Integer, ForeignKey("investors.id")),
)
|
|
|
|
# Link table backing the project <-> company many-to-many relationship.
# NOTE(review): no primary key or unique constraint is declared, so duplicate
# pairs are not rejected by the schema — confirm this is intended.
project_company_association = Table(
    "project_companies",
    Base.metadata,
    Column("project_id", Integer, ForeignKey("projects.id")),
    Column("company_id", Integer, ForeignKey("companies.id")),
)
|
|
|
|
# Link table backing the fund <-> investment-stage many-to-many relationship.
# NOTE(review): no primary key or unique constraint is declared, so duplicate
# pairs are not rejected by the schema — confirm this is intended.
fund_investment_stages_association = Table(
    "fund_investment_stages",
    Base.metadata,
    Column("fund_id", Integer, ForeignKey("funds.id")),
    Column("stage_id", Integer, ForeignKey("investment_stages.id")),
)
|
|
|
|
# Link table backing the fund <-> sector many-to-many relationship.
# NOTE(review): no primary key or unique constraint is declared, so duplicate
# pairs are not rejected by the schema — confirm this is intended.
fund_sectors_association = Table(
    "fund_sectors",
    Base.metadata,
    Column("fund_id", Integer, ForeignKey("funds.id")),
    Column("sector_id", Integer, ForeignKey("sectors.id")),
)
|
|
|
|
|
|
class InvestorTable(Base, TimestampMixin):
    """ORM model mapped to the ``investors`` table.

    Carries the investor's profile, AUM figures, research metadata and the
    relationships to team members, funds, portfolio companies, sectors and
    projects.
    """

    __tablename__ = "investors"

    id = Column(Integer, primary_key=True, index=True)
    name = Column(String, nullable=False)
    description = Column(Text, nullable=True)

    # --- Basic investor info ---
    website = Column(String, nullable=True)
    headquarters = Column(String, nullable=True)

    # --- Assets under management ---
    # Stored as an integer so it can be filtered numerically.
    # NOTE(review): if this holds raw currency units, a 32-bit INTEGER column
    # may be too small for large AUM values — confirm unit and backend.
    aum = Column(Integer, nullable=True)
    aum_as_of_date = Column(String, nullable=True)
    aum_source_url = Column(String, nullable=True)

    # --- Check size ---
    # Deprecated in favor of fund-level data; retained for backward
    # compatibility.
    check_size_lower = Column(Integer, nullable=True)
    check_size_upper = Column(Integer, nullable=True)

    # --- Geographic focus ---
    # Deprecated in favor of the fund-level field; retained for backward
    # compatibility.
    geographic_focus = Column(String, nullable=True)

    # --- Investment thesis and portfolio (JSON payloads) ---
    investment_thesis = Column(JSON, nullable=True)  # array of thesis statements
    portfolio_highlights = Column(JSON, nullable=True)  # array of portfolio company names
    linked_documents = Column(JSON, nullable=True)  # array of document URLs

    # --- Research metadata ---
    researcher_notes = Column(Text, nullable=True)
    missing_important_fields = Column(JSON, nullable=True)  # array of missing field names
    sources = Column(JSON, nullable=True)  # JSON object with source URLs

    # --- Portfolio info ---
    number_of_investments = Column(Integer, nullable=True, default=0)

    # --- Relationships ---
    # One-to-many children; "delete-orphan" removes members/funds that are
    # detached from (or deleted together with) this investor.
    team_members = relationship(
        "InvestorMember",
        cascade="all, delete-orphan",
        back_populates="investor",
    )
    funds = relationship(
        "FundTable",
        cascade="all, delete-orphan",
        back_populates="investor",
    )

    # Many-to-many: companies in this investor's portfolio.
    portfolio_companies = relationship(
        "CompanyTable",
        back_populates="investors",
        secondary=investor_company_association,
    )

    # Many-to-many: sectors linked to this investor.
    sectors = relationship(
        "SectorTable",
        back_populates="investors",
        secondary=investor_sector_association,
    )

    # Many-to-many: projects linked to this investor.
    projects = relationship(
        "ProjectTable",
        back_populates="investors",
        secondary=project_investor_association,
    )
|
|
|
|
|
|
class InvestorMember(Base, TimestampMixin):
    """ORM model mapped to ``investor_members``: one person on an investor's team."""

    __tablename__ = "investor_members"

    id = Column(Integer, primary_key=True, index=True)
    name = Column(String, nullable=False)
    role = Column(String, nullable=True)
    title = Column(String, nullable=True)  # alternative to ``role``
    email = Column(String, nullable=True)
    source_url = Column(String, nullable=True)  # URL where the member info was found

    # Owning investor; mirrored by ``InvestorTable.team_members``.
    # NOTE(review): the FK column is nullable (no ``nullable=False``) — confirm
    # members without an investor are meant to be allowed.
    investor_id = Column(Integer, ForeignKey("investors.id"))
    investor = relationship(
        "InvestorTable",
        back_populates="team_members",
    )
|
|
|
|
|
|
class FundTable(Base, TimestampMixin):
    """ORM model mapped to the ``funds`` table.

    Every fund belongs to exactly one investor (``investor_id`` is required)
    and is linked many-to-many to investment stages and sectors.
    """

    __tablename__ = "funds"

    id = Column(Integer, primary_key=True, index=True)
    investor_id = Column(Integer, ForeignKey("investors.id"), nullable=False)

    # --- Fund details ---
    fund_name = Column(String, nullable=True)
    # Stored as an integer so it can be filtered numerically.
    # NOTE(review): if this holds raw currency units, a 32-bit INTEGER column
    # may be too small for large funds — confirm unit and backend.
    fund_size = Column(Integer, nullable=True)
    fund_size_source_url = Column(String, nullable=True)

    # --- Check size range ---
    # Parsed from estimated_investment_size by an LLM.
    check_size_lower = Column(Integer, nullable=True)
    check_size_upper = Column(Integer, nullable=True)

    # --- Provenance ---
    source_url = Column(String, nullable=True)
    source_provider = Column(String, nullable=True)  # e.g., "Perplexity"

    # --- Geographic focus ---
    # Kept as a single plain string.
    geographic_focus = Column(String, nullable=True)

    # --- Relationships ---
    investor = relationship(
        "InvestorTable",
        back_populates="funds",
    )
    investment_stages = relationship(
        "InvestmentStageTable",
        back_populates="funds",
        secondary=fund_investment_stages_association,
    )
    sectors = relationship(
        "SectorTable",
        back_populates="funds",
        secondary=fund_sectors_association,
    )
|
|
|
|
|
|
class CompanyTable(Base, TimestampMixin):
    """ORM model mapped to the ``companies`` table."""

    __tablename__ = "companies"

    id = Column(Integer, primary_key=True, index=True)
    name = Column(String, nullable=False)
    industry = Column(String, nullable=True)
    location = Column(String, nullable=True)
    description = Column(String, nullable=True)
    founded_year = Column(Integer, nullable=True)
    website = Column(String, nullable=True)

    # One-to-many children; "delete-orphan" removes members that are detached
    # from (or deleted together with) this company.
    members = relationship(
        "CompanyMember",
        cascade="all, delete-orphan",
        back_populates="company",
    )

    # Many-to-many: reverse side of ``InvestorTable.portfolio_companies``.
    investors = relationship(
        "InvestorTable",
        back_populates="portfolio_companies",
        secondary=investor_company_association,
    )

    # Many-to-many: sectors linked to this company.
    sectors = relationship(
        "SectorTable",
        back_populates="companies",
        secondary=company_sector_association,
    )

    # Many-to-many: projects linked to this company.
    projects = relationship(
        "ProjectTable",
        back_populates="companies",
        secondary=project_company_association,
    )
|
|
|
|
|
|
class CompanyMember(Base, TimestampMixin):
    """ORM model mapped to ``company_members``: one person attached to a company."""

    __tablename__ = "company_members"

    id = Column(Integer, primary_key=True)
    name = Column(String)
    linkedin = Column(String, nullable=True)
    role = Column(String, nullable=True)

    # Owning company (required); mirrored by ``CompanyTable.members``.
    company_id = Column(Integer, ForeignKey("companies.id"), nullable=False)
    company = relationship(
        "CompanyTable",
        back_populates="members",
    )
|
|
|
|
|
|
class InvestmentStageTable(Base, TimestampMixin):
    """ORM model mapped to ``investment_stages``: uniquely named stages linked to funds."""

    __tablename__ = "investment_stages"

    id = Column(Integer, primary_key=True, index=True)
    # Required, and unique across the table.
    name = Column(String, unique=True, nullable=False)

    # Many-to-many: reverse side of ``FundTable.investment_stages``.
    funds = relationship(
        "FundTable",
        back_populates="investment_stages",
        secondary=fund_investment_stages_association,
    )
|
|
|
|
|
|
class SectorTable(Base, TimestampMixin):
    """ORM model mapped to ``sectors``; shared by investors, companies, projects and funds."""

    __tablename__ = "sectors"

    id = Column(Integer, primary_key=True, index=True)
    # NOTE(review): no uniqueness constraint is declared on the name — confirm
    # duplicate sector names are acceptable.
    name = Column(String, nullable=False)

    # --- Relationships (all many-to-many through link tables) ---
    investors = relationship(
        "InvestorTable",
        back_populates="sectors",
        secondary=investor_sector_association,
    )
    companies = relationship(
        "CompanyTable",
        back_populates="sectors",
        secondary=company_sector_association,
    )
    # The attribute on the project side is named ``sector`` (singular).
    projects = relationship(
        "ProjectTable",
        back_populates="sector",
        secondary=project_sector_association,
    )
    funds = relationship(
        "FundTable",
        back_populates="sectors",
        secondary=fund_sectors_association,
    )
|
|
|
|
|
|
class ProjectTable(Base, TimestampMixin):
    """ORM model mapped to the ``projects`` table."""

    __tablename__ = "projects"

    id = Column(Integer, primary_key=True, index=True)
    name = Column(String, nullable=False)
    valuation = Column(Integer, nullable=True)

    # Constrained to the members of the ``InvestmentStage`` enum.
    stage = Column(Enum(InvestmentStage), nullable=True)
    location = Column(String, nullable=True)
    description = Column(Text, nullable=True)
    # NOTE(review): these are declared without ``timezone=True``, unlike the
    # ``created_at``/``updated_at`` columns from ``TimestampMixin`` — confirm
    # naive datetimes are intended here.
    start_date = Column(DateTime, nullable=True)
    end_date = Column(DateTime, nullable=True)

    # Many-to-many despite the singular attribute name; it pairs with
    # ``SectorTable.projects``.
    sector = relationship(
        "SectorTable",
        back_populates="projects",
        secondary=project_sector_association,
    )

    # Many-to-many: investors linked to this project.
    investors = relationship(
        "InvestorTable",
        back_populates="projects",
        secondary=project_investor_association,
    )

    # Many-to-many: companies linked to this project.
    companies = relationship(
        "CompanyTable",
        back_populates="projects",
        secondary=project_company_association,
    )
|