feat: Refactor Fund schema to use many-to-many relationships for investment stages and sectors

- Updated FundTable to replace JSON fields for investment stages and sectors with relationships.
- Introduced InvestmentStageTable and fund_investment_stages association table.
- Created fund_sectors association table for many-to-many relationship with sectors.
- Changed geographic_focus from JSON array to a simple string.
- Migrated existing data to new schema, ensuring data integrity and normalization.
- Updated related schemas, routers, and services to reflect new structure.
- Added migration script to handle data transformation and schema updates.
- Implemented tests to verify new relationships and data integrity.
This commit is contained in:
bolade
2025-10-07 15:57:29 +01:00
parent d341cacb9a
commit a9589e54f3
10 changed files with 1134 additions and 42 deletions
+41 -9
View File
@@ -126,6 +126,22 @@ investor_stage_association = Table(
Column("stage_id", Integer, ForeignKey("investment_stages.id")),
)
# Association table for fund-stage many-to-many
fund_investment_stages_association = Table(
"fund_investment_stages",
Base.metadata,
Column("fund_id", Integer, ForeignKey("funds.id")),
Column("stage_id", Integer, ForeignKey("investment_stages.id")),
)
# Association table for fund-sector many-to-many
fund_sectors_association = Table(
"fund_sectors",
Base.metadata,
Column("fund_id", Integer, ForeignKey("funds.id")),
Column("sector_id", Integer, ForeignKey("sectors.id")),
)
class InvestorTable(Base, TimestampMixin):
__tablename__ = "investors"
@@ -235,27 +251,40 @@ class FundTable(Base, TimestampMixin):
source_url = Column(String, nullable=True)
source_provider = Column(String, nullable=True) # e.g., "Perplexity"
# JSON array fields
geographic_focus = Column(JSON, nullable=True) # Array of regions/countries
investment_stage_focus = Column(JSON, nullable=True) # Array of stages
sector_focus = Column(JSON, nullable=True) # Array of sectors
# Geographic focus as simple string
geographic_focus = Column(String, nullable=True)
# Relationships
investor = relationship("InvestorTable", back_populates="funds")
investment_stages = relationship(
"InvestmentStageTable",
secondary=fund_investment_stages_association,
back_populates="funds",
)
sectors = relationship(
"SectorTable",
secondary=fund_sectors_association,
back_populates="funds",
)
class InvestmentStageTable(Base, TimestampMixin):
__tablename__ = "investment_stages"
id = Column(Integer, primary_key=True, index=True)
stage = Column(Enum(InvestmentStage), nullable=False, unique=True)
name = Column(String, nullable=False, unique=True)
# Relationship back to investors
# Relationships
investors = relationship(
"InvestorTable",
secondary=investor_stage_association,
back_populates="investment_stages",
)
funds = relationship(
"FundTable",
secondary=fund_investment_stages_association,
back_populates="investment_stages",
)
class CompanyTable(Base, TimestampMixin):
@@ -307,20 +336,23 @@ class SectorTable(Base, TimestampMixin):
id = Column(Integer, primary_key=True, index=True)
name = Column(String, nullable=False)
# Add relationship back to investors
# Relationships
investors = relationship(
"InvestorTable",
secondary=investor_sector_association,
back_populates="sectors",
)
companies = relationship(
"CompanyTable", secondary=company_sector_association, back_populates="sectors"
)
projects = relationship(
"ProjectTable", secondary=project_sector_association, back_populates="sector"
)
funds = relationship(
"FundTable",
secondary=fund_sectors_association,
back_populates="sectors",
)
class ProjectTable(Base, TimestampMixin):