Refactor investor and fund schemas to support new check size range

- Removed deprecated `stage_focus` column from `InvestorTable` and `InvestorSchema`.
- Updated `FundTable` to change `fund_size` from VARCHAR to INTEGER and added `check_size_lower` and `check_size_upper` columns.
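- Modified the investor list routes (`/investors`, `/investors/filter`, `/investors/{investor_id}/similar`) to return each investor-fund combination as a separate entry; an investor with no funds still yields one entry with null fund fields.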
- Created new `InvestorFundData` schema for combined investor-fund responses.
- Implemented LLM-based parsing of the check size range (`check_size_lower`, `check_size_upper`) from the estimated investment size.
- Updated database migration script to reflect schema changes and ensure data integrity.
- Removed obsolete verification and test scripts related to the old schema.
This commit is contained in:
bolade
2025-10-07 15:24:36 +01:00
parent c0fbbdd917
commit d341cacb9a
12 changed files with 556 additions and 884 deletions
+234 -39
View File
@@ -4,7 +4,11 @@ from db.db import get_db
from db.models import InvestorTable, SectorTable
from fastapi import APIRouter, Depends, HTTPException, Query
from pydantic import BaseModel
from schemas.router_schemas import InvestmentStage, InvestorData
from schemas.router_schemas import (
InvestmentStage,
InvestorData,
InvestorFundData,
)
from sqlalchemy.orm import Session, selectinload
router = APIRouter(tags=["Investor Routes"])
@@ -33,34 +37,95 @@ class InvestorUpdate(BaseModel):
number_of_investments: Optional[int] = None
def _investor_fund_rows(investor: InvestorTable) -> List[InvestorFundData]:
    """Flatten one investor into InvestorFundData rows, one per fund.

    Args:
        investor: An InvestorTable row whose ``funds``, ``portfolio_companies``,
            ``team_members`` and ``sectors`` relationships are read here.

    Returns:
        One InvestorFundData per entry in ``investor.funds``. If the investor
        has no funds, a single row whose fund fields are all ``None``.
    """
    # Fields that are identical on every row produced for this investor.
    shared = dict(
        # Investor fields
        investor_id=investor.id,
        investor_name=investor.name,
        investor_description=investor.description,
        investor_website=investor.website,
        investor_headquarters=investor.headquarters,
        aum=investor.aum,
        aum_as_of_date=investor.aum_as_of_date,
        aum_source_url=investor.aum_source_url,
        investment_thesis=investor.investment_thesis,
        portfolio_highlights=investor.portfolio_highlights,
        number_of_investments=investor.number_of_investments,
        # Related data (same for all funds of this investor)
        portfolio_companies=investor.portfolio_companies,
        team_members=investor.team_members,
        sectors=investor.sectors,
    )

    if not investor.funds:
        # Keep the investor visible in the listing: emit one row and pass the
        # fund fields explicitly as None rather than relying on schema defaults.
        return [
            InvestorFundData(
                **shared,
                fund_id=None,
                fund_name=None,
                fund_size=None,
                fund_size_source_url=None,
                check_size_lower=None,
                check_size_upper=None,
                geographic_focus=None,
                investment_stage_focus=None,
                sector_focus=None,
            )
        ]

    return [
        InvestorFundData(
            **shared,
            fund_id=fund.id,
            fund_name=fund.fund_name,
            fund_size=fund.fund_size,
            fund_size_source_url=fund.fund_size_source_url,
            check_size_lower=fund.check_size_lower,
            check_size_upper=fund.check_size_upper,
            geographic_focus=fund.geographic_focus,
            investment_stage_focus=fund.investment_stage_focus,
            sector_focus=fund.sector_focus,
        )
        for fund in investor.funds
    ]


@router.get("/investors", response_model=List[InvestorFundData])
def read_investors(db: Session = Depends(get_db)):
    """Get all investors with their funds as separate entries

    Each investor-fund combination is returned as a separate row.
    An investor with 3 funds will appear as 3 entries; an investor with no
    funds appears once with null fund fields.
    """
    investors = (
        db.query(InvestorTable)
        .options(
            # Eager-load every relationship the row builder reads.
            selectinload(InvestorTable.portfolio_companies),
            selectinload(InvestorTable.team_members),
            selectinload(InvestorTable.sectors),
            selectinload(InvestorTable.funds),
        )
        .all()
    )

    # Transform to InvestorFundData format (one row per investor-fund combination)
    investor_fund_list = []
    for investor in investors:
        investor_fund_list.extend(_investor_fund_rows(investor))

    return investor_fund_list
@router.get("/investors/filter", response_model=List[InvestorData])
@router.get("/investors/filter", response_model=List[InvestorFundData])
def filter_investors(
stage: Optional[InvestmentStage] = Query(
None, description="Filter by investment stage"
@@ -75,13 +140,18 @@ def filter_investors(
max_aum: Optional[int] = Query(None, description="Maximum AUM"),
db: Session = Depends(get_db),
):
"""Filter investors based on various criteria"""
"""Filter investors based on various criteria
Returns investor-fund combinations as separate rows.
An investor with 3 funds will appear as 3 entries.
"""
# Start with base query
query = db.query(InvestorTable).options(
selectinload(InvestorTable.portfolio_companies),
selectinload(InvestorTable.team_members),
selectinload(InvestorTable.sectors),
selectinload(InvestorTable.funds),
)
# Apply filters
@@ -111,29 +181,86 @@ def filter_investors(
investors = query.all()
# Transform to InvestorData format
investor_data_list = []
# Transform to InvestorFundData format (one row per investor-fund combination)
investor_fund_list = []
for investor in investors:
investor_data = InvestorData(
investor=investor,
portfolio_companies=investor.portfolio_companies,
team_members=investor.team_members,
sectors=investor.sectors,
)
investor_data_list.append(investor_data)
# If investor has funds, create one entry per fund
if investor.funds:
for fund in investor.funds:
investor_fund_data = InvestorFundData(
# Investor fields
investor_id=investor.id,
investor_name=investor.name,
investor_description=investor.description,
investor_website=investor.website,
investor_headquarters=investor.headquarters,
aum=investor.aum,
aum_as_of_date=investor.aum_as_of_date,
aum_source_url=investor.aum_source_url,
investment_thesis=investor.investment_thesis,
portfolio_highlights=investor.portfolio_highlights,
number_of_investments=investor.number_of_investments,
# Fund fields
fund_id=fund.id,
fund_name=fund.fund_name,
fund_size=fund.fund_size,
fund_size_source_url=fund.fund_size_source_url,
check_size_lower=fund.check_size_lower,
check_size_upper=fund.check_size_upper,
geographic_focus=fund.geographic_focus,
investment_stage_focus=fund.investment_stage_focus,
sector_focus=fund.sector_focus,
# Related data
portfolio_companies=investor.portfolio_companies,
team_members=investor.team_members,
sectors=investor.sectors,
)
investor_fund_list.append(investor_fund_data)
else:
# If no funds, create one entry with null fund fields
investor_fund_data = InvestorFundData(
# Investor fields
investor_id=investor.id,
investor_name=investor.name,
investor_description=investor.description,
investor_website=investor.website,
investor_headquarters=investor.headquarters,
aum=investor.aum,
aum_as_of_date=investor.aum_as_of_date,
aum_source_url=investor.aum_source_url,
investment_thesis=investor.investment_thesis,
portfolio_highlights=investor.portfolio_highlights,
number_of_investments=investor.number_of_investments,
# Fund fields (null)
fund_id=None,
fund_name=None,
fund_size=None,
fund_size_source_url=None,
check_size_lower=None,
check_size_upper=None,
geographic_focus=None,
investment_stage_focus=None,
sector_focus=None,
# Related data
portfolio_companies=investor.portfolio_companies,
team_members=investor.team_members,
sectors=investor.sectors,
)
investor_fund_list.append(investor_fund_data)
return investor_data_list
return investor_fund_list
@router.get("/investors/{investor_id}", response_model=InvestorData)
def read_investor(investor_id: int, db: Session = Depends(get_db)):
"""Get a specific investor by ID"""
"""Get a specific investor by ID with all their funds"""
investor = (
db.query(InvestorTable)
.options(
selectinload(InvestorTable.portfolio_companies),
selectinload(InvestorTable.team_members),
selectinload(InvestorTable.sectors),
selectinload(InvestorTable.funds),
)
.filter(InvestorTable.id == investor_id)
.first()
@@ -142,12 +269,13 @@ def read_investor(investor_id: int, db: Session = Depends(get_db)):
if not investor:
raise HTTPException(status_code=404, detail="Investor not found")
# Transform to InvestorData format
# Transform to InvestorData format (includes funds array)
return InvestorData(
investor=investor,
portfolio_companies=investor.portfolio_companies,
team_members=investor.team_members,
sectors=investor.sectors,
funds=investor.funds,
)
@@ -166,6 +294,7 @@ def create_investor(investor: InvestorCreate, db: Session = Depends(get_db)):
selectinload(InvestorTable.portfolio_companies),
selectinload(InvestorTable.team_members),
selectinload(InvestorTable.sectors),
selectinload(InvestorTable.funds),
)
.filter(InvestorTable.id == db_investor.id)
.first()
@@ -177,6 +306,7 @@ def create_investor(investor: InvestorCreate, db: Session = Depends(get_db)):
portfolio_companies=investor_with_relations.portfolio_companies,
team_members=investor_with_relations.team_members,
sectors=investor_with_relations.sectors,
funds=investor_with_relations.funds,
)
@@ -205,6 +335,7 @@ def update_investor(
selectinload(InvestorTable.portfolio_companies),
selectinload(InvestorTable.team_members),
selectinload(InvestorTable.sectors),
selectinload(InvestorTable.funds),
)
.filter(InvestorTable.id == investor_id)
.first()
@@ -216,6 +347,7 @@ def update_investor(
portfolio_companies=investor_with_relations.portfolio_companies,
team_members=investor_with_relations.team_members,
sectors=investor_with_relations.sectors,
funds=investor_with_relations.funds,
)
@@ -233,13 +365,16 @@ def delete_investor(investor_id: int, db: Session = Depends(get_db)):
return {"message": "Investor deleted successfully"}
@router.get("/investors/{investor_id}/similar", response_model=List[InvestorData])
@router.get("/investors/{investor_id}/similar", response_model=List[InvestorFundData])
def find_similar_investors(
investor_id: int,
limit: int = Query(10, description="Maximum number of similar investors to return"),
db: Session = Depends(get_db),
):
"""Find investors similar to a given investor based on characteristics"""
"""Find investors similar to a given investor based on characteristics
Returns investor-fund combinations as separate rows.
"""
# Get the target investor
target_investor = (
@@ -248,6 +383,7 @@ def find_similar_investors(
selectinload(InvestorTable.portfolio_companies),
selectinload(InvestorTable.team_members),
selectinload(InvestorTable.sectors),
selectinload(InvestorTable.funds),
)
.filter(InvestorTable.id == investor_id)
.first()
@@ -266,6 +402,7 @@ def find_similar_investors(
selectinload(InvestorTable.portfolio_companies),
selectinload(InvestorTable.team_members),
selectinload(InvestorTable.sectors),
selectinload(InvestorTable.funds),
)
.filter(InvestorTable.id != investor_id)
.all()
@@ -338,13 +475,71 @@ def find_similar_investors(
scored_investors.sort(key=lambda x: x[0], reverse=True)
similar_investors = [inv for score, inv in scored_investors[:limit]]
# Transform to InvestorData format
return [
InvestorData(
investor=inv,
portfolio_companies=inv.portfolio_companies,
team_members=inv.team_members,
sectors=inv.sectors,
)
for inv in similar_investors
]
# Transform to InvestorFundData format (one row per investor-fund combination)
investor_fund_list = []
for investor in similar_investors:
# If investor has funds, create one entry per fund
if investor.funds:
for fund in investor.funds:
investor_fund_data = InvestorFundData(
# Investor fields
investor_id=investor.id,
investor_name=investor.name,
investor_description=investor.description,
investor_website=investor.website,
investor_headquarters=investor.headquarters,
aum=investor.aum,
aum_as_of_date=investor.aum_as_of_date,
aum_source_url=investor.aum_source_url,
investment_thesis=investor.investment_thesis,
portfolio_highlights=investor.portfolio_highlights,
number_of_investments=investor.number_of_investments,
# Fund fields
fund_id=fund.id,
fund_name=fund.fund_name,
fund_size=fund.fund_size,
fund_size_source_url=fund.fund_size_source_url,
check_size_lower=fund.check_size_lower,
check_size_upper=fund.check_size_upper,
geographic_focus=fund.geographic_focus,
investment_stage_focus=fund.investment_stage_focus,
sector_focus=fund.sector_focus,
# Related data
portfolio_companies=investor.portfolio_companies,
team_members=investor.team_members,
sectors=investor.sectors,
)
investor_fund_list.append(investor_fund_data)
else:
# If no funds, create one entry with null fund fields
investor_fund_data = InvestorFundData(
# Investor fields
investor_id=investor.id,
investor_name=investor.name,
investor_description=investor.description,
investor_website=investor.website,
investor_headquarters=investor.headquarters,
aum=investor.aum,
aum_as_of_date=investor.aum_as_of_date,
aum_source_url=investor.aum_source_url,
investment_thesis=investor.investment_thesis,
portfolio_highlights=investor.portfolio_highlights,
number_of_investments=investor.number_of_investments,
# Fund fields (null)
fund_id=None,
fund_name=None,
fund_size=None,
fund_size_source_url=None,
check_size_lower=None,
check_size_upper=None,
geographic_focus=None,
investment_stage_focus=None,
sector_focus=None,
# Related data
portfolio_companies=investor.portfolio_companies,
team_members=investor.team_members,
sectors=investor.sectors,
)
investor_fund_list.append(investor_fund_data)
return investor_fund_list