diff --git a/app/routers/__pycache__/companies.cpython-312.pyc b/app/routers/__pycache__/companies.cpython-312.pyc index d1b4558..ab59350 100644 Binary files a/app/routers/__pycache__/companies.cpython-312.pyc and b/app/routers/__pycache__/companies.cpython-312.pyc differ diff --git a/app/routers/__pycache__/investors.cpython-312.pyc b/app/routers/__pycache__/investors.cpython-312.pyc index f98ebd5..e67c075 100644 Binary files a/app/routers/__pycache__/investors.cpython-312.pyc and b/app/routers/__pycache__/investors.cpython-312.pyc differ diff --git a/app/routers/investors.py b/app/routers/investors.py index c415aaa..816fb60 100644 --- a/app/routers/investors.py +++ b/app/routers/investors.py @@ -1,14 +1,16 @@ from typing import Optional from db.db import get_db -from db.models import InvestorTable, SectorTable +from db.models import FundTable, InvestorTable, SectorTable from fastapi import APIRouter, Depends, HTTPException, Query from pydantic import BaseModel from schemas.router_schemas import ( + CompanyMinimal, + InvestmentResponse, InvestmentStage, InvestorData, - InvestorFundData, PaginatedResponse, + SectorMinimal, ) from sqlalchemy.orm import Session, selectinload @@ -40,7 +42,7 @@ class InvestorUpdate(BaseModel): number_of_investments: Optional[int] = None -@router.get("/investors", response_model=PaginatedResponse[InvestorFundData]) +@router.get("/investors", response_model=PaginatedResponse[InvestmentResponse]) def read_investors( page: int = Query(1, ge=1, description="Page number (starts at 1)"), page_size: int = Query(10, ge=1, le=100, description="Items per page (max 100)"), @@ -71,78 +73,67 @@ def read_investors( .all() ) - # Transform to InvestorFundData format (one row per investor-fund combination) - investor_fund_list = [] + # Transform to InvestmentResponse format (one row per investor-fund combination) + investment_responses = [] for investor in investors: + # Get top 3 portfolio companies (id and name only) + portfolio_companies = [ + CompanyMinimal(id=company.id, name=company.name) + for company in investor.portfolio_companies[:3] + ] + # If investor has funds, create one entry per fund if investor.funds: for fund in investor.funds: - investor_fund_data = InvestorFundData( - # Investor fields - investor_id=investor.id, - investor_name=investor.name, - investor_description=investor.description, - investor_website=investor.website, - investor_headquarters=investor.headquarters, + # Get stage focus as comma-separated string + stage_focus = ( + ", ".join([stage.name for stage in fund.investment_stages]) + if fund.investment_stages + else None + ) + + # Get top 3 sectors from fund (id and name only) + fund_sectors = [ + SectorMinimal(id=sector.id, name=sector.name) + for sector in (fund.sectors[:3] if fund.sectors else []) + ] + + investment_response = InvestmentResponse( + id=investor.id, + name=f"{investor.name} - {fund.fund_name}" + if fund.fund_name + else investor.name, aum=investor.aum, - aum_as_of_date=investor.aum_as_of_date, - aum_source_url=investor.aum_source_url, - investment_thesis=investor.investment_thesis, - portfolio_highlights=investor.portfolio_highlights, - number_of_investments=investor.number_of_investments, - # Fund fields - fund_id=fund.id, - fund_name=fund.fund_name, - fund_size=fund.fund_size, - fund_size_source_url=fund.fund_size_source_url, check_size_lower=fund.check_size_lower, check_size_upper=fund.check_size_upper, geographic_focus=fund.geographic_focus, - fund_investment_stages=fund.investment_stages, # Now a relationship - fund_sectors=fund.sectors, # Now a relationship - # Related data (same for all funds of this investor) - portfolio_companies=investor.portfolio_companies, - team_members=investor.team_members, - sectors=investor.sectors, + stage_focus=stage_focus, + portfolio_companies=portfolio_companies, + sectors=fund_sectors, + compatibility_score=1.0, ) - investor_fund_list.append(investor_fund_data) + investment_responses.append(investment_response) else: # If no funds, create one entry with null fund fields - investor_fund_data = InvestorFundData( - # Investor fields - investor_id=investor.id, - investor_name=investor.name, - investor_description=investor.description, - investor_website=investor.website, - investor_headquarters=investor.headquarters, + investment_response = InvestmentResponse( + id=investor.id, + name=investor.name, aum=investor.aum, - aum_as_of_date=investor.aum_as_of_date, - aum_source_url=investor.aum_source_url, - investment_thesis=investor.investment_thesis, - portfolio_highlights=investor.portfolio_highlights, - number_of_investments=investor.number_of_investments, - # Fund fields (null) - fund_id=None, - fund_name=None, - fund_size=None, - fund_size_source_url=None, check_size_lower=None, check_size_upper=None, geographic_focus=None, - fund_investment_stages=None, - fund_sectors=None, - # Related data - portfolio_companies=investor.portfolio_companies, - team_members=investor.team_members, - sectors=investor.sectors, + stage_focus=None, + portfolio_companies=portfolio_companies, + sectors=[], + compatibility_score=1.0, ) - investor_fund_list.append(investor_fund_data) + investment_responses.append(investment_response) # Calculate total pages total_pages = (total_count + page_size - 1) // page_size return PaginatedResponse( - items=investor_fund_list, + items=investment_responses, total=total_count, page=page, page_size=page_size, @@ -150,7 +141,7 @@ def read_investors( ) -@router.get("/investors/filter", response_model=PaginatedResponse[InvestorFundData]) +@router.get("/investors/filter", response_model=PaginatedResponse[InvestmentResponse]) def filter_investors( stage: Optional[InvestmentStage] = Query( None, description="Filter by investment stage" @@ -170,40 +161,42 @@ def filter_investors( """Filter investors based on various criteria (paginated) Returns investor-fund combinations as separate rows. - An investor with 3 funds will appear as 3 entries. + Queries the funds table to find matching funds. """ - # Start with base query - query = db.query(InvestorTable).options( - selectinload(InvestorTable.portfolio_companies), - selectinload(InvestorTable.team_members), - selectinload(InvestorTable.sectors), - selectinload(InvestorTable.funds), + # Start with base query on funds table + query = db.query(FundTable).options( + selectinload(FundTable.investor).selectinload( + InvestorTable.portfolio_companies + ), + selectinload(FundTable.investor).selectinload(InvestorTable.team_members), + selectinload(FundTable.investor).selectinload(InvestorTable.sectors), + selectinload(FundTable.investment_stages), + selectinload(FundTable.sectors), ) - # Apply filters - # Note: stage filtering is now done at fund level via fund.investment_stages - # if stage: - # query = query.filter(InvestorTable.stage_focus == stage) - + # Apply filters at fund level if min_check_size is not None: - query = query.filter(InvestorTable.check_size_lower >= min_check_size) + query = query.filter(FundTable.check_size_lower >= min_check_size) if max_check_size is not None: - query = query.filter(InvestorTable.check_size_upper <= max_check_size) + query = query.filter(FundTable.check_size_upper <= max_check_size) if geography: - query = query.filter(InvestorTable.geographic_focus.ilike(f"%{geography}%")) + query = query.filter(FundTable.geographic_focus.ilike(f"%{geography}%")) + # Apply filters at investor level (through relationship) if min_aum is not None: - query = query.filter(InvestorTable.aum >= min_aum) + query = query.join(FundTable.investor).filter(InvestorTable.aum >= min_aum) if max_aum is not None: + if min_aum is None: # Only join if not already joined + query = query.join(FundTable.investor) query = query.filter(InvestorTable.aum <= max_aum) - # Filter by sector if provided + # Filter by sector if provided (at fund level) if sector: - query = query.join(InvestorTable.sectors).filter( + query = query.join(FundTable.sectors).filter( SectorTable.name.ilike(f"%{sector}%") ) @@ -212,80 +205,53 @@ def filter_investors( # Calculate offset and apply pagination offset = (page - 1) * page_size - investors = query.offset(offset).limit(page_size).all() + funds = query.offset(offset).limit(page_size).all() - # Transform to InvestorFundData format (one row per investor-fund combination) - investor_fund_list = [] - for investor in investors: - # If investor has funds, create one entry per fund - if investor.funds: - for fund in investor.funds: - investor_fund_data = InvestorFundData( - # Investor fields - investor_id=investor.id, - investor_name=investor.name, - investor_description=investor.description, - investor_website=investor.website, - investor_headquarters=investor.headquarters, - aum=investor.aum, - aum_as_of_date=investor.aum_as_of_date, - aum_source_url=investor.aum_source_url, - investment_thesis=investor.investment_thesis, - portfolio_highlights=investor.portfolio_highlights, - number_of_investments=investor.number_of_investments, - # Fund fields - fund_id=fund.id, - fund_name=fund.fund_name, - fund_size=fund.fund_size, - fund_size_source_url=fund.fund_size_source_url, - check_size_lower=fund.check_size_lower, - check_size_upper=fund.check_size_upper, - geographic_focus=fund.geographic_focus, - fund_investment_stages=fund.investment_stages, # Now a relationship - fund_sectors=fund.sectors, # Now a relationship - # Related data - portfolio_companies=investor.portfolio_companies, - team_members=investor.team_members, - sectors=investor.sectors, - ) - investor_fund_list.append(investor_fund_data) - else: - # If no funds, create one entry with null fund fields - investor_fund_data = InvestorFundData( - # Investor fields - investor_id=investor.id, - investor_name=investor.name, - investor_description=investor.description, - investor_website=investor.website, - investor_headquarters=investor.headquarters, - aum=investor.aum, - aum_as_of_date=investor.aum_as_of_date, - aum_source_url=investor.aum_source_url, - investment_thesis=investor.investment_thesis, - portfolio_highlights=investor.portfolio_highlights, - number_of_investments=investor.number_of_investments, - # Fund fields (null) - fund_id=None, - fund_name=None, - fund_size=None, - fund_size_source_url=None, - check_size_lower=None, - check_size_upper=None, - geographic_focus=None, - fund_investment_stages=None, - fund_sectors=None, - # Related data - portfolio_companies=investor.portfolio_companies, - team_members=investor.team_members, - sectors=investor.sectors, - ) - investor_fund_list.append(investor_fund_data) + # Transform to InvestmentResponse format (one row per fund) + investment_responses = [] + for fund in funds: + investor = fund.investor + + # Get top 3 portfolio companies (id and name only) + portfolio_companies = [ + CompanyMinimal(id=company.id, name=company.name) + for company in investor.portfolio_companies[:3] + ] + + # Get stage focus as comma-separated string + stage_focus = ( + ", ".join([stage.name for stage in fund.investment_stages]) + if fund.investment_stages + else None + ) + + # Get top 3 sectors from fund (id and name only) + fund_sectors = [ + SectorMinimal(id=sector.id, name=sector.name) + for sector in (fund.sectors[:3] if fund.sectors else []) + ] + + investment_response = InvestmentResponse( + id=investor.id, + name=f"{investor.name} - {fund.fund_name}" + if fund.fund_name + else investor.name, + aum=investor.aum, + check_size_lower=fund.check_size_lower, + check_size_upper=fund.check_size_upper, + geographic_focus=fund.geographic_focus, + stage_focus=stage_focus, + portfolio_companies=portfolio_companies, + sectors=fund_sectors, + compatibility_score=1.0, + ) + investment_responses.append(investment_response) # Calculate total pages total_pages = (total_count + page_size - 1) // page_size return PaginatedResponse( - items=investor_fund_list, + items=investment_responses, total=total_count, page=page, page_size=page_size, @@ -409,7 +375,7 @@ def delete_investor(investor_id: int, db: Session = Depends(get_db)): @router.get( "/investors/{investor_id}/similar", - response_model=PaginatedResponse[InvestorFundData], + response_model=PaginatedResponse[InvestmentResponse], ) def find_similar_investors( investor_id: int, @@ -421,16 +387,18 @@ def find_similar_investors( """Find investors similar to a given investor based on characteristics (paginated) Returns investor-fund combinations as separate rows. + Queries the funds table to find matching funds. """ - # Get the target investor + # Get the target investor to get their funds for comparison target_investor = ( db.query(InvestorTable) .options( selectinload(InvestorTable.portfolio_companies), selectinload(InvestorTable.team_members), selectinload(InvestorTable.sectors), - selectinload(InvestorTable.funds), + selectinload(InvestorTable.funds).selectinload(FundTable.investment_stages), + selectinload(InvestorTable.funds).selectinload(FundTable.sectors), ) .filter(InvestorTable.id == investor_id) .first() @@ -439,168 +407,147 @@ def find_similar_investors( if not target_investor: raise HTTPException(status_code=404, detail="Investor not found") - # Get target investor's sector IDs for comparison - target_sector_ids = {sector.id for sector in target_investor.sectors} + # Get target investor's sector IDs for comparison (from their funds) + target_sector_ids = set() + target_stage_ids = set() + target_check_ranges = [] + target_geographies = [] - # Query all other investors with their relationships - candidates = ( - db.query(InvestorTable) + for fund in target_investor.funds: + if fund.sectors: + target_sector_ids.update({sector.id for sector in fund.sectors}) + if fund.investment_stages: + target_stage_ids.update({stage.id for stage in fund.investment_stages}) + if fund.check_size_lower and fund.check_size_upper: + target_check_ranges.append((fund.check_size_lower, fund.check_size_upper)) + if fund.geographic_focus: + target_geographies.append(fund.geographic_focus.lower()) + + # Query all funds from other investors + candidate_funds = ( + db.query(FundTable) .options( - selectinload(InvestorTable.portfolio_companies), - selectinload(InvestorTable.team_members), - selectinload(InvestorTable.sectors), - selectinload(InvestorTable.funds), + selectinload(FundTable.investor).selectinload( + InvestorTable.portfolio_companies + ), + selectinload(FundTable.investor).selectinload(InvestorTable.team_members), + selectinload(FundTable.investor).selectinload(InvestorTable.sectors), + selectinload(FundTable.investment_stages), + selectinload(FundTable.sectors), ) + .join(FundTable.investor) .filter(InvestorTable.id != investor_id) .all() ) - # Calculate similarity scores - scored_investors = [] - for candidate in candidates: + # Calculate similarity scores for each fund + scored_funds = [] + for fund in candidate_funds: score = 0 - # Stage focus match is now handled at fund level - # Skip stage matching at investor level since stage_focus no longer exists - # if candidate.stage_focus == target_investor.stage_focus: - # score += 30 - # Geographic focus match (20 points for exact, 10 for partial) - if candidate.geographic_focus and target_investor.geographic_focus: - if ( - candidate.geographic_focus.lower() - == target_investor.geographic_focus.lower() - ): - score += 20 - elif ( - candidate.geographic_focus.lower() - in target_investor.geographic_focus.lower() - or target_investor.geographic_focus.lower() - in candidate.geographic_focus.lower() - ): - score += 10 + if fund.geographic_focus and target_geographies: + fund_geo_lower = fund.geographic_focus.lower() + for target_geo in target_geographies: + if fund_geo_lower == target_geo: + score += 20 + break + elif fund_geo_lower in target_geo or target_geo in fund_geo_lower: + score += 10 + break # Check size overlap (20 points max) - if ( - candidate.check_size_lower - and candidate.check_size_upper - and target_investor.check_size_lower - and target_investor.check_size_upper - ): - # Calculate overlap percentage - overlap_start = max( - candidate.check_size_lower, target_investor.check_size_lower - ) - overlap_end = min( - candidate.check_size_upper, target_investor.check_size_upper - ) - if overlap_end > overlap_start: - overlap = overlap_end - overlap_start - target_range = ( - target_investor.check_size_upper - target_investor.check_size_lower - ) - overlap_ratio = overlap / target_range if target_range > 0 else 0 - score += int(20 * overlap_ratio) + if fund.check_size_lower and fund.check_size_upper and target_check_ranges: + max_overlap_score = 0 + for target_lower, target_upper in target_check_ranges: + overlap_start = max(fund.check_size_lower, target_lower) + overlap_end = min(fund.check_size_upper, target_upper) + if overlap_end > overlap_start: + overlap = overlap_end - overlap_start + target_range = target_upper - target_lower + overlap_ratio = overlap / target_range if target_range > 0 else 0 + max_overlap_score = max(max_overlap_score, int(20 * overlap_ratio)) + score += max_overlap_score # AUM similarity (15 points max) - if candidate.aum and target_investor.aum: - aum_diff = abs(candidate.aum - target_investor.aum) - max_aum = max(candidate.aum, target_investor.aum) + if fund.investor.aum and target_investor.aum: + aum_diff = abs(fund.investor.aum - target_investor.aum) + max_aum = max(fund.investor.aum, target_investor.aum) similarity_ratio = 1 - (aum_diff / max_aum) if max_aum > 0 else 0 score += int(15 * similarity_ratio) # Sector overlap (30 points max) - candidate_sector_ids = {sector.id for sector in candidate.sectors} - if target_sector_ids and candidate_sector_ids: - common_sectors = target_sector_ids.intersection(candidate_sector_ids) + if fund.sectors and target_sector_ids: + fund_sector_ids = {sector.id for sector in fund.sectors} + common_sectors = target_sector_ids.intersection(fund_sector_ids) overlap_ratio = len(common_sectors) / len(target_sector_ids) score += int(30 * overlap_ratio) - if score > 0: # Only include investors with some similarity - scored_investors.append((score, candidate)) + # Investment stage match (15 points max) + if fund.investment_stages and target_stage_ids: + fund_stage_ids = {stage.id for stage in fund.investment_stages} + common_stages = target_stage_ids.intersection(fund_stage_ids) + overlap_ratio = len(common_stages) / len(target_stage_ids) + score += int(15 * overlap_ratio) + + if score > 0: # Only include funds with some similarity + scored_funds.append((score, fund)) # Sort by score (descending) and take top N based on limit - scored_investors.sort(key=lambda x: x[0], reverse=True) - top_similar = scored_investors[:limit] + scored_funds.sort(key=lambda x: x[0], reverse=True) + top_similar = scored_funds[:limit] - # Apply pagination to the top similar investors + # Apply pagination to the top similar funds total_count = len(top_similar) offset = (page - 1) * page_size paginated_similar = top_similar[offset : offset + page_size] - similar_investors = [inv for score, inv in paginated_similar] + similar_funds = [fund for score, fund in paginated_similar] - # Transform to InvestorFundData format (one row per investor-fund combination) - investor_fund_list = [] - for investor in similar_investors: - # If investor has funds, create one entry per fund - if investor.funds: - for fund in investor.funds: - investor_fund_data = InvestorFundData( - # Investor fields - investor_id=investor.id, - investor_name=investor.name, - investor_description=investor.description, - investor_website=investor.website, - investor_headquarters=investor.headquarters, - aum=investor.aum, - aum_as_of_date=investor.aum_as_of_date, - aum_source_url=investor.aum_source_url, - investment_thesis=investor.investment_thesis, - portfolio_highlights=investor.portfolio_highlights, - number_of_investments=investor.number_of_investments, - # Fund fields - fund_id=fund.id, - fund_name=fund.fund_name, - fund_size=fund.fund_size, - fund_size_source_url=fund.fund_size_source_url, - check_size_lower=fund.check_size_lower, - check_size_upper=fund.check_size_upper, - geographic_focus=fund.geographic_focus, - fund_investment_stages=fund.investment_stages, # Now a relationship - fund_sectors=fund.sectors, # Now a relationship - # Related data - portfolio_companies=investor.portfolio_companies, - team_members=investor.team_members, - sectors=investor.sectors, - ) - investor_fund_list.append(investor_fund_data) - else: - # If no funds, create one entry with null fund fields - investor_fund_data = InvestorFundData( - # Investor fields - investor_id=investor.id, - investor_name=investor.name, - investor_description=investor.description, - investor_website=investor.website, - investor_headquarters=investor.headquarters, - aum=investor.aum, - aum_as_of_date=investor.aum_as_of_date, - aum_source_url=investor.aum_source_url, - investment_thesis=investor.investment_thesis, - portfolio_highlights=investor.portfolio_highlights, - number_of_investments=investor.number_of_investments, - # Fund fields (null) - fund_id=None, - fund_name=None, - fund_size=None, - fund_size_source_url=None, - check_size_lower=None, - check_size_upper=None, - geographic_focus=None, - fund_investment_stages=None, - fund_sectors=None, - # Related data - portfolio_companies=investor.portfolio_companies, - team_members=investor.team_members, - sectors=investor.sectors, - ) - investor_fund_list.append(investor_fund_data) + # Transform to InvestmentResponse format (one row per fund) + investment_responses = [] + for fund in similar_funds: + investor = fund.investor + + # Get top 3 portfolio companies (id and name only) + portfolio_companies = [ + CompanyMinimal(id=company.id, name=company.name) + for company in investor.portfolio_companies[:3] + ] + + # Get stage focus as comma-separated string + stage_focus = ( + ", ".join([stage.name for stage in fund.investment_stages]) + if fund.investment_stages + else None + ) + + # Get top 3 sectors from fund (id and name only) + fund_sectors = [ + SectorMinimal(id=sector.id, name=sector.name) + for sector in (fund.sectors[:3] if fund.sectors else []) + ] + + investment_response = InvestmentResponse( + id=investor.id, + name=f"{investor.name} - {fund.fund_name}" + if fund.fund_name + else investor.name, + aum=investor.aum, + check_size_lower=fund.check_size_lower, + check_size_upper=fund.check_size_upper, + geographic_focus=fund.geographic_focus, + stage_focus=stage_focus, + portfolio_companies=portfolio_companies, + sectors=fund_sectors, + compatibility_score=1.0, + ) + investment_responses.append(investment_response) # Calculate total pages total_pages = (total_count + page_size - 1) // page_size return PaginatedResponse( - items=investor_fund_list, + items=investment_responses, total=total_count, page=page, page_size=page_size, diff --git a/app/schemas/__pycache__/router_schemas.cpython-312.pyc b/app/schemas/__pycache__/router_schemas.cpython-312.pyc index 69fb8c4..d58670d 100644 Binary files a/app/schemas/__pycache__/router_schemas.cpython-312.pyc and b/app/schemas/__pycache__/router_schemas.cpython-312.pyc differ diff --git a/app/schemas/router_schemas.py b/app/schemas/router_schemas.py index 8379d3c..10eee8c 100644 --- a/app/schemas/router_schemas.py +++ b/app/schemas/router_schemas.py @@ -168,12 +168,29 @@ class InvestorFundData(BaseModel): class Config: from_attributes = True +class InvestorMinimal(BaseModel): + """Minimal investor info with just id and name""" + + id: int + name: str + + class Config: + from_attributes = True + +class CompanySchemaMinimal(BaseModel): + id: int + name: str + industry: str | None + location: str | None + founded_year: Optional[int] + website: Optional[str] + + class Config: + from_attributes = True class CompanyData(BaseModel): # Renamed from CompaniesData for consistency - company: CompanySchema - sectors: List[SectorSchema] - members: List[CompanyMemberSchema] - investors: List[InvestorSchema] + company: CompanySchemaMinimal + investors: List[InvestorMinimal] class Config: from_attributes = True @@ -189,6 +206,49 @@ class InvestorFundList(BaseModel): investor_funds: List[InvestorFundData] +class CompanyMinimal(BaseModel): + """Minimal company info with just id and name""" + + id: int + name: str + + class Config: + from_attributes = True + + +class SectorMinimal(BaseModel): + """Minimal sector info with just id and name""" + + id: int + name: str + + class Config: + from_attributes = True + + +class InvestmentResponse(BaseModel): + """Simplified investment response schema + + One row per investor-fund combination with streamlined data + """ + + id: int # Investor ID + name: ( + str # Combination of investor name and fund name (e.g., "Investor A - Fund A") + ) + aum: int | None # From investor + check_size_lower: int | None # From fund + check_size_upper: int | None # From fund + geographic_focus: str | None # From fund + stage_focus: str | None # Comma-separated stages from fund + portfolio_companies: List[CompanyMinimal] # Top 3 companies from investor + sectors: List[SectorMinimal] # Top 3 sectors from fund + compatibility_score: float # 0 to 1 (default 1 for now) + + class Config: + from_attributes = True + + class PaginatedResponse(BaseModel, Generic[T]): """Generic paginated response schema"""