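Refactor investor similarity search to delegate matching to the AI query agent (QueryProcessor), replacing the hand-built SQL filters with a generated natural-language query; adjust DataFrame parsing in parse_investors and parse_companies to skip the first 20 rows.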

This commit is contained in:
bolade
2025-09-29 15:58:09 +01:00
parent 6caea96658
commit 17bc5acbc8
5 changed files with 34 additions and 57 deletions
Binary file not shown.
Binary file not shown.
+32 -55
View File
@@ -1,13 +1,12 @@
from typing import List, Optional from typing import List, Optional
from db.db import get_db from db.db import get_db
from db.models import InvestorTable, SectorTable from db.models import InvestorTable, SectorTable
from fastapi import APIRouter, Depends, HTTPException, Query from fastapi import APIRouter, Depends, HTTPException, Query
from schemas.router_schemas import InvestmentStage, InvestorData
from pydantic import BaseModel from pydantic import BaseModel
from sqlalchemy.orm import Session, selectinload from schemas.router_schemas import InvestmentStage, InvestorData
from services.querying import QueryProcessor from services.querying import QueryProcessor
from sqlalchemy.orm import Session, selectinload
router = APIRouter(tags=["Investor Routes"]) router = APIRouter(tags=["Investor Routes"])
@@ -237,9 +236,9 @@ def delete_investor(investor_id: int, db: Session = Depends(get_db)):
@router.get("/investors/{investor_id}/similar", response_model=List[InvestorData]) @router.get("/investors/{investor_id}/similar", response_model=List[InvestorData])
def find_similar_investors(investor_id: int, db: Session = Depends(get_db)): def find_similar_investors(investor_id: int, db: Session = Depends(get_db)):
"""Find investors similar to a given investor""" """Find investors similar to a given investor using AI agent"""
# First, get the target investor # First, get the target investor to build the AI query
target_investor = ( target_investor = (
db.query(InvestorTable) db.query(InvestorTable)
.options( .options(
@@ -250,55 +249,33 @@ def find_similar_investors(investor_id: int, db: Session = Depends(get_db)):
.filter(InvestorTable.id == investor_id) .filter(InvestorTable.id == investor_id)
.first() .first()
) )
if not target_investor: if not target_investor:
raise HTTPException(status_code=404, detail="Investor not found") raise HTTPException(status_code=404, detail="Investor not found")
# Build a descriptive query for the AI agent based on target investor characteristics
target_sectors = [sector.name for sector in target_investor.sectors]
sectors_text = ", ".join(target_sectors) if target_sectors else "any sector"
ai_query = f"""
Find investors similar to investor ID {investor_id} with the following characteristics:
- Stage focus: {target_investor.stage_focus.value if target_investor.stage_focus else "any stage"}
- Geographic focus: {target_investor.geographic_focus or "any geography"}
- Check size range: ${target_investor.check_size_lower or 0:,} to ${target_investor.check_size_upper or 0:,}
- AUM (Assets Under Management): ${target_investor.aum or 0:,}
- Sectors: {sectors_text}
# Build query to find similar investors Find investors with similar characteristics but exclude investor ID {investor_id}.
query = db.query(InvestorTable).options( Look for investors with:
selectinload(InvestorTable.portfolio_companies), - Same or similar stage focus
selectinload(InvestorTable.team_members), - Similar geographic regions
selectinload(InvestorTable.sectors), - Overlapping check size ranges
).filter(InvestorTable.id != investor_id) # Exclude the target investor - Similar AUM levels (within a reasonable range)
- Common sector interests
# Filter by same stage focus """
query = query.filter(InvestorTable.stage_focus == target_investor.stage_focus)
# Use the AI agent to find similar investors
# Filter by similar geographic focus (partial match) query_processor = QueryProcessor()
query = query.filter(InvestorTable.geographic_focus.ilike(f"%{target_investor.geographic_focus}%")) result = query_processor.process_query(ai_query)
# Filter by overlapping check size ranges return result.investors
query = query.filter(
InvestorTable.check_size_upper >= target_investor.check_size_lower,
InvestorTable.check_size_lower <= target_investor.check_size_upper
)
# Filter by similar AUM (within 50% range)
aum_lower = int(target_investor.aum * 0.5)
aum_upper = int(target_investor.aum * 1.5)
query = query.filter(
InvestorTable.aum >= aum_lower,
InvestorTable.aum <= aum_upper
)
# Filter by common sectors
target_sector_names = [sector.name for sector in target_investor.sectors]
if target_sector_names:
query = query.join(InvestorTable.sectors).filter(
SectorTable.name.in_(target_sector_names)
)
investors = query.all()
# Transform to InvestorData format
investor_data_list = []
for investor in investors:
investor_data = InvestorData(
investor=investor,
portfolio_companies=investor.portfolio_companies,
team_members=investor.team_members,
sectors=investor.sectors,
)
investor_data_list.append(investor_data)
return investor_data_list
Binary file not shown.
+2 -2
View File
@@ -176,7 +176,7 @@ class InvestorProcessor:
async def parse_investors(self, df, save_to_db: bool = True): async def parse_investors(self, df, save_to_db: bool = True):
"""Parse investors from DataFrame and optionally save to database""" """Parse investors from DataFrame and optionally save to database"""
investors = [] investors = []
df = df[20:]
db = None db = None
if save_to_db: if save_to_db:
db = get_db_session() db = get_db_session()
@@ -244,7 +244,7 @@ class InvestorProcessor:
async def parse_companies(self, df, save_to_db: bool = True): async def parse_companies(self, df, save_to_db: bool = True):
"""Parse companies from DataFrame and optionally save to database""" """Parse companies from DataFrame and optionally save to database"""
companies = [] companies = []
df = df[20:]
db = None db = None
if save_to_db: if save_to_db:
db = get_db_session() db = get_db_session()