Update .gitignore to exclude the preprocessor directory; refactor the find_similar_investors function to replace the AI-agent query with similarity scoring based on investor characteristics, and add a limit parameter for the number of results.
This commit is contained in:
@@ -14,3 +14,5 @@
|
|||||||
|
|
||||||
*.cypython
|
*.cypython
|
||||||
|
|
||||||
|
/preprocessor
|
||||||
|
|
||||||
|
|||||||
+96
-27
@@ -5,7 +5,6 @@ from db.models import InvestorTable, SectorTable
|
|||||||
from fastapi import APIRouter, Depends, HTTPException, Query
|
from fastapi import APIRouter, Depends, HTTPException, Query
|
||||||
from pydantic import BaseModel
|
from pydantic import BaseModel
|
||||||
from schemas.router_schemas import InvestmentStage, InvestorData
|
from schemas.router_schemas import InvestmentStage, InvestorData
|
||||||
from services.querying import QueryProcessor
|
|
||||||
from sqlalchemy.orm import Session, selectinload
|
from sqlalchemy.orm import Session, selectinload
|
||||||
|
|
||||||
router = APIRouter(tags=["Investor Routes"])
|
router = APIRouter(tags=["Investor Routes"])
|
||||||
@@ -235,10 +234,14 @@ def delete_investor(investor_id: int, db: Session = Depends(get_db)):
|
|||||||
|
|
||||||
|
|
||||||
@router.get("/investors/{investor_id}/similar", response_model=List[InvestorData])
|
@router.get("/investors/{investor_id}/similar", response_model=List[InvestorData])
|
||||||
def find_similar_investors(investor_id: int, db: Session = Depends(get_db)):
|
def find_similar_investors(
|
||||||
"""Find investors similar to a given investor using AI agent"""
|
investor_id: int,
|
||||||
|
limit: int = Query(10, description="Maximum number of similar investors to return"),
|
||||||
|
db: Session = Depends(get_db),
|
||||||
|
):
|
||||||
|
"""Find investors similar to a given investor based on characteristics"""
|
||||||
|
|
||||||
# First, get the target investor to build the AI query
|
# Get the target investor
|
||||||
target_investor = (
|
target_investor = (
|
||||||
db.query(InvestorTable)
|
db.query(InvestorTable)
|
||||||
.options(
|
.options(
|
||||||
@@ -253,29 +256,95 @@ def find_similar_investors(investor_id: int, db: Session = Depends(get_db)):
|
|||||||
if not target_investor:
|
if not target_investor:
|
||||||
raise HTTPException(status_code=404, detail="Investor not found")
|
raise HTTPException(status_code=404, detail="Investor not found")
|
||||||
|
|
||||||
# Build a descriptive query for the AI agent based on target investor characteristics
|
# Get target investor's sector IDs for comparison
|
||||||
target_sectors = [sector.name for sector in target_investor.sectors]
|
target_sector_ids = {sector.id for sector in target_investor.sectors}
|
||||||
sectors_text = ", ".join(target_sectors) if target_sectors else "any sector"
|
|
||||||
|
|
||||||
ai_query = f"""
|
# Query all other investors with their relationships
|
||||||
Find investors similar to investor ID {investor_id} with the following characteristics:
|
candidates = (
|
||||||
- Stage focus: {target_investor.stage_focus.value if target_investor.stage_focus else "any stage"}
|
db.query(InvestorTable)
|
||||||
- Geographic focus: {target_investor.geographic_focus or "any geography"}
|
.options(
|
||||||
- Check size range: ${target_investor.check_size_lower or 0:,} to ${target_investor.check_size_upper or 0:,}
|
selectinload(InvestorTable.portfolio_companies),
|
||||||
- AUM (Assets Under Management): ${target_investor.aum or 0:,}
|
selectinload(InvestorTable.team_members),
|
||||||
- Sectors: {sectors_text}
|
selectinload(InvestorTable.sectors),
|
||||||
|
)
|
||||||
Find investors with similar characteristics but exclude investor ID {investor_id}.
|
.filter(InvestorTable.id != investor_id)
|
||||||
Look for investors with:
|
.all()
|
||||||
- Same or similar stage focus
|
)
|
||||||
- Similar geographic regions
|
|
||||||
- Overlapping check size ranges
|
|
||||||
- Similar AUM levels (within a reasonable range)
|
|
||||||
- Common sector interests
|
|
||||||
"""
|
|
||||||
|
|
||||||
# Use the AI agent to find similar investors
|
# Calculate similarity scores
|
||||||
query_processor = QueryProcessor()
|
scored_investors = []
|
||||||
result = query_processor.process_query(ai_query)
|
for candidate in candidates:
|
||||||
|
score = 0
|
||||||
|
|
||||||
return result.investors
|
# Stage focus match (30 points)
|
||||||
|
if candidate.stage_focus == target_investor.stage_focus:
|
||||||
|
score += 30
|
||||||
|
|
||||||
|
# Geographic focus match (20 points for exact, 10 for partial)
|
||||||
|
if candidate.geographic_focus and target_investor.geographic_focus:
|
||||||
|
if (
|
||||||
|
candidate.geographic_focus.lower()
|
||||||
|
== target_investor.geographic_focus.lower()
|
||||||
|
):
|
||||||
|
score += 20
|
||||||
|
elif (
|
||||||
|
candidate.geographic_focus.lower()
|
||||||
|
in target_investor.geographic_focus.lower()
|
||||||
|
or target_investor.geographic_focus.lower()
|
||||||
|
in candidate.geographic_focus.lower()
|
||||||
|
):
|
||||||
|
score += 10
|
||||||
|
|
||||||
|
# Check size overlap (20 points max)
|
||||||
|
if (
|
||||||
|
candidate.check_size_lower
|
||||||
|
and candidate.check_size_upper
|
||||||
|
and target_investor.check_size_lower
|
||||||
|
and target_investor.check_size_upper
|
||||||
|
):
|
||||||
|
# Calculate overlap percentage
|
||||||
|
overlap_start = max(
|
||||||
|
candidate.check_size_lower, target_investor.check_size_lower
|
||||||
|
)
|
||||||
|
overlap_end = min(
|
||||||
|
candidate.check_size_upper, target_investor.check_size_upper
|
||||||
|
)
|
||||||
|
if overlap_end > overlap_start:
|
||||||
|
overlap = overlap_end - overlap_start
|
||||||
|
target_range = (
|
||||||
|
target_investor.check_size_upper - target_investor.check_size_lower
|
||||||
|
)
|
||||||
|
overlap_ratio = overlap / target_range if target_range > 0 else 0
|
||||||
|
score += int(20 * overlap_ratio)
|
||||||
|
|
||||||
|
# AUM similarity (15 points max)
|
||||||
|
if candidate.aum and target_investor.aum:
|
||||||
|
aum_diff = abs(candidate.aum - target_investor.aum)
|
||||||
|
max_aum = max(candidate.aum, target_investor.aum)
|
||||||
|
similarity_ratio = 1 - (aum_diff / max_aum) if max_aum > 0 else 0
|
||||||
|
score += int(15 * similarity_ratio)
|
||||||
|
|
||||||
|
# Sector overlap (30 points max)
|
||||||
|
candidate_sector_ids = {sector.id for sector in candidate.sectors}
|
||||||
|
if target_sector_ids and candidate_sector_ids:
|
||||||
|
common_sectors = target_sector_ids.intersection(candidate_sector_ids)
|
||||||
|
overlap_ratio = len(common_sectors) / len(target_sector_ids)
|
||||||
|
score += int(30 * overlap_ratio)
|
||||||
|
|
||||||
|
if score > 0: # Only include investors with some similarity
|
||||||
|
scored_investors.append((score, candidate))
|
||||||
|
|
||||||
|
# Sort by score (descending) and take top N
|
||||||
|
scored_investors.sort(key=lambda x: x[0], reverse=True)
|
||||||
|
similar_investors = [inv for score, inv in scored_investors[:limit]]
|
||||||
|
|
||||||
|
# Transform to InvestorData format
|
||||||
|
return [
|
||||||
|
InvestorData(
|
||||||
|
investor=inv,
|
||||||
|
portfolio_companies=inv.portfolio_companies,
|
||||||
|
team_members=inv.team_members,
|
||||||
|
sectors=inv.sectors,
|
||||||
|
)
|
||||||
|
for inv in similar_investors
|
||||||
|
]
|
||||||
|
|||||||
Reference in New Issue
Block a user