diff --git a/.gitignore b/.gitignore index f6d2d64..c565444 100644 --- a/.gitignore +++ b/.gitignore @@ -14,3 +14,5 @@ *.cypython +/preprocessor + diff --git a/app/routers/investors.py b/app/routers/investors.py index 2687477..3b1b20b 100644 --- a/app/routers/investors.py +++ b/app/routers/investors.py @@ -5,7 +5,6 @@ from db.models import InvestorTable, SectorTable from fastapi import APIRouter, Depends, HTTPException, Query from pydantic import BaseModel from schemas.router_schemas import InvestmentStage, InvestorData -from services.querying import QueryProcessor from sqlalchemy.orm import Session, selectinload router = APIRouter(tags=["Investor Routes"]) @@ -235,10 +234,14 @@ def delete_investor(investor_id: int, db: Session = Depends(get_db)): @router.get("/investors/{investor_id}/similar", response_model=List[InvestorData]) -def find_similar_investors(investor_id: int, db: Session = Depends(get_db)): - """Find investors similar to a given investor using AI agent""" +def find_similar_investors( + investor_id: int, + limit: int = Query(10, description="Maximum number of similar investors to return"), + db: Session = Depends(get_db), +): + """Find investors similar to a given investor based on characteristics""" - # First, get the target investor to build the AI query + # Get the target investor target_investor = ( db.query(InvestorTable) .options( @@ -253,29 +256,95 @@ def find_similar_investors(investor_id: int, db: Session = Depends(get_db)): if not target_investor: raise HTTPException(status_code=404, detail="Investor not found") - # Build a descriptive query for the AI agent based on target investor characteristics - target_sectors = [sector.name for sector in target_investor.sectors] - sectors_text = ", ".join(target_sectors) if target_sectors else "any sector" + # Get target investor's sector IDs for comparison + target_sector_ids = {sector.id for sector in target_investor.sectors} - ai_query = f""" - Find investors similar to investor ID {investor_id} with the following characteristics: - - Stage focus: {target_investor.stage_focus.value if target_investor.stage_focus else "any stage"} - - Geographic focus: {target_investor.geographic_focus or "any geography"} - - Check size range: ${target_investor.check_size_lower or 0:,} to ${target_investor.check_size_upper or 0:,} - - AUM (Assets Under Management): ${target_investor.aum or 0:,} - - Sectors: {sectors_text} - - Find investors with similar characteristics but exclude investor ID {investor_id}. - Look for investors with: - - Same or similar stage focus - - Similar geographic regions - - Overlapping check size ranges - - Similar AUM levels (within a reasonable range) - - Common sector interests - """ + # Query all other investors with their relationships + candidates = ( + db.query(InvestorTable) + .options( + selectinload(InvestorTable.portfolio_companies), + selectinload(InvestorTable.team_members), + selectinload(InvestorTable.sectors), + ) + .filter(InvestorTable.id != investor_id) + .all() + ) - # Use the AI agent to find similar investors - query_processor = QueryProcessor() - result = query_processor.process_query(ai_query) + # Calculate similarity scores + scored_investors = [] + for candidate in candidates: + score = 0 - return result.investors + # Stage focus match (30 points) + if candidate.stage_focus == target_investor.stage_focus: + score += 30 + + # Geographic focus match (20 points for exact, 10 for partial) + if candidate.geographic_focus and target_investor.geographic_focus: + if ( + candidate.geographic_focus.lower() + == target_investor.geographic_focus.lower() + ): + score += 20 + elif ( + candidate.geographic_focus.lower() + in target_investor.geographic_focus.lower() + or target_investor.geographic_focus.lower() + in candidate.geographic_focus.lower() + ): + score += 10 + + # Check size overlap (20 points max) + if ( + candidate.check_size_lower + and candidate.check_size_upper + and target_investor.check_size_lower + and target_investor.check_size_upper + ): + # Calculate overlap percentage + overlap_start = max( + candidate.check_size_lower, target_investor.check_size_lower + ) + overlap_end = min( + candidate.check_size_upper, target_investor.check_size_upper + ) + if overlap_end > overlap_start: + overlap = overlap_end - overlap_start + target_range = ( + target_investor.check_size_upper - target_investor.check_size_lower + ) + overlap_ratio = overlap / target_range if target_range > 0 else 0 + score += int(20 * overlap_ratio) + + # AUM similarity (15 points max) + if candidate.aum and target_investor.aum: + aum_diff = abs(candidate.aum - target_investor.aum) + max_aum = max(candidate.aum, target_investor.aum) + similarity_ratio = 1 - (aum_diff / max_aum) if max_aum > 0 else 0 + score += int(15 * similarity_ratio) + + # Sector overlap (30 points max) + candidate_sector_ids = {sector.id for sector in candidate.sectors} + if target_sector_ids and candidate_sector_ids: + common_sectors = target_sector_ids.intersection(candidate_sector_ids) + overlap_ratio = len(common_sectors) / len(target_sector_ids) + score += int(30 * overlap_ratio) + + if score > 0: # Only include investors with some similarity + scored_investors.append((score, candidate)) + + # Sort by score (descending) and take top N + scored_investors.sort(key=lambda x: x[0], reverse=True) + similar_investors = [inv for score, inv in scored_investors[:limit]] + + # Transform to InvestorData format + return [ + InvestorData( + investor=inv, + portfolio_companies=inv.portfolio_companies, + team_members=inv.team_members, + sectors=inv.sectors, + ) + for inv in similar_investors + ]