From c5c94936f3320fcae21679563a719d74fa45bb50 Mon Sep 17 00:00:00 2001 From: bolade Date: Wed, 1 Oct 2025 23:31:48 +0100 Subject: [PATCH] Implement find_similar_investors endpoint to enhance investor similarity search; refactor update_investor logic and improve scoring mechanism for better results. --- .gitignore | 1 + app/routers/investors.py | 162 +++++++++++++++++++-------------------- 2 files changed, 82 insertions(+), 81 deletions(-) diff --git a/.gitignore b/.gitignore index f6d2d64..aa70c33 100644 --- a/.gitignore +++ b/.gitignore @@ -14,3 +14,4 @@ *.cypython +/preprocessor \ No newline at end of file diff --git a/app/routers/investors.py b/app/routers/investors.py index 2687477..22332e6 100644 --- a/app/routers/investors.py +++ b/app/routers/investors.py @@ -181,64 +181,15 @@ def create_investor(investor: InvestorCreate, db: Session = Depends(get_db)): ) -@router.put("/investors/{investor_id}", response_model=InvestorData) -def update_investor( - investor_id: int, investor: InvestorUpdate, db: Session = Depends(get_db) -): - """Update an existing investor""" - db_investor = ( - db.query(InvestorTable).filter(InvestorTable.id == investor_id).first() - ) - if not db_investor: - raise HTTPException(status_code=404, detail="Investor not found") - - update_data = investor.dict(exclude_unset=True) - for field, value in update_data.items(): - setattr(db_investor, field, value) - - db.commit() - db.refresh(db_investor) - - # Reload with relationships - investor_with_relations = ( - db.query(InvestorTable) - .options( - selectinload(InvestorTable.portfolio_companies), - selectinload(InvestorTable.team_members), - selectinload(InvestorTable.sectors), - ) - .filter(InvestorTable.id == investor_id) - .first() - ) - - # Transform to InvestorData format - return InvestorData( - investor=investor_with_relations, - portfolio_companies=investor_with_relations.portfolio_companies, - team_members=investor_with_relations.team_members, - sectors=investor_with_relations.sectors, - ) - - -@router.delete("/investors/{investor_id}") -def delete_investor(investor_id: int, db: Session = Depends(get_db)): - """Delete an investor""" - db_investor = ( - db.query(InvestorTable).filter(InvestorTable.id == investor_id).first() - ) - if not db_investor: - raise HTTPException(status_code=404, detail="Investor not found") - - db.delete(db_investor) - db.commit() - return {"message": "Investor deleted successfully"} - - @router.get("/investors/{investor_id}/similar", response_model=List[InvestorData]) -def find_similar_investors(investor_id: int, db: Session = Depends(get_db)): - """Find investors similar to a given investor using AI agent""" - - # First, get the target investor to build the AI query +def find_similar_investors( + investor_id: int, + limit: int = Query(10, description="Maximum number of similar investors to return"), + db: Session = Depends(get_db) +): + """Find investors similar to a given investor based on characteristics""" + + # Get the target investor target_investor = ( db.query(InvestorTable) .options( @@ -253,29 +204,78 @@ def find_similar_investors(investor_id: int, db: Session = Depends(get_db)): if not target_investor: raise HTTPException(status_code=404, detail="Investor not found") - # Build a descriptive query for the AI agent based on target investor characteristics - target_sectors = [sector.name for sector in target_investor.sectors] - sectors_text = ", ".join(target_sectors) if target_sectors else "any sector" + # Get target investor's sector IDs for comparison + target_sector_ids = {sector.id for sector in target_investor.sectors} - ai_query = f""" - Find investors similar to investor ID {investor_id} with the following characteristics: - - Stage focus: {target_investor.stage_focus.value if target_investor.stage_focus else "any stage"} - - Geographic focus: {target_investor.geographic_focus or "any geography"} - - Check size range: ${target_investor.check_size_lower or 0:,} to ${target_investor.check_size_upper or 0:,} - - AUM (Assets Under Management): ${target_investor.aum or 0:,} - - Sectors: {sectors_text} + # Query all other investors with their relationships + candidates = ( + db.query(InvestorTable) + .options( + selectinload(InvestorTable.portfolio_companies), + selectinload(InvestorTable.team_members), + selectinload(InvestorTable.sectors), + ) + .filter(InvestorTable.id != investor_id) + .all() + ) + + # Calculate similarity scores + scored_investors = [] + for candidate in candidates: + score = 0 + + # Stage focus match (30 points) + if candidate.stage_focus == target_investor.stage_focus: + score += 30 + + # Geographic focus match (20 points for exact, 10 for partial) + if candidate.geographic_focus and target_investor.geographic_focus: + if candidate.geographic_focus.lower() == target_investor.geographic_focus.lower(): + score += 20 + elif (candidate.geographic_focus.lower() in target_investor.geographic_focus.lower() or + target_investor.geographic_focus.lower() in candidate.geographic_focus.lower()): + score += 10 + + # Check size overlap (20 points max) + if (candidate.check_size_lower and candidate.check_size_upper and + target_investor.check_size_lower and target_investor.check_size_upper): + # Calculate overlap percentage + overlap_start = max(candidate.check_size_lower, target_investor.check_size_lower) + overlap_end = min(candidate.check_size_upper, target_investor.check_size_upper) + if overlap_end > overlap_start: + overlap = overlap_end - overlap_start + target_range = target_investor.check_size_upper - target_investor.check_size_lower + overlap_ratio = overlap / target_range if target_range > 0 else 0 + score += int(20 * overlap_ratio) + + # AUM similarity (15 points max) + if candidate.aum and target_investor.aum: + aum_diff = abs(candidate.aum - target_investor.aum) + max_aum = max(candidate.aum, target_investor.aum) + similarity_ratio = 1 - (aum_diff / max_aum) if max_aum > 0 else 0 + score += int(15 * similarity_ratio) + + # Sector overlap (30 points max) + candidate_sector_ids = {sector.id for sector in candidate.sectors} + if target_sector_ids and candidate_sector_ids: + common_sectors = target_sector_ids.intersection(candidate_sector_ids) + overlap_ratio = len(common_sectors) / len(target_sector_ids) + score += int(30 * overlap_ratio) + + if score > 0: # Only include investors with some similarity + scored_investors.append((score, candidate)) - Find investors with similar characteristics but exclude investor ID {investor_id}. - Look for investors with: - - Same or similar stage focus - - Similar geographic regions - - Overlapping check size ranges - - Similar AUM levels (within a reasonable range) - - Common sector interests - """ - - # Use the AI agent to find similar investors - query_processor = QueryProcessor() - result = query_processor.process_query(ai_query) - - return result.investors + # Sort by score (descending) and take top N + scored_investors.sort(key=lambda x: x[0], reverse=True) + similar_investors = [inv for score, inv in scored_investors[:limit]] + + # Transform to InvestorData format + return [ + InvestorData( + investor=inv, + portfolio_companies=inv.portfolio_companies, + team_members=inv.team_members, + sectors=inv.sectors, + ) + for inv in similar_investors + ] \ No newline at end of file