2 Commits

6 changed files with 84 additions and 106 deletions
+1
View File
@@ -14,3 +14,4 @@
*.cypython *.cypython
/preprocessor
Binary file not shown.
Binary file not shown.
+74 -97
View File
@@ -1,13 +1,12 @@
from typing import List, Optional from typing import List, Optional
from db.db import get_db from db.db import get_db
from db.models import InvestorTable, SectorTable from db.models import InvestorTable, SectorTable
from fastapi import APIRouter, Depends, HTTPException, Query from fastapi import APIRouter, Depends, HTTPException, Query
from schemas.router_schemas import InvestmentStage, InvestorData
from pydantic import BaseModel from pydantic import BaseModel
from sqlalchemy.orm import Session, selectinload from schemas.router_schemas import InvestmentStage, InvestorData
from services.querying import QueryProcessor from services.querying import QueryProcessor
from sqlalchemy.orm import Session, selectinload
router = APIRouter(tags=["Investor Routes"]) router = APIRouter(tags=["Investor Routes"])
@@ -182,64 +181,15 @@ def create_investor(investor: InvestorCreate, db: Session = Depends(get_db)):
) )
@router.put("/investors/{investor_id}", response_model=InvestorData)
def update_investor(
    investor_id: int, investor: InvestorUpdate, db: Session = Depends(get_db)
):
    """Apply a partial update to an investor and return it with its relations.

    Args:
        investor_id: Primary key of the investor to modify.
        investor: Update payload; only fields explicitly set on it are written.
        db: Database session supplied by the ``get_db`` dependency.

    Returns:
        An ``InvestorData`` built from the updated row together with its
        portfolio companies, team members and sectors.

    Raises:
        HTTPException: 404 when no investor has the given id.
    """
    existing = (
        db.query(InvestorTable).filter(InvestorTable.id == investor_id).first()
    )
    if not existing:
        raise HTTPException(status_code=404, detail="Investor not found")

    # exclude_unset keeps omitted fields from overwriting stored values.
    changes = investor.dict(exclude_unset=True)
    for column_name in changes:
        setattr(existing, column_name, changes[column_name])

    db.commit()
    db.refresh(existing)

    # Fetch the row again with every relationship eagerly loaded so the
    # response can be assembled without further lazy loads.
    eager_loads = (
        selectinload(InvestorTable.portfolio_companies),
        selectinload(InvestorTable.team_members),
        selectinload(InvestorTable.sectors),
    )
    reloaded = (
        db.query(InvestorTable)
        .options(*eager_loads)
        .filter(InvestorTable.id == investor_id)
        .first()
    )

    return InvestorData(
        investor=reloaded,
        portfolio_companies=reloaded.portfolio_companies,
        team_members=reloaded.team_members,
        sectors=reloaded.sectors,
    )
@router.delete("/investors/{investor_id}")
def delete_investor(investor_id: int, db: Session = Depends(get_db)):
    """Remove an investor by id.

    Args:
        investor_id: Primary key of the investor to remove.
        db: Database session supplied by the ``get_db`` dependency.

    Returns:
        A dict with a ``message`` key confirming the deletion.

    Raises:
        HTTPException: 404 when no investor has the given id.
    """
    lookup = db.query(InvestorTable).filter(InvestorTable.id == investor_id)
    doomed = lookup.first()
    if not doomed:
        raise HTTPException(status_code=404, detail="Investor not found")

    db.delete(doomed)
    db.commit()

    return {"message": "Investor deleted successfully"}
def _similarity_score(target, candidate, target_sector_ids):
    """Score how closely ``candidate`` resembles ``target`` (0 to 115 points).

    Points are awarded per characteristic: stage focus (30), geographic
    focus (20 exact / 10 partial), check-size overlap (up to 20), AUM
    closeness (up to 15) and shared sectors (up to 30).

    Args:
        target: The investor being compared against.
        candidate: The investor being scored.
        target_sector_ids: Set of sector ids belonging to ``target``.

    Returns:
        The integer similarity score; 0 means nothing in common.
    """
    score = 0

    # Stage focus match (30 points). A missing stage on the target is not a
    # match: otherwise two investors with no recorded stage would score as
    # identical, unlike the geographic check which needs data on both sides.
    if (
        target.stage_focus is not None
        and candidate.stage_focus == target.stage_focus
    ):
        score += 30

    # Geographic focus match (20 points for exact, 10 for partial).
    if candidate.geographic_focus and target.geographic_focus:
        candidate_geo = candidate.geographic_focus.lower()
        target_geo = target.geographic_focus.lower()
        if candidate_geo == target_geo:
            score += 20
        elif candidate_geo in target_geo or target_geo in candidate_geo:
            score += 10

    # Check size overlap (20 points max). Compare against None rather than
    # relying on truthiness so that a lower bound of 0 still counts as data.
    bounds = (
        candidate.check_size_lower,
        candidate.check_size_upper,
        target.check_size_lower,
        target.check_size_upper,
    )
    if all(bound is not None for bound in bounds):
        overlap_start = max(candidate.check_size_lower, target.check_size_lower)
        overlap_end = min(candidate.check_size_upper, target.check_size_upper)
        target_range = target.check_size_upper - target.check_size_lower
        if overlap_end > overlap_start and target_range > 0:
            overlap_ratio = (overlap_end - overlap_start) / target_range
            score += int(20 * overlap_ratio)

    # AUM similarity (15 points max): ratio of the smaller to the larger AUM.
    if candidate.aum and target.aum:
        larger_aum = max(candidate.aum, target.aum)
        if larger_aum > 0:
            aum_diff = abs(candidate.aum - target.aum)
            score += int(15 * (1 - (aum_diff / larger_aum)))

    # Sector overlap (30 points max): share of the target's sectors that the
    # candidate also covers.
    candidate_sector_ids = {sector.id for sector in candidate.sectors}
    if target_sector_ids and candidate_sector_ids:
        shared = target_sector_ids & candidate_sector_ids
        score += int(30 * (len(shared) / len(target_sector_ids)))

    return score


@router.get("/investors/{investor_id}/similar", response_model=List[InvestorData])
def find_similar_investors(
    investor_id: int,
    limit: int = Query(
        10,
        ge=0,  # a negative limit would slice from the end instead of limiting
        description="Maximum number of similar investors to return",
    ),
    db: Session = Depends(get_db),
):
    """Find investors similar to a given investor based on characteristics.

    Every other investor is scored against the target and the highest
    scoring ones are returned, best match first. Candidates scoring 0 are
    omitted.

    Args:
        investor_id: Primary key of the investor to compare against.
        limit: Maximum number of results; must be non-negative.
        db: Database session supplied by the ``get_db`` dependency.

    Returns:
        A list of ``InvestorData`` for the most similar investors.

    Raises:
        HTTPException: 404 when no investor has the given id.
    """
    # Get the target investor.
    # NOTE(review): the eager-load options and filter of this lookup were
    # elided in the reviewed diff; they are assumed to mirror the candidate
    # query below. Confirm against the full file.
    target_investor = (
        db.query(InvestorTable)
        .options(
            selectinload(InvestorTable.portfolio_companies),
            selectinload(InvestorTable.team_members),
            selectinload(InvestorTable.sectors),
        )
        .filter(InvestorTable.id == investor_id)
        .first()
    )

    if not target_investor:
        raise HTTPException(status_code=404, detail="Investor not found")

    # Get target investor's sector IDs once; they are reused per candidate.
    target_sector_ids = {sector.id for sector in target_investor.sectors}

    # Query all other investors with their relationships.
    candidates = (
        db.query(InvestorTable)
        .options(
            selectinload(InvestorTable.portfolio_companies),
            selectinload(InvestorTable.team_members),
            selectinload(InvestorTable.sectors),
        )
        .filter(InvestorTable.id != investor_id)
        .all()
    )

    # Score every candidate, keeping only those with some similarity.
    scored_investors = []
    for candidate in candidates:
        score = _similarity_score(target_investor, candidate, target_sector_ids)
        if score > 0:
            scored_investors.append((score, candidate))

    # Sort by score (descending) and take the top N. The sort is stable, so
    # ties keep the order in which the database returned them.
    scored_investors.sort(key=lambda pair: pair[0], reverse=True)
    similar_investors = [inv for _, inv in scored_investors[:limit]]

    # Transform to InvestorData format.
    return [
        InvestorData(
            investor=inv,
            portfolio_companies=inv.portfolio_companies,
            team_members=inv.team_members,
            sectors=inv.sectors,
        )
        for inv in similar_investors
    ]
Binary file not shown.
+2 -2
View File
@@ -176,7 +176,7 @@ class InvestorProcessor:
async def parse_investors(self, df, save_to_db: bool = True): async def parse_investors(self, df, save_to_db: bool = True):
"""Parse investors from DataFrame and optionally save to database""" """Parse investors from DataFrame and optionally save to database"""
investors = [] investors = []
df = df[20:]
db = None db = None
if save_to_db: if save_to_db:
db = get_db_session() db = get_db_session()
@@ -244,7 +244,7 @@ class InvestorProcessor:
async def parse_companies(self, df, save_to_db: bool = True): async def parse_companies(self, df, save_to_db: bool = True):
"""Parse companies from DataFrame and optionally save to database""" """Parse companies from DataFrame and optionally save to database"""
companies = [] companies = []
df = df[20:]
db = None db = None
if save_to_db: if save_to_db:
db = get_db_session() db = get_db_session()