510 lines
18 KiB
Python
510 lines
18 KiB
Python
"""
|
|
Compatibility Score Service

This module calculates compatibility scores between projects and investors.
The scoring system evaluates multiple dimensions to determine how well a project
matches with an investor's investment criteria.
|
|
"""
|
|
|
|
from typing import List, Optional, Tuple
|
|
|
|
from db.models import FundTable, InvestorTable, ProjectTable
|
|
|
|
|
|
def calculate_project_investor_compatibility(
    project: ProjectTable, investor: InvestorTable, use_funds: bool = True
) -> float:
    """
    Score how well a project fits a single investor.

    When ``use_funds`` is true and the investor has at least one fund, each
    fund is scored on its own and the best fund score is returned. In every
    other case the investor-level attributes are scored directly.

    Args:
        project: The project being evaluated.
        investor: The investor the project is compared against.
        use_funds: Prefer fund-level data when the investor has funds.

    Returns:
        The compatibility score: a 100-point total divided by 100.

    Point budget (out of 100):
        - Investment Stage Match: 30 points
        - Sector Overlap: 30 points
        - Geographic Match: 20 points
        - Valuation/Check Size Fit: 20 points
    """
    if not (use_funds and investor.funds):
        # Fund scoring disabled, or nothing to score: investor-level fallback.
        return _calculate_project_investor_direct_compatibility(project, investor)

    # Seed the candidates with 0.0 so the result can never drop below that floor.
    candidate_scores = [0.0]
    candidate_scores.extend(
        _calculate_project_fund_compatibility(project, fund)
        for fund in investor.funds
    )
    return max(candidate_scores)
|
|
|
|
|
|
def calculate_project_investors_compatibility(
    project: ProjectTable, investors: List[InvestorTable], use_funds: bool = True
) -> List[Tuple[InvestorTable, float]]:
    """
    Score one project against many investors and rank the results.

    Args:
        project: The project being evaluated.
        investors: The investors to score the project against.
        use_funds: Forwarded to ``calculate_project_investor_compatibility``;
            when true, investors with funds are scored through their funds.

    Returns:
        ``(investor, score)`` tuples ordered from highest to lowest score.
    """
    ranked = [
        (
            candidate,
            calculate_project_investor_compatibility(project, candidate, use_funds),
        )
        for candidate in investors
    ]

    # Best score first. The sort is stable, so equal scores keep input order.
    ranked.sort(key=lambda pair: pair[1], reverse=True)
    return ranked
|
|
|
|
|
|
def _calculate_project_fund_compatibility(
    project: ProjectTable, fund: FundTable
) -> float:
    """
    Score a project against one specific fund.

    Point budget (out of 100):
        - Investment Stage Match: 30 for a stage the fund lists; otherwise
          whatever ``_calculate_stage_proximity`` awards.
        - Sector Overlap: up to 30, proportional to the share of the
          project's sectors that the fund also covers (truncated to an int).
        - Geographic Match: 20 for identical text, 10 when one string contains
          the other, 5 when ``_check_geographic_overlap`` reports a match.
        - Valuation/Check Size Fit: 20 when the valuation lies between
          3x the lower check size and 10x the upper check size; a flat 10
          when the valuation lies inside the check-size range itself;
          otherwise up to 10, scaled by how far outside the range it is.

    A component contributes 0 when either side's data is missing (falsy).

    Returns:
        The point total divided by 100.
    """
    points = 0

    # 1. Investment stage (max 30)
    if project.stage and fund.investment_stages:
        accepted_stages = {entry.name for entry in fund.investment_stages}
        # Enum-like stages expose .value; anything else is compared via str().
        if hasattr(project.stage, "value"):
            stage_label = project.stage.value
        else:
            stage_label = str(project.stage)

        if stage_label in accepted_stages:
            points += 30
        else:
            # No direct hit: defer to the proximity helper for partial credit.
            points += _calculate_stage_proximity(stage_label, accepted_stages)

    # 2. Sector overlap (max 30)
    if project.sector and fund.sectors:
        wanted_ids = {item.id for item in project.sector}
        offered_ids = {item.id for item in fund.sectors}
        if wanted_ids and offered_ids:
            # Share of the project's sectors that the fund also invests in.
            covered_share = len(wanted_ids & offered_ids) / len(wanted_ids)
            points += int(30 * covered_share)

    # 3. Geography (max 20)
    if project.location and fund.geographic_focus:
        where = project.location.lower()
        focus = fund.geographic_focus.lower()
        if where == focus:
            points += 20
        elif where in focus or focus in where:
            points += 10
        elif _check_geographic_overlap(where, focus):
            points += 5

    # 4. Valuation versus check size (max 20)
    if project.valuation and fund.check_size_lower and fund.check_size_upper:
        valuation = project.valuation
        if fund.check_size_lower <= valuation <= fund.check_size_upper:
            # Valuation sits inside the check-size range itself (might be
            # too small a company for this fund): flat half credit.
            points += 10
        else:
            floor = fund.check_size_lower * 3  # investing ~33% of valuation
            ceiling = fund.check_size_upper * 10  # investing ~10% of valuation
            if floor <= valuation <= ceiling:
                points += 20
            elif valuation < floor:
                # Below the range: scale by how close the valuation gets.
                points += int(10 * (valuation / floor)) if floor > 0 else 0
            else:
                # Above the range: scale by how far it overshoots.
                points += int(10 * (ceiling / valuation)) if valuation > 0 else 0

    # Convert the 100-point total to a fraction.
    return points / 100
|
|
|
|
|
|
def _calculate_project_investor_direct_compatibility(
    project: ProjectTable, investor: InvestorTable
) -> float:
    """
    Score a project against investor-level attributes.

    This is the fallback used when fund-level data is not evaluated. It keeps
    the 100-point scale, but no stage information is read from the investor,
    so the 30 stage points are never awarded and the result tops out at 0.7.

    Components:
        - Sector Overlap: up to 30, proportional to the share of the
          project's sectors the investor also covers (truncated to an int).
        - Geographic Match: 20 identical, 10 when one string contains the
          other, 5 when ``_check_geographic_overlap`` reports a match.
        - Valuation/Check Size Fit: 20 when the valuation lies between 3x the
          lower and 10x the upper check size, otherwise up to 10 scaled by
          the distance from that range.

    Returns:
        The point total divided by 100.
    """
    # The stage component is deliberately left at 0 here, so start from 0.
    points = 0

    # Sector overlap (max 30)
    if project.sector and investor.sectors:
        wanted_ids = {item.id for item in project.sector}
        backed_ids = {item.id for item in investor.sectors}
        if wanted_ids and backed_ids:
            covered_share = len(wanted_ids & backed_ids) / len(wanted_ids)
            points += int(30 * covered_share)

    # Geography (max 20)
    if project.location and investor.geographic_focus:
        where = project.location.lower()
        focus = investor.geographic_focus.lower()
        if where == focus:
            points += 20
        elif where in focus or focus in where:
            points += 10
        elif _check_geographic_overlap(where, focus):
            points += 5

    # Valuation versus check size (max 20)
    if project.valuation and investor.check_size_lower and investor.check_size_upper:
        floor = investor.check_size_lower * 3
        ceiling = investor.check_size_upper * 10
        valuation = project.valuation
        if floor <= valuation <= ceiling:
            points += 20
        elif valuation < floor:
            # Below the range: partial credit proportional to closeness.
            points += int(10 * (valuation / floor)) if floor > 0 else 0
        else:
            # Above the range: partial credit shrinking with the overshoot.
            points += int(10 * (ceiling / valuation)) if valuation > 0 else 0

    # Convert the 100-point total to a fraction.
    return points / 100
|
|
|
|
|
|
def _calculate_stage_proximity(project_stage: str, fund_stages: set) -> int:
|
|
"""
|
|
Calculate proximity score between project stage and fund stages.
|
|
Awards partial credit for adjacent investment stages.
|
|
|
|
Stage progression: SEED -> SERIES_A -> SERIES_B -> SERIES_C -> GROWTH -> LATE_STAGE
|
|
|
|
Returns:
|
|
Score from 0-15 (half credit for adjacent stages)
|
|
"""
|
|
stage_order = ["SEED", "SERIES_A", "SERIES_B", "SERIES_C", "GROWTH", "LATE_STAGE"]
|
|
|
|
try:
|
|
project_idx = stage_order.index(project_stage)
|
|
except ValueError:
|
|
return 0
|
|
|
|
# Check for adjacent stages
|
|
adjacent_stages = []
|
|
if project_idx > 0:
|
|
adjacent_stages.append(stage_order[project_idx - 1])
|
|
if project_idx < len(stage_order) - 1:
|
|
adjacent_stages.append(stage_order[project_idx + 1])
|
|
|
|
for stage in fund_stages:
|
|
if stage in adjacent_stages:
|
|
return 15 # Half credit for adjacent stage
|
|
|
|
return 0
|
|
|
|
|
|
def _check_geographic_overlap(location1: str, location2: str) -> bool:
|
|
"""
|
|
Check for common geographic terms between two locations.
|
|
|
|
Examples:
|
|
- "San Francisco, CA" and "California" -> True
|
|
- "New York" and "USA" -> True (if both contain USA/US)
|
|
- "London, UK" and "United Kingdom" -> True
|
|
"""
|
|
# Common geographic groupings
|
|
geo_groups = [
|
|
["usa", "us", "united states", "america"],
|
|
["uk", "united kingdom", "britain"],
|
|
["california", "ca"],
|
|
["new york", "ny"],
|
|
["texas", "tx"],
|
|
["europe", "eu"],
|
|
["asia", "asian"],
|
|
["africa", "african"],
|
|
]
|
|
|
|
for group in geo_groups:
|
|
found_in_1 = any(term in location1 for term in group)
|
|
found_in_2 = any(term in location2 for term in group)
|
|
if found_in_1 and found_in_2:
|
|
return True
|
|
|
|
return False
|
|
|
|
|
|
def get_top_compatible_investors(
    project: ProjectTable,
    investors: List[InvestorTable],
    limit: int = 10,
    min_score: float = 0.0,
    use_funds: bool = True,
) -> List[Tuple[InvestorTable, float]]:
    """
    Return the best-matching investors for a project.

    Args:
        project: The project to find investors for.
        investors: All investors to consider.
        limit: Maximum number of results (applied as a slice bound).
        min_score: Results scoring below this threshold are dropped.
        use_funds: If True, investors are evaluated through their funds.

    Returns:
        ``(investor, score)`` tuples, highest score first, with everything
        below ``min_score`` removed and the rest cut to ``limit`` entries.
    """
    ranked = calculate_project_investors_compatibility(project, investors, use_funds)

    # Apply the threshold first; the ranking order is already descending.
    qualifying = list(filter(lambda pair: pair[1] >= min_score, ranked))

    return qualifying[:limit]
|
|
|
|
|
|
def get_compatibility_score_breakdown(
    project: ProjectTable, investor: InvestorTable, fund: Optional[FundTable] = None
) -> dict:
    """
    Get a detailed breakdown of the compatibility score components.

    Useful for debugging or for showing users why a particular score was
    calculated. The fund-level components follow the same rules as
    ``_calculate_project_fund_compatibility`` so that the reported
    ``total_score`` equals the score used for ranking.

    Args:
        project: The project being evaluated.
        investor: The investor; only used when no fund is given.
        fund: The specific fund to explain. When omitted (or falsy), a
            simplified investor-level result is returned instead.

    Returns:
        With a fund: ``{"total_score": float, "breakdown": {...}}`` where the
        breakdown has "stage", "sector", "geography" and "valuation" entries,
        each carrying its score, maximum score and the inputs compared.
        Without a fund: ``{"total_score": float, "note": str}``.
    """
    if not fund:
        # Investor-level breakdown (simplified)
        return {
            "total_score": _calculate_project_investor_direct_compatibility(
                project, investor
            ),
            "note": "Using investor-level data (no specific fund selected)",
        }

    # Stage score
    stage_score = 0
    stage_match = False
    # Normalise the stage once: enum-like values expose .value, anything else
    # is rendered with str(). Reused for the report below so a non-enum stage
    # no longer raises AttributeError there.
    project_stage_name = None
    if project.stage:
        project_stage_name = (
            project.stage.value
            if hasattr(project.stage, "value")
            else str(project.stage)
        )
    if project.stage and fund.investment_stages:
        fund_stage_names = {stage.name for stage in fund.investment_stages}
        if project_stage_name in fund_stage_names:
            stage_score = 30
            stage_match = True
        else:
            # Partial credit for adjacent stages
            stage_score = _calculate_stage_proximity(
                project_stage_name, fund_stage_names
            )

    # Sector score
    sector_score = 0
    matching_sectors = []
    if project.sector and fund.sectors:
        project_sector_ids = {sector.id for sector in project.sector}
        fund_sector_ids = {sector.id for sector in fund.sectors}
        if project_sector_ids and fund_sector_ids:
            common_sectors = project_sector_ids.intersection(fund_sector_ids)
            matching_sectors = [
                s.name for s in fund.sectors if s.id in common_sectors
            ]
            # Share of the project's sectors that the fund also covers
            overlap_ratio = len(common_sectors) / len(project_sector_ids)
            sector_score = int(30 * overlap_ratio)

    # Geographic score
    geo_score = 0
    geo_match_type = "none"
    if project.location and fund.geographic_focus:
        project_location_lower = project.location.lower()
        fund_geo_lower = fund.geographic_focus.lower()
        if project_location_lower == fund_geo_lower:
            geo_score = 20
            geo_match_type = "exact"
        elif (
            project_location_lower in fund_geo_lower
            or fund_geo_lower in project_location_lower
        ):
            geo_score = 10
            geo_match_type = "partial"
        elif _check_geographic_overlap(project_location_lower, fund_geo_lower):
            geo_score = 5
            geo_match_type = "regional"

    # Valuation score
    valuation_score = 0
    valuation_fit = "unknown"
    if project.valuation and fund.check_size_lower and fund.check_size_upper:
        if fund.check_size_lower <= project.valuation <= fund.check_size_upper:
            # Same rule as the fund scorer: a valuation inside the check-size
            # range itself earns a flat 10 (the company might be too small).
            # Without this branch the breakdown total could differ from the
            # score actually used for ranking.
            valuation_score = 10
            valuation_fit = "too_small"
        else:
            reasonable_valuation_min = fund.check_size_lower * 3  # ~33% stake
            reasonable_valuation_max = fund.check_size_upper * 10  # ~10% stake
            if (
                reasonable_valuation_min
                <= project.valuation
                <= reasonable_valuation_max
            ):
                valuation_score = 20
                valuation_fit = "perfect"
            elif project.valuation < reasonable_valuation_min:
                ratio = (
                    project.valuation / reasonable_valuation_min
                    if reasonable_valuation_min > 0
                    else 0
                )
                valuation_score = int(10 * ratio)
                valuation_fit = "too_small"
            else:
                ratio = (
                    reasonable_valuation_max / project.valuation
                    if project.valuation > 0
                    else 0
                )
                valuation_score = int(10 * ratio)
                valuation_fit = "too_large"

    total_score = stage_score + sector_score + geo_score + valuation_score

    return {
        "total_score": total_score / 100,
        "breakdown": {
            "stage": {
                "score": stage_score,
                "max_score": 30,
                "match": stage_match,
                "project_stage": project_stage_name,
                "fund_stages": [s.name for s in fund.investment_stages]
                if fund.investment_stages
                else [],
            },
            "sector": {
                "score": sector_score,
                "max_score": 30,
                "matching_sectors": matching_sectors,
                "project_sectors": [s.name for s in project.sector]
                if project.sector
                else [],
                "fund_sectors": [s.name for s in fund.sectors]
                if fund.sectors
                else [],
            },
            "geography": {
                "score": geo_score,
                "max_score": 20,
                "match_type": geo_match_type,
                "project_location": project.location,
                "fund_geography": fund.geographic_focus,
            },
            "valuation": {
                "score": valuation_score,
                "max_score": 20,
                "fit": valuation_fit,
                "project_valuation": project.valuation,
                "fund_check_size_range": f"{fund.check_size_lower}-{fund.check_size_upper}"
                if fund.check_size_lower
                else None,
            },
        },
    }
|