Files
Anton_wireframe/app/services/compatibility_score.py
T

647 lines
23 KiB
Python
Raw Normal View History

"""
Compatibility Score Service
This module calculates compatibility scores between projects and investors.
The scoring system evaluates multiple dimensions to determine how well a project
matches with an investor's investment criteria.
"""
from typing import List, Optional, Tuple
from db.models import FundTable, InvestorTable, ProjectTable
def calculate_project_investor_compatibility(
project: ProjectTable, investor: InvestorTable, use_funds: bool = True
) -> float:
"""
Calculate compatibility score between a project and an investor.
Args:
project: The project to evaluate
investor: The investor to compare against
use_funds: If True, evaluates against investor's funds. If False, uses investor-level data.
Returns:
A score between 0 and 1, where 1 is perfect match
Scoring breakdown (out of 100 points):
- Investment Stage Match: 30 points
- Sector Overlap: 30 points
- Geographic Match: 20 points
- Valuation/Check Size Fit: 20 points
"""
if use_funds and investor.funds:
# Calculate score for each fund and return the highest
max_score = 0.0
for fund in investor.funds:
fund_score = _calculate_project_fund_compatibility(project, fund)
max_score = max(max_score, fund_score)
return max_score
else:
# Use investor-level data (fallback)
return _calculate_project_investor_direct_compatibility(project, investor)
def calculate_project_investors_compatibility(
project: ProjectTable, investors: List[InvestorTable], use_funds: bool = True
) -> List[Tuple[InvestorTable, float]]:
"""
Calculate compatibility scores between a project and multiple investors.
Args:
project: The project to evaluate
investors: List of investors to compare against
use_funds: If True, evaluates against investors' funds. If False, uses investor-level data.
Returns:
List of tuples (investor, score) sorted by score descending
"""
scored_investors = []
for investor in investors:
score = calculate_project_investor_compatibility(project, investor, use_funds)
scored_investors.append((investor, score))
# Sort by score descending
scored_investors.sort(key=lambda x: x[1], reverse=True)
return scored_investors
def _calculate_project_fund_compatibility(
project: ProjectTable, fund: FundTable
) -> float:
"""
Calculate compatibility score between a project and a specific fund.
Scoring breakdown:
- Investment Stage Match: 30 points (all or nothing if stage exists)
- Sector Overlap: 30 points (proportional to overlap)
- Geographic Match: 20 points (exact=20, partial=10, none=0)
- Valuation/Check Size Fit: 20 points (proportional to fit)
Returns:
A score between 0 and 1
"""
total_score = 0
max_score = 100
# 1. Investment Stage Match (30 points)
stage_score = 0
if project.stage and fund.investment_stages:
# Check if project stage matches any of the fund's investment stages
fund_stage_names = {stage.name for stage in fund.investment_stages}
# Convert project.stage enum to string for comparison
project_stage_name = (
project.stage.value
if hasattr(project.stage, "value")
else str(project.stage)
)
if project_stage_name in fund_stage_names:
stage_score = 30
else:
# Partial credit for adjacent stages
stage_score = _calculate_stage_proximity(
project_stage_name, fund_stage_names
)
total_score += stage_score
# 2. Sector Overlap (30 points)
sector_score = 0
if project.sector and fund.sectors:
project_sector_ids = {sector.id for sector in project.sector}
fund_sector_ids = {sector.id for sector in fund.sectors}
if project_sector_ids and fund_sector_ids:
common_sectors = project_sector_ids.intersection(fund_sector_ids)
# Score based on what percentage of project sectors are covered by fund
overlap_ratio = len(common_sectors) / len(project_sector_ids)
sector_score = int(30 * overlap_ratio)
total_score += sector_score
# 3. Geographic Match (20 points)
geo_score = 0
if project.location and fund.geographic_focus:
project_location_lower = project.location.lower()
fund_geo_lower = (fund.geographic_focus or "").lower()
# Exact match
if project_location_lower == fund_geo_lower:
geo_score = 20
# Partial match (one contains the other)
elif (
project_location_lower in fund_geo_lower
or fund_geo_lower in project_location_lower
):
geo_score = 10
# Check for common geographic terms
elif _check_geographic_overlap(project_location_lower, fund_geo_lower):
geo_score = 5
total_score += geo_score
# 4. Valuation/Check Size Fit (20 points)
valuation_score = 0
if project.valuation and fund.check_size_lower and fund.check_size_upper:
# Check if project valuation falls within or near the check size range
# Typically, check size is a fraction of valuation (e.g., 10-20%)
# We'll assume check size represents potential investment amount
if fund.check_size_lower <= project.valuation <= fund.check_size_upper:
# Valuation is within the check size range (might be too small)
valuation_score = 10
else:
# Check if the check size is reasonable for this valuation
# Typical investment is 10-30% of valuation
reasonable_valuation_min = fund.check_size_lower * 3 # Investing ~33%
reasonable_valuation_max = fund.check_size_upper * 10 # Investing ~10%
if (
reasonable_valuation_min
<= project.valuation
<= reasonable_valuation_max
):
# Perfect fit
valuation_score = 20
elif project.valuation < reasonable_valuation_min:
# Project might be too small
ratio = (
project.valuation / reasonable_valuation_min
if reasonable_valuation_min > 0
else 0
)
valuation_score = int(10 * ratio)
else:
# Project might be too large
ratio = (
reasonable_valuation_max / project.valuation
if project.valuation > 0
else 0
)
valuation_score = int(10 * ratio)
total_score += valuation_score
# Convert to 0-1 scale
return total_score / max_score
def _calculate_project_investor_direct_compatibility(
project: ProjectTable, investor: InvestorTable
) -> float:
"""
Calculate compatibility using investor-level data (fallback when no funds available).
Uses the same scoring system but with investor-level attributes.
"""
total_score = 0
max_score = 100
# 1. Investment Stage - Skip this since investors don't have a direct stage field
# We could add 30 points to other categories, but for consistency, we'll leave it as 0
stage_score = 0
total_score += stage_score
# 2. Sector Overlap (30 points)
sector_score = 0
if project.sector and investor.sectors:
project_sector_ids = {sector.id for sector in project.sector}
investor_sector_ids = {sector.id for sector in investor.sectors}
if project_sector_ids and investor_sector_ids:
common_sectors = project_sector_ids.intersection(investor_sector_ids)
overlap_ratio = len(common_sectors) / len(project_sector_ids)
sector_score = int(30 * overlap_ratio)
total_score += sector_score
# 3. Geographic Match (20 points)
geo_score = 0
if project.location and investor.geographic_focus:
project_location_lower = project.location.lower()
investor_geo_lower = (investor.geographic_focus or "").lower()
if project_location_lower == investor_geo_lower:
geo_score = 20
elif (
project_location_lower in investor_geo_lower
or investor_geo_lower in project_location_lower
):
geo_score = 10
elif _check_geographic_overlap(project_location_lower, investor_geo_lower):
geo_score = 5
total_score += geo_score
# 4. Valuation/Check Size Fit (20 points)
valuation_score = 0
if project.valuation and investor.check_size_lower and investor.check_size_upper:
reasonable_valuation_min = investor.check_size_lower * 3
reasonable_valuation_max = investor.check_size_upper * 10
if reasonable_valuation_min <= project.valuation <= reasonable_valuation_max:
valuation_score = 20
elif project.valuation < reasonable_valuation_min:
ratio = (
project.valuation / reasonable_valuation_min
if reasonable_valuation_min > 0
else 0
)
valuation_score = int(10 * ratio)
else:
ratio = (
reasonable_valuation_max / project.valuation
if project.valuation > 0
else 0
)
valuation_score = int(10 * ratio)
total_score += valuation_score
# Convert to 0-1 scale
return total_score / max_score
def _calculate_stage_proximity(project_stage: str, fund_stages: set) -> int:
"""
Calculate proximity score between project stage and fund stages.
Awards partial credit for adjacent investment stages.
Stage progression: SEED -> SERIES_A -> SERIES_B -> SERIES_C -> GROWTH -> LATE_STAGE
Returns:
Score from 0-15 (half credit for adjacent stages)
"""
stage_order = ["SEED", "SERIES_A", "SERIES_B", "SERIES_C", "GROWTH", "LATE_STAGE"]
try:
project_idx = stage_order.index(project_stage)
except ValueError:
return 0
# Check for adjacent stages
adjacent_stages = []
if project_idx > 0:
adjacent_stages.append(stage_order[project_idx - 1])
if project_idx < len(stage_order) - 1:
adjacent_stages.append(stage_order[project_idx + 1])
for stage in fund_stages:
if stage in adjacent_stages:
return 15 # Half credit for adjacent stage
return 0
def _check_geographic_overlap(location1: str, location2: str) -> bool:
"""
Check for common geographic terms between two locations.
Examples:
- "San Francisco, CA" and "California" -> True
- "New York" and "USA" -> True (if both contain USA/US)
- "London, UK" and "United Kingdom" -> True
"""
# Common geographic groupings
geo_groups = [
["usa", "us", "united states", "america"],
["uk", "united kingdom", "britain"],
["california", "ca"],
["new york", "ny"],
["texas", "tx"],
["europe", "eu"],
["asia", "asian"],
["africa", "african"],
]
for group in geo_groups:
found_in_1 = any(term in location1 for term in group)
found_in_2 = any(term in location2 for term in group)
if found_in_1 and found_in_2:
return True
return False
def get_top_compatible_investors(
project: ProjectTable,
investors: List[InvestorTable],
limit: int = 10,
min_score: float = 0.0,
use_funds: bool = True,
) -> List[Tuple[InvestorTable, float]]:
"""
Get the top N most compatible investors for a project.
Args:
project: The project to find investors for
investors: List of all available investors
limit: Maximum number of investors to return
min_score: Minimum compatibility score threshold (0-1)
use_funds: If True, evaluates against investors' funds
Returns:
List of tuples (investor, score) sorted by score descending,
limited to 'limit' items and filtered by min_score
"""
scored_investors = calculate_project_investors_compatibility(
project, investors, use_funds
)
# Filter by minimum score
filtered_investors = [
(investor, score) for investor, score in scored_investors if score >= min_score
]
# Return top N
return filtered_investors[:limit]
def get_compatibility_score_breakdown(
project: ProjectTable, investor: InvestorTable, fund: Optional[FundTable] = None
) -> dict:
"""
Get a detailed breakdown of the compatibility score components.
Useful for debugging or showing users why a particular score was calculated.
Returns:
Dictionary with score components and explanations
"""
if fund:
total_score = 0
# Stage score
stage_score = 0
stage_match = False
if project.stage and fund.investment_stages:
fund_stage_names = {stage.name for stage in fund.investment_stages}
project_stage_name = (
project.stage.value
if hasattr(project.stage, "value")
else str(project.stage)
)
if project_stage_name in fund_stage_names:
stage_score = 30
stage_match = True
else:
stage_score = _calculate_stage_proximity(
project_stage_name, fund_stage_names
)
# Sector score
sector_score = 0
matching_sectors = []
if project.sector and fund.sectors:
project_sector_ids = {sector.id for sector in project.sector}
fund_sector_ids = {sector.id for sector in fund.sectors}
if project_sector_ids and fund_sector_ids:
common_sectors = project_sector_ids.intersection(fund_sector_ids)
matching_sectors = [
s.name for s in fund.sectors if s.id in common_sectors
]
overlap_ratio = len(common_sectors) / len(project_sector_ids)
sector_score = int(30 * overlap_ratio)
# Geographic score
geo_score = 0
geo_match_type = "none"
if project.location and fund.geographic_focus:
project_location_lower = project.location.lower()
fund_geo_lower = fund.geographic_focus.lower()
if project_location_lower == fund_geo_lower:
geo_score = 20
geo_match_type = "exact"
elif (
project_location_lower in fund_geo_lower
or fund_geo_lower in project_location_lower
):
geo_score = 10
geo_match_type = "partial"
elif _check_geographic_overlap(project_location_lower, fund_geo_lower):
geo_score = 5
geo_match_type = "regional"
# Valuation score
valuation_score = 0
valuation_fit = "unknown"
if project.valuation and fund.check_size_lower and fund.check_size_upper:
reasonable_valuation_min = fund.check_size_lower * 3
reasonable_valuation_max = fund.check_size_upper * 10
if (
reasonable_valuation_min
<= project.valuation
<= reasonable_valuation_max
):
valuation_score = 20
valuation_fit = "perfect"
elif project.valuation < reasonable_valuation_min:
ratio = (
project.valuation / reasonable_valuation_min
if reasonable_valuation_min > 0
else 0
)
valuation_score = int(10 * ratio)
valuation_fit = "too_small"
else:
ratio = (
reasonable_valuation_max / project.valuation
if project.valuation > 0
else 0
)
valuation_score = int(10 * ratio)
valuation_fit = "too_large"
total_score = stage_score + sector_score + geo_score + valuation_score
return {
"total_score": total_score / 100,
"breakdown": {
"stage": {
"score": stage_score,
"max_score": 30,
"match": stage_match,
"project_stage": project.stage.value if project.stage else None,
"fund_stages": [s.name for s in fund.investment_stages]
if fund.investment_stages
else [],
},
"sector": {
"score": sector_score,
"max_score": 30,
"matching_sectors": matching_sectors,
"project_sectors": [s.name for s in project.sector]
if project.sector
else [],
"fund_sectors": [s.name for s in fund.sectors]
if fund.sectors
else [],
},
"geography": {
"score": geo_score,
"max_score": 20,
"match_type": geo_match_type,
"project_location": project.location,
"fund_geography": fund.geographic_focus,
},
"valuation": {
"score": valuation_score,
"max_score": 20,
"fit": valuation_fit,
"project_valuation": project.valuation,
"fund_check_size_range": f"{fund.check_size_lower}-{fund.check_size_upper}"
if fund.check_size_lower
else None,
},
},
}
else:
# Investor-level breakdown (simplified)
return {
"total_score": _calculate_project_investor_direct_compatibility(
project, investor
),
"note": "Using investor-level data (no specific fund selected)",
}
def generate_compatibility_explanation(
project: ProjectTable, investor: InvestorTable, score: float, use_funds: bool = True
) -> str:
"""
Generate a detailed, natural language explanation of the compatibility score.
Args:
project: The project being evaluated
investor: The investor being compared against
score: The calculated compatibility score (0-1)
use_funds: Whether fund-level data was used
Returns:
A formatted string with the compatibility score and detailed explanation
"""
score_percentage = int(score * 100)
# Determine match quality
if score_percentage >= 80:
match_level = "Excellent match"
elif score_percentage >= 65:
match_level = "Strong match"
elif score_percentage >= 50:
match_level = "Good match"
elif score_percentage >= 35:
match_level = "Moderate match"
else:
match_level = "Limited match"
# Collect alignment factors
alignment_factors = []
recommendations = []
# Get the best matching fund if using funds
best_fund = None
if use_funds and investor.funds:
best_score = 0
for fund in investor.funds:
fund_score = _calculate_project_fund_compatibility(project, fund)
if fund_score > best_score:
best_score = fund_score
best_fund = fund
# Analyze sector alignment
if project.sector:
project_sectors = [s.name for s in project.sector if hasattr(s, "name")]
if best_fund and best_fund.sectors:
fund_sectors = {s.name for s in best_fund.sectors if hasattr(s, "name")}
common_sectors = set(project_sectors) & fund_sectors
if common_sectors:
sectors_str = ", ".join(list(common_sectors)[:2])
alignment_factors.append(f"{sectors_str} sector focus")
elif project_sectors:
recommendations.append(
f"Consider emphasizing any {project_sectors[0]} industry connections"
)
elif investor.sectors:
investor_sectors = {s.name for s in investor.sectors if hasattr(s, "name")}
common_sectors = set(project_sectors) & investor_sectors
if common_sectors:
sectors_str = ", ".join(list(common_sectors)[:2])
alignment_factors.append(f"{sectors_str} sector focus")
# Analyze stage alignment
if project.stage:
stage_name = (
project.stage.value
if hasattr(project.stage, "value")
else str(project.stage)
)
stage_display = stage_name.replace("_", " ").title()
if best_fund and best_fund.investment_stages:
fund_stage_names = {
s.name for s in best_fund.investment_stages if hasattr(s, "name")
}
if stage_name in fund_stage_names:
alignment_factors.append(f"{stage_display} stage")
else:
recommendations.append(
"Investor typically focuses on different stages; highlight your traction and growth metrics"
)
if not best_fund:
alignment_factors.append(f"{stage_display} stage")
# Analyze geographic alignment
if project.location:
if best_fund and best_fund.geographic_focus:
if (
project.location.lower() in best_fund.geographic_focus.lower()
or best_fund.geographic_focus.lower() in project.location.lower()
):
alignment_factors.append(f"{project.location} presence")
elif investor.headquarters:
if (
project.location.lower() in investor.headquarters.lower()
or investor.headquarters.lower() in project.location.lower()
):
alignment_factors.append(f"{project.location} market presence")
# Analyze valuation/check size fit
if project.valuation:
if best_fund and best_fund.check_size_lower and best_fund.check_size_upper:
reasonable_min = best_fund.check_size_lower * 3
reasonable_max = best_fund.check_size_upper * 10
if reasonable_min <= project.valuation <= reasonable_max:
alignment_factors.append("appropriate funding stage")
elif project.valuation < reasonable_min:
recommendations.append(
"You may be early for this investor; consider approaching at a later stage"
)
else:
recommendations.append(
"Consider highlighting your growth trajectory and market opportunity"
)
# Build the explanation
explanation_parts = [f"Based on your startup profile: {score_percentage}% match"]
if alignment_factors:
alignment_text = ", ".join(alignment_factors)
explanation_parts.append(f"{match_level}: {alignment_text}.")
else:
explanation_parts.append(f"{match_level}.")
if recommendations:
rec_text = recommendations[0] # Show the most important recommendation
explanation_parts.append(rec_text + ".")
return " ".join(explanation_parts)