647 lines
23 KiB
Python
647 lines
23 KiB
Python
"""
|
|
Compatibility Score Service
|
|
|
|
This module calculates compatibility scores between projects and investors.
|
|
The scoring system evaluates multiple dimensions to determine how well a project
|
|
matches with an investor's investment criteria.
|
|
"""
|
|
|
|
from typing import List, Optional, Tuple
|
|
|
|
from db.models import FundTable, InvestorTable, ProjectTable
|
|
|
|
|
|
def calculate_project_investor_compatibility(
    project: ProjectTable, investor: InvestorTable, use_funds: bool = True
) -> float:
    """
    Score how well a project fits an investor.

    When ``use_funds`` is true and the investor has at least one fund, each
    fund is scored on its own and the best fund score is returned. In every
    other case the investor's own attributes are scored directly.

    Args:
        project: The project to evaluate
        investor: The investor to compare against
        use_funds: If True, evaluates against investor's funds. If False, uses investor-level data.

    Returns:
        The compatibility score on a 0-1 scale, where 1 is a perfect match

    Scoring breakdown (out of 100 points):
    - Investment Stage Match: 30 points
    - Sector Overlap: 30 points
    - Geographic Match: 20 points
    - Valuation/Check Size Fit: 20 points
    """
    if not (use_funds and investor.funds):
        # Fallback path: fund data was not requested or is not available.
        return _calculate_project_investor_direct_compatibility(project, investor)

    per_fund_scores = [
        _calculate_project_fund_compatibility(project, candidate_fund)
        for candidate_fund in investor.funds
    ]
    # Seeding with 0.0 keeps the result from ever falling below zero.
    return max([0.0, *per_fund_scores])
|
|
|
|
|
|
def calculate_project_investors_compatibility(
    project: ProjectTable, investors: List[InvestorTable], use_funds: bool = True
) -> List[Tuple[InvestorTable, float]]:
    """
    Score one project against a collection of investors and rank the results.

    Args:
        project: The project to evaluate
        investors: List of investors to compare against
        use_funds: If True, evaluates against investors' funds. If False, uses investor-level data.

    Returns:
        List of tuples (investor, score) sorted by score descending.
        Investors with equal scores keep their input order (the sort is stable).
    """
    pairs = [
        (
            candidate,
            calculate_project_investor_compatibility(project, candidate, use_funds),
        )
        for candidate in investors
    ]

    # Highest score first.
    return sorted(pairs, key=lambda pair: pair[1], reverse=True)
|
|
|
|
|
|
def _calculate_project_fund_compatibility(
    project: ProjectTable, fund: FundTable
) -> float:
    """
    Calculate compatibility score between a project and a specific fund.

    Scoring breakdown (100 points in total):
    - Investment Stage Match: 30 points. An exact stage match earns 30;
      otherwise the result of ``_calculate_stage_proximity`` is used
      (partial credit for a near miss).
    - Sector Overlap: 30 points, proportional to the share of the project's
      sectors that the fund also covers (truncated to a whole point).
    - Geographic Match: 20 points (exact=20, one string contains the
      other=10, shared geographic term=5, none=0).
    - Valuation/Check Size Fit: 20 points. 10 when the valuation itself lies
      inside the fund's check-size range; otherwise 20 when it lies between
      3x the lower and 10x the upper check size, and up to 10 (proportional)
      when it falls outside that band.

    A dimension scores 0 when either side lacks the data it needs. A
    non-positive valuation is treated as missing so the total cannot go
    negative.

    Returns:
        A score between 0 and 1
    """
    max_score = 100

    # 1. Investment Stage Match (30 points)
    stage_score = 0
    if project.stage and fund.investment_stages:
        fund_stage_names = {stage.name for stage in fund.investment_stages}
        # project.stage may be an enum member or a plain value; compare by string
        project_stage_name = (
            project.stage.value
            if hasattr(project.stage, "value")
            else str(project.stage)
        )

        if project_stage_name in fund_stage_names:
            stage_score = 30
        else:
            # Partial credit for adjacent stages
            stage_score = _calculate_stage_proximity(
                project_stage_name, fund_stage_names
            )

    # 2. Sector Overlap (30 points)
    sector_score = 0
    if project.sector and fund.sectors:
        project_sector_ids = {sector.id for sector in project.sector}
        fund_sector_ids = {sector.id for sector in fund.sectors}

        if project_sector_ids and fund_sector_ids:
            common_sectors = project_sector_ids & fund_sector_ids
            # Score based on what percentage of project sectors are covered by fund
            overlap_ratio = len(common_sectors) / len(project_sector_ids)
            sector_score = int(30 * overlap_ratio)

    # 3. Geographic Match (20 points)
    geo_score = 0
    if project.location and fund.geographic_focus:
        project_location_lower = project.location.lower()
        fund_geo_lower = fund.geographic_focus.lower()

        if project_location_lower == fund_geo_lower:
            # Exact match
            geo_score = 20
        elif (
            project_location_lower in fund_geo_lower
            or fund_geo_lower in project_location_lower
        ):
            # Partial match (one contains the other)
            geo_score = 10
        elif _check_geographic_overlap(project_location_lower, fund_geo_lower):
            # Both mention the same region under different names
            geo_score = 5

    # 4. Valuation/Check Size Fit (20 points)
    valuation_score = 0
    if (
        project.valuation
        and project.valuation > 0  # a negative valuation would yield a negative score
        and fund.check_size_lower
        and fund.check_size_upper
    ):
        if fund.check_size_lower <= project.valuation <= fund.check_size_upper:
            # Valuation is within the check size range (might be too small)
            valuation_score = 10
        else:
            # Typical investment is 10-30% of valuation
            reasonable_valuation_min = fund.check_size_lower * 3  # Investing ~33%
            reasonable_valuation_max = fund.check_size_upper * 10  # Investing ~10%

            if reasonable_valuation_min <= project.valuation <= reasonable_valuation_max:
                # Perfect fit
                valuation_score = 20
            elif project.valuation < reasonable_valuation_min:
                # Project might be too small; the divisor exceeds the positive
                # valuation here, so it cannot be zero.
                valuation_score = int(
                    10 * (project.valuation / reasonable_valuation_min)
                )
            else:
                # Project might be too large; valuation is known to be positive.
                valuation_score = int(
                    10 * (reasonable_valuation_max / project.valuation)
                )

    total_score = stage_score + sector_score + geo_score + valuation_score

    # Convert to 0-1 scale
    return total_score / max_score
|
|
|
|
|
|
def _calculate_project_investor_direct_compatibility(
    project: ProjectTable, investor: InvestorTable
) -> float:
    """
    Calculate compatibility using investor-level data (fallback when no funds available).

    Uses the same 100-point scale as the fund-level scorer, with two
    differences visible in this function:
    - The stage dimension is never scored (investors have no direct stage
      field), so the highest reachable result is 0.70.
    - Valuation has no separate "inside the check-size range" tier; only the
      band from 3x the lower to 10x the upper check size is considered.

    A dimension scores 0 when either side lacks the data it needs. A
    non-positive valuation is treated as missing so the total cannot go
    negative.

    Returns:
        A score between 0 and 0.7
    """
    max_score = 100

    # 1. Investment Stage - Skip this since investors don't have a direct stage field.
    # The 30 points are deliberately not redistributed, to stay on the same scale.
    stage_score = 0

    # 2. Sector Overlap (30 points)
    sector_score = 0
    if project.sector and investor.sectors:
        project_sector_ids = {sector.id for sector in project.sector}
        investor_sector_ids = {sector.id for sector in investor.sectors}

        if project_sector_ids and investor_sector_ids:
            common_sectors = project_sector_ids & investor_sector_ids
            # Share of the project's sectors that the investor also covers
            overlap_ratio = len(common_sectors) / len(project_sector_ids)
            sector_score = int(30 * overlap_ratio)

    # 3. Geographic Match (20 points)
    geo_score = 0
    if project.location and investor.geographic_focus:
        project_location_lower = project.location.lower()
        investor_geo_lower = investor.geographic_focus.lower()

        if project_location_lower == investor_geo_lower:
            geo_score = 20
        elif (
            project_location_lower in investor_geo_lower
            or investor_geo_lower in project_location_lower
        ):
            geo_score = 10
        elif _check_geographic_overlap(project_location_lower, investor_geo_lower):
            geo_score = 5

    # 4. Valuation/Check Size Fit (20 points)
    valuation_score = 0
    if (
        project.valuation
        and project.valuation > 0  # a negative valuation would yield a negative score
        and investor.check_size_lower
        and investor.check_size_upper
    ):
        reasonable_valuation_min = investor.check_size_lower * 3
        reasonable_valuation_max = investor.check_size_upper * 10

        if reasonable_valuation_min <= project.valuation <= reasonable_valuation_max:
            valuation_score = 20
        elif project.valuation < reasonable_valuation_min:
            # Too small: the divisor exceeds the positive valuation, so it is non-zero.
            valuation_score = int(10 * (project.valuation / reasonable_valuation_min))
        else:
            # Too large: valuation is known to be positive here.
            valuation_score = int(10 * (reasonable_valuation_max / project.valuation))

    total_score = stage_score + sector_score + geo_score + valuation_score

    # Convert to 0-1 scale
    return total_score / max_score
|
|
|
|
|
|
def _calculate_stage_proximity(project_stage: str, fund_stages: set) -> int:
|
|
"""
|
|
Calculate proximity score between project stage and fund stages.
|
|
Awards partial credit for adjacent investment stages.
|
|
|
|
Stage progression: SEED -> SERIES_A -> SERIES_B -> SERIES_C -> GROWTH -> LATE_STAGE
|
|
|
|
Returns:
|
|
Score from 0-15 (half credit for adjacent stages)
|
|
"""
|
|
stage_order = ["SEED", "SERIES_A", "SERIES_B", "SERIES_C", "GROWTH", "LATE_STAGE"]
|
|
|
|
try:
|
|
project_idx = stage_order.index(project_stage)
|
|
except ValueError:
|
|
return 0
|
|
|
|
# Check for adjacent stages
|
|
adjacent_stages = []
|
|
if project_idx > 0:
|
|
adjacent_stages.append(stage_order[project_idx - 1])
|
|
if project_idx < len(stage_order) - 1:
|
|
adjacent_stages.append(stage_order[project_idx + 1])
|
|
|
|
for stage in fund_stages:
|
|
if stage in adjacent_stages:
|
|
return 15 # Half credit for adjacent stage
|
|
|
|
return 0
|
|
|
|
|
|
def _check_geographic_overlap(location1: str, location2: str) -> bool:
|
|
"""
|
|
Check for common geographic terms between two locations.
|
|
|
|
Examples:
|
|
- "San Francisco, CA" and "California" -> True
|
|
- "New York" and "USA" -> True (if both contain USA/US)
|
|
- "London, UK" and "United Kingdom" -> True
|
|
"""
|
|
# Common geographic groupings
|
|
geo_groups = [
|
|
["usa", "us", "united states", "america"],
|
|
["uk", "united kingdom", "britain"],
|
|
["california", "ca"],
|
|
["new york", "ny"],
|
|
["texas", "tx"],
|
|
["europe", "eu"],
|
|
["asia", "asian"],
|
|
["africa", "african"],
|
|
]
|
|
|
|
for group in geo_groups:
|
|
found_in_1 = any(term in location1 for term in group)
|
|
found_in_2 = any(term in location2 for term in group)
|
|
if found_in_1 and found_in_2:
|
|
return True
|
|
|
|
return False
|
|
|
|
|
|
def get_top_compatible_investors(
    project: ProjectTable,
    investors: List[InvestorTable],
    limit: int = 10,
    min_score: float = 0.0,
    use_funds: bool = True,
) -> List[Tuple[InvestorTable, float]]:
    """
    Return the best-scoring investors for a project.

    All investors are scored and ranked first; entries below ``min_score``
    are then dropped and the ranked remainder is cut to ``limit`` items.

    Args:
        project: The project to find investors for
        investors: List of all available investors
        limit: Maximum number of investors to return
        min_score: Minimum compatibility score threshold (0-1)
        use_funds: If True, evaluates against investors' funds

    Returns:
        List of tuples (investor, score) sorted by score descending,
        limited to 'limit' items and filtered by min_score
    """
    ranked = calculate_project_investors_compatibility(project, investors, use_funds)

    # Keep only candidates that reach the threshold; ranking order is preserved.
    shortlist = []
    for candidate, rating in ranked:
        if rating >= min_score:
            shortlist.append((candidate, rating))

    # Return top N
    return shortlist[:limit]
|
|
|
|
|
|
def get_compatibility_score_breakdown(
    project: ProjectTable, investor: InvestorTable, fund: Optional[FundTable] = None
) -> dict:
    """
    Get a detailed breakdown of the compatibility score components.

    Useful for debugging or showing users why a particular score was calculated.

    With a ``fund``, each dimension is scored with the same rules the
    fund-level scorer applies (stage 30, sector 30, geography 20,
    valuation 20), so ``total_score`` equals the sum of the component scores
    divided by 100. Without a fund, only the investor-level total is returned.

    Args:
        project: The project being evaluated
        investor: The investor; only used when no fund is given
        fund: Optional fund to break the score down against

    Returns:
        With a fund: ``{"total_score": float, "breakdown": {...}}`` where
        ``breakdown`` has the keys ``stage``, ``sector``, ``geography`` and
        ``valuation``, each holding ``score``, ``max_score`` and the inputs
        that produced it. ``valuation["fit"]`` is one of ``"unknown"``,
        ``"within_check_size"``, ``"perfect"``, ``"too_small"`` or
        ``"too_large"``.
        Without a fund: ``{"total_score": float, "note": str}``.
    """
    if not fund:
        # Investor-level breakdown (simplified)
        return {
            "total_score": _calculate_project_investor_direct_compatibility(
                project, investor
            ),
            "note": "Using investor-level data (no specific fund selected)",
        }

    # project.stage may be an enum member or a plain value; resolve it once so
    # the scoring and the reported value cannot diverge (and a non-enum stage
    # does not raise AttributeError on ``.value``).
    project_stage_name = None
    if project.stage:
        project_stage_name = (
            project.stage.value
            if hasattr(project.stage, "value")
            else str(project.stage)
        )

    # Stage score (30 points)
    stage_score = 0
    stage_match = False
    if project.stage and fund.investment_stages:
        fund_stage_names = {stage.name for stage in fund.investment_stages}
        if project_stage_name in fund_stage_names:
            stage_score = 30
            stage_match = True
        else:
            # Partial credit for adjacent stages
            stage_score = _calculate_stage_proximity(
                project_stage_name, fund_stage_names
            )

    # Sector score (30 points)
    sector_score = 0
    matching_sectors = []
    if project.sector and fund.sectors:
        project_sector_ids = {sector.id for sector in project.sector}
        fund_sector_ids = {sector.id for sector in fund.sectors}
        if project_sector_ids and fund_sector_ids:
            common_sectors = project_sector_ids & fund_sector_ids
            matching_sectors = [s.name for s in fund.sectors if s.id in common_sectors]
            overlap_ratio = len(common_sectors) / len(project_sector_ids)
            sector_score = int(30 * overlap_ratio)

    # Geographic score (20 points)
    geo_score = 0
    geo_match_type = "none"
    if project.location and fund.geographic_focus:
        project_location_lower = project.location.lower()
        fund_geo_lower = fund.geographic_focus.lower()
        if project_location_lower == fund_geo_lower:
            geo_score = 20
            geo_match_type = "exact"
        elif (
            project_location_lower in fund_geo_lower
            or fund_geo_lower in project_location_lower
        ):
            geo_score = 10
            geo_match_type = "partial"
        elif _check_geographic_overlap(project_location_lower, fund_geo_lower):
            geo_score = 5
            geo_match_type = "regional"

    # Valuation score (20 points) - mirrors the fund-level scorer, including
    # its "valuation inside the check-size range" tier, so the total matches.
    valuation_score = 0
    valuation_fit = "unknown"
    if (
        project.valuation
        and project.valuation > 0  # a negative valuation would yield a negative score
        and fund.check_size_lower
        and fund.check_size_upper
    ):
        if fund.check_size_lower <= project.valuation <= fund.check_size_upper:
            valuation_score = 10
            valuation_fit = "within_check_size"
        else:
            reasonable_valuation_min = fund.check_size_lower * 3
            reasonable_valuation_max = fund.check_size_upper * 10
            if reasonable_valuation_min <= project.valuation <= reasonable_valuation_max:
                valuation_score = 20
                valuation_fit = "perfect"
            elif project.valuation < reasonable_valuation_min:
                valuation_score = int(
                    10 * (project.valuation / reasonable_valuation_min)
                )
                valuation_fit = "too_small"
            else:
                valuation_score = int(
                    10 * (reasonable_valuation_max / project.valuation)
                )
                valuation_fit = "too_large"

    total_score = stage_score + sector_score + geo_score + valuation_score

    return {
        "total_score": total_score / 100,
        "breakdown": {
            "stage": {
                "score": stage_score,
                "max_score": 30,
                "match": stage_match,
                "project_stage": project_stage_name,
                "fund_stages": [s.name for s in fund.investment_stages]
                if fund.investment_stages
                else [],
            },
            "sector": {
                "score": sector_score,
                "max_score": 30,
                "matching_sectors": matching_sectors,
                "project_sectors": [s.name for s in project.sector]
                if project.sector
                else [],
                "fund_sectors": [s.name for s in fund.sectors]
                if fund.sectors
                else [],
            },
            "geography": {
                "score": geo_score,
                "max_score": 20,
                "match_type": geo_match_type,
                "project_location": project.location,
                "fund_geography": fund.geographic_focus,
            },
            "valuation": {
                "score": valuation_score,
                "max_score": 20,
                "fit": valuation_fit,
                "project_valuation": project.valuation,
                "fund_check_size_range": f"{fund.check_size_lower}-{fund.check_size_upper}"
                if fund.check_size_lower
                else None,
            },
        },
    }
|
|
|
|
|
|
def generate_compatibility_explanation(
    project: ProjectTable, investor: InvestorTable, score: float, use_funds: bool = True
) -> str:
    """
    Generate a detailed, natural language explanation of the compatibility score.

    The text has up to three parts: the score as a percentage, a match level
    followed by the factors that align (sector, stage, location, valuation),
    and at most one recommendation. When ``use_funds`` is true and the
    investor has funds, the factors are taken from the best-scoring fund;
    otherwise investor-level attributes are used.

    Args:
        project: The project being evaluated
        investor: The investor being compared against
        score: The calculated compatibility score (0-1)
        use_funds: Whether fund-level data was used

    Returns:
        A formatted string with the compatibility score and detailed explanation
    """
    # round() rather than int(): 0.29 * 100 is 28.999999999999996 in binary
    # floating point, and truncating it would report 28% for a 29-point score.
    score_percentage = round(score * 100)

    # Determine match quality
    if score_percentage >= 80:
        match_level = "Excellent match"
    elif score_percentage >= 65:
        match_level = "Strong match"
    elif score_percentage >= 50:
        match_level = "Good match"
    elif score_percentage >= 35:
        match_level = "Moderate match"
    else:
        match_level = "Limited match"

    # Collect alignment factors
    alignment_factors = []
    recommendations = []

    # Get the best matching fund if using funds. max() keeps the first fund on
    # ties and still selects a fund when every fund scores 0, so the
    # explanation is always based on the same data source as the score.
    best_fund = None
    if use_funds and investor.funds:
        best_fund = max(
            investor.funds,
            key=lambda fund: _calculate_project_fund_compatibility(project, fund),
        )

    # Analyze sector alignment
    if project.sector:
        project_sectors = [s.name for s in project.sector if hasattr(s, "name")]

        if best_fund and best_fund.sectors:
            fund_sectors = {s.name for s in best_fund.sectors if hasattr(s, "name")}
            # Keep the project's own ordering (deduplicated) so the wording is
            # the same on every run; slicing a set would not be.
            common_sectors = list(
                dict.fromkeys(name for name in project_sectors if name in fund_sectors)
            )

            if common_sectors:
                sectors_str = ", ".join(common_sectors[:2])
                alignment_factors.append(f"{sectors_str} sector focus")
            elif project_sectors:
                recommendations.append(
                    f"Consider emphasizing any {project_sectors[0]} industry connections"
                )
        elif investor.sectors:
            investor_sectors = {s.name for s in investor.sectors if hasattr(s, "name")}
            common_sectors = list(
                dict.fromkeys(
                    name for name in project_sectors if name in investor_sectors
                )
            )

            if common_sectors:
                sectors_str = ", ".join(common_sectors[:2])
                alignment_factors.append(f"{sectors_str} sector focus")

    # Analyze stage alignment
    if project.stage:
        stage_name = (
            project.stage.value
            if hasattr(project.stage, "value")
            else str(project.stage)
        )
        stage_display = stage_name.replace("_", " ").title()

        if best_fund and best_fund.investment_stages:
            fund_stage_names = {
                s.name for s in best_fund.investment_stages if hasattr(s, "name")
            }
            if stage_name in fund_stage_names:
                alignment_factors.append(f"{stage_display} stage")
            else:
                recommendations.append(
                    "Investor typically focuses on different stages; highlight your traction and growth metrics"
                )

        if not best_fund:
            # No fund-level stage data to compare against; the project's stage
            # is listed as-is.
            alignment_factors.append(f"{stage_display} stage")

    # Analyze geographic alignment
    if project.location:
        location_lower = project.location.lower()
        if best_fund and best_fund.geographic_focus:
            focus_lower = best_fund.geographic_focus.lower()
            if location_lower in focus_lower or focus_lower in location_lower:
                alignment_factors.append(f"{project.location} presence")
        elif investor.headquarters:
            headquarters_lower = investor.headquarters.lower()
            if location_lower in headquarters_lower or headquarters_lower in location_lower:
                alignment_factors.append(f"{project.location} market presence")

    # Analyze valuation/check size fit
    if project.valuation:
        if best_fund and best_fund.check_size_lower and best_fund.check_size_upper:
            reasonable_min = best_fund.check_size_lower * 3
            reasonable_max = best_fund.check_size_upper * 10

            if reasonable_min <= project.valuation <= reasonable_max:
                alignment_factors.append("appropriate funding stage")
            elif project.valuation < reasonable_min:
                recommendations.append(
                    "You may be early for this investor; consider approaching at a later stage"
                )
            else:
                recommendations.append(
                    "Consider highlighting your growth trajectory and market opportunity"
                )

    # Build the explanation
    explanation_parts = [f"Based on your startup profile: {score_percentage}% match"]

    if alignment_factors:
        alignment_text = ", ".join(alignment_factors)
        explanation_parts.append(f"{match_level}: {alignment_text}.")
    else:
        explanation_parts.append(f"{match_level}.")

    if recommendations:
        # Only the first recommendation collected is shown.
        explanation_parts.append(recommendations[0] + ".")

    return " ".join(explanation_parts)
|