feat: Integrate Folk CRM API for investor synchronization and compatibility scoring
This commit is contained in:
@@ -0,0 +1,509 @@
|
||||
"""
|
||||
Compatibility Score Service
|
||||
|
||||
This module calculates compatibility scores between projects and investors.
|
||||
The scoring system evaluates multiple dimensions to determine how well a project
|
||||
matches with an investor's investment criteria.
|
||||
"""
|
||||
|
||||
from typing import List, Optional, Tuple
|
||||
|
||||
from db.models import FundTable, InvestorTable, ProjectTable
|
||||
|
||||
|
||||
def calculate_project_investor_compatibility(
    project: ProjectTable, investor: InvestorTable, use_funds: bool = True
) -> float:
    """
    Score how well a project fits a single investor.

    When fund evaluation is requested and the investor has funds, every fund
    is scored against the project and the best fund score is returned (never
    lower than 0.0). Otherwise the investor-level attributes are scored
    directly.

    Args:
        project: The project to evaluate
        investor: The investor to compare against
        use_funds: If True, evaluates against investor's funds. If False, uses investor-level data.

    Returns:
        The compatibility score on a 0-1 scale, where 1 is a perfect match

    Scoring breakdown (out of 100 points):
        - Investment Stage Match: 30 points
        - Sector Overlap: 30 points
        - Geographic Match: 20 points
        - Valuation/Check Size Fit: 20 points
    """
    # Fallback: fund scoring disabled, or the investor has no funds to score.
    if not (use_funds and investor.funds):
        return _calculate_project_investor_direct_compatibility(project, investor)

    fund_scores = [
        _calculate_project_fund_compatibility(project, fund)
        for fund in investor.funds
    ]
    # The leading 0.0 keeps the result from dropping below zero.
    return max([0.0, *fund_scores])
|
||||
|
||||
|
||||
def calculate_project_investors_compatibility(
    project: ProjectTable, investors: List[InvestorTable], use_funds: bool = True
) -> List[Tuple[InvestorTable, float]]:
    """
    Score a project against each investor in a list and rank the results.

    Args:
        project: The project to evaluate
        investors: List of investors to compare against
        use_funds: If True, evaluates against investors' funds. If False, uses investor-level data.

    Returns:
        List of tuples (investor, score), highest score first. Investors with
        equal scores keep their relative input order.
    """
    pairs = [
        (
            candidate,
            calculate_project_investor_compatibility(project, candidate, use_funds),
        )
        for candidate in investors
    ]

    # sorted() is stable, so ties stay in input order even with reverse=True.
    return sorted(pairs, key=lambda pair: pair[1], reverse=True)
|
||||
|
||||
|
||||
def _calculate_project_fund_compatibility(
    project: ProjectTable, fund: FundTable
) -> float:
    """
    Score how well a project fits one specific fund.

    Scoring breakdown (100 points in total):
        - Investment Stage Match: 30 points when the project's stage is one of
          the fund's stages; otherwise whatever partial credit
          _calculate_stage_proximity grants for an adjacent stage
        - Sector Overlap: up to 30 points, proportional to the share of the
          project's sectors that the fund also covers (truncated to an int)
        - Geographic Match: 20 for identical strings, 10 when one string
          contains the other, 5 for a shared geographic grouping, else 0
        - Valuation/Check Size Fit: 10 when the valuation lies inside the
          fund's check-size range; 20 when it lies between 3x the lower and
          10x the upper check size; otherwise up to 10, scaled by how far the
          valuation falls outside that window

    A dimension scores 0 whenever the data needed to evaluate it is missing.

    Returns:
        The total points divided by 100
    """
    # 1. Investment stage (30 points)
    stage_points = 0
    if project.stage and fund.investment_stages:
        accepted_stages = {entry.name for entry in fund.investment_stages}
        # The project stage may be an enum; compare by its value when it has one.
        if hasattr(project.stage, "value"):
            stage_label = project.stage.value
        else:
            stage_label = str(project.stage)

        stage_points = (
            30
            if stage_label in accepted_stages
            else _calculate_stage_proximity(stage_label, accepted_stages)
        )

    # 2. Sector overlap (30 points)
    sector_points = 0
    if project.sector and fund.sectors:
        wanted_ids = {item.id for item in project.sector}
        covered_ids = {item.id for item in fund.sectors}
        if wanted_ids and covered_ids:
            # Share of the project's sectors that the fund covers.
            sector_points = int(30 * (len(wanted_ids & covered_ids) / len(wanted_ids)))

    # 3. Geography (20 points)
    geo_points = 0
    if project.location and fund.geographic_focus:
        where = project.location.lower()
        focus = fund.geographic_focus.lower()
        if where == focus:
            geo_points = 20
        elif where in focus or focus in where:
            geo_points = 10
        elif _check_geographic_overlap(where, focus):
            geo_points = 5

    # 4. Valuation versus check size (20 points)
    valuation_points = 0
    if project.valuation and fund.check_size_lower and fund.check_size_upper:
        valuation = project.valuation
        window_floor = fund.check_size_lower * 3  # a check of roughly 33% of valuation
        window_ceiling = fund.check_size_upper * 10  # a check of roughly 10% of valuation

        if fund.check_size_lower <= valuation <= fund.check_size_upper:
            # Valuation no larger than a single check: the project may be too small.
            valuation_points = 10
        elif window_floor <= valuation <= window_ceiling:
            valuation_points = 20
        elif valuation < window_floor:
            shortfall = valuation / window_floor if window_floor > 0 else 0
            valuation_points = int(10 * shortfall)
        else:
            excess = window_ceiling / valuation if valuation > 0 else 0
            valuation_points = int(10 * excess)

    # Convert the 100-point total to a 0-1 scale.
    return (stage_points + sector_points + geo_points + valuation_points) / 100
|
||||
|
||||
|
||||
def _calculate_project_investor_direct_compatibility(
    project: ProjectTable, investor: InvestorTable
) -> float:
    """
    Score a project against investor-level data (fallback when no funds are used).

    Scoring breakdown (out of 100 points):
        - Investment Stage: always 0, because no stage attribute is read from
          the investor; the 30 stage points are not redistributed
        - Sector Overlap: up to 30 points, proportional to the share of the
          project's sectors that the investor also covers
        - Geographic Match: 20 for identical strings, 10 when one string
          contains the other, 5 for a shared geographic grouping, else 0
        - Valuation/Check Size Fit: 20 when the valuation lies between 3x the
          lower and 10x the upper check size; otherwise up to 10, scaled by
          how far the valuation falls outside that window

    Returns:
        The total points divided by 100
    """
    # 1. Investment stage: nothing to compare at investor level.
    stage_points = 0

    # 2. Sector overlap (30 points)
    sector_points = 0
    if project.sector and investor.sectors:
        wanted_ids = {item.id for item in project.sector}
        covered_ids = {item.id for item in investor.sectors}
        if wanted_ids and covered_ids:
            sector_points = int(30 * (len(wanted_ids & covered_ids) / len(wanted_ids)))

    # 3. Geography (20 points)
    geo_points = 0
    if project.location and investor.geographic_focus:
        where = project.location.lower()
        focus = investor.geographic_focus.lower()
        if where == focus:
            geo_points = 20
        elif where in focus or focus in where:
            geo_points = 10
        elif _check_geographic_overlap(where, focus):
            geo_points = 5

    # 4. Valuation versus check size (20 points)
    valuation_points = 0
    if project.valuation and investor.check_size_lower and investor.check_size_upper:
        valuation = project.valuation
        window_floor = investor.check_size_lower * 3
        window_ceiling = investor.check_size_upper * 10

        if window_floor <= valuation <= window_ceiling:
            valuation_points = 20
        elif valuation < window_floor:
            shortfall = valuation / window_floor if window_floor > 0 else 0
            valuation_points = int(10 * shortfall)
        else:
            excess = window_ceiling / valuation if valuation > 0 else 0
            valuation_points = int(10 * excess)

    # Convert the 100-point total to a 0-1 scale.
    return (stage_points + sector_points + geo_points + valuation_points) / 100
|
||||
|
||||
|
||||
def _calculate_stage_proximity(project_stage: str, fund_stages: set) -> int:
|
||||
"""
|
||||
Calculate proximity score between project stage and fund stages.
|
||||
Awards partial credit for adjacent investment stages.
|
||||
|
||||
Stage progression: SEED -> SERIES_A -> SERIES_B -> SERIES_C -> GROWTH -> LATE_STAGE
|
||||
|
||||
Returns:
|
||||
Score from 0-15 (half credit for adjacent stages)
|
||||
"""
|
||||
stage_order = ["SEED", "SERIES_A", "SERIES_B", "SERIES_C", "GROWTH", "LATE_STAGE"]
|
||||
|
||||
try:
|
||||
project_idx = stage_order.index(project_stage)
|
||||
except ValueError:
|
||||
return 0
|
||||
|
||||
# Check for adjacent stages
|
||||
adjacent_stages = []
|
||||
if project_idx > 0:
|
||||
adjacent_stages.append(stage_order[project_idx - 1])
|
||||
if project_idx < len(stage_order) - 1:
|
||||
adjacent_stages.append(stage_order[project_idx + 1])
|
||||
|
||||
for stage in fund_stages:
|
||||
if stage in adjacent_stages:
|
||||
return 15 # Half credit for adjacent stage
|
||||
|
||||
return 0
|
||||
|
||||
|
||||
def _check_geographic_overlap(location1: str, location2: str) -> bool:
|
||||
"""
|
||||
Check for common geographic terms between two locations.
|
||||
|
||||
Examples:
|
||||
- "San Francisco, CA" and "California" -> True
|
||||
- "New York" and "USA" -> True (if both contain USA/US)
|
||||
- "London, UK" and "United Kingdom" -> True
|
||||
"""
|
||||
# Common geographic groupings
|
||||
geo_groups = [
|
||||
["usa", "us", "united states", "america"],
|
||||
["uk", "united kingdom", "britain"],
|
||||
["california", "ca"],
|
||||
["new york", "ny"],
|
||||
["texas", "tx"],
|
||||
["europe", "eu"],
|
||||
["asia", "asian"],
|
||||
["africa", "african"],
|
||||
]
|
||||
|
||||
for group in geo_groups:
|
||||
found_in_1 = any(term in location1 for term in group)
|
||||
found_in_2 = any(term in location2 for term in group)
|
||||
if found_in_1 and found_in_2:
|
||||
return True
|
||||
|
||||
return False
|
||||
|
||||
|
||||
def get_top_compatible_investors(
    project: ProjectTable,
    investors: List[InvestorTable],
    limit: int = 10,
    min_score: float = 0.0,
    use_funds: bool = True,
) -> List[Tuple[InvestorTable, float]]:
    """
    Return the best-matching investors for a project.

    Args:
        project: The project to find investors for
        investors: List of all available investors
        limit: Maximum number of investors to return
        min_score: Minimum compatibility score threshold (0-1)
        use_funds: If True, evaluates against investors' funds

    Returns:
        List of tuples (investor, score) sorted by score descending,
        keeping only scores of at least min_score and at most 'limit' entries
    """
    ranked = calculate_project_investors_compatibility(project, investors, use_funds)

    # Drop everything below the threshold; the ranking order is preserved.
    shortlist = []
    for candidate, rating in ranked:
        if rating >= min_score:
            shortlist.append((candidate, rating))

    return shortlist[:limit]
|
||||
|
||||
|
||||
def get_compatibility_score_breakdown(
    project: ProjectTable, investor: InvestorTable, fund: Optional[FundTable] = None
) -> dict:
    """
    Get a detailed breakdown of the compatibility score components.

    Useful for debugging or showing users why a particular score was calculated.
    The per-fund breakdown applies the same rules as
    _calculate_project_fund_compatibility, so its "total_score" equals the
    score that function returns for the same project and fund.

    Args:
        project: The project being evaluated
        investor: The investor; only used when no fund is given
        fund: Optional fund to break the score down for

    Returns:
        With a fund: a dictionary holding "total_score" (0-1) and a
        "breakdown" with one entry per dimension ("stage", "sector",
        "geography", "valuation"), each carrying its score, its maximum and
        the data that produced it. The valuation "fit" is one of "unknown",
        "perfect", "too_small" or "too_large"; a valuation that falls inside
        the fund's check-size range is reported as "too_small".
        Without a fund: a dictionary holding only the investor-level
        "total_score" and an explanatory "note".
    """
    if not fund:
        # Investor-level breakdown (simplified)
        return {
            "total_score": _calculate_project_investor_direct_compatibility(
                project, investor
            ),
            "note": "Using investor-level data (no specific fund selected)",
        }

    # Stage score. The stage name is resolved once and reused in the result so
    # that a project stage without a ".value" attribute cannot raise later on.
    stage_score = 0
    stage_match = False
    project_stage_name = None
    if project.stage:
        project_stage_name = (
            project.stage.value
            if hasattr(project.stage, "value")
            else str(project.stage)
        )
    if project.stage and fund.investment_stages:
        fund_stage_names = {stage.name for stage in fund.investment_stages}
        if project_stage_name in fund_stage_names:
            stage_score = 30
            stage_match = True
        else:
            # Partial credit for adjacent stages
            stage_score = _calculate_stage_proximity(
                project_stage_name, fund_stage_names
            )

    # Sector score
    sector_score = 0
    matching_sectors = []
    if project.sector and fund.sectors:
        project_sector_ids = {sector.id for sector in project.sector}
        fund_sector_ids = {sector.id for sector in fund.sectors}
        if project_sector_ids and fund_sector_ids:
            common_sectors = project_sector_ids.intersection(fund_sector_ids)
            matching_sectors = [
                s.name for s in fund.sectors if s.id in common_sectors
            ]
            overlap_ratio = len(common_sectors) / len(project_sector_ids)
            sector_score = int(30 * overlap_ratio)

    # Geographic score
    geo_score = 0
    geo_match_type = "none"
    if project.location and fund.geographic_focus:
        project_location_lower = project.location.lower()
        fund_geo_lower = fund.geographic_focus.lower()
        if project_location_lower == fund_geo_lower:
            geo_score = 20
            geo_match_type = "exact"
        elif (
            project_location_lower in fund_geo_lower
            or fund_geo_lower in project_location_lower
        ):
            geo_score = 10
            geo_match_type = "partial"
        elif _check_geographic_overlap(project_location_lower, fund_geo_lower):
            geo_score = 5
            geo_match_type = "regional"

    # Valuation score
    valuation_score = 0
    valuation_fit = "unknown"
    if project.valuation and fund.check_size_lower and fund.check_size_upper:
        reasonable_valuation_min = fund.check_size_lower * 3
        reasonable_valuation_max = fund.check_size_upper * 10
        if fund.check_size_lower <= project.valuation <= fund.check_size_upper:
            # Same rule as the fund scorer: a valuation no larger than a single
            # check earns 10 points (the project might be too small).
            valuation_score = 10
            valuation_fit = "too_small"
        elif (
            reasonable_valuation_min
            <= project.valuation
            <= reasonable_valuation_max
        ):
            valuation_score = 20
            valuation_fit = "perfect"
        elif project.valuation < reasonable_valuation_min:
            ratio = (
                project.valuation / reasonable_valuation_min
                if reasonable_valuation_min > 0
                else 0
            )
            valuation_score = int(10 * ratio)
            valuation_fit = "too_small"
        else:
            ratio = (
                reasonable_valuation_max / project.valuation
                if project.valuation > 0
                else 0
            )
            valuation_score = int(10 * ratio)
            valuation_fit = "too_large"

    total_score = stage_score + sector_score + geo_score + valuation_score

    return {
        "total_score": total_score / 100,
        "breakdown": {
            "stage": {
                "score": stage_score,
                "max_score": 30,
                "match": stage_match,
                "project_stage": project_stage_name,
                "fund_stages": [s.name for s in fund.investment_stages]
                if fund.investment_stages
                else [],
            },
            "sector": {
                "score": sector_score,
                "max_score": 30,
                "matching_sectors": matching_sectors,
                "project_sectors": [s.name for s in project.sector]
                if project.sector
                else [],
                "fund_sectors": [s.name for s in fund.sectors]
                if fund.sectors
                else [],
            },
            "geography": {
                "score": geo_score,
                "max_score": 20,
                "match_type": geo_match_type,
                "project_location": project.location,
                "fund_geography": fund.geographic_focus,
            },
            "valuation": {
                "score": valuation_score,
                "max_score": 20,
                "fit": valuation_fit,
                "project_valuation": project.valuation,
                "fund_check_size_range": f"{fund.check_size_lower}-{fund.check_size_upper}"
                if fund.check_size_lower
                else None,
            },
        },
    }
|
||||
|
||||
Reference in New Issue
Block a user