app/services/compatibility_score.py

"""
Compatibility Score Service

This module calculates compatibility scores between projects and investors.
The scoring system evaluates multiple dimensions to determine how well a project
matches with an investor's investment criteria.
"""

from typing import List, Optional, Tuple

from db.models import FundTable, InvestorTable, ProjectTable


def calculate_project_investor_compatibility(
    project: ProjectTable, investor: InvestorTable, use_funds: bool = True
) -> float:
    """
    Score how well a project fits a single investor.

    When ``use_funds`` is true and the investor has at least one fund, each
    fund is scored with ``_calculate_project_fund_compatibility`` and the
    highest result is returned. In every other case the investor's own
    attributes are scored with
    ``_calculate_project_investor_direct_compatibility``.

    Args:
        project: The project to evaluate
        investor: The investor to compare against
        use_funds: Prefer fund-level data when the investor has funds

    Returns:
        The best fund score (never below 0.0), or the investor-level score
        when funds are not requested or the investor has none
    """
    if not (use_funds and investor.funds):
        # No fund data requested/available: fall back to investor attributes.
        return _calculate_project_investor_direct_compatibility(project, investor)

    per_fund_scores = [
        _calculate_project_fund_compatibility(project, fund)
        for fund in investor.funds
    ]
    # 0.0 takes part in the comparison, exactly like a running maximum that
    # starts at zero.
    return max([0.0, *per_fund_scores])


def calculate_project_investors_compatibility(
    project: ProjectTable, investors: List[InvestorTable], use_funds: bool = True
) -> List[Tuple[InvestorTable, float]]:
    """
    Score a project against every investor in a list and rank the results.

    Args:
        project: The project to evaluate
        investors: Investors to compare against
        use_funds: Forwarded to ``calculate_project_investor_compatibility``

    Returns:
        A new list of (investor, score) tuples, highest score first.
        Investors with equal scores keep their relative input order.
    """
    scored_pairs = [
        (
            candidate,
            calculate_project_investor_compatibility(project, candidate, use_funds),
        )
        for candidate in investors
    ]
    # sorted() is stable even with reverse=True, so ties stay in input order.
    return sorted(scored_pairs, key=lambda pair: pair[1], reverse=True)


def _calculate_project_fund_compatibility(
    project: ProjectTable, fund: FundTable
) -> float:
    """
    Calculate compatibility score between a project and a specific fund.

    Scoring breakdown:
        - Investment Stage Match: 30 points (all or nothing if stage exists)
        - Sector Overlap: 30 points (proportional to overlap)
        - Geographic Match: 20 points (exact=20, partial=10, none=0)
        - Valuation/Check Size Fit: 20 points (proportional to fit)

    Returns:
        A score between 0 and 1
    """
    total_score = 0
    max_score = 100

    # 1. Investment Stage Match (30 points)
    stage_score = 0
    if project.stage and fund.investment_stages:
        # Check if project stage matches any of the fund's investment stages
        fund_stage_names = {stage.name for stage in fund.investment_stages}
        # Convert project.stage enum to string for comparison
        project_stage_name = (
            project.stage.value
            if hasattr(project.stage, "value")
            else str(project.stage)
        )

        if project_stage_name in fund_stage_names:
            stage_score = 30
        else:
            # Partial credit for adjacent stages
            stage_score = _calculate_stage_proximity(
                project_stage_name, fund_stage_names
            )

    total_score += stage_score

    # 2. Sector Overlap (30 points)
    sector_score = 0
    if project.sector and fund.sectors:
        project_sector_ids = {sector.id for sector in project.sector}
        fund_sector_ids = {sector.id for sector in fund.sectors}

        if project_sector_ids and fund_sector_ids:
            common_sectors = project_sector_ids.intersection(fund_sector_ids)
            # Score based on what percentage of project sectors are covered by fund
            overlap_ratio = len(common_sectors) / len(project_sector_ids)
            sector_score = int(30 * overlap_ratio)

    total_score += sector_score

    # 3. Geographic Match (20 points)
    geo_score = 0
    if project.location and fund.geographic_focus:
        project_location_lower = project.location.lower()
        fund_geo_lower = (fund.geographic_focus or "").lower()

        # Exact match
        if project_location_lower == fund_geo_lower:
            geo_score = 20
        # Partial match (one contains the other)
        elif (
            project_location_lower in fund_geo_lower
            or fund_geo_lower in project_location_lower
        ):
            geo_score = 10
        # Check for common geographic terms
        elif _check_geographic_overlap(project_location_lower, fund_geo_lower):
            geo_score = 5

    total_score += geo_score

    # 4. Valuation/Check Size Fit (20 points)
    valuation_score = 0
    if project.valuation and fund.check_size_lower and fund.check_size_upper:
        # Check if project valuation falls within or near the check size range
        # Typically, check size is a fraction of valuation (e.g., 10-20%)
        # We'll assume check size represents potential investment amount

        if fund.check_size_lower <= project.valuation <= fund.check_size_upper:
            # Valuation is within the check size range (might be too small)
            valuation_score = 10
        else:
            # Check if the check size is reasonable for this valuation
            # Typical investment is 10-30% of valuation
            reasonable_valuation_min = fund.check_size_lower * 3  # Investing ~33%
            reasonable_valuation_max = fund.check_size_upper * 10  # Investing ~10%

            if (
                reasonable_valuation_min
                <= project.valuation
                <= reasonable_valuation_max
            ):
                # Perfect fit
                valuation_score = 20
            elif project.valuation < reasonable_valuation_min:
                # Project might be too small
                ratio = (
                    project.valuation / reasonable_valuation_min
                    if reasonable_valuation_min > 0
                    else 0
                )
                valuation_score = int(10 * ratio)
            else:
                # Project might be too large
                ratio = (
                    reasonable_valuation_max / project.valuation
                    if project.valuation > 0
                    else 0
                )
                valuation_score = int(10 * ratio)

    total_score += valuation_score

    # Convert to 0-1 scale
    return total_score / max_score


def _calculate_project_investor_direct_compatibility(
    project: ProjectTable, investor: InvestorTable
) -> float:
    """
    Calculate compatibility using investor-level data (fallback when no funds available).

    Uses the same scoring system but with investor-level attributes.
    """
    total_score = 0
    max_score = 100

    # 1. Investment Stage - Skip this since investors don't have a direct stage field
    # We could add 30 points to other categories, but for consistency, we'll leave it as 0
    stage_score = 0
    total_score += stage_score

    # 2. Sector Overlap (30 points)
    sector_score = 0
    if project.sector and investor.sectors:
        project_sector_ids = {sector.id for sector in project.sector}
        investor_sector_ids = {sector.id for sector in investor.sectors}

        if project_sector_ids and investor_sector_ids:
            common_sectors = project_sector_ids.intersection(investor_sector_ids)
            overlap_ratio = len(common_sectors) / len(project_sector_ids)
            sector_score = int(30 * overlap_ratio)

    total_score += sector_score

    # 3. Geographic Match (20 points)
    geo_score = 0
    if project.location and investor.geographic_focus:
        project_location_lower = project.location.lower()
        investor_geo_lower = (investor.geographic_focus or "").lower()

        if project_location_lower == investor_geo_lower:
            geo_score = 20
        elif (
            project_location_lower in investor_geo_lower
            or investor_geo_lower in project_location_lower
        ):
            geo_score = 10
        elif _check_geographic_overlap(project_location_lower, investor_geo_lower):
            geo_score = 5

    total_score += geo_score

    # 4. Valuation/Check Size Fit (20 points)
    valuation_score = 0
    if project.valuation and investor.check_size_lower and investor.check_size_upper:
        reasonable_valuation_min = investor.check_size_lower * 3
        reasonable_valuation_max = investor.check_size_upper * 10

        if reasonable_valuation_min <= project.valuation <= reasonable_valuation_max:
            valuation_score = 20
        elif project.valuation < reasonable_valuation_min:
            ratio = (
                project.valuation / reasonable_valuation_min
                if reasonable_valuation_min > 0
                else 0
            )
            valuation_score = int(10 * ratio)
        else:
            ratio = (
                reasonable_valuation_max / project.valuation
                if project.valuation > 0
                else 0
            )
            valuation_score = int(10 * ratio)

    total_score += valuation_score

    # Convert to 0-1 scale
    return total_score / max_score


def _calculate_stage_proximity(project_stage: str, fund_stages: set) -> int:
    """
    Calculate proximity score between project stage and fund stages.
    Awards partial credit for adjacent investment stages.

    Stage progression: SEED -> SERIES_A -> SERIES_B -> SERIES_C -> GROWTH -> LATE_STAGE

    Returns:
        Score from 0-15 (half credit for adjacent stages)
    """
    stage_order = ["SEED", "SERIES_A", "SERIES_B", "SERIES_C", "GROWTH", "LATE_STAGE"]

    try:
        project_idx = stage_order.index(project_stage)
    except ValueError:
        return 0

    # Check for adjacent stages
    adjacent_stages = []
    if project_idx > 0:
        adjacent_stages.append(stage_order[project_idx - 1])
    if project_idx < len(stage_order) - 1:
        adjacent_stages.append(stage_order[project_idx + 1])

    for stage in fund_stages:
        if stage in adjacent_stages:
            return 15  # Half credit for adjacent stage

    return 0


def _check_geographic_overlap(location1: str, location2: str) -> bool:
    """
    Check for common geographic terms between two locations.

    Examples:
        - "San Francisco, CA" and "California" -> True
        - "New York" and "USA" -> True (if both contain USA/US)
        - "London, UK" and "United Kingdom" -> True
    """
    # Common geographic groupings
    geo_groups = [
        ["usa", "us", "united states", "america"],
        ["uk", "united kingdom", "britain"],
        ["california", "ca"],
        ["new york", "ny"],
        ["texas", "tx"],
        ["europe", "eu"],
        ["asia", "asian"],
        ["africa", "african"],
    ]

    for group in geo_groups:
        found_in_1 = any(term in location1 for term in group)
        found_in_2 = any(term in location2 for term in group)
        if found_in_1 and found_in_2:
            return True

    return False


def get_top_compatible_investors(
    project: ProjectTable,
    investors: List[InvestorTable],
    limit: int = 10,
    min_score: float = 0.0,
    use_funds: bool = True,
) -> List[Tuple[InvestorTable, float]]:
    """
    Return the best-scoring investors for a project.

    All investors are scored and ranked by
    ``calculate_project_investors_compatibility``; entries scoring below
    ``min_score`` are then dropped and the list is cut with ``[:limit]``.

    Args:
        project: The project to find investors for
        investors: Candidate investors
        limit: Slice bound applied to the filtered ranking
        min_score: Lowest score (inclusive) an investor may have to be kept
        use_funds: If True, evaluates against investors' funds

    Returns:
        (investor, score) tuples, highest score first
    """
    ranked = calculate_project_investors_compatibility(project, investors, use_funds)
    qualifying = list(filter(lambda pair: pair[1] >= min_score, ranked))
    return qualifying[:limit]


def get_compatibility_score_breakdown(
    project: ProjectTable, investor: InvestorTable, fund: Optional[FundTable] = None
) -> dict:
    """
    Get a detailed breakdown of the compatibility score components.

    Useful for debugging or showing users why a particular score was
    calculated. The per-dimension rules mirror
    ``_calculate_project_fund_compatibility`` so that ``total_score`` equals
    the score that function returns for the same project and fund.

    Args:
        project: The project being evaluated
        investor: The investor; only used when no fund is given
        fund: The fund to break the score down for. When omitted (or falsy),
            only the investor-level total is returned.

    Returns:
        With a fund: ``{"total_score": float, "breakdown": {...}}`` where
        ``breakdown`` has the keys ``stage``, ``sector``, ``geography`` and
        ``valuation``, each holding its ``score``, ``max_score`` and the
        inputs that produced it.
        Without a fund: ``{"total_score": float, "note": str}``.
    """
    if not fund:
        # Investor-level breakdown (simplified)
        return {
            "total_score": _calculate_project_investor_direct_compatibility(
                project, investor
            ),
            "note": "Using investor-level data (no specific fund selected)",
        }

    # Resolve the stage label once. The stage may be an enum or a plain
    # value, so ``.value`` must not be assumed to exist.
    project_stage_name = None
    if project.stage:
        project_stage_name = (
            project.stage.value
            if hasattr(project.stage, "value")
            else str(project.stage)
        )

    # Stage score
    stage_score = 0
    stage_match = False
    if project.stage and fund.investment_stages:
        fund_stage_names = {stage.name for stage in fund.investment_stages}
        if project_stage_name in fund_stage_names:
            stage_score = 30
            stage_match = True
        else:
            # Partial credit for adjacent stages
            stage_score = _calculate_stage_proximity(
                project_stage_name, fund_stage_names
            )

    # Sector score
    sector_score = 0
    matching_sectors = []
    if project.sector and fund.sectors:
        project_sector_ids = {sector.id for sector in project.sector}
        fund_sector_ids = {sector.id for sector in fund.sectors}
        if project_sector_ids and fund_sector_ids:
            common_sectors = project_sector_ids.intersection(fund_sector_ids)
            matching_sectors = [s.name for s in fund.sectors if s.id in common_sectors]
            overlap_ratio = len(common_sectors) / len(project_sector_ids)
            sector_score = int(30 * overlap_ratio)

    # Geographic score
    geo_score = 0
    geo_match_type = "none"
    if project.location and fund.geographic_focus:
        project_location_lower = project.location.lower()
        fund_geo_lower = fund.geographic_focus.lower()
        if project_location_lower == fund_geo_lower:
            geo_score = 20
            geo_match_type = "exact"
        elif (
            project_location_lower in fund_geo_lower
            or fund_geo_lower in project_location_lower
        ):
            geo_score = 10
            geo_match_type = "partial"
        elif _check_geographic_overlap(project_location_lower, fund_geo_lower):
            geo_score = 5
            geo_match_type = "regional"

    # Valuation score
    valuation_score = 0
    valuation_fit = "unknown"
    if project.valuation and fund.check_size_lower and fund.check_size_upper:
        if fund.check_size_lower <= project.valuation <= fund.check_size_upper:
            # Same rule as the fund scorer: a valuation no larger than a
            # single check gets half credit. Reported with the existing
            # "too_small" label so the set of fit values stays unchanged.
            valuation_score = 10
            valuation_fit = "too_small"
        else:
            reasonable_valuation_min = fund.check_size_lower * 3
            reasonable_valuation_max = fund.check_size_upper * 10
            if (
                reasonable_valuation_min
                <= project.valuation
                <= reasonable_valuation_max
            ):
                valuation_score = 20
                valuation_fit = "perfect"
            elif project.valuation < reasonable_valuation_min:
                ratio = (
                    project.valuation / reasonable_valuation_min
                    if reasonable_valuation_min > 0
                    else 0
                )
                valuation_score = int(10 * ratio)
                valuation_fit = "too_small"
            else:
                ratio = (
                    reasonable_valuation_max / project.valuation
                    if project.valuation > 0
                    else 0
                )
                valuation_score = int(10 * ratio)
                valuation_fit = "too_large"

    total_score = stage_score + sector_score + geo_score + valuation_score

    return {
        "total_score": total_score / 100,
        "breakdown": {
            "stage": {
                "score": stage_score,
                "max_score": 30,
                "match": stage_match,
                "project_stage": project_stage_name,
                "fund_stages": [s.name for s in fund.investment_stages]
                if fund.investment_stages
                else [],
            },
            "sector": {
                "score": sector_score,
                "max_score": 30,
                "matching_sectors": matching_sectors,
                "project_sectors": [s.name for s in project.sector]
                if project.sector
                else [],
                "fund_sectors": [s.name for s in fund.sectors]
                if fund.sectors
                else [],
            },
            "geography": {
                "score": geo_score,
                "max_score": 20,
                "match_type": geo_match_type,
                "project_location": project.location,
                "fund_geography": fund.geographic_focus,
            },
            "valuation": {
                "score": valuation_score,
                "max_score": 20,
                "fit": valuation_fit,
                "project_valuation": project.valuation,
                "fund_check_size_range": f"{fund.check_size_lower}-{fund.check_size_upper}"
                if fund.check_size_lower
                else None,
            },
        },
    }


def generate_compatibility_explanation(
    project: ProjectTable, investor: InvestorTable, score: float, use_funds: bool = True
) -> str:
    """
    Generate a natural language explanation of a compatibility score.

    The text has up to three parts joined by single spaces: the percentage
    headline, the match level with the alignment factors that were found,
    and at most one recommendation (the first one collected).

    When ``use_funds`` is true, the investor's highest-scoring fund (if any
    fund scores above zero) is used for the analysis; otherwise
    investor-level attributes are used.

    Args:
        project: The project being evaluated
        investor: The investor being compared against
        score: The calculated compatibility score (0-1)
        use_funds: Whether fund-level data was used

    Returns:
        A formatted string with the compatibility score and explanation
    """
    # round() rather than int(): scores are produced as points / 100, and
    # e.g. 0.29 * 100 evaluates to 28.999999999999996, which int() would
    # report as 28%.
    score_percentage = round(score * 100)

    # Determine match quality
    if score_percentage >= 80:
        match_level = "Excellent match"
    elif score_percentage >= 65:
        match_level = "Strong match"
    elif score_percentage >= 50:
        match_level = "Good match"
    elif score_percentage >= 35:
        match_level = "Moderate match"
    else:
        match_level = "Limited match"

    alignment_factors = []
    recommendations = []

    # Get the best matching fund if using funds
    best_fund = None
    if use_funds and investor.funds:
        best_score = 0
        for fund in investor.funds:
            fund_score = _calculate_project_fund_compatibility(project, fund)
            if fund_score > best_score:
                best_score = fund_score
                best_fund = fund

    # Analyze sector alignment
    if project.sector:
        project_sectors = [s.name for s in project.sector if hasattr(s, "name")]

        def _shared_sector_names(other_sectors) -> list:
            # Keep the project's own sector order (de-duplicated) so the
            # generated text is the same on every run; slicing a set gave an
            # arbitrary pick and order.
            other_names = {s.name for s in other_sectors if hasattr(s, "name")}
            return list(
                dict.fromkeys(name for name in project_sectors if name in other_names)
            )

        if best_fund and best_fund.sectors:
            common_sectors = _shared_sector_names(best_fund.sectors)

            if common_sectors:
                sectors_str = ", ".join(common_sectors[:2])
                alignment_factors.append(f"{sectors_str} sector focus")
            elif project_sectors:
                recommendations.append(
                    f"Consider emphasizing any {project_sectors[0]} industry connections"
                )
        elif investor.sectors:
            common_sectors = _shared_sector_names(investor.sectors)

            if common_sectors:
                sectors_str = ", ".join(common_sectors[:2])
                alignment_factors.append(f"{sectors_str} sector focus")

    # Analyze stage alignment
    if project.stage:
        stage_name = (
            project.stage.value
            if hasattr(project.stage, "value")
            else str(project.stage)
        )
        stage_display = stage_name.replace("_", " ").title()

        if best_fund and best_fund.investment_stages:
            fund_stage_names = {
                s.name for s in best_fund.investment_stages if hasattr(s, "name")
            }
            if stage_name in fund_stage_names:
                alignment_factors.append(f"{stage_display} stage")
            else:
                recommendations.append(
                    "Investor typically focuses on different stages; highlight your traction and growth metrics"
                )

        # Without a fund there is no stage data to compare against; the
        # project's stage is listed as-is.
        if not best_fund:
            alignment_factors.append(f"{stage_display} stage")

    # Analyze geographic alignment
    if project.location:
        location_lower = project.location.lower()
        if best_fund and best_fund.geographic_focus:
            focus_lower = best_fund.geographic_focus.lower()
            if location_lower in focus_lower or focus_lower in location_lower:
                alignment_factors.append(f"{project.location} presence")
        elif investor.headquarters:
            hq_lower = investor.headquarters.lower()
            if location_lower in hq_lower or hq_lower in location_lower:
                alignment_factors.append(f"{project.location} market presence")

    # Analyze valuation/check size fit
    if project.valuation:
        if best_fund and best_fund.check_size_lower and best_fund.check_size_upper:
            reasonable_min = best_fund.check_size_lower * 3
            reasonable_max = best_fund.check_size_upper * 10

            if reasonable_min <= project.valuation <= reasonable_max:
                alignment_factors.append("appropriate funding stage")
            elif project.valuation < reasonable_min:
                recommendations.append(
                    "You may be early for this investor; consider approaching at a later stage"
                )
            else:
                recommendations.append(
                    "Consider highlighting your growth trajectory and market opportunity"
                )

    # Build the explanation
    explanation_parts = [f"Based on your startup profile: {score_percentage}% match"]

    if alignment_factors:
        alignment_text = ", ".join(alignment_factors)
        explanation_parts.append(f"{match_level}: {alignment_text}.")
    else:
        explanation_parts.append(f"{match_level}.")

    if recommendations:
        rec_text = recommendations[0]  # Show the most important recommendation
        explanation_parts.append(rec_text + ".")

    return " ".join(explanation_parts)
feat: Integrate Folk CRM API for investor synchronization and compatibility scoring 2025-10-08 19:21:46 +01:00			`"""`
			`Compatibility Score Service`

			`This module calculates compatibility scores between projects and investors.`
			`The scoring system evaluates multiple dimensions to determine how well a project`
			`matches with an investor's investment criteria.`
			`"""`

			`from typing import List, Optional, Tuple`

			`from db.models import FundTable, InvestorTable, ProjectTable`


			`def calculate_project_investor_compatibility(`
			`project: ProjectTable, investor: InvestorTable, use_funds: bool = True`
			`) -> float:`
			`"""`
			`Calculate compatibility score between a project and an investor.`

			`Args:`
			`project: The project to evaluate`
			`investor: The investor to compare against`
			`use_funds: If True, evaluates against investor's funds. If False, uses investor-level data.`

			`Returns:`
			`A score between 0 and 1, where 1 is perfect match`

			`Scoring breakdown (out of 100 points):`
			`- Investment Stage Match: 30 points`
			`- Sector Overlap: 30 points`
			`- Geographic Match: 20 points`
			`- Valuation/Check Size Fit: 20 points`
			`"""`
			`if use_funds and investor.funds:`
			`# Calculate score for each fund and return the highest`
			`max_score = 0.0`
			`for fund in investor.funds:`
			`fund_score = _calculate_project_fund_compatibility(project, fund)`
			`max_score = max(max_score, fund_score)`
			`return max_score`
			`else:`
			`# Use investor-level data (fallback)`
			`return _calculate_project_investor_direct_compatibility(project, investor)`


			`def calculate_project_investors_compatibility(`
			`project: ProjectTable, investors: List[InvestorTable], use_funds: bool = True`
			`) -> List[Tuple[InvestorTable, float]]:`
			`"""`
			`Calculate compatibility scores between a project and multiple investors.`

			`Args:`
			`project: The project to evaluate`
			`investors: List of investors to compare against`
			`use_funds: If True, evaluates against investors' funds. If False, uses investor-level data.`

			`Returns:`
			`List of tuples (investor, score) sorted by score descending`
			`"""`
			`scored_investors = []`

			`for investor in investors:`
			`score = calculate_project_investor_compatibility(project, investor, use_funds)`
			`scored_investors.append((investor, score))`

			`# Sort by score descending`
			`scored_investors.sort(key=lambda x: x[1], reverse=True)`

			`return scored_investors`


			`def _calculate_project_fund_compatibility(`
			`project: ProjectTable, fund: FundTable`
			`) -> float:`
			`"""`
			`Calculate compatibility score between a project and a specific fund.`

			`Scoring breakdown:`
			`- Investment Stage Match: 30 points (all or nothing if stage exists)`
			`- Sector Overlap: 30 points (proportional to overlap)`
			`- Geographic Match: 20 points (exact=20, partial=10, none=0)`
			`- Valuation/Check Size Fit: 20 points (proportional to fit)`

			`Returns:`
			`A score between 0 and 1`
			`"""`
			`total_score = 0`
			`max_score = 100`

			`# 1. Investment Stage Match (30 points)`
			`stage_score = 0`
			`if project.stage and fund.investment_stages:`
			`# Check if project stage matches any of the fund's investment stages`
			`fund_stage_names = {stage.name for stage in fund.investment_stages}`
			`# Convert project.stage enum to string for comparison`
			`project_stage_name = (`
			`project.stage.value`
			`if hasattr(project.stage, "value")`
			`else str(project.stage)`
			`)`

			`if project_stage_name in fund_stage_names:`
			`stage_score = 30`
			`else:`
			`# Partial credit for adjacent stages`
			`stage_score = _calculate_stage_proximity(`
			`project_stage_name, fund_stage_names`
			`)`

			`total_score += stage_score`

			`# 2. Sector Overlap (30 points)`
			`sector_score = 0`
			`if project.sector and fund.sectors:`
			`project_sector_ids = {sector.id for sector in project.sector}`
			`fund_sector_ids = {sector.id for sector in fund.sectors}`

			`if project_sector_ids and fund_sector_ids:`
			`common_sectors = project_sector_ids.intersection(fund_sector_ids)`
			`# Score based on what percentage of project sectors are covered by fund`
			`overlap_ratio = len(common_sectors) / len(project_sector_ids)`
			`sector_score = int(30 * overlap_ratio)`

			`total_score += sector_score`

			`# 3. Geographic Match (20 points)`
			`geo_score = 0`
			`if project.location and fund.geographic_focus:`
			`project_location_lower = project.location.lower()`
refactor: Improve handling of optional fields and enhance compatibility score calculations 2025-10-15 17:58:31 +00:00			`fund_geo_lower = (fund.geographic_focus or "").lower()`
feat: Integrate Folk CRM API for investor synchronization and compatibility scoring 2025-10-08 19:21:46 +01:00
			`# Exact match`
			`if project_location_lower == fund_geo_lower:`
			`geo_score = 20`
			`# Partial match (one contains the other)`
			`elif (`
			`project_location_lower in fund_geo_lower`
			`or fund_geo_lower in project_location_lower`
			`):`
			`geo_score = 10`
			`# Check for common geographic terms`
			`elif _check_geographic_overlap(project_location_lower, fund_geo_lower):`
			`geo_score = 5`

			`total_score += geo_score`

			`# 4. Valuation/Check Size Fit (20 points)`
			`valuation_score = 0`
			`if project.valuation and fund.check_size_lower and fund.check_size_upper:`
			`# Check if project valuation falls within or near the check size range`
			`# Typically, check size is a fraction of valuation (e.g., 10-20%)`
			`# We'll assume check size represents potential investment amount`

			`if fund.check_size_lower <= project.valuation <= fund.check_size_upper:`
			`# Valuation is within the check size range (might be too small)`
			`valuation_score = 10`
			`else:`
			`# Check if the check size is reasonable for this valuation`
			`# Typical investment is 10-30% of valuation`
			`reasonable_valuation_min = fund.check_size_lower * 3 # Investing ~33%`
			`reasonable_valuation_max = fund.check_size_upper * 10 # Investing ~10%`

			`if (`
			`reasonable_valuation_min`
			`<= project.valuation`
			`<= reasonable_valuation_max`
			`):`
			`# Perfect fit`
			`valuation_score = 20`
			`elif project.valuation < reasonable_valuation_min:`
			`# Project might be too small`
			`ratio = (`
			`project.valuation / reasonable_valuation_min`
			`if reasonable_valuation_min > 0`
			`else 0`
			`)`
			`valuation_score = int(10 * ratio)`
			`else:`
			`# Project might be too large`
			`ratio = (`
			`reasonable_valuation_max / project.valuation`
			`if project.valuation > 0`
			`else 0`
			`)`
			`valuation_score = int(10 * ratio)`

			`total_score += valuation_score`

			`# Convert to 0-1 scale`
			`return total_score / max_score`


			`def _calculate_project_investor_direct_compatibility(`
			`project: ProjectTable, investor: InvestorTable`
			`) -> float:`
			`"""`
			`Calculate compatibility using investor-level data (fallback when no funds available).`

			`Uses the same scoring system but with investor-level attributes.`
			`"""`
			`total_score = 0`
			`max_score = 100`

			`# 1. Investment Stage - Skip this since investors don't have a direct stage field`
			`# We could add 30 points to other categories, but for consistency, we'll leave it as 0`
			`stage_score = 0`
			`total_score += stage_score`

			`# 2. Sector Overlap (30 points)`
			`sector_score = 0`
			`if project.sector and investor.sectors:`
			`project_sector_ids = {sector.id for sector in project.sector}`
			`investor_sector_ids = {sector.id for sector in investor.sectors}`

			`if project_sector_ids and investor_sector_ids:`
			`common_sectors = project_sector_ids.intersection(investor_sector_ids)`
			`overlap_ratio = len(common_sectors) / len(project_sector_ids)`
			`sector_score = int(30 * overlap_ratio)`

			`total_score += sector_score`

			`# 3. Geographic Match (20 points)`
			`geo_score = 0`
			`if project.location and investor.geographic_focus:`
			`project_location_lower = project.location.lower()`
refactor: Improve handling of optional fields and enhance compatibility score calculations 2025-10-15 17:58:31 +00:00			`investor_geo_lower = (investor.geographic_focus or "").lower()`
feat: Integrate Folk CRM API for investor synchronization and compatibility scoring 2025-10-08 19:21:46 +01:00
			`if project_location_lower == investor_geo_lower:`
			`geo_score = 20`
			`elif (`
			`project_location_lower in investor_geo_lower`
			`or investor_geo_lower in project_location_lower`
			`):`
			`geo_score = 10`
			`elif _check_geographic_overlap(project_location_lower, investor_geo_lower):`
			`geo_score = 5`

			`total_score += geo_score`

			`# 4. Valuation/Check Size Fit (20 points)`
			`valuation_score = 0`
			`if project.valuation and investor.check_size_lower and investor.check_size_upper:`
			`reasonable_valuation_min = investor.check_size_lower * 3`
			`reasonable_valuation_max = investor.check_size_upper * 10`

			`if reasonable_valuation_min <= project.valuation <= reasonable_valuation_max:`
			`valuation_score = 20`
			`elif project.valuation < reasonable_valuation_min:`
			`ratio = (`
			`project.valuation / reasonable_valuation_min`
			`if reasonable_valuation_min > 0`
			`else 0`
			`)`
			`valuation_score = int(10 * ratio)`
			`else:`
			`ratio = (`
			`reasonable_valuation_max / project.valuation`
			`if project.valuation > 0`
			`else 0`
			`)`
			`valuation_score = int(10 * ratio)`

			`total_score += valuation_score`

			`# Convert to 0-1 scale`
			`return total_score / max_score`


			`def _calculate_stage_proximity(project_stage: str, fund_stages: set) -> int:`
			`"""`
			`Calculate proximity score between project stage and fund stages.`
			`Awards partial credit for adjacent investment stages.`

			`Stage progression: SEED -> SERIES_A -> SERIES_B -> SERIES_C -> GROWTH -> LATE_STAGE`

			`Returns:`
			`Score from 0-15 (half credit for adjacent stages)`
			`"""`
			`stage_order = ["SEED", "SERIES_A", "SERIES_B", "SERIES_C", "GROWTH", "LATE_STAGE"]`

			`try:`
			`project_idx = stage_order.index(project_stage)`
			`except ValueError:`
			`return 0`

			`# Check for adjacent stages`
			`adjacent_stages = []`
			`if project_idx > 0:`
			`adjacent_stages.append(stage_order[project_idx - 1])`
			`if project_idx < len(stage_order) - 1:`
			`adjacent_stages.append(stage_order[project_idx + 1])`

			`for stage in fund_stages:`
			`if stage in adjacent_stages:`
			`return 15 # Half credit for adjacent stage`

			`return 0`


			`def _check_geographic_overlap(location1: str, location2: str) -> bool:`
			`"""`
			`Check for common geographic terms between two locations.`

			`Examples:`
			`- "San Francisco, CA" and "California" -> True`
			`- "New York" and "USA" -> True (if both contain USA/US)`
			`- "London, UK" and "United Kingdom" -> True`
			`"""`
			`# Common geographic groupings`
			`geo_groups = [`
			`["usa", "us", "united states", "america"],`
			`["uk", "united kingdom", "britain"],`
			`["california", "ca"],`
			`["new york", "ny"],`
			`["texas", "tx"],`
			`["europe", "eu"],`
			`["asia", "asian"],`
			`["africa", "african"],`
			`]`

			`for group in geo_groups:`
			`found_in_1 = any(term in location1 for term in group)`
			`found_in_2 = any(term in location2 for term in group)`
			`if found_in_1 and found_in_2:`
			`return True`

			`return False`


			`def get_top_compatible_investors(`
			`project: ProjectTable,`
			`investors: List[InvestorTable],`
			`limit: int = 10,`
			`min_score: float = 0.0,`
			`use_funds: bool = True,`
			`) -> List[Tuple[InvestorTable, float]]:`
			`"""`
			`Get the top N most compatible investors for a project.`

			`Args:`
			`project: The project to find investors for`
			`investors: List of all available investors`
			`limit: Maximum number of investors to return`
			`min_score: Minimum compatibility score threshold (0-1)`
			`use_funds: If True, evaluates against investors' funds`

			`Returns:`
			`List of tuples (investor, score) sorted by score descending,`
			`limited to 'limit' items and filtered by min_score`
			`"""`
			`scored_investors = calculate_project_investors_compatibility(`
			`project, investors, use_funds`
			`)`

			`# Filter by minimum score`
			`filtered_investors = [`
			`(investor, score) for investor, score in scored_investors if score >= min_score`
			`]`

			`# Return top N`
			`return filtered_investors[:limit]`


			`def get_compatibility_score_breakdown(`
			`project: ProjectTable, investor: InvestorTable, fund: Optional[FundTable] = None`
			`) -> dict:`
			`"""`
			`Get a detailed breakdown of the compatibility score components.`

			`Useful for debugging or showing users why a particular score was calculated.`

			`Returns:`
			`Dictionary with score components and explanations`
			`"""`
			`if fund:`
			`total_score = 0`

			`# Stage score`
			`stage_score = 0`
			`stage_match = False`
			`if project.stage and fund.investment_stages:`
			`fund_stage_names = {stage.name for stage in fund.investment_stages}`
			`project_stage_name = (`
			`project.stage.value`
			`if hasattr(project.stage, "value")`
			`else str(project.stage)`
			`)`
			`if project_stage_name in fund_stage_names:`
			`stage_score = 30`
			`stage_match = True`
			`else:`
			`stage_score = _calculate_stage_proximity(`
			`project_stage_name, fund_stage_names`
			`)`

			`# Sector score`
			`sector_score = 0`
			`matching_sectors = []`
			`if project.sector and fund.sectors:`
			`project_sector_ids = {sector.id for sector in project.sector}`
			`fund_sector_ids = {sector.id for sector in fund.sectors}`
			`if project_sector_ids and fund_sector_ids:`
			`common_sectors = project_sector_ids.intersection(fund_sector_ids)`
			`matching_sectors = [`
			`s.name for s in fund.sectors if s.id in common_sectors`
			`]`
			`overlap_ratio = len(common_sectors) / len(project_sector_ids)`
			`sector_score = int(30 * overlap_ratio)`

			`# Geographic score`
			`geo_score = 0`
			`geo_match_type = "none"`
			`if project.location and fund.geographic_focus:`
			`project_location_lower = project.location.lower()`
			`fund_geo_lower = fund.geographic_focus.lower()`
			`if project_location_lower == fund_geo_lower:`
			`geo_score = 20`
			`geo_match_type = "exact"`
			`elif (`
			`project_location_lower in fund_geo_lower`
			`or fund_geo_lower in project_location_lower`
			`):`
			`geo_score = 10`
			`geo_match_type = "partial"`
			`elif _check_geographic_overlap(project_location_lower, fund_geo_lower):`
			`geo_score = 5`
			`geo_match_type = "regional"`

			`# Valuation score`
			`valuation_score = 0`
			`valuation_fit = "unknown"`
			`if project.valuation and fund.check_size_lower and fund.check_size_upper:`
			`reasonable_valuation_min = fund.check_size_lower * 3`
			`reasonable_valuation_max = fund.check_size_upper * 10`
			`if (`
			`reasonable_valuation_min`
			`<= project.valuation`
			`<= reasonable_valuation_max`
			`):`
			`valuation_score = 20`
			`valuation_fit = "perfect"`
			`elif project.valuation < reasonable_valuation_min:`
			`ratio = (`
			`project.valuation / reasonable_valuation_min`
			`if reasonable_valuation_min > 0`
			`else 0`
			`)`
			`valuation_score = int(10 * ratio)`
			`valuation_fit = "too_small"`
			`else:`
			`ratio = (`
			`reasonable_valuation_max / project.valuation`
			`if project.valuation > 0`
			`else 0`
			`)`
			`valuation_score = int(10 * ratio)`
			`valuation_fit = "too_large"`

			`total_score = stage_score + sector_score + geo_score + valuation_score`

			`return {`
			`"total_score": total_score / 100,`
			`"breakdown": {`
			`"stage": {`
			`"score": stage_score,`
			`"max_score": 30,`
			`"match": stage_match,`
			`"project_stage": project.stage.value if project.stage else None,`
			`"fund_stages": [s.name for s in fund.investment_stages]`
			`if fund.investment_stages`
			`else [],`
			`},`
			`"sector": {`
			`"score": sector_score,`
			`"max_score": 30,`
			`"matching_sectors": matching_sectors,`
			`"project_sectors": [s.name for s in project.sector]`
			`if project.sector`
			`else [],`
			`"fund_sectors": [s.name for s in fund.sectors]`
			`if fund.sectors`
			`else [],`
			`},`
			`"geography": {`
			`"score": geo_score,`
			`"max_score": 20,`
			`"match_type": geo_match_type,`
			`"project_location": project.location,`
			`"fund_geography": fund.geographic_focus,`
			`},`
			`"valuation": {`
			`"score": valuation_score,`
			`"max_score": 20,`
			`"fit": valuation_fit,`
			`"project_valuation": project.valuation,`
			`"fund_check_size_range": f"{fund.check_size_lower}-{fund.check_size_upper}"`
			`if fund.check_size_lower`
			`else None,`
			`},`
			`},`
			`}`
			`else:`
			`# Investor-level breakdown (simplified)`
			`return {`
			`"total_score": _calculate_project_investor_direct_compatibility(`
			`project, investor`
			`),`
			`"note": "Using investor-level data (no specific fund selected)",`
			`}`
feat: Add insight generation functionality with compatibility scoring and web search integration 2025-10-13 23:19:46 +01:00

			`def generate_compatibility_explanation(`
			`project: ProjectTable, investor: InvestorTable, score: float, use_funds: bool = True`
			`) -> str:`
			`"""`
			`Generate a detailed, natural language explanation of the compatibility score.`

			`Args:`
			`project: The project being evaluated`
			`investor: The investor being compared against`
			`score: The calculated compatibility score (0-1)`
			`use_funds: Whether fund-level data was used`

			`Returns:`
			`A formatted string with the compatibility score and detailed explanation`
			`"""`
			`score_percentage = int(score * 100)`

			`# Determine match quality`
			`if score_percentage >= 80:`
			`match_level = "Excellent match"`
			`elif score_percentage >= 65:`
			`match_level = "Strong match"`
			`elif score_percentage >= 50:`
			`match_level = "Good match"`
			`elif score_percentage >= 35:`
			`match_level = "Moderate match"`
			`else:`
			`match_level = "Limited match"`

			`# Collect alignment factors`
			`alignment_factors = []`
			`recommendations = []`

			`# Get the best matching fund if using funds`
			`best_fund = None`
			`if use_funds and investor.funds:`
			`best_score = 0`
			`for fund in investor.funds:`
			`fund_score = _calculate_project_fund_compatibility(project, fund)`
			`if fund_score > best_score:`
			`best_score = fund_score`
			`best_fund = fund`

			`# Analyze sector alignment`
			`if project.sector:`
			`project_sectors = [s.name for s in project.sector if hasattr(s, "name")]`

			`if best_fund and best_fund.sectors:`
			`fund_sectors = {s.name for s in best_fund.sectors if hasattr(s, "name")}`
			`common_sectors = set(project_sectors) & fund_sectors`

			`if common_sectors:`
			`sectors_str = ", ".join(list(common_sectors)[:2])`
			`alignment_factors.append(f"{sectors_str} sector focus")`
			`elif project_sectors:`
			`recommendations.append(`
			`f"Consider emphasizing any {project_sectors[0]} industry connections"`
			`)`
			`elif investor.sectors:`
			`investor_sectors = {s.name for s in investor.sectors if hasattr(s, "name")}`
			`common_sectors = set(project_sectors) & investor_sectors`

			`if common_sectors:`
			`sectors_str = ", ".join(list(common_sectors)[:2])`
			`alignment_factors.append(f"{sectors_str} sector focus")`

			`# Analyze stage alignment`
			`if project.stage:`
			`stage_name = (`
			`project.stage.value`
			`if hasattr(project.stage, "value")`
			`else str(project.stage)`
			`)`
			`stage_display = stage_name.replace("_", " ").title()`

			`if best_fund and best_fund.investment_stages:`
			`fund_stage_names = {`
			`s.name for s in best_fund.investment_stages if hasattr(s, "name")`
			`}`
			`if stage_name in fund_stage_names:`
			`alignment_factors.append(f"{stage_display} stage")`
			`else:`
			`recommendations.append(`
			`"Investor typically focuses on different stages; highlight your traction and growth metrics"`
			`)`

			`if not best_fund:`
			`alignment_factors.append(f"{stage_display} stage")`

			`# Analyze geographic alignment`
			`if project.location:`
			`if best_fund and best_fund.geographic_focus:`
			`if (`
			`project.location.lower() in best_fund.geographic_focus.lower()`
			`or best_fund.geographic_focus.lower() in project.location.lower()`
			`):`
			`alignment_factors.append(f"{project.location} presence")`
			`elif investor.headquarters:`
			`if (`
			`project.location.lower() in investor.headquarters.lower()`
			`or investor.headquarters.lower() in project.location.lower()`
			`):`
			`alignment_factors.append(f"{project.location} market presence")`

			`# Analyze valuation/check size fit`
			`if project.valuation:`
			`if best_fund and best_fund.check_size_lower and best_fund.check_size_upper:`
			`reasonable_min = best_fund.check_size_lower * 3`
			`reasonable_max = best_fund.check_size_upper * 10`

			`if reasonable_min <= project.valuation <= reasonable_max:`
			`alignment_factors.append("appropriate funding stage")`
			`elif project.valuation < reasonable_min:`
			`recommendations.append(`
			`"You may be early for this investor; consider approaching at a later stage"`
			`)`
			`else:`
			`recommendations.append(`
			`"Consider highlighting your growth trajectory and market opportunity"`
			`)`

			`# Build the explanation`
			`explanation_parts = [f"Based on your startup profile: {score_percentage}% match"]`

			`if alignment_factors:`
			`alignment_text = ", ".join(alignment_factors)`
			`explanation_parts.append(f"{match_level}: {alignment_text}.")`
			`else:`
			`explanation_parts.append(f"{match_level}.")`

			`if recommendations:`
			`rec_text = recommendations[0] # Show the most important recommendation`
			`explanation_parts.append(rec_text + ".")`

			`return " ".join(explanation_parts)`