feat: Enhance compatibility scoring and report generation with new methods and models
This commit is contained in:
+22
-11
@@ -12,7 +12,11 @@ from schemas.router_schemas import (
|
|||||||
PaginatedResponse,
|
PaginatedResponse,
|
||||||
SectorMinimal,
|
SectorMinimal,
|
||||||
)
|
)
|
||||||
from services.compatibility_score import calculate_project_investor_compatibility
|
from services.compatibility_score import (
|
||||||
|
calculate_project_investor_compatibility,
|
||||||
|
_calculate_project_fund_compatibility,
|
||||||
|
_calculate_project_investor_direct_compatibility,
|
||||||
|
)
|
||||||
from sqlalchemy.orm import Session, selectinload
|
from sqlalchemy.orm import Session, selectinload
|
||||||
|
|
||||||
router = APIRouter(tags=["Investor Routes"])
|
router = APIRouter(tags=["Investor Routes"])
|
||||||
@@ -95,13 +99,6 @@ def read_investors(
|
|||||||
# Transform to InvestmentResponse format (one row per investor-fund combination)
|
# Transform to InvestmentResponse format (one row per investor-fund combination)
|
||||||
investment_responses = []
|
investment_responses = []
|
||||||
for investor in investors:
|
for investor in investors:
|
||||||
# Calculate compatibility score if project provided
|
|
||||||
compatibility_score = 1.0
|
|
||||||
if project is not None:
|
|
||||||
compatibility_score = calculate_project_investor_compatibility(
|
|
||||||
project=project, investor=investor, use_funds=True
|
|
||||||
)
|
|
||||||
|
|
||||||
# Get top 3 portfolio companies (id and name only)
|
# Get top 3 portfolio companies (id and name only)
|
||||||
portfolio_companies = [
|
portfolio_companies = [
|
||||||
CompanyMinimal(id=company.id, name=company.name)
|
CompanyMinimal(id=company.id, name=company.name)
|
||||||
@@ -111,6 +108,13 @@ def read_investors(
|
|||||||
# If investor has funds, create one entry per fund
|
# If investor has funds, create one entry per fund
|
||||||
if investor.funds:
|
if investor.funds:
|
||||||
for fund in investor.funds:
|
for fund in investor.funds:
|
||||||
|
# Calculate compatibility score for this specific fund
|
||||||
|
compatibility_score = 1.0
|
||||||
|
if project is not None:
|
||||||
|
compatibility_score = _calculate_project_fund_compatibility(
|
||||||
|
project=project, fund=fund
|
||||||
|
)
|
||||||
|
|
||||||
# Get stage focus as comma-separated string
|
# Get stage focus as comma-separated string
|
||||||
stage_focus = (
|
stage_focus = (
|
||||||
", ".join([stage.name for stage in fund.investment_stages])
|
", ".join([stage.name for stage in fund.investment_stages])
|
||||||
@@ -141,6 +145,13 @@ def read_investors(
|
|||||||
investment_responses.append(investment_response)
|
investment_responses.append(investment_response)
|
||||||
else:
|
else:
|
||||||
# If no funds, create one entry with null fund fields
|
# If no funds, create one entry with null fund fields
|
||||||
|
# Calculate compatibility using investor-level data
|
||||||
|
compatibility_score = 1.0
|
||||||
|
if project is not None:
|
||||||
|
compatibility_score = _calculate_project_investor_direct_compatibility(
|
||||||
|
project=project, investor=investor
|
||||||
|
)
|
||||||
|
|
||||||
investment_response = InvestmentResponse(
|
investment_response = InvestmentResponse(
|
||||||
id=investor.id,
|
id=investor.id,
|
||||||
name=investor.name,
|
name=investor.name,
|
||||||
@@ -255,11 +266,11 @@ def filter_investors(
|
|||||||
for fund in funds:
|
for fund in funds:
|
||||||
investor = fund.investor
|
investor = fund.investor
|
||||||
|
|
||||||
# Calculate compatibility score if project provided
|
# Calculate compatibility score for this specific fund
|
||||||
compatibility_score = 1.0
|
compatibility_score = 1.0
|
||||||
if project is not None:
|
if project is not None:
|
||||||
compatibility_score = calculate_project_investor_compatibility(
|
compatibility_score = _calculate_project_fund_compatibility(
|
||||||
project=project, investor=investor, use_funds=True
|
project=project, fund=fund
|
||||||
)
|
)
|
||||||
|
|
||||||
# Get top 3 portfolio companies (id and name only)
|
# Get top 3 portfolio companies (id and name only)
|
||||||
|
|||||||
@@ -106,7 +106,7 @@ async def generate_investor_report(
|
|||||||
# Generate PDF report
|
# Generate PDF report
|
||||||
report_generator = ReportGenerator()
|
report_generator = ReportGenerator()
|
||||||
pdf_bytes = await report_generator.generate_investor_report(
|
pdf_bytes = await report_generator.generate_investor_report(
|
||||||
investor_data, project_data
|
investor_data, project_data, investor_model=investor, project_model=project
|
||||||
)
|
)
|
||||||
|
|
||||||
# Return PDF as downloadable file
|
# Return PDF as downloadable file
|
||||||
|
|||||||
@@ -6,6 +6,7 @@ The scoring system evaluates multiple dimensions to determine how well a project
|
|||||||
matches with an investor's investment criteria.
|
matches with an investor's investment criteria.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
from difflib import SequenceMatcher
|
||||||
from typing import List, Optional, Tuple
|
from typing import List, Optional, Tuple
|
||||||
|
|
||||||
from db.models import FundTable, InvestorTable, ProjectTable
|
from db.models import FundTable, InvestorTable, ProjectTable
|
||||||
@@ -99,12 +100,16 @@ def _calculate_project_fund_compatibility(
|
|||||||
else str(project.stage)
|
else str(project.stage)
|
||||||
)
|
)
|
||||||
|
|
||||||
if project_stage_name in fund_stage_names:
|
# Normalize both for case-insensitive comparison
|
||||||
|
project_stage_normalized = project_stage_name.upper().strip()
|
||||||
|
fund_stages_normalized = {name.upper().strip() for name in fund_stage_names}
|
||||||
|
|
||||||
|
if project_stage_normalized in fund_stages_normalized:
|
||||||
stage_score = 30
|
stage_score = 30
|
||||||
else:
|
else:
|
||||||
# Partial credit for adjacent stages
|
# Partial credit for adjacent stages
|
||||||
stage_score = _calculate_stage_proximity(
|
stage_score = _calculate_stage_proximity(
|
||||||
project_stage_name, fund_stage_names
|
project_stage_normalized, fund_stages_normalized
|
||||||
)
|
)
|
||||||
|
|
||||||
total_score += stage_score
|
total_score += stage_score
|
||||||
@@ -112,22 +117,53 @@ def _calculate_project_fund_compatibility(
|
|||||||
# 2. Sector Overlap (30 points)
|
# 2. Sector Overlap (30 points)
|
||||||
sector_score = 0
|
sector_score = 0
|
||||||
if project.sector and fund.sectors:
|
if project.sector and fund.sectors:
|
||||||
project_sector_ids = {sector.id for sector in project.sector}
|
project_sectors = [s for s in project.sector if hasattr(s, 'name')]
|
||||||
fund_sector_ids = {sector.id for sector in fund.sectors}
|
fund_sectors = [s for s in fund.sectors if hasattr(s, 'name')]
|
||||||
|
|
||||||
if project_sector_ids and fund_sector_ids:
|
if project_sectors and fund_sectors:
|
||||||
common_sectors = project_sector_ids.intersection(fund_sector_ids)
|
# Use fuzzy matching to account for similar but not identical sector names
|
||||||
# Score based on what percentage of project sectors are covered by fund
|
match_count = 0
|
||||||
overlap_ratio = len(common_sectors) / len(project_sector_ids)
|
total_matches = 0
|
||||||
sector_score = int(30 * overlap_ratio)
|
|
||||||
|
for proj_sector in project_sectors:
|
||||||
|
best_match_score = 0
|
||||||
|
proj_name = proj_sector.name.lower().strip()
|
||||||
|
|
||||||
|
for fund_sector in fund_sectors:
|
||||||
|
fund_name = fund_sector.name.lower().strip()
|
||||||
|
|
||||||
|
# Exact match
|
||||||
|
if proj_name == fund_name:
|
||||||
|
best_match_score = 1.0
|
||||||
|
break
|
||||||
|
|
||||||
|
# Fuzzy match using sequence matcher
|
||||||
|
similarity = SequenceMatcher(None, proj_name, fund_name).ratio()
|
||||||
|
|
||||||
|
# Also check if one contains the other (substring match)
|
||||||
|
if proj_name in fund_name or fund_name in proj_name:
|
||||||
|
similarity = max(similarity, 0.8)
|
||||||
|
|
||||||
|
best_match_score = max(best_match_score, similarity)
|
||||||
|
|
||||||
|
# Count matches with threshold
|
||||||
|
# A best match of at least 0.6 counts and contributes its similarity (1.0 for an exact name match)
|
||||||
|
if best_match_score >= 0.6:
|
||||||
|
total_matches += best_match_score
|
||||||
|
match_count += 1
|
||||||
|
|
||||||
|
if match_count > 0:
|
||||||
|
# Calculate overlap ratio based on fuzzy matches
|
||||||
|
overlap_ratio = total_matches / len(project_sectors)
|
||||||
|
sector_score = int(30 * overlap_ratio)
|
||||||
|
|
||||||
total_score += sector_score
|
total_score += sector_score
|
||||||
|
|
||||||
# 3. Geographic Match (20 points)
|
# 3. Geographic Match (20 points)
|
||||||
geo_score = 0
|
geo_score = 0
|
||||||
if project.location and fund.geographic_focus:
|
if project.location and fund.geographic_focus:
|
||||||
project_location_lower = project.location.lower()
|
project_location_lower = project.location.lower().strip()
|
||||||
fund_geo_lower = (fund.geographic_focus or "").lower()
|
fund_geo_lower = (fund.geographic_focus or "").lower().strip()
|
||||||
|
|
||||||
# Exact match
|
# Exact match
|
||||||
if project_location_lower == fund_geo_lower:
|
if project_location_lower == fund_geo_lower:
|
||||||
@@ -137,10 +173,10 @@ def _calculate_project_fund_compatibility(
|
|||||||
project_location_lower in fund_geo_lower
|
project_location_lower in fund_geo_lower
|
||||||
or fund_geo_lower in project_location_lower
|
or fund_geo_lower in project_location_lower
|
||||||
):
|
):
|
||||||
geo_score = 10
|
geo_score = 15
|
||||||
# Check for common geographic terms
|
# Check for common geographic terms or regional overlap
|
||||||
elif _check_geographic_overlap(project_location_lower, fund_geo_lower):
|
elif _check_geographic_overlap(project_location_lower, fund_geo_lower):
|
||||||
geo_score = 5
|
geo_score = 12
|
||||||
|
|
||||||
total_score += geo_score
|
total_score += geo_score
|
||||||
|
|
||||||
@@ -209,13 +245,44 @@ def _calculate_project_investor_direct_compatibility(
|
|||||||
# 2. Sector Overlap (30 points)
|
# 2. Sector Overlap (30 points)
|
||||||
sector_score = 0
|
sector_score = 0
|
||||||
if project.sector and investor.sectors:
|
if project.sector and investor.sectors:
|
||||||
project_sector_ids = {sector.id for sector in project.sector}
|
project_sectors = [s for s in project.sector if hasattr(s, 'name')]
|
||||||
investor_sector_ids = {sector.id for sector in investor.sectors}
|
investor_sectors = [s for s in investor.sectors if hasattr(s, 'name')]
|
||||||
|
|
||||||
if project_sector_ids and investor_sector_ids:
|
if project_sectors and investor_sectors:
|
||||||
common_sectors = project_sector_ids.intersection(investor_sector_ids)
|
# Use fuzzy matching to account for similar but not identical sector names
|
||||||
overlap_ratio = len(common_sectors) / len(project_sector_ids)
|
match_count = 0
|
||||||
sector_score = int(30 * overlap_ratio)
|
total_matches = 0
|
||||||
|
|
||||||
|
for proj_sector in project_sectors:
|
||||||
|
best_match_score = 0
|
||||||
|
proj_name = proj_sector.name.lower().strip()
|
||||||
|
|
||||||
|
for inv_sector in investor_sectors:
|
||||||
|
inv_name = inv_sector.name.lower().strip()
|
||||||
|
|
||||||
|
# Exact match
|
||||||
|
if proj_name == inv_name:
|
||||||
|
best_match_score = 1.0
|
||||||
|
break
|
||||||
|
|
||||||
|
# Fuzzy match using sequence matcher
|
||||||
|
similarity = SequenceMatcher(None, proj_name, inv_name).ratio()
|
||||||
|
|
||||||
|
# Also check if one contains the other (substring match)
|
||||||
|
if proj_name in inv_name or inv_name in proj_name:
|
||||||
|
similarity = max(similarity, 0.8)
|
||||||
|
|
||||||
|
best_match_score = max(best_match_score, similarity)
|
||||||
|
|
||||||
|
# Count matches with threshold
|
||||||
|
if best_match_score >= 0.6:
|
||||||
|
total_matches += best_match_score
|
||||||
|
match_count += 1
|
||||||
|
|
||||||
|
if match_count > 0:
|
||||||
|
# Calculate overlap ratio based on fuzzy matches
|
||||||
|
overlap_ratio = total_matches / len(project_sectors)
|
||||||
|
sector_score = int(30 * overlap_ratio)
|
||||||
|
|
||||||
total_score += sector_score
|
total_score += sector_score
|
||||||
|
|
||||||
@@ -278,8 +345,11 @@ def _calculate_stage_proximity(project_stage: str, fund_stages: set) -> int:
|
|||||||
"""
|
"""
|
||||||
stage_order = ["SEED", "SERIES_A", "SERIES_B", "SERIES_C", "GROWTH", "LATE_STAGE"]
|
stage_order = ["SEED", "SERIES_A", "SERIES_B", "SERIES_C", "GROWTH", "LATE_STAGE"]
|
||||||
|
|
||||||
|
# Normalize project stage for comparison
|
||||||
|
project_stage_normalized = project_stage.upper().strip()
|
||||||
|
|
||||||
try:
|
try:
|
||||||
project_idx = stage_order.index(project_stage)
|
project_idx = stage_order.index(project_stage_normalized)
|
||||||
except ValueError:
|
except ValueError:
|
||||||
return 0
|
return 0
|
||||||
|
|
||||||
@@ -290,8 +360,10 @@ def _calculate_stage_proximity(project_stage: str, fund_stages: set) -> int:
|
|||||||
if project_idx < len(stage_order) - 1:
|
if project_idx < len(stage_order) - 1:
|
||||||
adjacent_stages.append(stage_order[project_idx + 1])
|
adjacent_stages.append(stage_order[project_idx + 1])
|
||||||
|
|
||||||
|
# Normalize fund stages and check for matches
|
||||||
for stage in fund_stages:
|
for stage in fund_stages:
|
||||||
if stage in adjacent_stages:
|
stage_normalized = stage.upper().strip()
|
||||||
|
if stage_normalized in adjacent_stages:
|
||||||
return 15 # Half credit for adjacent stage
|
return 15 # Half credit for adjacent stage
|
||||||
|
|
||||||
return 0
|
return 0
|
||||||
@@ -305,24 +377,62 @@ def _check_geographic_overlap(location1: str, location2: str) -> bool:
|
|||||||
- "San Francisco, CA" and "California" -> True
|
- "San Francisco, CA" and "California" -> True
|
||||||
- "New York, USA" and "USA" -> True (both contain a US term)
|
- "New York, USA" and "USA" -> True (both contain a US term)
|
||||||
- "London, UK" and "United Kingdom" -> True
|
- "London, UK" and "United Kingdom" -> True
|
||||||
|
- "Germany" and "Europe" -> True
|
||||||
"""
|
"""
|
||||||
# Common geographic groupings
|
# Normalize inputs
|
||||||
|
loc1 = location1.lower().strip()
|
||||||
|
loc2 = location2.lower().strip()
|
||||||
|
|
||||||
|
# Common geographic groupings with broader regional mappings
|
||||||
geo_groups = [
|
geo_groups = [
|
||||||
["usa", "us", "united states", "america"],
|
# North America
|
||||||
["uk", "united kingdom", "britain"],
|
["usa", "us", "united states", "america", "u.s.", "u.s.a"],
|
||||||
["california", "ca"],
|
["canada", "canadian"],
|
||||||
["new york", "ny"],
|
["mexico", "mexican"],
|
||||||
|
|
||||||
|
# Europe and countries
|
||||||
|
["europe", "european", "eu", "germany", "france", "uk", "united kingdom",
|
||||||
|
"britain", "spain", "italy", "netherlands", "belgium", "sweden", "denmark",
|
||||||
|
"norway", "finland", "poland", "portugal", "austria", "switzerland",
|
||||||
|
"ireland", "greece", "czech", "romania"],
|
||||||
|
|
||||||
|
# UK specific
|
||||||
|
["uk", "united kingdom", "britain", "england", "scotland", "wales", "london"],
|
||||||
|
|
||||||
|
# US states
|
||||||
|
["california", "ca", "san francisco", "los angeles", "silicon valley"],
|
||||||
|
["new york", "ny", "nyc"],
|
||||||
["texas", "tx"],
|
["texas", "tx"],
|
||||||
["europe", "eu"],
|
["massachusetts", "ma", "boston"],
|
||||||
["asia", "asian"],
|
["washington", "seattle"],
|
||||||
["africa", "african"],
|
|
||||||
|
# Asia
|
||||||
|
["asia", "asian", "china", "japan", "korea", "singapore", "hong kong",
|
||||||
|
"india", "indonesia", "thailand", "vietnam", "malaysia", "philippines"],
|
||||||
|
|
||||||
|
# Middle East
|
||||||
|
["middle east", "israel", "uae", "dubai", "saudi arabia"],
|
||||||
|
|
||||||
|
# Latin America
|
||||||
|
["latin america", "brazil", "argentina", "chile", "colombia", "mexico"],
|
||||||
|
|
||||||
|
# Africa
|
||||||
|
["africa", "african", "south africa", "nigeria", "kenya", "egypt"],
|
||||||
|
|
||||||
|
# Oceania
|
||||||
|
["australia", "australian", "new zealand"],
|
||||||
]
|
]
|
||||||
|
|
||||||
|
# Check whether both locations contain a term from the same group
|
||||||
for group in geo_groups:
|
for group in geo_groups:
|
||||||
found_in_1 = any(term in location1 for term in group)
|
found_in_1 = any(term in loc1 for term in group)
|
||||||
found_in_2 = any(term in location2 for term in group)
|
found_in_2 = any(term in loc2 for term in group)
|
||||||
if found_in_1 and found_in_2:
|
if found_in_1 and found_in_2:
|
||||||
return True
|
return True
|
||||||
|
|
||||||
|
# Check for direct substring match (one contains the other)
|
||||||
|
if loc1 in loc2 or loc2 in loc1:
|
||||||
|
return True
|
||||||
|
|
||||||
return False
|
return False
|
||||||
|
|
||||||
|
|||||||
@@ -4,6 +4,10 @@ from typing import Any, Dict, List, Optional
|
|||||||
from jinja2 import Environment, FileSystemLoader
|
from jinja2 import Environment, FileSystemLoader
|
||||||
from playwright.async_api import async_playwright
|
from playwright.async_api import async_playwright
|
||||||
|
|
||||||
|
# Import database models and compatibility score service
|
||||||
|
from db.models import InvestorTable, ProjectTable
|
||||||
|
from services.compatibility_score import calculate_project_investor_compatibility
|
||||||
|
|
||||||
|
|
||||||
class ReportGenerator:
|
class ReportGenerator:
|
||||||
"""Service for generating PDF reports from HTML templates"""
|
"""Service for generating PDF reports from HTML templates"""
|
||||||
@@ -17,6 +21,8 @@ class ReportGenerator:
|
|||||||
self,
|
self,
|
||||||
investor_data: Dict[str, Any],
|
investor_data: Dict[str, Any],
|
||||||
project_data: Optional[Dict[str, Any]] = None,
|
project_data: Optional[Dict[str, Any]] = None,
|
||||||
|
investor_model: Optional[InvestorTable] = None,
|
||||||
|
project_model: Optional[ProjectTable] = None,
|
||||||
) -> bytes:
|
) -> bytes:
|
||||||
"""
|
"""
|
||||||
Generate a PDF report for an investor profile.
|
Generate a PDF report for an investor profile.
|
||||||
@@ -24,12 +30,16 @@ class ReportGenerator:
|
|||||||
Args:
|
Args:
|
||||||
investor_data: Dictionary containing investor information
|
investor_data: Dictionary containing investor information
|
||||||
project_data: Optional dictionary containing project information for compatibility analysis
|
project_data: Optional dictionary containing project information for compatibility analysis
|
||||||
|
investor_model: Optional database model for investor (used for compatibility scoring)
|
||||||
|
project_model: Optional database model for project (used for compatibility scoring)
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
bytes: PDF file content
|
bytes: PDF file content
|
||||||
"""
|
"""
|
||||||
# Prepare template context
|
# Prepare template context
|
||||||
context = self._prepare_context(investor_data, project_data)
|
context = self._prepare_context(
|
||||||
|
investor_data, project_data, investor_model, project_model
|
||||||
|
)
|
||||||
|
|
||||||
# Render HTML from template
|
# Render HTML from template
|
||||||
template = self.env.get_template("report.html")
|
template = self.env.get_template("report.html")
|
||||||
@@ -43,6 +53,8 @@ class ReportGenerator:
|
|||||||
self,
|
self,
|
||||||
investor_data: Dict[str, Any],
|
investor_data: Dict[str, Any],
|
||||||
project_data: Optional[Dict[str, Any]] = None,
|
project_data: Optional[Dict[str, Any]] = None,
|
||||||
|
investor_model: Optional[InvestorTable] = None,
|
||||||
|
project_model: Optional[ProjectTable] = None,
|
||||||
) -> Dict[str, Any]:
|
) -> Dict[str, Any]:
|
||||||
"""Prepare the context dictionary for template rendering"""
|
"""Prepare the context dictionary for template rendering"""
|
||||||
context = {
|
context = {
|
||||||
@@ -55,9 +67,20 @@ class ReportGenerator:
|
|||||||
|
|
||||||
# If project data is provided, calculate compatibility
|
# If project data is provided, calculate compatibility
|
||||||
if project_data:
|
if project_data:
|
||||||
context["compatibility_score"] = self._calculate_compatibility_score(
|
# Use the compatibility_score service if models are provided
|
||||||
investor_data, project_data
|
if investor_model and project_model:
|
||||||
)
|
# Calculate using the standardized compatibility score service
|
||||||
|
# The service returns a score between 0 and 1; convert it to an integer percentage (0-100, truncated by int())
|
||||||
|
score_decimal = calculate_project_investor_compatibility(
|
||||||
|
project=project_model, investor=investor_model, use_funds=True
|
||||||
|
)
|
||||||
|
context["compatibility_score"] = int(score_decimal * 100)
|
||||||
|
else:
|
||||||
|
# Fall back to the old calculation method if the models are not provided
|
||||||
|
context["compatibility_score"] = self._calculate_compatibility_score(
|
||||||
|
investor_data, project_data
|
||||||
|
)
|
||||||
|
|
||||||
context["match_criteria"] = self._generate_match_criteria(
|
context["match_criteria"] = self._generate_match_criteria(
|
||||||
investor_data, project_data
|
investor_data, project_data
|
||||||
)
|
)
|
||||||
|
|||||||
Binary file not shown.
Reference in New Issue
Block a user