made corrections based on feedback
This commit is contained in:
@@ -117,41 +117,41 @@ def _calculate_project_fund_compatibility(
|
||||
# 2. Sector Overlap (30 points)
|
||||
sector_score = 0
|
||||
if project.sector and fund.sectors:
|
||||
project_sectors = [s for s in project.sector if hasattr(s, 'name')]
|
||||
fund_sectors = [s for s in fund.sectors if hasattr(s, 'name')]
|
||||
|
||||
project_sectors = [s for s in project.sector if hasattr(s, "name")]
|
||||
fund_sectors = [s for s in fund.sectors if hasattr(s, "name")]
|
||||
|
||||
if project_sectors and fund_sectors:
|
||||
# Use fuzzy matching to account for similar but not identical sector names
|
||||
match_count = 0
|
||||
total_matches = 0
|
||||
|
||||
|
||||
for proj_sector in project_sectors:
|
||||
best_match_score = 0
|
||||
proj_name = proj_sector.name.lower().strip()
|
||||
|
||||
|
||||
for fund_sector in fund_sectors:
|
||||
fund_name = fund_sector.name.lower().strip()
|
||||
|
||||
|
||||
# Exact match
|
||||
if proj_name == fund_name:
|
||||
best_match_score = 1.0
|
||||
break
|
||||
|
||||
|
||||
# Fuzzy match using sequence matcher
|
||||
similarity = SequenceMatcher(None, proj_name, fund_name).ratio()
|
||||
|
||||
|
||||
# Also check if one contains the other (substring match)
|
||||
if proj_name in fund_name or fund_name in proj_name:
|
||||
similarity = max(similarity, 0.8)
|
||||
|
||||
|
||||
best_match_score = max(best_match_score, similarity)
|
||||
|
||||
|
||||
# Count matches with threshold
|
||||
# A sector counts when its best score is >= 0.6 (exact match = 1.0, substring match >= 0.8)
|
||||
if best_match_score >= 0.6:
|
||||
total_matches += best_match_score
|
||||
match_count += 1
|
||||
|
||||
|
||||
if match_count > 0:
|
||||
# Calculate overlap ratio based on fuzzy matches
|
||||
overlap_ratio = total_matches / len(project_sectors)
|
||||
@@ -246,40 +246,40 @@ def _calculate_project_investor_direct_compatibility(
|
||||
# 2. Sector Overlap (30 points)
|
||||
sector_score = 0
|
||||
if project.sector and investor.sectors:
|
||||
project_sectors = [s for s in project.sector if hasattr(s, 'name')]
|
||||
investor_sectors = [s for s in investor.sectors if hasattr(s, 'name')]
|
||||
|
||||
project_sectors = [s for s in project.sector if hasattr(s, "name")]
|
||||
investor_sectors = [s for s in investor.sectors if hasattr(s, "name")]
|
||||
|
||||
if project_sectors and investor_sectors:
|
||||
# Use fuzzy matching to account for similar but not identical sector names
|
||||
match_count = 0
|
||||
total_matches = 0
|
||||
|
||||
|
||||
for proj_sector in project_sectors:
|
||||
best_match_score = 0
|
||||
proj_name = proj_sector.name.lower().strip()
|
||||
|
||||
|
||||
for inv_sector in investor_sectors:
|
||||
inv_name = inv_sector.name.lower().strip()
|
||||
|
||||
|
||||
# Exact match
|
||||
if proj_name == inv_name:
|
||||
best_match_score = 1.0
|
||||
break
|
||||
|
||||
|
||||
# Fuzzy match using sequence matcher
|
||||
similarity = SequenceMatcher(None, proj_name, inv_name).ratio()
|
||||
|
||||
|
||||
# Also check if one contains the other (substring match)
|
||||
if proj_name in inv_name or inv_name in proj_name:
|
||||
similarity = max(similarity, 0.8)
|
||||
|
||||
|
||||
best_match_score = max(best_match_score, similarity)
|
||||
|
||||
|
||||
# Count matches with threshold
|
||||
if best_match_score >= 0.6:
|
||||
total_matches += best_match_score
|
||||
match_count += 1
|
||||
|
||||
|
||||
if match_count > 0:
|
||||
# Calculate overlap ratio based on fuzzy matches
|
||||
overlap_ratio = total_matches / len(project_sectors)
|
||||
@@ -384,43 +384,70 @@ def _check_geographic_overlap(location1: str, location2: str) -> bool:
|
||||
# Normalize inputs
|
||||
loc1 = location1.lower().strip()
|
||||
loc2 = location2.lower().strip()
|
||||
|
||||
|
||||
# Common geographic groupings with broader regional mappings
|
||||
geo_groups = [
|
||||
# North America
|
||||
["usa", "us", "united states", "america", "u.s.", "u.s.a"],
|
||||
["canada", "canadian"],
|
||||
["mexico", "mexican"],
|
||||
|
||||
# Europe and countries
|
||||
["europe", "european", "eu", "germany", "france", "uk", "united kingdom",
|
||||
"britain", "spain", "italy", "netherlands", "belgium", "sweden", "denmark",
|
||||
"norway", "finland", "poland", "portugal", "austria", "switzerland",
|
||||
"ireland", "greece", "czech", "romania"],
|
||||
|
||||
[
|
||||
"europe",
|
||||
"european",
|
||||
"eu",
|
||||
"germany",
|
||||
"france",
|
||||
"uk",
|
||||
"united kingdom",
|
||||
"britain",
|
||||
"spain",
|
||||
"italy",
|
||||
"netherlands",
|
||||
"belgium",
|
||||
"sweden",
|
||||
"denmark",
|
||||
"norway",
|
||||
"finland",
|
||||
"poland",
|
||||
"portugal",
|
||||
"austria",
|
||||
"switzerland",
|
||||
"ireland",
|
||||
"greece",
|
||||
"czech",
|
||||
"romania",
|
||||
],
|
||||
# UK specific
|
||||
["uk", "united kingdom", "britain", "england", "scotland", "wales", "london"],
|
||||
|
||||
# US states, with their major cities and regions
|
||||
["california", "ca", "san francisco", "los angeles", "silicon valley"],
|
||||
["new york", "ny", "nyc"],
|
||||
["texas", "tx"],
|
||||
["massachusetts", "ma", "boston"],
|
||||
["washington", "seattle"],
|
||||
|
||||
# Asia
|
||||
["asia", "asian", "china", "japan", "korea", "singapore", "hong kong",
|
||||
"india", "indonesia", "thailand", "vietnam", "malaysia", "philippines"],
|
||||
|
||||
[
|
||||
"asia",
|
||||
"asian",
|
||||
"china",
|
||||
"japan",
|
||||
"korea",
|
||||
"singapore",
|
||||
"hong kong",
|
||||
"india",
|
||||
"indonesia",
|
||||
"thailand",
|
||||
"vietnam",
|
||||
"malaysia",
|
||||
"philippines",
|
||||
],
|
||||
# Middle East
|
||||
["middle east", "israel", "uae", "dubai", "saudi arabia"],
|
||||
|
||||
# Latin America
|
||||
["latin america", "brazil", "argentina", "chile", "colombia", "mexico"],
|
||||
|
||||
# Africa
|
||||
["africa", "african", "south africa", "nigeria", "kenya", "egypt"],
|
||||
|
||||
# Oceania
|
||||
["australia", "australian", "new zealand"],
|
||||
]
|
||||
@@ -431,7 +458,7 @@ def _check_geographic_overlap(location1: str, location2: str) -> bool:
|
||||
found_in_2 = any(term in loc2 for term in group)
|
||||
if found_in_1 and found_in_2:
|
||||
return True
|
||||
|
||||
|
||||
# Check for direct substring match (one contains the other)
|
||||
if loc1 in loc2 or loc2 in loc1:
|
||||
return True
|
||||
|
||||
Reference in New Issue
Block a user