made corrections based on feedback

2025-11-11 20:27:55 +01:00
parent 5e83734acf
commit 215fec2895
4 changed files with 189 additions and 45 deletions
@@ -117,41 +117,41 @@ def _calculate_project_fund_compatibility(
    # 2. Sector Overlap (30 points)
    sector_score = 0
    if project.sector and fund.sectors:
-        project_sectors = [s for s in project.sector if hasattr(s, 'name')]
-        fund_sectors = [s for s in fund.sectors if hasattr(s, 'name')]
-        
+        project_sectors = [s for s in project.sector if hasattr(s, "name")]
+        fund_sectors = [s for s in fund.sectors if hasattr(s, "name")]
+
        if project_sectors and fund_sectors:
            # Use fuzzy matching to account for similar but not identical sector names
            match_count = 0
            total_matches = 0
-            
+
            for proj_sector in project_sectors:
                best_match_score = 0
                proj_name = proj_sector.name.lower().strip()
-                
+
                for fund_sector in fund_sectors:
                    fund_name = fund_sector.name.lower().strip()
-                    
+
                    # Exact match
                    if proj_name == fund_name:
                        best_match_score = 1.0
                        break
-                    
+
                    # Fuzzy match using sequence matcher
                    similarity = SequenceMatcher(None, proj_name, fund_name).ratio()
-                    
+
                    # Also check if one contains the other (substring match)
                    if proj_name in fund_name or fund_name in proj_name:
                        similarity = max(similarity, 0.8)
-                    
+
                    best_match_score = max(best_match_score, similarity)
-                
+
                # Count matches with threshold
                # Perfect match (1.0), strong match (>0.75), partial match (>=0.6)
                if best_match_score >= 0.6:
                    total_matches += best_match_score
                    match_count += 1
-            
+
            if match_count > 0:
                # Calculate overlap ratio based on fuzzy matches
                overlap_ratio = total_matches / len(project_sectors)
@@ -246,40 +246,40 @@ def _calculate_project_investor_direct_compatibility(
    # 2. Sector Overlap (30 points)
    sector_score = 0
    if project.sector and investor.sectors:
-        project_sectors = [s for s in project.sector if hasattr(s, 'name')]
-        investor_sectors = [s for s in investor.sectors if hasattr(s, 'name')]
-        
+        project_sectors = [s for s in project.sector if hasattr(s, "name")]
+        investor_sectors = [s for s in investor.sectors if hasattr(s, "name")]
+
        if project_sectors and investor_sectors:
            # Use fuzzy matching to account for similar but not identical sector names
            match_count = 0
            total_matches = 0
-            
+
            for proj_sector in project_sectors:
                best_match_score = 0
                proj_name = proj_sector.name.lower().strip()
-                
+
                for inv_sector in investor_sectors:
                    inv_name = inv_sector.name.lower().strip()
-                    
+
                    # Exact match
                    if proj_name == inv_name:
                        best_match_score = 1.0
                        break
-                    
+
                    # Fuzzy match using sequence matcher
                    similarity = SequenceMatcher(None, proj_name, inv_name).ratio()
-                    
+
                    # Also check if one contains the other (substring match)
                    if proj_name in inv_name or inv_name in proj_name:
                        similarity = max(similarity, 0.8)
-                    
+
                    best_match_score = max(best_match_score, similarity)
-                
+
                # Count matches with threshold
                if best_match_score >= 0.6:
                    total_matches += best_match_score
                    match_count += 1
-            
+
            if match_count > 0:
                # Calculate overlap ratio based on fuzzy matches
                overlap_ratio = total_matches / len(project_sectors)
@@ -384,43 +384,70 @@ def _check_geographic_overlap(location1: str, location2: str) -> bool:
    # Normalize inputs
    loc1 = location1.lower().strip()
    loc2 = location2.lower().strip()
-    
+
    # Common geographic groupings with broader regional mappings
    geo_groups = [
        # North America
        ["usa", "us", "united states", "america", "u.s.", "u.s.a"],
        ["canada", "canadian"],
        ["mexico", "mexican"],
-        
        # Europe and European countries
-        ["europe", "european", "eu", "germany", "france", "uk", "united kingdom", 
-         "britain", "spain", "italy", "netherlands", "belgium", "sweden", "denmark",
-         "norway", "finland", "poland", "portugal", "austria", "switzerland", 
-         "ireland", "greece", "czech", "romania"],
-        
+        [
+            "europe",
+            "european",
+            "eu",
+            "germany",
+            "france",
+            "uk",
+            "united kingdom",
+            "britain",
+            "spain",
+            "italy",
+            "netherlands",
+            "belgium",
+            "sweden",
+            "denmark",
+            "norway",
+            "finland",
+            "poland",
+            "portugal",
+            "austria",
+            "switzerland",
+            "ireland",
+            "greece",
+            "czech",
+            "romania",
+        ],
        # UK specific
        ["uk", "united kingdom", "britain", "england", "scotland", "wales", "london"],
-        
        # US states and their major cities/regions
        ["california", "ca", "san francisco", "los angeles", "silicon valley"],
        ["new york", "ny", "nyc"],
        ["texas", "tx"],
        ["massachusetts", "ma", "boston"],
        ["washington", "seattle"],
-        
        # Asia
-        ["asia", "asian", "china", "japan", "korea", "singapore", "hong kong", 
-         "india", "indonesia", "thailand", "vietnam", "malaysia", "philippines"],
-        
+        [
+            "asia",
+            "asian",
+            "china",
+            "japan",
+            "korea",
+            "singapore",
+            "hong kong",
+            "india",
+            "indonesia",
+            "thailand",
+            "vietnam",
+            "malaysia",
+            "philippines",
+        ],
        # Middle East
        ["middle east", "israel", "uae", "dubai", "saudi arabia"],
-        
        # Latin America
        ["latin america", "brazil", "argentina", "chile", "colombia", "mexico"],
-        
        # Africa
        ["africa", "african", "south africa", "nigeria", "kenya", "egypt"],
-        
        # Oceania
        ["australia", "australian", "new zealand"],
    ]
@@ -431,7 +458,7 @@ def _check_geographic_overlap(location1: str, location2: str) -> bool:
        found_in_2 = any(term in loc2 for term in group)
        if found_in_1 and found_in_2:
            return True
-    
+
    # Check for direct substring match (one contains the other)
    if loc1 in loc2 or loc2 in loc1:
        return True