From 215fec289511b0f7093d1ec253837b6a044b17b9 Mon Sep 17 00:00:00 2001 From: bolade Date: Tue, 11 Nov 2025 20:27:55 +0100 Subject: [PATCH] made corrections based on feedback --- app/routers/investors.py | 21 ++-- app/services/compatibility_score.py | 103 +++++++++++------- investors.db | Bin 29966336 -> 29966336 bytes migrations/add_feedback_fixes_2025_01_07.py | 110 ++++++++++++++++++++ 4 files changed, 189 insertions(+), 45 deletions(-) create mode 100644 migrations/add_feedback_fixes_2025_01_07.py diff --git a/app/routers/investors.py b/app/routers/investors.py index 5d41df0..19d10e1 100644 --- a/app/routers/investors.py +++ b/app/routers/investors.py @@ -13,7 +13,6 @@ from schemas.router_schemas import ( SectorMinimal, ) from services.compatibility_score import ( - calculate_project_investor_compatibility, _calculate_project_fund_compatibility, _calculate_project_investor_direct_compatibility, ) @@ -91,7 +90,9 @@ def read_investors( selectinload(InvestorTable.portfolio_companies), selectinload(InvestorTable.team_members), selectinload(InvestorTable.sectors), - selectinload(InvestorTable.funds).selectinload(FundTable.investment_stages), + selectinload(InvestorTable.funds).selectinload( + FundTable.investment_stages + ), selectinload(InvestorTable.funds).selectinload(FundTable.sectors), ) .all() @@ -106,7 +107,9 @@ def read_investors( selectinload(InvestorTable.portfolio_companies), selectinload(InvestorTable.team_members), selectinload(InvestorTable.sectors), - selectinload(InvestorTable.funds).selectinload(FundTable.investment_stages), + selectinload(InvestorTable.funds).selectinload( + FundTable.investment_stages + ), selectinload(InvestorTable.funds).selectinload(FundTable.sectors), ) .offset(offset) @@ -143,7 +146,9 @@ def read_investors( # Get top 3 sectors from fund (id and name only) - sorted alphabetically fund_sectors = [ SectorMinimal(id=sector.id, name=sector.name) - for sector in sorted(fund.sectors[:3] if fund.sectors else [], key=lambda s: s.name) + for sector in sorted( + fund.sectors[:3] if fund.sectors else [], key=lambda s: s.name + ) ] investment_response = InvestmentResponse( @@ -188,7 +193,7 @@ def read_investors( if project is not None: investment_responses.sort(key=lambda x: x.compatibility_score, reverse=True) # Apply pagination after sorting - investment_responses = investment_responses[offset:offset + page_size] + investment_responses = investment_responses[offset : offset + page_size] # Calculate total pages total_pages = (total_count + page_size - 1) // page_size @@ -320,7 +325,9 @@ def filter_investors( # Get top 3 sectors from fund (id and name only) - sorted alphabetically fund_sectors = [ SectorMinimal(id=sector.id, name=sector.name) - for sector in sorted(fund.sectors[:3] if fund.sectors else [], key=lambda s: s.name) + for sector in sorted( + fund.sectors[:3] if fund.sectors else [], key=lambda s: s.name + ) ] investment_response = InvestmentResponse( @@ -344,7 +351,7 @@ def filter_investors( investment_responses.sort(key=lambda x: x.compatibility_score, reverse=True) # Apply pagination after sorting offset = (page - 1) * page_size - investment_responses = investment_responses[offset:offset + page_size] + investment_responses = investment_responses[offset : offset + page_size] # Calculate total pages total_pages = (total_count + page_size - 1) // page_size diff --git a/app/services/compatibility_score.py b/app/services/compatibility_score.py index 707c0ba..2253bf4 100644 --- a/app/services/compatibility_score.py +++ b/app/services/compatibility_score.py @@ -117,41 +117,41 @@ def _calculate_project_fund_compatibility( # 2. Sector Overlap (30 points) sector_score = 0 if project.sector and fund.sectors: - project_sectors = [s for s in project.sector if hasattr(s, 'name')] - fund_sectors = [s for s in fund.sectors if hasattr(s, 'name')] - + project_sectors = [s for s in project.sector if hasattr(s, "name")] + fund_sectors = [s for s in fund.sectors if hasattr(s, "name")] + if project_sectors and fund_sectors: # Use fuzzy matching to account for similar but not identical sector names match_count = 0 total_matches = 0 - + for proj_sector in project_sectors: best_match_score = 0 proj_name = proj_sector.name.lower().strip() - + for fund_sector in fund_sectors: fund_name = fund_sector.name.lower().strip() - + # Exact match if proj_name == fund_name: best_match_score = 1.0 break - + # Fuzzy match using sequence matcher similarity = SequenceMatcher(None, proj_name, fund_name).ratio() - + # Also check if one contains the other (substring match) if proj_name in fund_name or fund_name in proj_name: similarity = max(similarity, 0.8) - + best_match_score = max(best_match_score, similarity) - + # Count matches with threshold # Perfect match (1.0), strong match (>0.75), partial match (>0.6) if best_match_score >= 0.6: total_matches += best_match_score match_count += 1 - + if match_count > 0: # Calculate overlap ratio based on fuzzy matches overlap_ratio = total_matches / len(project_sectors) @@ -246,40 +246,40 @@ def _calculate_project_investor_direct_compatibility( # 2. Sector Overlap (30 points) sector_score = 0 if project.sector and investor.sectors: - project_sectors = [s for s in project.sector if hasattr(s, 'name')] - investor_sectors = [s for s in investor.sectors if hasattr(s, 'name')] - + project_sectors = [s for s in project.sector if hasattr(s, "name")] + investor_sectors = [s for s in investor.sectors if hasattr(s, "name")] + if project_sectors and investor_sectors: # Use fuzzy matching to account for similar but not identical sector names match_count = 0 total_matches = 0 - + for proj_sector in project_sectors: best_match_score = 0 proj_name = proj_sector.name.lower().strip() - + for inv_sector in investor_sectors: inv_name = inv_sector.name.lower().strip() - + # Exact match if proj_name == inv_name: best_match_score = 1.0 break - + # Fuzzy match using sequence matcher similarity = SequenceMatcher(None, proj_name, inv_name).ratio() - + # Also check if one contains the other (substring match) if proj_name in inv_name or inv_name in proj_name: similarity = max(similarity, 0.8) - + best_match_score = max(best_match_score, similarity) - + # Count matches with threshold if best_match_score >= 0.6: total_matches += best_match_score match_count += 1 - + if match_count > 0: # Calculate overlap ratio based on fuzzy matches overlap_ratio = total_matches / len(project_sectors) @@ -384,43 +384,70 @@ def _check_geographic_overlap(location1: str, location2: str) -> bool: # Normalize inputs loc1 = location1.lower().strip() loc2 = location2.lower().strip() - + # Common geographic groupings with broader regional mappings geo_groups = [ # North America ["usa", "us", "united states", "america", "u.s.", "u.s.a"], ["canada", "canadian"], ["mexico", "mexican"], - # Europe and countries - ["europe", "european", "eu", "germany", "france", "uk", "united kingdom", - "britain", "spain", "italy", "netherlands", "belgium", "sweden", "denmark", - "norway", "finland", "poland", "portugal", "austria", "switzerland", - "ireland", "greece", "czech", "romania"], - + [ + "europe", + "european", + "eu", + "germany", + "france", + "uk", + "united kingdom", + "britain", + "spain", + "italy", + "netherlands", + "belgium", + "sweden", + "denmark", + "norway", + "finland", + "poland", + "portugal", + "austria", + "switzerland", + "ireland", + "greece", + "czech", + "romania", + ], # UK specific ["uk", "united kingdom", "britain", "england", "scotland", "wales", "london"], - # US states ["california", "ca", "san francisco", "los angeles", "silicon valley"], ["new york", "ny", "nyc"], ["texas", "tx"], ["massachusetts", "ma", "boston"], ["washington", "seattle"], - # Asia - ["asia", "asian", "china", "japan", "korea", "singapore", "hong kong", - "india", "indonesia", "thailand", "vietnam", "malaysia", "philippines"], - + [ + "asia", + "asian", + "china", + "japan", + "korea", + "singapore", + "hong kong", + "india", + "indonesia", + "thailand", + "vietnam", + "malaysia", + "philippines", + ], # Middle East ["middle east", "israel", "uae", "dubai", "saudi arabia"], - # Latin America ["latin america", "brazil", "argentina", "chile", "colombia", "mexico"], - # Africa ["africa", "african", "south africa", "nigeria", "kenya", "egypt"], - # Oceania ["australia", "australian", "new zealand"], ] @@ -431,7 +458,7 @@ def _check_geographic_overlap(location1: str, location2: str) -> bool: found_in_2 = any(term in loc2 for term in group) if found_in_1 and found_in_2: return True - + # Check for direct substring match (one contains the other) if loc1 in loc2 or loc2 in loc1: return True diff --git a/investors.db b/investors.db index 6ec347efff2de50f1519551b57ce6aa2726cba2b..dceae795ca494eff4398c18075b666e7a8d4a5ef 100644 GIT binary patch delta 2361 zcmYM!cTki^0ETh)s8SRwDiF@zP_cIr5CugPJQOP`pq?Ts%`PZnL&bugz4zYFj@=Yf zOpj@%C&m~x&6xVU=8t?czh`!5XTI5)o!x?hUF8d~Jg{Q0i%U?c%l}`!7k16 z7R^aJm1Z`D*JsV&QtINj)i2K1=3CKcqovd*#yj7;mDdrk6wfS=68DpCcU`yKxwXkW zboR^k$T!||2YYx1goKzPS7&-J6KHBr%9@y#o0^l7ot~9vPffSlqLXc**3=ApdS*`c z^O@sutf}U_A89VRc^4XQ&w$`yQjl0DsvTSm-C^N-4vj>?w##o%S!mI(tX?>Ki%y<`iRrU;M+SC+9Io*;?J>!*O z&m5ngX3uoojx<4z@Fe5!c#vQkF33$Wu)q~=aEAvx;e~SWh7Wwo_0a&@Un4X|6NI8EnxQ$u&;l*d3a!xwZP5hj}fu4v#EPA0g;?M{2=!M3U@Ytyhw;e31Y}|&CSfwNkc}MVA`eqA71J;sGcXggFdO+O zz#Pm)A?Be7^RWO6u?UM%j3ro#WhlXNtiVdFLMc{b4IEgDby$xL*oaNoj4jxTZP<<- z*oj@(jXl_leb|o!IEX_yj3YRTV>pfzIEhm@4JXdvEY9IPF5n_Ap$wOC1y^wm*Kq?k zaSJcvCA^GRa2v1UHN1{Da0hSVExe6)@GjoN`}hEN@gY9K$M^)F;vPQ3=lB9&;wyZO zZ}2U?!+m^@2lxRG@gpALF`nQj{ET1lD}KXM{Elb%1ApQ#{EdH#x%(whEQ+h*rnoB} zil^eGlvBJFAH`SkQ_3q96n~|nQc0<-R8gub)sz6Gx>7@_sRSxPO0e>R5~5g@T1suD zj#5{tr_@&(C=HcHN@Jyo5~?&+nkmhdFr|gkQfZ~MR@x|Sm3B&drGwH@>7;a4x+q7n#gVw6~=m(p8_Q~D_JN?)a)lAt6i{S}*%qzq67D#^+qWw0_t z8LA9ZhAYpHc7#SFl~GEnlBT39qm?nrSjDc4Q^qS9$^<1-nW#)sCM#J=wvwadDtXEj zWvVhwnXb%GW-7Ck*-E}rpv+O`Duv2CrAV2tEKn9Iix8 zDOFZ0YZQmFR#~U4S2idcl}*ZKWs9;^*`{n)b|^cQUCM4{kFr#CzVsmX~n6WQO+vol=I33<)U&)DN`;hSCp&DHRZZ;L%FHkGG#ntrug2! DP*b73 delta 2291 zcmYM!XHZmE9EagOOIcW46fB@CVy~dsJ1AHYtXQz4Sg{xEiU@+D*wD3sz4vDBba6cb|-W9s`eA981&ch1az?wvDp=G=^oJ%Jfm?GAF=Y!wS^|3B_E5siJ^w)zW` z7pI;{HH9$|j{UY$Cg0}T?n)@(?=k@e&OuIxf3)8tlkXVdo92`2eZ=dY{ej)fxLsCI zac+unW+powAz@)A+s8WJ#Y7uVy?rvPT~|!;|$Ki!a1DB z1zf}>T*eh#MKP}7I&R=5Zs9iW;5j^x7w{rp!pnFCui`b_#p}3-H}EFj!rOQU@8UhY zj}P!6KElVik5BL^KEvnu0$<`Qe2s7L0N>&}e2*XS5I^E4Ji^a-j9>68e!~;|j;Hto zf8sCvjeqjE`>UYX6)(kG@lkvghvKLBD^4XqDWSNOK&7NoN(oYemC{NXrL0m;2~k3o z@=688tyEOPluAmtQdy~@R8^`e)s-4bO{JDnTdAYeRq81bN~BU>X`n@nfN@t~u(p8C9x+w`tqS9UIq4ZRG zDZQ0GN|Mr7>8JEp1}MqOKxL3JSQ(-WRh}K~aII355z0sR>mk}m2t{=B~6*2 zOjIT*la(pTRArhnU74ZGRMM4M%4}thGFO?W%vTmD3zbDmhO$`6RF)`NO1838$x(8Z zWy*3TPg$X?RPvQo%4%hevQ}BA6e#PJ4T?wEsBBU;D_fMUN}*DuY*V%?JCvQuE@ii} zN7<|FQ}!zdl!MA4<*;%@IjS5}jw>gWlgcUOv~or{t60i8<-Br1xu{%HE-P1*t4gtQ XO}VbzP;M%>l-tT3bCuti$-DnAGPi-X diff --git a/migrations/add_feedback_fixes_2025_01_07.py b/migrations/add_feedback_fixes_2025_01_07.py new file mode 100644 index 0000000..d8056f2 --- /dev/null +++ b/migrations/add_feedback_fixes_2025_01_07.py @@ -0,0 +1,110 @@ +""" +Migration: Add fields from feedback fixes +Date: 2025-01-07 + +Adds the following fields: +- projects.is_archived (INTEGER, default 0) +- companies.product_service (TEXT, nullable) +- companies.clients (TEXT, nullable - stored as JSON string) +- investor_members.linkedin (VARCHAR, nullable) +""" + +import os +import sys +from pathlib import Path + +# Add parent directory to path to import app modules +sys.path.insert(0, str(Path(__file__).parent.parent)) + +from sqlalchemy import create_engine, text +from app.db.db import DATABASE_URL, engine + + +def check_column_exists(conn, table_name, column_name): + """Check if a column exists in a table""" + result = conn.execute(text(f"PRAGMA table_info({table_name})")) + columns = [row[1] for row in result] + return column_name in columns + + +def upgrade(): + """Add new columns to tables""" + print("Running migration: Add feedback fixes fields") + print("=" * 60) + + with engine.begin() as conn: # Use begin() for transaction management + # 1. Add is_archived to projects table + print("\n1. Adding 'is_archived' column to projects table...") + if check_column_exists(conn, "projects", "is_archived"): + print(" ✓ Column 'is_archived' already exists. Skipping.") + else: + conn.execute(text("ALTER TABLE projects ADD COLUMN is_archived INTEGER DEFAULT 0 NOT NULL")) + # Set default value for existing rows + conn.execute(text("UPDATE projects SET is_archived = 0 WHERE is_archived IS NULL")) + print(" ✓ Successfully added 'is_archived' column to projects table") + + # 2. Add product_service to companies table + print("\n2. Adding 'product_service' column to companies table...") + if check_column_exists(conn, "companies", "product_service"): + print(" ✓ Column 'product_service' already exists. Skipping.") + else: + conn.execute(text("ALTER TABLE companies ADD COLUMN product_service TEXT")) + print(" ✓ Successfully added 'product_service' column to companies table") + + # 3. Add clients to companies table + print("\n3. Adding 'clients' column to companies table...") + if check_column_exists(conn, "companies", "clients"): + print(" ✓ Column 'clients' already exists. Skipping.") + else: + conn.execute(text("ALTER TABLE companies ADD COLUMN clients TEXT")) + print(" ✓ Successfully added 'clients' column to companies table") + + # 4. Add linkedin to investor_members table + print("\n4. Adding 'linkedin' column to investor_members table...") + if check_column_exists(conn, "investor_members", "linkedin"): + print(" ✓ Column 'linkedin' already exists. Skipping.") + else: + conn.execute(text("ALTER TABLE investor_members ADD COLUMN linkedin VARCHAR")) + print(" ✓ Successfully added 'linkedin' column to investor_members table") + + print("\n" + "=" * 60) + print("Migration completed successfully!") + + +def downgrade(): + """Remove added columns from tables""" + print("Running downgrade: Remove feedback fixes fields") + print("=" * 60) + + # Note: SQLite doesn't support DROP COLUMN directly + print("\nWarning: SQLite doesn't support DROP COLUMN directly.") + print("To remove these columns, you would need to:") + print("1. Create new tables without the columns") + print("2. Copy data from old tables to new tables") + print("3. Drop old tables and rename new tables") + print("\nColumns to remove:") + print(" - projects.is_archived") + print(" - companies.product_service") + print(" - companies.clients") + print(" - investor_members.linkedin") + + +if __name__ == "__main__": + import argparse + + parser = argparse.ArgumentParser(description="Run database migration") + parser.add_argument( + "direction", + choices=["upgrade", "downgrade"], + default="upgrade", + nargs="?", + help="Migration direction (default: upgrade)" + ) + + args = parser.parse_args() + + if args.direction == "upgrade": + upgrade() + else: + downgrade() +