From 1ac755b2d7267aa69e9c43cdd5920e54e705736a Mon Sep 17 00:00:00 2001 From: michael Date: Thu, 23 Oct 2025 12:52:52 +0000 Subject: [PATCH] feat: Add industry column to ProjectTable and update related schemas and query filters --- app/db/models.py | 1 + app/routers/projects.py | 4 ++ app/schemas/project_schemas.py | 3 ++ app/services/querying.py | 17 +++++-- investors.db | Bin 29941760 -> 29949952 bytes migrations/add_industry_to_projects.py | 67 +++++++++++++++++++++++++ 6 files changed, 89 insertions(+), 3 deletions(-) create mode 100644 migrations/add_industry_to_projects.py diff --git a/app/db/models.py b/app/db/models.py index 983533b..f9badcc 100644 --- a/app/db/models.py +++ b/app/db/models.py @@ -296,6 +296,7 @@ class ProjectTable(Base, TimestampMixin): stage = Column(Enum(InvestmentStage), nullable=True) location = Column(String, nullable=True) + industry = Column(String, nullable=True) description = Column(Text, nullable=True) start_date = Column(DateTime, nullable=True) end_date = Column(DateTime, nullable=True) diff --git a/app/routers/projects.py b/app/routers/projects.py index 6ebc7df..bc276aa 100644 --- a/app/routers/projects.py +++ b/app/routers/projects.py @@ -182,6 +182,7 @@ def filter_projects( min_valuation: Optional[int] = Query(None, description="Minimum valuation"), max_valuation: Optional[int] = Query(None, description="Maximum valuation"), location: Optional[str] = Query(None, description="Location (partial match)"), + industry: Optional[str] = Query(None, description="Industry (partial match)"), sector: Optional[str] = Query(None, description="Sector name (partial match)"), investor_name: Optional[str] = Query( None, description="Investor name (partial match)" @@ -215,6 +216,9 @@ def filter_projects( if location: query = query.filter(ProjectTable.location.ilike(f"%{location}%")) + if industry: + query = query.filter(ProjectTable.industry.ilike(f"%{industry}%")) + if sector: query = query.join(ProjectTable.sector).filter( SectorTable.name.ilike(f"%{sector}%") diff --git a/app/schemas/project_schemas.py b/app/schemas/project_schemas.py index c084fd1..7138f26 100644 --- a/app/schemas/project_schemas.py +++ b/app/schemas/project_schemas.py @@ -60,6 +60,7 @@ class ProjectSchema(BaseModel): valuation: int | None stage: InvestmentStage | None location: str | None + industry: str | None description: Optional[str] start_date: Optional[datetime] end_date: Optional[datetime] @@ -75,6 +76,7 @@ class ProjectCreate(BaseModel): valuation: Optional[int] = None stage: Optional[InvestmentStage] = None location: Optional[str] = None + industry: Optional[str] = None description: Optional[str] = None start_date: Optional[datetime] = None end_date: Optional[datetime] = None @@ -85,6 +87,7 @@ class ProjectUpdate(BaseModel): valuation: Optional[int] = None stage: Optional[InvestmentStage] = None location: Optional[str] = None + industry: Optional[str] = None description: Optional[str] = None start_date: Optional[datetime] = None end_date: Optional[datetime] = None diff --git a/app/services/querying.py b/app/services/querying.py index 252fce9..5bd0219 100644 --- a/app/services/querying.py +++ b/app/services/querying.py @@ -37,7 +37,7 @@ class QueryProcessor: self.toolkit = SQLDatabaseToolkit(db=db, llm=self.llm) # Update system message to specifically request only fund IDs system_message_updated = ( - prompt_template.format(dialect="SQLite", top_k=5) + prompt_template.format(dialect="SQLite", top_k=100) + "\n\n=== IMPORTANT TERMINOLOGY ===" + "\n- When users say 'investors' or 'find me investors', they mean FUNDS" + "\n- Always query the 'funds' table for investment opportunities" @@ -51,8 +51,19 @@ class QueryProcessor: + "\n1. For geographic searches: use funds.geographic_focus" + "\n2. For sector searches: JOIN with fund_sectors table" + "\n3. For stage searches: JOIN with fund_investment_stages table" - + "\n4. If no results: respond with 'NO_RESULTS'" - + "\n5. Never repeat the same failed query" + + "\n4. Return ALL matching fund IDs, not just the first few" + + "\n5. If no results: respond with 'NO_RESULTS'" + + "\n6. Never repeat the same failed query" + + "\n\n=== GEOGRAPHIC SEARCH RULES (VERY IMPORTANT) ===" + + "\n- ALWAYS use LIKE '%keyword%' for geographic searches, NEVER use exact equality (=)" + + "\n- When user says 'Europe', match ANY location containing 'Europe' (e.g., 'Northern Europe', 'Western Europe', 'Europe', 'Central Europe')" + + "\n- When user says 'America', match locations like 'North America', 'South America', 'Latin America', 'United States'" + + "\n- When user says 'Asia', match 'Asia', 'Southeast Asia', 'East Asia', etc." + + "\n- Examples:" + + "\n * User: 'Europe' → SQL: WHERE geographic_focus LIKE '%Europe%'" + + "\n * User: 'America' → SQL: WHERE geographic_focus LIKE '%America%'" + + "\n * User: 'UK' → SQL: WHERE geographic_focus LIKE '%UK%' OR geographic_focus LIKE '%United Kingdom%'" + + "\n- Be INCLUSIVE: capture all relevant regional variations" ) self.agent = create_react_agent( model=self.llm, diff --git a/investors.db b/investors.db index ccc9762f4ab10f7df9dc2d58ed8b80a5f8ec2317..c29bad880c437a03d34d3f27f7e46516d4448280 100644 GIT binary patch delta 8828 zcmeI1dw3Mp`NwmzxovKnKyD;}hnpZSF&6?7swN0ZxF`hVE;`wr-JP;Kv&>9_i_!o^ z5^%&Qmm@_Ss8)ee1+B#^7j3eO>dPy~uW2^auMK^Yhb27$rgBEZ2AFcb^}!@&q}F&GI(flI(>a4GPDF`yh= z1}Z=$r~=hsET{qFz<6*um;h?QMDQ(e1-KGi1?oUOm;|l{lfe{l4Y(Fe1r31WG#$(U zGr@J>dN2#j29012m5Bw192R{OjfJeara1cBO9tS@LPk^6*L*Pm96nGju1D*xXfy3YkI0}w|=fMl$ zr{HJcMeq`M8N33HgIB?8;OF23_yu?!{1W^M{2KfQ{1*HUya9d>-UM%fKY%}ix4}E$ zPv9hY7n}lr2JeCQ!C$~%!3W?&a2k9BJ_dgSpMbxEGvF-v6#N7H6MP0f2Va0M!N0)2 z!8!01_z(CR{Pzy5{ks53GKq^Mg(Q_EjU=5UgTzgeNs>j9O_D>BOX4BPBgrS}N7A39 zfTWP5h@_aLgk%6oDM=a0K$1ZugGnwT;Uq&yhLQ{;8BQ{SFAWCBSo$wZQGkz7G?CCODJbtLs9lSr;6nM^W; zBuo+^QAsqCD9LgXox~t9Nt#JoNLG;C zLJ}icNpdU6Z6vEmzC+SVvYKQK$y$={lB^@Son$>p8%aCK29k{=caT^lcam%(xr?NO z#OR}Bhdn7y95tJr2w>>)*+b{EAC&pHMFq-PBXm538Cb9N~ z?Kf6eCuKIM+ziP)EEy)RtQu2!IgeHA+Ii2eWFB#% ztTD?SR+h7&8RDgLE|2(A!0YtDsBy6P+xlWF z%d@|}M`h{hy4IwbiqHOJ!$x+IDEd4zcO>E(QhXsUt04*ZHE3iAenirj$rRhp=h?e= z)UXlPYw?j|UNL)GfH%oLEke<1Ru!M7tH>*y-|(_d2oXyS{}0=wh$dY0}lud1l38Bb;kwq0q2q0)H6zC{wra0fKKP;j zqVdZ&3dO3%_2~u9l<|O~M<`k1t3l)qNhz9C({(Ro8}40M(RBe!6{D2GUM__xoiGY# zH6Wk4ldy6yMoaMYfK6p$;KJE%FFgkLH+2~U(&a!{_TjS z_gG97f{Ovj5u}Qp|H=kdnB6n`#lg^;{$+@o&(!q9+Kz{hUcI_P?0V5XOiUY}n`LXi z+rj#axAtx*v>V@TX9dE}|IBT#dG#7rfKeOG$X<*(J}q{oYaJ`iUna-8bC(vJT{l5N ziAbCMNS-sLn1==g1ZlgxS>}ju6d7r=(UVcp{*vC7n~?~#c>V0hMWX%#Hh5@IlfrnT z_(Gvtf~<((dsyyRf3iO)+m{8ytYm4|67HL^6}2_heKM9VwU1|+Ny+Uq+vX!2Dp<$P2XpK(i&n6bQQkypmDQjW zqX<6ciqyzW+3bQSEVrkeo``HZLrgD9?)GF0s1 zw{r@vn0;bvnW%fyNUIrx_s9JDWN-Hxr`jG*H~Nl*Y#38rF#=<8iZH6N=vd(?un(7v zWLYC9)0|Qmb@r#k=G47LPOaW--ZK);VVBy1M zt%-fi8Jeol7kZRV>>Kgo*;RpQI%z9UJcphcm0}T;&CX?nJ4j(Oq&1_!8d2FtyH27o z=({Piu#Qu-&uMU+20-bL^9FqQ!t$9>YB6fBHSvK8)pghxGyS*vo@S$ zF%A3dhDKJH+x-GmvV^qojlv1vSOXP(mIY2Tci|Y5l$7RqJIV8B&p$nHdp_}e;5jnn zsGqshj_@-(*fICGR2O3zX(=u@OQnC7rjTa3NL|UPu4MOcfBYuzBzeB_eC|2pIqiAR zW7i+h*-qykM=}p}6i(Mt#5<2B54CnO>wae0%-Y4Q-OTzvvmRj9gUtE?vmRpB!_3;l zti8e{^(eCrFzX<*9%I(y%=$62o?zBbn01I*PcrK%WQhL*0c#-T%m;uFnasqN29%q0YIKgF~Gw z<+h~kD>7PBM1IGaadZNkkTSkKDUB-pu=YYUK^YD4!|r)}EA*AKB;8bHoer{wJsZ~5 zu()-#7)Ru}@Ta_dDw<^zf72m(zT8yZeL{}ANGiu+*y&)YT~D+os?#c=o0ZWNs=gRC zyJ%jVQ?XbbIaMOg{Kua%wbR0ClFb!#d`|dR)$p%+wLbCY+tgM_x*y$J1l^a6UbnMh zSA1f|+*zYT<@`Fw(TaKLc;9K}pjm-WqriJY&w{t=-EzYMsjT7wcnx&acDiPq{X6}# zm2@p`;!u00IO&1D$L_c_gB96_C+=Vw+2uW=DEO1l7dRbrSFc0gw|`#H#!7nFQD^(* zT_n!jn3iQP-Ea-LXPdK%TjLFU+{kcZ?}R)aekX2S?o^iEqpvD5$IFtB>e4VTdK zwU7&fA@rV)?$*DWS60@-ud1o+YJ0jbwM|UTOmeR`+O9y>H=Hl&M>%e76(*cKfPm&} zHjon`S!z-6Tr`}MswO$EtT=qmRVFSw&IZ60>7C|c$fhmD_WkYmvb6CqUMWm{tXF5Q z;C{< z4Rf88OQG7_zikAvwDi1bYUa?73iU8u!cJMf`HgB;ltR@jo;jlF@jvtz z(e3x8hz6FE&%EOJuU;+{OB&|5#p>lomc4IbEz2y_W%8h8KiNo*Af^&u1prH;CQG-!HnfuP58bVeL$DCop1ovgcy4w6K`UrV(9J#{YJ%qCUDZN0To~BvsAl&5fLA~ojYK-0T&nUZNm2~5^<`>A3;R)&uocmM vocQen8Ey8=KRi>EaR}hFNlJQiiwCt*lu@+ z+riddmu)t_zkRcNWI5Q~La zgvD5brC5gLSb>#Tg*e1xHP#>jYq1XNu>l*g37fG6Takz)Bx4)4V+VF31*u5GE~H~O zGLVTZWFrTAuot=5hy6H!gE)l4ID)5;hkP7G0gmA~3Q>d;IEhmz#%Y|vS(M-$&f@|u z;u0>S6jxA&t0>1cT*nRE#4S{y5>=?iZQQ|K+{1l5z(YL3(|88Y;yFBz7w{rp!pnFC zui`bljyLco-oo2>2k+uNypIp?7$4#ze2h=8&-ewu;y3(`KTwB1@fZG1=kI4hu~X_PX2o7{P#hH}#aXc^E{dySRooPJ#Y6E_ z>MISDhDsx)vC>3osx(u)6mP{xX|A+TS}LuS)=C?ttVShAaO*@d!;L zl~KxQC0rS!j8(=d5z2UFf-+HwR3<5tl_|d;IEhmz#%Y|vS(M-$&f@|u t;u0>S6jxA&t0>1cT*nRE#4S{y5>=?iZQQ|K+{1l5z(b?L{K%+#{0|PAb!`9u diff --git a/migrations/add_industry_to_projects.py b/migrations/add_industry_to_projects.py new file mode 100644 index 0000000..f277f5a --- /dev/null +++ b/migrations/add_industry_to_projects.py @@ -0,0 +1,67 @@ +""" +Migration: Add industry column to projects table +Date: 2025-10-23 +""" + +import os +import sys +from pathlib import Path + +# Add parent directory to path to import app modules +sys.path.insert(0, str(Path(__file__).parent.parent)) + +from sqlalchemy import create_engine, text +from app.db.db import DATABASE_URL, engine + + +def upgrade(): + """Add industry column to projects table""" + print("Running migration: Add industry column to projects table") + + with engine.connect() as conn: + # Check if column already exists + result = conn.execute(text("PRAGMA table_info(projects)")) + columns = [row[1] for row in result] + + if 'industry' in columns: + print("Column 'industry' already exists in projects table. Skipping migration.") + return + + # Add the industry column + conn.execute(text("ALTER TABLE projects ADD COLUMN industry VARCHAR")) + conn.commit() + + print("Successfully added 'industry' column to projects table") + + +def downgrade(): + """Remove industry column from projects table""" + print("Running downgrade: Remove industry column from projects table") + + # Note: SQLite doesn't support DROP COLUMN directly + # This is a simplified version - in production you'd need to recreate the table + print("Warning: SQLite doesn't support DROP COLUMN.") + print("To remove the column, you would need to:") + print("1. Create a new table without the industry column") + print("2. Copy data from old table to new table") + print("3. Drop old table and rename new table") + + +if __name__ == "__main__": + import argparse + + parser = argparse.ArgumentParser(description="Run database migration") + parser.add_argument( + "direction", + choices=["upgrade", "downgrade"], + default="upgrade", + nargs="?", + help="Migration direction (default: upgrade)" + ) + + args = parser.parse_args() + + if args.direction == "upgrade": + upgrade() + else: + downgrade()