From 65b5df3a431dd4b5c9189c7d9ef59663e7871fe1 Mon Sep 17 00:00:00 2001 From: bolade Date: Tue, 2 Sep 2025 12:22:50 +0100 Subject: [PATCH] Add CompanyTable model and refactor query handling; update requirements for new dependencies --- .gitignore | 1 + app/__pycache__/main.cpython-312.pyc | Bin 2312 -> 2264 bytes .../pydantic_schemas.cpython-312.pyc | Bin 1640 -> 1640 bytes app/api/__init__.py | 0 app/api/__pycache__/__init__.cpython-312.pyc | Bin 0 -> 168 bytes app/api/__pycache__/investors.cpython-312.pyc | Bin 0 -> 481 bytes app/api/companies.py | 8 + app/api/investors.py | 8 + app/db/__pycache__/tables.cpython-312.pyc | Bin 1299 -> 2139 bytes app/db/models.py | 103 +++++++++++ app/db/tables.py | 23 --- app/main.py | 24 ++- .../langgraph_agent.cpython-312.pyc | Bin 7279 -> 7268 bytes .../__pycache__/openrouter.cpython-312.pyc | Bin 8681 -> 8681 bytes .../__pycache__/querying.cpython-312.pyc | Bin 0 -> 3970 bytes app/services/langgraph_agent.py | 162 ------------------ app/services/openrouter.py | 3 +- app/services/querying.py | 46 +++-- requirements.txt | 4 - 19 files changed, 166 insertions(+), 216 deletions(-) create mode 100644 app/api/__init__.py create mode 100644 app/api/__pycache__/__init__.cpython-312.pyc create mode 100644 app/api/__pycache__/investors.cpython-312.pyc create mode 100644 app/api/companies.py create mode 100644 app/api/investors.py create mode 100644 app/db/models.py delete mode 100644 app/db/tables.py create mode 100644 app/services/__pycache__/querying.cpython-312.pyc diff --git a/.gitignore b/.gitignore index 698ec62..2ae4ed8 100644 --- a/.gitignore +++ b/.gitignore @@ -12,3 +12,4 @@ /*.db +/*.cypython-* \ No newline at end of file diff --git a/app/__pycache__/main.cpython-312.pyc b/app/__pycache__/main.cpython-312.pyc index 9cbd458af81d89c7020e6ba300163b1926a0d2e5..6d7f7be4f30b04b061da4f639c9e031a8a6d3e58 100644 GIT binary patch delta 1004 zcmYjPO=uHA6rS1HY&Ji;`PsHHwzdCgu&uubi`s(+MbM&%6!zefW~Oawb`xitmVlCA zdk_!OvIjkS5}^u$;K5VxdPr%ZaZAvG2fYP*v7VgSnA!*Xe!hM0&G+8SlXxwrd{UGk zVDjwahuPAqk}N-k06Yg6V3-3sw?Yn@f-b-&hIEl(DCaXJUGgw7=Qm|tHUoOV4C+A- z^W{QjSPy%?lv7MqS0NApk=OWHO!Nqe%=I<>!0ta_b!CV(o+s|xoQ)rZ9fkK+v#K_nSbdbfGktu-&m~oXta{z<&rKOZeJ!(Td zt-qCP^CfX=O=1x*)i_uXWlIPA5!ff0`T>5Qu7K~+W3SI{MMo>wYieR!9okTbUe0c+ zrz+QK;r5lqYN)3w_UwC&ANLPB`_h%cPTI-JU6d|lDWAuc3TX?cZJOkP4^NHaDd(R5 zLOEs=x{zgCAN7c2tr>3=%1?LItd+?XF)?VqI8Uf6{j+F>%u{|bu2{(1^WI8b#2k;k zD;tKDHi==l{vzw0rt^X{^CwX|-VcU@TaXl0g_IdvB7B^X0j- z2@6UVn+JWeWgfKufshsy@n7gmE2LB~+)`?=(E1jvQ0tTD&LpCTx#!I9oO|v)=lo{2 ztuN~4ZPOegDAyl+HU=>;KjBkO}5}L9`S(j zpwpLTH|WL=b7ObG6+DsjwNsj`yx-%{zr;W3aS=;v4~ZxY1fT1QxJ_ka&JC6>fEz8R z(YKd)5GO~pH)$=gwGR(B@U|K*VWJw4Un`BPm$%AeTgDhzKit-K=({5rfKgPk&H+a)#&&kW&-73A56De@jkdLm(8O?ChcX(eLoblU zbUfWKXd94rgB%f=bJZwh9&71&u~5gW+Ox`sF$MZgH(15Nh9Ka~G{(nq3H5nAA%6|X zqsoilp8L7->foJiv;Nimm-AQGerZnJZBE=Xj}Ojom#x8j&(mW+pL`{`lRr290*qm< z%tvm(Tmgp^D7jHI%@*(h)o?OXcw>D;`Jz5^gSgvqL}!`%xV?m7Q2&-Ztm1;hY8MHZ z_K@Et#jle#l=clL)INQQ$Djz7Mih!TvPAEg&v*~LI9KK2lXYuoTaoP z@bBVjB&g@{GjJB6`gz1psCIQq>3vfAjWiyTnTKTRE}42jPCO=WJs_ud^5&K9K;I$e v#_(=FnJr#^X>0uCpmmSFzN_i9{*Av&(0u3pq0CdAXtSw5xmlbr)Ybn0YF!c6 diff --git a/app/__pycache__/pydantic_schemas.cpython-312.pyc b/app/__pycache__/pydantic_schemas.cpython-312.pyc index 49fa62c1f046168ca3768de4a7821f5c96636635..761fc876e847fe16ff24ae90e532fabf0d47fd75 100644 GIT binary patch delta 22 ccmaFC^MZ%>G%qg~0}$+%+?3I?kvEzR08N$#egFUf delta 22 ccmaFC^MZ%>G%qg~0}%9{*_hG2kvEzR08xYo0RR91 diff --git a/app/api/__init__.py b/app/api/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/app/api/__pycache__/__init__.cpython-312.pyc b/app/api/__pycache__/__init__.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..707593a0b1e3feed607800fb0dee58913358f498 GIT binary patch literal 168 zcmX@j%ge<81j~1A%K*`jK?FMZ%mNgd&QQsq$>_I|p@<2{`wUX^%UwSsKQ~oBKc}=j zu{bd=H&fpwKe;qFHLs*tKRmxETi@5)ML#jGBtI{{JhLb@ttb(wIzL~dVG67O9USFC|(toLBCi*lOmt^n=CYaa) z8!L#Q0mZ?zT+yg@8J!I+yr~WFKQr}Pn0JS|p=|hD6|(5Z_fHN_ndeg8OBj}OO@+(2 zs4YN1t^!a@#)_z9fu?5R8D#acAGN6vq(OVDB}=#&R?o~Ko`FZykW`o)7)@qVgSEU4 z5yY`AqsVHWO&wPl=Zs$&$45njI4*Ol9gEXCC$gKg+fuCst3tc$r#K~Mb-sniC45+_ zNJ-O&(enCmBR5>i4?}sbUQ*76{gpZqu0$$ud8p+yxVpxVsEl%7Z%{XY3HSy>JSH0^ zv0a;)UW@MWq)gQ(MNXO@9T7iZ`!l}wu+@)@FnX81N#B>=ar0M0+!^bjrGqHI`U9K* Bb9Mj# literal 0 HcmV?d00001 diff --git a/app/api/companies.py b/app/api/companies.py new file mode 100644 index 0000000..d718c44 --- /dev/null +++ b/app/api/companies.py @@ -0,0 +1,8 @@ +from fastapi.routing import apirouter + +router = apirouter() + +@router.get("/companies") +def read_companies(): + return {"message": "list of companies"} + diff --git a/app/api/investors.py b/app/api/investors.py new file mode 100644 index 0000000..42d5b39 --- /dev/null +++ b/app/api/investors.py @@ -0,0 +1,8 @@ +from fastapi.routing import apirouter + +router = apirouter() + +@router.get("/investors") +def read_investors(): + return {"message": "list of investors"} + diff --git a/app/db/__pycache__/tables.cpython-312.pyc b/app/db/__pycache__/tables.cpython-312.pyc index 8e6b31ce548b07a62420e563ee9e31229f4d306c..18d72cf2655e4fc4da75488271699723da0d8185 100644 GIT binary patch delta 747 zcmbQtbz6Y%G%qg~0}%Y!xh+GCbt0bxW6VT#cPSi8rIcJ5Q!*mCWcg& zRCW|G9!pgY#>4sNZjH8MnP$DNm1o3j-34D#FEVXyjwhJ`K5U&zNsnkm8pqEx7f>5lZrD- zQj1tXxBPzDhyAOajyY9N*di~w0vyngaAW>tND(Fth_O0V$iHhA1b zW(!NsP+#G4McA;x=O&-Xgs=rESNOCW+$XEC#Pfnp1R18uRU|)o5{pU}Yf6${N>ULh z`oW0;Vil_Ozc_4i^HWN5QtgTifa(}QZqNi0AD9^#8E-T2KWDJG%wX}9LF@|~KO@@& VmoE%J>WdN|kZtmn0Z4&;0svFPmx}-Z delta 204 zcmcaDFqw<*G%qg~0}!-7-@dk91rQ6YRtd=Z#bJ}1pHiBWYFDHRPa~M;V2CK$6bhYKDrpymj!eAirjKDR_uO~h zedoUS?jP|taqJhZR)vV;XYJ0XZT&X(yVQk|U(N4&hZ>75@AUmSmUMCQ>D@s1xx5uf z-_Nj=wzo3OC9bAn7dhzSaWJ}v1W3R;~0E#4&%lHN>mgeRwfsP+N2hUM>87I}9J zxMMAJ>=4V=No_UhFeJ4WS-B3qLn$bAHQ z{keXq#(6aj)HJJhLW-#zV|77R6BJa4mgF^NpfU3r2hcbkrZ`RXwN|%wVUHYGHx*2q z^Sn6(%pr~p^W-QXN4Ksw2A)};TKU90NX&DI583!x_Ur;1JI9gd-%y72GlesoU+}gB z$l4MdmE@^OKutmuy^$_wN*Uf90p&T=7gKrfI@<r&OPyyY z7TLumHgSQ|U3{xolb0kNnHIy|nGsC4wv-?F=90i5N|oS2sDbrZAyz!Lk7$)x?j+Rc z9%R-sdxQlV&1FN$01b4xztj(Hj`F?IJ;=CtCJLA+v^#5fsER|{T27Xd(Bp&tAf)46 zow(CKWg0Ce$ixK&D?(Z~?y$I*MZHCplHOS)UP-gd;tAgTTl~F0JVB~gl8~U*@dC~h zZa}!VVo#25A7@9VUtVDeH%BbwPPBDYZ>t+Lr*k)T!TOQh3>0^tE~Gbt9OY?JLv=?k zy^HHgiam>`a=X4z{hYHz_7RL6ETr=%@;4xDDUX%LDp^n6&(WiKkx#E@3Yk5F8!~Qa zXT+P-li>?Mr@J;*9jk{L4$hf^p$MFsdA{(Xf6KLXecRh$ehj=k2c1)*TBAu+J1{6H z#4KVtNv7gu@c@ek{^}|b!YDpXCE3;xZNh#S5R@_z+X+)eS@z3bA*=_txE&nM^|dJ@ zPx%1ltH=1@1Q0)JqJxmio>pAh|&qCT+o-9o^=|p|`@zqCHp(n(9rhsQ^ACY)NP*3yv zVW1y|{wVK11^lN(I1oL6BS0VdFT1PPxR3M%_sWFyg7sqLx#CCnCsYwW=#3dgMHWV5X zH$8-)6-ls{6eQz-46!G0B*{nFyR#5#wfu)ey`B+VlLs~1%n+_GYB}=^5uM0s;!!d z0!$Q|;MKmCsHaP&T!eD53C8-OSWEBZwsTEz#J-epcHxIYjcbCwE8Frk!BGFaRQsq2 zl)jayD@`!mx5P)OCWyD({>nw|;)8e9^!=e5aXV92KsR6Au@`1Oph{ok7^Y$(;fxlI6y?ywle^uUr$W}zo*ki#ZffMnq% zVLB1Hq zD7JiNUB3IKmBR?q%458za!xz<;QS%xCMfj^BG|R5^3(z8L#(?pqzxg4tB9J2yn!u# zgGYYB^vlpcDkEQTE3uJ4oToCc%_A|4A`;@Vod8SGe-Io=S%mG-ieq*~M4d_MAi2ph z(Y?34PeEzfuH((W?CIYUT0kV6$l+Qml}n~Wf(~gn^G*JXyq=x-Zc%qghGR}W)?!_i zjFzdo3|4Beavi9n$i-EPTCo~6T(O2LRrjmKeVWxiJn%(o#ky~M-0-CjF`ja0)H=RC zf;hf1qs>%HW3>r`%cy4&#jP!smk#Iv;=HyqjI+NBx`p*u7u>t!BC7dn7 z(PAV04T~XcZ7&<#Bue+YiHP^!ax0^#M_PQqb!s4B8(**2a7UtzJbmyS7qZ zsosKg422|hW_`3Wp-nVMq2&>(SGQI+R-k84ojV9+5bLiT)s7;@SCKTS!6a+JozQj& zc|)c*1-+?;cdWkpdFj(q3Hc+YUx9w5;h)?cHFJ|NH;H^<)0ct1Ov5*^8#J>MFgt;` zU}ZsDXmG=|)t%ehw~?4I#bZ!Bc8KAj1afkwGY*|`6p_uyaTqyn%Tc)}C!sU>f4U=b zVY(YYwi|YNg53Kh6v4+o>!z#Fdp8Gc zu(sS}6}w+agFAEEb2|&$3%h3wX++H(Fp>H@W@Zv*CY#I&9h}%KM!ugrZFBdqeSFdG z3@HUEIm?j%v6d%cQTE#Hmzkc@YaDIH*4n4>qd;yi;ZcxjzZL~#XOaL@o3BUjh258~ zU(a8?oL{|k`Th59-r#NDPlH`;^QY)6equ8w$tmS$?2Qgh;HU6vxPx`q7u+Jirw+01 I+-qz90F=)1;Q#;t diff --git a/app/services/__pycache__/openrouter.cpython-312.pyc b/app/services/__pycache__/openrouter.cpython-312.pyc index 46d57bd3895341e45d2e94e48ee40b4cb14b231c..8fcc5d3ee0bea103e577f6578ea577dc81beb601 100644 GIT binary patch delta 20 acmaFq{L-2GG%qg~0}wnD*}Reao+1EAg$BC- delta 20 acmaFq{L-2GG%qg~0}$As+qjYYo+1EAMh46P diff --git a/app/services/__pycache__/querying.cpython-312.pyc b/app/services/__pycache__/querying.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..f3237606f10202ccfa1f8318df891adbe98c8482 GIT binary patch literal 3970 zcmaJ^&2JmW6`x)1lFJW?5=DJcl1({EM7ky^MRj1iO;E*FVl}e-*~VoLixqc8F1^d8 zXO?zE$bb(%=#T>SNj~ZnxUdfHCH4Q%ix{~ev5-&$>BTpuGSDKYz8P{!Nl~+hnK$pv zJidAF{oXwOy{9LD;9CfPTlhy1q3`L!{e(7g@LwP{k&0B#Kqi;vI7;&dpXFip7#>r| z3XCThqUp_g87&$CS541=m_-Bd|v*e$bV2jxTo8|4Ibjc zGH|(|*w@QiX?~F&$ze^gH5ta7Eh~AgWG8t?S}Z-#h;3mfm+G)#qoksz<(>!+whK0 z)fGd_*^X#iW%*w9SAqEFnjKe4w|5`uT5mPvkW1-oh1krT|ChJ?N8-YUiEAXFWRzoBzLshes`D6Fh*DFsOb8~ z6~eqNx)}%OJ90bhZaMiBa6|5n?p*Be(k}I)O~8qW%j?d0j^@7|bvgEsOKp1(-K+Ck z>pWQ5839zF$@#Jam&syOLV-C6ak0X3*|+o3}~>fCV+C3}{u z;|1F;6UcZAK!&Y~t>H8vL@Dc;2eZ}jyk+GLEtA(wy`-l;OrKA!8VV_-C`>R7Z&Gl4 zuvxyRtvM2fvRuK2Bbt_~8BP$eRo1X#SFi@S;Q>k>!!VNq9)_b7K9)foZwq}+7;B_# zl|aR+g-yk-2Gf~b0l=g{;sKh4vSJsUK^K{_s*xPlSz#eg@X)F{>(+tzAueGMY@IeC zhbbC~4sf+{6#~`?CB@Vn$<%Cxf;Jh!J}?qPWk*ir`i+Iw1ItDYRr4LhX9s^tMWmYy!rh7Gav_j>xJK9-y?; zx;yx+?*^0=sAypHWd=WIgsQi&+^&Z?%b@j$LN(B(lO9JPnz4#$;xe6t0xTYn|G1PX zSf-Y-jLJiWC?!+RTmdgouaL}_7QUCc_QjQqQnIa*{7}c*DhAg;PLwl5!w+<*5gB$V zD6Q!-#9fvlJ8W654j(NeX;(nQO}>TUF?tt3(NlZT@p?2wbB)MMV`TK5UyKGe zgs-J{AvAJ!`_{`lf4lqFyRU~o0bXctaJms0+Dtr6(COUMbM;7~7D?8rK$ z)t%I0EwZ>bFu6CL098X&RiGI{ld~JbYiZ(N@kBj-u@=Ah>e1`?HO6YhPSsY9tHm1`t0NR=X2Gwc}!rrUBJ(>^mu3>s+2=Z(%#B9`Rj` z>xVV+5IC_q*A3jUcH(DZR~`QS!sR>HZ^duJun_-p{&xKG{L<3GmABM*D1gD1hi&aE zn>hXwbXhc91Y*eQkyyrh$;RhkGmB@^b11gH_GY)ju(09}U~hHm#5f)+habaE@>>|% zp`7|IJO!cLlX`Y~r(a76i0Q!aX69+89+|F1rW>QDp7|O>!+_I)!~yaQ1U7u{B;@bi z$ba42lu+OBMlTDeBf+Hzt&B$kOVPvvOS9+y1@ z-%y_9zL!Uw?)1QOO1n6j*-#xn=KdqnWza(?Lc#6e-%C#~)d!|(15=Hj;b$LhUD%18 z+Zj#mgwh8d9;&khQRo}l2s6(n`Q_zgkk)?sZpyL~kY#xFD+Z-QGW@z?v?Q2%+VNT| zMR=odhAuqd09GLIMOd;t!88fmduXSE=|Gz*Qi9ACTaPAyZY0m%`^Yp<0dnCvb%2|UN&`AdN*%AGTvwVbm|vjQ*T zTAID*tPz0LfUtJKnK)@<#nz#x(lvs;R9Rnl4{gj8oi!s)gtGHkDHqaZtgTdZD5tF7 z=qsZ?Z-d3&0 z)ATU&{}QEWVPrA+QTlZ&#-HH3z@wc$xer6r!*Sd`8v7oF_tEG+n%_s~_tEq|n*Ar5 z+eZ@s$$_yyzeiv2ow51d$fu9P4RQ4GAM4_1O&r~ceYh*mJ?7sC19c%*6JlEzUe4|a fv0dSO!+Yfi4<{~gO%LM2{|SA}jr<$Iz)bod%;e-I literal 0 HcmV?d00001 diff --git a/app/services/langgraph_agent.py b/app/services/langgraph_agent.py index 9bf1866..e69de29 100644 --- a/app/services/langgraph_agent.py +++ b/app/services/langgraph_agent.py @@ -1,162 +0,0 @@ -from __future__ import annotations - -from typing import Any, Dict, List, Optional - -import chromadb -from langgraph.graph import END, START, StateGraph -from pydantic import BaseModel, Field -from sqlalchemy import func - -from app.db.tables import InvestorTable -from app.pydantic_schemas import QueryResponse, QueryResponseList - - -class AgentState(BaseModel): - question: str - sql_results: List[QueryResponse] = Field(default_factory=list) - vector_results: List[QueryResponse] = Field(default_factory=list) - - -class LangGraphQueryAgent: - """Simple LangGraph agent that queries both SQL and Chroma and merges results.""" - - def __init__( - self, - sql_session: Optional[object] = None, - vector_db_client: Optional[object] = None, - ) -> None: - self.sql_session = sql_session - - # Setup Chroma collection - self.vector_db_client = vector_db_client or chromadb.PersistentClient( - path="./chroma_db" - ) - self.collection = self.vector_db_client.get_or_create_collection( - name="investor_descriptions", - metadata={ - "description": "Investor descriptions and investment thesis focus", - }, - ) - - # Build graph - graph = StateGraph(AgentState) - graph.add_node("sql_search", self._sql_search) - graph.add_node("vector_search", self._vector_search) - graph.add_node("merge", self._merge) - - # Parallel fan-out: START -> sql_search & vector_search -> merge -> END - graph.add_edge(START, "sql_search") - graph.add_edge(START, "vector_search") - graph.add_edge("sql_search", "merge") - graph.add_edge("vector_search", "merge") - graph.add_edge("merge", END) - - self.app = graph.compile() - - # Nodes - def _sql_search(self, state: AgentState) -> Dict[str, Any]: - results: List[QueryResponse] = [] - if self.sql_session is None: - return {"sql_results": results} - - # Simple LIKE-based search across a few fields - # Note: SQLite uses case-insensitive LIKE by default for ASCII. - q = ( - self.sql_session.query(InvestorTable) - .filter( - (func.lower(InvestorTable.name).like(f"%{state.question.lower()}%")) - | ( - func.lower(InvestorTable.sector_focus).like( - f"%{state.question.lower()}%" - ) - ) - | ( - func.lower(InvestorTable.stage_focus).like( - f"%{state.question.lower()}%" - ) - ) - | (func.lower(InvestorTable.region).like(f"%{state.question.lower()}%")) - ) - .limit(10) - ) - - for row in q.all(): - results.append( - QueryResponse( - name=row.name, - aum=row.aum, - check_size=row.check_size, - sector_focus=row.sector_focus, - stage_focus=row.stage_focus, - region=row.region, - investment_thesis="", - investor_description="", - reason="Matched SQL fields via LIKE", - ) - ) - - return {"sql_results": results} - - def _vector_search(self, state: AgentState) -> Dict[str, Any]: - results: List[QueryResponse] = [] - try: - q = self.collection.query(query_texts=[state.question], n_results=10) - # q has keys: ids, distances, documents, metadatas - docs = q.get("documents") or [] - metas = q.get("metadatas") or [] - if docs and metas: - for i, md in enumerate(metas[0]): - name = md.get("name", "Unknown") - results.append( - QueryResponse( - name=name, - aum=0, - check_size="", - sector_focus="", - stage_focus="", - region=md.get("headquarters", ""), - investment_thesis="", - investor_description=(docs[0][i] if docs[0] else ""), - reason="Vector similarity in Chroma", - ) - ) - except Exception: - # Best-effort; leave vector results empty on failure - pass - - return {"vector_results": results} - - def _merge(self, state: AgentState) -> Dict[str, Any]: - # Deduplicate by name, prefer SQL fields where available, keep first reason - merged: Dict[str, QueryResponse] = {} - - for item in state.vector_results + state.sql_results: - if item.name not in merged: - merged[item.name] = item - else: - existing = merged[item.name] - merged[item.name] = QueryResponse( - name=existing.name, - aum=existing.aum or item.aum, - check_size=existing.check_size or item.check_size, - sector_focus=existing.sector_focus or item.sector_focus, - stage_focus=existing.stage_focus or item.stage_focus, - region=existing.region or item.region, - investment_thesis=existing.investment_thesis - or item.investment_thesis, - investor_description=existing.investor_description - or item.investor_description, - reason=existing.reason or item.reason, - ) - - # Store back into sql_results to pass through the END with full state - return { - "sql_results": list(merged.values()), - "vector_results": [], - } - - # Public API - def run(self, question: str) -> QueryResponseList: - state = AgentState(question=question) - final_state: AgentState = self.app.invoke(state) - return QueryResponseList(responses=final_state.sql_results) diff --git a/app/services/openrouter.py b/app/services/openrouter.py index 6a36a61..057a5dc 100644 --- a/app/services/openrouter.py +++ b/app/services/openrouter.py @@ -3,13 +3,12 @@ from typing import List, Optional import chromadb import pandas as pd +from db.models import InvestorTable from langchain_core.prompts import PromptTemplate from langchain_openai import ChatOpenAI from pydantic_schemas import Investor, InvestorList from settings import settings -from app.db.tables import InvestorTable - # Add these imports for your databases # from sqlalchemy.ext.asyncio import AsyncSession # from your_vector_db import VectorDBClient diff --git a/app/services/querying.py b/app/services/querying.py index ea80784..e253b66 100644 --- a/app/services/querying.py +++ b/app/services/querying.py @@ -1,13 +1,22 @@ from typing import Optional import chromadb +from langchain import hub +from langchain_community.agent_toolkits import SQLDatabaseToolkit +from langchain_community.utilities import SQLDatabase from langchain_openai import ChatOpenAI +from langgraph.prebuilt import create_react_agent from pydantic_schemas import Investor, InvestorList from settings import settings -# Add these imports for your databases -# from sqlalchemy.ext.asyncio import AsyncSession -# from your_vector_db import VectorDBClient +# Connect to SQLite + +prompt_template = hub.pull("langchain-ai/sql-agent-system-prompt") +db = SQLDatabase.from_uri("sqlite:///investors.db") +system_message = ( + prompt_template.format(dialect="SQLite", top_k=5) + + "\n Get answers from the Sql database and the vector database" +) class QueryProcessor: @@ -19,12 +28,16 @@ class QueryProcessor: self.llm = ChatOpenAI( api_key=settings.OPENROUTER_API_KEY, base_url="https://openrouter.ai/api/v1", - model="openai/gpt-oss-120b:free", + model="google/gemini-2.5-flash-lite", temperature=0, ) - - self.structured_llm = self.llm.with_structured_output(InvestorList) - self.sql_session = sql_session + self.toolkit = SQLDatabaseToolkit(db=db, llm=self.llm) + self.agent = create_react_agent( + model=self.llm, + tools=self.toolkit.get_tools() + [self.query_vector_database], + prompt=system_message, + response_format=InvestorList, + ) self.vector_db_client = vector_db_client self.vector_db_client = chromadb.PersistentClient(path="./chroma_db") @@ -49,13 +62,22 @@ class QueryProcessor: """Query the vector database for investor information.""" if not self.vector_db_client: return None + print("VECTOR STORE WAS CALLED") - # Implement vector database querying logic here - results = self.vector_db_client.query(collection=self.collection, query=query) - investors = [Investor(**doc.metadata) for doc in results.documents] - return InvestorList(investors=investors) + # Query the collection directly, not passing collection as parameter + results = self.collection.query( + query_texts=[query], # ChromaDB expects a list of query texts + n_results=3, # Specify how many results you want + ) + print(results) + + # ChromaDB returns results in a different structure + # results will have 'documents', 'metadatas', 'ids', 'distances' + return results def process_query(self, question: str) -> InvestorList: """Process a query using the LLM and return structured investor data.""" - response = self.structured_llm.predict(question=question) + response = self.agent.invoke( + {"messages": [("user", question)]}, + ) return response diff --git a/requirements.txt b/requirements.txt index 6a7dbd3..10ba213 100644 --- a/requirements.txt +++ b/requirements.txt @@ -8,13 +8,9 @@ chromadb>=0.4.0 # LLM integration openai>=1.0.0 -langchain>=0.2.0 -langchain-openai>=0.1.0 -langgraph>=0.2.0 # Environment management python-dotenv>=1.0.0 -pydantic-settings>=2.0.0 # Additional dependencies for data processing typing-extensions>=4.0.0