From ba0ed169ceed827388bb5db3135dcc59bd8b1322 Mon Sep 17 00:00:00 2001
From: bolade <babawale030@gmail.com>
Date: Fri, 29 Aug 2025 18:42:55 +0100
Subject: [PATCH] Implement investor processing and querying functionality

- Added InvestorProcessor class for processing CSV data in batches and saving to SQL and vector databases.
- Introduced QueryProcessor class for querying investor information from SQL and vector databases.
- Integrated OpenAI's ChatGPT for structured output generation.
- Implemented data cleaning and control character removal in CSV processing.
- Added asynchronous processing capabilities for batch handling.
- Established connection to ChromaDB for vector storage of investor descriptions.
- Defined structured output schemas using Pydantic for investor data validation.
- Enhanced settings management for API key and database configurations.
---
 app/__pycache__/main.cpython-312.pyc          | Bin 0 -> 2311 bytes
 .../pydantic_schemas.cpython-312.pyc          | Bin 0 -> 1640 bytes
 app/__pycache__/schemas.cpython-312.pyc       | Bin 0 -> 885 bytes
 app/__pycache__/settings.cpython-312.pyc      | Bin 0 -> 672 bytes
 app/db/__pycache__/__init__.cpython-312.pyc   | Bin 0 -> 167 bytes
 app/db/__pycache__/db.cpython-312.pyc         | Bin 0 -> 1623 bytes
 app/db/__pycache__/tables.cpython-312.pyc     | Bin 0 -> 1295 bytes
 app/db/db.py                                  |  32 +-
 app/db/tables.py                              |  23 +
 app/main.py                                   |  41 +-
 app/pydantic_schemas.py                       |  38 ++
 .../__pycache__/__init__.cpython-312.pyc      | Bin 0 -> 173 bytes
 .../langgraph_agent.cpython-312.pyc           | Bin 0 -> 11285 bytes
 .../__pycache__/llm_parser.cpython-312.pyc    | Bin 0 -> 2916 bytes
 .../__pycache__/openrouter.cpython-312.pyc    | Bin 0 -> 8681 bytes
 .../__pycache__/settings.cpython-312.pyc      | Bin 0 -> 681 bytes
 app/services/investor_parser.py               | 449 ------------------
 app/services/langgraph_agent.py               |   0
 app/services/llm_parser.py                    | 382 ++++++++++++++-
 app/services/openrouter.py                    | 178 +++++++
 app/services/querying.py                      |  61 +++
 app/settings.py                               |   7 +-
 22 files changed, 719 insertions(+), 492 deletions(-)
 create mode 100644 app/__pycache__/main.cpython-312.pyc
 create mode 100644 app/__pycache__/pydantic_schemas.cpython-312.pyc
 create mode 100644 app/__pycache__/schemas.cpython-312.pyc
 create mode 100644 app/__pycache__/settings.cpython-312.pyc
 create mode 100644 app/db/__pycache__/__init__.cpython-312.pyc
 create mode 100644 app/db/__pycache__/db.cpython-312.pyc
 create mode 100644 app/db/__pycache__/tables.cpython-312.pyc
 create mode 100644 app/db/tables.py
 create mode 100644 app/pydantic_schemas.py
 create mode 100644 app/services/__pycache__/__init__.cpython-312.pyc
 create mode 100644 app/services/__pycache__/langgraph_agent.cpython-312.pyc
 create mode 100644 app/services/__pycache__/llm_parser.cpython-312.pyc
 create mode 100644 app/services/__pycache__/openrouter.cpython-312.pyc
 create mode 100644 app/services/__pycache__/settings.cpython-312.pyc
 delete mode 100644 app/services/investor_parser.py
 create mode 100644 app/services/langgraph_agent.py
 create mode 100644 app/services/openrouter.py
 create mode 100644 app/services/querying.py

diff --git a/app/__pycache__/main.cpython-312.pyc b/app/__pycache__/main.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..d9c2a4c1689218ae235fa426aae78fd34f019050
GIT binary patch
literal 2311
zcmZ`)T}%{L6ux(6c6OKj19o9S{6U(6n-;c$iI!Sh36)Y(>Q5V!HO*$&JHV{7GwYpM
z33O>zA=M@()!2wWG^J0)XcPO^q%YN{CVc@hVy`u7>_hvuN*|P`o;%Bcwe3mfo^$Rw
z_ug~Q`R<utLZK=IG`8^l=p_lEM_h49UKN&yF+wwlAcA$2s&JeVKo@i|C#9sEC*{F>
zUex8BH|5RwQofu&<#*;Jy($+-1svI<2XmoR$dP3|oQtF)7zs%AURgaVRSlZQ)i6@4
zvoE>ftaIYM0ugZ{zG>uor)t5wtup`oNc^rh?Cm2}t~QKGK$S>P^$$xVG`)6eD)vA7
zgx!_Sx`-Npb=A|-+WB>AJ*hdm4I$d(<SL$u!-?_iYtKhhkx4Cyk{GGGT7Lzy&IzM&
zJdP9bKe;lP5Lk!|rHPtX4Wb&EaTd}H%}x`=R)!QyO^D39PqFO1`}>)+Pt#Ra^-*3o
z6~g5NX2Ammm5v`&j}=tQ2EG2!xRL2Mj)P`W$F_GwHMph5o!w8(jA~gXUBSr=55gq@
z1QEVvjue0aCW3|!p@cZdJnyQyZZgkdlj`I$q7m)}A6j`UIcnzAq^TFi6iYF3TC&&7
z6moE;l?3mj$@d3(lZs)RMtV%6>M&KnTgm5>IYl!%^5e`;RfVLfY1*_FHsm{7As9;N
zK`?su@O)s?ytIi{!^}igGOtieZO>T8JD5O*m^2Jo$ndyb@`5l0-*PViX8@8tgNiu2
z3L@8ubGMjN0OwEO))#~s@iUJHO$s*8fkWBVlDRtPHb23ktk0F!c~Qs))-_im*ag&!
zd{VUeT^%Z-tAcBvl!{W8r{V=Fimu1M;`R`+=;=jY$QfY-WrU9*nUiu+&epuJ1*q=b
z&k1K@68KKayv{q(nG$@h838+UW9U0^3@4<)2~WWuZr^novw(F>Pg?*U%`}*os#Za_
zt%RRRyoOB36XtWO2?b(;W-{L)n`*{L{|8JaYQ`ig<<Me*d<BAO?k5o~V<)^c3KkmU
zW^cwcYyc3Hc$oQDeN9RmxDiu}SD?8<l-~|~I4t;X^kvZbCKd<5ALwZbm1R`dIMaWw
z|NOvGcuT48H-F%Zrq7$cJUDaY+>zP%x0}D&e9gPKb=P9u>x=&GQqNy9s%e=Ux>dcs
z)PE<oX(^Uih$ZG)7Gv#8;r3D=Y!|AZYr7rqoIP+6e=T2<zw+ITcg_d8=B2K34T>~B
zEmxsP$I25KHFQ0*c0e2_V?E9I0`7@RGkcpryxxg>8l~%9elG8h_cX~jq8^ZMG-5tB
zad|>c!keb!Jk8GI{s+uCMEMPui!fARZm)qHfV2j<NdX|a254|zC3qD70d5yulOhp{
zVs;Jc?y7a@aKJe55C>^Jn4RJzB<cs5h1~j9Zs2p`2guN7u!56Q>Q?UQQFwQ97F#?X
zUe!uXBhK2iw%W=p`U;p^;InqZ;8gbaV9Ol673?g%b0-{K3U6HqZ=G#hYUx~P>0AtV
zmHO@k>&}kN6wejUe>xv%otIjlIcJ>g-i?_ro#qWFou2S<-VGgz`E@g+=%c1(Yj1Bs
z=);7>L_RO^DJJDjYBQOty!$2^=|%{H$$7;fibXeY#Y2WV$Pne(ru;I@I}Dv$$!pZZ
zr}$r%@|3db{5XM@sAbYtW>n3=dfrM{gQe2r8uaE4T`@*RsFEK|SMuXzHM(Xt;b%e3
zf~`{LJ<$%nS{zX;9h66E+0ILFlE&|9K-W#DnYRG@o77+;Ef`J{cN(jc@fF|hoJk70
zx`)0A`*Ac`Kf_QKF~)b1{}GDbM;-T3`vPjehZ6VD8~4!mhjM62D~S(LXgUHfL-3r6
zpNOB1PHlWFV7z%*L_)aCrLvFP8csBvesvMYf9>A$gY{$LF{;8lE{4jS&841j##c0y
z1(3=c+!fmvakMOmxb0%DjDTI!pK!)qW6pdzh@{}@iJM~MU8(kg6e;zc8aOd<_PyC7
S*X*BqZ%h4UFOoJmH~kySXc_<j

literal 0
HcmV?d00001

diff --git a/app/__pycache__/pydantic_schemas.cpython-312.pyc b/app/__pycache__/pydantic_schemas.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..49fa62c1f046168ca3768de4a7821f5c96636635
GIT binary patch
literal 1640
zcmb_c&1(}u6rag%w!3N4G;R7}p`{;)<kB2G38e=q)KXhf5FrPbB|B}FW_Qz>jU`@N
zK@_2$+JB*V^6$`#q=zuzNj!KH8ZSNh-fR-m5JAC(`OR;BZ$93<_hvt3GARPzyU!mh
zZxe)k!$J2;cF5HmAZNrSF0B%umMBG@s3v@^q*0O}o5a;lh^sU0gBGtT=}Jw4YV@f|
zrH+7V_Nj(aEl^W^>WETDK~49mW+PJ=`+<iPsMH^NB7)w!?+CUNx~y7`kB_nA3%Pm@
z<1@lYiMphecuJVYbeFDy%96kmilL_UnBHT_9y5Auq{mET4Xcon=2q~6i74b!4;-IK
z&8hn`Rj#n|fi1k3Or`}ZgW28-%XJ|~Mda+W&cNVo-wOko^VA+b3nDwJFyRTAk0n^=
zGEwGUErN-=7lt%#8_%(AnX+v^bn8{*Y1=-mJJpUw6A_nL+jfE=j2v7kV2<Vm(FM7}
zAYE@4E1}PdVYPnb2q*Bp;s&^f4v6BjkRKFxwl|7S5QTw#<Z-sg;mD#>s}*Yv7i6z&
z3%H{1h}Bwy8?d7SBw`id6=|h1$6GBcef*#`K6(78H8IuHr&_suQ_r{3+2ifj%v@8S
zYt7AH>hpZ2yTR~0E`WJ3kGDW$$vB&hL_w43ZXU2WWTqFsDhziE1|P>WatIR$SR}FI
zd1TWF%8glMb6vm%A`JjHnw&W|->+ZlOYuaUGu+<_SO)$5pLM6|*isdkDvZj^<2vJw
zCrs2J&y0`2>D)xH5b&MzQ3SjO&mfEeNGLu>Kuz&1N&^9yM-5>Lnz#$_4=HWx^N{0{
z?Kn=j%kVg1sE5ZX&S>Xehjn;g(o`Kjf<kgInUk;)AC|{OG$1TN6Brq|##ruTCw2`j
z!(IEUlFQu)ysA5kchVsKRC6k_!+c!<XE?%bXae&DzGf#+cfMqoFZJcvW3=xUENMiI
zniuS=GL~ldQF0y3aaF4dR%b1urZ&Hs<7;y*{(Y?TJ1~Vp6Po}(HA?AMGV`4*v<;Ik
tyl%7!bnR(Mmrw7t33P4KqT{DqZ310;mePq>pJ<zuuAE!HV9tv2_cxtIVpjkF

literal 0
HcmV?d00001

diff --git a/app/__pycache__/schemas.cpython-312.pyc b/app/__pycache__/schemas.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..f60949db15b9c38669ff4631de3cd2a7fe72f3cf
GIT binary patch
literal 885
zcmZuwzi-n(6u$GXI>AvJk~AVxA^}sDdSF6S9jZ#SDuoFlW#DAl*TkxS_0FL(Yd~Uz
zsr(D56MqL|I#?$$AqF<2gryVj?8E}Z4c~q5-Fx3Vzjyx9Xw(s`2Unjbk1#^tq_JN5
ziYyL+Tp|y7I6xsDVJx{4D4{x1F;dWD<f(6vr&0AvEptX%;nl%w{NdHJjiLENh77UL
zp7=b4y|(XgdJ=myaLeQpS4wE{66{Mv(Fl8Jq&!DdrJ9F#!Ao^uO2MR0t(aCZy<$ei
zHY#RHmeq!JVID=VC{JT1w8#mmP@QQg>h6TPXN3E&sjxV8;h3Dp?v#sLJaxu&Wf+W(
z{Wua$zt9M2l#+Bpxz9zrbU~g+xy$?{g}|pnLzslfI)sQiAz|!I1IaBy&ZkbWI#GGb
z#3mt56ve3{GdaYlew2Pf3)!TbeR~pz)Q*Gcg~OdF^z8$<p1c6JU&QRpJ~=+HohXeX
za^W+2%Alm}B#F)8%R-0mBpEY6r~pa$4!~QK*Bi5=yk^Y~^R3qGS$?}S*E)H#J=fZ~
zwK+S^yS=&A%X|H6t<SdC0{<W10FM71-yQH#y&Phu3{_#R+b`dQXjEMbz=U?OO<7}0
zLQ{f#8|B4Yl652$*XT+n)eDfZ+ya0vYIQ%FANH=byJet^k#jK8Eb*gpp(xBG^PpS4
z%QPubbfysdyTFaPTTX0`^&t*^8^rGe{8TZ<Us3lv8Y~SH58h=<1lw{O<9qKP{z9-7
G#eW01f7uQI

literal 0
HcmV?d00001

diff --git a/app/__pycache__/settings.cpython-312.pyc b/app/__pycache__/settings.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..22723b303fc8c67bbb3bdbbe3ebf109b0cf575ab
GIT binary patch
literal 672
zcmY*W&ubGw6n?Y2iEB+7tqR)WAFv|Xizg|9wn3zrmTD3AG7OnXySO_OXEv6I7omdU
zAJD%+;=%vHqenr?f+tVj)D(oAe6v544D9#5H~Y=|-pu>lZZ83@PhUSCe8BkKCiQAm
zWOftDF>v4{fs{mqfCKk{J4e7>L8fl0iyQ}pGp!Cq?!*fkKWr9|ymglu@la%0EceZz
zk`%$KlGzO;#~>gg93p2AgeyEw)^Yd0+0-f3QNtF`{kmelvc1#m?{4os>h01yJNM~U
z@5zD-sa?)5xNw4PI)OXy)Rnp$N)F@w7hYG$=l#ITeM%)u1*Q2CrK#fM#PSxU&&DjN
z{`jb+!#EM|VP=iy8{vUUMW~YTAu~*-ak!}l<5bAZgpZYe8a~+C44KT7q=&H<LyZn0
z8;wF!JyCZw$(v;>+Q*7#@G};U;N;@fx4vFRuI**|K~tZ#iJhZ9hmgA_(|LzdCZ)<)
z7At8eEyTKQMa9{UWp=^^*eRG5gx7F#W$lZ%_QxZQ)!%?n%B|8q$A!^^W8?9F)~#wg
z{-Q|k|Jz&infvSI%iY$OQEZ1{t|9z%2_fI$`f1Z6t8ZSO0)}79(7N>Q)~oW0e*qSD
Bo!$Td

literal 0
HcmV?d00001

diff --git a/app/db/__pycache__/__init__.cpython-312.pyc b/app/db/__pycache__/__init__.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..3173eb0374a22b0b98187be7befc0471db634573
GIT binary patch
literal 167
zcmX@j%ge<81o{^@WPs?$AOanHW&w&!XQ*V*Wb|9fP{ah}eFmxd<))vJpPQ<mpHo_%
zSe%%bo2l=TpIn-onpaY+AD&;7t?%pYqMw*olAjk}o>`QdR+I=-omfzypOT~>AD@|*
nSrQ+wS5Wzj!zMRBr8Fniu80+AEF%yXgBTx~85tRin1L(+($Okt

literal 0
HcmV?d00001

diff --git a/app/db/__pycache__/db.cpython-312.pyc b/app/db/__pycache__/db.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..21b09ee2e3009d681ea96bb4b0219cb0b1d9bc24
GIT binary patch
literal 1623
zcmah}O>Epm6rQoYUfa8y-6Tyz(p51O3cl<H5Hv!lkSG+PG&v9u!k3lTGn;klpJnV#
z>ZnBvszgf#MI304pmHcmD#QVa8{E01Y7odI0t85K3k`>=;=mj4Zj&M;MxHnCy?OII
z@0)Mtn?gZD(BAm!^X6wFLcegM-!eUAcMFuuh#-PpWb#pPu_u@U=DO&Lo@7d?F1Z;`
zHsw^$xLHpzl~k8q)zeJP%b7Wh1Voj!UTQNB7Frtb|8j36S)!~7UE3UhD47lrbzQsb
zD}paaRHD-2dTtH&&YM{x`E$q|qyp7oUZR6*xI5Dj#`ydYDZu<N8K7E2AjNZ%c_79{
z=?@+>Z{S3o^8FyPB1*u>&d?V1NeF7eW)!?N>MuGz1xJZc+qIY#Im@(Gw?b+NNp^vT
zp%eH?KI|%<b&9eiKQs06)b!NC^R-uAI=Ol<eBE^-`gFBgb^K)-Mga>eq<#W&O4M!!
zhL~tpI|}T;^PDJAx!!QwVKb5Hj!*1vIbA8ia0L2p1Y#CpIHjmpFG78c&mt9_K_3d2
zF@|xehg3H~SN0<@iXq+q#jK#y;%jIHUl&)ff#;#_e6Q}4fo-`-#&!dM11AEhhkQ5s
zo#(2}z@yc`ZLe6N<$F$bCa~Kc^`o%*YQRoa=VoWBmLCOvZN*`<!7PZjTCFOn!&hm=
ziM&Xo8mY5k$TtE#G(h~0Zg<d@I&$%FXL?5vGsE8xl-^N(QnX9T1?64s_IaVJ{2d;E
z=(lo{_&w<e`e1MpUmbc<{6d_>_n^s<;eEFb0+0mRZA(2K_H8~c^>4>MXC(KHb=>D2
zMv>b05@F99i-xpXdC_+w$8w!jsz*(#6DzVfm-Kq9r-XC-Mcr~;9o#Mj9<l=DT77V)
zALra%PxP>D+d#WU+jZjy=8a4uO+%_g@n~f6#EI79FeFUmE|HT=i#dMuDca?GWJ4g5
z0#91&Z-te?r$Z09=%8OE<?XpQ=f0E5H>C0>M>nMMrethM>V{VNR;sc?5R}M_Ms4Qb
z11^U!1@|B*|3%XUWZ>1uUZ9a~?cvb7M|vY*dKmk5lLdaz4)vf#`PB=2j!6+RGX8|5
zfA1Di;(Y0?uJM)Zeh4@UJ>(A_)R7y#czi<|OV=19i4cT|9K|g_obi1o*@hKHR?A`h
zk|i44TFbSY)QcHE`DA?0QK6@!%D(4-aT+88`vL;yu`D>5!UgAVN@(gYGfob69~WsE
zoZm@-)N3StdZ=&5=?SFU|5uTBlxMtvv|W0fJqpYCRSVC8*by<tKOl7*m48Erw$Ve|
z=-7{F;^*w>&Fol5+L2Ld>^!>^Ux=?9-Y}+a7N<M;KV_tizJKQ0!<*_vM+U1@{;2(B
z=Iaxi#o12&mYl)EJ0&zYe&vymk6nA}>a!c;kKZUg(J9=LMLe;S7ck8Hg<weW{0B5h
Bh*tmr

literal 0
HcmV?d00001

diff --git a/app/db/__pycache__/tables.cpython-312.pyc b/app/db/__pycache__/tables.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..2086f0dc2dd37b122cfe39748de2665e952bcbe3
GIT binary patch
literal 1295
zcmb_c&1=*^6rX%-lKrxqw%Qi)qlFf;f>HcHP!#M^X={6Ni;zo5JJU98KGsRry5eDl
z{sDWlcv??Z+Jl}2(US*HOINl#9z;aMo6vgk<V><^!5;e#FTeMjH*emX_a^VN*%YAi
z>GjLS4?F;0nDIvv9k4S-!D9dc<Q!nIFYj<JZ}6^Q2(D;|Zo)`#j29fql?|B#Jh%y<
zxC&qb$s0l($51MqCVgkdv?da1)OX6RN0fpUqN430B9D6^T0odcw?k}u3mQkn8&-hk
z;{CCfJI2lvjwOYF!9if~kTV1xAQ2@X5BW{u@1at}%23=)Y|sLk7)r-;FPGfMFSQSs
z-iOQV!(}7x5b9bw9-q2bFAeUsVoW`U{%1F6l}t9Jzu9*mY~%yW@L!{QDvFjQ*>Ue4
z3PK+jtyu?=q}?%tqQ;YS3EQrPE9Nq)kc90)bbpNYTk*<{!`vyBA)2?!PN?zt5WNAS
z_+GgL>99;|y+koh&vKDzl9Xw>J}f&7XH4^M*>XB7S<?(ztk}eC3^AK&!rKr_42W!r
z2v*r8skucow`>OX14J@(=4jsLygyeCNID3u1=KMlj23L)!(x;X%O+%_D|r425oU^`
zG!90Tg{BpfRGcrv&%qAe0e%(qMc+j_9sLz6usqk+3-k~dd10X6@$s@gHCfOtFZ4Zg
z#l~nJ(*$*^RMO$B9u*UuDOGTihKiIzLGX&IRj}QAqB`|C*;SM4#TUKJ<fZD3c3Q2C
z)Gs#ETD8zl<!XHtr?l#5TkTtu)(<yTy*knE8CV-xzu4?KSH0El?q55yKHTg+Tb*oY
zdun6#nP&EMb&UBd>!(`zD~<e>R{nY;fBlW}F7r0Cm7i*=(=>?Mx2@#1bN#K{U?Vr!
z$_+Pi!}Y~y&QoVAcYa$PXsJUDb*QC|G}Mv${bvuKKHO3-?8;)d^k@RJ#}HMcCGk;;
zNh-MOSPpH7TZy~IZaZpS+_nUqJp*Sk`xV4jagw1Z40}Sql&kSXT*JfEWG^T<M%532
d<G7EY>l?Vd1ulODM|PzyPO24t0{V%z{|0&JGp7Im

literal 0
HcmV?d00001

diff --git a/app/db/db.py b/app/db/db.py
index 81215fa..401bdd6 100644
--- a/app/db/db.py
+++ b/app/db/db.py
@@ -1,11 +1,12 @@
 import os
-from contextlib import contextmanager
-from typing import Generator
+from typing import Annotated
 
+from fastapi import Depends
 from sqlalchemy import create_engine
+from sqlalchemy.ext.declarative import declarative_base
 from sqlalchemy.orm import Session, sessionmaker
 
-from schema import Base
+Base = declarative_base()
 
 # Database configuration
 DATABASE_URL = os.getenv("DATABASE_URL", "sqlite:///investors.db")
@@ -17,26 +18,23 @@ engine = create_engine(DATABASE_URL, echo=False)
 SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine)
 
 
+def get_db():
+    db = SessionLocal()
+    try:
+        yield db
+    finally:
+        db.close()
+
+
+db_dependency = Annotated[Session, Depends(get_db)]
+
+
 def init_database():
     """Initialize the database by creating all tables"""
     Base.metadata.create_all(bind=engine)
     print("Database initialized successfully!")
 
 
-@contextmanager
-def get_session() -> Generator[Session, None, None]:
-    """Get a database session with automatic cleanup"""
-    session = SessionLocal()
-    try:
-        yield session
-        session.commit()
-    except Exception as e:
-        session.rollback()
-        raise e
-    finally:
-        session.close()
-
-
 def get_session_sync() -> Session:
     """Get a database session for synchronous operations"""
     return SessionLocal()
diff --git a/app/db/tables.py b/app/db/tables.py
new file mode 100644
index 0000000..c66d162
--- /dev/null
+++ b/app/db/tables.py
@@ -0,0 +1,23 @@
+import datetime
+
+from sqlalchemy import Column, DateTime, Integer, String
+
+from db.db import Base
+
+
+class InvestorTable(Base):
+    __tablename__ = "investors"
+
+    id = Column(Integer, primary_key=True, index=True)
+    name = Column(String, nullable=False)
+    aum = Column(Integer, nullable=False)
+    check_size = Column(String, nullable=False)
+    sector_focus = Column(String, nullable=False)
+    stage_focus = Column(String, nullable=False)
+    region = Column(String, nullable=False)
+    created_at = Column(DateTime, default=datetime.datetime.now(datetime.UTC))
+    updated_at = Column(
+        DateTime,
+        default=datetime.datetime.now(datetime.UTC),
+        onupdate=datetime.datetime.now(datetime.UTC),
+    )
diff --git a/app/main.py b/app/main.py
index 4781a23..a09042e 100644
--- a/app/main.py
+++ b/app/main.py
@@ -1,7 +1,44 @@
-from fastapi import FastAPI
+import io
+
+import pandas as pd
+from db.db import db_dependency, init_database
+from fastapi import FastAPI, File, UploadFile
+from services.openrouter import InvestorProcessor
+
+from app.services.querying import QueryProcessor
 
 app = FastAPI()
 
+init_database()
+
+
 @app.get("/")
 def read_root():
-    return {"Hello": "World"}
\ No newline at end of file
+    return {"Hello": "World"}
+
+
+@app.post("/parse-csv")
+async def parse_csv(db: db_dependency, file: UploadFile = File(...)):
+    # Read uploaded CSV with pandas
+    content = await file.read()
+    df = pd.read_csv(io.StringIO(content.decode("utf-8")))
+
+    # Process the dataframe
+    processor = InvestorProcessor(sql_session=db)
+    results = await processor.process_csv(df)
+
+    # Convert Pydantic objects to dictionaries
+    return {"results": [r.dict() for r in results]}
+
+
+@app.post("/query")
+async def query_investors(db: db_dependency, question: str):
+    processor = QueryProcessor(sql_session=db)
+    results = processor.process_query(question)
+    return {"results": [r.dict() for r in results]}
+
+
+if __name__ == "__main__":
+    import uvicorn
+
+    uvicorn.run(app="main:app", host="localhost", port=8000, reload=True)
diff --git a/app/pydantic_schemas.py b/app/pydantic_schemas.py
new file mode 100644
index 0000000..08588b5
--- /dev/null
+++ b/app/pydantic_schemas.py
@@ -0,0 +1,38 @@
+from typing import List
+
+from pydantic import BaseModel
+
+
+class Investor(BaseModel):
+    name: str
+    aum: int
+    check_size: str
+    sector_focus: str
+    stage_focus: str
+    region: str
+    investment_thesis: str
+    investor_description: str
+
+
+class InvestorList(BaseModel):
+    investor_list: List[Investor]
+
+
+class QueryResponse(BaseModel):
+    name: str
+    aum: int
+    check_size: str
+    sector_focus: str
+    stage_focus: str
+    region: str
+    investment_thesis: str
+    investor_description: str
+    reason: str
+
+
+class QueryRequest(BaseModel):
+    question: str
+
+
+class QueryResponseList(BaseModel):
+    responses: List[QueryResponse]
\ No newline at end of file
diff --git a/app/services/__pycache__/__init__.cpython-312.pyc b/app/services/__pycache__/__init__.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..9191821e8d8caa29167735920660866a2272a897
GIT binary patch
literal 173
zcmX@j%ge<81nL(zWPs?$AOanHW&w&!XQ*V*Wb|9fP{ah}eFmxd<)fdGpPQ<mpHo_%
zSe%%bo2l=TpIn-onpaY+AD&;7t?%pYqMw*olAjk}o>`QdR+I=-omfzyUz}Q0mYJMd
ttREkrnU`4-AFo$X`HRCQH$SB`C)KWq6=*mk5Ep|OADI~$8H<>KEC3JAEd&4n

literal 0
HcmV?d00001

diff --git a/app/services/__pycache__/langgraph_agent.cpython-312.pyc b/app/services/__pycache__/langgraph_agent.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..fe524c0d6bcdaf52266d0021e07a6eb987a590c4
GIT binary patch
literal 11285
zcmb_iYit`=cD}>m@GX&gQ18cBvL(@yNIUXVeyuFYvSVA0B`1!x-EwKpNE(?OGBZQV
zVyGnDcoz!bcAfsHW$bRFEU<_cb^&jH6hVJw6E6^8_XiniOLm%cT_gpXe~jf|x9Otj
zxpz2xiQY6t2jHFiIQPuuz4x5^opbK5tEy}Wp1=C%U(8SJLg-Um&>tq3+4@Jw+(8`T
zs1TaLZ$pR*8)gh)dWH@gXN+NHh6$TyOq51ro-qRr9kPV2GgeY(4B5i=8GG0<;~-@w
zR28nCsU~?-$QgFcxJcd{stMQ5)RMd<R2QzBsiTkqO(4#C6LB`)bdT1?G*eG%?NIBu
zXUNsLNnI7xRp;v(mMD+&b4)PlVN`lVTvCl=LQqkS=LA_%Ef=DS5D^0*)p}_u%5%8M
zL#ejW`G9gE%8MgsJO-5>pBz(d(@H?$C!|1h9teyIe*+Vmr+HZh29T|}7~`d-DPE37
zM489jq0%DriXw<}a!?zimKt+~tq^4HARf(79GWpak9e9la@0|%<r&@tCG)JI8A|vE
zErzGI(4@t}TXU4g93`DAGq}8D@|acI$Q&;!<j<(gOL)ktU4AL#mv}iAQe?IIIv-Rb
zQlUI6!1UlE`CqDum!ya!PkL;s#qSpbVczdoZGL|^!o@;3@9_IyiUmTs7Ft#$)#>*K
zL@@#r2va5d{Sw|;5}pMKucu_fNfjhNM_Z7hucm$TkudLzgkp;UIUt4w-&iCV3&X%=
z-{pvO&3FFnm@gnI5z)UWNc^k>!|(;7QJ>69*M%T2`$7S6ZVoS&KY;(QH@YOb@K<nB
z4L^AllJBF8z54ZMGd1qp-0IL8y?W?PF;%l`!?-I`-?Uo48eAJ%oxj_Ws_)w{_GRkZ
zlI`A9y>G+l%Q#%O>)trCX>?0%I@ii8%t3Qug*l3+XAPX;CI}5j^9<yTkT-D*Z-%mY
z*2tNlY$+<6p=>QGTcB*?th^oQ9YAk`a@CUE<4|kQh}U@;){bmMvdr4j%Gw?c=HvuM
zgb<!`Joj7w4apsppc2RrIqgw!3wacMtD*(mqwL`*&cM<4jrVd?z#~tu*mdFshn_3E
zIp)6U9;KC`msxK~par}-WzRxQSp^h1GiTwf0(IY}%R2C}>U6-zt>+8xd$}x7=0~@L
zJod?W+q|Mg<zb&M0zxlEVhS&L0|IO*!FPRcycX9&aW1M1?u!hD1yPXjWO|IM8A$xs
z_$AeHH6Zi;m=sbO5CuM@+7&(=<)wfUllZvZ>kH0Hk#NAzUB&7n8c^m{w~$*fKgY{K
zNg%37R_#Tl_+EWc*`ivR4Tv18^~OTMD)YQ7$m}c#i3|$Mh=otJgn1>v1(bltA+a!8
z$$}F+y=v`+=f)?eE<Ar}e9Axa+!_Db@hd7F3WZgRHcIZQ>UxfsfTsfNM?(Unsw<}%
zxU2r4R?{@cD}K-wL5T<5=nqChAy6J5<EpJF@9<xZ2_eo;#8G9)>*p{?nGem9fysC-
zSypPtLj$D()2sOX@mBq>7xuGPZ)^lo`DI96Lz&i|72~F*>EpWgkL%mg^*#{MjC&|k
z+mmT(eqv?nZ7arGmM1Q>!;@wYr`W^qE^o5uahLW?OGmoJmum5)TTZ81PG@TR9=Y0+
zol{xb&`_OaQ0p0L3o)&jfivqu4J~PRPs-i1;qF^8{-uqGzG{+qSYa`k+srT|gs>10
z;d%vX?;s`LrsPFX-~7u|f?CiITGCldI1tE6BByV53PiSIKLhl`3tjenZ9f|N5hzgF
zv7{eX(Pa-u>Hma`$XV75MXV%BxKyG6HS)DIw1Sm4c}i&O^xDE(7xZZm(RkZQy#26@
zu;<`ZAV)a62&c$wD6**9@Zl%(0Vz1IR^@hhuILO#!dC^6*Uv(aO~Nxo&JfiS;5fes
zlCHY^1r}OOoxg;UR_O-$C}F2nI{_j1LA<0Te>5V1m=VT2H^)m@o2h2#6%|6f#~|&;
zWs5%YVyTgyfu?w8C4niDy#XMSe+tQK=(D=^A1`dwc~&f06SCH%EzK!Q^XkC5G2M48
z)pu;uay(;k<!XKF`_lcRss7PT%UHfP+0ws$DLpum8l2d)oX#;}Qx<luZPVh-lL1TT
zrsYtf>`V3^-LxFbR{)>;)0>vPkDQHZXJ^XUx#8TAwCvDii;*$rDml0C=6M}{Tl+wi
z&LRrL%L<~vQ5fhf8w#QZn=U7I%QQzP3=8=)4G43OzD<4vX!R%bvJpfee}3sS2rcAK
zr$SwV=8OrWK<OKCnVCb&rUWyFz5}CMHYZFA`Ln2~m${ONCCrshNT{uF5<+cW2uk?Y
z7D{>!XzTTS;SIG7g_53w+9o|;ctdSVp`_=a7K5I$hhsR?kL~)LQ|Q+BjVNJ)l`&)J
znV_aoH&QzEPCJpbf7t>mY=>S`cypFHq|Ym7Rl4<-ghl86BlHu)92JKK;BUeF=WvdE
z=?d5h%a&Jw_jE3|NG+lp^cA#7{T;nXdF*lTG!b{A5vDJmWAPb@Pj5IsC061?{B?kq
zx#Ja}u7uG?;BDAHHn!}U9zQoedWqdukIC>EJW#P^k-sVn%2OJ(QymV~qVv8oo5K0;
z*Uo=GIsYpY&+`EeV7UaqMy^Z|h=u)ua3m%wmD^`yA}7PyJR7Op9+YBYaGszbu#KcZ
z@F{#kf_RPR1koRrBD0{_Dsz(~F)7IV6)C37>y+Cj^VHOZ^M!TVz6_U7k57$nug0r$
z?!x8qsXn|$9`@Xsv*YZrru6+VL1Fg7)V6OGI#pVs|05F&-bxnl8}lh%*Kgc;`;vWQ
z7Uiq?jTuy0=n71>t;e}D=g(YX_wFfa_=?&ECJ&KrvXCkbN3+K+c|d|yb58Em`bsOP
zHa<n3R7dd~tG0risJ4PksI}WCQ+1Wkj#^*o@2V!PR>DeL!gfo%7V9PLkFdnx9%d&!
zwGy^kR0d!R)Uh~k=5O#puvSzPSiii;k$)<kfhOr$oSenUIh>rw$s|rL;N&@+T!cim
zj^7CK#Pm>U!1EG5NIg~pHl#63h1H*G#TbXU3#18L2H1=J1!Tx7Edh|Cco`pgSWd`8
z;JkDUs^Z;ewppg$a?pec<vnm@{|EZRYba~nZ|%v{H?Q`-Ik__a#DtpLv#6oNwS^j*
zYgW!=_n`KURsOAOcdz{@yy{;cNw#=b&Su=rY4@&_dso^$m~s!U3-1no*qS{2{D%95
zl~a#e+JK_5{V{S@)!wEvuGX}xE9L5X#CE@9dE1id?s@0P+eb2chSGaZr}muA3>`=h
zO{Rt>GkyK(zC)?LLzy1WJL21-{_^fkd&g4VvCM!kJ@Bp6z_$uryL!K9bhsP7q^oOc
zvow4z+l6-eR>p4+rW|b6fn4qt#~+?pk*oPJGQi*L=zdGME38`{bZvC(PaZk{n_zNk
z`q#4`zL-4xLh{h%RL5oL+xNzWO!qF#<hKs?k+Xiq@n2ah^!}slgPHptHP%Gk9IY8`
zL?5^fkpH03I_9Q7V0VwX=!Z5F)I4-CQ1j4DL%sx3VQT?E3)=bznEmaL3P2GwmOqu?
zR00lV&M*gH>ZcSq1>nHM&oT|r$OwRm!8=R(uAbver2L|iw|+`n?&aVcKq}aVe+F=G
znE{AptOUmdfEquepuZXO%yG7tmJ{UrA^--cCw{Ubx}tKP&cxX``;Y0L6<Ij9P64Pf
z0@QF6fttLvML>-iKn)htvWKfG0yWi2-jYg~_5WD{YO3;}hI|P?jdR)j3h)L{Lvm%H
z#uYzS1Zp(vqZHT3d>BxT5DbJuOROw}g-}2$oSj6-KgZUMhgPk+<3LueobGpnEO-L~
zA~@H8#7<NJ=-3<JGPH`ig`}DVF&K(*y!7Y5iugz*>{e1?-SNw$jSLuAbjFa&@s*}<
zA8gHOI?(+B@kDM@gJx+jrqR?AFM?YToGiTbJg$5JCzo+T)R?7QjhS<9PVyO*5~SUj
z_S=xico}o5OLIj?$Dt<PQ%QGiH}7s>783>a#acai^##^ZW9yyBn~`)wf2yHB)84f@
zk!fjDn@(iSsBvHm85^rtCO|QCuq%@pXT$B6KXP`jn;+OyeMjHD_)*`9kGlufFF*Kp
zYH;Me9jU>wjqdTs$Whbq=GogLw{K+XyVCWZRJ|u_ur{}3I(o7+yy2T{@7l|$&Y=e*
zA9Wtc(#W$X$1v4*;yvo4zLSr3bU*6s`8t}eOpo_*4RHLDw!0g$G<-MPhI;p}j3;Zm
zQjTu$i)i|+is&;V{O=NdmZ|N%Pp^;MGp*f7y7qu)<#6>E$k17>sWcd7uZOVut)n}q
z)vADpuGL1HshcNjPd1_V8w`+tzsWk<K)=6Z_h=3Mf!zc(AJi~V^Fad*`N~>NEin`q
za#{tJEB|m*K{l5SAitGpK(FaW$ubQZfOyuIjRI(;gaLFEY$VW5riy)y9HaNGq&Ial
z6wF4mOx@}Mvx!L%W9HTX=o=cigO2}y<8Dqc*D3f;f6;gL8c~rRW-94eeU!Ss0`tux
z*C=^2cMcV~bc@VB(6>e~u&saEA)K&aZQBCjocmS!TtTnupD@c-IDsmd!j+uDN?sS|
zM`=sg7Akm%5>}n!bubfOhO_E#=sWZe7zQod6L#I0#QOFix@8B`HE&9TmOruVc*-tu
z7Iq2eQm~hjJPC*XwGu7w#9IDO&mb{p9k0Omh&c<nGR<ETKQOBKm55VV3W(Rhh9$8a
zb{0I&?A3@epFav@HYP(jiamGkyoNT3y2YwgLyjEeeom=YJ=`Np_<zTD+H}o`-xI=h
zUL@9Y4B{;;ISPi^t^fzK*i}MVQW$|a(o&o!9cRH+99hI8(XlugqV^YvLzR$)BeGJ#
zB8Rqs!iED%aGsEn5qL{Jb_z^+J^=08?D+u}4A1KV#}Aiu7$gS!FnbByg!-6#vjP}{
z*hnlR1sh<P9g)Fx2NNp7aR-ADA<#se@qIb-mxZJ0DlZMdl=JK!R+wcW1jYug2jJ+v
z3J$$8p22xOc+D>haUKS-6vZcXZXkp!X@-LvCNTd2vcAQB0`jS@=f!JcWKrz;+E2<_
zsE@b}hS~E0L4?B=NB?9ztxLJj!>eqlfCJ`u&D$`{YUd*vBdO2zB~00f2&47Rc?Z~O
zp675X3B1gX46wWs^p;P7#Lq$SV3?iYBP2>71htTcuh;@(Ly;f}B$f?ITeKpJnyjUu
zgB&uz=)4e$$dTy$l23D)ROC_Wd>Lj>!kA}^<1cJvmh3xypOkTdXo!se#^w1XAcMG!
zPN!{I`2X;6O>9bfmK3yUg<XujQ>9yVQu+=E<>zp&A&4cJ1lWM062DQ>y`(UuNmN7x
z=!ZY8j|UF~1Suf#1u;<~h0%H0DXdPQ3#gGLohHbaikgPj>NG7xRz^2jA@D@7*ffk(
zHHHA^0Rjw!V!TYEy{d5m=1FC^SU4(6*gLH@EDGT7ftYbDh~s9QA3|}_m;zxo7<1$r
zuK_%Q?W&5f9Wk8qs!Vm|Jnfv0OQkEoT=N3h0RZdOstHs<II5`CMPDRgFkSy?&=8~b
zVg;DPhbN#q-ls3rMI1{fQCDw8PygQnwS2FD5QnX&s7y=eTb{d~wet_2*=RWc_Gx?9
zTUYL0N%kImw|1lbD4;>z1jcx-E9q#1cWcjML|5&*Z31idPW;XITF-jdM&n?*aVXU|
zw9&Z#wo$EX%nS}C>vyhQ`lx<))(q5uE?YhSi+-zP$-YA$wH+$&{{;GXCOduU&I75=
z0~?)(Hd+p^oc*`Dj*oXAdGG4x?lZTUWJ6EN)thN-yY0xDsFt0Xou0LWS>)`mdeVWa
zYkxTN#>}dHZFFra+0^%-X0vK2>DZS=^!}>sj=Bcd%Bk!C>gamw#k()2+mEK&k8U2D
z+-Sc5Q_$X@ZXZgu4?Q@w(S9V|cx2`DqaJS_epVCsnXN-jNB{GQg<wz`&uj?@O&*#2
zjhvjm^y}*%&L<CFP9AzO)$yW+KtJm__+Hm$&v+RKwPPlKl)np$_C0F6pSn3xJI114
zHhLj_-(<w8{TZAd!>#WhAE}4J!&+*r!}zeyit}v-Xn5E`;XG>{@1q}j_KkPbziKx@
z{ja(isQFbN4S8b8lwemJa3B_W7BZSeUI=U?0Ih<J4e5QuJq8Hi0n~~Xus$ezDn$qr
z`M?MuABrF!Lk{`S3Az&UgV<aS5lcmQ4+0<%Siz!*F^x_EQNYSPm;(MQMv6MtBRK0M
zanPINp68?p*qIRK*0f{}v|!a$h~U8yBr0&JhfH4)n8ARBs3c7<)r^z-Dr_Vej7gwT
zhFJn>q!5sKOxmT8A>1W~6RfUOCMrQ7j;JV)QPVQS*sQ{VX#p<7kg$|$*KWp;f{f!v
zSVd?->?+VJ_z!3vvv}@|EWVlHEur{V=q)coLY!ZpHFafb8#7HUnb!79YiFj;{lso+
zwyjiwt<}Ect!M8(n{M-_+Pv$_8*RtZ4aZjOkL(RUJn_bf)kB;1ogX`DlJ&dRu07~W
z^&Cm=I-0CKw&^(j$l3D4SKoLQBzSZ0$q%PC_dd7byqL6H%-T`o@vmeYRQjH8<N);x
zYGj}B??-6JPI{^(94eA9jFoVBNOIuh08X&0fJ7a&8N}W@VlPWrDH7wG%%AisF1F&N
z4JYk5>C_X<T@FKX6J-r`jAw0P3&AV9+sSmTE^HxqWxEbD?)7K02wv|FZ{b(=I<*t(
z;#mZ*cYzFE*>lu!#{Hlri{SO1Z41A$-!eFvDQdkAbP&9gdrv+g8EqtZ5JC9mAH0J2
z+3JJj=~s3*BX5FhHw;|6F_Q~b4bw|vaQh)3vB0K8ssSBqw*q#O4-`wZE<JEDC2xDe
z#S~Zq1DP$AA-i4L2R%#TzIZL-os~zBzbx?>_rey`Yv^Nl@49`%y)S=%L6e??&{#57
ztXJ@Ow;n+PSO37T7XmE<q1HbcNCnH7uH^j!?S!{Z*|UIyxn)m+(&s{dlfa?AI0&q-
zouFe3mGqQup~5H(6~nK2Jw~A`=++B*SkObqd+`*)1syRGli5YMHwHI%NH`GdoPZ3C
zthNGJ$xeDK07!795K*h~9T*-ABE=tyM53x=3L<Dim?vsStrq#k;?`;s?JrQ*LeR{Q
zJA^Q1h=zD@WW5fbAaYZze5CI~Enq14M5(SJl<}QU*#ae6=1X89&q4AU`qa{pHDX^)
zT}!&oldAJ<)b+27Wo^h+pLX`7oIPvGJMp*U>E08m-V^UJ8@;DCoD)gQ1aVupVYhWz
z*Nmes?ErsJ*V?{+a`a^xAoxngTJs045rlq;c8?6wzZ{?-h1q+$TEWN3(-p^G;8vV`
z0Y7;cBv^4!%zmsm;FUGmnA6nSP!{1g+%$MXGMd2Sex622f|qJiz`zpcwA+VxZ)rCY
z$vs4T_)3GA%7rfx2Im2K`n3yS<OY@M%-=2a5-Ww=(;y0wTqX&Gz*Gw_X|bAW2%F?8
zyb&o(6sYQoE^)*c>z6TUKtsdiW|pQ+iGn15wp4R|WjU%{vpg>SH4Kn~pZqK&S(>7#
ze?j)&p@Y9g2R}v5Pf+jgQ2!^W^%K<f@5uWJYJXy4sD|6kPY}F*@48G=Q`8rTyncV(
OR85_x9wVHRsrnz;60-*Y

literal 0
HcmV?d00001

diff --git a/app/services/__pycache__/llm_parser.cpython-312.pyc b/app/services/__pycache__/llm_parser.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..0867b819f89c7f54c9db03c6a5c39346c92d14a8
GIT binary patch
literal 2916
zcmbVOUu+b|8K2qR+xxS3w(nvJ!7eN$F<gxAOhVcUXo-nY<C2)f4yrgMUafb>zCHHO
zb!N}UxweD?tw4%Gkg6J~sl+1|RBd0Qy!b6@73oe^d)9`$G!ISR988rI^`YPF-5Nsa
zOUK@CzVF*_zS*1k&F`B(XEG@SO@8~2<u76geZh^WfgQH3ThOf_6PegXrB07a7;M27
z9I+%iQc1$xFWNCjF3F)T*>NXPN?;_QA0sn%8JRMXZ;PFjrKEs}WX4IN(%<nsD(@t2
z3*9)yOkPH%w3z~#F%^(KFtQh(jG4ZTJ5f=OQ3K~Q|KQHaybvg3tl=7Gs>B_e2;@$m
z$3gO>&dBSYNo+6@jQGCg&NHKvsw2B!iM9@by@CkhzqTZtMMQ+J#>c@MYQnZAa;+id
zV?lDtt4|wc;^;w~5yQ7UH&Dohs;#?vXvA5~adg_4gy4jxxw=C%El6pa<C!&^>uF8<
zSxvXQZ$#$PpjXp$*Y*4`1=F-uw8cO5!DO-QIi%>>wYtu9*RhJ@o>6m%>$BodJ$k<Q
z`swka?)sjq)h$ZqD9l#WtJNYSbipzRE84cBRdot>q1vDcNE!;RupEeApp8u5rPnsH
z{p(UcsFz=(+#Loj4wikt%8nL`9;`C;YCdGqEttS6E*yp}NX)`oYc*;Iaqwy9NyF&w
z8Kgic4$mccwswQ}G{UgfKKF;M(#1ez^S&ZIYIXDq&+=F^u_>7168a7Ltzb$Y3iH^E
z`TQq^t)fKQ!~S+xHRTV5JMjpomjKm22Q>UzSP_32i=k!wNS4gQ(UG=<D?O3^us0J=
zgHh>w=!vunU&_Z?gyw1Vee~YZ--&gcPfk7<nD@MSn-u4XW4YGI3x&~<Ia_Dtk<rn^
zfmDO-Tiku>f=_kBS1GZ{0&Ke4#oy7?st`!jvq=y)Jl6+w1WJVgCcN1SKomAg@*7ps
zb+Qn>t6~rXL=rnlID|2Mp0J>o5?1wG*wQ(VI=WB!7UffcY}gi0Dj8)RQX8IAwFw71
zgIAR301LkOK{iaNg;RHC37+b>7IfP(AufCeUs?7zID>e1YB~VJ;YWzntHUpWuV8#?
zRi#hyyh@dN5Y9AsO$*@Rq{5a#e2Kofi0)>0t_)lsXlD<$vIpDQVk=wR=o#J^*w-9A
z{mCnzywohb*~(2dv(p><3LiJxCrYgorS%hUw|2d=8ArL%EhOdSuaT6NFOJ_&q4bWn
za<HWwyw(5v!PUV(=&O6%`IlSym)rSQTlrVlm5HV_v8h1xgYf*kJC;7F;I$rnGAXU~
z9s~VbM?i*;<aD$3ClLP^{CS4QS|$0I1I+(E5`IYiJn~K~0{k)t2&bPgS`H!n1aLnI
z*3&2-pQJk>4c!G|apZp?Q`KElowIz^FB7$ch@&aWr=okg2=SGq&ww6Cv&7fwv)uh0
zw__dF3K=WUz0Y&69C}SU492%G@|{O8q5-AW48KN+NneD(@ABXo5P%gWd+pfOV{K)i
zr3|!{11;r1TN!C7BN1X|n%Zo0#%LDIR*p2Yb9Z;`1C*o-fD#UnO#n#Wwc}Th-@sQ-
zw3YoWWq(`Ax0HNS%7?^hz%<)!n*hHASC;^{3DU$76}*I&k<U>QwkMI`5{hE^4vwR1
zf+5T!LwE;BYFY5NDRN1O;;swt^-5@2gtyyhsca)6nhT*N@qOUvE)9wu8Z-nb_V5>#
zX^f^}E)37IQ0KIjdj=gw4A;ST4rm<;GrAz2Lv<X|nJ~GSfWxjA47QM$1Bn-pKm;R5
zja!C4MRb$U@Ek+g;5FRx0(o8sjL^Ik;JH8sPT?WEJO?pfXXhE?l|hBpVFbz1Etmcf
zd_(Xs0-{3&$;_3?dh+Q_B*qTic=K+ye`U}0JvZ@scK;Q5W8}rZ_8qxt{A%FF^k1_3
znteyE#6aBLF}N~wedgxb=8oZZ@9@`g82S|-|F6IG9ogs|{wDN<8UM|G05iRt9(w`5
zzk6&*yp<c<D}S^Tw6!cAdsbe{iCo{2f`GNXGS`P<W5xK|a2z~~pT1SLm<s>8%5=`5
zs^$6=t_9&WLIr5+^Hi^v;kf9&8eS?amxYH13aw?R!0raA`j7z5RkcQ|9wVwZ7e<)f
zi$Ou1@Krbr3zkV3AK{Y=z6yj6b%U>JXE4-q+j1eG&ZmN<Z>Wau@;RYksJ^G#M5nGg
zM?ELZKIhrCSBINaRCHJ&-$(gs(C>i=WWLF|MLCKo->V=|ZJ1D`EQ9ikW{{0;B!%co
zMme)}3P<kD8>c3x&YXSg)RZ>%#)NkI)VYupLXd<<INWRC1j<(|%Lj(H6kZ-4B#w8k
zRzISzfXUAnn+35cVvIjSJ)fb}-_g+LXwT<p=)NT4><7vF2)eJ*DAl)-8Qw_ux6=n&
y=>zv;VoJCu-%HE*C-`1Y#?Rl&N%#=NVYwv_ZX&eD5#e4Icg26gJf0YS!+!uf%)BuG

literal 0
HcmV?d00001

diff --git a/app/services/__pycache__/openrouter.cpython-312.pyc b/app/services/__pycache__/openrouter.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..46d57bd3895341e45d2e94e48ee40b4cb14b231c
GIT binary patch
literal 8681
zcma($Yj6|SdS_pfR$j^P_iM2YMv0LO#6VmEacqOl!w^hvFYZls7wy_!MX&6xZLCO5
zZo)L8<p!8L_u47V#QciWa9h$}w9_9Uk4|Pf?N~8Xn!RMg%*~JHpTKki^#15~cD0fq
zx@mW2&-wN|zH`oZzW4r{%jHDy?EjCCXaBw#q2J>V>!I_&#v1@EArXm02u<Od3=v^+
ziVRaz6k)d0Q?${~Oxd8OLiVs@%3<`;AvWxsavCrbst9vaoB`WHuCRN`9j=_JG`{Vj
zs&Lg*6@f@}5Q&bnNMt`E%^gftFA^T-clcw%L+bPqS=DsMv6v=DBSOeS=&td|yrgPT
z<)m;rBte7wxDpM=w3AXe77{ev;vAk8v|}+TGBge-9M%M#vmlHV#yM0;)8xpE8Zg&3
zOE&+p@s9v3Aqh<pBAOyu#3HduBqd6sfqHOo3!0`y`YbcWh>T<ly7RM3D$yo6M7zXJ
z+u*45lv89SR;n;?*hK<nJE4{PL2CuHx?s&}k^3WfU4Yk}$L$a+VGnK~UX5ODof9Y%
zkW@9QOz0Krt0BJ%A3#UFYF-Ke1^wb_e;_1F5iLNK?AHb`{@zdsgBOSd@)x?b!pH;)
z7FvsdNX`&fDeE?2X8v9g((KkK&0#@7gCngkvleNQ5p7rPALV%xaNr#o53I>qaIp?n
z7GVq0sx0UuI0b$Hk9D;ufzMP}J3+!%g*giLiEqYuf-M$QtVPFFwgh8g7VNB{ILd;8
z?G{w5MW<LHax!t%RYIl02^NvZJt2GyR6Z4*;{`?Hg$OU5iAjpai-INqtSLeOivusf
zZK|3O(Y*Yq*~-g#;Q(Mds)Pk&+%yOXugyw)Oo`6Rq9pRelRq)B2@#Q3ByCQK@UjL7
zd4UfBPCh!#tC}(wfF2mGNP(y#s$Qq_pgb={3|!OEP$;?p(o;mHL>JUS=WDOdfqcUL
zl1}G=sKQG^V3vojK5Of6BC&}531ri<6cPc;Ha;SRr9u8AY}-0iugXu)g+fLT3=rnR
zgZz*RLag$05m8e3un-Yuq%bfvzZK3RM>J_hQu_EgRpR@3d76(z;gIu!95QqV;0Vl0
zfmi&h9EYV$sWE07%{*>45CJLRpdJ{nnu0SO4a}*7{G{>48(&Wj)L<w6{J0X#a9=Z0
z*%^v714J9-4@%J)MTpJH0iZi>dIGaa4ZKZ1+$nItV>>sS%1*%oFR1}VHk9cgKT`a_
z_l+6EqV!|)o#WwHRKa{U$O8$+r-4a~YvN_~af-rx0}W<Wlj=eYcrK>MpqP14nil3l
z8b2?DfJ6ALps=+?%m|o?fLnPDc-zFR_Q8GOrpR3Qw4|t*^rI0Kn2{HN0ZIlMB7)bD
zl+Yp%ClX@7u$aRF(TFC%h9Xe|RH7lcZ2`*;AOw6TOtue%Bq7p=HzkYwp~+(td^8eT
zbUO3hm@0<c8M09#6PB1*BvZY*!{U5wb+o)a(ktaiY)<pf3qS?1SZcg&R?}kYpw9=J
zjwsPN*pF9`eL_t3&2Nu4;#T;aiD}!Ss=96aj{ehw(~6{EMe3n-JB;(cA}#8U(}F7b
z=ai7ngrlMq(kt+ik|JQ4jaPVmfmx7y0hBzLIyxq3vwFR?*-}PO^@<X<h;JXaR7goH
z=mnstIolABJewK(jm<O`R^1VnGyxOKQ>9eHZWXMzbsO9c+_Zw#zk+qHUUTgD=){R*
zFP$7c;U79a?ms+wN~c1hu->pBYqS0$k^Mj%pq+yCw(c%zhrx;q<HP1wcbF#?PwQ33
z;ch{Hfp82PhD~ppku*PWen63c2mJxiS|GF_Rl2hXdnlb&rO<Q{r;5!S&-0Hae6!K8
z<co&p76es@gk|3d$Udfl>ig5E@`~^1;SnE*Ml|AIkQHfK0Ybo4#(b)z%*$YXOK&*_
zx90cD5n1#5<1H4Uk6TvSYxUK@;_6i>-at8LUGuIKecpAy4>fMi);BM`eCg$DQ`e5%
zr^rSpbX44Tpt_FRb#0kCU%Jkht>2lgc{<zFe4l0NaK9txMxEX7&%QgG>3A;P@!Y?J
z);kWT=$nq#Z0i<WH)Z*LT(^JAZ_V(}ruk>DCvWmcjizi%N2bM>Zt-PW#?mcg+3Kx#
zs@m5&Pvj`F!JT7J>o~E2m{wTlmz5i^U!*gq1uZbEvqt5Y#WT9iv@egj7?=&-f_LLz
zfvXQA0=t|<l1dQ4ys^f(tmmUVFoIrQN9boTBS|M286|BACW!CpiKk?|njddZIyA-_
z7A&|ERtq=>4$G*k#Q8R@=qME0g9TGxoF$TC!PP0-PyZkNOtC-7ibN3e^b?P0JBt$R
z2(nIP?K8<H*n}-%-wWf<&(TP<mz$li{Wyg#*e^H&)C>xcFN0%c&=zQwuwMqZJmI*E
z-lHy&=iM|4P)SG9URc|iphPwSo@*RdH^w@lB;RA=aDpUt{uT}1vQ}_v#EJx27|Xp!
zS$GI=%7O()t$6TYZ4tJhL}AujNOZjh_x>AdiTX>1LCK0lMX=5qSF9)DR78H~3urew
z-_VV;f|J&R6h6t7?9pAaM@NE#v(v?WVhWvtb(7p*aFh9ZfmlFis8eWx_${?Scq-!^
zc}eC4a2+BuA&EDn02^1V6?OE8a6TgVM}+GU!3Ih>9lN?a;oTU4!jy*udbtT`Noa*}
z3BuZsh+g>_Xb3t%h{QZnp#BZ9L=fnF3yl^@V>i@yJfQMWiWe%S9~ax9m;hTqfiDL-
zoAmbUG`5H;wk$laY=P<#yLU#4fWN8ii|;I>pg@=7h{A6i%)2)f4G1B1pSLjSCBUIR
z4aI+<2XCNzh<>W!P8)x_wPRVk{Ol_EH~Uw`wOwOr@7R3?HMifTp#PzYPyc2IPsGVS
zK28quar#7bf&UI4JYJorDfSUeYQ`zA*E<n!990x>5b_%;c6&J8A%jW9&Zy1+6jpUI
zCh83Mei1>Zz>4U0MS@r{An9~4Do1n%9F&Nr@UR}3SFm!?S0vqKT01Zu*wX9l=$U|I
zI8GjyvKdAy9$Y+y=j4?g#!O+2N5R^9h2dC8q91=y*kCh;<CE_Ie*nB7*hzE&8$8`*
zDojOE!9`MaLQ=6V01H%nj0@7;{>Sbho_AZLN}f<r#ryaO%u;F`h-q9&&W7q6Q*5@W
z{eAbl?)9dv_fR9d``lP+_+ssymbTR4Y{#be<#*-FuiQhGuAP^h7paRc-L9=)+H+~o
z^60Pk-?yRWw&l^w&TpFecN<sgR@C+8r~eq7V^KX1_NROE2fbH%?;70D<Gatmt#2T@
zyW!&DoD*%`dGYWH^;KO@t`io_b)m-ACHaz^9e6%7a3DQ!V0~aLQ#F1izM`&<UL3wS
zuuLuwEcbp@-L+mdz5zziojM4Iay2YJvqG&1SM1A+tK{nLRnN5@Yt6gT+^%!uIUgJ`
zx5LIZ+~%s+>Nc;)*EU@VulTPGuhs0i$vvCx+LGxSNOuinx}HmSJ$GGR?;6Xrjirue
zYxzvAH(l#p<Gc?yDpB<zBFCZjapK{(l}+~$nMe5a>iBiy!=tPFuXkO4@j4)V{w6n+
zt*!s%`2XY>81x7A&)^Q;9I}mWB5trld!YJ^s2<tIeOA+q;lBQn0rYtrF|w2XyuA{`
zeRgR7d?$h70d};5`g~vgXfyRiEdlTsbqw@=(ad1Dg92D*EoM~%SeUH`n=!-8?t$yK
z)Jo9mEvZ#vkw_3=V4*Wfmc;mE5DVuMPl7CKLdjqut}jl2vDnx@@jz>tI38Ndq))Dr
zALR`;I0$8P4ngc}KJgUI1*pXkjSbBcBk*TALt`0Q0xg<~or-r%3iHL_NaeLCKl$Pj
zL#bmYe8OW>aH>G3AS4C9vJg}1Txq-rIhbNlh}|L_+<Ba)(}}$k1Bvn?hECvO5*H_N
zfz>Kt7e(C`h=#+m=Alg`bPRVmtlI}q{xrCkW`4|2t5|!OdV}+;a0tH^^+P5o-Z7pZ
zRiZUY`ei_?z72(;GwxK@W-2?ss_e{iH6UuuUH1^ndQ)S!TY9tg?bspB*0n7ixpXAk
z+MQ|jrdz$)_RX309qINR*|wfcTYtK(Kik;3^!laO@4M`+?$qI26>?Q)xVALcmf`p`
z$FH>B;<n{#0d1}hv964xDeY*=I6Bjg&RdR64^_+oZ@$p<LJRR2gJae*82p|82ZPT5
zYr_lPPLeKR@q$$a_J2L^12N!R+RCv$_Hv-7j0cH*rKBBLxZpd&-tteP;5mWELzNpL
zQh>FLpvZ;Y!A!dBOgh29u9))k%lU+&O=PUp)8k&w`CovbkV+>AOBq2dD`sSZO3;w3
z1Ah!ZSZMNM<A`tiq1c#-$D-uK>zpPh8d2rRd_KdJrh-!Q>x5tN0d|PfSkVf<bk+(t
zRcv-u4{Jt)tOQ8|90p5zoh1vU@vdHNww7d2bQ?r$QbaTba38Fs^x)zrcuhx9(v&~P
ztvDH~{2U6Mkd>d}+5wyL<D`U&HK1j&O}VQn$Xy}n8t?fL=_`zS4N$574n<K0d%mjd
z0U5kg*8mpWDBM*k>f6?yl~e1jKJd2KL&UkmsiD-FY)e<BWp}z|_geFwwLM3_oXt$k
zrYC0Cj>>C|!PHomt2d=@`KLFzEyb_tmDvx%SHf%U&)nn&vRvhbL+1`HH?M48>0Wtp
z<;(}kE6Ggn-gNKY>yw{!ulF8U+j4NNaqJd1o@4RScR9q>-{Gn*96NU`!*!;)&Q;I#
z#V-SET<0zB1cZxReO{7>sbPlDNpV_tntl&XwdwA#a0VjuNMKG;AixfknjTD8L(&~!
z;NC`WbEY}NvJ5X%^pZuQoNOmZi$g)mp8_LigIKc^KcL4CL{zo}iKChp1k%g|3rTCE
z+b9xtV+`zLnzTG}Ffx5KD#nfFWVvXP{)$VX6gN$Z<XHzqrUi{r)+jO~=mHx6%>^+U
zXow_BLbRGFB%vWb;~+jGA%3JpS|;8hAU<M@_=wEMN6v&3P7NHLtSH7iiHedGdERMI
zi1T%zWy(cg6(uL#0$PGrcAP`O`XW^<C{c)-p4<n4U`C@gu2}!rj0ByJ;^2Hn3^xmL
z+!lB%jVQ+!AIuxDVip=welbY%L7pdMMhuW^G&0WEhJ)qCp%}K>s*zE9EDXb!{VRAk
zu0!!_((;&$u$V}a=jdn9Tktcel(=vxs$9|n{xFc1k_AGTbMgUefE4kcG*sw^@5gaY
z;&9amZzH^HH`<Qg!efA8&7gNTVvtHKKzMr!R4ypp(m<4eAWxZq6L=U)yzrTe0gIu~
zbXJwZkY<iT$bp>?&{}W2VXCYKE5b0-an`W-b3asD*2sW5UyKH=#>c0>2a6bg+8!7R
zE!*h<>z^9E_-wY3{~i&pVdAb)X4^Mq+WXV({nzTRv+M0IWEx-ip2FScpnJxk2ie9K
z9^{PX2WERIia)5wVX?Qkp%&r-vEO!)0^nB-Lk#sHF+|w@jsobnxbvTgA<Fj8Hh^vr
z1T@~D2n;i}p+@_S3S!93-QesPuC+tEUTtQw{E`(4swXEP++-9XG9y6-Q;O)c3?Z>1
z!s-x7s*90;9M#!Li-Njs2GX#SqB~_s5a3@o0ZAFggKc0uumPHa4`l`lE6SY34cHx1
zf>7Yd)eI2rhTa4wmT;d37g1b<ad89+ohywfl?XH$A?434^91$t=6zRK7Y4T&Z~xIt
zeg~te250M`8BID+ZGFlPP9d&tS2tXI^>4eEPhaj`8C<XS;s|LVH3XJw>ENY<%R?)~
z%AqUnwK`vFG|Saqm^e4FJi4-d)p=$ATI<eR+%q^>8o2L7^^nWloQ60pvR*fkV*i__
z*kR&6hdQ^sA9**D9X|A*uEW=+((ay>sax*;uUv<76xHdx)6sR8f%<zyH9J!-9H>2o
zcd#vW<aTZQa_H*#t=e7Ls>UVvCHHdTR#hKfZrfca;%c)s4VjwmbWQil!S$NH4A-~D
z^%-rRbd6`VcfDpuhTE~m?YL9fka9n~*N)sr2w1b(`5@PXT-zRGxgC$xF(A#^p<N>+
zabvJ@c(>zI)(P;ZwZ!mV`qR2*40}CLe%43~@1s9!YQk_oF@osNwsRQXM?m>G!lj3t
zfWP10VTVvjLgA^ziDR5(_xp9H-wz4@xe$gqzyH-aA!PO_*r4gQ=;<J&xRqW!`A={Q
zI|AZ=0B5GbQG!GV_SFmpp<t_8<T)h)pC2M+D{jTvAO#0$Mu8(NBO`ql!DV~4Ww&j=
zYwu^Ack0`hr_=SDa}0byPiyzeu5`;&8@Mkg65UM0YS%yXeb|>n0DiL5sJ?t=1ApZX
z5mijr>d3XG4<~X6!0Xxuu5vxq%p`HG=h~T%y*Y&8Pfl(ab#5y<N*p8ZQawy#wx({W
z?^0jRfct!AePG<E@35}>42&@kg3sWoQ8cJ@o3<E(RFO#mOf6-T(HsK<A*gw3iKo3_
zA>dDqg7u=_5CZXrJeeE;-$aqTAT01ZLp6VHR24hT|2D>N4!tV2D8f$xIp9|z8Vmz!
zgVNa08^1tI!k)#S1!6yP>@}}-pYqqx0WyZvJ}5xT5ybD1`x~_B_o(uBsP=2r{WaSD
YHR}Ba?fe?GfA1DZqVXQWrGf2#0i7_44gdfE

literal 0
HcmV?d00001

diff --git a/app/services/__pycache__/settings.cpython-312.pyc b/app/services/__pycache__/settings.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..64d23fccd82b9ecf9f956752e9e4a5b98ab989df
GIT binary patch
literal 681
zcmY*X&1w`u5U!q?UB|ea)u^BeKER0VTs%n#s98j!n~)g9xirI0Z!%=2d(%Ceu;N8X
zAmRh$8Du^94jw%U5*j>t@@CwCuqUhMN7;s{`l{#a`l`D6OS3r#xITaTwEGeL4--_Y
zR)WcW1g8)|L=s3zKnPgy1S0DQBAb!1UGM_S0%ncNLxDYVyxLC_c|<oJg^F#lEQ|S$
z>Xnq-Ib|@phu{<#1SEpM+6HDbCnC$Z`(JEq73`p9l54lhs2A2ZI^E6n?H8R*`f%eZ
zUF*D>Q6ZJe)fttDV40R@Pg_-{_G7{O@y=_f&G<prbF@n-4^u{|o})As(J(Q%LFxW5
zOv*nla%n$K*aw&xq3JV!SES4r$?z~#Ax~p}RrH1_<C*eb3c2S$Ut9G<o(WD5W6Amw
z75s28@D-B>agQkk#a*=rBV8|Qk}jscgr73z2+pqGe&@=0#L{%t_3HAn3Cuv{6*Ssb
znba*xL(WAOX0hOk(wr=thLoRe8DvJC8#4~IfaVy^ZY_Ovmi{=Tw)h*+6m-Kh_i}A8
zim-#YN2^k$c?WqC?f%=F)tS1>#TBl|n@BcOQFqY%vI!yI;qFD<A&Vz(E&$#6JTz{+
JfAF?w@h@y|p?UxS

literal 0
HcmV?d00001

diff --git a/app/services/investor_parser.py b/app/services/investor_parser.py
deleted file mode 100644
index 8e2864e..0000000
--- a/app/services/investor_parser.py
+++ /dev/null
@@ -1,449 +0,0 @@
-#!/usr/bin/env python3
-"""
-LLM-powered Investor Parser
-
-A comprehensive parser that processes investor CSV data and saves it to both SQL and vector databases.
-Supports both simple parsing and LLM-enhanced parsing for better data quality.
-
-Usage:
-    python investor_parser.py --help
-    python investor_parser.py --file="path/to/csv" --limit=10
-    python investor_parser.py --file="path/to/csv" --use-llm --limit=50
-    python investor_parser.py --search="bioeconomy circular"
-"""
-
-import argparse
-import json
-import logging
-import os
-from typing import Any, Dict, Optional
-
-import chromadb
-import pandas as pd
-from dotenv import load_dotenv
-from openai import OpenAI
-
-from db import get_session, init_database
-from schema import CSVRow, Investor
-
-# Load environment variables
-load_dotenv()
-
-# Configure logging
-logging.basicConfig(
-    level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s"
-)
-logger = logging.getLogger(__name__)
-
-
-class InvestorParser:
-    """Complete investor parser with optional LLM enhancement"""
-
-    def __init__(self, use_llm: bool = False):
-        self.use_llm = use_llm
-
-        # Initialize OpenAI client if using LLM
-        if self.use_llm:
-            api_key = os.getenv("OPENAI_API_KEY")
-            if not api_key:
-                logger.warning(
-                    "OpenAI API key not found. LLM features will be disabled."
-                )
-                self.use_llm = False
-            else:
-                self.openai_client = OpenAI(api_key=api_key)
-                logger.info("LLM enhancement enabled")
-
-        # Initialize ChromaDB
-        self.chroma_client = chromadb.PersistentClient(path="./chroma_db")
-        self.collection = self.chroma_client.get_or_create_collection(
-            name="investor_descriptions",
-            metadata={
-                "description": "Investor descriptions and investment thesis focus"
-            },
-        )
-
-        # Initialize database
-        init_database()
-
-    def parse_json_field(self, json_str: str) -> Dict[str, Any]:
-        """Safely parse JSON string with optional LLM assistance"""
-        if not json_str or json_str.strip() == "":
-            return {}
-
-        try:
-            return json.loads(json_str)
-        except json.JSONDecodeError as e:
-            logger.warning(f"JSON parsing failed: {e}")
-
-            # Use LLM to clean JSON if available
-            if self.use_llm:
-                return self._llm_clean_json(json_str)
-            else:
-                return {}
-
-    def _llm_clean_json(self, malformed_json: str) -> Dict[str, Any]:
-        """Use LLM to clean and parse malformed JSON"""
-        try:
-            prompt = f"""
-            The following text appears to be malformed JSON. Please clean it up and return valid JSON.
-            If it's not possible to create valid JSON, return an empty object {{}}.
-            
-            Original text:
-            {malformed_json[:2000]}  # Limit length for API
-            
-            Return only the cleaned JSON, no explanations:
-            """
-
-            response = self.openai_client.chat.completions.create(
-                model="gpt-3.5-turbo",
-                messages=[{"role": "user", "content": prompt}],
-                temperature=0,
-            )
-
-            cleaned_json = response.choices[0].message.content.strip()
-            return json.loads(cleaned_json)
-
-        except Exception as e:
-            logger.error(f"LLM JSON cleaning failed: {e}")
-            return {}
-
-    def extract_structured_data(self, csv_row: CSVRow) -> Dict[str, Any]:
-        """Extract and structure data from CSV row"""
-        # Parse the investment firm profile
-        profile_data = {}
-        if csv_row.investment_firm_profile:
-            profile_data = self.parse_json_field(csv_row.investment_firm_profile)
-
-        # Create structured output
-        structured_data = {
-            "name": csv_row.name,
-            "website": csv_row.website or profile_data.get("websiteURL"),
-            "investor_description": profile_data.get("investorDescription", ""),
-            "investment_thesis_focus": profile_data.get("investmentThesisFocus", []),
-            "headquarters": profile_data.get("headquarters", ""),
-            "aum_info": profile_data.get("overallAssetsUnderManagement", {}),
-            "funds_info": profile_data.get("funds", []),
-            "crunchbase_urls": csv_row.crunchbase_linkedin_urls or "",
-            "crunchbase_extract": csv_row.crunchbase_firm_extract or "",
-            "linkedin_profile": csv_row.linkedin_investment_profile or "",
-            "source_truth_profile": csv_row.source_of_truth_profile or "",
-        }
-
-        return structured_data
-
-    def enhance_with_llm(self, investor_data: Dict[str, Any]) -> Dict[str, Any]:
-        """Use LLM to enhance and standardize investor data"""
-        if not self.use_llm:
-            return investor_data
-
-        try:
-            # Combine all available text for context
-            context_text = " ".join(
-                [
-                    investor_data.get("investor_description", ""),
-                    investor_data.get("crunchbase_extract", ""),
-                    investor_data.get("linkedin_profile", ""),
-                    investor_data.get("source_truth_profile", ""),
-                ]
-            )
-
-            if not context_text.strip():
-                return investor_data
-
-            prompt = f"""
-            Based on the following information about an investor, please extract and standardize:
-            1. A concise investor description (2-3 sentences)
-            2. Investment thesis focus areas (list of specific focus areas)
-            3. Headquarters location (city, country format)
-            
-            Investor: {investor_data["name"]}
-            Context: {context_text[:3000]}  # Limit for API
-            
-            Return in JSON format:
-            {{
-                "enhanced_description": "concise description here",
-                "standardized_focus": ["focus area 1", "focus area 2", ...],
-                "standardized_headquarters": "City, Country"
-            }}
-            """
-
-            response = self.openai_client.chat.completions.create(
-                model="gpt-3.5-turbo",
-                messages=[{"role": "user", "content": prompt}],
-                temperature=0.3,
-            )
-
-            enhanced_data = json.loads(response.choices[0].message.content)
-
-            # Update investor data with enhanced information
-            if enhanced_data.get("enhanced_description"):
-                investor_data["enhanced_description"] = enhanced_data[
-                    "enhanced_description"
-                ]
-
-            if enhanced_data.get("standardized_focus"):
-                investor_data["standardized_focus"] = enhanced_data[
-                    "standardized_focus"
-                ]
-
-            if enhanced_data.get("standardized_headquarters"):
-                investor_data["standardized_headquarters"] = enhanced_data[
-                    "standardized_headquarters"
-                ]
-
-            return investor_data
-
-        except Exception as e:
-            logger.error(f"LLM enhancement failed for {investor_data['name']}: {e}")
-            return investor_data
-
-    def save_to_sql(self, investor_data: Dict[str, Any]) -> int:
-        """Save investor data to SQL database"""
-        try:
-            with get_session() as session:
-                # Check if investor already exists
-                existing = (
-                    session.query(Investor)
-                    .filter_by(name=investor_data["name"])
-                    .first()
-                )
-
-                if existing:
-                    logger.info(f"Updating existing investor: {investor_data['name']}")
-                    investor = existing
-                else:
-                    logger.info(f"Creating new investor: {investor_data['name']}")
-                    investor = Investor()
-
-                # Map data to investor object
-                investor.name = investor_data["name"]
-                investor.website = investor_data.get("website")
-                investor.investor_description = investor_data.get(
-                    "enhanced_description"
-                ) or investor_data.get("investor_description")
-                investor.investment_thesis_focus = investor_data.get(
-                    "standardized_focus"
-                ) or investor_data.get("investment_thesis_focus")
-                investor.headquarters = investor_data.get(
-                    "standardized_headquarters"
-                ) or investor_data.get("headquarters")
-
-                # AUM information
-                aum_info = investor_data.get("aum_info") or {}
-                investor.aum_amount = aum_info.get("aumAmount")
-                investor.aum_as_of_date = aum_info.get("asOfDate")
-                investor.aum_source_url = aum_info.get("sourceUrl")
-
-                # Fund information
-                investor.funds_info = investor_data.get("funds_info", [])
-
-                # Raw data
-                investor.crunchbase_urls = investor_data.get("crunchbase_urls")
-                investor.crunchbase_extract = investor_data.get("crunchbase_extract")
-                investor.linkedin_profile = investor_data.get("linkedin_profile")
-                investor.source_truth_profile = investor_data.get(
-                    "source_truth_profile"
-                )
-
-                if not existing:
-                    session.add(investor)
-
-                session.flush()  # Get the ID
-                return investor.id
-
-        except Exception as e:
-            logger.error(f"Failed to save to SQL: {e}")
-            raise
-
-    def save_to_vector_db(self, investor_id: int, investor_data: Dict[str, Any]):
-        """Save investor description and focus to ChromaDB"""
-        try:
-            # Prepare text for embedding
-            description_text = investor_data.get(
-                "enhanced_description"
-            ) or investor_data.get("investor_description", "")
-            focus_areas = investor_data.get("standardized_focus") or investor_data.get(
-                "investment_thesis_focus", []
-            )
-
-            if isinstance(focus_areas, list):
-                focus_text = " ".join(focus_areas)
-            else:
-                focus_text = str(focus_areas)
-
-            # Combine description and focus for embedding
-            combined_text = f"{description_text} {focus_text}".strip()
-
-            if not combined_text:
-                logger.warning(f"No text to embed for investor {investor_data['name']}")
-                return
-
-            # Create metadata
-            metadata = {
-                "investor_id": investor_id,
-                "name": investor_data["name"],
-                "website": investor_data.get("website") or "",
-                "headquarters": investor_data.get("standardized_headquarters")
-                or investor_data.get("headquarters")
-                or "",
-                "focus_areas_count": len(focus_areas)
-                if isinstance(focus_areas, list)
-                else 0,
-            }
-
-            # Add to ChromaDB
-            self.collection.add(
-                documents=[combined_text],
-                metadatas=[metadata],
-                ids=[f"investor_{investor_id}"],
-            )
-
-            logger.info(f"Added investor {investor_data['name']} to vector database")
-
-        except Exception as e:
-            logger.error(f"Failed to save to vector DB: {e}")
-
-    def process_csv_file(self, csv_file_path: str, limit: Optional[int] = None):
-        """Process the entire CSV file"""
-        logger.info(f"Starting to process CSV file: {csv_file_path}")
-
-        # Read CSV
-        df = pd.read_csv(csv_file_path)
-        logger.info(f"Loaded {len(df)} rows from CSV")
-
-        if limit:
-            df = df.head(limit)
-            logger.info(f"Processing limited to {limit} rows")
-
-        processed_count = 0
-        error_count = 0
-
-        for index, row in df.iterrows():
-            try:
-                logger.info(f"Processing row {index + 1}/{len(df)}: {row['Name']}")
-
-                # Create CSVRow object
-                csv_row = CSVRow(
-                    name=row["Name"],
-                    website=row.get("Website"),
-                    investment_firm_profile=row.get("Investment Firm Profile"),
-                    crunchbase_linkedin_urls=row.get("Crunchbase & LinkedIn URLs"),
-                    crunchbase_firm_extract=row.get("Crunchbase Firm Extract"),
-                    linkedin_investment_profile=row.get("LinkedIn Investment Profile"),
-                    source_of_truth_profile=row.get("Source of Truth Profile"),
-                )
-
-                # Extract structured data
-                structured_data = self.extract_structured_data(csv_row)
-
-                # Enhance with LLM if enabled
-                enhanced_data = self.enhance_with_llm(structured_data)
-
-                # Save to SQL database
-                investor_id = self.save_to_sql(enhanced_data)
-
-                # Save to vector database
-                self.save_to_vector_db(investor_id, enhanced_data)
-
-                processed_count += 1
-
-                # Progress update every 10 rows
-                if (index + 1) % 10 == 0:
-                    logger.info(
-                        f"Progress: {processed_count} processed, {error_count} errors"
-                    )
-
-            except Exception as e:
-                error_count += 1
-                logger.error(
-                    f"Error processing row {index + 1} ({row.get('Name', 'Unknown')}): {e}"
-                )
-                continue
-
-        logger.info(
-            f"Processing complete! Processed: {processed_count}, Errors: {error_count}"
-        )
-        return processed_count, error_count
-
-    def search_investors(self, query: str, limit: int = 10):
-        """Search investors using vector similarity"""
-        try:
-            results = self.collection.query(query_texts=[query], n_results=limit)
-
-            return results
-
-        except Exception as e:
-            logger.error(f"Search failed: {e}")
-            return None
-
-
-def main():
-    """Main function with command line interface"""
-    parser = argparse.ArgumentParser(description="LLM-powered Investor Parser")
-    parser.add_argument("--file", type=str, help="Path to CSV file to process")
-    parser.add_argument("--limit", type=int, help="Limit number of rows to process")
-    parser.add_argument(
-        "--use-llm",
-        action="store_true",
-        help="Enable LLM enhancement (requires OpenAI API key)",
-    )
-    parser.add_argument("--search", type=str, help="Search query for vector database")
-    parser.add_argument(
-        "--search-limit",
-        type=int,
-        default=10,
-        help="Number of search results to return",
-    )
-
-    args = parser.parse_args()
-
-    # Initialize parser
-    investor_parser = InvestorParser(use_llm=args.use_llm)
-
-    if args.search:
-        # Perform search
-        logger.info(f"Searching for: {args.search}")
-        results = investor_parser.search_investors(args.search, args.search_limit)
-
-        if results and results["documents"][0]:
-            print(f"\nFound {len(results['documents'][0])} similar investors:")
-            for i, (doc, metadata) in enumerate(
-                zip(results["documents"][0], results["metadatas"][0])
-            ):
-                print(f"{i + 1}. {metadata['name']}")
-                print(f"   Website: {metadata.get('website', 'N/A')}")
-                print(f"   HQ: {metadata.get('headquarters', 'N/A')}")
-                print(f"   Focus areas: {metadata.get('focus_areas_count', 0)}")
-                print(f"   Similarity score: {results['distances'][0][i]:.3f}")
-                print()
-        else:
-            print("No results found.")
-
-    elif args.file:
-        # Process CSV file
-        if not os.path.exists(args.file):
-            logger.error(f"File not found: {args.file}")
-            return
-
-        processed, errors = investor_parser.process_csv_file(args.file, args.limit)
-
-        print("\nProcessing complete!")
-        print(f"Successfully processed: {processed} investors")
-        print(f"Errors encountered: {errors}")
-
-        # Show some search examples
-        print("\nTrying some example searches...")
-        for query in ["bioeconomy", "venture capital", "sustainability"]:
-            results = investor_parser.search_investors(query, 3)
-            if results and results["documents"][0]:
-                print(f"\nTop matches for '{query}':")
-                for i, metadata in enumerate(results["metadatas"][0][:3]):
-                    print(f"  {i + 1}. {metadata['name']}")
-
-    else:
-        parser.print_help()
-
-
-if __name__ == "__main__":
-    main()
diff --git a/app/services/langgraph_agent.py b/app/services/langgraph_agent.py
new file mode 100644
index 0000000..e69de29
diff --git a/app/services/llm_parser.py b/app/services/llm_parser.py
index e22238b..4465251 100644
--- a/app/services/llm_parser.py
+++ b/app/services/llm_parser.py
@@ -1,28 +1,368 @@
-import asyncio
-import csv
+import json
+import logging
+import os
+from typing import Any, Dict, Optional
 
-from openai import AsyncOpenAI
-from pydantic import BaseModel
+import chromadb
+import pandas as pd
+from dotenv import load_dotenv
+from openai import OpenAI
+
+from db import get_session, init_database
+from schema import CSVRow, Investor
+
+# Load environment variables
+load_dotenv()
+
+# Configure logging
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
 
 
-class RowSchema(BaseModel):
-    section: str
-    explanation: str
+class LLMInvestorParser:
+    def __init__(self):
+        # Initialize OpenAI client
+        self.openai_client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
 
-client = AsyncOpenAI()
+        # Initialize ChromaDB
+        self.chroma_client = chromadb.PersistentClient(path="./chroma_db")
+        self.collection = self.chroma_client.get_or_create_collection(
+            name="investor_descriptions",
+            metadata={
+                "description": "Investor descriptions and investment thesis focus"
+            },
+        )
 
-async def process_row(row):
-    resp = await client.chat.completions.create(
-        model="gpt-4o-mini",
-        messages=[{"role": "user", "content": f"Extract relevant section:\n{row}"}],
-        response_format={"type": "json_object"}  # ensures JSON output
-    )
-    return RowSchema.model_validate_json(resp.choices[0].message.content)
+        # Initialize database
+        init_database()
 
-async def main():
-    with open("data.csv") as f:
-        reader = csv.DictReader(f)
-        tasks = [process_row(row) for row in reader]
-        return await asyncio.gather(*tasks)
+    def parse_json_field(self, json_str: str) -> Dict[str, Any]:
+        """Safely parse JSON string with LLM assistance if needed"""
+        if not json_str or json_str.strip() == "":
+            return {}
 
-results = asyncio.run(main())
\ No newline at end of file
+        try:
+            # Try direct JSON parsing first
+            return json.loads(json_str)
+        except json.JSONDecodeError:
+            # If direct parsing fails, use LLM to clean and parse
+            logger.info("Direct JSON parsing failed, using LLM to clean JSON")
+            return self._llm_clean_json(json_str)
+
+    def _llm_clean_json(self, malformed_json: str) -> Dict[str, Any]:
+        """Use LLM to clean and parse malformed JSON"""
+        try:
+            prompt = f"""
+            The following text appears to be malformed JSON. Please clean it up and return valid JSON.
+            If it's not possible to create valid JSON, return an empty object {{}}.
+            
+            Original text:
+            {malformed_json[:2000]}  # Limit length for API
+            
+            Return only the cleaned JSON, no explanations:
+            """
+
+            response = self.openai_client.chat.completions.create(
+                model="gpt-3.5-turbo",
+                messages=[{"role": "user", "content": prompt}],
+                temperature=0,
+            )
+
+            cleaned_json = response.choices[0].message.content.strip()
+            return json.loads(cleaned_json)
+
+        except Exception as e:
+            logger.error(f"LLM JSON cleaning failed: {e}")
+            return {}
+
+    def extract_structured_data(self, csv_row: CSVRow) -> Dict[str, Any]:
+        """Extract and structure data from CSV row using LLM"""
+        # Parse the investment firm profile
+        profile_data = {}
+        if csv_row.investment_firm_profile:
+            profile_data = self.parse_json_field(csv_row.investment_firm_profile)
+
+        # Create structured output
+        structured_data = {
+            "name": csv_row.name,
+            "website": csv_row.website or profile_data.get("websiteURL"),
+            "investor_description": profile_data.get("investorDescription", ""),
+            "investment_thesis_focus": profile_data.get("investmentThesisFocus", []),
+            "headquarters": profile_data.get("headquarters", ""),
+            "aum_info": profile_data.get("overallAssetsUnderManagement", {}),
+            "funds_info": profile_data.get("funds", []),
+            "crunchbase_urls": csv_row.crunchbase_linkedin_urls or "",
+            "crunchbase_extract": csv_row.crunchbase_firm_extract or "",
+            "linkedin_profile": csv_row.linkedin_investment_profile or "",
+            "source_truth_profile": csv_row.source_of_truth_profile or "",
+        }
+
+        return structured_data
+
+    def enhance_with_llm(self, investor_data: Dict[str, Any]) -> Dict[str, Any]:
+        """Use LLM to enhance and standardize investor data"""
+        try:
+            # Combine all available text for context
+            context_text = " ".join(
+                [
+                    investor_data.get("investor_description", ""),
+                    investor_data.get("crunchbase_extract", ""),
+                    investor_data.get("linkedin_profile", ""),
+                    investor_data.get("source_truth_profile", ""),
+                ]
+            )
+
+            if not context_text.strip():
+                return investor_data
+
+            prompt = f"""
+            Based on the following information about an investor, please extract and standardize:
+            1. A concise investor description (2-3 sentences)
+            2. Investment thesis focus areas (list of specific focus areas)
+            3. Headquarters location (city, country format)
+            
+            Investor: {investor_data["name"]}
+            Context: {context_text[:3000]}  # Limit for API
+            
+            Return in JSON format:
+            {{
+                "enhanced_description": "concise description here",
+                "standardized_focus": ["focus area 1", "focus area 2", ...],
+                "standardized_headquarters": "City, Country"
+            }}
+            """
+
+            response = self.openai_client.chat.completions.create(
+                model="gpt-3.5-turbo",
+                messages=[{"role": "user", "content": prompt}],
+                temperature=0.3,
+            )
+
+            enhanced_data = json.loads(response.choices[0].message.content)
+
+            # Update investor data with enhanced information
+            if enhanced_data.get("enhanced_description"):
+                investor_data["enhanced_description"] = enhanced_data[
+                    "enhanced_description"
+                ]
+
+            if enhanced_data.get("standardized_focus"):
+                investor_data["standardized_focus"] = enhanced_data[
+                    "standardized_focus"
+                ]
+
+            if enhanced_data.get("standardized_headquarters"):
+                investor_data["standardized_headquarters"] = enhanced_data[
+                    "standardized_headquarters"
+                ]
+
+            return investor_data
+
+        except Exception as e:
+            logger.error(f"LLM enhancement failed for {investor_data['name']}: {e}")
+            return investor_data
+
+    def save_to_sql(self, investor_data: Dict[str, Any]) -> int:
+        """Save investor data to SQL database"""
+        try:
+            with get_session() as session:
+                # Check if investor already exists
+                existing = (
+                    session.query(Investor)
+                    .filter_by(name=investor_data["name"])
+                    .first()
+                )
+
+                if existing:
+                    logger.info(f"Updating existing investor: {investor_data['name']}")
+                    investor = existing
+                else:
+                    logger.info(f"Creating new investor: {investor_data['name']}")
+                    investor = Investor()
+
+                # Map data to investor object
+                investor.name = investor_data["name"]
+                investor.website = investor_data.get("website")
+                investor.investor_description = investor_data.get(
+                    "enhanced_description"
+                ) or investor_data.get("investor_description")
+                investor.investment_thesis_focus = investor_data.get(
+                    "standardized_focus"
+                ) or investor_data.get("investment_thesis_focus")
+                investor.headquarters = investor_data.get(
+                    "standardized_headquarters"
+                ) or investor_data.get("headquarters")
+
+                # AUM information
+                aum_info = investor_data.get("aum_info", {})
+                investor.aum_amount = aum_info.get("aumAmount")
+                investor.aum_as_of_date = aum_info.get("asOfDate")
+                investor.aum_source_url = aum_info.get("sourceUrl")
+
+                # Fund information
+                investor.funds_info = investor_data.get("funds_info", [])
+
+                # Raw data
+                investor.crunchbase_urls = investor_data.get("crunchbase_urls")
+                investor.crunchbase_extract = investor_data.get("crunchbase_extract")
+                investor.linkedin_profile = investor_data.get("linkedin_profile")
+                investor.source_truth_profile = investor_data.get(
+                    "source_truth_profile"
+                )
+
+                if not existing:
+                    session.add(investor)
+
+                session.flush()  # Get the ID
+                return investor.id
+
+        except Exception as e:
+            logger.error(f"Failed to save to SQL: {e}")
+            raise
+
+    def save_to_vector_db(self, investor_id: int, investor_data: Dict[str, Any]):
+        """Save investor description and focus to ChromaDB"""
+        try:
+            # Prepare text for embedding
+            description_text = investor_data.get(
+                "enhanced_description"
+            ) or investor_data.get("investor_description", "")
+            focus_areas = investor_data.get("standardized_focus") or investor_data.get(
+                "investment_thesis_focus", []
+            )
+
+            if isinstance(focus_areas, list):
+                focus_text = " ".join(focus_areas)
+            else:
+                focus_text = str(focus_areas)
+
+            # Combine description and focus for embedding
+            combined_text = f"{description_text} {focus_text}".strip()
+
+            if not combined_text:
+                logger.warning(f"No text to embed for investor {investor_data['name']}")
+                return
+
+            # Create metadata
+            metadata = {
+                "investor_id": investor_id,
+                "name": investor_data["name"],
+                "website": investor_data.get("website", ""),
+                "headquarters": investor_data.get("standardized_headquarters")
+                or investor_data.get("headquarters", ""),
+                "focus_areas_count": len(focus_areas)
+                if isinstance(focus_areas, list)
+                else 0,
+            }
+
+            # Add to ChromaDB
+            self.collection.add(
+                documents=[combined_text],
+                metadatas=[metadata],
+                ids=[f"investor_{investor_id}"],
+            )
+
+            logger.info(f"Added investor {investor_data['name']} to vector database")
+
+        except Exception as e:
+            logger.error(f"Failed to save to vector DB: {e}")
+
+    def process_csv_file(self, csv_file_path: str, limit: Optional[int] = None):
+        """Process the entire CSV file"""
+        logger.info(f"Starting to process CSV file: {csv_file_path}")
+
+        # Read CSV
+        df = pd.read_csv(csv_file_path)
+        logger.info(f"Loaded {len(df)} rows from CSV")
+
+        if limit:
+            df = df.head(limit)
+            logger.info(f"Processing limited to {limit} rows")
+
+        processed_count = 0
+        error_count = 0
+
+        for index, row in df.iterrows():
+            try:
+                logger.info(f"Processing row {index + 1}/{len(df)}: {row['Name']}")
+
+                # Create CSVRow object
+                csv_row = CSVRow(
+                    name=row["Name"],
+                    website=row.get("Website"),
+                    investment_firm_profile=row.get("Investment Firm Profile"),
+                    crunchbase_linkedin_urls=row.get("Crunchbase & LinkedIn URLs"),
+                    crunchbase_firm_extract=row.get("Crunchbase Firm Extract"),
+                    linkedin_investment_profile=row.get("LinkedIn Investment Profile"),
+                    source_of_truth_profile=row.get("Source of Truth Profile"),
+                )
+
+                # Extract structured data
+                structured_data = self.extract_structured_data(csv_row)
+
+                # Enhance with LLM
+                enhanced_data = self.enhance_with_llm(structured_data)
+
+                # Save to SQL database
+                investor_id = self.save_to_sql(enhanced_data)
+
+                # Save to vector database
+                self.save_to_vector_db(investor_id, enhanced_data)
+
+                processed_count += 1
+
+                # Progress update every 10 rows
+                if (index + 1) % 10 == 0:
+                    logger.info(
+                        f"Processed {processed_count} rows successfully, {error_count} errors"
+                    )
+
+            except Exception as e:
+                error_count += 1
+                logger.error(
+                    f"Error processing row {index + 1} ({row.get('Name', 'Unknown')}): {e}"
+                )
+                continue
+
+        logger.info(
+            f"Processing complete! Processed: {processed_count}, Errors: {error_count}"
+        )
+        return processed_count, error_count
+
+    def search_investors(self, query: str, limit: int = 5):
+        """Search investors using vector similarity"""
+        try:
+            results = self.collection.query(query_texts=[query], n_results=limit)
+
+            return results
+
+        except Exception as e:
+            logger.error(f"Search failed: {e}")
+            return None
+
+
+def main():
+    """Main function to run the parser"""
+    parser = LLMInvestorParser()
+
+    # Process the CSV file
+    csv_file = "/home/oluwasanmi/Documents/Work/MKD/anton_wireframe/New Excerpt 5 investors - Sheet1 parse.csv"
+
+    # Start with a small sample for testing
+    processed, errors = parser.process_csv_file(csv_file, limit=5)
+
+    print("\nProcessing complete!")
+    print(f"Successfully processed: {processed} investors")
+    print(f"Errors encountered: {errors}")
+
+    # Test search functionality
+    print("\nTesting search functionality...")
+    results = parser.search_investors("bioeconomy circular economy")
+    if results:
+        print(f"Found {len(results['documents'][0])} similar investors")
+        for i, doc in enumerate(results["documents"][0]):
+            print(f"  {i + 1}. {results['metadatas'][0][i]['name']}")
+
+
+if __name__ == "__main__":
+    main()
\ No newline at end of file
diff --git a/app/services/openrouter.py b/app/services/openrouter.py
new file mode 100644
index 0000000..6fa8a01
--- /dev/null
+++ b/app/services/openrouter.py
@@ -0,0 +1,178 @@
+import asyncio
+from typing import List, Optional
+
+import chromadb
+import pandas as pd
+from db.tables import InvestorTable
+from langchain_core.prompts import PromptTemplate
+from langchain_openai import ChatOpenAI
+from pydantic_schemas import Investor, InvestorList
+from settings import settings
+
+# Add these imports for your databases
+# from sqlalchemy.ext.asyncio import AsyncSession
+# from your_vector_db import VectorDBClient
+
+
+class InvestorProcessor:
+    def __init__(
+        self,
+        sql_session: Optional[object] = None,
+        vector_db_client: Optional[object] = None,
+    ):
+        self.template = """You are an expert data extraction assistant. Extract investor information from the provided CSV data and return it as a list of structured records.
+
+Given the following CSV data rows:
+{question}
+
+For each row, extract and structure the following fields:
+- name: The investor's full name
+- aum: Assets under management (as integer, use 0 if not available)
+- check_size: Investment check size (as string)
+- sector_focus: Sector focus (as string)
+- stage_focus: Investment stage focus (as string)
+- region: Geographic region (as string)
+- investment_thesis: Investment thesis (as string)
+- investor_description: Description of the investor (as string)
+
+Important: 
+- If a field is not available in the data, use appropriate default values (empty string for text fields, 0 for numbers)
+- Ensure all text fields are properly escaped and contain no control characters
+- Return clean, valid JSON only
+
+Return the data as a structured list of investors."""
+
+        self.prompt = PromptTemplate(
+            template=self.template, input_variables=["question"]
+        )
+
+        self.llm = ChatOpenAI(
+            api_key=settings.OPENROUTER_API_KEY,
+            base_url="https://openrouter.ai/api/v1",
+            model="openai/gpt-oss-120b:fre",
+            temperature=0,
+        )
+
+        self.structured_llm = self.llm.with_structured_output(InvestorList)
+        self.sql_session = sql_session
+        self.vector_db_client = vector_db_client
+
+        self.vector_db_client = chromadb.PersistentClient(path="./chroma_db")
+        self.collection = self.vector_db_client.get_or_create_collection(
+            name="investor_descriptions",
+            metadata={
+                "description": "Investor descriptions and investment thesis focus"
+            },
+        )
+
+    async def _process_batch(self, batch: pd.DataFrame, batch_idx: int) -> List:
+        """Process a single batch of data"""
+        # Convert batch to string representation - clean the data
+        batch_str = ""
+        for idx, row in batch.iterrows():
+            # Clean values to remove control characters
+            cleaned_row = {}
+            for key, value in row.items():
+                if pd.notna(value):
+                    # Convert to string and clean control characters
+                    clean_value = (
+                        str(value)
+                        .replace("\n", " ")
+                        .replace("\r", " ")
+                        .replace("\t", " ")
+                    )
+                    # Remove other control characters
+                    clean_value = "".join(
+                        char
+                        for char in clean_value
+                        if ord(char) >= 32 or char in ["\n", "\r", "\t"]
+                    )
+                    cleaned_row[key] = clean_value
+
+            row_str = ", ".join(
+                [f"{key}: {value}" for key, value in cleaned_row.items()]
+            )
+            batch_str += f"Row {idx + 1}: {row_str}\n"
+
+        try:
+            print(f"Processing batch {batch_idx + 1}...")
+            batch_results = await self.structured_llm.ainvoke(batch_str)
+            return batch_results.investor_list
+        except Exception as e:
+            print(f"Error processing batch {batch_idx + 1}: {e}")
+            return []
+
+    async def _save_to_sql(self, investors: List[Investor]) -> None:
+        """Save investors to SQL database"""
+        if not self.sql_session:
+            return
+
+        # Implement SQL saving logic here
+        for investor in investors:
+            db_investor = InvestorTable(
+                name=investor.name,
+                aum=investor.aum,
+                check_size=investor.check_size,
+                sector_focus=investor.sector_focus,
+                stage_focus=investor.stage_focus,
+                region=investor.region,
+            )
+            self.sql_session.add(db_investor)
+        self.sql_session.commit()
+
+    async def _save_to_vector_db(self, investors: List[Investor]) -> None:
+        """Save investors to vector database"""
+        if not self.vector_db_client:
+            return
+
+        documents = []
+        metadatas = []
+        ids = []
+
+        for i, investor in enumerate(investors):
+            doc_text = f"{investor.investor_description}\nInvestment Thesis: {investor.investment_thesis}"
+            documents.append(doc_text)
+            metadatas.append({"name": investor.name})
+            ids.append(f"investor_{i}_{investor.name.replace(' ', '_')}")
+
+        if documents:
+            # Use add method with proper parameters
+            self.collection.add(documents=documents, metadatas=metadatas, ids=ids)
+
+    async def process_csv(
+        self, df: pd.DataFrame, batch_size: int = 10, max_concurrent: int = 10
+    ) -> List:
+        """Process CSV data in parallel batches and save to databases"""
+        results = []
+
+        # Create batches
+        batches = []
+        for i in range(0, len(df), batch_size):
+            batch = df.iloc[i : i + batch_size]
+            batches.append((batch, i // batch_size))
+
+        # Process batches with concurrency control
+        semaphore = asyncio.Semaphore(max_concurrent)
+
+        async def process_with_semaphore(batch_data):
+            batch, batch_idx = batch_data
+            async with semaphore:
+                return await self._process_batch(batch, batch_idx)
+
+        # Execute all batches concurrently
+        batch_results = await asyncio.gather(
+            *[process_with_semaphore(batch_data) for batch_data in batches],
+            return_exceptions=True,
+        )
+
+        # Collect results, filtering out exceptions
+        for batch_result in batch_results:
+            if not isinstance(batch_result, Exception):
+                results.extend(batch_result)
+
+        # Save to databases
+        if results:
+            await self._save_to_sql(results)
+            await self._save_to_vector_db(results)
+
+        return results
diff --git a/app/services/querying.py b/app/services/querying.py
new file mode 100644
index 0000000..ea80784
--- /dev/null
+++ b/app/services/querying.py
@@ -0,0 +1,61 @@
+from typing import Optional
+
+import chromadb
+from langchain_openai import ChatOpenAI
+from pydantic_schemas import Investor, InvestorList
+from settings import settings
+
+# Add these imports for your databases
+# from sqlalchemy.ext.asyncio import AsyncSession
+# from your_vector_db import VectorDBClient
+
+
+class QueryProcessor:
+    def __init__(
+        self,
+        sql_session: Optional[object] = None,
+        vector_db_client: Optional[object] = None,
+    ):
+        self.llm = ChatOpenAI(
+            api_key=settings.OPENROUTER_API_KEY,
+            base_url="https://openrouter.ai/api/v1",
+            model="openai/gpt-oss-120b:free",
+            temperature=0,
+        )
+
+        self.structured_llm = self.llm.with_structured_output(InvestorList)
+        self.sql_session = sql_session
+        self.vector_db_client = vector_db_client
+
+        self.vector_db_client = chromadb.PersistentClient(path="./chroma_db")
+        self.collection = self.vector_db_client.get_or_create_collection(
+            name="investor_descriptions",
+            metadata={
+                "description": "Investor descriptions and investment thesis focus"
+            },
+        )
+
+    def query_sql_database(self, query: str) -> Optional[InvestorList]:
+        """Query the SQL database for investor information."""
+        if not self.sql_session:
+            return None
+
+        # Implement SQL querying logic here
+        result = self.sql_session.execute(query)
+        investors = result.scalars().all()
+        return InvestorList(investors=investors)
+
+    def query_vector_database(self, query: str) -> Optional[InvestorList]:
+        """Query the vector database for investor information."""
+        if not self.vector_db_client:
+            return None
+
+        # Implement vector database querying logic here
+        results = self.vector_db_client.query(collection=self.collection, query=query)
+        investors = [Investor(**doc.metadata) for doc in results.documents]
+        return InvestorList(investors=investors)
+
+    def process_query(self, question: str) -> InvestorList:
+        """Process a query using the LLM and return structured investor data."""
+        response = self.structured_llm.predict(question=question)
+        return response
diff --git a/app/settings.py b/app/settings.py
index 2f9dd5b..a9376fe 100644
--- a/app/settings.py
+++ b/app/settings.py
@@ -1,10 +1,11 @@
 from pydantic_settings import BaseSettings
 
+
 class Settings(BaseSettings):
-    api_key: str
-    db_url: str
+    OPENROUTER_API_KEY: str
 
     class Config:
         env_file = ".env"
 
-settings = Settings() 
\ No newline at end of file
+
+settings = Settings()