From 6d902345c0cf5a7a18575b47914b9f21e0cde5cc Mon Sep 17 00:00:00 2001 From: bolade Date: Sat, 27 Sep 2025 10:45:08 +0100 Subject: [PATCH] Refactor investor and company schemas to allow optional fields; update filtering logic in read_companies function and add find_similar_investors endpoint; change LLM model in InvestorProcessor and QueryProcessor for improved performance. --- .../__pycache__/companies.cpython-312.pyc | Bin 9571 -> 9792 bytes .../__pycache__/investors.cpython-312.pyc | Bin 9689 -> 12563 bytes app/routers/companies.py | 4 + app/routers/investors.py | 68 ++++++ .../__pycache__/py_schemas.cpython-312.pyc | Bin 10845 -> 14003 bytes app/schemas/py_schemas.py | 202 ++++++++++++++---- .../__pycache__/llm_parser.cpython-312.pyc | Bin 13273 -> 13263 bytes .../__pycache__/querying.cpython-312.pyc | Bin 5344 -> 5341 bytes app/services/llm_parser.py | 2 +- app/services/querying.py | 4 +- 10 files changed, 234 insertions(+), 46 deletions(-) diff --git a/app/routers/__pycache__/companies.cpython-312.pyc b/app/routers/__pycache__/companies.cpython-312.pyc index 2ab18c8913efe145fae4941c6d9378d0acfa12c5..d1b45589eb3180d359ee8fe271d25b168be031c6 100644 GIT binary patch delta 525 zcmaFtb-;)3G%qg~0}x1Wyq+N?vypEOJChXCMm#=m&8IR=y8%n>)Fb*%evZ$hd3sR=Hj#ps=%I5aa#L p(-h?x8BHf|RXQdn%_uve{1XF^`pnG0Wb=Uu$ogQjd7|noi=<v2PJX~8I+;(}Y;rs|8yi0ZE8hpP&Dq?_Y>c{-`vt{0 zZn33h=9HusOAc{0{- zekT^d3>S)!N@bk6d7qRT3sC;4oDoD~vVnXe?>3-0#qx{{3=Iq)xF**t*>1iq@4^IB wrl1tWczbiPk{lzW@#IO$$Hc@LWhaz>VgOQ~nHiXDJ}?1UA51pqsa#?N0539S+W-In diff --git a/app/routers/__pycache__/investors.cpython-312.pyc b/app/routers/__pycache__/investors.cpython-312.pyc index 0c218269bac2c6eff23958abb5b6568ec7af1a2f..1416da7f071c96bb3aaeb5b44b761597cc20c2b1 100644 GIT binary patch delta 3530 zcma)8-ESMm5#PHbk4K6}ktW4YQKCL+I+E{bnP$|*|0gT$tJ~V(GwCF>lLP8RRFZ~4kRut_62^C>XxM==?x^x_%X`Z@sM^Ypu zpD<=bV0B^Yqg8TA9h>eJj%NqQyMyBc&e5ic{5ymKJ2;)2 zZtA!vCrCk=m2I#Tsf$vb8r5B=LX-+>yq-GMTOaMKQxE{V)ZY+*t~~-UAh&PYYYQdc zu#O{xa;F6P1}WU2-%a%=tI=%eXy~av1pUZ<*<00%H0bqEz3B#pDk*7pL2nf7?U#L; zE-xKu(CwqTGrQ?-Fz+*-7q%UJu0d&FkA)6x?=+YVY-I zk?~s>a5^R{N*d>T4GT<%n>vII6v|vYrD7|5c9IW`S1m3e0qpPdFY*1_7n$Y%K@ORE zbGVa!N9;HV>@I+AfDQnd4Tk`F05l68pn3tk0I=uS4-f#*tpe2tpxGJ#DqI5)Q*KiD zG5W&awMPDG!n#Z0{2Ada+dm9~BLD{}l(}d!iAA9J=*o(mk|IVNA)i?Kx`%)%##1+B zMNQ*5DXK;lBjtsl;Q{j0;xZni--E=`_8HSp{@6Cgxbt7Pd5yW}C_Cc9hiHd+fad}1 z04?VEg?LhxvA($G1(%KSWPC}+5RkGVk&dUZ31-kDcNi#e8y}$%5%F=LUjTR!#VC#g zc>>@hz$t(TfaVljR166P!z%F@1qEi))>Z=yfi2s059exuQ1BVbp3FaS{ElJz^S^dZ zvHE(g|HpbU+@vxam&(S4I95~)c~e_Qgu`%KbRWSf1u`OaMfXC)a#X7OfM2C!KLVkT zD6El$`y+jihw>gzn&lz>xR2cSgtL=1XNBIKXGBg_El@pl-% zPx%kXxW7af>Z>9q(KB_ikn(>74s*(>BvUERaqqhpFdp?MqIGb{82%iAIR z0Tuae-r4;;?BaB&pJ{7Y6j6Id=Csgh$9EU!%e*8fWqNeM-=@vG0uD57Ub`;3ZF`=I zd`XzzH?&>56(S!yy`h6jd^w(s;#jpDQ5l5b#KTWG-TO?n|$u=GSs*t+^~8hPrUxa5=b?%_3`^iGJMF@ zrmgk0iHIra^ioC0XDw~%HF6*r%F#hcd@XH>X0lq%S>;s7dRn8>kn&oZWV}TaPTjWQ zf6DW9d5GJpQIEyUE>mX>ORpiM)cR;E}|HF-A0Y@Sv~=2 zxTW1=Z`%!M)kMcl_*|Atgvn3a&(G|yiS4G7fLqhjvZmVlYd!2iVwg<2oMgf-1cgPk zYR(F0(Dl>Q8;c5AU29oO))l;@N6xeXRfo3KwyZe;cdVsl+iI2=bbHEx zvgXTltqlFI@;B+VyLI5DcuES@^I@oZZbNE16b&uLZ^)_oAa1x*)C@k2atJNz2EUxB z65mYd>B85LF7~|pyIe)RcxW|p3XEtaQ>!Jg@ml@Ri^g~8XHF|3P{QYMOA)bdXW(+X z8pVsUx_xCK8I;Y@%<^0^eN)CdnV}UEYgMM~tSTsS460=+x-2Vrmojr&vC^4pxe0nb z!Mhh|Q0JKdw8$+COi?Rx4)*1%(1V4W)*efe*;nSwA=;mNm- z$=tsMWD?(AxP84O28&|w7lEQUw8als%*be|*pRsI&g^@$4}vA%;iB*Gr|Htz#UlOr zE^b*btxZ?>27zNm-?5VKWYKr>A@lHHX==VWHD8#F6=G81l)U9z*s?A*Xgn4=O2S|< zCk(D1EFE~INPogBRb~_KnUNFuwdt>|$l<#C-I6O@bcNUZimu@;`^ehNW4m)F^9!p3 zyw+y(6$Rf1SBt`6^H+7&O0N}-O*5s@+2ZKz=l=8SV;_wDeEj}+!GFGB@KAhY@)NW{FnP20LC&Gb_(q(*14rI~H(@-r zh#gd>%;R(h9xm|g(;i=Cj-D3`4*65 w`f#>5adFMR#awzcKJ{?y_Y+UiB*S#Aa}}-s)bo|rKjRR4nR&{>nD&hPFEG%%#4*eP36Kz{9_$0uAXSL}0EuhGUV7o<+Fm#ygm`b(u{TL0VaY$g{k`{Q-uu0o zwYd80`sB}s5mw;y$LIIo{cd`DGDpsA&v&k>3OhtY*GT_RkDyyoE~iyxO`+l23XQNt zPaCos$wq-Sd~BAbuP=KMex#F(uvi|beoTC;=DTs0+*E1ecCa@LyPQ8D^YA#cr+UFq zW0WRos-ND&WcD%HeM}CSXLc==-xpNa#}xZ%C_LK@(J7{~No<8qgH@2$k&$JBRg~P! z$SRGzvm*-yh|{u9fT;rnsIXW+w$rG%rzTd8vI52Y0G;#YPec9-Lp6OrAU))x=qvLq zGt67?<(ZIo+LtgisoWRyj^lpwEGu($YWZ?Yko(Hxa(hJlt7ePiN{{)n${!}JMe&Pv zcko$YlcdwI01p7TTD|G;C_Wa2p-Na2vq=A zt2G+jKufQ++pI~gAYTw>q--uCY1EtVGuvtL4O(+*b`ZE3{P2pXMN+{N@O)H!6uC>v zVkvrpqz5#b3wDnIZKe2=@WXY4rx4-@d$!j%>kWr-UO-=Xs@Gife>BHeaY?mK-QZ@O+YZMUl`PuQ_!+cijaZb8 zuOleMtHR){Mn^U9*TDBN2HgR;DcsB*Z=j2VPqHmlN3r>=_$NRAoNV_Xtz|iU9iz`7 zpafRH(_}$hDXfr+*e;}lZvy|N=oRim!?%j2);){SYY49bT(pR5un*p8bST^CG;QW^ zIqrl{kyQ*H?GAN%@w= target_investor.check_size_lower, + InvestorTable.check_size_lower <= target_investor.check_size_upper + ) + + # Filter by similar AUM (within 50% range) + aum_lower = int(target_investor.aum * 0.5) + aum_upper = int(target_investor.aum * 1.5) + query = query.filter( + InvestorTable.aum >= aum_lower, + InvestorTable.aum <= aum_upper + ) + + # Filter by common sectors + target_sector_names = [sector.name for sector in target_investor.sectors] + if target_sector_names: + query = query.join(InvestorTable.sectors).filter( + SectorTable.name.in_(target_sector_names) + ) + + investors = query.all() + + # Transform to InvestorData format + investor_data_list = [] + for investor in investors: + investor_data = InvestorData( + investor=investor, + portfolio_companies=investor.portfolio_companies, + team_members=investor.team_members, + sectors=investor.sectors, + ) + investor_data_list.append(investor_data) + + return investor_data_list \ No newline at end of file diff --git a/app/schemas/__pycache__/py_schemas.cpython-312.pyc b/app/schemas/__pycache__/py_schemas.cpython-312.pyc index 9dd38935d4f4edc7ed7ba23d3f3779335720d3fb..d8c984413983de829414315beace66f4de641d6b 100644 GIT binary patch literal 14003 zcmeHOU2GdycAnwSkm8R>Nt9&yPoqe&O-G^Zjn}ag+lgW+wj5dRTCx+fP1q6VO5&R2 z(04|*$@K!tb{CNIQf3=8+C?AykOFpWWQ{%)i}oc8v`<9=dm&)C(=8TlABw&?6&ERt z0_{0>W~dozWJ%tn>jgTn5AWPLbLRe@@0@dw{vjF-ad7?5onOrTe30Y*4R5SVFbgaH z4TX<6nUncESK!k;k7ZBZQ}Cv}1z+07?!9?`K}ZXQKsrzerh`1^;ZAX~@B5tWCoP1( z>ocUKL#$qa`T!B`dd=D}s|`YJi>)@oYC}*Pw$(;iZ3Jqgw%S%!+X}TYTWyTh#-X;& zRvTxv?NHlct8HVo38?L~)wY+rlHLD-%}nyTZ?rgHNP2YNcuv*y;JK2PQ;JeP>D61_ zkW_M3kx5?nznLR>S&z-&Pv)wW&&iUe(5%r_gYxFh$^aBT<_MSOWiIWJ`LtK|qXRN`0&(dW^!uy)aXRAMGs~&MX5kC89kKA6cl+rkL5@vb7fx28&7B$nnoi? zqDWei#E`^++~HP$a*xjr%_#*kq~zzXNvc#V^!zn9eP`#mxs>2H8La>HKmxj zmZM~b!dHi+QfWxd&XIzo4wcFovzRKCDXvCF0#HyzAm8U|;kJd5TC8W`w2eeN7v8M3 z?Rx(V?X+HT46q++@ml#iAgcp>0TyxE>*0t`_Q+n@chBEgf_@^9Ko4+}pe!7NUz00v zFL>7jw{WFlagvb9(_-?32z#=Zjg8FXf7&G_A~QqBch?x!Js= zsvscCL>nR|C=ONSkkL|cY-CU@%&VF>O~eDDLd981B#qFNctOP)`v+2Yym~+;Gtzus z(><`f!!l8`G{^Rs?#aoOGp1B*VM;VwYcy2VXs$Sm;);sq+Cy=MDh1Yh*o;!@j)!iC zhSHrtbRP=Tg=qrgMDD;YhYdlp=A5&MMCFC5YULduA9FW&nZLXb7HGd_ZDgp^8gE$h zMiV#qZ(A#o2{MSaIt(e4FMDqAliVF|W!Fgs_BhO^y<4iNDdL1uBsw<%V-(7%xuU8` zFfqCpIBxQ^N%4kZa2!bko_LY=!V-exy73d^j7v--92D+QG_WF6;rLk5*3*eWs7QpCO{6|55&>KNQF?P(X$>$&+Lz|GbF!kbd8~_?brZC&O1)|LN z`>HS9d~t>IvWG%v_2R94%l;Q?{`eOjpTBLz%L$AOKJvls7ZDCvrfvn7{mC^f`x(oZ z5#HbXpBxZlq_CL|5NKF1M_@WvC-G|l0fdu~4Jg1~X#hpo1||_3PsGL(wefi6RvRpV zjnjaon90=&kQ0`pa{OMKWml1y4EBtrSghYPSURK?U?;|SaW}#Ryd5sQHv7j?VCi;P z>|J`ousjrcmK3JR<^f0}Qb8=RceaW`Y~QO-#vxO(OaPzqsvUqHA2M3HH2|WB>})Sp zhNo;ze;T|=(cLHvH(uqGT^LpJWJt?t0J>X*u$1n@mn-M(;=qQ=<+myckr;upM3t*x z2FNMh&*VHlY)$mm22Ky>m}#oeOirdEcDxUW0}cL9Ae0W^xA!1v#tEk9DoLw@_Lb}k zfZswPDI}<@9bMS~*Z{|0+z8l;8kr^3KsWVC@WwQArO&n5c67UhEj`$1E)*KqMo_lH zRk#Id^biPYMg@ixr{IBv4_`*YAdO*FM-x`jBlz5jQ}ig-y@F&Fn_k1)*O9z|1fhz) zisZ=^P|uN>Z_G*Bh0OH*~?W##_G=5Tqwy7;Kn!-0XtYn61pmp7XRnRt zikFEzRzv`q$>oW7;rzH0gG@;lz#!l>Jjs+E%rgPGV&!C0o7O|eC$LGK6o^{~bXLirN{#BYIKdb41` zjOk!qa(%OJkA*>R0S5I63umgqrH&6x@65!Y^zE+ubIbnM*1*f_yaR-e-3G3g*Ki$i z1tA;<2MFOP;)7rtOi-S+s#&|8Cu1)TC;q73&1}B=WaDmiqE}RJ#{qBM2 z_x6VO*9qR=4n1maO*8KJmzxH?Ee0MNVV`w~Ck2t?Aipu=tu zxEA|14k6kh@U%H5;MU3z-=yUsl?7X!D7XokdeD+k8M9>O6(giNB*`)*s`?Z{EfJMd zmQY(T)hVwDTBWPtP-0BjiP{!g2Mh!&#Azn9M1P^k(Fs~7$#o*-=gR@Ta zAYuy5l;LZY6IKtdk!dxjZQ0lenD0H)e(7^&CyhPk3haLiOIh6q1mc$w?rVz3LOYBA zv@;@WpOB5cRSV0_&e$!4zsLQ!Y9W{^2`V|R5g_;+ZZKT@ehlL59u{X0LCMQXekl2{ z1Yu-;gXnu_3#VaFlwCLYyQU-jUC#}V?7#d+AOlL1a3VCxIV53A*kvg~8i21_ZvZ#G z(a&jCc##|6sF&w1ab^A*cinr5yT&Jl$_cll87Z43rS(oIa4X?41}=Mbq7t2e5Y4c) z9qIGXBt3AUc)6%tE3!#Q`siM$Gy=QmQ0t+Ks24{mRcO*nU%-cOstYF^Oe8`D92pr0 z1D)o!I*k;fO^`K#{jO9%h#YSmZk?Z0*9$K|T_ApVWpGLX8!b@nb@`J{h2vFG-| z`~0Ve7q2`BzItB+@?Uj7m)H$a-ssL>cJFxPh2O_&7kvDaw&6DJuX!LpYik+a<^9=C z9;n0a*eLRmaCUZMb29_q{U`X}Y;JZvZOCI1g-s#ZiR1#3i%8rI&v)?w3%xKSokd)5 zBw+wpK$+XKqLDmp{K=_j&~3 zkpO20wo`-v)&*Bj5tdHehjWCZYwXuoY)0;TxA!gkUt%mF|FOnt#Gyw%xczdQ^QZww z+_BUeChosJD<-a2j@V37JZ!K|x3_lK@YR{QJ`(@-b1-mC7nrfDC+zJc4(IQkHsjeV zciH*@2^)=mbemiWW}yDT(FZv(SRHHU4CCQ^UWD8fLUUPhv&Qi-OoYui zK4*1^r2wSF5rrKRP@>IKIQ}n|MYTk-IVmq@=Oj!pAXFU`&*1SsW(A;0ErLZTiH6zh zFqa#mzY;T)467>9RPh3so>V+56(Ji2Q%+!D#S4=g=Nzn?B;d3|z0!Q4QW#gR!ABtZ zp(u;WjHs1Mknxa*Pg%)0FdQU%SyXcsFmivp2Bh>j6P7_?nLK{EQn*lp6p!CY1LpC| zl|Ma2lvyg3=3r^rdOZVqKH!>cVU<`;i3%Ackgx)YAhI}JMt1@dfu87;IC53=_-vy= zV7x#pDo2g=#nu3%M$8iMOIYhL%>=WgaABIcCC;Q1J|J5Htd)*8}Jfm9IFeG4vHD_;F2V4m>4A+`>+j}kZ?mHlPyRdL_ zaa*=hpP64kjS^+O148YbYTc~HoV2ioBfV%V7j$y&Vk z7x96I@qt?69PkX_NnEcLJ{>%0wX+0`7=%XjEFHOZ@<+!V$8_&L7(DU$JInrwHP+)f zTa2_8jMReBT5vo2*ISEr)}sM1LgP-uv<>Up!7y#kUBO9m7?hS1y7C!3anD`3=e1ng zG)rETeU{lY3t6}KgoeQqBrRslF(3zQJV6^z706_w>mD=b7IUyTQ6nI)q9y6;2 za(r1PZsS|Aa%(cCN30Ds0@-$zpSYPu=zCEzM~Z3=vJMTE2>6TXd7{cv99c5h`1n~o zX39bw418lF$%AD%mXy+b9x%i_L>I5+v^n?GKU*BqLYX||@497|81gF+-lMiQIRrRA zzW!H@JVV|{F@z`|nhOf8jAH6ALQ6@Z+KiIVDP}w^N49{0EW1SSXvoL_h{EwC$f%sK zK48nf*@9#}g70MtENe%tj9Ai)4j2f_LY-TY88D)ws$+$NHe%PhDR%ZXl-gJ&Yas1) z5Za`YC)Om-*%A^Ic!w!SBNw5Bx482Rm--5pSb71)66Tk=`rie7fMh>>Ui}j&tg2b@ z`5LH6uSdkwo8k)R0S{Q<4t%o~Z?7f(5OmT3&^z5riKW8BoTSc*oEWEPsiq2BtHZe!#dW zI|-&`EJ;Y*OtY)_0LcjIjPF5VRcAo_3nFJ;4-0-l;LFe%=B~$FrRH4^F}3|eOef&e z{Y)o3Xn+1;bdRM`(4GR7@hy0~S6CQZ)T$E?WBu%Dv&z6mImW!9GO!so1)`umdfVvTvXr>9#YLHrBJt(ww`fd0JCbwD{$7} zFJ#s?~}#1Me-ibZ66eV!2Qy9@n6s$HTCxM#w3hCjP=zyuvU%y zIecd#*`fOgIE&N^ce;Q9hvKY(N_d)~2TNrc{-P$Ar5L(066=^lun%uTNZ3kbz{L_1 zFj{G>MAl@s^jI66?S33QX8bA83Hm;~0d42hMIaD#=lOr;I)Bae{X2K~A$Pbg2>kA9 zcIi-^gF?N%kMFG>t#j~GKh3B3-X*w!pZYYrNm@5=^1wG%=kT|F-g6pnFV#8xtzYzX Z8$#fxehAC(8Y>IDP(At^_>eJ1{|o4Uqs#yR delta 3605 zcmcguU2GIp6rP#go!y=No!!}OciZi@bfFBTEzlMODi-+@&{Ck~rwFC(Or=P7akikv zhb@Va0wxr%#`^Q%gD=Qxf=!5tKKSN?gpio%W)p+FX#9y0x(PAy!E^5HPNywRfEYKK zZ|>f6=G=47{mwaeYWT-4@3$V0z`*a^l^>23;@7=5Ir2fhlRZjSw3NN&wC>1SuW=}6 zm>!$Tt9HfK24jv2at|kU4bOHb!y2!6<_9r7-G}-Q-it$8HUcooU&t z4veJ89ifT4%8^NHznj%6K#;d4`-W9=vayk@x9_r5!&Ho1wukJsFzS$9ijt(N#4HEr$i{~U9Vv)CN>1jZ-jT<+V^&Vq&W@ng@9Tc#P##^2)^F`Sc* z!mP-TZ62?svkx5PYe$^!`bKfrnJ3vbM8s)|h(IGE7}0W>WG*Um8!}Gum{xSFH5h2G z5lkNh({BXRGb+jfvP8&|?}UvIjgV^_AA{X%Rixi_AA;9FY~FaL2B##15JDIM!>FwU zSc8KE0u}1v^=cx{1Q3DJTva+ERq&2n`5L2y_9uXl7>BWhg;Fm(u3| z9`T^|&pxQ)@g1Hw8z{?3$6z+Vk8K%m){k1|nerGV!5)y5^J6-Qymw3iK*!Nrsk337Jg!K&NCWT-5O^l2Vo}FD<8vN`#!X1S0{C zfesJ_idK|lJwYg1l_XJqL)0ny6s%pODEXs&Q>lQpMzS`o1!UxIWQWj-O4t`VNK9SC*^;*moJi-{)8v=fF7`FDrs}7Kli~&V`aJqFJ1J!8 z@p6Ku!v?hT3rnWwf0)zwbq<;mNmm_%&MD>Dh>?B)(;`vb1f)*|q)(->>)@zYj*<1XUlr4*5kg%` zA7`T5rT2fFY#~abgV!^MOeK!99aFKz9b6qG56v5zLCCRuD0ynQU)6e3gS|t^(`tHT zSWS+Q6x_WWa%5rFBhfy`mHq?P&v1P=THJ%M7ojVI*o}aMP+bziI?(GZiHoXelaJVEZD6wEE}0@ z{z!Un-85sh@ekay+f?V&rTXt#PKyz>p_^?bUpD;Ru;9h;D1`7PJm!-ncN@cp7ULwn z2Oh!^jXAvszKDOV2(%6z#u2^k9l=pK!ov=&2L%W>7_z5ng6)AT*sZDSD|nWTkuO$u z>hwKAysPpHd;1c6|1%mg@oMiw5K`Fgrs5qj#dYHHUC`Gu9Ta+)Rj3GogrKT;Wvi#w>Gjvu#>OXM46i~<6vlFafmdN&6X@El0FF diff --git a/app/schemas/py_schemas.py b/app/schemas/py_schemas.py index 4fa88ec..5b982fa 100644 --- a/app/schemas/py_schemas.py +++ b/app/schemas/py_schemas.py @@ -19,13 +19,32 @@ class SectorSchema(BaseModel): Leave name empty if uncertain about the sector classification. """ - id: int = Field( - ge=0, description="Sector ID, must be 0 or greater. Use 0 if uncertain." + id: Optional[int] = Field( + default=None, + ge=0, + description="Sector ID, must be 0 or greater. Use 0 if uncertain.", ) - name: str = Field( - description="Sector name. Leave empty string if not clearly identifiable from the data." + name: Optional[str] = Field( + default=None, + description="Sector name. Leave empty string if not clearly identifiable from the data.", ) + @field_validator("name", mode="before") + @classmethod + def empty_string_to_none(cls, v): + """Convert empty strings to None""" + if v == "" or (isinstance(v, str) and v.strip() == ""): + return None + return v + + @field_validator("id", mode="before") + @classmethod + def zero_to_none(cls, v): + """Convert 0 to None for optional id field""" + if v == 0: + return None + return v + class Config: from_attributes = True @@ -36,22 +55,45 @@ class InvestorMemberSchema(BaseModel): Leave fields empty if uncertain about the member details. """ - id: int = Field( - ge=0, description="Member ID, must be 0 or greater. Use 0 if uncertain." + id: Optional[int] = Field( + default=None, + ge=0, + description="Member ID, must be 0 or greater. Use 0 if uncertain.", ) - name: str = Field( - description="Team member name. Leave empty string if not clearly identifiable." + name: Optional[str] = Field( + default=None, + description="Team member name. Leave empty string if not clearly identifiable.", ) - role: str = Field( - description="Team member role/title. Leave empty string if not clearly identifiable." + role: Optional[str] = Field( + default=None, + description="Team member role/title. Leave empty string if not clearly identifiable.", ) - email: str = Field( - description="Team member email. Leave empty string if not clearly identifiable or not provided." + email: Optional[str] = Field( + default=None, + description="Team member email. Leave empty string if not clearly identifiable or not provided.", ) - investor_id: int = Field( - ge=0, description="Investor ID, must be 0 or greater. Use 0 if uncertain." + investor_id: Optional[int] = Field( + default=None, + ge=0, + description="Investor ID, must be 0 or greater. Use 0 if uncertain.", ) + @field_validator("name", "role", "email", mode="before") + @classmethod + def empty_string_to_none(cls, v): + """Convert empty strings to None""" + if v == "" or (isinstance(v, str) and v.strip() == ""): + return None + return v + + @field_validator("id", "investor_id", mode="before") + @classmethod + def zero_to_none(cls, v): + """Convert 0 to None for optional integer fields""" + if v == 0: + return None + return v + class Config: from_attributes = True @@ -62,25 +104,45 @@ class CompanyMemberSchema(BaseModel): Leave fields empty if uncertain about the member details. """ - id: int = Field( - ge=0, description="Member ID, must be 0 or greater. Use 0 if uncertain." + id: Optional[int] = Field( + default=None, + ge=0, + description="Member ID, must be 0 or greater. Use 0 if uncertain.", ) name: Optional[str] = Field( - default="", + default=None, description="Company member name. Leave empty if not clearly identifiable.", ) linkedin: Optional[str] = Field( - default="", + default=None, description="LinkedIn profile URL. Leave empty if not provided or uncertain.", ) role: Optional[str] = Field( - default="", + default=None, description="Company member role/title. Leave empty if not clearly identifiable.", ) - company_id: int = Field( - ge=0, description="Company ID, must be 0 or greater. Use 0 if uncertain." + company_id: Optional[int] = Field( + default=None, + ge=0, + description="Company ID, must be 0 or greater. Use 0 if uncertain.", ) + @field_validator("name", "linkedin", "role", mode="before") + @classmethod + def empty_string_to_none(cls, v): + """Convert empty strings to None""" + if v == "" or (isinstance(v, str) and v.strip() == ""): + return None + return v + + @field_validator("id", "company_id", mode="before") + @classmethod + def zero_to_none(cls, v): + """Convert 0 to None for optional integer fields""" + if v == 0: + return None + return v + class Config: from_attributes = True @@ -91,20 +153,25 @@ class CompanySchema(BaseModel): Leave optional fields empty if uncertain. Integer values must be 0 or greater. """ - id: int = Field( - ge=0, description="Company ID, must be 0 or greater. Use 0 if uncertain." + id: Optional[int] = Field( + default=None, + ge=0, + description="Company ID, must be 0 or greater. Use 0 if uncertain.", ) - name: str = Field( - description="Company name. Leave empty string if not clearly identifiable." + name: Optional[str] = Field( + default=None, + description="Company name. Leave empty string if not clearly identifiable.", ) - industry: str = Field( - description="Company industry/sector. Leave empty string if not clearly identifiable." + industry: Optional[str] = Field( + default=None, + description="Company industry/sector. Leave empty string if not clearly identifiable.", ) - location: str = Field( - description="Company location/address. Leave empty string if not clearly identifiable." + location: Optional[str] = Field( + default=None, + description="Company location/address. Leave empty string if not clearly identifiable.", ) description: Optional[str] = Field( - default="", + default=None, description="Company description. Leave empty if not clearly available or uncertain.", ) founded_year: Optional[int] = Field( @@ -113,10 +180,28 @@ class CompanySchema(BaseModel): description="Year company was founded, must be 0 or greater. Leave None if not clearly identifiable or uncertain.", ) website: Optional[str] = Field( - default="", + default=None, description="Company website URL. Leave empty if not provided or uncertain.", ) + @field_validator( + "name", "industry", "location", "description", "website", mode="before" + ) + @classmethod + def empty_string_to_none(cls, v): + """Convert empty strings to None""" + if v == "" or (isinstance(v, str) and v.strip() == ""): + return None + return v + + @field_validator("id", "founded_year", mode="before") + @classmethod + def zero_to_none(cls, v): + """Convert 0 to None for founded_year""" + if v == 0: + return None + return v + @field_validator("founded_year", mode="before") @classmethod def validate_founded_year(cls, v): @@ -141,40 +226,71 @@ class InvestorSchema(BaseModel): Leave optional fields empty if uncertain. All numeric values must be 0 or greater. """ - id: int = Field( - ge=0, description="Investor ID, must be 0 or greater. Use 0 if uncertain." + id: Optional[int] = Field( + default=None, + ge=0, + description="Investor ID, must be 0 or greater. Use 0 if uncertain.", ) - name: str = Field( - description="Investor name. Do not return any special characters, Just the name as a string." + name: Optional[str] = Field( + default=None, + description="Investor name. Do not return any special characters, Just the name as a string.", ) description: Optional[str] = Field( - default="", + default=None, description="Investor description. Leave empty if not clearly available or uncertain.", ) - aum: int | None = Field( + aum: Optional[int] = Field( + default=None, ge=0, description="Assets Under Management in USD, must be 0 or greater. Use 0 if not clearly identifiable or uncertain.", ) - check_size_lower: int | None = Field( + check_size_lower: Optional[int] = Field( + default=None, ge=0, description="Lower bound of typical investment check size in USD, must be 0 or greater. Use 0 if not clearly identifiable.", ) - check_size_upper: int | None = Field( + check_size_upper: Optional[int] = Field( + default=None, ge=0, description="Upper bound of typical investment check size in USD, must be 0 or greater. Use 0 if not clearly identifiable.", ) - geographic_focus: str | None = Field( + geographic_focus: Optional[str] = Field( + default=None, description="Geographic investment focus. Do not return any special characters, Just locations separated by commas. Leave empty if not clearly identifiable.", ) stage_focus: InvestmentStage = Field( - description="Investment stage focus. Use SEED as default if uncertain." + default=InvestmentStage.SEED, + description="Investment stage focus. Use SEED as default if uncertain.", ) - number_of_investments: int | None = Field( + number_of_investments: Optional[int] = Field( + default=None, ge=0, - default=0, description="Total number of investments made, must be 0 or greater. Use 0 if not clearly identifiable.", ) + @field_validator("name", "description", "geographic_focus", mode="before") + @classmethod + def empty_string_to_none(cls, v): + """Convert empty strings to None""" + if v == "" or (isinstance(v, str) and v.strip() == ""): + return None + return v + + @field_validator( + "id", + "aum", + "check_size_lower", + "check_size_upper", + "number_of_investments", + mode="before", + ) + @classmethod + def zero_to_none(cls, v): + """Convert 0 to None for optional integer fields""" + if v == 0: + return None + return v + class Config: from_attributes = True diff --git a/app/services/__pycache__/llm_parser.cpython-312.pyc b/app/services/__pycache__/llm_parser.cpython-312.pyc index e64b11789a573cbdc1281bde11e04fe26dbdf560..7e4223f87d770f71be87f76752cce071e9fda6b5 100644 GIT binary patch delta 54 zcmcbaem+|0bp&7I7)jEriV JkFyBr0{{SZ5!wI% delta 64 zcmX?~elwl-G%qg~0}vdXa6M!3M&5X4S()_w{Pdhu{q)q_%)CrpBRx~yw4B7^4Bec} TlGM%h%(je-2AlV?2