From abac19c6aea3f383f91e5051f682e8f5b163d23b Mon Sep 17 00:00:00 2001 From: bolade Date: Fri, 26 Sep 2025 15:56:29 +0100 Subject: [PATCH] Update .gitignore to exclude __pycache__ directories and modify schemas to allow optional fields for better flexibility; adjust batch size in InvestorProcessor for improved processing efficiency. --- .gitignore | 5 ++- .../__pycache__/py_schemas.cpython-312.pyc | Bin 10708 -> 10845 bytes .../router_schemas.cpython-312.pyc | Bin 4537 -> 4644 bytes app/schemas/py_schemas.py | 14 +++---- app/schemas/router_schemas.py | 38 +++++++++--------- .../__pycache__/llm_parser.cpython-312.pyc | Bin 13262 -> 13262 bytes app/services/llm_parser.py | 4 +- 7 files changed, 32 insertions(+), 29 deletions(-) diff --git a/.gitignore b/.gitignore index 2ae4ed8..f6d2d64 100644 --- a/.gitignore +++ b/.gitignore @@ -8,8 +8,9 @@ /chroma_db -/*__pycache__*/ +*__pycache__ /*.db -/*.cypython-* \ No newline at end of file +*.cypython + diff --git a/app/schemas/__pycache__/py_schemas.cpython-312.pyc b/app/schemas/__pycache__/py_schemas.cpython-312.pyc index e923ca2d2ee0719a3c96b660f8a90ee6d3a82734..e89ff8e2ffea520a8a35f35fe0796912c1ee75c7 100644 GIT binary patch delta 626 zcmb7;zb^w}9LC>!udb-;54}2dDzAj0iFCmtHcfc;j*s;YZUgE=V}f-|)MJ#gza;!vLZr#F z#5|&{JoSr&NrcZ(y%6#t?d5n${86;lJ?Z$SMuDMi1EbR$8!ZOedHX{qGiI$4m)Z`p zut`!VZ6nL3$RV`gSO{B1>`X@>g22?b{)S9 zOg@XaV6K}J_8OD5H7061!U5aY5~KgHuh=@KIoooPOY;0PB?ek3BHcDk%%*z#t}5D$ zz#d|Q3@0)$BCzgYl-(xU1j1etsZF)B5@@L?8`TC;m9|E9qq}sk;{%8d(t9K5N%HUw zf>;{zyhpnCDTn%T`e^wdK&`$At0MeUhukl5KU?im;>lEPLpV}V@>Rbt$=}3}`_Gh< Kp{vub!N4a}>#D{8 delta 423 zcmcZ`awVAeG%qg~0}v!Eyp~b3kvCqP$&7I_BU{kq1L7u=XR~qDr?R9lrSPv|SlSljX>L`nPk#AiTj}!2#?tMKK9hG#%T5;NFxc!Z6U@lyH+i0H z3*)cJ3Vb4y&E>=xLniyl=_Lt>-r(lBAyWN?m05tT!MQ{DGXpa#+eZ!tStX#v2POtl z7bc*bEZYSZkqODfN{S?XVP=Brnfy^sPE<)9WQ~B}4UvK`>?}$^(b>Q_~j z5j=~#zwX}tpk5{TCOwE%F4%F4dm6#9rL*kIYG(z&B~H~Ny02)i?p*fBB;AQu@dl*M(`_wP z=d%z4xw%1Zd&md1aNzl06+&C2^L(COD$X&pD?(GUaO-x8*$aNY0msHrf*A@p~rL=7>0B#j<%j9yY`W4e>S?av|hH zzsR`7nF7R!;TkJV>8dOCrhlZ$9x?o*lzx$_c%Ml^ zWqD%LBH;!Fv$DB}4@5ZR0}LM=oj6Q;+pGn}NyV6H+^b7P^x{}f{F1rhV5gftl{{sxM-0T1V delta 943 zcmZuvO=}ZT6rGpM*PF~tlNdu1OEM&F=u~YNLZ#wDP-s(W5VVVyLT$}R1k=h)5wdC_ z3dt^TH$@OZbmdCBa_Pd29}5YEp>DeH4~TK^eJ{x*Ee_0i_hTOSoO9>R#K(g1)i5*x zAM@c+HS^xMEs`ZT2^D4>$q_5z>)7Fu$fA(2AXKDhLPdTTVs`{#O|C1pdYpi$lz=ay z21jBmTy`z^B%hm_#1C8sX?#5k;kn?ek;a0r8I4j8B4%R=-b&NN#i{A_N@z`TWJh5) z2J^9|j?_34tkIF0#CeACh8*xzkf$2()aoIam&ZvNR^%qJAx>v9Y=|o}C@{FhpvWN0 z=)*;YUdvWl1=mz{7w=MqNx3KustMLm<#K(dCCBY|YKuG_}3aWlD7d$PG*zx$x-)HdA|6UZjP zhWa6Sm1Q{w2?kk&vQ42#C*&|Ma3^Y%=9oL)J%oW1e1~SNvFtN|S2TA%nmS1Rh~}9W z!w`PbY4VyG0k#(3!yWMrI&52K_Qs9s$yKo16Ux5h644U1Z;{rwV30=w3v{%=2rQs= z(;hoEOWRK!Z+FK<3*bbH!yoM%DZ%HlNtmOF2xUS?)LJ^_EAUofRv({hEc(fGM@*l> znAH(2|3a4i1VgDK;o(dDF(E6UnUBcw;jVdykPV!8pR7VL`kib+H~K3bS4r-`?h1Hx c6FSMYOI-nvt_8nig=n0Re0%<{fCt~sKOZ>GXaE2J diff --git a/app/schemas/py_schemas.py b/app/schemas/py_schemas.py index ad902d0..4fa88ec 100644 --- a/app/schemas/py_schemas.py +++ b/app/schemas/py_schemas.py @@ -145,31 +145,31 @@ class InvestorSchema(BaseModel): ge=0, description="Investor ID, must be 0 or greater. Use 0 if uncertain." ) name: str = Field( - description="Investor name. Leave empty string if not clearly identifiable." + description="Investor name. Do not return any special characters, Just the name as a string." ) description: Optional[str] = Field( default="", description="Investor description. Leave empty if not clearly available or uncertain.", ) - aum: int = Field( + aum: int | None = Field( ge=0, description="Assets Under Management in USD, must be 0 or greater. Use 0 if not clearly identifiable or uncertain.", ) - check_size_lower: int = Field( + check_size_lower: int | None = Field( ge=0, description="Lower bound of typical investment check size in USD, must be 0 or greater. Use 0 if not clearly identifiable.", ) - check_size_upper: int = Field( + check_size_upper: int | None = Field( ge=0, description="Upper bound of typical investment check size in USD, must be 0 or greater. Use 0 if not clearly identifiable.", ) - geographic_focus: str = Field( - description="Geographic investment focus. Leave empty string if not clearly identifiable." + geographic_focus: str | None = Field( + description="Geographic investment focus. Do not return any special characters, Just locations separated by commas. Leave empty if not clearly identifiable.", ) stage_focus: InvestmentStage = Field( description="Investment stage focus. Use SEED as default if uncertain." ) - number_of_investments: int = Field( + number_of_investments: int | None = Field( ge=0, default=0, description="Total number of investments made, must be 0 or greater. Use 0 if not clearly identifiable.", diff --git a/app/schemas/router_schemas.py b/app/schemas/router_schemas.py index 5e34ee2..1d1a685 100644 --- a/app/schemas/router_schemas.py +++ b/app/schemas/router_schemas.py @@ -25,50 +25,51 @@ class SectorSchema(BaseModel): class InvestorMemberSchema(BaseModel): id: int name: str - role: str - email: str + role: str | None + email: str | None class Config: from_attributes = True + class CompanyMemberSchema(BaseModel): id: int - name: Optional[str] = None - linkedin: Optional[str] = None - role: Optional[str] = None + name: Optional[str] + linkedin: Optional[str] + role: Optional[str] company_id: int class Config: from_attributes = True + class CompanySchema(BaseModel): id: int name: str - industry: str - location: str + industry: str | None + location: str | None description: Optional[str] founded_year: Optional[int] website: Optional[str] - created_at: Optional[datetime] - updated_at: Optional[datetime] + created_at: Optional[datetime] = None + updated_at: Optional[datetime] = None class Config: from_attributes = True - class InvestorSchema(BaseModel): id: int name: str description: Optional[str] - aum: int - check_size_lower: int - check_size_upper: int - geographic_focus: str + aum: int | None + check_size_lower: int | None + check_size_upper: int | None + geographic_focus: str | None stage_focus: InvestmentStage - number_of_investments: int - created_at: Optional[datetime] - updated_at: Optional[datetime] + number_of_investments: int | None + created_at: Optional[datetime] = None + updated_at: Optional[datetime] = None class Config: from_attributes = True @@ -95,5 +96,6 @@ class CompanyData(BaseModel): # Renamed from CompaniesData for consistency class Config: from_attributes = True + class InvestorList(BaseModel): - investors: List[InvestorData] \ No newline at end of file + investors: List[InvestorData] diff --git a/app/services/__pycache__/llm_parser.cpython-312.pyc b/app/services/__pycache__/llm_parser.cpython-312.pyc index 0837f36ce41c6407c744f84ee5dfeac14a422488..17400ff0cf0d4ce2c4bddfea26303c82d06703d7 100644 GIT binary patch delta 26 gcmX??elDHsG%qg~0}v=I-^g`OiBV+pOQoj*0CWoot^fc4 delta 26 gcmX??elDHsG%qg~0}!x8ZREPA#K^z-rP5OY0B+?7KmY&$ diff --git a/app/services/llm_parser.py b/app/services/llm_parser.py index b2966ef..4e5dc19 100644 --- a/app/services/llm_parser.py +++ b/app/services/llm_parser.py @@ -183,7 +183,7 @@ class InvestorProcessor: try: # Process rows in batches asynchronously - batch_size = 15 # Adjust batch size as needed + batch_size = 20 # Adjust batch size as needed rows = [(idx, row) for idx, row in df.iterrows()] for i in range(0, len(rows), batch_size): @@ -251,7 +251,7 @@ class InvestorProcessor: try: # Process rows in batches asynchronously - batch_size = 15 # Adjust batch size as needed + batch_size = 20 # Adjust batch size as needed rows = [(idx, row) for idx, row in df.iterrows()] for i in range(0, len(rows), batch_size):