diff --git a/.gitignore b/.gitignore index 2ae4ed8..f6d2d64 100644 --- a/.gitignore +++ b/.gitignore @@ -8,8 +8,9 @@ /chroma_db -/*__pycache__*/ +*__pycache__ /*.db -/*.cypython-* \ No newline at end of file +*.cypython + diff --git a/app/schemas/__pycache__/py_schemas.cpython-312.pyc b/app/schemas/__pycache__/py_schemas.cpython-312.pyc index e923ca2..e89ff8e 100644 Binary files a/app/schemas/__pycache__/py_schemas.cpython-312.pyc and b/app/schemas/__pycache__/py_schemas.cpython-312.pyc differ diff --git a/app/schemas/__pycache__/router_schemas.cpython-312.pyc b/app/schemas/__pycache__/router_schemas.cpython-312.pyc index e90b44a..b8cad73 100644 Binary files a/app/schemas/__pycache__/router_schemas.cpython-312.pyc and b/app/schemas/__pycache__/router_schemas.cpython-312.pyc differ diff --git a/app/schemas/py_schemas.py b/app/schemas/py_schemas.py index ad902d0..4fa88ec 100644 --- a/app/schemas/py_schemas.py +++ b/app/schemas/py_schemas.py @@ -145,31 +145,31 @@ class InvestorSchema(BaseModel): ge=0, description="Investor ID, must be 0 or greater. Use 0 if uncertain." ) name: str = Field( - description="Investor name. Leave empty string if not clearly identifiable." + description="Investor name. Do not return any special characters, Just the name as a string." ) description: Optional[str] = Field( default="", description="Investor description. Leave empty if not clearly available or uncertain.", ) - aum: int = Field( + aum: int | None = Field( ge=0, description="Assets Under Management in USD, must be 0 or greater. Use 0 if not clearly identifiable or uncertain.", ) - check_size_lower: int = Field( + check_size_lower: int | None = Field( ge=0, description="Lower bound of typical investment check size in USD, must be 0 or greater. Use 0 if not clearly identifiable.", ) - check_size_upper: int = Field( + check_size_upper: int | None = Field( ge=0, description="Upper bound of typical investment check size in USD, must be 0 or greater. Use 0 if not clearly identifiable.", ) - geographic_focus: str = Field( - description="Geographic investment focus. Leave empty string if not clearly identifiable." + geographic_focus: str | None = Field( + description="Geographic investment focus. Do not return any special characters, Just locations separated by commas. Leave empty if not clearly identifiable.", ) stage_focus: InvestmentStage = Field( description="Investment stage focus. Use SEED as default if uncertain." ) - number_of_investments: int = Field( + number_of_investments: int | None = Field( ge=0, default=0, description="Total number of investments made, must be 0 or greater. Use 0 if not clearly identifiable.", diff --git a/app/schemas/router_schemas.py b/app/schemas/router_schemas.py index 5e34ee2..1d1a685 100644 --- a/app/schemas/router_schemas.py +++ b/app/schemas/router_schemas.py @@ -25,50 +25,51 @@ class SectorSchema(BaseModel): class InvestorMemberSchema(BaseModel): id: int name: str - role: str - email: str + role: str | None + email: str | None class Config: from_attributes = True + class CompanyMemberSchema(BaseModel): id: int - name: Optional[str] = None - linkedin: Optional[str] = None - role: Optional[str] = None + name: Optional[str] + linkedin: Optional[str] + role: Optional[str] company_id: int class Config: from_attributes = True + class CompanySchema(BaseModel): id: int name: str - industry: str - location: str + industry: str | None + location: str | None description: Optional[str] founded_year: Optional[int] website: Optional[str] - created_at: Optional[datetime] - updated_at: Optional[datetime] + created_at: Optional[datetime] = None + updated_at: Optional[datetime] = None class Config: from_attributes = True - class InvestorSchema(BaseModel): id: int name: str description: Optional[str] - aum: int - check_size_lower: int - check_size_upper: int - geographic_focus: str + aum: int | None + check_size_lower: int | None + check_size_upper: int | None + geographic_focus: str | None stage_focus: InvestmentStage - number_of_investments: int - created_at: Optional[datetime] - updated_at: Optional[datetime] + number_of_investments: int | None + created_at: Optional[datetime] = None + updated_at: Optional[datetime] = None class Config: from_attributes = True @@ -95,5 +96,6 @@ class CompanyData(BaseModel): # Renamed from CompaniesData for consistency class Config: from_attributes = True + class InvestorList(BaseModel): - investors: List[InvestorData] \ No newline at end of file + investors: List[InvestorData] diff --git a/app/services/__pycache__/llm_parser.cpython-312.pyc b/app/services/__pycache__/llm_parser.cpython-312.pyc index 0837f36..17400ff 100644 Binary files a/app/services/__pycache__/llm_parser.cpython-312.pyc and b/app/services/__pycache__/llm_parser.cpython-312.pyc differ diff --git a/app/services/llm_parser.py b/app/services/llm_parser.py index b2966ef..4e5dc19 100644 --- a/app/services/llm_parser.py +++ b/app/services/llm_parser.py @@ -183,7 +183,7 @@ class InvestorProcessor: try: # Process rows in batches asynchronously - batch_size = 15 # Adjust batch size as needed + batch_size = 20 # Adjust batch size as needed rows = [(idx, row) for idx, row in df.iterrows()] for i in range(0, len(rows), batch_size): @@ -251,7 +251,7 @@ class InvestorProcessor: try: # Process rows in batches asynchronously - batch_size = 15 # Adjust batch size as needed + batch_size = 20 # Adjust batch size as needed rows = [(idx, row) for idx, row in df.iterrows()] for i in range(0, len(rows), batch_size):