Update .gitignore to exclude __pycache__ directories and modify schemas to allow optional fields for better flexibility; adjust batch size in InvestorProcessor for improved processing efficiency.
This commit is contained in:
+3
-2
@@ -8,8 +8,9 @@
|
||||
|
||||
/chroma_db
|
||||
|
||||
/*__pycache__*/
|
||||
*__pycache__
|
||||
|
||||
/*.db
|
||||
|
||||
/*.cypython-*
|
||||
*.cypython
|
||||
|
||||
|
||||
Binary file not shown.
Binary file not shown.
@@ -145,31 +145,31 @@ class InvestorSchema(BaseModel):
|
||||
ge=0, description="Investor ID, must be 0 or greater. Use 0 if uncertain."
|
||||
)
|
||||
name: str = Field(
|
||||
description="Investor name. Leave empty string if not clearly identifiable."
|
||||
description="Investor name. Do not return any special characters, Just the name as a string."
|
||||
)
|
||||
description: Optional[str] = Field(
|
||||
default="",
|
||||
description="Investor description. Leave empty if not clearly available or uncertain.",
|
||||
)
|
||||
aum: int = Field(
|
||||
aum: int | None = Field(
|
||||
ge=0,
|
||||
description="Assets Under Management in USD, must be 0 or greater. Use 0 if not clearly identifiable or uncertain.",
|
||||
)
|
||||
check_size_lower: int = Field(
|
||||
check_size_lower: int | None = Field(
|
||||
ge=0,
|
||||
description="Lower bound of typical investment check size in USD, must be 0 or greater. Use 0 if not clearly identifiable.",
|
||||
)
|
||||
check_size_upper: int = Field(
|
||||
check_size_upper: int | None = Field(
|
||||
ge=0,
|
||||
description="Upper bound of typical investment check size in USD, must be 0 or greater. Use 0 if not clearly identifiable.",
|
||||
)
|
||||
geographic_focus: str = Field(
|
||||
description="Geographic investment focus. Leave empty string if not clearly identifiable."
|
||||
geographic_focus: str | None = Field(
|
||||
description="Geographic investment focus. Do not return any special characters, Just locations separated by commas. Leave empty if not clearly identifiable.",
|
||||
)
|
||||
stage_focus: InvestmentStage = Field(
|
||||
description="Investment stage focus. Use SEED as default if uncertain."
|
||||
)
|
||||
number_of_investments: int = Field(
|
||||
number_of_investments: int | None = Field(
|
||||
ge=0,
|
||||
default=0,
|
||||
description="Total number of investments made, must be 0 or greater. Use 0 if not clearly identifiable.",
|
||||
|
||||
@@ -25,50 +25,51 @@ class SectorSchema(BaseModel):
|
||||
class InvestorMemberSchema(BaseModel):
|
||||
id: int
|
||||
name: str
|
||||
role: str
|
||||
email: str
|
||||
role: str | None
|
||||
email: str | None
|
||||
|
||||
class Config:
|
||||
from_attributes = True
|
||||
|
||||
|
||||
class CompanyMemberSchema(BaseModel):
|
||||
id: int
|
||||
name: Optional[str] = None
|
||||
linkedin: Optional[str] = None
|
||||
role: Optional[str] = None
|
||||
name: Optional[str]
|
||||
linkedin: Optional[str]
|
||||
role: Optional[str]
|
||||
company_id: int
|
||||
|
||||
class Config:
|
||||
from_attributes = True
|
||||
|
||||
|
||||
class CompanySchema(BaseModel):
|
||||
id: int
|
||||
name: str
|
||||
industry: str
|
||||
location: str
|
||||
industry: str | None
|
||||
location: str | None
|
||||
description: Optional[str]
|
||||
founded_year: Optional[int]
|
||||
website: Optional[str]
|
||||
created_at: Optional[datetime]
|
||||
updated_at: Optional[datetime]
|
||||
created_at: Optional[datetime] = None
|
||||
updated_at: Optional[datetime] = None
|
||||
|
||||
class Config:
|
||||
from_attributes = True
|
||||
|
||||
|
||||
|
||||
class InvestorSchema(BaseModel):
|
||||
id: int
|
||||
name: str
|
||||
description: Optional[str]
|
||||
aum: int
|
||||
check_size_lower: int
|
||||
check_size_upper: int
|
||||
geographic_focus: str
|
||||
aum: int | None
|
||||
check_size_lower: int | None
|
||||
check_size_upper: int | None
|
||||
geographic_focus: str | None
|
||||
stage_focus: InvestmentStage
|
||||
number_of_investments: int
|
||||
created_at: Optional[datetime]
|
||||
updated_at: Optional[datetime]
|
||||
number_of_investments: int | None
|
||||
created_at: Optional[datetime] = None
|
||||
updated_at: Optional[datetime] = None
|
||||
|
||||
class Config:
|
||||
from_attributes = True
|
||||
@@ -95,5 +96,6 @@ class CompanyData(BaseModel): # Renamed from CompaniesData for consistency
|
||||
class Config:
|
||||
from_attributes = True
|
||||
|
||||
|
||||
class InvestorList(BaseModel):
|
||||
investors: List[InvestorData]
|
||||
Binary file not shown.
@@ -183,7 +183,7 @@ class InvestorProcessor:
|
||||
|
||||
try:
|
||||
# Process rows in batches asynchronously
|
||||
batch_size = 15 # Adjust batch size as needed
|
||||
batch_size = 20 # Adjust batch size as needed
|
||||
rows = [(idx, row) for idx, row in df.iterrows()]
|
||||
|
||||
for i in range(0, len(rows), batch_size):
|
||||
@@ -251,7 +251,7 @@ class InvestorProcessor:
|
||||
|
||||
try:
|
||||
# Process rows in batches asynchronously
|
||||
batch_size = 15 # Adjust batch size as needed
|
||||
batch_size = 20 # Adjust batch size as needed
|
||||
rows = [(idx, row) for idx, row in df.iterrows()]
|
||||
|
||||
for i in range(0, len(rows), batch_size):
|
||||
|
||||
Reference in New Issue
Block a user