Update .gitignore to exclude __pycache__ directories at any depth, modify schemas to accept None for optional fields for better flexibility, and increase the batch size in InvestorProcessor from 15 to 20 for improved processing efficiency.
This commit is contained in:
+3
-2
@@ -8,8 +8,9 @@
|
|||||||
|
|
||||||
/chroma_db
|
/chroma_db
|
||||||
|
|
||||||
/*__pycache__*/
|
*__pycache__
|
||||||
|
|
||||||
/*.db
|
/*.db
|
||||||
|
|
||||||
/*.cypython-*
|
*.cypython
|
||||||
|
|
||||||
|
|||||||
Binary file not shown.
Binary file not shown.
@@ -145,31 +145,31 @@ class InvestorSchema(BaseModel):
|
|||||||
ge=0, description="Investor ID, must be 0 or greater. Use 0 if uncertain."
|
ge=0, description="Investor ID, must be 0 or greater. Use 0 if uncertain."
|
||||||
)
|
)
|
||||||
name: str = Field(
|
name: str = Field(
|
||||||
description="Investor name. Leave empty string if not clearly identifiable."
|
description="Investor name. Do not return any special characters, Just the name as a string."
|
||||||
)
|
)
|
||||||
description: Optional[str] = Field(
|
description: Optional[str] = Field(
|
||||||
default="",
|
default="",
|
||||||
description="Investor description. Leave empty if not clearly available or uncertain.",
|
description="Investor description. Leave empty if not clearly available or uncertain.",
|
||||||
)
|
)
|
||||||
aum: int = Field(
|
aum: int | None = Field(
|
||||||
ge=0,
|
ge=0,
|
||||||
description="Assets Under Management in USD, must be 0 or greater. Use 0 if not clearly identifiable or uncertain.",
|
description="Assets Under Management in USD, must be 0 or greater. Use 0 if not clearly identifiable or uncertain.",
|
||||||
)
|
)
|
||||||
check_size_lower: int = Field(
|
check_size_lower: int | None = Field(
|
||||||
ge=0,
|
ge=0,
|
||||||
description="Lower bound of typical investment check size in USD, must be 0 or greater. Use 0 if not clearly identifiable.",
|
description="Lower bound of typical investment check size in USD, must be 0 or greater. Use 0 if not clearly identifiable.",
|
||||||
)
|
)
|
||||||
check_size_upper: int = Field(
|
check_size_upper: int | None = Field(
|
||||||
ge=0,
|
ge=0,
|
||||||
description="Upper bound of typical investment check size in USD, must be 0 or greater. Use 0 if not clearly identifiable.",
|
description="Upper bound of typical investment check size in USD, must be 0 or greater. Use 0 if not clearly identifiable.",
|
||||||
)
|
)
|
||||||
geographic_focus: str = Field(
|
geographic_focus: str | None = Field(
|
||||||
description="Geographic investment focus. Leave empty string if not clearly identifiable."
|
description="Geographic investment focus. Do not return any special characters, Just locations separated by commas. Leave empty if not clearly identifiable.",
|
||||||
)
|
)
|
||||||
stage_focus: InvestmentStage = Field(
|
stage_focus: InvestmentStage = Field(
|
||||||
description="Investment stage focus. Use SEED as default if uncertain."
|
description="Investment stage focus. Use SEED as default if uncertain."
|
||||||
)
|
)
|
||||||
number_of_investments: int = Field(
|
number_of_investments: int | None = Field(
|
||||||
ge=0,
|
ge=0,
|
||||||
default=0,
|
default=0,
|
||||||
description="Total number of investments made, must be 0 or greater. Use 0 if not clearly identifiable.",
|
description="Total number of investments made, must be 0 or greater. Use 0 if not clearly identifiable.",
|
||||||
|
|||||||
@@ -25,50 +25,51 @@ class SectorSchema(BaseModel):
|
|||||||
class InvestorMemberSchema(BaseModel):
|
class InvestorMemberSchema(BaseModel):
|
||||||
id: int
|
id: int
|
||||||
name: str
|
name: str
|
||||||
role: str
|
role: str | None
|
||||||
email: str
|
email: str | None
|
||||||
|
|
||||||
class Config:
|
class Config:
|
||||||
from_attributes = True
|
from_attributes = True
|
||||||
|
|
||||||
|
|
||||||
class CompanyMemberSchema(BaseModel):
|
class CompanyMemberSchema(BaseModel):
|
||||||
id: int
|
id: int
|
||||||
name: Optional[str] = None
|
name: Optional[str]
|
||||||
linkedin: Optional[str] = None
|
linkedin: Optional[str]
|
||||||
role: Optional[str] = None
|
role: Optional[str]
|
||||||
company_id: int
|
company_id: int
|
||||||
|
|
||||||
class Config:
|
class Config:
|
||||||
from_attributes = True
|
from_attributes = True
|
||||||
|
|
||||||
|
|
||||||
class CompanySchema(BaseModel):
|
class CompanySchema(BaseModel):
|
||||||
id: int
|
id: int
|
||||||
name: str
|
name: str
|
||||||
industry: str
|
industry: str | None
|
||||||
location: str
|
location: str | None
|
||||||
description: Optional[str]
|
description: Optional[str]
|
||||||
founded_year: Optional[int]
|
founded_year: Optional[int]
|
||||||
website: Optional[str]
|
website: Optional[str]
|
||||||
created_at: Optional[datetime]
|
created_at: Optional[datetime] = None
|
||||||
updated_at: Optional[datetime]
|
updated_at: Optional[datetime] = None
|
||||||
|
|
||||||
class Config:
|
class Config:
|
||||||
from_attributes = True
|
from_attributes = True
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
class InvestorSchema(BaseModel):
|
class InvestorSchema(BaseModel):
|
||||||
id: int
|
id: int
|
||||||
name: str
|
name: str
|
||||||
description: Optional[str]
|
description: Optional[str]
|
||||||
aum: int
|
aum: int | None
|
||||||
check_size_lower: int
|
check_size_lower: int | None
|
||||||
check_size_upper: int
|
check_size_upper: int | None
|
||||||
geographic_focus: str
|
geographic_focus: str | None
|
||||||
stage_focus: InvestmentStage
|
stage_focus: InvestmentStage
|
||||||
number_of_investments: int
|
number_of_investments: int | None
|
||||||
created_at: Optional[datetime]
|
created_at: Optional[datetime] = None
|
||||||
updated_at: Optional[datetime]
|
updated_at: Optional[datetime] = None
|
||||||
|
|
||||||
class Config:
|
class Config:
|
||||||
from_attributes = True
|
from_attributes = True
|
||||||
@@ -95,5 +96,6 @@ class CompanyData(BaseModel): # Renamed from CompaniesData for consistency
|
|||||||
class Config:
|
class Config:
|
||||||
from_attributes = True
|
from_attributes = True
|
||||||
|
|
||||||
|
|
||||||
class InvestorList(BaseModel):
|
class InvestorList(BaseModel):
|
||||||
investors: List[InvestorData]
|
investors: List[InvestorData]
|
||||||
|
|||||||
Binary file not shown.
@@ -183,7 +183,7 @@ class InvestorProcessor:
|
|||||||
|
|
||||||
try:
|
try:
|
||||||
# Process rows in batches asynchronously
|
# Process rows in batches asynchronously
|
||||||
batch_size = 15 # Adjust batch size as needed
|
batch_size = 20 # Adjust batch size as needed
|
||||||
rows = [(idx, row) for idx, row in df.iterrows()]
|
rows = [(idx, row) for idx, row in df.iterrows()]
|
||||||
|
|
||||||
for i in range(0, len(rows), batch_size):
|
for i in range(0, len(rows), batch_size):
|
||||||
@@ -251,7 +251,7 @@ class InvestorProcessor:
|
|||||||
|
|
||||||
try:
|
try:
|
||||||
# Process rows in batches asynchronously
|
# Process rows in batches asynchronously
|
||||||
batch_size = 15 # Adjust batch size as needed
|
batch_size = 20 # Adjust batch size as needed
|
||||||
rows = [(idx, row) for idx, row in df.iterrows()]
|
rows = [(idx, row) for idx, row in df.iterrows()]
|
||||||
|
|
||||||
for i in range(0, len(rows), batch_size):
|
for i in range(0, len(rows), batch_size):
|
||||||
|
|||||||
Reference in New Issue
Block a user