Update .gitignore to exclude __pycache__ directories, make schema fields optional for better flexibility, and increase the InvestorProcessor batch size from 15 to 20 for improved processing efficiency.

2025-09-26 15:56:29 +01:00
parent f2bbcb96f3
commit abac19c6ae
7 changed files with 32 additions and 29 deletions
@@ -8,8 +8,9 @@

 /chroma_db

-/*__pycache__*/
+*__pycache__

 /*.db

-/*.cypython-*
+*.cypython
+
@@ -145,31 +145,31 @@ class InvestorSchema(BaseModel):
        ge=0, description="Investor ID, must be 0 or greater. Use 0 if uncertain."
    )
    name: str = Field(
-        description="Investor name. Leave empty string if not clearly identifiable."
+        description="Investor name. Do not return any special characters, Just the name as a string."
    )
    description: Optional[str] = Field(
        default="",
        description="Investor description. Leave empty if not clearly available or uncertain.",
    )
-    aum: int = Field(
+    aum: int | None = Field(
        ge=0,
        description="Assets Under Management in USD, must be 0 or greater. Use 0 if not clearly identifiable or uncertain.",
    )
-    check_size_lower: int = Field(
+    check_size_lower: int | None = Field(
        ge=0,
        description="Lower bound of typical investment check size in USD, must be 0 or greater. Use 0 if not clearly identifiable.",
    )
-    check_size_upper: int = Field(
+    check_size_upper: int | None = Field(
        ge=0,
        description="Upper bound of typical investment check size in USD, must be 0 or greater. Use 0 if not clearly identifiable.",
    )
-    geographic_focus: str = Field(
-        description="Geographic investment focus. Leave empty string if not clearly identifiable."
+    geographic_focus: str | None = Field(
+        description="Geographic investment focus. Do not return any special characters, Just locations separated by commas. Leave empty if not clearly identifiable.",
    )
    stage_focus: InvestmentStage = Field(
        description="Investment stage focus. Use SEED as default if uncertain."
    )
-    number_of_investments: int = Field(
+    number_of_investments: int | None = Field(
        ge=0,
        default=0,
        description="Total number of investments made, must be 0 or greater. Use 0 if not clearly identifiable.",
@@ -25,50 +25,51 @@ class SectorSchema(BaseModel):
 class InvestorMemberSchema(BaseModel):
    id: int
    name: str
-    role: str
-    email: str
+    role: str | None
+    email: str | None

    class Config:
        from_attributes = True

+
 class CompanyMemberSchema(BaseModel):
    id: int
-    name: Optional[str] = None
-    linkedin: Optional[str] = None
-    role: Optional[str] = None
+    name: Optional[str]
+    linkedin: Optional[str]
+    role: Optional[str]
    company_id: int

    class Config:
        from_attributes = True

+
 class CompanySchema(BaseModel):
    id: int
    name: str
-    industry: str
-    location: str
+    industry: str | None
+    location: str | None
    description: Optional[str]
    founded_year: Optional[int]
    website: Optional[str]
-    created_at: Optional[datetime]
-    updated_at: Optional[datetime]
+    created_at: Optional[datetime] = None
+    updated_at: Optional[datetime] = None

    class Config:
        from_attributes = True


-
 class InvestorSchema(BaseModel):
    id: int
    name: str
    description: Optional[str]
-    aum: int
-    check_size_lower: int
-    check_size_upper: int
-    geographic_focus: str
+    aum: int | None
+    check_size_lower: int | None
+    check_size_upper: int | None
+    geographic_focus: str | None
    stage_focus: InvestmentStage
-    number_of_investments: int
-    created_at: Optional[datetime]
-    updated_at: Optional[datetime]
+    number_of_investments: int | None
+    created_at: Optional[datetime] = None
+    updated_at: Optional[datetime] = None

    class Config:
        from_attributes = True
@@ -95,5 +96,6 @@ class CompanyData(BaseModel):  # Renamed from CompaniesData for consistency
    class Config:
        from_attributes = True

+
 class InvestorList(BaseModel):
    investors: List[InvestorData]
@@ -183,7 +183,7 @@ class InvestorProcessor:

        try:
            # Process rows in batches asynchronously
-            batch_size = 15  # Adjust batch size as needed
+            batch_size = 20  # Adjust batch size as needed
            rows = [(idx, row) for idx, row in df.iterrows()]

            for i in range(0, len(rows), batch_size):
@@ -251,7 +251,7 @@ class InvestorProcessor:

        try:
            # Process rows in batches asynchronously
-            batch_size = 15  # Adjust batch size as needed
+            batch_size = 20  # Adjust batch size as needed
            rows = [(idx, row) for idx, row in df.iterrows()]

            for i in range(0, len(rows), batch_size):