"""Pydantic schemas describing investor and company data extracted by an LLM.

Class docstrings and ``Field`` descriptions in this module are emitted into the
JSON schema that pydantic generates, so they double as extraction instructions.
Their wording is therefore kept verbatim; maintenance notes live in ``#``
comments instead.
"""

from enum import Enum
from typing import List, Optional

from pydantic import BaseModel, ConfigDict, Field, field_validator


def _blank_to_none(value):
    """Return ``None`` for empty or whitespace-only strings, else *value*."""
    if isinstance(value, str) and not value.strip():
        return None
    return value


def _zero_to_none(value):
    """Return ``None`` when *value* equals 0, else *value* unchanged.

    The field descriptions tell the extractor to "use 0 if uncertain", so a
    zero is treated as "unknown" rather than as a real value.
    """
    if value == 0:
        return None
    return value


# Closed set of investment stages accepted for ``InvestorSchema.stage_focus``.
# (Documented with a comment rather than a docstring so the generated schema
# for the enum is not altered.)
class InvestmentStage(str, Enum):
    SEED = "SEED"
    SERIES_A = "SERIES_A"
    SERIES_B = "SERIES_B"
    SERIES_C = "SERIES_C"
    GROWTH = "GROWTH"
    LATE_STAGE = "LATE_STAGE"


class SectorSchema(BaseModel):
    """
    Expert parser: Only extract sector information if clearly identifiable.
    Leave name empty if uncertain about the sector classification.
    """

    # ``model_config`` replaces the class-based ``Config``, which pydantic v2
    # deprecates; ``from_attributes`` allows validation from attribute-bearing
    # objects as well as dicts.
    model_config = ConfigDict(from_attributes=True)

    id: Optional[int] = Field(
        default=None,
        ge=0,
        description="Sector ID, must be 0 or greater. Use 0 if uncertain.",
    )
    name: Optional[str] = Field(
        default=None,
        description="Sector name. Leave empty string if not clearly identifiable from the data.",
    )

    @field_validator("name", mode="before")
    @classmethod
    def empty_string_to_none(cls, v):
        """Normalize an empty or whitespace-only ``name`` to ``None``."""
        return _blank_to_none(v)

    @field_validator("id", mode="before")
    @classmethod
    def zero_to_none(cls, v):
        """Normalize an ``id`` of 0 (the "uncertain" marker) to ``None``."""
        return _zero_to_none(v)


class InvestorMemberSchema(BaseModel):
    """
    Expert parser: Only extract team member information if clearly identifiable.
    Leave fields empty if uncertain about the member details.
    """

    # Replaces the deprecated class-based ``Config``.
    model_config = ConfigDict(from_attributes=True)

    id: Optional[int] = Field(
        default=None,
        ge=0,
        description="Member ID, must be 0 or greater. Use 0 if uncertain.",
    )
    name: Optional[str] = Field(
        default=None,
        description="Team member name. Leave empty string if not clearly identifiable.",
    )
    role: Optional[str] = Field(
        default=None,
        description="Team member role/title. Leave empty string if not clearly identifiable.",
    )
    email: Optional[str] = Field(
        default=None,
        description="Team member email. Leave empty string if not clearly identifiable or not provided.",
    )
    investor_id: Optional[int] = Field(
        default=None,
        ge=0,
        description="Investor ID, must be 0 or greater. Use 0 if uncertain.",
    )

    @field_validator("name", "role", "email", mode="before")
    @classmethod
    def empty_string_to_none(cls, v):
        """Normalize empty or whitespace-only text fields to ``None``."""
        return _blank_to_none(v)

    @field_validator("id", "investor_id", mode="before")
    @classmethod
    def zero_to_none(cls, v):
        """Normalize a 0 in ``id`` / ``investor_id`` to ``None``."""
        return _zero_to_none(v)


class CompanyMemberSchema(BaseModel):
    """
    Expert parser: Only extract company member information if clearly identifiable.
    Leave fields empty if uncertain about the member details.
    """

    # Replaces the deprecated class-based ``Config``.
    model_config = ConfigDict(from_attributes=True)

    id: Optional[int] = Field(
        default=None,
        ge=0,
        description="Member ID, must be 0 or greater. Use 0 if uncertain.",
    )
    name: Optional[str] = Field(
        default=None,
        description="Company member name. Leave empty if not clearly identifiable.",
    )
    linkedin: Optional[str] = Field(
        default=None,
        description="LinkedIn profile URL. Leave empty if not provided or uncertain.",
    )
    role: Optional[str] = Field(
        default=None,
        description="Company member role/title. Leave empty if not clearly identifiable.",
    )
    company_id: Optional[int] = Field(
        default=None,
        ge=0,
        description="Company ID, must be 0 or greater. Use 0 if uncertain.",
    )

    @field_validator("name", "linkedin", "role", mode="before")
    @classmethod
    def empty_string_to_none(cls, v):
        """Normalize empty or whitespace-only text fields to ``None``."""
        return _blank_to_none(v)

    @field_validator("id", "company_id", mode="before")
    @classmethod
    def zero_to_none(cls, v):
        """Normalize a 0 in ``id`` / ``company_id`` to ``None``."""
        return _zero_to_none(v)


class CompanySchema(BaseModel):
    """
    Expert parser: Only extract company information if clearly identifiable.
    Leave optional fields empty if uncertain. Integer values must be 0 or greater.
    """

    # Replaces the deprecated class-based ``Config``.
    model_config = ConfigDict(from_attributes=True)

    id: Optional[int] = Field(
        default=None,
        ge=0,
        description="Company ID, must be 0 or greater. Use 0 if uncertain.",
    )
    name: Optional[str] = Field(
        default=None,
        description="Company name. Leave empty string if not clearly identifiable.",
    )
    industry: Optional[str] = Field(
        default=None,
        description="Company industry/sector. Leave empty string if not clearly identifiable.",
    )
    location: Optional[str] = Field(
        default=None,
        description="Company location/address. Leave empty string if not clearly identifiable.",
    )
    description: Optional[str] = Field(
        default=None,
        description="Company description. Leave empty if not clearly available or uncertain.",
    )
    founded_year: Optional[int] = Field(
        default=None,
        ge=0,
        description="Year company was founded, must be 0 or greater. Leave None if not clearly identifiable or uncertain.",
    )
    website: Optional[str] = Field(
        default=None,
        description="Company website URL. Leave empty if not provided or uncertain.",
    )

    @field_validator(
        "name", "industry", "location", "description", "website", mode="before"
    )
    @classmethod
    def empty_string_to_none(cls, v):
        """Normalize empty or whitespace-only text fields to ``None``."""
        return _blank_to_none(v)

    @field_validator("id", "founded_year", mode="before")
    @classmethod
    def zero_to_none(cls, v):
        """Normalize a 0 in ``id`` / ``founded_year`` to ``None``."""
        return _zero_to_none(v)

    @field_validator("founded_year", mode="before")
    @classmethod
    def validate_founded_year(cls, v):
        """Coerce raw ``founded_year`` input to a positive int or ``None``.

        Returns ``None`` for ``None``, the placeholder strings
        ``"Not Available"``, ``""`` and ``"Unknown"``, strings that ``int()``
        cannot parse, booleans, non-integer types, and any year that is zero
        or negative. Otherwise returns the year as an ``int``.
        """
        if v is None or v in ("Not Available", "", "Unknown"):
            return None
        if isinstance(v, str):
            try:
                year = int(v)
            except ValueError:
                return None
        elif isinstance(v, int) and not isinstance(v, bool):
            # bool is a subclass of int; True/False is never a founding year.
            year = v
        else:
            return None
        # Zero is rejected here as well as in ``zero_to_none`` so that inputs
        # such as the string "0" yield None regardless of the order in which
        # the two "before" validators on this field are executed.
        return year if year > 0 else None


class InvestorSchema(BaseModel):
    """
    Expert parser: Only extract investor information if clearly identifiable.
    Leave optional fields empty if uncertain. All numeric values must be 0 or greater.
    """

    # Replaces the deprecated class-based ``Config``.
    model_config = ConfigDict(from_attributes=True)

    id: Optional[int] = Field(
        default=None,
        ge=0,
        description="Investor ID, must be 0 or greater. Use 0 if uncertain.",
    )
    name: Optional[str] = Field(
        default=None,
        description="Investor name. Do not return any special characters, Just the name as a string.",
    )
    description: Optional[str] = Field(
        default=None,
        description="Investor description. Leave empty if not clearly available or uncertain.",
    )
    aum: Optional[int] = Field(
        default=None,
        ge=0,
        description="Assets Under Management in USD, must be 0 or greater. Use 0 if not clearly identifiable or uncertain.",
    )
    check_size_lower: Optional[int] = Field(
        default=None,
        ge=0,
        description="Lower bound of typical investment check size in USD, must be 0 or greater. Use 0 if not clearly identifiable.",
    )
    check_size_upper: Optional[int] = Field(
        default=None,
        ge=0,
        description="Upper bound of typical investment check size in USD, must be 0 or greater. Use 0 if not clearly identifiable.",
    )
    geographic_focus: Optional[str] = Field(
        default=None,
        description="Geographic investment focus. Do not return any special characters, Just locations separated by commas. Leave empty if not clearly identifiable.",
    )
    # Unlike the other fields this one is not Optional: it falls back to SEED.
    stage_focus: InvestmentStage = Field(
        default=InvestmentStage.SEED,
        description="Investment stage focus. Use SEED as default if uncertain.",
    )
    number_of_investments: Optional[int] = Field(
        default=None,
        ge=0,
        description="Total number of investments made, must be 0 or greater. Use 0 if not clearly identifiable.",
    )

    @field_validator("name", "description", "geographic_focus", mode="before")
    @classmethod
    def empty_string_to_none(cls, v):
        """Normalize empty or whitespace-only text fields to ``None``."""
        return _blank_to_none(v)

    @field_validator(
        "id",
        "aum",
        "check_size_lower",
        "check_size_upper",
        "number_of_investments",
        mode="before",
    )
    @classmethod
    def zero_to_none(cls, v):
        """Normalize a 0 in any optional integer field to ``None``."""
        return _zero_to_none(v)


class InvestorData(BaseModel):
    """
    Expert parser: Comprehensive investor data schema for LLM processing.
    Only populate fields with clearly identifiable information.
    Leave lists empty if uncertain.
    """

    # Replaces the deprecated class-based ``Config``.
    model_config = ConfigDict(from_attributes=True)

    investor: InvestorSchema = Field(
        description="Core investor information. Only populate with clearly identifiable data."
    )
    # The literal ``[]`` defaults below are kept as-is so the default published
    # in the generated schema is unchanged.
    portfolio_companies: List[CompanySchema] = Field(
        default=[],
        description="List of portfolio companies. Leave empty if not clearly identifiable.",
    )
    team_members: List[InvestorMemberSchema] = Field(
        default=[],
        description="List of team members. Leave empty if not clearly identifiable.",
    )
    sectors: List[SectorSchema] = Field(
        default=[],
        description="List of investment sectors. Leave empty if not clearly identifiable.",
    )


class CompanyData(BaseModel):
    """
    Expert parser: Comprehensive company data schema for LLM processing.
    Only populate fields with clearly identifiable information.
    Leave lists empty if uncertain.
    """

    # Replaces the deprecated class-based ``Config``.
    model_config = ConfigDict(from_attributes=True)

    company: CompanySchema = Field(
        description="Core company information. Only populate with clearly identifiable data."
    )
    # The literal ``[]`` defaults below are kept as-is so the default published
    # in the generated schema is unchanged.
    sectors: List[SectorSchema] = Field(
        default=[],
        description="List of company sectors. Leave empty if not clearly identifiable.",
    )
    members: List[CompanyMemberSchema] = Field(
        default=[],
        description="List of company members. Leave empty if not clearly identifiable.",
    )
    investors: List[InvestorSchema] = Field(
        default=[],
        description="List of investors. Leave empty if not clearly identifiable.",
    )


class InvestorList(BaseModel):
    """Expert parser: List of investors with clearly identifiable information only."""

    # No ``from_attributes`` here: the original defined no config for this model.
    investors: List[InvestorData] = Field(
        default=[],
        description="List of investors. Leave empty if no clearly identifiable investors.",
    )