"""Pydantic schemas describing investor and company data for LLM processing."""
from enum import Enum
from typing import List, Optional

from pydantic import BaseModel, ConfigDict, Field, field_validator
|
|
|
|
|
|
class InvestmentStage(str, Enum):
    """Closed set of investment stages an investor can focus on.

    Members mix in ``str``, so each one compares equal to its own name and
    serializes as a plain string.
    """

    SEED = "SEED"
    SERIES_A = "SERIES_A"
    SERIES_B = "SERIES_B"
    SERIES_C = "SERIES_C"
    GROWTH = "GROWTH"
    LATE_STAGE = "LATE_STAGE"
|
|
|
|
|
|
class SectorSchema(BaseModel):
    """
    Expert parser: Only extract sector information if clearly identifiable.
    Leave name empty if uncertain about the sector classification.
    """

    # NOTE: the class docstring and the Field descriptions are kept verbatim
    # because they read as instructions to the model filling in this schema.

    # Pydantic v2 configuration; replaces the deprecated nested ``Config``
    # class. ``from_attributes`` allows validation from attribute-bearing
    # objects as well as dicts.
    model_config = ConfigDict(from_attributes=True)

    id: Optional[int] = Field(
        default=None,
        ge=0,
        description="Sector ID, must be 0 or greater. Use 0 if uncertain.",
    )
    name: Optional[str] = Field(
        default=None,
        description="Sector name. Leave empty string if not clearly identifiable from the data.",
    )

    @field_validator("name", mode="before")
    @classmethod
    def empty_string_to_none(cls, v):
        """Return None for empty or whitespace-only strings, else ``v`` unchanged."""
        if isinstance(v, str) and not v.strip():
            return None
        return v

    @field_validator("id", mode="before")
    @classmethod
    def zero_to_none(cls, v):
        """Return None when ``v`` equals 0 (the "uncertain" marker), else ``v``."""
        if v == 0:
            return None
        return v
|
|
|
|
|
|
class InvestorMemberSchema(BaseModel):
    """
    Expert parser: Only extract team member information if clearly identifiable.
    Leave fields empty if uncertain about the member details.
    """

    # NOTE: the class docstring and the Field descriptions are kept verbatim
    # because they read as instructions to the model filling in this schema.

    # Pydantic v2 configuration; replaces the deprecated nested ``Config``
    # class. ``from_attributes`` allows validation from attribute-bearing
    # objects as well as dicts.
    model_config = ConfigDict(from_attributes=True)

    id: Optional[int] = Field(
        default=None,
        ge=0,
        description="Member ID, must be 0 or greater. Use 0 if uncertain.",
    )
    name: Optional[str] = Field(
        default=None,
        description="Team member name. Leave empty string if not clearly identifiable.",
    )
    role: Optional[str] = Field(
        default=None,
        description="Team member role/title. Leave empty string if not clearly identifiable.",
    )
    email: Optional[str] = Field(
        default=None,
        description="Team member email. Leave empty string if not clearly identifiable or not provided.",
    )
    investor_id: Optional[int] = Field(
        default=None,
        ge=0,
        description="Investor ID, must be 0 or greater. Use 0 if uncertain.",
    )

    @field_validator("name", "role", "email", mode="before")
    @classmethod
    def empty_string_to_none(cls, v):
        """Return None for empty or whitespace-only strings, else ``v`` unchanged."""
        if isinstance(v, str) and not v.strip():
            return None
        return v

    @field_validator("id", "investor_id", mode="before")
    @classmethod
    def zero_to_none(cls, v):
        """Return None when ``v`` equals 0 (the "uncertain" marker), else ``v``."""
        if v == 0:
            return None
        return v
|
|
|
|
|
|
class CompanyMemberSchema(BaseModel):
    """
    Expert parser: Only extract company member information if clearly identifiable.
    Leave fields empty if uncertain about the member details.
    """

    # NOTE: the class docstring and the Field descriptions are kept verbatim
    # because they read as instructions to the model filling in this schema.

    # Pydantic v2 configuration; replaces the deprecated nested ``Config``
    # class. ``from_attributes`` allows validation from attribute-bearing
    # objects as well as dicts.
    model_config = ConfigDict(from_attributes=True)

    id: Optional[int] = Field(
        default=None,
        ge=0,
        description="Member ID, must be 0 or greater. Use 0 if uncertain.",
    )
    name: Optional[str] = Field(
        default=None,
        description="Company member name. Leave empty if not clearly identifiable.",
    )
    linkedin: Optional[str] = Field(
        default=None,
        description="LinkedIn profile URL. Leave empty if not provided or uncertain.",
    )
    role: Optional[str] = Field(
        default=None,
        description="Company member role/title. Leave empty if not clearly identifiable.",
    )
    company_id: Optional[int] = Field(
        default=None,
        ge=0,
        description="Company ID, must be 0 or greater. Use 0 if uncertain.",
    )

    @field_validator("name", "linkedin", "role", mode="before")
    @classmethod
    def empty_string_to_none(cls, v):
        """Return None for empty or whitespace-only strings, else ``v`` unchanged."""
        if isinstance(v, str) and not v.strip():
            return None
        return v

    @field_validator("id", "company_id", mode="before")
    @classmethod
    def zero_to_none(cls, v):
        """Return None when ``v`` equals 0 (the "uncertain" marker), else ``v``."""
        if v == 0:
            return None
        return v
|
|
|
|
|
|
class CompanySchema(BaseModel):
    """
    Expert parser: Only extract company information if clearly identifiable.
    Leave optional fields empty if uncertain. Integer values must be 0 or greater.
    """

    # NOTE: the class docstring and the Field descriptions are kept verbatim
    # because they read as instructions to the model filling in this schema.

    # Pydantic v2 configuration; replaces the deprecated nested ``Config``
    # class. ``from_attributes`` allows validation from attribute-bearing
    # objects as well as dicts.
    model_config = ConfigDict(from_attributes=True)

    id: Optional[int] = Field(
        default=None,
        ge=0,
        description="Company ID, must be 0 or greater. Use 0 if uncertain.",
    )
    name: Optional[str] = Field(
        default=None,
        description="Company name. Leave empty string if not clearly identifiable.",
    )
    industry: Optional[str] = Field(
        default=None,
        description="Company industry/sector. Leave empty string if not clearly identifiable.",
    )
    location: Optional[str] = Field(
        default=None,
        description="Company location/address. Leave empty string if not clearly identifiable.",
    )
    description: Optional[str] = Field(
        default=None,
        description="Company description. Leave empty if not clearly available or uncertain.",
    )
    founded_year: Optional[int] = Field(
        default=None,
        ge=0,
        description="Year company was founded, must be 0 or greater. Leave None if not clearly identifiable or uncertain.",
    )
    website: Optional[str] = Field(
        default=None,
        description="Company website URL. Leave empty if not provided or uncertain.",
    )

    @field_validator(
        "name", "industry", "location", "description", "website", mode="before"
    )
    @classmethod
    def empty_string_to_none(cls, v):
        """Return None for empty or whitespace-only strings, else ``v`` unchanged."""
        if isinstance(v, str) and not v.strip():
            return None
        return v

    @field_validator("id", "founded_year", mode="before")
    @classmethod
    def zero_to_none(cls, v):
        """Return None when ``v`` equals 0 (the "uncertain" marker), else ``v``.

        Applies to both ``id`` and ``founded_year``.
        """
        if v == 0:
            return None
        return v

    @field_validator("founded_year", mode="before")
    @classmethod
    def validate_founded_year(cls, v):
        """Coerce a raw founding year to a positive int, or None if unusable.

        Accepted inputs are ints, whole-number floats (e.g. ``2010.0``) and
        strings that ``int()`` can parse. Everything else -- None, booleans,
        fractional or non-finite floats, placeholder text such as "Unknown",
        zero and negative numbers -- yields None instead of raising, so an
        unreadable year never fails the whole record.
        """
        # bool is a subclass of int; without this guard True would be
        # accepted as the year 1.
        if v is None or isinstance(v, bool):
            return None

        if isinstance(v, float):
            # is_integer() is False for nan/inf as well as fractional values.
            if not v.is_integer():
                return None
            year = int(v)
        elif isinstance(v, str):
            try:
                year = int(v.strip())
            except ValueError:
                # Covers "", "Unknown", "Not Available" and any other
                # non-numeric placeholder.
                return None
        elif isinstance(v, int):
            year = v
        else:
            return None

        # 0 is the "uncertain" marker and negatives are invalid; treating
        # both as None here keeps the result independent of the order in
        # which the two founded_year validators run.
        return year if year > 0 else None
|
|
|
|
|
|
class InvestorSchema(BaseModel):
    """
    Expert parser: Only extract investor information if clearly identifiable.
    Leave optional fields empty if uncertain. All numeric values must be 0 or greater.
    """

    # NOTE: the class docstring and the Field descriptions are kept verbatim
    # because they read as instructions to the model filling in this schema.

    # Pydantic v2 configuration; replaces the deprecated nested ``Config``
    # class. ``from_attributes`` allows validation from attribute-bearing
    # objects as well as dicts.
    model_config = ConfigDict(from_attributes=True)

    id: Optional[int] = Field(
        default=None,
        ge=0,
        description="Investor ID, must be 0 or greater. Use 0 if uncertain.",
    )
    name: Optional[str] = Field(
        default=None,
        description="Investor name. Do not return any special characters, Just the name as a string.",
    )
    description: Optional[str] = Field(
        default=None,
        description="Investor description. Leave empty if not clearly available or uncertain.",
    )
    aum: Optional[int] = Field(
        default=None,
        ge=0,
        description="Assets Under Management in USD, must be 0 or greater. Use 0 if not clearly identifiable or uncertain.",
    )
    check_size_lower: Optional[int] = Field(
        default=None,
        ge=0,
        description="Lower bound of typical investment check size in USD, must be 0 or greater. Use 0 if not clearly identifiable.",
    )
    check_size_upper: Optional[int] = Field(
        default=None,
        ge=0,
        description="Upper bound of typical investment check size in USD, must be 0 or greater. Use 0 if not clearly identifiable.",
    )
    geographic_focus: Optional[str] = Field(
        default=None,
        description="Geographic investment focus. Do not return any special characters, Just locations separated by commas. Leave empty if not clearly identifiable.",
    )
    # Unlike the other fields this one is not Optional: it falls back to SEED.
    stage_focus: InvestmentStage = Field(
        default=InvestmentStage.SEED,
        description="Investment stage focus. Use SEED as default if uncertain.",
    )
    number_of_investments: Optional[int] = Field(
        default=None,
        ge=0,
        description="Total number of investments made, must be 0 or greater. Use 0 if not clearly identifiable.",
    )

    @field_validator("name", "description", "geographic_focus", mode="before")
    @classmethod
    def empty_string_to_none(cls, v):
        """Return None for empty or whitespace-only strings, else ``v`` unchanged."""
        if isinstance(v, str) and not v.strip():
            return None
        return v

    @field_validator(
        "id",
        "aum",
        "check_size_lower",
        "check_size_upper",
        "number_of_investments",
        mode="before",
    )
    @classmethod
    def zero_to_none(cls, v):
        """Return None when ``v`` equals 0 (the "uncertain" marker), else ``v``."""
        if v == 0:
            return None
        return v
|
|
|
|
|
|
class InvestorData(BaseModel):
    """
    Expert parser: Comprehensive investor data schema for LLM processing.
    Only populate fields with clearly identifiable information. Leave lists empty if uncertain.
    """

    # NOTE: the class docstring and the Field descriptions are kept verbatim
    # because they read as instructions to the model filling in this schema.

    # Pydantic v2 configuration; replaces the deprecated nested ``Config``
    # class. ``from_attributes`` allows validation from attribute-bearing
    # objects as well as dicts.
    model_config = ConfigDict(from_attributes=True)

    # Required: the only field without a default.
    investor: InvestorSchema = Field(
        description="Core investor information. Only populate with clearly identifiable data."
    )
    # The literal ``[]`` defaults are kept as-is so the generated JSON schema
    # is unchanged.
    portfolio_companies: List[CompanySchema] = Field(
        default=[],
        description="List of portfolio companies. Leave empty if not clearly identifiable.",
    )
    team_members: List[InvestorMemberSchema] = Field(
        default=[],
        description="List of team members. Leave empty if not clearly identifiable.",
    )
    sectors: List[SectorSchema] = Field(
        default=[],
        description="List of investment sectors. Leave empty if not clearly identifiable.",
    )
|
|
|
|
|
|
class CompanyData(BaseModel):
    """
    Expert parser: Comprehensive company data schema for LLM processing.
    Only populate fields with clearly identifiable information. Leave lists empty if uncertain.
    """

    # NOTE: the class docstring and the Field descriptions are kept verbatim
    # because they read as instructions to the model filling in this schema.

    # Pydantic v2 configuration; replaces the deprecated nested ``Config``
    # class. ``from_attributes`` allows validation from attribute-bearing
    # objects as well as dicts.
    model_config = ConfigDict(from_attributes=True)

    # Required: the only field without a default.
    company: CompanySchema = Field(
        description="Core company information. Only populate with clearly identifiable data."
    )
    # The literal ``[]`` defaults are kept as-is so the generated JSON schema
    # is unchanged.
    sectors: List[SectorSchema] = Field(
        default=[],
        description="List of company sectors. Leave empty if not clearly identifiable.",
    )
    members: List[CompanyMemberSchema] = Field(
        default=[],
        description="List of company members. Leave empty if not clearly identifiable.",
    )
    investors: List[InvestorSchema] = Field(
        default=[],
        description="List of investors. Leave empty if not clearly identifiable.",
    )
|
|
|
|
|
|
class InvestorList(BaseModel):
    """Expert parser: List of investors with clearly identifiable information only."""

    # NOTE: the class docstring and the Field description are kept verbatim
    # because they read as instructions to the model filling in this schema.

    # Added for consistency: every other model in this module enables
    # ``from_attributes``, and this wrapper was the only one without it.
    model_config = ConfigDict(from_attributes=True)

    # The literal ``[]`` default is kept as-is so the generated JSON schema
    # is unchanged.
    investors: List[InvestorData] = Field(
        default=[],
        description="List of investors. Leave empty if no clearly identifiable investors.",
    )
|