2025-09-25 17:00:38 +01:00
from enum import Enum
from typing import List, Optional

from pydantic import BaseModel, ConfigDict, Field, field_validator
2025-09-25 17:00:38 +01:00
class InvestmentStage(str, Enum):
    # Funding stages an investor can focus on. Members are also ``str``
    # instances because of the ``str`` mixin.
    #
    # NOTE(review): the values below are reproduced exactly as written in this
    # file, including the spaces inside the quotes - confirm whether that
    # padding is intended. No class docstring is added on purpose: pydantic
    # would surface it as the enum's description in generated JSON schemas.
    SEED = " SEED "
    SERIES_A = " SERIES_A "
    SERIES_B = " SERIES_B "
    SERIES_C = " SERIES_C "
    GROWTH = " GROWTH "
    LATE_STAGE = " LATE_STAGE "

    @classmethod
    def _missing_(cls, value):
        """Resolve a value that did not match any member exactly.

        Called by ``Enum`` only after the exact-value lookup fails, so exact
        matches behave as before. String inputs are compared after trimming
        surrounding whitespace, upper-casing, and turning spaces/hyphens into
        underscores, so ``"seed"``, ``"SEED"`` and ``"Series A"`` all resolve.

        Returns:
            The matching member, or ``None`` so that ``Enum`` raises its
            usual ``ValueError`` for unknown values.
        """
        if not isinstance(value, str):
            return None

        def canonical(text):
            return text.strip().upper().replace("-", "_").replace(" ", "_")

        wanted = canonical(value)
        for member in cls:
            if canonical(member.value) == wanted:
                return member
        return None
class SectorSchema(BaseModel):
    """
    Expert parser: Only extract sector information if clearly identifiable.
    Leave name empty if uncertain about the sector classification.
    """

    # Replaces the deprecated nested ``class Config``; same single setting
    # (``from_attributes=True``) expressed in the pydantic v2 form.
    model_config = ConfigDict(from_attributes=True)

    # NOTE(review): the docstring and Field descriptions are published in the
    # model's JSON schema, so their text is kept exactly as written.

    # Required, non-negative (ge=0).
    id: int = Field(
        ge=0, description=" Sector ID, must be 0 or greater. Use 0 if uncertain. "
    )
    # Required string; no default is declared.
    name: str = Field(
        description=" Sector name. Leave empty string if not clearly identifiable from the data. "
    )
class InvestorMemberSchema(BaseModel):
    """
    Expert parser: Only extract team member information if clearly identifiable.
    Leave fields empty if uncertain about the member details.
    """

    # Replaces the deprecated nested ``class Config``; same single setting
    # (``from_attributes=True``) expressed in the pydantic v2 form.
    model_config = ConfigDict(from_attributes=True)

    # NOTE(review): the docstring and Field descriptions are published in the
    # model's JSON schema, so their text is kept exactly as written.

    # Required, non-negative (ge=0).
    id: int = Field(
        ge=0, description=" Member ID, must be 0 or greater. Use 0 if uncertain. "
    )
    # The three text fields below are required (no default declared).
    name: str = Field(
        description=" Team member name. Leave empty string if not clearly identifiable. "
    )
    role: str = Field(
        description=" Team member role/title. Leave empty string if not clearly identifiable. "
    )
    email: str = Field(
        description=" Team member email. Leave empty string if not clearly identifiable or not provided. "
    )
    # Required, non-negative (ge=0); presumably refers to InvestorSchema.id -
    # TODO confirm, nothing in this file enforces the link.
    investor_id: int = Field(
        ge=0, description=" Investor ID, must be 0 or greater. Use 0 if uncertain. "
    )
class CompanyMemberSchema(BaseModel):
    """
    Expert parser: Only extract company member information if clearly identifiable.
    Leave fields empty if uncertain about the member details.
    """

    # Replaces the deprecated nested ``class Config``; same single setting
    # (``from_attributes=True``) expressed in the pydantic v2 form.
    model_config = ConfigDict(from_attributes=True)

    # NOTE(review): the docstring and Field descriptions are published in the
    # model's JSON schema, so their text is kept exactly as written.

    # Required, non-negative (ge=0).
    id: int = Field(
        ge=0, description=" Member ID, must be 0 or greater. Use 0 if uncertain. "
    )
    # NOTE(review): the defaults of the three optional text fields are kept
    # exactly as written (" "), which is a non-empty string even though the
    # descriptions say "leave empty" - confirm this is intended.
    name: Optional[str] = Field(
        default=" ",
        description=" Company member name. Leave empty if not clearly identifiable. ",
    )
    linkedin: Optional[str] = Field(
        default=" ",
        description=" LinkedIn profile URL. Leave empty if not provided or uncertain. ",
    )
    role: Optional[str] = Field(
        default=" ",
        description=" Company member role/title. Leave empty if not clearly identifiable. ",
    )
    # Required, non-negative (ge=0).
    company_id: int = Field(
        ge=0, description=" Company ID, must be 0 or greater. Use 0 if uncertain. "
    )
class CompanySchema(BaseModel):
    """
    Expert parser: Only extract company information if clearly identifiable.
    Leave optional fields empty if uncertain. Integer values must be 0 or greater.
    """

    # Replaces the deprecated nested ``class Config``; same single setting
    # (``from_attributes=True``) expressed in the pydantic v2 form.
    model_config = ConfigDict(from_attributes=True)

    # NOTE(review): the docstring and Field descriptions are published in the
    # model's JSON schema, so their text is kept exactly as written.

    # Required, non-negative (ge=0).
    id: int = Field(
        ge=0, description=" Company ID, must be 0 or greater. Use 0 if uncertain. "
    )
    # The three text fields below are required (no default declared).
    name: str = Field(
        description=" Company name. Leave empty string if not clearly identifiable. "
    )
    industry: str = Field(
        description=" Company industry/sector. Leave empty string if not clearly identifiable. "
    )
    location: str = Field(
        description=" Company location/address. Leave empty string if not clearly identifiable. "
    )
    # NOTE(review): the " " defaults below are kept exactly as written; they
    # are non-empty strings although the descriptions say "leave empty".
    description: Optional[str] = Field(
        default=" ",
        description=" Company description. Leave empty if not clearly available or uncertain. ",
    )
    # Optional; raw input is normalised by validate_founded_year before the
    # ge=0 constraint is applied.
    founded_year: Optional[int] = Field(
        default=None,
        ge=0,
        description=" Year company was founded, must be 0 or greater. Leave None if not clearly identifiable or uncertain. ",
    )
    website: Optional[str] = Field(
        default=" ",
        description=" Company website URL. Leave empty if not provided or uncertain. ",
    )

    # The field name and mode must match exactly: a padded name would not
    # refer to any declared field, and a padded mode is not a valid mode.
    @field_validator("founded_year", mode="before")
    @classmethod
    def validate_founded_year(cls, v):
        """Normalise a raw founding year to a non-negative int or ``None``.

        Runs before pydantic's own type validation, so ``v`` may be any type.

        Args:
            v: Raw input value for ``founded_year``.

        Returns:
            A non-negative ``int`` when ``v`` is an int, an integral float
            (e.g. ``2015.0``) or a string that parses as an integer;
            ``None`` for everything else. Placeholder text such as
            "Not Available", "Unknown" or an empty string does not parse as
            an integer and therefore yields ``None`` without a special case.
        """
        # bool is a subclass of int; True/False must never be read as a year.
        if v is None or isinstance(v, bool):
            return None

        if isinstance(v, str):
            try:
                v = int(v.strip())
            except ValueError:
                return None
        elif isinstance(v, float):
            # is_integer() is False for nan/inf as well as fractional values.
            if not v.is_integer():
                return None
            v = int(v)

        if isinstance(v, int) and v >= 0:
            return v
        return None
class InvestorSchema(BaseModel):
    """
    Expert parser: Only extract investor information if clearly identifiable.
    Leave optional fields empty if uncertain. All numeric values must be 0 or greater.
    """

    # Replaces the deprecated nested ``class Config``; same single setting
    # (``from_attributes=True``) expressed in the pydantic v2 form.
    model_config = ConfigDict(from_attributes=True)

    # NOTE(review): the docstring and Field descriptions are published in the
    # model's JSON schema, so their text is kept exactly as written.
    #
    # Nullable types are spelled ``Optional[X]`` throughout, matching the
    # other schemas in this file (previously a mix with ``X | None``).

    # Required, non-negative (ge=0).
    id: int = Field(
        ge=0, description=" Investor ID, must be 0 or greater. Use 0 if uncertain. "
    )
    # Required string.
    name: str = Field(
        description=" Investor name. Do not return any special characters, Just the name as a string. "
    )
    # NOTE(review): the " " default is kept exactly as written; it is a
    # non-empty string although the description says "leave empty".
    description: Optional[str] = Field(
        default=" ",
        description=" Investor description. Leave empty if not clearly available or uncertain. ",
    )
    # The next four fields accept None but declare no default, so the key
    # must still be supplied by the caller.
    aum: Optional[int] = Field(
        ge=0,
        description=" Assets Under Management in USD, must be 0 or greater. Use 0 if not clearly identifiable or uncertain. ",
    )
    check_size_lower: Optional[int] = Field(
        ge=0,
        description=" Lower bound of typical investment check size in USD, must be 0 or greater. Use 0 if not clearly identifiable. ",
    )
    check_size_upper: Optional[int] = Field(
        ge=0,
        description=" Upper bound of typical investment check size in USD, must be 0 or greater. Use 0 if not clearly identifiable. ",
    )
    geographic_focus: Optional[str] = Field(
        description=" Geographic investment focus. Do not return any special characters, Just locations separated by commas. Leave empty if not clearly identifiable. ",
    )
    # Required enum; the "SEED as default" wording lives only in the
    # description - no default value is declared on the field itself.
    stage_focus: InvestmentStage = Field(
        description=" Investment stage focus. Use SEED as default if uncertain. "
    )
    # Optional, defaults to 0.
    number_of_investments: Optional[int] = Field(
        ge=0,
        default=0,
        description=" Total number of investments made, must be 0 or greater. Use 0 if not clearly identifiable. ",
    )
class InvestorData(BaseModel):
    """
    Expert parser: Comprehensive investor data schema for LLM processing.
    Only populate fields with clearly identifiable information. Leave lists empty if uncertain.
    """

    # Replaces the deprecated nested ``class Config``; same single setting
    # (``from_attributes=True``) expressed in the pydantic v2 form.
    model_config = ConfigDict(from_attributes=True)

    # NOTE(review): the docstring and Field descriptions are published in the
    # model's JSON schema, so their text is kept exactly as written.

    # Required nested model.
    investor: InvestorSchema = Field(
        description=" Core investor information. Only populate with clearly identifiable data. "
    )
    # The three collections below default to an empty list. ``default=[]`` is
    # kept (rather than ``default_factory``) so the declared schema default
    # is unchanged.
    portfolio_companies: List[CompanySchema] = Field(
        default=[],
        description=" List of portfolio companies. Leave empty if not clearly identifiable. ",
    )
    team_members: List[InvestorMemberSchema] = Field(
        default=[],
        description=" List of team members. Leave empty if not clearly identifiable. ",
    )
    sectors: List[SectorSchema] = Field(
        default=[],
        description=" List of investment sectors. Leave empty if not clearly identifiable. ",
    )
2025-09-26 15:24:42 +01:00
class CompanyData(BaseModel):
    """
    Expert parser: Comprehensive company data schema for LLM processing.
    Only populate fields with clearly identifiable information. Leave lists empty if uncertain.
    """

    # Replaces the deprecated nested ``class Config``; same single setting
    # (``from_attributes=True``) expressed in the pydantic v2 form.
    model_config = ConfigDict(from_attributes=True)

    # NOTE(review): the docstring and Field descriptions are published in the
    # model's JSON schema, so their text is kept exactly as written.

    # Required nested model.
    company: CompanySchema = Field(
        description=" Core company information. Only populate with clearly identifiable data. "
    )
    # The three collections below default to an empty list. ``default=[]`` is
    # kept (rather than ``default_factory``) so the declared schema default
    # is unchanged.
    sectors: List[SectorSchema] = Field(
        default=[],
        description=" List of company sectors. Leave empty if not clearly identifiable. ",
    )
    members: List[CompanyMemberSchema] = Field(
        default=[],
        description=" List of company members. Leave empty if not clearly identifiable. ",
    )
    investors: List[InvestorSchema] = Field(
        default=[],
        description=" List of investors. Leave empty if not clearly identifiable. ",
    )
class InvestorList(BaseModel):
    """ Expert parser: List of investors with clearly identifiable information only. """

    # Wrapper model holding a list of InvestorData entries; an omitted
    # ``investors`` key falls back to the declared empty-list default.
    # NOTE(review): the docstring and description text are kept exactly as
    # written because pydantic publishes them in the model's JSON schema.
    investors: List[InvestorData] = Field(
        description=" List of investors. Leave empty if no clearly identifiable investors. ",
        default=[],
    )