2025-09-25 17:00:38 +01:00
from enum import Enum
from typing import List , Optional
2025-09-26 15:24:42 +01:00
from pydantic import BaseModel , Field , field_validator
2025-09-25 17:00:38 +01:00
class InvestmentStage(str, Enum):
    # Funding stages an investor can focus on.
    # Mixing in ``str`` makes every member a plain string as well as an enum
    # member; each member's value is spelled the same as its name.

    # Early stage
    SEED = "SEED"

    # Lettered venture rounds
    SERIES_A = "SERIES_A"
    SERIES_B = "SERIES_B"
    SERIES_C = "SERIES_C"

    # Later stages
    GROWTH = "GROWTH"
    LATE_STAGE = "LATE_STAGE"
class SectorSchema(BaseModel):
    """
    Expert parser: Only extract sector information if clearly identifiable.
    Leave name empty if uncertain about the sector classification.
    """

    # Numeric identifier; a 0 is turned into None by ``zero_to_none``.
    id: Optional[int] = Field(
        default=None,
        ge=0,
        description="Sector ID, must be 0 or greater. Use 0 if uncertain.",
    )
    # Display name; blank input is turned into None by ``empty_string_to_none``.
    name: Optional[str] = Field(
        default=None,
        description="Sector name. Leave empty string if not clearly identifiable from the data.",
    )

    @field_validator("name", mode="before")
    @classmethod
    def empty_string_to_none(cls, v):
        """Return None for an empty or whitespace-only string, else ``v`` unchanged."""
        is_blank = v == "" or (isinstance(v, str) and v.strip() == "")
        return None if is_blank else v

    @field_validator("id", mode="before")
    @classmethod
    def zero_to_none(cls, v):
        """Return None when ``v`` compares equal to 0, else ``v`` unchanged."""
        return None if v == 0 else v

    class Config:
        from_attributes = True
class InvestorMemberSchema(BaseModel):
    """
    Expert parser: Only extract team member information if clearly identifiable.
    Leave fields empty if uncertain about the member details.
    """

    # --- integer fields: a 0 is turned into None by ``zero_to_none`` ---
    id: Optional[int] = Field(
        default=None,
        ge=0,
        description="Member ID, must be 0 or greater. Use 0 if uncertain.",
    )

    # --- text fields: blank input is turned into None by ``empty_string_to_none`` ---
    name: Optional[str] = Field(
        default=None,
        description="Team member name. Leave empty string if not clearly identifiable.",
    )
    role: Optional[str] = Field(
        default=None,
        description="Team member role/title. Leave empty string if not clearly identifiable.",
    )
    email: Optional[str] = Field(
        default=None,
        description="Team member email. Leave empty string if not clearly identifiable or not provided.",
    )

    # Identifier of the investor this member is attached to (0 -> None as above).
    investor_id: Optional[int] = Field(
        default=None,
        ge=0,
        description="Investor ID, must be 0 or greater. Use 0 if uncertain.",
    )

    @field_validator("name", "role", "email", mode="before")
    @classmethod
    def empty_string_to_none(cls, v):
        """Return None for an empty or whitespace-only string, else ``v`` unchanged."""
        is_blank = v == "" or (isinstance(v, str) and v.strip() == "")
        return None if is_blank else v

    @field_validator("id", "investor_id", mode="before")
    @classmethod
    def zero_to_none(cls, v):
        """Return None when ``v`` compares equal to 0, else ``v`` unchanged."""
        return None if v == 0 else v

    class Config:
        from_attributes = True
class CompanyMemberSchema(BaseModel):
    """
    Expert parser: Only extract company member information if clearly identifiable.
    Leave fields empty if uncertain about the member details.
    """

    # --- integer fields: a 0 is turned into None by ``zero_to_none`` ---
    id: Optional[int] = Field(
        default=None,
        ge=0,
        description="Member ID, must be 0 or greater. Use 0 if uncertain.",
    )

    # --- text fields: blank input is turned into None by ``empty_string_to_none`` ---
    name: Optional[str] = Field(
        default=None,
        description="Company member name. Leave empty if not clearly identifiable.",
    )
    linkedin: Optional[str] = Field(
        default=None,
        description="LinkedIn profile URL. Leave empty if not provided or uncertain.",
    )
    role: Optional[str] = Field(
        default=None,
        description="Company member role/title. Leave empty if not clearly identifiable.",
    )

    # Identifier of the company this member is attached to (0 -> None as above).
    company_id: Optional[int] = Field(
        default=None,
        ge=0,
        description="Company ID, must be 0 or greater. Use 0 if uncertain.",
    )

    @field_validator("name", "linkedin", "role", mode="before")
    @classmethod
    def empty_string_to_none(cls, v):
        """Return None for an empty or whitespace-only string, else ``v`` unchanged."""
        is_blank = v == "" or (isinstance(v, str) and v.strip() == "")
        return None if is_blank else v

    @field_validator("id", "company_id", mode="before")
    @classmethod
    def zero_to_none(cls, v):
        """Return None when ``v`` compares equal to 0, else ``v`` unchanged."""
        return None if v == 0 else v

    class Config:
        from_attributes = True
class CompanySchema(BaseModel):
    """
    Expert parser: Only extract company information if clearly identifiable.
    Leave optional fields empty if uncertain. Integer values must be 0 or greater.
    """

    # Numeric identifier; a 0 is turned into None by ``zero_to_none``.
    id: Optional[int] = Field(
        default=None,
        ge=0,
        description="Company ID, must be 0 or greater. Use 0 if uncertain.",
    )

    # --- text fields: blank input is turned into None by ``empty_string_to_none`` ---
    name: Optional[str] = Field(
        default=None,
        description="Company name. Leave empty string if not clearly identifiable.",
    )
    industry: Optional[str] = Field(
        default=None,
        description="Company industry/sector. Leave empty string if not clearly identifiable.",
    )
    location: Optional[str] = Field(
        default=None,
        description="Company location/address. Leave empty string if not clearly identifiable.",
    )
    description: Optional[str] = Field(
        default=None,
        description="Company description. Leave empty if not clearly available or uncertain.",
    )

    # Cleaned by both ``validate_founded_year`` and ``zero_to_none`` below.
    founded_year: Optional[int] = Field(
        default=None,
        ge=0,
        description="Year company was founded, must be 0 or greater. Leave None if not clearly identifiable or uncertain.",
    )
    website: Optional[str] = Field(
        default=None,
        description="Company website URL. Leave empty if not provided or uncertain.",
    )

    @field_validator(
        "name", "industry", "location", "description", "website", mode="before"
    )
    @classmethod
    def empty_string_to_none(cls, v):
        """Return None for an empty or whitespace-only string, else ``v`` unchanged."""
        is_blank = v == "" or (isinstance(v, str) and v.strip() == "")
        return None if is_blank else v

    @field_validator("id", "founded_year", mode="before")
    @classmethod
    def zero_to_none(cls, v):
        """Return None when ``v`` compares equal to 0 (applies to id and founded_year)."""
        return None if v == 0 else v

    @field_validator("founded_year", mode="before")
    @classmethod
    def validate_founded_year(cls, v) -> Optional[int]:
        """Expert parser: Only accept clearly identifiable founding years.

        Returns the year as a non-negative int. Returns None when ``v`` is
        None, a bool, a non-numeric string (for example "", "Unknown" or
        "Not Available"), a non-whole or non-finite number, a negative
        number, or any other type.
        """
        # bool is a subclass of int; without this guard True/False would pass
        # the isinstance(int) check at the end and be treated as a year.
        if v is None or isinstance(v, bool):
            return None

        if isinstance(v, str):
            try:
                v = int(v)
            except ValueError:
                # Not a plain integer literal; still accept spellings such as
                # "2015.0". Anything non-numeric is rejected here.
                try:
                    v = float(v)
                except ValueError:
                    return None

        if isinstance(v, float):
            # Keep whole numbers (2015.0 -> 2015); is_integer() is False for
            # fractional values as well as for inf and nan.
            if not v.is_integer():
                return None
            v = int(v)

        return v if isinstance(v, int) and v >= 0 else None

    class Config:
        from_attributes = True
class InvestorSchema(BaseModel):
    """
    Expert parser: Only extract investor information if clearly identifiable.
    Leave optional fields empty if uncertain. All numeric values must be 0 or greater.
    """

    # Numeric identifier; a 0 is turned into None by ``zero_to_none``.
    id: Optional[int] = Field(
        default=None,
        ge=0,
        description="Investor ID, must be 0 or greater. Use 0 if uncertain.",
    )

    # --- text fields: blank input is turned into None by ``empty_string_to_none`` ---
    name: Optional[str] = Field(
        default=None,
        description="Investor name. Do not return any special characters, Just the name as a string.",
    )
    description: Optional[str] = Field(
        default=None,
        description="Investor description. Leave empty if not clearly available or uncertain.",
    )

    # --- monetary figures: a 0 is turned into None by ``zero_to_none`` ---
    aum: Optional[int] = Field(
        default=None,
        ge=0,
        description="Assets Under Management in USD, must be 0 or greater. Use 0 if not clearly identifiable or uncertain.",
    )
    check_size_lower: Optional[int] = Field(
        default=None,
        ge=0,
        description="Lower bound of typical investment check size in USD, must be 0 or greater. Use 0 if not clearly identifiable.",
    )
    check_size_upper: Optional[int] = Field(
        default=None,
        ge=0,
        description="Upper bound of typical investment check size in USD, must be 0 or greater. Use 0 if not clearly identifiable.",
    )

    # Free-text list of locations (blank -> None).
    geographic_focus: Optional[str] = Field(
        default=None,
        description="Geographic investment focus. Do not return any special characters, Just locations separated by commas. Leave empty if not clearly identifiable.",
    )
    # The only non-optional field here: falls back to SEED when not supplied.
    stage_focus: InvestmentStage = Field(
        default=InvestmentStage.SEED,
        description="Investment stage focus. Use SEED as default if uncertain.",
    )
    # Deal count (0 -> None).
    number_of_investments: Optional[int] = Field(
        default=None,
        ge=0,
        description="Total number of investments made, must be 0 or greater. Use 0 if not clearly identifiable.",
    )

    @field_validator("name", "description", "geographic_focus", mode="before")
    @classmethod
    def empty_string_to_none(cls, v):
        """Return None for an empty or whitespace-only string, else ``v`` unchanged."""
        is_blank = v == "" or (isinstance(v, str) and v.strip() == "")
        return None if is_blank else v

    @field_validator(
        "id",
        "aum",
        "check_size_lower",
        "check_size_upper",
        "number_of_investments",
        mode="before",
    )
    @classmethod
    def zero_to_none(cls, v):
        """Return None when ``v`` compares equal to 0, else ``v`` unchanged."""
        return None if v == 0 else v

    class Config:
        from_attributes = True
class InvestorData(BaseModel):
    """
    Expert parser: Comprehensive investor data schema for LLM processing.
    Only populate fields with clearly identifiable information. Leave lists empty if uncertain.
    """

    # Required: the investor record itself (no default is given).
    investor: InvestorSchema = Field(
        description="Core investor information. Only populate with clearly identifiable data."
    )

    # Related collections; each one defaults to an empty list.
    portfolio_companies: List[CompanySchema] = Field(
        default=[],
        description="List of portfolio companies. Leave empty if not clearly identifiable.",
    )
    team_members: List[InvestorMemberSchema] = Field(
        default=[],
        description="List of team members. Leave empty if not clearly identifiable.",
    )
    sectors: List[SectorSchema] = Field(
        default=[],
        description="List of investment sectors. Leave empty if not clearly identifiable.",
    )

    class Config:
        from_attributes = True
2025-09-26 15:24:42 +01:00
class CompanyData(BaseModel):
    """
    Expert parser: Comprehensive company data schema for LLM processing.
    Only populate fields with clearly identifiable information. Leave lists empty if uncertain.
    """

    # Required: the company record itself (no default is given).
    company: CompanySchema = Field(
        description="Core company information. Only populate with clearly identifiable data."
    )

    # Related collections; each one defaults to an empty list.
    sectors: List[SectorSchema] = Field(
        default=[],
        description="List of company sectors. Leave empty if not clearly identifiable.",
    )
    members: List[CompanyMemberSchema] = Field(
        default=[],
        description="List of company members. Leave empty if not clearly identifiable.",
    )
    investors: List[InvestorSchema] = Field(
        default=[],
        description="List of investors. Leave empty if not clearly identifiable.",
    )

    class Config:
        from_attributes = True
class InvestorList(BaseModel):
    """Expert parser: List of investors with clearly identifiable information only."""

    # Top-level container: zero or more full investor records; defaults to
    # an empty list.
    investors: List[InvestorData] = Field(
        default=[],
        description="List of investors. Leave empty if no clearly identifiable investors.",
    )