Refactor investor and fund schemas to support new check size range
- Removed deprecated `stage_focus` column from `InvestorTable` and `InvestorSchema`.
- Updated `FundTable` to change `fund_size` from VARCHAR to INTEGER and added `check_size_lower` and `check_size_upper` columns.
- Modified API routes to return investor-fund combinations as separate entries.
- Created new `InvestorFundData` schema for combined investor-fund responses.
- Implemented LLM parsing for check size range from estimated investment size.
- Updated database migration script to reflect schema changes and ensure data integrity.
- Removed obsolete verification and test scripts related to the old schema.
This commit is contained in:
+78
-12
@@ -27,6 +27,15 @@ class CurrencyConversion(BaseModel):
|
||||
notes: str = ""
|
||||
|
||||
|
||||
class CheckSizeRange(BaseModel):
    """Schema for LLM check size range parsing from estimated investment size"""

    # NOTE(review): the docstring above is presumably forwarded to the LLM as
    # the schema description by `with_structured_output`, so it is left
    # untouched; confirm before rewording it.

    # Smallest check in whole USD. The default of 0 doubles as "no value":
    # parse_check_size_range maps any non-positive bound to None.
    lower_bound_usd: int = 0

    # Largest check in whole USD, with the same 0-means-missing convention.
    upper_bound_usd: int = 0

    # Free-form confidence label; the intended values are
    # "high", "medium" and "low" (not enforced by the type).
    confidence: str = "high"

    # Optional free-text remarks accompanying the parsed range.
    notes: str = ""
|
||||
|
||||
|
||||
class InvestorProcessor:
|
||||
def __init__(self):
|
||||
self.llm = ChatOpenAI(
|
||||
@@ -36,10 +45,12 @@ class InvestorProcessor:
|
||||
temperature=0,
|
||||
)
|
||||
|
||||
# Only use structured LLM for currency conversion
|
||||
# Structured LLMs for specific parsing tasks
|
||||
self.currency_converter_llm = self.llm.with_structured_output(
|
||||
CurrencyConversion
|
||||
)
|
||||
self.check_size_parser_llm = self.llm.with_structured_output(CheckSizeRange)
|
||||
|
||||
# Keep legacy structured LLMs for backward compatibility
|
||||
self.investor_structured_llm = self.llm.with_structured_output(InvestorData)
|
||||
self.company_structured_llm = self.llm.with_structured_output(CompanyData)
|
||||
@@ -77,6 +88,57 @@ Return only the USD integer amount with current exchange rates."""
|
||||
print(f"Error converting currency '{amount_str}': {e}")
|
||||
return None
|
||||
|
||||
async def parse_check_size_range(
    self, estimated_investment_str: str
) -> tuple[Optional[int], Optional[int]]:
    """
    Use LLM to parse check size range from estimated investment size string.

    Handles formats like:
    - "EUR 1,000 to 2,000"
    - "$100K-$500K"
    - "Between $1M and $5M"
    - "Up to EUR 10 million"
    - "$2M typical"

    Args:
        estimated_investment_str: Raw estimated-investment text. Empty
            values, "Not Available" and "0" are treated as "no data" and
            short-circuit without calling the LLM.

    Returns:
        Tuple of (lower_bound_usd, upper_bound_usd). A bound is None when
        the LLM reported a non-positive value for it (so the "up to X" case,
        which the prompt maps to a lower bound of 0, yields ``(None, X)``).
        Both are None when the input is unavailable or parsing fails.
        When both bounds are present, lower <= upper is guaranteed.
    """
    if not estimated_investment_str or estimated_investment_str in (
        "Not Available",
        "0",
    ):
        return None, None

    try:
        prompt = f"""Parse this check size/investment range into lower and upper bounds in USD as integers.

Input: {estimated_investment_str}

Instructions:
- If it's a range (e.g., "EUR 1M to 5M"), extract both bounds
- If it's a single amount (e.g., "$2M typical"), use it as both lower and upper
- If it says "up to X", use 0 as lower and X as upper
- Convert all currencies to USD using current exchange rates
- Return integers (whole numbers, no decimals)

Examples:
- "EUR 1,000 to 2,000" -> lower: 1100, upper: 2200
- "$100K-$500K" -> lower: 100000, upper: 500000
- "Between $1M and $5M" -> lower: 1000000, upper: 5000000
- "Up to EUR 10 million" -> lower: 0, upper: 11000000
- "$2M typical" -> lower: 2000000, upper: 2000000
- "GBP 500K-2M" -> lower: 600000, upper: 2400000

Return the lower and upper bounds in USD."""

        result = await self.check_size_parser_llm.ainvoke(prompt)

        # Non-positive bounds mean "missing" (0 is the schema default).
        lower = result.lower_bound_usd if result.lower_bound_usd > 0 else None
        upper = result.upper_bound_usd if result.upper_bound_usd > 0 else None

        # The model output is not validated anywhere else: if it hands back
        # the bounds in the wrong order, normalise them so that an inverted
        # range is never persisted.
        if lower is not None and upper is not None and lower > upper:
            lower, upper = upper, lower

        return lower, upper
    except Exception as e:
        # Best-effort parsing: report the failure and fall back to "unknown"
        # rather than aborting the caller's processing.
        print(f"Error parsing check size range '{estimated_investment_str}': {e}")
        return None, None
|
||||
|
||||
def parse_json_profile(self, json_str: str) -> Optional[dict]:
|
||||
"""
|
||||
Manually parse the JSON profile from the CSV.
|
||||
@@ -157,7 +219,8 @@ Return only the USD integer amount with current exchange rates."""
|
||||
"fund_name": fund.get("fundName"),
|
||||
"fund_size": None,
|
||||
"fund_size_source_url": fund.get("fundSizeSourceUrl"),
|
||||
"estimated_investment_size": None,
|
||||
"check_size_lower": None,
|
||||
"check_size_upper": None,
|
||||
"source_url": fund.get("sourceUrl"),
|
||||
"source_provider": fund.get("sourceProvider"),
|
||||
"geographic_focus": fund.get("geographicFocus", []),
|
||||
@@ -165,19 +228,23 @@ Return only the USD integer amount with current exchange rates."""
|
||||
"sector_focus": fund.get("sectorFocus", []),
|
||||
}
|
||||
|
||||
# Convert fund size to USD
|
||||
# Convert fund size to USD integer
|
||||
fund_size_str = fund.get("fundSize")
|
||||
if fund_size_str and fund_size_str != "Not Available":
|
||||
fund_size_usd = await self.convert_to_usd(fund_size_str)
|
||||
if fund_size_usd:
|
||||
fund_data["fund_size"] = str(fund_size_usd)
|
||||
fund_data["fund_size"] = fund_size_usd # Store as integer
|
||||
|
||||
# Convert estimated investment size
|
||||
# Parse check size range from estimated investment size
|
||||
est_size_str = fund.get("estimatedInvestmentSize")
|
||||
if est_size_str and est_size_str != "Not Available":
|
||||
est_size_usd = await self.convert_to_usd(est_size_str)
|
||||
if est_size_usd:
|
||||
fund_data["estimated_investment_size"] = str(est_size_usd)
|
||||
check_lower, check_upper = await self.parse_check_size_range(
|
||||
est_size_str
|
||||
)
|
||||
if check_lower is not None:
|
||||
fund_data["check_size_lower"] = check_lower
|
||||
if check_upper is not None:
|
||||
fund_data["check_size_upper"] = check_upper
|
||||
|
||||
investor_data["funds"].append(fund_data)
|
||||
|
||||
@@ -430,11 +497,10 @@ Return only the USD integer amount with current exchange rates."""
|
||||
fund = FundTable(
|
||||
investor_id=investor.id,
|
||||
fund_name=fund_data.get("fund_name"),
|
||||
fund_size=fund_data.get("fund_size"),
|
||||
fund_size=fund_data.get("fund_size"), # Now an integer
|
||||
fund_size_source_url=fund_data.get("fund_size_source_url"),
|
||||
estimated_investment_size=fund_data.get(
|
||||
"estimated_investment_size"
|
||||
),
|
||||
check_size_lower=fund_data.get("check_size_lower"), # NEW
|
||||
check_size_upper=fund_data.get("check_size_upper"), # NEW
|
||||
source_url=fund_data.get("source_url"),
|
||||
source_provider=fund_data.get("source_provider"),
|
||||
geographic_focus=fund_data.get("geographic_focus"),
|
||||
|
||||
Reference in New Issue
Block a user