ba0ed169ce
- Added InvestorProcessor class for processing CSV data in batches and saving to SQL and vector databases. - Introduced QueryProcessor class for querying investor information from SQL and vector databases. - Integrated OpenAI's ChatGPT for structured output generation. - Implemented data cleaning and control character removal in CSV processing. - Added asynchronous processing capabilities for batch handling. - Established connection to ChromaDB for vector storage of investor descriptions. - Defined structured output schemas using Pydantic for investor data validation. - Enhanced settings management for API key and database configurations.
38 lines
651 B
Python
38 lines
651 B
Python
from typing import List
|
|
|
|
from pydantic import BaseModel
|
|
|
|
|
|
# Schema for one investor record.
#
# Every field is declared without a default, so all of them are required.
# Documented with `#` comments instead of a class docstring: pydantic copies
# a model's docstring into the generated JSON schema description, and this
# schema should stay exactly as declared.
class Investor(BaseModel):
    name: str
    # Integer-valued; presumably assets under management -- units are not
    # shown in this file, confirm against the data source.
    aum: int
    # Free-form text fields (no format is enforced beyond `str`).
    check_size: str
    sector_focus: str
    stage_focus: str
    region: str
    investment_thesis: str
    investor_description: str
|
|
|
|
|
|
# Container model: a required list of `Investor` items under the single
# key `investor_list`.
# (`#` comments are used on purpose; a class docstring would be copied into
# the JSON schema pydantic generates for this model.)
class InvestorList(BaseModel):
    investor_list: List[Investor]
|
|
|
|
|
|
# Schema for one query result.
#
# Declares the same eight fields as `Investor` (same names, types and order)
# followed by an extra `reason` string. All fields are required.
# Documented with `#` comments rather than a docstring so the JSON schema
# pydantic generates for this model is unchanged.
class QueryResponse(BaseModel):
    name: str
    # Integer-valued; presumably assets under management -- confirm units.
    aum: int
    check_size: str
    sector_focus: str
    stage_focus: str
    region: str
    investment_thesis: str
    investor_description: str
    # Presumably explains why this investor was returned for the question;
    # confirm against the code that fills it in.
    reason: str
|
|
|
|
|
|
# Request payload carrying a single required string field, `question`.
# (Kept as `#` comments; a class docstring would show up in the JSON schema
# pydantic generates for this model.)
class QueryRequest(BaseModel):
    question: str
|
|
|
|
|
|
# Container model: a required list of `QueryResponse` items under the single
# key `responses`.
# (`#` comments are used on purpose; a class docstring would be copied into
# the JSON schema pydantic generates for this model.)
class QueryResponseList(BaseModel):
    responses: List[QueryResponse]