Refactor database models and schemas to allow nullable fields; update init_database function for improved initialization.
This commit is contained in:
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@@ -32,7 +32,6 @@ db_dependency = Annotated[Session, Depends(get_db)]
|
|||||||
def init_database():
|
def init_database():
|
||||||
"""Initialize the database by creating all tables"""
|
"""Initialize the database by creating all tables"""
|
||||||
Base.metadata.create_all(bind=engine)
|
Base.metadata.create_all(bind=engine)
|
||||||
print("Database initialized successfully!")
|
|
||||||
|
|
||||||
|
|
||||||
def get_session_sync() -> Session:
|
def get_session_sync() -> Session:
|
||||||
|
|||||||
+12
-12
@@ -55,12 +55,12 @@ class InvestorTable(Base, TimestampMixin):
|
|||||||
id = Column(Integer, primary_key=True, index=True)
|
id = Column(Integer, primary_key=True, index=True)
|
||||||
name = Column(String, nullable=False)
|
name = Column(String, nullable=False)
|
||||||
description = Column(Text, nullable=True)
|
description = Column(Text, nullable=True)
|
||||||
aum = Column(Integer, nullable=False) # Assets Under Management
|
aum = Column(Integer, nullable=True) # Assets Under Management
|
||||||
check_size_lower = Column(Integer, nullable=False) # Lower bound
|
check_size_lower = Column(Integer, nullable=True) # Lower bound
|
||||||
check_size_upper = Column(Integer, nullable=False) # Upper bound
|
check_size_upper = Column(Integer, nullable=True) # Upper bound
|
||||||
geographic_focus = Column(String, nullable=False)
|
geographic_focus = Column(String, nullable=True)
|
||||||
stage_focus = Column(Enum(InvestmentStage), nullable=False)
|
stage_focus = Column(Enum(InvestmentStage), nullable=True)
|
||||||
number_of_investments = Column(Integer, default=0)
|
number_of_investments = Column(Integer, default=0, nullable=True)
|
||||||
|
|
||||||
# Relationship to portfolio companies
|
# Relationship to portfolio companies
|
||||||
portfolio_companies = relationship(
|
portfolio_companies = relationship(
|
||||||
@@ -80,8 +80,8 @@ class InvestorMember(Base, TimestampMixin):
|
|||||||
__tablename__ = "investor_members"
|
__tablename__ = "investor_members"
|
||||||
id = Column(Integer, primary_key=True, index=True)
|
id = Column(Integer, primary_key=True, index=True)
|
||||||
name = Column(String, nullable=False)
|
name = Column(String, nullable=False)
|
||||||
role = Column(String, nullable=False)
|
role = Column(String, nullable=True)
|
||||||
email = Column(String, nullable=False)
|
email = Column(String, nullable=True)
|
||||||
|
|
||||||
investor_id = Column(Integer, ForeignKey("investors.id"))
|
investor_id = Column(Integer, ForeignKey("investors.id"))
|
||||||
investor = relationship("InvestorTable", back_populates="team_members")
|
investor = relationship("InvestorTable", back_populates="team_members")
|
||||||
@@ -92,8 +92,8 @@ class CompanyTable(Base, TimestampMixin):
|
|||||||
|
|
||||||
id = Column(Integer, primary_key=True, index=True)
|
id = Column(Integer, primary_key=True, index=True)
|
||||||
name = Column(String, nullable=False)
|
name = Column(String, nullable=False)
|
||||||
industry = Column(String, nullable=False)
|
industry = Column(String, nullable=True)
|
||||||
location = Column(String, nullable=False)
|
location = Column(String, nullable=True)
|
||||||
description = Column(String, nullable=True)
|
description = Column(String, nullable=True)
|
||||||
founded_year = Column(Integer, nullable=True)
|
founded_year = Column(Integer, nullable=True)
|
||||||
website = Column(String, nullable=True)
|
website = Column(String, nullable=True)
|
||||||
@@ -115,8 +115,8 @@ class CompanyMember(Base, TimestampMixin):
|
|||||||
__tablename__ = "company_members"
|
__tablename__ = "company_members"
|
||||||
id = Column(Integer, primary_key=True)
|
id = Column(Integer, primary_key=True)
|
||||||
name = Column(String)
|
name = Column(String)
|
||||||
linkedin = Column(String)
|
linkedin = Column(String, nullable=True)
|
||||||
role = Column(String)
|
role = Column(String, nullable=True)
|
||||||
company_id = Column(Integer, ForeignKey("companies.id"), nullable=False)
|
company_id = Column(Integer, ForeignKey("companies.id"), nullable=False)
|
||||||
|
|
||||||
company = relationship("CompanyTable", back_populates="members")
|
company = relationship("CompanyTable", back_populates="members")
|
||||||
|
|||||||
+11
-2
@@ -1,7 +1,7 @@
|
|||||||
import io
|
import io
|
||||||
|
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
from db.db import db_dependency, init_database
|
from db.db import Base, db_dependency, engine
|
||||||
from dotenv import load_dotenv
|
from dotenv import load_dotenv
|
||||||
from fastapi import FastAPI, File, Form, UploadFile
|
from fastapi import FastAPI, File, Form, UploadFile
|
||||||
from pydantic import BaseModel
|
from pydantic import BaseModel
|
||||||
@@ -11,6 +11,13 @@ from services.llm_parser import InvestorProcessor
|
|||||||
from services.querying import QueryProcessor
|
from services.querying import QueryProcessor
|
||||||
|
|
||||||
load_dotenv()
|
load_dotenv()
|
||||||
|
|
||||||
|
|
||||||
|
def init_database():
|
||||||
|
"""Initialize the database by creating all tables"""
|
||||||
|
Base.metadata.create_all(bind=engine)
|
||||||
|
|
||||||
|
|
||||||
init_database()
|
init_database()
|
||||||
|
|
||||||
app = FastAPI()
|
app = FastAPI()
|
||||||
@@ -34,7 +41,9 @@ def health():
|
|||||||
|
|
||||||
|
|
||||||
@app.post("/parse-csv", tags=["CSV Upload"], response_model=list[dict])
|
@app.post("/parse-csv", tags=["CSV Upload"], response_model=list[dict])
|
||||||
async def parse_csv(db: db_dependency, file: UploadFile = File(...), is_investor: int = Form(...)):
|
async def parse_csv(
|
||||||
|
db: db_dependency, file: UploadFile = File(...), is_investor: int = Form(...)
|
||||||
|
):
|
||||||
# Read uploaded CSV with pandas
|
# Read uploaded CSV with pandas
|
||||||
content = await file.read()
|
content = await file.read()
|
||||||
df = pd.read_csv(io.StringIO(content.decode("utf-8")))
|
df = pd.read_csv(io.StringIO(content.decode("utf-8")))
|
||||||
|
|||||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
+173
-44
@@ -1,7 +1,7 @@
|
|||||||
from enum import Enum
|
from enum import Enum
|
||||||
from typing import List, Optional
|
from typing import List, Optional
|
||||||
|
|
||||||
from pydantic import BaseModel, field_validator
|
from pydantic import BaseModel, Field, field_validator
|
||||||
|
|
||||||
|
|
||||||
class InvestmentStage(str, Enum):
|
class InvestmentStage(str, Enum):
|
||||||
@@ -14,98 +14,227 @@ class InvestmentStage(str, Enum):
|
|||||||
|
|
||||||
|
|
||||||
class SectorSchema(BaseModel):
|
class SectorSchema(BaseModel):
|
||||||
id: int
|
"""
|
||||||
name: str
|
Expert parser: Only extract sector information if clearly identifiable.
|
||||||
|
Leave name empty if uncertain about the sector classification.
|
||||||
|
"""
|
||||||
|
|
||||||
|
id: int = Field(
|
||||||
|
ge=0, description="Sector ID, must be 0 or greater. Use 0 if uncertain."
|
||||||
|
)
|
||||||
|
name: str = Field(
|
||||||
|
description="Sector name. Leave empty string if not clearly identifiable from the data."
|
||||||
|
)
|
||||||
|
|
||||||
class Config:
|
class Config:
|
||||||
from_attributes = True
|
from_attributes = True
|
||||||
|
|
||||||
|
|
||||||
class InvestorMemberSchema(BaseModel):
|
class InvestorMemberSchema(BaseModel):
|
||||||
id: int
|
"""
|
||||||
name: str
|
Expert parser: Only extract team member information if clearly identifiable.
|
||||||
role: str
|
Leave fields empty if uncertain about the member details.
|
||||||
email: str
|
"""
|
||||||
investor_id: int
|
|
||||||
|
id: int = Field(
|
||||||
|
ge=0, description="Member ID, must be 0 or greater. Use 0 if uncertain."
|
||||||
|
)
|
||||||
|
name: str = Field(
|
||||||
|
description="Team member name. Leave empty string if not clearly identifiable."
|
||||||
|
)
|
||||||
|
role: str = Field(
|
||||||
|
description="Team member role/title. Leave empty string if not clearly identifiable."
|
||||||
|
)
|
||||||
|
email: str = Field(
|
||||||
|
description="Team member email. Leave empty string if not clearly identifiable or not provided."
|
||||||
|
)
|
||||||
|
investor_id: int = Field(
|
||||||
|
ge=0, description="Investor ID, must be 0 or greater. Use 0 if uncertain."
|
||||||
|
)
|
||||||
|
|
||||||
class Config:
|
class Config:
|
||||||
from_attributes = True
|
from_attributes = True
|
||||||
|
|
||||||
|
|
||||||
class CompanyMemberSchema(BaseModel):
|
class CompanyMemberSchema(BaseModel):
|
||||||
id: int
|
"""
|
||||||
name: Optional[str] = None
|
Expert parser: Only extract company member information if clearly identifiable.
|
||||||
linkedin: Optional[str] = None
|
Leave fields empty if uncertain about the member details.
|
||||||
role: Optional[str] = None
|
"""
|
||||||
company_id: int
|
|
||||||
|
id: int = Field(
|
||||||
|
ge=0, description="Member ID, must be 0 or greater. Use 0 if uncertain."
|
||||||
|
)
|
||||||
|
name: Optional[str] = Field(
|
||||||
|
default="",
|
||||||
|
description="Company member name. Leave empty if not clearly identifiable.",
|
||||||
|
)
|
||||||
|
linkedin: Optional[str] = Field(
|
||||||
|
default="",
|
||||||
|
description="LinkedIn profile URL. Leave empty if not provided or uncertain.",
|
||||||
|
)
|
||||||
|
role: Optional[str] = Field(
|
||||||
|
default="",
|
||||||
|
description="Company member role/title. Leave empty if not clearly identifiable.",
|
||||||
|
)
|
||||||
|
company_id: int = Field(
|
||||||
|
ge=0, description="Company ID, must be 0 or greater. Use 0 if uncertain."
|
||||||
|
)
|
||||||
|
|
||||||
class Config:
|
class Config:
|
||||||
from_attributes = True
|
from_attributes = True
|
||||||
|
|
||||||
|
|
||||||
class CompanySchema(BaseModel):
|
class CompanySchema(BaseModel):
|
||||||
id: int
|
"""
|
||||||
name: str
|
Expert parser: Only extract company information if clearly identifiable.
|
||||||
industry: str
|
Leave optional fields empty if uncertain. Integer values must be 0 or greater.
|
||||||
location: str
|
"""
|
||||||
description: Optional[str] = None # Fixed typo from 'nullabel'
|
|
||||||
founded_year: Optional[int] = None # Changed from str to int to match model
|
id: int = Field(
|
||||||
website: Optional[str] = None
|
ge=0, description="Company ID, must be 0 or greater. Use 0 if uncertain."
|
||||||
|
)
|
||||||
|
name: str = Field(
|
||||||
|
description="Company name. Leave empty string if not clearly identifiable."
|
||||||
|
)
|
||||||
|
industry: str = Field(
|
||||||
|
description="Company industry/sector. Leave empty string if not clearly identifiable."
|
||||||
|
)
|
||||||
|
location: str = Field(
|
||||||
|
description="Company location/address. Leave empty string if not clearly identifiable."
|
||||||
|
)
|
||||||
|
description: Optional[str] = Field(
|
||||||
|
default="",
|
||||||
|
description="Company description. Leave empty if not clearly available or uncertain.",
|
||||||
|
)
|
||||||
|
founded_year: Optional[int] = Field(
|
||||||
|
default=None,
|
||||||
|
ge=0,
|
||||||
|
description="Year company was founded, must be 0 or greater. Leave None if not clearly identifiable or uncertain.",
|
||||||
|
)
|
||||||
|
website: Optional[str] = Field(
|
||||||
|
default="",
|
||||||
|
description="Company website URL. Leave empty if not provided or uncertain.",
|
||||||
|
)
|
||||||
|
|
||||||
@field_validator("founded_year", mode="before")
|
@field_validator("founded_year", mode="before")
|
||||||
@classmethod
|
@classmethod
|
||||||
def validate_founded_year(cls, v):
|
def validate_founded_year(cls, v):
|
||||||
if v is None or v == "Not Available" or v == "":
|
"""Expert parser: Only accept clearly identifiable founding years"""
|
||||||
|
if v is None or v == "Not Available" or v == "" or v == "Unknown":
|
||||||
return None
|
return None
|
||||||
if isinstance(v, str):
|
if isinstance(v, str):
|
||||||
try:
|
try:
|
||||||
return int(v)
|
year = int(v)
|
||||||
|
return year if year >= 0 else None
|
||||||
except ValueError:
|
except ValueError:
|
||||||
return None
|
return None
|
||||||
return v
|
return v if isinstance(v, int) and v >= 0 else None
|
||||||
|
|
||||||
class Config:
|
class Config:
|
||||||
from_attributes = True
|
from_attributes = True
|
||||||
|
|
||||||
|
|
||||||
class InvestorSchema(BaseModel):
|
class InvestorSchema(BaseModel):
|
||||||
id: int
|
"""
|
||||||
name: str
|
Expert parser: Only extract investor information if clearly identifiable.
|
||||||
description: Optional[str] = None
|
Leave optional fields empty if uncertain. All numeric values must be 0 or greater.
|
||||||
aum: int
|
"""
|
||||||
check_size_lower: int
|
|
||||||
check_size_upper: int
|
id: int = Field(
|
||||||
geographic_focus: str
|
ge=0, description="Investor ID, must be 0 or greater. Use 0 if uncertain."
|
||||||
stage_focus: InvestmentStage
|
)
|
||||||
number_of_investments: int = 0
|
name: str = Field(
|
||||||
|
description="Investor name. Leave empty string if not clearly identifiable."
|
||||||
|
)
|
||||||
|
description: Optional[str] = Field(
|
||||||
|
default="",
|
||||||
|
description="Investor description. Leave empty if not clearly available or uncertain.",
|
||||||
|
)
|
||||||
|
aum: int = Field(
|
||||||
|
ge=0,
|
||||||
|
description="Assets Under Management in USD, must be 0 or greater. Use 0 if not clearly identifiable or uncertain.",
|
||||||
|
)
|
||||||
|
check_size_lower: int = Field(
|
||||||
|
ge=0,
|
||||||
|
description="Lower bound of typical investment check size in USD, must be 0 or greater. Use 0 if not clearly identifiable.",
|
||||||
|
)
|
||||||
|
check_size_upper: int = Field(
|
||||||
|
ge=0,
|
||||||
|
description="Upper bound of typical investment check size in USD, must be 0 or greater. Use 0 if not clearly identifiable.",
|
||||||
|
)
|
||||||
|
geographic_focus: str = Field(
|
||||||
|
description="Geographic investment focus. Leave empty string if not clearly identifiable."
|
||||||
|
)
|
||||||
|
stage_focus: InvestmentStage = Field(
|
||||||
|
description="Investment stage focus. Use SEED as default if uncertain."
|
||||||
|
)
|
||||||
|
number_of_investments: int = Field(
|
||||||
|
ge=0,
|
||||||
|
default=0,
|
||||||
|
description="Total number of investments made, must be 0 or greater. Use 0 if not clearly identifiable.",
|
||||||
|
)
|
||||||
|
|
||||||
class Config:
|
class Config:
|
||||||
from_attributes = True
|
from_attributes = True
|
||||||
|
|
||||||
|
|
||||||
class InvestorData(BaseModel):
|
class InvestorData(BaseModel):
|
||||||
"""Comprehensive investor data schema for LLM processing"""
|
"""
|
||||||
|
Expert parser: Comprehensive investor data schema for LLM processing.
|
||||||
|
Only populate fields with clearly identifiable information. Leave lists empty if uncertain.
|
||||||
|
"""
|
||||||
|
|
||||||
investor: InvestorSchema
|
investor: InvestorSchema = Field(
|
||||||
portfolio_companies: List[CompanySchema] = []
|
description="Core investor information. Only populate with clearly identifiable data."
|
||||||
team_members: List[InvestorMemberSchema] = [] # Changed from TeamMember
|
)
|
||||||
sectors: List[SectorSchema] = []
|
portfolio_companies: List[CompanySchema] = Field(
|
||||||
|
default=[],
|
||||||
|
description="List of portfolio companies. Leave empty if not clearly identifiable.",
|
||||||
|
)
|
||||||
|
team_members: List[InvestorMemberSchema] = Field(
|
||||||
|
default=[],
|
||||||
|
description="List of team members. Leave empty if not clearly identifiable.",
|
||||||
|
)
|
||||||
|
sectors: List[SectorSchema] = Field(
|
||||||
|
default=[],
|
||||||
|
description="List of investment sectors. Leave empty if not clearly identifiable.",
|
||||||
|
)
|
||||||
|
|
||||||
class Config:
|
class Config:
|
||||||
from_attributes = True
|
from_attributes = True
|
||||||
|
|
||||||
|
|
||||||
class CompanyData(BaseModel): # Renamed from CompaniesData for consistency
|
class CompanyData(BaseModel):
|
||||||
company: CompanySchema
|
"""
|
||||||
sectors: List[SectorSchema] = []
|
Expert parser: Comprehensive company data schema for LLM processing.
|
||||||
members: List[CompanyMemberSchema] = [] # Changed to match model relationship name
|
Only populate fields with clearly identifiable information. Leave lists empty if uncertain.
|
||||||
investors: List[InvestorSchema] = []
|
"""
|
||||||
|
|
||||||
|
company: CompanySchema = Field(
|
||||||
|
description="Core company information. Only populate with clearly identifiable data."
|
||||||
|
)
|
||||||
|
sectors: List[SectorSchema] = Field(
|
||||||
|
default=[],
|
||||||
|
description="List of company sectors. Leave empty if not clearly identifiable.",
|
||||||
|
)
|
||||||
|
members: List[CompanyMemberSchema] = Field(
|
||||||
|
default=[],
|
||||||
|
description="List of company members. Leave empty if not clearly identifiable.",
|
||||||
|
)
|
||||||
|
investors: List[InvestorSchema] = Field(
|
||||||
|
default=[],
|
||||||
|
description="List of investors. Leave empty if not clearly identifiable.",
|
||||||
|
)
|
||||||
|
|
||||||
class Config:
|
class Config:
|
||||||
from_attributes = True
|
from_attributes = True
|
||||||
|
|
||||||
|
|
||||||
class InvestorList(BaseModel):
|
class InvestorList(BaseModel):
|
||||||
investors: List[InvestorData] = []
|
"""Expert parser: List of investors with clearly identifiable information only."""
|
||||||
|
|
||||||
|
investors: List[InvestorData] = Field(
|
||||||
|
default=[],
|
||||||
|
description="List of investors. Leave empty if no clearly identifiable investors.",
|
||||||
|
)
|
||||||
|
|||||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
Reference in New Issue
Block a user