diff --git a/app/routers/__pycache__/companies.cpython-312.pyc b/app/routers/__pycache__/companies.cpython-312.pyc index 2ab18c8..d1b4558 100644 Binary files a/app/routers/__pycache__/companies.cpython-312.pyc and b/app/routers/__pycache__/companies.cpython-312.pyc differ diff --git a/app/routers/__pycache__/investors.cpython-312.pyc b/app/routers/__pycache__/investors.cpython-312.pyc index 0c21826..1416da7 100644 Binary files a/app/routers/__pycache__/investors.cpython-312.pyc and b/app/routers/__pycache__/investors.cpython-312.pyc differ diff --git a/app/routers/companies.py b/app/routers/companies.py index 3e83c3a..626c080 100644 --- a/app/routers/companies.py +++ b/app/routers/companies.py @@ -34,6 +34,10 @@ def read_companies(db: Session = Depends(get_db)): """Get all companies with their investor relationships""" companies = ( db.query(CompanyTable) + .filter( + CompanyTable.name.isnot(None), + CompanyTable.description.isnot(None) + ) .options( selectinload(CompanyTable.investors), selectinload(CompanyTable.members), diff --git a/app/routers/investors.py b/app/routers/investors.py index d7078fc..b28b532 100644 --- a/app/routers/investors.py +++ b/app/routers/investors.py @@ -7,6 +7,7 @@ from fastapi import APIRouter, Depends, HTTPException, Query from schemas.router_schemas import InvestmentStage, InvestorData from pydantic import BaseModel from sqlalchemy.orm import Session, selectinload +from services.querying import QueryProcessor router = APIRouter(tags=["Investor Routes"]) @@ -234,3 +235,70 @@ def delete_investor(investor_id: int, db: Session = Depends(get_db)): return {"message": "Investor deleted successfully"} +@router.get("/investors/{investor_id}/similar", response_model=List[InvestorData]) +def find_similar_investors(investor_id: int, db: Session = Depends(get_db)): + """Find investors similar to a given investor""" + + # First, get the target investor + target_investor = ( + db.query(InvestorTable) + .options( + selectinload(InvestorTable.portfolio_companies), + selectinload(InvestorTable.team_members), + selectinload(InvestorTable.sectors), + ) + .filter(InvestorTable.id == investor_id) + .first() + ) + + if not target_investor: + raise HTTPException(status_code=404, detail="Investor not found") + + # Build query to find similar investors + query = db.query(InvestorTable).options( + selectinload(InvestorTable.portfolio_companies), + selectinload(InvestorTable.team_members), + selectinload(InvestorTable.sectors), + ).filter(InvestorTable.id != investor_id) # Exclude the target investor + + # Filter by same stage focus + query = query.filter(InvestorTable.stage_focus == target_investor.stage_focus) + + # Filter by similar geographic focus (partial match) + query = query.filter(InvestorTable.geographic_focus.ilike(f"%{target_investor.geographic_focus}%")) + + # Filter by overlapping check size ranges + query = query.filter( + InvestorTable.check_size_upper >= target_investor.check_size_lower, + InvestorTable.check_size_lower <= target_investor.check_size_upper + ) + + # Filter by similar AUM (within 50% range) + aum_lower = int(target_investor.aum * 0.5) + aum_upper = int(target_investor.aum * 1.5) + query = query.filter( + InvestorTable.aum >= aum_lower, + InvestorTable.aum <= aum_upper + ) + + # Filter by common sectors + target_sector_names = [sector.name for sector in target_investor.sectors] + if target_sector_names: + query = query.join(InvestorTable.sectors).filter( + SectorTable.name.in_(target_sector_names) + ) + + investors = query.all() + + # Transform to InvestorData format + investor_data_list = [] + for investor in investors: + investor_data = InvestorData( + investor=investor, + portfolio_companies=investor.portfolio_companies, + team_members=investor.team_members, + sectors=investor.sectors, + ) + investor_data_list.append(investor_data) + + return investor_data_list \ No newline at end of file diff --git a/app/schemas/__pycache__/py_schemas.cpython-312.pyc b/app/schemas/__pycache__/py_schemas.cpython-312.pyc index 9dd3893..d8c9844 100644 Binary files a/app/schemas/__pycache__/py_schemas.cpython-312.pyc and b/app/schemas/__pycache__/py_schemas.cpython-312.pyc differ diff --git a/app/schemas/py_schemas.py b/app/schemas/py_schemas.py index 4fa88ec..5b982fa 100644 --- a/app/schemas/py_schemas.py +++ b/app/schemas/py_schemas.py @@ -19,13 +19,32 @@ class SectorSchema(BaseModel): Leave name empty if uncertain about the sector classification. """ - id: int = Field( - ge=0, description="Sector ID, must be 0 or greater. Use 0 if uncertain." + id: Optional[int] = Field( + default=None, + ge=0, + description="Sector ID, must be 0 or greater. Use 0 if uncertain.", ) - name: str = Field( - description="Sector name. Leave empty string if not clearly identifiable from the data." + name: Optional[str] = Field( + default=None, + description="Sector name. Leave empty string if not clearly identifiable from the data.", ) + @field_validator("name", mode="before") + @classmethod + def empty_string_to_none(cls, v): + """Convert empty strings to None""" + if v == "" or (isinstance(v, str) and v.strip() == ""): + return None + return v + + @field_validator("id", mode="before") + @classmethod + def zero_to_none(cls, v): + """Convert 0 to None for optional id field""" + if v == 0: + return None + return v + class Config: from_attributes = True @@ -36,22 +55,45 @@ class InvestorMemberSchema(BaseModel): Leave fields empty if uncertain about the member details. """ - id: int = Field( - ge=0, description="Member ID, must be 0 or greater. Use 0 if uncertain." + id: Optional[int] = Field( + default=None, + ge=0, + description="Member ID, must be 0 or greater. Use 0 if uncertain.", ) - name: str = Field( - description="Team member name. Leave empty string if not clearly identifiable." + name: Optional[str] = Field( + default=None, + description="Team member name. Leave empty string if not clearly identifiable.", ) - role: str = Field( - description="Team member role/title. Leave empty string if not clearly identifiable." + role: Optional[str] = Field( + default=None, + description="Team member role/title. Leave empty string if not clearly identifiable.", ) - email: str = Field( - description="Team member email. Leave empty string if not clearly identifiable or not provided." + email: Optional[str] = Field( + default=None, + description="Team member email. Leave empty string if not clearly identifiable or not provided.", ) - investor_id: int = Field( - ge=0, description="Investor ID, must be 0 or greater. Use 0 if uncertain." + investor_id: Optional[int] = Field( + default=None, + ge=0, + description="Investor ID, must be 0 or greater. Use 0 if uncertain.", ) + @field_validator("name", "role", "email", mode="before") + @classmethod + def empty_string_to_none(cls, v): + """Convert empty strings to None""" + if v == "" or (isinstance(v, str) and v.strip() == ""): + return None + return v + + @field_validator("id", "investor_id", mode="before") + @classmethod + def zero_to_none(cls, v): + """Convert 0 to None for optional integer fields""" + if v == 0: + return None + return v + class Config: from_attributes = True @@ -62,25 +104,45 @@ class CompanyMemberSchema(BaseModel): Leave fields empty if uncertain about the member details. """ - id: int = Field( - ge=0, description="Member ID, must be 0 or greater. Use 0 if uncertain." + id: Optional[int] = Field( + default=None, + ge=0, + description="Member ID, must be 0 or greater. Use 0 if uncertain.", ) name: Optional[str] = Field( - default="", + default=None, description="Company member name. Leave empty if not clearly identifiable.", ) linkedin: Optional[str] = Field( - default="", + default=None, description="LinkedIn profile URL. Leave empty if not provided or uncertain.", ) role: Optional[str] = Field( - default="", + default=None, description="Company member role/title. Leave empty if not clearly identifiable.", ) - company_id: int = Field( - ge=0, description="Company ID, must be 0 or greater. Use 0 if uncertain." + company_id: Optional[int] = Field( + default=None, + ge=0, + description="Company ID, must be 0 or greater. Use 0 if uncertain.", ) + @field_validator("name", "linkedin", "role", mode="before") + @classmethod + def empty_string_to_none(cls, v): + """Convert empty strings to None""" + if v == "" or (isinstance(v, str) and v.strip() == ""): + return None + return v + + @field_validator("id", "company_id", mode="before") + @classmethod + def zero_to_none(cls, v): + """Convert 0 to None for optional integer fields""" + if v == 0: + return None + return v + class Config: from_attributes = True @@ -91,20 +153,25 @@ class CompanySchema(BaseModel): Leave optional fields empty if uncertain. Integer values must be 0 or greater. """ - id: int = Field( - ge=0, description="Company ID, must be 0 or greater. Use 0 if uncertain." + id: Optional[int] = Field( + default=None, + ge=0, + description="Company ID, must be 0 or greater. Use 0 if uncertain.", ) - name: str = Field( - description="Company name. Leave empty string if not clearly identifiable." + name: Optional[str] = Field( + default=None, + description="Company name. Leave empty string if not clearly identifiable.", ) - industry: str = Field( - description="Company industry/sector. Leave empty string if not clearly identifiable." + industry: Optional[str] = Field( + default=None, + description="Company industry/sector. Leave empty string if not clearly identifiable.", ) - location: str = Field( - description="Company location/address. Leave empty string if not clearly identifiable." + location: Optional[str] = Field( + default=None, + description="Company location/address. Leave empty string if not clearly identifiable.", ) description: Optional[str] = Field( - default="", + default=None, description="Company description. Leave empty if not clearly available or uncertain.", ) founded_year: Optional[int] = Field( @@ -113,10 +180,28 @@ class CompanySchema(BaseModel): description="Year company was founded, must be 0 or greater. Leave None if not clearly identifiable or uncertain.", ) website: Optional[str] = Field( - default="", + default=None, description="Company website URL. Leave empty if not provided or uncertain.", ) + @field_validator( + "name", "industry", "location", "description", "website", mode="before" + ) + @classmethod + def empty_string_to_none(cls, v): + """Convert empty strings to None""" + if v == "" or (isinstance(v, str) and v.strip() == ""): + return None + return v + + @field_validator("id", "founded_year", mode="before") + @classmethod + def zero_to_none(cls, v): + """Convert 0 to None for founded_year""" + if v == 0: + return None + return v + @field_validator("founded_year", mode="before") @classmethod def validate_founded_year(cls, v): @@ -141,40 +226,71 @@ class InvestorSchema(BaseModel): Leave optional fields empty if uncertain. All numeric values must be 0 or greater. """ - id: int = Field( - ge=0, description="Investor ID, must be 0 or greater. Use 0 if uncertain." + id: Optional[int] = Field( + default=None, + ge=0, + description="Investor ID, must be 0 or greater. Use 0 if uncertain.", ) - name: str = Field( - description="Investor name. Do not return any special characters, Just the name as a string." + name: Optional[str] = Field( + default=None, + description="Investor name. Do not return any special characters, Just the name as a string.", ) description: Optional[str] = Field( - default="", + default=None, description="Investor description. Leave empty if not clearly available or uncertain.", ) - aum: int | None = Field( + aum: Optional[int] = Field( + default=None, ge=0, description="Assets Under Management in USD, must be 0 or greater. Use 0 if not clearly identifiable or uncertain.", ) - check_size_lower: int | None = Field( + check_size_lower: Optional[int] = Field( + default=None, ge=0, description="Lower bound of typical investment check size in USD, must be 0 or greater. Use 0 if not clearly identifiable.", ) - check_size_upper: int | None = Field( + check_size_upper: Optional[int] = Field( + default=None, ge=0, description="Upper bound of typical investment check size in USD, must be 0 or greater. Use 0 if not clearly identifiable.", ) - geographic_focus: str | None = Field( + geographic_focus: Optional[str] = Field( + default=None, description="Geographic investment focus. Do not return any special characters, Just locations separated by commas. Leave empty if not clearly identifiable.", ) stage_focus: InvestmentStage = Field( - description="Investment stage focus. Use SEED as default if uncertain." + default=InvestmentStage.SEED, + description="Investment stage focus. Use SEED as default if uncertain.", ) - number_of_investments: int | None = Field( + number_of_investments: Optional[int] = Field( + default=None, ge=0, - default=0, description="Total number of investments made, must be 0 or greater. Use 0 if not clearly identifiable.", ) + @field_validator("name", "description", "geographic_focus", mode="before") + @classmethod + def empty_string_to_none(cls, v): + """Convert empty strings to None""" + if v == "" or (isinstance(v, str) and v.strip() == ""): + return None + return v + + @field_validator( + "id", + "aum", + "check_size_lower", + "check_size_upper", + "number_of_investments", + mode="before", + ) + @classmethod + def zero_to_none(cls, v): + """Convert 0 to None for optional integer fields""" + if v == 0: + return None + return v + class Config: from_attributes = True diff --git a/app/services/__pycache__/llm_parser.cpython-312.pyc b/app/services/__pycache__/llm_parser.cpython-312.pyc index e64b117..7e4223f 100644 Binary files a/app/services/__pycache__/llm_parser.cpython-312.pyc and b/app/services/__pycache__/llm_parser.cpython-312.pyc differ diff --git a/app/services/__pycache__/querying.cpython-312.pyc b/app/services/__pycache__/querying.cpython-312.pyc index cadd1a4..fb5cb6d 100644 Binary files a/app/services/__pycache__/querying.cpython-312.pyc and b/app/services/__pycache__/querying.cpython-312.pyc differ diff --git a/app/services/llm_parser.py b/app/services/llm_parser.py index 0387a6c..bc714f2 100644 --- a/app/services/llm_parser.py +++ b/app/services/llm_parser.py @@ -21,7 +21,7 @@ class InvestorProcessor: self.llm = ChatOpenAI( api_key=os.getenv("OPENROUTER_API_KEY"), base_url="https://openrouter.ai/api/v1", - model="google/gemini-2.5-flash-lite", + model="openai/gpt-4o-mini", temperature=0, ) diff --git a/app/services/querying.py b/app/services/querying.py index 11b38f4..3078e18 100644 --- a/app/services/querying.py +++ b/app/services/querying.py @@ -21,8 +21,8 @@ class QueryProcessor: self.llm = ChatOpenAI( api_key=os.getenv("OPENROUTER_API_KEY"), base_url="https://openrouter.ai/api/v1", - model="openai/gpt-5-nano", - temperature=0.3, + model="openai/gpt-4o-mini", + temperature=0, ) self.toolkit = SQLDatabaseToolkit(db=db, llm=self.llm) # Update system message to specifically request only investor IDs