Compare commits
9 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 84e3c7b72a | |||
| a9589e54f3 | |||
| d341cacb9a | |||
| c0fbbdd917 | |||
| 1f3f08e80d | |||
| cd7172ed9f | |||
| c199f5423a | |||
| a2b3ceedbe | |||
| 3842171549 |
+1
-2
@@ -10,8 +10,7 @@
|
||||
|
||||
*__pycache__
|
||||
|
||||
/*.db
|
||||
|
||||
*.cypython
|
||||
|
||||
/preprocessor
|
||||
|
||||
|
||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
+7
-1
@@ -1,4 +1,5 @@
|
||||
import os
|
||||
from pathlib import Path
|
||||
from typing import Annotated
|
||||
|
||||
from fastapi import Depends
|
||||
@@ -9,7 +10,11 @@ from sqlalchemy.orm import Session, sessionmaker
|
||||
Base = declarative_base()
|
||||
|
||||
# Database configuration
|
||||
DATABASE_URL = os.getenv("DATABASE_URL", "sqlite:///./investors.db")
|
||||
# Use the preprocessor's database for consistency
|
||||
# Get absolute path to the preprocessor database
|
||||
# APP_DIR = Path(__file__).parent.parent
|
||||
# PREPROCESSOR_DB = APP_DIR.parent / "preprocessor" / "version_two.db"
|
||||
DATABASE_URL = os.getenv("DATABASE_URL", "sqlite:///./version_two.db")
|
||||
|
||||
# Create engine
|
||||
engine = create_engine(DATABASE_URL, echo=False)
|
||||
@@ -38,6 +43,7 @@ def get_session_sync() -> Session:
|
||||
"""Get a database session for synchronous operations"""
|
||||
return SessionLocal()
|
||||
|
||||
|
||||
def get_db_session():
|
||||
"""Get a database session for direct use."""
|
||||
return SessionLocal()
|
||||
|
||||
+117
-10
@@ -2,7 +2,7 @@ import enum
|
||||
|
||||
from sqlalchemy import Column, DateTime, ForeignKey, Integer, String, Table, Text, func
|
||||
from sqlalchemy.orm import declarative_mixin, relationship
|
||||
from sqlalchemy.types import Enum
|
||||
from sqlalchemy.types import JSON, Enum
|
||||
|
||||
from db.db import Base
|
||||
|
||||
@@ -70,6 +70,22 @@ project_company_association = Table(
|
||||
Column("company_id", Integer, ForeignKey("companies.id")),
|
||||
)
|
||||
|
||||
# Association table for fund-stage many-to-many
|
||||
fund_investment_stages_association = Table(
|
||||
"fund_investment_stages",
|
||||
Base.metadata,
|
||||
Column("fund_id", Integer, ForeignKey("funds.id")),
|
||||
Column("stage_id", Integer, ForeignKey("investment_stages.id")),
|
||||
)
|
||||
|
||||
# Association table for fund-sector many-to-many
|
||||
fund_sectors_association = Table(
|
||||
"fund_sectors",
|
||||
Base.metadata,
|
||||
Column("fund_id", Integer, ForeignKey("funds.id")),
|
||||
Column("sector_id", Integer, ForeignKey("sectors.id")),
|
||||
)
|
||||
|
||||
|
||||
class InvestorTable(Base, TimestampMixin):
|
||||
__tablename__ = "investors"
|
||||
@@ -77,14 +93,47 @@ class InvestorTable(Base, TimestampMixin):
|
||||
id = Column(Integer, primary_key=True, index=True)
|
||||
name = Column(String, nullable=False)
|
||||
description = Column(Text, nullable=True)
|
||||
aum = Column(Integer, nullable=True) # Assets Under Management
|
||||
check_size_lower = Column(Integer, nullable=True) # Lower bound
|
||||
check_size_upper = Column(Integer, nullable=True) # Upper bound
|
||||
|
||||
# Basic investor info
|
||||
website = Column(String, nullable=True)
|
||||
headquarters = Column(String, nullable=True)
|
||||
|
||||
# AUM fields
|
||||
aum = Column(Integer, nullable=True) # Store as integer for numerical filtering
|
||||
aum_as_of_date = Column(String, nullable=True)
|
||||
aum_source_url = Column(String, nullable=True)
|
||||
|
||||
# Check size (deprecated in favor of fund-level data, but keeping for backward compatibility)
|
||||
check_size_lower = Column(Integer, nullable=True)
|
||||
check_size_upper = Column(Integer, nullable=True)
|
||||
|
||||
# Geographic focus (deprecated in favor of fund-level, but keeping for backward compatibility)
|
||||
geographic_focus = Column(String, nullable=True)
|
||||
stage_focus = Column(Enum(InvestmentStage), nullable=True)
|
||||
|
||||
# Investment thesis and portfolio
|
||||
investment_thesis = Column(JSON, nullable=True) # Array of thesis statements
|
||||
portfolio_highlights = Column(
|
||||
JSON, nullable=True
|
||||
) # Array of portfolio company names
|
||||
linked_documents = Column(JSON, nullable=True) # Array of document URLs
|
||||
|
||||
# Research metadata
|
||||
researcher_notes = Column(Text, nullable=True)
|
||||
missing_important_fields = Column(
|
||||
JSON, nullable=True
|
||||
) # Array of missing field names
|
||||
sources = Column(JSON, nullable=True) # JSON object with source URLs
|
||||
|
||||
# Portfolio info
|
||||
number_of_investments = Column(Integer, default=0, nullable=True)
|
||||
|
||||
team_members = relationship("InvestorMember", back_populates="investor")
|
||||
# Relationships
|
||||
team_members = relationship(
|
||||
"InvestorMember", back_populates="investor", cascade="all, delete-orphan"
|
||||
)
|
||||
funds = relationship(
|
||||
"FundTable", back_populates="investor", cascade="all, delete-orphan"
|
||||
)
|
||||
|
||||
# Relationship to portfolio companies
|
||||
portfolio_companies = relationship(
|
||||
@@ -111,12 +160,51 @@ class InvestorMember(Base, TimestampMixin):
|
||||
id = Column(Integer, primary_key=True, index=True)
|
||||
name = Column(String, nullable=False)
|
||||
role = Column(String, nullable=True)
|
||||
title = Column(String, nullable=True) # Alternative to role
|
||||
email = Column(String, nullable=True)
|
||||
source_url = Column(String, nullable=True) # URL where member info was found
|
||||
|
||||
investor_id = Column(Integer, ForeignKey("investors.id"))
|
||||
investor = relationship("InvestorTable", back_populates="team_members")
|
||||
|
||||
|
||||
class FundTable(Base, TimestampMixin):
|
||||
__tablename__ = "funds"
|
||||
|
||||
id = Column(Integer, primary_key=True, index=True)
|
||||
investor_id = Column(Integer, ForeignKey("investors.id"), nullable=False)
|
||||
|
||||
# Fund details
|
||||
fund_name = Column(String, nullable=True)
|
||||
fund_size = Column(
|
||||
Integer, nullable=True
|
||||
) # Store as integer for numerical filtering
|
||||
fund_size_source_url = Column(String, nullable=True)
|
||||
|
||||
# Check size range (parsed from estimated_investment_size by LLM)
|
||||
check_size_lower = Column(Integer, nullable=True)
|
||||
check_size_upper = Column(Integer, nullable=True)
|
||||
|
||||
source_url = Column(String, nullable=True)
|
||||
source_provider = Column(String, nullable=True) # e.g., "Perplexity"
|
||||
|
||||
# Geographic focus as simple string
|
||||
geographic_focus = Column(String, nullable=True)
|
||||
|
||||
# Relationships
|
||||
investor = relationship("InvestorTable", back_populates="funds")
|
||||
investment_stages = relationship(
|
||||
"InvestmentStageTable",
|
||||
secondary=fund_investment_stages_association,
|
||||
back_populates="funds",
|
||||
)
|
||||
sectors = relationship(
|
||||
"SectorTable",
|
||||
secondary=fund_sectors_association,
|
||||
back_populates="funds",
|
||||
)
|
||||
|
||||
|
||||
class CompanyTable(Base, TimestampMixin):
|
||||
__tablename__ = "companies"
|
||||
|
||||
@@ -128,7 +216,9 @@ class CompanyTable(Base, TimestampMixin):
|
||||
founded_year = Column(Integer, nullable=True)
|
||||
website = Column(String, nullable=True)
|
||||
|
||||
members = relationship("CompanyMember", back_populates="company")
|
||||
members = relationship(
|
||||
"CompanyMember", back_populates="company", cascade="all, delete-orphan"
|
||||
)
|
||||
# Relationship back to investors
|
||||
investors = relationship(
|
||||
"InvestorTable",
|
||||
@@ -158,26 +248,43 @@ class CompanyMember(Base, TimestampMixin):
|
||||
company = relationship("CompanyTable", back_populates="members")
|
||||
|
||||
|
||||
class InvestmentStageTable(Base, TimestampMixin):
|
||||
__tablename__ = "investment_stages"
|
||||
|
||||
id = Column(Integer, primary_key=True, index=True)
|
||||
name = Column(String, nullable=False, unique=True)
|
||||
|
||||
# Relationships
|
||||
funds = relationship(
|
||||
"FundTable",
|
||||
secondary=fund_investment_stages_association,
|
||||
back_populates="investment_stages",
|
||||
)
|
||||
|
||||
|
||||
class SectorTable(Base, TimestampMixin):
|
||||
__tablename__ = "sectors"
|
||||
|
||||
id = Column(Integer, primary_key=True, index=True)
|
||||
name = Column(String, nullable=False)
|
||||
|
||||
# Add relationship back to investors
|
||||
# Relationships
|
||||
investors = relationship(
|
||||
"InvestorTable",
|
||||
secondary=investor_sector_association,
|
||||
back_populates="sectors",
|
||||
)
|
||||
|
||||
companies = relationship(
|
||||
"CompanyTable", secondary=company_sector_association, back_populates="sectors"
|
||||
)
|
||||
|
||||
projects = relationship(
|
||||
"ProjectTable", secondary=project_sector_association, back_populates="sector"
|
||||
)
|
||||
funds = relationship(
|
||||
"FundTable",
|
||||
secondary=fund_sectors_association,
|
||||
back_populates="sectors",
|
||||
)
|
||||
|
||||
|
||||
class ProjectTable(Base, TimestampMixin):
|
||||
|
||||
+29
-5
@@ -44,6 +44,27 @@ def health():
|
||||
async def parse_csv(
|
||||
db: db_dependency, file: UploadFile = File(...), is_investor: int = Form(...)
|
||||
):
|
||||
"""
|
||||
Parse and import CSV data into the database.
|
||||
|
||||
**For investors:**
|
||||
- Expected columns: Name, Website, Final Investor Profile, Final Profile sourcing
|
||||
- Manually parses JSON profiles for efficiency
|
||||
- Uses LLM only for currency conversion to USD
|
||||
- Handles AUM, fund sizes, and check sizes as integers
|
||||
|
||||
**For companies:**
|
||||
- Expected columns: Name, Website, Investor, Final Investor Profile (company profile)
|
||||
- 100% manual JSON parsing - no LLM needed
|
||||
- Extracts company details, executives, investors, and client categories
|
||||
- Automatically links companies to investors in database
|
||||
|
||||
**Benefits:**
|
||||
- Fast processing (5-10s per record)
|
||||
- Low cost (minimal or no LLM usage)
|
||||
- Accurate data extraction
|
||||
- Automatic database persistence
|
||||
"""
|
||||
# Read uploaded CSV with pandas
|
||||
content = await file.read()
|
||||
df = pd.read_csv(io.StringIO(content.decode("utf-8")))
|
||||
@@ -52,12 +73,15 @@ async def parse_csv(
|
||||
processor = InvestorProcessor()
|
||||
|
||||
if is_investor == 1:
|
||||
results = await processor.parse_investors(df)
|
||||
# Manual parser with LLM currency conversion
|
||||
results = await processor.parse_investors(df, save_to_db=True)
|
||||
# Results are already dicts from the new parser
|
||||
return results
|
||||
else:
|
||||
results = await processor.parse_companies(df)
|
||||
|
||||
# Convert Pydantic objects to dictionaries
|
||||
return [r.model_dump() for r in results]
|
||||
# Manual parser for companies (no LLM needed)
|
||||
results = await processor.parse_companies(df, save_to_db=True)
|
||||
# Results are already dicts from the new parser
|
||||
return results
|
||||
|
||||
|
||||
@app.post("/query", response_model=InvestorList, tags=["Querying"])
|
||||
|
||||
Binary file not shown.
+325
-61
@@ -4,8 +4,11 @@ from db.db import get_db
|
||||
from db.models import InvestorTable, SectorTable
|
||||
from fastapi import APIRouter, Depends, HTTPException, Query
|
||||
from pydantic import BaseModel
|
||||
from schemas.router_schemas import InvestmentStage, InvestorData
|
||||
from services.querying import QueryProcessor
|
||||
from schemas.router_schemas import (
|
||||
InvestmentStage,
|
||||
InvestorData,
|
||||
InvestorFundData,
|
||||
)
|
||||
from sqlalchemy.orm import Session, selectinload
|
||||
|
||||
router = APIRouter(tags=["Investor Routes"])
|
||||
@@ -34,34 +37,95 @@ class InvestorUpdate(BaseModel):
|
||||
number_of_investments: Optional[int] = None
|
||||
|
||||
|
||||
@router.get("/investors", response_model=List[InvestorData])
|
||||
@router.get("/investors", response_model=List[InvestorFundData])
|
||||
def read_investors(db: Session = Depends(get_db)):
|
||||
"""Get all investors with their related data"""
|
||||
"""Get all investors with their funds as separate entries
|
||||
|
||||
Each investor-fund combination is returned as a separate row.
|
||||
An investor with 3 funds will appear as 3 entries.
|
||||
"""
|
||||
investors = (
|
||||
db.query(InvestorTable)
|
||||
.options(
|
||||
selectinload(InvestorTable.portfolio_companies),
|
||||
selectinload(InvestorTable.team_members),
|
||||
selectinload(InvestorTable.sectors),
|
||||
selectinload(InvestorTable.funds),
|
||||
)
|
||||
.all()
|
||||
)
|
||||
|
||||
# Transform InvestorTable objects to InvestorData format
|
||||
investor_data_list = []
|
||||
# Transform to InvestorFundData format (one row per investor-fund combination)
|
||||
investor_fund_list = []
|
||||
for investor in investors:
|
||||
investor_data = InvestorData(
|
||||
investor=investor, # This maps to InvestorSchema
|
||||
portfolio_companies=investor.portfolio_companies,
|
||||
team_members=investor.team_members,
|
||||
sectors=investor.sectors,
|
||||
)
|
||||
investor_data_list.append(investor_data)
|
||||
# If investor has funds, create one entry per fund
|
||||
if investor.funds:
|
||||
for fund in investor.funds:
|
||||
investor_fund_data = InvestorFundData(
|
||||
# Investor fields
|
||||
investor_id=investor.id,
|
||||
investor_name=investor.name,
|
||||
investor_description=investor.description,
|
||||
investor_website=investor.website,
|
||||
investor_headquarters=investor.headquarters,
|
||||
aum=investor.aum,
|
||||
aum_as_of_date=investor.aum_as_of_date,
|
||||
aum_source_url=investor.aum_source_url,
|
||||
investment_thesis=investor.investment_thesis,
|
||||
portfolio_highlights=investor.portfolio_highlights,
|
||||
number_of_investments=investor.number_of_investments,
|
||||
# Fund fields
|
||||
fund_id=fund.id,
|
||||
fund_name=fund.fund_name,
|
||||
fund_size=fund.fund_size,
|
||||
fund_size_source_url=fund.fund_size_source_url,
|
||||
check_size_lower=fund.check_size_lower,
|
||||
check_size_upper=fund.check_size_upper,
|
||||
geographic_focus=fund.geographic_focus,
|
||||
fund_investment_stages=fund.investment_stages, # Now a relationship
|
||||
fund_sectors=fund.sectors, # Now a relationship
|
||||
# Related data (same for all funds of this investor)
|
||||
portfolio_companies=investor.portfolio_companies,
|
||||
team_members=investor.team_members,
|
||||
sectors=investor.sectors,
|
||||
)
|
||||
investor_fund_list.append(investor_fund_data)
|
||||
else:
|
||||
# If no funds, create one entry with null fund fields
|
||||
investor_fund_data = InvestorFundData(
|
||||
# Investor fields
|
||||
investor_id=investor.id,
|
||||
investor_name=investor.name,
|
||||
investor_description=investor.description,
|
||||
investor_website=investor.website,
|
||||
investor_headquarters=investor.headquarters,
|
||||
aum=investor.aum,
|
||||
aum_as_of_date=investor.aum_as_of_date,
|
||||
aum_source_url=investor.aum_source_url,
|
||||
investment_thesis=investor.investment_thesis,
|
||||
portfolio_highlights=investor.portfolio_highlights,
|
||||
number_of_investments=investor.number_of_investments,
|
||||
# Fund fields (null)
|
||||
fund_id=None,
|
||||
fund_name=None,
|
||||
fund_size=None,
|
||||
fund_size_source_url=None,
|
||||
check_size_lower=None,
|
||||
check_size_upper=None,
|
||||
geographic_focus=None,
|
||||
fund_investment_stages=None,
|
||||
fund_sectors=None,
|
||||
# Related data
|
||||
portfolio_companies=investor.portfolio_companies,
|
||||
team_members=investor.team_members,
|
||||
sectors=investor.sectors,
|
||||
)
|
||||
investor_fund_list.append(investor_fund_data)
|
||||
|
||||
return investor_data_list
|
||||
return investor_fund_list
|
||||
|
||||
|
||||
@router.get("/investors/filter", response_model=List[InvestorData])
|
||||
@router.get("/investors/filter", response_model=List[InvestorFundData])
|
||||
def filter_investors(
|
||||
stage: Optional[InvestmentStage] = Query(
|
||||
None, description="Filter by investment stage"
|
||||
@@ -76,13 +140,18 @@ def filter_investors(
|
||||
max_aum: Optional[int] = Query(None, description="Maximum AUM"),
|
||||
db: Session = Depends(get_db),
|
||||
):
|
||||
"""Filter investors based on various criteria"""
|
||||
"""Filter investors based on various criteria
|
||||
|
||||
Returns investor-fund combinations as separate rows.
|
||||
An investor with 3 funds will appear as 3 entries.
|
||||
"""
|
||||
|
||||
# Start with base query
|
||||
query = db.query(InvestorTable).options(
|
||||
selectinload(InvestorTable.portfolio_companies),
|
||||
selectinload(InvestorTable.team_members),
|
||||
selectinload(InvestorTable.sectors),
|
||||
selectinload(InvestorTable.funds),
|
||||
)
|
||||
|
||||
# Apply filters
|
||||
@@ -112,29 +181,86 @@ def filter_investors(
|
||||
|
||||
investors = query.all()
|
||||
|
||||
# Transform to InvestorData format
|
||||
investor_data_list = []
|
||||
# Transform to InvestorFundData format (one row per investor-fund combination)
|
||||
investor_fund_list = []
|
||||
for investor in investors:
|
||||
investor_data = InvestorData(
|
||||
investor=investor,
|
||||
portfolio_companies=investor.portfolio_companies,
|
||||
team_members=investor.team_members,
|
||||
sectors=investor.sectors,
|
||||
)
|
||||
investor_data_list.append(investor_data)
|
||||
# If investor has funds, create one entry per fund
|
||||
if investor.funds:
|
||||
for fund in investor.funds:
|
||||
investor_fund_data = InvestorFundData(
|
||||
# Investor fields
|
||||
investor_id=investor.id,
|
||||
investor_name=investor.name,
|
||||
investor_description=investor.description,
|
||||
investor_website=investor.website,
|
||||
investor_headquarters=investor.headquarters,
|
||||
aum=investor.aum,
|
||||
aum_as_of_date=investor.aum_as_of_date,
|
||||
aum_source_url=investor.aum_source_url,
|
||||
investment_thesis=investor.investment_thesis,
|
||||
portfolio_highlights=investor.portfolio_highlights,
|
||||
number_of_investments=investor.number_of_investments,
|
||||
# Fund fields
|
||||
fund_id=fund.id,
|
||||
fund_name=fund.fund_name,
|
||||
fund_size=fund.fund_size,
|
||||
fund_size_source_url=fund.fund_size_source_url,
|
||||
check_size_lower=fund.check_size_lower,
|
||||
check_size_upper=fund.check_size_upper,
|
||||
geographic_focus=fund.geographic_focus,
|
||||
fund_investment_stages=fund.investment_stages, # Now a relationship
|
||||
fund_sectors=fund.sectors, # Now a relationship
|
||||
# Related data
|
||||
portfolio_companies=investor.portfolio_companies,
|
||||
team_members=investor.team_members,
|
||||
sectors=investor.sectors,
|
||||
)
|
||||
investor_fund_list.append(investor_fund_data)
|
||||
else:
|
||||
# If no funds, create one entry with null fund fields
|
||||
investor_fund_data = InvestorFundData(
|
||||
# Investor fields
|
||||
investor_id=investor.id,
|
||||
investor_name=investor.name,
|
||||
investor_description=investor.description,
|
||||
investor_website=investor.website,
|
||||
investor_headquarters=investor.headquarters,
|
||||
aum=investor.aum,
|
||||
aum_as_of_date=investor.aum_as_of_date,
|
||||
aum_source_url=investor.aum_source_url,
|
||||
investment_thesis=investor.investment_thesis,
|
||||
portfolio_highlights=investor.portfolio_highlights,
|
||||
number_of_investments=investor.number_of_investments,
|
||||
# Fund fields (null)
|
||||
fund_id=None,
|
||||
fund_name=None,
|
||||
fund_size=None,
|
||||
fund_size_source_url=None,
|
||||
check_size_lower=None,
|
||||
check_size_upper=None,
|
||||
geographic_focus=None,
|
||||
fund_investment_stages=None,
|
||||
fund_sectors=None,
|
||||
# Related data
|
||||
portfolio_companies=investor.portfolio_companies,
|
||||
team_members=investor.team_members,
|
||||
sectors=investor.sectors,
|
||||
)
|
||||
investor_fund_list.append(investor_fund_data)
|
||||
|
||||
return investor_data_list
|
||||
return investor_fund_list
|
||||
|
||||
|
||||
@router.get("/investors/{investor_id}", response_model=InvestorData)
|
||||
def read_investor(investor_id: int, db: Session = Depends(get_db)):
|
||||
"""Get a specific investor by ID"""
|
||||
"""Get a specific investor by ID with all their funds"""
|
||||
investor = (
|
||||
db.query(InvestorTable)
|
||||
.options(
|
||||
selectinload(InvestorTable.portfolio_companies),
|
||||
selectinload(InvestorTable.team_members),
|
||||
selectinload(InvestorTable.sectors),
|
||||
selectinload(InvestorTable.funds),
|
||||
)
|
||||
.filter(InvestorTable.id == investor_id)
|
||||
.first()
|
||||
@@ -143,12 +269,13 @@ def read_investor(investor_id: int, db: Session = Depends(get_db)):
|
||||
if not investor:
|
||||
raise HTTPException(status_code=404, detail="Investor not found")
|
||||
|
||||
# Transform to InvestorData format
|
||||
# Transform to InvestorData format (includes funds array)
|
||||
return InvestorData(
|
||||
investor=investor,
|
||||
portfolio_companies=investor.portfolio_companies,
|
||||
team_members=investor.team_members,
|
||||
sectors=investor.sectors,
|
||||
funds=investor.funds,
|
||||
)
|
||||
|
||||
|
||||
@@ -167,6 +294,7 @@ def create_investor(investor: InvestorCreate, db: Session = Depends(get_db)):
|
||||
selectinload(InvestorTable.portfolio_companies),
|
||||
selectinload(InvestorTable.team_members),
|
||||
selectinload(InvestorTable.sectors),
|
||||
selectinload(InvestorTable.funds),
|
||||
)
|
||||
.filter(InvestorTable.id == db_investor.id)
|
||||
.first()
|
||||
@@ -178,17 +306,76 @@ def create_investor(investor: InvestorCreate, db: Session = Depends(get_db)):
|
||||
portfolio_companies=investor_with_relations.portfolio_companies,
|
||||
team_members=investor_with_relations.team_members,
|
||||
sectors=investor_with_relations.sectors,
|
||||
funds=investor_with_relations.funds,
|
||||
)
|
||||
|
||||
|
||||
@router.get("/investors/{investor_id}/similar", response_model=List[InvestorData])
|
||||
def find_similar_investors(
|
||||
investor_id: int,
|
||||
limit: int = Query(10, description="Maximum number of similar investors to return"),
|
||||
db: Session = Depends(get_db)
|
||||
@router.put("/investors/{investor_id}", response_model=InvestorData)
|
||||
def update_investor(
|
||||
investor_id: int, investor: InvestorUpdate, db: Session = Depends(get_db)
|
||||
):
|
||||
"""Find investors similar to a given investor based on characteristics"""
|
||||
|
||||
"""Update an existing investor"""
|
||||
db_investor = (
|
||||
db.query(InvestorTable).filter(InvestorTable.id == investor_id).first()
|
||||
)
|
||||
if not db_investor:
|
||||
raise HTTPException(status_code=404, detail="Investor not found")
|
||||
|
||||
update_data = investor.dict(exclude_unset=True)
|
||||
for field, value in update_data.items():
|
||||
setattr(db_investor, field, value)
|
||||
|
||||
db.commit()
|
||||
db.refresh(db_investor)
|
||||
|
||||
# Reload with relationships
|
||||
investor_with_relations = (
|
||||
db.query(InvestorTable)
|
||||
.options(
|
||||
selectinload(InvestorTable.portfolio_companies),
|
||||
selectinload(InvestorTable.team_members),
|
||||
selectinload(InvestorTable.sectors),
|
||||
selectinload(InvestorTable.funds),
|
||||
)
|
||||
.filter(InvestorTable.id == investor_id)
|
||||
.first()
|
||||
)
|
||||
|
||||
# Transform to InvestorData format
|
||||
return InvestorData(
|
||||
investor=investor_with_relations,
|
||||
portfolio_companies=investor_with_relations.portfolio_companies,
|
||||
team_members=investor_with_relations.team_members,
|
||||
sectors=investor_with_relations.sectors,
|
||||
funds=investor_with_relations.funds,
|
||||
)
|
||||
|
||||
|
||||
@router.delete("/investors/{investor_id}")
|
||||
def delete_investor(investor_id: int, db: Session = Depends(get_db)):
|
||||
"""Delete an investor"""
|
||||
db_investor = (
|
||||
db.query(InvestorTable).filter(InvestorTable.id == investor_id).first()
|
||||
)
|
||||
if not db_investor:
|
||||
raise HTTPException(status_code=404, detail="Investor not found")
|
||||
|
||||
db.delete(db_investor)
|
||||
db.commit()
|
||||
return {"message": "Investor deleted successfully"}
|
||||
|
||||
|
||||
@router.get("/investors/{investor_id}/similar", response_model=List[InvestorFundData])
|
||||
def find_similar_investors(
|
||||
investor_id: int,
|
||||
limit: int = Query(10, description="Maximum number of similar investors to return"),
|
||||
db: Session = Depends(get_db),
|
||||
):
|
||||
"""Find investors similar to a given investor based on characteristics
|
||||
|
||||
Returns investor-fund combinations as separate rows.
|
||||
"""
|
||||
|
||||
# Get the target investor
|
||||
target_investor = (
|
||||
db.query(InvestorTable)
|
||||
@@ -196,6 +383,7 @@ def find_similar_investors(
|
||||
selectinload(InvestorTable.portfolio_companies),
|
||||
selectinload(InvestorTable.team_members),
|
||||
selectinload(InvestorTable.sectors),
|
||||
selectinload(InvestorTable.funds),
|
||||
)
|
||||
.filter(InvestorTable.id == investor_id)
|
||||
.first()
|
||||
@@ -214,6 +402,7 @@ def find_similar_investors(
|
||||
selectinload(InvestorTable.portfolio_companies),
|
||||
selectinload(InvestorTable.team_members),
|
||||
selectinload(InvestorTable.sectors),
|
||||
selectinload(InvestorTable.funds),
|
||||
)
|
||||
.filter(InvestorTable.id != investor_id)
|
||||
.all()
|
||||
@@ -223,59 +412,134 @@ def find_similar_investors(
|
||||
scored_investors = []
|
||||
for candidate in candidates:
|
||||
score = 0
|
||||
|
||||
|
||||
# Stage focus match (30 points)
|
||||
if candidate.stage_focus == target_investor.stage_focus:
|
||||
score += 30
|
||||
|
||||
|
||||
# Geographic focus match (20 points for exact, 10 for partial)
|
||||
if candidate.geographic_focus and target_investor.geographic_focus:
|
||||
if candidate.geographic_focus.lower() == target_investor.geographic_focus.lower():
|
||||
if (
|
||||
candidate.geographic_focus.lower()
|
||||
== target_investor.geographic_focus.lower()
|
||||
):
|
||||
score += 20
|
||||
elif (candidate.geographic_focus.lower() in target_investor.geographic_focus.lower() or
|
||||
target_investor.geographic_focus.lower() in candidate.geographic_focus.lower()):
|
||||
elif (
|
||||
candidate.geographic_focus.lower()
|
||||
in target_investor.geographic_focus.lower()
|
||||
or target_investor.geographic_focus.lower()
|
||||
in candidate.geographic_focus.lower()
|
||||
):
|
||||
score += 10
|
||||
|
||||
|
||||
# Check size overlap (20 points max)
|
||||
if (candidate.check_size_lower and candidate.check_size_upper and
|
||||
target_investor.check_size_lower and target_investor.check_size_upper):
|
||||
if (
|
||||
candidate.check_size_lower
|
||||
and candidate.check_size_upper
|
||||
and target_investor.check_size_lower
|
||||
and target_investor.check_size_upper
|
||||
):
|
||||
# Calculate overlap percentage
|
||||
overlap_start = max(candidate.check_size_lower, target_investor.check_size_lower)
|
||||
overlap_end = min(candidate.check_size_upper, target_investor.check_size_upper)
|
||||
overlap_start = max(
|
||||
candidate.check_size_lower, target_investor.check_size_lower
|
||||
)
|
||||
overlap_end = min(
|
||||
candidate.check_size_upper, target_investor.check_size_upper
|
||||
)
|
||||
if overlap_end > overlap_start:
|
||||
overlap = overlap_end - overlap_start
|
||||
target_range = target_investor.check_size_upper - target_investor.check_size_lower
|
||||
target_range = (
|
||||
target_investor.check_size_upper - target_investor.check_size_lower
|
||||
)
|
||||
overlap_ratio = overlap / target_range if target_range > 0 else 0
|
||||
score += int(20 * overlap_ratio)
|
||||
|
||||
|
||||
# AUM similarity (15 points max)
|
||||
if candidate.aum and target_investor.aum:
|
||||
aum_diff = abs(candidate.aum - target_investor.aum)
|
||||
max_aum = max(candidate.aum, target_investor.aum)
|
||||
similarity_ratio = 1 - (aum_diff / max_aum) if max_aum > 0 else 0
|
||||
score += int(15 * similarity_ratio)
|
||||
|
||||
|
||||
# Sector overlap (30 points max)
|
||||
candidate_sector_ids = {sector.id for sector in candidate.sectors}
|
||||
if target_sector_ids and candidate_sector_ids:
|
||||
common_sectors = target_sector_ids.intersection(candidate_sector_ids)
|
||||
overlap_ratio = len(common_sectors) / len(target_sector_ids)
|
||||
score += int(30 * overlap_ratio)
|
||||
|
||||
|
||||
if score > 0: # Only include investors with some similarity
|
||||
scored_investors.append((score, candidate))
|
||||
|
||||
|
||||
# Sort by score (descending) and take top N
|
||||
scored_investors.sort(key=lambda x: x[0], reverse=True)
|
||||
similar_investors = [inv for score, inv in scored_investors[:limit]]
|
||||
|
||||
# Transform to InvestorData format
|
||||
return [
|
||||
InvestorData(
|
||||
investor=inv,
|
||||
portfolio_companies=inv.portfolio_companies,
|
||||
team_members=inv.team_members,
|
||||
sectors=inv.sectors,
|
||||
)
|
||||
for inv in similar_investors
|
||||
]
|
||||
|
||||
# Transform to InvestorFundData format (one row per investor-fund combination)
|
||||
investor_fund_list = []
|
||||
for investor in similar_investors:
|
||||
# If investor has funds, create one entry per fund
|
||||
if investor.funds:
|
||||
for fund in investor.funds:
|
||||
investor_fund_data = InvestorFundData(
|
||||
# Investor fields
|
||||
investor_id=investor.id,
|
||||
investor_name=investor.name,
|
||||
investor_description=investor.description,
|
||||
investor_website=investor.website,
|
||||
investor_headquarters=investor.headquarters,
|
||||
aum=investor.aum,
|
||||
aum_as_of_date=investor.aum_as_of_date,
|
||||
aum_source_url=investor.aum_source_url,
|
||||
investment_thesis=investor.investment_thesis,
|
||||
portfolio_highlights=investor.portfolio_highlights,
|
||||
number_of_investments=investor.number_of_investments,
|
||||
# Fund fields
|
||||
fund_id=fund.id,
|
||||
fund_name=fund.fund_name,
|
||||
fund_size=fund.fund_size,
|
||||
fund_size_source_url=fund.fund_size_source_url,
|
||||
check_size_lower=fund.check_size_lower,
|
||||
check_size_upper=fund.check_size_upper,
|
||||
geographic_focus=fund.geographic_focus,
|
||||
fund_investment_stages=fund.investment_stages, # Now a relationship
|
||||
fund_sectors=fund.sectors, # Now a relationship
|
||||
# Related data
|
||||
portfolio_companies=investor.portfolio_companies,
|
||||
team_members=investor.team_members,
|
||||
sectors=investor.sectors,
|
||||
)
|
||||
investor_fund_list.append(investor_fund_data)
|
||||
else:
|
||||
# If no funds, create one entry with null fund fields
|
||||
investor_fund_data = InvestorFundData(
|
||||
# Investor fields
|
||||
investor_id=investor.id,
|
||||
investor_name=investor.name,
|
||||
investor_description=investor.description,
|
||||
investor_website=investor.website,
|
||||
investor_headquarters=investor.headquarters,
|
||||
aum=investor.aum,
|
||||
aum_as_of_date=investor.aum_as_of_date,
|
||||
aum_source_url=investor.aum_source_url,
|
||||
investment_thesis=investor.investment_thesis,
|
||||
portfolio_highlights=investor.portfolio_highlights,
|
||||
number_of_investments=investor.number_of_investments,
|
||||
# Fund fields (null)
|
||||
fund_id=None,
|
||||
fund_name=None,
|
||||
fund_size=None,
|
||||
fund_size_source_url=None,
|
||||
check_size_lower=None,
|
||||
check_size_upper=None,
|
||||
geographic_focus=None,
|
||||
fund_investment_stages=None,
|
||||
fund_sectors=None,
|
||||
# Related data
|
||||
portfolio_companies=investor.portfolio_companies,
|
||||
team_members=investor.team_members,
|
||||
sectors=investor.sectors,
|
||||
)
|
||||
investor_fund_list.append(investor_fund_data)
|
||||
|
||||
return investor_fund_list
|
||||
|
||||
Binary file not shown.
Binary file not shown.
@@ -258,10 +258,6 @@ class InvestorSchema(BaseModel):
|
||||
default=None,
|
||||
description="Geographic investment focus. Do not return any special characters, Just locations separated by commas. Leave empty if not clearly identifiable.",
|
||||
)
|
||||
stage_focus: InvestmentStage = Field(
|
||||
default=InvestmentStage.SEED,
|
||||
description="Investment stage focus. Use SEED as default if uncertain.",
|
||||
)
|
||||
number_of_investments: Optional[int] = Field(
|
||||
default=None,
|
||||
ge=0,
|
||||
|
||||
@@ -22,6 +22,14 @@ class SectorSchema(BaseModel):
|
||||
from_attributes = True
|
||||
|
||||
|
||||
class InvestmentStageSchema(BaseModel):
    # Serialised view of one investment stage: its identifier and its name.
    id: int
    name: str

    class Config:
        # pydantic option: allow the model to be populated from objects that
        # expose these fields as attributes, not only from dicts.
        from_attributes = True
|
||||
|
||||
|
||||
class InvestorMemberSchema(BaseModel):
|
||||
id: int
|
||||
name: str
|
||||
@@ -32,6 +40,25 @@ class InvestorMemberSchema(BaseModel):
|
||||
from_attributes = True
|
||||
|
||||
|
||||
class FundSchema(BaseModel):
    # Serialised view of a single fund.
    # Fields without a default are required but may be passed as None.
    id: int
    fund_name: str | None
    fund_size: int | None  # integer so it can be filtered numerically
    fund_size_source_url: str | None
    check_size_lower: int | None  # lower bound of the check size range
    check_size_upper: int | None  # upper bound of the check size range
    source_url: str | None
    source_provider: str | None
    geographic_focus: str | None  # one string (previously a list of strings)
    investment_stages: list[InvestmentStageSchema] | None  # via relationship
    sectors: list[SectorSchema] | None  # via relationship
    created_at: datetime | None = None
    updated_at: datetime | None = None

    class Config:
        # pydantic option: allow the model to be populated from objects that
        # expose these fields as attributes, not only from dicts.
        from_attributes = True
|
||||
|
||||
|
||||
class CompanyMemberSchema(BaseModel):
|
||||
id: int
|
||||
name: Optional[str]
|
||||
@@ -76,12 +103,55 @@ class InvestorSchema(BaseModel):
|
||||
|
||||
|
||||
class InvestorData(BaseModel):
    """Comprehensive investor data schema - used for individual investor requests"""

    # The investor record itself plus the collections attached to it.
    investor: InvestorSchema
    portfolio_companies: List[CompanySchema]
    team_members: List[InvestorMemberSchema]
    sectors: List[SectorSchema]
    funds: List[FundSchema]

    class Config:
        # pydantic option: allow the model to be populated from objects that
        # expose these fields as attributes, not only from dicts.
        from_attributes = True
|
||||
|
||||
|
||||
class InvestorFundData(BaseModel):
    """Investor-Fund combined data - used for list/filter requests

    Each row represents one investor-fund combination.
    An investor with 3 funds will appear as 3 separate entries.
    """

    # --- Investor columns (repeated on every row of the same investor) ---
    investor_id: int
    investor_name: str
    investor_description: str | None
    investor_website: str | None
    investor_headquarters: str | None
    aum: int | None
    aum_as_of_date: str | None
    aum_source_url: str | None
    investment_thesis: list[str] | None
    portfolio_highlights: list[str] | None
    number_of_investments: int | None

    # --- Fund columns ---
    fund_id: int | None
    fund_name: str | None
    fund_size: int | None  # integer so it can be filtered numerically
    fund_size_source_url: str | None
    check_size_lower: int | None  # lower bound of the check size range
    check_size_upper: int | None  # upper bound of the check size range
    geographic_focus: str | None  # one string (previously a list of strings)
    fund_investment_stages: list[InvestmentStageSchema] | None  # via relationship
    fund_sectors: list[SectorSchema] | None  # via relationship

    # --- Collections attached to the investor ---
    portfolio_companies: list[CompanySchema]
    team_members: list[InvestorMemberSchema]
    sectors: list[SectorSchema]

    class Config:
        # pydantic option: allow the model to be populated from objects that
        # expose these fields as attributes, not only from dicts.
        from_attributes = True
|
||||
@@ -99,3 +169,9 @@ class CompanyData(BaseModel): # Renamed from CompaniesData for consistency
|
||||
|
||||
class InvestorList(BaseModel):
    # Wrapper holding a list of InvestorData entries under the "investors" key.
    investors: list[InvestorData]
|
||||
|
||||
|
||||
class InvestorFundList(BaseModel):
    """List of investor-fund combinations"""

    # One InvestorFundData entry per investor-fund combination.
    investor_funds: list[InvestorFundData]
|
||||
|
||||
Binary file not shown.
Binary file not shown.
+719
-122
@@ -1,5 +1,6 @@
|
||||
import asyncio
|
||||
import json
|
||||
import os
|
||||
import re
|
||||
from typing import Optional
|
||||
|
||||
import pandas as pd
|
||||
@@ -7,15 +8,35 @@ from db.db import get_db_session
|
||||
from db.models import (
|
||||
CompanyMember,
|
||||
CompanyTable,
|
||||
FundTable,
|
||||
InvestmentStageTable,
|
||||
InvestorMember,
|
||||
InvestorTable,
|
||||
SectorTable,
|
||||
)
|
||||
from langchain_openai import ChatOpenAI
|
||||
from pydantic import BaseModel
|
||||
from schemas.py_schemas import CompanyData, InvestorData
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
|
||||
class CurrencyConversion(BaseModel):
    """Schema for LLM currency conversion responses"""

    # Converted amount in USD; convert_to_usd treats values <= 0 as
    # "no usable amount".
    amount_usd: int = 0
    # Free-text confidence label; intended values are high, medium, low.
    confidence: str = "high"
    # Optional free-text remarks from the model.
    notes: str = ""
|
||||
|
||||
|
||||
class CheckSizeRange(BaseModel):
    """Schema for LLM check size range parsing from estimated investment size"""

    # Range bounds in USD; parse_check_size_range maps values <= 0 to None.
    lower_bound_usd: int = 0
    upper_bound_usd: int = 0
    # Free-text confidence label; intended values are high, medium, low.
    confidence: str = "high"
    # Optional free-text remarks from the model.
    notes: str = ""
|
||||
|
||||
|
||||
class InvestorProcessor:
|
||||
def __init__(self):
|
||||
self.llm = ChatOpenAI(
|
||||
@@ -25,9 +46,508 @@ class InvestorProcessor:
|
||||
temperature=0,
|
||||
)
|
||||
|
||||
# Structured LLMs for specific parsing tasks
|
||||
self.currency_converter_llm = self.llm.with_structured_output(
|
||||
CurrencyConversion
|
||||
)
|
||||
self.check_size_parser_llm = self.llm.with_structured_output(CheckSizeRange)
|
||||
|
||||
# Keep legacy structured LLMs for backward compatibility
|
||||
self.investor_structured_llm = self.llm.with_structured_output(InvestorData)
|
||||
self.company_structured_llm = self.llm.with_structured_output(CompanyData)
|
||||
|
||||
async def convert_to_usd(self, amount_str: str) -> Optional[int]:
    """
    Ask the currency-conversion LLM to turn a free-form amount into whole USD.

    The prompt covers inputs such as:
    - "EUR 850,000,000"
    - "$5M"
    - "GBP 10-20 million" (ranges are collapsed to their midpoint)
    - "Approximately EUR 100 million"

    Returns the amount reported by the model when it is positive. Returns
    None when the input is empty, "Not Available" or "0", when the model
    reports a non-positive amount, or when the call raises (the error is
    printed, not propagated).
    """
    # Placeholder values never reach the model.
    if not amount_str or amount_str in ("Not Available", "0"):
        return None

    try:
        prompt = f"""Convert this amount to USD as an integer (whole number, no decimals).
If it's a range, use the midpoint. If already in USD, just extract the number.
Remove all commas and convert millions/billions to actual numbers.

Amount: {amount_str}

Examples:
- "EUR 850,000,000" -> 935000000 (assuming EUR to USD rate ~1.10)
- "$5M" -> 5000000
- "GBP 10-20 million" -> 18000000 (midpoint 15M * 1.20 rate)
- "Approximately EUR 100 million" -> 110000000

Return only the USD integer amount with current exchange rates."""

        conversion = await self.currency_converter_llm.ainvoke(prompt)
        if conversion.amount_usd > 0:
            return conversion.amount_usd
        # Zero or negative is the schema's "nothing found" signal.
        return None
    except Exception as e:
        print(f"Error converting currency '{amount_str}': {e}")
        return None
|
||||
|
||||
async def parse_check_size_range(
    self, estimated_investment_str: str
) -> tuple[Optional[int], Optional[int]]:
    """
    Use the check-size LLM to parse an estimated investment size string.

    The prompt covers inputs such as:
    - "EUR 1,000 to 2,000"
    - "$100K-$500K"
    - "Between $1M and $5M"
    - "Up to EUR 10 million"
    - "$2M typical"

    Returns a tuple (lower_bound_usd, upper_bound_usd). A bound the model
    reports as zero or negative becomes None, so "up to X" yields (None, X).
    When both bounds are present but reversed they are swapped, so the result
    always satisfies lower <= upper. Returns (None, None) for empty,
    "Not Available" or "0" input, and when the call raises (the error is
    printed, not propagated).
    """
    # Placeholder values never reach the model.
    if not estimated_investment_str or estimated_investment_str in (
        "Not Available",
        "0",
    ):
        return None, None

    try:
        prompt = f"""Parse this check size/investment range into lower and upper bounds in USD as integers.

Input: {estimated_investment_str}

Instructions:
- If it's a range (e.g., "EUR 1M to 5M"), extract both bounds
- If it's a single amount (e.g., "$2M typical"), use it as both lower and upper
- If it says "up to X", use 0 as lower and X as upper
- Convert all currencies to USD using current exchange rates
- Return integers (whole numbers, no decimals)

Examples:
- "EUR 1,000 to 2,000" -> lower: 1100, upper: 2200
- "$100K-$500K" -> lower: 100000, upper: 500000
- "Between $1M and $5M" -> lower: 1000000, upper: 5000000
- "Up to EUR 10 million" -> lower: 0, upper: 11000000
- "$2M typical" -> lower: 2000000, upper: 2000000
- "GBP 500K-2M" -> lower: 600000, upper: 2400000

Return the lower and upper bounds in USD."""

        result = await self.check_size_parser_llm.ainvoke(prompt)
        lower = result.lower_bound_usd if result.lower_bound_usd > 0 else None
        upper = result.upper_bound_usd if result.upper_bound_usd > 0 else None

        # The model occasionally returns the bounds in the wrong order; a
        # reversed pair would make range filters on the stored values useless.
        if lower is not None and upper is not None and lower > upper:
            lower, upper = upper, lower

        return lower, upper
    except Exception as e:
        print(f"Error parsing check size range '{estimated_investment_str}': {e}")
        return None, None
|
||||
|
||||
def parse_json_profile(self, json_str: str) -> Optional[dict]:
    """
    Parse the JSON profile cell taken from the CSV.

    Returns the decoded JSON object as a dict. Returns None when the cell is
    empty or not text (for example the float NaN that pandas uses for a
    missing cell), when the text is not valid JSON, or when the JSON is valid
    but its top-level value is not an object. Callers use `.get(...)` on the
    result, so anything other than a dict is useless to them.
    """
    # Non-text cells (None, NaN, numbers) cannot hold a profile; rejecting
    # them here also keeps json.loads from raising TypeError.
    if not isinstance(json_str, (str, bytes, bytearray)) or not json_str:
        return None

    try:
        profile = json.loads(json_str)
    except ValueError as e:
        # json.JSONDecodeError and UnicodeDecodeError (undecodable bytes) are
        # both ValueError subclasses.
        print(f"Error parsing JSON: {e}")
        return None

    if not isinstance(profile, dict):
        print(
            f"Error parsing JSON: expected an object, got {type(profile).__name__}"
        )
        return None

    return profile
|
||||
|
||||
async def process_investor_profile(
    self, name: str, website: str, profile_json: str
) -> Optional[dict]:
    """
    Build a plain-dict investor record from one CSV row.

    Fields are copied from the JSON profile by hand; the LLM helpers are only
    used to convert AUM and fund size to USD and to parse the check size
    range.

    Args:
        name: Investor name from the CSV; surrounding whitespace is stripped.
        website: Investor website from the CSV; surrounding whitespace is
            stripped.
        profile_json: JSON text of the investor profile.

    Returns:
        A dict with the investor fields plus "team_members" and "funds"
        lists, or None when the profile cannot be parsed or processing
        raises (the error is printed, not propagated).
    """
    profile = self.parse_json_profile(profile_json)
    if not profile:
        return None

    def _as_list(value):
        # The profile JSON is hand/LLM-produced: a list field can arrive as
        # null or as one bare string. Iterating None raises, and iterating a
        # string would yield single characters.
        if value is None:
            return []
        if isinstance(value, str):
            return [value] if value.strip() else []
        if isinstance(value, (list, tuple)):
            return list(value)
        return []

    try:
        # Extract basic info
        investor_data = {
            "name": name.strip() if name else None,
            "website": website.strip() if website else None,
            "headquarters": profile.get("headquarters"),
            "description": profile.get("investorDescription"),
            "aum": None,
            "aum_as_of_date": None,
            "aum_source_url": None,
            "investment_thesis": profile.get("investmentThesisFocus", []),
            "portfolio_highlights": profile.get("portfolioHighlights", []),
            "linked_documents": profile.get("linkedDocuments", []),
            "researcher_notes": profile.get("researcherNotes"),
            "missing_important_fields": profile.get("missingImportantFields", []),
            "sources": profile.get("sources", {}),
            "team_members": [],
            "funds": [],
        }

        # AUM: only filled in when an actual amount is present.
        aum_data = profile.get("overallAssetsUnderManagement", {})
        if aum_data and isinstance(aum_data, dict):
            aum_amount = aum_data.get("aumAmount")
            if aum_amount and aum_amount != "Not Available":
                investor_data["aum"] = await self.convert_to_usd(aum_amount)
                investor_data["aum_as_of_date"] = aum_data.get("asOfDate")
                investor_data["aum_source_url"] = aum_data.get("sourceUrl")

        # Senior leadership: entries without a name are dropped.
        for member in _as_list(profile.get("seniorLeadership")):
            if isinstance(member, dict) and member.get("name"):
                investor_data["team_members"].append(
                    {
                        "name": member.get("name"),
                        "title": member.get("title"),
                        "role": member.get("title"),  # Use title as role
                        "email": None,
                        "source_url": member.get("sourceUrl"),
                    }
                )

        # Funds
        for fund in _as_list(profile.get("funds")):
            if not isinstance(fund, dict):
                continue

            fund_data = {
                "fund_name": fund.get("fundName"),
                "fund_size": None,
                "fund_size_source_url": fund.get("fundSizeSourceUrl"),
                "check_size_lower": None,
                "check_size_upper": None,
                "source_url": fund.get("sourceUrl"),
                "source_provider": fund.get("sourceProvider"),
                "geographic_focus": None,  # filled in below as one string
                "investment_stage_names": _as_list(fund.get("investmentStageFocus")),
                "sector_names": _as_list(fund.get("sectorFocus")),
            }

            # Geographic focus is stored as one comma-separated string. Empty
            # or null entries are skipped so str.join cannot fail on them.
            geo_parts = [
                str(part) for part in _as_list(fund.get("geographicFocus")) if part
            ]
            if geo_parts:
                fund_data["geographic_focus"] = ", ".join(geo_parts)

            # Fund size -> USD integer
            fund_size_str = fund.get("fundSize")
            if fund_size_str and fund_size_str != "Not Available":
                fund_size_usd = await self.convert_to_usd(fund_size_str)
                if fund_size_usd:
                    fund_data["fund_size"] = fund_size_usd

            # Check size range from the estimated investment size
            est_size_str = fund.get("estimatedInvestmentSize")
            if est_size_str and est_size_str != "Not Available":
                check_lower, check_upper = await self.parse_check_size_range(
                    est_size_str
                )
                if check_lower is not None:
                    fund_data["check_size_lower"] = check_lower
                if check_upper is not None:
                    fund_data["check_size_upper"] = check_upper

            investor_data["funds"].append(fund_data)

        return investor_data

    except Exception as e:
        print(f"Error processing investor profile for {name}: {e}")
        return None
|
||||
|
||||
async def process_company_profile(
    self,
    name: str,
    website: str,
    profile_json: str,
    investor_names: Optional[str] = None,
) -> Optional[dict]:
    """
    Build a plain-dict company record from one CSV row, without using the LLM.

    Args:
        name: Company name from the CSV; surrounding whitespace is stripped.
        website: Company website from the CSV; surrounding whitespace is
            stripped.
        profile_json: JSON text of the company profile.
        investor_names: Optional comma-separated investor names from the
            "Investor" column.

    Returns:
        A dict with the company fields, a "key_executives" list and an
        "investor_names" list, or None when the profile cannot be parsed or
        processing raises (the error is printed, not propagated).
        "founded_year" is a best-effort guess extracted from the description
        text and stays None when no plausible year is found.
    """
    from datetime import date

    profile = self.parse_json_profile(profile_json)
    if not profile:
        return None

    try:
        # Extract basic info
        company_data = {
            "name": name.strip() if name else None,
            "website": website.strip() if website else None,
            "description": profile.get("companyDescription"),
            "location": profile.get("geographicFocus"),
            "industry": profile.get("sectorDescription"),
            "founded_year": None,  # no dedicated field in the profile JSON
            "key_executives": [],
            "client_categories": profile.get("clientCategories", []),
            "product_description": profile.get("productDescription"),
            "linked_documents": profile.get("linkedDocuments", []),
            "researcher_notes": profile.get("researcherNotes"),
            "missing_important_fields": profile.get("missingImportantFields", []),
            "sources": profile.get("sources", {}),
            "investor_names": [],
        }

        # Investor column: comma-separated names, blanks dropped.
        if investor_names and pd.notna(investor_names):
            company_data["investor_names"] = [
                part.strip()
                for part in str(investor_names).split(",")
                if part.strip()
            ]

        # Key executives, falling back to the alternative field name.
        # `or` also covers an explicit JSON null, which would otherwise raise
        # when iterated.
        key_executives = (
            profile.get("keyExecutives") or profile.get("seniorLeadership") or []
        )
        if not isinstance(key_executives, (list, tuple)):
            key_executives = []

        for exec_member in key_executives:
            if isinstance(exec_member, dict) and exec_member.get("name"):
                company_data["key_executives"].append(
                    {
                        "name": exec_member.get("name"),
                        "title": exec_member.get("title"),
                        "source_url": exec_member.get("sourceUrl"),
                    }
                )

        # Founding year: first pattern that yields a plausible year wins.
        description = company_data.get("description")
        if isinstance(description, str) and description:
            # e.g. "founded in 2020", "Gegründet 2020", "founded 2020"
            year_patterns = [
                r"founded in (\d{4})",
                r"founded (\d{4})",
                r"Gegründet (\d{4})",
                r"established in (\d{4})",
                r"since (\d{4})",
                r"\((\d{4})\)",  # Year in parentheses
            ]
            # Upper bound follows the clock instead of a hard-coded year, so
            # newly founded companies are not rejected once that year passes.
            latest_year = date.today().year
            for pattern in year_patterns:
                match = re.search(pattern, description, re.IGNORECASE)
                if not match:
                    continue
                year = int(match.group(1))
                if 1900 <= year <= latest_year:  # sanity check
                    company_data["founded_year"] = year
                    break

        return company_data

    except Exception as e:
        print(f"Error processing company profile for {name}: {e}")
        return None
|
||||
|
||||
def _save_parsed_company_to_db(
    self, db: Session, company_data: dict
) -> Optional[CompanyTable]:
    """
    Insert or update a company from manually parsed data.

    Companies are matched by name. On update, a stored value is only
    overwritten when the new value is truthy, and the company's existing
    CompanyMember rows are deleted and rebuilt from "key_executives". The
    company is also appended to the portfolio of every investor whose name
    matches an entry in "investor_names".

    Returns the CompanyTable row (not committed here), or None after printing
    the error and rolling the session back.
    """
    try:
        already_stored = (
            db.query(CompanyTable).filter_by(name=company_data["name"]).first()
        )

        if already_stored:
            company = already_stored
            # Prefer the new value, fall back to what is already stored.
            for column in ("website", "location", "description", "industry"):
                setattr(
                    company,
                    column,
                    company_data.get(column) or getattr(company, column),
                )
            if company_data.get("founded_year"):
                company.founded_year = company_data["founded_year"]
        else:
            company = CompanyTable(
                name=company_data["name"],
                website=company_data.get("website"),
                location=company_data.get("location"),
                description=company_data.get("description"),
                industry=company_data.get("industry"),
                founded_year=company_data.get("founded_year"),
            )
            db.add(company)
            db.flush()

        # Members are replaced wholesale when updating.
        if already_stored:
            db.query(CompanyMember).filter_by(company_id=company.id).delete()

        for executive in company_data.get("key_executives", []):
            db.add(
                CompanyMember(
                    name=executive.get("name"),
                    role=executive.get("title"),
                    # the source URL is kept in the linkedin column
                    linkedin=executive.get("source_url"),
                    company_id=company.id,
                )
            )

        # Link to investors that are already in the database.
        for investor_name in company_data.get("investor_names", []):
            investor = (
                db.query(InvestorTable).filter_by(name=investor_name.strip()).first()
            )
            if investor and company not in investor.portfolio_companies:
                investor.portfolio_companies.append(company)

        return company

    except Exception as e:
        print(f"Error saving company to database: {e}")
        db.rollback()
        return None
|
||||
|
||||
def _save_parsed_investor_to_db(
    self, db: Session, investor_data: dict
) -> Optional[InvestorTable]:
    """
    Insert or update an investor from manually parsed data.

    Investors are matched by name. On update, a stored value is only
    overwritten when the new value is truthy, and the investor's team members
    and funds are deleted and rebuilt from `investor_data`. Each fund is
    linked to its investment stages and sectors, which are created on demand.

    Returns the InvestorTable row (not committed here), or None after
    printing the error and rolling the session back.
    """
    # Scalar columns copied from investor_data, in assignment order.
    columns = (
        "website",
        "headquarters",
        "description",
        "aum",
        "aum_as_of_date",
        "aum_source_url",
        "investment_thesis",
        "portfolio_highlights",
        "linked_documents",
        "researcher_notes",
        "missing_important_fields",
        "sources",
    )

    try:
        existing_investor = (
            db.query(InvestorTable).filter_by(name=investor_data["name"]).first()
        )

        if existing_investor:
            investor = existing_investor
            # Prefer the new value, fall back to what is already stored.
            for column in columns:
                setattr(
                    investor,
                    column,
                    investor_data.get(column) or getattr(investor, column),
                )
        else:
            investor = InvestorTable(
                name=investor_data["name"],
                **{column: investor_data.get(column) for column in columns},
            )
            db.add(investor)
            db.flush()

        # Team members are replaced wholesale when updating.
        if existing_investor:
            db.query(InvestorMember).filter_by(investor_id=investor.id).delete()

        for member_data in investor_data.get("team_members") or []:
            db.add(
                InvestorMember(
                    name=member_data.get("name"),
                    role=member_data.get("role"),
                    title=member_data.get("title"),
                    email=member_data.get("email"),
                    source_url=member_data.get("source_url"),
                    investor_id=investor.id,
                )
            )

        # Funds are replaced wholesale too. They are deleted through the
        # session, one object at a time, rather than with a bulk
        # Query.delete(): the bulk form skips ORM relationship handling, so
        # the fund<->stage and fund<->sector link rows would be left behind
        # and could attach to a new fund that reuses the same id.
        # NOTE(review): assumes FundTable.investment_stages / .sectors are
        # ordinary many-to-many relationships - confirm in db.models.
        if existing_investor:
            stale_funds = (
                db.query(FundTable).filter_by(investor_id=investor.id).all()
            )
            for stale_fund in stale_funds:
                db.delete(stale_fund)
            db.flush()  # run the deletes before the replacements are inserted

        for fund_data in investor_data.get("funds") or []:
            fund = FundTable(
                investor_id=investor.id,
                fund_name=fund_data.get("fund_name"),
                fund_size=fund_data.get("fund_size"),  # integer
                fund_size_source_url=fund_data.get("fund_size_source_url"),
                check_size_lower=fund_data.get("check_size_lower"),
                check_size_upper=fund_data.get("check_size_upper"),
                source_url=fund_data.get("source_url"),
                source_provider=fund_data.get("source_provider"),
                geographic_focus=fund_data.get("geographic_focus"),  # string
            )
            db.add(fund)
            db.flush()  # Get the fund ID

            # Investment stages (many-to-many). `or []` tolerates an explicit
            # None; blank and repeated names are skipped so the same link is
            # not added twice.
            for stage_name in fund_data.get("investment_stage_names") or []:
                if not stage_name:
                    continue
                stage = self._get_or_create_investment_stage(db, stage_name)
                if stage not in fund.investment_stages:
                    fund.investment_stages.append(stage)

            # Sectors (many-to-many), same rules as stages.
            for sector_name in fund_data.get("sector_names") or []:
                if not sector_name:
                    continue
                sector = self._get_or_create_sector(db, sector_name)
                if sector not in fund.sectors:
                    fund.sectors.append(sector)

        return investor

    except Exception as e:
        print(f"Error saving investor to database: {e}")
        db.rollback()
        return None
|
||||
|
||||
def _get_or_create_investment_stage(
    self, db: Session, stage_name: str
) -> InvestmentStageTable:
    """
    Return the investment stage row named `stage_name`, creating it if absent.

    A newly created row is added to the session and flushed, not committed.
    """
    found = (
        db.query(InvestmentStageTable)
        .filter(InvestmentStageTable.name == stage_name)
        .first()
    )
    if found:
        return found

    created = InvestmentStageTable(name=stage_name)
    db.add(created)
    db.flush()  # Get the ID without committing
    return created
|
||||
|
||||
def _get_or_create_sector(self, db: Session, sector_name: str) -> SectorTable:
|
||||
"""Get existing sector or create new one"""
|
||||
sector = db.query(SectorTable).filter(SectorTable.name == sector_name).first()
|
||||
@@ -49,7 +569,6 @@ class InvestorProcessor:
|
||||
check_size_lower=investor_data.investor.check_size_lower,
|
||||
check_size_upper=investor_data.investor.check_size_upper,
|
||||
geographic_focus=investor_data.investor.geographic_focus,
|
||||
stage_focus=investor_data.investor.stage_focus,
|
||||
number_of_investments=investor_data.investor.number_of_investments,
|
||||
)
|
||||
db.add(investor)
|
||||
@@ -173,141 +692,219 @@ class InvestorProcessor:
|
||||
print(f"Error processing row {row_idx + 1}: {e}")
|
||||
return None
|
||||
|
||||
async def parse_investors(self, df, save_to_db: bool = True):
|
||||
"""Parse investors from DataFrame and optionally save to database"""
|
||||
investors = []
|
||||
df = df[20:]
|
||||
async def parse_investors(self, df: pd.DataFrame, save_to_db: bool = True):
|
||||
"""
|
||||
Parse investors from DataFrame using manual JSON parsing and LLM for currency conversion.
|
||||
Expected CSV columns: Name, Website, Final Investor Profile, Final Profile sourcing
|
||||
"""
|
||||
results = []
|
||||
db = None
|
||||
if save_to_db:
|
||||
db = get_db_session()
|
||||
|
||||
try:
|
||||
# Process rows in batches asynchronously
|
||||
batch_size = 20 # Adjust batch size as needed
|
||||
rows = [(idx, row) for idx, row in df.iterrows()]
|
||||
total_rows = len(df)
|
||||
print(f"\n🚀 Starting to process {total_rows} investors...")
|
||||
|
||||
for i in range(0, len(rows), batch_size):
|
||||
batch = rows[i : i + batch_size]
|
||||
|
||||
# Process batch asynchronously
|
||||
tasks = [
|
||||
self._process_row(row, idx, is_investor=True) for idx, row in batch
|
||||
]
|
||||
|
||||
batch_results = await asyncio.gather(*tasks, return_exceptions=True)
|
||||
|
||||
# Handle results from batch
|
||||
for (idx, row), result in zip(batch, batch_results):
|
||||
if isinstance(result, Exception):
|
||||
print(f"Error processing row {idx}: {result}")
|
||||
if db:
|
||||
db.rollback()
|
||||
continue
|
||||
|
||||
if result:
|
||||
# Convert dict to InvestorData if needed
|
||||
if isinstance(result, dict):
|
||||
investor_data = InvestorData(**result)
|
||||
else:
|
||||
investor_data = result
|
||||
|
||||
investors.append(investor_data)
|
||||
|
||||
# Save to database if requested
|
||||
if save_to_db and db:
|
||||
try:
|
||||
saved_investor = self._save_investor_to_db(
|
||||
db, investor_data
|
||||
)
|
||||
db.commit()
|
||||
print(
|
||||
f"✅ Saved investor '{saved_investor.name}' to database"
|
||||
)
|
||||
except Exception as e:
|
||||
db.rollback()
|
||||
print(f"❌ Failed to save investor to database: {e}")
|
||||
|
||||
print(
|
||||
f"Completed batch {i // batch_size + 1} of {(len(rows) + batch_size - 1) // batch_size}"
|
||||
)
|
||||
|
||||
except Exception as e:
|
||||
print(f"Error in batch processing: {e}")
|
||||
if db:
|
||||
db.rollback()
|
||||
finally:
|
||||
if db:
|
||||
db.close()
|
||||
|
||||
return investors
|
||||
|
||||
async def parse_companies(self, df, save_to_db: bool = True):
|
||||
"""Parse companies from DataFrame and optionally save to database"""
|
||||
companies = []
|
||||
df = df[20:]
|
||||
db = None
|
||||
if save_to_db:
|
||||
db = get_db_session()
|
||||
|
||||
try:
|
||||
# Process rows in batches asynchronously
|
||||
batch_size = 20 # Adjust batch size as needed
|
||||
rows = [(idx, row) for idx, row in df.iterrows()]
|
||||
|
||||
for i in range(0, len(rows), batch_size):
|
||||
batch = rows[i : i + batch_size]
|
||||
|
||||
# Process batch asynchronously
|
||||
tasks = [
|
||||
self._process_row(row, idx, is_investor=False) for idx, row in batch
|
||||
]
|
||||
|
||||
batch_results = await asyncio.gather(*tasks, return_exceptions=True)
|
||||
|
||||
# Handle results from batch
|
||||
for (idx, row), result in zip(batch, batch_results):
|
||||
if isinstance(result, Exception):
|
||||
print(f"Error processing row {idx}: {result}")
|
||||
if db:
|
||||
db.rollback()
|
||||
continue
|
||||
|
||||
if result:
|
||||
# Convert dict to CompanyData if needed
|
||||
if isinstance(result, dict):
|
||||
company_data = CompanyData(**result)
|
||||
else:
|
||||
company_data = result
|
||||
|
||||
companies.append(company_data)
|
||||
|
||||
# Save to database if requested
|
||||
if save_to_db and db:
|
||||
try:
|
||||
saved_company = self._save_company_to_db(
|
||||
db, company_data
|
||||
)
|
||||
db.commit()
|
||||
print(
|
||||
f"✅ Saved company '{saved_company.name}' to database"
|
||||
)
|
||||
except Exception as e:
|
||||
db.rollback()
|
||||
print(f"❌ Failed to save company to database: {e}")
|
||||
|
||||
print(
|
||||
f"Completed batch {i // batch_size + 1} of {(len(rows) + batch_size - 1) // batch_size}"
|
||||
for idx, row in df.iterrows():
|
||||
try:
|
||||
name = (
|
||||
row.get("Name", "").strip()
|
||||
if pd.notna(row.get("Name"))
|
||||
else None
|
||||
)
|
||||
website = (
|
||||
row.get("Website", "").strip()
|
||||
if pd.notna(row.get("Website"))
|
||||
else None
|
||||
)
|
||||
profile_json = (
|
||||
row.get("Final Investor Profile", "")
|
||||
if pd.notna(row.get("Final Investor Profile"))
|
||||
else None
|
||||
)
|
||||
|
||||
if not name or not profile_json:
|
||||
print(f"⚠️ Row {idx + 1}: Skipping - missing name or profile")
|
||||
continue
|
||||
|
||||
print(f"\n📊 Processing {idx + 1}/{total_rows}: {name}")
|
||||
|
||||
# Process the investor profile
|
||||
investor_data = await self.process_investor_profile(
|
||||
name, website, profile_json
|
||||
)
|
||||
|
||||
if investor_data:
|
||||
results.append(investor_data)
|
||||
print(" ✓ Parsed successfully")
|
||||
print(f" - HQ: {investor_data.get('headquarters')}")
|
||||
print(
|
||||
f" - AUM: ${investor_data.get('aum'):,}"
|
||||
if investor_data.get("aum")
|
||||
else " - AUM: Not Available"
|
||||
)
|
||||
print(f" - Funds: {len(investor_data.get('funds', []))}")
|
||||
print(
|
||||
f" - Team: {len(investor_data.get('team_members', []))}"
|
||||
)
|
||||
|
||||
# Save to database
|
||||
if save_to_db and db:
|
||||
try:
|
||||
saved_investor = self._save_parsed_investor_to_db(
|
||||
db, investor_data
|
||||
)
|
||||
if saved_investor:
|
||||
db.commit()
|
||||
print(
|
||||
f" ✅ Saved to database (ID: {saved_investor.id})"
|
||||
)
|
||||
else:
|
||||
print(" ❌ Failed to save to database")
|
||||
except Exception as e:
|
||||
db.rollback()
|
||||
print(f" ❌ Database error: {e}")
|
||||
else:
|
||||
print(" ⚠️ Failed to process profile")
|
||||
|
||||
# Commit every 10 investors to avoid memory issues
|
||||
if save_to_db and db and (idx + 1) % 10 == 0:
|
||||
db.commit()
|
||||
print(f"\n💾 Committed batch at row {idx + 1}")
|
||||
|
||||
except Exception as e:
|
||||
print(f"❌ Error processing row {idx + 1}: {e}")
|
||||
if db:
|
||||
db.rollback()
|
||||
continue
|
||||
|
||||
# Final commit
|
||||
if save_to_db and db:
|
||||
db.commit()
|
||||
print("\n✅ Final commit completed")
|
||||
|
||||
except Exception as e:
|
||||
print(f"Error processing row {idx}: {e}")
|
||||
print(f"❌ Fatal error in parse_investors: {e}")
|
||||
if db:
|
||||
db.rollback()
|
||||
finally:
|
||||
if db:
|
||||
db.close()
|
||||
|
||||
return companies
|
||||
print(f"\n🎉 Completed! Processed {len(results)}/{total_rows} investors")
|
||||
return results
|
||||
|
||||
async def parse_companies(self, df: pd.DataFrame, save_to_db: bool = True):
    """
    Parse companies from a DataFrame using manual JSON parsing.

    Expected CSV columns: Name, Website, Investor, Final Investor Profile
    (the last column actually holds the company profile).

    Args:
        df: One row per company.
        save_to_db: When True, each parsed company is saved through
            ``self._save_parsed_company_to_db`` on a session obtained from
            ``get_db_session()``.

    Returns:
        The list of parsed company dicts (one per successfully processed row),
        whether or not saving to the database succeeded.
    """
    results = []
    # Bound up front so the closing summary never hits an unbound name, even if
    # the very first statement inside the try block fails.
    total_rows = 0
    db = get_db_session() if save_to_db else None

    try:
        total_rows = len(df)
        print(f"\n🚀 Starting to process {total_rows} companies...")

        # Rows are numbered by position, not by index label: labels may be
        # strings or non-contiguous (e.g. after filtering), which used to break
        # the `idx + 1` arithmetic and abort the whole run.
        for row_num, (_, row) in enumerate(df.iterrows(), start=1):
            try:
                name = (
                    row.get("Name", "").strip()
                    if pd.notna(row.get("Name"))
                    else None
                )
                website = (
                    row.get("Website", "").strip()
                    if pd.notna(row.get("Website"))
                    else None
                )
                investor_names = (
                    row.get("Investor", "").strip()
                    if pd.notna(row.get("Investor"))
                    else None
                )
                profile_json = (
                    row.get("Final Investor Profile", "")
                    if pd.notna(row.get("Final Investor Profile"))
                    else None
                )

                if not name or not profile_json:
                    print(f"⚠️ Row {row_num}: Skipping - missing name or profile")
                    continue

                print(f"\n📊 Processing {row_num}/{total_rows}: {name}")

                # Process the company profile
                company_data = await self.process_company_profile(
                    name, website, profile_json, investor_names
                )

                if company_data:
                    results.append(company_data)
                    print(" ✓ Parsed successfully")
                    print(f" - Location: {company_data.get('location')}")
                    print(f" - Industry: {company_data.get('industry')}")
                    print(
                        f" - Founded: {company_data.get('founded_year')}"
                        if company_data.get("founded_year")
                        else " - Founded: Unknown"
                    )
                    print(
                        f" - Executives: {len(company_data.get('key_executives', []))}"
                    )
                    print(
                        f" - Investors: {len(company_data.get('investor_names', []))}"
                    )

                    # Save to database
                    if save_to_db and db:
                        try:
                            saved_company = self._save_parsed_company_to_db(
                                db, company_data
                            )
                            if saved_company:
                                db.commit()
                                print(
                                    f" ✅ Saved to database (ID: {saved_company.id})"
                                )
                            else:
                                print(" ❌ Failed to save to database")
                        except Exception as e:
                            # Keep going with the next row; the failed
                            # transaction must be rolled back first.
                            db.rollback()
                            print(f" ❌ Database error: {e}")
                else:
                    print(" ⚠️ Failed to process profile")

                # Commit every 10 companies to avoid memory issues
                if save_to_db and db and row_num % 10 == 0:
                    db.commit()
                    print(f"\n💾 Committed batch at row {row_num}")

            except Exception as e:
                print(f"❌ Error processing row {row_num}: {e}")
                if db:
                    db.rollback()
                continue

        # Final commit
        if save_to_db and db:
            db.commit()
            print("\n✅ Final commit completed")

    except Exception as e:
        print(f"❌ Fatal error in parse_companies: {e}")
        if db:
            db.rollback()
    finally:
        if db:
            db.close()

    print(f"\n🎉 Completed! Processed {len(results)}/{total_rows} companies")
    return results
|
||||
|
||||
|
||||
# async def main():
|
||||
|
||||
@@ -95,6 +95,7 @@ class QueryProcessor:
|
||||
selectinload(InvestorTable.portfolio_companies),
|
||||
selectinload(InvestorTable.team_members),
|
||||
selectinload(InvestorTable.sectors),
|
||||
selectinload(InvestorTable.funds),
|
||||
)
|
||||
.filter(InvestorTable.id.in_(investor_ids))
|
||||
)
|
||||
@@ -109,6 +110,7 @@ class QueryProcessor:
|
||||
portfolio_companies=investor.portfolio_companies,
|
||||
team_members=investor.team_members,
|
||||
sectors=investor.sectors,
|
||||
funds=investor.funds,
|
||||
)
|
||||
investor_data_list.append(investor_data)
|
||||
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
Binary file not shown.
@@ -0,0 +1,315 @@
|
||||
import logging
|
||||
import re
|
||||
import unicodedata
|
||||
|
||||
import pandas as pd
|
||||
from models import CompanyTable, InvestorTable, SectorTable, engine, init_database
|
||||
from sqlalchemy.orm import sessionmaker
|
||||
|
||||
# Set up logging: INFO level on the root logger, plus a module-level logger.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Runs at import time: init_database() (imported from models) is called before
# any ingestion code executes, so importing this module has this side effect.
init_database()
|
||||
|
||||
|
||||
# ===================== Ingesting Original Data =====================#
|
||||
def parse_investor_names(investor_names_str):
    """Split a comma-separated investor string into a list of cleaned names.

    Missing (NaN/None) or empty input yields an empty list. Every piece is
    stripped and passed through ``clean_name``; pieces that clean down to
    nothing are dropped.
    """
    if pd.isna(investor_names_str) or investor_names_str == "":
        return []

    cleaned_names = (
        clean_name(piece.strip()) for piece in str(investor_names_str).split(",")
    )
    return [candidate for candidate in cleaned_names if candidate]
|
||||
|
||||
|
||||
def parse_industries(industries_str):
    """Split a comma-separated industries string into a list of names.

    Missing (NaN/None) or empty input yields an empty list; surrounding
    whitespace is removed and blank entries are skipped.
    """
    if pd.isna(industries_str) or industries_str == "":
        return []

    parsed = []
    for piece in str(industries_str).split(","):
        trimmed = piece.strip()
        if trimmed:
            parsed.append(trimmed)
    return parsed
|
||||
|
||||
|
||||
def clean_special_characters(text):
    """Reduce text to plain ASCII letters, digits, spaces, hyphens and periods.

    Falsy input is returned unchanged. Otherwise the value is stringified,
    "..." and ".." sequences are removed, accented characters are folded to
    their ASCII base letters, every other character is dropped, and runs of
    whitespace are collapsed to single spaces.
    """
    if not text:
        return text

    # Ellipses go first so that the longer pattern is matched before the shorter.
    without_ellipses = str(text).replace("...", "").replace("..", "")

    # NFKD decomposition splits accents off their base letters; the ASCII
    # round-trip then discards everything that has no ASCII form.
    ascii_only = (
        unicodedata.normalize("NFKD", without_ellipses)
        .encode("ascii", "ignore")
        .decode("ascii")
    )

    kept = re.sub(r"[^a-zA-Z0-9\s\-\.]", "", ascii_only)

    # split()/join collapses whitespace runs and trims both ends in one step.
    return " ".join(kept.split())
|
||||
|
||||
|
||||
def clean_string(value):
    """Return a cleaned string, or None for empty/placeholder values.

    NaN/None, the empty string, and (case-insensitively) "nan", "null", "none",
    "0" and "0.0" map to None. Anything else is stripped and passed through
    ``clean_special_characters``; a result that is empty or a lowercase
    placeholder also maps to None.
    """
    if pd.isna(value):
        return None
    if value == "" or str(value).lower() in ["nan", "null", "none", "0", "0.0"]:
        return None

    # Strip special characters first, then re-check what is left.
    result = clean_special_characters(str(value).strip())

    if not result or result in ["0", "0.0", "null", "nan", "none"]:
        return None
    return result
|
||||
|
||||
|
||||
def clean_name(value):
    """Clean a company or investor name, returning None for placeholder values.

    NaN/None, the empty string, and (case-insensitively) "nan", "null", "none",
    "0" and "0.0" map to None. Otherwise the name is folded to ASCII, reduced
    to letters, digits, spaces, hyphens, periods, parentheses and ampersands,
    stripped of ellipsis-style runs of periods, and whitespace-normalized.

    Args:
        value: Raw cell value (string, number, NaN or None).

    Returns:
        The cleaned name, or None when nothing meaningful remains.
    """
    placeholders = ["nan", "null", "none", "0", "0.0"]

    if pd.isna(value) or value == "" or str(value).lower() in placeholders:
        return None

    # Clean special characters but be more permissive than for free text.
    text = str(value).strip()

    # NFKD splits accents off their base letters; the ASCII round-trip then
    # drops whatever has no ASCII form.
    normalized = unicodedata.normalize("NFKD", text)
    ascii_text = normalized.encode("ascii", "ignore").decode("ascii")

    # Allow alphanumeric, spaces, hyphens, periods, parentheses, and ampersands
    cleaned = re.sub(r"[^a-zA-Z0-9\s\-\.\(\)&]", "", ascii_text)

    # Remove any run of two or more periods in one pass. Chaining
    # replace("..") before replace("...") left a stray "." behind for every
    # three-dot ellipsis ("A... Energi" -> "A. Energi").
    cleaned = re.sub(r"\.{2,}", "", cleaned)

    # Collapse whitespace after the removal above, which can leave double
    # spaces, then trim periods and spaces from both ends.
    cleaned = re.sub(r"\s+", " ", cleaned).strip(". ")

    # Re-check placeholders: the input may only have matched after stripping.
    if cleaned.lower() in placeholders:
        return None

    return cleaned if cleaned else None
|
||||
|
||||
|
||||
def clean_integer(value):
    """Convert a raw value to a positive int, or None when that is not possible.

    NaN/None, empty strings, the placeholders "nan", "null", "none", "0" and
    "0.0" (case-insensitive), non-numeric text, infinities, and values that
    truncate to zero or below all yield None.

    Args:
        value: Raw cell value (string, number, NaN or None).

    Returns:
        The value truncated towards zero as an int when it is > 0, else None.
    """
    if pd.isna(value) or str(value).lower() in ["nan", "null", "none", "", "0", "0.0"]:
        return None

    try:
        cleaned_val = int(float(value))
    except (ValueError, TypeError, OverflowError):
        # OverflowError covers infinities ("inf", 1e999), which float() accepts
        # but int() rejects; previously this escaped to the caller.
        return None

    return cleaned_val if cleaned_val > 0 else None
|
||||
|
||||
|
||||
def parse_website(website_str: str):
    """Normalize a ``scheme:rest`` website value to an ``https:`` URL.

    Everything after the first colon is kept and prefixed with "https:", so the
    original scheme is replaced. Splitting only on the first colon keeps URLs
    that contain further colons (ports, paths) intact; they used to be
    rejected.

    Args:
        website_str: Raw website value, e.g. "http://example.com".

    Returns:
        The rewritten URL, or None when the input is not a string, has no
        colon, or has nothing (or just "0") after the colon.
    """
    if not isinstance(website_str, str):
        return None

    _scheme, separator, remainder = website_str.partition(":")

    # No colon means no scheme was present; "0" is the source data's
    # placeholder for a missing website.
    if not separator or remainder in ("", "0"):
        return None

    return "https:" + remainder
|
||||
|
||||
|
||||
def _ingest_investors(session, investors_df):
    """Add an InvestorTable row for every investor name not yet stored; return the count added."""
    investors_processed = 0

    for index, row in investors_df.iterrows():
        try:
            investor_name = clean_name(row.get("Filtered investor names", ""))
            if not investor_name:
                continue

            # Check if investor already exists
            existing_investor = (
                session.query(InvestorTable).filter_by(name=investor_name).first()
            )
            if existing_investor:
                continue

            session.add(
                InvestorTable(
                    name=investor_name,
                    description=clean_string(row.get("Business model", "")),
                    headquarters=clean_string(row.get("HQ", "")),
                    website=parse_website(str(row.get("Website", "")).strip()),
                    number_of_investments=clean_integer(
                        row.get("Number of investments")
                    ),
                )
            )
            investors_processed += 1

            # Only checked right after an insert, so a batch is committed once.
            if investors_processed % 1000 == 0:
                session.commit()
                print(f" Committed {investors_processed} investors")

        except Exception as e:
            logger.error(f"Error processing investor {index}: {e}")
            # Same recovery as the company loop: without a rollback a failed
            # flush leaves the session unusable for every following row.
            session.rollback()
            continue

    session.commit()
    return investors_processed


def _ingest_companies(session, companies_df):
    """Create companies, link them to known investors and to sectors; return the count created."""
    companies_processed = 0

    for index, row in companies_df.iterrows():
        try:
            # Process company
            company_name = clean_name(row.get("Organization Name", ""))
            if not company_name:
                continue

            # Reuse the stored company when one with this name already exists
            company = session.query(CompanyTable).filter_by(name=company_name).first()
            if company is None:
                company = CompanyTable(
                    name=company_name,
                    description=clean_string(row.get("Organization Description", "")),
                    location=clean_string(row.get("Organization Location", "")),
                    industry=clean_string(row.get("Organization Industries", "")),
                    website=clean_string(row.get("Organization Website", "")),
                )
                session.add(company)
                session.flush()  # Get the company ID
                companies_processed += 1

            # Process investor relationships
            investor_names_str = row.get("Investor Names", "")
            if pd.notna(investor_names_str) and investor_names_str:
                for investor_name in parse_investor_names(investor_names_str):
                    # Find investor in database
                    investor = (
                        session.query(InvestorTable)
                        .filter_by(name=investor_name.strip())
                        .first()
                    )

                    if investor:
                        # Add investor-company relationship
                        if company not in investor.portfolio_companies:
                            investor.portfolio_companies.append(company)
                    else:
                        print("This company has an investor not in DB:", investor_name)

            # Process sectors/industries
            industries_str = row.get("Organization Industries", "")
            if pd.notna(industries_str) and industries_str:
                for industry_name in parse_industries(industries_str):
                    industry_name = industry_name.strip()
                    if not industry_name:
                        continue

                    # Check if sector exists
                    sector = (
                        session.query(SectorTable)
                        .filter_by(name=industry_name)
                        .first()
                    )
                    if not sector:
                        sector = SectorTable(name=industry_name)
                        session.add(sector)
                        session.flush()

                    # Add company-sector relationship
                    if sector not in company.sectors:
                        company.sectors.append(sector)

            # Commit every 100 companies
            if companies_processed % 100 == 0 and companies_processed > 0:
                session.commit()
                print(f" Processed {companies_processed} companies...")

        except Exception as e:
            logger.error(f"Error processing company {index}: {e}")
            session.rollback()
            continue

    # Persist the tail of the last batch before the next step reads it back.
    session.commit()
    return companies_processed


def _link_investors_to_sectors(session):
    """Give every investor the sectors of its portfolio companies; return how many had any."""
    investors_linked_to_sectors = 0

    for investor in session.query(InvestorTable).all():
        sectors = {
            sector
            for company in investor.portfolio_companies
            for sector in company.sectors
        }

        # Add sectors to investor if not already present
        for sector in sectors:
            if sector not in investor.sectors:
                investor.sectors.append(sector)

        if sectors:
            investors_linked_to_sectors += 1

    session.commit()
    return investors_linked_to_sectors


def ingest_data():
    """Load companies.csv and investors.csv into the database.

    Runs three steps on a single session: insert new investors, insert new
    companies (linking them to already-stored investors and to sectors), then
    copy each investor's portfolio-company sectors onto the investor. Progress
    and final row counts are printed. The session is always closed, including
    when a step or a CSV load raises.
    """
    # Create database engine and session
    Session = sessionmaker(bind=engine)
    session = Session()

    try:
        # Load CSV files
        print("Loading CSV files...")
        companies_df = pd.read_csv("companies.csv")
        investors_df = pd.read_csv("investors.csv")

        print(f"📊 Companies CSV: {len(companies_df)} rows")
        print(f"📊 Investors CSV: {len(investors_df)} rows")

        # Step 1: Ingest Investors
        print("\n🔄 Step 1: Ingesting Investors...")
        investors_processed = _ingest_investors(session, investors_df)
        print(f"✅ Investors completed: {investors_processed} processed")

        # Step 2: Ingest Companies and Sectors
        print("\n🔄 Step 2: Ingesting Companies and Sectors...")
        _ingest_companies(session, companies_df)

        # Step 3: Link investors to sectors based on portfolio companies
        print("\n🔄 Step 3: Linking Investors to Sectors...")
        investors_linked_to_sectors = _link_investors_to_sectors(session)
        print(f"✅ Linked {investors_linked_to_sectors} investors to sectors")

        # Final commit
        session.commit()

        # Final counts
        final_investors = session.query(InvestorTable).count()
        final_companies = session.query(CompanyTable).count()
        final_sectors = session.query(SectorTable).count()

        print("\n🎉 Ingestion Complete!")
        print(f" Investors: {final_investors}")
        print(f" Companies: {final_companies}")
        print(f" Sectors: {final_sectors}")
    finally:
        session.close()
|
||||
|
||||
|
||||
# Script entry point: run the full ingestion when executed directly.
if __name__ == "__main__":
    ingest_data()
    # Manual spot-checks of clean_name on names containing ellipses
    # (left disabled):
    # print(clean_name("A... Energi"))
    # print(clean_name("B.. Tech"))
    # print(clean_name("A... Energi"))
|
||||
@@ -0,0 +1,381 @@
|
||||
import enum
|
||||
from typing import Annotated
|
||||
|
||||
from fastapi import Depends
|
||||
from sqlalchemy import (
|
||||
Column,
|
||||
DateTime,
|
||||
ForeignKey,
|
||||
Integer,
|
||||
String,
|
||||
Table,
|
||||
Text,
|
||||
create_engine,
|
||||
func,
|
||||
)
|
||||
from sqlalchemy.ext.declarative import declarative_base
|
||||
from sqlalchemy.orm import Session, declarative_mixin, relationship, sessionmaker
|
||||
from sqlalchemy.types import JSON, Enum
|
||||
|
||||
# Declarative base class that every ORM model below inherits from.
Base = declarative_base()

# Database configuration
# NOTE(review): the environment-driven URL is commented out, so the engine below
# always targets the hard-coded SQLite file - confirm this is intentional.
# DATABASE_URL = os.getenv("DATABASE_URL", "sqlite:///./investors.db")

# Create engine (echo=False: SQL statements are not echoed)
engine = create_engine("sqlite:///./investors.db", echo=False)

# Create session factory; sessions it produces have autocommit and autoflush
# turned off and are bound to the engine above.
SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine)
|
||||
|
||||
|
||||
def get_db():
    """Yield a SessionLocal session and close it once the consumer is finished.

    Written as a generator so it can serve as the dependency wrapped by
    ``Depends(get_db)``; the session is closed whether or not the consumer
    raised.
    """
    # Leaving the session's context manager closes the session.
    with SessionLocal() as session:
        yield session
|
||||
|
||||
|
||||
# Annotated alias: a Session parameter typed with this is resolved through
# Depends(get_db).
db_dependency = Annotated[Session, Depends(get_db)]
|
||||
|
||||
|
||||
def init_database():
    """Create every table registered on ``Base.metadata`` using the module engine."""
    schema = Base.metadata
    schema.create_all(bind=engine)
|
||||
|
||||
|
||||
def get_session_sync() -> Session:
    """Return a new session from ``SessionLocal``; the caller must close it."""
    session = SessionLocal()
    return session
|
||||
|
||||
|
||||
def get_db_session():
    """Return a new ``SessionLocal`` session for direct use; the caller must close it."""
    session = SessionLocal()
    return session
|
||||
|
||||
|
||||
@declarative_mixin
class TimestampMixin:
    """Mixin adding ``created_at`` and ``updated_at`` timestamp columns to a model."""

    # Required column, filled by the database through server_default=func.now().
    created_at = Column(
        DateTime(timezone=True), server_default=func.now(), nullable=False
    )
    # Has only an onupdate hook and no default, so nothing sets it on insert.
    updated_at = Column(DateTime(timezone=True), onupdate=func.now())
|
||||
|
||||
|
||||
class InvestmentStage(enum.Enum):
    """Closed set of investment stages; each member's value equals its name.

    Used as the Enum type of ``ProjectTable.stage``.
    """

    SEED = "SEED"
    SERIES_A = "SERIES_A"
    SERIES_B = "SERIES_B"
    SERIES_C = "SERIES_C"
    GROWTH = "GROWTH"
    LATE_STAGE = "LATE_STAGE"
|
||||
|
||||
|
||||
# Association tables backing the many-to-many relationships declared on the
# models below. Each one holds two foreign-key columns and declares no primary
# key or uniqueness constraint.

# Association table for many-to-many relationship between investors and companies
investor_company_association = Table(
    "investor_companies",
    Base.metadata,
    Column("investor_id", Integer, ForeignKey("investors.id")),
    Column("company_id", Integer, ForeignKey("companies.id")),
)


# Association table for investor-sector many-to-many
investor_sector_association = Table(
    "investor_sectors",
    Base.metadata,
    Column("investor_id", Integer, ForeignKey("investors.id")),
    Column("sector_id", Integer, ForeignKey("sectors.id")),
)


# Association table for company-sector many-to-many
company_sector_association = Table(
    "company_sector",
    Base.metadata,
    Column("company_id", Integer, ForeignKey("companies.id")),
    Column("sector_id", Integer, ForeignKey("sectors.id")),
)

# Association table for project-sector many-to-many
project_sector_association = Table(
    "project_sector",
    Base.metadata,
    Column("project_id", Integer, ForeignKey("projects.id")),
    Column("sector_id", Integer, ForeignKey("sectors.id")),
)

# Association table for project-investor many-to-many
project_investor_association = Table(
    "project_investors",
    Base.metadata,
    Column("project_id", Integer, ForeignKey("projects.id")),
    Column("investor_id", Integer, ForeignKey("investors.id")),
)

# Association table for project-company many-to-many
project_company_association = Table(
    "project_companies",
    Base.metadata,
    Column("project_id", Integer, ForeignKey("projects.id")),
    Column("company_id", Integer, ForeignKey("companies.id")),
)

# Association table for investor-stage many-to-many
investor_stage_association = Table(
    "investor_stages",
    Base.metadata,
    Column("investor_id", Integer, ForeignKey("investors.id")),
    Column("stage_id", Integer, ForeignKey("investment_stages.id")),
)

# Association table for fund-stage many-to-many
fund_investment_stages_association = Table(
    "fund_investment_stages",
    Base.metadata,
    Column("fund_id", Integer, ForeignKey("funds.id")),
    Column("stage_id", Integer, ForeignKey("investment_stages.id")),
)

# Association table for fund-sector many-to-many
fund_sectors_association = Table(
    "fund_sectors",
    Base.metadata,
    Column("fund_id", Integer, ForeignKey("funds.id")),
    Column("sector_id", Integer, ForeignKey("sectors.id")),
)
|
||||
|
||||
|
||||
class InvestorTable(Base, TimestampMixin):
    """ORM model for the ``investors`` table.

    Owns its team members and funds (both cascade "all, delete-orphan") and is
    linked many-to-many to investment stages, portfolio companies, sectors and
    projects through the association tables.
    """

    __tablename__ = "investors"

    id = Column(Integer, primary_key=True, index=True)
    # Required, but not declared unique at the schema level.
    name = Column(String, nullable=False)
    description = Column(Text, nullable=True)

    # Basic investor info
    website = Column(String, nullable=True)
    headquarters = Column(String, nullable=True)

    # AUM fields
    aum = Column(Integer, nullable=True)  # Store as integer for numerical filtering
    aum_as_of_date = Column(String, nullable=True)
    aum_source_url = Column(String, nullable=True)

    # Check size (deprecated in favor of fund-level data, but keeping for backward compatibility)
    check_size_lower = Column(Integer, nullable=True)
    check_size_upper = Column(Integer, nullable=True)

    # Geographic focus (deprecated in favor of fund-level, but keeping for backward compatibility)
    geographic_focus = Column(String, nullable=True)

    # Investment thesis and portfolio
    investment_thesis = Column(JSON, nullable=True)  # Array of thesis statements
    portfolio_highlights = Column(
        JSON, nullable=True
    )  # Array of portfolio company names
    linked_documents = Column(JSON, nullable=True)  # Array of document URLs

    # Research metadata
    researcher_notes = Column(Text, nullable=True)
    missing_important_fields = Column(
        JSON, nullable=True
    )  # Array of missing field names
    sources = Column(JSON, nullable=True)  # JSON object with source URLs

    # Portfolio info
    number_of_investments = Column(Integer, nullable=True)

    # Relationships
    # One-to-many children; "delete-orphan" deletes a child once it is removed
    # from the collection or the investor itself is deleted.
    team_members = relationship(
        "InvestorMember", back_populates="investor", cascade="all, delete-orphan"
    )
    funds = relationship(
        "FundTable", back_populates="investor", cascade="all, delete-orphan"
    )

    # Many-to-many relationship with investment stages
    investment_stages = relationship(
        "InvestmentStageTable",
        secondary=investor_stage_association,
        back_populates="investors",
    )

    # Relationship to portfolio companies
    portfolio_companies = relationship(
        "CompanyTable",
        secondary=investor_company_association,
        back_populates="investors",
    )

    # Many-to-many relationship with sectors
    sectors = relationship(
        "SectorTable",
        secondary=investor_sector_association,
        back_populates="investors",
    )

    # Many-to-many relationship with projects
    projects = relationship(
        "ProjectTable",
        secondary=project_investor_association,
        back_populates="investors",
    )
|
||||
|
||||
|
||||
class InvestorMember(Base, TimestampMixin):
    """ORM model for the ``investor_members`` table: one team member of an investor."""

    __tablename__ = "investor_members"
    id = Column(Integer, primary_key=True, index=True)
    name = Column(String, nullable=False)
    role = Column(String, nullable=True)
    title = Column(String, nullable=True)  # Alternative to role
    email = Column(String, nullable=True)
    source_url = Column(String, nullable=True)  # URL where member info was found

    # Owning investor; nullable is not specified on this foreign key.
    investor_id = Column(Integer, ForeignKey("investors.id"))
    investor = relationship("InvestorTable", back_populates="team_members")
|
||||
|
||||
|
||||
class FundTable(Base, TimestampMixin):
    """ORM model for the ``funds`` table: one fund belonging to an investor.

    Linked many-to-many to investment stages and sectors.
    """

    __tablename__ = "funds"

    id = Column(Integer, primary_key=True, index=True)
    # Every fund must reference an investor (nullable=False).
    investor_id = Column(Integer, ForeignKey("investors.id"), nullable=False)

    # Fund details
    fund_name = Column(String, nullable=True)
    fund_size = Column(
        Integer, nullable=True
    )  # Store as integer for numerical filtering
    fund_size_source_url = Column(String, nullable=True)

    # Check size range (parsed from estimated_investment_size by LLM)
    check_size_lower = Column(Integer, nullable=True)
    check_size_upper = Column(Integer, nullable=True)

    source_url = Column(String, nullable=True)
    source_provider = Column(String, nullable=True)  # e.g., "Perplexity"

    # Geographic focus as simple string
    geographic_focus = Column(String, nullable=True)

    # Relationships
    investor = relationship("InvestorTable", back_populates="funds")
    investment_stages = relationship(
        "InvestmentStageTable",
        secondary=fund_investment_stages_association,
        back_populates="funds",
    )
    sectors = relationship(
        "SectorTable",
        secondary=fund_sectors_association,
        back_populates="funds",
    )
|
||||
|
||||
|
||||
class InvestmentStageTable(Base, TimestampMixin):
    """ORM model for the ``investment_stages`` table: a uniquely named stage.

    Linked many-to-many to investors and funds.
    """

    __tablename__ = "investment_stages"

    id = Column(Integer, primary_key=True, index=True)
    # Stage names are unique at the schema level.
    name = Column(String, nullable=False, unique=True)

    # Relationships
    investors = relationship(
        "InvestorTable",
        secondary=investor_stage_association,
        back_populates="investment_stages",
    )
    funds = relationship(
        "FundTable",
        secondary=fund_investment_stages_association,
        back_populates="investment_stages",
    )
|
||||
|
||||
|
||||
class CompanyTable(Base, TimestampMixin):
    """ORM model for the ``companies`` table.

    Owns its members (cascade "all, delete-orphan") and is linked many-to-many
    to investors, sectors and projects.
    """

    __tablename__ = "companies"

    id = Column(Integer, primary_key=True, index=True)
    # Required, but not declared unique at the schema level.
    name = Column(String, nullable=False)
    industry = Column(String, nullable=True)
    location = Column(String, nullable=True)
    description = Column(String, nullable=True)
    founded_year = Column(Integer, nullable=True)
    website = Column(String, nullable=True)

    # One-to-many children; deleted together with the company.
    members = relationship(
        "CompanyMember", back_populates="company", cascade="all, delete-orphan"
    )
    # Relationship back to investors
    investors = relationship(
        "InvestorTable",
        secondary=investor_company_association,
        back_populates="portfolio_companies",
    )

    # Many-to-many relationship with sectors
    sectors = relationship(
        "SectorTable", secondary=company_sector_association, back_populates="companies"
    )

    # Many-to-many relationship with projects
    projects = relationship(
        "ProjectTable",
        secondary=project_company_association,
        back_populates="companies",
    )
|
||||
|
||||
|
||||
class CompanyMember(Base, TimestampMixin):
    """ORM model for the ``company_members`` table: one person attached to a company."""

    __tablename__ = "company_members"
    id = Column(Integer, primary_key=True)
    # nullable is not specified for name (unlike the other models' name columns).
    name = Column(String)
    linkedin = Column(String, nullable=True)
    role = Column(String, nullable=True)
    # Every member must reference a company (nullable=False).
    company_id = Column(Integer, ForeignKey("companies.id"), nullable=False)

    company = relationship("CompanyTable", back_populates="members")
|
||||
|
||||
|
||||
class SectorTable(Base, TimestampMixin):
    """ORM model for the ``sectors`` table.

    Linked many-to-many to investors, companies, projects and funds.
    """

    __tablename__ = "sectors"

    id = Column(Integer, primary_key=True, index=True)
    # Required, but not declared unique at the schema level.
    name = Column(String, nullable=False)

    # Relationships
    investors = relationship(
        "InvestorTable",
        secondary=investor_sector_association,
        back_populates="sectors",
    )
    companies = relationship(
        "CompanyTable", secondary=company_sector_association, back_populates="sectors"
    )
    # The reverse side on ProjectTable is the singular attribute "sector".
    projects = relationship(
        "ProjectTable", secondary=project_sector_association, back_populates="sector"
    )
    funds = relationship(
        "FundTable",
        secondary=fund_sectors_association,
        back_populates="sectors",
    )
|
||||
|
||||
|
||||
class ProjectTable(Base, TimestampMixin):
    """ORM model for the ``projects`` table.

    Linked many-to-many to sectors, investors and companies.
    """

    __tablename__ = "projects"

    id = Column(Integer, primary_key=True, index=True)
    name = Column(String, nullable=False)
    valuation = Column(Integer, nullable=True)

    # Stored as the InvestmentStage enum declared in this module.
    stage = Column(Enum(InvestmentStage), nullable=True)
    location = Column(String, nullable=True)
    description = Column(Text, nullable=True)
    # Declared without timezone=True, unlike the TimestampMixin columns.
    start_date = Column(DateTime, nullable=True)
    end_date = Column(DateTime, nullable=True)

    # Singular name, but it goes through an association table like the other
    # many-to-many relationships here; reverse side is SectorTable.projects.
    sector = relationship(
        "SectorTable", secondary=project_sector_association, back_populates="projects"
    )
    investors = relationship(
        "InvestorTable",
        secondary=project_investor_association,
        back_populates="projects",
    )
    companies = relationship(
        "CompanyTable", secondary=project_company_association, back_populates="projects"
    )
|
||||
Reference in New Issue
Block a user