made version 2

This commit is contained in:
bolade
2025-09-25 17:00:38 +01:00
parent b1b1c5ea1e
commit 0f7beca5e1
42 changed files with 660 additions and 2036 deletions
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
+5 -1
View File
@@ -9,7 +9,7 @@ from sqlalchemy.orm import Session, sessionmaker
# Declarative base created once at module level; presumably the ORM model
# classes of the project subclass it (verify against the models module).
Base = declarative_base()
# Database configuration: the DATABASE_URL environment variable takes
# precedence; when it is unset the fallback is a SQLite file named investors.db.
# NOTE(review): two assignments with different fallback strings appear here
# back to back. If both execute, the later one ("sqlite:///./investors.db")
# is the value that reaches create_engine below.
DATABASE_URL = os.getenv("DATABASE_URL", "sqlite:///investors.db")
DATABASE_URL = os.getenv("DATABASE_URL", "sqlite:///./investors.db")
# Create the engine for the configured URL. echo=False is passed explicitly;
# per the SQLAlchemy docs this flag controls statement logging.
engine = create_engine(DATABASE_URL, echo=False)
@@ -38,3 +38,7 @@ def init_database():
def get_session_sync() -> Session:
    """Build a fresh session from ``SessionLocal`` for synchronous callers.

    Returns:
        A new ``Session``. This function does not close it.
    """
    session = SessionLocal()
    return session
def get_db_session() -> Session:
    """Get a database session for direct use.

    Delegates to ``get_session_sync`` so both public accessors create
    sessions through a single code path and share the same signature.

    Returns:
        A new ``Session``. This function does not close it.
    """
    return get_session_sync()
+54 -30
View File
@@ -1,11 +1,17 @@
import datetime
import enum
from sqlalchemy import Column, DateTime, ForeignKey, Integer, String, Table, Text
from sqlalchemy.orm import relationship
from db.db import Base
from sqlalchemy import Column, DateTime, ForeignKey, Integer, String, Table, Text, func
from sqlalchemy.orm import declarative_mixin, relationship
from sqlalchemy.types import Enum
from db.db import Base
@declarative_mixin
class TimestampMixin:
    """Mixin that contributes ``created_at`` and ``updated_at`` columns.

    Both columns are timezone-aware ``DateTime`` values whose defaults are
    evaluated by the database (``func.now()``), not by Python at import time.
    """

    # Filled in by the database when the row is inserted; never NULL.
    created_at = Column(
        DateTime(timezone=True), server_default=func.now(), nullable=False
    )
    # ``server_default`` populates the value at INSERT so a freshly created
    # row does not start with a NULL ``updated_at``; ``onupdate`` then
    # refreshes it whenever the ORM emits an UPDATE for the row.
    updated_at = Column(
        DateTime(timezone=True),
        server_default=func.now(),
        onupdate=func.now(),
    )
class InvestmentStage(enum.Enum):
@@ -16,6 +22,7 @@ class InvestmentStage(enum.Enum):
GROWTH = "GROWTH"
LATE_STAGE = "LATE_STAGE"
# Association table for many-to-many relationship between investors and companies
investor_company_association = Table(
"investor_companies",
@@ -34,7 +41,15 @@ investor_sector_association = Table(
)
class InvestorTable(Base):
# Association table "company_sector": each row pairs a companies.id with a
# sectors.id, registered on the shared Base.metadata.
# NOTE(review): neither column is declared as a primary key, so this
# definition alone does not prevent duplicate (company, sector) pairs --
# confirm that is intended.
company_sector_association = Table(
    "company_sector",
    Base.metadata,
    Column("company_id", Integer, ForeignKey("companies.id")),
    Column("sector_id", Integer, ForeignKey("sectors.id")),
)
class InvestorTable(Base, TimestampMixin):
__tablename__ = "investors"
id = Column(Integer, primary_key=True, index=True)
@@ -46,12 +61,6 @@ class InvestorTable(Base):
geographic_focus = Column(String, nullable=False)
stage_focus = Column(Enum(InvestmentStage), nullable=False)
number_of_investments = Column(Integer, default=0)
created_at = Column(DateTime, default=datetime.datetime.now(datetime.UTC))
updated_at = Column(
DateTime,
default=datetime.datetime.now(datetime.UTC),
onupdate=datetime.datetime.now(datetime.UTC),
)
# Relationship to portfolio companies
portfolio_companies = relationship(
@@ -59,7 +68,7 @@ class InvestorTable(Base):
secondary=investor_company_association,
back_populates="investors",
)
team_members = relationship("InvestorTeamMember", back_populates="investor")
team_members = relationship("InvestorMember", back_populates="investor")
sectors = relationship(
"SectorTable",
secondary=investor_sector_association,
@@ -67,22 +76,29 @@ class InvestorTable(Base):
)
class CompanyTable(Base):
class InvestorMember(Base, TimestampMixin):
    """Row of the ``investor_members`` table, linked to one investor."""

    __tablename__ = "investor_members"

    id = Column(Integer, primary_key=True, index=True)

    # Required descriptive fields.
    name = Column(String, nullable=False)
    role = Column(String, nullable=False)
    email = Column(String, nullable=False)

    # Foreign key to investors.id, paired with InvestorTable.team_members.
    investor_id = Column(Integer, ForeignKey("investors.id"))
    investor = relationship(
        "InvestorTable",
        back_populates="team_members",
    )
class CompanyTable(Base, TimestampMixin):
__tablename__ = "companies"
id = Column(Integer, primary_key=True, index=True)
name = Column(String, nullable=False)
industry = Column(String, nullable=False)
location = Column(String, nullable=False)
description = Column(String, nullable=True)
founded_year = Column(Integer, nullable=True)
website = Column(String, nullable=True)
created_at = Column(DateTime, default=datetime.datetime.now(datetime.UTC))
updated_at = Column(
DateTime,
default=datetime.datetime.now(datetime.UTC),
onupdate=datetime.datetime.now(datetime.UTC),
)
members = relationship("CompanyMember", back_populates="company")
# Relationship back to investors
investors = relationship(
"InvestorTable",
@@ -90,8 +106,23 @@ class CompanyTable(Base):
back_populates="portfolio_companies",
)
sectors = relationship(
"SectorTable", secondary=company_sector_association, back_populates="companies"
)
class SectorTable(Base):
class CompanyMember(Base, TimestampMixin):
    """Row of the ``company_members`` table, linked to one company."""

    __tablename__ = "company_members"

    id = Column(Integer, primary_key=True)

    # Descriptive fields; none of them is declared NOT NULL.
    name = Column(String)
    linkedin = Column(String)
    role = Column(String)

    # Mandatory foreign key to companies.id, paired with CompanyTable.members.
    company_id = Column(
        Integer,
        ForeignKey("companies.id"),
        nullable=False,
    )
    company = relationship(
        "CompanyTable",
        back_populates="members",
    )
class SectorTable(Base, TimestampMixin):
__tablename__ = "sectors"
id = Column(Integer, primary_key=True, index=True)
@@ -104,13 +135,6 @@ class SectorTable(Base):
back_populates="sectors",
)
class InvestorTeamMember(Base):
    """Row of the ``investor_team`` table, linked to one investor."""

    __tablename__ = "investor_team"

    id = Column(Integer, primary_key=True, index=True)

    # Required descriptive fields.
    name = Column(String, nullable=False)
    role = Column(String, nullable=False)
    email = Column(String, nullable=False)

    # Foreign key to investors.id, paired with InvestorTable.team_members.
    investor_id = Column(Integer, ForeignKey("investors.id"))
    investor = relationship(
        "InvestorTable",
        back_populates="team_members",
    )
companies = relationship(
"CompanyTable", secondary=company_sector_association, back_populates="sectors"
)
-115
View File
@@ -1,115 +0,0 @@
import json
from typing import List, Optional
from pydantic import BaseModel
from sqlalchemy import JSON, Column, DateTime, Integer, String, Text
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.sql import func
# Declarative base local to this module; the Investor model below subclasses it.
# NOTE(review): this is a separate Base (and therefore separate metadata) from
# any base defined in other modules -- confirm that is intended.
Base = declarative_base()
class Investor(Base):
    """ORM model mapped to the ``investors`` table."""

    __tablename__ = "investors"

    id = Column(Integer, primary_key=True, autoincrement=True)
    name = Column(String(500), nullable=False)
    website = Column(String(1000))

    # -- Core investment information --------------------------------------
    investor_description = Column(Text)
    investment_thesis_focus = Column(JSON)  # list of focus areas
    headquarters = Column(String(1000))

    # -- Assets under management (all stored as strings) -------------------
    aum_amount = Column(String(200))
    aum_as_of_date = Column(String(100))
    aum_source_url = Column(String(1000))

    # -- Fund information ---------------------------------------------------
    funds_info = Column(JSON)  # complex fund data

    # -- Raw source columns kept for reference ------------------------------
    crunchbase_urls = Column(Text)
    crunchbase_extract = Column(Text)
    linkedin_profile = Column(Text)
    source_truth_profile = Column(Text)

    # -- Metadata -----------------------------------------------------------
    # created_at is defaulted by the database; updated_at only has an
    # onupdate hook, so it has no insert-time default.
    created_at = Column(
        DateTime(timezone=True),
        server_default=func.now(),
    )
    updated_at = Column(
        DateTime(timezone=True),
        onupdate=func.now(),
    )

    def __repr__(self):
        """Return a short debug string showing the name and website."""
        return f"<Investor(name='{self.name}', website='{self.website}')>"
# Pydantic models for data validation and parsing
class AUMInfo(BaseModel):
    """Assets-under-management details; every field is optional text."""

    # camelCase names are kept as-is; presumably they mirror the keys of the
    # incoming JSON payload -- verify against the data source.
    aumAmount: Optional[str] = None
    asOfDate: Optional[str] = None
    sourceUrl: Optional[str] = None
class FundInfo(BaseModel):
    """Description of a single fund; every field is optional text."""

    fundName: Optional[str] = None
    fundSize: Optional[str] = None
    vintage: Optional[str] = None
    status: Optional[str] = None
    description: Optional[str] = None
class InvestorProfile(BaseModel):
    """Structured investor profile; every field is optional."""

    websiteURL: Optional[str] = None
    investorDescription: Optional[str] = None
    investmentThesisFocus: Optional[List[str]] = None
    headquarters: Optional[str] = None

    # Nested models for the AUM summary and the list of funds.
    overallAssetsUnderManagement: Optional[AUMInfo] = None
    funds: Optional[List[FundInfo]] = None
class CSVRow(BaseModel):
    """One input row: a required ``name`` plus optional free-text columns.

    ``investment_firm_profile`` is expected to hold JSON text; the helper
    methods read its ``investorDescription`` and ``investmentThesisFocus``
    keys. All other optional columns are treated as plain text.
    """

    name: str
    website: Optional[str] = None
    investment_firm_profile: Optional[str] = None
    crunchbase_linkedin_urls: Optional[str] = None
    crunchbase_firm_extract: Optional[str] = None
    linkedin_investment_profile: Optional[str] = None
    source_of_truth_profile: Optional[str] = None

    def _load_profile(self) -> dict:
        """Decode ``investment_firm_profile``; return ``{}`` if unusable."""
        if not self.investment_firm_profile:
            return {}
        try:
            profile_data = json.loads(self.investment_firm_profile)
        except (json.JSONDecodeError, TypeError):
            # Deliberate best effort: a malformed profile simply
            # contributes nothing instead of failing the whole row.
            return {}
        return profile_data if isinstance(profile_data, dict) else {}

    def get_combined_description(self) -> str:
        """Combine all description fields for vector embedding.

        Returns:
            The non-empty description texts joined by single spaces, in the
            order: profile ``investorDescription``, Crunchbase extract,
            LinkedIn profile, source-of-truth profile. Empty string when
            none is present.
        """
        descriptions = []

        desc = self._load_profile().get("investorDescription", "")
        # Only accept real text: a truthy non-string value (number, list,
        # object) would otherwise make the final join raise TypeError.
        if desc and isinstance(desc, str):
            descriptions.append(desc)

        descriptions.extend(
            text
            for text in (
                self.crunchbase_firm_extract,
                self.linkedin_investment_profile,
                self.source_of_truth_profile,
            )
            if text
        )
        return " ".join(descriptions)

    def get_investment_focus(self) -> List[str]:
        """Extract investment thesis focus.

        Returns:
            The ``investmentThesisFocus`` list from the JSON profile, or an
            empty list when the profile is missing, malformed, or the value
            is not a list.
        """
        focus = self._load_profile().get("investmentThesisFocus", [])
        return focus if isinstance(focus, list) else []