Implement investor processing and querying functionality
- Added InvestorProcessor class for processing CSV data in batches and saving to SQL and vector databases.
- Introduced QueryProcessor class for querying investor information from SQL and vector databases.
- Integrated OpenAI's ChatGPT for structured output generation.
- Implemented data cleaning and control character removal in CSV processing.
- Added asynchronous processing capabilities for batch handling.
- Established connection to ChromaDB for vector storage of investor descriptions.
- Defined structured output schemas using Pydantic for investor data validation.
- Enhanced settings management for API key and database configurations.
This commit is contained in:
Binary file not shown.
Binary file not shown.
Binary file not shown.
+15
-17
@@ -1,11 +1,12 @@
|
||||
import os
|
||||
from contextlib import contextmanager
|
||||
from typing import Generator
|
||||
from typing import Annotated
|
||||
|
||||
from fastapi import Depends
|
||||
from sqlalchemy import create_engine
|
||||
from sqlalchemy.ext.declarative import declarative_base
|
||||
from sqlalchemy.orm import Session, sessionmaker
|
||||
|
||||
from schema import Base
|
||||
Base = declarative_base()
|
||||
|
||||
# Database configuration: the connection URL is read from the DATABASE_URL
# environment variable and falls back to a SQLite file named investors.db.
DATABASE_URL = os.getenv("DATABASE_URL", "sqlite:///investors.db")
|
||||
@@ -17,26 +18,23 @@ engine = create_engine(DATABASE_URL, echo=False)
|
||||
# Session factory bound to the module-level engine. Sessions it creates do not
# autocommit or autoflush, so callers commit explicitly.
SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine)
|
||||
|
||||
|
||||
def get_db():
    """Yield a fresh ``SessionLocal`` session and always close it afterwards.

    Written as a generator so it can be wrapped by ``Depends`` (see
    ``db_dependency``): the session is handed to the consumer at the
    ``yield`` and closed in the ``finally`` clause once the generator is
    resumed or finalized, whether or not the consumer raised.
    """
    session = SessionLocal()
    try:
        yield session
    finally:
        session.close()
|
||||
|
||||
|
||||
# Annotated alias: a Session type carrying Depends(get_db) as metadata, so a
# FastAPI endpoint parameter declared with it receives the session yielded by
# get_db.
db_dependency = Annotated[Session, Depends(get_db)]
|
||||
|
||||
|
||||
def init_database():
    """Create every table registered on ``Base.metadata`` using ``engine``.

    Prints a confirmation message to stdout once ``create_all`` returns.
    """
    metadata = Base.metadata
    metadata.create_all(bind=engine)
    print("Database initialized successfully!")
|
||||
|
||||
|
||||
@contextmanager
def get_session() -> Generator[Session, None, None]:
    """Provide a transactional session scope as a context manager.

    A new ``SessionLocal`` session is yielded to the ``with`` body. When the
    body finishes normally the session is committed; if the body (or the
    commit itself) raises an ``Exception`` the session is rolled back and the
    same exception is re-raised. The session is closed in every case.
    """
    db_session = SessionLocal()
    try:
        yield db_session
        # Commit stays inside the try so a failing commit is rolled back too.
        db_session.commit()
    except Exception:
        db_session.rollback()
        raise
    finally:
        db_session.close()
|
||||
|
||||
|
||||
def get_session_sync() -> Session:
    """Return a new ``SessionLocal`` session for synchronous operations.

    No cleanup is performed here: the returned session is neither committed
    nor closed by this function, so the caller owns its lifecycle.
    """
    new_session = SessionLocal()
    return new_session
|
||||
|
||||
@@ -0,0 +1,23 @@
|
||||
import datetime
|
||||
|
||||
from sqlalchemy import Column, DateTime, Integer, String
|
||||
|
||||
from db.db import Base
|
||||
|
||||
|
||||
def _utc_now():
    """Return the current timezone-aware UTC time (evaluated on each call)."""
    return datetime.datetime.now(datetime.UTC)


class InvestorTable(Base):
    """ORM model mapped to the ``investors`` table.

    Every descriptive column is declared ``nullable=False``. The two
    timestamp columns are filled in by SQLAlchemy via Python-side defaults.
    """

    __tablename__ = "investors"

    # Integer primary key, indexed.
    id = Column(Integer, primary_key=True, index=True)
    name = Column(String, nullable=False)
    # Stored as an integer; check_size and the focus/region fields are text.
    aum = Column(Integer, nullable=False)
    check_size = Column(String, nullable=False)
    sector_focus = Column(String, nullable=False)
    stage_focus = Column(String, nullable=False)
    region = Column(String, nullable=False)

    # The defaults must be *callables*: SQLAlchemy invokes them when the
    # INSERT/UPDATE is executed. Passing the result of ``now()`` instead would
    # evaluate it once at import time, stamping every row with the process
    # start time and never advancing ``updated_at``.
    created_at = Column(DateTime, default=_utc_now)
    updated_at = Column(
        DateTime,
        default=_utc_now,
        onupdate=_utc_now,
    )
|
||||
Reference in New Issue
Block a user