ba0ed169ce
- Added InvestorProcessor class for processing CSV data in batches and saving to SQL and vector databases. - Introduced QueryProcessor class for querying investor information from SQL and vector databases. - Integrated OpenAI's ChatGPT for structured output generation. - Implemented data cleaning and control character removal in CSV processing. - Added asynchronous processing capabilities for batch handling. - Established connection to ChromaDB for vector storage of investor descriptions. - Defined structured output schemas using Pydantic for investor data validation. - Enhanced settings management for API key and database configurations.
41 lines
943 B
Python
41 lines
943 B
Python
import os
|
|
from typing import Annotated
|
|
|
|
from fastapi import Depends
|
|
from sqlalchemy import create_engine
|
|
from sqlalchemy.ext.declarative import declarative_base
|
|
from sqlalchemy.orm import Session, sessionmaker
|
|
|
|
# Connection URL comes from the DATABASE_URL environment variable; when it is
# unset, a local SQLite file named investors.db is used instead.
DATABASE_URL = os.getenv("DATABASE_URL", "sqlite:///investors.db")

# Single engine for the module, with SQL echo switched off.
engine = create_engine(DATABASE_URL, echo=False)

# Session factory bound to the engine above; sessions it produces have
# autoflush and autocommit turned off.
SessionLocal = sessionmaker(bind=engine, autoflush=False, autocommit=False)

# Declarative base whose metadata is used by init_database() to create tables.
# NOTE(review): declarative_base is imported from sqlalchemy.ext.declarative;
# newer SQLAlchemy releases also expose it from sqlalchemy.orm -- confirm
# against the version this project pins.
Base = declarative_base()
|
|
|
|
|
|
def get_db():
    """Yield a new session from ``SessionLocal`` and close it afterwards.

    This is a generator: it is wrapped in ``Depends`` by ``db_dependency``
    in this module. The session is closed whether the consumer finishes
    normally or raises.

    Yields:
        The session created by ``SessionLocal()``.
    """
    session = SessionLocal()
    try:
        yield session
    finally:
        # Always release the session, including when the consumer raised.
        session.close()
|
|
|
|
|
|
# Annotated alias for route/function parameters: the parameter is typed as a
# Session and its value is provided through Depends(get_db).
db_dependency = Annotated[Session, Depends(get_db)]
|
|
|
|
|
|
def init_database():
    """Create all tables registered on ``Base.metadata`` via the module engine.

    A confirmation message is printed to stdout after ``create_all`` returns.
    """
    metadata = Base.metadata
    metadata.create_all(bind=engine)
    print("Database initialized successfully!")
|
|
|
|
|
|
def get_session_sync() -> Session:
    """Return a fresh session from ``SessionLocal`` for direct, non-generator use.

    The session is handed back open; nothing in this function closes it, so
    closing it is left to the caller.
    """
    session = SessionLocal()
    return session
|