Implement investor processing and querying functionality

- Added InvestorProcessor class for processing CSV data in batches and saving to SQL and vector databases.
- Introduced QueryProcessor class for querying investor information from SQL and vector databases.
- Integrated OpenAI's ChatGPT for structured output generation.
- Implemented data cleaning and control character removal in CSV processing.
- Added asynchronous processing capabilities for batch handling.
- Established connection to ChromaDB for vector storage of investor descriptions.
- Defined structured output schemas using Pydantic for investor data validation.
- Enhanced settings management for API key and database configurations.
This commit is contained in:
bolade
2025-08-29 18:42:55 +01:00
parent 4c99638d94
commit ba0ed169ce
22 changed files with 719 additions and 492 deletions
+15 -17
View File
@@ -1,11 +1,12 @@
import os
from contextlib import contextmanager
from typing import Generator
from typing import Annotated
from fastapi import Depends
from sqlalchemy import create_engine
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import Session, sessionmaker
from schema import Base
Base = declarative_base()
# Database configuration: the connection URL is read from the DATABASE_URL
# environment variable and falls back to a local SQLite file when it is unset.
DATABASE_URL = os.getenv("DATABASE_URL", "sqlite:///investors.db")
@@ -17,26 +18,23 @@ engine = create_engine(DATABASE_URL, echo=False)
# Session factory bound to `engine`; sessions it produces are created with
# autocommit and autoflush both disabled.
SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine)
def get_db():
    """Yield a new ``SessionLocal`` session and close it afterwards.

    Written as a generator so it can be used as a dependency (see
    ``db_dependency``): the session is handed to the consumer at the
    ``yield`` and is closed in the ``finally`` clause whether or not the
    consumer raised.
    """
    session = SessionLocal()
    try:
        yield session
    finally:
        # Always release the session, even when the consumer failed.
        session.close()
# Reusable FastAPI dependency type: a parameter annotated with this alias is
# typed as a Session and is supplied through Depends(get_db).
db_dependency = Annotated[Session, Depends(get_db)]
def init_database():
    """Create all tables registered on ``Base.metadata`` against ``engine``.

    Prints a confirmation message once ``create_all`` has returned.
    """
    metadata = Base.metadata
    metadata.create_all(bind=engine)
    print("Database initialized successfully!")
@contextmanager
def get_session() -> Generator[Session, None, None]:
    """Provide a transactional session scope with automatic cleanup.

    Usage: ``with get_session() as session: ...``

    Yields:
        A new session created from ``SessionLocal``.

    Behavior:
        * If the ``with`` body finishes normally, the session is committed.
        * If the body (or the commit itself) raises an ``Exception``, the
          session is rolled back and the same exception propagates.
        * The session is closed in every case.

    Raises:
        Whatever exception the ``with`` body or ``commit()`` raised.
    """
    session = SessionLocal()
    try:
        yield session
        session.commit()
    except Exception:
        session.rollback()
        # Bare ``raise`` re-raises the active exception as-is, without adding
        # an extra traceback entry the way ``raise e`` does.
        raise
    finally:
        session.close()
def get_session_sync() -> Session:
    """Return a new session created from ``SessionLocal``.

    This function neither commits nor closes the session; both are left to
    the caller.
    """
    new_session = SessionLocal()
    return new_session