Implement investor processing and querying functionality

- Added InvestorProcessor class for processing CSV data in batches and saving to SQL and vector databases.
- Introduced QueryProcessor class for querying investor information from SQL and vector databases.
- Integrated OpenAI's ChatGPT for structured output generation.
- Implemented data cleaning and control character removal in CSV processing.
- Added asynchronous processing capabilities for batch handling.
- Established connection to ChromaDB for vector storage of investor descriptions.
- Defined structured output schemas using Pydantic for investor data validation.
- Enhanced settings management for API key and database configurations.
This commit is contained in:
bolade
2025-08-29 18:42:55 +01:00
parent 4c99638d94
commit ba0ed169ce
22 changed files with 719 additions and 492 deletions
+39 -2
View File
@@ -1,7 +1,44 @@
from fastapi import FastAPI
import io
import pandas as pd
from db.db import db_dependency, init_database
from fastapi import FastAPI, File, UploadFile
from services.openrouter import InvestorProcessor
from app.services.querying import QueryProcessor
# Application instance; the route decorators below register their handlers on it.
app = FastAPI()
# Called at import time, before any request handler can run.
# NOTE(review): presumably creates or prepares the database schema — confirm in db.db.
init_database()
@app.get("/")
def read_root():
    """Return a fixed greeting payload for the root path.

    Returns:
        The dictionary ``{"Hello": "World"}``.
    """
    return {"Hello": "World"}
@app.post("/parse-csv")
async def parse_csv(db: db_dependency, file: UploadFile = File(...)):
    """Parse an uploaded CSV file and run it through ``InvestorProcessor``.

    Args:
        db: Database session supplied by ``db_dependency``; handed to the
            processor as ``sql_session``.
        file: The uploaded CSV file. Its bytes are decoded as UTF-8.

    Returns:
        A dict with a single ``"results"`` key holding one dictionary per
        object returned by ``InvestorProcessor.process_csv``.

    Raises:
        HTTPException: Status 400 when the upload is not valid UTF-8, is
            empty, or cannot be parsed as CSV.
    """
    # Local import keeps this handler self-contained with respect to the
    # module-level import block.
    from fastapi import HTTPException

    # Read uploaded CSV with pandas
    content = await file.read()
    try:
        df = pd.read_csv(io.StringIO(content.decode("utf-8")))
    except (
        UnicodeDecodeError,
        pd.errors.EmptyDataError,
        pd.errors.ParserError,
    ) as exc:
        # Malformed client input is a client error, not an unhandled 500.
        raise HTTPException(
            status_code=400,
            detail=f"Could not parse uploaded file as UTF-8 CSV: {exc}",
        ) from exc

    # Process the dataframe
    processor = InvestorProcessor(sql_session=db)
    results = await processor.process_csv(df)

    # Convert Pydantic objects to dictionaries
    return {"results": [r.dict() for r in results]}
@app.post("/query")
async def query_investors(db: db_dependency, question: str):
    """Answer an investor question by delegating to ``QueryProcessor``.

    Args:
        db: Database session supplied by ``db_dependency``; handed to the
            processor as ``sql_session``.
        question: The question text forwarded to ``process_query``.

    Returns:
        A dict with a single ``"results"`` key holding the ``.dict()`` form
        of every item returned by ``QueryProcessor.process_query``.
    """
    matches = QueryProcessor(sql_session=db).process_query(question)

    serialized = []
    for match in matches:
        serialized.append(match.dict())
    return {"results": serialized}
if __name__ == "__main__":
    # Script entry point: serve the app locally when this file is run directly.
    import uvicorn

    server_options = {
        "app": "main:app",
        "host": "localhost",
        "port": 8000,
        "reload": True,
    }
    uvicorn.run(**server_options)