ba0ed169ce
- Added InvestorProcessor class for processing CSV data in batches and saving to SQL and vector databases. - Introduced QueryProcessor class for querying investor information from SQL and vector databases. - Integrated OpenAI's ChatGPT for structured output generation. - Implemented data cleaning and control character removal in CSV processing. - Added asynchronous processing capabilities for batch handling. - Established connection to ChromaDB for vector storage of investor descriptions. - Defined structured output schemas using Pydantic for investor data validation. - Enhanced settings management for API key and database configurations.
45 lines
1.1 KiB
Python
45 lines
1.1 KiB
Python
import io
|
|
|
|
import pandas as pd
|
|
from db.db import db_dependency, init_database
|
|
from fastapi import FastAPI, File, UploadFile
|
|
from services.openrouter import InvestorProcessor
|
|
|
|
from app.services.querying import QueryProcessor
|
|
|
|
# ASGI application object; the route decorators in this module register
# their handlers on it.
app = FastAPI()

# Executed once, at import time, before any route can be served.
init_database()
|
|
|
|
|
|
@app.get("/")
def read_root():
    """Return a fixed greeting payload for the root path."""
    greeting = {"Hello": "World"}
    return greeting
|
|
|
|
|
|
@app.post("/parse-csv")
async def parse_csv(db: db_dependency, file: UploadFile = File(...)):
    """Parse an uploaded CSV file and run it through ``InvestorProcessor``.

    The upload is read fully into memory, decoded as UTF-8, loaded into a
    pandas DataFrame and handed to ``InvestorProcessor.process_csv``.

    Args:
        db: Database session injected through ``db_dependency``.
        file: The uploaded CSV file (required multipart form field).

    Returns:
        A dict ``{"results": [...]}`` where each entry is the ``.dict()``
        form of one object returned by ``process_csv``.

    Raises:
        HTTPException: 400 if the upload is not valid UTF-8, is empty, or
            cannot be parsed as CSV. Previously these input errors escaped
            as unhandled exceptions and were reported as a server error.
    """
    # Local import keeps this handler self-contained; fastapi is already a
    # dependency of this module.
    from fastapi import HTTPException

    raw_bytes = await file.read()

    # Bad input is the client's fault, so report it as 400 rather than
    # letting the exception bubble up as a 500.
    try:
        text = raw_bytes.decode("utf-8")
    except UnicodeDecodeError as exc:
        raise HTTPException(
            status_code=400,
            detail="Uploaded file is not valid UTF-8 text.",
        ) from exc

    try:
        df = pd.read_csv(io.StringIO(text))
    except (pd.errors.EmptyDataError, pd.errors.ParserError) as exc:
        raise HTTPException(
            status_code=400,
            detail=f"Could not parse uploaded file as CSV: {exc}",
        ) from exc

    # Process the dataframe
    processor = InvestorProcessor(sql_session=db)
    results = await processor.process_csv(df)

    # Convert Pydantic objects to dictionaries
    return {"results": [r.dict() for r in results]}
|
|
|
|
|
|
@app.post("/query")
async def query_investors(db: db_dependency, question: str):
    """Run ``question`` through ``QueryProcessor`` and return the matches.

    Args:
        db: Database session injected through ``db_dependency``.
        question: The question text forwarded unchanged to
            ``QueryProcessor.process_query``.

    Returns:
        A dict ``{"results": [...]}`` where each entry is the ``.dict()``
        form of one object returned by ``process_query``.
    """
    query_processor = QueryProcessor(sql_session=db)

    # NOTE(review): process_query is called without ``await`` -- confirm it
    # is a synchronous method and not a coroutine function.
    matches = query_processor.process_query(question)

    serialized = []
    for match in matches:
        serialized.append(match.dict())
    return {"results": serialized}
|
|
|
|
|
|
if __name__ == "__main__":
    # Imported here so uvicorn is only required when this file is executed
    # directly as a script.
    import uvicorn

    # The app is passed as an import string ("module:attribute") together
    # with reload=True.
    uvicorn.run(
        app="main:app",
        host="localhost",
        port=8000,
        reload=True,
    )
|