Implement investor processing and querying functionality

- Added InvestorProcessor class for processing CSV data in batches and saving to SQL and vector databases.
- Introduced QueryProcessor class for querying investor information from SQL and vector databases.
- Integrated OpenAI's ChatGPT for structured output generation.
- Implemented data cleaning and control character removal in CSV processing.
- Added asynchronous processing capabilities for batch handling.
- Established connection to ChromaDB for vector storage of investor descriptions.
- Defined structured output schemas using Pydantic for investor data validation.
- Enhanced settings management for API key and database configurations.
2025-08-29 18:42:55 +01:00
parent 4c99638d94
commit ba0ed169ce
22 changed files with 719 additions and 492 deletions
@@ -1,7 +1,44 @@
-from fastapi import FastAPI
+import io
+
+import pandas as pd
+from db.db import db_dependency, init_database
+from fastapi import FastAPI, File, UploadFile
+from services.openrouter import InvestorProcessor
+
+from app.services.querying import QueryProcessor

 app = FastAPI()

+init_database()
+
+
@app.get("/")
 def read_root():
-    return {"Hello": "World"}
+    return {"Hello": "World"}
+
+
+@app.post("/parse-csv")
+async def parse_csv(db: db_dependency, file: UploadFile = File(...)):
+    """Parse an uploaded UTF-8 CSV and process it; unparseable uploads get HTTP 400."""
+    from fastapi import HTTPException  # local: not part of the module-level fastapi import
+
+    try:
+        df = pd.read_csv(io.StringIO((await file.read()).decode("utf-8")))
+    except (UnicodeDecodeError, pd.errors.EmptyDataError, pd.errors.ParserError) as exc:
+        # A bad upload is a client error; without this it surfaces as an unhandled 500
+        raise HTTPException(status_code=400, detail=f"Invalid CSV upload: {exc}") from exc
+    results = await InvestorProcessor(sql_session=db).process_csv(df)
+    return {"results": [r.dict() for r in results]}  # Pydantic objects -> dictionaries
+
+
+@app.post("/query")
+async def query_investors(db: db_dependency, question: str):
+    """Run `question` through QueryProcessor and return each result as a dict."""
+    answers = QueryProcessor(sql_session=db).process_query(question)
+    return {"results": [answer.dict() for answer in answers]}
+
+
+if __name__ == "__main__":
+    # Direct-run entry point: serve "main:app" on localhost:8000 with auto-reload.
+    import uvicorn
+    uvicorn.run("main:app", host="localhost", port=8000, reload=True)