Files
Anton_wireframe/app/main.py
T
bolade cd7172ed9f Add test script for manual JSON parser with LLM currency conversion
- Implemented a new test script `test_parser.py` to validate the functionality of the manual JSON parser.
- The script loads investor data from a CSV file and processes a sample of three investors.
- Results include detailed information about each investor, their funds, team members, and investment thesis.
- Added error handling for missing API key in the environment variables.
2025-10-06 14:07:28 +01:00

102 lines
2.8 KiB
Python

import io
import pandas as pd
from db.db import Base, db_dependency, engine
from dotenv import load_dotenv
from fastapi import FastAPI, File, Form, UploadFile
from pydantic import BaseModel
from routers import companies, investors, projects
from schemas.router_schemas import InvestorList
from services.llm_parser import InvestorProcessor
from services.querying import QueryProcessor
# Load environment variables from a .env file (python-dotenv) at import time.
load_dotenv()
def init_database():
    """Create all tables declared on ``Base.metadata`` using the shared engine."""
    schema = Base.metadata
    schema.create_all(bind=engine)
# Runs at import time: tables are created before the application object is built.
init_database()
# The ASGI application; routes and routers below are registered on this object.
app = FastAPI()
# Request models
class QueryRequest(BaseModel):
    # Request body accepted by the POST /query endpoint.

    # Free-text question supplied by the client.
    question: str

    # Pydantic v2 configuration. The class-based ``Config`` is deprecated in
    # v2, and this file already relies on v2 names (``json_schema_extra``,
    # ``model_dump``). A plain dict is accepted for ``model_config``, so no
    # extra import is required.
    model_config = {
        "json_schema_extra": {
            "example": {
                "question": "Find me deep tech investors that do deals in Europe under 5 million."
            }
        }
    }
@app.get("/")
def health():
    # Root endpoint: returns a fixed JSON payload.
    # A comment is used rather than a docstring so the generated OpenAPI
    # description for this route stays unchanged.
    payload = {"Hello": "World"}
    return payload
@app.post("/parse-csv", tags=["CSV Upload"], response_model=list[dict])
async def parse_csv(
    db: db_dependency, file: UploadFile = File(...), is_investor: int = Form(...)
):
    """
    Parse and import CSV data into the database.

    For investors: Expected columns - Name, Website, Final Investor Profile, Final Profile sourcing
    For companies: Uses legacy LLM-based parsing

    The new investor parser:
    - Manually parses JSON profiles for efficiency
    - Uses LLM only for currency conversion to USD
    - Handles AUM, fund sizes, and check sizes as integers
    - Automatically saves to database

    Args:
        db: Injected database dependency (not referenced directly in this handler).
        file: The uploaded CSV file; it is decoded as UTF-8.
        is_investor: ``1`` selects the investor parser; any other value
            selects the company parser.

    Returns:
        A list of dictionaries, one per parsed record.

    Raises:
        HTTPException: 400 if the upload is not valid UTF-8, is empty, or
            cannot be parsed as CSV.
    """
    # Local import keeps this handler self-contained; fastapi is already a
    # dependency of this module.
    from fastapi import HTTPException

    # Read the whole upload into memory.
    content = await file.read()

    # A bad upload is a client error: report 400 instead of letting the
    # decoding/parsing exception surface as a 500.
    try:
        text = content.decode("utf-8")
    except UnicodeDecodeError as exc:
        raise HTTPException(
            status_code=400, detail="Uploaded file is not valid UTF-8 text."
        ) from exc

    try:
        df = pd.read_csv(io.StringIO(text))
    except (pd.errors.EmptyDataError, pd.errors.ParserError) as exc:
        raise HTTPException(
            status_code=400, detail=f"Could not parse CSV: {exc}"
        ) from exc

    # Process the dataframe
    processor = InvestorProcessor()
    if is_investor == 1:
        # New manual parser with LLM currency conversion;
        # results are already dicts from the new parser.
        return await processor.parse_investors(df, save_to_db=True)

    # Legacy LLM-based company parser
    results = await processor.parse_companies(df, save_to_db=True)
    # Convert Pydantic objects to dictionaries; pass anything else through.
    return [r.model_dump() if hasattr(r, "model_dump") else r for r in results]
@app.post("/query", response_model=InvestorList, tags=["Querying"])
async def query_investors(request: QueryRequest):
    """
    Query investors using natural language.

    Supports queries like:
    - "Show me seed stage investors"
    - "Find fintech investors in Silicon Valley"
    - "Growth stage investors with $5M+ check sizes"
    - "Healthcare investors in Europe"
    """
    # A new QueryProcessor is built for every request and handed the raw
    # question; its result is returned as-is for FastAPI to serialise
    # against the InvestorList response model.
    return QueryProcessor().process_query(request.question)
# Register the routers from the routers package on the application.
app.include_router(investors.router)
app.include_router(companies.router)
app.include_router(projects.router)
if __name__ == "__main__":
    # Only reached when this file is executed directly as a script.
    import uvicorn

    # The app is passed as an import string ("main:app") together with
    # reload=True; the server binds to all interfaces on port 8585.
    server_options = {
        "app": "main:app",
        "host": "0.0.0.0",
        "port": 8585,
        "reload": True,
    }
    uvicorn.run(**server_options)