cd7172ed9f
- Implemented a new test script `test_parser.py` to validate the functionality of the manual JSON parser. - The script loads investor data from a CSV file and processes a sample of three investors. - Results include detailed information about each investor, their funds, team members, and investment thesis. - Added error handling for missing API key in the environment variables.
102 lines
2.8 KiB
Python
102 lines
2.8 KiB
Python
import io

import pandas as pd
from dotenv import load_dotenv
from fastapi import FastAPI, File, Form, HTTPException, UploadFile
from pydantic import BaseModel

from db.db import Base, db_dependency, engine
from routers import companies, investors, projects
from schemas.router_schemas import InvestorList
from services.llm_parser import InvestorProcessor
from services.querying import QueryProcessor
|
|
|
|
# Load variables from a .env file into the process environment (python-dotenv).
load_dotenv()
|
|
|
|
|
|
def init_database():
    """Create the tables registered on ``Base.metadata`` using the shared engine."""
    metadata = Base.metadata
    metadata.create_all(bind=engine)
|
|
|
|
|
|
# Runs at import time, so table creation happens before the app object is built.
init_database()
|
|
|
|
# Application instance; the script entry point at the bottom serves it as "main:app".
app = FastAPI()
|
|
|
|
|
|
# Request models
|
|
class QueryRequest(BaseModel):
    """Request body accepted by the ``/query`` endpoint.

    Attributes:
        question: Free-text question describing the investors to search for.
    """

    question: str

    # Pydantic v2 style configuration: the nested ``class Config`` form is
    # deprecated there and emits a warning. A plain dict is accepted for
    # ``model_config``, so no extra import is required.
    model_config = {
        "json_schema_extra": {
            "example": {
                "question": "Find me deep tech investors that do deals in Europe under 5 million."
            }
        }
    }
|
|
|
|
|
|
# Root endpoint: always answers with the same fixed JSON payload.
@app.get("/")
def health():
    payload = {"Hello": "World"}
    return payload
|
|
|
|
|
|
@app.post("/parse-csv", tags=["CSV Upload"], response_model=list[dict])
async def parse_csv(
    db: db_dependency, file: UploadFile = File(...), is_investor: int = Form(...)
):
    """
    Parse and import CSV data into the database.

    For investors: Expected columns - Name, Website, Final Investor Profile, Final Profile sourcing
    For companies: Uses legacy LLM-based parsing

    The new investor parser:
    - Manually parses JSON profiles for efficiency
    - Uses LLM only for currency conversion to USD
    - Handles AUM, fund sizes, and check sizes as integers
    - Automatically saves to database

    Form fields:
    - file: UTF-8 encoded CSV upload (a leading byte-order mark is tolerated)
    - is_investor: 1 selects the investor parser, any other value the company parser

    Returns the parser results as a list of dicts. Responds with HTTP 400 when
    the upload is not valid UTF-8, is empty, or cannot be parsed as CSV.
    """
    # NOTE: ``db`` is injected by FastAPI but not used directly in this handler.
    raw = await file.read()

    try:
        # "utf-8-sig" strips a leading BOM (common in spreadsheet exports) that
        # plain "utf-8" would leave attached to the first column name.
        text = raw.decode("utf-8-sig")
        df = pd.read_csv(io.StringIO(text))
    except UnicodeDecodeError as exc:
        raise HTTPException(
            status_code=400, detail="Uploaded file is not valid UTF-8 text."
        ) from exc
    except (pd.errors.EmptyDataError, pd.errors.ParserError) as exc:
        raise HTTPException(
            status_code=400, detail=f"Could not parse CSV: {exc}"
        ) from exc

    processor = InvestorProcessor()

    if is_investor == 1:
        # New manual parser with LLM currency conversion; it already returns dicts.
        return await processor.parse_investors(df, save_to_db=True)

    # Legacy LLM-based company parser; convert Pydantic objects to dictionaries.
    results = await processor.parse_companies(df, save_to_db=True)
    return [r.model_dump() if hasattr(r, "model_dump") else r for r in results]
|
|
|
|
|
|
@app.post("/query", response_model=InvestorList, tags=["Querying"])
async def query_investors(request: QueryRequest):
    """
    Query investors using natural language.

    Supports queries like:
    - "Show me seed stage investors"
    - "Find fintech investors in Silicon Valley"
    - "Growth stage investors with $5M+ check sizes"
    - "Healthcare investors in Europe"
    """
    # A fresh QueryProcessor is created per request and its result is handed
    # back unchanged.
    return QueryProcessor().process_query(request.question)
|
|
|
|
|
|
# Mount the feature routers on the app; registration order is preserved.
for _router_module in (investors, companies, projects):
    app.include_router(_router_module.router)
|
|
|
|
if __name__ == "__main__":
    # Imported here so uvicorn is only loaded when this file is run as a script.
    import uvicorn

    # Serve "main:app" on all interfaces, port 8585, with auto-reload enabled.
    uvicorn.run(
        "main:app",
        host="0.0.0.0",
        port=8585,
        reload=True,
    )
|