117 lines
3.4 KiB
Python
117 lines
3.4 KiB
Python
import io
|
|
|
|
import pandas as pd
|
|
from db.db import Base, db_dependency, engine
|
|
from dotenv import load_dotenv
|
|
from fastapi import FastAPI, File, Form, UploadFile
|
|
from pydantic import BaseModel
|
|
from routers import companies, folk_crm, investors, projects
|
|
from schemas.router_schemas import InvestmentResponse, PaginatedResponse
|
|
from services.llm_parser import InvestorProcessor
|
|
from services.querying import QueryProcessor
|
|
|
|
# Load variables from a .env file into the process environment (python-dotenv).
# NOTE(review): this call runs after `db.db`, the routers and the services have
# already been imported above, so any of those modules that read environment
# variables at import time will not see .env values unless they load the file
# themselves — confirm.
load_dotenv()
|
|
|
|
|
|
def init_database():
    """Create all tables registered on ``Base.metadata`` using the shared engine."""
    schema = Base.metadata
    schema.create_all(bind=engine)
|
|
|
|
|
|
# Executed at import time: table creation runs whenever this module is imported.
init_database()
|
|
|
|
# Application instance; the route decorators and include_router calls below
# register their endpoints on it.
app = FastAPI()
|
|
|
|
|
|
# Request models
|
|
class QueryRequest(BaseModel):
    # Request body for POST /query.
    # (Kept as `#` comments rather than a class docstring so the generated JSON
    # schema description is not altered.)

    # Free-text question; handed to QueryProcessor.process_query by the endpoint.
    question: str

    # `json_schema_extra` is a Pydantic v2 configuration key, and Pydantic v2
    # deprecates the nested `class Config` in favour of `model_config`.
    # A plain dict is accepted here, so no additional import is needed.
    model_config = {
        "json_schema_extra": {
            "example": {
                "question": "Find me deep tech investors that do deals in Europe under 5 million."
            }
        }
    }
|
|
|
|
|
|
@app.get("/")
def health():
    # Root endpoint: returns a fixed payload and touches no other state.
    greeting = {"Hello": "World"}
    return greeting
|
|
|
|
|
|
@app.post("/parse-csv", tags=["CSV Upload"], response_model=list[dict])
async def parse_csv(
    db: db_dependency, file: UploadFile = File(...), is_investor: int = Form(...)
):
    """
    Parse and import CSV data into the database.

    **For investors:**
    - Expected columns: Name, Website, Final Investor Profile, Final Profile sourcing
    - Manually parses JSON profiles for efficiency
    - Uses LLM only for currency conversion to USD
    - Handles AUM, fund sizes, and check sizes as integers

    **For companies:**
    - Expected columns: Name, Website, Investor, Final Investor Profile (company profile)
    - 100% manual JSON parsing - no LLM needed
    - Extracts company details, executives, investors, and client categories
    - Automatically links companies to investors in database

    **Benefits:**
    - Fast processing (5-10s per record)
    - Low cost (minimal or no LLM usage)
    - Accurate data extraction
    - Automatic database persistence

    **Errors:**
    - 400 if the upload is not valid UTF-8 or cannot be parsed as CSV (including an empty file)
    """
    # Imported locally so this handler does not rely on a change to the
    # module-level import list; HTTPException ships with FastAPI.
    from fastapi import HTTPException

    # NOTE: `db` is not referenced in this body; it is kept because it is part
    # of the endpoint's declared dependencies.

    # Read the uploaded file fully into memory.
    raw_bytes = await file.read()

    # A client-side problem (wrong encoding, empty or malformed CSV) should be
    # reported as 400 rather than escaping as an unhandled 500.
    try:
        csv_text = raw_bytes.decode("utf-8")
    except UnicodeDecodeError as exc:
        raise HTTPException(
            status_code=400, detail="Uploaded file is not valid UTF-8."
        ) from exc

    try:
        df = pd.read_csv(io.StringIO(csv_text))
    except (pd.errors.EmptyDataError, pd.errors.ParserError) as exc:
        raise HTTPException(
            status_code=400, detail=f"Could not parse uploaded CSV: {exc}"
        ) from exc

    processor = InvestorProcessor()

    # is_investor == 1 selects the investor parser (manual parsing with LLM
    # currency conversion); every other value uses the company parser
    # (manual parsing, no LLM needed).
    if is_investor == 1:
        parse_rows = processor.parse_investors
    else:
        parse_rows = processor.parse_companies

    # Results are already dicts from the parser, so they are returned as-is.
    return await parse_rows(df, save_to_db=True)
|
|
|
|
|
|
@app.post(
    "/query",
    response_model=PaginatedResponse[InvestmentResponse],
    tags=["Querying"],
)
async def query_investors(request: QueryRequest):
    """
    Query investors using natural language.

    Returns fund-level matches (one row per fund) with investor details.
    This ensures only relevant funds are included in the response.

    Supports queries like:
    - "Show me seed stage investors"
    - "Find fintech investors in Silicon Valley"
    - "Growth stage investors with $5M+ check sizes"
    - "Healthcare investors in Europe"
    """
    # A fresh QueryProcessor is built per request and given the raw question text;
    # whatever it returns is handed back unchanged.
    return QueryProcessor().process_query(request.question)
|
|
|
|
|
|
# Register each feature router on the application, preserving the original
# order: investors, companies, projects, folk_crm.
for _feature_module in (investors, companies, projects, folk_crm):
    app.include_router(_feature_module.router)
|
|
|
|
if __name__ == "__main__":
    # Imported here so uvicorn is only required when the module is run as a script.
    import uvicorn

    # Serve the app by import string on all interfaces, port 8585, with reload on.
    uvicorn.run(
        app="main:app",
        host="0.0.0.0",
        port=8585,
        reload=True,
    )
|