import io

import pandas as pd
from db.db import Base, db_dependency, engine
from dotenv import load_dotenv
from fastapi import FastAPI, File, Form, HTTPException, UploadFile
from pydantic import BaseModel
from routers import (
    companies,
    folk_crm,
    insight_route,
    investors,
    projects,
    report_route,
)
from schemas.router_schemas import InvestmentResponse, PaginatedResponse
from services.llm_parser import InvestorProcessor
from services.querying import QueryProcessor

# Load environment variables from a .env file before anything else runs.
load_dotenv()


def init_database():
    """Initialize the database by creating all tables"""
    Base.metadata.create_all(bind=engine)


# Tables are created at import time, before the application object exists.
init_database()

app = FastAPI()


# Request models
class QueryRequest(BaseModel):
    """Request body for the ``/query`` endpoint: one natural-language question."""

    question: str

    class Config:
        # Example payload attached to the generated JSON schema.
        json_schema_extra = {
            "example": {
                "question": "Find me deep tech investors that do deals in Europe under 5 million."
            }
        }


@app.get("/")
def health():
    """Return a fixed payload so callers can check the service is up."""
    return {"Hello": "World"}


def _read_csv_upload(content: bytes) -> pd.DataFrame:
    """Decode raw upload bytes as UTF-8 and parse them into a DataFrame.

    Raises:
        HTTPException: 400 if the bytes are not valid UTF-8, or if pandas
            cannot parse them as CSV (empty or malformed file).
    """
    try:
        # "utf-8-sig" drops a leading byte-order mark if present (some
        # spreadsheet exporters add one). With plain "utf-8" the BOM stays
        # attached to the first header, turning "Name" into "\ufeffName".
        text = content.decode("utf-8-sig")
    except UnicodeDecodeError as exc:
        raise HTTPException(
            status_code=400, detail="CSV file must be UTF-8 encoded."
        ) from exc

    try:
        return pd.read_csv(io.StringIO(text))
    except (pd.errors.EmptyDataError, pd.errors.ParserError) as exc:
        raise HTTPException(
            status_code=400, detail=f"Could not parse CSV: {exc}"
        ) from exc


@app.post("/parse-csv", tags=["CSV Upload"], response_model=list[dict])
async def parse_csv(
    db: db_dependency, file: UploadFile = File(...), is_investor: int = Form(...)
):
    """
    Parse and import CSV data into the database.

    **For investors:**
    - Expected columns: Name, Website, Final Investor Profile, Final Profile sourcing
    - Manually parses JSON profiles for efficiency
    - Uses LLM only for currency conversion to USD
    - Handles AUM, fund sizes, and check sizes as integers

    **For companies:**
    - Expected columns: Name, Website, Investor, Final Investor Profile (company profile)
    - 100% manual JSON parsing - no LLM needed
    - Extracts company details, executives, investors, and client categories
    - Automatically links companies to investors in database

    **Benefits:**
    - Fast processing (5-10s per record)
    - Low cost (minimal or no LLM usage)
    - Accurate data extraction
    - Automatic database persistence

    **Errors:**
    - 400 if the file is not UTF-8 encoded, is empty, or is not valid CSV
    """
    # `db` is not used in this handler body; it is kept because it is part of
    # the endpoint's declared dependencies.
    # Read the whole upload into memory and parse it with pandas.
    content = await file.read()
    df = _read_csv_upload(content)

    processor = InvestorProcessor()

    if is_investor == 1:
        # Manual parser with LLM currency conversion; results are already dicts.
        return await processor.parse_investors(df, save_to_db=True)

    # Any value other than 1 is treated as a company upload.
    # Manual parser for companies (no LLM needed); results are already dicts.
    return await processor.parse_companies(df, save_to_db=True)


@app.post(
    "/query", response_model=PaginatedResponse[InvestmentResponse], tags=["Querying"]
)
async def query_investors(request: QueryRequest):
    """
    Query investors using natural language.

    Returns fund-level matches (one row per fund) with investor details.
    This ensures only relevant funds are included in the response.

    Supports queries like:
    - "Show me seed stage investors"
    - "Find fintech investors in Silicon Valley"
    - "Growth stage investors with $5M+ check sizes"
    - "Healthcare investors in Europe"
    """
    processor = QueryProcessor()
    # NOTE(review): process_query is called synchronously inside an async
    # handler; if it does blocking I/O it will stall the event loop - confirm.
    results = processor.process_query(request.question)
    return results


app.include_router(investors.router)
app.include_router(companies.router)
app.include_router(projects.router)
app.include_router(folk_crm.router)
app.include_router(insight_route.router)
app.include_router(report_route.router)


if __name__ == "__main__":
    import uvicorn

    uvicorn.run(app="main:app", host="0.0.0.0", port=8585, reload=True)