import io

import pandas as pd
from db.db import Base, db_dependency, engine
from dotenv import load_dotenv
from fastapi import FastAPI, File, Form, HTTPException, UploadFile
from pydantic import BaseModel
from routers import (
    companies,
    folk_crm,
    insight_route,
    investors,
    projects,
    report_route,
)
from schemas.router_schemas import CompanyData, InvestmentResponse, PaginatedResponse
from services.company_querying import CompanyQueryProcessor
from services.llm_parser import InvestorProcessor
from services.querying import QueryProcessor

# Load variables from a local .env file into the process environment.
load_dotenv()


def init_database() -> None:
    """Initialize the database by creating all tables"""
    Base.metadata.create_all(bind=engine)


# Runs at import time, before the FastAPI app object is created.
init_database()

app = FastAPI()


# Request models
class QueryRequest(BaseModel):
    """Request body for the natural-language investor query endpoint."""

    # Free-text question that is handed to QueryProcessor.process_query.
    question: str

    class Config:
        json_schema_extra = {
            "example": {
                "question": "Find me deep tech investors that do deals in Europe under 5 million."
            }
        }


class CompanyQueryRequest(BaseModel):
    """Request body for the natural-language company query endpoint."""

    # Free-text question that is handed to CompanyQueryProcessor.process_query.
    question: str

    class Config:
        json_schema_extra = {
            "example": {
                "question": "Find me companies in the fintech sector located in San Francisco."
            }
        }


@app.get("/")
def health():
    """Return a static payload so callers can check the service is up."""
    return {"Hello": "World"}


def _read_csv_upload(content: bytes) -> pd.DataFrame:
    """Decode uploaded bytes as UTF-8 and parse them as CSV.

    Args:
        content: Raw bytes of the uploaded file.

    Returns:
        The parsed CSV as a DataFrame.

    Raises:
        HTTPException: 400 when the bytes are not valid UTF-8, or when pandas
            reports the payload as empty or malformed CSV.
    """
    try:
        text = content.decode("utf-8")
    except UnicodeDecodeError as exc:
        raise HTTPException(
            status_code=400,
            detail="Uploaded file is not valid UTF-8 text.",
        ) from exc

    try:
        return pd.read_csv(io.StringIO(text))
    except (pd.errors.EmptyDataError, pd.errors.ParserError) as exc:
        # Bad uploads are a client error, not a server fault.
        raise HTTPException(
            status_code=400,
            detail=f"Uploaded file could not be parsed as CSV: {exc}",
        ) from exc


@app.post("/parse-csv", tags=["CSV Upload"], response_model=list[dict])
async def parse_csv(
    db: db_dependency, file: UploadFile = File(...), is_investor: int = Form(...)
):
    """
    Parse and import CSV data into the database.

    **For investors:**
    - Expected columns: Name, Website, Final Investor Profile, Final Profile sourcing
    - Manually parses JSON profiles for efficiency
    - Uses LLM only for currency conversion to USD
    - Handles AUM, fund sizes, and check sizes as integers

    **For companies:**
    - Expected columns: Name, Website, Perplexity Gap Output (or Final Investor Profile)
    - 100% manual JSON parsing - no LLM needed
    - **Only extracts:** founded_year and key_executives
    - **Only updates companies already in the database** (syncs with existing records)
    - Skips companies not found in the database

    **Benefits:**
    - Fast processing (5-10s per record)
    - Low cost (minimal or no LLM usage)
    - Accurate data extraction
    - Automatic database persistence
    - Safe: won't create duplicate companies

    **Errors:**
    - 400 if the uploaded file is empty, is not UTF-8 text, or is not valid CSV
    """
    # Read uploaded CSV with pandas; invalid uploads are rejected with a 400.
    content = await file.read()
    df = _read_csv_upload(content)

    # Process the dataframe. Both parsers already return dicts.
    processor = InvestorProcessor()

    if is_investor == 1:
        # Manual parser with LLM currency conversion
        return await processor.parse_investors(df, save_to_db=True)

    # Any other value selects the manual company parser (no LLM needed).
    return await processor.parse_companies(df, save_to_db=True)


@app.post(
    "/query", response_model=PaginatedResponse[InvestmentResponse], tags=["Querying"]
)
async def query_investors(request: QueryRequest):
    """
    Query investors using natural language.
    Returns fund-level matches (one row per fund) with investor details.
    This ensures only relevant funds are included in the response.

    Supports queries like:
    - "Show me seed stage investors"
    - "Find fintech investors in Silicon Valley"
    - "Growth stage investors with $5M+ check sizes"
    - "Healthcare investors in Europe"
    """
    processor = QueryProcessor()
    results = await processor.process_query(request.question)
    return results


@app.post(
    "/query-companies", response_model=PaginatedResponse[CompanyData], tags=["Querying"]
)
async def query_companies(request: CompanyQueryRequest):
    """
    Query companies using natural language.
    Returns company matches with their investor relationships, team members, and sectors.

    Supports queries like:
    - "Show me fintech companies founded in 2020"
    - "Find healthcare companies in San Francisco"
    - "Companies in the AI sector"
    - "Companies that received funding from Sequoia"
    - "European startups founded after 2019"
    """
    processor = CompanyQueryProcessor()
    results = await processor.process_query(request.question)
    return results


# Mount the feature routers on the application.
app.include_router(investors.router)
app.include_router(companies.router)
app.include_router(projects.router)
app.include_router(folk_crm.router)
app.include_router(insight_route.router)
app.include_router(report_route.router)

if __name__ == "__main__":
    import uvicorn

    uvicorn.run(app="main:app", host="0.0.0.0", port=8585)