"""FastAPI application entry point.

Creates the database tables, configures CORS, exposes the CSV-import and
natural-language query endpoints, and mounts the feature routers.
"""

import io
import logging
import os

import pandas as pd
from dotenv import load_dotenv
from fastapi import FastAPI, File, Form, HTTPException, UploadFile
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel

from db.db import Base, db_dependency, engine
from routers import (
    addition,
    companies,
    folk_crm,
    insight_route,
    investors,
    projects,
    report_route,
)
from schemas.router_schemas import CompanyData, InvestmentResponse, PaginatedResponse
from services.company_querying import CompanyQueryProcessor
from services.llm_parser import InvestorProcessor
from services.querying import QueryProcessor

load_dotenv()

logger = logging.getLogger(__name__)


def init_database():
    """Initialize the database by creating all tables"""
    Base.metadata.create_all(bind=engine)


def _cors_origins() -> list[str]:
    """Return the allowed CORS origins.

    Reads a comma-separated list from the ``CORS_ALLOW_ORIGINS`` environment
    variable (after ``load_dotenv()`` has run). Falls back to ``["*"]`` when
    the variable is unset or blank, which matches the previous hard-coded
    value.
    """
    raw = os.getenv("CORS_ALLOW_ORIGINS", "")
    origins = [origin.strip() for origin in raw.split(",") if origin.strip()]
    return origins or ["*"]


def _read_csv_upload(content: bytes) -> pd.DataFrame:
    """Decode uploaded CSV bytes and parse them into a DataFrame.

    Args:
        content: Raw bytes of the uploaded file.

    Returns:
        The parsed DataFrame.

    Raises:
        HTTPException: 400 when the bytes are not valid UTF-8, or when pandas
            cannot parse them (empty file or malformed CSV).
    """
    try:
        # "utf-8-sig" strips a leading byte-order mark (common in Excel
        # exports); with plain "utf-8" the BOM ends up glued to the first
        # column name and that column is no longer found by name.
        text = content.decode("utf-8-sig")
    except UnicodeDecodeError as exc:
        raise HTTPException(
            status_code=400, detail="Uploaded file must be a UTF-8 encoded CSV."
        ) from exc

    try:
        return pd.read_csv(io.StringIO(text))
    except (pd.errors.EmptyDataError, pd.errors.ParserError) as exc:
        raise HTTPException(
            status_code=400, detail=f"Could not parse CSV: {exc}"
        ) from exc


# Tables must exist before the app starts serving requests.
init_database()

app = FastAPI()

# Add CORS middleware to allow frontend requests
# NOTE(review): the default origin list is still the wildcard "*" combined with
# allow_credentials=True, which lets any site make credentialed requests.
# Set CORS_ALLOW_ORIGINS to the real frontend origin(s) in production.
app.add_middleware(
    CORSMiddleware,
    allow_origins=_cors_origins(),
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)


# Request models
class QueryRequest(BaseModel):
    """Request body for the investor/fund natural-language query endpoint."""

    question: str

    class Config:
        json_schema_extra = {
            "example": {
                "question": "Find me deep tech investors that do deals in Europe under 5 million."
            }
        }


class CompanyQueryRequest(BaseModel):
    """Request body for the company natural-language query endpoint."""

    question: str

    class Config:
        json_schema_extra = {
            "example": {
                "question": "Find me companies in the fintech sector located in San Francisco."
            }
        }


@app.get("/")
def health():
    # Static payload; used as a simple liveness check.
    return {"Hello": "World"}


@app.post("/parse-csv", tags=["CSV Upload"], response_model=list[dict])
async def parse_csv(
    db: db_dependency, file: UploadFile = File(...), is_investor: int = Form(...)
):
    """
    Parse and import CSV data into the database.

    **For investors:**
    - Expected columns: Name, Website, Final Investor Profile, Final Profile sourcing
    - Manually parses JSON profiles for efficiency
    - Uses LLM only for currency conversion to USD
    - Handles AUM, fund sizes, and check sizes as integers

    **For companies:**
    - Expected columns: Name, Website, Perplexity Gap Output (or Final Investor Profile)
    - 100% manual JSON parsing - no LLM needed
    - **Only extracts:** founded_year and key_executives
    - **Only updates companies already in the database** (syncs with existing records)
    - Skips companies not found in the database

    **Benefits:**
    - Fast processing (5-10s per record)
    - Low cost (minimal or no LLM usage)
    - Accurate data extraction
    - Automatic database persistence
    - Safe: won't create duplicate companies

    **Errors:**
    - 400 if the file is not UTF-8 encoded, is empty, or is not valid CSV
    """
    # Read uploaded CSV with pandas
    content = await file.read()
    df = _read_csv_upload(content)

    # Process the dataframe; both parsers already return lists of dicts.
    processor = InvestorProcessor()
    if is_investor == 1:
        # Manual parser with LLM currency conversion
        return await processor.parse_investors(df, save_to_db=True)

    # Any other value selects the company parser (no LLM needed)
    return await processor.parse_companies(df, save_to_db=True)


@app.post(
    "/query", response_model=PaginatedResponse[InvestmentResponse], tags=["Querying"]
)
async def query_investors(request: QueryRequest):
    """Query investors/funds using natural language"""
    try:
        processor = QueryProcessor()
        result = await processor.process_query(request.question)
        logger.info("Query completed successfully with %s results", result.total)
        return result
    except HTTPException:
        # Keep status codes chosen further down the stack instead of
        # rewriting them as a generic 500.
        raise
    except Exception as e:
        logger.exception("Error in query_investors: %s", e)
        raise HTTPException(status_code=500, detail=str(e)) from e


@app.post(
    "/query-companies", response_model=PaginatedResponse[CompanyData], tags=["Querying"]
)
async def query_companies(request: CompanyQueryRequest):
    """Query companies using natural language"""
    try:
        processor = CompanyQueryProcessor()
        result = await processor.process_query(request.question)
        logger.info(
            "Company query completed successfully with %s results", result.total
        )
        return result
    except HTTPException:
        # Keep status codes chosen further down the stack instead of
        # rewriting them as a generic 500.
        raise
    except Exception as e:
        logger.exception("Error in query_companies: %s", e)
        raise HTTPException(status_code=500, detail=str(e)) from e


app.include_router(investors.router)
app.include_router(companies.router)
app.include_router(projects.router)
app.include_router(folk_crm.router)
app.include_router(insight_route.router)
app.include_router(report_route.router)
app.include_router(addition.router)

if __name__ == "__main__":
    import uvicorn

    uvicorn.run(app="main:app", host="0.0.0.0", port=8585)