# Anton_wireframe/app/main.py

import io

import pandas as pd
from db.db import Base, db_dependency, engine
from dotenv import load_dotenv
from fastapi import FastAPI, File, Form, HTTPException, UploadFile
from pydantic import BaseModel
from routers import (
    companies,
    folk_crm,
    insight_route,
    investors,
    projects,
    report_route,
)
from schemas.router_schemas import CompanyData, InvestmentResponse, PaginatedResponse
from services.company_querying import CompanyQueryProcessor
from services.llm_parser import InvestorProcessor
from services.querying import QueryProcessor

# Load variables from a .env file into the process environment.
# NOTE(review): this runs after the imports above, so any imported module that
# reads environment variables at import time (e.g. db.db when it builds
# `engine`) will not see .env values unless it loads them itself — confirm.
load_dotenv()


def init_database():
    """Create the tables registered on ``Base.metadata`` through the shared engine."""
    metadata = Base.metadata
    metadata.create_all(bind=engine)


# Run table creation at import time, before the application object is built.
init_database()

# Application instance; the endpoints and routers below are registered on it.
app = FastAPI()


# Request models
class QueryRequest(BaseModel):
    """Request body for ``POST /query``: a natural-language investor question."""

    # Free-text question that is handed to the query processor as-is.
    question: str

    # Pydantic v2 configuration. The nested ``class Config`` form is deprecated
    # in v2 (it emits a deprecation warning), while ``json_schema_extra`` is a
    # v2-only key, so the config is declared through ``model_config`` instead.
    # The example below is what appears in the generated OpenAPI schema.
    model_config = {
        "json_schema_extra": {
            "example": {
                "question": "Find me deep tech investors that do deals in Europe under 5 million."
            }
        }
    }


class CompanyQueryRequest(BaseModel):
    """Request body for ``POST /query-companies``: a natural-language company question."""

    # Free-text question that is handed to the company query processor as-is.
    question: str

    # Pydantic v2 configuration. The nested ``class Config`` form is deprecated
    # in v2 (it emits a deprecation warning), while ``json_schema_extra`` is a
    # v2-only key, so the config is declared through ``model_config`` instead.
    # The example below is what appears in the generated OpenAPI schema.
    model_config = {
        "json_schema_extra": {
            "example": {
                "question": "Find me companies in the fintech sector located in San Francisco."
            }
        }
    }


@app.get("/")
def health():
    # Root endpoint: returns a fixed payload and touches no other state.
    # Kept as a comment rather than a docstring so the published OpenAPI
    # description of this route stays unchanged.
    payload = {"Hello": "World"}
    return payload


@app.post("/parse-csv", tags=["CSV Upload"], response_model=list[dict])
async def parse_csv(
    db: db_dependency, file: UploadFile = File(...), is_investor: int = Form(...)
):
    """
    Parse and import CSV data into the database.

    **For investors:**
    - Expected columns: Name, Website, Final Investor Profile, Final Profile sourcing
    - Manually parses JSON profiles for efficiency
    - Uses LLM only for currency conversion to USD
    - Handles AUM, fund sizes, and check sizes as integers

    **For companies:**
    - Expected columns: Name, Website, Perplexity Gap Output (or Final Investor Profile)
    - 100% manual JSON parsing - no LLM needed
    - **Only extracts:** founded_year and key_executives
    - **Only updates companies already in the database** (syncs with existing records)
    - Skips companies not found in the database

    **Benefits:**
    - Fast processing (5-10s per record)
    - Low cost (minimal or no LLM usage)
    - Accurate data extraction
    - Automatic database persistence
    - Safe: won't create duplicate companies

    **Errors:**
    - Responds with HTTP 400 when the upload is empty, is not UTF-8 text,
      or cannot be parsed as CSV
    """
    # NOTE: `db` is injected by FastAPI but not referenced in this body; the
    # processor is only told to persist via `save_to_db=True`.

    # The whole upload is read into memory and parsed with pandas. A bad
    # upload is a client error, so decoding/parsing failures are reported as
    # 400 instead of escaping as an unhandled 500.
    content = await file.read()
    try:
        df = pd.read_csv(io.StringIO(content.decode("utf-8")))
    except UnicodeDecodeError as exc:
        raise HTTPException(
            status_code=400, detail="Uploaded file must be a UTF-8 encoded CSV."
        ) from exc
    except (pd.errors.EmptyDataError, pd.errors.ParserError) as exc:
        raise HTTPException(
            status_code=400, detail=f"Could not parse CSV: {exc}"
        ) from exc

    processor = InvestorProcessor()

    # `is_investor == 1` selects the investor parser; every other value is
    # treated as a company upload. Both parsers are awaited with the same
    # arguments and their result is returned unchanged.
    parse = processor.parse_investors if is_investor == 1 else processor.parse_companies
    return await parse(df, save_to_db=True)


@app.post(
    "/query",
    response_model=PaginatedResponse[InvestmentResponse],
    tags=["Querying"],
)
async def query_investors(request: QueryRequest):
    """
    Query investors using natural language.

    Returns fund-level matches (one row per fund) with investor details.
    This ensures only relevant funds are included in the response.

    Supports queries like:
    - "Show me seed stage investors"
    - "Find fintech investors in Silicon Valley"
    - "Growth stage investors with $5M+ check sizes"
    - "Healthcare investors in Europe"
    """
    # A new QueryProcessor is built for each request; the raw question text is
    # passed straight through and the processor's result is returned as-is
    # for FastAPI to validate against the response model.
    return await QueryProcessor().process_query(request.question)


@app.post(
    "/query-companies",
    response_model=PaginatedResponse[CompanyData],
    tags=["Querying"],
)
async def query_companies(request: CompanyQueryRequest):
    """
    Query companies using natural language.

    Returns company matches with their investor relationships, team members, and sectors.

    Supports queries like:
    - "Show me fintech companies founded in 2020"
    - "Find healthcare companies in San Francisco"
    - "Companies in the AI sector"
    - "Companies that received funding from Sequoia"
    - "European startups founded after 2019"
    """
    # A new CompanyQueryProcessor is built for each request; the raw question
    # text is passed straight through and the processor's result is returned
    # as-is for FastAPI to validate against the response model.
    return await CompanyQueryProcessor().process_query(request.question)


# Mount each feature module's router on the application. The tuple lists the
# modules in registration order.
for feature_module in (
    investors,
    companies,
    projects,
    folk_crm,
    insight_route,
    report_route,
):
    app.include_router(feature_module.router)

if __name__ == "__main__":
    # Script entry point: uvicorn is imported only when the module is run
    # directly, and the app is referenced by its import string.
    import uvicorn

    server_options = {"app": "main:app", "host": "0.0.0.0", "port": 8585}
    uvicorn.run(**server_options)