app/main.py

import io
import logging

import pandas as pd
from db.db import Base, db_dependency, engine
from dotenv import load_dotenv
from fastapi import FastAPI, File, Form, HTTPException, UploadFile
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel
from routers import (
    addition,
    companies,
    folk_crm,
    insight_route,
    investors,
    projects,
    report_route,
)
from schemas.router_schemas import CompanyData, InvestmentResponse, PaginatedResponse
from services.company_querying import CompanyQueryProcessor
from services.llm_parser import InvestorProcessor
from services.querying import QueryProcessor

load_dotenv()


def init_database():
    """Create the tables registered on ``Base.metadata`` through the shared ``engine``."""
    table_registry = Base.metadata
    table_registry.create_all(bind=engine)


# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# Runs at import time, before the application object is created.
init_database()

app = FastAPI()

# Add CORS middleware to allow frontend requests.
# NOTE(review): a wildcard origin combined with allow_credentials=True is a very
# permissive setup — confirm it is only meant for development.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],  # In production, replace with specific origins
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)


# Request models
class QueryRequest(BaseModel):
    # Request body accepted by the investor query endpoint (POST /query).
    # Documented with comments rather than a docstring so the published
    # schema does not gain a description it did not have before.

    # Free-text question; the endpoint forwards it unchanged to the query processor.
    question: str

    # Pydantic v2 reads model settings from `model_config`; the nested
    # `class Config` form is deprecated there and warns at class creation.
    # The schema payload itself is unchanged.
    model_config = {
        "json_schema_extra": {
            "example": {
                "question": "Find me deep tech investors that do deals in Europe under 5 million."
            }
        }
    }


class CompanyQueryRequest(BaseModel):
    # Request body accepted by the company query endpoint (POST /query-companies).
    # Documented with comments rather than a docstring so the published
    # schema does not gain a description it did not have before.

    # Free-text question; the endpoint forwards it unchanged to the query processor.
    question: str

    # Pydantic v2 reads model settings from `model_config`; the nested
    # `class Config` form is deprecated there and warns at class creation.
    # The schema payload itself is unchanged.
    model_config = {
        "json_schema_extra": {
            "example": {
                "question": "Find me companies in the fintech sector located in San Francisco."
            }
        }
    }


@app.get("/")
def health():
    # Root route: always answers with the same static JSON body.
    # (Kept as a comment, not a docstring, so the generated API description
    # for this route stays exactly as it was.)
    greeting = {"Hello": "World"}
    return greeting


@app.post("/parse-csv", tags=["CSV Upload"], response_model=list[dict])
async def parse_csv(
    db: db_dependency, file: UploadFile = File(...), is_investor: int = Form(...)
):
    """
    Parse and import CSV data into the database.

    **For investors:**
    - Expected columns: Name, Website, Final Investor Profile, Final Profile sourcing
    - Manually parses JSON profiles for efficiency
    - Uses LLM only for currency conversion to USD
    - Handles AUM, fund sizes, and check sizes as integers

    **For companies:**
    - Expected columns: Name, Website, Perplexity Gap Output (or Final Investor Profile)
    - 100% manual JSON parsing - no LLM needed
    - **Only extracts:** founded_year and key_executives
    - **Only updates companies already in the database** (syncs with existing records)
    - Skips companies not found in the database

    **Benefits:**
    - Fast processing (5-10s per record)
    - Low cost (minimal or no LLM usage)
    - Accurate data extraction
    - Automatic database persistence
    - Safe: won't create duplicate companies

    **Errors:**
    - Responds with HTTP 400 when the upload is empty, is not UTF-8 text, or is not parseable as CSV
    """
    # `db` is not referenced in this body. It stays in the signature because
    # removing it would change what is resolved for this route
    # (db_dependency is declared in db.db, which is not visible from here).
    raw_bytes = await file.read()

    try:
        # "utf-8-sig" decodes ordinary UTF-8 and additionally drops a leading
        # byte-order mark, so a BOM never reaches the CSV parser.
        csv_text = raw_bytes.decode("utf-8-sig")
        df = pd.read_csv(io.StringIO(csv_text))
    except (
        UnicodeDecodeError,
        pd.errors.EmptyDataError,
        pd.errors.ParserError,
    ) as exc:
        # A bad upload is the client's mistake: report it as 400 instead of
        # letting it escape as an unhandled 500.
        logger.warning("Rejected CSV upload %r: %s", file.filename, exc)
        raise HTTPException(
            status_code=400, detail=f"Could not read the uploaded CSV: {exc}"
        ) from exc

    processor = InvestorProcessor()

    # Only the exact value 1 selects the investor parser; every other integer
    # falls through to the company parser. Both results are returned as-is.
    if is_investor == 1:
        return await processor.parse_investors(df, save_to_db=True)
    return await processor.parse_companies(df, save_to_db=True)


@app.post(
    "/query", response_model=PaginatedResponse[InvestmentResponse], tags=["Querying"]
)
async def query_investors(request: QueryRequest):
    """Query investors/funds using natural language"""
    # Flow: build a QueryProcessor, await process_query on the question text,
    # log the reported total and return the processor's result unchanged.
    try:
        processor = QueryProcessor()
        result = await processor.process_query(request.question)
        logger.info("Query completed successfully with %s results", result.total)
        return result
    except HTTPException:
        # An HTTP error raised on purpose further down keeps its own status
        # code instead of being flattened into a 500.
        raise
    except Exception as e:
        # logger.exception logs at ERROR level and attaches the traceback.
        logger.exception("Error in query_investors: %s", e)
        # NOTE(review): `detail` echoes the raw exception text to the client.
        raise HTTPException(status_code=500, detail=str(e)) from e


@app.post(
    "/query-companies", response_model=PaginatedResponse[CompanyData], tags=["Querying"]
)
async def query_companies(request: CompanyQueryRequest):
    """Query companies using natural language"""
    # Flow: build a CompanyQueryProcessor, await process_query on the question
    # text, log the reported total and return the processor's result unchanged.
    try:
        processor = CompanyQueryProcessor()
        result = await processor.process_query(request.question)
        logger.info(
            "Company query completed successfully with %s results", result.total
        )
        return result
    except HTTPException:
        # An HTTP error raised on purpose further down keeps its own status
        # code instead of being flattened into a 500.
        raise
    except Exception as e:
        # logger.exception logs at ERROR level and attaches the traceback.
        logger.exception("Error in query_companies: %s", e)
        # NOTE(review): `detail` echoes the raw exception text to the client.
        raise HTTPException(status_code=500, detail=str(e)) from e


# Register each feature router on the application, one after another in this order.
for _feature_module in (
    investors,
    companies,
    projects,
    folk_crm,
    insight_route,
    report_route,
    addition,
):
    app.include_router(_feature_module.router)

if __name__ == "__main__":
    # Imported here so uvicorn is only required when this file is run as a script.
    import uvicorn

    # Serve the "main:app" import string on every interface, port 8585.
    launch_options = {"app": "main:app", "host": "0.0.0.0", "port": 8585}
    uvicorn.run(**launch_options)
Implement investor processing and querying functionality 2025-08-29 18:42:55 +01:00			`import io`
Added logging to main 2025-10-28 21:34:20 +01:00			`import logging`
Refactor investor-related schemas and models; update database configuration and enhance investor processing logic 2025-09-02 15:51:35 +01:00
Implement investor processing and querying functionality 2025-08-29 18:42:55 +01:00			`import pandas as pd`
Refactor database models and schemas to allow nullable fields; update init_database function for improved initialization. 2025-09-26 15:24:42 +01:00			`from db.db import Base, db_dependency, engine`
made version 2 2025-09-25 17:00:38 +01:00			`from dotenv import load_dotenv`
Added logging to main 2025-10-28 21:34:20 +01:00			`from fastapi import FastAPI, File, Form, HTTPException, UploadFile`
			`from fastapi.middleware.cors import CORSMiddleware`
Refactor investor and company management API with FastAPI integration 2025-09-03 10:32:19 +01:00			`from pydantic import BaseModel`
feat: Implement report generation service and add report route for investor profiles 2025-10-14 12:02:23 +01:00			`from routers import (`
made improvements 2025-11-26 08:04:11 +00:00			`addition,`
feat: Implement report generation service and add report route for investor profiles 2025-10-14 12:02:23 +01:00			`companies,`
			`folk_crm,`
			`insight_route,`
			`investors,`
			`projects,`
			`report_route,`
			`)`
feat: Implement company querying functionality with natural language processing and logging 2025-10-27 20:12:30 +01:00			`from schemas.router_schemas import CompanyData, InvestmentResponse, PaginatedResponse`
			`from services.company_querying import CompanyQueryProcessor`
made version 2 2025-09-25 17:00:38 +01:00			`from services.llm_parser import InvestorProcessor`
Add CompanyTable model and refactor query handling; update requirements for new dependencies 2025-09-02 12:22:50 +01:00			`from services.querying import QueryProcessor`
Implement LLM-powered Investor Parser with CSV processing, SQL and vector database integration 2025-08-28 22:51:58 +01:00
made version 2 2025-09-25 17:00:38 +01:00			`load_dotenv()`
Refactor database models and schemas to allow nullable fields; update init_database function for improved initialization. 2025-09-26 15:24:42 +01:00

			`def init_database():`
			`"""Initialize the database by creating all tables"""`
			`Base.metadata.create_all(bind=engine)`


Added logging to main 2025-10-28 21:34:20 +01:00			`logger = logging.getLogger(__name__)`

Refactor investor-related schemas and models; update database configuration and enhance investor processing logic 2025-09-02 15:51:35 +01:00			`init_database()`
Implement investor processing and querying functionality 2025-08-29 18:42:55 +01:00
made version 2 2025-09-25 17:00:38 +01:00			`app = FastAPI()`

Added logging to main 2025-10-28 21:34:20 +01:00			`# Add CORS middleware to allow frontend requests`
			`app.add_middleware(`
			`CORSMiddleware,`
			`allow_origins=["*"], # In production, replace with specific origins`
			`allow_credentials=True,`
			`allow_methods=["*"],`
			`allow_headers=["*"],`
			`)`

Implement investor processing and querying functionality 2025-08-29 18:42:55 +01:00
Refactor investor and company management API with FastAPI integration 2025-09-03 10:32:19 +01:00			`# Request models`
			`class QueryRequest(BaseModel):`
			`question: str`

			`class Config:`
			`json_schema_extra = {`
			`"example": {`
made version 2 2025-09-25 17:00:38 +01:00			`"question": "Find me deep tech investors that do deals in Europe under 5 million."`
Refactor investor and company management API with FastAPI integration 2025-09-03 10:32:19 +01:00			`}`
			`}`

made querying async 2025-10-28 21:09:47 +01:00
fixed querying 2025-10-28 20:54:15 +01:00			`class CompanyQueryRequest(BaseModel):`
			`question: str`

			`class Config:`
			`json_schema_extra = {`
			`"example": {`
			`"question": "Find me companies in the fintech sector located in San Francisco."`
			`}`
			`}`
Refactor investor and company management API with FastAPI integration 2025-09-03 10:32:19 +01:00
made querying async 2025-10-28 21:09:47 +01:00
Implement LLM-powered Investor Parser with CSV processing, SQL and vector database integration 2025-08-28 22:51:58 +01:00			`@app.get("/")`
Refactor investor and company management API with FastAPI integration 2025-09-03 10:32:19 +01:00			`def health():`
Implement investor processing and querying functionality 2025-08-29 18:42:55 +01:00			`return {"Hello": "World"}`


Refactor investor and company management API with FastAPI integration 2025-09-03 10:32:19 +01:00			`@app.post("/parse-csv", tags=["CSV Upload"], response_model=list[dict])`
Refactor database models and schemas to allow nullable fields; update init_database function for improved initialization. 2025-09-26 15:24:42 +01:00			`async def parse_csv(`
			`db: db_dependency, file: UploadFile = File(...), is_investor: int = Form(...)`
			`):`
Add test script for manual JSON parser with LLM currency conversion 2025-10-06 14:07:28 +01:00			`"""`
			`Parse and import CSV data into the database.`

Implement manual JSON parsing for company profiles; enhance data extraction and processing efficiency; add comprehensive test script for validation 2025-10-07 12:07:43 +01:00			`For investors:`
			`- Expected columns: Name, Website, Final Investor Profile, Final Profile sourcing`
Add test script for manual JSON parser with LLM currency conversion 2025-10-06 14:07:28 +01:00			`- Manually parses JSON profiles for efficiency`
			`- Uses LLM only for currency conversion to USD`
			`- Handles AUM, fund sizes, and check sizes as integers`
Implement manual JSON parsing for company profiles; enhance data extraction and processing efficiency; add comprehensive test script for validation 2025-10-07 12:07:43 +01:00
			`For companies:`
feat: Update investor report generation and HTML template to include fund details and improve data handling 2025-10-21 10:48:58 +01:00			`- Expected columns: Name, Website, Perplexity Gap Output (or Final Investor Profile)`
Implement manual JSON parsing for company profiles; enhance data extraction and processing efficiency; add comprehensive test script for validation 2025-10-07 12:07:43 +01:00			`- 100% manual JSON parsing - no LLM needed`
feat: Update investor report generation and HTML template to include fund details and improve data handling 2025-10-21 10:48:58 +01:00			`- Only extracts: founded_year and key_executives`
			`- Only updates companies already in the database (syncs with existing records)`
			`- Skips companies not found in the database`
Implement manual JSON parsing for company profiles; enhance data extraction and processing efficiency; add comprehensive test script for validation 2025-10-07 12:07:43 +01:00
			`Benefits:`
			`- Fast processing (5-10s per record)`
			`- Low cost (minimal or no LLM usage)`
			`- Accurate data extraction`
			`- Automatic database persistence`
feat: Update investor report generation and HTML template to include fund details and improve data handling 2025-10-21 10:48:58 +01:00			`- Safe: won't create duplicate companies`
Add test script for manual JSON parser with LLM currency conversion 2025-10-06 14:07:28 +01:00			`"""`
Implement investor processing and querying functionality 2025-08-29 18:42:55 +01:00			`# Read uploaded CSV with pandas`
			`content = await file.read()`
			`df = pd.read_csv(io.StringIO(content.decode("utf-8")))`

			`# Process the dataframe`
made version 2 2025-09-25 17:00:38 +01:00			`processor = InvestorProcessor()`

			`if is_investor == 1:`
Implement manual JSON parsing for company profiles; enhance data extraction and processing efficiency; add comprehensive test script for validation 2025-10-07 12:07:43 +01:00			`# Manual parser with LLM currency conversion`
Add test script for manual JSON parser with LLM currency conversion 2025-10-06 14:07:28 +01:00			`results = await processor.parse_investors(df, save_to_db=True)`
			`# Results are already dicts from the new parser`
			`return results`
made version 2 2025-09-25 17:00:38 +01:00			`else:`
Implement manual JSON parsing for company profiles; enhance data extraction and processing efficiency; add comprehensive test script for validation 2025-10-07 12:07:43 +01:00			`# Manual parser for companies (no LLM needed)`
Add test script for manual JSON parser with LLM currency conversion 2025-10-06 14:07:28 +01:00			`results = await processor.parse_companies(df, save_to_db=True)`
Implement manual JSON parsing for company profiles; enhance data extraction and processing efficiency; add comprehensive test script for validation 2025-10-07 12:07:43 +01:00			`# Results are already dicts from the new parser`
			`return results`
Refactor investor and company management API with FastAPI integration 2025-09-03 10:32:19 +01:00
Implement investor processing and querying functionality 2025-08-29 18:42:55 +01:00
feat: Update query endpoint to return paginated investment responses with fund details 2025-10-08 14:19:36 +01:00			`@app.post(`
			`"/query", response_model=PaginatedResponse[InvestmentResponse], tags=["Querying"]`
			`)`
made version 2 2025-09-25 17:00:38 +01:00			`async def query_investors(request: QueryRequest):`
Added logging to main 2025-10-28 21:34:20 +01:00			`"""Query investors/funds using natural language"""`
			`try:`
			`processor = QueryProcessor()`
			`result = await processor.process_query(request.question)`
			`logger.info(f"Query completed successfully with {result.total} results")`
			`return result`
			`except Exception as e:`
			`logger.error(f"Error in query_investors: {e}", exc_info=True)`
			`raise HTTPException(status_code=500, detail=str(e))`
Implement investor processing and querying functionality 2025-08-29 18:42:55 +01:00

feat: Implement company querying functionality with natural language processing and logging 2025-10-27 20:12:30 +01:00			`@app.post(`
			`"/query-companies", response_model=PaginatedResponse[CompanyData], tags=["Querying"]`
			`)`
fixed querying 2025-10-28 20:54:15 +01:00			`async def query_companies(request: CompanyQueryRequest):`
Added logging to main 2025-10-28 21:34:20 +01:00			`"""Query companies using natural language"""`
			`try:`
			`processor = CompanyQueryProcessor()`
			`result = await processor.process_query(request.question)`
			`logger.info(f"Company query completed successfully with {result.total} results")`
			`return result`
			`except Exception as e:`
			`logger.error(f"Error in query_companies: {e}", exc_info=True)`
			`raise HTTPException(status_code=500, detail=str(e))`
feat: Implement company querying functionality with natural language processing and logging 2025-10-27 20:12:30 +01:00

Refactor investor and company management API with FastAPI integration 2025-09-03 10:32:19 +01:00			`app.include_router(investors.router)`
			`app.include_router(companies.router)`
Add project management functionality with CRUD operations and associations; introduce project schemas and update main application routing. 2025-09-27 08:53:59 +01:00			`app.include_router(projects.router)`
feat: Integrate Folk CRM API for investor synchronization and compatibility scoring 2025-10-08 19:21:46 +01:00			`app.include_router(folk_crm.router)`
feat: Add insight generation functionality with compatibility scoring and web search integration 2025-10-13 23:19:46 +01:00			`app.include_router(insight_route.router)`
feat: Implement report generation service and add report route for investor profiles 2025-10-14 12:02:23 +01:00			`app.include_router(report_route.router)`
made improvements 2025-11-26 08:04:11 +00:00			`app.include_router(addition.router)`
Add project management functionality with CRUD operations and associations; introduce project schemas and update main application routing. 2025-09-27 08:53:59 +01:00
Implement investor processing and querying functionality 2025-08-29 18:42:55 +01:00			`if __name__ == "__main__":`
			`import uvicorn`

refactor: Improve handling of optional fields and enhance compatibility score calculations 2025-10-15 17:58:31 +00:00			`uvicorn.run(app="main:app", host="0.0.0.0", port=8585)`