164 lines
4.8 KiB
Python
164 lines
4.8 KiB
Python
import io
import logging
import os

import pandas as pd
from db.db import Base, db_dependency, engine
from dotenv import load_dotenv
from fastapi import FastAPI, File, Form, HTTPException, UploadFile
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel
from routers import (
    addition,
    companies,
    folk_crm,
    insight_route,
    investors,
    projects,
    report_route,
)
from schemas.router_schemas import CompanyData, InvestmentResponse, PaginatedResponse
from services.company_querying import CompanyQueryProcessor
from services.llm_parser import InvestorProcessor
from services.querying import QueryProcessor
|
|
|
|
# Load settings from a .env file into the process environment (python-dotenv)
# before the database tables and the app object are created further down.
# NOTE(review): the project imports above (e.g. db.db) run before this call;
# if any of them read environment variables at import time they will not see
# values that exist only in .env -- confirm.
load_dotenv()
|
|
|
|
|
|
def init_database():
    """Create all tables declared on ``Base.metadata`` via the engine from ``db.db``."""
    schema = Base.metadata
    schema.create_all(bind=engine)
|
|
|
|
|
|
logger = logging.getLogger(__name__)

# Tables are created at import time, before the FastAPI app object exists.
init_database()

app = FastAPI()

# Browser origins allowed to call this API. Read from the comma-separated
# CORS_ALLOW_ORIGINS environment variable so production can restrict origins
# without a code change; falls back to the previous wildcard when the
# variable is unset or contains no usable entries.
_cors_origins = [
    origin.strip()
    for origin in os.getenv("CORS_ALLOW_ORIGINS", "*").split(",")
    if origin.strip()
] or ["*"]

# Add CORS middleware to allow frontend requests
# NOTE(review): the FastAPI CORS documentation states that wildcard origins
# should not be combined with allow_credentials=True. The flag is left
# unchanged here to avoid breaking an existing frontend -- set
# CORS_ALLOW_ORIGINS to explicit origins in production.
app.add_middleware(
    CORSMiddleware,
    allow_origins=_cors_origins,
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
|
|
|
|
|
|
# Request models
|
|
# Request body accepted by the natural-language investor query endpoint.
# A comment is used instead of a class docstring so the generated JSON schema
# description stays as it was.
class QueryRequest(BaseModel):
    # Free-text question forwarded to the query processor.
    question: str

    # Pydantic v2 configuration; replaces the deprecated nested ``class Config``.
    # The example is surfaced in the generated OpenAPI schema.
    model_config = {
        "json_schema_extra": {
            "example": {
                "question": "Find me deep tech investors that do deals in Europe under 5 million."
            }
        }
    }
|
|
|
|
|
|
# Request body accepted by the natural-language company query endpoint.
# A comment is used instead of a class docstring so the generated JSON schema
# description stays as it was.
class CompanyQueryRequest(BaseModel):
    # Free-text question forwarded to the company query processor.
    question: str

    # Pydantic v2 configuration; replaces the deprecated nested ``class Config``.
    # The example is surfaced in the generated OpenAPI schema.
    model_config = {
        "json_schema_extra": {
            "example": {
                "question": "Find me companies in the fintech sector located in San Francisco."
            }
        }
    }
|
|
|
|
|
|
@app.get("/")
def health():
    # Root endpoint: always answers with the same fixed JSON payload.
    payload = {"Hello": "World"}
    return payload
|
|
|
|
|
|
@app.post("/parse-csv", tags=["CSV Upload"], response_model=list[dict])
async def parse_csv(
    db: db_dependency, file: UploadFile = File(...), is_investor: int = Form(...)
):
    """
    Parse and import CSV data into the database.

    **For investors:**
    - Expected columns: Name, Website, Final Investor Profile, Final Profile sourcing
    - Manually parses JSON profiles for efficiency
    - Uses LLM only for currency conversion to USD
    - Handles AUM, fund sizes, and check sizes as integers

    **For companies:**
    - Expected columns: Name, Website, Perplexity Gap Output (or Final Investor Profile)
    - 100% manual JSON parsing - no LLM needed
    - **Only extracts:** founded_year and key_executives
    - **Only updates companies already in the database** (syncs with existing records)
    - Skips companies not found in the database

    **Benefits:**
    - Fast processing (5-10s per record)
    - Low cost (minimal or no LLM usage)
    - Accurate data extraction
    - Automatic database persistence
    - Safe: won't create duplicate companies
    """
    # `db` is not referenced in this body; it stays in the signature so the
    # endpoint's declared dependencies are unchanged.

    # Read the whole upload into memory and build a DataFrame from it.
    content = await file.read()
    try:
        # utf-8-sig strips a leading byte-order mark (common in spreadsheet
        # exports) that would otherwise end up inside the first column name.
        text = content.decode("utf-8-sig")
        df = pd.read_csv(io.StringIO(text))
    except UnicodeDecodeError as exc:
        # Bad client input, not a server fault: answer 400 instead of 500.
        raise HTTPException(
            status_code=400, detail="CSV file must be UTF-8 encoded"
        ) from exc
    except (pd.errors.EmptyDataError, pd.errors.ParserError) as exc:
        raise HTTPException(
            status_code=400, detail=f"Could not parse CSV file: {exc}"
        ) from exc

    processor = InvestorProcessor()

    # is_investor == 1 selects the investor parser; any other value is
    # treated as a company upload. Both parsers already return dicts.
    if is_investor == 1:
        # Manual parser with LLM currency conversion
        return await processor.parse_investors(df, save_to_db=True)

    # Manual parser for companies (no LLM needed)
    return await processor.parse_companies(df, save_to_db=True)
|
|
|
|
|
|
@app.post(
    "/query", response_model=PaginatedResponse[InvestmentResponse], tags=["Querying"]
)
async def query_investors(request: QueryRequest):
    """Query investors/funds using natural language"""
    try:
        processor = QueryProcessor()
        result = await processor.process_query(request.question)
        logger.info("Query completed successfully with %s results", result.total)
        return result
    except HTTPException:
        # An HTTP error raised on purpose further down keeps its own status
        # code instead of being rewrapped as a generic 500.
        raise
    except Exception as e:
        # logger.exception records the traceback, like error(..., exc_info=True).
        logger.exception("Error in query_investors: %s", e)
        raise HTTPException(status_code=500, detail=str(e)) from e
|
|
|
|
|
|
@app.post(
    "/query-companies", response_model=PaginatedResponse[CompanyData], tags=["Querying"]
)
async def query_companies(request: CompanyQueryRequest):
    """Query companies using natural language"""
    try:
        processor = CompanyQueryProcessor()
        result = await processor.process_query(request.question)
        logger.info(
            "Company query completed successfully with %s results", result.total
        )
        return result
    except HTTPException:
        # An HTTP error raised on purpose further down keeps its own status
        # code instead of being rewrapped as a generic 500.
        raise
    except Exception as e:
        # logger.exception records the traceback, like error(..., exc_info=True).
        logger.exception("Error in query_companies: %s", e)
        raise HTTPException(status_code=500, detail=str(e)) from e
|
|
|
|
|
|
# Register each feature router on the app, in the order listed.
for _feature_module in (
    investors,
    companies,
    projects,
    folk_crm,
    insight_route,
    report_route,
    addition,
):
    app.include_router(_feature_module.router)
|
|
|
|
if __name__ == "__main__":
    import uvicorn

    # Start the server only when this file is executed directly.
    # NOTE(review): the "main:app" import string assumes this module is
    # importable as `main` -- confirm against the actual filename.
    server_options = {"app": "main:app", "host": "0.0.0.0", "port": 8585}
    uvicorn.run(**server_options)
|