Files
Anton_wireframe/app/main.py
T

164 lines
4.8 KiB
Python
Raw Normal View History

import io
2025-10-28 21:34:20 +01:00
import logging
import pandas as pd
from db.db import Base, db_dependency, engine
2025-09-25 17:00:38 +01:00
from dotenv import load_dotenv
2025-10-28 21:34:20 +01:00
from fastapi import FastAPI, File, Form, HTTPException, UploadFile
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel
from routers import (
2025-11-26 08:04:11 +00:00
addition,
companies,
folk_crm,
insight_route,
investors,
projects,
report_route,
)
from schemas.router_schemas import CompanyData, InvestmentResponse, PaginatedResponse
from services.company_querying import CompanyQueryProcessor
2025-09-25 17:00:38 +01:00
from services.llm_parser import InvestorProcessor
from services.querying import QueryProcessor
2025-09-25 17:00:38 +01:00
load_dotenv()
def init_database():
    """Create every table registered on ``Base.metadata`` using ``engine``."""
    schema = Base.metadata
    schema.create_all(bind=engine)
2025-10-28 21:34:20 +01:00
logger = logging.getLogger(__name__)
init_database()


def _allowed_cors_origins():
    """Return the list of origins the CORS middleware should accept.

    Reads the comma-separated ``CORS_ALLOW_ORIGINS`` environment variable
    (for example ``https://app.example.com,https://admin.example.com``).
    When the variable is unset or blank, the previously hard-coded wildcard
    ``["*"]`` is returned so existing deployments behave exactly as before.
    """
    import os  # local import keeps this change self-contained

    configured = os.getenv("CORS_ALLOW_ORIGINS", "")
    origins = [item.strip() for item in configured.split(",") if item.strip()]
    return origins or ["*"]


app = FastAPI()

# Add CORS middleware to allow frontend requests.
_cors_origins = _allowed_cors_origins()
if "*" in _cors_origins:
    # FastAPI's CORS documentation states that allow_origins must not be
    # ["*"] when allow_credentials=True. The wildcard is kept only as a
    # backward-compatible default, so make the misconfiguration visible.
    logger.warning(
        "CORS is configured with wildcard origins and credentials enabled; "
        "set CORS_ALLOW_ORIGINS to a comma-separated list of trusted origins."
    )

app.add_middleware(
    CORSMiddleware,
    allow_origins=_cors_origins,
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
# Request models
class QueryRequest(BaseModel):
    # Request body accepted by the investor query endpoint (`query_investors`).
    # NOTE: documented with comments rather than a class docstring so the
    # generated JSON schema is not given a new "description" entry.

    # Natural-language question forwarded to the query processor.
    question: str

    # Pydantic v2 style configuration. The nested `class Config` form is
    # deprecated in v2; `json_schema_extra` keeps the same example payload
    # attached to the generated schema.
    model_config = {
        "json_schema_extra": {
            "example": {
                "question": "Find me deep tech investors that do deals in Europe under 5 million."
            }
        }
    }
2025-10-28 21:09:47 +01:00
2025-10-28 20:54:15 +01:00
class CompanyQueryRequest(BaseModel):
    # Request body accepted by the company query endpoint (`query_companies`).
    # NOTE: documented with comments rather than a class docstring so the
    # generated JSON schema is not given a new "description" entry.

    # Natural-language question forwarded to the company query processor.
    question: str

    # Pydantic v2 style configuration. The nested `class Config` form is
    # deprecated in v2; `json_schema_extra` keeps the same example payload
    # attached to the generated schema.
    model_config = {
        "json_schema_extra": {
            "example": {
                "question": "Find me companies in the fintech sector located in San Francisco."
            }
        }
    }
2025-10-28 21:09:47 +01:00
@app.get("/")
def health():
    # Root endpoint: always answers with the same fixed JSON payload.
    # (No docstring on purpose: FastAPI would publish it as the endpoint
    # description, which the original handler does not have.)
    payload = {"Hello": "World"}
    return payload
@app.post("/parse-csv", tags=["CSV Upload"], response_model=list[dict])
async def parse_csv(
    db: db_dependency, file: UploadFile = File(...), is_investor: int = Form(...)
):
    """
    Parse and import CSV data into the database.

    **For investors:**
    - Expected columns: Name, Website, Final Investor Profile, Final Profile sourcing
    - Manually parses JSON profiles for efficiency
    - Uses LLM only for currency conversion to USD
    - Handles AUM, fund sizes, and check sizes as integers

    **For companies:**
    - Expected columns: Name, Website, Perplexity Gap Output (or Final Investor Profile)
    - 100% manual JSON parsing - no LLM needed
    - **Only extracts:** founded_year and key_executives
    - **Only updates companies already in the database** (syncs with existing records)
    - Skips companies not found in the database

    **Benefits:**
    - Fast processing (5-10s per record)
    - Low cost (minimal or no LLM usage)
    - Accurate data extraction
    - Automatic database persistence
    - Safe: won't create duplicate companies
    """
    # NOTE: `db` is not referenced in this body; it is kept so the handler's
    # signature and dependency resolution stay unchanged.

    # Read the uploaded CSV into a DataFrame.
    raw = await file.read()
    try:
        # "utf-8-sig" decodes plain UTF-8 and also strips a leading BOM
        # (written by Excel's "CSV UTF-8" export), which would otherwise end
        # up inside the first column name and break column lookups.
        df = pd.read_csv(io.StringIO(raw.decode("utf-8-sig")))
    except (
        UnicodeDecodeError,
        pd.errors.EmptyDataError,
        pd.errors.ParserError,
    ) as exc:
        # A file that cannot be decoded or parsed is a client error, not a
        # server fault, so answer 400 instead of an unhandled 500.
        raise HTTPException(
            status_code=400, detail=f"Invalid CSV upload: {exc}"
        ) from exc

    processor = InvestorProcessor()
    if is_investor == 1:
        # Investor path: manual parser with LLM currency conversion.
        return await processor.parse_investors(df, save_to_db=True)

    # Any other value selects the company path (manual parser, no LLM).
    return await processor.parse_companies(df, save_to_db=True)
@app.post(
    "/query", response_model=PaginatedResponse[InvestmentResponse], tags=["Querying"]
)
async def query_investors(request: QueryRequest):
    """Query investors/funds using natural language"""
    # Builds a QueryProcessor, awaits process_query() on the question text
    # and returns its result unchanged. The docstring above is left as-is
    # because FastAPI publishes it as the endpoint description.
    try:
        processor = QueryProcessor()
        result = await processor.process_query(request.question)
        logger.info("Query completed successfully with %s results", result.total)
        return result
    except HTTPException:
        # If the processor raises an HTTP error on purpose, let it through
        # with its own status code instead of rewrapping it as a 500.
        raise
    except Exception as e:
        # logger.exception logs at ERROR level with the traceback attached.
        logger.exception("Error in query_investors: %s", e)
        raise HTTPException(status_code=500, detail=str(e)) from e
@app.post(
    "/query-companies", response_model=PaginatedResponse[CompanyData], tags=["Querying"]
)
async def query_companies(request: CompanyQueryRequest):
    """Query companies using natural language"""
    # Builds a CompanyQueryProcessor, awaits process_query() on the question
    # text and returns its result unchanged. The docstring above is left
    # as-is because FastAPI publishes it as the endpoint description.
    try:
        processor = CompanyQueryProcessor()
        result = await processor.process_query(request.question)
        logger.info(
            "Company query completed successfully with %s results", result.total
        )
        return result
    except HTTPException:
        # If the processor raises an HTTP error on purpose, let it through
        # with its own status code instead of rewrapping it as a 500.
        raise
    except Exception as e:
        # logger.exception logs at ERROR level with the traceback attached.
        logger.exception("Error in query_companies: %s", e)
        raise HTTPException(status_code=500, detail=str(e)) from e
# Register each feature router on the application, in the original order.
for _router_module in (
    investors,
    companies,
    projects,
    folk_crm,
    insight_route,
    report_route,
    addition,
):
    app.include_router(_router_module.router)
if __name__ == "__main__":
    # Script entry point: serve "main:app" with uvicorn on all interfaces,
    # port 8585. uvicorn is imported here so it is only needed when the
    # module is run directly.
    from uvicorn import run as run_server

    run_server(app="main:app", host="0.0.0.0", port=8585)