From ff0010019ee74b70e0e834539e48275fd991272e Mon Sep 17 00:00:00 2001 From: bolade Date: Mon, 27 Oct 2025 20:12:30 +0100 Subject: [PATCH] feat: Implement company querying functionality with natural language processing and logging --- app/__pycache__/main.cpython-312.pyc | Bin 5310 -> 6698 bytes app/main.py | 26 ++- app/routers/folk_crm.py | 10 +- .../router_schemas.cpython-312.pyc | Bin 10896 -> 10946 bytes app/schemas/router_schemas.py | 5 + app/services/company_querying.py | 176 ++++++++++++++++++ app/services/crm.py | 78 +------- 7 files changed, 225 insertions(+), 70 deletions(-) create mode 100644 app/services/company_querying.py diff --git a/app/__pycache__/main.cpython-312.pyc b/app/__pycache__/main.cpython-312.pyc index 81d7ae9c88abee887d6213a954e7a0c000542dfd..26e197f348ebc181e40767c82105890480c71fa1 100644 GIT binary patch delta 2313 zcmaJ>U2GIp6uvXNJKOGdr`_&vyDi*q=`UMo`6(?RTCtTv<;RLiCo#>ib9Z;znVoWH zwx%S~qE+C@$UKN4MqUM?CL~5)jPZ#?AKKJ}W(bKSKInrHQPlXNXJ$J~gV;^>%=!7g zbI;s!&V1Q*Z5RJdC=@{O`{C<<3P$~Vd~#m#BJ@6zki;lRC_fp2acov#ou^0e!jn~e zDkpHNU+}9{LKS0UJxaA25CV?wRf1}bP@{%~kje|ZS}W8#exFjOhJ~=Bb4tA$5h7|- zh^jFmrp5&|t|o+pniP@_z^^o@jY6ZE5>jfD(B$~4lxB64u*uP@mCY&^u%ic*7BwxT z8N?zIxNg4{{2Ys!8Yx@rC1IuxnfI$ACxd`zrVg)3#sTRB!r z62H_yYNbZ9Gsj7(MPDWJE;sY`CelPwc2=nwZfKL6S=vl`AOl|XRyey|&TtEG8vmD* zUdQPNj`oyWj!(^8Vw)SD9*k@hOl%a4ZWN4d6ihn7M2FNm-H8xwbXH_g7=$|=oHFbl zB(_LxQu~b#_l6iW$ILNZou=aXg!fp%gPJ-c7Uy0N4be<_yW_Dn$B7c5bH}NcCAzLr z^LcN_{Fz;CouZyKWR-O3xCgg&7R8FHkXfRvvsK`Uc>s2KET0p!xSU2+(XgrtVo_2E z)z9!bO_|2xjEtwr++J+ej2s&~GC6d7bn?jXM0R}xU;}Q>d4r6v6})Nlq7O4I<~85v zOuH#^i%l53w8a)_5Ege(E9T@pg*!pLCgX2OISQvj&rM0+rjs-huYr-ddL=|Aawhqfib3Ovo{>`9S3XLJRIJc`VBs^=*`Y< z=1kpohu@QRW107UGJ0Ct1sx!R7gvN4CDjoAXx@$0ftz(#?{6wAd~ zq8l2;lqjMhYel^v&*&L!5K+Y{QBOg`GZ;FLb&>^~ZnoF;%;)5yL9&I)DLT$+rJ_V6 z$cK0I?&wWBrS#zRhz!*7jb=qk)-eD%E*9|!6^mI}&uSG)d6eP3>|{_S9vWRc+=a}i zA7hMyXh88k2(`R&F{fD^L`iX$+S^M{+QMEf%L8t@+x)aX6||?F+Cxgm&87O(bC$mZ1#7fu zd1$Hl#A^LrwO`ewl0ptp11K=CNbhG6df;Ig<}Rwfi+Hm;(mwwxvy4vOL;LQbzI!Nh z9|adft0=PIzk?#nD6$#|zni(3Sq%=Y@~s#0clg$2zIBCfTMD&5@_QNfK>!6(7ZP`> z8<(paFAuC#Z&~2JN8I_~x!{H3<%!ELe>CyI#MQGa&HYPFdzNYjzC(i#IhN`EHGmj? ovGzV^PDgIVSl{{Nx#X%hzSJ@BMQXWY__lC+%w(dsye&}nzih)StpET3 delta 1288 zcmaJ=&1)1%6t9~8n9rW?o}QVW$+RJ6Gcl_hBf_#0KUv9&9#*{+NQ_n`F?LO2wF%@D z(DAO;vI-u&n;=3!Z(cnJvR*`H5%dobq8Guv_^QW=y6&c+fBoL?z4v>ss=My(dE87t zwX6h!_4n@|W?ShS>GH|}4WV0zB8q)Px^GM(;75Fo>qO@UF}O)gtYVrU%}b;d#-sjkv`!1zHs0rdLC&+&{0H_ZA4QnvkKc&i+HRt zcerbRjVshz#a)7?+h|jyAJUJ0o%lH?L$foO4Y2JMAl7m{c^Du`^F1Z-QeV932rV#! z7MVqDHaKn4Qrp<g9E2}27JcHLNy+*XkJP>r-`liGXD4hI$R92ijC7)^a z%6~_5TSo0IqxmhPg)O6{ZuH;|TDv%ekf?^g-Uu0iLmQbR-|vIgX&-H@dA$?BXc;f# z<~Dg-zmFsGlrb!C83XIXri+a_lsodbsE1v7G5QdDvOm^7T?JOS3Tgly^YDCY`ofIp z2WLAxK^dTpUZ?Dr+3H`la~oQuoZis%dg6Jk(yfC*(TS!h2H-7JjTn^w#H!4vP5GB+#V#oRQ9g*5u)F>w zzGUE{9M23TcLOcP6zq|gGNX35tN6l{2-xKLU`ZT-&~EuC^VS?yNk6UM%N8SJHrO3& zLCl|LK`?n~k%?vD0oT;TuPW`30(CW_0)uK~@_*phfcbrZkU}EV4wAktUd@`I#h~iDhsjB`$eIRj6fjXg%Hp4E9 zU2@rRO*OQbkarz-e|XHD=pyLMi&jSyi>-eh^2gXdK2H~YHZIOWow~rmAAlPg##nxI d2CWJF3XQ%(BX3Z$ZOI$uC-QpbPhI^4p8&%j?A-tW diff --git a/app/main.py b/app/main.py index fa6a869..4a79176 100644 --- a/app/main.py +++ b/app/main.py @@ -1,4 +1,6 @@ import io +import logging +import os import pandas as pd from db.db import Base, db_dependency, engine @@ -13,7 +15,8 @@ from routers import ( projects, report_route, ) -from schemas.router_schemas import InvestmentResponse, PaginatedResponse +from schemas.router_schemas import CompanyData, InvestmentResponse, PaginatedResponse +from services.company_querying import CompanyQueryProcessor from services.llm_parser import InvestorProcessor from services.querying import QueryProcessor @@ -114,6 +117,27 @@ async def query_investors(request: QueryRequest): return results +@app.post( + "/query-companies", response_model=PaginatedResponse[CompanyData], tags=["Querying"] +) +async def query_companies(request: QueryRequest): + """ + Query companies using natural language. + + Returns company matches with their investor relationships, team members, and sectors. + + Supports queries like: + - "Show me fintech companies founded in 2020" + - "Find healthcare companies in San Francisco" + - "Companies in the AI sector" + - "Companies that received funding from Sequoia" + - "European startups founded after 2019" + """ + processor = CompanyQueryProcessor() + results = processor.process_query(request.question) + return results + + app.include_router(investors.router) app.include_router(companies.router) app.include_router(projects.router) diff --git a/app/routers/folk_crm.py b/app/routers/folk_crm.py index ae68ad5..4ed0812 100644 --- a/app/routers/folk_crm.py +++ b/app/routers/folk_crm.py @@ -1,15 +1,21 @@ +import os from typing import List from db.db import get_db from db.models import InvestorTable from fastapi import APIRouter, Depends, HTTPException from pydantic import BaseModel -from services.crm import folk +from services.crm import FolkAPI from sqlalchemy.orm import Session, selectinload router = APIRouter(prefix="/folk", tags=["Folk CRM"]) +def get_folk_client(): + """Get Folk API client with loaded environment variables""" + return FolkAPI(api_key=os.environ.get("FOLK_API_KEY", "")) + + class GroupResponse(BaseModel): id: str name: str @@ -44,6 +50,7 @@ def get_folk_groups(): to sync investors to Folk. """ try: + folk = get_folk_client() groups_data = folk.get_groups() items = groups_data.get("data", {}).get("items", []) @@ -71,6 +78,7 @@ def sync_investors_to_folk( Returns: Summary of sync operation including successes and errors """ + folk = get_folk_client() # Fetch investors with their team members investors = ( db.query(InvestorTable) diff --git a/app/schemas/__pycache__/router_schemas.cpython-312.pyc b/app/schemas/__pycache__/router_schemas.cpython-312.pyc index d58670d72bcb76bf54468bfb9c78954c2c721f87..dbc61b1e8e2180ac5801dc493b17ab6284ca4679 100644 GIT binary patch delta 288 zcmbObdMK3lG%qg~0}wjidnV@r# zSAKG%{5-}plNA&mF<#irtf<7ucn9n-OC>qRYnvmK)EOC{K?Ov)xgDdWM`D0kokoHNPQ9EWR#y_`Go;UeUSi( MYeK~*+iL{^0GjerJ^%m! delta 260 zcmX>UIw6$zG%qg~0}wp6dX~Ylk@uoB= year" + + "\n5. For investor-related: JOIN investor_companies table" + ) + self.agent = create_react_agent( + model=self.llm, + tools=self.toolkit.get_tools(), + prompt=system_message_updated, + ) + + def process_query(self, question: str) -> PaginatedResponse[CompanyData]: + """Process a query using the LLM and return company response data. + + Args: + question: The natural language query to process + """ + # Let the LLM handle all database interactions and filtering to get company IDs + response = self.agent.invoke( + {"messages": [("user", question)]}, + config={"recursion_limit": 50}, + ) + + # Extract the actual message content + logger.info(f"{response}") + + # Look through all messages to find the SQL query results (ToolMessage with actual data) + company_ids = [] + for message in response["messages"]: + if hasattr(message, "content") and message.content: + # Check if this looks like SQL results (contains tuples with numbers) + if "(" in str(message.content) and "," in str(message.content): + company_ids = self._extract_company_ids_from_response( + str(message.content) + ) + if company_ids: + logger.info( + f"Extracted {len(company_ids)} company IDs from results" + ) + break + + # If no IDs found from ToolMessage, check the final AI message + if not company_ids: + final_message_content = response["messages"][-1].content + logger.info(f"AI Response: \n{final_message_content}") + company_ids = self._extract_company_ids_from_response(final_message_content) + + # Fetch full company data with relationships using the IDs + return self._fetch_companies_by_ids(company_ids) + + def _extract_company_ids_from_response(self, ai_response: str) -> List[int]: + """Extract company IDs from AI response.""" + import re + + company_ids = [] + + # Check if response is NO_RESULTS + if "NO_RESULTS" in ai_response.upper(): + return [] + + try: + # The response contains tuples like (1,), (5,), etc. + # Extract numbers between parentheses + pattern = r"\((\d+),?\)" + matches = re.findall(pattern, ai_response) + if matches: + company_ids = [int(match) for match in matches] + else: + # Fallback: extract all numbers + numbers = re.findall(r"\b\d+\b", ai_response) + # Filter out very large numbers that might be tokens or timestamps + company_ids = [int(num) for num in numbers if int(num) < 100000] + + except Exception as e: + logger.error(f"Error extracting IDs from response: {e}") + return [] + + return company_ids + + def _fetch_companies_by_ids( + self, company_ids: List[int] + ) -> PaginatedResponse[CompanyData]: + """Fetch companies with all their relationships from the database using company IDs. + + Args: + company_ids: List of company IDs to fetch + """ + if not company_ids: + return PaginatedResponse( + items=[], + total=0, + page=1, + page_size=len(company_ids) if company_ids else 10, + total_pages=0, + ) + + # Get database session + db_session = next(get_db()) + + try: + # Query companies with all necessary relationships loaded + companies = ( + db_session.query(CompanyTable) + .options( + selectinload(CompanyTable.investors), + selectinload(CompanyTable.members), + selectinload(CompanyTable.sectors), + ) + .filter(CompanyTable.id.in_(company_ids)) + .all() + ) + + # Transform to CompanyData format + company_data_list = [] + for company in companies: + company_data = CompanyData( + company=company, + investors=company.investors, + members=company.members, + sectors=company.sectors, + ) + company_data_list.append(company_data) + + total_count = len(company_data_list) + total_pages = 1 if total_count > 0 else 0 + + return PaginatedResponse( + items=company_data_list, + total=total_count, + page=1, + page_size=total_count, + total_pages=total_pages, + ) + + finally: + db_session.close() diff --git a/app/services/crm.py b/app/services/crm.py index 4801c5a..d23f33a 100644 --- a/app/services/crm.py +++ b/app/services/crm.py @@ -1,14 +1,24 @@ +import logging import os import sys import requests +logging.basicConfig( + level=logging.INFO, + format="%(asctime)s - %(name)s - %(levelname)s - %(message)s", + handlers=[logging.StreamHandler()], +) +logger = logging.getLogger(__name__) + class FolkAPI: BASE_URL = "https://api.folk.app/v1" def __init__(self, api_key: str): + api_key = os.environ.get("FOLK_API_KEY", api_key) self.headers = {"Authorization": f"Bearer {api_key}"} + logger.info(f"FolkAPI initialized with API key: {api_key[:4]}***") def get_groups(self): """Fetch all groups from Folk.""" @@ -190,71 +200,3 @@ class FolkAPI: response.raise_for_status() return response.json() - -# Prefer getting the API key from the environment. If not set, fall back to the -# existing (hard-coded) key so behavior is unchanged for now. -DEFAULT_API_KEY = "FOLKfIGXuv74ML9EAajxyiUR39ePaNrZ" -api_key = os.environ.get("FOLK_API_KEY", DEFAULT_API_KEY) - -folk = FolkAPI(api_key=api_key) - - -def example_flow(): - # Step 1: Get groups - groups = folk.get_groups() - print(groups) - - # Safely dig into the returned structure. The API returns groups under - # groups['data']['items'] (not groups['data'][0]). Handle missing/empty. - items = groups.get("data", {}).get("items", []) - if not items: - print("No groups returned by Folk API.") - sys.exit(1) - - # Choose the first group as an example - group_id = items[0].get("id") - if not group_id: - print("No id found for the first group item.") - sys.exit(1) - - # Step 2: Choose a group_id and create a company - company = folk.create_company( - name="2050 Investment Partners", - group_id=group_id, - website="https://2050.com", - linkedin_url="https://linkedin.com/company/2050-investments", - ) - - # Step 3: Add a person to the same group or company - person = folk.create_person( - first_name="John", - last_name="Doe", - email="john@2050.com", - company_id=company.get("data", {}).get("id"), - group_id=group_id, - ) - - print("Created company:", company) - print("Created person:", person) - - -if __name__ == "__main__": - try: - example_flow() - except requests.HTTPError as e: - # Try to include response body for easier debugging if available - resp = getattr(e, "response", None) - if resp is not None: - try: - body = resp.text - except Exception: - body = "" - print("HTTP error while talking to Folk API:", e) - print("Response status:", resp.status_code) - print("Response body:", body) - else: - print("HTTP error while talking to Folk API:", e) - sys.exit(1) - except Exception as e: # pragma: no cover - top-level safety - print("Unexpected error:", e) - sys.exit(1)