From cefe89bb677ab3f80ffaf22fd24e3bf21e77a502 Mon Sep 17 00:00:00 2001 From: bolade Date: Wed, 8 Oct 2025 14:19:36 +0100 Subject: [PATCH] feat: Update query endpoint to return paginated investment responses with fund details --- app/__pycache__/main.cpython-312.pyc | Bin 4736 -> 4932 bytes app/main.py | 9 +- .../__pycache__/querying.cpython-312.pyc | Bin 5444 -> 7350 bytes app/services/compatibility_score.py | 0 app/services/crm.py | 0 app/services/insight.py | 0 app/services/querying.py | 143 ++++++++++++------ app/services/report_gen.py | 0 8 files changed, 107 insertions(+), 45 deletions(-) create mode 100644 app/services/compatibility_score.py create mode 100644 app/services/crm.py create mode 100644 app/services/insight.py create mode 100644 app/services/report_gen.py diff --git a/app/__pycache__/main.cpython-312.pyc b/app/__pycache__/main.cpython-312.pyc index 88f6657ec93638729fa7eb4a2a10b59eb110c38e..ba7a3684bb908613a00926077d627d7473f6edc4 100644 GIT binary patch delta 1090 zcmZuwO>7fK6rSC+HzBcOJAwFj%K~w*`AHy$z#+5*BuJ%zDg**qsO~B=u{W$|oj2d#ATloH+J?#KqjuN);Rs;(`=P4~P?Q)-nRrk@nlC?|tu^H#={) z%70IfJxV4=2(IThUo^iFu9l19Jzl<5s1-D#6E=KL zRpG+SNSD-#$Q8GH*UD>6KX3vU_pR4KiQ!XEC@HQaZ<}A3kzq;xW-gqL zM4u`{v&kGUXu>h9n8rsW5w^&)#Kh0nd!tq_W^$vqOU?EpE;Jj-6D+Ol}gc@bs zvVT>nNLe3rCEx;PuPfP6gqP&)7WvPX>nSgN6iy=(nr(+uN5H&%mHJ+Xv|LZ;=Q1jj z5ezn5+?j!l4e<`@lm3I{dQ% zcsIPz@@dCoU&6Pj#Ty{vnw-r{8&AoSypj2R?nx}svu1j;i;t~y505-7?HWWMc|&wv b-!)U(#2*X)5`4WG8W6iv(i*SF)HJ;Z5)cXU delta 1021 zcmaJo}^k2#f#2tY;8pym~Y>F?|X0FzBey3wO-#V zueS~Gwek7G%%ZyBi^6*geK!yU2%r`tT#n=;uH9-x%dvb60uR6~%OZj=?rE!od?)8R ze{=lQwsp$KJzyN5_Buc+_Ag6~xO@V~XHQZE_ID?_b*jPcMC3<-62vLoO)tXXQqQ!4 z!u6)@to!~E6~TK^v;-f=wr)b5I5F)(@w&Vno940Sciz>oamVZ=z3S~eA`|QHSkUiJ(yZ+?^=_) zNM!YB^PVOcpy$+V`=H1?B;XLer5+BtztCDK5F2Yndyb6K=js>bpb#9OPrWtI{11A< zcZ6>G)-DG*%e)n06|rq=*Q=P^Bni&-OJqz0j0g}<#TKt9l}a@lt{;Rb%Im^9M zLMDp-1U75nTbQN;Mqq#pbA~CEf{6+?IYyTPcW$$gjmezO?;JFYvgVdbNI(dK+aC7{ z-7FDNBZ&zZ7a$tRq!M=A1~Iu}*$$HpK3ygwz34DWHytu6q@pV3)6Jq$MOY(N)xm@* zB9e+3KSJ diff --git a/app/main.py b/app/main.py index fb93d85..a923ecd 100644 --- a/app/main.py +++ b/app/main.py @@ -6,7 +6,7 @@ from dotenv import load_dotenv from fastapi import FastAPI, File, Form, UploadFile from pydantic import BaseModel from routers import companies, investors, projects -from schemas.router_schemas import InvestorList +from schemas.router_schemas import InvestmentResponse, PaginatedResponse from services.llm_parser import InvestorProcessor from services.querying import QueryProcessor @@ -84,11 +84,16 @@ async def parse_csv( return results -@app.post("/query", response_model=InvestorList, tags=["Querying"]) +@app.post( + "/query", response_model=PaginatedResponse[InvestmentResponse], tags=["Querying"] +) async def query_investors(request: QueryRequest): """ Query investors using natural language. + Returns fund-level matches (one row per fund) with investor details. + This ensures only relevant funds are included in the response. + Supports queries like: - "Show me seed stage investors" - "Find fintech investors in Silicon Valley" diff --git a/app/services/__pycache__/querying.cpython-312.pyc b/app/services/__pycache__/querying.cpython-312.pyc index 88b87c992371b4a79c0e417a2b5cd343ed662c0c..3159ecec8a078709de73ee9f81a4cae6f3f7970d 100644 GIT binary patch literal 7350 zcmcIJTW}LsmbbgrdbwrEvMkFl+{PwY;KzVTVmv0qU^4My3;`z-1gxg5Zrd%l)SBC^ z7;EGVAE_-qGPQW;16asFs%9%pLDiD|f!eQRK4#dO{jlOJ*7QP!s!8n@e*$|qL-u3$ z+?HB0Mw|TX-YVV4xsP-D-t*QUJRUcK^7ZL|o1JV%=uc!|6?Mh98i*RP&p?DItky^9w zRJ;ja#Ao(hN?pPq@tb|OQlAJ!0*Qu5Ln0UnCK@A+CcH)oC7L2l6rz#jj_ zY6gC52LO5z>E$_{K+O`jIN1U~D^k|$i?qmywp;AY(yAn< zw|X~Aoh)^$_GalTf!-QxegTEO6Ndlju|pHbkG?&5=y-JG*jV)Np_7@`SzS+QL;d|K zFeg^ix`g|HPlS}*|K6^Q?<`@O8LZCr?&+NtG#wK$0$z2%g6N#IU^ofeN7Gm_*n}!d zictexm6EWar?CVAM2k}fqbLc(4gw+T(qqD10C$M8K)BRkbu|^8%cySm*pXvLk57(F zOb+oU)ij?-YdU{)V*DhJB{(d}>$4I+4dTd;jcRgDVLDFG&1SrI?wu1;d9(frW|Ag=qAHk+!7`4qyTY-y@&@BUNc-o^%c)%i;9Uh zY@kr4jbKq@L=zyZFcVFu;4UOlBOJl=-;VdsstKuIRnqfXyiRWKVc}(0s2wx_!E})9H9z!i|zePGr z$1P1>S-L?#Vt*ENI=M!XA$2wFm!w47A_J6^^=0L#EMsZfMH{Rr>zWNK%9i*93bUC5 zB}LB*y!q68TC=!+eEbM6fMk~>T{bN(;hl8y^4r6t-UDvMf_=nSyFC8Js&ot8jRnA2KxYJn^Xvp5V# z;y!Y&v%IuMkSqI~)_EGaXr2_jldSiEh!K+(HNP{{x`~CKo9%}Unhw=D1bh|SbQcvKHSg z(I0wkaONWWznp2yQgH_mx2fDLjm!A#SvwA8?SRv<0jFaFPR9#49a-iCP)EUUk)5}C zj(OCHbf<-|3zX-gQ!n$j$Yz~a&_}d&8imLgo&?W%(fPjfM5&#p=FwT^B$}tf_RQ8p zMdquRF4JI*jFnlt&ortN8OQ0V)8fw4QyK5+DOo%5 z97=m!L15Fyri<~V`1R%+r&fY{&;KajGjP9g&-D{mnM(&haxORSIsYb9u&%lFTJz=R z<*rxm)~pVW0A^30RJKeq3RCy(~IqfhnD^NSk6@ zMX6WZVGCQFP`at;UJFoJANN_XN?SgoyuLE0`>lRFVD&1kwe$ZNQ?P=;>is`qB2L<- z@|~E7kG83N=lbtB-{OsU+ol6yxZ%CFyf_>0coA0NJByxp=ZmW>4qIfvNZ7K>D7rwM zIYShGmaWDai=eWry}QVY_GYv}kpr$phrZ3in03Uf&<)U+tQD2^mcud-U}(01p>Zxc zfSM{djR!<$oS6GfrTS{1&Q&rq5C$%~vaV{_0jJMKjEl>?vT@=CZ7R40R#&<8D@b&~ zw^Z2yhLcIwRNn>AA8bMZLpW!5RcMve5UhQr%362UndLzydz-4B0aLmPB&*^ynW&DM(4+$b1KTBPAcsYP!^Z)a^VOJ{4d-in3>*M=C`O~sNI z3YDD@a6L-yh^ySIA@0ccoltf*xXk4@h$Cyd8X7+@>$5y~0wlhaNw5mh9>gBnteh%2 z4~VOWwE@VE4H#6Mqo4=r|qINz`h^yK+KS1kCFyzy$EAOQ|u9pGxbzrVH2% zY)ub^oTwFjoX5oeJSGnjW(^iX@q|VKSsjvHwiLvahKtluP0mP=Mq>MB7IIAY{hAaIuzNN1SeP29aGe7B-tVz7(XV2i7A5);FWK~q6+Zw4&n{_xub z_ztOlOsaRuv>n26DJdFkOi?u{>?^v6reo+9r=l7}G$eLq%@;KS8wj!(1Hv*gi3M^T z1Yl-9v-nA@Ni;rbA}ZdX!I|`wud~X578}IWbW+D}!Dfwoj>Vf+YldBzNOflGW-s)` z*p7U$_D>LcpF>aF;1524SSuLH2OIMZ!9VUCxY~JX_DXNQwQaq|9;huKyQdbS{-NRL zAnSeOxD64+1b)?HI^)46Fq9 ztOiDMfsqHHZL6WZxzJvS=o*5MhiK@izGinWwEO$k^yWgn&#xKIbqucr_LVU2&UKKS z$XX-k_kYuj{Gp}bRo6A{GPfG)&V{;fgjPbkSLz17stYXD|FWUbfjs^~H|lz6nd9@J zwl&n{8KExN@-6Mx_Fdk0!3kkySI=ta{#@t&&*;zUmOBsrC(CU2KMJ+4GpM2S`phTs zkK?!J?_@vC-Vclx7?@l3bruk`1s@8vt~U05(MWP94NW9rb6a11^+@j3BlovV+>kD% zuVt=ezSuJHW#H(ernZ6u)wLG_k1$^@$hZ>c3rM>;GQ3_@_=;``OO#rOZ_eL&u}?b?yc zM=tHZ(RoAs`{Zwu%f0(@O|Ly_>bTZ+xh)^!$^GQFg;%!?<+cq~>F_)A-9OB?cIUfy zt~Ys#pV*4(wypYh=6pMsd-mV=ybMNNlY~loIjjz-*Mxuzy0~;j@u*W z4}VSgZbv@Y0eqKl-j;9OUgS7G@-(h;tvRmsU$`yLo}QuL@}51dMeRqaXHPl!s6Q7V zDZXLNx7w?~(*Jmferh+pS3f>1WOms6I zA=MzMh!p+?6*CLON5Le^ZC1qZ(TINg06nDxth>6uxW|dif#R z{t)f@hGr>x4MFuE9HP2G71p<0=p=r2u1o?D<7ARVv5WyrQt`d@>MIZU1Wdp7K)6Skq z*$wj10d96)yEC&pGxuTkH+x-w5CjJT_~YZhUjD+n<_c5)Sle^(B85aGGBQ`O=jH7*Cka` z@Kix7JmqFfs-{1oX3xD0yKW*0*cw(UH7(6!@(PAU0nxmqN!8bJ z7zCIjD5SgDB<0kPu)jzIK%l-0=o;Ge_iqM*H)qynHUr<;Tp#bt|F;wVO~-V>w+rb zfBpx+Kxr0_fG>lE1eDh#gNE1)TT!V(gqkswm%gXrLQ%_i^UIe+H9rR_<*}sJl&UHj zq5Pbr6)*3asQF8a#4OF=B(g$w`}CLWkzW4UBUA>Q#tGn*{(bv?YOj9Ro=bdMaaW|t{(X2f`k{(VWkN7q2RIqBSmtK`aYm*=r5f2^H48@W)bW`h%eN8 zv%FTm-M9YjMliejVr&1zr;(%U{#z$+(?2=A5jna#3#iTNf8z(z$EH4wO@F|?N8dj6 zGv`KZ`UVfE%_C2AgNr=g4xwoF@wNlSPw5#!I-Lc7#ZH3&i4dt28or0W2kA|;M5(;i z=`V!l(Os(BZRe3j>`c%Y-R6o>Hhs*@~w9G77r_gs^LTH)QVmn(AT4I-2h!OVCY}wOUePkVB%cOz61a~cWNWlRt+m@Je zf>@?)-TR`@8)vQP>0K@{Pw#TcQXYa}qTG2jggVTKm%ci4iq!nm!lMHdufS*+OH+ou zTaC~G#_nJ+{51t@bBbJ2@mR$M^fGX%j^AUsRZSwIFmSCTae8|V2h$^IjNt|q_S zT3(;`m;N->5r`se(WFJ;71>^DA;!^`F2Xbxaw$I2kbHEFmI&kiLcZ> z@%Q>0p7GDT;kD3a0GMXtg?i$JM&M+JJ6=y5Zv-Yg+>v_XNFy-O;SSXkhZ=#y5ABT4 z_b(p#!fU}>LNlDIhf~dPrXJ3$zuE{NYj}^hy-4u2gQ$1l9s67Mx1Fmq5W(Q^#tJ{s zijHg}PRQQiHshIA|41vfznPk0tobTdKxwW7n#Xr>;`wE70$8GCE&k&{jMZU}7` z@(wjU>AEMqKKhC0Xxj^?JPM#dd}HtA$NsN@2O-qtqjf&|2|w`f)ib;F6P3KFphS{!yLO^Y$I6K<%eWx4+og&M`|0va$o!1$zLx%BxF02WMUsoebn#&)l3@! zeLwSvEIVF^gLD+fbX(*(!LZAP>U{BXp;W~-{Y)eUy{<-v!geq+JiEI5&Tkhx^j}88 zD^8;Hsm04urJ#=07V{m_ENC-T!=+x43o@}-98+)wlg==#wFRj9K|*ro-)RAta0u9) zq%RW6{Bw|tc*0Fq6mdb8p2x2NO9qqr3ZOPUOi|RIkbMh<{*IDcXlM)dZlSMlp~G8f Z&lY;_Gn55FzZTu&mMAK=jR-c+{V$jekbM9E diff --git a/app/services/compatibility_score.py b/app/services/compatibility_score.py new file mode 100644 index 0000000..e69de29 diff --git a/app/services/crm.py b/app/services/crm.py new file mode 100644 index 0000000..e69de29 diff --git a/app/services/insight.py b/app/services/insight.py new file mode 100644 index 0000000..e69de29 diff --git a/app/services/querying.py b/app/services/querying.py index 27df87a..05b3fae 100644 --- a/app/services/querying.py +++ b/app/services/querying.py @@ -2,13 +2,18 @@ import os from typing import List from db.db import DATABASE_URL, get_db -from db.models import InvestorTable +from db.models import FundTable, InvestorTable from langchain import hub from langchain_community.agent_toolkits import SQLDatabaseToolkit from langchain_community.utilities import SQLDatabase from langchain_openai import ChatOpenAI from langgraph.prebuilt import create_react_agent -from schemas.py_schemas import InvestorData, InvestorList +from schemas.router_schemas import ( + CompanyMinimal, + InvestmentResponse, + PaginatedResponse, + SectorMinimal, +) from sqlalchemy.orm import selectinload # Connect to SQLite @@ -21,16 +26,16 @@ class QueryProcessor: self.llm = ChatOpenAI( api_key=os.getenv("OPENROUTER_API_KEY"), base_url="https://openrouter.ai/api/v1", - model="openai/gpt-4o-mini", + model="x-ai/grok-4-fast", temperature=0, ) self.toolkit = SQLDatabaseToolkit(db=db, llm=self.llm) - # Update system message to specifically request only investor IDs + # Update system message to specifically request only fund IDs system_message_updated = ( prompt_template.format(dialect="SQLite", top_k=5) - + "\n\nIMPORTANT: You must ONLY return the investor IDs (id field) that match the user's criteria. " + + "\n\nIMPORTANT: You must ONLY return the fund IDs (id field from the funds table) that match the user's criteria. " + "Do NOT return any other information, explanations, or data. " - + "Your response should be ONLY a comma-separated list of numbers representing the investor IDs. " + + "Your response should be ONLY a comma-separated list of numbers representing the fund IDs. " + "Example format: 1, 5, 12, 23" ) self.agent = create_react_agent( @@ -39,9 +44,9 @@ class QueryProcessor: prompt=system_message_updated, ) - def process_query(self, question: str) -> InvestorList: - """Process a query using the LLM and return investor data.""" - # Let the LLM handle all database interactions and filtering to get IDs + def process_query(self, question: str) -> PaginatedResponse[InvestmentResponse]: + """Process a query using the LLM and return investment response data.""" + # Let the LLM handle all database interactions and filtering to get fund IDs response = self.agent.invoke( {"messages": [("user", question)]}, ) @@ -51,70 +56,122 @@ class QueryProcessor: response["messages"][-1].content if response.get("messages") else "" ) - # Extract investor IDs from the AI response - investor_ids = self._extract_investor_ids_from_response(ai_response) + # Extract fund IDs from the AI response + fund_ids = self._extract_fund_ids_from_response(ai_response) - # Fetch full investor data using the IDs - return self._fetch_investors_by_ids(investor_ids) + # Fetch full fund data with investor relationships using the IDs + return self._fetch_funds_by_ids(fund_ids) - def _extract_investor_ids_from_response(self, ai_response: str) -> List[int]: - """Extract investor IDs from AI response.""" + def _extract_fund_ids_from_response(self, ai_response: str) -> List[int]: + """Extract fund IDs from AI response.""" import re - investor_ids = [] + fund_ids = [] try: # Try multiple patterns to extract IDs from the response # Pattern 1: Simple numbers (assuming they are IDs) numbers = re.findall(r"\b\d+\b", ai_response) - investor_ids = [int(num) for num in numbers] + fund_ids = [int(num) for num in numbers] # Pattern 2: If response contains explicit ID references id_matches = re.findall(r"\bid[:\s]*(\d+)", ai_response.lower()) if id_matches: - investor_ids = [int(id_str) for id_str in id_matches] + fund_ids = [int(id_str) for id_str in id_matches] except Exception as e: print(f"Error extracting IDs from response: {e}") return [] - return investor_ids + return fund_ids - def _fetch_investors_by_ids(self, investor_ids: List[int]) -> InvestorList: - """Fetch investors with all their relationships from the database using IDs.""" - if not investor_ids: - return InvestorList(investors=[]) + def _fetch_funds_by_ids( + self, fund_ids: List[int] + ) -> PaginatedResponse[InvestmentResponse]: + """Fetch funds with all their relationships from the database using fund IDs. + Constructs response similar to read_investors but starting from funds.""" + if not fund_ids: + return PaginatedResponse( + items=[], + total=0, + page=1, + page_size=len(fund_ids) if fund_ids else 10, + total_pages=0, + ) # Get database session db_session = next(get_db()) try: - # Build query with all relationships loaded - query = ( - db_session.query(InvestorTable) + # Query funds with all necessary relationships loaded + funds = ( + db_session.query(FundTable) .options( - selectinload(InvestorTable.portfolio_companies), - selectinload(InvestorTable.team_members), - selectinload(InvestorTable.sectors), - selectinload(InvestorTable.funds), + selectinload(FundTable.investor).selectinload( + InvestorTable.portfolio_companies + ), + selectinload(FundTable.investor).selectinload( + InvestorTable.team_members + ), + selectinload(FundTable.investor).selectinload( + InvestorTable.sectors + ), + selectinload(FundTable.investment_stages), + selectinload(FundTable.sectors), ) - .filter(InvestorTable.id.in_(investor_ids)) + .filter(FundTable.id.in_(fund_ids)) + .all() ) - investors = query.all() + # Transform to InvestmentResponse format (one row per fund) + investment_responses = [] + for fund in funds: + investor = fund.investor - # Transform to InvestorData format - investor_data_list = [] - for investor in investors: - investor_data = InvestorData( - investor=investor, - portfolio_companies=investor.portfolio_companies, - team_members=investor.team_members, - sectors=investor.sectors, - funds=investor.funds, + # Get top 3 portfolio companies (id and name only) + portfolio_companies = [ + CompanyMinimal(id=company.id, name=company.name) + for company in investor.portfolio_companies[:3] + ] + + # Get stage focus as comma-separated string + stage_focus = ( + ", ".join([stage.name for stage in fund.investment_stages]) + if fund.investment_stages + else None ) - investor_data_list.append(investor_data) - return InvestorList(investors=investor_data_list) + # Get top 3 sectors from fund (id and name only) + fund_sectors = [ + SectorMinimal(id=sector.id, name=sector.name) + for sector in (fund.sectors[:3] if fund.sectors else []) + ] + + investment_response = InvestmentResponse( + id=investor.id, + name=f"{investor.name} - {fund.fund_name}" + if fund.fund_name + else investor.name, + aum=investor.aum, + check_size_lower=fund.check_size_lower, + check_size_upper=fund.check_size_upper, + geographic_focus=fund.geographic_focus, + stage_focus=stage_focus, + portfolio_companies=portfolio_companies, + sectors=fund_sectors, + compatibility_score=1.0, + ) + investment_responses.append(investment_response) + + total_count = len(investment_responses) + total_pages = 1 if total_count > 0 else 0 + + return PaginatedResponse( + items=investment_responses, + total=total_count, + page=1, + page_size=total_count, + total_pages=total_pages, + ) finally: db_session.close() diff --git a/app/services/report_gen.py b/app/services/report_gen.py new file mode 100644 index 0000000..e69de29