From 17bc5acbc8a030e40986b1dc84aa3e4235fbf17b Mon Sep 17 00:00:00 2001 From: bolade Date: Mon, 29 Sep 2025 15:58:09 +0100 Subject: [PATCH] Refactor investor similarity search to utilize AI for improved query generation; adjust DataFrame parsing to skip initial rows for better data handling. --- app/__pycache__/main.cpython-312.pyc | Bin 4018 -> 4016 bytes .../__pycache__/investors.cpython-312.pyc | Bin 12563 -> 12064 bytes app/routers/investors.py | 87 +++++++----------- .../__pycache__/llm_parser.cpython-312.pyc | Bin 13263 -> 13305 bytes app/services/llm_parser.py | 4 +- 5 files changed, 34 insertions(+), 57 deletions(-) diff --git a/app/__pycache__/main.cpython-312.pyc b/app/__pycache__/main.cpython-312.pyc index b5a03bdd3179e9b3d88a8fa85faef52c22313d86..4d1a394353946ec88ab58396d83648a2a8c632cb 100644 GIT binary patch delta 71 zcmdlazd@e&G%qg~0}y2Hyq>XgBkwgHo+@?&JuuAdRGj>g=N+TR~s1pD!Ru#hl delta 73 zcmdlWze%3=G%qg~0}#BQc0FVDM&4^Ye787r@{<#DGV+T{G9Bb6zvp?!=sEcwZv|ug c=5RiDMxp2pB?nxtv3P!9XJC;k5(nx90MCFK00000 diff --git a/app/routers/__pycache__/investors.cpython-312.pyc b/app/routers/__pycache__/investors.cpython-312.pyc index 1416da7f071c96bb3aaeb5b44b761597cc20c2b1..3c7a8ffac7b0eb9369b1db9f78f3a25a9c90991b 100644 GIT binary patch delta 2150 zcmah~O-vg{6rNcdV=y*07=sOMmIN^7FGR`@Mkq}}6hq5TQlL_46S{Z@+pyk+*#&}S zYN|$6t4eLiR!Sw*G?99Ugj7}XCF%+F6sZ@RR5i6k$)P=_qDd~zrF~;;P|{Xe$=}Yr z_q{jszL|a2CvRV|{b;q8B6!}rl?^X8uG%CoMj{eTQhC%qZjU;~9l3e2R1vKlugs0j zk~3O0zIvyr+O}?Bu?UUVa43L8>op|Wh;8-P#v+Fs%=))nt-(R6^d{Vj9r~O24z}x0 zaWAgYyEq?Mz+c=%>O{u5mbcV{H&yUjn75g@a}3@P0AJY9dXRlT`g_Al8P1lR6I3#& zh(yx;#)f9wrFlY^E>a~(R8^rqE?YbTqlhAB%TywfpcauOMG%uWjPn>jeHFnRdNv$e9m=>5}Jdyp~+|2*#@aSOT)w9}@7r(U|5>N)1W=$QZ`l4V$y#Jo?-av_8_zfxz0Kr@7t=$|#Y5?8Ux!4)Y5 zg9W(!zjNdnWukS>DB6H?_$8cj;5591V~-cXijH`KZNB0w$FfQD7DCqZgXQ9Q@wBgC zJ$T1W1qqhTar1P`fu}GU0a-EpmJN%@R~hqPh{z%zkr#-nDOBavNHih|l-CqqmwfgF zKd(mQ5P!Cx7eYkVXaMriJ_h{&F|Lh|l?w6_ufpw9XsorAhv)xu78c_B&-1Y^elenj zc`Z!%DMgZ$MYd8fEKnh+5gJjoNHC#x=Q6hQBN}V~c)@wKn~#;`)?}ubJ5LA@B}9eU za3r|TvNBdEZ(oXe^32{a3C=*JF~UXrxnu*1G6SX2eXp9+Z(krGN_g-V@~a1e6}Ff^+kzsX6eS^OA8MX3yg;ZV%+Bte z0~0B6pi+S}lF!1Fk;np(RKA%N7Lj=Y@(HRU3zJe_T3;r3Gc zT(py2C=-KY3=X=7e#+wxu#Qfv81yozWpJ4R`>JUT1J(^_HG^`1taWy;NlwhMj!BP$ z%~zr~x75Af!`wDapdq47>|I)wbx(GC)d`JU)v{(GvKN;v&xKFqlPNT7f?dx`npy%` z^e5d)-MZcque`ly#_tE=1tpts^Hq zT*JNNw5fBFAO*uDy40c2D76Dw|GRzc4JJQ5!hkhZ+6eH>+!IyAd5N5+OE6`B2vb?yg}M!Rx!~$L#gQfVdI- KCkMma8vg(cE<&vU delta 1841 zcmah}O>7fK6rS0&*PGaDd$Z2ZIwaT$u`zKHnv&3p03nbfASnsZ0}+rK?-J~+<1luh zK^rA)xga6YWG<*l+aUE2DOF45Q+q-^wW=z4WvQkX6b`L=YLx;~e-2e=Y%fXFNL}rI z@6GrAX5P%s@&$a+`K=&S0`d$k#co#bHb9fj^ z_!TBZ7-F{t)NKm&xlv~P#F{m&c52uYt zLbZEq@y!~M(2F%)@STiNbB-<2O@tcuo`p-9Rd2*ny+sL<-M>8ozsS71{Z|G)!B?6- zw%bT)+Vf{&F5_!{17_az@X)mvfK^CX^31S0i^I-(hO>yqbo0@Jj+iXhO z?syyi+#})7+LqbwrPwD2%gUbz5nV<| zM!ShX90ghi(R1H4OD`5wPl2=%pgs!pT0r!;A-XB3mO?d!!xZSQ71-FsDC(p`fx^oK zLJrhV^$>xAD;+^&YI-y_aaB#Flc-p{P!X9*jB3eiDk`Z{ldn#y2=&kisT`3acR&)2 zPDB!F3hki@yC{SybQ4H92>5(Hyci1FV?_Urk~)LG3(ey3s@~Z@cwiIWzI5w+UT83c zhUI!g2h{3Rfu)AL+-b<2_mcVUVS~K#aL#dRu78c&;IP+__vYpO zhP?kiyx)=UyJ+-X%pQtnqe}LGnv*Z(9Ag_I9`e&RK&GY#x=ft~EJ8 zcR$Q`4H#Vm-+KpFy6<#uC_w zzYbIMe0)bIt;J&a%n7Px# diff --git a/app/routers/investors.py b/app/routers/investors.py index b28b532..2687477 100644 --- a/app/routers/investors.py +++ b/app/routers/investors.py @@ -1,13 +1,12 @@ - from typing import List, Optional from db.db import get_db from db.models import InvestorTable, SectorTable from fastapi import APIRouter, Depends, HTTPException, Query -from schemas.router_schemas import InvestmentStage, InvestorData from pydantic import BaseModel -from sqlalchemy.orm import Session, selectinload +from schemas.router_schemas import InvestmentStage, InvestorData from services.querying import QueryProcessor +from sqlalchemy.orm import Session, selectinload router = APIRouter(tags=["Investor Routes"]) @@ -237,9 +236,9 @@ def delete_investor(investor_id: int, db: Session = Depends(get_db)): @router.get("/investors/{investor_id}/similar", response_model=List[InvestorData]) def find_similar_investors(investor_id: int, db: Session = Depends(get_db)): - """Find investors similar to a given investor""" - - # First, get the target investor + """Find investors similar to a given investor using AI agent""" + + # First, get the target investor to build the AI query target_investor = ( db.query(InvestorTable) .options( @@ -250,55 +249,33 @@ def find_similar_investors(investor_id: int, db: Session = Depends(get_db)): .filter(InvestorTable.id == investor_id) .first() ) - + if not target_investor: raise HTTPException(status_code=404, detail="Investor not found") + + # Build a descriptive query for the AI agent based on target investor characteristics + target_sectors = [sector.name for sector in target_investor.sectors] + sectors_text = ", ".join(target_sectors) if target_sectors else "any sector" + + ai_query = f""" + Find investors similar to investor ID {investor_id} with the following characteristics: + - Stage focus: {target_investor.stage_focus.value if target_investor.stage_focus else "any stage"} + - Geographic focus: {target_investor.geographic_focus or "any geography"} + - Check size range: ${target_investor.check_size_lower or 0:,} to ${target_investor.check_size_upper or 0:,} + - AUM (Assets Under Management): ${target_investor.aum or 0:,} + - Sectors: {sectors_text} - # Build query to find similar investors - query = db.query(InvestorTable).options( - selectinload(InvestorTable.portfolio_companies), - selectinload(InvestorTable.team_members), - selectinload(InvestorTable.sectors), - ).filter(InvestorTable.id != investor_id) # Exclude the target investor - - # Filter by same stage focus - query = query.filter(InvestorTable.stage_focus == target_investor.stage_focus) - - # Filter by similar geographic focus (partial match) - query = query.filter(InvestorTable.geographic_focus.ilike(f"%{target_investor.geographic_focus}%")) - - # Filter by overlapping check size ranges - query = query.filter( - InvestorTable.check_size_upper >= target_investor.check_size_lower, - InvestorTable.check_size_lower <= target_investor.check_size_upper - ) - - # Filter by similar AUM (within 50% range) - aum_lower = int(target_investor.aum * 0.5) - aum_upper = int(target_investor.aum * 1.5) - query = query.filter( - InvestorTable.aum >= aum_lower, - InvestorTable.aum <= aum_upper - ) - - # Filter by common sectors - target_sector_names = [sector.name for sector in target_investor.sectors] - if target_sector_names: - query = query.join(InvestorTable.sectors).filter( - SectorTable.name.in_(target_sector_names) - ) - - investors = query.all() - - # Transform to InvestorData format - investor_data_list = [] - for investor in investors: - investor_data = InvestorData( - investor=investor, - portfolio_companies=investor.portfolio_companies, - team_members=investor.team_members, - sectors=investor.sectors, - ) - investor_data_list.append(investor_data) - - return investor_data_list \ No newline at end of file + Find investors with similar characteristics but exclude investor ID {investor_id}. + Look for investors with: + - Same or similar stage focus + - Similar geographic regions + - Overlapping check size ranges + - Similar AUM levels (within a reasonable range) + - Common sector interests + """ + + # Use the AI agent to find similar investors + query_processor = QueryProcessor() + result = query_processor.process_query(ai_query) + + return result.investors diff --git a/app/services/__pycache__/llm_parser.cpython-312.pyc b/app/services/__pycache__/llm_parser.cpython-312.pyc index 7e4223f87d770f71be87f76752cce071e9fda6b5..f816176fc3305c8141e0c00f344afe21877a7608 100644 GIT binary patch delta 629 zcmb7=zi-n(6vyvJY}bHEKz0yLbBR-@4W+f5Mn#~qz+j(LXkltux)7AM1Fe`C;(`Q1 zM#>P$LWq9?RCz-PsiCzkF|d{m1|%4mSYYFwm6d^$?&Sy( z|GasyuQ|-zEg-avFnWo4D4>1Z#a&{e0BW?%stOLIKHjF$?ek{zoPEP2-l+xTBne7; zr6C@;@Y(x|)ktr6YR(4FfVguvqBo7p@(&$;wG#hq$i(atgubKSkI@&s9ID4!aY%+V zTnou@wBg*0SDOlXROXlPE5mQ0*QY&Oem4v0V48We6Qe=}@~DPAS02@`6`}dywxJ$2 zh^W&`UZ-$mIgpbg<_skz9d;$QCP+v1n@Vz(`E?~VWgux?0+KeV$SsvuXxgsx@+!^F zNHCn0B|uq)ip&h&p;?E*y1P`U8y}(Z3EW@z|3~1RXumbUB-*LX#P=@TB4cTLxyG$= z7v>6|$`2mG{+0Oc+zV`O{!?GP;j9aFi$a|()LBBEovWM4)!Dhac~Ezqz{BCxlIIqV zm;*UEg?SYvnhx`cr3vDyzNI8rm_Mnc=OiHMqy!|jsK~a;O`6V6af4?w0-zSW0t;^LFhKZ=l~7SkoIwKy;)VrPL(qdf287lzkBuP?c_{Pr@alR3+spE$Wu0ew1)T}M8x+{}S_?{uM`Rf*_xOD@Zt zn+$X`iFp}C*&^(UY)vFg^=e9V)$%GzY{-zr6$z5KMul#uyhP)6jpvtXG9|%ql9wP# zN>u2w!5K{$g>{=$kQ$eu_5{w4{{MM>!8q)-_Q;saeoCnt{Bc&siJy{sDzDyu04u}K z?Uy*$`txQ$40NPmK7*o(BFsmo zCK5;WOeMNvdDBW<0pmDRob(&-)m`!wv8wHveRG1lqFVbX@@=mzh G^~qoFzLA{( diff --git a/app/services/llm_parser.py b/app/services/llm_parser.py index bc714f2..99d01d3 100644 --- a/app/services/llm_parser.py +++ b/app/services/llm_parser.py @@ -176,7 +176,7 @@ class InvestorProcessor: async def parse_investors(self, df, save_to_db: bool = True): """Parse investors from DataFrame and optionally save to database""" investors = [] - + df = df[20:] db = None if save_to_db: db = get_db_session() @@ -244,7 +244,7 @@ class InvestorProcessor: async def parse_companies(self, df, save_to_db: bool = True): """Parse companies from DataFrame and optionally save to database""" companies = [] - + df = df[20:] db = None if save_to_db: db = get_db_session()