From 358898b7db87bde4dc848f6a8b4ad18c4e26a7cc Mon Sep 17 00:00:00 2001 From: bolade Date: Sat, 4 Oct 2025 10:25:10 +0100 Subject: [PATCH] feat: Enhance context generation and report generation services with improved data handling and structure --- .gitignore | 8 +- .../context_generator.cpython-312.pyc | Bin 13873 -> 13565 bytes .../graph_generator.cpython-312.pyc | Bin 35112 -> 35202 bytes .../report_generator.cpython-312.pyc | Bin 15915 -> 16106 bytes app/services/context_generator.py | 184 ++++++++++++++---- app/services/graph_generator.py | 3 + app/services/report_generator.py | 16 +- 7 files changed, 163 insertions(+), 48 deletions(-) diff --git a/.gitignore b/.gitignore index 78abf35..0f3167c 100644 --- a/.gitignore +++ b/.gitignore @@ -2,4 +2,10 @@ data/ -.env \ No newline at end of file +.env + +/graphs + +/data + +/reports \ No newline at end of file diff --git a/app/services/__pycache__/context_generator.cpython-312.pyc b/app/services/__pycache__/context_generator.cpython-312.pyc index e1de3211600d4eac8324206eccb1259d4f24f1ee..c3983aea93113b00528fa760f7e7ee982e4eac59 100644 GIT binary patch delta 3523 zcmbtWYit|G5x(Q`_cZgysNc4l^V_TGvAE2jIcR;!}$Q}}G{T-iITGt>KL8*G7ubXb~ThuMT| zSVmJ3IwFnBhZPd4pNg=rQW4o*37JGu!^%miS3Y~CxI9AtmELu->0BxiZA!(*FNHJV zWFpqolNudQM3dP})5#Qgsp-(co+c1Yr$BaOESijha5k1oHo+dIkFHq$0R!lxYHSr~ zFqBjv8jg%aCq|=jpe2mTku;tQQlrsKW+a_VMMomxY}hLWdNRjIz(k-DLq>|hjvv6m zLDolC!6|m2)_|F#G=@hHiwOq58(+XbvljSfg;CQ(z0gWguh2>=%fh7^EBs|erQ$MO zjkWI5uv2DHUZz8M@LEs`!!k2mQ?qbajYTr0hToD^(+oT;uiau+!AV(2BCCc+RQA5Os<68sN7T9=M`NJZ-~DQ<@XqgRP!KyeSb&dV!s=*b~P+aWAMN zwDLr+XG`Y5W8fsAUj)v}nXF~O0Ivp&(5$MQCN5+q@pfOK z$}-Aiej_v+DjFM*vEe=?RaVY)mnDq0a@AFT=g{p#IoG@Xcl;|E)Y@He_1uuYp_$dp z9s7iHlv%7>)=}Du>jT#Y=B!B5u;5zqFZy$n$k&B5U1f$+YOiaqX>OcC3P)M-NOStX zsh&oPy2q;9k;bzSTna6QR#a$5H_~+HN0H{hW6RGWMeR4LE6@(&LmtoR|C;)=f~u;X zQQw!SttwtoU9O=_ZW6d*rd!Y(ZbaUQ&&G?}d}y2R6TNQ@r)OYxVBUyK?xLv?nHqDa z?uB_%W5IN|XnGEro-3G6;sq)yNn+-ix$*e}yfyF%*R+oFWBA8|r>IKX+e+T(yHC;D z$`WTP*D{#+B2$5P;Z{>`gXH}NeQyW*{vI0R@|Zfp=v`PewsZB$$NHbqoxlT2x)iO2 z&**n+by(t+Z6544cwOI4bMOOw=aEBXOAm=#x~A4&4_yl(wNt3Iy%o9{5;&8%X~T%~ z6gfZQ{6($}aczRhK0ltTB+j>QP?8G6IwQ5{W(LYC%4A<2TSX0jlb27{Oe#|cA@+fO_^8ZF5#l3LApS<%CiJQ2m3qnQ$$4ufz4oPbNlYV7fc z#>3B_BObqx;R>~8FwMw?%9i_-T-i_0^a_S;MS}|&Tt$Ni89dnkB3FmFy7>^|nu}Zz zalss@CgMjb9&T+CH4VLGqV6GLT=;;LqJz_9$8>5_!r4_~OdVipe@ zlL$`$lFE`S7MUP}D12D8OD6Wq7;H7~mR-bBa0#Z(#NY3md+71ykIjCPk$+aZC7KKt zKMP2+2Nz(Y#ZG@0K5MZzi`Lm#G7_CAG4W^;M9Cu3NbKxcFiDif)6FJR*<{$O2734t zOC5a?zGn&1m*BrGRvAZ>rl7;>k-v_~jFg6lt!Lb8Ru}Kyg{-@UYWsW@s`d(I+q?pq zz3XzR7`X-?V_2{r5B(3Cobg8Y%*2cykgg9l10~dz<&o{%8SVn6KNmCcJ;H5(uRETl z`{1f$r{fr5h$V=9C-he)li^j^Tz6(OhL_=7otL|dxtUKlJB@;>FfM;w_eYY$U%0Ny z0^MXvFNP_ld!OAM@!Q=)UIq|$NuC)8$#5cya~IA=y%Nv{Pw%Lu+u+0wduN(VBxhub z3ACOCV3TS&TjDb57{C{87EF$eo(qF4*h`G5L6iu>*%X+>rJ07pj$?roG15n%fxr(j ze6A$a9D$nz-Xw5~z>naNe@F8-O0c-3znE$rNsq@f(XC~(WYReNmER-VBLCuVRX;k}RA~N0SktgMVwZRUIXKvKL;(Rwp2g zx1l4j)0o3#hTLW3jwcVk=f?v3IC9c5`xvq3D{x;+lxLp)$ z{ixQza0=D#SvA|=(%jVC(iP23$lR1u7tA4CZCc#}My6EZzOSsfuD_;Vv%8A+7G!V9 z)g$|!Rjd20(>G7wda-B?B5N>rs$lI}XBjeyRjMB7C_~ka3($O@VwAd>u4zGIx;}Vq zP;j`5j#lJo&9x&($Gw(&7e2)O>BvJyqScidtcp8Qtz||psI5h{1F0SJzTc@E*Eq}E z5aJqmUub0*`Sv4jzrY#SS&7XwGl=hmE&sOr7WSa+ZMjk2+Ob;ao{#53sJ^pM*Tp-g z_X+hx;O-C_8t+Wqp5j}3@@EPS2ZTUy>HOmP73)1oA#iYAEh7OaaMXrJR$a|}OBZtO z%kSi!{lb?ya3lNfm3_#*H*Y}pZr;|D@5)DteJ4@hN&fj4P~VIEnF#8O@MAI57vrbn z3DlS1dy~91C0M;hYa_BYF1)nTTCnc@zsFARosQcbqBZB<^$H~kL5)K&KMr9!XG5;8 zypwnKtvX!ur*nSfe!Af3fd0l^bkQZ1_Zo!$AP1<(Ff7 zu{rLMIk&gyZb$BRepf$o@4si{U55l`L(v&T&S36EF?bhFopA6mm`X znXwMng_Srw;axe@`i$^N2vGTIe+_n zu4d(Ep}rG)UtxI2$hCT0UD#6DL~&-)4xup^oF><@c+CMTp~#ODkP>+ENCG+G$p}ml z5F5Mf82)36X28pY!xH!i!y>iWX~o_ywip7Wjb;Q4uTp=IHs6;@SzB47V&Mlt+q8!@ zl#kQZtRrVEQ~0xD+902DyPB<9h-^^!Q?A*DY3T-qKjmf%Yo702@Z^GvffY|a`0D_V z&x7m0289LXkeRI%jJ0z;vwO=7p*_tx(;|+Y(7pY?8G3I>q}P<%4Homt)4^jjYZffF zG9wb+#)Z+`&V_S}+cz0laVB!D<^fBjo3W9Iyefe32jC`yJ63iwjsH$JQzxcE%oWL6 zNtO;LBjL?oT_7f;K<0_p5lK?l7m83K6&a65_k!Q!F}^>j4C(h}hNkIt2}?^K>L_~q XpQu2A3Vcc#|1Nt*O5?6VP&D%&Gi?D@ delta 3846 zcmai1X>1$U5#F1mNSzcV(F%3gQp}Yl*`)(JQ50LDZ0o)+>o&!u4x5+XQnF-ArqdXR zTm-O<{o$m!>;_1j#zBYy!97yZFF2?7)?&>7NBouX*@ zW4<>t-`h8DX6L=-Kj+@_XZ%+z$}aj%BN-gi#3<55Mlrn2;R6_E#JnWR#;cK-flUbAp2+NaEBj}bE!U>` zV{G2~J3L`~Ex;q0AQBnG=TRnoi(a3fWTC+}8796k33+|0IvEaze3ObN;*kZCgLCpx z*iaOJPz4ZW@FTg}X|ake(q0zZS*u2EcJT!W=bDylDAaNa^0|nh9g-f&R@htIgE*I& zNv%gJC+rHh%pGCIhZ$ytU*@$dh61gjFkfpoIt)C{J!#fFg4Mt(Ik$4( z=Co4l4y~-vsNFIaTKSh4CGQmupZENzz${tie2p_X^AghBOfW=zGMkWmgoKEyRha^Z zcj68#gN;UKlp4$9mGYZoH^$a#Xl29ZV;|LjSg&^v)5Z~6 zIdY#hMvXh;B|Bg9zv#bth(=55Hu`@#_Wsyb6@6-emJH~}M`_9EeeM7?7XO?39o0#n z>Z2un`rt4vd9XNA?-UfRn(uLpqleRt_B+;!dkkk4G&9q8cG{VbTL*GKdBP zw6x#j^(CYQkAL2$%3>H$Z;f_Qsc=}z9khpE!E%H;P2&k_qT$s zeh32In!Nl~L)`X*dyHsmVposG^9o{l6*R9Rmgl5-&YSr$`AI52xzSGLmYCc@<&K!# zN9De4c|aeYpz;LGn}iBk3$$U|$t*KIBxzxC3Rkm;b)DMIaBQpAQgopx)dGCLvq&(l z&~3^hDl^&_MF*wSnRH4ku|F;?f|o35V|F`ad%=E$9a#T`y&AH#{+AthxR~J{aQ!gD!ZxEOJ;d)?_*yyX35A!K!|del>IQ)_=Y0bIjc9mCs16M{z)y1|3&=Y3jQ^Rw_6A6(ectMHw+v>jB(@1YdQHA0K5O+;D`Z0=^ zP`r%d6%?;(T{U}b|J`%B#wINw>5TT%8s~#yyvAj2M{5U)PR&+Z^*z0{wnxG}k$1Ek zwa%hZ90pNb1d)*7WcCFWlC6DFTUywUcFeqNyx;q1ctb1PyWjdV7^+4Pa9O5)&`{%h z-C4K+brRYP8p>$V-rf6H^m=}*+(pY>YX@j~-DV`#beuLF*M}9_x}z zm2Vj)%r9xxv=4A!GmL4FiyLz;XJ5+3eypQ%-NpebyKnal$9l$T z&-ixFWUObF_RQ*Yk?o#&TC{LGYCQ9&+=45owW50rYno(NpN?DdFLzz)y4fGAIYes? zZ7k55WBSl>eRPZtjnPNOKebFGd88&KrudQ7?6}zxGnZ3y`Sq&X=9+lvo|`SS^nm_E z+g2-mVu+Rw>7x_0WMZ}JPCB&f%WExk_n{3{f3)%T@#v^N2I0)pv4!p9XY`>(*jg{= z;Q*#MA3_f|jnKd!;~@zRd@B$8MF3+PKaMcTdw9r213!hDTWA)5=@!}rU?PCALl{9A z6D9;;x`j#f*-r>jOq)k|khU?su9eobZ8hrE-MV)s=8aHqM4vxPz2{(d1LwwME36wM zI?AQ$p+H$c^Rmqd&0GsYE7!?E46WQKPCx+T80;&hO`$f;&4LyM=PatHxIo$&KrPIj zPHU%8YvBljhgA{Wd?#kY&G%v^5Wv{SPavFt6yW}NFKUE8k4(b5akmIyY!o^Xx`kfM zM3nG-s7?t%Oxlz{kct3CRXC5(Ai71MP6>@7XuaZu2+Uq_N(3eX7`^x69j zSh4}bcO~1{OKV!U_UqMMx4qN)j0#ic*{3mmvrs6Qsj!WhH!pi6l|^m_OXJ~AAtj0k zPCs`Zp@DZ}{ybbG=5HL*2-ISL7)9Rk5NVW2T>`p+QYQ= zu-?#3YkTzG0lj8W_lINt73yDs*-^FwR}Zk!PF%K&>rDm0_92|)f*9dR?i_Luz_`q{ z;X)_5c3i!K??#y8dJuwq1mg@sWI#ax;}YM7#p@8du+Blg8#O-^7MOlv0m}>!Me7-% z1?$x&c3}1VLMK+%-X-Fey2XdihX($CoDbu4Xk1s9V(JQ2(|c^gz&p4uEJB3sMor;P z;@%Y4BCMfst{_@N+!De$ZY5POs0BD3QSKXt$!y^9xB)3KIYoFoYJYP*7R|%M7QcQ- z2?_@a{CpwbKeGuQ!o+}L8Aa-Tm-^s?+NY8gv`8pE2Jse?zIb!dfd9@?0&i9IHi(PN zXTrXFf*_VAEg7Qr+Wt~krntZ+OKf8Knl;G)Y~+81kgPR{g*TP27=UC!0gRyl0Lgv% zT5^9;G+!)ARu_o2>x*l`jq^!x**2Ot``_Zt?f!O z02`6b#x3h+^ZPItRK0gl9~z~-V_&0|-UEg&TgW!<0-r)c%pNek=Jn7A_c+M8GHg3myMN{Qv*} diff --git a/app/services/__pycache__/graph_generator.cpython-312.pyc b/app/services/__pycache__/graph_generator.cpython-312.pyc index 45c8c74483c3950cc4082fd9440acee54af8dec2..86f932d4084cfdee398e91d5e6f639c4b6f2c48b 100644 GIT binary patch delta 5052 zcmbVQdr(y86~Eu!eX+ckhky!;D7XrUJVcCwhyp&6R7(gFBf0F}-MzA}es>XAHH(iZ z#H32TCSqpNlD5-^I+K{J)3o^`NvDt4nzp8sc6M%JG)1lhRD#w0^l zkMR^i7eOF`o45^rbHLJVotBdVw(jI>-2GIyU~INfVpB-uT^Qo|t0=n`R31xfSX(Xhv1`d#IHS8xN8ExXDyx zC`thbnt@(6B_Mg_9*^ReRS^^phl8fF7Rw4!5|3IctjSBzbtytQdm()hsbGop)#l}> zXoweNEFtB5G>a?tb~xrl`dcP=d?) z0u(2#8NE~*nl#cIoACjnmiBFnd&2>zMnbfWy<1eYYBe^o1ti)hQ&ow|`pj{@*D~5= ziAEgTq=@X`t(*ov1Kp*Zt#XuEo50a&q|HomEUe=3N%AW}S&a<&Wr}^%WdQI0@^ySZ zXo4PMs5}M`Enq)dkegZ=4tnV(cy|(dC%fcunzy3IHkMvIXQ3O#od`Vun&@>&n%FB* z+KIY*5w^2U#RZK!&}8c#*8AO|3>XrQ}?ZwtE zgf8~Y;_6bQ6ZIf?5s*mK2jI-Tfk@Yiw-P_wvcOpy0HbE`Q5r`gj z59bQEMV)P9i0LJf2+o6d_yVlVzGU#{u)A#Rw$` z3lYlLcvVGgBZ!kGdR;Q2jnkP^yb@orBy{kNrd1#UadejTsDV%@;*;Ik)|gp*dCr{8s6xt38t7dp-wEI>WY4u%R&K}l`w+SikXQ5t0L`q1k=R-a z@+cfqm534wvbELDn2bKRW6xyz_3S~o!sk z;7yU`yU4>k8(9m5jS}cac4F|6#4>dLw=3>#ITt%?Qw^JU^M zq|sz!yP4uLcd}*n4e`}Y^KqHo%}uvk=Cf@JVl&H(CbzWT?O5zx1_jf3{YsI-e7Ram zYjejeagkBcG*CGfV@2leoaWopmuJQRcBV%MrFC41k&2}@L0B@)P^t}#J%l> zw+v#@EF-a^&DKSouiM#{>iPd?R5xz?5;+rpa^n&V=1(_&c)ODDTE~(MED7{A1Wufv zF>+5{U>Uw+^3$^jH|9J2IqH6az@spm(dSV2IzTjIT`NChZVJ71U8{cf+$xzD+Ntac z?Uw0KkIM%~6+cRDqF)ixZ{dyf)3dr8$HvsA=MmmuYpTnOzL=E1VZqKk$h9M#>&f5Z zuXp-H2v=SAIgt!8vNLZXp1=GMre!E)16=0AJOYwN2H^PH$JXpz7u$_yj78MeDn%T3 zN~-EuOb?;LEJ*51${KXSDs zkK}@w_(PQ6R-7jN#6F77qX-WJXzaf`SKNnzQo4ZndY&dz{oF;n7*=G>=vG`2dKw#@ zM6e><>U^~6749P1FCi4NV?C=1{)8ew25lhCrZFV(dp#XwbA06k-NcNgj9!h8OK~$f z$hP>3$*)+@w<3l)avv2mwNB@A1c!GKLJJM8>Mt0Lj?VxFe-FqC(4G2%LGs=RU zI0x^OC-Iqlh4X^)92y2a+{2P^`nbjVW*=@j&@+3=7Y z!poM*8Kh--Q6UW~YEOhpLDdtY0eS`}g~_KSqyKJ2ReIst_%Len! zZRp;}?iyT07PHYoM=dIxMhH_ixLyADXPnqI=*?GX_IJ8OB3G=sRLYPreJB3`iYd6)H4?e}I;Q*;~V(t4-8` z&ckT4xRn52^=D}E1ZZb=(+43$SAO>JjeiQwM-b`(@Y0Slr^r!|An7HyUcC0B0gD&K zMNAX(?Iv1`9bzrgFY$*_I?mo6Dj?_BCqu>LTk-s8CLwpQrF+(|{s63+L3U{dk4q=G zPTP}BdKWX(ftk!yrs?UhQ+wv#gWW^y>i9z+gExq+342YF+-|twM_fKmE;x^I*EPf7 zkY29%o|l7){uMwq1335trDyKmX1KoJzxUf4dF64!lCD`>H*_vv*4o(uVK2osmH}vF zi=H}5;3X?t0WBt2D1 zV^AK9+$s^Bi}V~U#4a)OG!c#*%}PT9YEM84YZg40saw_AmyAKyM6l94b}^RMFbCVs zMR*Bk!Sj^wpa|_WB59;or@x8U3jC-ZgvrqsRyDkmY+}mrynNhVEh!)=!5+LK>j%vu zP_n0o=dqWDZRS+;$Y&S!<+Ils3s~dc3^Ngd4id}S|6yUmo|VWhN@UGRK$K^zZ{e)%AlE}^( z-#U?b@}6fhPuKiFKD&EL6f*NZ77Us8DG}VKQrLG6L<VZVD8gjrFUUG zlh#WSsTDqQ;iT>jh5S@S`%e&lhmb&c8{r*LhW$5$1h4T0wZ28>!hhg=ZU zi>&_ecBf=8b&-TU`$YLz`IM-?Rn<;Qmy%Mhnz#-Y@WaCmWV^!M=NElr$H^U2qW*SA z)3gNcADOsL9~8%P=FmZ$1rDiMAhCUZrI+G*@jnw0JZMnyJ+^A3h-_uuBW1Bp@YhW6 zKb!CnXTD|*OF_4!QXU(NDoec~8l)ZAhkdq>N?tX9<(QHBT5nwcHS6j?$ldRk8|gzZ z1pEnV3&4~}2>H9gM2uI{1d`TpLCAYg$h;(^4cm^S?N1x&e^bc0AQW8?mcAv}-x9Lk U!q{-J)u2FqMhfrl7}E{(NRcO63_@tHlti%nLm$_>xV|#7kLuW+^@K&cyqjdDUdw z?3VP@$j-3Bg5LIMB-F#|c-^!jdnLvx;mWkFD;A=OmFTwJL7&#iW>Y)vDE9jo_$w6M zRxh&-)os)59Z^k-hS{h++k>94X4%|cP%G8&B?WV4?#EEc{%Azotpo#|n&@zeg!H@U ziQ8OVUOB|}@}~+)Qu;{74g>b{w+jk-u`JyhU~_OMGJbn+G8@4 z!>VoxMl`k+$_c8XwRb}ok~@@;%4GEJ0BqtP7FLyjQcMB(0U#3#5V&)1AyZk=I;o34 zSmZ7aky1BzG8Tqu1P}$p06PfOTz;Y}hhN-hH50%8Ues)dk@{R>adEeKZK^Z%cq*E4dnDM8c@$>6(3)4HlvJRpOI&G(x}7J5>rZEm86!$hcor0oB}Mz1t};ZsX1t4W1?5t((dgf_Lj#Y0k#?j zpw6I~-Hmn=fqNRehaUX97I#i7gl&LL0FZ>8|7 zHr^t4SKn^ub-SQ_weB!@R?T+s@2e(J0?Mgwkg^k7t6MFUb6%-AZXaiBw%5&fm13Sz z|Fb%Jy|D?w785uux1`jW;tQgM+tdn}HsRWdq zM}NfP4;ME|I}%eo0VK{tOFiR##JQD4y`vuyPpNOb*Hs#r8xBU?W4{CB5<9M^HY{y! zSHsb$)~WaeC*tWOa+Mttt&0J{s%WcVTjCu(C6d{SF}fax)&^gY132uO+tt@fc59 z)?lRi+s5OpcotK<(OnAbxm`FK)GHXJ-t-?*50h;MoCN$4a29mTAD51Q8i}Ek8<*Sv z4ga-sxq4$_-KI1IQ+wO{mbz{l1_!ZEsg}LsM6Ru^siAX5oZbRTFjF?ckVp!TQA?M0 zlgS<)*tV>9C)99EJh?&ETzAW=>MCUiA+Uw$R8PJp%@|6D(Zhg4P^7Ww&WLYzO!n1P zlvlGypy(lRTlA!kXty~2bf;hTlD|SxCQ1jl4acX6JqqXp9463tX?x8U#BZ>bcGhrW z3Nuba=q<6JqFa4Iuf|?Rrvbot9&;xfN$e9CeF~V$Uu(ZJ{}P(wa7w1uXk?=ROS`{W zTAk>)zfH1>kGz_AQ%=~W1AJF!q4WVC=&b2Qh`BA-9K{U{D^}FkGZAt#ZU~oGG*r}A zumhOFuw*9Mt08gZ7*^~$l-B@{qnpibF+|`p8?iwr(Y_&&ARznRkzN)tt})S=afd|2 zTgaHU6vG?Gl-PB__a8xqU3WW};67?H-5OH-nr`pBS%0z5V2I-@VALza9$nhS9?v34 z1MS{ZP%i{L%|8hiZNnM@&m6;V@U-9s69Lsra;k}vuzYN>vDy348Uj24utK|+TFrq6 zCcd=tZ}CH?$thxV(b2)TZFkKFFT`1A@FG{Y?Tp0~rrSc%-Bi7SC3=#&vjZiNs#~c> z>SQN)Vta9($X>sMJ{cuY4@w}Fz05CfzoRLEQ4xQ73Us@IYOsUKwO1e>mswc`v#!GE z8^E`KseFA`S$Qp*Si5dhH9Dv1G*lu1ty8y&X?00spTlGpUXCe*Lt(n3W#7Q}+3&=GVNP$c+n4R4jmsy1Bc@NS1?j97NlOWaakK`w8?|HgTzgO@vVZ>I8tG|1Z|_}F9pAAxUwVu`zBfN* z6`33L>>7S@Z;59K-U9@PBNmM5HYFU>de|=@vda|u5ub51q+z!EV7?9T5+I%D_O9YB<@8_q<|Lm5{JsG=#&I+%H4`;oH4DCStClfcDtnD zo?+yJsTgu9Of7WO=}G)jZT(7%ng=`sb+qbZM6^6}XxIecCH6^UJt5HUr)y+MsNc6fh zEC(a)sLUzrys@vGHsXtYjdqiTxUhlO?Y}&Epz?Uvsp)50&u-dswrT5E7IRMWVP~JE zuX#9$Hy&v6Om$8>ys~fE5zEDbnO99x+7_ul<%o2o@`!RSBkQQ6-!UK^&pf{F`J5Nl zolxJHadyMzBaV+U+J-GKr7`|;Af7jnJ+SH6lox7FEjv?lc2jF#)1Ol}pG!^Wk%RU2 zMKr%twOu$Ei|5PqyOf_ zdPIEa_Lv;;$tn}RbEt~k5oHm!0W)Te8AD_dqCWD9;fq`Sb1mtvaMZUmq%39!X$tx? zsZ|8D(~@+>Y?G{CrkkYn(hH_Z|1cF?FqNG*Wt}%=pEu=PFwObaeowm8DqS^+hxq>g E0dYL}8~^|S diff --git a/app/services/__pycache__/report_generator.cpython-312.pyc b/app/services/__pycache__/report_generator.cpython-312.pyc index dfc046793467ee2e8fcce8d8e587c79955fa3bb4..00b4f357199a0aa82e2dcc4584d1f891daddec95 100644 GIT binary patch delta 1701 zcmZWpYfKbZ6ux(6XZAS@0=vKhvO;+*2)OL>=;Br_h+0BXG+1LR4m%61?#sJFRk}p0 zKg5PqmlGxa!?Y#p{G`*92_uO;7 zbLQN0&dhZ3w-M_#K`;Tbav$9v{J!ym^&dU7W2@mlyo~=aG#2Zq?d8=5-dPI32x|l~ z#6Dx;fh3b;^9g4fE)eN?B@xG*I*?=}%|><|FGsKUq2}cf|DL1@M?{XSQS!Wm;aLi9{v>B8F*`Uz~$Xfa6sh@cTAQpmcVC>Rf zu#epa_Ci-SMy6}H%N$u%MkvFUp4P7V%1F2_5{kM|IaP5zTPnjYR7quH&)UjGTLn2I z7lmpH%kf|4>K22-#3jXmf}+$LNW81)1F`T>B$`l6a;!Hx6bYhGKr!`*;i2GKgknfB zZrLAS5b-99Q^yiMj$54ggr&2~L*gbXNYf62c{{LX9@~C&`()8f=`Epglu>z*V?XIT z?t21sM%!qs%CLg%o}=(=&8eCclk$9}zv);p&Ww8|^eRUfsukx7-?{&s_XAt;7slD_ zswV&nwx2U|RF>?|6HCb3QU$GM=BhcPwTQb~2nixz`k}B+2Yt9UtIWEdP!v8Alm}xH z+JMEZO;AdwvktNOUTWd`!!c1-*oe5##h^l>LO%SaeFr?8K5yU6+4`tPq7bCePhkMp z6nbH8y00+8=WQe!MYk(PN^f28@1>lb_)*DE@KpNi(qCA(0iUYq;}22&VGJvKpas8G zIm`df18@@H$5wl~&GiQFlsgDRTT)rF@OsAQa5btWr!A-*AMp&Y9?mD!AQI(Jbilc>t=)-&@febwqU?0B zih1GG8yDq4w3$X0#HbkFp9m#-B~cb#6}a46&c8-1Xe;jaRtqso;u7(2NJjPe1MhP9 zHooZH3OzWZq5Z%PO58)?eah35@L@3!M6~Cqo5FDduF{2~Pf&%E0;Zt#;3Fa_*@<`v z#Yq3remdO?>7Az>9|bK@^8ga#gCxyRKvp>Ipy2Bb7UK+!0Q*rNe$e2B63%bjZQe=U z&QUmok2U791!V3MmvBK-8GL}(G8l5X0nRlqK> zr}o@&H{Euxx#3=OdD*w+SIVze-gI}|c6Z-!ci+h?e#$cS_VJD>ZmR!N;YLpFGnV*h zj;cT+s*be2`3RGF&mhcZx>D7tqFHOxEkiT@(6^$Xiy_>{^d-*|7LwUFB@LY}?gtmw z>A`n>1FzN-onmYw{b)xb`LwlmkU0d1{0vEBDA4~i(#lUOuaz0y!tb@@!)$z_WqocP zmz7GWfNV49V)8=rDHYW|Jm2zh-vP*(?iV>#jtb;^&nr;%nrHxyBGssgCXib&>6v0D zn~z6Tv!=38tDuQiV6u(|#yz8hN0-iLP!%esX;=xkZ0IglSGr+6F7dB`4!qUxgn2mW pUkXd{S${2@!$11-;6wb#U*(tsO>@9A2R1$9*YVKw1W?ps{R=VJy)pm* delta 1488 zcmZWpZEO@p7@pa?+56mUDJ|F1UMZHgw-ic`-nEw$ibSA5LkMlH#R6q}yKS%B7jwr~ za|Hq#+XMqRH8zT-egtcvhI&SS_(S|iG!i3F3Ah%-7&Mv~PsL#Pqx0_J4|O(q=9%Yt z=iPVTo!y^C&ZS+KB*~8W`0ktAgZBe7uD>kU&0N;o_%Qp$+E_@y`}at(KV2S{PlzPs zj1eI%sDz!dl^!7_T;eM$%)<_Y+g9Z?`vcEmcs#M{d=Mf1C&Kl=h&+f;^G8erNS*fz z9x(I_-ghoi3q=g|XxA}1ArX21^NqnLpEu={a^DQj4o795y^Up!e%x8DA{32^H zm@go;@I?XgpIB|?wRa%7h`nvURG^FDfmobsGS)?n(l|}BBaY~r8Xh_9?0RJDN^dl> zA{vg#)C&^n>hwlrx)ivrYrXe@C-XI}1y~MH$7-FmwH94SsJfMEirO1W?$?QuNJI|l z_Efw#wlAvDa7ed=;*ovPSdyJ~ROG3cKdq67cgx$-5kmqPq)_EB|EASkHv# zLgklINwk+_o5O;P2bEYLDzJG;7I7e1Wvx%ZHw5GWcz zKfnOMAe$-kX}VoKytGMwyHm6Re^vv2*?;u`MI zZYBk4r33_#WFityQ9oNBScZ2p8h8u`*k^$aeZ3G!0ggeOg{O}wA&t%h=@|}kWv*Wg zgbILhfZYJka?s}|6JZ+XuZ|vqQ|yZ5H8q zJMh%r8@{INzO`3;YcDSTr22#EkG!+Kw(Gw3E57#29golYdQP=XlF9y;r7MMX_e`)Q zw=wI0oY}(6$HAur*3(??*(8AbtC9A8(#fm&@S(t#Qu28z*;2*MHV<^bk=JeOc$+s+ zg?}JMmqjQWeo(X!PBvb<<23yloPHGIctECR# iVo$de;~&`BmZb~tpyr#X=_Y#go|q=s{u=@`^8O7_`;Kt{ diff --git a/app/services/context_generator.py b/app/services/context_generator.py index f84fb05..2f33ef9 100644 --- a/app/services/context_generator.py +++ b/app/services/context_generator.py @@ -6,7 +6,6 @@ of the medical report. It performs analysis on Pnoe, Spirometry, and SECA data. """ from datetime import datetime -from pathlib import Path from typing import Dict, List, Optional, Tuple import pandas as pd @@ -35,28 +34,59 @@ class ContextGenerator: def _preprocess_pnoe_data(self): """Apply preprocessing steps to Pnoe data""" - self.pnoe_df = self.pnoe_df.apply(pd.to_numeric, errors="ignore") - self.pnoe_df["VO2 Pulse"] = self.pnoe_df["VO2(ml/min)"] / self.pnoe_df["HR(bpm)"] - self.pnoe_df["VO2 Breath"] = self.pnoe_df["VO2(ml/min)"] / self.pnoe_df["BF(bpm)"] - self.pnoe_df["CHO"] = self.pnoe_df["EE(kcal/min)"] * self.pnoe_df["CARBS(%)"] / 100 - self.pnoe_df["FAT"] = self.pnoe_df["EE(kcal/min)"] * self.pnoe_df["FAT(%)"] / 100 - + # Convert numeric columns + for col in self.pnoe_df.columns: + try: + self.pnoe_df[col] = pd.to_numeric(self.pnoe_df[col]) + except (ValueError, TypeError): + pass + + self.pnoe_df["VO2 Pulse"] = ( + self.pnoe_df["VO2(ml/min)"] / self.pnoe_df["HR(bpm)"] + ) + self.pnoe_df["VO2 Breath"] = ( + self.pnoe_df["VO2(ml/min)"] / self.pnoe_df["BF(bpm)"] + ) + self.pnoe_df["CHO"] = ( + self.pnoe_df["EE(kcal/min)"] * self.pnoe_df["CARBS(%)"] / 100 + ) + self.pnoe_df["FAT"] = ( + self.pnoe_df["EE(kcal/min)"] * self.pnoe_df["FAT(%)"] / 100 + ) + window_size = 10 - columns_to_smooth = ["VO2(ml/min)", "VCO2(ml/min)", "HR(bpm)", "VT(l)", "BF(bpm)", "VE(l/min)", "VO2 Pulse", "VO2 Breath", "CHO", "FAT"] - + columns_to_smooth = [ + "VO2(ml/min)", + "VCO2(ml/min)", + "HR(bpm)", + "VT(l)", + "BF(bpm)", + "VE(l/min)", + "VO2 Pulse", + "VO2 Breath", + "CHO", + "FAT", + ] + for col in columns_to_smooth: if col in self.pnoe_df.columns: - self.pnoe_df[f"{col}_smoothed"] = self.pnoe_df[col].rolling(window=window_size, min_periods=1).mean() + self.pnoe_df[f"{col}_smoothed"] = ( + self.pnoe_df[col].rolling(window=window_size, min_periods=1).mean() + ) def extract_patient_info(self, patient_name: str) -> Dict: """Extract patient information from SECA dataset""" if self.seca_df is not None: - patient_data = self.seca_df[self.seca_df["LastName"].str.contains(patient_name, case=False, na=False)] + patient_data = self.seca_df[ + self.seca_df["LastName"].str.contains( + patient_name, case=False, na=False + ) + ] if not patient_data.empty: row = patient_data.iloc[0] weight_kg = float(row.get("Weight", 0)) fat_pct = float(row.get("Adult_FMP", 0)) - + self.patient_info = { "name": f"{row.get('FirstName', '')} {row.get('LastName', '')}", "first_name": row.get("FirstName", ""), @@ -75,9 +105,11 @@ class ContextGenerator: """Calculate spirometry-related metrics""" metrics = {} for param in ["FVC", "FEV1", "FEV1/FVC%"]: - row = self.spirometry_df.loc[self.spirometry_df["Parameters"].str.strip() == param] + row = self.spirometry_df.loc[ + self.spirometry_df["Parameters"].str.strip() == param + ] if not row.empty: - param_key = param.lower().replace('/', '_').replace('%', '_pct') + param_key = param.lower().replace("/", "_").replace("%", "_pct") metrics[f"{param_key}_best"] = row["Best"].values[0] metrics[f"{param_key}_pred"] = row["%Pred."].values[0] return metrics @@ -87,21 +119,21 @@ class ContextGenerator: metrics = {} metrics["vo2_max"] = self.pnoe_df["VO2(ml/min)_smoothed"].max() metrics["vo2_max_per_kg"] = metrics["vo2_max"] / self.patient_info["weight"] - + peak_vt_idx = self.pnoe_df["VT(l)_smoothed"].idxmax() peak_vt_row = self.pnoe_df.loc[peak_vt_idx] metrics["peak_vt"] = peak_vt_row["VT(l)_smoothed"] metrics["peak_vt_hr"] = peak_vt_row["HR(bpm)_smoothed"] - + fat_max_idx = self.pnoe_df["FAT_smoothed"].idxmax() fat_max_row = self.pnoe_df.loc[fat_max_idx] metrics["fat_max_value"] = fat_max_row["FAT_smoothed"] metrics["fat_max_hr"] = fat_max_row["HR(bpm)_smoothed"] - + vt1, vt2 = self._detect_thresholds() metrics["vt1"] = vt1 metrics["vt2"] = vt2 - + zones = self._calculate_hr_zones(vt1, vt2, fat_max_row) metrics.update(zones) return metrics @@ -110,25 +142,35 @@ class ContextGenerator: """Detect VT1 and VT2 thresholds""" condition = self.pnoe_df["CHO_smoothed"] > self.pnoe_df["FAT_smoothed"] crossover_indices = condition[condition].index - + vt1 = None if len(crossover_indices) > 0: vt1_idx = crossover_indices[0] vt1_row = self.pnoe_df.loc[vt1_idx] - vt1 = {"HeartRate": vt1_row["HR(bpm)_smoothed"], "Speed": vt1_row["Speed"], "Time": vt1_row["T(sec)"]} - + vt1 = { + "HeartRate": vt1_row["HR(bpm)_smoothed"], + "Speed": vt1_row["Speed"], + "Time": vt1_row["T(sec)"], + } + ve_slope = self.pnoe_df["VE(l/min)_smoothed"].diff() second_derivative = ve_slope.diff() vt2_idx = second_derivative.idxmax() - + vt2 = None if pd.notna(vt2_idx): vt2_row = self.pnoe_df.loc[vt2_idx] - vt2 = {"HeartRate": vt2_row["HR(bpm)_smoothed"], "Speed": vt2_row["Speed"], "Time": vt2_row["T(sec)"]} - + vt2 = { + "HeartRate": vt2_row["HR(bpm)_smoothed"], + "Speed": vt2_row["Speed"], + "Time": vt2_row["T(sec)"], + } + return vt1, vt2 - def _calculate_hr_zones(self, vt1: Optional[Dict], vt2: Optional[Dict], fat_max_row: pd.Series) -> Dict: + def _calculate_hr_zones( + self, vt1: Optional[Dict], vt2: Optional[Dict], fat_max_row: pd.Series + ) -> Dict: """Calculate heart rate zones based on thresholds""" zones = {} if vt1 and vt2: @@ -137,7 +179,7 @@ class ContextGenerator: zone_3_start = vt1["HeartRate"] zone_4_start = vt2["HeartRate"] - 10 zone_5_start = vt2["HeartRate"] + 10 - + zones["zone1_bpm"] = f"{int(zone_1_start)}-{int(zone_2_start)}bpm" zones["zone2_bpm"] = f"{int(zone_2_start)}-{int(vt1['HeartRate'])}bpm" zones["zone3_bpm"] = f"{int(zone_3_start)}-{int(zone_4_start)}bpm" @@ -152,29 +194,87 @@ class ContextGenerator: zones["zone5_bpm"] = f"{int(max_hr * 0.95)}+bpm" return zones - def generate_all_contexts(self, patient_name: str, graphs: Dict[str, str]) -> List[Dict]: + def generate_all_contexts( + self, patient_name: str, graphs: Dict[str, str] + ) -> List[Dict]: """Main method to generate all page contexts""" self.extract_patient_info(patient_name) spirometry_metrics = self.calculate_spirometry_metrics() pnoe_metrics = self.calculate_pnoe_metrics() - + contexts = [] - contexts.append({"name": self.patient_info["name"], "surname": self.patient_info["last_name"], "date": datetime.now().strftime("%B %d, %Y")}) - contexts.append({"patient_name": self.patient_info["name"], "test_date": datetime.now().strftime("%B %d, %Y")}) - + contexts.append( + { + "name": self.patient_info["name"], + "surname": self.patient_info["last_name"], + "date": datetime.now().strftime("%B %d, %Y"), + } + ) + contexts.append( + { + "patient_name": self.patient_info["name"], + "test_date": datetime.now().strftime("%B %d, %Y"), + } + ) + for i in range(4): - contexts.append({"patient_name": self.patient_info["name"], "page_number": i + 3}) - + contexts.append( + {"patient_name": self.patient_info["name"], "page_number": i + 3} + ) + fev1_percentage = 0 if spirometry_metrics.get("fvc_best"): - fev1_percentage = (pnoe_metrics["peak_vt"] / spirometry_metrics["fvc_best"]) * 100 - - contexts.append({"peak_vt": f"{pnoe_metrics['peak_vt']:.2f}", "peak_vt_bpm": f"{int(pnoe_metrics['peak_vt_hr'])}", "fev1_percentage": f"{fev1_percentage:.1f}", "lung_analysis_chart": graphs.get("spirometry_chart", ""), "respiratory_analysis_chart": graphs.get("respiratory", "")}) - contexts.append({"vo2_max_value": f"{pnoe_metrics['vo2_max_per_kg']:.1f}", "age_range": f"{self.patient_info['age'] // 10 * 10}-{self.patient_info['age'] // 10 * 10 + 9}", "zone1_bpm": pnoe_metrics.get("zone1_bpm", ""), "zone2_bpm": pnoe_metrics.get("zone2_bpm", ""), "zone3_bpm": pnoe_metrics.get("zone3_bpm", ""), "zone4_bpm": pnoe_metrics.get("zone4_bpm", ""), "zone5_bpm": pnoe_metrics.get("zone5_bpm", ""), "vo2_pulse_chart": graphs.get("vo2_pulse", "")}) - contexts.append({"fat_max_value": f"{pnoe_metrics['fat_max_value']:.2f}", "fat_max_hr": f"{int(pnoe_metrics['fat_max_hr'])}", "fuel_utilization_chart": graphs.get("fuel_utilization", ""), "fat_metabolism_chart": graphs.get("fat_metabolism", "")}) - contexts.append({"fat_percentage": f"{self.patient_info['fat_percentage']:.1f}", "fat_mass_lbs": f"{self.patient_info['fat_mass_lbs']:.1f}", "lean_mass_lbs": f"{self.patient_info['lean_mass_lbs']:.1f}", "body_composition_chart": graphs.get("body_composition", ""), "body_fat_percent_chart": graphs.get("body_fat_percent", "")}) - + fev1_percentage = ( + pnoe_metrics["peak_vt"] / spirometry_metrics["fvc_best"] + ) * 100 + + contexts.append( + { + "peak_vt": f"{pnoe_metrics['peak_vt']:.2f}", + "peak_vt_bpm": f"{int(pnoe_metrics['peak_vt_hr'])}", + "fev1_percentage": f"{fev1_percentage:.1f}", + "lung_analysis_chart": graphs.get("spirometry_chart", ""), + "respiratory_analysis_chart": graphs.get("respiratory", ""), + } + ) + contexts.append( + { + "vo2_max_value": f"{pnoe_metrics['vo2_max_per_kg']:.1f}", + "age_range": f"{self.patient_info['age'] // 10 * 10}-{self.patient_info['age'] // 10 * 10 + 9}", + "zone1_bpm": pnoe_metrics.get("zone1_bpm", ""), + "zone2_bpm": pnoe_metrics.get("zone2_bpm", ""), + "zone3_bpm": pnoe_metrics.get("zone3_bpm", ""), + "zone4_bpm": pnoe_metrics.get("zone4_bpm", ""), + "zone5_bpm": pnoe_metrics.get("zone5_bpm", ""), + "vo2_pulse_chart": graphs.get("vo2_pulse", ""), + } + ) + contexts.append( + { + "fat_max_value": f"{pnoe_metrics['fat_max_value']:.2f}", + "fat_max_hr": f"{int(pnoe_metrics['fat_max_hr'])}", + "fuel_utilization_chart": graphs.get("fuel_utilization", ""), + "fat_metabolism_chart": graphs.get("fat_metabolism", ""), + } + ) + contexts.append( + { + "fat_percentage": f"{self.patient_info['fat_percentage']:.1f}", + "fat_mass_lbs": f"{self.patient_info['fat_mass_lbs']:.1f}", + "lean_mass_lbs": f"{self.patient_info['lean_mass_lbs']:.1f}", + "body_composition_chart": graphs.get("body_composition", ""), + "body_fat_percent_chart": graphs.get("body_fat_percent", ""), + } + ) + for i in range(9): - contexts.append({"patient_name": self.patient_info["name"], "page_number": i + 11, "vo2_breath_chart": graphs.get("vo2_breath", ""), "recovery_chart": graphs.get("recovery", "")}) - + contexts.append( + { + "patient_name": self.patient_info["name"], + "page_number": i + 11, + "vo2_breath_chart": graphs.get("vo2_breath", ""), + "recovery_chart": graphs.get("recovery", ""), + } + ) + return contexts diff --git a/app/services/graph_generator.py b/app/services/graph_generator.py index 24f7f2f..82e3454 100644 --- a/app/services/graph_generator.py +++ b/app/services/graph_generator.py @@ -8,6 +8,9 @@ Based on the analysis notebooks in services_dfdf/. import base64 from pathlib import Path +import matplotlib + +matplotlib.use("Agg") # Use non-interactive backend import matplotlib.pyplot as plt import matplotlib.transforms as mtransforms import numpy as np diff --git a/app/services/report_generator.py b/app/services/report_generator.py index 5add2be..044c6f5 100644 --- a/app/services/report_generator.py +++ b/app/services/report_generator.py @@ -11,10 +11,9 @@ from typing import Any, Dict, List import pandas as pd from jinja2 import Environment, FileSystemLoader from playwright.sync_api import sync_playwright - -from app.services.context_generator import ContextGenerator -from app.services.graph_generator import GraphGenerator -from app.services.spirometry_table_extractor import extract_spirometry_table_from_pdf +from services.context_generator import ContextGenerator +from services.graph_generator import GraphGenerator +from services.spirometry_table_extractor import extract_spirometry_table_from_pdf class ReportGeneratorService: @@ -61,7 +60,13 @@ class ReportGeneratorService: """ # Load data df = pd.read_csv(pnoe_csv_path, delimiter=";") - df = df.apply(pd.to_numeric, errors="ignore") + + # Convert numeric columns (updated approach) + for col in df.columns: + try: + df[col] = pd.to_numeric(df[col]) + except (ValueError, TypeError): + pass # Keep as-is if not numeric # Calculate derived columns df["VO2 Pulse"] = df["VO2(ml/min)"] / df["HR(bpm)"] @@ -395,6 +400,7 @@ class ReportGeneratorService: ) report_path = self.reports_dir / output_filename + print(f"Generating PDF report at {report_path}") self.html_to_pdf(html_content, str(report_path)) return {