From 45bc62c745895416910b6b91f79d652604813d03 Mon Sep 17 00:00:00 2001 From: kowshik Date: Thu, 12 Sep 2024 21:36:02 +0000 Subject: [PATCH] assessment predictions pipelines added --- .../assessment_prediction/testid/output.csv | 19 +- .../testid_raw_data.csv | 13 + .../testid/testid_latest_data.csv | 2 + .../testid/testid_model.pkl | Bin 0 -> 198665 bytes notebooks/test | 13 + notebooks/test_prediction_pipeline.ipynb | 1431 +++++++++++++++++ scripts/run_assessment_prediction_trainer.py | 2 +- src/pipeline/data_preprocessor.py | 8 + src/pipeline/inference.py | 85 + test.py | 21 +- 10 files changed, 1583 insertions(+), 11 deletions(-) create mode 100644 data/raw/erp_assessment_prediction/testid_raw_data.csv create mode 100644 models/assessment_prediction/testid/testid_latest_data.csv create mode 100644 models/assessment_prediction/testid/testid_model.pkl create mode 100644 notebooks/test diff --git a/data/processed/assessment_prediction/testid/output.csv b/data/processed/assessment_prediction/testid/output.csv index fe2c484..3320ae3 100644 --- a/data/processed/assessment_prediction/testid/output.csv +++ b/data/processed/assessment_prediction/testid/output.csv @@ -1,6 +1,13 @@ -start_date,end_date,open_items,red_flags,num_employees,assessment_type_biweekly,assessment_type_quarterly,assessment_type_weekly,open_items_assessment_type_weekly_lag_1,open_items_assessment_type_biweekly_lag_1,open_items_assessment_type_quarterly_lag_1,open_items_weekly_ma_3,open_items_biweekly_ma_3,open_items_quarterly_ma_3,time_since_last_event,percentage_change_open_items -2023-01-01,2023-01-02,10,2,30,0,0,1,0.0,0.0,0.0,10.0,0.0,0.0,0.0,0.0 -2023-01-08,2023-01-09,12,1,25,1,0,0,10.0,0.0,0.0,10.0,12.0,0.0,7.0,19.999999999999996 -2023-01-15,2023-01-16,11,3,28,0,1,0,0.0,12.0,0.0,10.0,12.0,11.0,7.0,-8.333333333333337 -2023-01-22,2023-01-23,9,1,30,0,0,1,0.0,0.0,11.0,9.0,12.0,11.0,7.0,-18.181818181818176 -2023-01-29,2023-01-30,13,4,27,1,0,0,9.0,0.0,0.0,9.0,13.0,11.0,7.0,44.44444444444444 
+open_items,red_flags,num_employees,duration,assessment_type_biweekly,assessment_type_quarterly,assessment_type_weekly,open_items_assessment_type_weekly_lag_1,open_items_assessment_type_biweekly_lag_1,open_items_assessment_type_quarterly_lag_1,open_items_weekly_ma_3,open_items_biweekly_ma_3,open_items_quarterly_ma_3,percentage_change_open_items +10,2,30,1,0,0,1,0.0,0.0,0.0,10.0,0.0,0.0,0.0 +12,1,25,1,1,0,0,10.0,0.0,0.0,10.0,12.0,0.0,19.999999999999996 +11,3,28,1,0,1,0,0.0,12.0,0.0,10.0,12.0,11.0,-8.333333333333337 +9,1,30,1,0,0,1,0.0,0.0,11.0,9.0,12.0,11.0,-18.181818181818176 +13,4,27,1,1,0,0,9.0,0.0,0.0,9.0,13.0,11.0,44.44444444444444 +14,2,26,1,0,0,1,0.0,13.0,0.0,11.5,13.0,0.0,7.692307692307687 +15,1,31,1,0,1,0,14.0,0.0,0.0,14.0,13.0,15.0,7.14285714285714 +16,3,29,1,1,0,0,0.0,0.0,15.0,14.0,16.0,15.0,6.666666666666665 +12,2,25,1,0,0,1,0.0,16.0,0.0,12.0,16.0,15.0,-25.0 +11,1,30,1,0,1,0,12.0,0.0,0.0,12.0,16.0,11.0,-8.333333333333337 +10,4,27,1,0,0,1,0.0,0.0,11.0,11.0,0.0,11.0,-9.090909090909093 +9,3,26,1,1,0,0,10.0,0.0,0.0,10.0,9.0,11.0,-9.999999999999998 diff --git a/data/raw/erp_assessment_prediction/testid_raw_data.csv b/data/raw/erp_assessment_prediction/testid_raw_data.csv new file mode 100644 index 0000000..0474ab6 --- /dev/null +++ b/data/raw/erp_assessment_prediction/testid_raw_data.csv @@ -0,0 +1,13 @@ +start_date,end_date,open_items,red_flags,num_employees,assessment_type +2023-01-01,2023-01-02,10,2,30,weekly +2023-01-08,2023-01-09,12,1,25,biweekly +2023-01-15,2023-01-16,11,3,28,quarterly +2023-01-22,2023-01-23,9,1,30,weekly +2023-01-29,2023-01-30,13,4,27,biweekly +2023-02-05,2023-02-06,14,2,26,weekly +2023-02-12,2023-02-13,15,1,31,quarterly +2023-02-19,2023-02-20,16,3,29,biweekly +2023-02-26,2023-02-27,12,2,25,weekly +2023-03-05,2023-03-06,11,1,30,quarterly +2023-03-12,2023-03-13,10,4,27,weekly +2023-03-19,2023-03-20,9,3,26,biweekly diff --git a/models/assessment_prediction/testid/testid_latest_data.csv 
b/models/assessment_prediction/testid/testid_latest_data.csv new file mode 100644 index 0000000..df6e4c4 --- /dev/null +++ b/models/assessment_prediction/testid/testid_latest_data.csv @@ -0,0 +1,2 @@ +open_items,red_flags,num_employees,duration,assessment_type_biweekly,assessment_type_quarterly,assessment_type_weekly,open_items_assessment_type_weekly_lag_1,open_items_assessment_type_biweekly_lag_1,open_items_assessment_type_quarterly_lag_1,open_items_weekly_ma_3,open_items_biweekly_ma_3,open_items_quarterly_ma_3,percentage_change_open_items +9,3,26,1,1,0,0,10.0,0.0,0.0,10.0,9.0,11.0,-9.999999999999998 diff --git a/models/assessment_prediction/testid/testid_model.pkl b/models/assessment_prediction/testid/testid_model.pkl new file mode 100644 index 0000000000000000000000000000000000000000..cb53fa102f1b0ccb4f547b8adaef8ebe6e2b8945 GIT binary patch literal 198665 zcmeHw3v?YvnPy9V$=5GAQ5?l~Y|XV~OLFiyneV7zp&zcfA`RH$iU?8xCs{_D*2(m_#=mBRz? z>pxf;oAhV)Pp=<7JkmcgFg$joG|?|Eo#}0yZjOG-1#;a?Z^LwJ@a20;hY$Ia{nw2T z3~~94^ot+&k@P%odUbH`>q-NYQ@qXm_G^Rl;F|uC(Szcf)#E}6{S%V|lO=w|8Npo- z506dpCLHC}8X7MROq6CKw-jQK59S3{4h@a<4;&fu2O<}(86F)Oog5uGJk&qLWxap8 zxnIh%|4&Nef#_zYmz}%of?cnjnHrp4J<=a`*~Com!RhAkw0~@1d|-HDrqgfsTl_Qp z4gQ&atACci$=~Ro?Qh}TxN>lGbaG;Hd|+(m*6Ed_ql5huL%d68Zkt{{(*K^(LEdbC zV{iBL3jW~1QQnkZZ@Tc_f$`y>$>WnVxA~j*Pp=B(BE`)>y$$}E zVe9?YeLcrXy8c=F{Y~nJv(LvX z#FaplnwefPa&REHmR~fzY{DNHH)ux6f3uTY4D#(f#$_>5V z&E-a}@Eln~6NS=9@Sj3?NnZn3n8EVWzUBOQWMJ~}$hqZZA1*Jys=VTsa?@4im7To} zyshPeyrF1ip}eZEkzfDTJ>}KBedRS*mDlnXihF&ayv}T8xq0xmYs%|8`_}L>y!QPi z&LE>lOQnhOX?;!nyMt5X{K=yu<@k|w|r(_*U7I&rR4RkecdO& zKCGoIaF(cRq&o8P!vp>2l{X4GM1BmPCVt)|>xo*WO8rCr zzz9F666Lc?ljSXgyY9Fw=zsp1UON&}u|Q7m>ghs%$o4b6h3Udb|EREPoGduQx2RwH z70U(Jg{;y)!y0%mHNfj}@cM7M)!!GO za%BK&@sZ;6?)ZrOUvEQze{T8x7R~{Mz_M`=PK`{?^seLtBMhBUimqWUw`427@`!&!ZyP@j^OBwXIKt0&^W$i6hpBTlk!YkUp1tS8GgG~cGgou8 zmzMAzLjOn~&Ra?9FaLJ2%HN;py!zuC*)r|>c81e{_JYq6I(Vz(ksprF-PAo>`A&KB zcgnx)!FBX=6T@XvO~S(~@_+L8SzL#D%5#SA!-DcZZsm{npgh_Y&~EU79O!JW$LqM_ 
z_Vz1&(S!EU_nR5&6PStMvw{wu-cDYA^UY7c{p&w8+TX(DVy*8?}WA3dMz zzxdp5=)ck*v;)U#@r!aezB=Wb8MRB{9&*?D^EL81v;C?(>W!mY{nN@%mA@l!>w=Ge zi>Y=N;I2Vc#gPhkF*Q2Gzv z0e-aVdzZ3G;C}r5i5|#@epmGe??FDeA6<7EqoavGrT?z=2Vc_ts&BuoJo;PjH+qWa zUBE^i(>J$279kv};-7Ju>I#Rq;7{5KrwR~|oY4e|%SGVH(d zf0F52TE1ywR6oy~-%j9H?!O89{dk)PEI!<-;F}e}G@q8%JtJH>(JL^|;}0D*gcOA$Rm6 zmETyS_({n>t^A;G@RxAT>%YkPYmF0sk-yT#sK3BhMR~r%_`Xx-fzOfoV$)9n&()6K z{_d{V@%aqeCm@pFr;;czN4_ z1I5`&OOTI!)N0RM^%c0JepC7ioB^*o-gy2+d`y2Jt-hwUhpN2k59saW@lZL)U%C4O z)L!|wa%tV?Z=f&m2jJIG3t%VfxBu*X<>n7iJhMac6!JwyM(yvAN5nbX<#jt{o&)@a zBgU5?w@z8UJ2QT}?!n_ba^<&#<4y3BP5nA1{Ox9j-%0L}cRW7~zM<>*cyX2f0F{$} zD;Klc%$G>ZZ;D@C`B@+DQO`H|4e^VRK!xjRkY0b}cZNTu@~gNW{RW;V3D*C3S2Fo2 z`(Wo6xA7+Ohjy@s{&txE>yYsT;tucw&Wi@GAl?AKAimBn|9P~vIKCM91Uvw}Bz@%b z#{zRbwqZd&fxk4rzjCjU|4f^|Q2LZi9~B<}f54?=GI`+shM51h)QGRtco}#WyZ|}U z4KRPEe%p`!M!%sQRDU_%?Qa^@-F2e&U@ft!#`2^*tGRG`Gn*y|1N^~EvDsUAkNScaa9=v|cu{;z zKF*F0)5;(EHh(?`^V;gyzq9k1TRw;UBj7~&MbvMdELdG}dpqQS>%d#_yp1SVshz)c z<)8h0ZLaZGLVg2Ip>JGHmEEf!xo4J-?(BQG;GD0-vWO}d=@Lear`dY`O9wnhI|tFZHnJ) z^Xqgz1n+_Gc1i!OQ}#QK+2yOhci_*aa^<&-^VLj@$8BKQ%G5Fs=V*_n+MIOL{yAe%U7LSCmJ5 z-)XFukp5H!c_kdP%P+m_>vvzXnE3?#ERK~+fGJ}zreHUZ<74}F~%p5zv(Z5AE^DvtJg0d9{ z@Vn%f3U~*6Q(6hQrR(oC@TAuJxauQ#kM5V9eMEb~3p$>mhxHr(puXTUk`MkOKb18fDgVf| z|G=MS|E2X$Xui&NM)w6z6y-UN;0y2$uJ5Glf_V?(BhS6;;+>%{|02J9b&B;wzyxn1AT;)x?{>WBMnso9^>n@f@m`;seA-;48F)uH(`CoJOjtb$AYi3i!Tm6 zeS3SZ{y@t4_ONTDHyc<*B!HG1<1O*|_P{TdlYfKwLVO-K;)^!FRM_ov5>ZO;wAfxnvF z_#^E%5)2>y;W zgRd~|2!F)nHTd(pWx4F;kKM8SjmvW7w}k!)_yu;FZUs_bJjep37hUPilhp22!TbG?e<7uV<8>JNScFX_+cdRyo7L(=*u zgzt74e-|YlZMZ&1`YYSzbvSlrmY+ZW<+1$cN2Qqm3H{OW*TuZ9V0*#MG5-Ym2)hmZ zM(hja)8zaf;Dm6m@Z+lgcn+0U{sDLgI53}6vCO|j{c%0;i+0d+Z1KE$zH9yQJi2f7 z{8Ov)=x^{wr2odg|Fz9E-=7H2o5w$Cx^X?m2kO_qvh$bQ{3se9=#u+8n!E*m>R_Zd z_#NNFUs3ZG=sNfXPeSHpRvt>30YJL|hmj zKScRgiyw4;Y+`sm_zn05Ptxyvevnvyw>-A~4txV%gxqi+Di6Q-al=1TYC+oko_Cb}m^NZX3D3Y(_iAs^tId}y59LX2qIAfeYwEKA3<#XQkh0>3HS{5lRy91M|Krw>)qc^h9~7O 
zflqWEBI$eHUgn8`Is1P6;34oCN{Ztt%SS5SOv@)UKf8;O+|2z`i{bb5yW}0u%%>>7 zgs#gjfAaK+jrq-wN)dm7Pa=30`zx{_{OVkIe?{;K@Ii8|emyUEh024EO#7_#DH)!X zeaokOoKNQG1wNld$p0lHKU{tM3jQX!s^^1;5dT0A=(>%Jj&}bFJP5tPk(XN(`Q5br zZ|Ae>;xCf(b}8>2@`!WrSW(7l;4k@o1^gDz3)6kGd;iXVdHD@vx%o?t++SK2+h1{+ z%l?Y+bD-}emk#FL5KP#5%D@-=Q}_8AHhh`>5U`1U@EtBmZya2h!??SG9iW`l-fG;5VS%u!B^8@Skb_tAD;tziWpNxA{oq z&u$}mR*F)t;2)d=FOWxe;{`XaJkQGwFB#<1BL0T(SC5JP?9Xt{r-7e6@qxejgu%xu zpX919y1qBbeob#rfEVCg_qVsOirk+Ld_wI~^D%%kl*9e#IzAuc9kty*2Ka^@)}POD zP?Pz`i-9i^pek@EMy@lzMfdi2S7{}~m(x$=>2{}dj8Kj1P_|F8V{m(I5CKO@`anS6rp>3A8V zW7g|yvg1?m5aM6x0o9w^g9XNK!NZUnVt`z^rl0WTHx z{b#~`Wz@VUk~8?Mi|OaMPj>O|H$K_$q1^nX4Ca4A-$_ngjNX5=@cd8sNzij%p5|AG z8@*`6&(-JWbbm?l6YvOJp&nF!_-pkWAHdHtd4ktp@GJQ{zcl2h;v?`f`c0P;+F!r? z3VpHitLVu3?Z34AM*BlZUJB>y7MWawT0)6FjCVrFaL%>5M|LXJacn+1f^BZ`AuH*TeZ`a~|oa*^$ z?LYVpJSE#F54C^49C#6OLw%?M*neg*@{3zMNc^%x?kfX-6Z0zIPvN+okz6o-0RF(Z zftts4$BgX4sX>3M-^Ty0>AG>|gEO6ev%lUy%|G35+3%mx+vsoTZSc?BColH=)_pz4 z>BK*4zrXRyV{bTpORoM>!ub`zdxQ_fdYLvc9(Trq^DDq7Ixf&Zrao5hFM+?L;2Am~ z8J-m%z|TUxBkjNZ_IB8v#@KuFYvmke;AL{Z~xi(%q_n} zH-Z1;effK~v+|cNddnS~bLF>`>+d4?67S>tgm_<- zyT24E&%3B)*4q%gkMCQ?cw9@$`4MUDwc;oEOW^lN{e{0o@7H-bR)5mJq~n2#U%_u0 zu5ojZ`zv)mo@U3l+x`Hw{%*S*?6f(psrU>1h5WEy59Du-8~b^w@@d(lX}=G)+V@RVMjkMo>H{*BIW)#sPN$5gMh_*DK) zT6}<)VOQqIcm2jsc6?XkpX?wrc%a?L(*nmuiEoMD3UG^KhU-(F`Q*qcg5PTo95|DC zV!hi7J-|2VzkPnJejUbHU%l~b9slUx`t>hN{K8&yInS0iGyXnc+8;&dFUZy1ztj0E zE&nP0N{e63K0D+O#B`wpu){cJm;da${$KLlpC{#bimw0s{bnLRu-)+{ROAe%nr2AJ}{N^+o!gVZH~h>z3EUK46?>xBNake_;13AH6bH ze;^^B02dK^B>Z{rs$l;5s@VL0*caF-_~nuEG@kNr`)_-}2l5gpp2CC4LlOB!#)sqk z2gv()$?&N7$mEmA^T{7swE0dqKEZze)$RWcKB;~F^PfKZuKeZ_@rrH6`s@LF>QJ93H*bI{KfkjD<`UZ zKO^K3w*S8Ropqkj9(?jO13KgmDZVeE@ql!N16Fd^UsilCYni_;5Li{S{kc{u2Fu()lO+63%16>$&;+XLa7G zzwue{i{5^&KV`oC_2vD^~Qj8Y^PtZ@2U$@+UEIywN z@dW%B;0bz7<@x?)BK}%p zdOSkTQZfv3LVuy&Xb1h?CaDJE=tug!{^RjAvLS2bu@MauM_&J$_?t@p$@)+A zznx##Iqg?P@o1MErw6|x4lgni2lo5lA4) zx$=?oLEnpV-ZFRv$4*AqgEzo4W}KQGpFH&Pck>+|Ou61F!Uwc|Q{=0*F6h6( 
z4~vv1J_)vqd=>Ngz!mj7=GD|Mo=iRgKZyDV{!^9t{1_jJ8`W?7p)c?sp-0qz+#W32 z@yTl9Q}{uq-voX`T)JKQS>Q4FOW?C^nde7*x|@+4vz!0$jl$(e7qh=a@{oV|`>)x< z!3)nCjQkno9na_M{L;m|Uj*+{{kf6P2Jg80OS-?J{0-Az!a217@ln?EnbP`8D!wrJ zErxH=5c_`F;6unA_oMb>zt;NYw`Bg3>VJED=H@TqeTp>yf$OXINWZAkEzfb3JQ9x6 zfRBKOqP#x4{fB?_$ak;FjZcF8u~Y6pEW{c9;rq7;K3THh`Uo8d@&-XZ{yQ(+-?#NC zgHJZ6T)&w%ep32~cm;W?7#_b@-}49Tc&cXq;T`OX+uQj+15bQiGtFm?<0h;1&TIeW zKVFg>UNTru6hbBVPX%~c8MD8zhwx{icRFsk{d@kiU%l-7$BpqTg%@0}+sz2x#Q536 z2Hw>8nJa&zo^|NUH^B%{D2T-{kGrkZ@A?L zuwR{MW~Io)(Nq37`5Qawd!8?%-{m>?N9Y{>eWyIHA-}zN?wPsqTi~Ch98b~l*~OYg z{xkl*QP^AX2J9;ImCK#-pXWb#%eP;ci}@$iFYfsPJcr6DegrRrr*L07^7URd*$=aN zeucbb{TS+ba(?6UQ!L{tDDTSe`tw!&(cj=zU5@oX9%JVhxBLM4N84pQvrYDkxgX|s z`8{|C<0;^m+VROc7Cxxemm5}+611M&VQnqSo%%b&>0J@^ga6P(lU7(aN{$bYE!HvvE3 z6X1%<^M0!H`Au2z33vf=rQ@Z{yC`^pl)0WG?f6db z7x?@p;uH8IICe^T6(yg*FTuJ5_$xSOH{av6KR)}sT>X=j`#b4+-Oc764Av)Yit!2j z3-AYc0eUKqeEgyk42N<&fG_Cx>hXZ*Pj z{t4tB!?U+#&Q^;hz@CkJxFZwBL! zy8d*rd--k-Z~uCmk&kqm|H(z(!dNi-^RvG(`~fwd;;R2Teyhhn_=faA=`V0Uk3Tf_ z{jc$OUF17{Y0Mv0`s-SM{rL*N$;zwx+woJ)_~Ul=wP5<=Lq$gN!4cyd3Mce?cKa<~ z`18*%$qg?Vg8lMaYZ>h~usUY%NN#HW5A+U4`W@qZ_WV8UBC6-dw?`jJj~epP%LjrZ58@tS1v zQ+%4Xe5QPYd~~HK+l?c5WrzG8`~n`qxc6@Pdv^Oh_T2Ere8(SC#^Vt@i1iou2jTcn zHaqhP^wJZbZdMWFkLLPI<XjBxijTlU;B&e^{_^}D zeGEKj$A_-%hy6f({DJK9X}|klAAE0a{y+`l@oR(a z72ob9iN|UF0G~Z@f8Vb&j>psDN9jN8AM85xl8%e{eJXD8IPEXk!A8V_MEKW5M(y7& z@dUevV|MoLPtSQY-}TE0?H%G_-QMu^ZozV#hZ|#fA$h8J9Ck~$3mDI;U;h_)fxRi9 zVdBW`HP49-aWde-#?Z7oEEnBYuH{i2jK2HSdPNAy{~P4Y-Mv=MZX{B^=#v_`_;`nydbU zU+8{H|6R-5@-M(W@B<#9=Wu`g@%i@$!2NJNa2RPn?awc6lyy9lmOs+kV`d{B>;ukokW}yCA<2Nn-6#miQtEJqb2jVoui+z4D`U|{; z^RWG=32-z z#&0S1^M_vQ_~h%$#QTrj_a6XWsGoSgQqGst`48*H;^Xz;3o6gY(Q2~4tFo`j{2Qg; zs4wIOIa2+>Z}EH${qDZL5&n=Y$50-81i9;e+^L%HX$sqK=NGqp1&vSa4EHM`p0M$Y z@`p(NU9ukFA^e^lpZw*XFEr=MCkg!}-~jqb^5XmXUma|S(7K?%1bwD*_`8=~dm`A+ zWVw;=h>zC;m->Bpe~J9vuMB?+^AtS~eirx&?VuWPJl8oNfrlV>$bssQc_qh<`M=rm zVOl;-R$kQ~eir;SXbv4OtNZzoc0P0Smx#~0SclkvX*)yy=A^vlfwz#a0iR*MW9`;2 
zuRl;)l=G#*6DGeL6Y-$?`)6bPLW)|q^J&0~;1?XJ+I)Og%*XgwgI|@u2R;H%0k1*% z@07X!UgzU0=x_8J&Z+(2*ZLh_v-69azeMYo!4K_>+Kuy~TzAne--BfHkH9CGCzYMQ zbW_(;?|BtN3+pVr?@iF}aHQt`=`DyE~%G>c=&3tLtefR^&zryY-oRU0tO1S{v*{z?t$oClUDzMH{dYB zAICnjt62B@AAqlszrZ=kAMr&zpA3J{^dlne7yL{2G5JT~Kbic~_Fvlas{VHVa?78P ze7fX#QHMP4VD!E8H@t=RV?bPk{=zx(E7`?6Yrc41zWL#V@lGQN)YJ8QyL{hke0`|A z+)JjP%D#f%z^gdY{dxXkf&FU1i{KaV4V6cK+wlor*2~G(Rrvf;@i}$ziT(yp;hfqH z`PFax?fl}FKhf5k^EgKvmk#F>?38){zUY?chh3$D4r5h`p@r_>*VuVZrva_cs~&7nRQhpMV#1T(`-7^BArpJ#S*L*RFg9zNGTX z-no{?^QgSCx2P}pOvfYq_&A@*dL-myRe$sso{Rd>{cZ7*DxbFf`IJ}ja#}v2d~BB- zzlNXE&PaYmd6c{n@(XwbJOh3K-(<%pzyHFWgSqiZ2J3gAmn6?Fc3iACKYc+y0p25g zLhDuU3_iaw@*mabV|6}I{tb9&9zNsikD6zx9rbHp!9xYAs27nR&9@id$N32S_WviJ zC_YTfC)n?r#J`Qvu_%un@`yM{@=OIh0{;zu3HT(t^}J0xUwq4A=97p%i~VWa#QyoG z8~z3OTdfBKpXhvq{)o>%rG9hI=K?=ePWcBWpWvLXtJV4)rO#>YGx!9&fTO5?U=M5j zenRvY_zdSHKj7um9p527k^V*a{%x!y0*~N`^?;Czv zpV0bZu^uqaC%`xS2KWcMzug+n*VS=uk9VNgq|bbQT%E6nLwmssIH&rMkKF`KK*&*kdEU%5;yo{J(1YKpyqf<3zXp60;WK_|%PjZ=*HQWU&7bJ?tpEA<=-0I*SPyz2 zP6PkwvcRCcY{aXo|G-1&H{6eI$IFY)Tx@+ReV^VxT`h04Mjm0`=(JY;X?A=F9OC$j z{EuwTtUP+aDV?k5+xc`|Z$odxEr9+FA@tYaQR0aAk&vHe@@bLL^V;Qchdg%5<90dE zZ>Kz$ejXnmqxD)ihh4>acK+Lq4{gf#eXl9U_kkPF^MwCa30@cZ@zZ1Pi$woJzrm-v z{cmNn!FFg{UoiMKzCHqctK-$SpJ9{i-}HFZl#id-^d0TwatS_6{N;bpUifd|ANn2o zm9_t-@a9^7^cU4j$q(`dZ$9T1$?LCQ{8{WI2`;)#HZQiyWa6w*B|EQPX_t(gztYhaZ2*# z@nNw2+LkrO`YpRZY3g^i`?b<@@t2RU%lU))ebxSPuKKC-lghulme=_~@msR;3LnYJ zD}J)$tD5~DcgX!xyNvm}os4<{JPo`QSyuCP-~H3S-kU34Qu+h14vy~h{SAKr_MPg_ak9Yr z30x0<0Ow)*>wLa+GQLuLY3HkI{DDpx$G{&@_MYU3^KFdIm7hRy6XrMXl)sztPIm8S z9QlX;HkK>DrOYou-${;L?AWJ+&l$0JhkjS%U*I>Lr{M2CXXGc8JzkLC#DnUWA51?= zmmlU0)NlP!U+^1vii@4rgDfx~9ejpz=ugrQ-v4#ZZ^%m_zXm;~`tx|Z&g*aISGW8U zwG;kc2cvVuRh{zO&aa5K=opW;==(VDX5~+QV zQxc#0`qIDp;bY9sQ) zt^vMMmGZ%OmVPcm&tu97pg= zc6{=syE`}M#wS5OHsN}3-~xI{^5pp?G5^s0ebx~^q4~^WKmAu?c%=Kgj~D6wgwki= z2fUzP$MIC>>ruf&CNGeDFc0Op;m@e}H5nf%erJD#iY2QVHC{9@e`{Dkci*AmAb z{DAE7`_hyD^Fz7fH{p6o;2?s(%I?*V+%qTEBgFH=z#s5}xH(c@;Fqp@!mx*F_YYV2 
zO{Ra!o&fj2Po)0Mmwc7mfk)|mK#&i$2YHeBcrorr*I`~`{rcnJ>jFAdJM#Rz_?Y!y z4E*8d59sj(@~=hN?(K5@B5|ujU$PsYUGe%iAIOzYQjRx8@Fe03@CooH z)Qi8z6!9|n1UQ0ULDyA3|0ME!ev6hxH{VakCG;?B|6cK7GCoy&3_d|V7XAtK-xm3P z^=ZTqt;Z}*4X#+$%r^UpsiGLob8ljfO!LcfFGpl>(_ zzhxKi-1Xmo(3C5`rHprU9^?Bv1jFHD@%)L6-$>s0dnxn&c-Deyc+Kt~R1@!@pH;qu z`VDy!@W)QspEyT3@X2oZdv^0rez51eer|jcjHe`wcYq7%7s-?JNtO8%y?lq1?~8^# z&(0ph&(Lu|wf7dmeo^2d#2d2a@?hVO5cU~7hwFKy5#Sf_B=raPE4Bs`_J3vYn_B;eyfSd5;nUkD`?Y@a zA<6s|6>oswz%Sq%k{^$EM7;U3Req2c%Ap^qyjqWiexmQ2q+H#{*T9EVUe!NY`xQT= zEpO)+H-8EILUEVm7wfojXE-0_)moqUxkEkwJ~uvTOul{xcqV<3e=C>B{Kh)2_lk(SqKky*hLG`cI{Ef7DRD58^liT_o!V&Tn+ZnaHD38+K zhWoK(A77li{0sTbw@k5r0`x`KcRt@zyx^cEwtoW2Rn2dQo<;0;Bwz8OF~36Tt1JKN z`mXQ-oI~&R>!9yuzCe|CeSWs~N8ttb7x4w`Dan`Td&Pcx=6YzAZ^iZKH`sA1kM%?G z{pQer$=a{vhv%W+aSZM+-iIU}RKN0HBL%Tm9?>6kx|rv?Rx`g{@?}N(A6}7B`*+AA zcvSKy-alLEmcJvOk@pGXA^eVf4#u;y8{av!?mu_s=AWdP&j#G$Nb>Gvv-bs`wF~+u z;5#ZO|8oCE{IBwBgKyjDd;TMM8oa4LAMuWf*XsFN=S#&GY54|r3OGf3M7urk{iznO zDf!v`Be(f%Bqxbe27Cjzz$^SD_`Bc*@IZF>z3H0%*X4@eg!==)ZDZBmI8s2>0TXVxp2J1oKr;>d5`+($r8sKyK9&(Mx7w|XfI{3Hs zt8XS=sNV8#fY;zN`&)xA+UR@!ldHaikAmBIUK0}s{s(e}+)y9-o$K)e>+^wc^c!$b zq?Ku%oM}VDLARub8v?&#wA}=g{pjuQ2|8cC>@8<9Wnym-!of zvR)tRF}lAypFmzzPT61Z33x&0lf^v0!OefB{385f;2HZt1J^jhUq<|!eZKML*KAw1 z81W1L6?TR64C}j3i{Tgg3tR!eI{x7gWIdlj>3=f(D|-yQ06)MJwQq~uKkp@DyiMi% zz!xY7c~E&C|JM2Y$6e)T;~%>{cEhjs{;F`Ct&@@bFrRUu_kU*>|6XwCB^Tt1--Pp# zpuZ9ODaPw5f93wZD(ExGpW8pV-!k}tzE}1WxX16n0bR%OUz7PT)#G2+UuA#MUep_M zr24~N)Neckz6S5&NcHb#-j@02ThQ<6{SW9H#!GO{?_b0Bji>E@#s7Bxa+{Ata_o|E zg1O%@{Ei}{>v()8KG3U9`Cf9YGvoagd}=rzdieYkt8(MF4CY&E_~h^Zm<_f!{`)w; z2|4mVu*cvjtvsJkG#f1U{Mc_}_8sMu*+-Qh1TVtR0N>F4F;Ag>`NibL2)_Fc94O9; zc2pXS^((6V$>gW-4SoZ^K%OK&?q7-hJ03Io&%VELr2Y|ps$co1m0wzZqkPE@x$mlq zuSovjGx%$g*D4r)#P=9yjK^JcpX~CnOMdju^||p|iu?(92}hDkH)G;`qs_7S3;qoF z4g3PUQ27p4@xQ$8qbC}S@k`_d;^U7IeDGVe%+h`87a!nL@D$Zo{tfcEVtvn}20y6z zrmpycoaxTl@tf=O(ckoZ7yrdA{z5)9SY38|JMe7cyj{+71fFpO&a=yZUhrS9%6EVA 
zjN`9k;{6L7ocCLXK2tyQ@yiAFw}W2;yI7E_PX``r`JgT27bzJc|Fdv^G|-(u{6K+?>$9MGlbAGkN4=A5Zzg7GKJ^;VrobHcyE*^e^ zpGoxxo_=TWo7(TmZ(Yw`Zt?H#O*Q_1mTfOLXv?-{&~M(-}HG!BhS4UZk&H8eV2+BH0NWb*L9 z`1rulLjQPaXmn&^a(rrNvQYNus=~-Y`nBB9>)l*#9O@q$Jy;r=D3nHm{}jqg`T}2l zu)MS{!1|Ga$-^V(mY03Fy!@*2id)J}SCvtZLt|_nY>A&1D1;nT#=n}i%9KdYOI-#7Pd=9L*MjSq1d4jd}= z5BUQl{Gdvd&n``tw+!yOgW zt`cwmY~f(Pws%?VeW1(2d!FQdpiAZLv_oYtdt2Zhzx@fr{yX^(W8eSUW_C=BZ~V}( zKWXFpCWd-OWfT10_I^3YO_r;=ylxlO_?q2*EUV&uj4$=jd&-u|dWG$i=f``p+TZ@K z_deB|xrP>0FF?*&>!&V%*^691?-%p`AGPYIsu}&yrXTQs>CX22TR}dW4qI5;cWM#8 zEC1BB{*b$@9z!+L>L=Yt=_S2?;Y5%Aob2@T>Lr5{x$0-a`R(9OT~FtF&Jp?H<2@_T zA>wG~PhGz@uzMwc;{J3S#Xqk4q1Ve}s7|u-AwPMDHv(TO_~>!wHpWE!`K=iL)79?r zqpsi8<3n%1$`9M|;WodW{QpKdzWjU)7nHwP$*8?}Zc}FOzgv69iTu{jrWj9wKAZaf zJ~ruOuUwph6_aXVt^)yR9*7;oNtE;|et8WT#3*qIM*hlWihCNgEC~f`0Ly)g2 zKgMU%ZvM_UjrqK4KAr3Hb@^A%S2p?E@#;370zZrPgLpyWHRKbiXV8~snRi@neD7t3 z-}gK>l;8WTYY;EoD#lZuH}uzT?|=uwFW>*<*5HNKKQiov+HccU|7?8kT3**b=!3hx z(x0!&+x6EiULbq2Oulb*m7$lgFWLEH`yTl8zTEt=4D1VdPUsh(kA!#uJV<(Hw=Y`x z$o@t;zd`Qq`l0Jr$UmvqF9A=eH_oY@n;B2)^Upt6`5k^7wW$@O;#gSUzfrOFjzxY5FH5 zpZg8{Rrq!7f83w$SG|8?D{t4IB~JMust@!>=|xugpO$mJ-J&ZixyZojH?(v1c{IH>q z7&mmc2f99m{eJTJn%gtPJGSz!ct~rH^zt5qoY5~B*F`_l?;FDW8RSggtMYw)cc0O=h4@>?Y?1U%dkwykN+2RCye3>g|gS572YmAG|^p;N$J$bKHoZ+Co0U_3-N=eDuJck6mQVx1m0+`mD}O+nf5~@@j;rMna?H;D{`hsj&e#4X94`ZYBl;u! 
zd0H=aVnO@@hqw>P8GiTUR{QB{|8Rf0ud;u@J@^CXbRF!sssF0H>+``^bYEqE@-A=3 zkK1^eZvS2i{T*uOQbtGM1ntfaFAqNVsr>RGDg0IVOHv=`z~?`RXBVO^U6Q_`P$t4)f(il zfL|Toar`2G6|tA%eZZf1?eWR$KlFrwe|!E4I3~TH>mh&jQ6qnqHXn+9_hR`gdanG7 z{Z~EHUP7rs1AjJ^8=qw059oGxt_OUg`^9lSfgTqm6zQJQ*DJP$mg z>z$f^qkoTnPKt-@_yBSNp90_1zHN+-ry8D>{Ox#l%U=P{_2Lh_>+5%4lN){me<0!e zYN2-#doJSN=DYSi{Qj254SOEPFZ369B)y&MY5L5_57vF;0V95n`vbs%j#I?1_Zsnl z%6|d(;19F|_?f#U2q^9`{E6)5uesVYT`tHAB;_CYxA~XP3cs+&)Ne2KjEEOhzihinN%vLjZA|>)oUQ}@&HQ)u{;bX`&?9&Jq%CjD2e|TwUcUPJ>*awPFLgXl{j`(; zuX;XhDWl)R=gD*67su@IJM`sWK|AA>>&A8o;LKmdi}KN2XF=d$i&B&#rxxG z_dXcN1-!50Ncms-_e#H$;Um;v=lSKj{L+?J^-mkWV7(pb6~^J=?;wwe`obT=^Ko5v z`2Evk$F}EYUsB``p@+Jj&GlR^*Zb>ny&b;=>InIl$1h?$@p}e-mH*?azk0n?{Bd&p zzQ#JA6#AU(`OqJC`-ta}V4#oj`7^qlw=w<_Nbx=k*l#Kiy-ol8wCzueU&JdS-7x%O znBUjutA_HD{q>f*3f4Z;o z@1U1>F6M1RRAc&IP#ABn%j{OU@b zua5>EVOMZY?b}fI^L6a_bQ_&XZ9Y_}wc~d^ zkBaw{Jlmt=L&Y(49nQ1MKRkV6V}9eO8Q=waPx5JDiPzMA--QRa8LK&$i_~Q^*?(1y-W+urec@u%Pw1cNFW)NjQ994t^U1gm>ET?@Y@P3am`p!aJ}s?&()z;} zE%U0g=g%8gp6BJJpEVe-{ORX{0rh8{{f&r!f9xZ>&Y2}WeZuhX+ay1@`x`p{qTk~F zM&$Woe`V}HqT{Q2{sqrddqQ6G;NJk(`g7F!FP*UN}FWl2Iy(sq!)%^~SK9yI0{i8Wwpx)za6Tby1Iy!VSI{w@G z#~Z@!_Sa~XbMo~LAm(!?+IPyMbxH1Qj;|6)7}_(l8ZT)p2%Q)YhRw*P%v`yJxKIlrj;4=Vml zYkx@|p4>-g8KZM^o)G*E;1}&jJ+i}Z%cq~s&;DjG9wg;S2X24+J|0Z?bN8Qh9@r-P z!(4Bm@aw9-y8n#+bNBzi6VxzO9=rn_7Bm6y_cw~q{TZ+Sg+991U&oIsZ^w^Yew*g& z05{mj2DpHnfG0dRJG>lw!|7XcwRahh2l--v?Rg_!jORn3ce=g8{#0|Z{5JF-dJKD@ zm*@La$@zKso%A>D{0a2CyS}2H^n8^MNrw08?G5Bb&sX(Nix>TVzw4oOvbJ^Xz)yDZ z-S_|ZFW!m80K_7CYs=6bp|hUY*Jfd?HgUGloc;~$##)iz%V z?SHlK207DntH*mIDojU)Q#)Rh9=QAM8{i%wwA3P6wH9vm&`y|9?@#iG*qXU2Gleg6mGj_SwfU&!~j;QsV|_5P)<@6ey5{w4U@y}T|z=qlVn@QiJ&qm~Vr4iR4;6UVuj`uk;mq2tC*R!}?!u zWcQDf%zr>04f|0;f3tfZ*3-{?awIqXOp)J){uD@ooebkEw?TITmOh5OOF#t)E>6bzHcg!U}dI}hX1u$MT}{X67y>NkF~ zeq{ep<%+`uFkm4)|U7$2u50 z|3Qv8Ge55I2KxwmsFz2+pnmOzD_(RyQvQN#d7W>7hg!x@ZuXA!N#z@8ogemTSt@ah z`YOCqIh<#gfB2`*zANAS!>ZW(^;WsOUr+awq*OgMpGyBdj$h!XfTH54=40S@`dN+V 
zLN9^Ch`$-hKh!V&z&qftNd3uQ77dQuTj0qRe>QvPS{~0M`73`Je20F+k*>pdcD(-d zdwTr4*5B5CyS=Y&KWbVJ40{iM7`y>_!Tu}WAUVXx^Rn`vFFyDer&RtQ`Ukwwd7Mwc z3(!xxu7iF3@mt>by;*ZViz1`nljNDC$_k};{ zUVj@NT%T{_13RAF{Abw@M*hD^uD^#}2A&}I?CkF=S6p*lZvJx+ZziXfN~=@&ga)?Uujt_0lB2Um5XwTK_rJ ze-HcyyBqNzwE3LqC#qTX^Esg>R37%roo{scsruXbqB{Fa`wTq4;CgAgZaJfNpdN9% zoR$6ktDpbPwp{rn;rtmJpJ2ZZUEkyO7kr{$*UiLm-qwGz+CS(i_ykAluelz)FJAvX zJ3P7CKh%@zRlWVy@c=vaZ~M7nzmWO&qYil|#IK&0Mn7b?pWDCgduCN`_|0H`b;KUb z9sSHE_CEPOQ`jxi-#C7C{idrtp1*#d<^AN)=VbWF4!__X@E4BMf{jvd;_+y;^?&v4 zp_~7V^+kF-3f#cnA>PD!cH^xNz5Lz$^3N&8TXj8A`d;n+a=N|S#*VcIFS`42%-+c- zq#mz?9W?cx?%%~Ikou<}!kG@g3{Ah0Wu4dy$zg=*B1oTk1dwl;>`Ti8(i~33BYj8dI zBVunw{&}JJ*~D+8Jl_wXI9u=QfnD(f`%3bI-X@J7)0PL$PB#CpI{d<4b&W$!yk`|} z{^*hK=J)=x4Cdnj4@ILU)KK1f4n3&`;tOGp&z;(Xkj9p z;_uf3|C1c;@sxgD7o+RbuUBoP8_Bp{E^|C$seuAk6rT|YN6djFz*yb8Pqe3Y< zdEZ~6=Kr`pU#~y(GAVy3`PuQ~mao(8+j1tmPad1(I#H~TK)tiGcXwUca7Au-Nn!86 zvypgN?2mnE;U|0tz88%3W^ueg?{s_B!Nz2MTepKs-=N2^ca5^c=rAYW+p&1&o9XSx zeh_iK0DfHY204*j6y9CS<9Sp*v>(p)S}*T{SGRayUk`!zJispnKG9y-!La^}+LPUU zmAgN(e^Kve0vwWDIvKt1?-}QK3OqpNw@LY?->+NOAM|H@eiwLvp3m1u34{GD!``a( zQLg&0<45VQYk9r?q5dVr5A^zE@mw|b4tA~}`>UCe9mMycyyfrN*}K>N`0Vp?vv(Qf zA0qZnjOSv%ZrxtR?H%;VX0LQTZj*9P?;pA9t6s0_^<6LTh4r`lXKwiitmpB>$Hw#7 z>F4z;o*c+cKLdX(;eH*Ee?&jUe#Xc@=se#Z_K&N6=zJFPL%3fD^uItICI4<~UXE#6R!k>SBNp5(_ zAl^6i74z$$&m>oc7xW|e9QsSw&GpPbUZ~<1SAEg(rufvgJnBjHQhW}ap?&lFZ}pom zOp6!ta~h5PxkLYqk-U^&MCaMz<=LfwbHk~Gmu5aX`2KmuUr288_s8jaS^x36D_(RS zXp?&6?!TqQJNSs?uJ|S`UTA!Bg=~+_p41L6JvV%DQRkC&y_oBn6XWSm#o}ulUQB+b zg!H$J{b`jy;c6dreGB=|IX=~K>~3F<$@MI{zNW3eS2f=B`YV4n8@w!$?-5=p>jn9k z?-vf`p4t0`e)qpV_};~AUn2g$h+i=O6Ff{fvHN3@^7p^-Qy0wA^=3X8{qgGYvyL~V zuV@$iF+CsL#jxL}j+g4^kLvoK7H^^6o@D-uTl_-(vy{;>ZeMKg`ORv+)}Nm9=%QX< zKys6RD;Kkh$bbIQnD1=Ii>?>Ug?{Omk8czy`9{zP{)g!FY5Yy zz5QwVLdh>1`?A7_Um)M(J;7VD|8{tl#1o#I-FU~NYdWsZ)xIo??T@j{Wq%C#VK@?E z=6d?R9ylcC`@ZaWf&NkX(^$pg&w%ff=^OepseMAfyO-DTqU?iRU)}Pz7+0V?72Y?1 z`h~d4%>Q}qx?4Bps-Fq-x6n(ScjkH?6z{Y9ZH&*MU$`Fq2S0-J6Z6OJG2(@^e5m3} 
zSN%qPsa|$I*XP-6VNA|HK|85D^v7Ml_2;L>m#V)VA8!5+=AZCIO8nn5a{fYie>wkT z4Wo7gFWKdzKKvJ*`MtlU2J=ryPS3^SYxF<#2t2O)+a1>R(R5$6-!ky3m*@M9)w#Zc zAAmEx{#{bf;_ttK{9N%4IZL_8`{spL#TRzGy3IeK_AF&sZ=%0H7`%e#A)Zlqrt;bO zV=eFd!p_|6T@BWYT`v7G=qJg`KAs-&|Buhz)ICe{6@O;nzsTr5N%b%6KM%M^{R+6T zIAXr%PYipl)~kSbfI}T8b3OGQ@8^SmCI7Ve)#nocueg7Mw6`xyJ8b?ATxW;h%b$M7 zL~i)aVEj~5f1ZD+HotA^zb!wU&igis2kHyXy041=Veg>#`gMH#cHZ>?^|PP6tGxm~sr}IB^mw7X2i~Kj{K1p^joHO_ z>kpLjo1c>bULyP|))(B%-%k4RzyH0V?{@!Ck8fUM-A@|*@2W344irC|c+v5`rS9WH z*Y-nRgctC9dc0tMj=rAlxZGE+k?HuvbF;HAH+4NVzL@Qcl#e`I#wt(0om&`lztuQi z!0*ssx&i0+&y4(F-A5iU;+gpV?I>r?SD^Pt-DAwhSMxuU>6`LrT+8cth23-nk3 zw?7-RFSrcXL!Tn{=_`N!rL*OJqdzqA?bWY8!E@-jVgEVL&(ZY_{l0Mg;%()_VPCJ` z`lrPU;uo?r@%@bH_f<@ndv^BalD|4Hzxg>C_+zjWBpUX}gt9`MxKP_JL@u@~e{RTclToc+e`FnQuWz84Q%P-!q zfqkL%IFCBp7d_s$@1IHilGL9G`_I|F*z^rJL%mJB+49@@)?ej2(&7c{lc_y%eCYGp zlz*1he74`TJ-a_Qe=Nm#E_hmx*Spv`Jla3ec|5kC0Q3cV2>YU!?_hM@6NWzqxz`_F z&@OQd=JP$W=VOoT`MD84RUf}VPV)W?&w>Kh9%cUGVc`V2y^eDT0-S59}=ip+*FZiD1h`)Q;tjO1$h~XFU3G@>9g&xu!wz0V& zeev|ehW;zQcJ+_&JXte_8iIFByG6fuS@*xi{al}qcAC$3Esy6>d1bGGSIDh^hKb__ zIUl%w*NddZFUE8Aai?ZEj=PMJIDlu5QGEAoY``fhh#kb^!-@so@IX_lYAMY|| zTi*wLgkA%`IuCTQW2fZ%mO_0x$@&Mk`LS5P-60ZpOJn*<{gGWf^~cw2Sh^VX6M6=| zCb`V@*!JIm9=q}@?oZ`l*W>g3z_WCndcTrhuj>6X;7Qh-$)j^T2K-SuRew9by6GqQ zmG0YQtUCzpP-gSD&o4Bbm79JB^Vt%<{|k0Ek`ET+mG_Cy@g6-cvFFE3{StDoJkq0o z-^Qpv(#J2*Tlf)Z2i<>z+<)|+4gXE$=jQjDWbOF-x8~muLGh<6K6QMAdgqJ}9p7Fk zKgbpRquZ}@{f%6*YO^j&%V$;3(Mv8p+3m-?DFk5KC~&n z{jP(2NJ2h=Ug~wLadT0TKO2J`Dla~q}Jh5cmRmsNis8LrQcPu}rZ*B|EQ zpJWi=b zzG?H1)%&lgC-jAmo`?3r-bLi+U4GL8_t16qyZ=Qpd@A|b?TuUhU&@hQ(^h_|$Ce+Y z>)_|Yu7D@9!|%vH{I{{(@LPlRTwf9EK4!$9_W6f8PHR8E z1oZ_D!DCeaZH$hm+WZpeefs=ETKwwyDUTuUB)J2p@o{+i9Xyg9e!ubVr}A4*U4!{5 zV|#-Q{eP2qzRLXZR@(iGRDLs={ZRV5*zs$}kK6oVj6cx46ujpGxPY7rlE)zT?8dvl z{n*~Er;@!RURikm0oXensa*B%XE*&-kC$)E#cBv=z8!y`N<7V$+~?>>b7fs9ksu zsST$~nG~!%;=1hY-HO-0`9N;?O=0h#hmriCc)!NA!Sw2@|9Qdu4D=g#B010X-2dc* zFMZ|me>Cu4l=64SFYZtGMZd-0_hjZ*u%1A#S6Y9!di~eypIv^~jvqIBhxl$>yjQP5 
z_6y!?h<$Ft_R8P0n{Rg4fB!*KZg|OHybQcedfCnTBwm0g`rdBu^y{#G=ZA*9N$cM! zytvwnMyj~{n^s?4_}*>24C~FPVu))@{md@^P&_tpPHy^{V!RA`rT61p&%H9=j^Cl5 z=x$sOzKYm`%I?*V+;dFCKmTaVk4UQ@VgH5ur@ z{@{Hjz!&M2!Y^EtUNQyN3R;$5mP3<#UI6{(Y`^Nx8l(VsFIzbH(dQ`1|9aS0rD1JO%vFb>M4r zzv{I57V@7n-t>B3{4(XNrTiMGne@P!_ zjq4}$x1iks`Q0as{8$^qWv~O_OI?rZ|9w2L1MsUN`1ZWYxwD8TAs=dYcJU?>-+E=_|M=Ad|!}yjPJo)+40Gtb^p05S9_Q8 zeJ&;+1iP08`WBL7w{3FlYn`T+T-_wQWeGsHtwGw4TrehBbM*KK37 z-+tl#zOCOg@=IzxVY25#AKlxp%TMu9TK|ynw9Hta63R!;PXum&m+bPHH(k^Jx?J&+ z&_6_fnEGP#4}pth`V#g>`2PBZ>@R+&=c@U~uFr>^r}EkPr*?f!;vY6!`9G`prQyx@ zy)`%eOp$MgUPk;2v3}|U@_mc&f2e=${$Zqi+ZDI3X02j9pZ$F$&>QHVZZ|k@)%kir z(?3KzNKSJ-^*%q?72h^{Xvb$Y`Sw@j{?{*9_rX5-{N3#4KfJu;Cr5I_Z;J6;*cZ|} z`IpCcc;6886!^t;uKf?3QaR;M;Ctvj;uN~B)w*ABwa?e}FReeM{7Jk1y4e>!zGz}J zzKeZ#QP1q+g_~cqZCP&mS%dj?=Zl2>Z;knJar>g%AE`ZDzdv!}vtPaJd^5kM-v0>w z)b$_bFJjnV2l`9DDE-FwuuHI$blvuF{|_C1O21t3rR%ry-;$Mw-X)C}sxPnCKW%v> zKRZ6%>NV=g8jR&4*}$uo&7z0{)tt&;x}Qw&W2y? z*9|=*dB*Vz`$xay?^weY!+%=* zWbb}2-&@yU-nl+Eyks!{!OUm0iTt*i&uEwOi|4m>UdMQD{mvf( zj)7AgNk2AP?G5mk3@=LGUCZNnRIgBvPqJTWb$FrsHZnTGj+pi(d~Rm@LtgM-ugZ7- zrG)kc{4V812OghcKKVlS1^lnyXRc>f%=i7F;U8A7FFFp^K=C}T$2Kf-o$AG_2dJ^}?fG5*FEw=q+ z&<<)R=9AQ~eRRdE&KK3=wR-#JmJdPxL-Nl8`^li*uyZ)_e8@%e4pW2vR=;g(u!O_q zy{rA*{&`(D&U|pD)4$Na$iKLE_02Q>rT$+3GXM4dn{M;>^)^opRvL#!riRCk?iv~$ zFYOwhIx=~9V0?VwXrX_+G&DLgF*!apG+8KnbX8&GApKfy==E+cHxBg=jUFrwO%zHa z!G8+nC4GU&2FpwP0+;2;z~tePbIZ#ZD%jt>jUL=`}gm^GU##fr?+Ra+&p;OHRbi4eQS6bUi<#i z@Ys>jqovYB`Lw6ZrE&iI8GRe(`CS&?(6@vCI677u=|4PK8lLDkA4Avm9~n5*e{T8AzOIvB zjY`SuTl>0CetlR=S>P;D*GP5b\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
start_dateend_dateopen_itemsred_flagsnum_employeesassessment_type
02023-01-012023-01-0210230weekly
12023-01-082023-01-0912125biweekly
22023-01-152023-01-1611328quarterly
32023-01-222023-01-239130weekly
42023-01-292023-01-3013427biweekly
\n", + "" + ], + "text/plain": [ + " start_date end_date open_items red_flags num_employees assessment_type\n", + "0 2023-01-01 2023-01-02 10 2 30 weekly\n", + "1 2023-01-08 2023-01-09 12 1 25 biweekly\n", + "2 2023-01-15 2023-01-16 11 3 28 quarterly\n", + "3 2023-01-22 2023-01-23 9 1 30 weekly\n", + "4 2023-01-29 2023-01-30 13 4 27 biweekly" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_dummy.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
start_dateend_dateopen_itemsred_flagsnum_employeesassessment_type_biweeklyassessment_type_quarterlyassessment_type_weeklyopen_items_weekly_lag_1open_items_biweekly_lag_1open_items_quarterly_lag_1open_items_weekly_ma_3open_items_biweekly_ma_3open_items_quarterly_ma_3time_since_last_eventpercentage_change_open_items
22023-01-152023-01-1611328FalseTrueFalse0.00.012.00.0000000.011.07.0-8.333333
32023-01-222023-01-239130FalseFalseTrue11.00.00.010.6666670.00.07.0-18.181818
42023-01-292023-01-3013427TrueFalseFalse0.09.00.00.00000011.00.07.044.444444
\n", + "
" + ], + "text/plain": [ + " start_date end_date open_items red_flags num_employees \\\n", + "2 2023-01-15 2023-01-16 11 3 28 \n", + "3 2023-01-22 2023-01-23 9 1 30 \n", + "4 2023-01-29 2023-01-30 13 4 27 \n", + "\n", + " assessment_type_biweekly assessment_type_quarterly \\\n", + "2 False True \n", + "3 False False \n", + "4 True False \n", + "\n", + " assessment_type_weekly open_items_weekly_lag_1 open_items_biweekly_lag_1 \\\n", + "2 False 0.0 0.0 \n", + "3 True 11.0 0.0 \n", + "4 False 0.0 9.0 \n", + "\n", + " open_items_quarterly_lag_1 open_items_weekly_ma_3 \\\n", + "2 12.0 0.000000 \n", + "3 0.0 10.666667 \n", + "4 0.0 0.000000 \n", + "\n", + " open_items_biweekly_ma_3 open_items_quarterly_ma_3 time_since_last_event \\\n", + "2 0.0 11.0 7.0 \n", + "3 0.0 0.0 7.0 \n", + "4 11.0 0.0 7.0 \n", + "\n", + " percentage_change_open_items \n", + "2 -8.333333 \n", + "3 -18.181818 \n", + "4 44.444444 " + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import pandas as pd\n", + "\n", + "# Create a dummy dataset with past 5 assessments\n", + "data_dummy = {\n", + " 'start_date': pd.date_range(start='2023-01-01', periods=5, freq='7D'),\n", + " 'end_date': pd.date_range(start='2023-01-02', periods=5, freq='7D'),\n", + " 'open_items': [10, 12, 11, 9, 13],\n", + " 'red_flags': [2, 1, 3, 1, 4],\n", + " 'num_employees': [30, 25, 28, 30, 27],\n", + " 'assessment_type': ['weekly', 'biweekly', 'quarterly', 'weekly', 'biweekly']\n", + "}\n", + "\n", + "df_dummy = pd.DataFrame(data_dummy)\n", + "\n", + "# Convert 'assessment_type' to categorical (one-hot encoding)\n", + "df_dummy = pd.get_dummies(df_dummy, columns=['assessment_type'], drop_first=False)\n", + "\n", + "# Create lagged features for each assessment type\n", + "df_dummy['open_items_weekly_lag_1'] = df_dummy['open_items'].shift(1) * df_dummy['assessment_type_weekly']\n", + "df_dummy['open_items_biweekly_lag_1'] = df_dummy['open_items'].shift(1) * 
df_dummy['assessment_type_biweekly']\n", + "df_dummy['open_items_quarterly_lag_1'] = df_dummy['open_items'].shift(1) * df_dummy['assessment_type_quarterly']\n", + "\n", + "# Create moving averages for each assessment type\n", + "df_dummy['open_items_weekly_ma_3'] = df_dummy['open_items'].rolling(window=3).mean() * df_dummy['assessment_type_weekly']\n", + "df_dummy['open_items_biweekly_ma_3'] = df_dummy['open_items'].rolling(window=3).mean() * df_dummy['assessment_type_biweekly']\n", + "df_dummy['open_items_quarterly_ma_3'] = df_dummy['open_items'].rolling(window=3).mean() * df_dummy['assessment_type_quarterly']\n", + "\n", + "# Add time since last event (days between assessments)\n", + "df_dummy['start_date'] = pd.to_datetime(df_dummy['start_date'])\n", + "df_dummy['time_since_last_event'] = df_dummy['start_date'].diff().dt.days\n", + "\n", + "# Add percentage change in open items\n", + "df_dummy['percentage_change_open_items'] = df_dummy['open_items'].pct_change() * 100\n", + "\n", + "# Remove any rows with NaN values created by lagging or rolling window calculations\n", + "df_dummy.dropna(inplace=True)\n", + "\n", + "# Display the final DataFrame with all time-based features\n", + "df_dummy.head() \n" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "\n", + "# Create a dummy dataset with past 5 assessments\n", + "data_dummy = {\n", + " 'start_date': pd.date_range(start='2023-01-01', periods=5, freq='7D'),\n", + " 'end_date': pd.date_range(start='2023-01-02', periods=5, freq='7D'),\n", + " 'open_items': [10, 12, 11, 9, 13],\n", + " 'red_flags': [2, 1, 3, 1, 4],\n", + " 'num_employees': [30, 25, 28, 30, 27],\n", + " 'assessment_type': ['weekly', 'biweekly', 'quarterly', 'weekly', 'biweekly']\n", + "}\n", + "\n", + "df_dummy = pd.DataFrame(data_dummy)\n", + "\n", + "# Save the DataFrame as a CSV file\n", + "df_dummy.to_csv('dummy_assessment_data.csv', index=False)\n" + ] + }, + 
{ + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
start_dateend_dateopen_itemsred_flagsnum_employeesassessment_type_biweeklyassessment_type_quarterlyassessment_type_weeklyopen_items_assessment_type_weekly_lag_1open_items_assessment_type_biweekly_lag_1open_items_assessment_type_quarterly_lag_1open_items_weekly_ma_3open_items_biweekly_ma_3open_items_quarterly_ma_3time_since_last_eventpercentage_change_open_items
\n", + "
" + ], + "text/plain": [ + "Empty DataFrame\n", + "Columns: [start_date, end_date, open_items, red_flags, num_employees, assessment_type_biweekly, assessment_type_quarterly, assessment_type_weekly, open_items_assessment_type_weekly_lag_1, open_items_assessment_type_biweekly_lag_1, open_items_assessment_type_quarterly_lag_1, open_items_weekly_ma_3, open_items_biweekly_ma_3, open_items_quarterly_ma_3, time_since_last_event, percentage_change_open_items]\n", + "Index: []" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import pandas as pd\n", + "\n", + "# Create a dummy dataset with past 5 assessments\n", + "data_dummy = {\n", + " 'start_date': pd.date_range(start='2023-01-01', periods=5, freq='7D'),\n", + " 'end_date': pd.date_range(start='2023-01-02', periods=5, freq='7D'),\n", + " 'open_items': [10, 12, 11, 9, 13],\n", + " 'red_flags': [2, 1, 3, 1, 4],\n", + " 'num_employees': [30, 25, 28, 30, 27],\n", + " 'assessment_type': ['weekly', 'biweekly', 'quarterly', 'weekly', 'biweekly']\n", + "}\n", + "\n", + "df_dummy = pd.DataFrame(data_dummy)\n", + "\n", + "# Convert 'assessment_type' to categorical (one-hot encoding)\n", + "df_dummy = pd.get_dummies(df_dummy, columns=['assessment_type'], drop_first=False)\n", + "\n", + "# Define a function to create lagged features based on assessment type\n", + "def create_lagged_features(df, col, assessment_col):\n", + " lagged_col = f\"{col}_{assessment_col}_lag_1\"\n", + " df[lagged_col] = df[df[assessment_col] == 1][col].shift(1)\n", + " return df\n", + "\n", + "# Create lagged features for each assessment type\n", + "df_dummy = create_lagged_features(df_dummy, 'open_items', 'assessment_type_weekly')\n", + "df_dummy = create_lagged_features(df_dummy, 'open_items', 'assessment_type_biweekly')\n", + "df_dummy = create_lagged_features(df_dummy, 'open_items', 'assessment_type_quarterly')\n", + "\n", + "# Fill NaNs with 0 or forward-fill them depending on how you want 
to handle missing lags\n", + "df_dummy.fillna(0, inplace=True)\n", + "\n", + "# Create moving averages for each assessment type\n", + "df_dummy['open_items_weekly_ma_3'] = df_dummy[df_dummy['assessment_type_weekly'] == 1]['open_items'].rolling(window=3).mean()\n", + "df_dummy['open_items_biweekly_ma_3'] = df_dummy[df_dummy['assessment_type_biweekly'] == 1]['open_items'].rolling(window=3).mean()\n", + "df_dummy['open_items_quarterly_ma_3'] = df_dummy[df_dummy['assessment_type_quarterly'] == 1]['open_items'].rolling(window=3).mean()\n", + "\n", + "# Add time since last event (days between assessments)\n", + "df_dummy['start_date'] = pd.to_datetime(df_dummy['start_date'])\n", + "df_dummy['time_since_last_event'] = df_dummy['start_date'].diff().dt.days\n", + "\n", + "# Add percentage change in open items\n", + "df_dummy['percentage_change_open_items'] = df_dummy['open_items'].pct_change() * 100\n", + "\n", + "# Remove any rows with NaN values created by lagging or rolling window calculations\n", + "df_dummy.dropna(inplace=True)\n", + "\n", + "# Display the final DataFrame with all time-based features\n", + "df_dummy.head()\n" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
start_dateend_dateopen_itemsred_flagsnum_employeesassessment_type_biweeklyassessment_type_quarterlyassessment_type_weeklyopen_items_assessment_type_weekly_lag_1open_items_assessment_type_biweekly_lag_1open_items_assessment_type_quarterly_lag_1open_items_weekly_ma_3open_items_biweekly_ma_3open_items_quarterly_ma_3time_since_last_eventpercentage_change_open_items
02023-01-012023-01-02102300010.00.00.010.00.00.00.00.000000
12023-01-082023-01-091212510010.00.00.010.012.00.07.020.000000
22023-01-152023-01-16113280100.012.00.010.012.011.07.0-8.333333
32023-01-222023-01-2391300010.00.011.09.012.011.07.0-18.181818
42023-01-292023-01-30134271009.00.00.09.013.011.07.044.444444
\n", + "
" + ], + "text/plain": [ + " start_date end_date open_items red_flags num_employees \\\n", + "0 2023-01-01 2023-01-02 10 2 30 \n", + "1 2023-01-08 2023-01-09 12 1 25 \n", + "2 2023-01-15 2023-01-16 11 3 28 \n", + "3 2023-01-22 2023-01-23 9 1 30 \n", + "4 2023-01-29 2023-01-30 13 4 27 \n", + "\n", + " assessment_type_biweekly assessment_type_quarterly \\\n", + "0 0 0 \n", + "1 1 0 \n", + "2 0 1 \n", + "3 0 0 \n", + "4 1 0 \n", + "\n", + " assessment_type_weekly open_items_assessment_type_weekly_lag_1 \\\n", + "0 1 0.0 \n", + "1 0 10.0 \n", + "2 0 0.0 \n", + "3 1 0.0 \n", + "4 0 9.0 \n", + "\n", + " open_items_assessment_type_biweekly_lag_1 \\\n", + "0 0.0 \n", + "1 0.0 \n", + "2 12.0 \n", + "3 0.0 \n", + "4 0.0 \n", + "\n", + " open_items_assessment_type_quarterly_lag_1 open_items_weekly_ma_3 \\\n", + "0 0.0 10.0 \n", + "1 0.0 10.0 \n", + "2 0.0 10.0 \n", + "3 11.0 9.0 \n", + "4 0.0 9.0 \n", + "\n", + " open_items_biweekly_ma_3 open_items_quarterly_ma_3 time_since_last_event \\\n", + "0 0.0 0.0 0.0 \n", + "1 12.0 0.0 7.0 \n", + "2 12.0 11.0 7.0 \n", + "3 12.0 11.0 7.0 \n", + "4 13.0 11.0 7.0 \n", + "\n", + " percentage_change_open_items \n", + "0 0.000000 \n", + "1 20.000000 \n", + "2 -8.333333 \n", + "3 -18.181818 \n", + "4 44.444444 " + ] + }, + "execution_count": 22, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import pandas as pd\n", + "import os\n", + "\n", + "class DataPreprocessor:\n", + " def __init__(self, input_path, output_dir):\n", + " self.input_path = input_path\n", + " self.output_dir = output_dir\n", + " self.df = None\n", + "\n", + " def load_data(self):\n", + " self.df = pd.read_csv(self.input_path)\n", + "\n", + " def preprocess(self):\n", + " # Convert 'assessment_type' to categorical (one-hot encoding)\n", + " self.df = pd.get_dummies(self.df, columns=['assessment_type'], drop_first=False)\n", + "\n", + " # Convert boolean columns to 1s and 0s\n", + " self.df['assessment_type_weekly'] = 
self.df['assessment_type_weekly'].astype(int)\n", + " self.df['assessment_type_biweekly'] = self.df['assessment_type_biweekly'].astype(int)\n", + " self.df['assessment_type_quarterly'] = self.df['assessment_type_quarterly'].astype(int)\n", + "\n", + " # Function to create lagged features based on assessment type\n", + " def create_lagged_features(df, col, assessment_col):\n", + " lagged_col = f\"{col}_{assessment_col}_lag_1\"\n", + " df[lagged_col] = df[col].where(df[assessment_col] == 1).shift(1)\n", + " return df\n", + "\n", + " # Create lagged features for each assessment type\n", + " self.df = create_lagged_features(self.df, 'open_items', 'assessment_type_weekly')\n", + " self.df = create_lagged_features(self.df, 'open_items', 'assessment_type_biweekly')\n", + " self.df = create_lagged_features(self.df, 'open_items', 'assessment_type_quarterly')\n", + "\n", + " # Fill NaNs with 0 instead of dropping rows\n", + " self.df.fillna(0, inplace=True)\n", + "\n", + " # Create moving averages for each assessment type\n", + " self.df['open_items_weekly_ma_3'] = self.df['open_items'].where(self.df['assessment_type_weekly'] == 1).rolling(window=3, min_periods=1).mean().fillna(0)\n", + " self.df['open_items_biweekly_ma_3'] = self.df['open_items'].where(self.df['assessment_type_biweekly'] == 1).rolling(window=3, min_periods=1).mean().fillna(0)\n", + " self.df['open_items_quarterly_ma_3'] = self.df['open_items'].where(self.df['assessment_type_quarterly'] == 1).rolling(window=3, min_periods=1).mean().fillna(0)\n", + "\n", + " # Add time since last event (days between assessments)\n", + " self.df['start_date'] = pd.to_datetime(self.df['start_date'])\n", + " self.df['time_since_last_event'] = self.df['start_date'].diff().dt.days.fillna(0)\n", + "\n", + " # Add percentage change in open items\n", + " self.df['percentage_change_open_items'] = self.df['open_items'].pct_change().fillna(0) * 100\n", + "\n", + " def save_data(self):\n", + " output_path = os.path.join(self.output_dir, 
'preprocessed_data.csv')\n", + " self.df.to_csv(output_path, index=False)\n", + " return output_path\n", + "\n", + " def run(self):\n", + " self.load_data()\n", + " self.preprocess()\n", + " return self.save_data()\n", + "\n", + "\n", + "preprocessor = DataPreprocessor('path/to/input.csv', 'path/to/output/directory')\n", + "output_file = preprocessor.run()\n", + "# print(f\"Preprocessed data saved to: {output_file}\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
start_dateend_dateopen_itemsred_flagsnum_employeesassessment_type_biweeklyassessment_type_quarterlyassessment_type_weeklyopen_items_assessment_type_weekly_lag_1open_items_assessment_type_biweekly_lag_1open_items_assessment_type_quarterly_lag_1open_items_weekly_ma_3open_items_biweekly_ma_3open_items_quarterly_ma_3time_since_last_eventpercentage_change_open_items
22023-01-152023-01-1611328FalseTrueFalse0.012.00.010.012.011.07.0-8.333333
32023-01-222023-01-239130FalseFalseTrue0.00.011.09.012.011.07.0-18.181818
42023-01-292023-01-3013427TrueFalseFalse9.00.00.09.013.011.07.044.444444
\n", + "
" + ], + "text/plain": [ + " start_date end_date open_items red_flags num_employees \\\n", + "2 2023-01-15 2023-01-16 11 3 28 \n", + "3 2023-01-22 2023-01-23 9 1 30 \n", + "4 2023-01-29 2023-01-30 13 4 27 \n", + "\n", + " assessment_type_biweekly assessment_type_quarterly \\\n", + "2 False True \n", + "3 False False \n", + "4 True False \n", + "\n", + " assessment_type_weekly open_items_assessment_type_weekly_lag_1 \\\n", + "2 False 0.0 \n", + "3 True 0.0 \n", + "4 False 9.0 \n", + "\n", + " open_items_assessment_type_biweekly_lag_1 \\\n", + "2 12.0 \n", + "3 0.0 \n", + "4 0.0 \n", + "\n", + " open_items_assessment_type_quarterly_lag_1 open_items_weekly_ma_3 \\\n", + "2 0.0 10.0 \n", + "3 11.0 9.0 \n", + "4 0.0 9.0 \n", + "\n", + " open_items_biweekly_ma_3 open_items_quarterly_ma_3 time_since_last_event \\\n", + "2 12.0 11.0 7.0 \n", + "3 12.0 11.0 7.0 \n", + "4 13.0 11.0 7.0 \n", + "\n", + " percentage_change_open_items \n", + "2 -8.333333 \n", + "3 -18.181818 \n", + "4 44.444444 " + ] + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.head(5)" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
start_dateend_dateopen_itemsred_flagsnum_employeesassessment_type
02023-01-012023-01-0210230weekly
12023-01-082023-01-0912125biweekly
22023-01-152023-01-1611328quarterly
32023-01-222023-01-239130weekly
42023-01-292023-01-3013427biweekly
\n", + "
" + ], + "text/plain": [ + " start_date end_date open_items red_flags num_employees assessment_type\n", + "0 2023-01-01 2023-01-02 10 2 30 weekly\n", + "1 2023-01-08 2023-01-09 12 1 25 biweekly\n", + "2 2023-01-15 2023-01-16 11 3 28 quarterly\n", + "3 2023-01-22 2023-01-23 9 1 30 weekly\n", + "4 2023-01-29 2023-01-30 13 4 27 biweekly" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_dummy.head()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "\n", + "# Create a dummy dataset with past 5 assessments\n", + "data_dummy = {\n", + " 'start_date': pd.date_range(start='2023-01-01', periods=5, freq='7D'),\n", + " 'end_date': pd.date_range(start='2023-01-02', periods=5, freq='7D'),\n", + " 'open_items': [10, 12, 11, 9, 13],\n", + " 'red_flags': [2, 1, 3, 1, 4],\n", + " 'num_employees': [30, 25, 28, 30, 27],\n", + " 'assessment_type': ['weekly', 'biweekly', 'quarterly', 'weekly', 'biweekly']\n", + "}\n", + "\n", + "df = pd.DataFrame(data_dummy)\n", + "\n", + "# Convert 'assessment_type' to categorical (one-hot encoding)\n", + "df = pd.get_dummies(df, columns=['assessment_type'], drop_first=False)\n", + "\n", + "# Convert boolean columns to 1s and 0s\n", + "df['assessment_type_weekly'] = df['assessment_type_weekly'].astype(int)\n", + "df['assessment_type_biweekly'] = df['assessment_type_biweekly'].astype(int)\n", + "df['assessment_type_quarterly'] = df['assessment_type_quarterly'].astype(int)\n", + "\n", + "# Function to create lagged features based on assessment type\n", + "def create_lagged_features(df, col, assessment_col):\n", + " lagged_col = f\"{col}_{assessment_col}_lag_1\"\n", + " df[lagged_col] = df[col].where(df[assessment_col] == 1).shift(1)\n", + " return df\n", + "\n", + "# Create lagged features for each assessment type\n", + "df = create_lagged_features(df, 'open_items', 'assessment_type_weekly')\n", + 
"df = create_lagged_features(df, 'open_items', 'assessment_type_biweekly')\n", + "df = create_lagged_features(df, 'open_items', 'assessment_type_quarterly')\n", + "\n", + "# Fill NaNs with 0 instead of dropping rows\n", + "df.fillna(0, inplace=True)\n", + "\n", + "# Create moving averages for each assessment type\n", + "df['open_items_weekly_ma_3'] = df['open_items'].where(df['assessment_type_weekly'] == 1).rolling(window=3, min_periods=1).mean().fillna(0)\n", + "df['open_items_biweekly_ma_3'] = df['open_items'].where(df['assessment_type_biweekly'] == 1).rolling(window=3, min_periods=1).mean().fillna(0)\n", + "df['open_items_quarterly_ma_3'] = df['open_items'].where(df['assessment_type_quarterly'] == 1).rolling(window=3, min_periods=1).mean().fillna(0)\n", + "\n", + "# Add time since last event (days between assessments)\n", + "df['start_date'] = pd.to_datetime(df['start_date'])\n", + "df['time_since_last_event'] = df['start_date'].diff().dt.days.fillna(0)\n", + "\n", + "# Add percentage change in open items\n", + "df['percentage_change_open_items'] = df['open_items'].pct_change().fillna(0) * 100\n", + "\n", + "# Display the final DataFrame with all time-based features\n", + "df.head()\n" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import os\n", + "\n", + "class DataPreprocessor:\n", + " def __init__(self, input_path, company_id):\n", + " self.input_path = input_path\n", + " self.output_dir = os.path.join('data', 'processed', 'assessment_prediction', company_id)\n", + " self.company_id = company_id\n", + " self.df = None\n", + "\n", + " def load_data(self):\n", + " self.df = pd.read_csv(self.input_path)\n", + "\n", + " def preprocess(self):\n", + " # Convert 'start_date' and 'end_date' to datetime\n", + " self.df['start_date'] = pd.to_datetime(self.df['start_date'])\n", + " self.df['end_date'] = pd.to_datetime(self.df['end_date'])\n", + "\n", + " # Add duration (in days) 
by subtracting start_date from end_date\n", + " self.df['duration'] = (self.df['end_date'] - self.df['start_date']).dt.days\n", + "\n", + " # Drop the 'start_date' and 'end_date' columns as they are not needed for training\n", + " self.df.drop(columns=['start_date', 'end_date'], inplace=True)\n", + "\n", + " # Convert 'assessment_type' to categorical (one-hot encoding)\n", + " self.df = pd.get_dummies(self.df, columns=['assessment_type'], drop_first=False)\n", + "\n", + " # Convert boolean columns to 1s and 0s\n", + " self.df['assessment_type_weekly'] = self.df['assessment_type_weekly'].astype(int)\n", + " self.df['assessment_type_biweekly'] = self.df['assessment_type_biweekly'].astype(int)\n", + " self.df['assessment_type_quarterly'] = self.df['assessment_type_quarterly'].astype(int)\n", + "\n", + " # Function to create lagged features based on assessment type\n", + " def create_lagged_features(df, col, assessment_col):\n", + " lagged_col = f\"{col}_{assessment_col}_lag_1\"\n", + " df[lagged_col] = df[col].where(df[assessment_col] == 1).shift(1)\n", + " return df\n", + "\n", + " # Create lagged features for each assessment type\n", + " self.df = create_lagged_features(self.df, 'open_items', 'assessment_type_weekly')\n", + " self.df = create_lagged_features(self.df, 'open_items', 'assessment_type_biweekly')\n", + " self.df = create_lagged_features(self.df, 'open_items', 'assessment_type_quarterly')\n", + "\n", + " # Fill NaNs with 0 instead of dropping rows\n", + " self.df.fillna(0, inplace=True)\n", + "\n", + " # Create moving averages for each assessment type\n", + " self.df['open_items_weekly_ma_3'] = self.df['open_items'].where(self.df['assessment_type_weekly'] == 1).rolling(window=3, min_periods=1).mean().fillna(0)\n", + " self.df['open_items_biweekly_ma_3'] = self.df['open_items'].where(self.df['assessment_type_biweekly'] == 1).rolling(window=3, min_periods=1).mean().fillna(0)\n", + " self.df['open_items_quarterly_ma_3'] = 
self.df['open_items'].where(self.df['assessment_type_quarterly'] == 1).rolling(window=3, min_periods=1).mean().fillna(0)\n", + "\n", + " # Add percentage change in open items\n", + " self.df['percentage_change_open_items'] = self.df['open_items'].pct_change().fillna(0) * 100\n", + "\n", + " def save_data(self):\n", + " os.makedirs(self.output_dir, exist_ok=True) # Ensure output directory exists\n", + " output_path = os.path.join(self.output_dir, 'output.csv')\n", + " self.df.to_csv(output_path, index=False)\n", + " return output_path\n", + "\n", + " def run(self):\n", + " self.load_data()\n", + " self.preprocess()\n", + " return self.save_data()\n", + "\n", + "# Example usage:\n", + "# preprocessor = DataPreprocessor(input_path='path_to_raw_data.csv', company_id='company_123')\n", + "# processed_data_path = preprocessor.run()\n" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "metadata": {}, + "outputs": [], + "source": [ + "dp = DataPreprocessor(\n", + " input_path=\"/root/ds_erp_ai/data/raw/dummy_assessment_data.csv\",\n", + " company_id=\"testid\"\n", + ")\n", + "\n", + "\n", + "res = dp.run()" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'data/processed/assessment_prediction/testid/output.csv'" + ] + }, + "execution_count": 29, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "res" + ] + }, + { + "cell_type": "code", + "execution_count": 41, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Model saved to models/assessment_prediction/testid/testid_model.pkl\n", + "Latest assessment data saved to models/assessment_prediction/testid/testid_latest_data.csv\n", + "Model Evaluation Metrics:\n", + "Mean Absolute Error (MAE): 1.3099999999999996\n", + "Mean Squared Error (MSE): 2.3089999999999997\n", + "R-squared (R²): nan\n", + "The model was saved at: 
models/assessment_prediction/testid/testid_model.pkl\n", + "The latest data was saved at: models/assessment_prediction/testid/testid_latest_data.csv\n", + "Evaluation Results: {'mae': 1.3099999999999996, 'mse': 2.3089999999999997, 'r2': nan}\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/root/ds_erp_ai/erp/lib/python3.10/site-packages/sklearn/metrics/_regression.py:1211: UndefinedMetricWarning: R^2 score is not well-defined with less than two samples.\n", + " warnings.warn(msg, UndefinedMetricWarning)\n" + ] + } + ], + "source": [ + "import pandas as pd\n", + "import os\n", + "from sklearn.model_selection import train_test_split\n", + "from sklearn.ensemble import RandomForestRegressor\n", + "from sklearn.multioutput import MultiOutputRegressor\n", + "import joblib\n", + "from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score\n", + "\n", + "class ModelTrainer:\n", + " def __init__(self, preprocessed_data_path, company_id, model):\n", + " self.preprocessed_data_path = preprocessed_data_path\n", + " self.output_dir = os.path.join('models', 'assessment_prediction', company_id)\n", + " self.company_id = company_id\n", + " self.df = None\n", + " self.model = model # Model passed as an argument\n", + " self.X_test = None\n", + " self.y_test = None\n", + "\n", + " def load_data(self):\n", + " self.df = pd.read_csv(self.preprocessed_data_path)\n", + "\n", + " def train_model(self):\n", + " # Split data into features (X) and target variables (y)\n", + " X = self.df.drop(columns=['open_items', 'red_flags'])\n", + " y = self.df[['open_items', 'red_flags']] # Multi-target for open items and red flags\n", + "\n", + " # Split into training and test sets with 10% as test size\n", + " X_train, self.X_test, y_train, self.y_test = train_test_split(X, y, test_size=0.1, random_state=42)\n", + "\n", + " # Train the model\n", + " self.model.fit(X_train, y_train)\n", + "\n", + " # Save the trained model\n", + " 
os.makedirs(self.output_dir, exist_ok=True)\n", + " model_path = os.path.join(self.output_dir, f'{self.company_id}_model.pkl')\n", + " joblib.dump(self.model, model_path)\n", + " print(f\"Model saved to {model_path}\")\n", + "\n", + " # Save the latest row (last assessment data) for inference\n", + " latest_data_path = os.path.join(self.output_dir, f'{self.company_id}_latest_data.csv')\n", + " self.df.tail(1).to_csv(latest_data_path, index=False)\n", + " print(f\"Latest assessment data saved to {latest_data_path}\")\n", + "\n", + " # Return the model path and latest data path\n", + " return model_path, latest_data_path\n", + "\n", + " def evaluate_model(self):\n", + " # Predict using the test data\n", + " y_pred = self.model.predict(self.X_test)\n", + "\n", + " # Calculate evaluation metrics\n", + " mae = mean_absolute_error(self.y_test, y_pred)\n", + " mse = mean_squared_error(self.y_test, y_pred)\n", + " r2 = r2_score(self.y_test, y_pred)\n", + "\n", + " print(\"Model Evaluation Metrics:\")\n", + " print(f\"Mean Absolute Error (MAE): {mae}\")\n", + " print(f\"Mean Squared Error (MSE): {mse}\")\n", + " print(f\"R-squared (R²): {r2}\")\n", + "\n", + " # Return evaluation results\n", + " return {'mae': mae, 'mse': mse, 'r2': r2}\n", + "\n", + " def run(self):\n", + " # Load data and train the model\n", + " self.load_data()\n", + " model_path, latest_data_path = self.train_model()\n", + "\n", + " # Evaluate the model immediately after training\n", + " evaluation_results = self.evaluate_model()\n", + "\n", + " return model_path, latest_data_path, evaluation_results\n", + "\n", + "# Example usage\n", + "model = MultiOutputRegressor(RandomForestRegressor(n_estimators=100, random_state=42))\n", + "trainer = ModelTrainer(preprocessed_data_path=res, company_id='testid', model=model)\n", + "model_path, latest_data_path, evaluation_results = trainer.run()\n", + "print(f\"The model was saved at: {model_path}\")\n", + "print(f\"The latest data was saved at: 
{latest_data_path}\")\n", + "print(f\"Evaluation Results: {evaluation_results}\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'data/processed/assessment_prediction/testid/output.csv'" + ] + }, + "execution_count": 26, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "res" + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'models/assessment_prediction/testid/testid_model.pkl'" + ] + }, + "execution_count": 34, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "e" + ] + }, + { + "cell_type": "code", + "execution_count": 42, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Model loaded from models/assessment_prediction/testid/testid_model.pkl\n", + "Latest data loaded from models/assessment_prediction/testid/testid_latest_data.csv\n", + "\n", + "Forecasting assessment 1/5\n", + "\n", + "Forecasting assessment 2/5\n", + "\n", + "Forecasting assessment 3/5\n", + "\n", + "Forecasting assessment 4/5\n", + "\n", + "Forecasting assessment 5/5\n", + "[{'forecast_step': 1, 'weekly': {'assessment_type': 'weekly', 'open_items': 11, 'red_flags': 3}, 'biweekly': {'assessment_type': 'biweekly', 'open_items': 12, 'red_flags': 3}, 'quarterly': {'assessment_type': 'quarterly', 'open_items': 12, 'red_flags': 3}}, {'forecast_step': 2, 'weekly': {'assessment_type': 'weekly', 'open_items': 11, 'red_flags': 3}, 'biweekly': {'assessment_type': 'biweekly', 'open_items': 12, 'red_flags': 3}, 'quarterly': {'assessment_type': 'quarterly', 'open_items': 12, 'red_flags': 3}}, {'forecast_step': 3, 'weekly': {'assessment_type': 'weekly', 'open_items': 11, 'red_flags': 3}, 'biweekly': {'assessment_type': 'biweekly', 'open_items': 12, 'red_flags': 3}, 'quarterly': {'assessment_type': 'quarterly', 'open_items': 12, 'red_flags': 
3}}, {'forecast_step': 4, 'weekly': {'assessment_type': 'weekly', 'open_items': 11, 'red_flags': 3}, 'biweekly': {'assessment_type': 'biweekly', 'open_items': 12, 'red_flags': 3}, 'quarterly': {'assessment_type': 'quarterly', 'open_items': 12, 'red_flags': 3}}, {'forecast_step': 5, 'weekly': {'assessment_type': 'weekly', 'open_items': 11, 'red_flags': 3}, 'biweekly': {'assessment_type': 'biweekly', 'open_items': 12, 'red_flags': 3}, 'quarterly': {'assessment_type': 'quarterly', 'open_items': 12, 'red_flags': 3}}]\n" + ] + } + ], + "source": [ + "import pandas as pd\n", + "import joblib\n", + "import os\n", + "\n", + "class AssessmentInference:\n", + " def __init__(self, company_id, num_assessments, model_dir='models'):\n", + " self.company_id = company_id\n", + " self.num_assessments = num_assessments\n", + " self.model_dir = model_dir\n", + " self.model = None\n", + " self.latest_data = None\n", + "\n", + " def load_model(self):\n", + " # Load the trained model\n", + " model_path = os.path.join(self.model_dir, 'assessment_prediction', self.company_id, f'{self.company_id}_model.pkl')\n", + " self.model = joblib.load(model_path)\n", + " print(f\"Model loaded from {model_path}\")\n", + "\n", + " def load_latest_data(self):\n", + " # Load the latest assessment data\n", + " latest_data_path = os.path.join(self.model_dir, 'assessment_prediction', self.company_id, f'{self.company_id}_latest_data.csv')\n", + " self.latest_data = pd.read_csv(latest_data_path)\n", + " print(f\"Latest data loaded from {latest_data_path}\")\n", + "\n", + " def predict_next_assessment(self, current_data, assessment_type):\n", + " # Update assessment type (weekly, biweekly, quarterly) in the data for prediction\n", + " current_data['assessment_type_weekly'] = 1 if assessment_type == 'weekly' else 0\n", + " current_data['assessment_type_biweekly'] = 1 if assessment_type == 'biweekly' else 0\n", + " current_data['assessment_type_quarterly'] = 1 if assessment_type == 'quarterly' else 0\n", + "\n", 
+ " # Exclude target variables (open_items, red_flags) from the feature set\n", + " features = current_data.drop(columns=['open_items', 'red_flags'])\n", + "\n", + " # Predict the next open items and red flags\n", + " prediction = self.model.predict(features)\n", + " open_items_pred, red_flags_pred = prediction[0]\n", + "\n", + " # Ensure the predictions are integers by rounding\n", + " open_items_pred = int(round(open_items_pred))\n", + " red_flags_pred = int(round(red_flags_pred))\n", + "\n", + " return {\n", + " 'assessment_type': assessment_type,\n", + " 'open_items': open_items_pred,\n", + " 'red_flags': red_flags_pred\n", + " }\n", + "\n", + " def predict_next_assessments(self):\n", + " predictions = []\n", + " current_data = self.latest_data.copy()\n", + "\n", + " # Iteratively forecast the next assessments\n", + " for i in range(self.num_assessments):\n", + " print(f\"\\nForecasting assessment {i + 1}/{self.num_assessments}\")\n", + "\n", + " # Predict for weekly, biweekly, and quarterly for the same forecast step\n", + " weekly_prediction = self.predict_next_assessment(current_data, 'weekly')\n", + " biweekly_prediction = self.predict_next_assessment(current_data, 'biweekly')\n", + " quarterly_prediction = self.predict_next_assessment(current_data, 'quarterly')\n", + "\n", + " # Append predictions for all types in one forecast step\n", + " predictions.append({\n", + " 'forecast_step': i + 1,\n", + " 'weekly': weekly_prediction,\n", + " 'biweekly': biweekly_prediction,\n", + " 'quarterly': quarterly_prediction\n", + " })\n", + "\n", + " # Update the current data with the weekly prediction (or any of the predictions) for the next step\n", + " current_data['open_items'] = weekly_prediction['open_items']\n", + " current_data['red_flags'] = weekly_prediction['red_flags']\n", + "\n", + " return predictions\n", + "\n", + " def run(self):\n", + " self.load_model()\n", + " self.load_latest_data()\n", + " predictions = self.predict_next_assessments()\n", + " return 
predictions\n", + "\n", + "\n", + "# Example usage\n", + "inference = AssessmentInference(company_id='testid', num_assessments=5)\n", + "predictions = inference.run()\n", + "print(predictions)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "erp", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.13" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/scripts/run_assessment_prediction_trainer.py b/scripts/run_assessment_prediction_trainer.py index 2618b86..44d7b89 100644 --- a/scripts/run_assessment_prediction_trainer.py +++ b/scripts/run_assessment_prediction_trainer.py @@ -26,7 +26,7 @@ class CompanyModelPipeline: logger.info(f"Starting preprocessing for company {company_id}.") - # Step 1: Preprocess the data + # Step 1 : Preprocess the data preprocessor = DataPreprocessor(input_path=input_path, company_id=company_id) processed_data_path = preprocessor.run() logger.info(f"Data preprocessing completed for company {company_id}. 
class AssessmentInference:
    """Forecast upcoming assessment outcomes for a single company.

    The class loads a previously trained model and the company's most recent
    feature row from ``<model_dir>/assessment_prediction/<company_id>/`` and
    then predicts ``open_items`` and ``red_flags`` for each assessment type
    over ``num_assessments`` forecast steps.

    Args:
        company_id: Identifier used to locate the company's model artifacts.
        num_assessments: Number of forecast steps to produce.
        model_dir: Root directory that holds the ``assessment_prediction``
            artifact tree. Defaults to ``'models'``.
    """

    # Assessment types predicted at every forecast step. Each one maps to a
    # one-hot ``assessment_type_<name>`` feature column. The order matters:
    # it fixes the order in which missing one-hot columns are appended.
    ASSESSMENT_TYPES = ('weekly', 'biweekly', 'quarterly')

    # Columns the model predicts; they are never fed to it as features.
    TARGET_COLUMNS = ('open_items', 'red_flags')

    def __init__(self, company_id, num_assessments, model_dir='models'):
        self.company_id = company_id
        self.num_assessments = num_assessments
        self.model_dir = model_dir
        self.model = None  # populated by load_model()
        self.latest_data = None  # populated by load_latest_data()

    def _artifact_path(self, suffix):
        """Return the path of the ``<company_id>_<suffix>`` artifact file."""
        return os.path.join(
            self.model_dir,
            'assessment_prediction',
            self.company_id,
            f'{self.company_id}_{suffix}',
        )

    def load_model(self):
        """Load the trained model for this company into ``self.model``."""
        model_path = self._artifact_path('model.pkl')
        # NOTE(security): joblib.load unpickles arbitrary objects, so this
        # must only ever be pointed at model files produced by our trainer.
        self.model = joblib.load(model_path)
        print(f"Model loaded from {model_path}")

    def load_latest_data(self):
        """Load the latest assessment data into ``self.latest_data``."""
        latest_data_path = self._artifact_path('latest_data.csv')
        self.latest_data = pd.read_csv(latest_data_path)
        print(f"Latest data loaded from {latest_data_path}")

    def predict_next_assessment(self, current_data, assessment_type):
        """Predict open items and red flags for one assessment type.

        Args:
            current_data: DataFrame holding the current feature row. It is
                NOT modified; the one-hot columns are set on a copy.
            assessment_type: ``'weekly'``, ``'biweekly'`` or ``'quarterly'``.
                Any other value leaves all three one-hot columns at 0.

        Returns:
            dict with keys ``assessment_type``, ``open_items`` and
            ``red_flags``; the two predictions are rounded to ints.

        Raises:
            RuntimeError: If the model has not been loaded yet.
        """
        if self.model is None:
            raise RuntimeError("Model is not loaded; call load_model() first.")

        # One-hot encode the requested type. ``assign`` returns a new frame
        # (existing columns keep their position, new ones are appended), so
        # the caller's DataFrame is left untouched.
        one_hot = {
            f'assessment_type_{name}': int(name == assessment_type)
            for name in self.ASSESSMENT_TYPES
        }
        # errors='ignore' tolerates a data snapshot that was saved without
        # the target columns.
        features = current_data.assign(**one_hot).drop(
            columns=list(self.TARGET_COLUMNS), errors='ignore'
        )

        # Only the first row's prediction is used; the model is expected to
        # return one (open_items, red_flags) pair per input row.
        prediction = self.model.predict(features)
        open_items_pred, red_flags_pred = prediction[0]

        return {
            'assessment_type': assessment_type,
            'open_items': int(round(open_items_pred)),
            'red_flags': int(round(red_flags_pred)),
        }

    def predict_next_assessments(self):
        """Forecast ``num_assessments`` steps for every assessment type.

        Returns:
            list of dicts, one per step, with keys ``forecast_step``
            (1-based) followed by one entry per assessment type holding the
            dict returned by :meth:`predict_next_assessment`.

        Raises:
            RuntimeError: If the model or the latest data is not loaded.
        """
        if self.latest_data is None:
            raise RuntimeError(
                "Latest data is not loaded; call load_latest_data() first."
            )

        predictions = []
        current_data = self.latest_data.copy()

        for step in range(1, self.num_assessments + 1):
            print(f"\nForecasting assessment {step}/{self.num_assessments}")

            step_result = {'forecast_step': step}
            for name in self.ASSESSMENT_TYPES:
                step_result[name] = self.predict_next_assessment(current_data, name)
            predictions.append(step_result)

            # Carry the weekly prediction forward as the new "current" state.
            # NOTE(review): only the target columns are updated here, and
            # those are dropped before predicting, so the feature row is the
            # same at every step. The lag / moving-average / percentage-change
            # feature columns are not recomputed -- TODO confirm whether they
            # should be derived from the carried-forward values.
            weekly = step_result['weekly']
            current_data['open_items'] = weekly['open_items']
            current_data['red_flags'] = weekly['red_flags']

        return predictions

    def run(self):
        """Load the model and latest data, then return the forecasts."""
        self.load_model()
        self.load_latest_data()
        return self.predict_next_assessments()
"""Manual smoke test for the assessment-prediction inference pipeline.

Runs ``AssessmentInference`` for the ``testid`` company and prints the
forecasts. Execute it as a script (``python test.py``); importing the module
no longer triggers a model load.
"""

from src.pipeline.inference import AssessmentInference

# Inference loads the model from
# <model_dir>/assessment_prediction/<company_id>/<company_id>_model.pkl, so
# that file must exist before this script is run. Example of running the
# trainer pipeline for the same company:
#
#     from scripts.run_assessment_prediction_trainer import CompanyModelPipeline
#     company_ids = ['testid']
#     # The base path where the raw data for each company is stored
#     input_base_path = '/root/ds_erp_ai/data/raw/erp_assessment_prediction'
#     pipeline = CompanyModelPipeline(company_ids=company_ids, input_base_path=input_base_path)
#     pipeline.run_pipeline()


def main():
    """Forecast the next two assessments for ``testid`` and print them."""
    inference = AssessmentInference(company_id="testid", num_assessments=2)
    result = inference.run()
    print(result)


if __name__ == "__main__":
    main()