From fc96d44e720296a90085d7ba4058031b2e7814fe Mon Sep 17 00:00:00 2001 From: Sander Roosendaal Date: Mon, 8 Apr 2024 19:02:28 +0200 Subject: [PATCH] more pl --- rowers/dataroutines.py | 137 ++++++-------------------- rowers/interactiveplots.py | 8 +- rowers/tests/testdata/testdata.tcx.gz | Bin 4000 -> 4000 bytes 3 files changed, 33 insertions(+), 112 deletions(-) diff --git a/rowers/dataroutines.py b/rowers/dataroutines.py index 534e137a..855d0831 100644 --- a/rowers/dataroutines.py +++ b/rowers/dataroutines.py @@ -773,55 +773,21 @@ def clean_df_stats_pl(datadf, workstrokesonly=True, ignorehr=True, # clean data for useful ranges per column if not ignorehr: - try: - datadf = datadf.filter(pl.col("hr")>=30) - except (KeyError, TypeError): # pragma: no cover - pass + datadf = datadf.filter(pl.col("hr")>=30) - try: - datadf = datadf.filter(pl.col("spm") >=0) - except (KeyError, TypeError): - pass + + datadf = datadf.filter( + pl.col("spm") >=0, + pl.col("efficiency")<=200, + pl.col("spm")>=10, + pl.col("pace")<=300*1000., + pl.col("efficiency")>=0, + pl.col("pace")>=60*1000, + pl.col("power")<=5000, + pl.col("spm")<=120, + pl.col("wash")>=1 + ) - try: - datadf = datadf.filter(pl.col("efficiency")<=200) - except (KeyError, TypeError): - pass - - try: - datadf = datadf.filter(pl.col("spm")>=10) - except (KeyError, TypeError): - pass - - try: - datadf = datadf.filter(pl.col("pace")<=300*1000.) - except (KeyError, TypeError): - pass - - try: - datadf = datadf.filter(pl.col("efficiency")>=0) - except (KeyError, TypeError): - pass - - try: - datadf = datadf.filter(pl.col("pace")>=60*1000) - except (KeyError, TypeError): - pass - - try: - datadf = datadf.filter(pl.col("power")<=5000) - except (KeyError, TypeError): - pass - - try: - datadf = datadf.filter(pl.col("spm")<=120) - except (KeyError, TypeError): - pass - - try: - datadf = datadf.filter(pl.col("wash")>=1) - except (KeyError, TypeError): - pass # try to guess ignoreadvanced if not ignoreadvanced: @@ -834,70 +800,25 @@ def clean_df_stats_pl(datadf, workstrokesonly=True, ignorehr=True, pass if not ignoreadvanced: - try: - datadf = datadf.filter(pl.col("rhythm")>=0) - except (KeyError, TypeError): - pass + datadf = datadf.filter(pl.col("rhythm")>=0, + pl.col("rhythm")<=70, + pl.col("power")>=20, + pl.col("drivelength")>=0.5, + pl.col("forceratio")>=0.2, + pl.col("forceratio")<=1.0, + pl.col("drivespeed")>=0.5, + pl.col("drivespeed")<=4, + pl.col("driveenergy")<=2000, + pl.col("driveenergy")>=100, + pl.col("catch")<=-30) - try: - datadf = datadf.filter(pl.col("rhythm")<=70) - except (KeyError, TypeError): - pass - - try: - datadf = datadf.filter(pl.col("power")>=20) - except (KeyError, TypeError): - pass - - try: - datadf = datadf.filter(pl.col("drivelength")>=0.5) - except (KeyError, TypeError): - pass - - try: - datadf = datadf.filter(pl.col("forceratio")>=0.2) - except (KeyError, TypeError): - pass - - try: - datadf = datadf.filter(pl.col("forceratio")<=1.0) - except (KeyError, TypeError): - pass - - try: - datadf = datadf.filter(pl.col("drivespeed")>=0.5) - except (KeyError, TypeError): - pass - - try: - datadf = datadf.filter(pl.col("drivespeed")<=4) - except (KeyError, TypeError): - pass - - try: - datadf = datadf.filter(pl.col("driveenergy")<=2000) - except (KeyError, TypeError): - pass - - try: - datadf = datadf.filter(pl.col("driveenergy")>=100) - except (KeyError, TypeError): - pass - - try: - datadf = datadf.filter(pl.col("catch")<=-30) - except (KeyError, TypeError): - pass # workoutstateswork = [1, 4, 5, 8, 9, 6, 7] workoutstatesrest = [3] # workoutstatetransition = [0, 2, 10, 11, 12, 13] if workstrokesonly == 'True' or workstrokesonly is True: - try: - datadf = datadf.filter(~pl.col("workoutstate").is_in(workoutstatesrest)) - except: - pass + datadf = datadf.filter(~pl.col("workoutstate").is_in(workoutstatesrest)) after = {} @@ -1571,7 +1492,7 @@ def getsmallrowdata_pl(columns, ids=[], doclean=True, workstrokesonly=True, comp return data - + df = df.fill_nan(None).drop_nulls() return df @@ -2207,7 +2128,11 @@ def dataprep(rowdatadf, id=0, bands=True, barchart=True, otwpower=True, df = dd.from_pandas(data, npartitions=1) if polars: pldf = pl.from_pandas(data) - pldf.write_parquet(filename, compression='gzip') + try: + pldf.write_parquet(filename, compression='gzip') + except IsADirectoryError: + shutil.rmtree(filename) + pldf.write_parquet(filename, compression='gzip') else: try: df.to_parquet(filename, engine='fastparquet', compression='gzip') diff --git a/rowers/interactiveplots.py b/rowers/interactiveplots.py index f916d7c9..bb5ce5b7 100644 --- a/rowers/interactiveplots.py +++ b/rowers/interactiveplots.py @@ -478,7 +478,7 @@ def interactive_forcecurve(theworkouts): rowdata.dropna(axis=0, how='any', inplace=True) if rowdata.empty: - return "", "No Valid Data Available", "", "" + return "", "No Valid Data Available" data_dict = rowdata.to_dict("records") @@ -2312,7 +2312,6 @@ def interactive_cum_flex_chart2(theworkouts, promember=0, columns_basic = columns_basic + ['spm', 'driveenergy', 'distance', 'workoutstate'] datadf = pd.DataFrame() - start = timezone.now() if promember: datadf = dataprep.getsmallrowdata_pl(columns, ids=ids, doclean=True, workstrokesonly=workstrokesonly, for_chart=True) @@ -2320,7 +2319,6 @@ def interactive_cum_flex_chart2(theworkouts, promember=0, datadf = dataprep.getsmallrowdata_pl(columns_basic, ids=ids, doclean=True, workstrokesonly=workstrokesonly, for_chart=True) - print(timezone.now()-start) try: _ = datadf[yparam2] except (KeyError, ColumnNotFoundError): # pragma: no cover @@ -2331,9 +2329,7 @@ def interactive_cum_flex_chart2(theworkouts, promember=0, except (KeyError, ColumnNotFoundError): yparam1 = 'None' - datadf.drop_nulls() - #datadf.dropna(axis=1, how='all', inplace=True) - #datadf.dropna(axis=0, how='any', inplace=True) + datadf = datadf.fill_nan(None).drop_nulls() # test if we have drive energy try: # pragma: no cover diff --git a/rowers/tests/testdata/testdata.tcx.gz b/rowers/tests/testdata/testdata.tcx.gz index e31e8ffccd1cf1fc926f11124b4ccdd456793ea8..b20a87644e3a3da95d688d2ee73932e04e6f81c4 100644 GIT binary patch delta 3644 zcmV-C4#V-FAD|xxABzYGyBidd2PJ=sf^35^U}u8Z$mBMt$Q8L~q?VvAFZuUFcH6RS ztB^ctieMG!gQ2djt?R2tzMV&3zd1iUc-3ER*B6_w9-@JV2VWmQIC^%~tvBn<>398R z=&!!-hVHj++aIKzH`}isz8Hqf!`15b>(}PAyE|A>sRaHk00I){U+bwjT5}tu0J@#Yc>~Grz--$)vr$;Ki!?;i}hwZ zbeoet9kpE_?$7)9;-njPzxGZyaG*cx6`42N=YM|S-@DLy0MgIMAK!2IKJ00Kask z=H~oGdI$c(ySUr!!y`X>CB)eA*JO9VJqg^TK>ia7dlI)J_}~Gt0u2FqleGdW5;5GJ zt%!R}@@~azxW|(6&afDh@d6=6&4hbh@*cP%?%o6sPNUB|SJI4QS`K$#4|f6M16qx| zdoeN-?v;)671?pZCW4I`?mME5)%rNmj#iJ6_HQooFE5Pc*YgABCzT`HFmamJK6XO_Y{=(G2@WejM zC`xOC@#$4m+4B*MZyMg*=XpVsL|$djGeFUZ&4Nbznq*$ANzWEYLmqK9x_yh%j+1xB z3kFS;mI;g6^GhOMkv*R+G`6aFo*7M?33+Vu{O#m9=iV|PX*@aUMJbteZC&%*PeNV{ zFa(v%3l)vnH1Yw{lluf7enT~Td^wL-j=PXw4urTB;>70Lw(ff_Lf{xnGemXn}obK6wztC zx0YSeeEZNw`zgpX>{?=JbbD)EQQbbY`1~Z~$vB3nYJw+a$uFs*f6`S%UW^rH)jaQt z*3vGDyi`O!n8aO`foaQDR0=eipMtzalQd38s+Olkd)>-{s;I?QM4pXzx+txUv1Crp zlE~LnW5YT3)_FwDXfFt}k@;rNkCTsv0>q#qADeDJ|A>uH?v|p8(h5VYH24SfA5X3E@C56mVEQ!O+elz*ap2-WnGJFV)VBi|Y1EB45*J9}OS|O?=)lWfiTW_ANd?1^JwN8!%}4 z-a1P~b8nYKz9M^`tRaW2n&*XPlY7gpetwj^H)xSmqrIb|_WXj#JF19$?wmlZ>2L2m z7oF#qM7|>3K51(lQMGEB3*4f#3m`8BKvZ4MUE~&}o&S^j1|U_&$#d?0s`B3YWC_NW zfjN8fV!U;ziPia}8x)My2`UUz-mv)I(k{*V3xiwD3~2 zettY`hR^e0biul79O|lW+$fLyd0VytgCdZD7P0SFX-33)b1x*nJz6kWA!N#tvq=SfTujG7G05V2@x1}$iw_Z5*3 zMv}~HDlH;Vlli&F>g15oSSD3wMnrDp=N_?1&cQLAoA5)1`d|bJNrgVChCCCazDfNk`rOWxkag&t zEo$3Io9ic&I|&(K8>z1hJ-d3)Cp*ms0;@tFeNi=UNdrDsgr3t}PoSFX5yEWH*EZnS zB=ycPAgShh#Iof!OG3{Tp^sU^k2qGy9n*EI_%7G1b>Q3F0zgx(pjq|3-<;MuHk)8_hd^ua_?R^Kq% zqAr~_*H40;&_p6oC1^3tXHB!_!yiX)O<-m94YQj~)3lbQGmhRHZ-rGFKHIYE+)%5z zn*x2#uzwWP)z`C>ZKpy_&6@;08v%?usdr&E13tC{tx@#OSk$d$i0ot0#%a{-`bp47 zLlLb`!w+S{H{85=Q=qq$`)BFAc^qR?^X5O`TQMNY!Yhj~n|OU~$=DSIZEe!nkYZG! zCu(#3{Lu@Ts9d-hEmP6RUGw2jgPu)vNILYEXEL+AS##u$qu;em^eQ#aift~P`8#rV zEfdk}=6WZyNziI(nIq`Ku4N+Vmb^jhr5Sf~lZ*=?e@}rP(J+fsNxeYX3uW^Kdf%^OF*E3zh4 zygpe3t;l)-QtxtCji^KKsHkz*x@t^=K4hu)x&hzwY^1&o_@n3m}Ht8h#7f zPJ`YWe}{;wt{M_;^XARpk;~qACaVhCI&iZ|)o5zoDEeflS%CqqR{QNAHY*&!G+Y^LOOhn9vic3a^}vv#GRgSvsTWg$!$b_|8F_ z>lc7t43SpN^UyMK=U#Xfa?3XhGX4&?DuBDoUy6vLzVM zg0_>Om)uZ=bW(4nXygu>nl}mhU|2Bdq+VRnRE?(QO@f}1aYxG53f{Tq$ep`OC+3!k zf1q8{)_Xsjxt?0kb`*UsvSv_)Z+!60u3rH9Xe3#8I;oF-Hg-KVHE#?(lM!om;hO-V z+4T!RFS+mvbm*}t`n*(78}KJU9}F<+`r(J1ygw84)Vz6P=<`a}fI)|z<808k_3Ovc zvk5}F$X$9-N^1DI6GcNm0eWXV+o*Ese*}jmT{??GFBPGWc}-i;NxjFS1HNx17*n7Z zGR|U9W$A=aw9@uo=v&R*IC{y|H$jz=8)DH)+j|Z8t(VR?`jFSO)kW?i6kY6WQ5t?N zSvmr{m8=21%8?sUX5-R<);Dhy{ca^|-8y%X$_8zr&GnO@_qqB;iJtvzR<~?(fBh)> zyplB|sl0j7`l8i0)cWR4f}VGan5&GtD6y?>IdgAbM(=Xrl@h&+MWwzqN}UFM%!OAv zyFLbJ&C_P?%}eNaD_JWecQIn?jJ*K#)})zUwEzkMv>v&4MenCV|90!U4M(fT-R1R% zA1{V}d;Hni@Z}F@{dvC`4nE&)fBO8O8xG)&|K0W6dtm!Je0Dl~d9>Oe@L+$YcfT=z zzW%h{e}DAdRpNMY^>*JMyY%*4UA*3Qo9Erx+3snjkL%6p^KR(UdAt4Fm8iTw?&zDB z!;6cn8?VVXmzQVjlWs^q{rbhYKKCalOW%F_a((vv`seJ8@#36rr`{-`m*U{r{bNcA};~##^ zY<7R{xBryh;vZk0|JJ8F1NZ`9|NqhI#uxi1zk5u_4|b^6oAeyMODOxR@6*%xEnWKf O!T$id0FVI0fB^t`9f^35^U}u8Z$mBMtNFBLnq?XWKUh?mU?6zgu zRv~`U6u~Oc2SZ(5Th~{Qd^?Z6d2?}o@M^i)t}fS)9-@JV2j3h&IC{S6R_oRJ?EB@q zUv7Tr`tJ8`yF5rcueXmLzUcd_!^PtD>(}P2+gz;H=H&8Xk-prXyjWgz+r@wFhc8~y z!~H1^yEmV4mTyn`)vHzi=Z|mtC5!q2{Fz zvhU9GVb7K)m+RB*v9Lc{e6scZ0Y9wvZ_v|a`mx(|pZQ)s+W3C&cldvf5yR2q`qTRz zzD=Umoh|>dOn3iF*Dt@jy4by%{hwFqmt1dty#8+4_1mA9o2RSw%YOOq{>G0;r}?!z zf4MwHn=Z2d^ZgYTw_M`J*H61QtBaQx|EC)ecgqdja)}#XKVI)%)&9@p-CMCtWUsf} ze2}Nh^ni~E^JCoqeS?4EW_5O!Zf)7WxI8_6+O1!9=ZWG6TW{{&o&5KMyWefOx?HXM zo160&=^gltcX7Adhev+&N{F$;ugUI!dlI-wf&3>F_9Sjc@WBIyTb}Q3hwIU6`^jaW z|NPr$&jh!Q;UAwdo%3H^-zTw&M~nSmA768kF8cA$yZsjPi*S>t0u2FaleGdW5;okO zt%!R}@@~azxW|(6&M+I3@d6=6O@w=0@*cP%?%o6sPNUB|SJI4Qnh$qh4|f6M16qx| zdoeN*?v;)671?pZCW4I`?mME5)%rNml=_d6_HQooFE5Pc*YgAR79SQ zWfazsFYLREGa^qFk#`22&WVH0mz?J^zNXQh#jpdfndixtC2wa$zM_Z?2Lk|JG~@+JM(6E) z{Px_ex*_sOeyu}Q8Wu?`>BE~9`HFmamJK6XO_Y{=(G2@Wek6bL&VW^t4=@>-uWg>M zC`xOC@#$4m+4B*MZyMgz=XpVsL|$djGeFUZ&5TC-nq*$ANzWEYLmqK5x_yh%4wHAr z3kFS;mI;g6^K&9!kv*R+G`6aFo*7M?2zhMt{O#m9=iV|PX*@aUMJbteZC&%*k3wDy zFa(v%3l)vnH1YxClluf7e@7wDhAoO}o)279!}Cq%M{?=JbbD)EQQbbY`1~m3$vB3nYJw+a$;6s;I?QM4pXzx+txUv1Crp zoXFQxW5YT3)_FwDXfFtpk@;rN50j6E0>q#qADeDJ{fLcF?v|p8(h5VYH3FyfA5X3E@C56mVEQ!jX>Te*ap2-WnGJF|I ze3SWM@?tEZs=7)-*}A&c19Kem!4RUVbArUOH8#HK_G6GoFcFD0-dhLQa&MV)VBi|Y1sB45*J9}OS|O?=)lWfiTW_ANd?2Kk(O8!%}4 z-a1P~b8qKFz9M^`tRaW2n&*WklY7gpetwX=H)xSmqrIb|_WX>2L2m z7oF$lM7|>3K51(lQMGEB3*4f#GaxSpKvZ4MUE~&}o&J;i1|U_2$#d?0s`B3YWC_NW zfjN2dV!U;ziPia}8x)My3C_r#uP8omgR#h{$;t8QZLt#}Uz-mv)I(k{*V3xiwD3~2 zetu3eUz3I>Y`hR^e0biul79PHlW+$fLyT_Uytf17ZEg%0SFX-3VAk2x*nJz6kWA!PULHv=SfTujG7G05V2@x2F+-m_Z5*3 zMv}~HDlH;VlliH~>g15oSSD3wMnrDpryj9M&cQLAoFcP3b3O_UY^%7$s@ljI07CX+#rt>5(^`d|bJNrgVChCC6YzDfNc`rOWxkag&t zEo$3Io9joDI|&(K8L6)fJ-d3)Cp*ms0;@tFeNi=UP6Iwxgr3t}PoSFX5yE89*EZnS zB=ycPAgShh#Iof!b3)G*p^sU^k2Rs%j&gx(pjq|3-<;K{6U)8_hN^ua_?R^Kq% zqAr~_*N=jp&_p6oC1^2CXHB!_!yiU(O<-m94YQj})3lbQGmPFFZ-rGFKHIYE+)%5z z8v}jLuzwWP)z`C>ZKpy_%^L+h8v%?usdr&A13tC{twHq8Sk$d$i0ot0#%a{-`ccqF zLlLb`!w+S{H{85=W1zQ``)BFAc^qR?^QJ%GTQMNY!Yhj~nRtC|$=DSIZEe!nkYZG! zCu(#3^wA5Ls9d-hEmP6RUGw3OgPu)vNILYECo;3VS##tLqu;em^eQ#aift~P={s_F zEfdk}=6WZSNziI(nFHv2DW^Kdf%^OC)E3zh4 zygpe3t;l)?QtxtCji^KKsHkz*x@wGrK4hu)x&hzwWTd_g_=D&fO%T?_>m}Ht8h#7f zj)UGBe}{;wt{M_;^X5(8k;~qACaVhCI&hOo)o5zoAo^sdS%P;oM@>*&!G+Y(|6?Bn9vic3a^}vlc}_ASvrI0g$!$b_|8F_ z>t}#o43SpN^UyMKr(Sp!a?3XhGXC&?DuBDoUy6vLzVM zg0`cem)uZ=bW(4nXygu>nl}piU|2Bdq+VRnRE?(Qje?$%aYxG53f{Tq$ep@NC+3!k zf1q8{)_Xshxt?0kb`X6ovSv_)Z+!60uAc$=Xe3#8I;oF-GIl*RHE#$#lM!om;hO-V z+4VC(FS+mvbm*}t`n*(78}LU!9}F<+`r(J1ygw22)Vz5^=<`a}fI)|z<7Cjc_3MYx zvk5}F$X$9-N^1DI6GcNm0(xgW+o*Ese*}jmT{^QuFBPGWc}-i;NxjFS1HNx17-OIp zGR|U9W$A=aw9@uo=v&R*FnY<=H$jz=8)DH)+j|Z8t(VR)`jFSO)kW?i6kY6WRvLaS zSvmr{m8=21%8?sUCgak9);Dhu{ca^|-8y%X$_8zr&Gn<8_qqB;iJtvrR<~?(fBhi( zyplB|sl0j7`l8i0)cWR)f}VGan5&GtD6y?>IdN}ZM(=Xrl@h&+MWwzqN*xD%%!OAv zyFLbJ&C@3C%}eNaD_JWecQIn?j6DPN)})zUwEzkMv>v&4MeoN#|90!U4M&S7-PQGn zpDz35_V|nQ{_7vlmlw-*fAHmQf76!-U4H;?{2zRJ5-}|O34C$Ze|@yrAMjv*rgy(F zf4=^-+JAra{U&j|+`QfQr!Kuco6FbRZhhLFpYNVl`nX!3opybf&fD$Zu0-YiaYx_2 z>|b1NZoDSnUR|B9PP#t*^y?Sn`rMzKEPeOg%hmbm_0Rn^y^rb88xQC0dX}f%f6^th zv*70EPj_3b(`GO4_uFl_ezYH*|LA$T>EmCP=gV%pOdRs3w~y||eH}gN)@P4@IR5dc z%x3rJe*4eqE&l1{#qY~>X8>OT?EgPn-1uVuo