From fd46732b6ef7b2bc735768718d461d1c162338ac Mon Sep 17 00:00:00 2001 From: Sander Roosendaal Date: Mon, 8 Apr 2024 20:50:08 +0200 Subject: [PATCH] histo converted to polars --- rowers/dataroutines.py | 96 +++++++++++++++++++++++++- rowers/interactiveplots.py | 40 ++++++----- rowers/tests/testdata/testdata.tcx.gz | Bin 4000 -> 4001 bytes 3 files changed, 116 insertions(+), 20 deletions(-) diff --git a/rowers/dataroutines.py b/rowers/dataroutines.py index b3e43767..2e6283da 100644 --- a/rowers/dataroutines.py +++ b/rowers/dataroutines.py @@ -1414,6 +1414,37 @@ def getrowdata_db(id=0, doclean=False, convertnewtons=True, return data, row +def getrowdata_pl(id=0, doclean=False, convertnewtons=True, + checkefficiency=True, for_chart=False): + data = read_df_sql(id,polars=True) + print(data) + try: + data = data.with_columns(pl.col(data['time'].diff()).alias("deltat")) # data['time'].diff() + except KeyError: # pragma: no cover + data = pl.DataFrame() + + if data.is_empty(): + rowdata, row = getrowdata(id=id) + if not rowdata.empty: # pragma: no cover + data = dataprep(rowdata.df, id=id, bands=True, + barchart=True, otwpower=True, polars=True) + else: + data = pl.DataFrame() # returning empty dataframe + else: + row = Workout.objects.get(id=id) + + if checkefficiency is True and not data.is_empty(): + try: + if data['efficiency'].mean() == 0 and data['power'].mean() != 0: # pragma: no cover + data = add_efficiency_pl(id=id, polars=True) + except KeyError: # pragma: no cover + data = add_efficiency_pl(id=id) + + if doclean: # pragma: no cover + data = clean_df_stats(data, ignorehr=True, for_chart=for_chart) + + return data, row + # Fetch a subset of the data from the DB def getsmallrowdata_pl(columns, ids=[], doclean=True, workstrokesonly=True, compute=True, @@ -1707,7 +1738,28 @@ def read_cols_df_sql(ids, columns, convertnewtons=True): # Read stroke data from the DB for a Workout ID. Returns a pandas dataframe -def read_df_sql(id): +def read_df_sql(id, polars=False): + if polars: + try: + f = 'media/strokedata_{id}.parquet.gz'.format(id=id) + df = pd.read_parquet(f) + except (IsADirectoryError, FileNotFoundError, OSError, ArrowInvalid, IndexError): # pragma: no cover + rowdata, row = getrowdata(id=id) + try: + shutil.rmtree(f) + except: + pass + if rowdata and len(rowdata.df): + _ = dataprep(rowdata.df, id=id, + bands=True, otwpower=True, barchart=True, + polars=True) + try: + df = pl.read_parquet(f, columns=columns) + except (OSError, ArrowInvalid, IndexError): + pass + df = df.fill_nan(None).drop_nulls() + + return df try: f = 'media/strokedata_{id}.parquet.gz'.format(id=id) df = pd.read_parquet(f) @@ -1802,6 +1854,13 @@ def fix_newtons(id=0, limit=3000): # pragma: no cover pass +def remove_invalid_columns_pl(df): # pragma: no cover + for c in df.get_columns(): + if c not in allowedcolumns: + df = df.drop(c) + + return df + def remove_invalid_columns(df): # pragma: no cover for c in df.columns: if c not in allowedcolumns: @@ -1809,6 +1868,36 @@ def remove_invalid_columns(df): # pragma: no cover return df +def add_efficiency_pl(id=0): # pragma: no cover + rowdata, row = getrowdata_pl(id=id, + doclean=False, + convertnewtons=False, + checkefficiency=False) + power = rowdata['power'] + pace = rowdata['pace'] / 1.0e3 + velo = 500. / pace + ergpw = 2.8 * velo**3 + efficiency = 100. * ergpw / power + + efficiency = efficiency.replace([-np.inf, np.inf], np.nan) + efficiency.fillna(method='ffill') + rowdata = rowdata.with_columns(pl.col(efficiency).alias("efficiency")) # ['efficiency'] = efficiency + + rowdata = remove_invalid_columns_pl(rowdata) + rowdata = rowdata.replace([-np.inf, np.inf], np.nan) + rowdata = rowdata.fillna(method='ffill') + + delete_strokedata(id) + + + if id != 0: + rowdata = rowdata.with_column(pl.lit(id).alias("workoutid")) + filename = 'media/strokedata_{id}.parquet.gz'.format(id=id) + rowdata.write_parquet(filename, compression='gzip') + + + return rowdata + def add_efficiency(id=0): # pragma: no cover rowdata, row = getrowdata_db(id=id, @@ -2144,7 +2233,10 @@ def dataprep(rowdatadf, id=0, bands=True, barchart=True, otwpower=True, os.remove(filename) df.to_parquet(filename, engine='fastparquet', compression='GZIP') - + if polars: + pldf = pl.from_pandas(data) + return pldf + return data diff --git a/rowers/interactiveplots.py b/rowers/interactiveplots.py index bb5ce5b7..22b4e606 100644 --- a/rowers/interactiveplots.py +++ b/rowers/interactiveplots.py @@ -469,18 +469,20 @@ def interactive_forcecurve(theworkouts): columns = ['catch', 'slip', 'wash', 'finish', 'averageforce', 'peakforceangle', 'peakforce', 'spm', 'distance', - 'workoutstate', 'driveenergy', 'cumdist'] + 'workoutstate', 'driveenergy', 'cumdist', 'workoutid'] + columns = columns + [name for name, d in metrics.rowingmetrics] - rowdata = dataprep.getsmallrowdata_db(columns, ids=ids, + + rowdata = dataprep.getsmallrowdata_pl(columns, ids=ids, workstrokesonly=False) - rowdata.dropna(axis=1, how='all', inplace=True) - rowdata.dropna(axis=0, how='any', inplace=True) + rowdata = rowdata.fill_nan(None).drop_nulls() - if rowdata.empty: + + if rowdata.is_empty(): return "", "No Valid Data Available" - data_dict = rowdata.to_dict("records") + data_dict = rowdata.to_dicts() thresholdforce = 100. if 'x' in boattype else 200. @@ -490,7 +492,7 @@ def interactive_forcecurve(theworkouts): 'thresholdforce': thresholdforce, } - script, div = get_chart("/forcecurve", chart_data) + script, div = get_chart("/forcecurve", chart_data, debug=False) return script, div @@ -822,25 +824,27 @@ def interactive_histoall(theworkouts, histoparam, includereststrokes, ids = [int(w.id) for w in theworkouts] + columns = [name for name, d in metrics.rowingmetrics]+['spm', 'driveenergy', 'distance', 'workoutstate', 'workoutid'] + workstrokesonly = not includereststrokes - rowdata = dataprep.getsmallrowdata_db( - [histoparam], ids=ids, doclean=True, workstrokesonly=workstrokesonly) + rowdata = dataprep.getsmallrowdata_pl( + columns, ids=ids, doclean=True, workstrokesonly=workstrokesonly) - rowdata.dropna(axis=0, how='any', inplace=True) + rowdata = rowdata.fill_nan(None).drop_nulls() - rowdata = dataprep.filter_df(rowdata, 'spm', spmmin, largerthan=True) - rowdata = dataprep.filter_df(rowdata, 'spm', spmmax, largerthan=False) + #rowdata = dataprep.filter_df(rowdata, 'spm', spmmin, largerthan=True) + #rowdata = dataprep.filter_df(rowdata, 'spm', spmmax, largerthan=False) - rowdata = dataprep.filter_df( - rowdata, 'driveenergy', workmin, largerthan=True) - rowdata = dataprep.filter_df( - rowdata, 'driveenergy', workmax, largerthan=False) + #rowdata = dataprep.filter_df( + # rowdata, 'driveenergy', workmin, largerthan=True) + #rowdata = dataprep.filter_df( + # rowdata, 'driveenergy', workmax, largerthan=False) - if rowdata.empty: + if rowdata.is_empty(): return "", "No Valid Data Available" try: - histopwr = rowdata[histoparam].values + histopwr = rowdata[histoparam].to_numpy() except KeyError: return "", "No data" if len(histopwr) == 0: # pragma: no cover diff --git a/rowers/tests/testdata/testdata.tcx.gz b/rowers/tests/testdata/testdata.tcx.gz index b20a87644e3a3da95d688d2ee73932e04e6f81c4..ec58e900f4ac114fc7a8c354fae66e67af9f19ea 100644 GIT binary patch delta 3644 zcmV-C4#V-FAE6%xABzYGIy#XCCV#SmY=bdiXM)(s(%<~`{lY{ zZhq+c?)PrHJV-mQw~rpa==-a~#p3nr*XFF-T&&jS)^r0aLT_D(l&pg-yrnK#?hzdrEqUH^S|ez{pKw~JT!*Zcp`^(XuN zm)DoV&%4whpt~Nr|BVliAAcMi9KC%2>1X7R?>GE;@a*bx(?5Fn&;G@7^Kk!V`ug$d zF#)=-K)xb)j(lkGkerE*IKtlm4gro9X{UGlKk2Rxp7q_Pf4;g{K6?09$6jA39s2yT z@6PjK&z2{b>(lMAus>RSvi1D|Kdkm|(9>o5vDphLe);eI#*av+`L#QL zxjaUjF0%jg{S_9sT;j&pPrEm(i?b#|6+ZP~xLJUxEetzUNMiQ)%aZ|>cl{P%;q-)*_NT&??? zoAVdx9r%oQaktxtM}G85h_S=3$?kxA61Yi${3jIlByLCW!2^a{p6_ml>(Oib$z`Aa z{M%>G1hS=qf{hyPK}y>1bCUN^5qD=C5NPzeC`I-8`IA@!C4cjgd6tUETN9H~ zuabEZ6AF@-8Ig|_kx%BFAO}@=#uc^W-F(PfR}Xmv<1CU&=9BSX(sSqMGtUcDM4pXh z6xNV0?7NFIB2N{OcLtr#iG$9UoaZgH(S8*2!9)_P3D2PqCH;5zBHu>)LGpyg5qQ;T zAIg%SPlOJoBCC$95r05bX?QV~wCCqU-iC_ECq11BRmoQnOGfD|&1kg8ipWP35WH2T z-8J$vAkSo6#Hb>llXDA_pON3brqQ0oumi7|=gF2OZ)ZflqKFL#0{~t$cM0mZFN%3PaQFr!VtjO-Q=E zO6G0Ztn1inX-6S{?~Sl7Vk1(PeDmRrK;9KNw$VLK@ zCQ9pYI^?-|at6t}oOP}0gy;y1>h^OYU(;wG4Il&$?@rJu@fO*n-4G4LtZe~(yG?9@KUsX zeoiu9lZGd3ybx-9c;2~^e*0OIa0ebkjBek&w*%zm5YTv~56{Psgg z%m*mC7WJ%T-c>~28w3K4RTnT!M&=v&Ve-+qG}voK`#_msa zx;c@r$dhA(p-7;~w~yWz9qnCPi+T+5``laIJRd?)t8Px@EAr$>ve1}7MLve2IXSM) z^P`Y=CRk!klokQXhH2-MqnD22^nD-sjmz@yL!+kJIw|Ht3n@rQ8jN)13p%Sp3_`UpqlFu!er3bHsIGJ z_0BLLspfjbvgI~&LeCYUk6FWyI;k(aYGFe^4*J{))v+ow3)76%H2fK$mptIBLbixP zMWgj|leGybe?j!t01)e>KG?~i$7b9OqW1~Cu8Bs@<|>Mw8~RbuM-#2mr9WFVXteSlNZLAOy{I#*AJuje};kt-GJ|*Xkk4zHE#^`m;_i^ zaEBCdG97WTrDYDHXM;}HH47;gUAS~s13p!R-Wjl@%gANm$*ghH=K5ju!9-D3-!R&u zE}b^lkAj}iL?TcnXfaJ^O|$01A4YFYU}g0UvztuQw3ekajNTh>g;g3p+p_E2P^-Bc z1AWf0e-zZ!*Rzyur$SB58wEWZ0gO7ScVRLEKDGp{LG;d8)U9QR>|@c!Y1HicQP4+2 z5v@+c4`stQ+`M^XptqF!XX(6o9Ai`Sra$0YF(AsqD~m9hcztcj*cAnBZPM6~VpO3g zYIFVc(F>TUT(}r5Q_;v>^Wl$!o=tQ}I`oz&GPAr{bL0-A-?dEiDmBlFZ7!YZJ92j| z6VdDDdMA@f&}wO!1L(u9Wg_U7yg}=w8Fy2Yj0+)ukAWW1FpE=3y+GOJ>UlZXlmXd`edhBhq^2shpr9yQ_syy=-CKm zCyfR@O{WB-wNV;I?~H|sji%;}f}WCbN6OX;-nr(;ow`dW=9YAH1eDKY#p8@)4Bw2SlsgHg#c0DyUZwNh;5o>kfn*gEN z^)o;(x$p{f=&>mJyi`yd@JB!&3^3~Y;fI{OKN0lQym>?D^Geo$L5H5>WYD+u>xa>^ z2|~KaU3yVUYWTSmMMFOVdS^V_sB-Cl1cxPEI4Z?U()M2HTg}}tddbx{L6wmkV$n+5dky%lm(DQykk_=;MeZULUF>XD8h$NV zIs&_utO32sksDDarM@*v9S42Pg;zSe zJ_cyb(1%Y>mSdT7t3{j@a1lQ)0YQbe*ka%AK~QGx)Vqf{}*Td*GG%}0T1?PdiNXi z=j%_a{r5-TZxYAL&D(u{>eAb@xqQ9t)~DV1`R-|@kE`|BY1en@yxsooN>tt-cl7Pc z{>A0y#%uEJ)z$gxr0dg9zkV^U&;7~C(s$pzT%Dg@|J+~G`B@o?UIZh6}MCtWf- z3vPb?bhp(yZT9khzukuGNBhzFkDjNSKK^BSzU;Qk#36rr`{-`m*U^)1efIc=;~#&@ zY<7R{xBr~p;-6k#{JuYcsBfB^tE2#Wy# delta 3643 zcmV-B4#e@HAD|xwABzYGyBm=PCVz^8Y=bdiXM)(s#Ik;okw54IX^pi)n9Gb7n`phqJf78Umrg>dUn;VH|x#mcl~DQ zufFew?ze8+AEcc(+piwJ7>3Kk)#~-@*XFdlI$v+h$;J69eYrh((VutQ)qm}WuU^r^ z{V5K+H-F(Q-<%BVSL@-AAKnc8Cg0$V6TI23KRCl{HWyc?D+0jPuTLI7-JRl#^=3PC zo0C2rwOt?X&-?h|q#Jg>_D(l&pg-yrnK#?#e}3TKyWzX;?BZ(OZ&$DI&-eem8&3B7 z_t%%gzjmonKzBWK{~I43KYutlIC}d4($B~r-*5Qi;OXVX)$rBBe-1DDtB3n9)7Oul z9}}Sa0^|#VXUK;Z56PM6h$H+B;1J+wm3Df!|KslR;OWp^4bRr+{Z|kF?AYrIr9+=x z4Bc5i>}h{;v3b5d7WPN0Pqw~4;QRId4SLe2AG=NWneXMJjqmqoulkUy>{N?%o=?28zas#(q;>OpHHoI4K_~Ur@R`iMN^_H6t z@_C;g@G)V2jQhWDP=CBypPr^$>xUN?&ySyUo0r{LqWHnqn|pUB|NY?ZcUvwm)|=tx z=KMu^2mZplxZCZ+BR_g2#MtrIWOu+l3EZSW{u2s&61OAx-~q!e&v&=O_2{+zi*1hS=qf{hyPK}y>1OOp3c5qD=C5NPzeC`I-8<&#(gC4bA2d6tUETN9H~ zuabEZ6AF@-1(A;xkx%BFAO}@=#uc^W-EzoVR}Xmv<1CU&=9BSX(sSpRGtUcDM4pXh z6xNV0?7NE#B2N{OcLtr#iG$9UoaZgH(S8#0!9)_P3D2PqCH;5zBHu>)QSyYw5qQ;T zAIg$nPJ|AnBCC$95r05bX?QV~wC9&Z-iC_ECq11BRmoQnOGfD|EoijIipWP35WH2T z-8J$HAkSo6#Hb>llXDA_Uy$FvrqQ0oumi7|=gF2OZx=+qqKFL#0{~t$cM0mZFN%3PaQF=P&bOO-Q=E zO6G0Ztn1inX(u6n?~Sl7Vk1(PeDmQ=K;9h?<_U(;wG4IlAB72^!A&0D*=Y?jId&{kUew4g7XpvN-y`!S`{DR0ks)&5&$?@rJu`?lGn-4G4LtZe~(yG?9@KUsX zen~Q4lZGd3ybx-9c;2~^e)~m}a0ebkjc(t(wsgg z%m*mC7WJZJ-c>~28w3K4RTnVKM&=v&aq`i)G}voK`#_msa zx+RgX$dhA(p-7;~w~yWz9qnCPi+T$3``laIJRd?)t8Pi;EAr$>ve1}7MLve2IXSM) z^OKNwCRk!klokQXhH00RnD>t2^nDKG@ly$7bA(qW1~Cu8Bs@<|>Mw8~RDmM-#2mr9z3e};kt-GJ|*Xkk4zHE#;^m;_i^ zaEBCdHXU)XrDcwyXM;}HH47;gUAS~n13p!R-Wjl@%gANm*{pHX=K68;!9-D3-!R&u zE}b^lPlBG%L?TcnXfe%aO|$01A4hLZU}g0UvztxRw3ekaj@}z@g;g3p+p_E2P^-C{ z0)5V~e-zZ!*Rzyur$SB5n*==@0gO7ScVRXIKDGp{QS{DO)U9QR>|@c!Y1HicNzg|_ z5v@+c4`stQ+`M^HptqF!XX(6o9Ai`S=0D(DF(AsqD~m9jcztcj*cAnBZPM6~VpO3g zYIFVk(F>TUT(}r5Q_;v>^Wjf}o=tQ}I`o!jGPAr{bL5Vr-?dEiDmBlFZ7!YpJ92j| z6VdDDdMC3<&}wO!Bk04fWg_U7yg}=w8FzD&j0+)uPk|oMFpE=3y+GOJ>UlwG9#n+h z?piCT)I2HNMrld)^|m7P-jG!WZH1wzH_x^Ke+u*j28oeX=q=1sji%;Jf}WCbN6OX;-nr(;ox4jX=9YAH1eDKY#UjX`OBw2SlsgHg(c0DyUZwx(?5o>kfn*gEN z^$S2Rx$p{f=&>mJyi`yd@FzeY3^3~Y;fI{OKNIxSym@2j^Geo$L5H5>Y|yv$>&MZv z2|~KaU3yVUYWTSmMMFOUdS^V_sB-Cl1cxPEI*URt6`_xLO4Z?U()M2HTg}}#ddbx{L6wmkV$n+5dky%lm(Do)kk_=;MeZULUF>X88h$NV zIs&_utO32sksDEF6Yx%x(lp8afAw`_BN{V4jp zk~Jf#ym`_3qSZIl`sPi7o_C9wtBkuSv8`@7b8lWo?{eXl61|H>rM@*vod$i(g;zSe zJ_cyb(`N3?OXzniSt}!VF=FeCy#Vyqq?ul|015%L9=Uf#@25ilcI&$hN2|x(<@JXj zFNS`5{Mp&?&@x&Zs^i^yZzghsJuV!=$n_r zi;JrpugN!;muKsfZb(1<`o*|D_a`Sy-+lXXefIqN=l+u3$8_k8hx2xN%aiUu>5|!5 zaP#x0yRA2AvzPb#?KWIL+K