# polars
# NOTE: this function comes from the patch "adding dataplep" against
# rowers/dataroutines.py; it relies on module-level names of that file
# (pl, np, savgol_filter, shutil, lbstoN, timedeltaconv, niceformat,
# nicepaceformat).
def _dataplep_windowsize(mean_dt):
    """Return the Savitzky-Golay window length for a mean time step.

    The window is ``2 * int(10 / mean_dt) + 1`` (always odd).  A missing,
    zero, infinite or NaN time step, and any resulting window of 3 or
    less, falls back to a window of 5.
    """
    windowsize = 1
    # mean_dt is None when there is no time difference to average
    # (e.g. a single row); np.isinf(None) would raise TypeError.
    if mean_dt is not None and mean_dt != 0 and not np.isinf(mean_dt):
        try:
            windowsize = 2 * int(10. / mean_dt) + 1
        except (ValueError, OverflowError):  # NaN or non-representable ratio
            windowsize = 1

    if windowsize <= 3:
        windowsize = 5

    return windowsize


def _dataplep_smooth_expr(column, windowsize):
    """Build an expression replacing *column* by its smoothed values."""
    # NOTE(review): .explode() is kept from the original code; presumably
    # polars wraps the ndarray returned by the callback in a single list
    # row -- confirm against the installed polars version.
    return (
        pl.col(column)
        .map_batches(lambda s: savgol_filter(s.to_numpy(), windowsize, 3))
        .explode()
        .alias(column)
    )


def _dataplep_try_savgol(values, windowsize):
    """Smooth a polars Series; return it unchanged if filtering fails."""
    try:
        return pl.Series(savgol_filter(values.to_numpy(), windowsize, 3))
    except TypeError:  # pragma: no cover
        return values


def dataplep(rowdatadf, id=0, inboard=0.88, forceunit='lbs'):
    """Prepare stroke data with polars and optionally store it as parquet.

    The pandas frame is converted to polars, derived columns (rhythm,
    forceratio, drivespeed, driveenergy, distanceperstroke, efficiency)
    are computed, selected columns are smoothed with a Savitzky-Golay
    filter, and the result is assembled under short column names.

    Args:
        rowdatadf: pandas DataFrame with the stroke data.  The columns
            ' AverageBoatSpeed (m/s)', ' WorkoutState' and 'driveenergy'
            are derived when absent; the angle columns ('wash', 'catch',
            'finish', 'peakforceangle', 'slip') are used when 'wash' is
            present.  All other columns read below must exist.
        id: workout id.  When non-zero, a ``workoutid`` column is added
            and the frame is written to
            ``media/strokedata_<id>.parquet.gz``.
        inboard: used to compute the arc length
            ``(inboard - 0.05) * (radians(finish) - radians(catch))``
            when angle columns are present.
        forceunit: when 'lbs' (and the input has no 'driveenergy'
            column) the force columns are multiplied by ``lbstoN``.

    Returns:
        0 when ``rowdatadf`` is empty, otherwise the prepared polars
        DataFrame.
    """
    # rowdatadf is pd.DataFrame
    if rowdatadf.empty:
        return 0

    df = pl.from_pandas(rowdatadf)

    # Make time relative to the first sample.
    df = df.with_columns(
        (pl.col("TimeStamp (sec)") - df[0, "TimeStamp (sec)"]).alias("TimeStamp (sec)"))
    df = df.with_columns(
        (pl.col(" Stroke500mPace (sec/500m)").clip(1, 3000)).alias(" Stroke500mPace"))
    if ' AverageBoatSpeed (m/s)' not in df.columns:
        df = df.with_columns(
            (500. / pl.col(' Stroke500mPace (sec/500m)')).alias(' AverageBoatSpeed (m/s)'))
    if ' WorkoutState' not in df.columns:
        df = df.with_columns((pl.lit(0)).alias(" WorkoutState"))
    df = df.with_columns(
        (100. * pl.col(" DriveTime (ms)")
         / (pl.col(" DriveTime (ms)") + pl.col(" StrokeRecoveryTime (ms)"))).alias("rhythm"))
    df = df.with_columns(
        (pl.col(" AverageDriveForce (lbs)") / pl.col(" PeakDriveForce (lbs)")).alias("forceratio"))

    windowsize = _dataplep_windowsize(df['TimeStamp (sec)'].diff().mean())

    # with_columns returns a new frame, so the result has to be assigned
    # back for the smoothing to take effect.  Frames shorter than the
    # window are left unsmoothed (same guard as the angle columns below),
    # because savgol_filter rejects windows longer than the data.
    if windowsize < df.height:
        df = df.with_columns(
            _dataplep_smooth_expr(" Cadence (stokes/min)", windowsize),
            _dataplep_smooth_expr(" DriveLength (meters)", windowsize),
            _dataplep_smooth_expr(" HRCur (bpm)", windowsize),
            _dataplep_smooth_expr("forceratio", windowsize),
        )

    df = df.with_columns(
        (pl.col(" DriveLength (meters)") / pl.col(" DriveTime (ms)") * 1.0e3).alias("drivespeed"))

    if 'driveenergy' not in df.columns:
        if forceunit == 'lbs':
            # driveenergy is computed first, from the unconverted force
            # column, then both force columns are converted in place.
            df = df.with_columns(
                (pl.col(" DriveLength (meters)")
                 * pl.col(" AverageDriveForce (lbs)") * lbstoN).alias("driveenergy"))
            df = df.with_columns(
                (pl.col(" AverageDriveForce (lbs)") * lbstoN).alias(" AverageDriveForce (lbs)"))
            df = df.with_columns(
                (pl.col(" PeakDriveForce (lbs)") * lbstoN).alias(" PeakDriveForce (lbs)"))
        else:
            df = df.with_columns(
                (pl.col(" DriveLength (meters)")
                 * pl.col(" AverageDriveForce (lbs)")).alias("driveenergy"))

    # An all-zero driveenergy column is replaced by the constant 100.
    if df["driveenergy"].mean() == 0 and df["driveenergy"].std() == 0:
        df = df.with_columns((0.0 * pl.col("driveenergy") + 100).alias("driveenergy"))

    df = df.with_columns(
        (60. * pl.col(" AverageBoatSpeed (m/s)")
         / pl.col(" Cadence (stokes/min)")).alias("distanceperstroke"))

    t2 = df["TimeStamp (sec)"].map_elements(timedeltaconv, return_dtype=pl.Datetime)
    p2 = df[" Stroke500mPace"].map_elements(timedeltaconv, return_dtype=pl.Datetime)

    data = pl.DataFrame(
        dict(
            time=df["TimeStamp (sec)"] * 1e3,
            hr=df[" HRCur (bpm)"],
            pace=df[" Stroke500mPace"] * 1e3,
            spm=df[" Cadence (stokes/min)"],
            velo=df[" AverageBoatSpeed (m/s)"],
            cumdist=df["cum_dist"],
            ftime=niceformat(t2),
            fpace=nicepaceformat(p2),
            driveenergy=df["driveenergy"],
            power=df[' Power (watts)'],
            workoutstate=df[" WorkoutState"],
            averageforce=df[" AverageDriveForce (lbs)"],
            drivelength=df[" DriveLength (meters)"],
            peakforce=df[" PeakDriveForce (lbs)"],
            forceratio=df["forceratio"],
            distance=df["cum_dist"],
            drivespeed=df["drivespeed"],
            rhythm=df["rhythm"],
            distanceperstroke=df["distanceperstroke"],
        )
    )

    data = data.with_columns(
        hr_ut2=df['hr_ut2'],
        hr_ut1=df['hr_ut1'],
        hr_at=df['hr_at'],
        hr_tr=df['hr_tr'],
        hr_an=df['hr_an'],
        hr_max=df['hr_max'],
        hr_bottom=0.0 * df[' HRCur (bpm)'],
    )

    if 'wash' not in df.columns:
        # NOTE(review): these placeholders are string literals "0", while
        # the other branch yields numeric columns -- confirm the readers
        # of the stored data expect this.  efficiency is overwritten with
        # the computed value further down.
        data = data.with_columns(
            wash=pl.lit("0"),
            catch=pl.lit("0"),
            peakforceangle=pl.lit("0"),
            finish=pl.lit("0"),
            slip=pl.lit("0"),
            totalangle=pl.lit("0"),
            effectiveangle=pl.lit("0"),
            efficiency=pl.lit("0"),
        )
    else:
        wash = df['wash']
        catch = df['catch']
        finish = df['finish']
        peakforceangle = df['peakforceangle']
        slip = df['slip']
        # Bind these explicitly: they are smoothed and written back below
        # and would otherwise be undefined names.
        driveenergy = df['driveenergy']
        drivelength = df[' DriveLength (meters)']

        arclength = (inboard - 0.05) * (np.radians(finish) - np.radians(catch))
        arcmean = arclength.mean()
        # A positive mean arc length replaces the measured drive length.
        if arcmean is not None and arcmean > 0:
            drivelength = arclength

        # Both angles are derived from the unsmoothed inputs.
        totalangle = finish - catch
        effectiveangle = finish - wash - catch - slip

        angle_columns = dict(
            wash=wash,
            catch=catch,
            slip=slip,
            finish=finish,
            peakforceangle=peakforceangle,
            driveenergy=driveenergy,
            drivelength=drivelength,
            totalangle=totalangle,
            effectiveangle=effectiveangle,
        )

        if windowsize > 3 and windowsize < len(slip):
            angle_columns = {
                name: _dataplep_try_savgol(values, windowsize)
                for name, values in angle_columns.items()
            }

        data = data.with_columns(**angle_columns)

    ergpw = 2.8 * data['velo']**3
    efficiency = 100. * ergpw / data['power']

    data = data.with_columns(efficiency=efficiency)

    if id != 0:
        data = data.with_columns(
            workoutid=pl.lit(id)
        )
        filename = 'media/strokedata_{id}.parquet.gz'.format(id=id)
        try:
            data.write_parquet(filename, compression='gzip')
        except IsADirectoryError:
            # A directory is occupying the target path: remove it, retry.
            shutil.rmtree(filename)
            data.write_parquet(filename, compression='gzip')

    return data