adding dataplep

2024-04-10 21:14:18 +02:00
parent beab7aa548
commit 1a48e73a6f
1 changed files with 184 additions and 1 deletions
--- a/rowers/dataroutines.py
+++ b/rowers/dataroutines.py
@@ -1417,7 +1417,6 @@ def getrowdata_db(id=0, doclean=False, convertnewtons=True,
 def getrowdata_pl(id=0, doclean=False, convertnewtons=True,
                  checkefficiency=True, for_chart=False):
    data = read_df_sql(id,polars=True)
-    print(data)
    try:
        data = data.with_columns(pl.col(data['time'].diff()).alias("deltat")) # data['time'].diff()
    except KeyError:  # pragma: no cover
@@ -1973,7 +1972,191 @@ def add_efficiency(id=0):  # pragma: no cover
 # saves it to the stroke_data table in the database
 # Takes a rowingdata object's DataFrame as input

+# polars
+def dataplep(rowdatadf, id=0, inboard=0.88, forceunit='lbs'):
+    """Prepare stroke data with polars and optionally store it as parquet.
+
+    Converts the pandas frame to polars, rebases the time axis to start at
+    zero, derives per-stroke metrics (rhythm, forceratio, drivespeed,
+    driveenergy, distanceperstroke, efficiency), smooths the noisy channels
+    with ``savgol_filter`` and assembles the result frame.
+
+    Args:
+        rowdatadf: pandas DataFrame using the rowingdata column names
+            (e.g. "TimeStamp (sec)", " Cadence (stokes/min)").
+        id: workout id. When non-zero a ``workoutid`` column is added and the
+            frame is written to ``media/strokedata_<id>.parquet.gz``.
+        inboard: inboard length; ``inboard - 0.05`` times the catch-to-finish
+            angle (in radians) gives the arc length.
+            NOTE(review): presumably metres -- confirm against callers.
+        forceunit: when 'lbs' (and no 'driveenergy' column exists yet) the
+            force columns are multiplied by ``lbstoN``; any other value
+            leaves them untouched.
+
+    Returns:
+        0 when ``rowdatadf`` is empty, otherwise the prepared pl.DataFrame.
+    """
+    # rowdatadf is pd.DataFrame
+    if rowdatadf.empty:
+        return 0

+    df = pl.from_pandas(rowdatadf)
+
+    # Rebase time so the first sample is t = 0.
+    df = df.with_columns(
+        (pl.col("TimeStamp (sec)") - df[0, "TimeStamp (sec)"]).alias("TimeStamp (sec)"))
+    df = df.with_columns(
+        pl.col(" Stroke500mPace (sec/500m)").clip(1, 3000).alias(" Stroke500mPace"))
+    if ' AverageBoatSpeed (m/s)' not in df.columns:
+        df = df.with_columns(
+            (500. / pl.col(' Stroke500mPace (sec/500m)')).alias(' AverageBoatSpeed (m/s)'))
+    if ' WorkoutState' not in df.columns:
+        df = df.with_columns(pl.lit(0).alias(" WorkoutState"))
+    df = df.with_columns(
+        (100. * pl.col(" DriveTime (ms)")
+         / (pl.col(" DriveTime (ms)") + pl.col(" StrokeRecoveryTime (ms)"))).alias("rhythm"))
+    df = df.with_columns(
+        (pl.col(" AverageDriveForce (lbs)") / pl.col(" PeakDriveForce (lbs)")).alias("forceratio"))
+
+    # Filter window: an odd number of samples derived from the mean sample
+    # interval.  The mean is None for a single-row frame (the diff is all
+    # null), which np.isinf cannot handle, so test for it explicitly.
+    f = df['TimeStamp (sec)'].diff().mean()
+    windowsize = 1
+    if f is not None and f != 0 and not np.isinf(f):
+        try:
+            windowsize = 2 * int(10. / f) + 1
+        except ValueError:  # f is NaN
+            windowsize = 1
+
+    if windowsize <= 3:
+        windowsize = 5
+
+    # savgol_filter rejects a window longer than the data; use the same
+    # "window < length" rule as the oarlock-angle smoothing further down.
+    if windowsize < df.height:
+        def smoothed(name):
+            # Return a Series from the callback so polars gets a column of
+            # the same length back.
+            return pl.col(name).map_batches(
+                lambda s: pl.Series(savgol_filter(s.to_numpy(), windowsize, 3))
+            ).alias(name)
+
+        # with_columns returns a NEW frame; the result must be assigned or
+        # the smoothing is silently dropped.
+        df = df.with_columns(
+            smoothed(" Cadence (stokes/min)"),
+            smoothed(" DriveLength (meters)"),
+            smoothed(" HRCur (bpm)"),
+            smoothed("forceratio"),
+        )
+
+    df = df.with_columns(
+        (pl.col(" DriveLength (meters)") / pl.col(" DriveTime (ms)") * 1.0e3).alias("drivespeed"))
+
+    if 'driveenergy' not in df.columns:
+        if forceunit == 'lbs':
+            df = df.with_columns(
+                (pl.col(" DriveLength (meters)") * pl.col(" AverageDriveForce (lbs)") * lbstoN
+                 ).alias("driveenergy"))
+            # The columns keep their "(lbs)" names but are scaled by lbstoN
+            # from here on.
+            df = df.with_columns(
+                (pl.col(" AverageDriveForce (lbs)") * lbstoN).alias(" AverageDriveForce (lbs)"))
+            df = df.with_columns(
+                (pl.col(" PeakDriveForce (lbs)") * lbstoN).alias(" PeakDriveForce (lbs)"))
+        else:
+            df = df.with_columns(
+                (pl.col(" DriveLength (meters)") * pl.col(" AverageDriveForce (lbs)")
+                 ).alias("driveenergy"))
+
+    # An all-zero drive energy column is replaced by a constant 100.
+    if df["driveenergy"].mean() == 0 and df["driveenergy"].std() == 0:
+        df = df.with_columns((0.0 * pl.col("driveenergy") + 100).alias("driveenergy"))
+
+    df = df.with_columns(
+        (60. * pl.col(" AverageBoatSpeed (m/s)") / pl.col(" Cadence (stokes/min)")
+         ).alias("distanceperstroke"))
+
+    t2 = df["TimeStamp (sec)"].map_elements(lambda x: timedeltaconv(x), return_dtype=pl.Datetime)
+    p2 = df[" Stroke500mPace"].map_elements(lambda x: timedeltaconv(x), return_dtype=pl.Datetime)
+
+    data = pl.DataFrame(
+        dict(
+            time=df["TimeStamp (sec)"] * 1e3,
+            hr=df[" HRCur (bpm)"],
+            pace=df[" Stroke500mPace"] * 1e3,
+            spm=df[" Cadence (stokes/min)"],
+            velo=df[" AverageBoatSpeed (m/s)"],
+            cumdist=df["cum_dist"],
+            ftime=niceformat(t2),
+            fpace=nicepaceformat(p2),
+            driveenergy=df["driveenergy"],
+            power=df[' Power (watts)'],
+            workoutstate=df[" WorkoutState"],
+            averageforce=df[" AverageDriveForce (lbs)"],
+            drivelength=df[" DriveLength (meters)"],
+            peakforce=df[" PeakDriveForce (lbs)"],
+            forceratio=df["forceratio"],
+            distance=df["cum_dist"],
+            drivespeed=df["drivespeed"],
+            rhythm=df["rhythm"],
+            distanceperstroke=df["distanceperstroke"],
+        )
+    )
+
+    data = data.with_columns(
+        hr_ut2=df['hr_ut2'],
+        hr_ut1=df['hr_ut1'],
+        hr_at=df['hr_at'],
+        hr_tr=df['hr_tr'],
+        hr_an=df['hr_an'],
+        hr_max=df['hr_max'],
+        hr_bottom=0.0 * df[' HRCur (bpm)'],
+    )
+
+    if 'wash' not in df.columns:
+        # NOTE(review): these placeholders are the *string* "0", so the
+        # columns are text here but numeric in the branch below -- confirm
+        # whether consumers rely on that before changing it.
+        data = data.with_columns(
+            wash=pl.lit("0"),
+            catch=pl.lit("0"),
+            peakforceangle=pl.lit("0"),
+            finish=pl.lit("0"),
+            slip=pl.lit("0"),
+            totalangle=pl.lit("0"),
+            effectiveangle=pl.lit("0"),
+            efficiency=pl.lit("0"),
+        )
+    else:
+        wash = df['wash']
+        catch = df['catch']
+        finish = df['finish']
+        peakforceangle = df['peakforceangle']
+        slip = df['slip']
+        # Bind these up front: they were previously referenced below without
+        # ever being assigned on this path (NameError).
+        driveenergy = df['driveenergy']
+        drivelength = df[' DriveLength (meters)']
+
+        # Prefer the arc length from the oarlock angles when it is positive
+        # on average.
+        arclength = (inboard - 0.05) * (np.radians(finish) - np.radians(catch))
+        arcmean = arclength.mean()
+        if arcmean is not None and arcmean > 0:
+            drivelength = arclength
+
+        # Derived angles are computed from the unsmoothed inputs and then
+        # smoothed on their own.
+        totalangle = finish - catch
+        effectiveangle = finish - wash - catch - slip
+
+        # Insertion order is the column order handed to with_columns.
+        columns = {
+            'wash': wash,
+            'catch': catch,
+            'slip': slip,
+            'finish': finish,
+            'peakforceangle': peakforceangle,
+            'driveenergy': driveenergy,
+            'drivelength': drivelength,
+            'totalangle': totalangle,
+            'effectiveangle': effectiveangle,
+        }
+
+        if 3 < windowsize < len(slip):
+            for name in list(columns):
+                try:
+                    columns[name] = savgol_filter(columns[name], windowsize, 3)
+                except TypeError:  # pragma: no cover
+                    # Best effort: keep the unsmoothed values.
+                    pass
+
+        # pl.Series accepts both polars Series and the numpy arrays returned
+        # by savgol_filter, and fixes the column name in either case.
+        data = data.with_columns(
+            [pl.Series(name, values) for name, values in columns.items()]
+        )
+
+    # NOTE(review): rows with power == 0 give inf/NaN here -- confirm whether
+    # downstream code cleans that up.
+    ergpw = 2.8 * data['velo']**3
+    efficiency = 100. * ergpw / data['power']
+
+    data = data.with_columns(efficiency=efficiency)
+
+    if id != 0:
+        data = data.with_columns(
+            workoutid=pl.lit(id)
+        )
+        filename = 'media/strokedata_{id}.parquet.gz'.format(id=id)
+        try:
+            data.write_parquet(filename, compression='gzip')
+        except IsADirectoryError:
+            # A directory is occupying the target path: remove it and retry.
+            shutil.rmtree(filename)
+            data.write_parquet(filename, compression='gzip')
+
+
+    return data
+
+# pandas/a little polars
 def dataprep(rowdatadf, id=0, bands=True, barchart=True, otwpower=True,
             empower=True, inboard=0.88, forceunit='lbs', debug=False, polars=True):