diff --git a/rowers/dataroutines.py b/rowers/dataroutines.py index 38f2ccce..aa562ba2 100644 --- a/rowers/dataroutines.py +++ b/rowers/dataroutines.py @@ -569,6 +569,7 @@ def clean_df_stats(datadf, workstrokesonly=True, ignorehr=True, except KeyError: pass + if not ignoreadvanced: try: mask = datadf['rhythm'] < 0 @@ -1418,7 +1419,7 @@ def getrowdata_pl(id=0, doclean=False, convertnewtons=True, checkefficiency=True, for_chart=False): data = read_df_sql(id,polars=True) try: - data = data.with_columns(pl.col(data['time'].diff()).alias("deltat")) # data['time'].diff() + data = data.with_columns((pl.col('time').diff()).alias("deltat")) # data['time'].diff() except KeyError: # pragma: no cover data = pl.DataFrame() @@ -1781,7 +1782,7 @@ def read_df_sql(id, polars=False): if polars: try: f = 'media/strokedata_{id}.parquet.gz'.format(id=id) - df = pd.read_parquet(f) + df = pl.read_parquet(f) except (IsADirectoryError, FileNotFoundError, OSError, ArrowInvalid, IndexError): # pragma: no cover rowdata, row = getrowdata(id=id) try: @@ -2018,11 +2019,15 @@ def dataplep(rowdatadf, id=0, inboard=0.88, forceunit='lbs', bands=True, barchar if 'driveenergy' not in df.columns: if forceunit == 'lbs': df = df.with_columns((pl.col(" DriveLength (meters)") * pl.col(" AverageDriveForce (lbs)") * lbstoN).alias("driveenergy")) - df = df.with_columns((pl.col(" AverageDriveForce (lbs)") * lbstoN).alias(" AverageDriveForce (lbs)")) - df = df.with_columns((pl.col(" PeakDriveForce (lbs)") * lbstoN).alias(" PeakDriveForce (lbs)")) else: df = df.with_columns((pl.col(" DriveLength (meters)") * pl.col(" AverageDriveForce (lbs)")).alias("driveenergy")) + + if forceunit == 'lbs': + df = df.with_columns((pl.col(" AverageDriveForce (lbs)") * lbstoN).alias(" AverageDriveForce (lbs)")) + df = df.with_columns((pl.col(" PeakDriveForce (lbs)") * lbstoN).alias(" PeakDriveForce (lbs)")) + + if df["driveenergy"].mean() == 0 and df["driveenergy"].std() == 0: df = df.with_columns((0.0*pl.col("driveenergy")+100).alias("driveenergy")) @@ -2067,14 +2072,14 @@ def dataplep(rowdatadf, id=0, inboard=0.88, forceunit='lbs', bands=True, barchar if 'wash' not in df.columns: data = data.with_columns( - wash = pl.lit("0"), - catch = pl.lit("0"), - peakforceangle = pl.lit("0"), - finish = pl.lit("0"), - slip = pl.lit("0"), - totalangle = pl.lit("0"), - effectiveangle = pl.lit("0"), - efficiency = pl.lit("0"), + wash = pl.lit(0.0), + catch = pl.lit(0.0), + peakforceangle = pl.lit(0.0), + finish = pl.lit(0.0), + slip = pl.lit(0.0), + totalangle = pl.lit(0.0), + effectiveangle = pl.lit(0.0), + efficiency = pl.lit(0.0), ) else: wash = df['wash'] @@ -2145,6 +2150,10 @@ def dataplep(rowdatadf, id=0, inboard=0.88, forceunit='lbs', bands=True, barchar data = data.with_columns( workoutid = pl.lit(id) ) + # cast data + for k, v in dtypes.items(): + if v == 'int': + data = data.cast({k: pl.Int64}) filename = 'media/strokedata_{id}.parquet.gz'.format(id=id) try: data.write_parquet(filename, compression='gzip') diff --git a/rowers/tests/testdata/testdata.tcx.gz b/rowers/tests/testdata/testdata.tcx.gz index 62b2f593..4623d996 100644 Binary files a/rowers/tests/testdata/testdata.tcx.gz and b/rowers/tests/testdata/testdata.tcx.gz differ