diff --git a/rowers/dataroutines.py b/rowers/dataroutines.py index 534e137a..855d0831 100644 --- a/rowers/dataroutines.py +++ b/rowers/dataroutines.py @@ -773,55 +773,21 @@ def clean_df_stats_pl(datadf, workstrokesonly=True, ignorehr=True, # clean data for useful ranges per column if not ignorehr: - try: - datadf = datadf.filter(pl.col("hr")>=30) - except (KeyError, TypeError): # pragma: no cover - pass + datadf = datadf.filter(pl.col("hr")>=30) - try: - datadf = datadf.filter(pl.col("spm") >=0) - except (KeyError, TypeError): - pass + + datadf = datadf.filter( + pl.col("spm") >=0, + pl.col("efficiency")<=200, + pl.col("spm")>=10, + pl.col("pace")<=300*1000., + pl.col("efficiency")>=0, + pl.col("pace")>=60*1000, + pl.col("power")<=5000, + pl.col("spm")<=120, + pl.col("wash")>=1 + ) - try: - datadf = datadf.filter(pl.col("efficiency")<=200) - except (KeyError, TypeError): - pass - - try: - datadf = datadf.filter(pl.col("spm")>=10) - except (KeyError, TypeError): - pass - - try: - datadf = datadf.filter(pl.col("pace")<=300*1000.) - except (KeyError, TypeError): - pass - - try: - datadf = datadf.filter(pl.col("efficiency")>=0) - except (KeyError, TypeError): - pass - - try: - datadf = datadf.filter(pl.col("pace")>=60*1000) - except (KeyError, TypeError): - pass - - try: - datadf = datadf.filter(pl.col("power")<=5000) - except (KeyError, TypeError): - pass - - try: - datadf = datadf.filter(pl.col("spm")<=120) - except (KeyError, TypeError): - pass - - try: - datadf = datadf.filter(pl.col("wash")>=1) - except (KeyError, TypeError): - pass # try to guess ignoreadvanced if not ignoreadvanced: @@ -834,70 +800,25 @@ def clean_df_stats_pl(datadf, workstrokesonly=True, ignorehr=True, pass if not ignoreadvanced: - try: - datadf = datadf.filter(pl.col("rhythm")>=0) - except (KeyError, TypeError): - pass + datadf = datadf.filter(pl.col("rhythm")>=0, + pl.col("rhythm")<=70, + pl.col("power")>=20, + pl.col("drivelength")>=0.5, + pl.col("forceratio")>=0.2, + pl.col("forceratio")<=1.0, + pl.col("drivespeed")>=0.5, + pl.col("drivespeed")<=4, + pl.col("driveenergy")<=2000, + pl.col("driveenergy")>=100, + pl.col("catch")<=-30) - try: - datadf = datadf.filter(pl.col("rhythm")<=70) - except (KeyError, TypeError): - pass - - try: - datadf = datadf.filter(pl.col("power")>=20) - except (KeyError, TypeError): - pass - - try: - datadf = datadf.filter(pl.col("drivelength")>=0.5) - except (KeyError, TypeError): - pass - - try: - datadf = datadf.filter(pl.col("forceratio")>=0.2) - except (KeyError, TypeError): - pass - - try: - datadf = datadf.filter(pl.col("forceratio")<=1.0) - except (KeyError, TypeError): - pass - - try: - datadf = datadf.filter(pl.col("drivespeed")>=0.5) - except (KeyError, TypeError): - pass - - try: - datadf = datadf.filter(pl.col("drivespeed")<=4) - except (KeyError, TypeError): - pass - - try: - datadf = datadf.filter(pl.col("driveenergy")<=2000) - except (KeyError, TypeError): - pass - - try: - datadf = datadf.filter(pl.col("driveenergy")>=100) - except (KeyError, TypeError): - pass - - try: - datadf = datadf.filter(pl.col("catch")<=-30) - except (KeyError, TypeError): - pass # workoutstateswork = [1, 4, 5, 8, 9, 6, 7] workoutstatesrest = [3] # workoutstatetransition = [0, 2, 10, 11, 12, 13] if workstrokesonly == 'True' or workstrokesonly is True: - try: - datadf = datadf.filter(~pl.col("workoutstate").is_in(workoutstatesrest)) - except: - pass + datadf = datadf.filter(~pl.col("workoutstate").is_in(workoutstatesrest)) after = {} @@ -1571,7 +1492,7 @@ def getsmallrowdata_pl(columns, ids=[], doclean=True, workstrokesonly=True, comp return data - + df = df.fill_nan(None).drop_nulls() return df @@ -2207,7 +2128,11 @@ def dataprep(rowdatadf, id=0, bands=True, barchart=True, otwpower=True, df = dd.from_pandas(data, npartitions=1) if polars: pldf = pl.from_pandas(data) - pldf.write_parquet(filename, compression='gzip') + try: + pldf.write_parquet(filename, compression='gzip') + except IsADirectoryError: + shutil.rmtree(filename) + pldf.write_parquet(filename, compression='gzip') else: try: df.to_parquet(filename, engine='fastparquet', compression='gzip') diff --git a/rowers/interactiveplots.py b/rowers/interactiveplots.py index f916d7c9..bb5ce5b7 100644 --- a/rowers/interactiveplots.py +++ b/rowers/interactiveplots.py @@ -478,7 +478,7 @@ def interactive_forcecurve(theworkouts): rowdata.dropna(axis=0, how='any', inplace=True) if rowdata.empty: - return "", "No Valid Data Available", "", "" + return "", "No Valid Data Available" data_dict = rowdata.to_dict("records") @@ -2312,7 +2312,6 @@ def interactive_cum_flex_chart2(theworkouts, promember=0, columns_basic = columns_basic + ['spm', 'driveenergy', 'distance', 'workoutstate'] datadf = pd.DataFrame() - start = timezone.now() if promember: datadf = dataprep.getsmallrowdata_pl(columns, ids=ids, doclean=True, workstrokesonly=workstrokesonly, for_chart=True) @@ -2320,7 +2319,6 @@ def interactive_cum_flex_chart2(theworkouts, promember=0, datadf = dataprep.getsmallrowdata_pl(columns_basic, ids=ids, doclean=True, workstrokesonly=workstrokesonly, for_chart=True) - print(timezone.now()-start) try: _ = datadf[yparam2] except (KeyError, ColumnNotFoundError): # pragma: no cover @@ -2331,9 +2329,7 @@ def interactive_cum_flex_chart2(theworkouts, promember=0, except (KeyError, ColumnNotFoundError): yparam1 = 'None' - datadf.drop_nulls() - #datadf.dropna(axis=1, how='all', inplace=True) - #datadf.dropna(axis=0, how='any', inplace=True) + datadf = datadf.fill_nan(None).drop_nulls() # test if we have drive energy try: # pragma: no cover diff --git a/rowers/tests/testdata/testdata.tcx.gz b/rowers/tests/testdata/testdata.tcx.gz index e31e8ffc..b20a8764 100644 Binary files a/rowers/tests/testdata/testdata.tcx.gz and b/rowers/tests/testdata/testdata.tcx.gz differ