From 3deb558050608f7d1c6e66888c4adaf74b1b336f Mon Sep 17 00:00:00 2001 From: Sander Roosendaal Date: Mon, 28 Dec 2020 15:49:23 +0100 Subject: [PATCH] fixing a data selection bug (multiple workouts) --- rowers/dataprep.py | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/rowers/dataprep.py b/rowers/dataprep.py index e6708ae0..6733edb3 100644 --- a/rowers/dataprep.py +++ b/rowers/dataprep.py @@ -545,7 +545,11 @@ def df_resample(datadf): def clean_df_stats(datadf, workstrokesonly=True, ignorehr=True, ignoreadvanced=False): # clean data remove zeros and negative values - before = len(datadf) + + before = {} + for workoutid in datadf['workoutid'].unique(): + before[workoutid] = len(datadf[datadf['workoutid']==workoutid]) + data_orig = datadf.copy() @@ -790,11 +794,14 @@ def clean_df_stats(datadf, workstrokesonly=True, ignorehr=True, except: pass - after = len(datadf.dropna()) - ratio = float(after)/float(before) + after = {} + for workoutid in data_orig['workoutid'].unique(): + after[workoutid] = len(datadf[datadf['workoutid']==workoutid].dropna()) + ratio = float(after[workoutid])/float(before[workoutid]) + - if ratio < 0.01 or after < 2: - return data_orig + if ratio < 0.01 or after < 2: + return data_orig return datadf