Fix a data selection bug in clean_df_stats when the data frame contains multiple workouts

2020-12-28 15:49:23 +01:00
parent 0621f73f81
commit 3deb558050
1 changed files with 12 additions and 5 deletions
--- a/rowers/dataprep.py
+++ b/rowers/dataprep.py
@@ -545,7 +545,11 @@ def df_resample(datadf):
 def clean_df_stats(datadf, workstrokesonly=True, ignorehr=True,
                   ignoreadvanced=False):
    # clean data remove zeros and negative values
-    before = len(datadf)
+
    before = {}
    for workoutid in datadf['workoutid'].unique():
        before[workoutid] = len(datadf[datadf['workoutid']==workoutid])
    data_orig = datadf.copy()
@@ -790,8 +794,11 @@ def clean_df_stats(datadf, workstrokesonly=True, ignorehr=True,
        except:
            pass
-    after = len(datadf.dropna())
+    after = {}
-    ratio = float(after)/float(before)
+    for workoutid in data_orig['workoutid'].unique():
        after[workoutid] = len(datadf[datadf['workoutid']==workoutid].dropna())
        ratio = float(after[workoutid])/float(before[workoutid])
        if ratio < 0.01 or after < 2:
            return data_orig