Private
Public Access
1
0

fixing a data selection bug (multiple workouts)

This commit is contained in:
Sander Roosendaal
2020-12-28 15:49:23 +01:00
parent 0621f73f81
commit 3deb558050

View File

@@ -545,7 +545,11 @@ def df_resample(datadf):
def clean_df_stats(datadf, workstrokesonly=True, ignorehr=True,
ignoreadvanced=False):
# clean data remove zeros and negative values
before = len(datadf)
before = {}
for workoutid in datadf['workoutid'].unique():
before[workoutid] = len(datadf[datadf['workoutid']==workoutid])
data_orig = datadf.copy()
@@ -790,11 +794,14 @@ def clean_df_stats(datadf, workstrokesonly=True, ignorehr=True,
except:
pass
after = len(datadf.dropna())
ratio = float(after)/float(before)
after = {}
for workoutid in data_orig['workoutid'].unique():
after[workoutid] = len(datadf[datadf['workoutid']==workoutid].dropna())
ratio = float(after[workoutid])/float(before[workoutid])
if ratio < 0.01 or after < 2:
return data_orig
if ratio < 0.01 or after < 2:
return data_orig
return datadf