Fix a data selection bug when the data contains multiple workouts
This commit is contained in:
@@ -545,7 +545,11 @@ def df_resample(datadf):
|
|||||||
def clean_df_stats(datadf, workstrokesonly=True, ignorehr=True,
|
def clean_df_stats(datadf, workstrokesonly=True, ignorehr=True,
|
||||||
ignoreadvanced=False):
|
ignoreadvanced=False):
|
||||||
# clean data remove zeros and negative values
|
# clean data remove zeros and negative values
|
||||||
before = len(datadf)
|
|
||||||
|
before = {}
|
||||||
|
for workoutid in datadf['workoutid'].unique():
|
||||||
|
before[workoutid] = len(datadf[datadf['workoutid']==workoutid])
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
data_orig = datadf.copy()
|
data_orig = datadf.copy()
|
||||||
@@ -790,8 +794,11 @@ def clean_df_stats(datadf, workstrokesonly=True, ignorehr=True,
|
|||||||
except:
|
except:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
after = len(datadf.dropna())
|
after = {}
|
||||||
ratio = float(after)/float(before)
|
for workoutid in data_orig['workoutid'].unique():
|
||||||
|
after[workoutid] = len(datadf[datadf['workoutid']==workoutid].dropna())
|
||||||
|
ratio = float(after[workoutid])/float(before[workoutid])
|
||||||
|
|
||||||
|
|
||||||
if ratio < 0.01 or after < 2:
|
if ratio < 0.01 or after < 2:
|
||||||
return data_orig
|
return data_orig
|
||||||
|
|||||||
Reference in New Issue
Block a user