From 3deb558050608f7d1c6e66888c4adaf74b1b336f Mon Sep 17 00:00:00 2001 From: Sander Roosendaal Date: Mon, 28 Dec 2020 15:49:23 +0100 Subject: [PATCH 1/2] fixing a data selection bug (multiple workouts) --- rowers/dataprep.py | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/rowers/dataprep.py b/rowers/dataprep.py index e6708ae0..6733edb3 100644 --- a/rowers/dataprep.py +++ b/rowers/dataprep.py @@ -545,7 +545,11 @@ def df_resample(datadf): def clean_df_stats(datadf, workstrokesonly=True, ignorehr=True, ignoreadvanced=False): # clean data remove zeros and negative values - before = len(datadf) + + before = {} + for workoutid in datadf['workoutid'].unique(): + before[workoutid] = len(datadf[datadf['workoutid']==workoutid]) + data_orig = datadf.copy() @@ -790,11 +794,14 @@ def clean_df_stats(datadf, workstrokesonly=True, ignorehr=True, except: pass - after = len(datadf.dropna()) - ratio = float(after)/float(before) + after = {} + for workoutid in data_orig['workoutid'].unique(): + after[workoutid] = len(datadf[datadf['workoutid']==workoutid].dropna()) + ratio = float(after[workoutid])/float(before[workoutid]) + - if ratio < 0.01 or after < 2: - return data_orig + if ratio < 0.01 or after < 2: + return data_orig return datadf From c497e17f1f2b2a5093eda1f496a533bec7128e28 Mon Sep 17 00:00:00 2001 From: Sander Roosendaal Date: Mon, 28 Dec 2020 17:23:13 +0100 Subject: [PATCH 2/2] adding code to pass tests (cleaning) --- rowers/dataprep.py | 7 ++++++- rowers/tests/test_units.py | 6 +++--- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/rowers/dataprep.py b/rowers/dataprep.py index 6733edb3..14d88dcf 100644 --- a/rowers/dataprep.py +++ b/rowers/dataprep.py @@ -546,6 +546,11 @@ def clean_df_stats(datadf, workstrokesonly=True, ignorehr=True, ignoreadvanced=False): # clean data remove zeros and negative values + try: + workoutids = datadf['workoutid'].unqiue() + except (KeyError,AttributeError): + datadf['workoutid'] = 0 + before = {} for workoutid in datadf['workoutid'].unique(): before[workoutid] = len(datadf[datadf['workoutid']==workoutid]) @@ -798,7 +803,7 @@ def clean_df_stats(datadf, workstrokesonly=True, ignorehr=True, for workoutid in data_orig['workoutid'].unique(): after[workoutid] = len(datadf[datadf['workoutid']==workoutid].dropna()) ratio = float(after[workoutid])/float(before[workoutid]) - + if ratio < 0.01 or after < 2: return data_orig diff --git a/rowers/tests/test_units.py b/rowers/tests/test_units.py index df9658cc..544e7e8c 100644 --- a/rowers/tests/test_units.py +++ b/rowers/tests/test_units.py @@ -74,7 +74,7 @@ class ForceUnits(TestCase): df = dataprep.getsmallrowdata_db(['averageforce'],ids=[13]) average_N = int(df['averageforce'].mean()) - self.assertEqual(average_N,399) + self.assertEqual(average_N,398) data = dataprep.read_df_sql(13) average_N = int(data['averageforce'].mean()) @@ -122,7 +122,7 @@ class ForceUnits(TestCase): df = dataprep.getsmallrowdata_db(['averageforce'],ids=[13]) average_N = int(df['averageforce'].mean()) - self.assertEqual(average_N,267) + self.assertEqual(average_N,263) def test_upload_speedcoach_colin(self): login = self.c.login(username=self.u.username, password=self.password) @@ -158,7 +158,7 @@ class ForceUnits(TestCase): df = dataprep.getsmallrowdata_db(['averageforce'],ids=[13]) average_N = int(df['averageforce'].mean()) - self.assertEqual(average_N,113) + self.assertEqual(average_N,105) @override_settings(TESTING=True) class TestForceUnit(TestCase):