From 24565e2ef223d4a75af0ced2ce535b825972f974 Mon Sep 17 00:00:00 2001 From: Sander Roosendaal Date: Tue, 7 Feb 2017 10:49:42 +0100 Subject: [PATCH] filtered out non-meaningful values --- rowers/views.py | 92 ++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 91 insertions(+), 1 deletion(-) diff --git a/rowers/views.py b/rowers/views.py index b7ee0c28..f72ed0c2 100644 --- a/rowers/views.py +++ b/rowers/views.py @@ -2737,7 +2737,52 @@ def cumstats(request,theuser=0, # prepare data frame datadf = dataprep.read_cols_df_sql(ids,fieldlist) - + + # clean data remove zeros and negative values + datadf=datadf.clip(lower=0) + datadf.replace(to_replace=0,value=np.nan,inplace=True) + + # clean data for useful ranges per column + mask = datadf['hr'] < 30 + datadf.loc[mask,'hr'] = np.nan + + mask = datadf['rhythm'] < 5 + datadf.loc[mask,'rhythm'] = np.nan + + mask = datadf['rhythm'] > 70 + datadf.loc[mask,'rhythm'] = np.nan + + mask = datadf['power'] < 20 + datadf.loc[mask,'power'] = np.nan + + mask = datadf['drivelength'] < 0.5 + datadf.loc[mask,'drivelength'] = np.nan + + mask = datadf['forceratio'] < 0.2 + datadf.loc[mask,'forceratio'] = np.nan + + mask = datadf['forceratio'] > 1.0 + datadf.loc[mask,'forceratio'] = np.nan + + mask = datadf['spm'] < 10 + datadf.loc[mask,'spm'] = np.nan + + + mask = datadf['spm'] > 60 + datadf.loc[mask,'spm'] = np.nan + + mask = datadf['drivespeed'] < 0.5 + datadf.loc[mask,'drivespeed'] = np.nan + + mask = datadf['drivespeed'] > 4 + datadf.loc[mask,'drivespeed'] = np.nan + + mask = datadf['driveenergy'] > 2000 + datadf.loc[mask,'driveenergy'] = np.nan + + mask = datadf['driveenergy'] < 100 + datadf.loc[mask,'driveenergy'] = np.nan + if datadf.empty: return HttpResponse("No data found") @@ -2812,6 +2857,51 @@ def workout_stats_view(request,id=0,message="",successmessage=""): url = reverse(workouts_view,args=[str(message)]) return HttpResponseRedirect(url) + + # clean data remove zeros and negative values + 
datadf=datadf.clip(lower=0) + datadf.replace(to_replace=0,value=np.nan,inplace=True) + + # clean data for useful ranges per column + mask = datadf['hr'] < 30 + datadf.loc[mask,'hr'] = np.nan + + mask = datadf['rhythm'] < 5 + datadf.loc[mask,'rhythm'] = np.nan + + mask = datadf['rhythm'] > 70 + datadf.loc[mask,'rhythm'] = np.nan + + mask = datadf['power'] < 20 + datadf.loc[mask,'power'] = np.nan + + mask = datadf['drivelength'] < 0.5 + datadf.loc[mask,'drivelength'] = np.nan + + mask = datadf['forceratio'] < 0.2 + datadf.loc[mask,'forceratio'] = np.nan + + mask = datadf['forceratio'] > 1.0 + datadf.loc[mask,'forceratio'] = np.nan + + mask = datadf['spm'] < 10 + datadf.loc[mask,'spm'] = np.nan + + + mask = datadf['spm'] > 60 + datadf.loc[mask,'spm'] = np.nan + + mask = datadf['drivespeed'] < 0.5 + datadf.loc[mask,'drivespeed'] = np.nan + + mask = datadf['drivespeed'] > 4 + datadf.loc[mask,'drivespeed'] = np.nan + + mask = datadf['driveenergy'] > 2000 + datadf.loc[mask,'driveenergy'] = np.nan + + mask = datadf['driveenergy'] < 100 + datadf.loc[mask,'driveenergy'] = np.nan if datadf.empty: return HttpResponse("CSV data file not found")