From 34b2a9335bb5257f4b6e9651c8643468e61a830b Mon Sep 17 00:00:00 2001 From: Sander Roosendaal Date: Tue, 28 Feb 2017 13:28:11 +0100 Subject: [PATCH 1/3] advanced data filtering --- rowers/dataprep.py | 173 +++++++++++++++++++++++++++++-------- rowers/interactiveplots.py | 61 +++++++++---- rowers/views.py | 6 +- 3 files changed, 185 insertions(+), 55 deletions(-) diff --git a/rowers/dataprep.py b/rowers/dataprep.py index c2ea9fdf..646ab4fa 100644 --- a/rowers/dataprep.py +++ b/rowers/dataprep.py @@ -87,52 +87,148 @@ from scipy.signal import savgol_filter import datetime -def clean_df_stats(datadf,workstrokesonly=True): +def clean_df_stats(datadf,workstrokesonly=True,ignorehr=True, + ignoreadvanced=False): # clean data remove zeros and negative values + + # bring metrics which have negative values to positive domain + try: + datadf['catch'] = -datadf['catch'] + except KeyError: + pass + + try: + datadf['peakforceangle'] = datadf['peakforceangle']+1000 + except KeyError: + pass + datadf=datadf.clip(lower=0) datadf.replace(to_replace=0,value=np.nan,inplace=True) + # return from positive domain to negative + try: + datadf['catch'] = -datadf['catch'] + except KeyError: + pass + + try: + datadf['peakforceangle'] = datadf['peakforceangle']-1000 + except KeyError: + pass + # clean data for useful ranges per column - mask = datadf['hr'] < 30 - datadf.loc[mask,'hr'] = np.nan + if not ignorehr: + try: + mask = datadf['hr'] < 30 + datadf.loc[mask,'hr'] = np.nan + except KeyError: + pass - mask = datadf['rhythm'] < 5 - datadf.loc[mask,'rhythm'] = np.nan + try: + mask = datadf['spm'] < 10 + datadf.loc[mask,'spm'] = np.nan + except KeyError: + pass - mask = datadf['rhythm'] > 70 - datadf.loc[mask,'rhythm'] = np.nan - mask = datadf['power'] < 20 - datadf.loc[mask,'power'] = np.nan + try: + mask = datadf['pace']/1000. > 300. + datadf.loc[mask,'pace'] = np.nan + except KeyError: + pass + + try: + mask = datadf['pace']/1000. < 60. + datadf.loc[mask,'pace'] = np.nan + except KeyError: + pass + + try: + mask = datadf['spm'] > 60 + datadf.loc[mask,'spm'] = np.nan + except KeyError: + pass - mask = datadf['drivelength'] < 0.5 - datadf.loc[mask,'drivelength'] = np.nan - mask = datadf['forceratio'] < 0.2 - datadf.loc[mask,'forceratio'] = np.nan - - mask = datadf['forceratio'] > 1.0 - datadf.loc[mask,'forceratio'] = np.nan + if not ignoreadvanced: + try: + mask = datadf['rhythm'] < 5 + datadf.loc[mask,'rhythm'] = np.nan + except KeyError: + pass - mask = datadf['spm'] < 10 - datadf.loc[mask,'spm'] = np.nan + + try: + mask = datadf['rhythm'] > 70 + datadf.loc[mask,'rhythm'] = np.nan + except KeyError: + pass + + + try: + mask = datadf['power'] < 20 + datadf.loc[mask,'power'] = np.nan + except KeyError: + pass + + + try: + mask = datadf['drivelength'] < 0.5 + datadf.loc[mask,'drivelength'] = np.nan + except KeyError: + pass + + + try: + mask = datadf['forceratio'] < 0.2 + datadf.loc[mask,'forceratio'] = np.nan + except KeyError: + pass + + + try: + mask = datadf['forceratio'] > 1.0 + datadf.loc[mask,'forceratio'] = np.nan + except KeyError: + pass + + + + try: + mask = datadf['drivespeed'] < 0.5 + datadf.loc[mask,'drivespeed'] = np.nan + except KeyError: + pass + + + try: + mask = datadf['drivespeed'] > 4 + datadf.loc[mask,'drivespeed'] = np.nan + except KeyError: + pass + + + try: + mask = datadf['driveenergy'] > 2000 + datadf.loc[mask,'driveenergy'] = np.nan + except KeyError: + pass + + + try: + mask = datadf['driveenergy'] < 100 + datadf.loc[mask,'driveenergy'] = np.nan + except KeyError: + pass + + + try: + mask = datadf['catch'] > -30. + datadf.loc[mask,'catch'] = np.nan + except KeyError: + pass - mask = datadf['spm'] > 60 - datadf.loc[mask,'spm'] = np.nan - - mask = datadf['drivespeed'] < 0.5 - datadf.loc[mask,'drivespeed'] = np.nan - - mask = datadf['drivespeed'] > 4 - datadf.loc[mask,'drivespeed'] = np.nan - - mask = datadf['driveenergy'] > 2000 - datadf.loc[mask,'driveenergy'] = np.nan - - mask = datadf['driveenergy'] < 100 - datadf.loc[mask,'driveenergy'] = np.nan - workoutstateswork = [1,4,5,8,9,6,7] workoutstatesrest = [3] workoutstatetransition = [0,2,10,11,12,13] @@ -621,7 +717,7 @@ def testdata(time,distance,pace,spm): # Get data from DB for one workout (fetches all data). If data # is not in DB, read from CSV file (and create DB entry) -def getrowdata_db(id=0): +def getrowdata_db(id=0,doclean=False): data = read_df_sql(id) data['x_right'] = data['x_right']/1.0e6 if data.empty: @@ -633,13 +729,18 @@ def getrowdata_db(id=0): else: row = Workout.objects.get(id=id) + if doclean: + data = clean_df_stats(data,ignorehr=True) + return data,row # Fetch a subset of the data from the DB -def getsmallrowdata_db(columns,ids=[]): +def getsmallrowdata_db(columns,ids=[],doclean=True): prepmultipledata(ids) data = read_cols_df_sql(ids,columns) - + if doclean: + data = clean_df_stats(data,ignorehr=True) + return data # Fetch both the workout and the workout stroke data (from CSV file) diff --git a/rowers/interactiveplots.py b/rowers/interactiveplots.py index 3cf6b670..b8f6147e 100644 --- a/rowers/interactiveplots.py +++ b/rowers/interactiveplots.py @@ -142,6 +142,7 @@ def interactive_forcecurve(theworkouts,workstrokesonly=False): 'workoutstate'] rowdata = dataprep.getsmallrowdata_db(columns,ids=ids) + rowdata.dropna(axis=0,how='any',inplace=True) workoutstateswork = [1,4,5,8,9,6,7] workoutstatesrest = [3] @@ -153,6 +154,7 @@ def interactive_forcecurve(theworkouts,workstrokesonly=False): except KeyError: pass + catchav = rowdata['catch'].mean() finishav = rowdata['finish'].mean() washav = rowdata['wash'].mean() @@ -381,15 +383,17 @@ def interactive_histoall(theworkouts): ids = [int(w.id) for w in theworkouts] - rowdata = dataprep.getsmallrowdata_db(['power'],ids=ids) + rowdata = dataprep.getsmallrowdata_db(['power'],ids=ids,doclean=True) + rowdata.dropna(axis=0,how='any',inplace=True) histopwr = rowdata['power'].values if len(histopwr) == 0: - return "","CSV file not found","","" + return "","No valid data available","","" # throw out nans histopwr = histopwr[~np.isinf(histopwr)] histopwr = histopwr[histopwr > 25] + histopwr = histopwr[histopwr < 1000] plot = Figure(tools=TOOLS,plot_width=900, toolbar_sticky=False, @@ -763,7 +767,7 @@ def interactive_streamchart(id=0,promember=0): rowdata = rdata(f1,rower=rr) if rowdata == 0: - return "","CSV Data File Not Found" + return "","No Valid Data Available" dist = rowdata.df.ix[:,'cum_dist'] @@ -810,15 +814,16 @@ def interactive_chart(id=0,promember=0): columns = ['time','pace','hr','fpace','ftime'] datadf = dataprep.getsmallrowdata_db(columns,ids=[id]) + datadf.dropna(axis=0,how='any',inplace=True) row = Workout.objects.get(id=id) if datadf.empty: - return "","CSV Data File Not Found" + return "","No Valid Data Available" else: datadf.sort_values(by='time',ascending=True,inplace=True) #datadf,row = dataprep.getrowdata_db(id=id) #if datadf.empty: - #return "","CSV Data File Not Found" + #return "","No Valid Data Available" source = ColumnDataSource( datadf @@ -887,8 +892,7 @@ def interactive_cum_flex_chart2(theworkouts,promember=0, # datadf = dataprep.smalldataprep(theworkouts,xparam,yparam1,yparam2) ids = [int(w.id) for w in theworkouts] - datadf = dataprep.getsmallrowdata_db([xparam,yparam1,yparam2],ids=ids) - + datadf = dataprep.getsmallrowdata_db([xparam,yparam1,yparam2],ids=ids,doclean=False) yparamname1 = axlabels[yparam1] if yparam2 != 'None': @@ -1155,10 +1159,13 @@ def interactive_flex_chart2(id=0,promember=0, 'ftime','distance','fpace', 'power','hr','spm', 'time','pace','workoutstate'] - rowdata = dataprep.getsmallrowdata_db(columns,ids=[id]) + + rowdata = dataprep.getsmallrowdata_db(columns,ids=[id],doclean=True) + rowdata.dropna(axis=1,how='all',inplace=True) + row = Workout.objects.get(id=id) if rowdata.empty: - return "","CSV Data File Not Found" + return "","No valid data" else: rowdata.sort_values(by='time',ascending=True,inplace=True) @@ -1172,13 +1179,23 @@ def interactive_flex_chart2(id=0,promember=0, except KeyError: pass - rowdata['x1'] = rowdata.ix[:,xparam] - rowdata['y1'] = rowdata.ix[:,yparam1] - + try: + rowdata['x1'] = rowdata.ix[:,xparam] + except KeyError: + rowdata['x1'] = 0*rowdata.ix[:'time'] + + try: + rowdata['y1'] = rowdata.ix[:,yparam1] + except KeyError: + rowdata['y1'] = 0*rowdata.ix[:,xparam] + tseconds = rowdata.ix[:,'time'] if yparam2 != 'None': - rowdata['y2'] = rowdata.ix[:,yparam2] + try: + rowdata['y2'] = rowdata.ix[:,yparam2] + except KeyError: + rowdata['y2'] = rowdata.ix[:,yparam1] else: rowdata['y2'] = rowdata['y1'] @@ -1495,8 +1512,9 @@ def interactive_flex_chart2(id=0,promember=0, def interactive_bar_chart(id=0,promember=0): # check if valid ID exists (workout exists) rowdata,row = dataprep.getrowdata_db(id=id) + rowdata.dropna(axis=0,how='any',inplace=True) if rowdata.empty: - return "","CSV Data File Not Found" + return "","No Valid Data Available" # Add hover to this comma-separated string and see what changes if (promember==1): @@ -1592,6 +1610,7 @@ def interactive_multiple_compare_chart(ids,xparam,yparam,plottype='line', 'workoutid'] datadf = dataprep.getsmallrowdata_db(columns,ids=ids) + datadf.dropna(axis=0,how='any',inplace=True) tseconds = datadf.ix[:,'time'] yparamname = axlabels[yparam] @@ -1749,17 +1768,22 @@ def interactive_comparison_chart(id1=0,id2=0,xparam='distance',yparam='spm', rowdata1 = dataprep.getsmallrowdata_db(columns,ids=[id1]) rowdata2 = dataprep.getsmallrowdata_db(columns,ids=[id2]) + for n in ['distance','power','hr','spm','time','pace','workoutstate']: + rowdata1[n].fillna(value=0,inplace=True) + rowdata2[n].fillna(value=0,inplace=True) + + rowdata1.dropna(axis=0,how='any',inplace=True) + rowdata2.dropna(axis=0,how='any',inplace=True) row1 = Workout.objects.get(id=id1) row2 = Workout.objects.get(id=id2) - if rowdata1.empty: - return "","CSV Data File Not Found" + return "","No Valid Data Available" else: rowdata1.sort_values(by='time',ascending=True,inplace=True) if rowdata2.empty: - return "","CSV Data File Not Found" + return "","No Valid Data Available" else: rowdata2.sort_values(by='time',ascending=True,inplace=True) @@ -1919,8 +1943,9 @@ def interactive_comparison_chart(id1=0,id2=0,xparam='distance',yparam='spm', def interactive_otw_advanced_pace_chart(id=0,promember=0): # check if valid ID exists (workout exists) rowdata,row = dataprep.getrowdata_db(id=id) + rowdata.dropna(axis=0,how='any',inplace=True) if rowdata.empty: - return "","CSV Data File Not Found" + return "","No Valid Data Available" # Add hover to this comma-separated string and see what changes if (promember==1): diff --git a/rowers/views.py b/rowers/views.py index 26ad82c9..40e82e19 100644 --- a/rowers/views.py +++ b/rowers/views.py @@ -3152,8 +3152,12 @@ def workout_stats_view(request,id=0,message="",successmessage=""): stats = {} fieldlist,fielddict = dataprep.getstatsfields() - + fielddict.pop('workoutstate') + print "aap" + print datadf['catch'].mean() + print "noot" + for field,verbosename in fielddict.iteritems(): thedict = { 'mean':datadf[field].mean(), From d6e0e84960b0870288df9e36a60f298e8dc72834 Mon Sep 17 00:00:00 2001 From: Sander Roosendaal Date: Tue, 28 Feb 2017 16:38:41 +0100 Subject: [PATCH 2/3] empower plots --- rowers/tasks.py | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/rowers/tasks.py b/rowers/tasks.py index afa33a11..f37fe6ab 100644 --- a/rowers/tasks.py +++ b/rowers/tasks.py @@ -197,6 +197,9 @@ def handle_makeplot(f1,f2,t,hrdata,plotnr,imagename): row = rdata(f2,rower=rr) except IOError: row = rdata(f2+'.gz',rower=rr) + + + haspower = row.df[' Power (watts)'].mean() > 50 nr_rows = len(row.df) if (plotnr in [1,2,4,5,8,11,9,12]) and (nr_rows > 1200): @@ -211,9 +214,15 @@ def handle_makeplot(f1,f2,t,hrdata,plotnr,imagename): elif (plotnr==3): fig1 = row.get_piechart(t) elif (plotnr==4): - fig1 = row.get_timeplot_otw(t) + if haspower: + fig1 = row.get_timeplot_otwempower(t) + else: + fig1 = row.get_timeplot_otw(t) elif (plotnr==5): - fig1 = row.get_metersplot_otw(t) + if haspower: + fig1 = row.get_metersplot_otwempower(t) + else: + fig1 = row.get_metersplot_otw(t) elif (plotnr==6): fig1 = row.get_piechart(t) elif (plotnr==7) or (plotnr==10): From 39f8ad1c77dac34227059a35068a34a58caff05b Mon Sep 17 00:00:00 2001 From: Sander Roosendaal Date: Tue, 28 Feb 2017 16:56:37 +0100 Subject: [PATCH 3/3] error checking around notes saved with interval editor --- rowers/views.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/rowers/views.py b/rowers/views.py index 40e82e19..a025f86e 100644 --- a/rowers/views.py +++ b/rowers/views.py @@ -4990,9 +4990,10 @@ def workout_summary_edit_view(request,id,message="",successmessage="" nrintervals = len(idist) row.summary = intervalstats #intervalstats = rowdata.allstats() - row.notes += u'{n} \n {s}'.format( - n = row.notes, - s = s + if s: + row.notes += u'{n} \n {s}'.format( + n = row.notes, + s = s ) row.save() rowdata.write_csv(f1,gzip=True)