Private
Public Access
1
0

Merge branch 'develop' into feature/sensorfusion

This commit is contained in:
Sander Roosendaal
2017-02-28 17:05:10 +01:00
4 changed files with 200 additions and 60 deletions

View File

@@ -87,52 +87,148 @@ from scipy.signal import savgol_filter
import datetime
def clean_df_stats(datadf,workstrokesonly=True):
def clean_df_stats(datadf,workstrokesonly=True,ignorehr=True,
ignoreadvanced=False):
# clean data: remove zeros and negative values
# bring metrics which have negative values to positive domain
try:
datadf['catch'] = -datadf['catch']
except KeyError:
pass
try:
datadf['peakforceangle'] = datadf['peakforceangle']+1000
except KeyError:
pass
datadf=datadf.clip(lower=0)
datadf.replace(to_replace=0,value=np.nan,inplace=True)
# return from positive domain to negative
try:
datadf['catch'] = -datadf['catch']
except KeyError:
pass
try:
datadf['peakforceangle'] = datadf['peakforceangle']-1000
except KeyError:
pass
# clean data for useful ranges per column
mask = datadf['hr'] < 30
datadf.loc[mask,'hr'] = np.nan
if not ignorehr:
try:
mask = datadf['hr'] < 30
datadf.loc[mask,'hr'] = np.nan
except KeyError:
pass
mask = datadf['rhythm'] < 5
datadf.loc[mask,'rhythm'] = np.nan
try:
mask = datadf['spm'] < 10
datadf.loc[mask,'spm'] = np.nan
except KeyError:
pass
mask = datadf['rhythm'] > 70
datadf.loc[mask,'rhythm'] = np.nan
mask = datadf['power'] < 20
datadf.loc[mask,'power'] = np.nan
try:
mask = datadf['pace']/1000. > 300.
datadf.loc[mask,'pace'] = np.nan
except KeyError:
pass
try:
mask = datadf['pace']/1000. < 60.
datadf.loc[mask,'pace'] = np.nan
except KeyError:
pass
try:
mask = datadf['spm'] > 60
datadf.loc[mask,'spm'] = np.nan
except KeyError:
pass
mask = datadf['drivelength'] < 0.5
datadf.loc[mask,'drivelength'] = np.nan
mask = datadf['forceratio'] < 0.2
datadf.loc[mask,'forceratio'] = np.nan
mask = datadf['forceratio'] > 1.0
datadf.loc[mask,'forceratio'] = np.nan
if not ignoreadvanced:
try:
mask = datadf['rhythm'] < 5
datadf.loc[mask,'rhythm'] = np.nan
except KeyError:
pass
mask = datadf['spm'] < 10
datadf.loc[mask,'spm'] = np.nan
try:
mask = datadf['rhythm'] > 70
datadf.loc[mask,'rhythm'] = np.nan
except KeyError:
pass
try:
mask = datadf['power'] < 20
datadf.loc[mask,'power'] = np.nan
except KeyError:
pass
try:
mask = datadf['drivelength'] < 0.5
datadf.loc[mask,'drivelength'] = np.nan
except KeyError:
pass
try:
mask = datadf['forceratio'] < 0.2
datadf.loc[mask,'forceratio'] = np.nan
except KeyError:
pass
try:
mask = datadf['forceratio'] > 1.0
datadf.loc[mask,'forceratio'] = np.nan
except KeyError:
pass
try:
mask = datadf['drivespeed'] < 0.5
datadf.loc[mask,'drivespeed'] = np.nan
except KeyError:
pass
try:
mask = datadf['drivespeed'] > 4
datadf.loc[mask,'drivespeed'] = np.nan
except KeyError:
pass
try:
mask = datadf['driveenergy'] > 2000
datadf.loc[mask,'driveenergy'] = np.nan
except KeyError:
pass
try:
mask = datadf['driveenergy'] < 100
datadf.loc[mask,'driveenergy'] = np.nan
except KeyError:
pass
try:
mask = datadf['catch'] > -30.
datadf.loc[mask,'catch'] = np.nan
except KeyError:
pass
mask = datadf['spm'] > 60
datadf.loc[mask,'spm'] = np.nan
mask = datadf['drivespeed'] < 0.5
datadf.loc[mask,'drivespeed'] = np.nan
mask = datadf['drivespeed'] > 4
datadf.loc[mask,'drivespeed'] = np.nan
mask = datadf['driveenergy'] > 2000
datadf.loc[mask,'driveenergy'] = np.nan
mask = datadf['driveenergy'] < 100
datadf.loc[mask,'driveenergy'] = np.nan
workoutstateswork = [1,4,5,8,9,6,7]
workoutstatesrest = [3]
workoutstatetransition = [0,2,10,11,12,13]
@@ -621,7 +717,7 @@ def testdata(time,distance,pace,spm):
# Get data from DB for one workout (fetches all data). If data
# is not in DB, read from CSV file (and create DB entry)
def getrowdata_db(id=0):
def getrowdata_db(id=0,doclean=False):
data = read_df_sql(id)
data['x_right'] = data['x_right']/1.0e6
if data.empty:
@@ -633,13 +729,18 @@ def getrowdata_db(id=0):
else:
row = Workout.objects.get(id=id)
if doclean:
data = clean_df_stats(data,ignorehr=True)
return data,row
# Fetch a subset of the data from the DB
def getsmallrowdata_db(columns, ids=None, doclean=True):
    """Fetch a subset of the stroke data columns from the DB.

    Args:
        columns: Column selection passed through to ``read_cols_df_sql``.
        ids: Workout ids to fetch; treated as an empty list when omitted.
        doclean: When true (the default), run the fetched data through
            ``clean_df_stats`` with ``ignorehr=True`` before returning it.

    Returns:
        The object returned by ``read_cols_df_sql`` (presumably a pandas
        DataFrame -- confirm against that helper), cleaned if requested.
    """
    # Build a fresh list per call: a ``[]`` default is created once and
    # shared by every call, so a helper mutating it would leak state.
    if ids is None:
        ids = []

    prepmultipledata(ids)
    data = read_cols_df_sql(ids, columns)
    if doclean:
        data = clean_df_stats(data, ignorehr=True)
    return data
# Fetch both the workout and the workout stroke data (from CSV file)