Private
Public Access
1
0

Moved data field prep for stats from views to dataprep

This commit is contained in:
Sander Roosendaal
2017-02-09 08:43:07 +01:00
parent d279180d5d
commit 0969399e36
3 changed files with 103 additions and 161 deletions

View File

@@ -1,6 +1,6 @@
# All the data preparation, data cleaning and data mangling should
# be defined here
from rowers.models import Workout, User, Rower
from rowers.models import Workout, User, Rower,StrokeData
from rowingdata import rowingdata as rrdata
from rowers.tasks import handle_sendemail_unrecognized
@@ -67,6 +67,98 @@ from scipy.signal import savgol_filter
import datetime
def clean_df_stats(datadf,workstrokesonly=True):
    """Return a cleaned copy of ``datadf`` for use in statistics.

    Cleaning steps, in order:

    1. Negative values are clipped to zero and every zero is replaced
       by NaN.
    2. For each column listed in ``valid_ranges`` that is present in the
       frame, values outside the listed range are replaced by NaN.
       Values exactly on a bound are kept. Columns that are absent are
       skipped.
    3. If ``workstrokesonly`` is ``True`` (or the string ``'True'``) and
       the frame has a ``workoutstate`` column, rows whose state is in
       ``workoutstatesrest`` are dropped.

    Args:
        datadf: pandas DataFrame with numeric stroke data columns.
        workstrokesonly: ``True`` or ``'True'`` to drop rest strokes.

    Returns:
        A new DataFrame; the frame passed in is not modified.
    """
    # (column, lowest accepted value, highest accepted value).
    # None means that side of the range is not checked.
    valid_ranges = (
        ('hr', 30, None),
        ('rhythm', 5, 70),
        ('power', 20, None),
        ('drivelength', 0.5, None),
        ('forceratio', 0.2, 1.0),
        ('spm', 10, 60),
        ('drivespeed', 0.5, 4),
        ('driveenergy', 100, 2000),
    )

    # workoutstate values that are filtered out as rest strokes
    workoutstatesrest = [3]

    # clean data: remove zeros and negative values
    # (clip and replace both return new frames, so the input is untouched)
    datadf = datadf.clip(lower=0)
    datadf = datadf.replace(to_replace=0, value=np.nan)

    # clean data for useful ranges per column
    for column, lowest, highest in valid_ranges:
        if column not in datadf.columns:
            continue
        values = datadf[column]
        # Series.mask puts NaN where the condition holds and upcasts
        # integer columns to float when needed.
        if lowest is not None:
            values = values.mask(values < lowest)
        if highest is not None:
            values = values.mask(values > highest)
        datadf[column] = values

    # The flag may arrive as the string 'True' as well as a boolean.
    wants_work_only = workstrokesonly in (True, 'True')
    if wants_work_only and 'workoutstate' in datadf.columns:
        datadf = datadf[~datadf['workoutstate'].isin(workoutstatesrest)]

    return datadf
def getstatsfields():
    """Return the StrokeData fields that are used for statistics.

    Takes every field reported by ``StrokeData._meta.get_fields()`` and
    leaves out the names listed in ``excluded``.

    Returns:
        A ``(fieldlist, fielddict)`` tuple. ``fielddict`` maps each kept
        field name to its ``verbose_name``; ``fieldlist`` holds the same
        field names as a list.
    """
    # Field names that are not useful in stats.
    # 'workoutstate' is deliberately not excluded: clean_df_stats reads
    # that column to filter out rest strokes.
    excluded = frozenset([
        'workoutid',
        'ergpace',
        'hr_an',
        'hr_tr',
        'hr_at',
        'hr_ut2',
        'hr_ut1',
        'time',
        'distance',
        'nowindpace',
        'fnowindpace',
        'fergpace',
        'equivergpower',
        'fpace',
        'pace',
        'id',
        'ftime',
        'x_right',
        'hr_max',
        'hr_bottom',
        'cumdist',
    ])

    fielddict = {
        field.name: field.verbose_name
        for field in StrokeData._meta.get_fields()
        if field.name not in excluded
    }

    # list(dict) yields the keys on both Python 2 and Python 3
    fieldlist = list(fielddict)

    return fieldlist,fielddict
# A string representation for time deltas
def niceformat(values):
out = []

View File

@@ -83,9 +83,14 @@ class PowerZonesField(models.TextField):
# For future Team functionality
class Team(models.Model):
# Django model for a team: a unique name, optional notes, a managing
# User and a private/open setting.
# (stored value, display label) pairs offered by the `private` field
choices = (
('private','private'),
('open','open'),
)
# Team name: at most 150 characters, must be unique
name = models.CharField(max_length=150,unique=True)
# Free-text notes: may be left blank, at most 200 characters
notes = models.CharField(blank=True,max_length=200)
# The User who manages this team
manager = models.ForeignKey(User)
# One of the `choices` values above; defaults to 'open'
private = models.CharField(max_length=30,choices=choices,default='open')
# Text representation of a team is its name
def __unicode__(self):
return self.name

View File

@@ -2738,97 +2738,17 @@ def cumstats(request,theuser=0,
u = ''
ids = [int(workout.id) for workout in allergworkouts]
# Get field names and remove those that are not useful in stats
fields = StrokeData._meta.get_fields()
fielddict = {field.name:field.verbose_name for field in fields}
fielddict.pop('workoutid')
fielddict.pop('ergpace')
fielddict.pop('hr_an')
fielddict.pop('hr_tr')
fielddict.pop('hr_at')
fielddict.pop('hr_ut2')
fielddict.pop('hr_ut1')
fielddict.pop('time')
fielddict.pop('distance')
fielddict.pop('nowindpace')
fielddict.pop('fnowindpace')
fielddict.pop('fergpace')
fielddict.pop('equivergpower')
# fielddict.pop('workoutstate')
fielddict.pop('fpace')
fielddict.pop('pace')
fielddict.pop('id')
fielddict.pop('ftime')
fielddict.pop('x_right')
fielddict.pop('hr_max')
fielddict.pop('hr_bottom')
fielddict.pop('cumdist')
fieldlist,fielddict = dataprep.getstatsfields()
# prepare data frame
datadf = dataprep.read_cols_df_sql(ids,fieldlist)
# clean data remove zeros and negative values
datadf=datadf.clip(lower=0)
datadf.replace(to_replace=0,value=np.nan,inplace=True)
# clean data for useful ranges per column
mask = datadf['hr'] < 30
datadf.loc[mask,'hr'] = np.nan
mask = datadf['rhythm'] < 5
datadf.loc[mask,'rhythm'] = np.nan
mask = datadf['rhythm'] > 70
datadf.loc[mask,'rhythm'] = np.nan
mask = datadf['power'] < 20
datadf.loc[mask,'power'] = np.nan
mask = datadf['drivelength'] < 0.5
datadf.loc[mask,'drivelength'] = np.nan
mask = datadf['forceratio'] < 0.2
datadf = dataprep.clean_df_stats(datadf,workstrokesonly=workstrokesonly)
mask = datadf['forceratio'] > 1.0
datadf.loc[mask,'forceratio'] = np.nan
mask = datadf['spm'] < 10
datadf.loc[mask,'spm'] = np.nan
mask = datadf['spm'] > 60
datadf.loc[mask,'spm'] = np.nan
mask = datadf['drivespeed'] < 0.5
datadf.loc[mask,'drivespeed'] = np.nan
mask = datadf['drivespeed'] > 4
datadf.loc[mask,'drivespeed'] = np.nan
mask = datadf['driveenergy'] > 2000
datadf.loc[mask,'driveenergy'] = np.nan
mask = datadf['driveenergy'] < 100
if datadf.empty:
return HttpResponse("No data found")
workoutstateswork = [1,4,5,8,9,6,7]
workoutstatesrest = [3]
workoutstatetransition = [0,2,10,11,12,13]
if workstrokesonly=='True' or workstrokesonly==True:
try:
datadf = datadf[~datadf['workoutstate'].isin(workoutstatesrest)]
except:
# Create stats
@@ -2908,51 +2828,8 @@ def workout_stats_view(request,id=0,message="",successmessage=""):
message = "You are not allowed to see the stats of this workout"
url = reverse(workouts_view,args=[str(message)])
return HttpResponseRedirect(url)
datadf = dataprep.clean_df_stats(datadf,workstrokesonly=workstrokesonly)
# clean data remove zeros and negative values
datadf=datadf.clip(lower=0)
datadf.replace(to_replace=0,value=np.nan,inplace=True)
# clean data for useful ranges per column
mask = datadf['hr'] < 30
datadf.loc[mask,'hr'] = np.nan
mask = datadf['rhythm'] < 5
datadf.loc[mask,'rhythm'] = np.nan
mask = datadf['rhythm'] > 70
datadf.loc[mask,'rhythm'] = np.nan
mask = datadf['power'] < 20
datadf.loc[mask,'power'] = np.nan
mask = datadf['drivelength'] < 0.5
datadf.loc[mask,'drivelength'] = np.nan
mask = datadf['forceratio'] < 0.2
datadf.loc[mask,'forceratio'] = np.nan
mask = datadf['forceratio'] > 1.0
datadf.loc[mask,'forceratio'] = np.nan
mask = datadf['spm'] < 10
datadf.loc[mask,'spm'] = np.nan
mask = datadf['spm'] > 60
datadf.loc[mask,'spm'] = np.nan
mask = datadf['drivespeed'] < 0.5
datadf.loc[mask,'drivespeed'] = np.nan
mask = datadf['drivespeed'] > 4
datadf.loc[mask,'drivespeed'] = np.nan
mask = datadf['driveenergy'] > 2000
datadf.loc[mask,'driveenergy'] = np.nan
mask = datadf['driveenergy'] < 100
if datadf.empty:
@@ -2961,44 +2838,12 @@ def workout_stats_view(request,id=0,message="",successmessage=""):
workoutstateswork = [1,4,5,8,9,6,7]
workoutstatesrest = [3]
workoutstatetransition = [0,2,10,11,12,13]
if workstrokesonly=='True' or workstrokesonly==True:
try:
datadf = datadf[~datadf['workoutstate'].isin(workoutstatesrest)]
except:
pass
# Create stats
stats = {}
# Get field names and remove those that are not useful in stats
fields = StrokeData._meta.get_fields()
fielddict = {field.name:field.verbose_name for field in fields}
fielddict.pop('workoutid')
fielddict.pop('ergpace')
fielddict.pop('hr_an')
fielddict.pop('hr_tr')
fielddict.pop('hr_at')
fielddict.pop('hr_ut2')
fielddict.pop('hr_ut1')
fielddict.pop('time')
fielddict.pop('distance')
fielddict.pop('nowindpace')
fielddict.pop('fnowindpace')
fielddict.pop('fergpace')
fielddict.pop('equivergpower')
fielddict.pop('workoutstate')
fielddict.pop('fpace')
fielddict.pop('pace')
fielddict.pop('id')
fielddict.pop('ftime')
fielddict.pop('x_right')
fielddict.pop('hr_max')
fielddict.pop('hr_bottom')
fieldlist,fielddict = dataprep.getstatsfields()
for field,verbosename in fielddict.iteritems():