Private
Public Access
1
0

Web Import now uses dataprep

This commit is contained in:
Sander Roosendaal
2017-02-11 15:51:42 +01:00
parent f287f9af21
commit 8d9a8b0d2b
2 changed files with 176 additions and 245 deletions

View File

@@ -217,6 +217,116 @@ def timedeltaconv(x):
return dt
# Processes painsled CSV file to database
def save_workout_database(f2,r,dosmooth=True,workouttype='rower',
dosummary=True,title='Workout',
notes='',totaldist=0,totaltime=0):
message = None
powerperc = 100*np.array([r.pw_ut2,
r.pw_ut1,
r.pw_at,
r.pw_tr,r.pw_an])/r.ftp
# make workout and put in database
rr = rrower(hrmax=r.max,hrut2=r.ut2,
hrut1=r.ut1,hrat=r.at,
hrtr=r.tr,hran=r.an,ftp=r.ftp,
powerperc=powerperc,powerzones=r.powerzones)
row = rdata(f2,rower=rr)
if row == 0:
return (0,'Error: CSV data file not found')
if dosmooth:
# auto smoothing
pace = row.df[' Stroke500mPace (sec/500m)'].values
velo = 500./pace
f = row.df['TimeStamp (sec)'].diff().mean()
windowsize = 2*(int(10./(f)))+1
if not 'originalvelo' in row.df:
row.df['originalvelo'] = velo
if windowsize > 3 and windowsize<len(velo):
velo2 = savgol_filter(velo,windowsize,3)
else:
velo2 = velo
velo3 = pd.Series(velo2)
velo3 = velo3.replace([-np.inf,np.inf],np.nan)
velo3 = velo3.fillna(method='ffill')
pace2 = 500./abs(velo3)
row.df[' Stroke500mPace (sec/500m)'] = pace2
row.df = row.df.fillna(0)
row.write_csv(f2,gzip=True)
try:
os.remove(f2)
except:
pass
# recalculate power data
if workouttype == 'rower' or workouttype == 'dynamic' or workouttype == 'slides':
try:
row.erg_recalculatepower()
row.write_csv(f2,gzip=True)
except:
pass
averagehr = row.df[' HRCur (bpm)'].mean()
maxhr = row.df[' HRCur (bpm)'].max()
if totaldist == 0:
totaldist = row.df['cum_dist'].max()
if totaltime == 0:
totaltime = row.df['TimeStamp (sec)'].max()-row.df['TimeStamp (sec)'].min()
totaltime = totaltime+row.df.ix[0,' ElapsedTime (sec)']
hours = int(totaltime/3600.)
if hours>23:
message = 'Warning: The workout duration was longer than 23 hours'
hours = 23
minutes = int((totaltime - 3600.*hours)/60.)
seconds = int(totaltime - 3600.*hours - 60.*minutes)
tenths = int(10*(totaltime - 3600.*hours - 60.*minutes - seconds))
duration = "%s:%s:%s.%s" % (hours,minutes,seconds,tenths)
if dosummary:
summary = row.summary()
summary += '\n'
summary += row.intervalstats()
workoutdate = row.rowdatetime.strftime('%Y-%m-%d')
workoutstarttime = row.rowdatetime.strftime('%H:%M:%S')
workoutstartdatetime = thetimezone.localize(row.rowdatetime).astimezone(utc)
# check for duplicate start times
ws = Workout.objects.filter(starttime=workoutstarttime,
user=r)
if (len(ws) != 0):
message = "Warning: This workout probably already exists in the database"
w = Workout(user=r,name=title,date=workoutdate,
workouttype=workouttype,
duration=duration,distance=totaldist,
weightcategory=r.weightcategory,
starttime=workoutstarttime,
csvfilename=f2,notes=notes,summary=summary,
maxhr=maxhr,averagehr=averagehr,
startdatetime=workoutstartdatetime)
w.save()
# put stroke data in database
res = dataprep(row.df,id=w.id,bands=True,
barchart=True,otwpower=True,empower=True)
return (w.id,message)
# Create new workout from file and store it in the database
# This routine should be used everywhere in views.py and mailprocessing.py
# Currently there is code duplication
@@ -336,105 +446,14 @@ def new_workout_from_file(r,f2,
except:
os.remove(f_to_be_deleted+'.gz')
powerperc = 100*np.array([r.pw_ut2,
r.pw_ut1,
r.pw_at,
r.pw_tr,r.pw_an])/r.ftp
# make workout and put in database
rr = rrower(hrmax=r.max,hrut2=r.ut2,
hrut1=r.ut1,hrat=r.at,
hrtr=r.tr,hran=r.an,ftp=r.ftp,
powerperc=powerperc,powerzones=r.powerzones)
row = rdata(f2,rower=rr)
if row == 0:
return (0,'Error: CSV data file not found')
dosummary = (fileformat != 'fit')
id,message = save_workout_database(f2,r,
workouttype=workouttype,
dosummary=dosummary,
title=title)
# auto smoothing
pace = row.df[' Stroke500mPace (sec/500m)'].values
velo = 500./pace
f = row.df['TimeStamp (sec)'].diff().mean()
windowsize = 2*(int(10./(f)))+1
if not 'originalvelo' in row.df:
row.df['originalvelo'] = velo
if windowsize > 3 and windowsize<len(velo):
velo2 = savgol_filter(velo,windowsize,3)
else:
velo2 = velo
velo3 = pd.Series(velo2)
velo3 = velo3.replace([-np.inf,np.inf],np.nan)
velo3 = velo3.fillna(method='ffill')
pace2 = 500./abs(velo3)
row.df[' Stroke500mPace (sec/500m)'] = pace2
row.df = row.df.fillna(0)
row.write_csv(f2,gzip=True)
try:
os.remove(f2)
except:
pass
# recalculate power data
if workouttype == 'rower' or workouttype == 'dynamic' or workouttype == 'slides':
try:
row.erg_recalculatepower()
row.write_csv(f2,gzip=True)
except:
pass
if fileformat != 'fit' and summary == '':
summary = row.summary()
summary += '\n'
summary += row.intervalstats_painsled()
averagehr = row.df[' HRCur (bpm)'].mean()
maxhr = row.df[' HRCur (bpm)'].max()
totaldist = row.df['cum_dist'].max()
totaltime = row.df['TimeStamp (sec)'].max()-row.df['TimeStamp (sec)'].min()
totaltime = totaltime+row.df.ix[0,' ElapsedTime (sec)']
hours = int(totaltime/3600.)
if hours>23:
message = 'Warning: The workout duration was longer than 23 hours'
hours = 23
minutes = int((totaltime - 3600.*hours)/60.)
seconds = int(totaltime - 3600.*hours - 60.*minutes)
tenths = int(10*(totaltime - 3600.*hours - 60.*minutes - seconds))
duration = "%s:%s:%s.%s" % (hours,minutes,seconds,tenths)
workoutdate = row.rowdatetime.strftime('%Y-%m-%d')
workoutstarttime = row.rowdatetime.strftime('%H:%M:%S')
workoutstartdatetime = thetimezone.localize(row.rowdatetime).astimezone(utc)
# check for duplicate start times
ws = Workout.objects.filter(starttime=workoutstarttime,
user=r)
if (len(ws) != 0):
message = "Warning: This workout probably already exists in the database"
w = Workout(user=r,name=title,date=workoutdate,
workouttype=workouttype,
duration=duration,distance=totaldist,
weightcategory=r.weightcategory,
starttime=workoutstarttime,
csvfilename=f2,notes=notes,summary=summary,
maxhr=maxhr,averagehr=averagehr,
startdatetime=workoutstartdatetime)
w.save()
# put stroke data in database
res = dataprep(row.df,id=w.id,bands=True,
barchart=True,otwpower=True,empower=True)
return (w.id,message)
return (id,message)
# Compare the data from the CSV file and the database
# Currently only calculates number of strokes. To be expanded with

View File

@@ -67,13 +67,8 @@ from shutil import copyfile
from rowingdata import rower as rrower
from rowingdata import main as rmain
from rowingdata import rowingdata as rrdata
from rowingdata import TCXParser,RowProParser,ErgDataParser,TCXParserNoHR
from rowingdata import BoatCoachParser,RowPerfectParser,BoatCoachAdvancedParser
from rowingdata import MysteryParser
from rowingdata import painsledDesktopParser,speedcoachParser,ErgStickParser
from rowingdata import SpeedCoach2Parser,FITParser,fitsummarydata
from rowingdata import make_cumvalues
from rowingdata import summarydata,get_file_type
from rowingdata import summarydata
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
@@ -451,69 +446,27 @@ def add_workout_from_strokedata(user,importid,data,strokedata,
compression='gzip')
# make workout
powerperc = 100*np.array([r.pw_ut2,
r.pw_ut1,
r.pw_at,
r.pw_tr,r.pw_an])/r.ftp
rr = rrower(hrmax=r.max,hrut2=r.ut2,
hrut1=r.ut1,hrat=r.at,
hrtr=r.tr,hran=r.an,ftp=r.ftp,
powerperc=powerperc,
powerzones=r.powerzones,
)
row = rdata(csvfilename,rower=rr)
averagehr = row.df[' HRCur (bpm)'].mean()
maxhr = row.df[' HRCur (bpm)'].max()
totaldist = row.df['cum_dist'].max()
totaltime = row.df['TimeStamp (sec)'].max()-row.df['TimeStamp (sec)'].min()
totaltime = totaltime+row.df.ix[0,' ElapsedTime (sec)']
# with Concept2
if source=='c2':
try:
totaldist = data['distance']
totaltime = data['time']/10.
except KeyError:
pass
totaldist = 0
totaltime = 0
else:
totaldist = 0
totaltime = 0
id,message = dataprep.save_workout_database(csvfilename,r,
workouttype=workouttype,
title=title,notes=comments,
totaldist=totaldist,
totaltime=totaltime)
hours = int(totaltime/3600.)
minutes = int((totaltime - 3600.*hours)/60.)
seconds = int(totaltime - 3600.*hours - 60.*minutes)
tenths = int(10*(totaltime - 3600.*hours - 60.*minutes - seconds))
duration = "%s:%s:%s.%s" % (hours,minutes,seconds,tenths)
summary = row.summary()
summary += '\n'
summary += row.intervalstats()
workoutdate = row.rowdatetime.strftime('%Y-%m-%d')
workoutstarttime = row.rowdatetime.strftime('%H:%M:%S')
# check for duplicate start times
ws = Workout.objects.filter(starttime=workoutstarttime,
user=r)
if (len(ws) != 0):
warnings.warn("Probably a duplicate workout",UserWarning)
# Create the Workout object
w = Workout(user=r,name=title,
date=workoutdate,workouttype=workouttype,
duration=duration,distance=totaldist,
weightcategory=r.weightcategory,
starttime=workoutstarttime,
csvfilename=csvfilename,notes=comments,
uploadedtoc2=0,summary=summary,
averagehr=averagehr,maxhr=maxhr,
startdatetime=rowdatetime)
w.save()
return w.id
return id,message
# Create workout from SportTracks Data, which are slightly different
# than Strava or Concept2 data
@@ -582,7 +535,8 @@ def add_workout_from_stdata(user,importid,data):
times_location = times_distance
latcoord = np.zeros(len(times_distance))
loncoord = np.zeros(len(times_distance))
if workouttype == 'water':
workouttype = 'rower'
try:
res = splitstdata(data['cadence'])
@@ -668,89 +622,17 @@ def add_workout_from_stdata(user,importid,data):
timestr = strftime("%Y%m%d-%H%M%S")
# auto smoothing
pace = df[' Stroke500mPace (sec/500m)'].values
velo = 500./pace
f = df['TimeStamp (sec)'].diff().mean()
windowsize = 2*(int(10./(f)))+1
df['originalvelo'] = velo
if windowsize > 3 and windowsize<len(velo):
velo2 = savgol_filter(velo,windowsize,3)
else:
velo2 = velo
velo3 = pd.Series(velo2)
velo3 = velo3.replace([-np.inf,np.inf],np.nan)
velo3 = velo3.fillna(method='ffill')
pace2 = 500./abs(velo3)
df[' Stroke500mPace (sec/500m)'] = pace2
df = df.fillna(0)
# end autosmoothing
csvfilename ='media/Import_'+str(importid)+'.csv'
res = df.to_csv(csvfilename+'.gz',index_label='index',
compression='gzip')
# make workout
powerperc = 100*np.array([r.pw_ut2,
r.pw_ut1,
r.pw_at,
r.pw_tr,r.pw_an])/r.ftp
id,message = dataprep.save_workout_database(csvfilename,r,
workouttype=workouttype,
title=title,
notes=comments)
rr = rrower(hrmax=r.max,hrut2=r.ut2,
hrut1=r.ut1,hrat=r.at,
hrtr=r.tr,hran=r.an,ftp=r.ftp,
powerperc=powerperc,powerzones=r.powerzones)
row = rdata(csvfilename,rower=rr)
averagehr = row.df[' HRCur (bpm)'].mean()
maxhr = row.df[' HRCur (bpm)'].max()
totaldist = row.df['cum_dist'].max()
totaltime = row.df['TimeStamp (sec)'].max()-row.df['TimeStamp (sec)'].min()
totaltime = totaltime+row.df.ix[0,' ElapsedTime (sec)']
hours = int(totaltime/3600.)
minutes = int((totaltime - 3600.*hours)/60.)
seconds = int(totaltime - 3600.*hours - 60.*minutes)
tenths = int(10*(totaltime - 3600.*hours - 60.*minutes - seconds))
duration = "%s:%s:%s.%s" % (hours,minutes,seconds,tenths)
summary = row.summary()
summary += '\n'
summary += row.intervalstats()
workoutdate = row.rowdatetime.strftime('%Y-%m-%d')
workoutstarttime = row.rowdatetime.strftime('%H:%M:%S')
# check for duplicate start times
ws = Workout.objects.filter(starttime=workoutstarttime,
user=r)
if (len(ws) != 0):
print "Warning: This workout probably already exists in the database"
w = Workout(user=r,name=title,
date=workoutdate,workouttype=workouttype,
duration=duration,distance=totaldist,
weightcategory=r.weightcategory,
starttime=workoutstarttime,
csvfilename=csvfilename,notes=comments,
uploadedtoc2=0,summary=summary,
averagehr=averagehr,maxhr=maxhr,
startdatetime=rowdatetime)
w.save()
return w.id
return (id,message)
# Checks if user has Concept2 tokens, resets tokens if they are
# expired.
@@ -3933,12 +3815,23 @@ def workout_getstravaworkout_view(request,stravaid):
def workout_getstravaworkout_view(request,stravaid):
res = stravastuff.get_strava_workout(request.user,stravaid)
strokedata = res[1]
data = res[0]
data = res[0]
id,message = add_workout_from_strokedata(request.user,stravaid,data,strokedata,
source='strava')
w = Workout.objects.get(id=id)
w.uploadedtostrava=stravaid
w.save()
w.save()
if message:
url = reverse(workout_edit_view,
kwargs = {
'message':message,
'id':id,
})
else:
url = reverse(workout_edit_view,
kwargs = {
'id':id,
})
return HttpResponseRedirect(url)
@@ -3947,11 +3840,21 @@ def workout_getsporttracksworkout_view(request,sporttracksid):
def workout_getsporttracksworkout_view(request,sporttracksid):
res = sporttracksstuff.get_sporttracks_workout(request.user,sporttracksid)
data = res.json()
id,message = add_workout_from_stdata(request.user,sporttracksid,data)
w = Workout.objects.get(id=id)
w.uploadedtosporttracks=sporttracksid
w.save()
w.save()
if message:
url = reverse(workout_edit_view,
kwargs = {
'id':id,
'message':message,
})
else:
url = reverse(workout_edit_view,
kwargs = {
'id':id,
})
return HttpResponseRedirect(url)
@@ -3987,7 +3890,7 @@ def workout_getc2workout_view(request,c2id):
# We have stroke data
if res2.status_code == 200:
strokedata = pd.DataFrame.from_dict(res2.json()['data'])
# create the workout
# create the workout
id,message = add_workout_from_strokedata(request.user,c2id,data,strokedata,
source='c2')
w = Workout.objects.get(id=id)
@@ -4021,8 +3924,19 @@ def workout_getc2workout_view(request,c2id):
units,types,results)
rowdata.write_csv(w.csvfilename,gzip=True)
dataprep.update_strokedata(w.id,rowdata.df)
dataprep.update_strokedata(w.id,rowdata.df)
if message:
url = reverse(workout_edit_view,
kwargs = {
'message':message,
'id':id,
})
else:
url = reverse(workout_edit_view,
kwargs = {
'id':id,
})
return HttpResponseRedirect(url)
else:
@@ -5175,8 +5089,6 @@ def rower_teams_view(request,message='',successmessage=''):
myteams = Team.objects.filter(manager=request.user)
otherteams = Team.objects.filter(private='open').exclude(rower=r).exclude(manager=request.user).order_by('name')
teams.remove_expired_invites()
print "aap",otherteams
invites = TeamInvite.objects.filter(user=request.user)
requests = TeamRequest.objects.filter(user=request.user)