Private
Public Access
1
0
Files
rowsandall/rowers/dataprep.py
Sander Roosendaal ec3a9700a5 inheritance in gluing
2017-11-26 08:32:47 +01:00

2141 lines
62 KiB
Python

# All the data preparation, data cleaning and data mangling should
# be defined here
from rowers.models import Workout, StrokeData,Team
import pytz
from rowingdata import rowingdata as rrdata
from rowingdata import rower as rrower
from shutil import copyfile
from rowingdata import get_file_type, get_empower_rigging
from rowers.tasks import handle_sendemail_unrecognized
from rowers.tasks import handle_zip_file
from pandas import DataFrame, Series
from django.utils import timezone
from django.utils.timezone import get_current_timezone
from django_mailbox.models import Message,Mailbox,MessageAttachment
from time import strftime
import arrow
thetimezone = get_current_timezone()
from rowingdata import (
TCXParser, RowProParser, ErgDataParser,
CoxMateParser,
BoatCoachParser, RowPerfectParser, BoatCoachAdvancedParser,
MysteryParser, BoatCoachOTWParser,QuiskeParser,
painsledDesktopParser, speedcoachParser, ErgStickParser,
SpeedCoach2Parser, FITParser, fitsummarydata,
make_cumvalues,cumcpdata,
summarydata, get_file_type,
)
from rowers.metrics import axes
from async_messages import messages as a_messages
import os
import zipfile
import pandas as pd
import numpy as np
import itertools
import math
from tasks import (
handle_sendemail_unrecognized, handle_sendemail_breakthrough,
handle_sendemail_hard, handle_updatecp,handle_updateergcp
)
from django.conf import settings
from sqlalchemy import create_engine
import sqlalchemy as sa
import sys
import utils
import datautils
from utils import lbstoN,myqueue
from timezonefinder import TimezoneFinder
import django_rq
# Task queues (django_rq).
# NOTE(review): 'queuehigh' is bound to the 'default' queue, not a dedicated
# 'high' queue -- confirm this is intentional.
queue = django_rq.get_queue('default')
queuelow = django_rq.get_queue('low')
queuehigh = django_rq.get_queue('default')
# Database credentials pulled from the Django settings; used to build a
# direct SQLAlchemy connection that bypasses the ORM for bulk stroke data.
user = settings.DATABASES['default']['USER']
password = settings.DATABASES['default']['PASSWORD']
database_name = settings.DATABASES['default']['NAME']
host = settings.DATABASES['default']['HOST']
port = settings.DATABASES['default']['PORT']
database_url = 'mysql://{user}:{password}@{host}:{port}/{database_name}'.format(
    user=user,
    password=password,
    database_name=database_name,
    host=host,
    port=port,
)
# Use SQLite local database when we're in debug mode
if settings.DEBUG or user == '':
    # database_url = 'sqlite:///db.sqlite3'
    database_url = 'sqlite:///' + database_name
# mapping the DB column names to the CSV file column names
# (the leading spaces in several CSV headers are significant -- they come
# from the painsled CSV format and must not be stripped)
columndict = {
    'time': 'TimeStamp (sec)',
    'hr': ' HRCur (bpm)',
    'pace': ' Stroke500mPace (sec/500m)',
    'spm': ' Cadence (stokes/min)',
    'power': ' Power (watts)',
    'averageforce': ' AverageDriveForce (lbs)',
    'drivelength': ' DriveLength (meters)',
    'peakforce': ' PeakDriveForce (lbs)',
    'distance': ' Horizontal (meters)',
    'catch': 'catch',
    'finish': 'finish',
    'peakforceangle': 'peakforceangle',
    'wash': 'wash',
    'slip': 'slip',
    'workoutstate': ' WorkoutState',
    'cumdist': 'cum_dist',
}
from scipy.signal import savgol_filter
import datetime
def get_latlon(id):
    """Return [latitude, longitude] Series for workout *id*.

    Returns False when the workout does not exist, zero-filled series when
    the stroke data has no GPS columns, and a pair of empty Series when
    the CSV data could not be loaded (rdata returned 0).
    """
    try:
        w = Workout.objects.get(id=id)
    except Workout.DoesNotExist:
        return False
    rowdata = rdata(w.csvfilename)
    try:
        try:
            # fix: .ix is deprecated/removed in pandas; these are label
            # lookups, so .loc is the direct equivalent
            latitude = rowdata.df.loc[:, ' latitude']
            longitude = rowdata.df.loc[:, ' longitude']
        except KeyError:
            # no GPS columns -- return zeros with the same index/length
            latitude = 0 * rowdata.df.loc[:, 'TimeStamp (sec)']
            longitude = 0 * rowdata.df.loc[:, 'TimeStamp (sec)']
        return [latitude, longitude]
    except AttributeError:
        # rdata() returned 0 (no .df attribute)
        return [pd.Series([]), pd.Series([])]
def get_workouts(ids, userid):
    """Return the Workout objects among *ids* owned by user *userid*.

    Workouts belonging to anyone else are silently dropped.
    """
    owned = [wid for wid in ids
             if int(Workout.objects.get(id=wid).user.user.id) == int(userid)]
    return [Workout.objects.get(id=wid) for wid in owned]
def filter_df(datadf, fieldname, value, largerthan=True):
    """Blank out unwanted values in one column of *datadf*.

    With largerthan=True, entries below *value* become NaN (keeping the
    larger ones); with largerthan=False, entries >= *value* become NaN.
    A missing column leaves the frame untouched.
    """
    if fieldname not in datadf:
        return datadf
    column = datadf[fieldname]
    unwanted = (column < value) if largerthan else (column >= value)
    datadf.loc[unwanted, fieldname] = np.nan
    return datadf
# joins workouts
def join_workouts(r,ids,title='Joined Workout',
                  parent=None,
                  setprivate=False,
                  forceunit='lbs'):
    """Concatenate the workouts in *ids* (chronological order) into one
    new workout for rower *r* and store it via save_workout_database.

    Returns (new workout id, warning message or None).
    NOTE(review): forceunit and the computed startdatetime are never used
    below -- confirm whether they were meant to be passed on.
    """
    message = None
    summary = ''
    # inherit metadata/privacy from the explicit parent when given
    if parent:
        oarlength = parent.oarlength
        inboard = parent.inboard
        workouttype = parent.workouttype
        notes = parent.notes
        summary = parent.summary
        if parent.privacy == 'hidden':
            makeprivate = True
        else:
            makeprivate = False
        startdatetime = parent.startdatetime
    else:
        # defaults; mostly overwritten below once ws[0] becomes the parent
        oarlength = 2.89
        inboard = 0.88
        workouttype = 'rower'
        notes = ''
        summary = ''
        makeprivate = False
        startdatetime = timezone.now()
    # NOTE(review): this pair of branches makes makeprivate track setprivate
    # in every case, discarding the parent-derived value -- confirm intended
    if setprivate == True and makeprivate == False:
        makeprivate = True
    elif setprivate == False and makeprivate == True:
        makeprivate = False
    # reorder in chronological order
    ws = Workout.objects.filter(id__in=ids).order_by("date", "starttime")
    if not parent:
        # use the earliest workout as the source of the metadata
        parent = ws[0]
        oarlength = parent.oarlength
        inboard = parent.inboard
        workouttype = parent.workouttype
        notes = parent.notes
        summary = parent.summary
    files = [w.csvfilename for w in ws]
    # accumulate rowingdata objects with '+' (inheritance in gluing);
    # unreadable files (rdata() == 0) are skipped
    row = rdata(files[0])
    files = files[1:]
    while len(files):
        row2 = rdata(files[0])
        if row2 != 0:
            row = row+row2
        files = files[1:]
    timestr = strftime("%Y%m%d-%H%M%S")
    csvfilename = 'media/df_' + timestr + '.csv'
    row.write_csv(csvfilename,gzip=True)
    id, message = save_workout_database(csvfilename, r,
                                        workouttype=workouttype,
                                        title=title,
                                        notes=notes,
                                        oarlength=oarlength,
                                        inboard=inboard,
                                        makeprivate=makeprivate,
                                        dosmooth=False,
                                        consistencychecks=False)
    return (id, message)
def df_resample(datadf):
    """Down-sample stroke data to one averaged row per whole second.

    Adds a 'timestamps' column (integer seconds) to *datadf* as a side
    effect and returns the per-second mean of every numeric column.
    """
    datadf['timestamps'] = datadf['TimeStamp (sec)'].astype('int')
    return datadf.groupby(['timestamps']).mean()
def clean_df_stats(datadf, workstrokesonly=True, ignorehr=True,
                   ignoreadvanced=False):
    """Clean a stroke-data frame for statistics: replace zeros, negative
    values and physically implausible readings with NaN, and optionally
    drop rest strokes.

    Each per-column step is wrapped in try/except KeyError because not
    every data source provides every column.
    """
    # clean data remove zeros and negative values
    # bring metrics which have negative values to positive domain
    # (catch angles are negative by convention; peakforceangle and hr get a
    # temporary offset so that legitimate values survive the clip below)
    try:
        datadf['catch'] = -datadf['catch']
    except KeyError:
        pass
    try:
        datadf['peakforceangle'] = datadf['peakforceangle'] + 1000
    except KeyError:
        pass
    try:
        datadf['hr'] = datadf['hr'] + 10
    except KeyError:
        pass
    try:
        datadf = datadf.clip(lower=0)
    except TypeError:
        pass
    # zeros are "no reading" in this data -- turn them into NaN
    datadf.replace(to_replace=0, value=np.nan, inplace=True)
    # return from positive domain to negative
    try:
        datadf['catch'] = -datadf['catch']
    except KeyError:
        pass
    try:
        datadf['peakforceangle'] = datadf['peakforceangle'] - 1000
    except KeyError:
        pass
    try:
        datadf['hr'] = datadf['hr'] - 10
    except KeyError:
        pass
    # clean data for useful ranges per column
    if not ignorehr:
        try:
            mask = datadf['hr'] < 30
            datadf.loc[mask, 'hr'] = np.nan
        except KeyError:
            pass
    try:
        mask = datadf['spm'] < 10
        datadf.loc[mask, 'spm'] = np.nan
    except KeyError:
        pass
    # pace is stored in milliseconds per 500m; keep 60..300 sec/500m
    try:
        mask = datadf['pace'] / 1000. > 300.
        datadf.loc[mask, 'pace'] = np.nan
    except KeyError:
        pass
    try:
        mask = datadf['efficiency'] < 0.
        datadf.loc[mask, 'efficiency'] = np.nan
    except KeyError:
        pass
    try:
        mask = datadf['pace'] / 1000. < 60.
        datadf.loc[mask, 'pace'] = np.nan
    except KeyError:
        pass
    try:
        mask = datadf['spm'] > 60
        datadf.loc[mask, 'spm'] = np.nan
    except KeyError:
        pass
    try:
        mask = datadf['wash'] < 1
        datadf.loc[mask, 'wash'] = np.nan
    except KeyError:
        pass
    # advanced stroke metrics (rhythm, power, drive geometry, ...)
    # NOTE(review): indentation was reconstructed -- confirm exactly which
    # of the following checks sit under the ignoreadvanced guard
    if not ignoreadvanced:
        try:
            mask = datadf['rhythm'] < 5
            datadf.loc[mask, 'rhythm'] = np.nan
        except KeyError:
            pass
        try:
            mask = datadf['rhythm'] > 70
            datadf.loc[mask, 'rhythm'] = np.nan
        except KeyError:
            pass
        try:
            mask = datadf['power'] < 20
            datadf.loc[mask, 'power'] = np.nan
        except KeyError:
            pass
        try:
            mask = datadf['drivelength'] < 0.5
            datadf.loc[mask, 'drivelength'] = np.nan
        except KeyError:
            pass
        try:
            mask = datadf['forceratio'] < 0.2
            datadf.loc[mask, 'forceratio'] = np.nan
        except KeyError:
            pass
        try:
            mask = datadf['forceratio'] > 1.0
            datadf.loc[mask, 'forceratio'] = np.nan
        except KeyError:
            pass
        try:
            mask = datadf['drivespeed'] < 0.5
            datadf.loc[mask, 'drivespeed'] = np.nan
        except KeyError:
            pass
        try:
            mask = datadf['drivespeed'] > 4
            datadf.loc[mask, 'drivespeed'] = np.nan
        except KeyError:
            pass
        try:
            mask = datadf['driveenergy'] > 2000
            datadf.loc[mask, 'driveenergy'] = np.nan
        except KeyError:
            pass
        try:
            mask = datadf['driveenergy'] < 100
            datadf.loc[mask, 'driveenergy'] = np.nan
        except KeyError:
            pass
        try:
            mask = datadf['catch'] > -30.
            datadf.loc[mask, 'catch'] = np.nan
        except KeyError:
            pass
    # WorkoutState codes: work / rest / transition (as used by PM5 data)
    workoutstateswork = [1, 4, 5, 8, 9, 6, 7]
    workoutstatesrest = [3]
    workoutstatetransition = [0, 2, 10, 11, 12, 13]
    # workstrokesonly may arrive as the string 'True' from form input
    if workstrokesonly == 'True' or workstrokesonly == True:
        try:
            datadf = datadf[~datadf['workoutstate'].isin(workoutstatesrest)]
        except:
            # best effort: no 'workoutstate' column (or non-numeric content)
            pass
    return datadf
def getstatsfields():
    """Return (fieldlist, fielddict) of StrokeData columns useful in stats.

    fielddict maps field name -> verbose name with bookkeeping columns
    removed; fieldlist is the list of remaining field names.
    """
    # Get field names and remove those that are not useful in stats
    fields = StrokeData._meta.get_fields()
    fielddict = {field.name: field.verbose_name for field in fields}
    # Columns excluded from statistics: HR zone bookkeeping, filtered
    # duplicates (f*), raw time/distance axes and internal ids.
    # ('workoutid' and 'workoutstate' are deliberately kept.)
    excluded = (
        'ergpace', 'hr_an', 'hr_tr', 'hr_at', 'hr_ut2', 'hr_ut1',
        'time', 'distance', 'nowindpace', 'fnowindpace', 'fergpace',
        'equivergpower', 'fpace', 'pace', 'id', 'ftime', 'x_right',
        'hr_max', 'hr_bottom', 'cumdist',
    )
    for name in excluded:
        # pop with default: a schema change no longer raises KeyError
        fielddict.pop(name, None)
    # list(dict) works on both Python 2 and 3 (replaces iteritems loop)
    fieldlist = list(fielddict)
    return fieldlist, fielddict
# A string representation for time deltas
def niceformat(values):
    """Format an iterable of time deltas as MM:SS.t strings."""
    return [strfdelta(delta) for delta in values]
# A nice printable format for time delta values
def strfdelta(tdelta):
    """Format a time delta as 'MM:SS.t'.

    Accepts either a datetime.timedelta or a numpy timedelta64 (which has
    no .seconds attribute and is viewed as integer nanoseconds instead).
    Bug fix: the numpy branch used float divmod (60e9/1e9), so minutes and
    seconds were floats and the output came out as e.g. '2.0:5.0.3'; all
    pieces are now integers in both branches.
    """
    try:
        minutes, seconds = divmod(tdelta.seconds, 60)
        tenths = int(tdelta.microseconds / 1e5)
    except AttributeError:
        total_ns = int(tdelta.view(np.int64))
        minutes, seconds = divmod(total_ns, 60 * 10**9)
        seconds, rest = divmod(seconds, 10**9)
        tenths = rest // 10**8
    res = "{minutes:0>2}:{seconds:0>2}.{tenths:0>1}".format(
        minutes=int(minutes),
        seconds=int(seconds),
        tenths=int(tenths),
    )
    return res
# A nice printable format for pace values
def nicepaceformat(values):
    """Format an iterable of pace time deltas as MM:SS.t strings."""
    return list(map(strfdelta, values))
# Convert seconds to a Time Delta value, replacing NaN with a 5:50 pace
def timedeltaconv(x):
    """Convert *x* seconds to a timedelta; invalid values (NaN, inf,
    non-positive, or >= 175000) fall back to 350 s, i.e. a 5:50 pace."""
    usable = np.isfinite(x) and 0 < x < 175000
    return datetime.timedelta(seconds=x if usable else 350.)
def paceformatsecs(values):
    """Format an iterable of pace values in seconds as MM:SS.t strings,
    with invalid entries rendered as the 5:50 fallback pace."""
    return [strfdelta(timedeltaconv(secs)) for secs in values]
def getcpdata_sql(rower_id, table='cpdata'):
    """Read the stored critical-power curve for *rower_id* from *table*.

    Returns a DataFrame (possibly empty). The table name is interpolated
    (it is an internal constant, never user input); rower_id is passed as
    a bound parameter instead of being formatted into the SQL string.
    Fix: removed the unused engine.raw_connection(), which leaked a
    connection on every call.
    """
    engine = create_engine(database_url, echo=False)
    query = sa.text('SELECT * from {table} WHERE user=:rower_id;'.format(
        table=table,
    ))
    df = pd.read_sql_query(query, engine, params={'rower_id': rower_id})
    return df
def deletecpdata_sql(rower_id, table='cpdata'):
    """Delete the stored critical-power curve for *rower_id* from *table*.

    Best effort: a locked database (SQLite in debug mode) is reported and
    otherwise ignored. rower_id is a bound parameter; the table name is an
    internal constant.
    """
    engine = create_engine(database_url, echo=False)
    query = sa.text('DELETE from {table} WHERE user=:rower_id;'.format(
        table=table,
    ))
    with engine.connect() as conn, conn.begin():
        try:
            result = conn.execute(query, rower_id=rower_id)
        except Exception:
            # narrowed from a bare except; still best effort
            print("Database locked")
    conn.close()
    engine.dispose()
def updatecpdata_sql(rower_id, delta, cp, table='cpdata', distance=None):
    """Replace the stored critical-power curve for *rower_id* in *table*.

    Bug fixes:
    - the table argument is now forwarded to deletecpdata_sql; previously
      the default 'cpdata' table was always cleared, so writes to any
      other table accumulated duplicates,
    - the distance default was a mutable list [], and list has no .empty
      attribute, so calling without a distance raised AttributeError.
      None is now the sentinel; passing a non-empty Series still works.
    """
    deletecpdata_sql(rower_id, table=table)
    df = pd.DataFrame(
        {
            'delta': delta,
            'cp': cp,
            'user': rower_id
        }
    )
    if distance is not None and len(distance) > 0:
        df['distance'] = distance
    engine = create_engine(database_url, echo=False)
    with engine.connect() as conn, conn.begin():
        df.to_sql(table, engine, if_exists='append', index=False)
    conn.close()
    engine.dispose()
def runcpupdate(
        rower, type='water',
        startdate=None,
        enddate=None
):
    """Queue a critical-power update over the rower's ranking pieces.

    Bug fix: the previous defaults (timezone.now() +/- a timedelta) were
    evaluated once at import time, so a long-running worker used a stale
    date window. None is now the sentinel and the window is computed per
    call: the last 365 days through 5 days ahead.

    Returns the queued job (or the task result when settings.DEBUG).
    """
    if startdate is None:
        startdate = timezone.now() - datetime.timedelta(days=365)
    if enddate is None:
        enddate = timezone.now() + datetime.timedelta(days=5)
    if type == 'water':
        theworkouts = Workout.objects.filter(
            user=rower, rankingpiece=True,
            workouttype='water',
            startdatetime__gte=startdate,
            startdatetime__lte=enddate
        )
        table = 'cpdata'
    else:
        # erg-style workouts share one CP table
        theworkouts = Workout.objects.filter(
            user=rower, rankingpiece=True,
            workouttype__in=[
                'rower',
                'dynamic',
                'slides'
            ],
            startdatetime__gte=startdate,
            startdatetime__lte=enddate
        )
        table = 'cpergdata'
    theids = [w.id for w in theworkouts]
    if settings.DEBUG:
        job = handle_updatecp.delay(rower.id, theids, debug=True, table=table)
    else:
        job = queue.enqueue(handle_updatecp, rower.id, theids, table=table)
    return job
def fetchcperg(rower, theworkouts):
    """Return the cached erg critical-power curve and queue a refresh.

    The cached 'ergcpdata' DataFrame is returned immediately; the
    recomputation over *theworkouts* runs asynchronously (synchronously
    when settings.DEBUG). Removed an unused theids local.
    """
    thefilenames = [w.csvfilename for w in theworkouts]
    cpdf = getcpdata_sql(rower.id, table='ergcpdata')
    if settings.DEBUG:
        handle_updateergcp.delay(rower.id, thefilenames, debug=True)
    else:
        queue.enqueue(handle_updateergcp, rower.id, thefilenames)
    return cpdf
def fetchcp(rower,theworkouts,table='cpdata'):
    """Return (delta, cp, avgpower-per-workout) for the given workouts.

    Reads the cached CP curve from *table*; when the cache is empty an
    asynchronous recomputation is queued and empty lists are returned.
    NOTE(review): the empty-data path returns pd.Series while the
    cache-miss path returns plain lists -- confirm callers accept both.
    """
    # get all power data from database (plus workoutid)
    theids = [int(w.id) for w in theworkouts]
    columns = ['power','workoutid','time']
    df = getsmallrowdata_db(columns,ids=theids)
    df.dropna(inplace=True,axis=0)
    if df.empty:
        # no power data at all: zero average power for every workout
        avgpower2 = {}
        for id in theids:
            avgpower2[id] = 0
        return pd.Series([]),pd.Series([]),avgpower2
    dfgrouped = df.groupby(['workoutid'])
    avgpower2 = dict(dfgrouped.mean()['power'].astype(int))
    cpdf = getcpdata_sql(rower.id,table=table)
    if not cpdf.empty:
        return cpdf['delta'],cpdf['cp'],avgpower2
    else:
        # cache miss: queue a recompute (synchronous in DEBUG) and return
        # empty curves for now
        if settings.DEBUG:
            res = handle_updatecp.delay(rower.id,theids,debug=True,table=table)
        else:
            res = queue.enqueue(handle_updatecp,rower.id,theids,table=table)
        return [],[],avgpower2
    # unreachable (both branches above return)
    return [],[],avgpower2
# create a new workout from manually entered data
def create_row_df(r,distance,duration,startdatetime,
                  title = 'Manually added workout',notes='',
                  workouttype='rower'):
    """Synthesize a stroke-data frame for a manually entered workout
    (one fake stroke per 10 m at constant pace) and store it.

    duration is a datetime.time; returns (workout id, message).
    """
    nr_strokes = int(distance/10.)
    unixstarttime = arrow.get(startdatetime).timestamp
    totalseconds = duration.hour*3600.
    totalseconds += duration.minute*60.
    totalseconds += duration.second
    totalseconds += duration.microsecond/1.e6
    spm = 60.*nr_strokes/totalseconds
    step = totalseconds/float(nr_strokes)
    # NOTE(review): np.arange(0, stop+step, step) can yield one element
    # more or less than expected due to float rounding; if elapsed and d
    # end up with different lengths the DataFrame constructor raises --
    # confirm this is handled upstream
    elapsed = np.arange(0,totalseconds+step,step)
    dstep = distance/float(nr_strokes)
    d = np.arange(0,distance+dstep,dstep)
    unixtime = unixstarttime + elapsed
    pace = 500.*totalseconds/distance
    if workouttype in ['rower','slides','dynamic']:
        # constant power from average velocity (P = 2.8 v^3, erg formula)
        velo = distance/totalseconds
        power = 2.8*velo**3
    else:
        power = 0
    df = pd.DataFrame({
        'TimeStamp (sec)': unixtime,
        ' Horizontal (meters)': d,
        ' Cadence (stokes/min)': spm,
        ' Stroke500mPace (sec/500m)':pace,
        ' ElapsedTime (sec)':elapsed,
        ' Power (watts)':power,
    })
    timestr = strftime("%Y%m%d-%H%M%S")
    csvfilename = 'media/df_' + timestr + '.csv'
    # NOTE(review): this overwrites elapsed time with absolute unix time,
    # discarding the ' ElapsedTime (sec)' values set above -- confirm
    df[' ElapsedTime (sec)'] = df['TimeStamp (sec)']
    row = rrdata(df=df)
    row.write_csv(csvfilename, gzip = True)
    id, message = save_workout_database(csvfilename, r,
                                        title=title,
                                        notes=notes,
                                        dosmooth=False,
                                        workouttype=workouttype,
                                        consistencychecks=False,
                                        totaltime=totalseconds)
    return (id, message)
# Processes painsled CSV file to database
def save_workout_database(f2, r, dosmooth=True, workouttype='rower',
                          dosummary=True, title='Workout',
                          workoutsource='unknown',
                          notes='', totaldist=0, totaltime=0,
                          summary='',
                          makeprivate=False,
                          oarlength=2.89, inboard=0.88,
                          forceunit='lbs',
                          consistencychecks=False):
    """Create a Workout (and its stroke data) from the CSV file *f2* for
    rower *r*.

    Returns (workout id, warning message or None). Side effects: writes
    and removes files under media/, inserts Workout + stroke rows, queues
    breakthrough/hard-workout emails, shares the workout with teams.
    """
    message = None
    # power zone boundaries as a percentage of FTP
    powerperc = 100 * np.array([r.pw_ut2,
                                r.pw_ut1,
                                r.pw_at,
                                r.pw_tr, r.pw_an]) / r.ftp
    # make workout and put in database
    rr = rrower(hrmax=r.max, hrut2=r.ut2,
                hrut1=r.ut1, hrat=r.at,
                hrtr=r.tr, hran=r.an, ftp=r.ftp,
                powerperc=powerperc, powerzones=r.powerzones)
    row = rdata(f2, rower=rr)
    # sub-second sampling: resample to 1 Hz and restart the import through
    # new_workout_from_df
    dtavg = row.df['TimeStamp (sec)'].diff().mean()
    if dtavg < 1:
        newdf = df_resample(row.df)
        try:
            os.remove(f2)
        except:
            pass
        return new_workout_from_df(r, newdf,
                                   title=title)
    try:
        checks = row.check_consistency()
        allchecks = 1
        for key, value in checks.iteritems():
            if not value:
                allchecks = 0
                if consistencychecks:
                    a_messages.error(
                        r.user, 'Failed consistency check: ' + key + ', autocorrected')
                else:
                    pass
                    # a_messages.error(r.user,'Failed consistency check: '+key+', not corrected')
    except ZeroDivisionError:
        # NOTE(review): if check_consistency raises before allchecks is
        # assigned, the reference just below raises NameError -- confirm
        pass
    if not allchecks and consistencychecks:
        # row.repair()
        pass
    # NOTE(review): rdata returns 0 on failure, but row.df was already
    # dereferenced above, so this guard comes too late to help
    if row == 0:
        return (0, 'Error: CSV data file not found')
    if dosmooth:
        # auto smoothing: Savitzky-Golay filter on velocity, window sized
        # to roughly 10 seconds of samples
        pace = row.df[' Stroke500mPace (sec/500m)'].values
        velo = 500. / pace
        f = row.df['TimeStamp (sec)'].diff().mean()
        if f != 0 and not np.isnan(f):
            windowsize = 2 * (int(10. / (f))) + 1
        else:
            windowsize = 1
        if not 'originalvelo' in row.df:
            row.df['originalvelo'] = velo
        if windowsize > 3 and windowsize < len(velo):
            velo2 = savgol_filter(velo, windowsize, 3)
        else:
            velo2 = velo
        velo3 = pd.Series(velo2)
        velo3 = velo3.replace([-np.inf, np.inf], np.nan)
        velo3 = velo3.fillna(method='ffill')
        pace2 = 500. / abs(velo3)
        row.df[' Stroke500mPace (sec/500m)'] = pace2
        row.df = row.df.fillna(0)
        row.write_csv(f2, gzip=True)
    # remove the uncompressed file (write_csv produced a gzipped copy)
    try:
        os.remove(f2)
    except:
        pass
    # recalculate power data
    if workouttype == 'rower' or workouttype == 'dynamic' or workouttype == 'slides':
        try:
            row.erg_recalculatepower()
            row.write_csv(f2, gzip=True)
        except:
            pass
    averagehr = row.df[' HRCur (bpm)'].mean()
    maxhr = row.df[' HRCur (bpm)'].max()
    if totaldist == 0:
        totaldist = row.df['cum_dist'].max()
    if totaltime == 0:
        totaltime = row.df['TimeStamp (sec)'].max(
        ) - row.df['TimeStamp (sec)'].min()
        try:
            totaltime = totaltime + row.df.ix[0, ' ElapsedTime (sec)']
        except KeyError:
            pass
    if np.isnan(totaltime):
        totaltime = 0
    # split totaltime into H:M:S.t, clamping out-of-range pieces and
    # setting a warning message the first time something is clamped
    hours = int(totaltime / 3600.)
    if hours > 23:
        message = 'Warning: The workout duration was longer than 23 hours. '
        hours = 23
    minutes = int((totaltime - 3600. * hours) / 60.)
    if minutes > 59:
        minutes = 59
        if not message:
            message = 'Warning: there is something wrong with the workout duration'
    seconds = int(totaltime - 3600. * hours - 60. * minutes)
    if seconds > 59:
        seconds = 59
        if not message:
            message = 'Warning: there is something wrong with the workout duration'
    tenths = int(10 * (totaltime - 3600. * hours - 60. * minutes - seconds))
    if tenths > 9:
        tenths = 9
        if not message:
            message = 'Warning: there is something wrong with the workout duration'
    # NOTE(review): pieces are not zero-padded (e.g. "1:2:3.4") -- confirm
    # the Workout.duration field tolerates that
    duration = "%s:%s:%s.%s" % (hours, minutes, seconds, tenths)
    if dosummary:
        summary = row.allstats()
    # derive the local timezone from the GPS track when available
    timezone_str = 'UTC'
    try:
        workoutstartdatetime = timezone.make_aware(row.rowdatetime)
    except ValueError:
        workoutstartdatetime = row.rowdatetime
    try:
        latavg = row.df[' latitude'].mean()
        lonavg = row.df[' longitude'].mean()
        tf = TimezoneFinder()
        try:
            timezone_str = tf.timezone_at(lng=lonavg, lat=latavg)
        except ValueError:
            timezone_str = 'UTC'
        if timezone_str == None:
            timezone_str = tf.closest_timezone_at(lng=lonavg,
                                                  lat=latavg)
        if timezone_str == None:
            timezone_str = r.defaulttimezone
        try:
            workoutstartdatetime = pytz.timezone(timezone_str).localize(
                row.rowdatetime
            )
        except ValueError:
            workoutstartdatetime = workoutstartdatetime.astimezone(
                pytz.timezone(timezone_str)
            )
    except KeyError:
        # no GPS columns: fall back to the rower's default timezone
        timezone_str = r.defaulttimezone
    workoutdate = workoutstartdatetime.astimezone(
        pytz.timezone(timezone_str)
    ).strftime('%Y-%m-%d')
    workoutstarttime = workoutstartdatetime.astimezone(
        pytz.timezone(timezone_str)
    ).strftime('%H:%M:%S')
    if makeprivate:
        privacy = 'hidden'
    else:
        privacy = 'visible'
    # checking for inf values
    totaldist = np.nan_to_num(totaldist)
    maxhr = np.nan_to_num(maxhr)
    averagehr = np.nan_to_num(averagehr)
    # check for duplicate start times and duration
    ws = Workout.objects.filter(startdatetime=workoutstartdatetime,
                                distance=totaldist,
                                user=r)
    if (len(ws) != 0):
        message = "Warning: This workout probably already exists in the database"
        privacy = 'hidden'
    w = Workout(user=r, name=title, date=workoutdate,
                workouttype=workouttype,
                duration=duration, distance=totaldist,
                weightcategory=r.weightcategory,
                starttime=workoutstarttime,
                workoutsource=workoutsource,
                forceunit=forceunit,
                csvfilename=f2, notes=notes, summary=summary,
                maxhr=maxhr, averagehr=averagehr,
                startdatetime=workoutstartdatetime,
                inboard=inboard, oarlength=oarlength,
                timezone=timezone_str,
                privacy=privacy)
    try:
        w.save()
    except ValidationError:
        # NOTE(review): ValidationError is not among this module's visible
        # imports -- confirm it is in scope
        w.startdatetime = timezone.now()
        w.save()
    # detect breakthrough / hard workouts from the critical-power curve
    isbreakthrough = False
    ishard = False
    if workouttype == 'water':
        df = getsmallrowdata_db(['power', 'workoutid', 'time'], ids=[w.id])
        if df['power'].mean():
            thesecs = totaltime
            maxt = 1.05 * thesecs
            if maxt > 0:
                logarr = datautils.getlogarr(maxt)
                dfgrouped = df.groupby(['workoutid'])
                delta, cpvalues, avgpower = datautils.getcp(dfgrouped, logarr)
                res, btvalues, res2 = utils.isbreakthrough(
                    delta, cpvalues, r.p0, r.p1, r.p2, r.p3, r.cpratio)
            else:
                res = 0
                res2 = 0
            if res:
                isbreakthrough = True
                res = datautils.updatecp(delta, cpvalues, r)
            if res2 and not isbreakthrough:
                ishard = True
    # submit email task to send email about breakthrough workout
    if isbreakthrough:
        a_messages.info(
            r.user, 'It looks like you have a new breakthrough workout')
        if settings.DEBUG and r.getemailnotifications:
            res = handle_sendemail_breakthrough.delay(w.id, r.user.email,
                                                      r.user.first_name,
                                                      r.user.last_name,
                                                      btvalues=btvalues.to_json())
        elif r.getemailnotifications:
            try:
                # NOTE(review): this calls the task synchronously and
                # enqueues its return value -- enqueue(func, args) was
                # probably intended
                res = queuehigh.enqueue(
                    handle_sendemail_breakthrough(w.id,
                                                  r.user.email,
                                                  r.user.first_name,
                                                  r.user.last_name,
                                                  btvalues=btvalues.to_json()))
            except AttributeError:
                pass
        else:
            pass
    # submit email task to send email about a hard (non-breakthrough) workout
    if ishard:
        a_messages.info(r.user, 'That was a pretty hard workout')
        if settings.DEBUG and r.getemailnotifications:
            res = handle_sendemail_hard.delay(w.id, r.user.email,
                                              r.user.first_name,
                                              r.user.last_name,
                                              btvalues=btvalues.to_json())
        elif r.getemailnotifications:
            try:
                # NOTE(review): same synchronous-call-then-enqueue pattern
                # as above -- confirm intended
                res = queuehigh.enqueue(
                    handle_sendemail_hard(w.id,
                                          r.user.email,
                                          r.user.first_name,
                                          r.user.last_name,
                                          btvalues=btvalues.to_json()))
            except AttributeError:
                pass
        else:
            pass
    # share visible workouts with all of the rower's teams
    if privacy == 'visible':
        ts = Team.objects.filter(rower=r)
        for t in ts:
            w.team.add(t)
    # put stroke data in database
    res = dataprep(row.df, id=w.id, bands=True,
                   barchart=True, otwpower=True, empower=True, inboard=inboard)
    return (w.id, message)
def parsenonpainsled(fileformat, f2, summary):
    """Parse a non-painsled stroke file *f2* of type *fileformat*.

    Returns (row, hasrecognized, summary). The long if-chain was replaced
    by a dispatch table; an unrecognized format now raises KeyError
    instead of the previous UnboundLocalError on `row` (the sole caller,
    handle_nonpainsled, treats any exception as 'not recognized').
    """
    parsers = {
        'rp': RowProParser,
        'tcx': TCXParser,
        'mystery': MysteryParser,
        'quiske': QuiskeParser,
        'rowperfect3': RowPerfectParser,
        'ergdata': ErgDataParser,
        'coxmate': CoxMateParser,
        'bcmike': BoatCoachAdvancedParser,
        'boatcoach': BoatCoachParser,
        'boatcoachotw': BoatCoachOTWParser,
        'painsleddesktop': painsledDesktopParser,
        'speedcoach': speedcoachParser,
        'speedcoach2': SpeedCoach2Parser,
        'ergstick': ErgStickParser,
        'fit': FITParser,
    }
    row = parsers[fileformat](f2)
    hasrecognized = True
    if fileformat == 'speedcoach2':
        # best effort: EmPower rigging and full stats
        # NOTE(review): oarlength/inboard are computed but never returned,
        # exactly as in the original -- confirm whether they should be
        try:
            oarlength, inboard = get_empower_rigging(f2)
            summary = row.allstats()
        except Exception:
            pass
    if fileformat == 'fit':
        # best effort: build the summary from the FIT session records
        try:
            s = fitsummarydata(f2)
            s.setsummary()
            summary = s.summarytext
        except Exception:
            pass
    return row, hasrecognized, summary
def handle_nonpainsled(f2, fileformat, summary=''):
    """Convert a non-painsled file to a painsled-compatible CSV.

    Returns (new csv filename, summary, oarlength, inboard), or
    (0, 0, 0, 0) when the file could not be parsed or written.
    NOTE(review): oarlength/inboard are always the defaults here --
    parsenonpainsled computes rigging for speedcoach2 files but does not
    return it. Confirm whether that is a dropped feature.
    """
    oarlength = 2.89
    inboard = 0.88
    hasrecognized = False
    try:
        row,hasrecognized,summary = parsenonpainsled(fileformat,f2,summary)
    except:
        # any parser failure counts as 'not recognized'
        pass
    # Handle c2log (summary-only formats carry no stroke data)
    if (fileformat == 'c2log' or fileformat == 'rowprolog'):
        return (0,0,0,0)
    if not hasrecognized:
        return (0,0,0,0)
    f_to_be_deleted = f2
    # should delete file
    f2 = f2[:-4] + 'o.csv'
    try:
        row.write_csv(f2, gzip=True)
    except:
        return (0,0,0,0)
    # os.remove(f2)
    try:
        os.remove(f_to_be_deleted)
    except:
        # the original may already exist only as a gzipped copy
        os.remove(f_to_be_deleted + '.gz')
    return (f2, summary, oarlength, inboard)
# Create new workout from file and store it in the database
# This routine should be used everywhere in views.py and mailprocessing.py
# Currently there is code duplication
def new_workout_from_file(r, f2,
                          workouttype='rower',
                          title='Workout',
                          makeprivate=False,
                          notes=''):
    """Detect the file type of *f2*, convert it if necessary, and store
    it as a new workout for rower *r*.

    Returns (workout id, message, filename); id is 0 on failure and -1
    when a zip archive was handed off for background processing.
    """
    message = None
    try:
        fileformat = get_file_type(f2)
    except IOError:
        os.remove(f2)
        message = "Rowsandall could not process this file. The extension is supported but the file seems corrupt. Contact info@rowsandall.com if you think this is incorrect."
        return (0, message, f2)
    summary = ''
    oarlength = 2.89
    inboard = 0.88
    # zip archives: route through the mailbox pipeline as a fake message
    # with the archive attached, processed asynchronously
    if len(fileformat) == 3 and fileformat[0] == 'zip':
        f_to_be_deleted = f2
        workoutsbox = Mailbox.objects.filter(name='workouts')[0]
        msg = Message(mailbox=workoutsbox,
                      from_header=r.user.email,
                      subject = title)
        msg.save()
        # copy into the mailbox attachments area ('media/' prefix stripped)
        f3 = 'media/mailbox_attachments/'+f2[6:]
        copyfile(f2,f3)
        f3 = f3[6:]
        a = MessageAttachment(message=msg,document=f3)
        a.save()
        # res = myqueue(
        #     queuelow,
        #     handle_zip_file,
        #     r.user.email,
        #     title,
        #     f2
        # )
        return -1, message, f2
    # Some people try to upload Concept2 logbook summaries
    if fileformat == 'c2log':
        os.remove(f2)
        message = "This summary does not contain stroke data. Use the files containing stroke by stroke data."
        return (0, message, f2)
    if fileformat == 'nostrokes':
        os.remove(f2)
        message = "It looks like this file doesn't contain stroke data."
        return (0, message, f2)
    if fileformat == 'kml':
        os.remove(f2)
        message = "KML files are not supported"
        return (0, message, f2)
    # Some people upload corrupted zip files
    if fileformat == 'notgzip':
        os.remove(f2)
        message = "Rowsandall could not process this file. The extension is supported but the file seems corrupt. Contact info@rowsandall.com if you think this is incorrect."
        return (0, message, f2)
    # Some people try to upload RowPro summary logs
    if fileformat == 'rowprolog':
        os.remove(f2)
        message = "This RowPro logbook summary does not contain stroke data. Please use the Stroke Data CSV file for the individual workout in your log."
        return (0, message, f2)
    # Sometimes people try an unsupported file type.
    # Send an email to info@rowsandall.com with the file attached
    # for me to check if it is a bug, or a new file type
    # worth supporting
    if fileformat == 'unknown':
        message = "We couldn't recognize the file type"
        if settings.DEBUG:
            res = handle_sendemail_unrecognized.delay(f2,
                                                      r.user.email)
        else:
            res = queuehigh.enqueue(handle_sendemail_unrecognized,
                                    f2, r.user.email)
        return (0, message, f2)
    # handle non-Painsled by converting it to painsled compatible CSV
    if (fileformat != 'csv'):
        try:
            f2, summary, oarlength, inboard = handle_nonpainsled(f2,
                                                                 fileformat,
                                                                 summary=summary)
            if not f2:
                message = 'Something went wrong'
                return (0, message, '')
        except:
            errorstring = str(sys.exc_info()[0])
            message = 'Something went wrong: ' + errorstring
            return (0, message, '')
    # FIT files already carry their own summary (built during conversion)
    dosummary = (fileformat != 'fit')
    id, message = save_workout_database(
        f2, r,
        workouttype=workouttype,
        makeprivate=makeprivate,
        dosummary=dosummary,
        workoutsource=fileformat,
        summary=summary,
        inboard=inboard, oarlength=oarlength,
        title=title
    )
    return (id, message, f2)
def split_workout(r, parent, splitsecond, splitmode):
    """Split *parent* at *splitsecond* into up to two new workouts.

    splitmode is a string containing directives ('keep first',
    'keep second', 'keep original', 'firstprivate', 'secondprivate',
    'originalprivate'). Returns (ids, messages).
    """
    data, row = getrowdata_db(id=parent.id)
    # NOTE(review): get_latlon returns False (not a list) when the workout
    # lookup fails; the unpack below would then raise -- confirm parent
    # always exists here
    latitude, longitude = get_latlon(parent.id)
    if not latitude.empty and not longitude.empty:
        data[' latitude'] = latitude
        data[' longitude'] = longitude
    # stored time is in milliseconds; work in seconds
    data['time'] = data['time'] / 1000.
    data1 = data[data['time'] <= splitsecond].copy()
    data2 = data[data['time'] > splitsecond].copy()
    data1 = data1.sort_values(['time'])
    data1 = data1.interpolate(method='linear', axis=0, limit_direction='both',
                              limit=10)
    data1.fillna(method='bfill', inplace=True)
    # Some new stuff to try out
    data1 = data1.groupby('time', axis=0).mean()
    data1['time'] = data1.index
    data1.reset_index(drop=True, inplace=True)
    data2 = data2.sort_values(['time'])
    data2 = data2.interpolate(method='linear', axis=0, limit_direction='both',
                              limit=10)
    data2.fillna(method='bfill', inplace=True)
    # Some new stuff to try out
    data2 = data2.groupby('time', axis=0).mean()
    data2['time'] = data2.index
    data2.reset_index(drop=True, inplace=True)
    # pace back from milliseconds to seconds as well
    data1['pace'] = data1['pace'] / 1000.
    data2['pace'] = data2['pace'] / 1000.
    data1.drop_duplicates(subset='time', inplace=True)
    data2.drop_duplicates(subset='time', inplace=True)
    messages = []
    ids = []
    if 'keep first' in splitmode:
        if 'firstprivate' in splitmode:
            setprivate = True
        else:
            setprivate = False
        id, message = new_workout_from_df(r, data1,
                                          title=parent.name + ' (1)',
                                          parent=parent,
                                          setprivate=setprivate,
                                          forceunit='N')
        messages.append(message)
        ids.append(id)
    if 'keep second' in splitmode:
        # rebase distance and time so the second part starts at zero
        # NOTE(review): .ix is deprecated/removed in newer pandas
        data2['cumdist'] = data2['cumdist'] - data2.ix[0, 'cumdist']
        data2['distance'] = data2['distance'] - data2.ix[0, 'distance']
        data2['time'] = data2['time'] - data2.ix[0, 'time']
        if 'secondprivate' in splitmode:
            setprivate = True
        else:
            setprivate = False
        # shift the start datetime by the split offset
        dt = datetime.timedelta(seconds=splitsecond)
        id, message = new_workout_from_df(r, data2,
                                          title=parent.name + ' (2)',
                                          parent=parent,
                                          setprivate=setprivate,
                                          dt=dt, forceunit='N')
        messages.append(message)
        ids.append(id)
    if not 'keep original' in splitmode:
        # only delete the original when at least one part was kept
        if 'keep second' in splitmode or 'keep first' in splitmode:
            parent.delete()
            messages.append('Deleted Workout: ' + parent.name)
        else:
            messages.append('That would delete your workout')
            ids.append(parent.id)
    elif 'originalprivate' in splitmode:
        parent.privacy = 'hidden'
        parent.save()
    return ids, messages
# Create new workout from data frame and store it in the database
# This routine should be used everywhere in views.py and mailprocessing.py
# Currently there is code duplication
def new_workout_from_df(r, df,
                        title='New Workout',
                        parent=None,
                        setprivate=False,
                        forceunit='lbs',
                        dt=datetime.timedelta()):
    """Store DataFrame *df* (DB-style column names) as a new workout.

    Metadata is inherited from *parent* when given; *dt* shifts the start
    datetime (used when splitting workouts). forceunit='N' means the
    force columns are in newtons and get converted back to lbs first.
    Returns (workout id, message).
    """
    message = None
    summary = ''
    if parent:
        oarlength = parent.oarlength
        inboard = parent.inboard
        workouttype = parent.workouttype
        notes = parent.notes
        summary = parent.summary
        if parent.privacy == 'hidden':
            makeprivate = True
        else:
            makeprivate = False
        startdatetime = parent.startdatetime + dt
    else:
        oarlength = 2.89
        inboard = 0.88
        workouttype = 'rower'
        notes = ''
        summary = ''
        makeprivate = False
        startdatetime = timezone.now()
    if setprivate:
        makeprivate = True
    timestr = strftime("%Y%m%d-%H%M%S")
    csvfilename = 'media/df_' + timestr + '.csv'
    if forceunit == 'N':
        # change to lbs for now
        df['peakforce'] /= lbstoN
        df['averageforce'] /= lbstoN
    # DB column names -> painsled CSV column names
    df.rename(columns=columndict, inplace=True)
    #starttimeunix = mktime(startdatetime.utctimetuple())
    starttimeunix = arrow.get(startdatetime).timestamp
    # relative time becomes elapsed time; absolute unix time is rebuilt
    df[' ElapsedTime (sec)'] = df['TimeStamp (sec)']
    df['TimeStamp (sec)'] = df['TimeStamp (sec)'] + starttimeunix
    row = rrdata(df=df)
    row.write_csv(csvfilename, gzip=True)
    # res = df.to_csv(csvfilename+'.gz',index_label='index',
    #                 compression='gzip')
    id, message = save_workout_database(csvfilename, r,
                                        workouttype=workouttype,
                                        title=title,
                                        notes=notes,
                                        oarlength=oarlength,
                                        inboard=inboard,
                                        makeprivate=makeprivate,
                                        dosmooth=False,
                                        consistencychecks=False)
    return (id, message)
# Compare the data from the CSV file and the database
# Currently only calculates number of strokes. To be expanded with
# more elaborate testing if needed
def compare_data(id):
    """Compare stroke counts between the workout's CSV file and the
    strokedata table.

    Returns (match, ldb, lfile) where match is True only when both
    counts agree and are non-zero. Fixes: l2 was unbound when the query
    failed (NameError on return); the workout id is now a bound SQL
    parameter instead of being formatted into the statement.
    """
    row = Workout.objects.get(id=id)
    f1 = row.csvfilename
    try:
        rowdata = rdata(f1)
        l1 = len(rowdata.df)
    except AttributeError:
        # rdata returned 0: no CSV data available
        rowdata = 0
        l1 = 0
    engine = create_engine(database_url, echo=False)
    query = sa.text('SELECT COUNT(*) FROM strokedata WHERE workoutid=:id;')
    l2 = 0
    with engine.connect() as conn, conn.begin():
        try:
            res = conn.execute(query, id=id)
            l2 = res.fetchall()[0][0]
        except Exception:
            print("Database Locked")
    conn.close()
    engine.dispose()
    lfile = l1
    ldb = l2
    return l1 == l2 and l1 != 0, ldb, lfile
# Repair data for workouts where the CSV file is lost (or the DB entries
# don't exist)
def repair_data(verbose=False):
ws = Workout.objects.all()
for w in ws:
if verbose:
sys.stdout.write(".")
test, ldb, lfile = compare_data(w.id)
if not test:
if verbose:
print w.id, lfile, ldb
try:
rowdata = rdata(w.csvfilename)
if rowdata and len(rowdata.df):
update_strokedata(w.id, rowdata.df)
except IOError, AttributeError:
pass
if lfile == 0:
# if not ldb - delete workout
try:
data = read_df_sql(w.id)
try:
datalength = len(data)
except AttributeError:
datalength = 0
if datalength != 0:
data.rename(columns=columndict, inplace=True)
res = data.to_csv(w.csvfilename + '.gz',
index_label='index',
compression='gzip')
print 'adding csv file'
else:
print w.id, ' No stroke records anywhere'
w.delete()
except:
print 'failed'
print str(sys.exc_info()[0])
pass
# A wrapper around the rowingdata class, with some error catching
def rdata(file, rower=None):
    """Load *file* (or *file*.gz) as a rowingdata object; return 0 on
    failure.

    Bug fixes: `except IOError, IndexError` only caught IOError (Py2
    'as'-binding) -- the tuple form catches both; the default rower was a
    single rrower() instance created at import time and shared by every
    call, it is now created per call.
    """
    if rower is None:
        rower = rrower()
    try:
        res = rrdata(csvfile=file, rower=rower)
    except (IOError, IndexError):
        # fall back to the gzipped copy
        try:
            res = rrdata(csvfile=file + '.gz', rower=rower)
        except (IOError, IndexError):
            res = 0
    except Exception:
        res = 0
    return res
# Remove all stroke data for workout ID from database
def delete_strokedata(id):
engine = create_engine(database_url, echo=False)
query = sa.text('DELETE FROM strokedata WHERE workoutid={id};'.format(
id=id,
))
with engine.connect() as conn, conn.begin():
try:
result = conn.execute(query)
except:
print "Database Locked"
conn.close()
engine.dispose()
# Replace stroke data in DB with data from CSV file
def update_strokedata(id, df):
    """Drop the workout's DB strokes, then re-import them from `df`."""
    delete_strokedata(id)
    dataprep(df, id=id, bands=True, barchart=True, otwpower=True)
# Test that all data are of a numerical time
def testdata(time, distance, pace, spm):
t1 = np.issubdtype(time, np.number)
t2 = np.issubdtype(distance, np.number)
t3 = np.issubdtype(pace, np.number)
t4 = np.issubdtype(spm, np.number)
return t1 and t2 and t3 and t4
# Get data from DB for one workout (fetches all data). If data
# is not in DB, read from CSV file (and create DB entry)
def getrowdata_db(id=0, doclean=False, convertnewtons=True):
    """Return (stroke DataFrame, Workout) for workout `id`.

    Reads from the strokedata table; when no rows exist there, the CSV
    file is parsed and imported via dataprep(). `convertnewtons` is kept
    for interface compatibility (conversion happens in read_df_sql).
    """
    data = read_df_sql(id)
    if data.empty:
        rowdata, row = getrowdata(id=id)
        if rowdata:
            data = dataprep(rowdata.df, id=id, bands=True,
                            barchart=True, otwpower=True)
            if not isinstance(data, DataFrame):
                # dataprep returns 0 for an empty input frame
                data = pd.DataFrame()
        else:
            data = pd.DataFrame()  # returning empty dataframe
    else:
        row = Workout.objects.get(id=id)
        # scale stored x_right for plotting (original divided by 1e6
        # unconditionally, even before the empty check)
        data['x_right'] = data['x_right'] / 1.0e6
    # guard the column access so an empty frame no longer raises KeyError
    if (not data.empty and data['efficiency'].mean() == 0
            and data['power'].mean() != 0):
        data = add_efficiency(id=id)
    if doclean:
        data = clean_df_stats(data, ignorehr=True)
    return data, row
# Fetch a subset of the data from the DB
def getsmallrowdata_db(columns, ids=None, doclean=True, workstrokesonly=True):
    """Return the selected stroke columns for the given workout ids.

    `ids=None` replaces the original mutable default `ids=[]` (shared
    between calls); passing an explicit list behaves exactly as before.
    """
    if ids is None:
        ids = []
    prepmultipledata(ids)
    data = read_cols_df_sql(ids, columns)
    # force-to-newtons conversion is handled inside read_cols_df_sql
    if doclean:
        data = clean_df_stats(data, ignorehr=True,
                              workstrokesonly=workstrokesonly)
    return data
# Fetch both the workout and the workout stroke data (from CSV file)
def getrowdata(id=0):
    """Return (rowingdata object or 0, Workout) for workout `id`,
    using the owner's heart-rate bands and FTP."""
    workout = Workout.objects.get(id=id)
    profile = workout.user
    u = profile.user
    the_rower = rrower(hrmax=profile.max,
                       hrut2=profile.ut2,
                       hrut1=profile.ut1,
                       hrat=profile.at,
                       hrtr=profile.tr,
                       hran=profile.an,
                       ftp=profile.ftp)
    rowdata = rdata(workout.csvfilename, rower=the_rower)
    return rowdata, workout
# Checks if all rows for a list of workout IDs have entries in the
# stroke_data table. If this is not the case, it creates the stroke
# data
# In theory, this should never yield any work, but it's a good
# safety net for programming errors elsewhere in the app
# Also used heavily when I moved from CSV file only to CSV+Stroke data
def prepmultipledata(ids, verbose=False):
query = sa.text('SELECT DISTINCT workoutid FROM strokedata')
engine = create_engine(database_url, echo=False)
with engine.connect() as conn, conn.begin():
res = conn.execute(query)
res = list(itertools.chain.from_iterable(res.fetchall()))
conn.close()
engine.dispose()
try:
ids2 = [int(id) for id in ids]
except ValueError:
ids2 = ids
res = list(set(ids2) - set(res))
for id in res:
rowdata, row = getrowdata(id=id)
if verbose:
print id
if rowdata and len(rowdata.df):
data = dataprep(rowdata.df, id=id, bands=True,
barchart=True, otwpower=True)
return res
# Read a set of columns for a set of workout ids, returns data as a
# pandas dataframe
def read_cols_df_sql(ids, columns, convertnewtons=True):
    """Read the requested strokedata columns for the given workout ids.

    Columns not present on the StrokeData model are silently dropped;
    'distance', 'spm' and 'workoutid' are always included. Forces are
    converted to newtons for workouts recorded in lbs.
    """
    # Keep only columns that exist on the StrokeData model.
    # NOTE: the original removed items from `columns` while iterating
    # that same list, which skips the element following each removal.
    axx = [f.name for f in StrokeData._meta.get_fields()]
    columns = [c for c in columns if c in axx]
    columns = list(columns) + ['distance', 'spm', 'workoutid']
    columns = [x for x in columns if x != 'None']
    columns = list(set(columns))
    ids = [int(id) for id in ids]
    engine = create_engine(database_url, echo=False)
    cls = ', '.join(columns)
    if len(ids) == 0:
        # no ids: deliberately select nothing (workoutid 0 never exists)
        query = sa.text('SELECT {columns} FROM strokedata WHERE workoutid=0'.format(
            columns=cls,
        ))
    elif len(ids) == 1:
        query = sa.text('SELECT {columns} FROM strokedata WHERE workoutid={id}'.format(
            id=ids[0],
            columns=cls,
        ))
    else:
        query = sa.text('SELECT {columns} FROM strokedata WHERE workoutid IN {ids}'.format(
            columns=cls,
            ids=tuple(ids),
        ))
    # (the original also opened engine.raw_connection() here and leaked it)
    df = pd.read_sql_query(query, engine)
    df = df.fillna(value=0)
    # convert lbs-recorded forces to newtons, per workout
    for colname in ('peakforce', 'averageforce'):
        if colname in columns:
            funits = ((w.id, w.forceunit)
                      for w in Workout.objects.filter(id__in=ids))
            for id, u in funits:
                if u == 'lbs':
                    mask = df['workoutid'] == id
                    df.loc[mask, colname] = df.loc[mask, colname] * lbstoN
    engine.dispose()
    return df
# Read stroke data from the DB for a Workout ID. Returns a pandas dataframe
def read_df_sql(id):
    """Load all strokedata rows for workout `id` into a DataFrame,
    converting forces to newtons when the workout was recorded in lbs."""
    engine = create_engine(database_url, echo=False)
    sql = sa.text(
        'SELECT * FROM strokedata WHERE workoutid={id}'.format(id=id))
    frame = pd.read_sql_query(sql, engine)
    engine.dispose()
    frame = frame.fillna(value=0)
    funit = Workout.objects.get(id=id).forceunit
    if funit == 'lbs':
        for col in ('peakforce', 'averageforce'):
            try:
                frame[col] = frame[col] * lbstoN
            except KeyError:
                # column absent for this workout's data source
                pass
    return frame
# Get the necessary data from the strokedata table in the DB.
# For the flex plot
def smalldataprep(therows, xparam, yparam1, yparam2):
    """Build one DataFrame with the flex-plot columns for all workouts.

    `therows` is an iterable of Workout objects; the selected parameter
    columns plus 'distance' and 'spm' are concatenated across workouts.
    Unreadable CSV files are skipped.
    """
    if yparam2 == 'None':
        yparam2 = 'power'
    wanted = [xparam, yparam1, yparam2, 'distance', 'spm']
    df = pd.DataFrame()
    for c in wanted:
        df[c] = []
    for workout in therows:
        f1 = workout.csvfilename
        # try the plain filename first, then the gzipped variant
        # (the original duplicated ~25 lines across these two branches)
        try:
            rowdata = _smalldataprep_one(workout, f1, wanted)
        except IOError:
            try:
                rowdata = _smalldataprep_one(workout, f1 + '.gz', wanted)
            except IOError:
                continue
        df = pd.concat([df, rowdata], ignore_index=True)
    return df


def _smalldataprep_one(workout, filename, wanted):
    """Load one workout CSV, keep only `wanted` columns, convert forces
    to newtons when the workout was recorded in lbs. Raises IOError when
    the file cannot be read."""
    rowdata = dataprep(rrdata(filename).df)
    rowdata = pd.DataFrame({c: rowdata[c] for c in wanted})
    if workout.forceunit == 'lbs':
        for col in ('peakforce', 'averageforce'):
            try:
                rowdata[col] *= lbstoN
            except KeyError:
                pass
    return rowdata
# data fusion
def datafusion(id1, id2, columns, offset):
    """Merge selected columns of workout `id2` into workout `id1`.

    The two stroke streams are aligned on the millisecond 'time' axis
    (workout 2 shifted by `offset`, a timedelta), interpolated onto a
    common timeline and averaged per timestamp. Returns (df, forceunit)
    where forceunit is always 'N'.
    """
    workout1 = Workout.objects.get(id=id1)
    workout2 = Workout.objects.get(id=id2)
    df1, w1 = getrowdata_db(id=id1)
    # drop band/formatting/bookkeeping columns that should not be fused
    df1 = df1.drop([  # 'cumdist',
        'hr_ut2',
        'hr_ut1',
        'hr_at',
        'hr_tr',
        'hr_an',
        'hr_max',
        'ftime',
        'fpace',
        'workoutid',
        'id'],
        1, errors='ignore')
    # Add coordinates to DataFrame
    latitude, longitude = get_latlon(id1)
    df1[' latitude'] = latitude
    df1[' longitude'] = longitude
    # only the requested columns (plus time) come from workout 2
    df2 = getsmallrowdata_db(['time'] + columns, ids=[id2], doclean=False)
    forceunit = 'N'
    # convert the timedelta offset to milliseconds to match the time axis
    offsetmillisecs = offset.seconds * 1000 + offset.microseconds / 1000.
    offsetmillisecs += offset.days * (3600 * 24 * 1000)
    df2['time'] = df2['time'] + offsetmillisecs
    # the fused columns are taken from workout 2; remove them from workout 1
    keep1 = {c: c for c in set(df1.columns)}
    for c in columns:
        keep1.pop(c)
    for c in df1.columns:
        if not c in keep1:
            df1 = df1.drop(c, 1, errors='ignore')
    # stack both streams, sort on time and interpolate across the gaps
    df = pd.concat([df1, df2], ignore_index=True)
    df = df.sort_values(['time'])
    df = df.interpolate(method='linear', axis=0, limit_direction='both',
                        limit=10)
    df.fillna(method='bfill', inplace=True)
    # Some new stuff to try out
    # collapse duplicate timestamps by averaging, then restore 'time'
    df = df.groupby('time', axis=0).mean()
    df['time'] = df.index
    df.reset_index(drop=True, inplace=True)
    # back to seconds for time/pace; expose cumulative distance
    df['time'] = df['time'] / 1000.
    df['pace'] = df['pace'] / 1000.
    df['cum_dist'] = df['cumdist']
    return df, forceunit
def fix_newtons(id=0, limit=3000):
# rowdata,row = getrowdata_db(id=id,doclean=False,convertnewtons=False)
rowdata = getsmallrowdata_db(['peakforce'], ids=[id], doclean=False)
try:
#avgforce = rowdata['averageforce']
peakforce = rowdata['peakforce']
if peakforce.mean() > limit:
w = Workout.objects.get(id=id)
print "fixing ", id
rowdata = rdata(w.csvfilename)
if rowdata and len(rowdata.df):
update_strokedata(w.id, rowdata.df)
except KeyError:
pass
def add_efficiency(id=0):
    """Compute the efficiency column for workout `id` and rewrite its
    DB strokes.

    Efficiency is the equivalent-erg power (2.8 * v^3) as a percentage
    of the recorded power. Returns the updated DataFrame.
    """
    rowdata, row = getrowdata_db(id=id, doclean=False, convertnewtons=False)
    power = rowdata['power']
    pace = rowdata['pace'] / 1.0e3  # stored in ms/500m; convert to sec/500m
    velo = 500. / pace
    ergpw = 2.8 * velo**3
    efficiency = 100. * ergpw / power
    efficiency = efficiency.replace([-np.inf, np.inf], np.nan)
    # BUG FIX: fillna returns a new Series; the original discarded it
    efficiency = efficiency.fillna(method='ffill')
    rowdata['efficiency'] = efficiency
    delete_strokedata(id)
    if id != 0:
        rowdata['workoutid'] = id
        engine = create_engine(database_url, echo=False)
        with engine.connect() as conn, conn.begin():
            rowdata.to_sql('strokedata', engine,
                           if_exists='append', index=False)
            conn.close()
        engine.dispose()
    return rowdata
# This is the main routine.
# it reindexes, sorts, filters, and smooths the data, then
# saves it to the stroke_data table in the database
# Takes a rowingdata object's DataFrame as input
def dataprep(rowdatadf, id=0, bands=True, barchart=True, otwpower=True,
             empower=True, inboard=0.88, forceunit='lbs'):
    """Clean, smooth and enrich a rowingdata DataFrame.

    Caps implausible paces, Savitzky-Golay-smooths the main series,
    derives stroke metrics (rhythm, drive speed, efficiency, ...), and
    optionally adds HR bands, bar-chart edges, Empower oarlock metrics
    and OTW power columns. Returns the prepared DataFrame (0 for empty
    input); when `id` is non-zero the result is also appended to the
    strokedata table.
    """
    if rowdatadf.empty:
        return 0
    rowdatadf.set_index([range(len(rowdatadf))], inplace=True)
    t = rowdatadf.ix[:, 'TimeStamp (sec)']
    t = pd.Series(t - rowdatadf.ix[0, 'TimeStamp (sec)'])
    # cap implausible paces at 3000 sec/500m
    row_index = rowdatadf.ix[:, ' Stroke500mPace (sec/500m)'] > 3000
    rowdatadf.loc[row_index, ' Stroke500mPace (sec/500m)'] = 3000.
    p = rowdatadf.ix[:, ' Stroke500mPace (sec/500m)']
    hr = rowdatadf.ix[:, ' HRCur (bpm)']
    spm = rowdatadf.ix[:, ' Cadence (stokes/min)']
    cumdist = rowdatadf.ix[:, 'cum_dist']
    power = rowdatadf.ix[:, ' Power (watts)']
    averageforce = rowdatadf.ix[:, ' AverageDriveForce (lbs)']
    drivelength = rowdatadf.ix[:, ' DriveLength (meters)']
    try:
        workoutstate = rowdatadf.ix[:, ' WorkoutState']
    except KeyError:
        workoutstate = 0 * hr
    peakforce = rowdatadf.ix[:, ' PeakDriveForce (lbs)']
    forceratio = averageforce / peakforce
    forceratio = forceratio.fillna(value=0)
    try:
        drivetime = rowdatadf.ix[:, ' DriveTime (ms)']
        recoverytime = rowdatadf.ix[:, ' StrokeRecoveryTime (ms)']
        # percentage of the stroke cycle spent in the drive
        rhythm = 100. * drivetime / (recoverytime + drivetime)
        rhythm = rhythm.fillna(value=0)
    except:
        rhythm = 0.0 * forceratio
    # choose a smoothing window of roughly 10 seconds of samples (odd size)
    f = rowdatadf['TimeStamp (sec)'].diff().mean()
    if f != 0 and not np.isinf(f):
        windowsize = 2 * (int(10. / (f))) + 1
    else:
        windowsize = 1
    if windowsize <= 3:
        windowsize = 5
    if windowsize > 3 and windowsize < len(hr):
        spm = savgol_filter(spm, windowsize, 3)
        hr = savgol_filter(hr, windowsize, 3)
        drivelength = savgol_filter(drivelength, windowsize, 3)
        forceratio = savgol_filter(forceratio, windowsize, 3)
    try:
        t2 = t.fillna(method='ffill').apply(lambda x: timedeltaconv(x))
    except TypeError:
        t2 = 0 * t
    p2 = p.fillna(method='ffill').apply(lambda x: timedeltaconv(x))
    try:
        drivespeed = drivelength / rowdatadf[' DriveTime (ms)'] * 1.0e3
    except TypeError:
        drivespeed = 0.0 * rowdatadf['TimeStamp (sec)']
    drivespeed = drivespeed.fillna(value=0)
    try:
        driveenergy = rowdatadf['driveenergy']
    except KeyError:
        if forceunit == 'lbs':
            driveenergy = drivelength * averageforce * lbstoN
        else:
            # BUG FIX: this assignment was misspelled `drivenergy`,
            # leaving `driveenergy` unbound (NameError) on this path
            driveenergy = drivelength * averageforce
    distance = rowdatadf.ix[:, 'cum_dist']
    velo = 500. / p
    distanceperstroke = 60. * velo / spm
    data = DataFrame(
        dict(
            time=t * 1e3,
            hr=hr,
            pace=p * 1e3,
            spm=spm,
            cumdist=cumdist,
            ftime=niceformat(t2),
            fpace=nicepaceformat(p2),
            driveenergy=driveenergy,
            power=power,
            workoutstate=workoutstate,
            averageforce=averageforce,
            drivelength=drivelength,
            peakforce=peakforce,
            forceratio=forceratio,
            distance=distance,
            drivespeed=drivespeed,
            rhythm=rhythm,
            distanceperstroke=distanceperstroke,
        )
    )
    if bands:
        # HR bands
        data['hr_ut2'] = rowdatadf.ix[:, 'hr_ut2']
        data['hr_ut1'] = rowdatadf.ix[:, 'hr_ut1']
        data['hr_at'] = rowdatadf.ix[:, 'hr_at']
        data['hr_tr'] = rowdatadf.ix[:, 'hr_tr']
        data['hr_an'] = rowdatadf.ix[:, 'hr_an']
        data['hr_max'] = rowdatadf.ix[:, 'hr_max']
        data['hr_bottom'] = 0.0 * data['hr']
    # ensure an ElapsedTime column exists for the bar chart below
    try:
        tel = rowdatadf.ix[:, ' ElapsedTime (sec)']
    except KeyError:
        rowdatadf[' ElapsedTime (sec)'] = rowdatadf['TimeStamp (sec)']
    if barchart:
        # time increments for bar chart
        time_increments = rowdatadf.ix[:, ' ElapsedTime (sec)'].diff()
        time_increments[0] = time_increments[1]
        # clip negative increments to zero
        time_increments = 0.5 * time_increments + 0.5 * np.abs(time_increments)
        x_right = (t2 + time_increments.apply(lambda x: timedeltaconv(x)))
        data['x_right'] = x_right
    if empower:
        # Empower oarlock metrics; default every missing series to zeros
        try:
            wash = rowdatadf.ix[:, 'wash']
        except KeyError:
            wash = 0 * power
        try:
            catch = rowdatadf.ix[:, 'catch']
        except KeyError:
            catch = 0 * power
        try:
            finish = rowdatadf.ix[:, 'finish']
        except KeyError:
            finish = 0 * power
        try:
            peakforceangle = rowdatadf.ix[:, 'peakforceangle']
        except KeyError:
            peakforceangle = 0 * power
        if data['driveenergy'].mean() == 0:
            try:
                driveenergy = rowdatadf.ix[:, 'driveenergy']
            except KeyError:
                driveenergy = power * 60 / spm
        else:
            driveenergy = data['driveenergy']
        # arc length swept by the blade; prefer it over reported drive length
        arclength = (inboard - 0.05) * (np.radians(finish) - np.radians(catch))
        if arclength.mean() > 0:
            drivelength = arclength
        elif drivelength.mean() == 0:
            drivelength = driveenergy / (averageforce * 4.44822)
        try:
            slip = rowdatadf.ix[:, 'slip']
        except KeyError:
            slip = 0 * power
        try:
            totalangle = finish - catch
            effectiveangle = finish - wash - catch - slip
        except ValueError:
            totalangle = 0 * power
            effectiveangle = 0 * power
        if windowsize > 3 and windowsize < len(slip):
            try:
                wash = savgol_filter(wash, windowsize, 3)
            except TypeError:
                pass
            try:
                slip = savgol_filter(slip, windowsize, 3)
            except TypeError:
                pass
            try:
                catch = savgol_filter(catch, windowsize, 3)
            except TypeError:
                pass
            try:
                finish = savgol_filter(finish, windowsize, 3)
            except TypeError:
                pass
            try:
                peakforceangle = savgol_filter(peakforceangle, windowsize, 3)
            except TypeError:
                pass
            try:
                driveenergy = savgol_filter(driveenergy, windowsize, 3)
            except TypeError:
                pass
            try:
                drivelength = savgol_filter(drivelength, windowsize, 3)
            except TypeError:
                pass
            try:
                totalangle = savgol_filter(totalangle, windowsize, 3)
            except TypeError:
                pass
            try:
                effectiveangle = savgol_filter(effectiveangle, windowsize, 3)
            except TypeError:
                pass
        velo = 500. / p
        ergpw = 2.8 * velo**3
        efficiency = 100. * ergpw / power
        efficiency = efficiency.replace([-np.inf, np.inf], np.nan)
        # BUG FIX: fillna returns a new Series; the original discarded it
        efficiency = efficiency.fillna(method='ffill')
        try:
            data['wash'] = wash
            data['catch'] = catch
            data['slip'] = slip
            data['finish'] = finish
            data['peakforceangle'] = peakforceangle
            data['driveenergy'] = driveenergy
            data['drivelength'] = drivelength
            data['totalangle'] = totalangle
            data['effectiveangle'] = effectiveangle
            data['efficiency'] = efficiency
        except ValueError:
            pass
    if otwpower:
        try:
            nowindpace = rowdatadf.ix[:, 'nowindpace']
        except KeyError:
            nowindpace = p
        try:
            equivergpower = rowdatadf.ix[:, 'equivergpower']
        except KeyError:
            equivergpower = 0 * p + 50.
        nowindpace2 = nowindpace.apply(lambda x: timedeltaconv(x))
        # invert P = 2.8 v^3 to an equivalent erg pace
        ergvelo = (equivergpower / 2.8)**(1. / 3.)
        ergpace = 500. / ergvelo
        ergpace[ergpace == np.inf] = 240.
        ergpace2 = ergpace.apply(lambda x: timedeltaconv(x))
        data['ergpace'] = ergpace * 1e3
        data['nowindpace'] = nowindpace * 1e3
        data['equivergpower'] = equivergpower
        data['fergpace'] = nicepaceformat(ergpace2)
        data['fnowindpace'] = nicepaceformat(nowindpace2)
    data = data.replace([-np.inf, np.inf], np.nan)
    data = data.fillna(method='ffill')
    # write data if id given
    if id != 0:
        data['workoutid'] = id
        engine = create_engine(database_url, echo=False)
        with engine.connect() as conn, conn.begin():
            data.to_sql('strokedata', engine, if_exists='append', index=False)
            conn.close()
        engine.dispose()
    return data