Private
Public Access
1
0
Files
rowsandall/rowers/dataprep.py
2017-10-26 16:39:02 +02:00

1911 lines
56 KiB
Python

# All the data preparation, data cleaning and data mangling should
# be defined here
from rowers.models import Workout, User, Rower, StrokeData
from rowingdata import rowingdata as rrdata
from rowers.tasks import handle_sendemail_unrecognized
from rowers.tasks import handle_zip_file
import pytz
from rowingdata import rower as rrower
from rowingdata import main as rmain
from rowingdata import get_file_type, get_empower_rigging
from pandas import DataFrame, Series
from pytz import timezone as tz, utc
from django.utils import timezone
from time import strftime, strptime, mktime, time, daylight
import arrow
from django.utils.timezone import get_current_timezone
# Django's current time zone, looked up once when this module is imported
thetimezone = get_current_timezone()
from rowingdata import (
TCXParser, RowProParser, ErgDataParser,
CoxMateParser,
BoatCoachParser, RowPerfectParser, BoatCoachAdvancedParser,
MysteryParser, BoatCoachOTWParser,
painsledDesktopParser, speedcoachParser, ErgStickParser,
SpeedCoach2Parser, FITParser, fitsummarydata,
make_cumvalues,cumcpdata,
summarydata, get_file_type,
)
from rowers.models import Team
from rowers.metrics import axes
from async_messages import messages as a_messages
import os
import zipfile
import pandas as pd
import numpy as np
import itertools
import math
from tasks import (
handle_sendemail_unrecognized, handle_sendemail_breakthrough,
handle_sendemail_hard, handle_updatecp,handle_updateergcp
)
from django.conf import settings
from sqlalchemy import create_engine
import sqlalchemy as sa
import sys
import utils
import datautils
from utils import lbstoN
from scipy.interpolate import griddata
from timezonefinder import TimezoneFinder
import django_rq
# django_rq queues used to schedule background tasks from this module
queue = django_rq.get_queue('default')
queuelow = django_rq.get_queue('low')
# NOTE(review): queuehigh is bound to the 'default' queue, so it is the same
# queue as `queue` above -- confirm that this is intentional
queuehigh = django_rq.get_queue('default')
# Connection settings of the default Django database, reused to build a
# SQLAlchemy URL for the raw SQL / pandas access in this module
user = settings.DATABASES['default']['USER']
password = settings.DATABASES['default']['PASSWORD']
database_name = settings.DATABASES['default']['NAME']
host = settings.DATABASES['default']['HOST']
port = settings.DATABASES['default']['PORT']
# NOTE(review): user and password are inserted into the URL as-is; special
# characters in them are not URL-escaped -- confirm this cannot happen
database_url = 'mysql://{user}:{password}@{host}:{port}/{database_name}'.format(
user=user,
password=password,
database_name=database_name,
host=host,
port=port,
)
# Use SQLite local database when we're in debug mode
# (also used when no database user is configured)
if settings.DEBUG or user == '':
# database_url = 'sqlite:///db.sqlite3'
database_url = 'sqlite:///' + database_name
# mapping the DB column names to the CSV file column names
# (several CSV column names start with a space; that space is part of the name)
columndict = {
'time': 'TimeStamp (sec)',
'hr': ' HRCur (bpm)',
'pace': ' Stroke500mPace (sec/500m)',
'spm': ' Cadence (stokes/min)',
'power': ' Power (watts)',
'averageforce': ' AverageDriveForce (lbs)',
'drivelength': ' DriveLength (meters)',
'peakforce': ' PeakDriveForce (lbs)',
'distance': ' Horizontal (meters)',
'catch': 'catch',
'finish': 'finish',
'peakforceangle': 'peakforceangle',
'wash': 'wash',
'slip': 'slip',
'workoutstate': ' WorkoutState',
'cumdist': 'cum_dist',
}
from scipy.signal import savgol_filter
import datetime
def get_latlon(id):
    """Return the latitude and longitude columns of a workout's CSV data.

    Args:
        id: primary key of the Workout to read.

    Returns:
        ``False`` when no Workout with this id exists. Otherwise a list
        ``[latitude, longitude]`` of pandas Series:

        * the ' latitude' and ' longitude' columns when both are present;
        * two all-zero Series (same length as the 'TimeStamp (sec)'
          column) when either column is missing;
        * two empty Series when the CSV file could not be read (``rdata``
          returns 0 in that case, which has no ``df`` attribute).
    """
    try:
        w = Workout.objects.get(id=id)
    except Workout.DoesNotExist:
        # Kept for backward compatibility: callers get False, not a pair.
        return False

    rowdata = rdata(w.csvfilename)

    try:
        try:
            # .loc replaces the deprecated .ix indexer; the lookup is
            # purely label based, so the selected data is the same.
            latitude = rowdata.df.loc[:, ' latitude']
            longitude = rowdata.df.loc[:, ' longitude']
        except KeyError:
            # No GPS columns: fall back to zeros aligned with the time axis
            latitude = 0 * rowdata.df.loc[:, 'TimeStamp (sec)']
            longitude = 0 * rowdata.df.loc[:, 'TimeStamp (sec)']
        return [latitude, longitude]
    except AttributeError:
        # rowdata is 0 (file unreadable) or has no usable data frame
        return [pd.Series([]), pd.Series([])]
def get_workouts(ids, userid):
    """Return the workouts from ``ids`` that belong to the given user.

    Args:
        ids: iterable of Workout primary keys.
        userid: id of the Django user that must own the workouts (compared
            as an integer against ``workout.user.user.id``).

    Returns:
        A list of Workout objects, in the order of ``ids``, restricted to
        those owned by ``userid``.

    Raises:
        Workout.DoesNotExist: if one of the ids does not exist.
    """
    owned = []
    for workout_id in ids:
        workout = Workout.objects.get(id=workout_id)
        if int(workout.user.user.id) == int(userid):
            # Keep the object we already fetched instead of querying the
            # database a second time for the same row.
            owned.append(workout)
    return owned
def filter_df(datadf, fieldname, value, largerthan=True):
    """Blank out (set to NaN) the entries of one column that fail a threshold.

    With ``largerthan=True`` only values >= ``value`` are kept, otherwise
    only values < ``value`` are kept. The frame is modified in place and
    also returned. A frame without the column is returned untouched.
    """
    try:
        column = datadf[fieldname]
    except KeyError:
        return datadf

    rejected = (column < value) if largerthan else (column >= value)
    datadf.loc[rejected, fieldname] = np.nan
    return datadf
def df_resample(datadf):
    """Average all rows that fall within the same whole second.

    'TimeStamp (sec)' must hold seconds; it is truncated to an integer and
    stored in a new 'timestamps' column of ``datadf`` (the input frame is
    modified). Returns a new frame indexed by 'timestamps' holding the
    per-second mean of every column.
    """
    datadf['timestamps'] = datadf['TimeStamp (sec)'].astype('int')
    return datadf.groupby(['timestamps']).mean()
# Clean a stroke data frame for statistics: zeros, negative values and
# values outside a plausible range per column are replaced by NaN, and
# (optionally) rest strokes are dropped. Returns the cleaned frame.
#   workstrokesonly: True or the string 'True' drops rows whose
#                    'workoutstate' is a rest state
#   ignorehr:        when False, heart rates below 30 are blanked as well
#   ignoreadvanced:  when False, the advanced metrics are range checked too
# NOTE(review): the indentation of this copy is lost, so it cannot be told
# from here which of the range checks below are nested under
# `if not ignorehr:` / `if not ignoreadvanced:` -- confirm against the
# original file before restructuring.
def clean_df_stats(datadf, workstrokesonly=True, ignorehr=True,
ignoreadvanced=False):
# clean data remove zeros and negative values
# bring metrics which have negative values to positive domain
# (catch is negated, peakforceangle and hr are offset, so that the
# clip / zero replacement below does not blank legitimate values)
try:
datadf['catch'] = -datadf['catch']
except KeyError:
pass
try:
datadf['peakforceangle'] = datadf['peakforceangle'] + 1000
except KeyError:
pass
try:
datadf['hr'] = datadf['hr'] + 10
except KeyError:
pass
# negative values become 0 (skipped when clip raises TypeError) ...
try:
datadf = datadf.clip(lower=0)
except TypeError:
pass
# ... and every 0 becomes NaN
datadf.replace(to_replace=0, value=np.nan, inplace=True)
# return from positive domain to negative
try:
datadf['catch'] = -datadf['catch']
except KeyError:
pass
try:
datadf['peakforceangle'] = datadf['peakforceangle'] - 1000
except KeyError:
pass
try:
datadf['hr'] = datadf['hr'] - 10
except KeyError:
pass
# clean data for useful ranges per column
# (each check is skipped silently when the column is absent)
if not ignorehr:
try:
mask = datadf['hr'] < 30
datadf.loc[mask, 'hr'] = np.nan
except KeyError:
pass
try:
mask = datadf['spm'] < 10
datadf.loc[mask, 'spm'] = np.nan
except KeyError:
pass
# pace is divided by 1000 before it is compared with the limits of 300
# and 60 -- presumably milliseconds per 500m vs. seconds; TODO confirm
try:
mask = datadf['pace'] / 1000. > 300.
datadf.loc[mask, 'pace'] = np.nan
except KeyError:
pass
try:
mask = datadf['efficiency'] < 0.
datadf.loc[mask, 'efficiency'] = np.nan
except KeyError:
pass
try:
mask = datadf['pace'] / 1000. < 60.
datadf.loc[mask, 'pace'] = np.nan
except KeyError:
pass
try:
mask = datadf['spm'] > 60
datadf.loc[mask, 'spm'] = np.nan
except KeyError:
pass
try:
mask = datadf['wash'] < 1
datadf.loc[mask, 'wash'] = np.nan
except KeyError:
pass
if not ignoreadvanced:
try:
mask = datadf['rhythm'] < 5
datadf.loc[mask, 'rhythm'] = np.nan
except KeyError:
pass
try:
mask = datadf['rhythm'] > 70
datadf.loc[mask, 'rhythm'] = np.nan
except KeyError:
pass
try:
mask = datadf['power'] < 20
datadf.loc[mask, 'power'] = np.nan
except KeyError:
pass
try:
mask = datadf['drivelength'] < 0.5
datadf.loc[mask, 'drivelength'] = np.nan
except KeyError:
pass
try:
mask = datadf['forceratio'] < 0.2
datadf.loc[mask, 'forceratio'] = np.nan
except KeyError:
pass
try:
mask = datadf['forceratio'] > 1.0
datadf.loc[mask, 'forceratio'] = np.nan
except KeyError:
pass
try:
mask = datadf['drivespeed'] < 0.5
datadf.loc[mask, 'drivespeed'] = np.nan
except KeyError:
pass
try:
mask = datadf['drivespeed'] > 4
datadf.loc[mask, 'drivespeed'] = np.nan
except KeyError:
pass
try:
mask = datadf['driveenergy'] > 2000
datadf.loc[mask, 'driveenergy'] = np.nan
except KeyError:
pass
try:
mask = datadf['driveenergy'] < 100
datadf.loc[mask, 'driveenergy'] = np.nan
except KeyError:
pass
try:
mask = datadf['catch'] > -30.
datadf.loc[mask, 'catch'] = np.nan
except KeyError:
pass
# Workout state codes; only workoutstatesrest is used below, the other two
# lists are not referenced in this function
workoutstateswork = [1, 4, 5, 8, 9, 6, 7]
workoutstatesrest = [3]
workoutstatetransition = [0, 2, 10, 11, 12, 13]
# workstrokesonly may arrive as the string 'True' as well as a boolean
if workstrokesonly == 'True' or workstrokesonly == True:
try:
datadf = datadf[~datadf['workoutstate'].isin(workoutstatesrest)]
except:
# any failure here (e.g. no 'workoutstate' column) leaves the frame as is
pass
return datadf
def getstatsfields():
    """List the StrokeData fields that are meaningful for statistics.

    Returns a tuple ``(fieldlist, fielddict)``: ``fielddict`` maps field
    name to verbose name for every StrokeData field except the ones
    excluded below, and ``fieldlist`` holds its keys.
    """
    fielddict = dict(
        (field.name, field.verbose_name)
        for field in StrokeData._meta.get_fields()
    )

    # Fields that are not useful in stats. 'workoutid' and 'workoutstate'
    # are deliberately kept. A missing name raises KeyError.
    not_useful = (
        'ergpace',
        'hr_an',
        'hr_tr',
        'hr_at',
        'hr_ut2',
        'hr_ut1',
        'time',
        'distance',
        'nowindpace',
        'fnowindpace',
        'fergpace',
        'equivergpower',
        'fpace',
        'pace',
        'id',
        'ftime',
        'x_right',
        'hr_max',
        'hr_bottom',
        'cumdist',
    )
    for name in not_useful:
        fielddict.pop(name)

    fieldlist = list(fielddict)
    return fieldlist, fielddict
# A string representation for time deltas
def niceformat(values):
    """Return the list of ``strfdelta`` strings for each item of ``values``."""
    return [strfdelta(value) for value in values]
# A nice printable format for time delta values
def strfdelta(tdelta):
    """Format a time delta as ``MM:SS.t`` (minutes, seconds, tenths).

    Args:
        tdelta: either an object with ``seconds`` and ``microseconds``
            attributes (``datetime.timedelta``), or a value whose
            ``view(np.int64)`` is a nanosecond count (for example a
            ``numpy.timedelta64[ns]`` scalar).

    Returns:
        The formatted string, e.g. ``'02:05.3'``. For timedelta objects
        only the ``seconds`` attribute is used, so whole days are ignored.
    """
    try:
        minutes, seconds = divmod(tdelta.seconds, 60)
        tenths = int(tdelta.microseconds / 1e5)
    except AttributeError:
        # Integer arithmetic throughout: the float divisors used before
        # produced float minutes/seconds, rendered as e.g. '2.0:5.0.3'.
        total_ns = int(tdelta.view(np.int64))
        minutes, remainder = divmod(total_ns, 60 * 10 ** 9)
        seconds, remainder = divmod(remainder, 10 ** 9)
        tenths = remainder // 10 ** 8

    return "{minutes:0>2}:{seconds:0>2}.{tenths:0>1}".format(
        minutes=minutes,
        seconds=seconds,
        tenths=tenths,
    )
# A nice printable format for pace values
def nicepaceformat(values):
    """Return a list with every pace in ``values`` formatted by ``strfdelta``."""
    return list(map(strfdelta, values))
# Convert seconds to a Time Delta value, replacing NaN with a 5:50 pace
def timedeltaconv(x):
    """Convert a number of seconds to a ``datetime.timedelta``.

    Values that are not finite, not strictly positive, or not below
    175000 are replaced by 350 seconds (5:50).
    """
    usable = np.isfinite(x) and 0 < x < 175000
    if not usable:
        return datetime.timedelta(seconds=350.)
    return datetime.timedelta(seconds=x)
def paceformatsecs(values):
    """Format paces given in seconds as strings.

    Each value goes through ``timedeltaconv`` (which substitutes a default
    for unusable values) and is then rendered by ``strfdelta``.
    """
    return [strfdelta(timedeltaconv(secs)) for secs in values]
def getcpdata_sql(rower_id, table='cpdata'):
    """Read the stored critical power rows of one rower.

    Args:
        rower_id: value matched against the ``user`` column.
        table: name of the table to read ('cpdata' by default).

    Returns:
        A pandas DataFrame with all columns of the matching rows.
    """
    engine = create_engine(database_url, echo=False)
    # NOTE(review): table and rower_id are formatted straight into the SQL
    # text; they must never come from untrusted input.
    query = sa.text('SELECT * from {table} WHERE user={rower_id};'.format(
        rower_id=rower_id,
        table=table,
    ))
    try:
        # pandas takes its own connection from the engine; the raw
        # connection that used to be opened here was never used or closed.
        return pd.read_sql_query(query, engine)
    finally:
        engine.dispose()
def deletecpdata_sql(rower_id, table='cpdata'):
    """Delete the stored critical power rows of one rower.

    Best effort: a failing DELETE is reported on stdout and otherwise
    ignored.

    Args:
        rower_id: value matched against the ``user`` column.
        table: name of the table to delete from ('cpdata' by default).
    """
    engine = create_engine(database_url, echo=False)
    # NOTE(review): table and rower_id are formatted straight into the SQL
    # text; they must never come from untrusted input.
    query = sa.text('DELETE from {table} WHERE user={rower_id};'.format(
        rower_id=rower_id,
        table=table,
    ))
    try:
        # The context managers end the transaction and then close the
        # connection, in that order; no explicit close() inside the
        # transaction block.
        with engine.connect() as conn, conn.begin():
            try:
                conn.execute(query)
            except Exception:
                print("Database locked")
    finally:
        engine.dispose()
def updatecpdata_sql(rower_id, delta, cp, table='cpdata', distance=None):
    """Replace the stored critical power data of one rower.

    The rower's existing rows in ``table`` are deleted, then one row per
    (delta, cp) pair is appended.

    Args:
        rower_id: value written to (and matched against) the ``user`` column.
        delta: values for the ``delta`` column.
        cp: values for the ``cp`` column.
        table: target table ('cpdata' by default).
        distance: optional values for a ``distance`` column; ``None`` or an
            empty sequence/Series means the column is not written.
    """
    # Delete from the table that is about to be written. The table name
    # used to be dropped here, so 'cpdata' was always the one cleared.
    deletecpdata_sql(rower_id, table=table)

    df = pd.DataFrame(
        {
            'delta': delta,
            'cp': cp,
            'user': rower_id,
        }
    )

    # len() works for lists and Series alike; the previous `.empty` test
    # raised AttributeError for the (list) default value.
    if distance is not None and len(distance) > 0:
        df['distance'] = distance

    engine = create_engine(database_url, echo=False)
    try:
        df.to_sql(table, engine, if_exists='append', index=False)
    finally:
        engine.dispose()
def runcpupdate(rower):
    """Schedule a critical power update for a rower.

    Selects the rower's ranking pieces of type 'water' that started
    between 365 days ago and 5 days from now, and hands their ids to the
    ``handle_updatecp`` task (via ``.delay`` in DEBUG mode, via the
    default queue otherwise).
    """
    startdate = timezone.now() - datetime.timedelta(days=365)
    enddate = timezone.now() + datetime.timedelta(days=5)

    ranking_pieces = Workout.objects.filter(
        user=rower,
        rankingpiece=True,
        workouttype='water',
        startdatetime__gte=startdate,
        startdatetime__lte=enddate,
    )
    theids = [piece.id for piece in ranking_pieces]

    if not settings.DEBUG:
        queue.enqueue(handle_updatecp, rower.id, theids)
        return
    handle_updatecp.delay(rower.id, theids, debug=True)
def fetchcperg(rower, theworkouts):
    """Return the stored 'ergcpdata' rows of a rower and schedule a refresh.

    The currently stored data is read first and returned; the
    ``handle_updateergcp`` task is then started with the CSV file names of
    ``theworkouts``.
    """
    # The id list is built as before although it is not used further on.
    theids = [int(workout.id) for workout in theworkouts]
    thefilenames = [workout.csvfilename for workout in theworkouts]

    cpdf = getcpdata_sql(rower.id, table='ergcpdata')

    task_args = (rower.id, thefilenames)
    if settings.DEBUG:
        handle_updateergcp.delay(*task_args, debug=True)
    else:
        queue.enqueue(handle_updateergcp, *task_args)

    return cpdf
def fetchcp(rower, theworkouts):
    """Return the stored critical power curve and per-workout average power.

    Args:
        rower: the rower whose 'cpdata' rows are read.
        theworkouts: iterable of Workout objects.

    Returns:
        A tuple ``(delta, cp, avgpower)``. ``avgpower`` maps workout id to
        the integer mean power of that workout. ``delta`` and ``cp`` are
        the stored columns, or two empty lists when nothing is stored yet;
        in that case a ``handle_updatecp`` task is started.
    """
    # get all power data from database (plus workoutid)
    theids = [int(w.id) for w in theworkouts]
    columns = ['power', 'workoutid', 'time']
    df = getsmallrowdata_db(columns, ids=theids)

    meanpower = df.groupby(['workoutid']).mean()['power']
    # A workout without usable power values has a NaN mean, which cannot
    # be cast to int; report it as 0 instead of failing.
    avgpower2 = dict(meanpower.fillna(0).astype(int))

    cpdf = getcpdata_sql(rower.id)
    if not cpdf.empty:
        return cpdf['delta'], cpdf['cp'], avgpower2

    # Nothing stored yet: start the calculation and return empty curves.
    if settings.DEBUG:
        handle_updatecp.delay(rower.id, theids, debug=True)
    else:
        queue.enqueue(handle_updatecp, rower.id, theids)
    return [], [], avgpower2
# Processes painsled CSV file to database
# Reads the CSV file f2 for rower r, optionally smooths the pace, derives
# duration / distance / heart rate / start time, creates and saves a Workout,
# checks for breakthrough and hard workouts (workouttype 'water' only) and
# finally stores the stroke data through dataprep().
# Returns a tuple (workout id, message); message is None or a warning text,
# and (0, 'Error: CSV data file not found') is returned when row == 0.
# NOTE(review): the indentation of this copy is lost; the review notes below
# describe the statement order only and should be confirmed against the
# original file.
def save_workout_database(f2, r, dosmooth=True, workouttype='rower',
dosummary=True, title='Workout',
workoutsource='unknown',
notes='', totaldist=0, totaltime=0,
summary='',
makeprivate=False,
oarlength=2.89, inboard=0.88,
forceunit='lbs',
consistencychecks=False):
message = None
# the rower's power zone settings as a percentage of ftp
powerperc = 100 * np.array([r.pw_ut2,
r.pw_ut1,
r.pw_at,
r.pw_tr, r.pw_an]) / r.ftp
# make workout and put in database
rr = rrower(hrmax=r.max, hrut2=r.ut2,
hrut1=r.ut1, hrat=r.at,
hrtr=r.tr, hran=r.an, ftp=r.ftp,
powerperc=powerperc, powerzones=r.powerzones)
row = rdata(f2, rower=rr)
# Mean time step below 1: average the data per whole second (df_resample)
# and create the workout from the resampled frame instead.
# NOTE(review): only r and title are passed on here, the other arguments
# of this call are not forwarded.
dtavg = row.df['TimeStamp (sec)'].diff().mean()
if dtavg < 1:
newdf = df_resample(row.df)
try:
os.remove(f2)
except:
pass
return new_workout_from_df(r, newdf,
title=title)
try:
checks = row.check_consistency()
allchecks = 1
for key, value in checks.iteritems():
if not value:
allchecks = 0
if consistencychecks:
a_messages.error(
r.user, 'Failed consistency check: ' + key + ', autocorrected')
else:
pass
# a_messages.error(r.user,'Failed consistency check: '+key+', not corrected')
except ZeroDivisionError:
pass
# NOTE(review): allchecks is only assigned inside the try above; if
# check_consistency() raises ZeroDivisionError it looks unbound here.
if not allchecks and consistencychecks:
# row.repair()
pass
# NOTE(review): rdata() returns 0 for an unreadable file, but row.df has
# already been used above, so this check looks like it comes too late.
if row == 0:
return (0, 'Error: CSV data file not found')
if dosmooth:
# auto smoothing
# pace is converted to a velocity (500 / pace), filtered with a
# Savitzky-Golay filter (polynomial order 3) whose window is derived from
# the mean time step, and converted back to a pace
pace = row.df[' Stroke500mPace (sec/500m)'].values
velo = 500. / pace
f = row.df['TimeStamp (sec)'].diff().mean()
if f != 0 and not np.isnan(f):
windowsize = 2 * (int(10. / (f))) + 1
else:
windowsize = 1
# keep the unsmoothed velocity unless the file already has one
if not 'originalvelo' in row.df:
row.df['originalvelo'] = velo
if windowsize > 3 and windowsize < len(velo):
velo2 = savgol_filter(velo, windowsize, 3)
else:
velo2 = velo
# infinite velocities are replaced by the previous finite value
velo3 = pd.Series(velo2)
velo3 = velo3.replace([-np.inf, np.inf], np.nan)
velo3 = velo3.fillna(method='ffill')
pace2 = 500. / abs(velo3)
row.df[' Stroke500mPace (sec/500m)'] = pace2
row.df = row.df.fillna(0)
# written with gzip=True, after which the path f2 itself is removed if it
# exists -- presumably the gzipped copy gets a different name; TODO confirm
row.write_csv(f2, gzip=True)
try:
os.remove(f2)
except:
pass
# recalculate power data
if workouttype == 'rower' or workouttype == 'dynamic' or workouttype == 'slides':
try:
row.erg_recalculatepower()
row.write_csv(f2, gzip=True)
except:
pass
averagehr = row.df[' HRCur (bpm)'].mean()
maxhr = row.df[' HRCur (bpm)'].max()
# totaldist / totaltime of 0 mean "derive it from the data"
if totaldist == 0:
totaldist = row.df['cum_dist'].max()
if totaltime == 0:
totaltime = row.df['TimeStamp (sec)'].max(
) - row.df['TimeStamp (sec)'].min()
try:
totaltime = totaltime + row.df.ix[0, ' ElapsedTime (sec)']
except KeyError:
pass
if np.isnan(totaltime):
totaltime = 0
# split the duration into hours / minutes / seconds / tenths, capping each
# part at its maximum and setting a warning message when that happens
hours = int(totaltime / 3600.)
if hours > 23:
message = 'Warning: The workout duration was longer than 23 hours. '
hours = 23
minutes = int((totaltime - 3600. * hours) / 60.)
if minutes > 59:
minutes = 59
if not message:
message = 'Warning: there is something wrong with the workout duration'
seconds = int(totaltime - 3600. * hours - 60. * minutes)
if seconds > 59:
seconds = 59
if not message:
message = 'Warning: there is something wrong with the workout duration'
tenths = int(10 * (totaltime - 3600. * hours - 60. * minutes - seconds))
if tenths > 9:
tenths = 9
if not message:
message = 'Warning: there is something wrong with the workout duration'
duration = "%s:%s:%s.%s" % (hours, minutes, seconds, tenths)
if dosummary:
summary = row.allstats()
# Start time and time zone: when the data has latitude/longitude columns the
# time zone is looked up from their mean position, otherwise (KeyError) the
# rower's default time zone is used
timezone_str = 'UTC'
try:
workoutstartdatetime = timezone.make_aware(row.rowdatetime)
except ValueError:
workoutstartdatetime = row.rowdatetime
try:
latavg = row.df[' latitude'].mean()
lonavg = row.df[' longitude'].mean()
tf = TimezoneFinder()
try:
timezone_str = tf.timezone_at(lng=lonavg, lat=latavg)
except ValueError:
timezone_str = 'UTC'
if timezone_str == None:
timezone_str = tf.closest_timezone_at(lng=lonavg,
lat=latavg)
if timezone_str == None:
timezone_str = r.defaulttimezone
try:
workoutstartdatetime = pytz.timezone(timezone_str).localize(
row.rowdatetime
)
except ValueError:
workoutstartdatetime = workoutstartdatetime.astimezone(
pytz.timezone(timezone_str)
)
except KeyError:
timezone_str = r.defaulttimezone
# date and start time are expressed in the workout's time zone
workoutdate = workoutstartdatetime.astimezone(
pytz.timezone(timezone_str)
).strftime('%Y-%m-%d')
workoutstarttime = workoutstartdatetime.astimezone(
pytz.timezone(timezone_str)
).strftime('%H:%M:%S')
if makeprivate:
privacy = 'hidden'
else:
privacy = 'visible'
# checking for inf values
totaldist = np.nan_to_num(totaldist)
maxhr = np.nan_to_num(maxhr)
averagehr = np.nan_to_num(averagehr)
# check for duplicate start times and duration
# (the filter below compares start time and distance for this rower;
# a match hides the new workout and sets a warning)
ws = Workout.objects.filter(startdatetime=workoutstartdatetime,
distance=totaldist,
user=r)
if (len(ws) != 0):
message = "Warning: This workout probably already exists in the database"
privacy = 'hidden'
w = Workout(user=r, name=title, date=workoutdate,
workouttype=workouttype,
duration=duration, distance=totaldist,
weightcategory=r.weightcategory,
starttime=workoutstarttime,
workoutsource=workoutsource,
forceunit=forceunit,
csvfilename=f2, notes=notes, summary=summary,
maxhr=maxhr, averagehr=averagehr,
startdatetime=workoutstartdatetime,
inboard=inboard, oarlength=oarlength,
timezone=timezone_str,
privacy=privacy)
w.save()
isbreakthrough = False
ishard = False
# breakthrough / hard workout detection, for 'water' workouts only
if workouttype == 'water':
df = getsmallrowdata_db(['power', 'workoutid', 'time'], ids=[w.id])
if df['power'].mean():
thesecs = totaltime
maxt = 1.05 * thesecs
if maxt > 0:
logarr = datautils.getlogarr(maxt)
dfgrouped = df.groupby(['workoutid'])
delta, cpvalues, avgpower = datautils.getcp(dfgrouped, logarr)
res, btvalues, res2 = utils.isbreakthrough(
delta, cpvalues, r.p0, r.p1, r.p2, r.p3, r.cpratio)
else:
res = 0
res2 = 0
if res:
isbreakthrough = True
res = datautils.updatecp(delta, cpvalues, r)
if res2 and not isbreakthrough:
ishard = True
# submit email task to send email about breakthrough workout
if isbreakthrough:
a_messages.info(
r.user, 'It looks like you have a new breakthrough workout')
if settings.DEBUG and r.getemailnotifications:
res = handle_sendemail_breakthrough.delay(w.id, r.user.email,
r.user.first_name,
r.user.last_name,
btvalues=btvalues.to_json())
elif r.getemailnotifications:
# NOTE(review): the task function is called here and its return value is
# passed to enqueue(); other enqueue calls in this file pass the function
# and its arguments separately -- confirm this is intended
try:
res = queuehigh.enqueue(
handle_sendemail_breakthrough(w.id,
r.user.email,
r.user.first_name,
r.user.last_name,
btvalues=btvalues.to_json()))
except AttributeError:
pass
else:
pass
# submit email task to send email about breakthrough workout
# (this second block handles the "hard workout" notification)
if ishard:
a_messages.info(r.user, 'That was a pretty hard workout')
if settings.DEBUG and r.getemailnotifications:
res = handle_sendemail_hard.delay(w.id, r.user.email,
r.user.first_name,
r.user.last_name,
btvalues=btvalues.to_json())
elif r.getemailnotifications:
# NOTE(review): same pattern as above -- the task is called directly
# rather than handed to the queue
try:
res = queuehigh.enqueue(
handle_sendemail_hard(w.id,
r.user.email,
r.user.first_name,
r.user.last_name,
btvalues=btvalues.to_json()))
except AttributeError:
pass
else:
pass
# visible workouts are added to every team returned for this rower
if privacy == 'visible':
ts = Team.objects.filter(rower=r)
for t in ts:
w.team.add(t)
# put stroke data in database
res = dataprep(row.df, id=w.id, bands=True,
barchart=True, otwpower=True, empower=True, inboard=inboard)
return (w.id, message)
def handle_nonpainsled(f2, fileformat, summary=''):
    """Convert a non-painsled file to a painsled compatible CSV file.

    Args:
        f2: path of the uploaded file.
        fileformat: format code selecting the parser (one of the keys of
            the parser table below).
        summary: summary text to return when the format does not provide
            its own.

    Returns:
        A tuple ``(csv_path, summary, oarlength, inboard)``. ``csv_path``
        is the original path with its last four characters replaced by
        'o.csv'. oarlength and inboard are 2.89 and 0.88 unless the rigging
        could be read from a 'speedcoach2' file.

    Raises:
        ValueError: when ``fileformat`` has no parser. (This case used to
            surface later as an UnboundLocalError.)
    """
    oarlength = 2.89
    inboard = 0.88

    parsers = {
        'rp': RowProParser,
        'tcx': TCXParser,
        'mystery': MysteryParser,
        'rowperfect3': RowPerfectParser,
        'ergdata': ErgDataParser,
        'coxmate': CoxMateParser,
        'bcmike': BoatCoachAdvancedParser,
        'boatcoach': BoatCoachParser,
        'boatcoachotw': BoatCoachOTWParser,
        'painsleddesktop': painsledDesktopParser,
        'speedcoach': speedcoachParser,
        'speedcoach2': SpeedCoach2Parser,
        'ergstick': ErgStickParser,
        'fit': FITParser,
    }
    try:
        parser = parsers[fileformat]
    except (KeyError, TypeError):
        raise ValueError(
            'No parser for file format: {0!r}'.format(fileformat))

    row = parser(f2)

    if fileformat == 'speedcoach2':
        # Rigging and summary are optional extras: best effort only.
        try:
            oarlength, inboard = get_empower_rigging(f2)
            summary = row.allstats()
        except Exception:
            pass
    elif fileformat == 'fit':
        # Use the summary stored in the FIT file when it can be read.
        try:
            s = fitsummarydata(f2)
            s.setsummary()
            summary = s.summarytext
        except Exception:
            pass

    f_to_be_deleted = f2
    # should delete file
    f2 = f2[:-4] + 'o.csv'
    row.write_csv(f2, gzip=True)

    # Remove the uploaded file; fall back to a '.gz' variant of its name.
    try:
        os.remove(f_to_be_deleted)
    except OSError:
        os.remove(f_to_be_deleted + '.gz')

    return (f2, summary, oarlength, inboard)
# Create new workout from file and store it in the database
# This routine should be used everywhere in views.py and mailprocessing.py
# Currently there is code duplication
def new_workout_from_file(r, f2,
                          workouttype='rower',
                          title='Workout',
                          makeprivate=False,
                          notes=''):
    """Detect the type of an uploaded file and turn it into a workout.

    Returns a tuple ``(id, message, filename)``:

    * id 0 and an explanatory message when the file is rejected (the file
      is removed, except for unrecognized files, which are mailed for
      inspection);
    * id -1 when the file is a zip archive that is handed to the
      ``handle_zip_file`` task;
    * otherwise the result of ``save_workout_database`` plus the name of
      the (possibly converted) CSV file.
    """
    corrupt = "Rowsandall could not process this file. The extension is supported but the file seems corrupt. Contact info@rowsandall.com if you think this is incorrect."

    message = None
    try:
        fileformat = get_file_type(f2)
    except IOError:
        os.remove(f2)
        return (0, corrupt, f2)

    summary = ''
    oarlength = 2.89
    inboard = 0.88

    # Zip archives are unpacked and processed by a background task
    if len(fileformat) == 3 and fileformat[0] == 'zip':
        title = os.path.basename(f2)
        if settings.DEBUG:
            handle_zip_file.delay(r.user.email, title, f2)
        else:
            queuelow.enqueue(handle_zip_file, r.user.email, title, f2)
        return -1, message, f2

    # Files that are recognized but cannot be used: Concept2 logbook
    # summaries, files without stroke data, corrupted zip files and
    # RowPro summary logs. The file is removed and the reason returned.
    rejections = (
        ('c2log', "This C2 logbook summary does not contain stroke data. Please download the Export Stroke Data file from the workout details on the C2 logbook."),
        ('nostrokes', "It looks like this file doesn't contain stroke data."),
        ('notgzip', corrupt),
        ('rowprolog', "This RowPro logbook summary does not contain stroke data. Please use the Stroke Data CSV file for the individual workout in your log."),
    )
    for code, reason in rejections:
        if fileformat == code:
            os.remove(f2)
            return (0, reason, f2)

    # Sometimes people try an unsupported file type.
    # Send an email to info@rowsandall.com with the file attached
    # for me to check if it is a bug, or a new file type
    # worth supporting
    if fileformat == 'unknown':
        if settings.DEBUG:
            handle_sendemail_unrecognized.delay(f2, r.user.email)
        else:
            queuehigh.enqueue(handle_sendemail_unrecognized,
                              f2, r.user.email)
        return (0, "We couldn't recognize the file type", f2)

    # handle non-Painsled by converting it to painsled compatible CSV
    if fileformat != 'csv':
        try:
            converted = handle_nonpainsled(f2, fileformat, summary=summary)
            f2, summary, oarlength, inboard = converted
        except:
            failure = 'Something went wrong: ' + str(sys.exc_info()[0])
            return (0, failure, '')

    # FIT files bring their own summary
    dosummary = (fileformat != 'fit')

    id, message = save_workout_database(
        f2, r,
        workouttype=workouttype,
        makeprivate=makeprivate,
        dosummary=dosummary,
        workoutsource=fileformat,
        summary=summary,
        inboard=inboard, oarlength=oarlength,
        title=title,
    )
    return (id, message, f2)
def split_workout(r, parent, splitsecond, splitmode):
    """Split a workout in two at a given time.

    Args:
        r: the rower who will own the new workouts.
        parent: the Workout to split.
        splitsecond: split point in seconds; rows with time <= splitsecond
            go to the first part, the rest to the second part.
        splitmode: collection of option strings. Recognized options are
            'keep first', 'keep second', 'keep original', 'firstprivate',
            'secondprivate' and 'originalprivate'.

    Returns:
        A tuple ``(ids, messages)`` with the ids of the resulting workouts
        and the messages collected while creating them. The parent is
        deleted unless 'keep original' is given or no part is kept.
    """

    def _regularize(part):
        # Sort by time, fill gaps, average duplicate time stamps and bring
        # pace from the stored (x1000) value back to seconds.
        part = part.sort_values(['time'])
        part = part.interpolate(method='linear', axis=0,
                                limit_direction='both', limit=10)
        part.fillna(method='bfill', inplace=True)
        # Some new stuff to try out
        part = part.groupby('time', axis=0).mean()
        part['time'] = part.index
        part.reset_index(drop=True, inplace=True)
        part['pace'] = part['pace'] / 1000.
        part.drop_duplicates(subset='time', inplace=True)
        return part

    data, row = getrowdata_db(id=parent.id)
    latitude, longitude = get_latlon(parent.id)
    if not latitude.empty and not longitude.empty:
        data[' latitude'] = latitude
        data[' longitude'] = longitude

    # stored time is divided by 1000 before comparing with splitsecond
    data['time'] = data['time'] / 1000.

    data1 = _regularize(data[data['time'] <= splitsecond].copy())
    data2 = _regularize(data[data['time'] > splitsecond].copy())

    messages = []
    ids = []

    if 'keep first' in splitmode:
        setprivate = 'firstprivate' in splitmode
        id, message = new_workout_from_df(r, data1,
                                          title=parent.name + ' (1)',
                                          parent=parent,
                                          setprivate=setprivate,
                                          forceunit='N')
        messages.append(message)
        ids.append(id)

    if 'keep second' in splitmode:
        # Restart distance and time at zero for the second part. The index
        # was reset above, so label 0 is the first row (.loc replaces the
        # deprecated .ix indexer).
        data2['cumdist'] = data2['cumdist'] - data2.loc[0, 'cumdist']
        data2['distance'] = data2['distance'] - data2.loc[0, 'distance']
        data2['time'] = data2['time'] - data2.loc[0, 'time']
        setprivate = 'secondprivate' in splitmode
        # the second part starts splitsecond after the parent
        dt = datetime.timedelta(seconds=splitsecond)
        id, message = new_workout_from_df(r, data2,
                                          title=parent.name + ' (2)',
                                          parent=parent,
                                          setprivate=setprivate,
                                          dt=dt, forceunit='N')
        messages.append(message)
        ids.append(id)

    if 'keep original' not in splitmode:
        if 'keep second' in splitmode or 'keep first' in splitmode:
            parent.delete()
            messages.append('Deleted Workout: ' + parent.name)
        else:
            # Nothing would be left: refuse to delete the only copy.
            messages.append('That would delete your workout')
            ids.append(parent.id)
    elif 'originalprivate' in splitmode:
        parent.privacy = 'hidden'
        parent.save()

    return ids, messages
# Create new workout from data frame and store it in the database
# This routine should be used everywhere in views.py and mailprocessing.py
# Currently there is code duplication
def new_workout_from_df(r, df,
                        title='New Workout',
                        parent=None,
                        setprivate=False,
                        forceunit='lbs',
                        dt=datetime.timedelta()):
    """Write a data frame to a CSV file and store it as a new workout.

    Args:
        r: the rower who owns the new workout.
        df: stroke data using the database column names; it is modified in
            place (force conversion, column renaming, extra columns).
        title: name of the new workout.
        parent: optional Workout to inherit rigging, type, notes and
            privacy from; its start time plus ``dt`` is the new start time.
            Without a parent the current time is used.
        setprivate: force the new workout to be private.
        forceunit: 'N' when the force columns are in Newton; they are
            converted to lbs before writing.
        dt: offset added to the parent's start time.

    Returns:
        The ``(id, message)`` tuple of ``save_workout_database``.
    """
    message = None
    summary = ''
    if parent:
        oarlength = parent.oarlength
        inboard = parent.inboard
        workouttype = parent.workouttype
        notes = parent.notes
        summary = parent.summary
        makeprivate = parent.privacy == 'hidden'
        startdatetime = parent.startdatetime + dt
    else:
        oarlength = 2.89
        inboard = 0.88
        workouttype = 'rower'
        notes = ''
        summary = ''
        makeprivate = False
        startdatetime = timezone.now()

    if setprivate:
        makeprivate = True

    # Microseconds are part of the file name: with one-second resolution,
    # two workouts created within the same second (e.g. both halves of a
    # split) would share one CSV file.
    timestr = datetime.datetime.now().strftime("%Y%m%d-%H%M%S-%f")
    csvfilename = 'media/df_' + timestr + '.csv'

    if forceunit == 'N':
        # change to lbs for now
        df['peakforce'] /= lbstoN
        df['averageforce'] /= lbstoN

    df.rename(columns=columndict, inplace=True)

    # Keep the relative time as elapsed time and turn the time stamps into
    # absolute (unix) time stamps.
    #starttimeunix = mktime(startdatetime.utctimetuple())
    starttimeunix = arrow.get(startdatetime).timestamp
    df[' ElapsedTime (sec)'] = df['TimeStamp (sec)']
    df['TimeStamp (sec)'] = df['TimeStamp (sec)'] + starttimeunix

    row = rrdata(df=df)
    row.write_csv(csvfilename, gzip=True)

    # NOTE(review): summary (taken from the parent) is not passed on to
    # save_workout_database -- confirm that is intended.
    id, message = save_workout_database(csvfilename, r,
                                        workouttype=workouttype,
                                        title=title,
                                        notes=notes,
                                        oarlength=oarlength,
                                        inboard=inboard,
                                        makeprivate=makeprivate,
                                        dosmooth=False,
                                        consistencychecks=False)
    return (id, message)
# Compare the data from the CSV file and the database
# Currently only calculates number of strokes. To be expanded with
# more elaborate testing if needed
def compare_data(id):
    """Compare the number of strokes in the CSV file and in the database.

    Args:
        id: primary key of the Workout to check.

    Returns:
        A tuple ``(consistent, ldb, lfile)``: the number of rows found in
        the strokedata table, the number of rows in the CSV file, and
        whether both are equal and non-zero. A count is 0 when its source
        could not be read.
    """
    row = Workout.objects.get(id=id)
    try:
        lfile = len(rdata(row.csvfilename).df)
    except AttributeError:
        # rdata returns 0 for an unreadable file
        lfile = 0

    engine = create_engine(database_url, echo=False)
    query = sa.text('SELECT COUNT(*) FROM strokedata WHERE workoutid={id};'.format(
        id=id,
    ))

    # Default for a failing query; the count used to be left unbound in
    # that case, so the return statement raised a NameError.
    ldb = 0
    try:
        with engine.connect() as conn, conn.begin():
            try:
                res = conn.execute(query)
                ldb = res.fetchall()[0][0]
            except Exception:
                print("Database Locked")
    finally:
        engine.dispose()

    return lfile == ldb and lfile != 0, ldb, lfile
# Repair data for workouts where the CSV file is lost (or the DB entries
# don't exist)
def repair_data(verbose=False):
    """Bring CSV files and stroke data of all workouts back in sync.

    For every workout whose CSV file and strokedata rows do not match:

    * when the CSV file is readable, the stroke data is rebuilt from it;
    * when the CSV file is missing or empty, it is recreated from the
      stroke data in the database; a workout that has no stroke data in
      either place is deleted.

    Args:
        verbose: print progress information to stdout.
    """
    for w in Workout.objects.all():
        if verbose:
            sys.stdout.write(".")

        test, ldb, lfile = compare_data(w.id)
        if test:
            continue

        if verbose:
            print("%s %s %s" % (w.id, lfile, ldb))

        try:
            rowdata = rdata(w.csvfilename)
            if rowdata and len(rowdata.df):
                update_strokedata(w.id, rowdata.df)
        except (IOError, AttributeError):
            # The tuple is required: `except IOError, AttributeError`
            # caught IOError only and bound it to the name AttributeError.
            pass

        if lfile == 0:
            # if not ldb - delete workout
            try:
                data = read_df_sql(w.id)
                try:
                    datalength = len(data)
                except AttributeError:
                    datalength = 0

                if datalength != 0:
                    data.rename(columns=columndict, inplace=True)
                    data.to_csv(w.csvfilename + '.gz',
                                index_label='index',
                                compression='gzip')
                    print('adding csv file')
                else:
                    print("%s %s" % (w.id, ' No stroke records anywhere'))
                    w.delete()
            except Exception:
                # best effort: report and continue with the next workout
                print('failed')
                print(str(sys.exc_info()[0]))
# A wrapper around the rowingdata class, with some error catching
def rdata(file, rower=rrower()):
    """Read a rowingdata CSV file, falling back to its gzipped variant.

    Args:
        file: path of the CSV file; ``file + '.gz'`` is tried when reading
            the plain path fails with IOError or IndexError.
        rower: rower settings passed on to the rowingdata object.

    Returns:
        The rowingdata object, or 0 when the file could not be read.
    """
    try:
        res = rrdata(csvfile=file, rower=rower)
    except (IOError, IndexError):
        # The tuple is required: `except IOError, IndexError` caught
        # IOError only and bound it to the name IndexError.
        try:
            res = rrdata(csvfile=file + '.gz', rower=rower)
        except Exception:
            res = 0
    except Exception:
        res = 0
    return res
# Remove all stroke data for workout ID from database
def delete_strokedata(id):
    """Delete every strokedata row of one workout.

    Best effort: a failing DELETE is reported on stdout and otherwise
    ignored.

    Args:
        id: workout id matched against the ``workoutid`` column.
    """
    engine = create_engine(database_url, echo=False)
    # NOTE(review): id is formatted straight into the SQL text; it must
    # never come from untrusted input.
    query = sa.text('DELETE FROM strokedata WHERE workoutid={id};'.format(
        id=id,
    ))
    try:
        # The context managers end the transaction and then close the
        # connection, in that order; no explicit close() inside the
        # transaction block.
        with engine.connect() as conn, conn.begin():
            try:
                conn.execute(query)
            except Exception:
                print("Database Locked")
    finally:
        engine.dispose()
# Replace stroke data in DB with data from CSV file
def update_strokedata(id, df):
    """Drop the stored stroke data of workout ``id`` and rebuild it from ``df``."""
    delete_strokedata(id)
    dataprep(df, id=id, bands=True, barchart=True, otwpower=True)
# Test that all data are of a numerical time
def testdata(time, distance, pace, spm):
t1 = np.issubdtype(time, np.number)
t2 = np.issubdtype(distance, np.number)
t3 = np.issubdtype(pace, np.number)
t4 = np.issubdtype(spm, np.number)
return t1 and t2 and t3 and t4
# Get data from DB for one workout (fetches all data). If data
# is not in DB, read from CSV file (and create DB entry)
def getrowdata_db(id=0, doclean=False, convertnewtons=True):
    """Return all stroke data of a workout together with the Workout.

    Args:
        id: primary key of the workout.
        doclean: run ``clean_df_stats`` on the data before returning it.
        convertnewtons: not used by this function.

    Returns:
        A tuple ``(data, row)``. ``data`` is the stroke data frame; it is
        an empty frame when neither the database nor the CSV file holds
        any data. ``row`` is the Workout object.
    """
    data = read_df_sql(id)
    # x_right is scaled down by 1e6 when it is read back
    data['x_right'] = data['x_right'] / 1.0e6

    if data.empty:
        # Nothing in the database yet: build the stroke data from the CSV
        rowdata, row = getrowdata(id=id)
        if rowdata:
            data = dataprep(rowdata.df, id=id, bands=True,
                            barchart=True, otwpower=True)
        else:
            data = pd.DataFrame()  # returning empty dataframe
    else:
        row = Workout.objects.get(id=id)

    # Power present but efficiency all zero: compute the efficiency now.
    # Skipped for an empty frame, which has no such columns and used to
    # raise a KeyError here.
    if not data.empty:
        if data['efficiency'].mean() == 0 and data['power'].mean() != 0:
            data = add_efficiency(id=id)

    if doclean:
        data = clean_df_stats(data, ignorehr=True)

    return data, row
# Fetch a subset of the data from the DB
def getsmallrowdata_db(columns, ids=[], doclean=True, workstrokesonly=True):
    """Return the requested stroke data columns for a set of workouts.

    Stroke data is first created for workouts that have none yet. With
    ``doclean`` the result is passed through ``clean_df_stats`` (heart
    rate limits ignored) using the given ``workstrokesonly`` setting.
    """
    prepmultipledata(ids)
    data = read_cols_df_sql(ids, columns)
    if not doclean:
        return data
    return clean_df_stats(data, ignorehr=True,
                          workstrokesonly=workstrokesonly)
# Fetch both the workout and the workout stroke data (from CSV file)
def getrowdata(id=0):
    """Return ``(rowdata, row)`` for a workout id.

    ``row`` is the Workout object and ``rowdata`` the result of ``rdata``
    for its CSV file, read with the heart rate zones and ftp of the
    workout's owner. Raises Workout.DoesNotExist for an unknown id.
    """
    row = Workout.objects.get(id=id)

    # the owner of the workout and the related user (the latter is not
    # used below)
    r = row.user
    u = r.user

    zones = dict(hrmax=r.max, hrut2=r.ut2,
                 hrut1=r.ut1, hrat=r.at,
                 hrtr=r.tr, hran=r.an, ftp=r.ftp)
    rowdata = rdata(row.csvfilename, rower=rrower(**zones))
    return rowdata, row
# Checks if all rows for a list of workout IDs have entries in the
# stroke_data table. If this is not the case, it creates the stroke
# data
# In theory, this should never yield any work, but it's a good
# safety net for programming errors elsewhere in the app
# Also used heavily when I moved from CSV file only to CSV+Stroke data
def prepmultipledata(ids, verbose=False):
    """Create stroke data for the workouts in ``ids`` that have none yet.

    Args:
        ids: workout ids to check; converted to integers when possible.
        verbose: print the id of every workout that is processed.

    Returns:
        The list of ids that had no rows in the strokedata table.
    """
    query = sa.text('SELECT DISTINCT workoutid FROM strokedata')
    engine = create_engine(database_url, echo=False)
    try:
        # The connection is closed by the context manager once the
        # transaction has ended; no explicit close() inside the block.
        with engine.connect() as conn, conn.begin():
            rows = conn.execute(query).fetchall()
            present = list(itertools.chain.from_iterable(rows))
    finally:
        engine.dispose()

    try:
        ids2 = [int(id) for id in ids]
    except ValueError:
        ids2 = ids

    res = list(set(ids2) - set(present))

    for id in res:
        rowdata, row = getrowdata(id=id)
        if verbose:
            print(id)
        if rowdata and len(rowdata.df):
            dataprep(rowdata.df, id=id, bands=True,
                     barchart=True, otwpower=True)

    return res
# Read a set of columns for a set of workout ids, returns data as a
# pandas dataframe
def read_cols_df_sql(ids, columns, convertnewtons=True):
    """Read ``columns`` of the ``strokedata`` table for workouts ``ids``.

    Column names that are not fields of the ``StrokeData`` model (and the
    literal string ``'None'``) are ignored.  ``distance``, ``spm`` and
    ``workoutid`` are always included.  The caller's ``columns`` list is
    not modified.

    ``ids`` must be convertible to ``int``.  An empty ``ids`` queries for
    ``workoutid=0``.

    NaN values in the result are replaced by 0.  For workouts whose
    ``forceunit`` is ``'lbs'``, the ``peakforce`` and ``averageforce``
    columns (when requested) are multiplied by ``lbstoN``.

    NOTE(review): ``convertnewtons`` is accepted but not used; the force
    conversion is always applied.  Confirm whether callers passing
    ``convertnewtons=False`` expect it to be skipped.

    Returns a pandas DataFrame.
    """
    valid_names = {f.name for f in StrokeData._meta.get_fields()}

    # Build a new list.  Removing items from `columns` while looping over
    # it skips the element following every removed one, so unknown column
    # names could reach the SQL statement; it also changed the caller's
    # list.
    wanted = [c for c in columns if c in valid_names and c != 'None']
    wanted = list(set(wanted + ['distance', 'spm', 'workoutid']))
    cls = ', '.join(wanted)

    # Coerce ids to int before putting them into the statement.  This
    # keeps arbitrary text out of the SQL and avoids the "1L" repr that
    # formatting a tuple of Python 2 longs would produce.
    ids = [int(workout_id) for workout_id in ids]
    if ids:
        condition = 'workoutid IN ({ids})'.format(
            ids=', '.join(str(workout_id) for workout_id in ids),
        )
    else:
        condition = 'workoutid=0'

    query = sa.text('SELECT {columns} FROM strokedata WHERE {condition}'.format(
        columns=cls,
        condition=condition,
    ))

    engine = create_engine(database_url, echo=False)
    try:
        df = pd.read_sql_query(query, engine)
    finally:
        engine.dispose()

    df = df.fillna(value=0)

    force_columns = [c for c in ('peakforce', 'averageforce') if c in wanted]
    if force_columns:
        # One pass over the workouts handles both force columns.
        for workout in Workout.objects.filter(id__in=ids):
            if workout.forceunit == 'lbs':
                mask = df['workoutid'] == workout.id
                for column in force_columns:
                    df.loc[mask, column] = df.loc[mask, column] * lbstoN

    return df
# Read stroke data from the DB for a Workout ID. Returns a pandas dataframe
def read_df_sql(id):
    """Read all ``strokedata`` rows of workout ``id`` into a DataFrame.

    NaN values are replaced by 0.  If the workout's ``forceunit`` is
    ``'lbs'``, the ``peakforce`` and ``averageforce`` columns (when
    present) are multiplied by ``lbstoN``.

    ``id`` must be convertible to ``int``.  ``Workout.objects.get`` raises
    if the workout does not exist.
    """
    # Bind the id as a parameter instead of formatting it into the SQL.
    query = sa.text(
        'SELECT * FROM strokedata WHERE workoutid=:workoutid'
    ).bindparams(workoutid=int(id))

    engine = create_engine(database_url, echo=False)
    try:
        df = pd.read_sql_query(query, engine)
    finally:
        # Dispose the engine even when the query fails.
        engine.dispose()

    df = df.fillna(value=0)

    if Workout.objects.get(id=id).forceunit == 'lbs':
        for column in ('peakforce', 'averageforce'):
            if column in df.columns:
                df[column] = df[column] * lbstoN

    return df
# Get the necessary data from the strokedata table in the DB.
# For the flex plot
def smalldataprep(therows, xparam, yparam1, yparam2):
    """Collect the flex plot columns for a set of workouts from their files.

    For every workout in ``therows`` the CSV file is read and run through
    ``dataprep`` (without an id, so nothing is written to the database).
    The columns ``xparam``, ``yparam1``, ``yparam2``, ``distance`` and
    ``spm`` are kept and appended to the result.  If reading
    ``csvfilename`` raises ``IOError``, ``csvfilename + '.gz'`` is tried;
    if that fails as well the workout is skipped.

    A ``yparam2`` of ``'None'`` (the string) is replaced by ``'power'``.
    For workouts with ``forceunit == 'lbs'``, ``peakforce`` and
    ``averageforce`` (when among the kept columns) are multiplied by
    ``lbstoN``.

    Returns a pandas DataFrame with all workouts concatenated.
    """
    if yparam2 == 'None':
        yparam2 = 'power'

    df = pd.DataFrame()
    for name in (xparam, yparam1, yparam2, 'distance', 'spm'):
        df[name] = []

    for workout in therows:
        f1 = workout.csvfilename

        # Same processing for the plain file and its gzipped variant; the
        # second name is only tried when the first raises IOError.
        for filename in (f1, f1 + '.gz'):
            try:
                prepared = dataprep(rrdata(filename).df)
                if not isinstance(prepared, pd.DataFrame):
                    # dataprep returns 0 for an empty input frame; there
                    # is nothing to add for this workout.
                    break

                rowdata = pd.DataFrame({xparam: prepared[xparam],
                                        yparam1: prepared[yparam1],
                                        yparam2: prepared[yparam2],
                                        'distance': prepared['distance'],
                                        'spm': prepared['spm'],
                                        })

                if workout.forceunit == 'lbs':
                    for column in ('peakforce', 'averageforce'):
                        if column in rowdata.columns:
                            rowdata[column] *= lbstoN

                df = pd.concat([df, rowdata], ignore_index=True)
                break
            except IOError:
                continue

    return df
# data fusion
def datafusion(id1, id2, columns, offset):
    """Merge ``columns`` of workout ``id2`` into the data of workout ``id1``.

    The stroke data of workout 1 is loaded with ``getrowdata_db`` and
    gets its coordinates (from ``get_latlon``) added.  The ``columns``
    of workout 2 are loaded with ``getsmallrowdata_db`` and shifted in
    time by ``offset``.  ``offset`` needs ``days``, ``seconds`` and
    ``microseconds`` attributes; time values are handled in milliseconds.
    Columns that come from workout 2 are removed from workout 1 before
    both frames are concatenated, sorted on time, linearly interpolated
    (at most 10 consecutive gaps), back-filled and averaged per
    time stamp.

    ``columns`` must be a list of column names.

    Returns ``(df, forceunit)`` where ``forceunit`` is always ``'N'`` and
    ``df`` has ``time`` and ``pace`` divided by 1000 and a ``cum_dist``
    copy of ``cumdist``.
    """
    # Kept from the original for their effect only: .get() raises when a
    # workout does not exist.
    Workout.objects.get(id=id1)
    Workout.objects.get(id=id2)

    df1, _ = getrowdata_db(id=id1)
    df1 = df1.drop([  # 'cumdist',
        'hr_ut2',
        'hr_ut1',
        'hr_at',
        'hr_tr',
        'hr_an',
        'hr_max',
        'ftime',
        'fpace',
        'workoutid',
        'id'],
        axis=1, errors='ignore')

    # Add coordinates to DataFrame
    latitude, longitude = get_latlon(id1)
    df1[' latitude'] = latitude
    df1[' longitude'] = longitude

    df2 = getsmallrowdata_db(['time'] + columns, ids=[id2], doclean=False)

    forceunit = 'N'

    offsetmillisecs = offset.seconds * 1000 + offset.microseconds / 1000.
    offsetmillisecs += offset.days * (3600 * 24 * 1000)
    df2['time'] = df2['time'] + offsetmillisecs

    # Workout 2 supplies these columns, so remove them from workout 1.
    # errors='ignore' tolerates columns that workout 1 does not have; the
    # original raised KeyError in that case.
    df1 = df1.drop(list(columns), axis=1, errors='ignore')

    df = pd.concat([df1, df2], ignore_index=True)
    df = df.sort_values(['time'])
    df = df.interpolate(method='linear', axis=0, limit_direction='both',
                        limit=10)
    df.fillna(method='bfill', inplace=True)

    # Collapse rows sharing a time stamp into their mean, then restore
    # 'time' as an ordinary column.
    df = df.groupby('time', axis=0).mean()
    df['time'] = df.index
    df.reset_index(drop=True, inplace=True)

    df['time'] = df['time'] / 1000.
    df['pace'] = df['pace'] / 1000.
    df['cum_dist'] = df['cumdist']

    return df, forceunit
def fix_newtons(id=0, limit=3000):
# rowdata,row = getrowdata_db(id=id,doclean=False,convertnewtons=False)
rowdata = getsmallrowdata_db(['peakforce'], ids=[id], doclean=False)
try:
#avgforce = rowdata['averageforce']
peakforce = rowdata['peakforce']
if peakforce.mean() > limit:
w = Workout.objects.get(id=id)
print "fixing ", id
rowdata = rdata(w.csvfilename)
if rowdata and len(rowdata.df):
update_strokedata(w.id, rowdata.df)
except KeyError:
pass
def add_efficiency(id=0):
    """Compute the ``efficiency`` column for workout ``id`` and store it.

    Loads the stroke data with ``getrowdata_db`` (no cleaning), computes
    ``efficiency = 100 * (2.8 * velo**3) / power`` with
    ``velo = 500 / pace`` (``pace`` is stored multiplied by 1000, so it is
    divided by 1e3 first), replaces infinite values by forward-filled
    neighbours, deletes the workout's existing stroke data and, for a
    non-zero ``id``, appends the updated rows to the ``strokedata`` table.

    Returns the updated DataFrame.
    """
    rowdata, row = getrowdata_db(id=id, doclean=False, convertnewtons=False)

    power = rowdata['power']
    pace = rowdata['pace'] / 1.0e3
    velo = 500. / pace
    ergpw = 2.8 * velo**3

    efficiency = 100. * ergpw / power
    efficiency = efficiency.replace([-np.inf, np.inf], np.nan)
    # fillna returns a new Series; the original discarded the result, so
    # the NaN values were stored unchanged.
    efficiency = efficiency.fillna(method='ffill')
    rowdata['efficiency'] = efficiency

    delete_strokedata(id)

    if id != 0:
        rowdata['workoutid'] = id
        engine = create_engine(database_url, echo=False)
        try:
            # to_sql is given the engine and manages its own connection.
            rowdata.to_sql('strokedata', engine,
                           if_exists='append', index=False)
        finally:
            # Dispose the engine even when the write fails.
            engine.dispose()

    return rowdata
# This is the main routine.
# it reindexes, sorts, filters, and smooths the data, then
# saves it to the stroke_data table in the database
# Takes a rowingdata object's DataFrame as input
def dataprep(rowdatadf, id=0, bands=True, barchart=True, otwpower=True,
             empower=True, inboard=0.88, forceunit='lbs'):
    """Turn a rowingdata DataFrame into the stroke data frame used by the app.

    The input is re-indexed, pace is capped at 3000 sec/500m, several
    series are smoothed with a Savitzky-Golay filter and derived metrics
    are added.  When ``id`` is non-zero the result is appended to the
    ``strokedata`` table with ``workoutid`` set to ``id``.

    Note that ``rowdatadf`` is modified in place (index reset, pace cap,
    and an ``' ElapsedTime (sec)'`` column is added when missing).

    Parameters:
        rowdatadf: DataFrame of a rowingdata object.
        id: workout id to store the data under; 0 means do not store.
        bands: add the ``hr_*`` band columns from the input.
        barchart: add ``x_right`` (time plus the time increment).
        otwpower: add ``ergpace``, ``nowindpace``, ``equivergpower`` and
            their formatted variants.
        empower: add the oar angle related columns (``wash``, ``catch``,
            ``slip``, ``finish``, ``peakforceangle``, ``totalangle``,
            ``effectiveangle``) and ``efficiency``.
        inboard: used (minus 0.05) as radius to compute the arc length
            from the catch and finish angles.
        forceunit: when ``'lbs'``, the average force is multiplied by
            ``lbstoN`` for the drive energy estimate.

    Returns the prepared DataFrame, or 0 if ``rowdatadf`` is empty.
    """
    if rowdatadf.empty:
        return 0

    rowdatadf.set_index([range(len(rowdatadf))], inplace=True)
    t = rowdatadf.ix[:, 'TimeStamp (sec)']
    t = pd.Series(t - rowdatadf.ix[0, 'TimeStamp (sec)'])

    # Cap the pace at 3000 sec/500m
    row_index = rowdatadf.ix[:, ' Stroke500mPace (sec/500m)'] > 3000
    rowdatadf.loc[row_index, ' Stroke500mPace (sec/500m)'] = 3000.

    p = rowdatadf.ix[:, ' Stroke500mPace (sec/500m)']
    hr = rowdatadf.ix[:, ' HRCur (bpm)']
    spm = rowdatadf.ix[:, ' Cadence (stokes/min)']
    cumdist = rowdatadf.ix[:, 'cum_dist']
    power = rowdatadf.ix[:, ' Power (watts)']
    averageforce = rowdatadf.ix[:, ' AverageDriveForce (lbs)']
    drivelength = rowdatadf.ix[:, ' DriveLength (meters)']
    try:
        workoutstate = rowdatadf.ix[:, ' WorkoutState']
    except KeyError:
        workoutstate = 0 * hr

    peakforce = rowdatadf.ix[:, ' PeakDriveForce (lbs)']

    forceratio = averageforce / peakforce
    forceratio = forceratio.fillna(value=0)

    try:
        drivetime = rowdatadf.ix[:, ' DriveTime (ms)']
        recoverytime = rowdatadf.ix[:, ' StrokeRecoveryTime (ms)']
        rhythm = 100. * drivetime / (recoverytime + drivetime)
        rhythm = rhythm.fillna(value=0)
    except Exception:
        # Best effort: any failure gives an all-zero rhythm.  Exception
        # instead of a bare except, so KeyboardInterrupt and SystemExit
        # are not swallowed.
        rhythm = 0.0 * forceratio

    # Filter window: about 20 seconds of strokes, odd, at least 5 wide
    f = rowdatadf['TimeStamp (sec)'].diff().mean()
    if f != 0:
        windowsize = 2 * (int(10. / (f))) + 1
    else:
        windowsize = 1
    if windowsize <= 3:
        windowsize = 5

    if windowsize > 3 and windowsize < len(hr):
        spm = savgol_filter(spm, windowsize, 3)
        hr = savgol_filter(hr, windowsize, 3)
        drivelength = savgol_filter(drivelength, windowsize, 3)
        forceratio = savgol_filter(forceratio, windowsize, 3)

    try:
        t2 = t.fillna(method='ffill').apply(lambda x: timedeltaconv(x))
    except TypeError:
        t2 = 0 * t

    p2 = p.fillna(method='ffill').apply(lambda x: timedeltaconv(x))

    try:
        drivespeed = drivelength / rowdatadf[' DriveTime (ms)'] * 1.0e3
    except (TypeError, KeyError):
        # KeyError added: the rhythm calculation above already tolerates
        # a missing drive time column, so do the same here.
        drivespeed = 0.0 * rowdatadf['TimeStamp (sec)']

    drivespeed = drivespeed.fillna(value=0)

    try:
        driveenergy = rowdatadf['driveenergy']
    except KeyError:
        if forceunit == 'lbs':
            driveenergy = drivelength * averageforce * lbstoN
        else:
            # The original assigned to a misspelled name here, leaving
            # driveenergy undefined for non-lbs force units.
            driveenergy = drivelength * averageforce

    distance = rowdatadf.ix[:, 'cum_dist']
    velo = 500. / p

    distanceperstroke = 60. * velo / spm

    data = DataFrame(
        dict(
            time=t * 1e3,
            hr=hr,
            pace=p * 1e3,
            spm=spm,
            cumdist=cumdist,
            ftime=niceformat(t2),
            fpace=nicepaceformat(p2),
            driveenergy=driveenergy,
            power=power,
            workoutstate=workoutstate,
            averageforce=averageforce,
            drivelength=drivelength,
            peakforce=peakforce,
            forceratio=forceratio,
            distance=distance,
            drivespeed=drivespeed,
            rhythm=rhythm,
            distanceperstroke=distanceperstroke,
        )
    )

    if bands:
        # HR bands
        data['hr_ut2'] = rowdatadf.ix[:, 'hr_ut2']
        data['hr_ut1'] = rowdatadf.ix[:, 'hr_ut1']
        data['hr_at'] = rowdatadf.ix[:, 'hr_at']
        data['hr_tr'] = rowdatadf.ix[:, 'hr_tr']
        data['hr_an'] = rowdatadf.ix[:, 'hr_an']
        data['hr_max'] = rowdatadf.ix[:, 'hr_max']
        data['hr_bottom'] = 0.0 * data['hr']

    # Fall back on the time stamps when there is no elapsed time column
    if ' ElapsedTime (sec)' not in rowdatadf.columns:
        rowdatadf[' ElapsedTime (sec)'] = rowdatadf['TimeStamp (sec)']

    if barchart:
        # time increments for bar chart
        time_increments = rowdatadf.ix[:, ' ElapsedTime (sec)'].diff()
        time_increments[0] = time_increments[1]
        # 0.5 * (x + |x|) sets negative increments to zero
        time_increments = 0.5 * time_increments + 0.5 * np.abs(time_increments)
        x_right = (t2 + time_increments.apply(lambda x: timedeltaconv(x)))

        data['x_right'] = x_right

    if empower:
        try:
            wash = rowdatadf.ix[:, 'wash']
        except KeyError:
            wash = 0 * power

        try:
            catch = rowdatadf.ix[:, 'catch']
        except KeyError:
            catch = 0 * power

        try:
            finish = rowdatadf.ix[:, 'finish']
        except KeyError:
            finish = 0 * power

        try:
            peakforceangle = rowdatadf.ix[:, 'peakforceangle']
        except KeyError:
            peakforceangle = 0 * power

        if data['driveenergy'].mean() == 0:
            try:
                driveenergy = rowdatadf.ix[:, 'driveenergy']
            except KeyError:
                driveenergy = power * 60 / spm
        else:
            driveenergy = data['driveenergy']

        arclength = (inboard - 0.05) * (np.radians(finish) - np.radians(catch))
        if arclength.mean() > 0:
            drivelength = arclength
        elif drivelength.mean() == 0:
            drivelength = driveenergy / (averageforce * 4.44822)

        try:
            slip = rowdatadf.ix[:, 'slip']
        except KeyError:
            slip = 0 * power

        try:
            totalangle = finish - catch
            effectiveangle = finish - wash - catch - slip
        except ValueError:
            totalangle = 0 * power
            effectiveangle = 0 * power

        if windowsize > 3 and windowsize < len(slip):
            def _smooth(values):
                # Smoothed copy of values; unchanged when the filter
                # cannot handle them (TypeError).
                try:
                    return savgol_filter(values, windowsize, 3)
                except TypeError:
                    return values

            wash = _smooth(wash)
            slip = _smooth(slip)
            catch = _smooth(catch)
            finish = _smooth(finish)
            peakforceangle = _smooth(peakforceangle)
            driveenergy = _smooth(driveenergy)
            drivelength = _smooth(drivelength)
            totalangle = _smooth(totalangle)
            effectiveangle = _smooth(effectiveangle)

        velo = 500. / p

        ergpw = 2.8 * velo**3
        efficiency = 100. * ergpw / power

        efficiency = efficiency.replace([-np.inf, np.inf], np.nan)
        # fillna returns a new Series; the original discarded the result.
        efficiency = efficiency.fillna(method='ffill')

        try:
            data['wash'] = wash
            data['catch'] = catch
            data['slip'] = slip
            data['finish'] = finish
            data['peakforceangle'] = peakforceangle
            data['driveenergy'] = driveenergy
            data['drivelength'] = drivelength
            data['totalangle'] = totalangle
            data['effectiveangle'] = effectiveangle
            data['efficiency'] = efficiency
        except ValueError:
            pass

    if otwpower:
        try:
            nowindpace = rowdatadf.ix[:, 'nowindpace']
        except KeyError:
            nowindpace = p
        try:
            equivergpower = rowdatadf.ix[:, 'equivergpower']
        except KeyError:
            equivergpower = 0 * p + 50.

        nowindpace2 = nowindpace.apply(lambda x: timedeltaconv(x))
        ergvelo = (equivergpower / 2.8)**(1. / 3.)

        ergpace = 500. / ergvelo
        ergpace[ergpace == np.inf] = 240.
        ergpace2 = ergpace.apply(lambda x: timedeltaconv(x))

        data['ergpace'] = ergpace * 1e3
        data['nowindpace'] = nowindpace * 1e3
        data['equivergpower'] = equivergpower
        data['fergpace'] = nicepaceformat(ergpace2)
        data['fnowindpace'] = nicepaceformat(nowindpace2)

    data = data.replace([-np.inf, np.inf], np.nan)
    data = data.fillna(method='ffill')

    # write data if id given
    if id != 0:
        data['workoutid'] = id
        engine = create_engine(database_url, echo=False)
        try:
            # to_sql is given the engine and manages its own connection.
            data.to_sql('strokedata', engine, if_exists='append', index=False)
        finally:
            # Dispose the engine even when the write fails.
            engine.dispose()

    return data