Private
Public Access
1
0
Files
rowsandall/rowers/dataprep.py
2020-10-12 08:50:32 +02:00

3085 lines
88 KiB
Python

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals
# All the data preparation, data cleaning and data mangling should
# be defined here
from __future__ import unicode_literals, absolute_import
from rowers.models import Workout, Team
import pytz
import collections
from rowingdata import rowingdata as rrdata
from rowingdata import rower as rrower
import yaml
import shutil
from shutil import copyfile
from rowingdata import (
get_file_type, get_empower_rigging,get_empower_firmware
)
from rowers.tasks import handle_sendemail_unrecognized
from rowers.tasks import handle_zip_file
from pandas import DataFrame, Series
import dask.dataframe as dd
from dask.delayed import delayed
import pyarrow.parquet as pq
import pyarrow as pa
from pyarrow.lib import ArrowInvalid
from django.utils import timezone
from django.utils.timezone import get_current_timezone
from django_mailbox.models import Message,Mailbox,MessageAttachment
from django.core.exceptions import ValidationError
from time import strftime
import arrow
thetimezone = get_current_timezone()
from rowingdata import (
TCXParser, RowProParser, ErgDataParser,
CoxMateParser,
BoatCoachParser, RowPerfectParser, BoatCoachAdvancedParser,
ETHParser,
MysteryParser, BoatCoachOTWParser,QuiskeParser,
painsledDesktopParser, speedcoachParser, ErgStickParser,
SpeedCoach2Parser, FITParser, fitsummarydata,
RitmoTimeParser,KinoMapParser,
make_cumvalues,cumcpdata,ExcelTemplate,
summarydata, get_file_type,
)
from rowingdata.csvparsers import HumonParser
from rowers.metrics import axes,calc_trimp,rowingmetrics,dtypes,metricsgroups
from rowers.models import strokedatafields
#allowedcolumns = [item[0] for item in rowingmetrics]
allowedcolumns = [key for key,value in strokedatafields.items()]
#from async_messages import messages as a_messages
import os
import zipfile
import pandas as pd
import numpy as np
import itertools
from fitparse import FitFile
from fitparse.base import FitHeaderError
import math
from rowers.tasks import (
handle_sendemail_unrecognized, handle_sendemail_breakthrough,
handle_sendemail_hard, handle_updatecp,handle_updateergcp,
handle_calctrimp,
)
from django.conf import settings
from sqlalchemy import create_engine
import sqlalchemy as sa
import sys
import rowers.utils as utils
import rowers.datautils as datautils
from rowers.utils import lbstoN,myqueue,is_ranking_piece,wavg
from timezonefinder import TimezoneFinder
import django_rq
queue = django_rq.get_queue('default')
queuelow = django_rq.get_queue('low')
queuehigh = django_rq.get_queue('default')
from rowsandall_app.settings import SITE_URL
from rowers.mytypes import otwtypes,otetypes,rowtypes
from rowers import mytypes
from rowers.database import *
from rowers.opaque import encoder
# mapping the DB column names to the CSV file column names
# NOTE: the leading spaces and the "stokes" spelling in the CSV headers are
# intentional - they match the headers the rowingdata package writes, so do
# not "fix" them here.
columndict = {
    'time': 'TimeStamp (sec)',
    'hr': ' HRCur (bpm)',
    'velo': ' AverageBoatSpeed (m/s)',
    'pace': ' Stroke500mPace (sec/500m)',
    'spm': ' Cadence (stokes/min)',
    'power': ' Power (watts)',
    'averageforce': ' AverageDriveForce (lbs)',
    'drivelength': ' DriveLength (meters)',
    'peakforce': ' PeakDriveForce (lbs)',
    'distance': ' Horizontal (meters)',
    'catch': 'catch',
    'finish': 'finish',
    'peakforceangle': 'peakforceangle',
    'wash': 'wash',
    'slip': 'slip',
    'workoutstate': ' WorkoutState',
    'cumdist': 'cum_dist',
}
from scipy.signal import savgol_filter
import datetime
def get_video_data(w,groups=['basic'],mode='water'):
    """Build 1 Hz series of stroke metrics and GPS coordinates for video overlay.

    w is a Workout; groups selects the metric groups from rowingmetrics and
    mode filters metrics applicable to 'water'/'erg' ('both'/'basic' are
    always included).  Returns (data, metrics, maxtime) where data maps
    metric name to a list of per-second values, metrics maps metric name to
    display info and maxtime is the last coordinate timestamp.
    NOTE: groups is a mutable default but is only read, never mutated.
    """
    modes = [mode,'both','basic']
    columns = ['time','velo','spm']
    columns += [name for name,d in rowingmetrics if d['group'] in groups and d['mode'] in modes]
    columns = list(set(columns))
    df = getsmallrowdata_db(columns,ids=[w.id],
        workstrokesonly=False,doclean=False,compute=False)
    # stored time is in milliseconds; convert to seconds since workout start
    df['time'] = (df['time']-df['time'].min())/1000.
    df.sort_values(by='time',inplace=True)
    df.set_index(pd.to_timedelta(df['time'],unit='s'),inplace=True)
    # resample to one sample per second, forward-filling gaps
    df2 = df.resample('1s').first().fillna(method='ffill')
    df2['time'] = df2.index.total_seconds()
    if 'pace' in columns:
        # pace is stored in milliseconds per 500m; format as MM:SS.t strings
        df2['pace'] = df2['pace']/1000.
        p = df2['pace']
        p = p.apply(lambda x:timedeltaconv(x))
        p = nicepaceformat(p)
        df2['pace'] = p
    #mask = df2['time'] < delay
    #df2 = df2.mask(mask).dropna()
    df2['time'] = (df2['time']-df2['time'].min())
    df2 = df2.round(decimals=2)
    # truncate boat speed to two decimals
    boatspeed = (100*df2['velo']).astype(int)/100.
    try:
        coordinates = get_latlon_time(w.id)
    except KeyError:
        # no GPS data - substitute all-zero coordinates on the same clock
        nulseries = df['time']*0
        coordinates = pd.DataFrame({
            'time': df['time'],
            'latitude': nulseries,
            'longitude': nulseries,
        })
    coordinates.set_index(pd.to_timedelta(coordinates['time'],unit='s'),inplace=True)
    coordinates = coordinates.resample('1s').mean().interpolate()
    #mask = coordinates['time'] < delay
    #coordinates = coordinates.mask(mask).dropna()
    coordinates['time'] = coordinates['time']-coordinates['time'].min()
    latitude = coordinates['latitude']
    longitude = coordinates['longitude']
    # bundle data
    data = {
        'boatspeed':boatspeed.values.tolist(),
        'latitude':latitude.values.tolist(),
        'longitude':longitude.values.tolist(),
    }
    # metrics = {
    #     'boatspeed': {
    #         'unit': 'm/s',
    #         'metric': 'boatspeed',
    #         'name': 'Boat Speed'
    #     },
    # }
    metrics = {}
    for c in columns:
        if c != 'time':
            try:
                # integer metrics are emitted as ints, floats are truncated
                # to the metric's configured number of significant figures
                if dict(rowingmetrics)[c]['numtype'] == 'integer':
                    data[c] = df2[c].astype(int).tolist()
                else:
                    sigfigs = dict(rowingmetrics)[c]['sigfigs']
                    if (c != 'pace'):
                        da = ((10**sigfigs)*df2[c]).astype(int)/(10**sigfigs)
                    else:
                        da = df2[c]
                    data[c] = da.values.tolist()
                metrics[c] = {
                    'name': dict(rowingmetrics)[c]['verbose_name'],
                    'metric': c,
                    'unit': ''
                }
            except KeyError:
                pass
    # expose velo under the user-facing name 'boatspeed'
    # NOTE(review): raises KeyError if 'velo' is not in rowingmetrics - confirm
    metrics['boatspeed'] = metrics.pop('velo')
    # metrics['workperstroke'] = metrics.pop('driveenergy')
    metrics = collections.OrderedDict(sorted(metrics.items()))
    maxtime = coordinates['time'].max()
    return data, metrics, maxtime
def polarization_index(df,rower):
    """Return the polarization index of a workout's power distribution.

    Time (in minutes, derived from the millisecond 'time' column) is
    bucketed into low/mid/high power zones using the rower's aerobic
    (pw_at) and anaerobic (pw_an) thresholds; the index is
    log10(100 * frac_high * frac_low / frac_mid).

    NOTE: mutates df in place (adds a 'dt' column, drops NaN rows), as the
    original did.
    """
    # time is in milliseconds; dt is the per-sample duration in minutes
    df['dt'] = df['time'].diff()/6.e4
    df.dropna(axis=0,inplace=True)
    # guard against pauses/garbage: cap a single interval at 4 minutes
    df['dt'] = df['dt'].clip(upper=4,lower=0)
    # Bug fix: use the same (float) threshold on both sides of each zone
    # boundary.  The original compared against int(pw_at)/int(pw_an) in some
    # masks but the raw values in others, so samples with power exactly at
    # pw_an (or between int(pw_at) and pw_at) fell into no zone at all,
    # which could make frac_high zero and log10 blow up.
    pw_at = float(rower.pw_at)
    pw_an = float(rower.pw_an)
    masklow = (df['power'] > 0) & (df['power'] < pw_at)
    maskmid = (df['power'] >= pw_at) & (df['power'] < pw_an)
    maskhigh = df['power'] >= pw_an
    time_low_pw = df.loc[masklow,'dt'].sum()
    time_mid_pw = df.loc[maskmid,'dt'].sum()
    time_high_pw = df.loc[maskhigh,'dt'].sum()
    totaltime = time_low_pw + time_mid_pw + time_high_pw
    frac_low = time_low_pw/totaltime
    frac_mid = time_mid_pw/totaltime
    frac_high = time_high_pw/totaltime
    index = math.log10(frac_high*100.*frac_low/frac_mid)
    return index
def get_latlon(id):
    """Return [latitude, longitude] Series for workout *id*.

    Returns False when the workout does not exist, two empty Series when the
    stroke file is empty or unparsable, and zero-valued series (shaped like
    the timestamp column) when the file has no GPS columns.
    """
    try:
        w = Workout.objects.get(id=id)
    except Workout.DoesNotExist:
        return False
    rowdata = rdata(w.csvfilename)
    if rowdata.df.empty:
        return [pd.Series([]), pd.Series([])]
    try:
        try:
            latitude = rowdata.df.loc[:, ' latitude']
            longitude = rowdata.df.loc[:, ' longitude']
        except KeyError:
            # no GPS columns - substitute zero series of the right length
            latitude = 0 * rowdata.df.loc[:, 'TimeStamp (sec)']
            longitude = 0 * rowdata.df.loc[:, 'TimeStamp (sec)']
        return [latitude, longitude]
    except AttributeError:
        return [pd.Series([]), pd.Series([])]
    # NOTE(review): unreachable - both branches above return
    return [pd.Series([]), pd.Series([])]
def get_latlon_time(id):
    """Return a DataFrame with elapsed time (s) and GPS coordinates of a workout.

    Returns False when the workout does not exist, a list of two empty
    Series when the stroke file is empty (kept for backward compatibility)
    and an empty DataFrame when the parsed file has no usable data.  Files
    without GPS columns yield all-zero latitude/longitude.
    """
    try:
        w = Workout.objects.get(id=id)
    except Workout.DoesNotExist:
        return False
    rowdata = rdata(w.csvfilename)
    if rowdata.df.empty:
        return [pd.Series([]), pd.Series([])]
    try:
        try:
            latitude = rowdata.df.loc[:, ' latitude']
            longitude = rowdata.df.loc[:, ' longitude']
        except KeyError:
            # no GPS columns - substitute zero series of the right length
            latitude = 0 * rowdata.df.loc[:, 'TimeStamp (sec)']
            longitude = 0 * rowdata.df.loc[:, 'TimeStamp (sec)']
    except AttributeError:
        return pd.DataFrame()
    # Bug fix: build the frame from the (possibly substituted) series.  The
    # original indexed rowdata.df[' latitude'] directly here, so the
    # KeyError fallback above was dead code and GPS-less files still raised.
    df = pd.DataFrame({
        'time': rowdata.df['TimeStamp (sec)']-rowdata.df['TimeStamp (sec)'].min(),
        'latitude': latitude,
        'longitude': longitude
    })
    return df
def workout_summary_to_df(
        rower,
        startdate=None,
        enddate=None):
    """Return a DataFrame summarizing *rower*'s workouts, oldest first.

    startdate/enddate bound the workout start datetimes; they default to
    "all history" and "tomorrow".  Fixes two defects of the original: the
    defaults called timezone.now() at import time (so the window silently
    went stale in a long-running process), and the two parameters were
    accepted but never applied to the queryset.
    """
    if startdate is None:
        startdate = datetime.datetime(1970, 1, 1, tzinfo=pytz.utc)
    if enddate is None:
        enddate = timezone.now() + timezone.timedelta(days=1)
    ws = Workout.objects.filter(
        user=rower,
        startdatetime__gte=startdate,
        startdatetime__lte=enddate,
    ).order_by("startdatetime")
    records = []
    for w in ws:
        # opaque-encoded download links for the stroke data
        tcx_link = SITE_URL+'/rowers/workout/{id}/emailtcx'.format(
            id=encoder.encode_hex(w.id)
        )
        csv_link = SITE_URL+'/rowers/workout/{id}/emailcsv'.format(
            id=encoder.encode_hex(w.id)
        )
        records.append({
            'name': w.name,
            'date': w.startdatetime,
            'timezone': w.timezone,
            'type': w.workouttype,
            'distance (m)': w.distance,
            'duration ': w.duration,
            'weight category': w.weightcategory,
            'adaptive classification': w.adaptiveclass,
            'weight (kg)': w.weightvalue,
            'notes': w.notes,
            'Stroke Data TCX': tcx_link,
            'Stroke Data CSV': csv_link,
            'TRIMP Training Load': workout_trimp(w)[0],
            'TSS Training Load': int(workout_rscore(w)[0]),
        })
    # explicit column list keeps the original column order (and the columns
    # themselves when there are no workouts at all)
    columns = [
        'name', 'date', 'timezone', 'type', 'distance (m)', 'duration ',
        'weight category', 'adaptive classification', 'weight (kg)', 'notes',
        'Stroke Data TCX', 'Stroke Data CSV',
        'TRIMP Training Load', 'TSS Training Load',
    ]
    df = pd.DataFrame(records, columns=columns)
    return df
def get_workouts(ids, userid):
    """Return the Workout objects among *ids* that belong to user *userid*.

    Each workout is fetched once; the original queried every matching
    workout twice (once to check ownership and again to build the result).
    """
    workouts = []
    for wid in ids:
        w = Workout.objects.get(id=wid)
        if int(w.user.user.id) == int(userid):
            workouts.append(w)
    return workouts
def filter_df(datadf, fieldname, value, largerthan=True):
    """Null out values of *fieldname* on the wrong side of *value*.

    With largerthan=True, values below *value* become NaN (keeping the
    larger ones); with largerthan=False, values at or above *value* become
    NaN.  The frame is returned unchanged when the column is missing or not
    comparable to *value*.
    """
    if fieldname not in datadf:
        return datadf
    try:
        if largerthan:
            bad = datadf[fieldname] < value
        else:
            bad = datadf[fieldname] >= value
        datadf.loc[bad, fieldname] = np.nan
    except TypeError:
        # non-numeric column - leave it untouched
        pass
    return datadf
# joins workouts
def join_workouts(r,ids,title='Joined Workout',
                  parent=None,
                  setprivate=False,
                  forceunit='lbs',killparents=False):
    """Concatenate the workouts in *ids* into one new workout for rower *r*.

    Metadata (rigging, type, notes, summary, privacy) is inherited from
    *parent*, or from the earliest of the joined workouts when no parent is
    given.  The combined stroke data is written to a new CSV and persisted
    via save_workout_database.  With *killparents* the source workouts are
    deleted afterwards.  Returns (workout_id, message).
    """
    message = None
    summary = ''
    if parent:
        oarlength = parent.oarlength
        inboard = parent.inboard
        workouttype = parent.workouttype
        notes = parent.notes
        summary = parent.summary
        if parent.privacy == 'hidden':
            makeprivate = True
        else:
            makeprivate = False
        startdatetime = parent.startdatetime
    else:
        oarlength = 2.89
        inboard = 0.88
        workouttype = 'rower'
        notes = ''
        summary = ''
        makeprivate = False
        startdatetime = timezone.now()
    # an explicit setprivate (strictly True/False) overrides inherited privacy
    if setprivate == True and makeprivate == False:
        makeprivate = True
    elif setprivate == False and makeprivate == True:
        makeprivate = False
    # reorder in chronological order
    ws = Workout.objects.filter(id__in=ids).order_by("startdatetime")
    if not parent:
        parent = ws[0]
        oarlength = parent.oarlength
        inboard = parent.inboard
        workouttype = parent.workouttype
        notes = parent.notes
        summary = parent.summary
    files = [w.csvfilename for w in ws]
    row = rdata(files[0])
    files = files[1:]
    while len(files):
        row2 = rdata(files[0])
        # Bug fix: always consume the file.  The original advanced the list
        # only when parsing succeeded, so a single unparsable file (rdata
        # returning 0) made this loop spin forever.
        files = files[1:]
        if row2 != 0:
            row = row+row2
    timestr = strftime("%Y%m%d-%H%M%S")
    csvfilename = 'media/df_' + timestr + '.csv'
    row.write_csv(csvfilename,gzip=True)
    id, message = save_workout_database(csvfilename, r,
                                        workouttype=workouttype,
                                        title=title,
                                        notes=notes,
                                        oarlength=oarlength,
                                        inboard=inboard,
                                        makeprivate=makeprivate,
                                        dosmooth=False,
                                        consistencychecks=False)
    if killparents:
        for w in ws:
            w.delete()
    # the joined workout inevitably overlaps its parents - clear the flag
    w = Workout.objects.get(id=id)
    w.duplicate = False
    w.save()
    if message is not None and "duplicate" in message:
        message = ""
    return (id, message)
def df_resample(datadf):
    """Down-sample stroke data to whole-second bins.

    Time stamps must be in seconds; rows sharing the same integer second
    are averaged.  Adds a 'timestamps' helper column to *datadf* as a side
    effect (as the original did).
    """
    datadf['timestamps'] = datadf['TimeStamp (sec)'].astype('int')
    return datadf.groupby(['timestamps']).mean()
def clean_df_stats(datadf, workstrokesonly=True, ignorehr=True,
                   ignoreadvanced=False):
    """Null out implausible values in a stroke DataFrame before statistics.

    Zeros and negatives are nulled after temporarily shifting columns that
    are legitimately zero/negative (catch, peakforceangle, hr, spm,
    workoutstate) into the positive domain.  Each metric is then masked to
    a plausible range; advanced force-curve metrics are range-checked only
    when they carry real data.  With *workstrokesonly*, rest strokes
    (workoutstate 3) are dropped.  If cleaning would remove (almost) every
    row, the original frame is returned unchanged.
    """
    # clean data remove zeros and negative values
    before = len(datadf)
    data_orig = datadf.copy()
    # bring metrics which have negative values to positive domain
    if len(datadf)==0:
        return datadf
    try:
        datadf['catch'] = -datadf['catch']
    except (KeyError,TypeError):
        pass
    try:
        datadf['peakforceangle'] = datadf['peakforceangle'] + 1000
    except (KeyError,TypeError):
        pass
    try:
        datadf['hr'] = datadf['hr'] + 10
    except (KeyError,TypeError):
        pass
    # protect 0 spm values from being nulled
    try:
        datadf['spm'] = datadf['spm'] + 1.0
    except (KeyError,TypeError) as e:
        pass
    # protect 0 workoutstate values from being nulled
    try:
        datadf['workoutstate'] = datadf['workoutstate'] + 1
    except (KeyError,TypeError) as e:
        pass
    try:
        datadf = datadf.clip(lower=0)
    except TypeError:
        pass
    # protect advanced metrics columns
    advancedcols = [
        'rhythm',
        'power',
        'drivelength',
        'forceratio',
        'drivespeed',
        'driveenergy',
        'catch',
        'finish',
        'averageforce',
        'peakforce',
        'slip',
        'wash',
        'peakforceangle',
        'effectiveangle',
    ]
    # zeros are treated as "no measurement"
    datadf.replace(to_replace=0, value=np.nan, inplace=True)
    # datadf = datadf.map_partitions(lambda df:df.replace(to_replace=0,value=np.nan))
    # bring spm back to real values
    try:
        datadf['spm'] = datadf['spm'] - 1
    except (TypeError,KeyError) as e:
        pass
    # bring workoutstate back to real values
    try:
        datadf['workoutstate'] = datadf['workoutstate'] - 1
    except (TypeError,KeyError) as e:
        pass
    # return from positive domain to negative
    try:
        datadf['catch'] = -datadf['catch']
    except (KeyError,TypeError):
        pass
    try:
        datadf['peakforceangle'] = datadf['peakforceangle'] - 1000
    except (KeyError,TypeError):
        pass
    try:
        datadf['hr'] = datadf['hr'] - 10
    except (KeyError,TypeError):
        pass
    # clean data for useful ranges per column
    # NOTE: mask(mask, inplace=True) nulls the WHOLE ROW where the mask is
    # True, not just the offending column
    if not ignorehr:
        try:
            mask = datadf['hr'] < 30
            datadf.mask(mask,inplace=True)
        except (KeyError,TypeError):
            pass
    try:
        mask = datadf['spm'] < 0
        datadf.mask(mask,inplace=True)
    except (KeyError,TypeError):
        pass
    try:
        mask = datadf['efficiency'] > 200.
        datadf.mask(mask,inplace=True)
    except (KeyError,TypeError):
        pass
    try:
        mask = datadf['spm'] < 10
        datadf.mask(mask,inplace=True)
    except (KeyError,TypeError):
        pass
    try:
        # pace is stored in milliseconds; slower than 5:00/500m is implausible
        mask = datadf['pace'] / 1000. > 300.
        datadf.mask(mask,inplace=True)
    except (KeyError,TypeError):
        pass
    try:
        mask = datadf['efficiency'] < 0.
        datadf.mask(mask,inplace=True)
    except (KeyError,TypeError):
        pass
    try:
        # faster than 1:00/500m is implausible
        mask = datadf['pace'] / 1000. < 60.
        datadf.mask(mask,inplace=True)
    except (KeyError,TypeError):
        pass
    try:
        mask = datadf['spm'] > 60
        datadf.mask(mask,inplace=True)
    except (KeyError,TypeError):
        pass
    try:
        mask = datadf['wash'] < 1
        datadf.loc[mask, 'wash'] = np.nan
    except (KeyError,TypeError):
        pass
    # try to guess ignoreadvanced
    # (a constant or all-NaN advanced column means the device did not
    # actually measure force-curve data)
    if not ignoreadvanced:
        for metric in advancedcols:
            try:
                sum = datadf[metric].std()
                if sum == 0 or np.isnan(sum):
                    ignoreadvanced = True
            except KeyError:
                pass
    if not ignoreadvanced:
        try:
            mask = datadf['rhythm'] < 0
            datadf.mask(mask,inplace=True)
        except (KeyError,TypeError):
            pass
        try:
            mask = datadf['rhythm'] > 70
            datadf.mask(mask,inplace=True)
        except (KeyError,TypeError):
            pass
        try:
            mask = datadf['power'] < 20
            datadf.mask(mask,inplace=True)
        except (KeyError,TypeError):
            pass
        try:
            mask = datadf['drivelength'] < 0.5
            datadf.mask(mask,inplace=True)
        except (KeyError,TypeError):
            pass
        try:
            mask = datadf['forceratio'] < 0.2
            datadf.mask(mask,inplace=True)
        except (KeyError,TypeError):
            pass
        try:
            mask = datadf['forceratio'] > 1.0
            datadf.mask(mask,inplace=True)
        except (KeyError,TypeError):
            pass
        try:
            mask = datadf['drivespeed'] < 0.5
            datadf.mask(mask,inplace=True)
        except (KeyError,TypeError):
            pass
        try:
            mask = datadf['drivespeed'] > 4
            datadf.mask(mask,inplace=True)
        except (KeyError,TypeError):
            pass
        try:
            mask = datadf['driveenergy'] > 2000
            datadf.mask(mask,inplace=True)
        except (KeyError,TypeError):
            pass
        try:
            mask = datadf['driveenergy'] < 100
            datadf.mask(mask,inplace=True)
        except (KeyError,TypeError):
            pass
        try:
            mask = datadf['catch'] > -30.
            datadf.mask(mask,inplace=True)
        except (KeyError,TypeError):
            pass
    # workout-state codes; only the rest states are filtered below
    workoutstateswork = [1, 4, 5, 8, 9, 6, 7]
    workoutstatesrest = [3]
    workoutstatetransition = [0, 2, 10, 11, 12, 13]
    if workstrokesonly == 'True' or workstrokesonly == True:
        try:
            datadf = datadf[~datadf['workoutstate'].isin(workoutstatesrest)]
        except:
            pass
    # if cleaning nuked (nearly) everything, fall back to the raw data
    after = len(datadf.dropna())
    ratio = float(after)/float(before)
    if ratio < 0.001 or after < 2:
        return data_orig
    return datadf
def getpartofday(row,r):
    """Classify a workout's local start time as Morning/Afternoon/Evening/Night.

    Tries to localize the start time using the mean GPS position of the
    stroke data; falls back to the rower's default timezone when the file
    has no GPS columns.
    """
    workoutstartdatetime = row.rowdatetime
    try:
        latavg = row.df[' latitude'].mean()
        lonavg = row.df[' longitude'].mean()
        tf = TimezoneFinder()
        try:
            timezone_str = tf.timezone_at(lng=lonavg, lat=latavg)
        except (ValueError,OverflowError):
            # NaN / out-of-range coordinates
            timezone_str = 'UTC'
        if timezone_str == None:
            # point not inside a timezone polygon (e.g. on open water)
            timezone_str = tf.closest_timezone_at(lng=lonavg,
                                                  lat=latavg)
        if timezone_str == None:
            timezone_str = r.defaulttimezone
        try:
            workoutstartdatetime = pytz.timezone(timezone_str).localize(
                row.rowdatetime
            )
        except ValueError:
            # already timezone-aware
            workoutstartdatetime = row.rowdatetime
    except KeyError:
        # no GPS columns in the stroke data
        timezone_str = r.defaulttimezone
        workoutstartdatetime = row.rowdatetime
    h = workoutstartdatetime.astimezone(pytz.timezone(timezone_str)).hour
    if h < 12:
        return "Morning"
    elif h < 18:
        return "Afternoon"
    elif h < 22:
        return "Evening"
    else:
        return "Night"
    # NOTE(review): unreachable - all branches above return
    return None
def getstatsfields():
    """Return (fieldlist, fielddict) of stroke metrics selectable for stats.

    fielddict maps metric name to verbose name; the axis/bookkeeping
    columns ('time', 'distance', 'cumdist') are excluded.
    """
    fielddict = {name:d['verbose_name'] for name,d in rowingmetrics}
    # axes and bookkeeping columns are not statistics; pop with a default
    # so a metrics list without them does not raise KeyError
    for axis in ('time', 'distance', 'cumdist'):
        fielddict.pop(axis, None)
    # list(dict) yields the keys on both Python 2 and 3, replacing the
    # original iteritems()/items() try/except fallback
    fieldlist = list(fielddict)
    return fieldlist, fielddict
# A string representation for time deltas
def niceformat(values):
    """Return the strfdelta string representation of each time delta."""
    return [strfdelta(v) for v in values]
# A nice printable format for time delta values
def strfdelta(tdelta):
    """Format a time delta as ``MM:SS.t`` (minutes:seconds.tenths).

    Accepts a datetime.timedelta (days are ignored; hours fold into the
    minutes field) or a nanosecond-resolution numpy timedelta64, detected
    by its missing ``seconds`` attribute.
    """
    try:
        mins, secs = divmod(tdelta.seconds, 60)
        tenths = int(tdelta.microseconds / 1e5)
    except AttributeError:
        # numpy timedelta64: raw value is nanoseconds
        mins, secs = divmod(tdelta.view(np.int64), 60e9)
        secs, frac = divmod(secs, 1e9)
        tenths = int(frac / 1e8)
    return "{minutes:0>2}:{seconds:0>2}.{tenths:0>1}".format(
        minutes=mins,
        seconds=secs,
        tenths=tenths,
    )
def timedelta_to_seconds(tdelta):
    """Convert a datetime.time-style duration to seconds.

    Bug fix: the original ignored the hour component, so any duration of an
    hour or more was truncated modulo one hour.
    """
    return 3600.*tdelta.hour + 60.*tdelta.minute + tdelta.second
# A nice printable format for pace values
def nicepaceformat(values):
    """Return the strfdelta string representation of each pace value."""
    return [strfdelta(v) for v in values]
# Convert seconds to a Time Delta value, replacing NaN with a 5:50 pace
def timedeltaconv(x):
    """Convert seconds to a timedelta.

    NaN, infinite, non-positive or absurdly large (>= 175000 s) values map
    to 350 seconds, i.e. a 5:50 pace placeholder.
    """
    if np.isfinite(x) and 0 < x < 175000:
        return datetime.timedelta(seconds=x)
    return datetime.timedelta(seconds=350.)
def paceformatsecs(values):
    """Format pace values given in seconds as MM:SS.t strings."""
    return [strfdelta(timedeltaconv(v)) for v in values]
def update_c2id_sql(id,c2id):
    """Store the Concept2 log id for a workout directly via SQL.

    Values are sent as bound parameters - the original interpolated both
    values into the SQL string with %, which is an injection risk and
    breaks on unusual input.
    """
    engine = create_engine(database_url, echo=False)
    query = sa.text(
        "UPDATE rowers_workout SET uploadedtoc2 = :c2id WHERE id = :id"
    )
    with engine.connect() as conn, conn.begin():
        conn.execute(query, {'c2id': c2id, 'id': id})
    engine.dispose()
    return 1
def fitnessmetric_to_sql(m,table='powertimefitnessmetric',debug=False):
    """Insert one fitness-metric record (dict *m*) into *table*.

    Values are passed as named bound parameters; the original built
    qmark ('?') placeholders, which is a DBAPI-specific paramstyle that
    SQLAlchemy does not translate for all backends.
    """
    engine = create_engine(database_url, echo=False)
    columns = ', '.join(m.keys())
    placeholders = ', '.join(':' + key for key in m.keys())
    query = sa.text(
        "INSERT into %s ( %s ) Values (%s)" % (table, columns, placeholders)
    )
    with engine.connect() as conn, conn.begin():
        conn.execute(query, dict(m))
    engine.dispose()
    return 1
def getcpdata_sql(rower_id,table='cpdata'):
    """Read a rower's critical-power curve from *table* into a DataFrame.

    Fixes two resource leaks in the original: an unused raw DBAPI
    connection was opened and never closed, and the engine was never
    disposed.  The user id is now a bound parameter instead of being
    formatted into the SQL string.
    """
    engine = create_engine(database_url, echo=False)
    query = sa.text('SELECT * from {table} WHERE user=:rower_id;'.format(
        table=table,
    ))
    try:
        df = pd.read_sql_query(query, engine, params={'rower_id': rower_id})
    finally:
        engine.dispose()
    return df
def deletecpdata_sql(rower_id,table='cpdata'):
    """Delete a rower's stored critical-power curve (best effort).

    The user id is bound as a parameter instead of formatted into the SQL
    string, and only database errors are swallowed - the original bare
    except also hid programming errors.
    """
    engine = create_engine(database_url, echo=False)
    query = sa.text('DELETE from {table} WHERE user=:rower_id;'.format(
        table=table,
    ))
    with engine.connect() as conn, conn.begin():
        try:
            conn.execute(query, {'rower_id': rower_id})
        except sa.exc.SQLAlchemyError:
            # best effort - e.g. the SQLite database may be locked
            print("Database locked")
    engine.dispose()
def updatecpdata_sql(rower_id,delta,cp,table='cpdata',distance=None):
    """Replace a rower's stored critical-power curve.

    *distance* is optional; pass a Series/array to also store a distance
    column.  Bug fix: the original default was a plain list ``[]`` but the
    code then called ``distance.empty`` on it, so calling without
    *distance* raised AttributeError (lists have no ``empty``).  Using
    ``None`` keeps the signature backward compatible for callers that pass
    a pandas Series.
    """
    deletecpdata_sql(rower_id)
    df = pd.DataFrame(
        {
            'delta':delta,
            'cp':cp,
            'user':rower_id
        }
    )
    if distance is not None and len(distance) > 0:
        df['distance'] = distance
    engine = create_engine(database_url, echo=False)
    # to_sql manages its own connection/transaction from the engine
    df.to_sql(table, engine, if_exists='append', index=False)
    engine.dispose()
def runcpupdate(
        rower,type='water',
        startdate=None,
        enddate=None
        ):
    """Queue a background critical-power update over a rower's ranking pieces.

    startdate/enddate default to one year ago and five days from now,
    computed at call time.  The original evaluated ``timezone.now()`` in
    the default arguments (i.e. once at import), so the window silently
    drifted in a long-running process.
    """
    if startdate is None:
        startdate = timezone.now()-datetime.timedelta(days=365)
    if enddate is None:
        enddate = timezone.now()+datetime.timedelta(days=5)
    if type == 'water':
        theworkouts = Workout.objects.filter(
            user=rower,rankingpiece=True,
            workouttype='water',
            startdatetime__gte=startdate,
            startdatetime__lte=enddate
        )
        table = 'cpdata'
    else:
        theworkouts = Workout.objects.filter(
            user=rower,rankingpiece=True,
            workouttype__in=[
                'rower',
                'dynamic',
                'slides'
            ],
            startdatetime__gte=startdate,
            startdatetime__lte=enddate
        )
        table = 'cpergdata'
    theids = [w.id for w in theworkouts]
    job = myqueue(
        queue,
        handle_updatecp,
        rower.id,
        theids,
        table=table)
    return job
def fetchcperg(rower,theworkouts):
    """Return the stored erg CP curve and queue a background refresh.

    The refresh job recomputes the curve from the workouts' CSV files.
    (The unused list of workout ids built by the original was removed.)
    """
    thefilenames = [w.csvfilename for w in theworkouts]
    cpdf = getcpdata_sql(rower.id,table='ergcpdata')
    job = myqueue(
        queue,
        handle_updateergcp,
        rower.id,
        thefilenames)
    return cpdf
def fetchcp_new(rower,workouts):
    """Return the combined critical-power curve for *workouts*.

    Per-workout CP curves are cached as parquet files under media/; missing
    caches are computed from the stroke database and written.  The combined
    curve is the per-duration maximum over all workouts.  Returns
    (delta, cp, 0); two empty Series when no data is available.
    """
    data = []
    for workout in workouts:
        cpfile = 'media/cpdata_{id}.parquet.gz'.format(id=workout.id)
        try:
            df = pd.read_parquet(cpfile)
            data.append(df)
        except OSError:
            # CP data file doesn't exist yet. has to be created
            strokesdf = getsmallrowdata_db(['power','workoutid','time'],ids = [workout.id])
            if not strokesdf.empty:
                totaltime = strokesdf['time'].max()
                try:
                    powermean = strokesdf['power'].mean()
                except KeyError:
                    powermean = 0
                if powermean != 0:
                    thesecs = totaltime
                    # allow the CP duration grid to slightly exceed the workout
                    maxt = 1.05 * thesecs
                    if maxt > 0:
                        logarr = datautils.getlogarr(maxt)
                        dfgrouped = strokesdf.groupby(['workoutid'])
                        delta, cpvalues, avgpower = datautils.getcp(dfgrouped, logarr)
                        filename = 'media/cpdata_{id}.parquet.gz'.format(id=workout.id)
                        df = pd.DataFrame({
                            'delta':delta,
                            'cp':cpvalues,
                            'id':workout.id,
                        })
                        df.to_parquet(filename,engine='fastparquet',compression='GZIP')
                        data.append(df)
    if len(data) == 0:
        return pd.Series(),pd.Series(),0
    if len(data)>1:
        df = pd.concat(data,axis=0)
    # best CP per duration across all selected workouts
    df = df.groupby(['delta']).max()
    df = df.sort_values(['delta']).reset_index()
    #dindex = df['id'].shift(1)-df['id']
    #dpowerplus = df['cp'].shift(1)-df['cp']
    #dpowermin = df['cp'].shift(-1)-df['cp']
    #badrows = []
    #badid = 0
    #for index,row in df.iterrows():
    #    if dindex[index] != 0 and dpowermin[index] > 0:
    #        badrows.append(index)
    #        badid = row['id']
    #    elif row['id'] == badid:
    #        badrows.append(index)
    #    else:
    #        badid = 0
    #df = df.drop(index = badrows)
    return df['delta'],df['cp'],0
def fetchcp(rower,theworkouts,table='cpdata'):
    """Return (delta, cp, avgpower-by-workout) for the rower's workouts.

    Reads the stored CP curve from SQL; if none exists yet, queues a
    background job to compute it and returns empty curve series.  The
    average-power dict is computed from the stroke data regardless.
    """
    # get all power data from database (plus workoutid)
    theids = [int(w.id) for w in theworkouts]
    columns = ['power','workoutid','time']
    df = getsmallrowdata_db(columns,ids=theids)
    df.dropna(inplace=True,axis=0)
    if df.empty:
        avgpower2 = {}
        for id in theids:
            avgpower2[id] = 0
        return pd.Series([]),pd.Series([]),avgpower2
    try:
        dfgrouped = df.groupby(['workoutid'])
    except KeyError:
        avgpower2 = {}
        return pd.Series([]),pd.Series([]),avgpower2
    try:
        avgpower2 = dict(dfgrouped.mean()['power'].astype(int))
    except KeyError:
        # no power column - report zero average for every workout
        avgpower2 = {}
        for id in theids:
            avgpower2[id] = 0
        return pd.Series([]),pd.Series([]),avgpower2
    cpdf = getcpdata_sql(rower.id,table=table)
    if not cpdf.empty:
        return cpdf['delta'],cpdf['cp'],avgpower2
    else:
        # no stored curve yet: compute it in the background
        job = myqueue(queue,
                      handle_updatecp,
                      rower.id,
                      theids,
                      table=table)
        return pd.Series([]),pd.Series([]),avgpower2
    # NOTE(review): unreachable - both branches above return
    return pd.Series([]),pd.Series([]),avgpower2
# create a new workout from manually entered data
def create_row_df(r,distance,duration,startdatetime,workouttype='rower',
                  avghr=None,avgpwr=None,avgspm=None,
                  rankingpiece = False,
                  duplicate=False,
                  title='Manual entry',notes='',weightcategory='hwt',
                  adaptiveclass='None'):
    """Create a workout from manually entered summary data.

    Synthesizes an evenly spaced stroke-by-stroke DataFrame (one stroke per
    ~10 m, or 20 strokes/s worth when distance is unknown) from the given
    distance/duration/averages, writes it as a gzipped CSV and saves it via
    save_workout_database.  Returns (workout_id, message).
    """
    # duration is a datetime.time-like value; flatten to seconds
    if duration is not None:
        totalseconds = duration.hour*3600.
        totalseconds += duration.minute*60.
        totalseconds += duration.second
        totalseconds += duration.microsecond/1.e6
    else:
        totalseconds = 60.
    if distance is None:
        distance = 0
    try:
        nr_strokes = int(distance/10.)
    except TypeError:
        nr_strokes = int(20.*totalseconds)
    if nr_strokes == 0:
        nr_strokes = 100
    # NOTE(review): .timestamp as an attribute is old arrow API; newer
    # arrow versions made it a method - confirm pinned arrow version
    unixstarttime = arrow.get(startdatetime).timestamp
    if not avgspm:
        try:
            spm = 60.*nr_strokes/totalseconds
        except ZeroDivisionError:
            spm = 20.
    else:
        spm = avgspm
    step = totalseconds/float(nr_strokes)
    elapsed = np.arange(nr_strokes)*totalseconds/(float(nr_strokes-1))
    dstep = distance/float(nr_strokes)
    d = np.arange(nr_strokes)*distance/(float(nr_strokes-1))
    unixtime = unixstarttime + elapsed
    try:
        pace = 500.*totalseconds/distance
    except ZeroDivisionError:
        pace = 240.
    if workouttype in ['rower','slides','dynamic']:
        # erg workout: estimate power from speed with the usual cubic model
        try:
            velo = distance/totalseconds
        except ZeroDivisionError:
            velo = 2.4
        power = 2.8*velo**3
    elif avgpwr is not None:
        power = avgpwr
    else:
        power = 0
    if avghr is not None:
        hr = avghr
    else:
        hr = 0
    df = pd.DataFrame({
        'TimeStamp (sec)': unixtime,
        ' Horizontal (meters)': d,
        ' Cadence (stokes/min)': spm,
        ' Stroke500mPace (sec/500m)':pace,
        ' ElapsedTime (sec)':elapsed,
        ' Power (watts)':power,
        ' HRCur (bpm)':hr,
    })
    timestr = strftime("%Y%m%d-%H%M%S")
    csvfilename = 'media/df_' + timestr + '.csv'
    df[' ElapsedTime (sec)'] = df['TimeStamp (sec)']
    row = rrdata(df=df)
    row.write_csv(csvfilename, gzip = True)
    id, message = save_workout_database(csvfilename, r,
                                        title=title,
                                        notes=notes,
                                        rankingpiece=rankingpiece,
                                        duplicate=duplicate,
                                        dosmooth=False,
                                        workouttype=workouttype,
                                        consistencychecks=False,
                                        weightcategory=weightcategory,
                                        adaptiveclass=adaptiveclass,
                                        totaltime=totalseconds)
    return (id, message)
from rowers.utils import totaltime_sec_to_string
# Processes painsled CSV file to database
def save_workout_database(f2, r, dosmooth=True, workouttype='rower',
                          boattype='1x',
                          adaptiveclass='None',
                          weightcategory='hwt',
                          dosummary=True, title='Workout',
                          workoutsource='unknown',
                          notes='', totaldist=0, totaltime=0,
                          rankingpiece=False,
                          duplicate=False,
                          summary='',
                          makeprivate=False,
                          oarlength=2.89, inboard=0.88,
                          forceunit='lbs',
                          consistencychecks=False,
                          impeller=False):
    """Parse a painsled-style CSV, create the Workout record and stroke data.

    Localizes the start time from GPS data where possible, detects
    duplicates by overlapping time windows, optionally smooths pace,
    stores the stroke data, computes the CP curve and queues
    breakthrough/hard-workout notification emails.  Returns
    (workout_id, message); workout_id is 0 on failure.
    """
    message = None
    powerperc = 100 * np.array([r.pw_ut2,
                                r.pw_ut1,
                                r.pw_at,
                                r.pw_tr, r.pw_an]) / r.ftp
    # make workout and put in database
    rr = rrower(hrmax=r.max, hrut2=r.ut2,
                hrut1=r.ut1, hrat=r.at,
                hrtr=r.tr, hran=r.an, ftp=r.ftp,
                powerperc=powerperc, powerzones=r.powerzones)
    row = rdata(f2, rower=rr)
    if title is None or title == '':
        title = 'Workout'
        partofday = getpartofday(row,r)
        if partofday is not None:
            title = '{partofday} {workouttype}'.format(
                partofday=partofday,
                workouttype=workouttype,
            )
    if row.df.empty:
        return (0, 'Error: CSV data file was empty')
    # sub-second sampling: resample to 1 Hz and restart through the
    # dedicated DataFrame entry point
    dtavg = row.df['TimeStamp (sec)'].diff().mean()
    if dtavg < 1:
        newdf = df_resample(row.df)
        try:
            os.remove(f2)
        except:
            pass
        return new_workout_from_df(r, newdf,
                                   title=title,boattype=boattype,
                                   workouttype=workouttype,
                                   workoutsource=workoutsource)
    # Bug fix: initialize before the try block.  If check_consistency()
    # raises ZeroDivisionError, allchecks was previously unbound and the
    # test below raised NameError.
    allchecks = 1
    try:
        checks = row.check_consistency()
        for key, value in checks.items():
            if not value:
                allchecks = 0
    except ZeroDivisionError:
        pass
    if not allchecks and consistencychecks:
        # row.repair()
        pass
    if row == 0:
        return (0, 'Error: CSV data file not found')
    # real GPS data means this was actually an on-water session
    try:
        lat = row.df[' latitude']
        if lat.mean() != 0 and lat.std() != 0 and workouttype == 'rower':
            workouttype = 'water'
    except KeyError:
        pass
    if dosmooth:
        # auto smoothing of the pace trace with a Savitzky-Golay filter
        pace = row.df[' Stroke500mPace (sec/500m)'].values
        velo = 500. / pace
        f = row.df['TimeStamp (sec)'].diff().mean()
        if f != 0 and not np.isnan(f):
            # odd window spanning roughly 20 seconds of samples
            windowsize = 2 * (int(10. / (f))) + 1
        else:
            windowsize = 1
        if not 'originalvelo' in row.df:
            row.df['originalvelo'] = velo
        if windowsize > 3 and windowsize < len(velo):
            velo2 = savgol_filter(velo, windowsize, 3)
        else:
            velo2 = velo
        velo3 = pd.Series(velo2)
        velo3 = velo3.replace([-np.inf, np.inf], np.nan)
        velo3 = velo3.fillna(method='ffill')
        pace2 = 500. / abs(velo3)
        row.df[' Stroke500mPace (sec/500m)'] = pace2
        row.df = row.df.fillna(0)
        row.write_csv(f2, gzip=True)
    # NOTE(review): write_csv(..., gzip=True) appears to produce f2.gz, so
    # the plain f2 is removed here - confirm against rowingdata
    try:
        os.remove(f2)
    except:
        pass
    # recalculate power data
    if workouttype == 'rower' or workouttype == 'dynamic' or workouttype == 'slides':
        try:
            row.erg_recalculatepower()
            row.write_csv(f2, gzip=True)
        except:
            pass
    averagehr = row.df[' HRCur (bpm)'].mean()
    maxhr = row.df[' HRCur (bpm)'].max()
    if totaldist == 0:
        totaldist = row.df['cum_dist'].max()
    if totaltime == 0:
        totaltime = row.df['TimeStamp (sec)'].max(
        ) - row.df['TimeStamp (sec)'].min()
        try:
            totaltime = totaltime + row.df.loc[:, ' ElapsedTime (sec)'].iloc[0]
        except KeyError:
            pass
    if np.isnan(totaltime):
        totaltime = 0
    if dosummary:
        summary = row.allstats()
    timezone_str = 'UTC'
    try:
        workoutstartdatetime = timezone.make_aware(row.rowdatetime)
    except ValueError:
        workoutstartdatetime = row.rowdatetime
    # localize the start time from the average GPS position when available
    try:
        latavg = row.df[' latitude'].mean()
        lonavg = row.df[' longitude'].mean()
        tf = TimezoneFinder()
        try:
            timezone_str = tf.timezone_at(lng=lonavg, lat=latavg)
        except (ValueError,OverflowError):
            timezone_str = 'UTC'
        if timezone_str == None:
            timezone_str = tf.closest_timezone_at(lng=lonavg,
                                                  lat=latavg)
        if timezone_str == None:
            timezone_str = r.defaulttimezone
        try:
            workoutstartdatetime = pytz.timezone(timezone_str).localize(
                row.rowdatetime
            )
        except ValueError:
            # already aware - just convert
            workoutstartdatetime = workoutstartdatetime.astimezone(
                pytz.timezone(timezone_str)
            )
    except KeyError:
        timezone_str = r.defaulttimezone
    duration = totaltime_sec_to_string(totaltime)
    workoutdate = workoutstartdatetime.astimezone(
        pytz.timezone(timezone_str)
    ).strftime('%Y-%m-%d')
    workoutstarttime = workoutstartdatetime.astimezone(
        pytz.timezone(timezone_str)
    ).strftime('%H:%M:%S')
    if makeprivate:
        privacy = 'hidden'
    else:
        privacy = 'visible'
    # checking for inf values
    totaldist = np.nan_to_num(totaldist)
    maxhr = np.nan_to_num(maxhr)
    averagehr = np.nan_to_num(averagehr)
    dragfactor = 0
    if workouttype in otetypes:
        dragfactor = row.dragfactor
    t = datetime.datetime.strptime(duration,"%H:%M:%S.%f")
    delta = datetime.timedelta(hours=t.hour, minutes=t.minute, seconds=t.second)
    workoutenddatetime = workoutstartdatetime+delta
    # check for duplicate start times and duration
    ws = Workout.objects.filter(user=r,date=workoutdate,duplicate=False).exclude(
        startdatetime__gt=workoutenddatetime
    )
    ws2 = []
    for ww in ws:
        t = ww.duration
        delta = datetime.timedelta(hours=t.hour, minutes=t.minute, seconds=t.second)
        enddatetime = ww.startdatetime+delta
        if enddatetime > workoutstartdatetime:
            ws2.append(ww)
    if (len(ws2) != 0):
        message = "Warning: This workout overlaps with an existing one and was marked as a duplicate"
        duplicate = True
    # test title length
    if title is not None and len(title)>140:
        title = title[0:140]
    w = Workout(user=r, name=title, date=workoutdate,
                workouttype=workouttype,
                boattype=boattype,
                dragfactor=dragfactor,
                duration=duration, distance=totaldist,
                weightcategory=weightcategory,
                adaptiveclass=adaptiveclass,
                starttime=workoutstarttime,
                duplicate=duplicate,
                workoutsource=workoutsource,
                rankingpiece=rankingpiece,
                forceunit=forceunit,
                csvfilename=f2, notes=notes, summary=summary,
                maxhr=maxhr, averagehr=averagehr,
                startdatetime=workoutstartdatetime,
                inboard=inboard, oarlength=oarlength,
                timezone=timezone_str,
                privacy=privacy,
                impeller=impeller)
    try:
        w.save()
    except ValidationError:
        # retry with a safe start time before giving up
        try:
            w.startdatetime = timezone.now()
            w.save()
        except ValidationError:
            return (0,'Unable to create your workout')
    if privacy == 'visible':
        ts = Team.objects.filter(rower=r)
        for t in ts:
            w.team.add(t)
    # put stroke data in database
    res = dataprep(row.df, id=w.id, bands=True,
                   barchart=True, otwpower=True, empower=True, inboard=inboard)
    rscore,normp = workout_rscore(w)
    trimp,hrtss = workout_trimp(w)
    isbreakthrough = False
    ishard = False
    if workouttype in rowtypes:
        # compute and cache the workout's critical-power curve, then check
        # whether it beats the rower's current CP model
        df = getsmallrowdata_db(['power', 'workoutid', 'time'], ids=[w.id])
        try:
            powermean = df['power'].mean()
        except KeyError:
            powermean = 0
        if powermean != 0:
            thesecs = totaltime
            maxt = 1.05 * thesecs
            if maxt > 0:
                logarr = datautils.getlogarr(maxt)
                dfgrouped = df.groupby(['workoutid'])
                delta, cpvalues, avgpower = datautils.getcp(dfgrouped, logarr)
                filename = 'media/cpdata_{id}.parquet.gz'.format(id=w.id)
                cpdf = pd.DataFrame({
                    'delta':delta,
                    'cp':cpvalues,
                    'id':w.id,
                })
                cpdf.to_parquet(filename,engine='fastparquet',compression='GZIP')
                if workouttype in otwtypes:
                    res, btvalues, res2 = utils.isbreakthrough(
                        delta, cpvalues, r.p0, r.p1, r.p2, r.p3, r.cpratio)
                elif workouttype in otetypes:
                    res, btvalues, res2 = utils.isbreakthrough(
                        delta, cpvalues, r.ep0, r.ep1, r.ep2, r.ep3, r.ecpratio)
                else:
                    res = 0
                    res2 = 0
                if res:
                    isbreakthrough = True
                    res = datautils.updatecp(delta, cpvalues, r,workouttype=workouttype)
                if res2 and not isbreakthrough:
                    ishard = True
    # submit email task to send email about breakthrough workout
    if isbreakthrough:
        if r.getemailnotifications and not r.emailbounced:
            job = myqueue(queuehigh,handle_sendemail_breakthrough,
                          w.id,
                          r.user.email,
                          r.user.first_name,
                          r.user.last_name,
                          btvalues=btvalues.to_json())
    # submit email task to send email about hard workout
    if ishard:
        if r.getemailnotifications and not r.emailbounced:
            job = myqueue(queuehigh,handle_sendemail_hard,
                          w.id,
                          r.user.email,
                          r.user.first_name,
                          r.user.last_name,
                          btvalues=btvalues.to_json())
    return (w.id, message)
# file-format identifier (as returned by get_file_type) -> parser class
# from the rowingdata package; used by parsenonpainsled below
parsers = {
    'kinomap': KinoMapParser,
    'xls': ExcelTemplate,
    'rp': RowProParser,
    'tcx':TCXParser,
    'mystery':MysteryParser,
    'ritmotime':RitmoTimeParser,
    'quiske': QuiskeParser,
    'rowperfect3': RowPerfectParser,
    'coxmate': CoxMateParser,
    'bcmike': BoatCoachAdvancedParser,
    'boatcoach': BoatCoachParser,
    'boatcoachotw': BoatCoachOTWParser,
    'painsleddesktop': painsledDesktopParser,
    'speedcoach': speedcoachParser,
    'speedcoach2': SpeedCoach2Parser,
    'ergstick': ErgStickParser,
    'fit': FITParser,
    'ergdata': ErgDataParser,
    'humon': HumonParser,
    'eth': ETHParser,
}
def parsenonpainsled(fileformat, f2, summary):
    """Parse file *f2* with the parser registered for *fileformat*.

    Returns a tuple (row, hasrecognized, summary, fileformat); on any
    parse failure row is None, hasrecognized is False and the format
    becomes 'unknown'.  For SpeedCoach GPS2 files the format string is
    tagged with the empower firmware version; for FIT files a summary is
    built from the file itself.
    """
    try:
        row = parsers[fileformat](f2)
    except (KeyError, IndexError, ValueError):
        return None, False, '', 'unknown'
    # handle speed coach GPS 2
    if (fileformat == 'speedcoach2'):
        # NOTE(review): the rigging read here is not returned to the
        # caller, which therefore always uses default oarlength/inboard
        # — confirm whether that is intended.
        oarlength, inboard = get_empower_rigging(f2)
        empowerfirmware = get_empower_firmware(f2)
        if empowerfirmware != '':
            fileformat = fileformat + 'v' + str(empowerfirmware)
        else:
            fileformat = 'speedcoach2v0'
        try:
            summary = row.allstats()
        except ZeroDivisionError:
            summary = ''
    # handle FIT
    if (fileformat == 'fit'):
        try:
            s = fitsummarydata(f2)
            s.setsummary()
            summary = s.summarytext
        except Exception:
            # best effort: keep the incoming summary on any failure
            # (was a bare except; narrowed so Ctrl-C etc. still propagate)
            pass
    return row, True, summary, fileformat
def handle_nonpainsled(f2, fileformat, summary=''):
    """Convert a non-painsled input file to the internal gzipped CSV format.

    Returns (csvfilename, summary, oarlength, inboard, fileformat,
    impeller); csvfilename is 0 when the file could not be handled.
    The original file is deleted after a successful conversion.
    """
    # NOTE(review): parsenonpainsled reads empower rigging but does not
    # return it, so these defaults are always what the caller receives.
    oarlength = 2.89
    inboard = 0.88
    impeller = False
    row, hasrecognized, summary, fileformat = parsenonpainsled(fileformat, f2, summary)
    # Handle c2log: summary-only formats carry no stroke data
    if (fileformat == 'c2log' or fileformat == 'rowprolog'):
        return (0, '', 0, 0, '', impeller)
    if not hasrecognized:
        return (0, '', 0, 0, '', impeller)
    f_to_be_deleted = f2
    # should delete file
    f2 = f2[:-4] + 'o.csv'
    row2 = rrdata(df = row.df)
    if 'speedcoach2' in fileformat:
        # impeller consistency: fall back to GPS data when readings disagree
        impellerdata, consistent, ratio = row.impellerconsistent(threshold=0.3)
        if impellerdata and consistent:
            impeller = True
        if impellerdata and not consistent:
            row2.use_gpsdata()
    row2.write_csv(f2, gzip=True)
    # remove the original upload (it may have been gzipped on the way in);
    # bare excepts narrowed to OSError, which covers all removal failures
    try:
        os.remove(f_to_be_deleted)
    except OSError:
        try:
            os.remove(f_to_be_deleted + '.gz')
        except OSError:
            pass
    return (f2, summary, oarlength, inboard, fileformat, impeller)
# Create new workout from file and store it in the database
# This routine should be used everywhere in views.py and mailprocessing.py
# Currently there is code duplication
def get_workouttype_from_fit(filename, workouttype='water'):
    """Derive the workout type from the sport records of a FIT file.

    Falls back to *workouttype* when the header is unreadable or the
    sport has no mapping; returns 'water' when a sport/lap record
    carries no sport value.
    """
    try:
        fitfile = FitFile(filename, check_crc=False)
    except FitHeaderError:
        return workouttype
    sport = 'rowing'
    for record in fitfile.messages:
        if record.name not in ('sport', 'lap'):
            continue
        try:
            sport = record.get_values()['sport'].lower()
        except KeyError:
            return 'water'
    try:
        return mytypes.fitmappinginv[sport]
    except KeyError:
        return workouttype
import rowingdata.tcxtools as tcxtools
def get_workouttype_from_tcx(filename, workouttype='water'):
    """Derive the workout type from a TCX file's Sport attribute.

    On-the-water types passed in are kept as-is; otherwise the TCX
    sport is mapped through the Garmin type table, falling back to the
    incoming *workouttype* when anything is missing or unmapped.
    """
    if workouttype in mytypes.otwtypes:
        return workouttype
    sport = 'rowing'
    try:
        parsed = tcxtools.tcx_getdict(filename)
        try:
            sport = parsed['Activities']['Activity']['@Sport'].lower()
        except KeyError:
            return workouttype
        if sport == 'other':
            sport = 'rowing'
    except TypeError:
        # unparseable document: keep the 'rowing' default
        pass
    try:
        return mytypes.garminmappinginv[sport.upper()]
    except KeyError:
        return workouttype
def new_workout_from_file(r, f2,
                          workouttype='rower',
                          workoutsource=None,
                          title='Workout',
                          boattype='1x',
                          makeprivate=False,
                          notes='',
                          uploadoptions=None):
    """Create a new Workout for user profile *r* from uploaded file *f2*.

    Detects the file type, converts non-painsled formats to the internal
    CSV format, stores the workout and queues the trimp calculation.
    Returns (workout id, message, filename); id is 0 on failure and -1
    when the file (a zip) was queued for offline processing.
    """
    # Bug fix: the previous mutable default dict was mutated below
    # ('fromuploadform'), leaking state across calls. Build a fresh one.
    if uploadoptions is None:
        uploadoptions = {'boattype': '1x', 'workouttype': 'rower'}
    message = ""
    impeller = False
    try:
        fileformat = get_file_type(f2)
    except (IOError, UnicodeDecodeError):
        os.remove(f2)
        message = "Rowsandall could not process this file. The extension is supported but the file seems corrupt. Contact info@rowsandall.com if you think this is incorrect."
        return (0, message, f2)
    summary = ''
    oarlength = 2.89
    inboard = 0.88
    # Save zip files to email box for further processing
    if len(fileformat) == 3 and fileformat[0] == 'zip':
        uploadoptions['fromuploadform'] = True
        bodyyaml = yaml.safe_dump(uploadoptions, default_flow_style=False)
        f_to_be_deleted = f2
        impeller = False
        workoutsbox = Mailbox.objects.filter(name='workouts')[0]
        msg = Message(mailbox=workoutsbox,
                      from_header=r.user.email,
                      subject=title, body=bodyyaml)
        msg.save()
        # copy the upload into the mailbox attachment area
        f3 = 'media/mailbox_attachments/' + f2[6:]
        copyfile(f2, f3)
        f3 = f3[6:]
        a = MessageAttachment(message=msg, document=f3)
        a.save()
        message = "Zip file was stored for offline processing"
        return -1, message, f2
    # Some people try to upload Concept2 logbook summaries
    if fileformat == 'c2log':
        os.remove(f2)
        message = "This summary does not contain stroke data. Use the files containing stroke by stroke data."
        return (0, message, f2)
    if fileformat == 'nostrokes':
        os.remove(f2)
        message = "It looks like this file doesn't contain stroke data."
        return (0, message, f2)
    if fileformat == 'kml':
        os.remove(f2)
        message = "KML files are not supported"
        return (0, message, f2)
    # Some people upload corrupted zip files
    if fileformat == 'notgzip':
        os.remove(f2)
        message = "Rowsandall could not process this file. The extension is supported but the file seems corrupt. Contact info@rowsandall.com if you think this is incorrect."
        return (0, message, f2)
    # Some people try to upload RowPro summary logs
    if fileformat == 'rowprolog':
        os.remove(f2)
        message = "This RowPro logbook summary does not contain stroke data. Please use the Stroke Data CSV file for the individual workout in your log."
        return (0, message, f2)
    # Sometimes people try an unsupported file type.
    # Send an email to info@rowsandall.com with the file attached
    # for me to check if it is a bug, or a new file type
    # worth supporting
    if fileformat == 'gpx':
        # (removed leftover debug print)
        os.remove(f2)
        message = "GPX files support is on our roadmap. Check back soon."
        return (0, message, f2)
    if fileformat == 'unknown':
        message = "We couldn't recognize the file type"
        extension = os.path.splitext(f2)[1]
        filename = os.path.splitext(f2)[0]
        if extension == '.gz':
            # strip the .gz layer so the true extension is preserved
            filename = os.path.splitext(filename)[0]
            extension2 = os.path.splitext(filename)[1] + extension
            extension = extension2
        f4 = filename + 'a' + extension
        copyfile(f2, f4)
        job = myqueue(queuehigh,
                      handle_sendemail_unrecognized,
                      f4,
                      r.user.email)
        return (0, message, f2)
    if fileformat == 'att':
        # email attachment which can safely be ignored
        return (0, '', f2)
    # Get workout type from fit & tcx
    if (fileformat == 'fit'):
        workouttype = get_workouttype_from_fit(f2, workouttype=workouttype)
    if (fileformat == 'tcx'):
        workouttype = get_workouttype_from_tcx(f2, workouttype=workouttype)
    # handle non-Painsled by converting it to painsled compatible CSV
    if (fileformat != 'csv'):
        f2, summary, oarlength, inboard, fileformat, impeller = handle_nonpainsled(
            f2,
            fileformat,
            summary=summary
        )
        if not f2:
            message = 'Something went wrong'
            return (0, message, '')
    # fit/speedcoach2 already provide a summary unless it came back empty
    dosummary = (fileformat != 'fit' and 'speedcoach2' not in fileformat)
    dosummary = dosummary or summary == ''
    if 'speedcoach2' in fileformat and workouttype == 'rower':
        workouttype = 'water'
    if workoutsource is None:
        workoutsource = fileformat
    id, message = save_workout_database(
        f2, r,
        notes=notes,
        workouttype=workouttype,
        weightcategory=r.weightcategory,
        adaptiveclass=r.adaptiveclass,
        boattype=boattype,
        makeprivate=makeprivate,
        dosummary=dosummary,
        workoutsource=workoutsource,
        summary=summary,
        inboard=inboard, oarlength=oarlength,
        title=title,
        forceunit='N',
        impeller=impeller,
    )
    job = myqueue(queuehigh, handle_calctrimp, id, f2, r.ftp, r.sex, r.hrftp, r.max, r.rest)
    return (id, message, f2)
def split_workout(r, parent, splitsecond, splitmode):
    """Split workout *parent* at *splitsecond* into up to two new workouts.

    *splitmode* is a string of flags ('keep first', 'keep second',
    'keep original', 'firstprivate', 'secondprivate', 'originalprivate')
    controlling which parts are kept and their privacy.
    Returns (list of encoded workout ids, list of user messages).
    """
    data, row = getrowdata_db(id=parent.id)
    latitude, longitude = get_latlon(parent.id)
    if not latitude.empty and not longitude.empty:
        data[' latitude'] = latitude
        data[' longitude'] = longitude
    # stored time is in milliseconds; splitsecond is in seconds
    data['time'] = data['time'] / 1000.
    data1 = data[data['time'] <= splitsecond].copy()
    data2 = data[data['time'] > splitsecond].copy()
    # resample each half: sort, interpolate gaps, backfill, then average
    # duplicate timestamps away
    data1 = data1.sort_values(['time'])
    data1 = data1.interpolate(method='linear', axis=0, limit_direction='both',
                              limit=10)
    data1.fillna(method='bfill', inplace=True)
    # Some new stuff to try out
    data1 = data1.groupby('time', axis=0).mean()
    data1['time'] = data1.index
    data1.reset_index(drop=True, inplace=True)
    data2 = data2.sort_values(['time'])
    data2 = data2.interpolate(method='linear', axis=0, limit_direction='both',
                              limit=10)
    data2.fillna(method='bfill', inplace=True)
    # Some new stuff to try out
    data2 = data2.groupby('time', axis=0).mean()
    data2['time'] = data2.index
    data2.reset_index(drop=True, inplace=True)
    # pace is stored in milliseconds as well
    data1['pace'] = data1['pace'] / 1000.
    data2['pace'] = data2['pace'] / 1000.
    data1.drop_duplicates(subset='time', inplace=True)
    data2.drop_duplicates(subset='time', inplace=True)
    messages = []
    ids = []
    if 'keep first' in splitmode:
        if 'firstprivate' in splitmode:
            setprivate = True
        else:
            setprivate = False
        id, message = new_workout_from_df(r, data1,
                                          title=parent.name + ' (1)',
                                          parent=parent,
                                          setprivate=setprivate,
                                          forceunit='N')
        messages.append(message)
        ids.append(encoder.encode_hex(id))
    if 'keep second' in splitmode:
        # rebase distance and time of the second half to start at zero
        data2['cumdist'] = data2['cumdist'] - data2.iloc[
            0,
            data2.columns.get_loc('cumdist')
        ]
        data2['distance'] = data2['distance'] - data2.iloc[
            0,
            data2.columns.get_loc('distance')
        ]
        data2['time'] = data2['time'] - data2.iloc[
            0,
            data2.columns.get_loc('time')
        ]
        if 'secondprivate' in splitmode:
            setprivate = True
        else:
            setprivate = False
        # shift the second workout's start datetime by the split offset
        dt = datetime.timedelta(seconds=splitsecond)
        id, message = new_workout_from_df(r, data2,
                                          title=parent.name + ' (2)',
                                          parent=parent,
                                          setprivate=setprivate,
                                          dt=dt, forceunit='N')
        messages.append(message)
        ids.append(encoder.encode_hex(id))
    if not 'keep original' in splitmode:
        # only delete the original if at least one half was kept
        if 'keep second' in splitmode or 'keep first' in splitmode:
            parent.delete()
            messages.append('Deleted Workout: ' + parent.name)
        else:
            messages.append('That would delete your workout')
            ids.append(encoder.encode_hex(parent.id))
    elif 'originalprivate' in splitmode:
        parent.privacy = 'hidden'
        parent.save()
    return ids, messages
# Create new workout from data frame and store it in the database
# This routine should be used everywhere in views.py and mailprocessing.py
# Currently there is code duplication
def new_workout_from_df(r, df,
                        title='New Workout',
                        workoutsource='unknown',
                        boattype='1x',
                        workouttype='rower',
                        parent=None,
                        setprivate=False,
                        forceunit='lbs',
                        dt=datetime.timedelta()):
    """Create a new Workout for user profile *r* from dataframe *df*.

    When *parent* is given, rigging, source, type, notes, privacy and
    start time (shifted by *dt*) are inherited from it.  The dataframe is
    written as a gzipped CSV and stored through save_workout_database.
    Returns (workout id, message).
    """
    message = None
    if parent:
        oarlength = parent.oarlength
        inboard = parent.inboard
        workoutsource = parent.workoutsource
        workouttype = parent.workouttype
        boattype = parent.boattype
        notes = parent.notes
        # NOTE(review): summary is assigned but never used in this function
        summary = parent.summary
        if parent.privacy == 'hidden':
            makeprivate = True
        else:
            makeprivate = False
        startdatetime = parent.startdatetime + dt
    else:
        oarlength = 2.89
        inboard = 0.88
        notes = ''
        summary = ''
        makeprivate = False
        startdatetime = timezone.now()
    if setprivate:
        makeprivate = True
    timestr = strftime("%Y%m%d-%H%M%S")
    csvfilename = 'media/df_' + timestr + '.csv'
    if forceunit == 'N':
        # change to lbs for now
        df['peakforce'] /= lbstoN
        df['averageforce'] /= lbstoN
    df.rename(columns=columndict, inplace=True)
    # NOTE(review): .timestamp is a property in arrow < 1.0 and a method
    # in arrow >= 1.0 — confirm the pinned arrow version.
    starttimeunix = arrow.get(startdatetime).timestamp
    df[' ElapsedTime (sec)'] = df['TimeStamp (sec)']
    df['TimeStamp (sec)'] = df['TimeStamp (sec)'] + starttimeunix
    row = rrdata(df=df)
    row.write_csv(csvfilename, gzip=True)
    id, message = save_workout_database(csvfilename, r,
                                        workouttype=workouttype,
                                        boattype=boattype,
                                        title=title,
                                        workoutsource=workoutsource,
                                        notes=notes,
                                        oarlength=oarlength,
                                        inboard=inboard,
                                        makeprivate=makeprivate,
                                        dosmooth=False,
                                        consistencychecks=False)
    return (id, message)
# A wrapper around the rowingdata class, with some error catching
def rdata(file, rower=None):
    """Load a rowingdata object from *file* (or *file* + '.gz'), never raising.

    Returns an empty rowingdata object when neither file can be read.
    """
    # Bug fix: the default used to be a single shared rrower() instance
    # evaluated once at import time; build a fresh one per call instead.
    if rower is None:
        rower = rrower()
    try:
        return rrdata(csvfile=file, rower=rower)
    except (IOError, IndexError):
        # fall back to the gzipped variant of the same file
        try:
            return rrdata(csvfile=file + '.gz', rower=rower)
        except Exception:
            return rrdata()
    except Exception:
        # was a bare except; narrowed so SystemExit/KeyboardInterrupt pass
        return rrdata()
# Remove all stroke data for workout ID from database
def delete_strokedata(id):
    """Remove the stored stroke data for workout *id*.

    The parquet output may be a directory (dask writes one) or a single
    file; try both.  A missing path counts as already deleted.
    """
    path = 'media/strokedata_{id}.parquet.gz'.format(id=id)
    try:
        shutil.rmtree(path)
    except OSError:
        # path is a plain file, or does not exist at all
        # (the old trailing 'except FileNotFoundError' was unreachable:
        # FileNotFoundError is an OSError and was caught here already)
        try:
            os.remove(path)
        except FileNotFoundError:
            pass
# Replace stroke data in DB with data from CSV file
def update_strokedata(id, df):
    """Replace the stored stroke data for workout *id* with data rebuilt from *df*."""
    delete_strokedata(id)
    # dataprep writes the parquet file as a side effect; the returned
    # frame is not needed here (the old unused local was dropped)
    dataprep(df, id=id, bands=True, barchart=True, otwpower=True)
# Test that all data are of a numerical time
def testdata(time, distance, pace, spm):
t1 = np.issubdtype(time, np.number)
t2 = np.issubdtype(distance, np.number)
t3 = np.issubdtype(pace, np.number)
t4 = np.issubdtype(spm, np.number)
return t1 and t2 and t3 and t4
# Get data from DB for one workout (fetches all data). If data
# is not in DB, read from CSV file (and create DB entry)
def getrowdata_db(id=0, doclean=False, convertnewtons=True,
                  checkefficiency=True):
    """Return (stroke dataframe, Workout) for workout *id*.

    Reads the stored stroke data; when absent, rebuilds it from the CSV
    source.  Optionally backfills the efficiency column and cleans the
    frame.  Returns an empty dataframe when nothing can be read.
    """
    data = read_df_sql(id)
    try:
        data['deltat'] = data['time'].diff()
    except KeyError:
        data = pd.DataFrame()
    if data.empty:
        rowdata, row = getrowdata(id=id)
        if not rowdata.empty:
            data = dataprep(rowdata.df, id=id, bands=True,
                            barchart=True, otwpower=True)
        else:
            data = pd.DataFrame()  # returning empty dataframe
    else:
        row = Workout.objects.get(id=id)
    # idiom fix: truthiness test instead of '== True'
    if checkefficiency and not data.empty:
        try:
            # a zero efficiency column next to real power means the
            # efficiency was never computed for this workout
            if data['efficiency'].mean() == 0 and data['power'].mean() != 0:
                data = add_efficiency(id=id)
        except KeyError:
            data = add_efficiency(id=id)
    if doclean:
        data = clean_df_stats(data, ignorehr=True)
    return data, row
# Fetch a subset of the data from the DB
def getsmallrowdata_db(columns, ids=[], doclean=True,workstrokesonly=True,compute=True):
    """Fetch the requested *columns* of stroke data for the workouts in *ids*.

    Reads the per-workout parquet files, rebuilding any missing one from
    the workout's CSV source.  With compute=True the result is a cleaned
    pandas DataFrame; otherwise the raw (possibly multi-workout) frame.
    Returns an empty DataFrame when *ids* is empty.
    NOTE(review): mutable default ids=[] — not mutated here, but risky.
    """
    # prepmultipledata(ids)
    if ids:
        csvfilenames = ['media/strokedata_{id}.parquet.gz'.format(id=id) for id in ids]
    else:
        return pd.DataFrame()
    data = []
    # drop placeholder column names and de-duplicate
    columns = [c for c in columns if c != 'None']
    columns = list(set(columns))
    if len(ids)>1:
        for id,f in zip(ids,csvfilenames):
            try:
                #df = dd.read_parquet(f,columns=columns,engine='pyarrow')
                df = pd.read_parquet(f,columns=columns)
                data.append(df)
            except OSError:
                # parquet missing: rebuild it from the workout CSV, then re-read
                rowdata, row = getrowdata(id=id)
                if rowdata and len(rowdata.df):
                    datadf = dataprep(rowdata.df,id=id,bands=True,otwpower=True,barchart=True)
                    # df = dd.read_parquet(f,columns=columns,engine='pyarrow')
                    df = pd.read_parquet(f,columns=columns)
                    data.append(df)
        # NOTE(review): raises ValueError if no file could be read at all
        df = pd.concat(data,axis=0)
        # df = dd.concat(data,axis=0)
    else:
        try:
            df = pd.read_parquet(csvfilenames[0],columns=columns)
        except (OSError,ArrowInvalid):
            rowdata,row = getrowdata(id=ids[0])
            if rowdata and len(rowdata.df):
                data = dataprep(rowdata.df,id=ids[0],bands=True,otwpower=True,barchart=True)
                df = pd.read_parquet(csvfilenames[0],columns=columns)
            # df = dd.read_parquet(csvfilenames[0],
            #                      column=columns,engine='pyarrow',
            #                      )
            # df = df.loc[:,~df.columns.duplicated()]
            else:
                df = pd.DataFrame()
    if compute and len(df):
        data = df.copy()
        if doclean:
            data = clean_df_stats(data, ignorehr=True,
                                  workstrokesonly=workstrokesonly)
        # drop all-NaN columns, then any row that still has a NaN
        data.dropna(axis=1,how='all',inplace=True)
        data.dropna(axis=0,how='any',inplace=True)
        return data
    return df
def getsmallrowdata_db_dask(columns, ids=[], doclean=True,workstrokesonly=True,compute=True):
    """Dask variant of getsmallrowdata_db; returns a dask frame unless compute=True.

    NOTE(review): an empty *ids* list raises IndexError on csvfilenames[0],
    and when a missing parquet cannot be rebuilt, df is never bound and the
    code below raises NameError — confirm callers always pass valid ids.
    NOTE(review): mutable default ids=[] — not mutated here, but risky.
    """
    # prepmultipledata(ids)
    csvfilenames = ['media/strokedata_{id}.parquet.gz'.format(id=id) for id in ids]
    data = []
    # drop placeholder column names and de-duplicate
    columns = [c for c in columns if c != 'None']
    columns = list(set(columns))
    if len(ids)>1:
        for id,f in zip(ids,csvfilenames):
            try:
                #df = dd.read_parquet(f,columns=columns,engine='pyarrow')
                df = dd.read_parquet(f,columns=columns)
                data.append(df)
            except OSError:
                # parquet missing: rebuild from the workout CSV, then re-read
                rowdata, row = getrowdata(id=id)
                if rowdata and len(rowdata.df):
                    datadf = dataprep(rowdata.df,id=id,bands=True,otwpower=True,barchart=True)
                    # df = dd.read_parquet(f,columns=columns,engine='pyarrow')
                    df = dd.read_parquet(f,columns=columns)
                    data.append(df)
        df = dd.concat(data,axis=0)
        # df = dd.concat(data,axis=0)
    else:
        try:
            df = dd.read_parquet(csvfilenames[0],columns=columns)
        except OSError:
            rowdata,row = getrowdata(id=ids[0])
            if rowdata and len(rowdata.df):
                data = dataprep(rowdata.df,id=ids[0],bands=True,otwpower=True,barchart=True)
                df = dd.read_parquet(csvfilenames[0],columns=columns)
            # df = dd.read_parquet(csvfilenames[0],
            #                      column=columns,engine='pyarrow',
            #                      )
            # df = df.loc[:,~df.columns.duplicated()]
    if compute:
        # materialize the dask graph, then clean like the pandas variant
        data = df.compute()
        if doclean:
            data = clean_df_stats(data, ignorehr=True,
                                  workstrokesonly=workstrokesonly)
        data.dropna(axis=1,how='all',inplace=True)
        data.dropna(axis=0,how='any',inplace=True)
        return data
    return df
def getsmallrowdata_db_old(columns, ids=[], doclean=True, workstrokesonly=True):
    """Legacy variant: fetch *columns* via read_cols_df_sql, computing any
    columns not stored in the database from the raw CSV on the fly.

    NOTE(review): mutable default ids=[] — not mutated here, but risky.
    """
    prepmultipledata(ids)
    data,extracols = read_cols_df_sql(ids, columns)
    # columns not present in the stored data are recomputed from the CSV,
    # but only for a single-workout request
    if extracols and len(ids)==1:
        w = Workout.objects.get(id=ids[0])
        row = rdata(w.csvfilename)
        try:
            row.set_instroke_metrics()
        except (AttributeError,TypeError):
            pass
        # mean sampling interval, used to size the smoothing window (~20 s)
        try:
            f = row.df['TimeStamp (sec)'].diff().mean()
        except (AttributeError,KeyError) as e:
            f = 0
        if f != 0 and not np.isnan(f):
            windowsize = 2 * (int(10. / (f))) + 1
        else:
            windowsize = 1
        for c in extracols:
            try:
                cdata = row.df[c]
                cdata.fillna(inplace=True,method='bfill')
                # This doesn't work because sometimes data are duplicated at save
                try:
                    cdata2 = savgol_filter(cdata.values,windowsize,3)
                    data[c] = cdata2
                except ValueError:
                    data[c] = cdata
            except (KeyError, AttributeError):
                data[c] = 0
    # convert newtons
    if doclean:
        data = clean_df_stats(data, ignorehr=True,
                              workstrokesonly=workstrokesonly)
        data.dropna(axis=1,how='all',inplace=True)
        data.dropna(axis=0,how='any',inplace=True)
    return data
# Fetch both the workout and the workout stroke data (from CSV file)
def getrowdata(id=0):
    """Return (rowingdata object, Workout) for workout *id*.

    Returns (empty rowingdata, None) when the workout does not exist.
    """
    # check if valid ID exists (workout exists)
    try:
        row = Workout.objects.get(id=id)
    except Workout.DoesNotExist:
        return rrdata(), None
    f1 = row.csvfilename
    # build a rower carrying the user's HR bands and FTP so downstream
    # statistics use personal zones (the old unused local 'u' was dropped)
    r = row.user
    rr = rrower(hrmax=r.max, hrut2=r.ut2,
                hrut1=r.ut1, hrat=r.at,
                hrtr=r.tr, hran=r.an, ftp=r.ftp)
    rowdata = rdata(f1, rower=rr)
    return rowdata, row
# Checks if all rows for a list of workout IDs have entries in the
# stroke_data table. If this is not the case, it creates the stroke
# data
# In theory, this should never yield any work, but it's a good
# safety net for programming errors elsewhere in the app
# Also used heavily when I moved from CSV file only to CSV+Stroke data
import glob
def prepmultipledata(ids, verbose=False):
    """Ensure every workout in *ids* has stored stroke data, rebuilding
    missing entries from the workout's CSV file.

    Returns the list of ids that had to be (re)built.
    """
    # Bug fix: the stored files are named *.parquet.gz, so the old pattern
    # 'media/*.parquet' never matched anything and every id was always
    # rebuilt.  Also use a set for O(1) membership tests.
    existing = set(glob.glob('media/*.parquet.gz'))
    ids = [id for id in ids
           if 'media/strokedata_{id}.parquet.gz'.format(id=id) not in existing]
    for id in ids:
        rowdata, row = getrowdata(id=id)
        if verbose:
            print(id)
        if rowdata and len(rowdata.df):
            data = dataprep(rowdata.df, id=id, bands=True,
                            barchart=True, otwpower=True)
    return ids
# Read a set of columns for a set of workout ids, returns data as a
# pandas dataframe
def read_cols_df_sql(ids, columns, convertnewtons=True):
    """Read *columns* (plus distance/spm/workoutid) for the given workout
    *ids* from the stored parquet files, rebuilding missing files from
    the CSV source.  Forces stored in lbs are converted to Newtons.

    Returns (dataframe, extracols); extracols is kept for interface
    compatibility and is always empty here.
    """
    # drop columns that are not in offical list
    # axx = [ax[0] for ax in axes]
    extracols = []
    columns = list(columns) + ['distance', 'spm', 'workoutid']
    columns = [x for x in columns if x != 'None']
    columns = list(set(columns))
    ids = [int(id) for id in ids]
    if len(ids) == 0:
        return pd.DataFrame(), extracols
    elif len(ids) == 1:
        # Robustness fix: df used to be unbound (NameError) when the
        # rebuild below failed; start from an empty frame instead.
        df = pd.DataFrame()
        filename = 'media/strokedata_{id}.parquet.gz'.format(id=ids[0])
        try:
            df = pd.read_parquet(filename, columns=columns)
        except OSError:
            rowdata, row = getrowdata(id=ids[0])
            if rowdata and len(rowdata.df):
                datadf = dataprep(rowdata.df, id=ids[0], bands=True,
                                  otwpower=True, barchart=True)
                df = pd.read_parquet(filename, columns=columns)
    else:
        data = []
        filenames = ['media/strokedata_{id}.parquet.gz'.format(id=id) for id in ids]
        for id, f in zip(ids, filenames):
            try:
                df = pd.read_parquet(f, columns=columns)
                data.append(df)
            except (OSError, IndexError):
                # parquet missing or unreadable: rebuild from CSV, re-read
                rowdata, row = getrowdata(id=id)
                if rowdata and len(rowdata.df):
                    datadf = dataprep(rowdata.df, id=id, bands=True,
                                      otwpower=True, barchart=True)
                    df = pd.read_parquet(f, columns=columns)
                    data.append(df)
        # Robustness fix: pd.concat raises on an empty list
        df = pd.concat(data, axis=0) if data else pd.DataFrame()
    df = df.fillna(value=0)
    if df.empty:
        return df, extracols
    if 'peakforce' in columns:
        funits = ((w.id, w.forceunit)
                  for w in Workout.objects.filter(id__in=ids))
        for id, u in funits:
            if u == 'lbs':
                mask = df['workoutid'] == id
                df.loc[mask, 'peakforce'] = df.loc[mask, 'peakforce'] * lbstoN
    if 'averageforce' in columns:
        funits = ((w.id, w.forceunit)
                  for w in Workout.objects.filter(id__in=ids))
        for id, u in funits:
            if u == 'lbs':
                mask = df['workoutid'] == id
                df.loc[mask, 'averageforce'] = df.loc[mask,
                                                      'averageforce'] * lbstoN
    return df, extracols
def read_cols_df_sql_old(ids, columns, convertnewtons=True):
    """Legacy SQL variant: read *columns* for *ids* from the strokedata table.

    Columns not present in the StrokeData model are returned in extracols
    for the caller to compute separately.  Returns (dataframe, extracols).
    NOTE(review): the SQL is built by string formatting; ids are cast to
    int first which limits injection, but parameterized queries would be
    safer.  The raw_connection obtained below is never used or closed.
    """
    # drop columns that are not in offical list
    # axx = [ax[0] for ax in axes]
    prepmultipledata(ids)
    axx = [f.name for f in StrokeData._meta.get_fields()]
    extracols = []
    columns2 = list(columns)
    for c in columns:
        if not c in axx:
            columns2.remove(c)
            extracols.append(c)
    columns = list(columns2) + ['distance', 'spm', 'workoutid']
    columns = [x for x in columns if x != 'None']
    columns = list(set(columns))
    cls = ''
    ids = [int(id) for id in ids]
    engine = create_engine(database_url, echo=False)
    # build the comma-separated column list, trimming the trailing ', '
    for column in columns:
        cls += column + ', '
    cls = cls[:-2]
    if len(ids) == 0:
        return pd.DataFrame(),extracols
    # query = sa.text('SELECT {columns} FROM strokedata WHERE workoutid=0'.format(
    #     columns=cls,
    # ))
    elif len(ids) == 1:
        query = sa.text('SELECT {columns} FROM strokedata WHERE workoutid={id} ORDER BY time ASC'.format(
            id=ids[0],
            columns=cls,
        ))
    else:
        query = sa.text('SELECT {columns} FROM strokedata WHERE workoutid IN {ids} ORDER BY time ASC'.format(
            columns=cls,
            ids=tuple(ids),
        ))
    connection = engine.raw_connection()
    df = pd.read_sql_query(query, engine)
    df = df.fillna(value=0)
    # convert per-workout lbs forces to Newtons
    if 'peakforce' in columns:
        funits = ((w.id, w.forceunit)
                  for w in Workout.objects.filter(id__in=ids))
        for id, u in funits:
            if u == 'lbs':
                mask = df['workoutid'] == id
                df.loc[mask, 'peakforce'] = df.loc[mask, 'peakforce'] * lbstoN
    if 'averageforce' in columns:
        funits = ((w.id, w.forceunit)
                  for w in Workout.objects.filter(id__in=ids))
        for id, u in funits:
            if u == 'lbs':
                mask = df['workoutid'] == id
                df.loc[mask, 'averageforce'] = df.loc[mask,
                                                      'averageforce'] * lbstoN
    engine.dispose()
    return df,extracols
# Read stroke data from the DB for a Workout ID. Returns a pandas dataframe
def read_df_sql(id):
    """Read the full stored stroke data for workout *id*.

    Rebuilds the parquet file from the workout CSV when missing or
    unreadable; returns an empty DataFrame when nothing can be read.
    """
    filename = 'media/strokedata_{id}.parquet.gz'.format(id=id)
    try:
        frame = pd.read_parquet(filename)
    except (OSError, ArrowInvalid):
        frame = pd.DataFrame()
        rowdata, row = getrowdata(id=id)
        if rowdata and len(rowdata.df):
            # dataprep writes the parquet file as a side effect
            dataprep(rowdata.df, id=id, bands=True, otwpower=True, barchart=True)
            frame = pd.read_parquet(filename)
    return frame.fillna(value=0)
def read_df_sql_old(id):
    """Legacy SQL variant: read all stroke data for workout *id* from the
    strokedata table, converting lbs forces to Newtons."""
    engine = create_engine(database_url, echo=False)
    df = pd.read_sql_query(sa.text('SELECT * FROM strokedata WHERE workoutid={id} ORDER BY time ASC'.format(
        id=id)), engine)
    engine.dispose()
    df = df.fillna(value=0)
    # convert stored lbs forces to Newtons when applicable
    funit = Workout.objects.get(id=id).forceunit
    if funit == 'lbs':
        try:
            df['peakforce'] = df['peakforce'] * lbstoN
        except KeyError:
            pass
        try:
            df['averageforce'] = df['averageforce'] * lbstoN
        except KeyError:
            pass
    return df
# Get the necessary data from the strokedata table in the DB.
# For the flex plot
def _smalldataprep_frame(filename, workout, xparam, yparam1, yparam2):
    """Load one workout CSV and reduce it to the requested columns,
    converting lbs forces to Newtons.  Raises IOError when unreadable."""
    rowdata = dataprep(rrdata(csvfile=filename).df)
    frame = pd.DataFrame({xparam: rowdata[xparam],
                          yparam1: rowdata[yparam1],
                          yparam2: rowdata[yparam2],
                          'distance': rowdata['distance'],
                          'spm': rowdata['spm'],
                          }
                         )
    if workout.forceunit == 'lbs':
        for col in ('peakforce', 'averageforce'):
            try:
                frame[col] *= lbstoN
            except KeyError:
                pass
    return frame


def smalldataprep(therows, xparam, yparam1, yparam2):
    """Build a combined frame of x/y parameters for the flex plot.

    Reads each workout's CSV (falling back to the gzipped variant) and
    concatenates the requested columns; unreadable workouts are skipped.
    The two previously duplicated IOError branches were factored into
    _smalldataprep_frame.
    """
    df = pd.DataFrame()
    if yparam2 == 'None':
        yparam2 = 'power'
    df[xparam] = []
    df[yparam1] = []
    df[yparam2] = []
    df['distance'] = []
    df['spm'] = []
    for workout in therows:
        f1 = workout.csvfilename
        try:
            frame = _smalldataprep_frame(f1, workout, xparam, yparam1, yparam2)
        except IOError:
            try:
                frame = _smalldataprep_frame(f1 + '.gz', workout,
                                             xparam, yparam1, yparam2)
            except IOError:
                continue
        df = pd.concat([df, frame], ignore_index=True)
    return df
# data fusion
def datafusion(id1, id2, columns, offset):
    """Fuse *columns* of workout *id2* (shifted by timedelta *offset*)
    into the stroke data of workout *id1*.

    Returns (fused dataframe, forceunit); forces are always in Newtons.
    """
    workout1 = Workout.objects.get(id=id1)
    workout2 = Workout.objects.get(id=id2)
    df1, w1 = getrowdata_db(id=id1)
    # drop derived/meta columns that should not be merged
    df1 = df1.drop([  # 'cumdist',
        'hr_ut2',
        'hr_ut1',
        'hr_at',
        'hr_tr',
        'hr_an',
        'hr_max',
        'ftime',
        'fpace',
        'workoutid',
        'id'],
        1, errors='ignore')
    # Add coordinates to DataFrame
    latitude, longitude = get_latlon(id1)
    df1[' latitude'] = latitude
    df1[' longitude'] = longitude
    df2 = getsmallrowdata_db(['time'] + columns, ids=[id2], doclean=False)
    forceunit = 'N'
    # align df2's clock with df1 by the requested offset (milliseconds)
    offsetmillisecs = offset.seconds * 1000 + offset.microseconds / 1000.
    offsetmillisecs += offset.days * (3600 * 24 * 1000)
    df2['time'] = df2['time'] + offsetmillisecs
    # keep df1's version of every column except the ones being fused in
    keep1 = {c: c for c in set(df1.columns)}
    for c in columns:
        keep1.pop(c)
    for c in df1.columns:
        if not c in keep1:
            df1 = df1.drop(c, 1, errors='ignore')
    # merge on time: interleave, interpolate, average duplicate timestamps
    df = pd.concat([df1, df2], ignore_index=True)
    df = df.sort_values(['time'])
    df = df.interpolate(method='linear', axis=0, limit_direction='both',
                        limit=10)
    df.fillna(method='bfill', inplace=True)
    # Some new stuff to try out
    df = df.groupby('time', axis=0).mean()
    df['time'] = df.index
    df.reset_index(drop=True, inplace=True)
    # back to seconds / pace seconds for downstream consumers
    df['time'] = df['time'] / 1000.
    df['pace'] = df['pace'] / 1000.
    df['cum_dist'] = df['cumdist']
    return df, forceunit
def fix_newtons(id=0, limit=3000):
    """Rebuild the stroke data for workout *id* when its mean peak force
    exceeds *limit*, which indicates the force was stored in the wrong
    unit.  Does nothing when no peakforce column is available."""
    strokes = getsmallrowdata_db(['peakforce'], ids=[id], doclean=False)
    try:
        if strokes['peakforce'].mean() > limit:
            workout = Workout.objects.get(id=id)
            print("fixing ", id)
            fresh = rdata(workout.csvfilename)
            if fresh and len(fresh.df):
                update_strokedata(workout.id, fresh.df)
    except KeyError:
        pass
def remove_invalid_columns(df):
    """Drop (in place) every column of *df* that is not in the canonical
    stroke-data field list; returns the same dataframe."""
    # collect first, then drop in one call instead of dropping while
    # walking the column index
    invalid = [c for c in df.columns if c not in allowedcolumns]
    if invalid:
        df.drop(labels=invalid, axis=1, inplace=True)
    return df
def add_efficiency(id=0):
    """Recompute the efficiency column for workout *id* and rewrite its
    stored stroke data.

    Efficiency is the equivalent-erg power (2.8 * v^3) as a percentage of
    the measured power.  Returns the updated dataframe.
    """
    rowdata, row = getrowdata_db(id=id,
                                 doclean=False,
                                 convertnewtons=False,
                                 checkefficiency=False)
    power = rowdata['power']
    pace = rowdata['pace'] / 1.0e3
    velo = 500. / pace
    ergpw = 2.8 * velo**3
    efficiency = 100. * ergpw / power
    efficiency = efficiency.replace([-np.inf, np.inf], np.nan)
    # Bug fix: the fillna result used to be discarded (neither assigned
    # nor inplace), making the line a no-op.
    efficiency = efficiency.fillna(method='ffill')
    rowdata['efficiency'] = efficiency
    rowdata = remove_invalid_columns(rowdata)
    rowdata = rowdata.replace([-np.inf, np.inf], np.nan)
    rowdata = rowdata.fillna(method='ffill')
    delete_strokedata(id)
    if id != 0:
        rowdata['workoutid'] = id
        filename = 'media/strokedata_{id}.parquet.gz'.format(id=id)
        df = dd.from_pandas(rowdata, npartitions=1)
        df.to_parquet(filename, engine='fastparquet', compression='GZIP')
    return rowdata
# This is the main routine.
# it reindexes, sorts, filters, and smooths the data, then
# saves it to the stroke_data table in the database
# Takes a rowingdata object's DataFrame as input
def dataprep(rowdatadf, id=0, bands=True, barchart=True, otwpower=True,
             empower=True, inboard=0.88, forceunit='lbs'):
    """Reindex, smooth and enrich a rowingdata DataFrame, and — when *id*
    is nonzero — persist it as media/strokedata_<id>.parquet.gz.

    Returns the prepared pandas DataFrame, or 0 when the input is empty.
    """
    if rowdatadf.empty:
        return 0
    # time relative to the first sample
    t = rowdatadf.loc[:, 'TimeStamp (sec)']
    t = pd.Series(t - rowdatadf.loc[:, 'TimeStamp (sec)'].iloc[0])
    # clip absurd paces at 3000 sec/500m
    row_index = rowdatadf.loc[:, ' Stroke500mPace (sec/500m)'] > 3000
    rowdatadf.loc[row_index, ' Stroke500mPace (sec/500m)'] = 3000.
    p = rowdatadf.loc[:, ' Stroke500mPace (sec/500m)']
    try:
        velo = rowdatadf.loc[:,' AverageBoatSpeed (m/s)']
    except KeyError:
        velo = 500./p
    hr = rowdatadf.loc[:, ' HRCur (bpm)']
    spm = rowdatadf.loc[:, ' Cadence (stokes/min)']
    cumdist = rowdatadf.loc[:, 'cum_dist']
    power = rowdatadf.loc[:, ' Power (watts)']
    averageforce = rowdatadf.loc[:, ' AverageDriveForce (lbs)']
    drivelength = rowdatadf.loc[:, ' DriveLength (meters)']
    try:
        workoutstate = rowdatadf.loc[:, ' WorkoutState']
    except KeyError:
        workoutstate = 0 * hr
    peakforce = rowdatadf.loc[:, ' PeakDriveForce (lbs)']
    forceratio = averageforce / peakforce
    forceratio = forceratio.fillna(value=0)
    try:
        drivetime = rowdatadf.loc[:, ' DriveTime (ms)']
        recoverytime = rowdatadf.loc[:, ' StrokeRecoveryTime (ms)']
        # rhythm = drive time as a percentage of the full stroke cycle
        rhythm = 100. * drivetime / (recoverytime + drivetime)
        rhythm = rhythm.fillna(value=0)
    except Exception:
        # was a bare except; narrowed to Exception
        rhythm = 0.0 * forceratio
    # smoothing window sized to roughly 20 seconds of samples (odd length)
    f = rowdatadf['TimeStamp (sec)'].diff().mean()
    if f != 0 and not np.isinf(f):
        try:
            windowsize = 2 * (int(10. / (f))) + 1
        except ValueError:
            windowsize = 1
    else:
        windowsize = 1
    if windowsize <= 3:
        windowsize = 5
    if windowsize > 3 and windowsize < len(hr):
        spm = savgol_filter(spm, windowsize, 3)
        hr = savgol_filter(hr, windowsize, 3)
        drivelength = savgol_filter(drivelength, windowsize, 3)
        forceratio = savgol_filter(forceratio, windowsize, 3)
    try:
        t2 = t.fillna(method='ffill').apply(lambda x: timedeltaconv(x))
    except TypeError:
        t2 = 0 * t
    p2 = p.fillna(method='ffill').apply(lambda x: timedeltaconv(x))
    try:
        drivespeed = drivelength / rowdatadf[' DriveTime (ms)'] * 1.0e3
    except TypeError:
        drivespeed = 0.0 * rowdatadf['TimeStamp (sec)']
    drivespeed = drivespeed.fillna(value=0)
    try:
        driveenergy = rowdatadf['driveenergy']
    except KeyError:
        if forceunit == 'lbs':
            driveenergy = drivelength * averageforce * lbstoN
        else:
            # Bug fix: this branch used to assign to the misspelled name
            # 'drivenergy', leaving driveenergy undefined (NameError below)
            driveenergy = drivelength * averageforce
    if forceunit == 'lbs':
        averageforce *= lbstoN
        peakforce *= lbstoN
    powerhr = 60.*power/hr
    powerhr = powerhr.fillna(value=0)
    if driveenergy.mean() == 0 and driveenergy.std() == 0:
        # no real drive energy data; use a constant placeholder
        driveenergy = 0*driveenergy+100
    distance = rowdatadf.loc[:, 'cum_dist']
    velo = 500. / p
    distanceperstroke = 60. * velo / spm
    data = DataFrame(
        dict(
            time=t * 1e3,
            hr=hr,
            pace=p * 1e3,
            spm=spm,
            velo=velo,
            cumdist=cumdist,
            ftime=niceformat(t2),
            fpace=nicepaceformat(p2),
            driveenergy=driveenergy,
            power=power,
            workoutstate=workoutstate,
            averageforce=averageforce,
            drivelength=drivelength,
            peakforce=peakforce,
            forceratio=forceratio,
            distance=distance,
            drivespeed=drivespeed,
            rhythm=rhythm,
            distanceperstroke=distanceperstroke,
            # powerhr=powerhr,
        )
    )
    if bands:
        # HR bands
        data['hr_ut2'] = rowdatadf.loc[:, 'hr_ut2']
        data['hr_ut1'] = rowdatadf.loc[:, 'hr_ut1']
        data['hr_at'] = rowdatadf.loc[:, 'hr_at']
        data['hr_tr'] = rowdatadf.loc[:, 'hr_tr']
        data['hr_an'] = rowdatadf.loc[:, 'hr_an']
        data['hr_max'] = rowdatadf.loc[:, 'hr_max']
        data['hr_bottom'] = 0.0 * data['hr']
    # probe for the elapsed-time column; create it when missing
    try:
        tel = rowdatadf.loc[:, ' ElapsedTime (sec)']
    except KeyError:
        rowdatadf[' ElapsedTime (sec)'] = rowdatadf['TimeStamp (sec)']
    if empower:
        # Empower oarlock metrics (angles in degrees); default to zero
        # series when a column is absent
        try:
            wash = rowdatadf.loc[:, 'wash']
        except KeyError:
            wash = 0 * power
        try:
            catch = rowdatadf.loc[:, 'catch']
        except KeyError:
            catch = 0 * power
        try:
            finish = rowdatadf.loc[:, 'finish']
        except KeyError:
            finish = 0 * power
        try:
            peakforceangle = rowdatadf.loc[:, 'peakforceangle']
        except KeyError:
            peakforceangle = 0 * power
        if data['driveenergy'].mean() == 0:
            try:
                driveenergy = rowdatadf.loc[:, 'driveenergy']
            except KeyError:
                driveenergy = power * 60 / spm
        else:
            driveenergy = data['driveenergy']
        # arc length from catch/finish angles at the effective lever radius
        arclength = (inboard - 0.05) * (np.radians(finish) - np.radians(catch))
        if arclength.mean() > 0:
            drivelength = arclength
        elif drivelength.mean() == 0:
            # back out drive length from energy and force (lbs -> N factor)
            drivelength = driveenergy / (averageforce * 4.44822)
        try:
            slip = rowdatadf.loc[:, 'slip']
        except KeyError:
            slip = 0 * power
        try:
            totalangle = finish - catch
            effectiveangle = finish - wash - catch - slip
        except ValueError:
            totalangle = 0 * power
            effectiveangle = 0 * power
        if windowsize > 3 and windowsize < len(slip):
            try:
                wash = savgol_filter(wash, windowsize, 3)
            except TypeError:
                pass
            try:
                slip = savgol_filter(slip, windowsize, 3)
            except TypeError:
                pass
            try:
                catch = savgol_filter(catch, windowsize, 3)
            except TypeError:
                pass
            try:
                finish = savgol_filter(finish, windowsize, 3)
            except TypeError:
                pass
            try:
                peakforceangle = savgol_filter(peakforceangle, windowsize, 3)
            except TypeError:
                pass
            try:
                driveenergy = savgol_filter(driveenergy, windowsize, 3)
            except TypeError:
                pass
            try:
                drivelength = savgol_filter(drivelength, windowsize, 3)
            except TypeError:
                pass
            try:
                totalangle = savgol_filter(totalangle, windowsize, 3)
            except TypeError:
                pass
            try:
                effectiveangle = savgol_filter(effectiveangle, windowsize, 3)
            except TypeError:
                pass
        velo = 500. / p
        ergpw = 2.8 * velo**3
        efficiency = 100. * ergpw / power
        efficiency = efficiency.replace([-np.inf, np.inf], np.nan)
        # Bug fix: the fillna result used to be discarded (no assignment)
        efficiency = efficiency.fillna(method='ffill')
        try:
            data['wash'] = wash
            data['catch'] = catch
            data['slip'] = slip
            data['finish'] = finish
            data['peakforceangle'] = peakforceangle
            data['driveenergy'] = driveenergy
            data['drivelength'] = drivelength
            data['totalangle'] = totalangle
            data['effectiveangle'] = effectiveangle
            data['efficiency'] = efficiency
        except ValueError:
            pass
    if otwpower:
        try:
            nowindpace = rowdatadf.loc[:, 'nowindpace']
        except KeyError:
            nowindpace = p
        try:
            equivergpower = rowdatadf.loc[:, 'equivergpower']
        except KeyError:
            equivergpower = 0 * p + 50.
        nowindpace2 = nowindpace.apply(lambda x: timedeltaconv(x))
        ergvelo = (equivergpower / 2.8)**(1. / 3.)
        ergpace = 500. / ergvelo
        ergpace[ergpace == np.inf] = 240.
        ergpace2 = ergpace.apply(lambda x: timedeltaconv(x))
        data['ergpace'] = ergpace * 1e3
        data['nowindpace'] = nowindpace * 1e3
        data['equivergpower'] = equivergpower
        data['fergpace'] = nicepaceformat(ergpace2)
        data['fnowindpace'] = nicepaceformat(nowindpace2)
    data = data.replace([-np.inf, np.inf], np.nan)
    data = data.fillna(method='ffill')
    # write data if id given
    if id != 0:
        data['workoutid'] = id
        data.fillna(0, inplace=True)
        for k, v in dtypes.items():
            try:
                data[k] = data[k].astype(v)
            except KeyError:
                pass
        filename = 'media/strokedata_{id}.parquet.gz'.format(id=id)
        df = dd.from_pandas(data, npartitions=1)
        df.to_parquet(filename, engine='fastparquet', compression='GZIP')
    return data
def workout_trimp(w):
    """Return (trimp, hrtss) for workout *w*.

    When not yet computed, fill in missing average/max HR and the user's
    HR-FTP, queue a background trimp calculation and return (0, 0).
    """
    if w.trimp > 0:
        return w.trimp, w.hrtss
    # (removed a duplicate 'r = w.user' assignment)
    r = w.user
    ftp = float(r.ftp)
    if w.workouttype in otwtypes:
        # on-the-water workouts get a reduced FTP by the user's OTW slack
        ftp = ftp * (100. - r.otwslack) / 100.
    if r.hrftp == 0:
        # default HR-FTP to the midpoint of the AN and TR band limits
        hrftp = (r.an + r.tr) / 2.
        r.hrftp = int(hrftp)
        r.save()
    if w.averagehr is None:
        rowdata = rdata(w.csvfilename)
        try:
            avghr = rowdata.df[' HRCur (bpm)'].mean()
            maxhr = rowdata.df[' HRCur (bpm)'].max()
        except KeyError:
            avghr = None
            maxhr = None
        w.averagehr = avghr
        w.maxhr = maxhr
        w.save()
    job = myqueue(
        queuehigh,
        handle_calctrimp,
        w.id,
        w.csvfilename,
        ftp,
        r.sex,
        r.hrftp,
        r.max,
        r.rest)
    return 0, 0
def workout_rscore(w):
    """Return (rscore, normp) for workout *w*.

    When not yet computed, queue the background trimp/score calculation
    (fixing up the user's HR-FTP if unset) and return (0, 0).
    """
    if w.rscore > 0:
        return w.rscore, w.normp
    r = w.user
    ftp = float(r.ftp)
    if w.workouttype in otwtypes:
        # reduce FTP by the user's on-the-water slack percentage
        ftp = ftp * (100. - r.otwslack) / 100.
    if r.hrftp == 0:
        # default HR-FTP to the midpoint of the AN and TR band limits
        r.hrftp = int((r.an + r.tr) / 2.)
        r.save()
    job = myqueue(
        queuehigh,
        handle_calctrimp,
        w.id,
        w.csvfilename,
        ftp,
        r.sex,
        r.hrftp,
        r.max,
        r.rest)
    return 0, 0
def workout_normv(w,pp=4.0):
    """Return (normv, normw) for workout *w*.

    When not yet computed, queue the background trimp/norm calculation
    and return (0, 0).
    NOTE(review): parameter *pp* is unused here; kept for interface
    compatibility with callers.
    """
    if w.normv > 0:
        return w.normv,w.normw
    r = w.user
    ftp = float(r.ftp)
    if w.workouttype in otwtypes:
        # reduce FTP by the user's on-the-water slack percentage
        ftp = ftp*(100.-r.otwslack)/100.
    if r.hrftp == 0:
        # default HR-FTP to the midpoint of the AN and TR band limits
        hrftp = (r.an+r.tr)/2.
        r.hrftp = int(hrftp)
        r.save()
    job = myqueue(
        queuehigh,
        handle_calctrimp,
        w.id,
        w.csvfilename,
        ftp,
        r.sex,
        r.hrftp,
        r.max,
        r.rest)
    return 0,0