Private
Public Access
1
0
Files
rowsandall/rowers/dataprep.py
Sander Roosendaal 2c1e6c5909 bug fixes
2021-09-14 19:46:59 +02:00

3233 lines
95 KiB
Python

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals
# All the data preparation, data cleaning and data mangling should
# be defined here
from __future__ import unicode_literals, absolute_import
from rowers.models import (
Workout, Team, CalcAgePerformance,C2WorldClassAgePerformance,
User
)
import pytz
import collections
import pendulum
from rowingdata import rowingdata as rrdata
from rowingdata import rower as rrower
import yaml
import shutil
from shutil import copyfile
from rowingdata import (
get_file_type, get_empower_rigging,get_empower_firmware
)
from rowers.tasks import (
handle_sendemail_unrecognized,handle_setcp,
handle_getagegrouprecords, handle_update_wps
)
from rowers.tasks import handle_zip_file
from pandas import DataFrame, Series
import dask.dataframe as dd
from dask.delayed import delayed
import pyarrow.parquet as pq
import pyarrow as pa
from pyarrow.lib import ArrowInvalid
from django.utils import timezone
from django.utils.timezone import get_current_timezone
from django_mailbox.models import Message,Mailbox,MessageAttachment
from django.core.exceptions import ValidationError
from time import strftime
import arrow
# cache the server's active timezone once at import time
thetimezone = get_current_timezone()
from rowingdata import (
TCXParser, RowProParser, ErgDataParser,
CoxMateParser, HeroParser,
BoatCoachParser, RowPerfectParser, BoatCoachAdvancedParser,
ETHParser,
MysteryParser, BoatCoachOTWParser,QuiskeParser,
painsledDesktopParser, speedcoachParser, ErgStickParser,
SpeedCoach2Parser, FITParser, fitsummarydata,
RitmoTimeParser,KinoMapParser,NKLiNKLogbookParser,
make_cumvalues,cumcpdata,ExcelTemplate,
summarydata, get_file_type,
)
from rowingdata.csvparsers import HumonParser
from rowers.metrics import axes,calc_trimp,rowingmetrics,dtypes,metricsgroups
from rowers.models import strokedatafields
#allowedcolumns = [item[0] for item in rowingmetrics]
# column names permitted in stroke-data frames, taken from the model's field map
allowedcolumns = [key for key,value in strokedatafields.items()]
#from async_messages import messages as a_messages
import os
import zipfile
import pandas as pd
import numpy as np
import itertools
from fitparse import FitFile
from fitparse.base import FitHeaderError
import math
from rowers.tasks import (
handle_sendemail_unrecognized, handle_sendemail_breakthrough,
handle_sendemail_hard, handle_updatecp,handle_updateergcp,
handle_calctrimp,
)
from django.conf import settings
from sqlalchemy import create_engine
import sqlalchemy as sa
import sys
import rowers.utils as utils
import rowers.datautils as datautils
from rowers.utils import lbstoN,myqueue,wavg,dologging
from timezonefinder import TimezoneFinder
import django_rq
queue = django_rq.get_queue('default')
queuelow = django_rq.get_queue('low')
# NOTE(review): 'queuehigh' is bound to the 'default' queue, not a dedicated
# 'high' queue -- confirm this is intentional.
queuehigh = django_rq.get_queue('default')
from rowsandall_app.settings import SITE_URL
from rowers.mytypes import otwtypes,otetypes,rowtypes
from rowers import mytypes
from rowers.database import *
from rowers.opaque import encoder
# mapping the DB column names to the CSV file column names
columndict = {
    # NOTE: the values below are the literal CSV headers written by
    # rowingdata; the leading spaces and the 'stokes' spelling are part of
    # the real headers and must not be "corrected".
    'time': 'TimeStamp (sec)',
    'hr': ' HRCur (bpm)',
    'velo': ' AverageBoatSpeed (m/s)',
    'pace': ' Stroke500mPace (sec/500m)',
    'spm': ' Cadence (stokes/min)',
    'power': ' Power (watts)',
    'averageforce': ' AverageDriveForce (lbs)',
    'drivelength': ' DriveLength (meters)',
    'peakforce': ' PeakDriveForce (lbs)',
    'distance': ' Horizontal (meters)',
    'catch': 'catch',
    'finish': 'finish',
    'peakforceangle': 'peakforceangle',
    'wash': 'wash',
    'slip': 'slip',
    'workoutstate': ' WorkoutState',
    'cumdist': 'cum_dist',
}
from scipy.signal import savgol_filter
import datetime
def get_video_data(w,groups=['basic'],mode='water'):
    """Assemble 1 Hz time series for a workout video overlay.

    Parameters
    ----------
    w : Workout
        Workout whose stroke data is read from the database.
    groups : list
        Metric groups (from rowers.metrics.rowingmetrics) to include.
        NOTE(review): mutable default argument -- safe only while callers
        never mutate it.
    mode : str
        'water' or 'erg'; metrics whose mode is *mode*, 'both' or 'basic'
        are selected.

    Returns
    -------
    (data, metrics, maxtime)
        data: dict of per-second value lists (plus 'boatspeed', 'latitude',
        'longitude'); metrics: ordered dict describing each series;
        maxtime: last coordinate-relative timestamp in seconds.
    """
    modes = [mode,'both','basic']
    columns = ['time','velo','spm']
    columns += [name for name,d in rowingmetrics if d['group'] in groups and d['mode'] in modes]
    columns = list(set(columns))
    df = getsmallrowdata_db(columns,ids=[w.id],
                            workstrokesonly=False,doclean=False,compute=False)
    # stroke timestamps are in milliseconds; rebase to seconds from start
    df['time'] = (df['time']-df['time'].min())/1000.
    df.sort_values(by='time',inplace=True)
    df.set_index(pd.to_timedelta(df['time'],unit='s'),inplace=True)
    # resample to a regular 1-second grid, forward-filling between strokes.
    # NOTE(review): fillna(method=...) is deprecated in recent pandas; .ffill()
    # is the modern equivalent.
    df2 = df.resample('1s').first().fillna(method='ffill')
    df2['time'] = df2.index.total_seconds()
    if 'pace' in columns:
        # pace is stored as msec/500m; convert to seconds and format mm:ss.t
        df2['pace'] = df2['pace']/1000.
        p = df2['pace']
        p = p.apply(lambda x:timedeltaconv(x))
        p = nicepaceformat(p)
        df2['pace'] = p
    #mask = df2['time'] < delay
    #df2 = df2.mask(mask).dropna()
    df2['time'] = (df2['time']-df2['time'].min())
    df2 = df2.round(decimals=2)
    # truncate boat speed to two decimals
    boatspeed = (100*df2['velo']).astype(int)/100.
    try:
        coordinates = get_latlon_time(w.id)
    except KeyError: # pragma: no cover
        # no GPS data: substitute all-zero coordinates on the same time base
        nulseries = df['time']*0
        coordinates = pd.DataFrame({
            'time': df['time'],
            'latitude': nulseries,
            'longitude': nulseries,
        })
    coordinates.set_index(pd.to_timedelta(coordinates['time'],unit='s'),inplace=True)
    coordinates = coordinates.resample('1s').mean().interpolate()
    #mask = coordinates['time'] < delay
    #coordinates = coordinates.mask(mask).dropna()
    coordinates['time'] = coordinates['time']-coordinates['time'].min()
    latitude = coordinates['latitude']
    longitude = coordinates['longitude']
    # bundle data
    data = {
        'boatspeed':boatspeed.values.tolist(),
        'latitude':latitude.values.tolist(),
        'longitude':longitude.values.tolist(),
    }
    metrics = {}
    for c in columns:
        if c != 'time':
            try:
                if dict(rowingmetrics)[c]['numtype'] == 'integer': # pragma: no cover
                    data[c] = df2[c].astype(int).tolist()
                else:
                    sigfigs = dict(rowingmetrics)[c]['sigfigs']
                    if (c != 'pace'):
                        # truncate to the metric's configured decimals
                        da = ((10**sigfigs)*df2[c]).astype(int)/(10**sigfigs)
                    else:
                        da = df2[c]
                    data[c] = da.values.tolist()
                metrics[c] = {
                    'name': dict(rowingmetrics)[c]['verbose_name'],
                    'metric': c,
                    'unit': ''
                }
            except KeyError: # pragma: no cover
                pass
    # the consumer expects the velocity series under the name 'boatspeed'
    metrics['boatspeed'] = metrics.pop('velo')
    # metrics['workperstroke'] = metrics.pop('driveenergy')
    metrics = collections.OrderedDict(sorted(metrics.items()))
    maxtime = coordinates['time'].max()
    return data, metrics, maxtime
def polarization_index(df,rower):
    """Compute the polarization index of a workout's power distribution.

    Stroke time deltas (minutes, derived from millisecond timestamps) are
    binned into three zones using the rower's pw_at / pw_an power thresholds
    (presumably aerobic / anaerobic), then

        index = log10(100 * frac_high * frac_low / frac_mid)

    Parameters
    ----------
    df : DataFrame with 'time' (milliseconds) and 'power' columns; a 'dt'
        column is added and the frame is modified in place.
    rower : object exposing pw_at and pw_an (coercible to float).

    Returns
    -------
    float : the polarization index.
    """
    df['dt'] = df['time'].diff()/6.e4
    # drop the first stroke (no time delta) and any rows with missing values
    df.dropna(axis=0,inplace=True)
    # cap implausible gaps between strokes at 4 minutes
    df['dt'] = df['dt'].clip(upper=4,lower=0)
    # Coerce thresholds once.  Previously pw_at was int()-truncated in the
    # low mask but not in the mid mask, and strokes with power exactly equal
    # to pw_an fell into no zone at all (mid required < pw_an, high required
    # > pw_an).  The zones are now contiguous: (0, at), [at, an), [an, inf).
    at = float(rower.pw_at)
    an = float(rower.pw_an)
    masklow = (df['power'] > 0) & (df['power'] < at)
    maskmid = (df['power'] >= at) & (df['power'] < an)
    maskhigh = (df['power'] >= an)
    time_low_pw = df.loc[masklow,'dt'].sum()
    time_mid_pw = df.loc[maskmid,'dt'].sum()
    time_high_pw = df.loc[maskhigh,'dt'].sum()
    total = time_low_pw + time_mid_pw + time_high_pw
    frac_low = time_low_pw/total
    frac_mid = time_mid_pw/total
    frac_high = time_high_pw/total
    return math.log10(frac_high*100.*frac_low/frac_mid)
def get_latlon(id):
    """Return [latitude, longitude] Series for workout *id*.

    Falls back to all-zero series (on the timestamp index) when the CSV has
    no GPS columns, to empty float series when the data is unusable, and to
    False when the workout does not exist.
    """
    try:
        workout = Workout.objects.get(id=id)
    except Workout.DoesNotExist: # pragma: no cover
        return False
    rowdata = rdata(workout.csvfilename)
    if rowdata.df.empty: # pragma: no cover
        return [pd.Series([], dtype='float'), pd.Series([], dtype='float')]
    try:
        try:
            lat = rowdata.df.loc[:, ' latitude']
            lon = rowdata.df.loc[:, ' longitude']
        except KeyError:
            # no GPS columns: zero series shaped like the timestamps
            timestamps = rowdata.df.loc[:, 'TimeStamp (sec)']
            lat = 0 * timestamps
            lon = 0 * timestamps
        return [lat, lon]
    except AttributeError: # pragma: no cover
        return [pd.Series([], dtype='float'), pd.Series([], dtype='float')]
def get_latlon_time(id):
    """Return a DataFrame with 'time' (seconds from start), 'latitude' and
    'longitude' for workout *id*.

    Returns False when the workout does not exist, empty float series when
    the CSV is empty, and an empty DataFrame when the data is unusable.
    Workouts without GPS columns yield all-zero coordinates.
    """
    try:
        w = Workout.objects.get(id=id)
    except Workout.DoesNotExist: # pragma: no cover
        return False
    rowdata = rdata(w.csvfilename)
    if rowdata.df.empty: # pragma: no cover
        return [pd.Series([], dtype='float'), pd.Series([], dtype='float')]
    try:
        try:
            latitude = rowdata.df.loc[:, ' latitude']
            longitude = rowdata.df.loc[:, ' longitude']
        except KeyError: # pragma: no cover
            latitude = 0 * rowdata.df.loc[:, 'TimeStamp (sec)']
            longitude = 0 * rowdata.df.loc[:, 'TimeStamp (sec)']
    except AttributeError: # pragma: no cover
        return pd.DataFrame()
    # BUG FIX: previously the frame was built by re-reading ' latitude' /
    # ' longitude' from the raw CSV, which raised KeyError for workouts
    # without GPS columns and silently discarded the fallback series above.
    df = pd.DataFrame({
        'time': rowdata.df['TimeStamp (sec)'] - rowdata.df['TimeStamp (sec)'].min(),
        'latitude': latitude,
        'longitude': longitude,
    })
    return df
def workout_summary_to_df(
        rower,
        startdate=datetime.datetime(1970,1,1),
        enddate=None):
    """Build a summary DataFrame of a rower's workouts in [startdate, enddate].

    Parameters
    ----------
    rower : Rower
        Owner of the workouts.
    startdate : datetime
        Inclusive lower bound on workout date.
    enddate : datetime or None
        Inclusive upper bound; None means "tomorrow", evaluated at CALL time.
        (BUG FIX: the previous default `timezone.now()+timedelta(days=1)` was
        evaluated once at import time and went stale on a long-running
        server.)

    Returns
    -------
    pandas.DataFrame with one row per non-duplicate workout, ordered by
    start datetime, including obfuscated ids, email/download links and the
    TRIMP / TSS / hrTSS training loads.
    """
    if enddate is None:
        enddate = timezone.now() + timezone.timedelta(days=1)
    ws = Workout.objects.filter(
        user=rower, date__gte=startdate, date__lte=enddate,
        duplicate=False
    ).order_by("startdatetime")
    rows = []
    total = len(ws)
    for counter, w in enumerate(ws, start=1):
        if counter % 10 == 0:  # pragma: no cover
            # progress indicator for long histories
            print(counter, '/', total)
        hexid = encoder.encode_hex(w.id)
        goldstandard, goldstandardduration = workout_goldmedalstandard(w)
        rows.append({
            'ID': hexid,
            'date': w.startdatetime,
            'name': w.name,
            'link': SITE_URL + '/rowers/workout/{id}/'.format(id=hexid),
            'timezone': w.timezone,
            'type': w.workouttype,
            'boat type': w.boattype,
            'distance (m)': w.distance,
            'duration ': w.duration,
            'ranking piece': w.rankingpiece,
            'weight category': w.weightcategory,
            'adaptive classification': w.adaptiveclass,
            'weight (kg)': w.weightvalue,
            'Stroke Data TCX': SITE_URL + '/rowers/workout/{id}/emailtcx'.format(id=hexid),
            'Stroke Data CSV': SITE_URL + '/rowers/workout/{id}/emailcsv'.format(id=hexid),
            'TRIMP Training Load': workout_trimp(w)[0],
            'TSS Training Load': int(workout_rscore(w)[0]),
            'hrTSS Training Load': int(w.hrtss),
            'GS': int(goldstandard),
            'GS_secs': int(goldstandardduration),
            'notes': w.notes,
        })
    # keep the historical column order (note the trailing space in
    # 'duration ' -- downstream consumers rely on the exact header)
    columns = ['ID', 'date', 'name', 'link', 'timezone', 'type', 'boat type',
               'distance (m)', 'duration ', 'ranking piece',
               'weight category', 'adaptive classification', 'weight (kg)',
               'Stroke Data TCX', 'Stroke Data CSV', 'TRIMP Training Load',
               'TSS Training Load', 'hrTSS Training Load', 'GS', 'GS_secs',
               'notes']
    return pd.DataFrame(rows, columns=columns)
def get_workouts(ids, userid): # pragma: no cover
    """Return the Workout objects from *ids* that belong to user *userid*."""
    owned = [
        id for id in ids
        if int(Workout.objects.get(id=id).user.user.id) == int(userid)
    ]
    return [Workout.objects.get(id=id) for id in owned]
def filter_df(datadf, fieldname, value, largerthan=True):
    """Null out-of-range values in one column of *datadf* (in place).

    With largerthan=True, values below *value* are set to NaN (keeping the
    larger ones); with largerthan=False, values >= *value* are nulled.
    A missing column or a non-comparable dtype leaves the frame untouched.

    Returns the (possibly mutated) DataFrame.
    """
    # membership test instead of the old unused `x = datadf[fieldname]` probe
    if fieldname not in datadf:
        return datadf
    try:
        if largerthan:
            mask = datadf[fieldname] < value
        else:
            mask = datadf[fieldname] >= value
        datadf.loc[mask, fieldname] = np.nan
    except TypeError:
        # column not comparable with *value*; leave as-is
        pass
    return datadf
# joins workouts
def join_workouts(r,ids,title='Joined Workout',
                  parent=None,
                  setprivate=False,
                  forceunit='lbs',killparents=False):
    """Concatenate the stroke data of several workouts into one new workout.

    Parameters
    ----------
    r : Rower
        Owner of the new workout.
    ids : iterable
        Workout ids to join; processed in chronological order.
    title : str
        Name of the new workout.
    parent : Workout or None
        Workout whose metadata (rigging, type, notes, privacy, start time)
        is copied; defaults to the earliest of the joined workouts.
    setprivate : bool
        Force the new workout's privacy flag.
    forceunit : str
        Not referenced in this function -- kept for interface compatibility.
    killparents : bool
        Delete the source workouts after joining.

    Returns
    -------
    (id, message) : id of the created workout and an optional status message
    (any "duplicate" message is cleared, since overlap with the parents is
    expected).
    """
    message = None
    summary = ''
    if parent: # pragma: no cover
        oarlength = parent.oarlength
        inboard = parent.inboard
        workouttype = parent.workouttype
        notes = parent.notes
        summary = parent.summary
        if parent.privacy == 'hidden':
            makeprivate = True
        else:
            makeprivate = False
        startdatetime = parent.startdatetime
    else:
        # placeholder defaults; replaced below once a parent is picked
        oarlength = 2.89
        inboard = 0.88
        workouttype = 'rower'
        notes = ''
        summary = ''
        makeprivate = False
        startdatetime = timezone.now()
    if setprivate == True and makeprivate == False: # pragma: no cover
        makeprivate = True
    elif setprivate == False and makeprivate == True: # pragma: no cover
        makeprivate = False
    # reorder in chronological order
    ws = Workout.objects.filter(id__in=ids).order_by("startdatetime")
    if not parent:
        # use the earliest joined workout as the metadata source
        parent = ws[0]
        oarlength = parent.oarlength
        inboard = parent.inboard
        workouttype = parent.workouttype
        notes = parent.notes
        summary = parent.summary
    files = [w.csvfilename for w in ws]
    row = rdata(files[0])
    files = files[1:]
    # append the remaining files ('+' on rowingdata objects presumably
    # concatenates stroke data -- confirm in rowingdata docs)
    while len(files):
        row2 = rdata(files[0])
        if row2 != 0:
            row = row+row2
        files = files[1:]
    timestr = strftime("%Y%m%d-%H%M%S")
    csvfilename = 'media/df_' + timestr + '.csv'
    row.write_csv(csvfilename,gzip=True)
    id, message = save_workout_database(csvfilename, r,
                                        workouttype=workouttype,
                                        title=title,
                                        notes=notes,
                                        oarlength=oarlength,
                                        inboard=inboard,
                                        makeprivate=makeprivate,
                                        dosmooth=False,
                                        consistencychecks=False)
    if killparents: # pragma: no cover
        for w in ws:
            w.delete()
    # the joined workout overlaps its parents and was flagged as a
    # duplicate; clear the flag and any duplicate message
    w = Workout.objects.get(id=id)
    w.duplicate = False
    w.save()
    if message is not None and "duplicate" in message:
        message = ""
    return (id, message)
def df_resample(datadf):
    """Average all samples sharing the same whole second.

    Timestamps in 'TimeStamp (sec)' must be in seconds.  A 'timestamps'
    column (the truncated integer second) is added to *datadf* in place; the
    returned frame is the per-second mean, indexed by that second.
    """
    whole_seconds = datadf['TimeStamp (sec)'].astype('int')
    datadf['timestamps'] = whole_seconds
    return datadf.groupby(['timestamps']).mean()
def _null_rows_outside(datadf, column, predicate):
    """Null entire rows of *datadf* where predicate(datadf[column]) is True.

    Missing columns (KeyError) and non-comparable dtypes (TypeError) are
    silently skipped, matching the historical per-column try/except blocks.
    """
    try:
        mask = predicate(datadf[column])
        datadf.mask(mask, inplace=True)
    except (KeyError, TypeError):
        pass


def clean_df_stats(datadf, workstrokesonly=True, ignorehr=True,
                   ignoreadvanced=False):
    """Clean a stroke-data frame: null zeros, negatives and implausible rows.

    Zero and negative values are treated as sensor dropouts and replaced by
    NaN.  Columns whose legitimate values are zero or negative (spm,
    workoutstate, catch, peakforceangle, low hr) are temporarily shifted
    into the positive domain so they survive.  Rows failing per-metric
    sanity ranges are nulled entirely; advanced (force-curve) metrics are
    range-checked only when they carry real data (non-zero spread).

    Parameters
    ----------
    datadf : DataFrame of stroke data; a zero 'workoutid' column is added
        when missing.  Modified in place and returned.
    workstrokesonly : bool (or the string 'True')
        Drop strokes whose workoutstate is a rest state (3).
    ignorehr : bool
        When False, rows with hr < 30 are also nulled.
    ignoreadvanced : bool
        Skip range checks on advanced metrics (also auto-detected below).

    Returns
    -------
    DataFrame: the cleaned frame, or the untouched copy when cleaning would
    remove >99% of a workout's rows or leave fewer than 2.
    """
    try:
        workoutids = datadf['workoutid'].unique()
    except KeyError:
        datadf['workoutid'] = 0
    # remember per-workout row counts so we can bail out if cleaning proves
    # too aggressive
    before = {}
    for workoutid in datadf['workoutid'].unique():
        before[workoutid] = len(datadf[datadf['workoutid'] == workoutid])
    data_orig = datadf.copy()
    if len(datadf) == 0:
        return datadf
    # bring metrics with legitimate negative/low values into the positive
    # domain so the clip / zero-replace below does not destroy them
    try:
        datadf['catch'] = -datadf['catch']
    except (KeyError, TypeError):
        pass
    try:
        datadf['peakforceangle'] = datadf['peakforceangle'] + 1000
    except (KeyError, TypeError):
        pass
    try:
        datadf['hr'] = datadf['hr'] + 10
    except (KeyError, TypeError):
        pass
    # protect 0 spm values from being nulled
    try:
        datadf['spm'] = datadf['spm'] + 1.0
    except (KeyError, TypeError):
        pass
    # protect 0 workoutstate values from being nulled
    try:
        datadf['workoutstate'] = datadf['workoutstate'] + 1
    except (KeyError, TypeError):
        pass
    try:
        datadf = datadf.clip(lower=0)
    except TypeError:
        pass
    # advanced metrics columns (force-curve data from advanced sensors)
    advancedcols = [
        'rhythm',
        'power',
        'drivelength',
        'forceratio',
        'drivespeed',
        'driveenergy',
        'catch',
        'finish',
        'averageforce',
        'peakforce',
        'slip',
        'wash',
        'peakforceangle',
        'effectiveangle',
    ]
    # zeros are sensor dropouts
    datadf.replace(to_replace=0, value=np.nan, inplace=True)
    # undo the protective shifts
    try:
        datadf['spm'] = datadf['spm'] - 1
    except (TypeError, KeyError):
        pass
    try:
        datadf['workoutstate'] = datadf['workoutstate'] - 1
    except (TypeError, KeyError):
        pass
    try:
        datadf['catch'] = -datadf['catch']
    except (KeyError, TypeError):
        pass
    try:
        datadf['peakforceangle'] = datadf['peakforceangle'] - 1000
    except (KeyError, TypeError):
        pass
    try:
        datadf['hr'] = datadf['hr'] - 10
    except (KeyError, TypeError):
        pass
    # null whole rows outside plausible physical ranges; same checks in the
    # same order as the historical inline blocks
    if not ignorehr:
        _null_rows_outside(datadf, 'hr', lambda s: s < 30)
    for column, predicate in [
        ('spm', lambda s: s < 0),
        ('efficiency', lambda s: s > 200.),
        ('spm', lambda s: s < 10),
        ('pace', lambda s: s / 1000. > 300.),
        ('efficiency', lambda s: s < 0.),
        ('pace', lambda s: s / 1000. < 60.),
        ('power', lambda s: s > 5000),
        ('spm', lambda s: s > 120),
    ]:
        _null_rows_outside(datadf, column, predicate)
    # wash below 1 is nulled in its own column only, not the whole row
    try:
        mask = datadf['wash'] < 1
        datadf.loc[mask, 'wash'] = np.nan
    except (KeyError, TypeError):
        pass
    # auto-detect dead advanced metrics: zero or NaN spread means the
    # column carries no real data and range checks would nuke everything
    if not ignoreadvanced:
        for metric in advancedcols:
            try:
                spread = datadf[metric].std()
                if spread == 0 or np.isnan(spread):
                    ignoreadvanced = True
            except KeyError:
                pass
    if not ignoreadvanced:
        for column, predicate in [
            ('rhythm', lambda s: s < 0),
            ('rhythm', lambda s: s > 70),
            ('power', lambda s: s < 20),
            ('drivelength', lambda s: s < 0.5),
            ('forceratio', lambda s: s < 0.2),
            ('forceratio', lambda s: s > 1.0),
            ('drivespeed', lambda s: s < 0.5),
            ('drivespeed', lambda s: s > 4),
            ('driveenergy', lambda s: s > 2000),
            ('driveenergy', lambda s: s < 100),
            ('catch', lambda s: s > -30.),
        ]:
            _null_rows_outside(datadf, column, predicate)
    workoutstateswork = [1, 4, 5, 8, 9, 6, 7]
    workoutstatesrest = [3]
    workoutstatetransition = [0, 2, 10, 11, 12, 13]
    if workstrokesonly == 'True' or workstrokesonly == True:
        try:
            datadf = datadf[~datadf['workoutstate'].isin(workoutstatesrest)]
        except (KeyError, TypeError):
            # narrowed from a bare except: only a missing/odd column is expected
            pass
    # bail out with the untouched copy when cleaning removed (nearly) all
    # rows of any workout
    after = {}
    for workoutid in data_orig['workoutid'].unique():
        after[workoutid] = len(datadf[datadf['workoutid'] == workoutid].dropna())
        ratio = float(after[workoutid]) / float(before[workoutid])
        if ratio < 0.01 or after[workoutid] < 2:
            return data_orig
    return datadf
def getpartofday(row,r):
    """Classify a workout's start time as Morning/Afternoon/Evening/Night.

    Uses the workout's average GPS position (when present) to look up the
    local timezone via TimezoneFinder, falling back to the rower's default
    timezone when the data has no GPS columns or no timezone matches.

    Parameters
    ----------
    row : rowingdata-like object exposing .df (stroke data) and .rowdatetime
    r : rower object exposing .defaulttimezone

    Returns
    -------
    str : "Morning" (<12h), "Afternoon" (<18h), "Evening" (<22h) or
    "Night", evaluated in the workout's local time.
    """
    workoutstartdatetime = row.rowdatetime
    try: # pragma: no cover
        # average GPS position over the whole workout
        latavg = row.df[' latitude'].mean()
        lonavg = row.df[' longitude'].mean()
        tf = TimezoneFinder()
        try:
            timezone_str = tf.timezone_at(lng=lonavg, lat=latavg)
        except (ValueError,OverflowError): # pragma: no cover
            timezone_str = 'UTC'
        if timezone_str == None: # pragma: no cover
            # point not inside any timezone polygon (e.g. on open water);
            # NOTE(review): closest_timezone_at was removed in newer
            # timezonefinder releases -- confirm the pinned version.
            timezone_str = tf.closest_timezone_at(lng=lonavg,
                                                  lat=latavg)
            if timezone_str == None:
                timezone_str = r.defaulttimezone
        try:
            # attach the detected timezone to the naive start time
            workoutstartdatetime = pytz.timezone(timezone_str).localize(
                row.rowdatetime
            )
        except ValueError:
            # already timezone-aware; keep as-is
            workoutstartdatetime = row.rowdatetime
    except KeyError:
        # stroke data has no GPS columns
        timezone_str = r.defaulttimezone
        workoutstartdatetime = row.rowdatetime
    h = workoutstartdatetime.astimezone(pytz.timezone(timezone_str)).hour
    if h < 12: # pragma: no cover
        return "Morning"
    elif h < 18: # pragma: no cover
        return "Afternoon"
    elif h < 22: # pragma: no cover
        return "Evening"
    else: # pragma: no cover
        return "Night"
    return None # pragma: no cover
def getstatsfields():
    """Return (fieldlist, fielddict) of plottable stroke metrics.

    fielddict maps metric name -> verbose name for every entry of
    rowingmetrics except the axis fields 'time', 'distance' and 'cumdist';
    fieldlist is its keys in the same order.
    """
    fielddict = {name: d['verbose_name'] for name, d in rowingmetrics}
    # axis fields are not stats themselves
    for axisfield in ('time', 'distance', 'cumdist'):
        fielddict.pop(axisfield)
    # list(dict) replaces the old py2 iteritems()/items() fallback
    fieldlist = list(fielddict)
    return fieldlist, fielddict
# A string representation for time deltas
def niceformat(values):
    """Return the strfdelta string for every time delta in *values*."""
    return [strfdelta(v) for v in values]
# A nice printable format for time delta values
def strfdelta(tdelta):
    """Format a time delta as "MM:SS.t".

    Accepts either a datetime.timedelta or a numpy timedelta64 scalar
    (nanosecond ticks).  Minutes are not wrapped at 60 and days are ignored
    for datetime.timedelta input.
    """
    try:
        minutes, seconds = divmod(tdelta.seconds, 60)
        tenths = int(tdelta.microseconds / 1e5)
    except AttributeError:
        # numpy timedelta64: reinterpret as integer nanoseconds.
        # BUG FIX: divmod with float divisors yields floats, which the
        # zero-padded format rendered as "1.0:35.0.2"; cast to int so this
        # branch prints "01:35.2" like the datetime branch.
        minutes, seconds = divmod(tdelta.view(np.int64), 60e9)
        seconds, rest = divmod(seconds, 1e9)
        minutes = int(minutes)
        seconds = int(seconds)
        tenths = int(rest / 1e8)
    res = "{minutes:0>2}:{seconds:0>2}.{tenths:0>1}".format(
        minutes=minutes,
        seconds=seconds,
        tenths=tenths,
    )
    return res
def timedelta_to_seconds(tdelta): # pragma: no cover
    """Convert a time-like object (with .minute/.second) to float seconds.

    Note: despite the name this reads .minute/.second (a datetime.time-style
    object, not a timedelta); hours are ignored.
    """
    minutes_part = tdelta.minute * 60.0
    return minutes_part + tdelta.second
# A nice printable format for pace values
def nicepaceformat(values):
    """Return the strfdelta string for every pace time delta in *values*."""
    return [strfdelta(v) for v in values]
# Convert seconds to a Time Delta value, replacing NaN with a 5:50 pace
def timedeltaconv(x):
    """Convert *x* seconds to a timedelta.

    Non-finite, non-positive or implausibly large (>= 175000 s) values map
    to the 350 s (5:50 pace) placeholder.
    """
    if np.isfinite(x) and 0 < x < 175000:
        return datetime.timedelta(seconds=x)
    return datetime.timedelta(seconds=350.)
def paceformatsecs(values):
    """Format pace values given in seconds as "MM:SS.t" strings."""
    return [strfdelta(timedeltaconv(v)) for v in values]
def update_c2id_sql(id, c2id):
    """Set rowers_workout.uploadedtoc2 = *c2id* for workout *id* via raw SQL.

    Returns 1 on completion.
    """
    engine = create_engine(database_url, echo=False)
    # Parameterized statement: the previous %-interpolation built the SQL
    # from raw values and was open to SQL injection.
    query = sa.text("UPDATE rowers_workout SET uploadedtoc2 = :c2id WHERE `id` = :id;")
    try:
        with engine.connect() as conn, conn.begin():
            conn.execute(query, {'c2id': c2id, 'id': id})
    finally:
        engine.dispose()
    return 1
def getcpdata_sql(rower_id, table='cpdata'):
    """Read all critical-power rows for *rower_id* from *table* into a DataFrame.

    The table name cannot be a bound parameter; it comes from trusted call
    sites only ('cpdata' / 'ergcpdata').  The user id is bound properly.
    """
    engine = create_engine(database_url, echo=False)
    query = sa.text('SELECT * from {table} WHERE user=:rower_id;'.format(
        table=table,
    ))
    try:
        # BUG FIX: a raw_connection() was previously opened here, never used
        # and never closed, and the engine was never disposed.
        df = pd.read_sql_query(query, engine, params={'rower_id': rower_id})
    finally:
        engine.dispose()
    return df
def deletecpdata_sql(rower_id, table='cpdata'): # pragma: no cover
    """Delete all critical-power rows for *rower_id* from *table*.

    The table name comes from trusted call sites only; the user id is bound
    as a parameter.  A locked database is reported but not raised.
    """
    engine = create_engine(database_url, echo=False)
    query = sa.text('DELETE from {table} WHERE user=:rower_id;'.format(
        table=table,
    ))
    with engine.connect() as conn, conn.begin():
        try:
            conn.execute(query, {'rower_id': rower_id})
        except Exception:
            # narrowed from a bare except; typically a locked SQLite file
            print("Database locked")
    engine.dispose()
def updatecpdata_sql(rower_id, delta, cp, table='cpdata', distance=None): # pragma: no cover
    """Replace the critical-power rows for *rower_id* in *table*.

    Parameters
    ----------
    rower_id : int
    delta, cp : array-likes of durations and CP values.
    table : str, trusted table name.
    distance : pandas.Series or None
        Optional distance column.  BUG FIX: the old default was a plain
        list ([]) whose `.empty` attribute access raised AttributeError
        whenever the argument was omitted.
    """
    deletecpdata_sql(rower_id)
    df = pd.DataFrame(
        {
            'delta': delta,
            'cp': cp,
            'user': rower_id,
        }
    )
    if distance is not None and len(distance):
        df['distance'] = distance
    engine = create_engine(database_url, echo=False)
    try:
        with engine.connect() as conn, conn.begin():
            df.to_sql(table, engine, if_exists='append', index=False)
    finally:
        engine.dispose()
def fetchcperg(rower, theworkouts):
    """Return the cached erg CP curve for *rower* and queue a refresh.

    Returns the current contents of the 'ergcpdata' table (possibly stale);
    handle_updateergcp is enqueued on the low-priority queue to recompute it
    from the workouts' CSV files.
    """
    # (the old unused `theids` list and `job` binding were removed)
    thefilenames = [w.csvfilename for w in theworkouts]
    cpdf = getcpdata_sql(rower.id, table='ergcpdata')
    # fire-and-forget background refresh
    myqueue(
        queuelow,
        handle_updateergcp,
        rower.id,
        thefilenames)
    return cpdf
from rowers.datautils import p0
from rowers.utils import calculate_age
from scipy import optimize
def get_workoutsummaries(userid, startdate): # pragma: no cover
    """Summary DataFrame of a user's workouts since *startdate*, newest first.

    The per-workout email links (TCX/CSV) are stripped from the summary.
    """
    rower = User.objects.get(id=userid).rower
    summary = workout_summary_to_df(rower, startdate=startdate)
    summary = summary.drop(['Stroke Data TCX', 'Stroke Data CSV'], axis=1)
    return summary.sort_values('date', ascending=False)
def workout_goldmedalstandard(workout, reset=False):
    """Return (gold-medal-standard %, duration seconds) for *workout*.

    Values cached on the model are reused unless *reset* is True.
    Non-rowing workout types yield (0, 0).  On-the-water workouts are
    scaled up by the user's otwslack percentage.  Fresh values are saved
    back onto the workout.
    """
    if workout.goldmedalstandard > 0 and not reset:
        return workout.goldmedalstandard, workout.goldmedalseconds
    if workout.workouttype not in rowtypes:
        return 0, 0
    gms, gmsecs = calculate_goldmedalstandard(workout.user, workout)
    if workout.workouttype in otwtypes:
        # compensate on-the-water scores for the configured slack
        gms = gms * (100. / (100. - workout.user.otwslack))
    workout.goldmedalstandard = gms
    workout.goldmedalseconds = gmsecs
    workout.save()
    return gms, gmsecs
def check_marker(workout):
    """Find and possibly flag the best recent workout as a ranking piece.

    Looks back r.kfit days from *workout*, picks the rowing workout with the
    highest gold-medal-standard score, and marks it as a ranking piece when
    no ranking piece exists in the window or when it beats the last one by
    more than 0.2 points.

    Returns the affected Workout, or None when nothing qualifies (including
    when this workout's gold-standard duration is under 60 s).
    """
    r = workout.user
    gmstandard,gmseconds = workout_goldmedalstandard(workout)
    if gmseconds<60:
        return None
    # look-back window: r.kfit days before this workout
    dd = arrow.get(workout.date).datetime-datetime.timedelta(days=r.kfit)
    ws = Workout.objects.filter(date__gte=dd,
                                date__lte=workout.date,
                                user=r,duplicate=False,
                                workouttype__in=mytypes.rowtypes,
                                ).order_by("date")
    ids = []
    gms = []
    for w in ws:
        gmstandard,gmseconds = workout_goldmedalstandard(w)
        if gmseconds>60:
            ids.append(w.id)
            gms.append(gmstandard)
    df = pd.DataFrame({
        'id':ids,
        'gms':gms,
    })
    if df.empty: # pragma: no cover
        # NOTE(review): sets 'ranking', while the rest of this function uses
        # 'rankingpiece' -- confirm the field name.
        workout.ranking = True
        workout.save()
        return workout
    # workout with the highest gold-medal-standard score in the window
    indexmax = df['gms'].idxmax()
    theid = df.loc[indexmax,'id']
    wmax = Workout.objects.get(id=theid)
    gms_max = wmax.goldmedalstandard
    # check if equal, bigger, or smaller than previous
    if not wmax.rankingpiece:
        rankingworkouts = ws.filter(rankingpiece=True)
        if len(rankingworkouts) == 0:
            # no ranking piece in the window yet: this one becomes it
            wmax.rankingpiece = True
            wmax.save()
            return wmax
        lastranking = rankingworkouts[len(rankingworkouts)-1]
        if lastranking.goldmedalstandard+0.2 < wmax.goldmedalstandard: # pragma: no cover
            wmax.rankingpiece = True
            wmax.save()
            return wmax
        else: # pragma: no cover
            return wmax
    return None
def calculate_goldmedalstandard(rower,workout,recurrance=True):
    """Score a workout's CP curve against world-class age-group records.

    Reads (or triggers computation of) the workout's cached CP curve, fits a
    two-term hyperbolic power-duration model to the rower's age/sex/weight
    class records, and returns the best percentage of the world-class curve
    achieved and the duration at which it was achieved.

    Returns
    -------
    (maxvalue, delta) : best score in percent and its duration in seconds;
    (0, 0) when no usable data exists.
    """
    cpfile = 'media/cpdata_{id}.parquet.gz'.format(id=workout.id)
    try:
        df = pd.read_parquet(cpfile)
    except:
        # NOTE(review): bare except also hides unrelated errors (permissions,
        # corrupt parquet); consider narrowing to (OSError, ArrowInvalid).
        background = True
        if settings.TESTING:
            background = False
        df, delta, cpvalues = setcp(workout,background=background)
        if df.empty:
            return 0,0
    if df.empty and recurrance: # pragma: no cover
        # one retry with a queued recompute before giving up
        df, delta, cpvalues = setcp(workout,recurrance=False,background=True)
        if df.empty:
            return 0,0
    age = calculate_age(rower.birthdate,today=workout.date)
    agerecords = CalcAgePerformance.objects.filter(
        age=age,
        sex=rower.sex,
        weightcategory = rower.weightcategory
    )
    wcdurations = []
    wcpower = []
    getrecords = False
    if not settings.TESTING: # pragma: no cover
        if len(agerecords) == 0: # pragma: no cover
            getrecords = True
        for record in agerecords: # pragma: no cover
            if record.power > 0:
                wcdurations.append(record.duration)
                wcpower.append(record.power)
            else:
                getrecords = True
    if getrecords: # pragma: no cover
        # queue a background refresh of the age-group records
        durations = [1,4,30,60]
        distances = [100,500,1000,2000,5000,6000,10000,21097,42195]
        df2 = pd.DataFrame(
            list(
                C2WorldClassAgePerformance.objects.filter(
                    sex=rower.sex,
                    weightcategory=rower.weightcategory
                ).values()
            )
        )
        jsondf = df2.to_json()
        job = myqueue(queuelow,handle_getagegrouprecords,
                      jsondf,distances,durations,age,rower.sex,rower.weightcategory)
    wcpower = pd.Series(wcpower,dtype='float')
    wcdurations = pd.Series(wcdurations,dtype='float')
    # two-term hyperbolic power-duration model
    fitfunc = lambda pars,x: pars[0]/(1+(x/pars[2])) + pars[1]/(1+(x/pars[3]))
    errfunc = lambda pars,x,y: fitfunc(pars,x)-y
    if len(wcdurations)>=4: # pragma: no cover
        p1wc, success = optimize.leastsq(errfunc, p0[:],args=(wcdurations,wcpower))
    else:
        # NOTE(review): everything computed in this branch is dead -- the
        # unconditional `return 0,0` below discards factor/p1wc/success.
        # Confirm whether the early return or the scaled fit was intended.
        factor = fitfunc(p0,wcdurations.mean()/wcpower.mean())
        p1wc = [p0[0]/factor,p0[1]/factor,p0[2],p0[3]]
        success = 0
        return 0,0
    times = df['delta']
    powers = df['cp']
    wcpowers = fitfunc(p1wc,times)
    # score: workout CP as a percentage of the world-class curve
    scores = 100.*powers/wcpowers
    try:
        indexmax = scores.idxmax()
        delta = int(df.loc[indexmax,'delta'])
        maxvalue = scores.max()
    except (ValueError,TypeError): # pragma: no cover
        indexmax = 0
        delta = 0
        maxvalue = 0
    return maxvalue,delta
def fetchcp_new(rower,workouts):
    """Collect per-workout CP curves from cached parquet files.

    Missing caches are (re)computed via setcp().  With more than one
    workout, the per-duration maximum over all curves is kept.

    Returns
    -------
    (delta, cp, 0, workout, url) columns of the combined frame; five empty
    float series / 0 when there is no data.  NOTE(review): with exactly one
    workout, the loop's last frame is used without the max-envelope step.
    """
    data = []
    for workout in workouts:
        cpfile = 'media/cpdata_{id}.parquet.gz'.format(id=workout.id)
        try:
            df = pd.read_parquet(cpfile)
            df['workout'] = str(workout)
            df['url'] = workout.url()
            data.append(df)
        except:
            # CP data file doesn't exist yet. has to be created
            # NOTE(review): bare except -- also hides corrupt caches.
            df, delta, cpvalues = setcp(workout)
            df['workout'] = str(workout)
            df['url'] = workout.url()
            data.append(df)
    if len(data) == 0:
        return pd.Series(dtype='float'),pd.Series(dtype='float'),0,pd.Series(dtype='float'),pd.Series(dtype='float')
    if len(data)>1:
        df = pd.concat(data,axis=0)
        try:
            # keep, per duration, only the row(s) with the maximum CP
            df = df[df['cp'] == df.groupby(['delta'])['cp'].transform('max')]
        except KeyError: # pragma: no cover
            return pd.Series(dtype='float'),pd.Series(dtype='float'),0,pd.Series(dtype='float'),pd.Series(dtype='float')
    df = df.sort_values(['delta']).reset_index()
    return df['delta'],df['cp'],0,df['workout'],df['url']
def setcp(workout,background=False,recurrance=True):
    """Compute and cache the critical-power (CP) curve for one workout.

    Writes the curve to media/cpdata_<id>.parquet.gz.  With background=True
    the computation is queued on the low-priority queue and empty results
    are returned immediately.  When recurrance is True, the workout's
    gold-medal standard is recomputed from the fresh curve and saved.

    Returns
    -------
    (df, delta, cpvalues); empty frame/series when there is no usable
    power data (missing column, constant power, or empty strokes).
    """
    filename = 'media/cpdata_{id}.parquet.gz'.format(id=workout.id)
    strokesdf = getsmallrowdata_db(['power','workoutid','time'],ids = [workout.id])
    try:
        # constant power (std == 0) carries no CP information
        if strokesdf['power'].std()==0:
            return pd.DataFrame(),pd.Series(dtype='float'),pd.Series(dtype='float')
    except KeyError:
        return pd.DataFrame(),pd.Series(dtype='float'),pd.Series(dtype='float')
    if background: # pragma: no cover
        job = myqueue(queuelow,handle_setcp,strokesdf,filename,workout.id)
        return pd.DataFrame({'delta':[],'cp':[]}),pd.Series(dtype='float'),pd.Series(dtype='float')
    if not strokesdf.empty:
        totaltime = strokesdf['time'].max()
        try:
            powermean = strokesdf['power'].mean()
        except KeyError: # pragma: no cover
            powermean = 0
        if powermean != 0:
            thesecs = totaltime
            # evaluate the curve on a log-spaced duration grid up to 105%
            # of the workout duration
            maxt = 1.05 * thesecs
            if maxt > 0:
                logarr = datautils.getlogarr(maxt)
                dfgrouped = strokesdf.groupby(['workoutid'])
                delta, cpvalues, avgpower = datautils.getcp(dfgrouped, logarr)
                df = pd.DataFrame({
                    'delta':delta,
                    'cp':cpvalues,
                    'id':workout.id,
                })
                df.to_parquet(filename,engine='fastparquet',compression='GZIP')
                if recurrance:
                    goldmedalstandard, goldmedalduration = calculate_goldmedalstandard(workout.user,workout)
                    workout.goldmedalstandard = goldmedalstandard
                    # NOTE(review): other code in this module stores this
                    # value as 'goldmedalseconds' -- confirm the field name.
                    workout.goldmedalduration = goldmedalduration
                    workout.save()
                return df,delta,cpvalues
    return pd.DataFrame({'delta':[],'cp':[]}),pd.Series(dtype='float'),pd.Series(dtype='float')
def update_wps(r,types,mode='water',asynchron=True):
    """Update the rower's median work-per-stroke (driveenergy) statistic.

    Considers workouts of the given *types* in the last r.cprange days.

    NOTE(review): when asynchron is True the job is queued AND the
    computation below still runs synchronously -- confirm whether an early
    return after queueing was intended.

    Returns
    -------
    bool : False when there is no usable driveenergy data, True otherwise.
    """
    firstdate = timezone.now()-datetime.timedelta(days=r.cprange)
    workouts = Workout.objects.filter(
        date__gte=firstdate,
        workouttype__in=types,
        user = r
    )
    ids = [w.id for w in workouts]
    if asynchron:
        job = myqueue(
            queue,
            handle_update_wps,
            r.id,
            types,
            ids,
            mode
        )
    df = getsmallrowdata_db(['time','driveenergy'],ids=ids)
    try:
        # only count strokes with meaningful drive energy (> 100)
        mask = df['driveenergy'] > 100
    except (KeyError, TypeError):
        return False
    try:
        wps_median = int(df.loc[mask,'driveenergy'].median())
        if mode == 'water':
            r.median_wps = wps_median
        else:# pragma: no cover
            r.median_wps_erg = wps_median
        r.save()
    except ValueError: # pragma: no cover
        # median of an empty selection is NaN; int(NaN) raises ValueError
        pass
    return True
def update_rolling_cp(r, types, mode='water'):
    """Refit the rower's critical-power curve from recent workouts.

    Looks back r.cprange days over workouts of the given *types*, builds the
    per-duration maximum CP envelope and fits it with datautils.cpfit.  The
    fit parameters are stored on the rower: p0..p3/cpratio for water mode,
    ep0..ep3/ecpratio otherwise.

    Returns True when a fit was stored, False when no CP points were found.
    """
    since = timezone.now() - datetime.timedelta(days=r.cprange)
    workouts = Workout.objects.filter(
        date__gte=since,
        workouttype__in=types,
        user=r
    )
    delta, cp, avgpower, workoutnames, urls = fetchcp_new(r, workouts)
    # envelope: one row per duration, keeping only the highest positive CP
    envelope = pd.DataFrame({'Delta': delta, 'CP': cp})
    envelope = envelope[envelope['CP'] > 0]
    envelope = envelope.dropna(axis=0)
    envelope = envelope.sort_values(['Delta', 'CP'], ascending=[1, 0])
    envelope = envelope.drop_duplicates(subset='Delta', keep='first')
    fit = datautils.cpfit(envelope)
    if len(envelope) == 0:
        return False
    params = fit[0]
    if mode == 'water':
        r.p0 = params[0]
        r.p1 = params[1]
        r.p2 = params[2]
        r.p3 = params[3]
        r.cpratio = fit[3]
    else:
        r.ep0 = params[0]
        r.ep1 = params[1]
        r.ep2 = params[2]
        r.ep3 = params[3]
        r.ecpratio = fit[3]
    r.save()
    return True
def fetchcp(rower,theworkouts,table='cpdata'): # pragma: no cover
    """Legacy CP fetch: return the cached CP curve plus per-workout averages.

    Reads all power strokes for *theworkouts* to compute average power per
    workout, then returns the cached curve from the SQL table.  When the
    cache is empty, a background recompute is queued and empty series are
    returned.

    Returns
    -------
    (delta, cp, avgpower_by_workout_id)
    """
    # get all power data from database (plus workoutid)
    theids = [int(w.id) for w in theworkouts]
    columns = ['power','workoutid','time']
    df = getsmallrowdata_db(columns,ids=theids)
    df.dropna(inplace=True,axis=0)
    if df.empty:
        # no power data at all: zero averages for every workout
        avgpower2 = {}
        for id in theids:
            avgpower2[id] = 0
        return pd.Series([],dtype='float'),pd.Series([],dtype='float'),avgpower2
    try:
        dfgrouped = df.groupby(['workoutid'])
    except KeyError:
        avgpower2 = {}
        return pd.Series([],dtype='float'),pd.Series([],dtype='float'),avgpower2
    try:
        # mean power per workout, truncated to int
        avgpower2 = dict(dfgrouped.mean()['power'].astype(int))
    except KeyError:
        avgpower2 = {}
        for id in theids:
            avgpower2[id] = 0
        return pd.Series([],dtype='float'),pd.Series([],dtype='float'),avgpower2
    cpdf = getcpdata_sql(rower.id,table=table)
    if not cpdf.empty:
        return cpdf['delta'],cpdf['cp'],avgpower2
    else:
        # cache miss: queue a recompute and return empty series for now
        job = myqueue(queuelow,
                      handle_updatecp,
                      rower.id,
                      theids,
                      table=table)
        return pd.Series([],dtype='float'),pd.Series([],dtype='float'),avgpower2
    return pd.Series([],dtype='float'),pd.Series([],dtype='float'),avgpower2
# create a new workout from manually entered data
def create_row_df(r, distance, duration, startdatetime, workouttype='rower',
                  avghr=None, avgpwr=None, avgspm=None,
                  rankingpiece=False,
                  duplicate=False, rpe=-1,
                  title='Manual entry', notes='', weightcategory='hwt',
                  adaptiveclass='None'):
    """Create a workout from manually entered summary data.

    Synthesizes a stroke-by-stroke dataframe (one fake stroke per ~10 m)
    from the given distance/duration and stores it via
    save_workout_database.

    Returns (workout id, message).
    """
    if duration is not None:
        totalseconds = duration.hour * 3600.
        totalseconds += duration.minute * 60.
        totalseconds += duration.second
        totalseconds += duration.microsecond / 1.e6
    else:  # pragma: no cover
        totalseconds = 60.
    if distance is None:  # pragma: no cover
        distance = 0
    try:
        nr_strokes = int(distance / 10.)
    except TypeError:  # pragma: no cover
        nr_strokes = int(20. * totalseconds)
    # Need at least 2 strokes: the np.arange ramps below divide by
    # (nr_strokes - 1), so nr_strokes == 1 would raise ZeroDivisionError.
    if nr_strokes < 2:  # pragma: no cover
        nr_strokes = 100
    unixstarttime = arrow.get(startdatetime).timestamp()
    if not avgspm:  # pragma: no cover
        try:
            spm = 60. * nr_strokes / totalseconds
        except ZeroDivisionError:
            spm = 20.
    else:
        spm = avgspm
    # Evenly spaced elapsed time and distance per synthetic stroke.
    elapsed = np.arange(nr_strokes) * totalseconds / (float(nr_strokes - 1))
    d = np.arange(nr_strokes) * distance / (float(nr_strokes - 1))
    unixtime = unixstarttime + elapsed
    try:
        pace = 500. * totalseconds / distance
    except ZeroDivisionError:  # pragma: no cover
        pace = 240.
    if workouttype in ['rower', 'slides', 'dynamic']:
        # Erg power estimate from average velocity (P = 2.8 v^3).
        try:
            velo = distance / totalseconds
        except ZeroDivisionError:  # pragma: no cover
            velo = 2.4
        power = 2.8 * velo ** 3
    elif avgpwr is not None:  # pragma: no cover
        power = avgpwr
    else:  # pragma: no cover
        power = 0
    if avghr is not None:
        hr = avghr
    else:  # pragma: no cover
        hr = 0
    df = pd.DataFrame({
        'TimeStamp (sec)': unixtime,
        ' Horizontal (meters)': d,
        ' Cadence (stokes/min)': spm,
        ' Stroke500mPace (sec/500m)': pace,
        ' ElapsedTime (sec)': elapsed,
        ' Power (watts)': power,
        ' HRCur (bpm)': hr,
    })
    timestr = strftime("%Y%m%d-%H%M%S")
    csvfilename = 'media/df_' + timestr + '.csv'
    df[' ElapsedTime (sec)'] = df['TimeStamp (sec)']
    row = rrdata(df=df)
    row.write_csv(csvfilename, gzip=True)
    # Fix: forward rpe (it was accepted but silently dropped before).
    id, message = save_workout_database(csvfilename, r,
                                        title=title,
                                        notes=notes,
                                        rankingpiece=rankingpiece,
                                        duplicate=duplicate,
                                        dosmooth=False,
                                        workouttype=workouttype,
                                        consistencychecks=False,
                                        weightcategory=weightcategory,
                                        adaptiveclass=adaptiveclass,
                                        rpe=rpe,
                                        totaltime=totalseconds)
    return (id, message)
from rowers.utils import totaltime_sec_to_string
def checkbreakthrough(w, r):
    """Decide whether workout *w* is a 'breakthrough' or 'hard' effort.

    For rowing workouts, compares the workout's CP values against the
    rower's fitted CP model (water or erg parameters depending on
    workout type), refreshes the rolling CP fit, flags the workout as a
    ranking piece when appropriate and queues a notification email.

    Returns:
        (isbreakthrough, ishard) booleans.
    """
    isbreakthrough = False
    ishard = False
    workouttype = w.workouttype
    if workouttype in rowtypes:
        cpdf, delta, cpvalues = setcp(w)
        if not cpdf.empty:
            if workouttype in otwtypes:
                # On-the-water: use the water CP parameters p0..p3.
                res, btvalues, res2 = utils.isbreakthrough(
                    delta, cpvalues, r.p0, r.p1, r.p2, r.p3, r.cpratio)
                success = update_rolling_cp(r, otwtypes, 'water')
            elif workouttype in otetypes:
                # Erg: use the erg CP parameters ep0..ep3.
                res, btvalues, res2 = utils.isbreakthrough(
                    delta, cpvalues, r.ep0, r.ep1, r.ep2, r.ep3, r.ecpratio)
                success = update_rolling_cp(r, otetypes, 'erg')
            else:  # pragma: no cover
                # NOTE(review): btvalues is not assigned on this path;
                # the 'ishard' email below would NameError if reached.
                res = 0
                res2 = 0
            if res:
                isbreakthrough = True
            if res2 and not isbreakthrough:  # pragma: no cover
                ishard = True
    # submit email task to send email about breakthrough workout
    if isbreakthrough:
        if not w.duplicate:
            w.rankingpiece = True
            w.save()
        if r.getemailnotifications and not r.emailbounced:  # pragma: no cover
            job = myqueue(queuehigh, handle_sendemail_breakthrough,
                          w.id,
                          r.user.email,
                          r.user.first_name,
                          r.user.last_name,
                          btvalues=btvalues.to_json())
    # submit email task to send email about breakthrough workout
    if ishard:  # pragma: no cover
        if not w.duplicate:
            w.rankingpiece = True
            w.save()
        if r.getemailnotifications and not r.emailbounced:
            job = myqueue(queuehigh, handle_sendemail_hard,
                          w.id,
                          r.user.email,
                          r.user.first_name,
                          r.user.last_name,
                          btvalues=btvalues.to_json())
    return isbreakthrough, ishard
def checkduplicates(r, workoutdate, workoutstartdatetime, workoutenddatetime):
    """Return True when rower *r* already has a workout overlapping
    [workoutstartdatetime, workoutenddatetime] on *workoutdate*.

    Only non-duplicate workouts are considered. A candidate overlaps when
    it starts at or before the new workout's end and its own end (start +
    duration) falls after the new workout's start.
    """
    candidates = Workout.objects.filter(
        user=r, date=workoutdate, duplicate=False
    ).exclude(
        startdatetime__gt=workoutenddatetime
    )
    for ww in candidates:
        t = ww.duration
        delta = datetime.timedelta(hours=t.hour, minutes=t.minute,
                                   seconds=t.second)
        # Overlap: the existing workout ends after the new one starts.
        if ww.startdatetime + delta > workoutstartdatetime:
            return True
    return False
# Processes painsled CSV file to database
def save_workout_database(f2, r, dosmooth=True, workouttype='rower',
                          boattype='1x',
                          adaptiveclass='None',
                          weightcategory='hwt',
                          dosummary=True, title='Workout',
                          workoutsource='unknown',
                          notes='', totaldist=0, totaltime=0,
                          rankingpiece=False,
                          rpe=-1,
                          duplicate=False,
                          summary='',
                          makeprivate=False,
                          oarlength=2.89, inboard=0.88,
                          forceunit='lbs',
                          consistencychecks=False,
                          startdatetime='',
                          impeller=False):
    """Parse a painsled-format CSV file *f2* and store it as a Workout.

    Computes derived fields (start date/time/timezone, duration, distance,
    HR stats, drag factor), optionally smooths pace, checks for
    duplicates, creates the Workout record, prepares stroke data, runs
    breakthrough detection and queues a TRIMP calculation.

    Returns:
        (workout id, message). id is 0 on failure.
    """
    message = None
    # Power-zone boundaries as percentages of FTP.
    powerperc = 100 * np.array([r.pw_ut2,
                                r.pw_ut1,
                                r.pw_at,
                                r.pw_tr, r.pw_an]) / r.ftp
    # make workout and put in database
    rr = rrower(hrmax=r.max, hrut2=r.ut2,
                hrut1=r.ut1, hrat=r.at,
                hrtr=r.tr, hran=r.an, ftp=r.ftp,
                powerperc=powerperc, powerzones=r.powerzones)
    row = rdata(f2, rower=rr)
    startdatetime, startdate, starttime, timezone_str, partofday = get_startdate_time_zone(r, row, startdatetime=startdatetime)
    if title is None or title == '':
        title = 'Workout'
    if partofday is not None:
        # e.g. "Morning water" — auto title from time of day + type.
        title = '{partofday} {workouttype}'.format(
            partofday=partofday,
            workouttype=workouttype,
        )
    if row.df.empty:  # pragma: no cover
        return (0, 'Error: CSV data file was empty')
    dtavg = row.df['TimeStamp (sec)'].diff().mean()
    if dtavg < 1:
        # Sub-second sampling: resample to stroke resolution and restart
        # the import through the dataframe path.
        newdf = df_resample(row.df)
        try:
            os.remove(f2)
        except:
            pass
        return new_workout_from_df(r, newdf,
                                   title=title, boattype=boattype,
                                   workouttype=workouttype,
                                   workoutsource=workoutsource, startdatetime=startdatetime)
    try:
        checks = row.check_consistency()
        allchecks = 1
        for key, value in checks.items():
            if not value:
                allchecks = 0
    except ZeroDivisionError:  # pragma: no cover
        # NOTE(review): allchecks stays unbound when check_consistency
        # raises before the assignment — the test below would NameError.
        pass
    if not allchecks and consistencychecks:
        # row.repair()
        pass
    if row == 0:  # pragma: no cover
        return (0, 'Error: CSV data file not found')
    try:
        # GPS present and varying => reclassify erg upload as water row.
        lat = row.df[' latitude']
        if lat.mean() != 0 and lat.std() != 0 and workouttype == 'rower':
            workouttype = 'water'
    except KeyError:
        pass
    if dosmooth:
        # auto smoothing: Savitzky-Golay filter on velocity, window
        # sized to roughly 10 seconds of samples.
        pace = row.df[' Stroke500mPace (sec/500m)'].values
        velo = 500. / pace
        f = row.df['TimeStamp (sec)'].diff().mean()
        if f != 0 and not np.isnan(f):
            windowsize = 2 * (int(10. / (f))) + 1
        else:  # pragma: no cover
            windowsize = 1
        if not 'originalvelo' in row.df:
            row.df['originalvelo'] = velo
        if windowsize > 3 and windowsize < len(velo):
            velo2 = savgol_filter(velo, windowsize, 3)
        else:  # pragma: no cover
            velo2 = velo
        velo3 = pd.Series(velo2, dtype='float')
        velo3 = velo3.replace([-np.inf, np.inf], np.nan)
        velo3 = velo3.fillna(method='ffill')
        pace2 = 500. / abs(velo3)
        row.df[' Stroke500mPace (sec/500m)'] = pace2
        row.df = row.df.fillna(0)
        row.write_csv(f2, gzip=True)
    # Remove the plain CSV; the gzipped copy is what is kept.
    try:
        os.remove(f2)
    except:
        pass
    # recalculate power data
    if workouttype == 'rower' or workouttype == 'dynamic' or workouttype == 'slides':
        try:
            if r.erg_recalculatepower:
                row.erg_recalculatepower()
                row.write_csv(f2, gzip=True)
        except:
            pass
    averagehr = row.df[' HRCur (bpm)'].mean()
    maxhr = row.df[' HRCur (bpm)'].max()
    if totaldist == 0:
        totaldist = row.df['cum_dist'].max()
    if totaltime == 0:
        totaltime = row.df['TimeStamp (sec)'].max(
        ) - row.df['TimeStamp (sec)'].min()
        try:
            # Add the initial elapsed-time offset of the recording.
            totaltime = totaltime + row.df.loc[:, ' ElapsedTime (sec)'].iloc[0]
        except KeyError:  # pragma: no cover
            pass
    if np.isnan(totaltime):  # pragma: no cover
        totaltime = 0
    if dosummary:
        summary = row.allstats()
    workoutstartdatetime = startdatetime
    dologging('debuglog.log', 'Dataprep line 1721, Workout Startdatetime {workoutstartdatetime}'.format(
        workoutstartdatetime=workoutstartdatetime,
    ))
    duration = totaltime_sec_to_string(totaltime)
    workoutdate = startdate
    workoutstarttime = starttime
    s = 'Dataprep line 1730 workoutdate and time set to {workoutdate} and {workoutstarttime}'.format(
        workoutdate=workoutdate,
        workoutstarttime=workoutstarttime,
    )
    dologging('debuglog.log', s)
    if makeprivate:  # pragma: no cover
        privacy = 'hidden'
    else:
        privacy = 'visible'
    # checking for inf values
    totaldist = np.nan_to_num(totaldist)
    maxhr = np.nan_to_num(maxhr)
    averagehr = np.nan_to_num(averagehr)
    dragfactor = 0
    if workouttype in otetypes:
        dragfactor = row.dragfactor
    t = datetime.datetime.strptime(duration, "%H:%M:%S.%f")
    delta = datetime.timedelta(hours=t.hour, minutes=t.minute, seconds=t.second)
    workoutenddatetime = workoutstartdatetime + delta
    # check for duplicate start times and duration
    duplicate = checkduplicates(r, workoutdate, workoutstartdatetime, workoutenddatetime)
    if duplicate:
        rankingpiece = False
    # test title length (DB field limit)
    if title is not None and len(title) > 140:  # pragma: no cover
        title = title[0:140]
    timezone_str = str(workoutstartdatetime.tzinfo)
    w = Workout(user=r, name=title, date=workoutdate,
                workouttype=workouttype,
                boattype=boattype,
                dragfactor=dragfactor,
                duration=duration, distance=totaldist,
                weightcategory=weightcategory,
                adaptiveclass=adaptiveclass,
                starttime=workoutstarttime,
                duplicate=duplicate,
                workoutsource=workoutsource,
                rankingpiece=rankingpiece,
                forceunit=forceunit,
                rpe=rpe,
                csvfilename=f2, notes=notes, summary=summary,
                maxhr=maxhr, averagehr=averagehr,
                startdatetime=workoutstartdatetime,
                inboard=inboard, oarlength=oarlength,
                timezone=timezone_str,
                privacy=privacy,
                impeller=impeller)
    try:
        w.save()
    except ValidationError:  # pragma: no cover
        # Fall back to "now" when the parsed start datetime is invalid.
        try:
            w.startdatetime = timezone.now()
            w.save()
        except ValidationError:
            return (0, 'Unable to create your workout')
    if privacy == 'visible':
        # Share the workout with all of the rower's teams.
        ts = Team.objects.filter(rower=r)
        for t in ts:
            w.team.add(t)
    # put stroke data in database
    res = dataprep(row.df, id=w.id, bands=True,
                   barchart=True, otwpower=True, empower=True, inboard=inboard)
    isbreakthrough, ishard = checkbreakthrough(w, r)
    marker = check_marker(w)
    result = update_wps(r, mytypes.otwtypes)
    result = update_wps(r, mytypes.otetypes)
    job = myqueue(queuehigh, handle_calctrimp, w.id, f2, r.ftp, r.sex, r.hrftp, r.max, r.rest)
    return (w.id, message)
# Map of file-format identifiers (as returned by get_file_type) to the
# rowingdata parser class that converts that format to the internal
# painsled-compatible representation.
parsers = {
    'kinomap': KinoMapParser,
    'xls': ExcelTemplate,
    'rp': RowProParser,
    'tcx': TCXParser,
    'mystery': MysteryParser,
    'ritmotime': RitmoTimeParser,
    'quiske': QuiskeParser,
    'rowperfect3': RowPerfectParser,
    'coxmate': CoxMateParser,
    'bcmike': BoatCoachAdvancedParser,
    'boatcoach': BoatCoachParser,
    'boatcoachotw': BoatCoachOTWParser,
    'painsleddesktop': painsledDesktopParser,
    'speedcoach': speedcoachParser,
    'speedcoach2': SpeedCoach2Parser,
    'ergstick': ErgStickParser,
    'fit': FITParser,
    'ergdata': ErgDataParser,
    'humon': HumonParser,
    'eth': ETHParser,
    'nklinklogbook': NKLiNKLogbookParser,
    'hero': HeroParser,
}
def get_startdate_time_zone(r, row, startdatetime=None):
    """Determine the (timezone-aware) start datetime of a workout.

    Prefers the caller-supplied *startdatetime*; otherwise uses the
    recording's own timestamp. Naive/UTC datetimes are localized from the
    GPS coordinates (TimezoneFinder) when available, else from the
    rower's default timezone.

    Returns:
        (startdatetime, startdate 'YYYY-MM-DD', starttime 'HH:MM:SS',
         timezone_str, partofday).
    """
    if startdatetime is not None and startdatetime != '':
        try:
            timezone_str = pendulum.instance(startdatetime).timezone.name
        except ValueError:  # pragma: no cover
            # NOTE(review): 'Ect/GMT' looks like a typo for 'Etc/GMT';
            # it only serves as a sentinel matched below, so behavior holds.
            timezone_str = 'Ect/GMT'
    elif startdatetime == '':
        startdatetime = row.rowdatetime
    else:
        startdatetime = row.rowdatetime
    try:
        tz = startdatetime.tzinfo
    except AttributeError:  # pragma: no cover
        startdatetime = row.rowdatetime
    partofday = getpartofday(row, r)
    if startdatetime.tzinfo is None or str(startdatetime.tzinfo) in ['tzutc()', 'Ect/GMT']:
        # Naive or plain-UTC timestamp: try to localize it properly.
        timezone_str = 'UTC'
        try:
            startdatetime = timezone.make_aware(startdatetime)
        except ValueError:  # pragma: no cover
            pass
        try:
            latavg = row.df[' latitude'].mean()
            lonavg = row.df[' longitude'].mean()
            tf = TimezoneFinder()
            if row.df[' latitude'].std() != 0:
                # Moving GPS track: look up the timezone at its location.
                try:
                    timezone_str = tf.timezone_at(lng=lonavg, lat=latavg)
                except (ValueError, OverflowError):  # pragma: no cover
                    timezone_str = 'UTC'
                if timezone_str is None:  # pragma: no cover
                    timezone_str = tf.closest_timezone_at(lng=lonavg,
                                                          lat=latavg)
                if timezone_str is None:  # pragma: no cover
                    timezone_str = r.defaulttimezone
            else:
                timezone_str = r.defaulttimezone
            try:
                startdatetime = pytz.timezone(timezone_str).localize(
                    row.rowdatetime
                )
            except ValueError:  # pragma: no cover
                # Already aware: convert instead of localizing.
                startdatetime = startdatetime.astimezone(
                    pytz.timezone(timezone_str)
                )
        except KeyError:  # pragma: no cover
            timezone_str = r.defaulttimezone
    else:
        timezone_str = str(startdatetime.tzinfo)
    startdate = startdatetime.strftime('%Y-%m-%d')
    starttime = startdatetime.strftime('%H:%M:%S')
    if timezone_str == 'tzutc()':
        timezone_str = 'UTC'  # pragma: no cover
    return startdatetime, startdate, starttime, timezone_str, partofday
def parsenonpainsled(fileformat, f2, summary, startdatetime='', empowerfirmware=None, inboard=None, oarlength=None):
    """Parse a non-painsled file *f2* with the parser for *fileformat*.

    Returns:
        (row, hasrecognized, summary, fileformat) where row is the parser
        instance (None when unrecognized) and fileformat may gain a
        'v<firmware>' suffix.
    """
    try:
        if fileformat == 'nklinklogbook' and empowerfirmware is not None:  # pragma: no cover
            if inboard is not None and oarlength is not None:
                row = NKLiNKLogbookParser(f2, firmware=empowerfirmware, inboard=inboard, oarlength=oarlength)
            else:
                row = NKLiNKLogbookParser(f2)
        else:
            row = parsers[fileformat](f2)
        if startdatetime != '':  # pragma: no cover
            # Caller-supplied start time overrides the file's own.
            row.rowdatetime = arrow.get(startdatetime).datetime
        hasrecognized = True
    except (KeyError, IndexError, ValueError):  # pragma: no cover
        hasrecognized = False
        return None, hasrecognized, '', 'unknown'
    s = 'Parsenonpainsled, start date time = {startdatetime}'.format(
        startdatetime=startdatetime,
        #rowdatetime = row.rowdatetime
    )
    dologging('debuglog.log', s)
    # handle speed coach GPS 2
    if (fileformat == 'speedcoach2'):
        oarlength, inboard = get_empower_rigging(f2)
        empowerfirmware = get_empower_firmware(f2)
        if empowerfirmware != '':
            fileformat = fileformat + 'v' + str(empowerfirmware)
        else:  # pragma: no cover
            fileformat = 'speedcoach2v0'
        try:
            summary = row.allstats()
        except ZeroDivisionError:  # pragma: no cover
            summary = ''
    else:
        # NOTE(review): this appends 'v<firmware>' to EVERY other format
        # (e.g. 'fit' becomes 'fitv...'), which makes the
        # fileformat == 'fit' branch below unreachable — confirm intent.
        fileformat = fileformat + 'v' + str(empowerfirmware)
    # handle FIT
    if (fileformat == 'fit'):  # pragma: no cover
        try:
            s = fitsummarydata(f2)
            s.setsummary()
            summary = s.summarytext
        except:
            pass
    hasrecognized = True
    return row, hasrecognized, summary, fileformat
def handle_nonpainsled(f2, fileformat, summary='', startdatetime='', empowerfirmware=None, impeller=False):
    """Convert a recognized non-painsled file to a painsled-style CSV.

    Writes the converted, gzipped CSV next to the original (suffix
    'o.csv') and deletes the original file.

    Returns:
        (csvfilename, summary, oarlength, inboard, fileformat, impeller);
        csvfilename is 0 when the file could not be handled.
    """
    oarlength = 2.89
    inboard = 0.88
    hasrecognized = False
    row, hasrecognized, summary, fileformat = parsenonpainsled(fileformat, f2, summary, startdatetime=startdatetime,
                                                              empowerfirmware=empowerfirmware)
    # Handle c2log
    if (fileformat == 'c2log' or fileformat == 'rowprolog'):  # pragma: no cover
        return (0, '', 0, 0, '', impeller)
    if not hasrecognized:  # pragma: no cover
        return (0, '', 0, 0, '', impeller)
    f_to_be_deleted = f2
    # should delete file
    f2 = f2[:-4] + 'o.csv'
    row2 = rrdata(df=row.df)
    if 'speedcoach2' in fileformat or 'nklinklogbook' in fileformat:
        # impeller consistency: prefer impeller speed when it tracks GPS.
        impellerdata, consistent, ratio = row.impellerconsistent(threshold=0.3)
        if impellerdata and consistent:
            impeller = True
        if impellerdata and not consistent:
            row2.use_gpsdata()
        if impeller:
            row2.use_impellerdata()
    row2.write_csv(f2, gzip=True)
    # os.remove(f2)
    try:
        os.remove(f_to_be_deleted)
    except:  # pragma: no cover
        try:
            os.remove(f_to_be_deleted + '.gz')
        except:
            pass
    return (f2, summary, oarlength, inboard, fileformat, impeller)
# Create new workout from file and store it in the database
# This routine should be used everywhere in views.py and mailprocessing.py
# Currently there is code duplication
def get_workouttype_from_fit(filename, workouttype='water'):
    """Derive the internal workout type from the sport recorded in a FIT
    file, falling back to *workouttype* when it cannot be determined."""
    try:
        fitfile = FitFile(filename, check_crc=False)
    except FitHeaderError:  # pragma: no cover
        return workouttype
    sport = 'rowing'
    for message in fitfile.messages:
        if message.name not in ('sport', 'lap'):
            continue
        try:
            sport = message.get_values()['sport'].lower()
        except (KeyError, AttributeError):  # pragma: no cover
            return 'water'
    try:
        return mytypes.fitmappinginv[sport]
    except KeyError:  # pragma: no cover
        return workouttype
import rowingdata.tcxtools as tcxtools
def get_workouttype_from_tcx(filename, workouttype='water'):
    """Map the <Activity Sport=...> attribute of a TCX file onto an
    internal workout type; falls back to *workouttype* on any problem."""
    if workouttype in mytypes.otwtypes:
        # Already an on-the-water type; trust the caller.
        return workouttype
    sport = 'rowing'
    try:  # pragma: no cover
        parsed = tcxtools.tcx_getdict(filename)
        try:
            sport = parsed['Activities']['Activity']['@Sport'].lower()
            if sport == 'other':
                sport = 'rowing'
        except KeyError:
            return workouttype
    except TypeError:  # pragma: no cover
        pass
    try:  # pragma: no cover
        workouttype = mytypes.garminmappinginv[sport.upper()]
    except KeyError:  # pragma: no cover
        return workouttype
    return workouttype  # pragma: no cover
def new_workout_from_file(r, f2,
                          workouttype='rower',
                          workoutsource=None,
                          title='Workout',
                          boattype='1x',
                          rpe=-1,
                          makeprivate=False,
                          startdatetime='',
                          notes='',
                          oarlockfirmware='',
                          inboard=None,
                          oarlength=None,
                          impeller=False,
                          uploadoptions=None):
    """Create a workout from an uploaded file of any supported format.

    Detects the file type, rejects unsupported/corrupt files with a
    user-facing message, converts non-painsled formats, and stores the
    workout via save_workout_database.

    Returns:
        (workout id, message, filename). id is 0 on failure and -1 when
        the file was a zip stored for offline processing.
    """
    # Fix: the zip branch below mutates uploadoptions; with the previous
    # mutable default dict that mutation leaked across calls.
    if uploadoptions is None:
        uploadoptions = {'boattype': '1x', 'workouttype': 'rower'}
    message = ""
    try:
        fileformat = get_file_type(f2)
    except (IOError, UnicodeDecodeError):  # pragma: no cover
        os.remove(f2)
        message = "Rowsandall could not process this file. The extension is supported but the file seems corrupt. Contact info@rowsandall.com if you think this is incorrect."
        return (0, message, f2)
    summary = ''
    # NOTE(review): caller-supplied oarlength/inboard are overwritten
    # here (pre-existing behavior); confirm whether they should be kept.
    oarlength = 2.89
    inboard = 0.88
    # Save zip files to email box for further processing
    if len(fileformat) == 3 and fileformat[0] == 'zip':  # pragma: no cover
        uploadoptions['fromuploadform'] = True
        bodyyaml = yaml.safe_dump(uploadoptions, default_flow_style=False)
        f_to_be_deleted = f2
        workoutsbox = Mailbox.objects.filter(name='workouts')[0]
        msg = Message(mailbox=workoutsbox,
                      from_header=r.user.email,
                      subject=title, body=bodyyaml)
        msg.save()
        f3 = 'media/mailbox_attachments/' + f2[6:]
        copyfile(f2, f3)
        f3 = f3[6:]
        a = MessageAttachment(message=msg, document=f3)
        a.save()
        message = "Zip file was stored for offline processing"
        return -1, message, f2
    # Some people try to upload Concept2 logbook summaries
    if fileformat == 'imageformat':  # pragma: no cover
        os.remove(f2)
        message = "You cannot upload image files here"
        return (0, message, f2)
    if fileformat == 'json':  # pragma: no cover
        os.remove(f2)
        message = "JSON format not supported in direct upload"
        return (0, message, f2)
    if fileformat == 'c2log':
        os.remove(f2)
        message = "This summary does not contain stroke data. Use the files containing stroke by stroke data."
        return (0, message, f2)
    if fileformat == 'nostrokes':  # pragma: no cover
        os.remove(f2)
        message = "It looks like this file doesn't contain stroke data."
        return (0, message, f2)
    if fileformat == 'kml':  # pragma: no cover
        os.remove(f2)
        message = "KML files are not supported"
        return (0, message, f2)
    # Some people upload corrupted zip files
    if fileformat == 'notgzip':  # pragma: no cover
        os.remove(f2)
        message = "Rowsandall could not process this file. The extension is supported but the file seems corrupt. Contact info@rowsandall.com if you think this is incorrect."
        return (0, message, f2)
    # Some people try to upload RowPro summary logs
    if fileformat == 'rowprolog':  # pragma: no cover
        os.remove(f2)
        message = "This RowPro logbook summary does not contain stroke data. Please use the Stroke Data CSV file for the individual workout in your log."
        return (0, message, f2)
    # Sometimes people try an unsupported file type.
    # Send an email to info@rowsandall.com with the file attached
    # for me to check if it is a bug, or a new file type
    # worth supporting
    if fileformat == 'gpx':  # pragma: no cover
        os.remove(f2)
        message = "GPX files support is on our roadmap. Check back soon."
        return (0, message, f2)
    if fileformat == 'unknown':  # pragma: no cover
        message = "We couldn't recognize the file type"
        extension = os.path.splitext(f2)[1]
        filename = os.path.splitext(f2)[0]
        if extension == '.gz':
            filename = os.path.splitext(filename)[0]
            extension2 = os.path.splitext(filename)[1] + extension
            extension = extension2
        f4 = filename + 'a' + extension
        copyfile(f2, f4)
        job = myqueue(queuehigh,
                      handle_sendemail_unrecognized,
                      f4,
                      r.user.email)
        return (0, message, f2)
    if fileformat == 'att':  # pragma: no cover
        # email attachment which can safely be ignored
        return (0, '', f2)
    # Get workout type from fit & tcx
    if (fileformat == 'fit'):  # pragma: no cover
        workouttype = get_workouttype_from_fit(f2, workouttype=workouttype)
    if (fileformat == 'tcx'):
        workouttype = get_workouttype_from_tcx(f2, workouttype=workouttype)
    # handle non-Painsled by converting it to painsled compatible CSV
    if (fileformat != 'csv'):
        f2, summary, oarlength, inboard, fileformat, impeller = handle_nonpainsled(
            f2,
            fileformat,
            startdatetime=startdatetime,
            summary=summary,
            empowerfirmware=oarlockfirmware,
            impeller=impeller,
        )
        if not f2:  # pragma: no cover
            message = 'Something went wrong'
            return (0, message, '')
    dosummary = (fileformat != 'fit' and 'speedcoach2' not in fileformat)
    dosummary = dosummary or summary == ''
    if 'speedcoach2' in fileformat and workouttype == 'rower':
        workouttype = 'water'
    if workoutsource is None:
        workoutsource = fileformat
    dologging('debuglog.log', 'Saving to database with start date time {startdatetime}'.format(
        startdatetime=startdatetime,
    ))
    id, message = save_workout_database(
        f2, r,
        notes=notes,
        workouttype=workouttype,
        weightcategory=r.weightcategory,
        adaptiveclass=r.adaptiveclass,
        boattype=boattype,
        makeprivate=makeprivate,
        dosummary=dosummary,
        workoutsource=workoutsource,
        summary=summary,
        startdatetime=startdatetime,
        rpe=rpe,
        inboard=inboard, oarlength=oarlength,
        title=title,
        forceunit='N',
        impeller=impeller,
    )
    return (id, message, f2)
def split_workout(r, parent, splitsecond, splitmode):
    """Split workout *parent* at *splitsecond* into up to two workouts.

    *splitmode* is a string of directives: 'keep first', 'keep second',
    'keep original', plus privacy flags 'firstprivate', 'secondprivate',
    'originalprivate'.

    Returns:
        (ids, messages) — hex-encoded workout ids and status messages.
    """
    data, row = getrowdata_db(id=parent.id)
    latitude, longitude = get_latlon(parent.id)
    if not latitude.empty and not longitude.empty:
        data[' latitude'] = latitude
        data[' longitude'] = longitude
    # Stored time is in milliseconds; convert to seconds for the split.
    data['time'] = data['time'] / 1000.
    data1 = data[data['time'] <= splitsecond].copy()
    data2 = data[data['time'] > splitsecond].copy()
    data1 = data1.sort_values(['time'])
    data1 = data1.interpolate(method='linear', axis=0, limit_direction='both',
                              limit=10)
    data1.fillna(method='bfill', inplace=True)
    # Some new stuff to try out
    data1 = data1.groupby('time', axis=0).mean()
    data1['time'] = data1.index
    data1.reset_index(drop=True, inplace=True)
    data2 = data2.sort_values(['time'])
    data2 = data2.interpolate(method='linear', axis=0, limit_direction='both',
                              limit=10)
    data2.fillna(method='bfill', inplace=True)
    # Some new stuff to try out
    data2 = data2.groupby('time', axis=0).mean()
    data2['time'] = data2.index
    data2.reset_index(drop=True, inplace=True)
    data1['pace'] = data1['pace'] / 1000.
    data2['pace'] = data2['pace'] / 1000.
    data1.drop_duplicates(subset='time', inplace=True)
    data2.drop_duplicates(subset='time', inplace=True)
    messages = []
    ids = []
    if 'keep first' in splitmode:
        if 'firstprivate' in splitmode:  # pragma: no cover
            setprivate = True
        else:
            setprivate = False
        id, message = new_workout_from_df(r, data1,
                                          title=parent.name + ' (1)',
                                          parent=parent,
                                          setprivate=setprivate,
                                          forceunit='N')
        messages.append(message)
        ids.append(encoder.encode_hex(id))
    if 'keep second' in splitmode:
        # Rebase the second part so it starts at distance/time zero.
        data2['cumdist'] = data2['cumdist'] - data2.iloc[
            0,
            data2.columns.get_loc('cumdist')
        ]
        data2['distance'] = data2['distance'] - data2.iloc[
            0,
            data2.columns.get_loc('distance')
        ]
        data2['time'] = data2['time'] - data2.iloc[
            0,
            data2.columns.get_loc('time')
        ]
        if 'secondprivate' in splitmode:  # pragma: no cover
            setprivate = True
        else:
            setprivate = False
        # Shift the second part's start time by the split offset.
        dt = datetime.timedelta(seconds=splitsecond)
        id, message = new_workout_from_df(r, data2,
                                          title=parent.name + ' (2)',
                                          parent=parent,
                                          setprivate=setprivate,
                                          dt=dt, forceunit='N')
        messages.append(message)
        ids.append(encoder.encode_hex(id))
    if not 'keep original' in splitmode:  # pragma: no cover
        # Only delete the original if at least one part was kept.
        if 'keep second' in splitmode or 'keep first' in splitmode:
            parent.delete()
            messages.append('Deleted Workout: ' + parent.name)
        else:
            messages.append('That would delete your workout')
            ids.append(encoder.encode_hex(parent.id))
    elif 'originalprivate' in splitmode:  # pragma: no cover
        parent.privacy = 'hidden'
        parent.save()
    return ids, messages
# Create new workout from data frame and store it in the database
# This routine should be used everywhere in views.py and mailprocessing.py
# Currently there is code duplication
def new_workout_from_df(r, df,
                        title='New Workout',
                        workoutsource='unknown',
                        boattype='1x',
                        workouttype='rower',
                        parent=None,
                        startdatetime='',
                        setprivate=False,
                        forceunit='lbs',
                        dt=datetime.timedelta()):
    """Create and store a workout from an in-memory stroke dataframe.

    When *parent* is given (e.g. when splitting), metadata (rigging,
    source, type, notes, privacy, rpe, start time) is inherited from it;
    *dt* offsets the inherited start time. The dataframe is written to a
    gzipped CSV and stored via save_workout_database; a TRIMP
    calculation is queued afterwards.

    Returns:
        (workout id, message).
    """
    message = None
    summary = ''
    if parent:
        oarlength = parent.oarlength
        inboard = parent.inboard
        workoutsource = parent.workoutsource
        workouttype = parent.workouttype
        boattype = parent.boattype
        notes = parent.notes
        summary = parent.summary
        rpe = parent.rpe
        if parent.privacy == 'hidden':  # pragma: no cover
            makeprivate = True
        else:
            makeprivate = False
        startdatetime = parent.startdatetime + dt
    else:
        oarlength = 2.89
        inboard = 0.88
        notes = ''
        summary = ''
        makeprivate = False
        rpe = 0
        if startdatetime == '':  # pragma: no cover
            startdatetime = timezone.now()
    if setprivate:  # pragma: no cover
        makeprivate = True
    timestr = strftime("%Y%m%d-%H%M%S")
    csvfilename = 'media/df_' + timestr + '.csv'
    if forceunit == 'N':
        # change to lbs for now
        df['peakforce'] /= lbstoN
        df['averageforce'] /= lbstoN
    # Map internal column names to painsled-style names (no-op when the
    # dataframe already uses painsled names).
    df.rename(columns=columndict, inplace=True)
    #starttimeunix = mktime(startdatetime.utctimetuple())
    starttimeunix = arrow.get(startdatetime).timestamp()
    df[' ElapsedTime (sec)'] = df['TimeStamp (sec)']
    df['TimeStamp (sec)'] = df['TimeStamp (sec)'] + starttimeunix
    row = rrdata(df=df)
    row.write_csv(csvfilename, gzip=True)
    # res = df.to_csv(csvfilename+'.gz',index_label='index',
    # compression='gzip')
    id, message = save_workout_database(csvfilename, r,
                                        workouttype=workouttype,
                                        boattype=boattype,
                                        title=title,
                                        workoutsource=workoutsource,
                                        notes=notes,
                                        oarlength=oarlength,
                                        inboard=inboard,
                                        makeprivate=makeprivate,
                                        dosmooth=False,
                                        rpe=rpe,
                                        consistencychecks=False)
    job = myqueue(queuehigh, handle_calctrimp, id, csvfilename, r.ftp, r.sex, r.hrftp, r.max, r.rest)
    return (id, message)
# A wrapper around the rowingdata class, with some error catching
def rdata(file, rower=rrower()):
    """Load *file* as a rowingdata object, with error catching.

    Tries the plain filename first, then the gzipped variant; returns an
    empty rowingdata object on any failure.

    NOTE(review): the default ``rower=rrower()`` is a single shared
    instance evaluated at import time — presumably read-only; confirm.
    """
    try:
        res = rrdata(csvfile=file, rower=rower)
    except (IOError, IndexError):  # pragma: no cover
        try:
            # Fall back to the gzipped copy of the file.
            res = rrdata(csvfile=file + '.gz', rower=rower)
        except (IOError, IndexError):
            res = rrdata()
        except Exception:
            # Narrowed from a bare except so KeyboardInterrupt/SystemExit
            # are no longer swallowed.
            res = rrdata()
    except EOFError:  # pragma: no cover
        res = rrdata()
    except Exception:  # pragma: no cover
        # Narrowed from a bare except (see above).
        res = rrdata()
    return res
# Remove all stroke data for workout ID from database
# Remove all stroke data for workout ID from database
def delete_strokedata(id):
    """Delete the stored stroke-data parquet for workout *id*.

    The target may be a directory (partitioned parquet) or a single
    file; missing targets are ignored silently.
    """
    target = 'media/strokedata_{id}.parquet.gz'.format(id=id)
    try:
        shutil.rmtree(target)
        return
    except OSError:
        # Not a directory (or missing): fall through to plain removal.
        pass
    try:
        os.remove(target)
    except FileNotFoundError:
        pass
# Replace stroke data in DB with data from CSV file
# Replace stroke data in DB with data from CSV file
def update_strokedata(id, df):
    """Replace the stored stroke data for workout *id* with *df*."""
    delete_strokedata(id)
    dataprep(df, id=id, bands=True, barchart=True, otwpower=True)
# Test that all data are of a numerical time
def testdata(time, distance, pace, spm): # pragma: no cover
t1 = np.issubdtype(time, np.number)
t2 = np.issubdtype(distance, np.number)
t3 = np.issubdtype(pace, np.number)
t4 = np.issubdtype(spm, np.number)
return t1 and t2 and t3 and t4
# Get data from DB for one workout (fetches all data). If data
# is not in DB, read from CSV file (and create DB entry)
def getrowdata_db(id=0, doclean=False, convertnewtons=True,
                  checkefficiency=True):
    """Return (stroke dataframe, Workout) for workout *id*.

    Reads the stored parquet stroke data; when absent, rebuilds it from
    the workout's CSV file via dataprep. Optionally backfills efficiency
    data and cleans the dataframe.
    """
    data = read_df_sql(id)
    try:
        data['deltat'] = data['time'].diff()
    except KeyError:  # pragma: no cover
        data = pd.DataFrame()
    if data.empty:
        # No stored stroke data: rebuild it from the CSV source file.
        rowdata, row = getrowdata(id=id)
        if not rowdata.empty:
            data = dataprep(rowdata.df, id=id, bands=True,
                            barchart=True, otwpower=True)
        else:
            data = pd.DataFrame()  # returning empty dataframe
    else:
        row = Workout.objects.get(id=id)
    if checkefficiency == True and not data.empty:
        try:
            # Efficiency all-zero while power exists => recompute it.
            if data['efficiency'].mean() == 0 and data['power'].mean() != 0:  # pragma: no cover
                data = add_efficiency(id=id)
        except KeyError:  # pragma: no cover
            data = add_efficiency(id=id)
    if doclean:  # pragma: no cover
        data = clean_df_stats(data, ignorehr=True)
    return data, row
# Fetch a subset of the data from the DB
# Fetch a subset of the data from the DB
def getsmallrowdata_db(columns, ids=[], doclean=True, workstrokesonly=True, compute=True):
    """Read selected *columns* of stroke data for the given workout *ids*.

    Loads the per-workout parquet files (regenerating missing ones from
    the CSV source) and concatenates them. When *compute* is set, the
    result is optionally cleaned and rows/columns that are entirely NaN
    are dropped.

    NOTE: the mutable default ``ids=[]`` is only read, never mutated.
    """
    # prepmultipledata(ids)
    if ids:
        csvfilenames = ['media/strokedata_{id}.parquet.gz'.format(id=id) for id in ids]
    else:
        return pd.DataFrame()
    data = []
    columns = [c for c in columns if c != 'None']
    columns = list(set(columns))
    if len(ids) > 1:
        for id, f in zip(ids, csvfilenames):
            try:
                #df = dd.read_parquet(f,columns=columns,engine='pyarrow')
                df = pd.read_parquet(f, columns=columns)
                data.append(df)
            except (OSError, ArrowInvalid, IndexError):  # pragma: no cover
                # Parquet missing/corrupt: rebuild it from the CSV file.
                rowdata, row = getrowdata(id=id)
                if rowdata and len(rowdata.df):
                    datadf = dataprep(rowdata.df, id=id, bands=True, otwpower=True, barchart=True)
                    # df = dd.read_parquet(f,columns=columns,engine='pyarrow')
                    df = pd.read_parquet(f, columns=columns)
                    data.append(df)
        try:
            df = pd.concat(data, axis=0)
        except ValueError:
            return pd.DataFrame()
        # df = dd.concat(data,axis=0)
    else:
        try:
            df = pd.read_parquet(csvfilenames[0], columns=columns)
            rowdata, row = getrowdata(id=ids[0])
        except (OSError, ArrowInvalid, IndexError):
            rowdata, row = getrowdata(id=ids[0])
            if rowdata and len(rowdata.df):  # pragma: no cover
                data = dataprep(rowdata.df, id=ids[0], bands=True, otwpower=True, barchart=True)
                df = pd.read_parquet(csvfilenames[0], columns=columns)
                # df = dd.read_parquet(csvfilenames[0],
                #                      column=columns,engine='pyarrow',
                #                      )
                # df = df.loc[:,~df.columns.duplicated()]
            else:
                df = pd.DataFrame()
    if compute and len(df):
        data = df.copy()
        if doclean:
            data = clean_df_stats(data, ignorehr=True,
                                  workstrokesonly=workstrokesonly)
        data.dropna(axis=1, how='all', inplace=True)
        data.dropna(axis=0, how='any', inplace=True)
        return data
    return df
# Fetch both the workout and the workout stroke data (from CSV file)
# Fetch both the workout and the workout stroke data (from CSV file)
def getrowdata(id=0):
    """Return (rowingdata object, Workout) for workout *id*.

    Parses the workout's CSV file with the rower's HR/FTP zones applied.
    Returns an empty rowingdata object and None when the workout does
    not exist.
    """
    try:
        row = Workout.objects.get(id=id)
    except Workout.DoesNotExist:  # pragma: no cover
        return rrdata(), None
    # Removed unused locals (f1 alias and an unused `u = r.user` FK
    # access that triggered a needless DB query).
    r = row.user
    rr = rrower(hrmax=r.max, hrut2=r.ut2,
                hrut1=r.ut1, hrat=r.at,
                hrtr=r.tr, hran=r.an, ftp=r.ftp)
    rowdata = rdata(row.csvfilename, rower=rr)
    return rowdata, row
# Checks if all rows for a list of workout IDs have entries in the
# stroke_data table. If this is not the case, it creates the stroke
# data
# In theory, this should never yield any work, but it's a good
# safety net for programming errors elsewhere in the app
# Also used heavily when I moved from CSV file only to CSV+Stroke data
import glob
def prepmultipledata(ids, verbose=False):  # pragma: no cover
    """Ensure every workout in *ids* has stored stroke data.

    Safety net: regenerates the per-workout parquet stroke-data files
    from the CSV sources for any id whose file is missing.

    Returns:
        The list of ids that had no stroke-data file (and were
        processed).

    Fix: the glob pattern was 'media/*.parquet', which never matches the
    actual '.parquet.gz' filenames, so the existing-file filter was a
    no-op and every id was reprocessed on every call.
    """
    filenames = set(glob.glob('media/*.parquet.gz'))
    ids = [id for id in ids
           if 'media/strokedata_{id}.parquet.gz'.format(id=id) not in filenames]
    for id in ids:
        rowdata, row = getrowdata(id=id)
        if verbose:
            print(id)
        if rowdata and len(rowdata.df):
            data = dataprep(rowdata.df, id=id, bands=True,
                            barchart=True, otwpower=True)
    return ids
# Read a set of columns for a set of workout ids, returns data as a
# pandas dataframe
# Read a set of columns for a set of workout ids, returns data as a
# pandas dataframe
def read_cols_df_sql(ids, columns, convertnewtons=True):
    """Read *columns* (plus distance/spm/workoutid) for workout *ids*.

    Missing parquet files are regenerated from the CSV sources. Force
    columns recorded in lbs are converted to Newtons per workout.

    Returns:
        (dataframe, extracols) — extracols is currently always [].
    """
    # drop columns that are not in offical list
    # axx = [ax[0] for ax in axes]
    extracols = []
    columns = list(columns) + ['distance', 'spm', 'workoutid']
    columns = [x for x in columns if x != 'None']
    columns = list(set(columns))
    ids = [int(id) for id in ids]
    df = pd.DataFrame()
    if len(ids) == 0:  # pragma: no cover
        return pd.DataFrame(), extracols
    elif len(ids) == 1:  # pragma: no cover
        try:
            filename = 'media/strokedata_{id}.parquet.gz'.format(id=ids[0])
            df = pd.read_parquet(filename, columns=columns)
        except OSError:
            # Parquet missing: rebuild it from the CSV file first.
            rowdata, row = getrowdata(id=ids[0])
            if rowdata and len(rowdata.df):
                datadf = dataprep(rowdata.df, id=ids[0], bands=True, otwpower=True, barchart=True)
                df = pd.read_parquet(filename, columns=columns)
    else:
        data = []
        filenames = ['media/strokedata_{id}.parquet.gz'.format(id=id) for id in ids]
        for id, f in zip(ids, filenames):
            try:
                df = pd.read_parquet(f, columns=columns)
                data.append(df)
            except (OSError, IndexError, ArrowInvalid):
                rowdata, row = getrowdata(id=id)
                if rowdata and len(rowdata.df):  # pragma: no cover
                    datadf = dataprep(rowdata.df, id=id, bands=True, otwpower=True, barchart=True)
                    df = pd.read_parquet(f, columns=columns)
                    data.append(df)
        try:
            df = pd.concat(data, axis=0)
        except ValueError:  # pragma: no cover
            return pd.DataFrame(), extracols
    df = df.fillna(value=0)
    if 'peakforce' in columns:
        # Convert lbs-recorded peak force values to Newtons per workout.
        funits = ((w.id, w.forceunit)
                  for w in Workout.objects.filter(id__in=ids))
        for id, u in funits:
            if u == 'lbs':
                mask = df['workoutid'] == id
                df.loc[mask, 'peakforce'] = df.loc[mask, 'peakforce'] * lbstoN
    if 'averageforce' in columns:
        funits = ((w.id, w.forceunit)
                  for w in Workout.objects.filter(id__in=ids))
        for id, u in funits:
            if u == 'lbs':
                mask = df['workoutid'] == id
                df.loc[mask, 'averageforce'] = df.loc[mask,
                                                      'averageforce'] * lbstoN
    return df, extracols
def initiate_cp(r):
    """Seed the rolling critical-power data for rower *r*, for both
    on-the-water and erg workout types."""
    for workouttypes, sport in ((otwtypes, 'water'), (otetypes, 'erg')):
        update_rolling_cp(r, workouttypes, sport)
# Read stroke data from the DB for a Workout ID. Returns a pandas dataframe
def read_df_sql(id):
    """Return the full stroke-data frame for workout *id*, NaNs filled with 0.

    Reads the per-workout parquet cache; when the file is missing or
    unreadable the frame is rebuilt from the raw workout data.
    """
    parquetfile = 'media/strokedata_{id}.parquet.gz'.format(id=id)
    try:
        frame = pd.read_parquet(parquetfile)
    except (OSError, ArrowInvalid, IndexError):  # pragma: no cover
        rowdata, row = getrowdata(id=id)
        if not (rowdata and len(rowdata.df)):
            frame = pd.DataFrame()
        else:
            # dataprep() writes the parquet file as a side effect
            rebuilt = dataprep(rowdata.df, id=id, bands=True,
                               otwpower=True, barchart=True)
            try:
                frame = pd.read_parquet(parquetfile)
            except OSError:
                frame = rebuilt
    return frame.fillna(value=0)
# data fusion
def datafusion(id1, id2, columns, offset):
    """Fuse selected *columns* of workout *id2* into workout *id1*.

    The second workout's time axis is shifted by *offset* (a timedelta),
    both frames are merged on time, linearly interpolated and averaged
    per time stamp so the signals of the two devices line up.

    Returns (fused DataFrame, force unit string).
    """
    # existence check - raises Workout.DoesNotExist for unknown ids
    workout1 = Workout.objects.get(id=id1)
    workout2 = Workout.objects.get(id=id2)
    df1, w1 = getrowdata_db(id=id1)
    df1 = df1.drop([  # 'cumdist',
        'hr_ut2',
        'hr_ut1',
        'hr_at',
        'hr_tr',
        'hr_an',
        'hr_max',
        'ftime',
        'fpace',
        'workoutid',
        'id'],
        axis=1, errors='ignore')
    # Add coordinates to DataFrame
    latitude, longitude = get_latlon(id1)
    df1[' latitude'] = latitude
    df1[' longitude'] = longitude
    df2 = getsmallrowdata_db(['time'] + columns, ids=[id2], doclean=False)
    forceunit = 'N'
    # timedelta -> milliseconds
    offsetmillisecs = offset.seconds * 1000 + offset.microseconds / 1000.
    offsetmillisecs += offset.days * (3600 * 24 * 1000)
    df2['time'] = df2['time'] + offsetmillisecs
    # keep in df1 only the columns that df2 does not provide
    keep1 = {c: c for c in set(df1.columns)}
    for c in columns:
        # pop with default: a requested column may be absent from df1,
        # a bare pop(c) would raise KeyError
        keep1.pop(c, None)
    for c in df1.columns:
        if c not in keep1:
            df1 = df1.drop(c, axis=1, errors='ignore')
    df = pd.concat([df1, df2], ignore_index=True)
    df = df.sort_values(['time'])
    df = df.interpolate(method='linear', axis=0, limit_direction='both',
                        limit=10)
    df.fillna(method='bfill', inplace=True)
    # average duplicate time stamps, then restore 'time' as a column
    df = df.groupby('time', axis=0).mean()
    df['time'] = df.index
    df.reset_index(drop=True, inplace=True)
    # ms -> seconds
    df['time'] = df['time'] / 1000.
    df['pace'] = df['pace'] / 1000.
    df['cum_dist'] = df['cumdist']
    return df, forceunit
def fix_newtons(id=0, limit=3000):  # pragma: no cover
    """Re-import the stroke data of workout *id* when its mean peak force
    exceeds *limit* (i.e. looks like it was stored with the wrong unit)."""
    # rowdata,row = getrowdata_db(id=id,doclean=False,convertnewtons=False)
    strokes = getsmallrowdata_db(['peakforce'], ids=[id], doclean=False)
    try:
        peak = strokes['peakforce']
        if peak.mean() > limit:
            workout = Workout.objects.get(id=id)
            reparsed = rdata(workout.csvfilename)
            if reparsed and len(reparsed.df):
                update_strokedata(workout.id, reparsed.df)
    except KeyError:
        # no peakforce column recorded for this workout - nothing to fix
        pass
def remove_invalid_columns(df, allowed=None):  # pragma: no cover
    """Drop every column of *df* that is not in the allowed column list.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to clean (modified in place and also returned).
    allowed : iterable of str, optional
        Permitted column names; defaults to the module-level
        ``allowedcolumns`` list.

    Returns the cleaned frame (same object as *df*).
    """
    if allowed is None:
        allowed = allowedcolumns
    # collect the labels first: dropping in place while iterating
    # df.columns mutates the sequence being iterated
    todrop = [c for c in df.columns if c not in allowed]
    df.drop(labels=todrop, axis=1, inplace=True)
    return df
def add_efficiency(id=0):  # pragma: no cover
    """Recompute the 'efficiency' column for workout *id* and rewrite its
    parquet stroke-data file.

    Efficiency is the erg power implied by the boat speed (2.8 * v**3)
    expressed as a percentage of the measured power.

    Returns the updated stroke-data frame.
    """
    rowdata, row = getrowdata_db(id=id,
                                 doclean=False,
                                 convertnewtons=False,
                                 checkefficiency=False)
    power = rowdata['power']
    pace = rowdata['pace'] / 1.0e3  # pace is stored in ms/500m
    velo = 500. / pace
    ergpw = 2.8 * velo**3
    efficiency = 100. * ergpw / power
    efficiency = efficiency.replace([-np.inf, np.inf], np.nan)
    # BUGFIX: fillna is not in-place - the result must be assigned back
    efficiency = efficiency.fillna(method='ffill')
    rowdata['efficiency'] = efficiency
    rowdata = remove_invalid_columns(rowdata)
    rowdata = rowdata.replace([-np.inf, np.inf], np.nan)
    rowdata = rowdata.fillna(method='ffill')
    delete_strokedata(id)
    if id != 0:
        rowdata['workoutid'] = id
        filename = 'media/strokedata_{id}.parquet.gz'.format(id=id)
        df = dd.from_pandas(rowdata, npartitions=1)
        df.to_parquet(filename, engine='fastparquet', compression='GZIP')
    return rowdata
# This is the main routine.
# it reindexes, sorts, filters, and smooths the data, then
# saves it to the stroke_data table in the database
# Takes a rowingdata object's DataFrame as input
def dataprep(rowdatadf, id=0, bands=True, barchart=True, otwpower=True,
             empower=True, inboard=0.88, forceunit='lbs'):
    """Clean, smooth and enrich a rowingdata DataFrame; optionally persist it.

    Builds the canonical stroke-data frame (time, pace, power, force,
    rhythm and - when enabled - HR bands, EmPower oarlock angles and
    OTW erg-equivalent pace/power), smooths the noisy channels with a
    Savitzky-Golay filter and, when *id* is non-zero, writes the result
    to media/strokedata_<id>.parquet.gz.

    Parameters
    ----------
    rowdatadf : pandas.DataFrame
        Raw rowingdata frame ('TimeStamp (sec)', ' HRCur (bpm)', ...).
    id : int
        Workout id; 0 means do not persist the frame.
    bands : bool
        Add the hr_* heart-rate band columns.
    barchart : bool
        Accepted for call-site compatibility; not used in this routine.
    otwpower : bool
        Add no-wind pace and erg-equivalent power/pace columns.
    empower : bool
        Add EmPower oarlock columns (wash, slip, catch, finish, ...).
    inboard : float
        Oar inboard length (m), used to compute the drive arc length.
    forceunit : str
        'lbs' or 'N'; lbs force values are converted to Newton.

    Returns
    -------
    pandas.DataFrame or int
        The prepared frame, or 0 when *rowdatadf* is empty.
    """
    if rowdatadf.empty:
        return 0
    #rowdatadf.set_index([range(len(rowdatadf))], inplace=True)
    t = rowdatadf.loc[:, 'TimeStamp (sec)']
    t = pd.Series(t - rowdatadf.loc[:, 'TimeStamp (sec)'].iloc[0])
    # clip nonsense paces to 3000 sec/500m
    row_index = rowdatadf.loc[:, ' Stroke500mPace (sec/500m)'] > 3000
    rowdatadf.loc[row_index, ' Stroke500mPace (sec/500m)'] = 3000.
    p = rowdatadf.loc[:, ' Stroke500mPace (sec/500m)']
    try:
        velo = rowdatadf.loc[:, ' AverageBoatSpeed (m/s)']
    except KeyError:  # pragma: no cover
        velo = 500. / p
    hr = rowdatadf.loc[:, ' HRCur (bpm)']
    spm = rowdatadf.loc[:, ' Cadence (stokes/min)']
    cumdist = rowdatadf.loc[:, 'cum_dist']
    power = rowdatadf.loc[:, ' Power (watts)']
    averageforce = rowdatadf.loc[:, ' AverageDriveForce (lbs)']
    drivelength = rowdatadf.loc[:, ' DriveLength (meters)']
    try:
        workoutstate = rowdatadf.loc[:, ' WorkoutState']
    except KeyError:  # pragma: no cover
        workoutstate = 0 * hr
    peakforce = rowdatadf.loc[:, ' PeakDriveForce (lbs)']
    forceratio = averageforce / peakforce
    forceratio = forceratio.fillna(value=0)
    try:
        drivetime = rowdatadf.loc[:, ' DriveTime (ms)']
        recoverytime = rowdatadf.loc[:, ' StrokeRecoveryTime (ms)']
        # rhythm = drive time as percentage of the full stroke cycle
        rhythm = 100. * drivetime / (recoverytime + drivetime)
        rhythm = rhythm.fillna(value=0)
    except:  # pragma: no cover
        rhythm = 0.0 * forceratio
    # choose a Savitzky-Golay window of roughly 10 seconds of samples,
    # based on the mean sample interval
    f = rowdatadf['TimeStamp (sec)'].diff().mean()
    if f != 0 and not np.isinf(f):
        try:
            windowsize = 2 * (int(10. / (f))) + 1
        except ValueError:  # pragma: no cover
            windowsize = 1
    else:
        windowsize = 1
    if windowsize <= 3:
        windowsize = 5
    if windowsize > 3 and windowsize < len(hr):
        spm = savgol_filter(spm, windowsize, 3)
        hr = savgol_filter(hr, windowsize, 3)
        drivelength = savgol_filter(drivelength, windowsize, 3)
        forceratio = savgol_filter(forceratio, windowsize, 3)
    try:
        t2 = t.fillna(method='ffill').apply(lambda x: timedeltaconv(x))
    except TypeError:  # pragma: no cover
        t2 = 0 * t
    p2 = p.fillna(method='ffill').apply(lambda x: timedeltaconv(x))
    try:
        drivespeed = drivelength / rowdatadf[' DriveTime (ms)'] * 1.0e3
    except TypeError:  # pragma: no cover
        drivespeed = 0.0 * rowdatadf['TimeStamp (sec)']
    drivespeed = drivespeed.fillna(value=0)
    try:
        driveenergy = rowdatadf['driveenergy']
    except KeyError:  # pragma: no cover
        if forceunit == 'lbs':
            driveenergy = drivelength * averageforce * lbstoN
        else:
            # BUGFIX: was misspelled 'drivenergy', which left driveenergy
            # undefined (NameError below) for forceunit 'N'
            driveenergy = drivelength * averageforce
    if forceunit == 'lbs':
        averageforce *= lbstoN
        peakforce *= lbstoN
    powerhr = 60. * power / hr
    powerhr = powerhr.fillna(value=0)
    # a flat zero driveenergy series means "no data"; use a placeholder
    if driveenergy.mean() == 0 and driveenergy.std() == 0:
        driveenergy = 0 * driveenergy + 100
    distance = rowdatadf.loc[:, 'cum_dist']
    velo = 500. / p
    distanceperstroke = 60. * velo / spm
    data = DataFrame(
        dict(
            time=t * 1e3,
            hr=hr,
            pace=p * 1e3,
            spm=spm,
            velo=velo,
            cumdist=cumdist,
            ftime=niceformat(t2),
            fpace=nicepaceformat(p2),
            driveenergy=driveenergy,
            power=power,
            workoutstate=workoutstate,
            averageforce=averageforce,
            drivelength=drivelength,
            peakforce=peakforce,
            forceratio=forceratio,
            distance=distance,
            drivespeed=drivespeed,
            rhythm=rhythm,
            distanceperstroke=distanceperstroke,
            # powerhr=powerhr,
        )
    )
    if bands:
        # HR bands
        data['hr_ut2'] = rowdatadf.loc[:, 'hr_ut2']
        data['hr_ut1'] = rowdatadf.loc[:, 'hr_ut1']
        data['hr_at'] = rowdatadf.loc[:, 'hr_at']
        data['hr_tr'] = rowdatadf.loc[:, 'hr_tr']
        data['hr_an'] = rowdatadf.loc[:, 'hr_an']
        data['hr_max'] = rowdatadf.loc[:, 'hr_max']
        data['hr_bottom'] = 0.0 * data['hr']
    try:
        tel = rowdatadf.loc[:, ' ElapsedTime (sec)']
    except KeyError:  # pragma: no cover
        rowdatadf[' ElapsedTime (sec)'] = rowdatadf['TimeStamp (sec)']
    if empower:
        # EmPower oarlock channels default to zero when absent
        try:
            wash = rowdatadf.loc[:, 'wash']
        except KeyError:
            wash = 0 * power
        try:
            catch = rowdatadf.loc[:, 'catch']
        except KeyError:
            catch = 0 * power
        try:
            finish = rowdatadf.loc[:, 'finish']
        except KeyError:
            finish = 0 * power
        try:
            peakforceangle = rowdatadf.loc[:, 'peakforceangle']
        except KeyError:
            peakforceangle = 0 * power
        if data['driveenergy'].mean() == 0:  # pragma: no cover
            try:
                driveenergy = rowdatadf.loc[:, 'driveenergy']
            except KeyError:
                driveenergy = power * 60 / spm
        else:
            driveenergy = data['driveenergy']
        # arc length swept by the blade between catch and finish angles
        arclength = (inboard - 0.05) * (np.radians(finish) - np.radians(catch))
        if arclength.mean() > 0:
            drivelength = arclength
        elif drivelength.mean() == 0:
            drivelength = driveenergy / (averageforce * 4.44822)
        try:
            slip = rowdatadf.loc[:, 'slip']
        except KeyError:
            slip = 0 * power
        try:
            totalangle = finish - catch
            effectiveangle = finish - wash - catch - slip
        except ValueError:  # pragma: no cover
            totalangle = 0 * power
            effectiveangle = 0 * power
        # smooth the oarlock channels with the same window as above
        if windowsize > 3 and windowsize < len(slip):
            try:
                wash = savgol_filter(wash, windowsize, 3)
            except TypeError:  # pragma: no cover
                pass
            try:
                slip = savgol_filter(slip, windowsize, 3)
            except TypeError:  # pragma: no cover
                pass
            try:
                catch = savgol_filter(catch, windowsize, 3)
            except TypeError:  # pragma: no cover
                pass
            try:
                finish = savgol_filter(finish, windowsize, 3)
            except TypeError:  # pragma: no cover
                pass
            try:
                peakforceangle = savgol_filter(peakforceangle, windowsize, 3)
            except TypeError:  # pragma: no cover
                pass
            try:
                driveenergy = savgol_filter(driveenergy, windowsize, 3)
            except TypeError:  # pragma: no cover
                pass
            try:
                drivelength = savgol_filter(drivelength, windowsize, 3)
            except TypeError:  # pragma: no cover
                pass
            try:
                totalangle = savgol_filter(totalangle, windowsize, 3)
            except TypeError:  # pragma: no cover
                pass
            try:
                effectiveangle = savgol_filter(effectiveangle, windowsize, 3)
            except TypeError:  # pragma: no cover
                pass
        velo = 500. / p
        ergpw = 2.8 * velo**3
        efficiency = 100. * ergpw / power
        efficiency = efficiency.replace([-np.inf, np.inf], np.nan)
        # BUGFIX: fillna is not in-place - assign the result back
        efficiency = efficiency.fillna(method='ffill')
        try:
            data['wash'] = wash
            data['catch'] = catch
            data['slip'] = slip
            data['finish'] = finish
            data['peakforceangle'] = peakforceangle
            data['driveenergy'] = driveenergy
            data['drivelength'] = drivelength
            data['totalangle'] = totalangle
            data['effectiveangle'] = effectiveangle
            data['efficiency'] = efficiency
        except ValueError:  # pragma: no cover
            pass
    if otwpower:
        try:
            nowindpace = rowdatadf.loc[:, 'nowindpace']
        except KeyError:
            nowindpace = p
        try:
            equivergpower = rowdatadf.loc[:, 'equivergpower']
        except KeyError:
            equivergpower = 0 * p + 50.
        nowindpace2 = nowindpace.apply(lambda x: timedeltaconv(x))
        # erg pace from the P = 2.8 * v**3 power-speed model
        ergvelo = (equivergpower / 2.8)**(1. / 3.)
        ergpace = 500. / ergvelo
        ergpace[ergpace == np.inf] = 240.
        ergpace2 = ergpace.apply(lambda x: timedeltaconv(x))
        data['ergpace'] = ergpace * 1e3
        data['nowindpace'] = nowindpace * 1e3
        data['equivergpower'] = equivergpower
        data['fergpace'] = nicepaceformat(ergpace2)
        data['fnowindpace'] = nicepaceformat(nowindpace2)
    data = data.replace([-np.inf, np.inf], np.nan)
    data = data.fillna(method='ffill')
    # write data if id given
    if id != 0:
        data['workoutid'] = id
        data.fillna(0, inplace=True)
        # cast to the canonical column dtypes
        for k, v in dtypes.items():
            try:
                data[k] = data[k].astype(v)
            except KeyError:  # pragma: no cover
                pass
        filename = 'media/strokedata_{id}.parquet.gz'.format(id=id)
        df = dd.from_pandas(data, npartitions=1)
        df.to_parquet(filename, engine='fastparquet', compression='GZIP')
    return data
def workout_trimp(w, reset=False):
    """Return cached (trimp, hrtss) for workout *w* or queue a recomputation.

    When no cached TRIMP exists (or *reset* is True), missing average/max
    heart rates are filled in from the raw file and a background
    handle_calctrimp job is queued; (0, 0) is returned until it completes.
    """
    r = w.user
    if w.trimp > 0 and not reset:
        return w.trimp, w.hrtss
    ftp = float(r.ftp)
    if w.workouttype in otwtypes:
        # apply the configured on-the-water power slack
        ftp = ftp * (100. - r.otwslack) / 100.
    if r.hrftp == 0:
        # seed heart-rate FTP between the TR and AN band boundaries
        hrftp = (r.an + r.tr) / 2.
        r.hrftp = int(hrftp)
        r.save()
    if w.averagehr is None:
        rowdata = rdata(w.csvfilename)
        try:
            avghr = rowdata.df[' HRCur (bpm)'].mean()
            maxhr = rowdata.df[' HRCur (bpm)'].max()
        except KeyError:  # pragma: no cover
            avghr = None
            maxhr = None
        w.averagehr = avghr
        w.maxhr = maxhr
        w.save()
    job = myqueue(
        queuehigh,
        handle_calctrimp,
        w.id,
        w.csvfilename,
        ftp,
        r.sex,
        r.hrftp,
        r.max,
        r.rest)
    return 0, 0
def workout_rscore(w, reset=False):
    """Return cached (rscore, normp) for workout *w*; otherwise queue a
    background recomputation and return (0, 0) in the meantime."""
    if not reset and w.rscore > 0:
        return w.rscore, w.normp
    rower = w.user
    ftp = float(rower.ftp)
    if w.workouttype in otwtypes:
        # apply the configured on-the-water power slack
        ftp *= (100. - rower.otwslack) / 100.
    if rower.hrftp == 0:
        # seed heart-rate FTP between the TR and AN band boundaries
        rower.hrftp = int((rower.an + rower.tr) / 2.)
        rower.save()
    myqueue(
        queuehigh,
        handle_calctrimp,
        w.id,
        w.csvfilename,
        ftp,
        rower.sex,
        rower.hrftp,
        rower.max,
        rower.rest,
    )
    return 0, 0
def workout_normv(w, pp=4.0):
    """Return cached (normv, normw) for workout *w*; otherwise queue a
    background recomputation and return (0, 0) in the meantime.

    NOTE: *pp* is accepted for call-site compatibility but is not used
    in this routine.
    """
    if w.normv > 0:  # pragma: no cover
        return w.normv, w.normw
    rower = w.user
    ftp = float(rower.ftp)
    if w.workouttype in otwtypes:
        # apply the configured on-the-water power slack
        ftp *= (100. - rower.otwslack) / 100.
    if rower.hrftp == 0:  # pragma: no cover
        # seed heart-rate FTP between the TR and AN band boundaries
        rower.hrftp = int((rower.an + rower.tr) / 2.)
        rower.save()
    myqueue(
        queuehigh,
        handle_calctrimp,
        w.id,
        w.csvfilename,
        ftp,
        rower.sex,
        rower.hrftp,
        rower.max,
        rower.rest,
    )
    return 0, 0