Private
Public Access
1
0
Files
rowsandall/rowers/dataprep.py
Sander Roosendaal 76d57622a9 bug fixes
2023-05-10 17:57:22 +02:00

1728 lines
53 KiB
Python

from rowers.metrics import axes, calc_trimp, rowingmetrics, dtypes, metricsgroups
from rowers.utils import lbstoN, myqueue, wavg, dologging
from rowers.mytypes import otwtypes, otetypes, rowtypes
import glob
import rowingdata.tcxtools as tcxtools
from rowers.utils import totaltime_sec_to_string
from rowers.datautils import p0
from scipy import optimize
from rowers.utils import calculate_age
import datetime
from scipy.signal import savgol_filter
from rowers.opaque import encoder
from rowers.database import *
from rowers import mytypes
from rowsandall_app.settings import SITE_URL
import django_rq
from timezonefinder import TimezoneFinder
import rowers.datautils as datautils
import rowers.utils as utils
import sys
import sqlalchemy as sa
from sqlalchemy import create_engine
from django.conf import settings
import math
from fitparse.base import FitHeaderError
from fitparse import FitFile
import itertools
import numpy as np
import pandas as pd
from zipfile import BadZipFile
import zipfile
import os
from rowers.models import strokedatafields
from rowingdata import (
KinoMapParser,
ExcelTemplate,
TCXParser,
MysteryParser,
RowProParser,
RitmoTimeParser,
QuiskeParser,
RowPerfectParser,
CoxMateParser,
BoatCoachParser,
BoatCoachOTWParser,
BoatCoachAdvancedParser,
painsledDesktopParser,
speedcoachParser,
SpeedCoach2Parser,
ErgStickParser,
FITParser,
ErgDataParser,
HumonParser,
ETHParser,
NKLiNKLogbookParser,
HeroParser,
SmartRowParser,)
# All the data preparation, data cleaning and data mangling should
# be defined here
from rowers.models import (
Workout, Team, CalcAgePerformance, C2WorldClassAgePerformance,
User
)
import pytz
import collections
import pendulum
from rowingdata import rowingdata as rrdata
from rowingdata import rower as rrower
import yaml
import shutil
from shutil import copyfile
from rowingdata import (
get_file_type, get_empower_rigging, get_empower_firmware
)
from rowers.dataroutines import *
from rowers.tasks import (
handle_sendemail_newftp,
handle_sendemail_unrecognized, handle_setcp,
handle_getagegrouprecords, handle_update_wps,
handle_request_post, handle_calctrimp,
handle_updatecp, handle_updateergcp,
handle_sendemail_breakthrough,
handle_sendemail_hard,
)
from rowers.tasks import handle_zip_file
from pandas import DataFrame, Series
import dask.dataframe as dd
from dask.delayed import delayed
import pyarrow.parquet as pq
import pyarrow as pa
from pyarrow.lib import ArrowInvalid
from django.utils import timezone
from django.utils.timezone import get_current_timezone
from django.urls import reverse
import requests
from django.core.exceptions import ValidationError
from time import strftime
import arrow
from rq.job import Job
from rq.registry import StartedJobRegistry
from redis import Redis
from rq import Queue
# Module-level context shared by the data-preparation helpers below.
thetimezone = get_current_timezone()
# Columns that may appear in stroke-data frames (the keys of strokedatafields).
# FIX: was `[key for key, value in strokedatafields.items()]` — iterating
# .items() only to drop the values; `list(mapping)` is the idiomatic form.
allowedcolumns = list(strokedatafields)
queue = django_rq.get_queue('default')
queuelow = django_rq.get_queue('low')
# NOTE(review): `queuehigh` is bound to the same 'default' queue as `queue`
# — confirm that is intentional (there may be no dedicated high queue).
queuehigh = django_rq.get_queue('default')
def fetchcperg(rower, theworkouts):
    """Return the rower's cached erg CP data and queue a background refresh.

    The CP table is read synchronously via ``getcpdata_sql`` while an update
    task covering the given workouts' CSV files is pushed onto the
    low-priority queue.
    """
    filenames = [workout.csvfilename for workout in theworkouts]
    cpdf = getcpdata_sql(rower.id, table='ergcpdata')
    _ = myqueue(queuelow, handle_updateergcp, rower.id, filenames)
    return cpdf
def workout_goldmedalstandard(workout, reset=False):
    """Return (and cache on the workout) its gold-medal-standard score.

    Returns the cached ``(goldmedalstandard, goldmedalseconds)`` unless
    ``reset`` is set or no score is stored yet.  Non-rowing workout types
    score ``(0, 0)``.  On-water scores are scaled up by the user's
    ``otwslack`` percentage; a slack of 100 is treated as invalid and reset
    to 0 on the user profile.
    """
    if not reset and workout.goldmedalstandard > 0:
        return workout.goldmedalstandard, workout.goldmedalseconds
    if workout.workouttype not in rowtypes:
        return 0, 0
    score, seconds = calculate_goldmedalstandard(workout.user, workout)
    if workout.workouttype in otwtypes:
        try:
            scale = 100./(100.-workout.user.otwslack)
        except ZeroDivisionError:
            # otwslack == 100 is nonsensical; repair the profile.
            scale = 1.
            workout.user.otwslack = 0
            workout.user.save()
        score = score*scale
    workout.goldmedalstandard = score
    workout.goldmedalseconds = seconds
    workout.save()
    return score, seconds
def check_marker(workout):
    """Flag the best recent workout of this rower as a ranking piece.

    Looks back ``r.kfit`` days from *workout*'s date, scores every
    non-duplicate rowing workout in that window, and marks the one with the
    highest gold-medal-standard score as a ranking piece when it beats the
    most recent existing ranking piece by more than 0.2 points.

    Returns the affected workout, or None when nothing changed (or when the
    triggering workout scores under 60 gold-medal seconds).
    """
    r = workout.user
    gmstandard, gmseconds = workout_goldmedalstandard(workout)
    # Pieces shorter than 60 gold-medal seconds are ignored for ranking.
    if gmseconds < 60:
        return None
    # Window start: kfit days before this workout's date.
    dd = arrow.get(workout.date).datetime-datetime.timedelta(days=r.kfit)
    ws = Workout.objects.filter(date__gte=dd,
                                date__lte=workout.date,
                                user=r, duplicate=False,
                                workouttype__in=mytypes.rowtypes,
                                ).order_by("date")
    ids = []
    gms = []
    for w in ws:
        gmstandard, gmseconds = workout_goldmedalstandard(w)
        if gmseconds > 60:
            ids.append(w.id)
            gms.append(gmstandard)
    df = pd.DataFrame({
        'id': ids,
        'gms': gms,
    })
    if df.empty:  # pragma: no cover
        # NOTE(review): this sets `ranking`, while the rest of the function
        # uses `rankingpiece` — confirm the attribute name is intended.
        workout.ranking = True
        workout.save()
        return workout
    # Workout with the highest gold-medal-standard score in the window.
    indexmax = df['gms'].idxmax()
    theid = df.loc[indexmax, 'id']
    wmax = Workout.objects.get(id=theid)
    # gms_max = wmax.goldmedalstandard
    # check if equal, bigger, or smaller than previous
    if not wmax.rankingpiece:
        rankingworkouts = ws.filter(rankingpiece=True)
        if len(rankingworkouts) == 0:
            # No ranking piece in the window yet — this becomes the first.
            wmax.rankingpiece = True
            wmax.save()
            return wmax
        lastranking = rankingworkouts[len(rankingworkouts)-1]
        # Require a > 0.2 point improvement over the latest ranking piece.
        if lastranking.goldmedalstandard+0.2 < wmax.goldmedalstandard:  # pragma: no cover
            wmax.rankingpiece = True
            wmax.save()
            return wmax
        else:  # pragma: no cover
            return wmax
    return None
def workout_summary_to_df(
        rower,
        startdate=datetime.datetime(1970, 1, 1),
        enddate=None):
    """Build a one-row-per-workout summary DataFrame for *rower*.

    Each row carries the workout's identity, timing, distance/duration,
    training-load numbers (TRIMP, TSS, hrTSS), gold-medal-standard score
    and download/detail links.  Workouts are ordered by start datetime and
    duplicates are excluded.

    Parameters
    ----------
    rower : the user whose workouts are summarized.
    startdate : inclusive lower bound on the workout date.
    enddate : inclusive upper bound; defaults to "tomorrow" *at call time*.

    BUG FIX: the old default ``enddate=timezone.now()+timedelta(days=1)``
    was evaluated once at import time, so long-running processes silently
    excluded recent workouts.  ``None`` now resolves "now" per call.
    """
    if enddate is None:
        enddate = timezone.now()+timezone.timedelta(days=1)
    ws = Workout.objects.filter(
        user=rower, date__gte=startdate, date__lte=enddate,
        duplicate=False
    ).order_by("startdatetime")
    types = []
    names = []
    ids = []
    startdatetimes = []
    timezones = []
    distances = []
    durations = []
    weightcategories = []
    adaptivetypes = []
    weightvalues = []
    notes = []
    tcx_links = []
    csv_links = []
    workout_links = []
    goldstandards = []
    goldstandarddurations = []
    rscores = []
    hrtss = []
    trimps = []
    rankingpieces = []
    boattypes = []
    counter1 = 0
    counter2 = len(ws)
    for w in ws:
        counter1 += 1
        # Coarse progress indicator for long histories.
        if counter1 % 10 == 0:  # pragma: no cover
            print(counter1, '/', counter2)
        types.append(w.workouttype)
        names.append(w.name)
        # Public-facing ids are hex-encoded, never raw database ids.
        ids.append(encoder.encode_hex(w.id))
        startdatetimes.append(w.startdatetime)
        timezones.append(w.timezone)
        distances.append(w.distance)
        durations.append(w.duration)
        weightcategories.append(w.weightcategory)
        adaptivetypes.append(w.adaptiveclass)
        weightvalues.append(w.weightvalue)
        boattypes.append(w.boattype)
        notes.append(w.notes)
        tcx_link = SITE_URL+'/rowers/workout/{id}/emailtcx'.format(
            id=encoder.encode_hex(w.id)
        )
        tcx_links.append(tcx_link)
        csv_link = SITE_URL+'/rowers/workout/{id}/emailcsv'.format(
            id=encoder.encode_hex(w.id)
        )
        csv_links.append(csv_link)
        workout_link = SITE_URL+'/rowers/workout/{id}/'.format(
            id=encoder.encode_hex(w.id)
        )
        workout_links.append(workout_link)
        trimps.append(workout_trimp(w)[0])
        rscore = workout_rscore(w)
        rscores.append(int(rscore[0]))
        hrtss.append(int(w.hrtss))
        goldstandard, goldstandardduration = workout_goldmedalstandard(w)
        goldstandards.append(int(goldstandard))
        goldstandarddurations.append(int(goldstandardduration))
        rankingpieces.append(w.rankingpiece)
    df = pd.DataFrame({
        'ID': ids,
        'date': startdatetimes,
        'name': names,
        'link': workout_links,
        'timezone': timezones,
        'type': types,
        'boat type': boattypes,
        'distance (m)': distances,
        'duration ': durations,
        'ranking piece': rankingpieces,
        'weight category': weightcategories,
        'adaptive classification': adaptivetypes,
        'weight (kg)': weightvalues,
        'Stroke Data TCX': tcx_links,
        'Stroke Data CSV': csv_links,
        'TRIMP Training Load': trimps,
        'TSS Training Load': rscores,
        'hrTSS Training Load': hrtss,
        'GS': goldstandards,
        'GS_secs': goldstandarddurations,
        'notes': notes,
    })
    return df
def resample(id, r, parent, overwrite='copy'):
    """Resample a workout's stroke data to a fixed 1-second interval.

    Parameters
    ----------
    id : workout id whose stroke data is loaded from the database.
    r : the rower (user) owning the workout.
    parent : the Workout model instance being resampled.
    overwrite : 'overwrite' replaces the parent's data in place (first
        invalidating its cached CP parquet file); any other value creates
        a new workout named '<parent name>(Resampled)'.

    Returns ``(data, id, messages)`` where *data* is the resampled frame.
    """
    data, row = getrowdata_db(id=id)
    messages = []
    # resample
    # Stored times are in milliseconds: build a wall-clock datetime column,
    # then average all samples falling into the same second.
    startdatetime = row.startdatetime
    data['datetime'] = data['time'].apply(
        lambda x: startdatetime+datetime.timedelta(seconds=x/1000.))
    data = data.resample('S', on='datetime').mean()
    data.interpolate(method='linear', inplace=True)
    data.reset_index(drop=True, inplace=True)
    # data.drop('datetime',inplace=True)
    # Convert milli-units back: pace and elapsed time to seconds.
    data['pace'] = data['pace'] / 1000.
    data['time'] = data['time'] / 1000.
    if overwrite == 'overwrite':
        # remove CP data
        try:
            cpfile = 'media/cpdata_{id}.parquet.gz'.format(id=parent.id)
            os.remove(cpfile)
        except FileNotFoundError:
            pass
        # save
        data.rename(columns=columndict, inplace=True)
        starttimeunix = arrow.get(startdatetime).timestamp()
        data[' ElapsedTime (sec)'] = data['TimeStamp (sec)']
        data['TimeStamp (sec)'] = data['TimeStamp (sec)'] + starttimeunix
        row = rrdata(df=data)
        row.write_csv(parent.csvfilename, gzip=True)
        # Re-derive all downstream metrics for the overwritten workout.
        _ = dataprep(row.df, id=parent.id, bands=True, barchart=True,
                     otwpower=True, empower=True, inboard=parent.inboard)
        isbreakthrough, ishard = checkbreakthrough(parent, r)
        _ = check_marker(parent)
        _ = update_wps(r, mytypes.otwtypes)
        _ = update_wps(r, mytypes.otetypes)
        tss, normp = workout_rscore(parent)
        goldmedalstandard, goldmedalseconds = workout_goldmedalstandard(parent)
    else:
        # 'copy' mode: keep the parent and create a resampled sibling.
        id, message = new_workout_from_df(r, data, title=parent.name + '(Resampled)',
                                          parent=parent, forceunit='N')
        messages.append(message)
    return data, id, messages
def calculate_goldmedalstandard(rower, workout, recurrance=True):
    """Score a workout's CP curve against world-class age-group records.

    Loads the workout's cached critical-power curve (computing it via
    ``setcp`` when the parquet cache is missing), fits a two-component
    hyperbolic power-duration model to the reference records for the
    rower's age/sex/weight category, and returns the best percentage
    score plus the duration (seconds) at which it was achieved.

    Returns ``(0, 0)`` when no CP data or too few reference records exist.
    """
    cpfile = 'media/cpdata_{id}.parquet.gz'.format(id=workout.id)
    try:
        df = pd.read_parquet(cpfile)
    except Exception:
        # FIX: was a bare `except:` — keep the best-effort fallback but no
        # longer swallow SystemExit/KeyboardInterrupt.  Missing/unreadable
        # cache: (re)compute the CP curve, in the background outside tests.
        background = True
        if settings.TESTING:
            background = False
        df, delta, cpvalues = setcp(workout, background=background)
        if df.empty:
            return 0, 0
    if df.empty and recurrance:  # pragma: no cover
        # One retry with recursion disabled: setcp(recurrance=True) calls
        # back into this function, so the flag breaks mutual recursion.
        df, delta, cpvalues = setcp(workout, recurrance=False, background=True)
        if df.empty:
            return 0, 0
    age = calculate_age(rower.birthdate, today=workout.date)
    agerecords = CalcAgePerformance.objects.filter(
        age=age,
        sex=rower.sex,
        weightcategory=rower.weightcategory
    )
    wcdurations = []
    wcpower = []
    getrecords = False
    if not settings.TESTING:  # pragma: no cover
        if len(agerecords) == 0:  # pragma: no cover
            getrecords = True
        for record in agerecords:  # pragma: no cover
            if record.power > 0:
                wcdurations.append(record.duration)
                wcpower.append(record.power)
            else:
                # A zero-power record means the table needs rebuilding.
                getrecords = True
    if getrecords:  # pragma: no cover
        # Reference records missing/incomplete: queue a background rebuild
        # from the Concept2 world-class performance tables.
        durations = [1, 4, 30, 60]
        distances = [100, 500, 1000, 2000, 5000, 6000, 10000, 21097, 42195]
        df2 = pd.DataFrame(
            list(
                C2WorldClassAgePerformance.objects.filter(
                    sex=rower.sex,
                    weightcategory=rower.weightcategory
                ).values()
            )
        )
        jsondf = df2.to_json()
        _ = myqueue(queuelow, handle_getagegrouprecords,
                    jsondf, distances, durations, age, rower.sex, rower.weightcategory)
    wcpower = pd.Series(wcpower, dtype='float')
    wcdurations = pd.Series(wcdurations, dtype='float')

    def fitfunc(pars, x):
        # Two-component hyperbolic power-duration model.
        return pars[0] / (1+(x/pars[2])) + pars[1]/(1+(x/pars[3]))

    def errfunc(pars, x, y):
        return fitfunc(pars, x)-y

    if len(wcdurations) >= 4:  # pragma: no cover
        p1wc, success = optimize.leastsq(
            errfunc, p0[:], args=(wcdurations, wcpower))
    else:
        # FIX: removed two dead statements here (a `factor`/`p1wc`
        # computation whose results were discarded by this return).
        # Too few reference points to fit — no score.
        return 0, 0
    times = df['delta']
    powers = df['cp']
    wcpowers = fitfunc(p1wc, times)
    # Score = athlete power as a percentage of world-class power.
    scores = 100.*powers/wcpowers
    try:
        indexmax = scores.idxmax()
        delta = int(df.loc[indexmax, 'delta'])
        maxvalue = scores.max()
    except (ValueError, TypeError):  # pragma: no cover
        indexmax = 0
        delta = 0
        maxvalue = 0
    return maxvalue, delta
def setcp(workout, background=False, recurrance=True):
    """Compute and cache the critical-power (CP) curve for one workout.

    The curve is written to ``media/cpdata_<id>.parquet.gz``.  With
    ``background=True`` the computation is queued on the low-priority queue
    and empty placeholders are returned immediately.  With
    ``recurrance=True`` the workout's gold-medal-standard fields are also
    refreshed (``calculate_goldmedalstandard`` in turn calls back into this
    function with ``recurrance=False``, so the flag bounds the recursion).

    Returns ``(df, delta, cpvalues)``; all three are empty when the workout
    has no usable power data.
    """
    filename = 'media/cpdata_{id}.parquet.gz'.format(id=workout.id)
    strokesdf = getsmallrowdata_db(
        ['power', 'workoutid', 'time'], ids=[workout.id])
    try:
        # Constant power (std == 0) carries no CP information.
        if strokesdf['power'].std() == 0:
            return pd.DataFrame(), pd.Series(dtype='float'), pd.Series(dtype='float')
    except KeyError:
        # No power column at all.
        return pd.DataFrame(), pd.Series(dtype='float'), pd.Series(dtype='float')
    if background:  # pragma: no cover
        _ = myqueue(queuelow, handle_setcp, strokesdf, filename, workout.id)
        return pd.DataFrame({'delta': [], 'cp': []}), pd.Series(dtype='float'), pd.Series(dtype='float')
    if not strokesdf.empty:
        totaltime = strokesdf['time'].max()
        try:
            powermean = strokesdf['power'].mean()
        except KeyError:  # pragma: no cover
            powermean = 0
        if powermean != 0:
            thesecs = totaltime
            # Evaluate CP out to 5% past the workout duration.
            maxt = 1.05 * thesecs
            if maxt > 0:
                logarr = datautils.getlogarr(maxt)
                dfgrouped = strokesdf.groupby(['workoutid'])
                delta, cpvalues, avgpower = datautils.getcp(dfgrouped, logarr)
                df = pd.DataFrame({
                    'delta': delta,
                    'cp': cpvalues,
                    'id': workout.id,
                })
                # Cache the curve for later reads by calculate_goldmedalstandard.
                df.to_parquet(filename, engine='fastparquet',
                              compression='GZIP')
                if recurrance:
                    goldmedalstandard, goldmedalduration = calculate_goldmedalstandard(
                        workout.user, workout)
                    workout.goldmedalstandard = goldmedalstandard
                    workout.goldmedalduration = goldmedalduration
                    workout.save()
                return df, delta, cpvalues
    return pd.DataFrame({'delta': [], 'cp': []}), pd.Series(dtype='float'), pd.Series(dtype='float')
def update_wps(r, types, mode='water', asynchron=True):
    """Recompute the rower's median work-per-stroke from drive energy.

    Considers workouts of the given *types* within the rower's CP range,
    optionally queues a background refresh as well, then stores the median
    drive energy (counting only strokes above 100) on the profile —
    ``median_wps`` for 'water' mode, ``median_wps_erg`` otherwise.

    Returns False when no drive-energy data is available, True otherwise.
    """
    cutoff = timezone.now()-datetime.timedelta(days=r.cprange)
    recent = Workout.objects.filter(
        date__gte=cutoff,
        workouttype__in=types,
        user=r
    )
    workout_ids = [workout.id for workout in recent]
    if asynchron:
        # Kick off the same recomputation on the default queue too.
        _ = myqueue(queue, handle_update_wps, r.id, types, workout_ids, mode)
    strokes = getsmallrowdata_db(['time', 'driveenergy'], ids=workout_ids)
    try:
        energetic = strokes['driveenergy'] > 100
    except (KeyError, TypeError):
        # No drive-energy column (or no usable frame) — nothing to do.
        return False
    try:
        median_wps = int(strokes.loc[energetic, 'driveenergy'].median())
        if mode == 'water':
            r.median_wps = median_wps
        else:  # pragma: no cover
            r.median_wps_erg = median_wps
        r.save()
    except ValueError:  # pragma: no cover
        # median() of an empty selection is NaN; int(NaN) raises here.
        pass
    return True
def join_workouts(r, ids, title='Joined Workout',
                  parent=None,
                  setprivate=False,
                  forceunit='lbs', killparents=False):
    """Concatenate several workouts into one new workout.

    The workouts in *ids* are loaded in chronological order, their stroke
    data concatenated, written to a fresh CSV and saved as a new workout.
    Metadata (rig, type, notes, privacy, start time) is copied from
    *parent*, or — when no parent is given — from the earliest workout.
    With ``killparents`` the source workouts are deleted afterwards.

    Returns ``(id, message)`` for the newly created workout.
    """
    message = None
    summary = ''
    if parent:  # pragma: no cover
        oarlength = parent.oarlength
        inboard = parent.inboard
        workouttype = parent.workouttype
        notes = parent.notes
        summary = parent.summary
        if parent.privacy == 'hidden':
            makeprivate = True
        else:
            makeprivate = False
        startdatetime = parent.startdatetime
    else:
        # Defaults; replaced below once the first workout is known.
        oarlength = 2.89
        inboard = 0.88
        workouttype = 'rower'
        notes = ''
        summary = ''
        makeprivate = False
        startdatetime = timezone.now()
    if setprivate is True and makeprivate is False:  # pragma: no cover
        makeprivate = True
    elif setprivate is False and makeprivate is True:  # pragma: no cover
        makeprivate = False
    # reorder in chronological order
    ws = Workout.objects.filter(id__in=ids).order_by("startdatetime")
    if not parent:
        # NOTE(review): this re-derives makeprivate from the first workout
        # AFTER the setprivate override above, so in the no-parent case the
        # setprivate argument is effectively discarded — confirm intended.
        parent = ws[0]
        oarlength = parent.oarlength
        inboard = parent.inboard
        workouttype = parent.workouttype
        notes = parent.notes
        summary = parent.summary
        if parent.privacy == 'hidden':
            makeprivate = True
        else:
            makeprivate = False
        startdatetime = parent.startdatetime
    files = [w.csvfilename for w in ws]
    # Accumulate the stroke data file by file; rdata returns 0 on failure,
    # and unreadable files are simply skipped.
    row = rdata(files[0])
    files = files[1:]
    while len(files):
        row2 = rdata(files[0])
        if row2 != 0:
            row = row+row2
        files = files[1:]
    timestr = strftime("%Y%m%d-%H%M%S")
    csvfilename = 'media/df_' + timestr + '.csv'
    row.write_csv(csvfilename, gzip=True)
    id, message = save_workout_database(csvfilename, r,
                                        workouttype=workouttype,
                                        title=title,
                                        notes=notes,
                                        oarlength=oarlength,
                                        inboard=inboard,
                                        startdatetime=startdatetime,
                                        makeprivate=makeprivate,
                                        summary=summary,
                                        dosmooth=False,
                                        consistencychecks=False)
    if killparents:  # pragma: no cover
        for w in ws:
            w.delete()
        # With the sources gone the joined workout is no duplicate.
        w = Workout.objects.get(id=id)
        w.duplicate = False
        w.save()
    # The join overlaps its parents by construction; suppress the
    # duplicate warning coming back from save_workout_database.
    if message is not None and "duplicate" in message:
        message = ""
    return (id, message)
def fetchcp_new(rower, workouts):
    """Collect the best CP curve across several workouts.

    Reads each workout's cached CP parquet file (computing it on the fly
    when missing), keeps the maximum power per duration across all
    workouts, and returns
    ``(delta, cp, 0, workout_labels, workout_urls)`` — all empty Series
    (and 0) when no CP data exists.
    """
    data = []
    for workout in workouts:
        cpfile = 'media/cpdata_{id}.parquet.gz'.format(id=workout.id)
        try:
            df = pd.read_parquet(cpfile)
        except Exception:
            # FIX: was a bare `except:`.  CP data file doesn't exist yet
            # (or is unreadable) — compute it now.
            df, delta, cpvalues = setcp(workout)
        # Label every CP point with its source workout (hoisted out of the
        # try/except above, where both branches repeated these lines).
        df['workout'] = str(workout)
        df['url'] = workout.url()
        data.append(df)
    if len(data) == 0:
        return pd.Series(dtype='float'), pd.Series(dtype='float'), 0, pd.Series(dtype='float'), pd.Series(dtype='float')
    if len(data) > 1:
        df = pd.concat(data, axis=0)
    try:
        # Per duration, keep only the row(s) with the best power.
        df = df[df['cp'] == df.groupby(['delta'])['cp'].transform('max')]
    except KeyError:  # pragma: no cover
        return pd.Series(dtype='float'), pd.Series(dtype='float'), 0, pd.Series(dtype='float'), pd.Series(dtype='float')
    df = df.sort_values(['delta']).reset_index()
    return df['delta'], df['cp'], 0, df['workout'], df['url']
def fetchcp(rower, theworkouts, table='cpdata'):  # pragma: no cover
    """Fetch the rower's cached CP curve plus per-workout mean power.

    Returns ``(delta, cp, avgpower)``: the cached CP curve from SQL (empty
    Series when not cached — a background rebuild is queued in that case)
    and a dict mapping workout id to integer mean power (0 on failure).
    """
    def _empty_curve():
        # Canonical "no CP data" pair of Series.
        return pd.Series([], dtype='float'), pd.Series([], dtype='float')

    # get all power data from database (plus workoutid)
    theids = [int(w.id) for w in theworkouts]
    columns = ['power', 'workoutid', 'time']
    df = getsmallrowdata_db(columns, ids=theids)
    df.dropna(inplace=True, axis=0)
    if df.empty:
        avgpower2 = {wid: 0 for wid in theids}
        return (*_empty_curve(), avgpower2)
    try:
        dfgrouped = df.groupby(['workoutid'])
    except KeyError:
        avgpower2 = {}
        return (*_empty_curve(), avgpower2)
    try:
        avgpower2 = dict(dfgrouped.mean()['power'].astype(int))
    except KeyError:
        avgpower2 = {wid: 0 for wid in theids}
        return (*_empty_curve(), avgpower2)
    cpdf = getcpdata_sql(rower.id, table=table)
    if not cpdf.empty:
        return cpdf['delta'], cpdf['cp'], avgpower2
    # Not cached yet: queue a background rebuild, return empty for now.
    # FIX: removed an unreachable trailing return — both branches of the
    # original if/else already returned.
    _ = myqueue(queuelow,
                handle_updatecp,
                rower.id,
                theids,
                table=table)
    return (*_empty_curve(), avgpower2)
def update_rolling_cp(r, types, mode='water'):
    """Refit the rower's rolling critical-power model and store it.

    Aggregates the best CP points over all workouts of *types* inside the
    rower's CP range, fits the two-component CP model, stores the fit
    parameters on the profile ('water' -> p0..p3/cpratio, otherwise
    ep0..ep3/ecpratio), and emails the rower when the fitted one-hour
    power beats the current FTP by more than 5 W.

    Returns True when a fit was stored, False when no CP data was found.
    """
    firstdate = timezone.now()-datetime.timedelta(days=r.cprange)
    workouts = Workout.objects.filter(
        date__gte=firstdate,
        workouttype__in=types,
        user=r
    )
    delta, cp, avgpower, workoutnames, urls = fetchcp_new(r, workouts)
    powerdf = pd.DataFrame({
        'Delta': delta,
        'CP': cp,
    })
    powerdf = powerdf[powerdf['CP'] > 0]
    powerdf.dropna(axis=0, inplace=True)
    # Keep only the single best power per duration.
    powerdf.sort_values(['Delta', 'CP'], ascending=[1, 0], inplace=True)
    powerdf.drop_duplicates(subset='Delta', keep='first', inplace=True)
    res2 = datautils.cpfit(powerdf)
    p1 = res2[0]
    # calculate FTP: model power sustainable for one hour
    hourseconds = 3600.
    pwr = p1[0]/(1+hourseconds/p1[2])
    pwr += p1[1]/(1+hourseconds/p1[3])
    if len(powerdf) != 0:
        if mode == 'water':
            r.p0 = p1[0]
            r.p1 = p1[1]
            r.p2 = p1[2]
            r.p3 = p1[3]
            r.cpratio = res2[3]
            r.save()
            # On-water FTP threshold is corrected by the slack percentage.
            if pwr-5 > r.ftp*(100.-r.otwslack)/100. and r.getemailnotifications and not r.emailbounced:
                _ = myqueue(queuehigh, handle_sendemail_newftp, r, pwr, mode)
        else:
            r.ep0 = p1[0]
            r.ep1 = p1[1]
            r.ep2 = p1[2]
            r.ep3 = p1[3]
            r.ecpratio = res2[3]
            r.save()
            if pwr-5 > r.ftp and r.getemailnotifications and not r.emailbounced:
                # FIX: the notification previously passed the hard-coded
                # string 'water' even for erg fits; pass the actual mode.
                _ = myqueue(queuehigh, handle_sendemail_newftp, r, pwr, mode)
        return True
    return False
def initiate_cp(r):
    """Kick off rolling CP refits for both on-water and erg workouts."""
    update_rolling_cp(r, otwtypes, mode='water')
    update_rolling_cp(r, otetypes, mode='erg')
def split_workout(r, parent, splitsecond, splitmode):
    """Split a workout into two at *splitsecond* seconds.

    *splitmode* is interpreted by substring: 'keep first' / 'keep second'
    select which halves become new workouts, 'keep original' preserves the
    parent (otherwise it is deleted when at least one half was kept), and
    'firstprivate' / 'secondprivate' / 'originalprivate' control privacy.

    Returns ``(ids, messages)`` with hex-encoded ids of the resulting
    workouts and any user-facing messages.
    """
    data, row = getrowdata_db(id=parent.id)
    latitude, longitude = get_latlon(parent.id)
    if not latitude.empty and not longitude.empty:
        data[' latitude'] = latitude
        data[' longitude'] = longitude
    # Stored times are milliseconds; convert so splitsecond compares in s.
    data['time'] = data['time'] / 1000.
    data1 = data[data['time'] <= splitsecond].copy()
    data2 = data[data['time'] > splitsecond].copy()
    # Clean each half: sort, interpolate small gaps, backfill leading NaNs,
    # and average any duplicate time stamps.
    data1 = data1.sort_values(['time'])
    data1 = data1.interpolate(method='linear', axis=0, limit_direction='both',
                              limit=10)
    data1.fillna(method='bfill', inplace=True)
    # Some new stuff to try out
    data1 = data1.groupby('time', axis=0).mean()
    data1['time'] = data1.index
    data1.reset_index(drop=True, inplace=True)
    data2 = data2.sort_values(['time'])
    data2 = data2.interpolate(method='linear', axis=0, limit_direction='both',
                              limit=10)
    data2.fillna(method='bfill', inplace=True)
    # Some new stuff to try out
    data2 = data2.groupby('time', axis=0).mean()
    data2['time'] = data2.index
    data2.reset_index(drop=True, inplace=True)
    # Pace is also stored in milli-units.
    data1['pace'] = data1['pace'] / 1000.
    data2['pace'] = data2['pace'] / 1000.
    data1.drop_duplicates(subset='time', inplace=True)
    data2.drop_duplicates(subset='time', inplace=True)
    messages = []
    ids = []
    if 'keep first' in splitmode:
        if 'firstprivate' in splitmode:  # pragma: no cover
            setprivate = True
        else:
            setprivate = False
        id, message = new_workout_from_df(r, data1,
                                          title=parent.name + ' (1)',
                                          parent=parent,
                                          setprivate=setprivate,
                                          forceunit='N')
        messages.append(message)
        ids.append(encoder.encode_hex(id))
    if 'keep second' in splitmode:
        # Re-zero the second half's distance and time counters.
        data2['cumdist'] = data2['cumdist'] - data2.iloc[
            0,
            data2.columns.get_loc('cumdist')
        ]
        data2['distance'] = data2['distance'] - data2.iloc[
            0,
            data2.columns.get_loc('distance')
        ]
        data2['time'] = data2['time'] - data2.iloc[
            0,
            data2.columns.get_loc('time')
        ]
        if 'secondprivate' in splitmode:  # pragma: no cover
            setprivate = True
        else:
            setprivate = False
        # Shift the second half's start time past the split point.
        dt = datetime.timedelta(seconds=splitsecond)
        id, message = new_workout_from_df(r, data2,
                                          title=parent.name + ' (2)',
                                          parent=parent,
                                          setprivate=setprivate,
                                          dt=dt, forceunit='N')
        messages.append(message)
        ids.append(encoder.encode_hex(id))
    if 'keep original' not in splitmode:  # pragma: no cover
        if 'keep second' in splitmode or 'keep first' in splitmode:
            parent.delete()
            messages.append('Deleted Workout: ' + parent.name)
        else:
            # Refuse to delete when nothing was kept at all.
            messages.append('That would delete your workout')
            ids.append(encoder.encode_hex(parent.id))
    elif 'originalprivate' in splitmode:  # pragma: no cover
        parent.privacy = 'hidden'
        parent.save()
    return ids, messages
# create a new workout from manually entered data
def create_row_df(r, distance, duration, startdatetime, workouttype='rower',
                  avghr=None, avgpwr=None, avgspm=None,
                  rankingpiece=False,
                  duplicate=False, rpe=-1,
                  title='Manual entry', notes='', weightcategory='hwt',
                  adaptiveclass='None'):
    """Create a synthetic stroke file and workout from manual totals.

    Fabricates an evenly spaced stroke-by-stroke DataFrame (one stroke per
    10 m) matching the entered distance/duration, with constant rate, pace,
    power and heart rate, writes it as a CSV and saves it via
    ``save_workout_database``.  Returns ``(id, message)``.
    """
    if duration is not None:
        totalseconds = duration.hour*3600.
        totalseconds += duration.minute*60.
        totalseconds += duration.second
        totalseconds += duration.microsecond/1.e6
    else:  # pragma: no cover
        totalseconds = 60.
    if distance is None:  # pragma: no cover
        distance = 0
    try:
        # One synthetic stroke per 10 meters.
        nr_strokes = int(distance/10.)
    except TypeError:  # pragma: no cover
        nr_strokes = int(20.*totalseconds)
    # FIX: was `if nr_strokes == 0`.  With exactly one stroke (distance of
    # 10-19 m) the elapsed/distance ramps below divide by
    # float(nr_strokes - 1) == 0.0 and raised ZeroDivisionError; require
    # at least two strokes.
    if nr_strokes < 2:  # pragma: no cover
        nr_strokes = 100
    unixstarttime = arrow.get(startdatetime).timestamp()
    if not avgspm:  # pragma: no cover
        try:
            spm = 60.*nr_strokes/totalseconds
        except ZeroDivisionError:
            spm = 20.
    else:
        spm = avgspm
    # Evenly spaced elapsed time and distance from 0 to the totals.
    elapsed = np.arange(nr_strokes)*totalseconds/(float(nr_strokes-1))
    d = np.arange(nr_strokes)*distance/(float(nr_strokes-1))
    unixtime = unixstarttime + elapsed
    try:
        pace = 500.*totalseconds/distance
    except ZeroDivisionError:  # pragma: no cover
        pace = 240.
    if workouttype in ['rower', 'slides', 'dynamic']:
        try:
            velo = distance/totalseconds
        except ZeroDivisionError:  # pragma: no cover
            velo = 2.4
        # Standard erg power model: P = 2.8 * v^3.
        power = 2.8*velo**3
    elif avgpwr is not None:  # pragma: no cover
        power = avgpwr
    else:  # pragma: no cover
        power = 0
    if avghr is not None:
        hr = avghr
    else:  # pragma: no cover
        hr = 0
    df = pd.DataFrame({
        'TimeStamp (sec)': unixtime,
        ' Horizontal (meters)': d,
        ' Cadence (stokes/min)': spm,
        ' Stroke500mPace (sec/500m)': pace,
        ' ElapsedTime (sec)': elapsed,
        ' Power (watts)': power,
        ' HRCur (bpm)': hr,
    })
    timestr = strftime("%Y%m%d-%H%M%S")
    csvfilename = 'media/df_' + timestr + '.csv'
    df[' ElapsedTime (sec)'] = df['TimeStamp (sec)']
    row = rrdata(df=df)
    row.write_csv(csvfilename, gzip=True)
    id, message = save_workout_database(csvfilename, r,
                                        title=title,
                                        notes=notes,
                                        rankingpiece=rankingpiece,
                                        duplicate=duplicate,
                                        dosmooth=False,
                                        workouttype=workouttype,
                                        consistencychecks=False,
                                        weightcategory=weightcategory,
                                        adaptiveclass=adaptiveclass,
                                        totaltime=totalseconds)
    return (id, message)
def checkbreakthrough(w, r):
    """Check whether workout *w* is a breakthrough (or 'hard') effort.

    Computes the workout's CP curve and compares it against the rower's
    stored rolling CP model (on-water or erg parameters as appropriate),
    refreshing the rolling model along the way.  A breakthrough (or, short
    of that, a 'hard' workout) marks the workout as a ranking piece and
    queues notification emails to the rower and their coaches.

    Returns ``(isbreakthrough, ishard)``.
    """
    isbreakthrough = False
    ishard = False
    workouttype = w.workouttype
    if workouttype in rowtypes:
        cpdf, delta, cpvalues = setcp(w)
        if not cpdf.empty:
            if workouttype in otwtypes:
                # Compare against the on-water CP model parameters.
                res, btvalues, res2 = utils.isbreakthrough(
                    delta, cpvalues, r.p0, r.p1, r.p2, r.p3, r.cpratio)
                _ = update_rolling_cp(r, otwtypes, 'water')
            elif workouttype in otetypes:
                # Compare against the erg CP model parameters.
                res, btvalues, res2 = utils.isbreakthrough(
                    delta, cpvalues, r.ep0, r.ep1, r.ep2, r.ep3, r.ecpratio)
                _ = update_rolling_cp(r, otetypes, 'erg')
            else:  # pragma: no cover
                # Note: btvalues stays unbound here, but res/res2 of 0 keep
                # both flags False so it is never referenced below.
                res = 0
                res2 = 0
            if res:
                isbreakthrough = True
            if res2 and not isbreakthrough:  # pragma: no cover
                ishard = True
    # submit email task to send email about breakthrough workout
    if isbreakthrough:
        if not w.duplicate:
            w.rankingpiece = True
            w.save()
        if r.getemailnotifications and not r.emailbounced:  # pragma: no cover
            _ = myqueue(queuehigh, handle_sendemail_breakthrough,
                        w.id,
                        r.user.email,
                        r.user.first_name,
                        r.user.last_name,
                        btvalues=btvalues.to_json())
        for coach in r.get_coaches():
            if coach.getemailnotifications and not coach.emailbounced:
                _ = myqueue(queuehigh, handle_sendemail_breakthrough,
                            w.id,
                            coach.user.email,
                            r.user.first_name,
                            r.user.last_name,
                            btvalues=btvalues.to_json(),
                            surname=True)
    # submit email task to send email about breakthrough workout
    if ishard:  # pragma: no cover
        if not w.duplicate:
            w.rankingpiece = True
            w.save()
        if r.getemailnotifications and not r.emailbounced:
            _ = myqueue(queuehigh, handle_sendemail_hard,
                        w.id,
                        r.user.email,
                        r.user.first_name,
                        r.user.last_name,
                        btvalues=btvalues.to_json())
        for coach in r.get_coaches():
            if coach.getemailnotifications and not coach.emailbounced:
                _ = myqueue(queuehigh, handle_sendemail_hard,
                            w.id,
                            coach.user.email,
                            r.user.first_name,
                            r.user.last_name,
                            btvalues=btvalues.to_json(),
                            surname=True)
    return isbreakthrough, ishard
# Processes painsled CSV file to database
def save_workout_database(f2, r, dosmooth=True, workouttype='rower',
                          boattype='1x',
                          adaptiveclass='None',
                          weightcategory='hwt',
                          dosummary=True, title='Workout',
                          workoutsource='unknown',
                          notes='', totaldist=0, totaltime=0,
                          rankingpiece=False,
                          rpe=-1,
                          duplicate=False,
                          summary='',
                          makeprivate=False,
                          oarlength=2.89, inboard=0.88,
                          forceunit='lbs',
                          consistencychecks=False,
                          startdatetime='',
                          workoutid='',
                          impeller=False):
    """Parse a stroke CSV file and persist it as a Workout.

    Reads *f2* into a rowingdata object, derives metadata (start time,
    totals, heart rate, privacy, duplicate status), optionally smooths the
    pace trace, creates or updates the Workout row (``workoutid`` updates
    an existing one), and triggers the downstream pipeline: stroke-data
    prep, breakthrough check, ranking marker, WPS and TRIMP updates.

    Returns ``(workout_id, message)``; ``(0, <error>)`` on failure.

    Fixes in this revision:
    - `allchecks` is initialized before the consistency-check try block;
      previously a ZeroDivisionError inside check_consistency() left it
      unbound and the following `if not allchecks ...` raised NameError.
    - bare `except:` clauses narrowed (OSError for os.remove, Exception
      for the best-effort power recalculation).
    """
    message = None
    # Power-zone boundaries as percentages of FTP.
    powerperc = 100 * np.array([r.pw_ut2,
                                r.pw_ut1,
                                r.pw_at,
                                r.pw_tr, r.pw_an]) / r.ftp
    # make workout and put in database
    rr = rrower(hrmax=r.max, hrut2=r.ut2,
                hrut1=r.ut1, hrat=r.at,
                hrtr=r.tr, hran=r.an, ftp=r.ftp,
                powerperc=powerperc, powerzones=r.powerzones)
    row = rdata(f2, rower=rr)
    startdatetime, startdate, starttime, timezone_str, partofday = get_startdate_time_zone(
        r, row, startdatetime=startdatetime)
    if title is None or title == '':
        title = 'Workout'
        # Prefer e.g. "Morning rower" when the part of day is known.
        if partofday is not None:
            title = '{partofday} {workouttype}'.format(
                partofday=partofday,
                workouttype=workouttype,
            )
    if row.df.empty:  # pragma: no cover
        return (0, 'Error: CSV data file was empty')
    # Sub-second sampling: resample to strokes and restart via the
    # dataframe entry point instead.
    dtavg = row.df['TimeStamp (sec)'].diff().mean()
    if dtavg < 1:
        newdf = df_resample(row.df)
        try:
            os.remove(f2)
        except OSError:  # FIX: was a bare except
            pass
        return new_workout_from_df(r, newdf,
                                   title=title, boattype=boattype,
                                   workouttype=workouttype,
                                   workoutsource=workoutsource, startdatetime=startdatetime,
                                   workoutid=workoutid)
    # FIX: initialize before the try so a ZeroDivisionError inside
    # check_consistency() cannot leave `allchecks` unbound.
    allchecks = 1
    try:
        checks = row.check_consistency()
        for key, value in checks.items():
            if not value:
                allchecks = 0
    except ZeroDivisionError:  # pragma: no cover
        pass
    if not allchecks and consistencychecks:
        # row.repair()
        pass
    if row == 0:  # pragma: no cover
        return (0, 'Error: CSV data file not found')
    try:
        # GPS data present and moving => this was an on-water row.
        lat = row.df[' latitude']
        if lat.mean() != 0 and lat.std() != 0 and workouttype == 'rower':
            workouttype = 'water'
    except KeyError:
        pass
    if dosmooth:
        # auto smoothing: Savitzky-Golay filter on velocity (not pace),
        # window sized to roughly 10 seconds of samples.
        pace = row.df[' Stroke500mPace (sec/500m)'].values
        velo = 500. / pace
        f = row.df['TimeStamp (sec)'].diff().mean()
        if f != 0 and not np.isnan(f):
            windowsize = 2 * (int(10. / (f))) + 1
        else:  # pragma: no cover
            windowsize = 1
        if 'originalvelo' not in row.df:
            row.df['originalvelo'] = velo
        if windowsize > 3 and windowsize < len(velo):
            velo2 = savgol_filter(velo, windowsize, 3)
        else:  # pragma: no cover
            velo2 = velo
        velo3 = pd.Series(velo2, dtype='float')
        velo3 = velo3.replace([-np.inf, np.inf], np.nan)
        velo3 = velo3.fillna(method='ffill')
        pace2 = 500. / abs(velo3)
        row.df[' Stroke500mPace (sec/500m)'] = pace2
        row.df = row.df.fillna(0)
        row.write_csv(f2, gzip=True)
        # NOTE(review): removing f2 right after writing it assumes
        # write_csv(gzip=True) produces a separate gzipped file that
        # later readers pick up — confirm against rowingdata.write_csv.
        try:
            os.remove(f2)
        except OSError:  # FIX: was a bare except
            pass
    # recalculate power data
    if workouttype == 'rower' or workouttype == 'dynamic' or workouttype == 'slides':
        try:
            if r.erg_recalculatepower:
                row.erg_recalculatepower()
                row.write_csv(f2, gzip=True)
        except Exception:  # FIX: was a bare except; keep best-effort
            pass
    averagehr = row.df[' HRCur (bpm)'].mean()
    maxhr = row.df[' HRCur (bpm)'].max()
    if totaldist == 0:
        totaldist = row.df['cum_dist'].max()
    if totaltime == 0:
        totaltime = row.df['TimeStamp (sec)'].max(
        ) - row.df['TimeStamp (sec)'].min()
        try:
            # Account for elapsed time already on the clock at stroke 0.
            totaltime = totaltime + row.df.loc[:, ' ElapsedTime (sec)'].iloc[0]
        except KeyError:  # pragma: no cover
            pass
    if np.isnan(totaltime):  # pragma: no cover
        totaltime = 0
    if dosummary:
        summary = row.allstats()
    workoutstartdatetime = startdatetime
    dologging('debuglog.log', 'Dataprep line 1721, Workout Startdatetime {workoutstartdatetime}'.format(
        workoutstartdatetime=workoutstartdatetime,
    ))
    duration = totaltime_sec_to_string(totaltime)
    workoutdate = startdate
    workoutstarttime = starttime
    s = 'Dataprep line 1730 workoutdate and time set to {workoutdate} and {workoutstarttime}'.format(
        workoutdate=workoutdate,
        workoutstarttime=workoutstarttime,
    )
    dologging('debuglog.log', s)
    if makeprivate:  # pragma: no cover
        privacy = 'hidden'
    else:
        privacy = 'visible'
    # checking for inf values
    totaldist = np.nan_to_num(totaldist)
    maxhr = np.nan_to_num(maxhr)
    averagehr = np.nan_to_num(averagehr)
    dragfactor = 0
    if workouttype in otetypes:
        dragfactor = row.dragfactor
    #t = datetime.datetime.strptime(duration, "%H:%M:%S.%f")
    delta = datetime.timedelta(
        seconds=totaltime)
    try:
        workoutenddatetime = workoutstartdatetime+delta
    except AttributeError:
        # startdatetime came through as a string; parse and retry.
        workoutstartdatetime = pendulum.parse(str(workoutstartdatetime))
        workoutenddatetime = workoutstartdatetime+delta
    # check for duplicate start times and duration
    duplicate = checkduplicates(
        r, workoutdate, workoutstartdatetime, workoutenddatetime)
    if duplicate:
        rankingpiece = False
    # test title length
    if title is not None and len(title) > 140:  # pragma: no cover
        title = title[0:140]
    timezone_str = str(workoutstartdatetime.tzinfo)
    if workoutid:
        # Update an existing workout in place when possible.
        try:
            w = Workout.objects.get(id=workoutid)
            w.name = title
            w.date = workoutdate
            w.workouttype = workouttype
            w.boattype = boattype
            w.dragfactor = dragfactor
            w.duration = duration
            w.distance = totaldist
            w.weightcategory = weightcategory
            w.adaptiveclass = adaptiveclass
            w.starttime = workoutstarttime
            w.duplicate = duplicate
            w.workoutsource = workoutsource
            w.rankingpiece = rankingpiece
            w.forceunit = forceunit
            w.rpe = rpe
            w.csvfilename = f2
            w.notes = notes
            w.summary = summary
            w.maxhr = maxhr
            w.averagehr = averagehr
            w.startdatetime = workoutstartdatetime
            w.inboard = inboard
            w.oarlength = oarlength
            w.timezone = timezone_str
            w.privacy = privacy
            w.impeller = impeller
        except Workout.DoesNotExist:
            # Fall through to creating a fresh workout below.
            workoutid = ''
    if not workoutid:
        w = Workout(user=r, name=title, date=workoutdate,
                    workouttype=workouttype,
                    boattype=boattype,
                    dragfactor=dragfactor,
                    duration=duration, distance=totaldist,
                    weightcategory=weightcategory,
                    adaptiveclass=adaptiveclass,
                    starttime=workoutstarttime,
                    duplicate=duplicate,
                    workoutsource=workoutsource,
                    rankingpiece=rankingpiece,
                    forceunit=forceunit,
                    rpe=rpe,
                    csvfilename=f2, notes=notes, summary=summary,
                    maxhr=maxhr, averagehr=averagehr,
                    startdatetime=workoutstartdatetime,
                    inboard=inboard, oarlength=oarlength,
                    timezone=timezone_str,
                    privacy=privacy,
                    impeller=impeller)
    try:
        w.save()
    except ValidationError:  # pragma: no cover
        # Last resort: an invalid start datetime is replaced by "now".
        try:
            w.startdatetime = timezone.now()
            w.save()
        except ValidationError:
            return (0, 'Unable to create your workout')
    if privacy == 'visible':
        # Visible workouts are shared with all of the rower's teams.
        ts = Team.objects.filter(rower=r)
        for t in ts:
            w.team.add(t)
    # put stroke data in database
    _ = dataprep(row.df, id=w.id, bands=True,
                 barchart=True, otwpower=True, empower=True, inboard=inboard)
    isbreakthrough, ishard = checkbreakthrough(w, r)
    _ = check_marker(w)
    _ = update_wps(r, mytypes.otwtypes)
    _ = update_wps(r, mytypes.otetypes)
    _ = myqueue(queuehigh, handle_calctrimp, w.id, f2,
                r.ftp, r.sex, r.hrftp, r.max, r.rest)
    return (w.id, message)
def new_workout_from_file(r, f2,
                          workouttype='rower',
                          workoutsource=None,
                          title='Workout',
                          boattype='1x',
                          rpe=-1,
                          makeprivate=False,
                          startdatetime='',
                          notes='',
                          workoutid='',
                          oarlockfirmware='',
                          inboard=None,
                          oarlength=None,
                          impeller=False,
                          uploadoptions=None):
    """Create (or update) a workout from an uploaded stroke-data file.

    Detects the file type of ``f2``, rejects unsupported or summary-only
    formats with a user-facing message, converts non-Painsled formats to a
    Painsled-compatible CSV, and stores the workout through
    ``save_workout_database``.

    Returns a ``(workout_id, message, filename)`` tuple. ``workout_id`` is
    0 on failure and -1 when the file was a zip archive whose members were
    dispatched for asynchronous per-file processing.
    """
    # Never use a mutable dict as a default argument: it is created once and
    # shared (and mutated below) across calls. Build a fresh one per call.
    if uploadoptions is None:
        uploadoptions = {'boattype': '1x', 'workouttype': 'rower'}
    message = ""
    try:
        fileformat = get_file_type(f2)
    except (IOError, UnicodeDecodeError):  # pragma: no cover
        os.remove(f2)
        message = "Rowsandall could not process this file. The extension is supported but the file seems corrupt. Contact info@rowsandall.com if you think this is incorrect."
        return (0, message, f2)
    summary = ''
    # Fall back to standard sculling rigging only when the caller did not
    # supply values. (Previously these overwrote the arguments
    # unconditionally, silently discarding caller-supplied rigging.)
    if oarlength is None:
        oarlength = 2.89
    if inboard is None:
        inboard = 0.88
    # Save zip files to email box for further processing
    if len(fileformat) == 3 and fileformat[0] == 'zip':  # pragma: no cover
        uploadoptions['secret'] = settings.UPLOAD_SERVICE_SECRET
        uploadoptions['user'] = r.user.id
        uploadoptions['title'] = title
        try:
            zip_file = zipfile.ZipFile(f2)
            for idx, filename in enumerate(zip_file.namelist()):
                datafile = zip_file.extract(filename, path='media/')
                if idx > 0:
                    # Disambiguate titles for the second and later members.
                    uploadoptions['title'] = title + ' (' + str(idx + 1) + ')'
                else:
                    uploadoptions['title'] = title
                uploadoptions['file'] = datafile
                url = settings.UPLOAD_SERVICE_URL
                _ = myqueue(queuehigh,
                            handle_request_post,
                            url,
                            uploadoptions)
        except BadZipFile:  # pragma: no cover
            pass
        return -1, message, f2
    # Some people try to upload Concept2 logbook summaries
    if fileformat == 'imageformat':  # pragma: no cover
        os.remove(f2)
        message = "You cannot upload image files here"
        return (0, message, f2)
    if fileformat == 'json':  # pragma: no cover
        os.remove(f2)
        message = "JSON format not supported in direct upload"
        return (0, message, f2)
    if fileformat == 'c2log':
        os.remove(f2)
        message = "This summary does not contain stroke data. Use the files containing stroke by stroke data."
        return (0, message, f2)
    if fileformat == 'nostrokes':  # pragma: no cover
        os.remove(f2)
        message = "It looks like this file doesn't contain stroke data."
        return (0, message, f2)
    if fileformat == 'kml':  # pragma: no cover
        os.remove(f2)
        message = "KML files are not supported"
        return (0, message, f2)
    # Some people upload corrupted zip files
    if fileformat == 'notgzip':  # pragma: no cover
        os.remove(f2)
        message = "Rowsandall could not process this file. The extension is supported but the file seems corrupt. Contact info@rowsandall.com if you think this is incorrect."
        return (0, message, f2)
    # Some people try to upload RowPro summary logs
    if fileformat == 'rowprolog':  # pragma: no cover
        os.remove(f2)
        message = "This RowPro logbook summary does not contain stroke data. Please use the Stroke Data CSV file for the individual workout in your log."
        return (0, message, f2)
    # Sometimes people try an unsupported file type.
    # Send an email to info@rowsandall.com with the file attached
    # for me to check if it is a bug, or a new file type
    # worth supporting
    if fileformat == 'gpx':  # pragma: no cover
        os.remove(f2)
        message = "GPX files support is on our roadmap. Check back soon."
        return (0, message, f2)
    if fileformat == 'unknown':  # pragma: no cover
        message = "We couldn't recognize the file type"
        extension = os.path.splitext(f2)[1]
        filename = os.path.splitext(f2)[0]
        if extension == '.gz':
            # Keep the compound extension (e.g. ".csv.gz") on the copy.
            filename = os.path.splitext(filename)[0]
            extension2 = os.path.splitext(filename)[1] + extension
            extension = extension2
        f4 = filename + 'a' + extension
        copyfile(f2, f4)
        _ = myqueue(queuehigh,
                    handle_sendemail_unrecognized,
                    f4,
                    r.user.email)
        return (0, message, f2)
    if fileformat == 'att':  # pragma: no cover
        # email attachment which can safely be ignored
        os.remove(f2)
        return (0, '', f2)
    if fileformat == 'quiskesummary':
        os.remove(f2)
        return (0, '', f2)
    # Get workout type from fit
    if fileformat == 'fit':  # pragma: no cover
        workouttype = get_workouttype_from_fit(f2, workouttype=workouttype)
    # handle non-Painsled by converting it to painsled compatible CSV
    if fileformat != 'csv':
        f2, summary, oarlength, inboard, fileformat, impeller = handle_nonpainsled(
            f2,
            fileformat,
            startdatetime=startdatetime,
            summary=summary,
            empowerfirmware=oarlockfirmware,
            impeller=impeller,
        )
        if not f2:  # pragma: no cover
            message = 'Something went wrong'
            return (0, message, '')
    dosummary = (fileformat != 'fit' and 'speedcoach2' not in fileformat)
    dosummary = dosummary or summary == ''
    if 'speedcoach2' in fileformat and workouttype == 'rower':
        workouttype = 'water'
    if workoutsource is None:
        workoutsource = fileformat
    dologging('debuglog.log', 'Saving to database with start date time {startdatetime}'.format(
        startdatetime=startdatetime,
    ))
    workout_id, message = save_workout_database(
        f2, r,
        notes=notes,
        workouttype=workouttype,
        weightcategory=r.weightcategory,
        adaptiveclass=r.adaptiveclass,
        boattype=boattype,
        makeprivate=makeprivate,
        dosummary=dosummary,
        workoutsource=workoutsource,
        summary=summary,
        startdatetime=startdatetime,
        rpe=rpe,
        inboard=inboard, oarlength=oarlength,
        title=title,
        forceunit='N',
        impeller=impeller,
        workoutid=workoutid,
    )
    return (workout_id, message, f2)
def new_workout_from_df(r, df,
                        title='New Workout',
                        workoutsource='unknown',
                        boattype='1x',
                        workouttype='rower',
                        parent=None,
                        workoutid='',
                        startdatetime='',
                        setprivate=False,
                        forceunit='lbs',
                        dt=datetime.timedelta()):
    """Persist a stroke-data DataFrame as a new workout.

    Writes *df* to a gzipped CSV under media/, stores the workout via
    ``save_workout_database``, queues the TRIMP calculation, and returns
    a ``(workout_id, message)`` tuple. When *parent* is given, rigging,
    notes, privacy and start time (shifted by *dt*) are inherited from it.
    """
    message = None
    summary = ''
    if parent:
        # Inherit the metadata of the parent workout.
        oarlength = parent.oarlength
        inboard = parent.inboard
        workoutsource = parent.workoutsource
        workouttype = parent.workouttype
        boattype = parent.boattype
        notes = parent.notes
        summary = parent.summary
        rpe = parent.rpe
        if parent.privacy == 'hidden':  # pragma: no cover
            makeprivate = True
        else:
            makeprivate = False
        startdatetime = parent.startdatetime + dt
    else:
        # No parent: standard sculling rigging and empty metadata.
        oarlength = 2.89
        inboard = 0.88
        notes = ''
        summary = ''
        makeprivate = False
        rpe = 0
        if startdatetime == '':  # pragma: no cover
            startdatetime = timezone.now()
    if setprivate:  # pragma: no cover
        makeprivate = True
    csvfilename = 'media/df_' + strftime("%Y%m%d-%H%M%S") + '.csv'
    if forceunit == 'N':
        # change to lbs for now
        df['peakforce'] /= lbstoN
        df['averageforce'] /= lbstoN
    df.rename(columns=columndict, inplace=True)
    # Shift the relative timestamps to absolute unix time.
    unixstart = arrow.get(startdatetime).timestamp()
    df[' ElapsedTime (sec)'] = df['TimeStamp (sec)']
    df['TimeStamp (sec)'] += unixstart
    rowobj = rrdata(df=df)
    rowobj.write_csv(csvfilename, gzip=True)
    workout_id, message = save_workout_database(csvfilename, r,
                                                workouttype=workouttype,
                                                boattype=boattype,
                                                title=title,
                                                workoutsource=workoutsource,
                                                notes=notes,
                                                summary=summary,
                                                oarlength=oarlength,
                                                inboard=inboard,
                                                makeprivate=makeprivate,
                                                dosmooth=False,
                                                workoutid=workoutid,
                                                rpe=rpe,
                                                consistencychecks=False)
    _ = myqueue(queuehigh, handle_calctrimp, workout_id, csvfilename,
                r.ftp, r.sex, r.hrftp, r.max, r.rest)
    return (workout_id, message)
# A wrapper around the rowingdata class, with some error catching
from redis import StrictRedis, Redis
def get_existing_job(w):
    """Return 1 if a queued rq job already references ``w.csvfilename``, else 0.

    Scans all redis keys; keys of the shape ``<x>:<y>:<id>`` are treated as
    rq job ids and fetched. A job whose second positional argument equals
    the workout's CSV filename counts as an existing job (presumably the
    handle_calctrimp signature — confirm against the enqueue sites).
    """
    conn = StrictRedis()
    for key in conn.keys():
        parts = key.decode('utf8').split(':')
        if len(parts) != 3:
            continue
        jobid = parts[2]
        try:
            job = Job.fetch(jobid, connection=conn)
            if isinstance(job.args[1], str) and job.args[1] == w.csvfilename:
                return 1
        except Exception:
            # The key was not an rq job, the job has fewer than two args,
            # or it expired between keys() and fetch() — skip it. A bare
            # "except:" here would also swallow KeyboardInterrupt/SystemExit.
            continue
    return 0
def workout_trimp(w, reset=False):
    """Return ``(trimp, hrtss)`` for workout *w*.

    If the TRIMP value is already stored (and *reset* is falsy) it is
    returned directly. Otherwise, unless a job for this file is already
    queued, average/max HR are backfilled from the stroke file when
    missing, ``handle_calctrimp`` is queued, and ``(0, 0)`` is returned;
    the queued job stores the result on the workout.
    """
    if w.trimp > -1 and not reset:
        return w.trimp, w.hrtss
    if get_existing_job(w):
        # A calculation for this file is already queued; don't duplicate it.
        return 0, 0
    # The original assigned r = w.user twice; once is enough.
    r = w.user
    ftp = float(r.ftp)
    if w.workouttype in otwtypes:
        # On-the-water FTP is discounted by the user's configured slack.
        ftp = ftp*(100.-r.otwslack)/100.
    if r.hrftp == 0:
        # Estimate HR at FTP as the midpoint of the an/tr heart rates.
        hrftp = (r.an+r.tr)/2.
        r.hrftp = int(hrftp)
        r.save()
    if w.averagehr is None:
        # Backfill average and max HR from the stroke data file.
        rowdata = rdata(w.csvfilename)
        try:
            avghr = rowdata.df[' HRCur (bpm)'].mean()
            maxhr = rowdata.df[' HRCur (bpm)'].max()
        except KeyError:  # pragma: no cover
            avghr = None
            maxhr = None
        w.averagehr = avghr
        w.maxhr = maxhr
        w.save()
    _ = myqueue(
        queuehigh,
        handle_calctrimp,
        w.id,
        w.csvfilename,
        ftp,
        r.sex,
        r.hrftp,
        r.max,
        r.rest)
    return 0, 0
def workout_rscore(w, reset=False):
    """Return ``(rscore, normp)`` for workout *w*.

    Stored values are returned when present (and *reset* is falsy);
    otherwise ``handle_calctrimp`` is queued — unless a job for this file
    is already pending — and ``(0, 0)`` is returned.
    """
    if w.rscore > -1 and not reset:
        return w.rscore, w.normp
    if get_existing_job(w):
        # A job for this file is already pending; don't enqueue another.
        return 0, 0
    rower = w.user
    threshold = float(rower.ftp)
    if w.workouttype in otwtypes:
        # Discount on-the-water FTP by the user's slack percentage.
        threshold = threshold*(100.-rower.otwslack)/100.
    if rower.hrftp == 0:
        # Derive a missing HR-at-FTP from the an/tr heart rates.
        rower.hrftp = int((rower.an+rower.tr)/2.)
        rower.save()
    _ = myqueue(
        queuehigh,
        handle_calctrimp,
        w.id,
        w.csvfilename,
        threshold,
        rower.sex,
        rower.hrftp,
        rower.max,
        rower.rest)
    return 0, 0
def workout_normv(w, pp=4.0):
    """Return ``(normv, normw)`` for workout *w*.

    Stored values are returned when present; otherwise ``handle_calctrimp``
    is queued — unless a job for this file is already pending — and
    ``(0, 0)`` is returned. *pp* is accepted for interface compatibility
    and is not used in this function.
    """
    if w.normv > -1:  # pragma: no cover
        return w.normv, w.normw
    if get_existing_job(w):
        # A job for this file is already pending; don't enqueue another.
        return 0, 0
    rower = w.user
    threshold = float(rower.ftp)
    if w.workouttype in otwtypes:
        # Discount on-the-water FTP by the user's slack percentage.
        threshold = threshold*(100.-rower.otwslack)/100.
    if rower.hrftp == 0:  # pragma: no cover
        # Derive a missing HR-at-FTP from the an/tr heart rates.
        rower.hrftp = int((rower.an+rower.tr)/2.)
        rower.save()
    _ = myqueue(
        queuehigh,
        handle_calctrimp,
        w.id,
        w.csvfilename,
        threshold,
        rower.sex,
        rower.hrftp,
        rower.max,
        rower.rest)
    return 0, 0