Private
Public Access
1
0
Files
rowsandall/rowers/dataprep.py
Sander Roosendaal 85a023ce5c fix #433
2019-03-15 10:33:47 +01:00

2490 lines
70 KiB
Python

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals
# All the data preparation, data cleaning and data mangling should
# be defined here
from __future__ import unicode_literals, absolute_import
from rowers.models import Workout, StrokeData,Team
import pytz
from rowingdata import rowingdata as rrdata
from rowingdata import rower as rrower
from shutil import copyfile
from rowingdata import (
get_file_type, get_empower_rigging,get_empower_firmware
)
from rowers.tasks import handle_sendemail_unrecognized
from rowers.tasks import handle_zip_file
from pandas import DataFrame, Series
from django.utils import timezone
from django.utils.timezone import get_current_timezone
from django_mailbox.models import Message,Mailbox,MessageAttachment
from time import strftime
import arrow
thetimezone = get_current_timezone()
from rowingdata import (
TCXParser, RowProParser, ErgDataParser,
CoxMateParser,
BoatCoachParser, RowPerfectParser, BoatCoachAdvancedParser,
MysteryParser, BoatCoachOTWParser,QuiskeParser,
painsledDesktopParser, speedcoachParser, ErgStickParser,
SpeedCoach2Parser, FITParser, fitsummarydata,
RitmoTimeParser,KinoMapParser,
make_cumvalues,cumcpdata,ExcelTemplate,
summarydata, get_file_type,
)
from rowingdata.csvparsers import HumonParser
from rowers.metrics import axes,calc_trimp,rowingmetrics
from rowers.models import strokedatafields
#allowedcolumns = [item[0] for item in rowingmetrics]
allowedcolumns = [key for key,value in strokedatafields.items()]
from async_messages import messages as a_messages
import os
import zipfile
import pandas as pd
import numpy as np
import itertools
import math
from rowers.tasks import (
handle_sendemail_unrecognized, handle_sendemail_breakthrough,
handle_sendemail_hard, handle_updatecp,handle_updateergcp,
handle_calctrimp,
)
from django.conf import settings
from sqlalchemy import create_engine
import sqlalchemy as sa
import sys
import rowers.utils as utils
import rowers.datautils as datautils
from rowers.utils import lbstoN,myqueue,is_ranking_piece,wavg
from timezonefinder import TimezoneFinder
import django_rq
queue = django_rq.get_queue('default')
queuelow = django_rq.get_queue('low')
queuehigh = django_rq.get_queue('default')
from rowsandall_app.settings import SITE_URL
from rowers.mytypes import otwtypes,otetypes
from rowers.database import *
from rowers.opaque import encoder
# mapping the DB column names to the CSV file column names
columndict = {
'time': 'TimeStamp (sec)',
'hr': ' HRCur (bpm)',
'velo': ' AverageBoatSpeed (m/s)',
'pace': ' Stroke500mPace (sec/500m)',
'spm': ' Cadence (stokes/min)',
'power': ' Power (watts)',
'averageforce': ' AverageDriveForce (lbs)',
'drivelength': ' DriveLength (meters)',
'peakforce': ' PeakDriveForce (lbs)',
'distance': ' Horizontal (meters)',
'catch': 'catch',
'finish': 'finish',
'peakforceangle': 'peakforceangle',
'wash': 'wash',
'slip': 'slip',
'workoutstate': ' WorkoutState',
'cumdist': 'cum_dist',
}
from scipy.signal import savgol_filter
import datetime
def get_latlon(id):
try:
w = Workout.objects.get(id=id)
except Workout.DoesNotExist:
return False
rowdata = rdata(w.csvfilename)
if rowdata.df.empty:
return [pd.Series([]), pd.Series([])]
try:
try:
latitude = rowdata.df.loc[:, ' latitude']
longitude = rowdata.df.loc[:, ' longitude']
except KeyError:
latitude = 0 * rowdata.df.loc[:, 'TimeStamp (sec)']
longitude = 0 * rowdata.df.loc[:, 'TimeStamp (sec)']
return [latitude, longitude]
except AttributeError:
return [pd.Series([]), pd.Series([])]
return [pd.Series([]), pd.Series([])]
def workout_summary_to_df(
rower,
startdate=datetime.datetime(1970,1,1),
enddate=timezone.now()+timezone.timedelta(days=1)):
ws = Workout.objects.filter(user=rower).order_by("startdatetime")
types = []
names = []
startdatetimes = []
timezones = []
distances = []
durations = []
weightcategories = []
adaptivetypes = []
weightvalues = []
notes = []
tcx_links = []
csv_links = []
rscores = []
trimps = []
for w in ws:
types.append(w.workouttype)
names.append(w.name)
startdatetimes.append(w.startdatetime)
timezones.append(w.timezone)
distances.append(w.distance)
durations.append(w.duration)
weightcategories.append(w.weightcategory)
adaptivetypes.append(w.adaptiveclass)
weightvalues.append(w.weightvalue)
notes.append(w.notes)
tcx_link = SITE_URL+'/rowers/workout/{id}/emailtcx'.format(
id=encoder.encode_hex(w.id)
)
tcx_links.append(tcx_link)
csv_link = SITE_URL+'/rowers/workout/{id}/emailcsv'.format(
id=encoder.encode_hex(w.id)
)
csv_links.append(csv_link)
trimps.append(workout_trimp(w)[0])
rscore = workout_rscore(w)
rscores.append(int(rscore[0]))
df = pd.DataFrame({
'name':names,
'date':startdatetimes,
'timezone':timezones,
'type':types,
'distance (m)':distances,
'duration ':durations,
'weight category':weightcategories,
'adaptive classification':adaptivetypes,
'weight (kg)':weightvalues,
'notes':notes,
'Stroke Data TCX':tcx_links,
'Stroke Data CSV':csv_links,
'TRIMP Training Load':trimps,
'TSS Training Load':rscores,
})
return df
def get_workouts(ids, userid):
goodids = []
for id in ids:
w = Workout.objects.get(id=id)
if int(w.user.user.id) == int(userid):
goodids.append(id)
return [Workout.objects.get(id=id) for id in goodids]
def filter_df(datadf, fieldname, value, largerthan=True):
try:
x = datadf[fieldname]
except KeyError:
return datadf
if largerthan:
mask = datadf[fieldname] < value
else:
mask = datadf[fieldname] >= value
datadf.loc[mask, fieldname] = np.nan
return datadf
# joins workouts
def join_workouts(r,ids,title='Joined Workout',
parent=None,
setprivate=False,
forceunit='lbs'):
message = None
summary = ''
if parent:
oarlength = parent.oarlength
inboard = parent.inboard
workouttype = parent.workouttype
notes = parent.notes
summary = parent.summary
if parent.privacy == 'hidden':
makeprivate = True
else:
makeprivate = False
startdatetime = parent.startdatetime
else:
oarlength = 2.89
inboard = 0.88
workouttype = 'rower'
notes = ''
summary = ''
makeprivate = False
startdatetime = timezone.now()
if setprivate == True and makeprivate == False:
makeprivate = True
elif setprivate == False and makeprivate == True:
makeprivate = False
# reorder in chronological order
ws = Workout.objects.filter(id__in=ids).order_by("startdatetime")
if not parent:
parent = ws[0]
oarlength = parent.oarlength
inboard = parent.inboard
workouttype = parent.workouttype
notes = parent.notes
summary = parent.summary
files = [w.csvfilename for w in ws]
row = rdata(files[0])
files = files[1:]
while len(files):
row2 = rdata(files[0])
if row2 != 0:
row = row+row2
files = files[1:]
timestr = strftime("%Y%m%d-%H%M%S")
csvfilename = 'media/df_' + timestr + '.csv'
row.write_csv(csvfilename,gzip=True)
id, message = save_workout_database(csvfilename, r,
workouttype=workouttype,
title=title,
notes=notes,
oarlength=oarlength,
inboard=inboard,
makeprivate=makeprivate,
dosmooth=False,
consistencychecks=False)
return (id, message)
def df_resample(datadf):
# time stamps must be in seconds
timestamps = datadf['TimeStamp (sec)'].astype('int')
datadf['timestamps'] = timestamps
newdf = datadf.groupby(['timestamps']).mean()
return newdf
def clean_df_stats(datadf, workstrokesonly=True, ignorehr=True,
ignoreadvanced=False):
# clean data remove zeros and negative values
# bring metrics which have negative values to positive domain
if datadf.empty:
return datadf
try:
datadf['catch'] = -datadf['catch']
except KeyError:
pass
try:
datadf['peakforceangle'] = datadf['peakforceangle'] + 1000
except KeyError:
pass
try:
datadf['hr'] = datadf['hr'] + 10
except KeyError:
pass
# protect 0 spm values from being nulled
try:
datadf['spm'] = datadf['spm'] + 1.0
except (KeyError,TypeError) as e:
pass
try:
datadf = datadf.clip(lower=0)
except TypeError:
pass
datadf.replace(to_replace=0, value=np.nan, inplace=True)
# bring spm back to real values
try:
datadf['spm'] = datadf['spm'] - 1
except (TypeError,KeyError) as e:
pass
# return from positive domain to negative
try:
datadf['catch'] = -datadf['catch']
except KeyError:
pass
try:
datadf['peakforceangle'] = datadf['peakforceangle'] - 1000
except KeyError:
pass
try:
datadf['hr'] = datadf['hr'] - 10
except KeyError:
pass
# clean data for useful ranges per column
if not ignorehr:
try:
mask = datadf['hr'] < 30
datadf.loc[mask, 'hr'] = np.nan
except KeyError:
pass
try:
mask = datadf['spm'] < 0
datadf.loc[mask,'spm'] = np.nan
except KeyError:
pass
try:
mask = datadf['efficiency'] > 200.
datadf.loc[mask, 'efficiency'] = np.nan
except KeyError:
pass
try:
mask = datadf['spm'] < 10
datadf.loc[mask, 'spm'] = np.nan
except KeyError:
pass
try:
mask = datadf['pace'] / 1000. > 300.
datadf.loc[mask, 'pace'] = np.nan
except KeyError:
pass
try:
mask = datadf['efficiency'] < 0.
datadf.loc[mask, 'efficiency'] = np.nan
except KeyError:
pass
try:
mask = datadf['pace'] / 1000. < 60.
datadf.loc[mask, 'pace'] = np.nan
except KeyError:
pass
try:
mask = datadf['spm'] > 60
datadf.loc[mask, 'spm'] = np.nan
except KeyError:
pass
try:
mask = datadf['wash'] < 1
datadf.loc[mask, 'wash'] = np.nan
except KeyError:
pass
if not ignoreadvanced:
try:
mask = datadf['rhythm'] < 5
datadf.loc[mask, 'rhythm'] = np.nan
except KeyError:
pass
try:
mask = datadf['rhythm'] > 70
datadf.loc[mask, 'rhythm'] = np.nan
except KeyError:
pass
try:
mask = datadf['power'] < 20
datadf.loc[mask, 'power'] = np.nan
except KeyError:
pass
try:
mask = datadf['drivelength'] < 0.5
datadf.loc[mask, 'drivelength'] = np.nan
except KeyError:
pass
try:
mask = datadf['forceratio'] < 0.2
datadf.loc[mask, 'forceratio'] = np.nan
except KeyError:
pass
try:
mask = datadf['forceratio'] > 1.0
datadf.loc[mask, 'forceratio'] = np.nan
except KeyError:
pass
try:
mask = datadf['drivespeed'] < 0.5
datadf.loc[mask, 'drivespeed'] = np.nan
except KeyError:
pass
try:
mask = datadf['drivespeed'] > 4
datadf.loc[mask, 'drivespeed'] = np.nan
except KeyError:
pass
try:
mask = datadf['driveenergy'] > 2000
datadf.loc[mask, 'driveenergy'] = np.nan
except KeyError:
pass
try:
mask = datadf['driveenergy'] < 100
datadf.loc[mask, 'driveenergy'] = np.nan
except KeyError:
pass
try:
mask = datadf['catch'] > -30.
datadf.loc[mask, 'catch'] = np.nan
except KeyError:
pass
workoutstateswork = [1, 4, 5, 8, 9, 6, 7]
workoutstatesrest = [3]
workoutstatetransition = [0, 2, 10, 11, 12, 13]
if workstrokesonly == 'True' or workstrokesonly == True:
try:
datadf = datadf[~datadf['workoutstate'].isin(workoutstatesrest)]
except:
pass
return datadf
def getstatsfields():
# Get field names and remove those that are not useful in stats
fields = StrokeData._meta.get_fields()
fielddict = {field.name: field.verbose_name for field in fields}
# fielddict.pop('workoutid')
fielddict.pop('ergpace')
fielddict.pop('hr_an')
fielddict.pop('hr_tr')
fielddict.pop('hr_at')
fielddict.pop('hr_ut2')
fielddict.pop('hr_ut1')
fielddict.pop('time')
fielddict.pop('distance')
fielddict.pop('nowindpace')
fielddict.pop('fnowindpace')
fielddict.pop('fergpace')
fielddict.pop('equivergpower')
# fielddict.pop('workoutstate')
fielddict.pop('fpace')
fielddict.pop('pace')
fielddict.pop('id')
fielddict.pop('ftime')
fielddict.pop('x_right')
fielddict.pop('hr_max')
fielddict.pop('hr_bottom')
fielddict.pop('cumdist')
try:
fieldlist = [field for field, value in fielddict.iteritems()]
except AttributeError:
fieldlist = [field for field, value in fielddict.items()]
return fieldlist, fielddict
# A string representation for time deltas
def niceformat(values):
out = []
for v in values:
formattedv = strfdelta(v)
out.append(formattedv)
return out
# A nice printable format for time delta values
def strfdelta(tdelta):
try:
minutes, seconds = divmod(tdelta.seconds, 60)
tenths = int(tdelta.microseconds / 1e5)
except AttributeError:
minutes, seconds = divmod(tdelta.view(np.int64), 60e9)
seconds, rest = divmod(seconds, 1e9)
tenths = int(rest / 1e8)
res = "{minutes:0>2}:{seconds:0>2}.{tenths:0>1}".format(
minutes=minutes,
seconds=seconds,
tenths=tenths,
)
return res
def timedelta_to_seconds(tdelta):
return 60.*tdelta.minute+tdelta.second
# A nice printable format for pace values
def nicepaceformat(values):
out = []
for v in values:
formattedv = strfdelta(v)
out.append(formattedv)
return out
# Convert seconds to a Time Delta value, replacing NaN with a 5:50 pace
def timedeltaconv(x):
if np.isfinite(x) and x != 0 and x > 0 and x < 175000:
dt = datetime.timedelta(seconds=x)
else:
dt = datetime.timedelta(seconds=350.)
return dt
def paceformatsecs(values):
out = []
for v in values:
td = timedeltaconv(v)
formattedv = strfdelta(td)
out.append(formattedv)
return out
def fitnessmetric_to_sql(m,table='powertimefitnessmetric',debug=False):
engine = create_engine(database_url, echo=False)
columns = ', '.join(m.keys())
placeholders = ", ".join(["?"] * len(m))
query = "INSERT into %s ( %s ) Values (%s)" % (table, columns, placeholders)
values = tuple(m[key] for key in m.keys())
with engine.connect() as conn, conn.begin():
result = conn.execute(query,values)
conn.close()
engine.dispose()
return 1
def getcpdata_sql(rower_id,table='cpdata'):
engine = create_engine(database_url, echo=False)
query = sa.text('SELECT * from {table} WHERE user={rower_id};'.format(
rower_id=rower_id,
table=table,
))
connection = engine.raw_connection()
df = pd.read_sql_query(query, engine)
return df
def deletecpdata_sql(rower_id,table='cpdata'):
engine = create_engine(database_url, echo=False)
query = sa.text('DELETE from {table} WHERE user={rower_id};'.format(
rower_id=rower_id,
table=table,
))
with engine.connect() as conn, conn.begin():
try:
result = conn.execute(query)
except:
print("Database locked")
conn.close()
engine.dispose()
def updatecpdata_sql(rower_id,delta,cp,table='cpdata',distance=[]):
deletecpdata_sql(rower_id)
df = pd.DataFrame(
{
'delta':delta,
'cp':cp,
'user':rower_id
}
)
if not distance.empty:
df['distance'] = distance
engine = create_engine(database_url, echo=False)
with engine.connect() as conn, conn.begin():
df.to_sql(table, engine, if_exists='append', index=False)
conn.close()
engine.dispose()
def runcpupdate(
rower,type='water',
startdate=timezone.now()-datetime.timedelta(days=365),
enddate=timezone.now()+datetime.timedelta(days=5)
):
if type == 'water':
theworkouts = Workout.objects.filter(
user=rower,rankingpiece=True,
workouttype='water',
startdatetime__gte=startdate,
startdatetime__lte=enddate
)
table = 'cpdata'
else:
theworkouts = Workout.objects.filter(
user=rower,rankingpiece=True,
workouttype__in=[
'rower',
'dynamic',
'slides'
],
startdatetime__gte=startdate,
startdatetime__lte=enddate
)
table = 'cpergdata'
theids = [w.id for w in theworkouts]
job = myqueue(
queue,
handle_updatecp,
rower.id,
theids,
table=table)
return job
def fetchcperg(rower,theworkouts):
theids = [int(w.id) for w in theworkouts]
thefilenames = [w.csvfilename for w in theworkouts]
cpdf = getcpdata_sql(rower.id,table='ergcpdata')
job = myqueue(
queue,
handle_updateergcp,
rower.id,
thefilenames)
return cpdf
def fetchcp(rower,theworkouts,table='cpdata'):
# get all power data from database (plus workoutid)
theids = [int(w.id) for w in theworkouts]
columns = ['power','workoutid','time']
df = getsmallrowdata_db(columns,ids=theids)
df.dropna(inplace=True,axis=0)
if df.empty:
avgpower2 = {}
for id in theids:
avgpower2[id] = 0
return pd.Series([]),pd.Series([]),avgpower2
try:
dfgrouped = df.groupby(['workoutid'])
except KeyError:
avgpower2 = {}
return pd.Series([]),pd.Series([]),avgpower2
try:
avgpower2 = dict(dfgrouped.mean()['power'].astype(int))
except KeyError:
avgpower2 = {}
for id in theids:
avgpower2[id] = 0
return pd.Series([]),pd.Series([]),avgpower2
cpdf = getcpdata_sql(rower.id,table=table)
if not cpdf.empty:
return cpdf['delta'],cpdf['cp'],avgpower2
else:
job = myqueue(queue,
handle_updatecp,
rower.id,
theids,
table=table)
return pd.Series([]),pd.Series([]),avgpower2
return pd.Series([]),pd.Series([]),avgpower2
# create a new workout from manually entered data
def create_row_df(r,distance,duration,startdatetime,workouttype='rower',
avghr=None,avgpwr=None,avgspm=None,
rankingpiece = False,
duplicate=False,
title='Manual entry',notes='',weightcategory='hwt',
adaptiveclass='None'):
if duration is not None:
totalseconds = duration.hour*3600.
totalseconds += duration.minute*60.
totalseconds += duration.second
totalseconds += duration.microsecond/1.e6
else:
totalseconds = 60.
if distance is None:
distance = 0
try:
nr_strokes = int(distance/10.)
except TypeError:
nr_strokes = int(20.*totalseconds)
if nr_strokes == 0:
nr_strokes = 100
unixstarttime = arrow.get(startdatetime).timestamp
if not avgspm:
try:
spm = 60.*nr_strokes/totalseconds
except ZeroDivisionError:
spm = 20.
else:
spm = avgspm
step = totalseconds/float(nr_strokes)
elapsed = np.arange(nr_strokes)*totalseconds/(float(nr_strokes-1))
dstep = distance/float(nr_strokes)
d = np.arange(nr_strokes)*distance/(float(nr_strokes-1))
unixtime = unixstarttime + elapsed
try:
pace = 500.*totalseconds/distance
except ZeroDivisionError:
pace = 240.
if workouttype in ['rower','slides','dynamic']:
try:
velo = distance/totalseconds
except ZeroDivisionError:
velo = 2.4
power = 2.8*velo**3
elif avgpwr is not None:
power = avgpwr
else:
power = 0
if avghr is not None:
hr = avghr
else:
hr = 0
df = pd.DataFrame({
'TimeStamp (sec)': unixtime,
' Horizontal (meters)': d,
' Cadence (stokes/min)': spm,
' Stroke500mPace (sec/500m)':pace,
' ElapsedTime (sec)':elapsed,
' Power (watts)':power,
' HRCur (bpm)':hr,
})
timestr = strftime("%Y%m%d-%H%M%S")
csvfilename = 'media/df_' + timestr + '.csv'
df[' ElapsedTime (sec)'] = df['TimeStamp (sec)']
row = rrdata(df=df)
row.write_csv(csvfilename, gzip = True)
id, message = save_workout_database(csvfilename, r,
title=title,
notes=notes,
rankingpiece=rankingpiece,
duplicate=duplicate,
dosmooth=False,
workouttype=workouttype,
consistencychecks=False,
weightcategory=weightcategory,
adaptiveclass=adaptiveclass,
totaltime=totalseconds)
return (id, message)
from rowers.utils import totaltime_sec_to_string
# Processes painsled CSV file to database
def save_workout_database(f2, r, dosmooth=True, workouttype='rower',
boattype='1x',
adaptiveclass='None',
weightcategory='hwt',
dosummary=True, title='Workout',
workoutsource='unknown',
notes='', totaldist=0, totaltime=0,
rankingpiece=False,
duplicate=False,
summary='',
makeprivate=False,
oarlength=2.89, inboard=0.88,
forceunit='lbs',
consistencychecks=False):
message = None
powerperc = 100 * np.array([r.pw_ut2,
r.pw_ut1,
r.pw_at,
r.pw_tr, r.pw_an]) / r.ftp
# make workout and put in database
rr = rrower(hrmax=r.max, hrut2=r.ut2,
hrut1=r.ut1, hrat=r.at,
hrtr=r.tr, hran=r.an, ftp=r.ftp,
powerperc=powerperc, powerzones=r.powerzones)
row = rdata(f2, rower=rr)
if row.df.empty:
return (0, 'Error: CSV data file was empty')
dtavg = row.df['TimeStamp (sec)'].diff().mean()
if dtavg < 1:
newdf = df_resample(row.df)
try:
os.remove(f2)
except:
pass
return new_workout_from_df(r, newdf,
title=title,boattype=boattype,
workouttype=workouttype,
workoutsource=workoutsource)
try:
checks = row.check_consistency()
allchecks = 1
for key, value in checks.items():
if not value:
allchecks = 0
if consistencychecks:
a_messages.error(
r.user, 'Failed consistency check: ' + key + ', autocorrected')
else:
pass
# a_messages.error(r.user,'Failed consistency check: '+key+', not corrected')
except ZeroDivisionError:
pass
if not allchecks and consistencychecks:
# row.repair()
pass
if row == 0:
return (0, 'Error: CSV data file not found')
if dosmooth:
# auto smoothing
pace = row.df[' Stroke500mPace (sec/500m)'].values
velo = 500. / pace
f = row.df['TimeStamp (sec)'].diff().mean()
if f != 0 and not np.isnan(f):
windowsize = 2 * (int(10. / (f))) + 1
else:
windowsize = 1
if not 'originalvelo' in row.df:
row.df['originalvelo'] = velo
if windowsize > 3 and windowsize < len(velo):
velo2 = savgol_filter(velo, windowsize, 3)
else:
velo2 = velo
velo3 = pd.Series(velo2)
velo3 = velo3.replace([-np.inf, np.inf], np.nan)
velo3 = velo3.fillna(method='ffill')
pace2 = 500. / abs(velo3)
row.df[' Stroke500mPace (sec/500m)'] = pace2
row.df = row.df.fillna(0)
row.write_csv(f2, gzip=True)
try:
os.remove(f2)
except:
pass
# recalculate power data
if workouttype == 'rower' or workouttype == 'dynamic' or workouttype == 'slides':
try:
row.erg_recalculatepower()
row.write_csv(f2, gzip=True)
except:
pass
averagehr = row.df[' HRCur (bpm)'].mean()
maxhr = row.df[' HRCur (bpm)'].max()
if totaldist == 0:
totaldist = row.df['cum_dist'].max()
if totaltime == 0:
totaltime = row.df['TimeStamp (sec)'].max(
) - row.df['TimeStamp (sec)'].min()
try:
totaltime = totaltime + row.df.loc[:, ' ElapsedTime (sec)'].iloc[0]
except KeyError:
pass
if np.isnan(totaltime):
totaltime = 0
if dosummary:
summary = row.allstats()
timezone_str = 'UTC'
try:
workoutstartdatetime = timezone.make_aware(row.rowdatetime)
except ValueError:
workoutstartdatetime = row.rowdatetime
try:
latavg = row.df[' latitude'].mean()
lonavg = row.df[' longitude'].mean()
tf = TimezoneFinder()
try:
timezone_str = tf.timezone_at(lng=lonavg, lat=latavg)
except ValueError:
timezone_str = 'UTC'
if timezone_str == None:
timezone_str = tf.closest_timezone_at(lng=lonavg,
lat=latavg)
if timezone_str == None:
timezone_str = r.defaulttimezone
try:
workoutstartdatetime = pytz.timezone(timezone_str).localize(
row.rowdatetime
)
except ValueError:
workoutstartdatetime = workoutstartdatetime.astimezone(
pytz.timezone(timezone_str)
)
except KeyError:
timezone_str = r.defaulttimezone
duration = totaltime_sec_to_string(totaltime)
workoutdate = workoutstartdatetime.astimezone(
pytz.timezone(timezone_str)
).strftime('%Y-%m-%d')
workoutstarttime = workoutstartdatetime.astimezone(
pytz.timezone(timezone_str)
).strftime('%H:%M:%S')
if makeprivate:
privacy = 'hidden'
else:
privacy = 'visible'
# checking for inf values
totaldist = np.nan_to_num(totaldist)
maxhr = np.nan_to_num(maxhr)
averagehr = np.nan_to_num(averagehr)
dragfactor = 0
if workouttype in otetypes:
dragfactor = row.dragfactor
t = datetime.datetime.strptime(duration,"%H:%M:%S.%f")
delta = datetime.timedelta(hours=t.hour, minutes=t.minute, seconds=t.second)
workoutenddatetime = workoutstartdatetime+delta
# check for duplicate start times and duration
ws = Workout.objects.filter(user=r,date=workoutdate,duplicate=False).exclude(
startdatetime__gt=workoutenddatetime
)
ws2 = []
for ww in ws:
t = ww.duration
delta = datetime.timedelta(hours=t.hour, minutes=t.minute, seconds=t.second)
enddatetime = ww.startdatetime+delta
if enddatetime > workoutstartdatetime:
ws2.append(ww)
if (len(ws2) != 0):
message = "Warning: This workout overlaps with an existing one and was marked as a duplicate"
duplicate = True
w = Workout(user=r, name=title, date=workoutdate,
workouttype=workouttype,
boattype=boattype,
dragfactor=dragfactor,
duration=duration, distance=totaldist,
weightcategory=weightcategory,
adaptiveclass=adaptiveclass,
starttime=workoutstarttime,
duplicate=duplicate,
workoutsource=workoutsource,
rankingpiece=rankingpiece,
forceunit=forceunit,
csvfilename=f2, notes=notes, summary=summary,
maxhr=maxhr, averagehr=averagehr,
startdatetime=workoutstartdatetime,
inboard=inboard, oarlength=oarlength,
timezone=timezone_str,
privacy=privacy)
try:
w.save()
except ValidationError:
w.startdatetime = timezone.now()
w.save()
if privacy == 'visible':
ts = Team.objects.filter(rower=r)
for t in ts:
w.team.add(t)
# put stroke data in database
res = dataprep(row.df, id=w.id, bands=True,
barchart=True, otwpower=True, empower=True, inboard=inboard)
rscore,normp = workout_rscore(w)
trimp,hrtss = workout_trimp(w)
isbreakthrough = False
ishard = False
if workouttype == 'water':
df = getsmallrowdata_db(['power', 'workoutid', 'time'], ids=[w.id])
try:
powermean = df['power'].mean()
except KeyError:
powermean = 0
if powermean != 0:
thesecs = totaltime
maxt = 1.05 * thesecs
if maxt > 0:
logarr = datautils.getlogarr(maxt)
dfgrouped = df.groupby(['workoutid'])
delta, cpvalues, avgpower = datautils.getcp(dfgrouped, logarr)
res, btvalues, res2 = utils.isbreakthrough(
delta, cpvalues, r.p0, r.p1, r.p2, r.p3, r.cpratio)
else:
res = 0
res2 = 0
if res:
isbreakthrough = True
res = datautils.updatecp(delta, cpvalues, r)
if res2 and not isbreakthrough:
ishard = True
# submit email task to send email about breakthrough workout
if isbreakthrough:
a_messages.info(
r.user, 'It looks like you have a new breakthrough workout'
)
if r.getemailnotifications and not r.emailbounced:
job = myqueue(queuehigh,handle_sendemail_breakthrough,
w.id,
r.user.email,
r.user.first_name,
r.user.last_name,
btvalues=btvalues.to_json())
# submit email task to send email about breakthrough workout
if ishard:
a_messages.info(r.user, 'That was a pretty hard workout')
if r.getemailnotifications and not r.emailbounced:
job = myqueue(queuehigh,handle_sendemail_hard,
w.id,
r.user.email,
r.user.first_name,
r.user.last_name,
btvalues=btvalues.to_json())
return (w.id, message)
parsers = {
'kinomap': KinoMapParser,
'xls': ExcelTemplate,
'rp': RowProParser,
'tcx':TCXParser,
'mystery':MysteryParser,
'ritmotime':RitmoTimeParser,
'quiske': QuiskeParser,
'rowperfect3': RowPerfectParser,
'coxmate': CoxMateParser,
'bcmike': BoatCoachAdvancedParser,
'boatcoach': BoatCoachParser,
'boatcoachotw': BoatCoachOTWParser,
'painsleddesktop': painsledDesktopParser,
'speedcoach': speedcoachParser,
'speedcoach2': SpeedCoach2Parser,
'ergstick': ErgStickParser,
'fit': FITParser,
'ergdata': ErgDataParser,
'humon': HumonParser,
}
def parsenonpainsled(fileformat,f2,summary):
try:
row = parsers[fileformat](f2)
hasrecognized = True
except KeyError:
hasrecognized = False
# handle speed coach GPS 2
if (fileformat == 'speedcoach2'):
oarlength, inboard = get_empower_rigging(f2)
empowerfirmware = get_empower_firmware(f2)
if empowerfirmware != '':
fileformat = fileformat+'v'+str(empowerfirmware)
else:
fileformat = 'speedcoach2v0'
summary = row.allstats()
# handle FIT
if (fileformat == 'fit'):
try:
s = fitsummarydata(f2)
s.setsummary()
summary = s.summarytext
except:
pass
hasrecognized = True
return row,hasrecognized,summary,fileformat
def handle_nonpainsled(f2, fileformat, summary=''):
oarlength = 2.89
inboard = 0.88
hasrecognized = False
try:
row,hasrecognized,summary,fileformat = parsenonpainsled(fileformat,f2,summary)
except:
pass
# Handle c2log
if (fileformat == 'c2log' or fileformat == 'rowprolog'):
return (0,0,0,0,0)
if not hasrecognized:
return (0,0,0,0,0)
f_to_be_deleted = f2
# should delete file
f2 = f2[:-4] + 'o.csv'
try:
row2 = rrdata(df = row.df)
row2.write_csv(f2, gzip=True)
except:
return (0,0,0,0,0)
# os.remove(f2)
try:
os.remove(f_to_be_deleted)
except:
try:
os.remove(f_to_be_deleted + '.gz')
except:
pass
return (f2, summary, oarlength, inboard, fileformat)
# Create new workout from file and store it in the database
# This routine should be used everywhere in views.py and mailprocessing.py
# Currently there is code duplication
def new_workout_from_file(r, f2,
workouttype='rower',
workoutsource=None,
title='Workout',
boattype='1x',
makeprivate=False,
notes=''):
message = None
try:
fileformat = get_file_type(f2)
except IOError:
os.remove(f2)
message = "Rowsandall could not process this file. The extension is supported but the file seems corrupt. Contact info@rowsandall.com if you think this is incorrect."
return (0, message, f2)
summary = ''
oarlength = 2.89
inboard = 0.88
if len(fileformat) == 3 and fileformat[0] == 'zip':
f_to_be_deleted = f2
workoutsbox = Mailbox.objects.filter(name='workouts')[0]
msg = Message(mailbox=workoutsbox,
from_header=r.user.email,
subject = title)
msg.save()
f3 = 'media/mailbox_attachments/'+f2[6:]
copyfile(f2,f3)
f3 = f3[6:]
a = MessageAttachment(message=msg,document=f3)
a.save()
return -1, message, f2
# Some people try to upload Concept2 logbook summaries
if fileformat == 'c2log':
os.remove(f2)
message = "This summary does not contain stroke data. Use the files containing stroke by stroke data."
return (0, message, f2)
if fileformat == 'nostrokes':
os.remove(f2)
message = "It looks like this file doesn't contain stroke data."
return (0, message, f2)
if fileformat == 'kml':
os.remove(f2)
message = "KML files are not supported"
return (0, message, f2)
# Some people upload corrupted zip files
if fileformat == 'notgzip':
os.remove(f2)
message = "Rowsandall could not process this file. The extension is supported but the file seems corrupt. Contact info@rowsandall.com if you think this is incorrect."
return (0, message, f2)
# Some people try to upload RowPro summary logs
if fileformat == 'rowprolog':
os.remove(f2)
message = "This RowPro logbook summary does not contain stroke data. Please use the Stroke Data CSV file for the individual workout in your log."
return (0, message, f2)
# Sometimes people try an unsupported file type.
# Send an email to info@rowsandall.com with the file attached
# for me to check if it is a bug, or a new file type
# worth supporting
if fileformat == 'unknown':
message = "We couldn't recognize the file type"
f4 = f2[:-5]+'a'+f2[-5:]
copyfile(f2,f4)
job = myqueue(queuehigh,
handle_sendemail_unrecognized,
f4,
r.user.email)
return (0, message, f2)
# handle non-Painsled by converting it to painsled compatible CSV
if (fileformat != 'csv'):
try:
f2, summary, oarlength, inboard, fileformat = handle_nonpainsled(
f2,
fileformat,
summary=summary
)
if not f2:
message = 'Something went wrong'
return (0, message, '')
except Exception as e:
errorstring = str(sys.exc_info()[0])
message = 'Something went wrong: ' + e.message
return (0, message, '')
dosummary = (fileformat != 'fit' and 'speedcoach2' not in fileformat)
dosummary = dosummary or summary == ''
if workoutsource is None:
workoutsource = fileformat
id, message = save_workout_database(
f2, r,
workouttype=workouttype,
weightcategory=r.weightcategory,
adaptiveclass=r.adaptiveclass,
boattype=boattype,
makeprivate=makeprivate,
dosummary=dosummary,
workoutsource=workoutsource,
summary=summary,
inboard=inboard, oarlength=oarlength,
title=title
)
return (id, message, f2)
def split_workout(r, parent, splitsecond, splitmode):
data, row = getrowdata_db(id=parent.id)
latitude, longitude = get_latlon(parent.id)
if not latitude.empty and not longitude.empty:
data[' latitude'] = latitude
data[' longitude'] = longitude
data['time'] = data['time'] / 1000.
data1 = data[data['time'] <= splitsecond].copy()
data2 = data[data['time'] > splitsecond].copy()
data1 = data1.sort_values(['time'])
data1 = data1.interpolate(method='linear', axis=0, limit_direction='both',
limit=10)
data1.fillna(method='bfill', inplace=True)
# Some new stuff to try out
data1 = data1.groupby('time', axis=0).mean()
data1['time'] = data1.index
data1.reset_index(drop=True, inplace=True)
data2 = data2.sort_values(['time'])
data2 = data2.interpolate(method='linear', axis=0, limit_direction='both',
limit=10)
data2.fillna(method='bfill', inplace=True)
# Some new stuff to try out
data2 = data2.groupby('time', axis=0).mean()
data2['time'] = data2.index
data2.reset_index(drop=True, inplace=True)
data1['pace'] = data1['pace'] / 1000.
data2['pace'] = data2['pace'] / 1000.
data1.drop_duplicates(subset='time', inplace=True)
data2.drop_duplicates(subset='time', inplace=True)
messages = []
ids = []
if 'keep first' in splitmode:
if 'firstprivate' in splitmode:
setprivate = True
else:
setprivate = False
id, message = new_workout_from_df(r, data1,
title=parent.name + ' (1)',
parent=parent,
setprivate=setprivate,
forceunit='N')
messages.append(message)
ids.append(encoder.encode_hex(id))
if 'keep second' in splitmode:
data2['cumdist'] = data2['cumdist'] - data2.iloc[
0,
data2.columns.get_loc('cumdist')
]
data2['distance'] = data2['distance'] - data2.iloc[
0,
data2.columns.get_loc('distance')
]
data2['time'] = data2['time'] - data2.iloc[
0,
data2.columns.get_loc('time')
]
if 'secondprivate' in splitmode:
setprivate = True
else:
setprivate = False
dt = datetime.timedelta(seconds=splitsecond)
id, message = new_workout_from_df(r, data2,
title=parent.name + ' (2)',
parent=parent,
setprivate=setprivate,
dt=dt, forceunit='N')
messages.append(message)
ids.append(encoder.encode_hex(id))
if not 'keep original' in splitmode:
if 'keep second' in splitmode or 'keep first' in splitmode:
parent.delete()
messages.append('Deleted Workout: ' + parent.name)
else:
messages.append('That would delete your workout')
ids.append(encoder.encode_hex(parent.id))
elif 'originalprivate' in splitmode:
parent.privacy = 'hidden'
parent.save()
return ids, messages
# Create new workout from data frame and store it in the database
# This routine should be used everywhere in views.py and mailprocessing.py
# Currently there is code duplication
def new_workout_from_df(r, df,
title='New Workout',
workoutsource='unknown',
boattype='1x',
workouttype='rower',
parent=None,
setprivate=False,
forceunit='lbs',
dt=datetime.timedelta()):
message = None
summary = ''
if parent:
oarlength = parent.oarlength
inboard = parent.inboard
workoutsource = parent.workoutsource
workouttype = parent.workouttype
boattype = parent.boattype
notes = parent.notes
summary = parent.summary
if parent.privacy == 'hidden':
makeprivate = True
else:
makeprivate = False
startdatetime = parent.startdatetime + dt
else:
oarlength = 2.89
inboard = 0.88
notes = ''
summary = ''
makeprivate = False
startdatetime = timezone.now()
if setprivate:
makeprivate = True
timestr = strftime("%Y%m%d-%H%M%S")
csvfilename = 'media/df_' + timestr + '.csv'
if forceunit == 'N':
# change to lbs for now
df['peakforce'] /= lbstoN
df['averageforce'] /= lbstoN
df.rename(columns=columndict, inplace=True)
#starttimeunix = mktime(startdatetime.utctimetuple())
starttimeunix = arrow.get(startdatetime).timestamp
df[' ElapsedTime (sec)'] = df['TimeStamp (sec)']
df['TimeStamp (sec)'] = df['TimeStamp (sec)'] + starttimeunix
row = rrdata(df=df)
row.write_csv(csvfilename, gzip=True)
# res = df.to_csv(csvfilename+'.gz',index_label='index',
# compression='gzip')
id, message = save_workout_database(csvfilename, r,
workouttype=workouttype,
boattype=boattype,
title=title,
workoutsource=workoutsource,
notes=notes,
oarlength=oarlength,
inboard=inboard,
makeprivate=makeprivate,
dosmooth=False,
consistencychecks=False)
return (id, message)
# Compare the data from the CSV file and the database
# Currently only calculates number of strokes. To be expanded with
# more elaborate testing if needed
def compare_data(id):
row = Workout.objects.get(id=id)
f1 = row.csvfilename
try:
rowdata = rdata(f1)
l1 = len(rowdata.df)
except AttributeError:
rowdata = 0
l1 = 0
engine = create_engine(database_url, echo=False)
query = sa.text('SELECT COUNT(*) FROM strokedata WHERE workoutid={id};'.format(
id=id,
))
with engine.connect() as conn, conn.begin():
try:
res = conn.execute(query)
l2 = res.fetchall()[0][0]
except:
print("Database Locked")
conn.close()
engine.dispose()
lfile = l1
ldb = l2
return l1 == l2 and l1 != 0, ldb, lfile
# Repair data for workouts where the CSV file is lost (or the DB entries
# don't exist)
def repair_data(verbose=False):
ws = Workout.objects.all()
for w in ws:
if verbose:
sys.stdout.write(".")
test, ldb, lfile = compare_data(w.id)
if not test:
if verbose:
print(w.id, lfile, ldb)
try:
rowdata = rdata(w.csvfilename)
if rowdata and len(rowdata.df):
update_strokedata(w.id, rowdata.df)
except (IOError, AttributeError):
pass
if lfile == 0:
# if not ldb - delete workout
try:
data = read_df_sql(w.id)
try:
datalength = len(data)
except AttributeError:
datalength = 0
if datalength != 0:
data.rename(columns=columndict, inplace=True)
res = data.to_csv(w.csvfilename + '.gz',
index_label='index',
compression='gzip')
else:
w.delete()
except:
pass
# A wrapper around the rowingdata class, with some error catching
def rdata(file, rower=rrower()):
try:
res = rrdata(csvfile=file, rower=rower)
except (IOError, IndexError):
try:
res = rrdata(csvfile=file + '.gz', rower=rower)
except (IOError, IndexError):
res = rrdata()
except:
res = rrdata()
except EOFError:
res = rrdata()
return res
# Remove all stroke data for workout ID from database
def delete_strokedata(id):
engine = create_engine(database_url, echo=False)
query = sa.text('DELETE FROM strokedata WHERE workoutid={id};'.format(
id=id,
))
with engine.connect() as conn, conn.begin():
try:
result = conn.execute(query)
except:
print("Database Locked")
conn.close()
engine.dispose()
# Replace stroke data in DB with data from CSV file
def update_strokedata(id, df):
delete_strokedata(id)
rowdata = dataprep(df, id=id, bands=True, barchart=True, otwpower=True)
# Test that all data are of a numerical time
def testdata(time, distance, pace, spm):
t1 = np.issubdtype(time, np.number)
t2 = np.issubdtype(distance, np.number)
t3 = np.issubdtype(pace, np.number)
t4 = np.issubdtype(spm, np.number)
return t1 and t2 and t3 and t4
# Get data from DB for one workout (fetches all data). If data
# is not in DB, read from CSV file (and create DB entry)
def getrowdata_db(id=0, doclean=False, convertnewtons=True,
checkefficiency=True):
data = read_df_sql(id)
data['x_right'] = data['x_right'] / 1.0e6
data['deltat'] = data['time'].diff()
if data.empty:
rowdata, row = getrowdata(id=id)
if not rowdata.empty:
data = dataprep(rowdata.df, id=id, bands=True,
barchart=True, otwpower=True)
else:
data = pd.DataFrame() # returning empty dataframe
else:
row = Workout.objects.get(id=id)
if not data.empty and data['efficiency'].mean() == 0 and data['power'].mean() != 0 and checkefficiency == True:
data = add_efficiency(id=id)
if doclean:
data = clean_df_stats(data, ignorehr=True)
return data, row
# Fetch a subset of the data from the DB
def getsmallrowdata_db(columns, ids=[], doclean=True, workstrokesonly=True):
prepmultipledata(ids)
data,extracols = read_cols_df_sql(ids, columns)
if extracols and len(ids)==1:
w = Workout.objects.get(id=ids[0])
row = rdata(w.csvfilename)
try:
row.set_instroke_metrics()
except AttributeError:
pass
try:
f = row.df['TimeStamp (sec)'].diff().mean()
except (AttributeError,KeyError) as e:
f = 0
if f != 0 and not np.isnan(f):
windowsize = 2 * (int(10. / (f))) + 1
else:
windowsize = 1
for c in extracols:
try:
cdata = row.df[c]
cdata.fillna(inplace=True,method='bfill')
# This doesn't work because sometimes data are duplicated at save
try:
cdata2 = savgol_filter(cdata.values,windowsize,3)
data[c] = cdata2
except ValueError:
data[c] = cdata
except (KeyError, AttributeError):
data[c] = 0
# convert newtons
if doclean:
data = clean_df_stats(data, ignorehr=True,
workstrokesonly=workstrokesonly)
data.dropna(axis=1,how='all',inplace=True)
data.dropna(axis=0,how='any',inplace=True)
return data
# Fetch both the workout and the workout stroke data (from CSV file)
def getrowdata(id=0):
# check if valid ID exists (workout exists)
row = Workout.objects.get(id=id)
f1 = row.csvfilename
# get user
r = row.user
u = r.user
rr = rrower(hrmax=r.max, hrut2=r.ut2,
hrut1=r.ut1, hrat=r.at,
hrtr=r.tr, hran=r.an, ftp=r.ftp)
rowdata = rdata(f1, rower=rr)
return rowdata, row
# Checks if all rows for a list of workout IDs have entries in the
# stroke_data table. If this is not the case, it creates the stroke
# data
# In theory, this should never yield any work, but it's a good
# safety net for programming errors elsewhere in the app
# Also used heavily when I moved from CSV file only to CSV+Stroke data
def prepmultipledata(ids, verbose=False):
query = sa.text('SELECT DISTINCT workoutid FROM strokedata')
engine = create_engine(database_url, echo=False)
with engine.connect() as conn, conn.begin():
res = conn.execute(query)
res = list(itertools.chain.from_iterable(res.fetchall()))
conn.close()
engine.dispose()
try:
ids2 = [int(id) for id in ids]
except ValueError:
ids2 = ids
res = list(set(ids2) - set(res))
for id in res:
rowdata, row = getrowdata(id=id)
if verbose:
print(id)
if rowdata and len(rowdata.df):
data = dataprep(rowdata.df, id=id, bands=True,
barchart=True, otwpower=True)
return res
# Read a set of columns for a set of workout ids, returns data as a
# pandas dataframe
def read_cols_df_sql(ids, columns, convertnewtons=True):
# drop columns that are not in offical list
# axx = [ax[0] for ax in axes]
prepmultipledata(ids)
axx = [f.name for f in StrokeData._meta.get_fields()]
extracols = []
columns2 = list(columns)
for c in columns:
if not c in axx:
columns2.remove(c)
extracols.append(c)
columns = list(columns2) + ['distance', 'spm', 'workoutid']
columns = [x for x in columns if x != 'None']
columns = list(set(columns))
cls = ''
ids = [int(id) for id in ids]
engine = create_engine(database_url, echo=False)
for column in columns:
cls += column + ', '
cls = cls[:-2]
if len(ids) == 0:
query = sa.text('SELECT {columns} FROM strokedata WHERE workoutid=0'.format(
columns=cls,
))
elif len(ids) == 1:
query = sa.text('SELECT {columns} FROM strokedata WHERE workoutid={id}'.format(
id=ids[0],
columns=cls,
))
else:
query = sa.text('SELECT {columns} FROM strokedata WHERE workoutid IN {ids}'.format(
columns=cls,
ids=tuple(ids),
))
connection = engine.raw_connection()
df = pd.read_sql_query(query, engine)
df = df.fillna(value=0)
if 'peakforce' in columns:
funits = ((w.id, w.forceunit)
for w in Workout.objects.filter(id__in=ids))
for id, u in funits:
if u == 'lbs':
mask = df['workoutid'] == id
df.loc[mask, 'peakforce'] = df.loc[mask, 'peakforce'] * lbstoN
if 'averageforce' in columns:
funits = ((w.id, w.forceunit)
for w in Workout.objects.filter(id__in=ids))
for id, u in funits:
if u == 'lbs':
mask = df['workoutid'] == id
df.loc[mask, 'averageforce'] = df.loc[mask,
'averageforce'] * lbstoN
engine.dispose()
return df,extracols
# Read stroke data from the DB for a Workout ID. Returns a pandas dataframe
def read_df_sql(id):
engine = create_engine(database_url, echo=False)
df = pd.read_sql_query(sa.text('SELECT * FROM strokedata WHERE workoutid={id}'.format(
id=id)), engine)
engine.dispose()
df = df.fillna(value=0)
funit = Workout.objects.get(id=id).forceunit
if funit == 'lbs':
try:
df['peakforce'] = df['peakforce'] * lbstoN
except KeyError:
pass
try:
df['averageforce'] = df['averageforce'] * lbstoN
except KeyError:
pass
return df
# Get the necessary data from the strokedata table in the DB.
# For the flex plot
def smalldataprep(therows, xparam, yparam1, yparam2):
df = pd.DataFrame()
if yparam2 == 'None':
yparam2 = 'power'
df[xparam] = []
df[yparam1] = []
df[yparam2] = []
df['distance'] = []
df['spm'] = []
for workout in therows:
f1 = workout.csvfilename
try:
rowdata = dataprep(rrdata(csvfile=f1).df)
rowdata = pd.DataFrame({xparam: rowdata[xparam],
yparam1: rowdata[yparam1],
yparam2: rowdata[yparam2],
'distance': rowdata['distance'],
'spm': rowdata['spm'],
}
)
if workout.forceunit == 'lbs':
try:
rowdata['peakforce'] *= lbstoN
except KeyError:
pass
try:
rowdata['averageforce'] *= lbstoN
except KeyError:
pass
df = pd.concat([df, rowdata], ignore_index=True)
except IOError:
try:
rowdata = dataprep(rrdata(csvfile=f1 + '.gz').df)
rowdata = pd.DataFrame({xparam: rowdata[xparam],
yparam1: rowdata[yparam1],
yparam2: rowdata[yparam2],
'distance': rowdata['distance'],
'spm': rowdata['spm'],
}
)
if workout.forceunit == 'lbs':
try:
rowdata['peakforce'] *= lbstoN
except KeyError:
pass
try:
rowdata['averageforce'] *= lbstoN
except KeyError:
pass
df = pd.concat([df, rowdata], ignore_index=True)
except IOError:
pass
return df
# data fusion
def datafusion(id1, id2, columns, offset):
workout1 = Workout.objects.get(id=id1)
workout2 = Workout.objects.get(id=id2)
df1, w1 = getrowdata_db(id=id1)
df1 = df1.drop([ # 'cumdist',
'hr_ut2',
'hr_ut1',
'hr_at',
'hr_tr',
'hr_an',
'hr_max',
'ftime',
'fpace',
'workoutid',
'id'],
1, errors='ignore')
# Add coordinates to DataFrame
latitude, longitude = get_latlon(id1)
df1[' latitude'] = latitude
df1[' longitude'] = longitude
df2 = getsmallrowdata_db(['time'] + columns, ids=[id2], doclean=False)
forceunit = 'N'
offsetmillisecs = offset.seconds * 1000 + offset.microseconds / 1000.
offsetmillisecs += offset.days * (3600 * 24 * 1000)
df2['time'] = df2['time'] + offsetmillisecs
keep1 = {c: c for c in set(df1.columns)}
for c in columns:
keep1.pop(c)
for c in df1.columns:
if not c in keep1:
df1 = df1.drop(c, 1, errors='ignore')
df = pd.concat([df1, df2], ignore_index=True)
df = df.sort_values(['time'])
df = df.interpolate(method='linear', axis=0, limit_direction='both',
limit=10)
df.fillna(method='bfill', inplace=True)
# Some new stuff to try out
df = df.groupby('time', axis=0).mean()
df['time'] = df.index
df.reset_index(drop=True, inplace=True)
df['time'] = df['time'] / 1000.
df['pace'] = df['pace'] / 1000.
df['cum_dist'] = df['cumdist']
return df, forceunit
def fix_newtons(id=0, limit=3000):
# rowdata,row = getrowdata_db(id=id,doclean=False,convertnewtons=False)
rowdata = getsmallrowdata_db(['peakforce'], ids=[id], doclean=False)
try:
#avgforce = rowdata['averageforce']
peakforce = rowdata['peakforce']
if peakforce.mean() > limit:
w = Workout.objects.get(id=id)
print("fixing ", id)
rowdata = rdata(w.csvfilename)
if rowdata and len(rowdata.df):
update_strokedata(w.id, rowdata.df)
except KeyError:
pass
def remove_invalid_columns(df):
for c in df.columns:
if not c in allowedcolumns:
df.drop(labels=c,axis=1,inplace=True)
return df
def add_efficiency(id=0):
rowdata, row = getrowdata_db(id=id,
doclean=False,
convertnewtons=False,
checkefficiency=False)
power = rowdata['power']
pace = rowdata['pace'] / 1.0e3
velo = 500. / pace
ergpw = 2.8 * velo**3
efficiency = 100. * ergpw / power
efficiency = efficiency.replace([-np.inf, np.inf], np.nan)
efficiency.fillna(method='ffill')
rowdata['efficiency'] = efficiency
rowdata = remove_invalid_columns(rowdata)
rowdata = rowdata.replace([-np.inf, np.inf], np.nan)
rowdata = rowdata.fillna(method='ffill')
delete_strokedata(id)
if id != 0:
rowdata['workoutid'] = id
engine = create_engine(database_url, echo=False)
with engine.connect() as conn, conn.begin():
rowdata.to_sql('strokedata', engine,
if_exists='append', index=False)
conn.close()
engine.dispose()
return rowdata
# This is the main routine.
# it reindexes, sorts, filters, and smooths the data, then
# saves it to the stroke_data table in the database
# Takes a rowingdata object's DataFrame as input
def dataprep(rowdatadf, id=0, bands=True, barchart=True, otwpower=True,
empower=True, inboard=0.88, forceunit='lbs'):
if rowdatadf.empty:
return 0
#rowdatadf.set_index([range(len(rowdatadf))], inplace=True)
t = rowdatadf.loc[:, 'TimeStamp (sec)']
t = pd.Series(t - rowdatadf.loc[:, 'TimeStamp (sec)'].iloc[0])
row_index = rowdatadf.loc[:, ' Stroke500mPace (sec/500m)'] > 3000
rowdatadf.loc[row_index, ' Stroke500mPace (sec/500m)'] = 3000.
p = rowdatadf.loc[:, ' Stroke500mPace (sec/500m)']
try:
velo = rowdatadf.loc[:,' AverageBoatSpeed (m/s)']
except KeyError:
velo = 500./p
hr = rowdatadf.loc[:, ' HRCur (bpm)']
spm = rowdatadf.loc[:, ' Cadence (stokes/min)']
cumdist = rowdatadf.loc[:, 'cum_dist']
power = rowdatadf.loc[:, ' Power (watts)']
averageforce = rowdatadf.loc[:, ' AverageDriveForce (lbs)']
drivelength = rowdatadf.loc[:, ' DriveLength (meters)']
try:
workoutstate = rowdatadf.loc[:, ' WorkoutState']
except KeyError:
workoutstate = 0 * hr
peakforce = rowdatadf.loc[:, ' PeakDriveForce (lbs)']
forceratio = averageforce / peakforce
forceratio = forceratio.fillna(value=0)
try:
drivetime = rowdatadf.loc[:, ' DriveTime (ms)']
recoverytime = rowdatadf.loc[:, ' StrokeRecoveryTime (ms)']
rhythm = 100. * drivetime / (recoverytime + drivetime)
rhythm = rhythm.fillna(value=0)
except:
rhythm = 0.0 * forceratio
f = rowdatadf['TimeStamp (sec)'].diff().mean()
if f != 0 and not np.isinf(f):
try:
windowsize = 2 * (int(10. / (f))) + 1
except ValueError:
windowsize = 1
else:
windowsize = 1
if windowsize <= 3:
windowsize = 5
if windowsize > 3 and windowsize < len(hr):
spm = savgol_filter(spm, windowsize, 3)
hr = savgol_filter(hr, windowsize, 3)
drivelength = savgol_filter(drivelength, windowsize, 3)
forceratio = savgol_filter(forceratio, windowsize, 3)
try:
t2 = t.fillna(method='ffill').apply(lambda x: timedeltaconv(x))
except TypeError:
t2 = 0 * t
p2 = p.fillna(method='ffill').apply(lambda x: timedeltaconv(x))
try:
drivespeed = drivelength / rowdatadf[' DriveTime (ms)'] * 1.0e3
except TypeError:
drivespeed = 0.0 * rowdatadf['TimeStamp (sec)']
drivespeed = drivespeed.fillna(value=0)
try:
driveenergy = rowdatadf['driveenergy']
except KeyError:
if forceunit == 'lbs':
driveenergy = drivelength * averageforce * lbstoN
else:
drivenergy = drivelength * averageforce
distance = rowdatadf.loc[:, 'cum_dist']
velo = 500. / p
distanceperstroke = 60. * velo / spm
data = DataFrame(
dict(
time=t * 1e3,
hr=hr,
pace=p * 1e3,
spm=spm,
velo=velo,
cumdist=cumdist,
ftime=niceformat(t2),
fpace=nicepaceformat(p2),
driveenergy=driveenergy,
power=power,
workoutstate=workoutstate,
averageforce=averageforce,
drivelength=drivelength,
peakforce=peakforce,
forceratio=forceratio,
distance=distance,
drivespeed=drivespeed,
rhythm=rhythm,
distanceperstroke=distanceperstroke,
)
)
if bands:
# HR bands
data['hr_ut2'] = rowdatadf.loc[:, 'hr_ut2']
data['hr_ut1'] = rowdatadf.loc[:, 'hr_ut1']
data['hr_at'] = rowdatadf.loc[:, 'hr_at']
data['hr_tr'] = rowdatadf.loc[:, 'hr_tr']
data['hr_an'] = rowdatadf.loc[:, 'hr_an']
data['hr_max'] = rowdatadf.loc[:, 'hr_max']
data['hr_bottom'] = 0.0 * data['hr']
try:
tel = rowdatadf.loc[:, ' ElapsedTime (sec)']
except KeyError:
rowdatadf[' ElapsedTime (sec)'] = rowdatadf['TimeStamp (sec)']
if barchart:
# time increments for bar chart
time_increments = rowdatadf.loc[:, ' ElapsedTime (sec)'].diff()
try:
time_increments.iloc[0] = time_increments.iloc[1]
except KeyError:
time_increments.iloc[0] = 1.
time_increments = 0.5 * time_increments + 0.5 * np.abs(time_increments)
x_right = (t2 + time_increments.apply(lambda x: timedeltaconv(x)))
data['x_right'] = x_right
if empower:
try:
wash = rowdatadf.loc[:, 'wash']
except KeyError:
wash = 0 * power
try:
catch = rowdatadf.loc[:, 'catch']
except KeyError:
catch = 0 * power
try:
finish = rowdatadf.loc[:, 'finish']
except KeyError:
finish = 0 * power
try:
peakforceangle = rowdatadf.loc[:, 'peakforceangle']
except KeyError:
peakforceangle = 0 * power
if data['driveenergy'].mean() == 0:
try:
driveenergy = rowdatadf.loc[:, 'driveenergy']
except KeyError:
driveenergy = power * 60 / spm
else:
driveenergy = data['driveenergy']
arclength = (inboard - 0.05) * (np.radians(finish) - np.radians(catch))
if arclength.mean() > 0:
drivelength = arclength
elif drivelength.mean() == 0:
drivelength = driveenergy / (averageforce * 4.44822)
try:
slip = rowdatadf.loc[:, 'slip']
except KeyError:
slip = 0 * power
try:
totalangle = finish - catch
effectiveangle = finish - wash - catch - slip
except ValueError:
totalangle = 0 * power
effectiveangle = 0 * power
if windowsize > 3 and windowsize < len(slip):
try:
wash = savgol_filter(wash, windowsize, 3)
except TypeError:
pass
try:
slip = savgol_filter(slip, windowsize, 3)
except TypeError:
pass
try:
catch = savgol_filter(catch, windowsize, 3)
except TypeError:
pass
try:
finish = savgol_filter(finish, windowsize, 3)
except TypeError:
pass
try:
peakforceangle = savgol_filter(peakforceangle, windowsize, 3)
except TypeError:
pass
try:
driveenergy = savgol_filter(driveenergy, windowsize, 3)
except TypeError:
pass
try:
drivelength = savgol_filter(drivelength, windowsize, 3)
except TypeError:
pass
try:
totalangle = savgol_filter(totalangle, windowsize, 3)
except TypeError:
pass
try:
effectiveangle = savgol_filter(effectiveangle, windowsize, 3)
except TypeError:
pass
velo = 500. / p
ergpw = 2.8 * velo**3
efficiency = 100. * ergpw / power
efficiency = efficiency.replace([-np.inf, np.inf], np.nan)
efficiency.fillna(method='ffill')
try:
data['wash'] = wash
data['catch'] = catch
data['slip'] = slip
data['finish'] = finish
data['peakforceangle'] = peakforceangle
data['driveenergy'] = driveenergy
data['drivelength'] = drivelength
data['totalangle'] = totalangle
data['effectiveangle'] = effectiveangle
data['efficiency'] = efficiency
except ValueError:
pass
if otwpower:
try:
nowindpace = rowdatadf.loc[:, 'nowindpace']
except KeyError:
nowindpace = p
try:
equivergpower = rowdatadf.loc[:, 'equivergpower']
except KeyError:
equivergpower = 0 * p + 50.
nowindpace2 = nowindpace.apply(lambda x: timedeltaconv(x))
ergvelo = (equivergpower / 2.8)**(1. / 3.)
ergpace = 500. / ergvelo
ergpace[ergpace == np.inf] = 240.
ergpace2 = ergpace.apply(lambda x: timedeltaconv(x))
data['ergpace'] = ergpace * 1e3
data['nowindpace'] = nowindpace * 1e3
data['equivergpower'] = equivergpower
data['fergpace'] = nicepaceformat(ergpace2)
data['fnowindpace'] = nicepaceformat(nowindpace2)
data = data.replace([-np.inf, np.inf], np.nan)
data = data.fillna(method='ffill')
# write data if id given
if id != 0:
data['workoutid'] = id
engine = create_engine(database_url, echo=False)
with engine.connect() as conn, conn.begin():
data.to_sql('strokedata', engine, if_exists='append', index=False)
conn.close()
engine.dispose()
return data
def workout_trimp(w):
r = w.user
if w.trimp > 0:
return w.trimp,w.hrtss
r = w.user
ftp = float(r.ftp)
if w.workouttype in otwtypes:
ftp = ftp*(100.-r.otwslack)/100.
if r.hrftp == 0:
hrftp = (r.an+r.tr)/2.
r.hrftp = int(hrftp)
r.save()
if w.averagehr is None:
rowdata = rdata(w.csvfilename)
try:
avghr = rowdata.df[' HRCur (bpm)'].mean()
maxhr = rowdata.df[' HRCur (bpm)'].max()
except KeyError:
avghr = None
maxhr = None
w.averagehr = avghr
w.maxhr = maxhr
w.save()
job = myqueue(
queuehigh,
handle_calctrimp,
w.id,
w.csvfilename,
ftp,
r.sex,
r.hrftp,
r.max,
r.rest)
return 0,0
def workout_rscore(w):
if w.rscore > 0:
return w.rscore,w.normp
r = w.user
ftp = float(r.ftp)
if w.workouttype in otwtypes:
ftp = ftp*(100.-r.otwslack)/100.
if r.hrftp == 0:
hrftp = (r.an+r.tr)/2.
r.hrftp = int(hrftp)
r.save()
job = myqueue(
queuehigh,
handle_calctrimp,
w.id,
w.csvfilename,
ftp,
r.sex,
r.hrftp,
r.max,
r.rest)
return 0,0
def workout_normv(w,pp=4.0):
if w.normv > 0:
return w.normv,w.normw
r = w.user
ftp = float(r.ftp)
if w.workouttype in otwtypes:
ftp = ftp*(100.-r.otwslack)/100.
if r.hrftp == 0:
hrftp = (r.an+r.tr)/2.
r.hrftp = int(hrftp)
r.save()
job = myqueue(
queuehigh,
handle_calctrimp,
w.id,
w.csvfilename,
ftp,
r.sex,
r.hrftp,
r.max,
r.rest)
return 0,0