from rowers.metrics import axes, calc_trimp, rowingmetrics, dtypes, metricsgroups, metricsdicts
from rowers.utils import lbstoN, wavg, dologging
from rowers.mytypes import otwtypes, otetypes, rowtypes
import glob
import rowingdata.tcxtools as tcxtools
from rowers.utils import totaltime_sec_to_string
from rowers.datautils import p0
from scipy import optimize
from rowers.utils import calculate_age
import datetime
from scipy.signal import savgol_filter
from rowers.opaque import encoder
from rowers.database import *
from rowers import mytypes
from rowsandall_app.settings import SITE_URL
import django_rq
from timezonefinder import TimezoneFinder
import rowers.datautils as datautils
import rowers.utils as utils
import sys
import sqlalchemy as sa
from sqlalchemy import create_engine
from django.conf import settings
import math
from fitparse.base import FitHeaderError
from fitparse import FitFile
import itertools
import numpy as np
import pandas as pd
from zipfile import BadZipFile
import zipfile
import os
from rowers.models import strokedatafields
import polars as pl
import polars.selectors as cs
from polars.exceptions import (
    ColumnNotFoundError, SchemaError, ComputeError,
    InvalidOperationError, ShapeError
)
from pandas.errors import IntCastingNaNError

from rowingdata import (
    KinoMapParser,
    ExcelTemplate,
    TCXParser,
    MysteryParser,
    RowProParser,
    RitmoTimeParser,
    QuiskeParser,
    RowPerfectParser,
    CoxMateParser,
    BoatCoachParser,
    BoatCoachOTWParser,
    BoatCoachAdvancedParser,
    painsledDesktopParser,
    speedcoachParser,
    SpeedCoach2Parser,
    ErgStickParser,
    FITParser,
    ErgDataParser,
    HumonParser,
    ETHParser,
    NKLiNKLogbookParser,
    HeroParser,
    SmartRowParser,
)

from rowingdata import make_cumvalues

# All the data preparation, data cleaning and data mangling should
# be defined here
from rowers.models import (
    Workout, Team, CalcAgePerformance, C2WorldClassAgePerformance,
    User
)

import pytz
from pytz.exceptions import UnknownTimeZoneError
import collections
import pendulum
from rowingdata import rowingdata as rrdata
from rowingdata import rowingdata_pl as rrdata_pl

from rowingdata import rower as rrower

import yaml
import shutil
from shutil import copyfile

from rowingdata import (
    get_file_type, get_empower_rigging, get_empower_firmware
)

from pandas import DataFrame, Series
import dask.dataframe as dd
from dask.delayed import delayed
import pyarrow.parquet as pq
import pyarrow as pa

from pyarrow.lib import ArrowInvalid, ArrowTypeError

from django.utils import timezone
from django.utils.timezone import get_current_timezone
from django.urls import reverse
import requests

from django.core.exceptions import ValidationError

from time import strftime
import arrow

thetimezone = get_current_timezone()

allowedcolumns = [key for key, value in strokedatafields.items()]

from rowsandall_app.settings_dev import use_sqlite
from rowsandall_app.settings_dev import DATABASES as DEV_DATABASES

try:
    user = settings.DATABASES['default']['USER']
except KeyError:  # pragma: no cover
    user = ''
try:
    password = settings.DATABASES['default']['PASSWORD']
except KeyError:  # pragma: no cover
    password = ''

try:
    database_name = settings.DATABASES['default']['NAME']
except KeyError:  # pragma: no cover
    database_name = ''
try:
    host = settings.DATABASES['default']['HOST']
except KeyError:  # pragma: no cover
    host = ''
try:
    port = settings.DATABASES['default']['PORT']
except KeyError:  # pragma: no cover
    port = ''

database_url = 'mysql://{user}:{password}@{host}:{port}/{database_name}'.format(
    user=user,
    password=password,
    database_name=database_name,
    host=host,
    port=port,
)

database_name_dev = DEV_DATABASES['default']['NAME']

if use_sqlite:
    database_url_debug = 'sqlite:///' + database_name_dev
    database_url = database_url_debug

database_url_debug = database_url

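# Example shapes (hypothetical credentials): the MySQL URL reads
# 'mysql://rower:secret@localhost:3306/rowsandall'; with use_sqlite it
# becomes 'sqlite:///db.sqlite3'.
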
# mapping the DB column names to the CSV file column names
columndict = {
    'time': 'TimeStamp (sec)',
    'hr': ' HRCur (bpm)',
    'velo': ' AverageBoatSpeed (m/s)',
    'pace': ' Stroke500mPace (sec/500m)',
    'spm': ' Cadence (stokes/min)',
    'power': ' Power (watts)',
    'averageforce': ' AverageDriveForce (lbs)',
    'drivelength': ' DriveLength (meters)',
    'peakforce': ' PeakDriveForce (lbs)',
    'distance': ' Horizontal (meters)',
    'catch': 'catch',
    'finish': 'finish',
    'peakforceangle': 'peakforceangle',
    'wash': 'wash',
    'slip': 'slip',
    'workoutstate': ' WorkoutState',
    'cumdist': 'cum_dist',
    'check_factor': 'check_factor',
}

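# Sketch: renaming the raw CSV headers to the DB column names (assumes
# `csvdf` is a pandas DataFrame read from one of the stroke CSV files):
#   csvdf = csvdf.rename(columns={v: k for k, v in columndict.items()})
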
def remove_nulls_pl(data):
    for c in data.columns:
        if c == 'hr':
            dologging('remove_nulls.log', "HR data len {f}".format(f=len(data[c])))
    data = data.lazy().with_columns(
        pl.when(
            pl.all().is_infinite()
        ).then(None).otherwise(pl.all()).keep_name()
    )
    data = data.select(pl.all().forward_fill())
    data = data.select(pl.all().backward_fill())
    data = data.fill_nan(None)

    data = data.select(cs.by_dtype(pl.NUMERIC_DTYPES)).collect()
    data = data[[s.name for s in data if not s.is_infinite().sum()]]
    data = data[[s.name for s in data if not (s.null_count() == data.height)]]

    if not data.is_empty():
        try:
            data = data.drop_nulls()
        except:  # pragma: no cover
            pass

    for c in data.columns:
        if c == 'hr':
            dologging('remove_nulls.log', "HR data len {f}".format(f=len(data[c])))

    return data

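# Sketch of the behaviour above: a numeric column [1.0, inf, None] comes back
# as [1.0, 1.0, 1.0] (infinities nulled, then forward/backward filled);
# all-null and non-numeric columns are dropped entirely.
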
def get_video_data(w, groups=['basic'], mode='water'):
    modes = [mode, 'both', 'basic']
    columns = ['time', 'velo', 'spm']
    columns += [name for name, d in rowingmetrics if d['group']
                in groups and d['mode'] in modes]
    columns = list(set(columns))
    df = getsmallrowdata_pd(columns, ids=[w.id],
                            workstrokesonly=False, doclean=False, compute=False)
    df.dropna(axis=0, how='all', inplace=True)
    df.dropna(axis=1, how='all', inplace=True)

    df['time'] = (df['time'] - df['time'].min()) / 1000.

    df.sort_values(by='time', inplace=True)

    df.set_index(pd.to_timedelta(df['time'], unit='s'), inplace=True)
    df2 = df.resample('1s').first().fillna(method='ffill')
    df2['time'] = df2.index.total_seconds()

    if 'pace' in columns:
        df2['pace'] = df2['pace'] / 1000.
        p = df2['pace']
        p = p.apply(lambda x: timedeltaconv(x))
        p = nicepaceformat(p)
        df2['pace'] = p

    df2['time'] = df2['time'] - df2['time'].min()

    df2 = df2.round(decimals=2)

    try:
        coordinates = get_latlon_time(w.id)
    except KeyError:  # pragma: no cover
        nulseries = df['time'] * 0
        coordinates = pd.DataFrame({
            'time': df['time'],
            'latitude': nulseries,
            'longitude': nulseries,
        })

    coordinates.set_index(pd.to_timedelta(
        coordinates['time'], unit='s'), inplace=True)
    coordinates = coordinates.resample('1s').first().interpolate().fillna(method='ffill')
    # coordinates['time'] = coordinates['time']-coordinates['time'].min()
    df2 = pd.concat([df2, coordinates], axis=1)
    latitude = df2['latitude']
    longitude = df2['longitude']
    try:
        boatspeed = (100 * df2['velo'].fillna(method='ffill').fillna(method='bfill')).astype(int) / 100.
    except IntCastingNaNError:
        boatspeed = 0.0 * df2['longitude']
    # bundle data
    data = {
        'boatspeed': boatspeed.values.tolist(),
        'latitude': latitude.values.tolist(),
        'longitude': longitude.values.tolist(),
    }

    metrics = {}

    for c in columns:
        if c != 'time':
            try:
                if dict(rowingmetrics)[c]['numtype'] == 'integer':  # pragma: no cover
                    data[c] = df2[c].astype(int).tolist()
                else:
                    sigfigs = dict(rowingmetrics)[c]['sigfigs']
                    if c != 'pace':
                        try:
                            da = ((10**sigfigs) * df2[c]).astype(int) / (10**sigfigs)
                        except:
                            da = df2[c]
                    else:
                        da = df2[c]
                    data[c] = da.values.tolist()
                metrics[c] = {
                    'name': dict(rowingmetrics)[c]['verbose_name'],
                    'metric': c,
                    'unit': ''
                }
            except KeyError:  # pragma: no cover
                pass

    metrics['boatspeed'] = metrics.pop('velo')
    # metrics['workperstroke'] = metrics.pop('driveenergy')
    metrics = collections.OrderedDict(sorted(metrics.items()))

    maxtime = coordinates['time'].max()

    data = pd.DataFrame(data)
    data.replace([np.inf, -np.inf], np.nan, inplace=True)
    data.dropna(inplace=True)

    data = pl.from_pandas(data)

    data = data.to_dict(as_series=False)

    return data, metrics, maxtime

def polarization_index(df, rower):
    df['dt'] = df['time'].diff() / 6.e4
    # remove rest rows (upstream cleaning masks low-spm strokes to NaN)
    df.dropna(axis=0, inplace=True)
    df['dt'] = df['dt'].clip(upper=4, lower=0)

    masklow = (df['power'] > 0) & (df['power'] < int(rower.pw_at))
    maskmid = (df['power'] >= int(rower.pw_at)) & (df['power'] < int(rower.pw_an))
    # >= so strokes exactly at the anaerobic threshold are counted
    maskhigh = (df['power'] >= int(rower.pw_an))

    time_low_pw = df.loc[masklow, 'dt'].sum()
    time_mid_pw = df.loc[maskmid, 'dt'].sum()
    time_high_pw = df.loc[maskhigh, 'dt'].sum()

    totaltime = time_low_pw + time_mid_pw + time_high_pw
    frac_low = time_low_pw / totaltime
    frac_mid = time_mid_pw / totaltime
    frac_high = time_high_pw / totaltime

    index = math.log10(frac_high * 100. * frac_low / frac_mid)

    return index

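# Example: with 80% of moving time below AT power, 10% between AT and AN and
# 10% above AN, the index is log10(0.1 * 100 * 0.8 / 0.1) = log10(80),
# roughly 1.9, indicating a polarized intensity distribution.
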
def get_latlon(id):
    try:
        w = Workout.objects.get(id=id)
    except Workout.DoesNotExist:  # pragma: no cover
        return False

    rowdata = rdata(w.csvfilename)

    if rowdata.df.empty:  # pragma: no cover
        return [pd.Series([], dtype='float'), pd.Series([], dtype='float')]

    try:
        try:
            latitude = rowdata.df.loc[:, ' latitude']
            longitude = rowdata.df.loc[:, ' longitude']
        except KeyError:
            latitude = 0 * rowdata.df.loc[:, 'TimeStamp (sec)']
            longitude = 0 * rowdata.df.loc[:, 'TimeStamp (sec)']
        return [latitude, longitude]
    except AttributeError:  # pragma: no cover
        return [pd.Series([], dtype='float'), pd.Series([], dtype='float')]

def get_latlon_time(id):
    try:
        w = Workout.objects.get(id=id)
    except Workout.DoesNotExist:  # pragma: no cover
        return False

    rowdata = rdata(w.csvfilename)

    if rowdata.df.empty:  # pragma: no cover
        return [pd.Series([], dtype='float'), pd.Series([], dtype='float')]

    try:
        try:
            _ = rowdata.df.loc[:, ' latitude']
            _ = rowdata.df.loc[:, ' longitude']
        except KeyError:  # pragma: no cover
            # use the space-prefixed column names read back below
            rowdata.df[' latitude'] = 0 * rowdata.df.loc[:, 'TimeStamp (sec)']
            rowdata.df[' longitude'] = 0 * rowdata.df.loc[:, 'TimeStamp (sec)']
    except AttributeError:  # pragma: no cover
        return pd.DataFrame()

    df = pd.DataFrame({
        'time': rowdata.df['TimeStamp (sec)'] - rowdata.df['TimeStamp (sec)'].min(),
        'latitude': rowdata.df[' latitude'],
        'longitude': rowdata.df[' longitude']
    })

    return df

def workout_has_latlon(id):
    latitude, longitude = get_latlon(id)
    latmean = latitude.mean()
    lonmean = longitude.mean()

    if latmean == 0 and lonmean == 0:
        return False, latmean, lonmean

    if latitude.std() > 0 and longitude.std() > 0:
        return True, latmean, lonmean

    return False, latmean, lonmean

def get_workouts(ids, userid):  # pragma: no cover
    goodids = []
    for id in ids:
        w = Workout.objects.get(id=id)
        if int(w.user.user.id) == int(userid):
            goodids.append(id)

    return [Workout.objects.get(id=id) for id in goodids]

def filter_df(datadf, fieldname, value, largerthan=True):
    try:
        _ = datadf[fieldname]
    except KeyError:
        return datadf

    try:
        if largerthan:
            mask = datadf[fieldname] < value
        else:
            mask = datadf[fieldname] >= value

        datadf.loc[mask, fieldname] = np.nan
    except TypeError:  # pragma: no cover
        pass

    return datadf

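# Example: filter_df(df, 'power', 20) sets power values below 20 to NaN;
# with largerthan=False it instead NaNs values of 20 and above.
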
# joins workouts


def df_resample(datadf):
    # time stamps must be in seconds
    timestamps = datadf['TimeStamp (sec)'].astype('int')

    datadf['timestamps'] = timestamps
    # newdf = datadf.groupby(['timestamps']).mean()
    newdf = datadf[~datadf.duplicated(['timestamps'])]
    return newdf

def clean_df_stats(datadf, workstrokesonly=True, ignorehr=True,
                   ignoreadvanced=False, for_chart=False):
    # clean data: remove zeros and negative values

    try:
        _ = datadf['workoutid'].unique()
    except KeyError:
        try:
            datadf['workoutid'] = 0
        except TypeError:  # pragma: no cover
            datadf = datadf.with_columns(pl.lit(0).alias("workoutid"))

    before = {}
    ids = datadf['workoutid'].unique()
    for workoutid in ids:
        before[workoutid] = len(datadf[datadf['workoutid'] == workoutid])

    data_orig = datadf.copy()

    # bring metrics which have negative values to the positive domain
    if len(datadf) == 0:
        return datadf
    try:
        datadf['catch'] = -datadf['catch']
    except (KeyError, TypeError):
        pass

    try:
        datadf['peakforceangle'] = datadf['peakforceangle'] + 1000
    except (KeyError, TypeError):
        pass

    try:
        datadf['hr'] = datadf['hr'] + 10
    except (KeyError, TypeError):
        pass

    # protect 0 spm values from being nulled
    try:
        datadf['spm'] = datadf['spm'] + 1.0
    except (KeyError, TypeError):
        pass

    # protect 0 workoutstate values from being nulled
    try:
        datadf['workoutstate'] = datadf['workoutstate'] + 1
    except (KeyError, TypeError):
        pass

    try:
        datadf = datadf.clip(lower=0)
    except TypeError:
        pass

    # protect advanced metrics columns
    advancedcols = [
        'rhythm',
        'power',
        'drivelength',
        'forceratio',
        'drivespeed',
        'driveenergy',
        'catch',
        'finish',
        'averageforce',
        'peakforce',
        'slip',
        'wash',
        'peakforceangle',
        'effectiveangle',
    ]

    datadf.replace(to_replace=0, value=np.nan, inplace=True)
    # datadf = datadf.map_partitions(lambda df: df.replace(to_replace=0, value=np.nan))

    # bring spm back to real values
    try:
        datadf['spm'] = datadf['spm'] - 1
    except (TypeError, KeyError):
        pass

    # bring workoutstate back to real values
    try:
        datadf['workoutstate'] = datadf['workoutstate'] - 1
    except (TypeError, KeyError):
        pass

    # return from positive domain to negative
    try:
        datadf['catch'] = -datadf['catch']
    except (KeyError, TypeError):
        pass

    try:
        datadf['peakforceangle'] = datadf['peakforceangle'] - 1000
    except (KeyError, TypeError):
        pass

    try:
        datadf['hr'] = datadf['hr'] - 10
    except (KeyError, TypeError):
        pass

    # clean data for useful ranges per column
    if not ignorehr:  # pragma: no cover
        try:
            mask = datadf['hr'] < 30
            datadf.mask(mask, inplace=True)
        except (KeyError, TypeError):  # pragma: no cover
            pass

    try:
        mask = datadf['spm'] < 0
        datadf.mask(mask, inplace=True)
    except (KeyError, TypeError):
        pass

    try:
        mask = datadf['efficiency'] > 200.
        datadf.mask(mask, inplace=True)
    except (KeyError, TypeError):
        pass

    try:
        mask = datadf['spm'] < 10
        datadf.mask(mask, inplace=True)
    except (KeyError, TypeError):
        pass

    try:
        mask = datadf['pace'] / 1000. > 300.
        datadf.mask(mask, inplace=True)
    except (KeyError, TypeError):
        pass

    try:
        mask = datadf['efficiency'] < 0.
        datadf.mask(mask, inplace=True)
    except (KeyError, TypeError):
        pass

    try:
        mask = datadf['pace'] / 1000. < 60.
        datadf.mask(mask, inplace=True)
    except (KeyError, TypeError):
        pass

    try:
        mask = datadf['power'] > 5000
        datadf.mask(mask, inplace=True)
    except (KeyError, TypeError):
        pass

    try:
        mask = datadf['spm'] > 120
        datadf.mask(mask, inplace=True)
    except (KeyError, TypeError):
        pass

    try:
        mask = datadf['wash'] < 1
        datadf.loc[mask, 'wash'] = np.nan
    except (KeyError, TypeError):
        pass

    # try to guess ignoreadvanced
    if not ignoreadvanced:
        for metric in advancedcols:
            try:
                colstd = datadf[metric].std()
                if colstd == 0 or np.isnan(colstd):
                    ignoreadvanced = True
            except KeyError:
                pass

    if not ignoreadvanced:
        try:
            mask = datadf['rhythm'] < 0
            datadf.mask(mask, inplace=True)
        except (KeyError, TypeError):
            pass

        try:
            mask = datadf['rhythm'] > 70
            datadf.mask(mask, inplace=True)
        except (KeyError, TypeError):
            pass

        try:
            mask = datadf['power'] < 20
            datadf.mask(mask, inplace=True)
        except (KeyError, TypeError):
            pass

        try:
            mask = datadf['drivelength'] < 0.5
            datadf.mask(mask, inplace=True)
        except (KeyError, TypeError):
            pass

        try:
            mask = datadf['forceratio'] < 0.2
            datadf.mask(mask, inplace=True)
        except (KeyError, TypeError):
            pass

        try:
            mask = datadf['forceratio'] > 1.0
            datadf.mask(mask, inplace=True)
        except (KeyError, TypeError):
            pass

        try:
            mask = datadf['drivespeed'] < 0.5
            datadf.mask(mask, inplace=True)
        except (KeyError, TypeError):
            pass

        try:
            mask = datadf['drivespeed'] > 4
            datadf.mask(mask, inplace=True)
        except (KeyError, TypeError):
            pass

        try:
            mask = datadf['driveenergy'] > 2000
            datadf.mask(mask, inplace=True)
        except (KeyError, TypeError):
            pass

        try:
            mask = datadf['driveenergy'] < 100
            datadf.mask(mask, inplace=True)
        except (KeyError, TypeError):
            pass

        try:
            mask = datadf['catch'] > -30.
            datadf.mask(mask, inplace=True)
        except (KeyError, TypeError):
            pass

    # workoutstateswork = [1, 4, 5, 8, 9, 6, 7]
    workoutstatesrest = [3]
    # workoutstatetransition = [0, 2, 10, 11, 12, 13]

    if workstrokesonly == 'True' or workstrokesonly is True:
        try:
            datadf = datadf[~datadf['workoutstate'].isin(workoutstatesrest)]
        except:
            pass

    after = {}

    if for_chart:  # pragma: no cover
        return datadf
    for workoutid in data_orig['workoutid'].unique():
        after[workoutid] = len(
            datadf[datadf['workoutid'] == workoutid].dropna())
        ratio = float(after[workoutid]) / float(before[workoutid])
        if ratio < 0.01 or after[workoutid] < 2:
            return data_orig

    return datadf  # pragma: no cover


def replace_zeros_with_nan(x):  # pragma: no cover
    return np.nan if x == 0 else x

def clean_df_stats_pl(datadf, workstrokesonly=True, ignorehr=True,
                      ignoreadvanced=False, for_chart=False):  # pragma: no cover
    # clean data: remove zeros and negative values
    try:
        _ = datadf['workoutid'].unique()
    except KeyError:  # pragma: no cover
        try:
            datadf['workoutid'] = 0
        except TypeError:
            datadf = datadf.with_columns(pl.lit(0).alias("workoutid"))
    except ColumnNotFoundError:  # pragma: no cover
        datadf = datadf.with_columns(pl.lit(0).alias("workoutid"))

    before = {}
    ids = list(datadf['workoutid'].unique())
    for workoutid in ids:
        before[workoutid] = len(datadf.filter(pl.col("workoutid") == workoutid))

    data_orig = datadf.clone()

    # bring metrics which have negative values to the positive domain
    if len(datadf) == 0:  # pragma: no cover
        return data_orig
    try:
        datadf = datadf.with_columns((-pl.col('catch')).alias('catch'))
    except (KeyError, TypeError):  # pragma: no cover
        pass
    except (ComputeError, InvalidOperationError, ColumnNotFoundError):
        return data_orig

    try:  # pragma: no cover
        datadf = datadf.with_columns((pl.col('peakforceangle') + 1000).alias('peakforceangle'))
    except (KeyError, TypeError):
        pass
    except (ComputeError, InvalidOperationError, ColumnNotFoundError):
        return data_orig

    try:  # pragma: no cover
        datadf = datadf.with_columns((pl.col('hr') + 10).alias('hr'))
    except (KeyError, TypeError):
        pass
    except (ComputeError, InvalidOperationError, ColumnNotFoundError):
        return data_orig

    # protect 0 spm values from being nulled
    try:  # pragma: no cover
        datadf = datadf.with_columns((pl.col('spm') + 1.0).alias('spm'))
    except (KeyError, TypeError):
        pass
    except (ComputeError, InvalidOperationError, ColumnNotFoundError):
        return data_orig

    # protect 0 workoutstate values from being nulled
    try:  # pragma: no cover
        datadf = datadf.with_columns((pl.col('workoutstate') + 1).alias('workoutstate'))
    except (KeyError, TypeError):
        pass
    except (ComputeError, InvalidOperationError, ColumnNotFoundError):
        return data_orig

    try:  # pragma: no cover
        datadf = datadf.select(pl.all().clip(lower_bound=0))
        # datadf = datadf.clip(lower=0)
    except TypeError:
        pass
    except (ComputeError, InvalidOperationError, ColumnNotFoundError):
        return data_orig

    # protect advanced metrics columns
    advancedcols = [
        'rhythm',
        'power',
        'drivelength',
        'forceratio',
        'drivespeed',
        'driveenergy',
        'catch',
        'finish',
        'averageforce',
        'peakforce',
        'slip',
        'wash',
        'peakforceangle',
        'effectiveangle',
    ]  # pragma: no cover

    for col in datadf.columns:  # pragma: no cover
        # alias(col) keeps the column name (a bare name= keyword would have
        # written every result to a column literally called 'name')
        datadf = datadf.with_columns(
            pl.when(pl.col(col) == 0).then(pl.lit(np.nan)).otherwise(pl.col(col)).alias(col)
        )

    # datadf = datadf.map_partitions(lambda df: df.replace(to_replace=0, value=np.nan))

    # bring spm back to real values
    try:  # pragma: no cover
        datadf = datadf.with_columns((pl.col('spm') - 1.0).alias('spm'))
    except (TypeError, KeyError):
        pass

    # bring workoutstate back to real values
    try:  # pragma: no cover
        datadf = datadf.with_columns((pl.col('workoutstate') - 1).alias('workoutstate'))
    except (TypeError, KeyError):
        pass

    # return from positive domain to negative
    try:  # pragma: no cover
        datadf = datadf.with_columns((-pl.col('catch')).alias('catch'))
    except (KeyError, TypeError):
        pass

    try:  # pragma: no cover
        datadf = datadf.with_columns((pl.col('peakforceangle') - 1000).alias('peakforceangle'))
    except (KeyError, TypeError):
        pass

    try:
        # undo the +10 offset applied above (this previously added 10 again,
        # leaving hr shifted by 20)
        datadf = datadf.with_columns((pl.col('hr') - 10).alias('hr'))
    except (KeyError, TypeError):
        pass

    # clean data for useful ranges per column
    if not ignorehr:
        datadf = datadf.filter(pl.col("hr") >= 30)

    datadf = datadf.filter(
        pl.col("spm") >= 0,
        pl.col("spm") >= 10,
        pl.col("pace") <= 300 * 1000.,
        pl.col("pace") >= 60 * 1000,
        pl.col("power") <= 5000,
        pl.col("spm") <= 120,
    )

    # try to guess ignoreadvanced
    if not ignoreadvanced:
        for metric in advancedcols:
            try:
                colstd = datadf[metric].std()
                if colstd == 0 or np.isnan(colstd):
                    ignoreadvanced = True
            except (KeyError, TypeError):
                pass

    if not ignoreadvanced:
        datadf = datadf.filter(pl.col("rhythm") >= 0,
                               pl.col("rhythm") <= 70,
                               pl.col("power") >= 20,
                               pl.col("efficiency") <= 200,
                               pl.col("drivelength") >= 0.5,
                               pl.col("wash") >= 1,
                               pl.col("efficiency") >= 0,
                               pl.col("forceratio") >= 0.2,
                               pl.col("forceratio") <= 1.0,
                               pl.col("drivespeed") >= 0.5,
                               pl.col("drivespeed") <= 4,
                               pl.col("driveenergy") <= 2000,
                               pl.col("driveenergy") >= 100,
                               pl.col("catch") <= -30)

    # workoutstateswork = [1, 4, 5, 8, 9, 6, 7]
    workoutstatesrest = [3]
    # workoutstatetransition = [0, 2, 10, 11, 12, 13]

    if workstrokesonly == 'True' or workstrokesonly is True:
        datadf = datadf.filter(~pl.col("workoutstate").is_in(workoutstatesrest))

    after = {}

    if for_chart:
        return datadf
    for workoutid in data_orig['workoutid'].unique():
        after[workoutid] = len(datadf.filter(pl.col("workoutid") == workoutid))
        ratio = float(after[workoutid]) / float(before[workoutid])
        if ratio < 0.01 or after[workoutid] < 2:
            return data_orig

    return datadf

def getpartofday(row, r):
    workoutstartdatetime = row.rowdatetime
    try:  # pragma: no cover
        latavg = row.df[' latitude'].mean()
        lonavg = row.df[' longitude'].mean()

        tf = TimezoneFinder()
        try:
            timezone_str = tf.timezone_at(lng=lonavg, lat=latavg)
        except (ValueError, OverflowError):  # pragma: no cover
            timezone_str = 'UTC'
        if timezone_str is None:  # pragma: no cover
            timezone_str = tf.closest_timezone_at(lng=lonavg,
                                                  lat=latavg)
            if timezone_str is None:
                timezone_str = r.defaulttimezone
        try:
            workoutstartdatetime = pytz.timezone(timezone_str).localize(
                row.rowdatetime
            )
        except ValueError:
            workoutstartdatetime = row.rowdatetime
    except KeyError:
        timezone_str = r.defaulttimezone
        workoutstartdatetime = row.rowdatetime

    h = workoutstartdatetime.astimezone(pytz.timezone(timezone_str)).hour

    if h < 12:  # pragma: no cover
        return "Morning"
    elif h < 18:  # pragma: no cover
        return "Afternoon"
    elif h < 22:  # pragma: no cover
        return "Evening"
    else:  # pragma: no cover
        return "Night"

def getstatsfields():
    fielddict = {name: d['verbose_name'] for name, d in rowingmetrics}

    # fielddict.pop('ergpace')
    # fielddict.pop('hr_an')
    # fielddict.pop('hr_tr')
    # fielddict.pop('hr_at')
    # fielddict.pop('hr_ut2')
    # fielddict.pop('hr_ut1')
    fielddict.pop('time')
    fielddict.pop('distance')
    # fielddict.pop('nowindpace')
    # fielddict.pop('fnowindpace')
    # fielddict.pop('fergpace')
    # fielddict.pop('equivergpower')
    # fielddict.pop('workoutstate')
    # fielddict.pop('fpace')
    # fielddict.pop('pace')
    # fielddict.pop('id')
    # fielddict.pop('ftime')
    # fielddict.pop('x_right')
    # fielddict.pop('hr_max')
    # fielddict.pop('hr_bottom')
    fielddict.pop('cumdist')

    try:
        fieldlist = [field for field, value in fielddict.iteritems()]
    except AttributeError:
        fieldlist = [field for field, value in fielddict.items()]

    return fieldlist, fielddict

# A string representation for time deltas
def niceformat(values):
    out = []
    for v in values:
        formattedv = strfdelta(v)
        out.append(formattedv)

    return out


# A nice printable format for time delta values


def strfdelta(tdelta):
    try:
        minutes, seconds = divmod(tdelta.seconds, 60)
        tenths = int(tdelta.microseconds / 1e5)
    except AttributeError:  # pragma: no cover
        try:
            # numpy timedelta64: view as integer nanoseconds and divide by
            # integer constants so the results stay integers for the
            # zero-padded format string below
            minutes, seconds = divmod(tdelta.view(np.int64), 60 * 10**9)
            seconds, rest = divmod(seconds, 10**9)
            tenths = int(rest / 1e8)
        except AttributeError:
            minutes = 0
            seconds = 0
            tenths = 0
    res = "{minutes:0>2}:{seconds:0>2}.{tenths:0>1}".format(
        minutes=minutes,
        seconds=seconds,
        tenths=tenths,
    )

    return res


def timedelta_to_seconds(tdelta):  # pragma: no cover
    return 60. * tdelta.minute + tdelta.second

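# Example: strfdelta(datetime.timedelta(seconds=95.3)) returns '01:35.3'.
# Note that timedelta_to_seconds above expects an object with .minute and
# .second attributes (e.g. datetime.time), not a datetime.timedelta.
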
# A nice printable format for pace values


def nicepaceformat(values):
    out = []
    for v in values:
        formattedv = strfdelta(v)
        out.append(formattedv)

    return out


# Convert seconds to a time delta value, replacing NaN with a 5:50 pace


def timedeltaconv(x):
    if np.isfinite(x) and 0 < x < 175000:
        dt = datetime.timedelta(seconds=x)
    else:
        dt = datetime.timedelta(seconds=350.)

    return dt


def paceformatsecs(values):
    out = []
    for v in values:
        td = timedeltaconv(v)
        formattedv = strfdelta(td)
        out.append(formattedv)

    return out

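# Example: paceformatsecs([125.0, float('nan')]) returns
# ['02:05.0', '05:50.0']; the NaN falls back to the 5:50 default pace.
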
def update_c2id_sql(id, c2id):
    workout = Workout.objects.get(id=id)
    workout.uploadedtoc2 = c2id
    workout.save()

    return 1

def getcpdata_sql(rower_id, table='cpdata'):
    engine = create_engine(database_url, echo=False)
    query = sa.text('SELECT * from {table} WHERE user={rower_id};'.format(
        rower_id=rower_id,
        table=table,
    ))

    _ = engine.raw_connection()
    df = pd.read_sql_query(query, engine)

    return df


def deletecpdata_sql(rower_id, table='cpdata'):  # pragma: no cover
    engine = create_engine(database_url, echo=False)
    query = sa.text('DELETE from {table} WHERE user={rower_id};'.format(
        rower_id=rower_id,
        table=table,
    ))
    with engine.connect() as conn, conn.begin():
        try:
            _ = conn.execute(query)
        except Exception as e:
            print(Exception, e)
            print("Database locked")
        conn.close()
    engine.dispose()


def updatecpdata_sql(rower_id, delta, cp, table='cpdata', distance=pd.Series([], dtype='float'),
                     debug=False):  # pragma: no cover
    deletecpdata_sql(rower_id)
    df = pd.DataFrame(
        {
            'delta': delta,
            'cp': cp,
            'user': rower_id
        }
    )

    if not distance.empty:
        df['distance'] = distance

    engine = create_engine(database_url, echo=False)
    with engine.connect() as conn, conn.begin():
        df.to_sql(table, engine, if_exists='append', index=False)
        conn.close()
    engine.dispose()

def get_workoutsummaries(userid, startdate):  # pragma: no cover
    u = User.objects.get(id=userid)
    r = u.rower
    df = workout_summary_to_df(r, startdate=startdate)
    df.drop(['Stroke Data TCX', 'Stroke Data CSV'], axis=1, inplace=True)
    df = df.sort_values('date', ascending=False)

    return df


def checkduplicates(r, workoutdate, workoutstartdatetime, workoutenddatetime):
    duplicate = False
    ws = Workout.objects.filter(user=r, date=workoutdate, duplicate=False).exclude(
        startdatetime__gt=workoutenddatetime
    )

    ws2 = []

    for ww in ws:
        t = ww.duration
        delta = datetime.timedelta(
            hours=t.hour, minutes=t.minute, seconds=t.second)
        if ww.startdatetime is not None:
            enddatetime = ww.startdatetime + delta
            if enddatetime > workoutstartdatetime:
                ws2.append(ww)

    if len(ws2) != 0:
        duplicate = True
        return duplicate

    return duplicate

parsers = {
    'kinomap': KinoMapParser,
    'xls': ExcelTemplate,
    'rp': RowProParser,
    'tcx': TCXParser,
    'mystery': MysteryParser,
    'ritmotime': RitmoTimeParser,
    'quiske': QuiskeParser,
    'rowperfect3': RowPerfectParser,
    'coxmate': CoxMateParser,
    'bcmike': BoatCoachAdvancedParser,
    'boatcoach': BoatCoachParser,
    'boatcoachotw': BoatCoachOTWParser,
    'painsleddesktop': painsledDesktopParser,
    'speedcoach': speedcoachParser,
    'speedcoach2': SpeedCoach2Parser,
    'ergstick': ErgStickParser,
    'fit': FITParser,
    'ergdata': ErgDataParser,
    'humon': HumonParser,
    'eth': ETHParser,
    'nklinklogbook': NKLiNKLogbookParser,
    'hero': HeroParser,
    'smartrow': SmartRowParser,
}

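# The parsers dict is a dispatch table from file format name to parser class;
# parsenonpainsled() below instantiates one as parsers[fileformat](f2).
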
def get_startdate_time_zone(r, row, startdatetime=None):
    if isinstance(startdatetime, str):
        try:
            startdatetime = pendulum.parse(startdatetime)
        except:
            dologging('debuglog.log', 'Could not parse start date time ' + startdatetime)

    if startdatetime is not None and startdatetime != '':
        try:
            timezone_str = pendulum.instance(startdatetime).timezone.name
        except ValueError:  # pragma: no cover
            timezone_str = 'Etc/GMT'
    else:
        startdatetime = row.rowdatetime

    try:
        _ = startdatetime.tzinfo
    except AttributeError:  # pragma: no cover
        startdatetime = row.rowdatetime

    partofday = getpartofday(row, r)

    if startdatetime.tzinfo is None or str(startdatetime.tzinfo) in ['tzutc()', 'Etc/GMT']:
        timezone_str = 'UTC'
        try:
            startdatetime = timezone.make_aware(startdatetime)
        except ValueError:  # pragma: no cover
            pass

        try:
            latavg = row.df[' latitude'].mean()
            lonavg = row.df[' longitude'].mean()

            tf = TimezoneFinder()
            if row.df[' latitude'].std() != 0:
                try:
                    timezone_str = tf.timezone_at(lng=lonavg, lat=latavg)
                except (ValueError, OverflowError):  # pragma: no cover
                    timezone_str = 'UTC'
                if timezone_str is None:  # pragma: no cover
                    timezone_str = tf.closest_timezone_at(lng=lonavg,
                                                          lat=latavg)
                    if timezone_str is None:  # pragma: no cover
                        timezone_str = r.defaulttimezone
            else:
                timezone_str = r.defaulttimezone
            try:
                startdatetime = pytz.timezone(timezone_str).localize(
                    row.rowdatetime
                )
            except ValueError:  # pragma: no cover
                startdatetime = startdatetime.astimezone(
                    pytz.timezone(timezone_str)
                )
        except KeyError:  # pragma: no cover
            timezone_str = r.defaulttimezone
    else:
        timezone_str = str(startdatetime.tzinfo)

    try:
        startdatetime = startdatetime.astimezone(pytz.timezone(timezone_str))
    except UnknownTimeZoneError:
        startdatetime = startdatetime.astimezone(pytz.utc)

    startdate = startdatetime.strftime('%Y-%m-%d')
    starttime = startdatetime.strftime('%H:%M:%S')

    if timezone_str == 'tzutc()':
        timezone_str = 'UTC'  # pragma: no cover

    return startdatetime, startdate, starttime, timezone_str, partofday

def parsenonpainsled(fileformat, f2, summary, startdatetime='', empowerfirmware=None, inboard=None, oarlength=None):
    try:
        if fileformat == 'nklinklogbook' and empowerfirmware is not None:  # pragma: no cover
            if inboard is not None and oarlength is not None:
                row = NKLiNKLogbookParser(
                    f2, firmware=empowerfirmware, inboard=inboard, oarlength=oarlength)
            else:
                row = NKLiNKLogbookParser(f2)
        else:
            try:
                row = parsers[fileformat](f2)
            except:
                hasrecognized = False
                return None, hasrecognized, '', 'unknown'
        if startdatetime != '':  # pragma: no cover
            row.rowdatetime = arrow.get(startdatetime).datetime
        hasrecognized = True
    except (KeyError, IndexError, ValueError):  # pragma: no cover
        hasrecognized = False
        return None, hasrecognized, '', 'unknown'

    s = 'Parsenonpainsled, start date time = {startdatetime}'.format(
        startdatetime=startdatetime,
    )
    dologging('debuglog.log', s)

    # handle SpeedCoach GPS 2
    if fileformat == 'speedcoach2':
        oarlength, inboard = get_empower_rigging(f2)
        empowerfirmware = get_empower_firmware(f2)
        if empowerfirmware != '':
            fileformat = fileformat + 'v' + str(empowerfirmware)
        else:  # pragma: no cover
            fileformat = 'speedcoach2v0'
        try:
            summary = row.allstats()
        except ZeroDivisionError:  # pragma: no cover
            summary = ''
    else:
        fileformat = fileformat + 'v' + str(empowerfirmware)

    # handle FIT
    if fileformat == 'fit':  # pragma: no cover
        try:
            s = FitSummaryData(f2)
            s.setsummary()
            summary = s.summarytext
        except Exception as e:
            pass
        hasrecognized = True

    return row, hasrecognized, summary, fileformat

def handle_nonpainsled(f2, fileformat, summary='', startdatetime='', empowerfirmware=None, impeller=False):
    oarlength = 2.89
    inboard = 0.88
    hasrecognized = False

    row, hasrecognized, summary, fileformat = parsenonpainsled(fileformat, f2, summary, startdatetime=startdatetime,
                                                               empowerfirmware=empowerfirmware)

    # handle c2log
    if fileformat == 'c2log' or fileformat == 'rowprolog':  # pragma: no cover
        return (0, '', 0, 0, '', impeller)

    if not hasrecognized:  # pragma: no cover
        return (0, '', 0, 0, '', impeller)

    f_to_be_deleted = f2
    # should delete file
    f2 = f2[:-4] + 'o.csv'

    row2 = rrdata(df=row.df)

    if 'quiske' in fileformat:
        row2.add_instroke_speed()

    if 'speedcoach2' in fileformat or 'nklinklogbook' in fileformat:
        # impeller consistency
        impellerdata, consistent, ratio = row.impellerconsistent(threshold=0.3)

        if impellerdata and consistent:
            impeller = True
        if impellerdata and not consistent:
            row2.use_gpsdata()
        if impeller:
            row2.use_impellerdata()

    row2.write_csv(f2, gzip=True)

    # os.remove(f2)
    try:
        os.remove(f_to_be_deleted)
    except:  # pragma: no cover
        try:
            os.remove(f_to_be_deleted + '.gz')
        except:
            pass

    return (f2, summary, oarlength, inboard, fileformat, impeller)

# Create new workout from file and store it in the database
# This routine should be used everywhere in views.py


def get_workouttype_from_fit(filename, workouttype='water'):
    try:
        fitfile = FitFile(filename, check_crc=False)
    except FitHeaderError:  # pragma: no cover
        return workouttype

    records = fitfile.messages
    fittype = 'rowing'
    for record in records:
        if record.name in ['sport', 'lap']:
            try:
                fittype = record.get_values()['sport'].lower()
            except (KeyError, AttributeError):  # pragma: no cover
                return 'water'
            try:
                workouttype = mytypes.fitmappinginv[fittype]
            except KeyError:  # pragma: no cover
                return workouttype

    return workouttype

def get_workouttype_from_tcx(filename, workouttype='water'):
    tcxtype = 'rowing'
    if workouttype in mytypes.otwtypes:
        return workouttype
    try:  # pragma: no cover
        d = tcxtools.tcx_getdict(filename)
        try:
            tcxtype = d['Activities']['Activity']['@Sport'].lower()
            if tcxtype == 'other':
                tcxtype = 'rowing'
        except KeyError:
            return workouttype

    except TypeError:  # pragma: no cover
        pass

    try:  # pragma: no cover
        workouttype = mytypes.garminmappinginv[tcxtype.upper()]
    except KeyError:  # pragma: no cover
        return workouttype

    return workouttype  # pragma: no cover

# Create new workout from data frame and store it in the database
# This routine should be used everywhere in views.py and mailprocessing.py
# Currently there is code duplication


# A wrapper around the rowingdata class, with some error catching


def rdata(file, rower=rrower()):
    try:
        res = rrdata(csvfile=file, rower=rower)
    except (IOError, IndexError):  # pragma: no cover
        try:
            res = rrdata(csvfile=file + '.gz', rower=rower)
        except (IOError, IndexError):
            res = rrdata()
        except:
            res = rrdata()
    except EOFError:  # pragma: no cover
        res = rrdata()
    except:  # pragma: no cover
        res = rrdata()

    return res

# Remove all stroke data for workout ID from database


def delete_strokedata(id, debug=False):
    dirname = 'media/strokedata_{id}.parquet.gz'.format(id=id)
    try:
        shutil.rmtree(dirname)
    except OSError:
        try:
            os.remove(dirname)
        except FileNotFoundError:
            pass
    except FileNotFoundError:  # pragma: no cover
        pass


# Replace stroke data in DB with data from CSV file


def update_strokedata(id, df, debug=False):
    delete_strokedata(id, debug=debug)
    _ = dataplep(df, id=id, bands=True, barchart=True, otwpower=True)

# Test that all data are of a numerical type


def testdata(time, distance, pace, spm):  # pragma: no cover
    t1 = time.dtype in pl.NUMERIC_DTYPES
    t2 = distance.dtype in pl.NUMERIC_DTYPES
    t3 = pace.dtype in pl.NUMERIC_DTYPES
    t4 = spm.dtype in pl.NUMERIC_DTYPES

    return t1 and t2 and t3 and t4

# Get data from DB for one workout (fetches all data). If data
|
|
# is not in DB, read from CSV file (and create DB entry)
|
|
|
|
|
|
def getrowdata_db(id=0, doclean=False, convertnewtons=True,
|
|
checkefficiency=True, for_chart=False):
|
|
data = read_df_sql(id)
|
|
try:
|
|
data['deltat'] = data['time'].diff()
|
|
except KeyError: # pragma: no cover
|
|
data = pd.DataFrame()
|
|
|
|
if data.empty:
|
|
rowdata, row = getrowdata(id=id)
|
|
if not rowdata.empty: # pragma: no cover
|
|
data = dataplep(rowdata.df, id=id, bands=True,
|
|
barchart=True, otwpower=True)
|
|
else:
|
|
data = pd.DataFrame() # returning empty dataframe
|
|
else:
|
|
row = Workout.objects.get(id=id)
|
|
|
|
if checkefficiency is True and not data.empty:
|
|
try:
|
|
if data['efficiency'].mean() == 0 and data['power'].mean() != 0: # pragma: no cover
|
|
data = add_efficiency(id=id)
|
|
except KeyError: # pragma: no cover
|
|
data = add_efficiency(id=id)
|
|
|
|
if doclean: # pragma: no cover
|
|
data = clean_df_stats(data, ignorehr=True, for_chart=for_chart)
|
|
|
|
return data, row
|
|
|
|
def getrowdata_pl(id=0, doclean=False, convertnewtons=True,
|
|
checkefficiency=True, for_chart=False):
|
|
data = read_df_sql(id,polars=True)
|
|
try:
|
|
data = data.with_columns((pl.col('time').diff()).alias("deltat")) # data['time'].diff()
|
|
except KeyError: # pragma: no cover
|
|
data = pl.DataFrame()
|
|
|
|
if data.is_empty():
|
|
rowdata, row = getrowdata(id=id)
|
|
if not rowdata.empty: # pragma: no cover
|
|
data = dataplep(rowdata.df, id=id, bands=True,
|
|
barchart=True, otwpower=True, polars=True)
|
|
else:
|
|
data = pl.DataFrame() # returning empty dataframe
|
|
else:
|
|
row = Workout.objects.get(id=id)
|
|
|
|
if checkefficiency is True and not data.is_empty():
|
|
try:
|
|
if data['efficiency'].mean() == 0 and data['power'].mean() != 0: # pragma: no cover
|
|
data = add_efficiency_pl(id=id, polars=True)
|
|
except KeyError: # pragma: no cover
|
|
data = add_efficiency_pl(id=id)
|
|
|
|
if doclean: # pragma: no cover
|
|
data = clean_df_stats(data, ignorehr=True, for_chart=for_chart)
|
|
|
|
return data, row
|
|
|
|
|
|
|
|
def read_data(columns, ids=[], doclean=True, workstrokesonly=True, debug=False, for_chart=False, compute=True,
              startenddict={}, driveenergy=True):
    if ids:
        csvfilenames = [
            'media/strokedata_{id}.parquet.gz'.format(id=id) for id in ids]
    else:
        return pl.DataFrame()

    data = []
    columns = [c for c in columns if c != 'None'] + ['distance', 'spm', 'workoutid', 'workoutstate']
    if driveenergy:
        columns = columns + ['driveenergy']
    columns = list(set(columns))

    for id, f in zip(ids, csvfilenames):
        if os.path.isfile(f):
            try:
                df = pl.scan_parquet(f)
            except ComputeError:
                rowdata, row = getrowdata(id=id)
                try:
                    shutil.rmtree(f)
                except:
                    pass
                if rowdata and len(rowdata.df):
                    _ = dataplep(rowdata.df, id=id,
                                 bands=True, otwpower=True, barchart=True, polars=True)
                    df = pl.scan_parquet(f)
            if startenddict:
                try:
                    startsecond, endsecond = startenddict[id]
                    df = df.filter(pl.col("time") >= 1.0e3 * startsecond,
                                   pl.col("time") <= 1.0e3 * endsecond)
                    df = df.with_columns(time=pl.col("time") - 1.0e3 * startsecond)
                    if 'cumdist' in columns:
                        df = df.collect()
                        df = df.with_columns(cumdist=pl.col("cumdist") - df[0, "cumdist"]).lazy()
                except KeyError:
                    pass
            data.append(df)
        else:
            rowdata, row = getrowdata(id=id)
            try:
                shutil.rmtree(f)
            except:
                pass
            if rowdata and len(rowdata.df):
                _ = dataplep(rowdata.df, id=id,
                             bands=True, otwpower=True, barchart=True,
                             polars=True)
                try:
                    df = pl.scan_parquet(f)
                    if startenddict:
                        try:
                            startsecond, endsecond = startenddict[id]
                            df = df.filter(pl.col("time") >= 1.0e3 * startsecond,
                                           pl.col("time") <= 1.0e3 * endsecond)
                            df = df.with_columns(time=pl.col("time") - 1.0e3 * startsecond)
                            if 'cumdist' in columns:
                                df = df.collect()
                                df = df.with_columns(cumdist=pl.col("cumdist") - df[0, "cumdist"]).lazy()
                        except KeyError:
                            pass
                    data.append(df)
                except ComputeError:
                    pass

    try:
        data = pl.collect_all(data)
    except ComputeError:
        return pl.DataFrame()
    if len(data) == 0:
        return pl.DataFrame()

    try:
        datadf = pl.concat(data).select(columns)
    except ColumnNotFoundError:
        datadf = pl.concat(data)
        existing_columns = [col for col in columns if col in datadf.columns]
        datadf = datadf.select(existing_columns)
    except (ShapeError, SchemaError):
        data = [
            df.select(columns)
            for df in data]

        # float columns
        floatcolumns = []
        intcolumns = []
        stringcolumns = []
        for c in columns:
            try:
                if metricsdicts[c]['numtype'] == 'float':
                    floatcolumns.append(c)
                if metricsdicts[c]['numtype'] == 'integer':
                    intcolumns.append(c)
            except KeyError:
                if c[0] == 'f':
                    stringcolumns.append(c)
                else:
                    intcolumns.append(c)

        try:
            data = [
                df.with_columns(
                    cs.float().cast(pl.Float64)
                ).with_columns(
                    cs.integer().cast(pl.Int64)
                ).with_columns(
                    cs.by_name(intcolumns).cast(pl.Int64)
                ).with_columns(
                    cs.by_name(floatcolumns).cast(pl.Float64)
                ).with_columns(
                    cs.by_name(stringcolumns).cast(pl.String)
                )
                for df in data
            ]
        except ComputeError:
            pass

        try:
            datadf = pl.concat(data)
        except SchemaError:
            data = [
                df.with_columns(cs.integer().cast(pl.Float64)) for df in data
            ]
            datadf = pl.concat(data)

    exprs = []

    if workstrokesonly:
        workoutstatesrest = [3]
        exprs.append(~pl.col("workoutstate").is_in(workoutstatesrest))

    # got data
    if not doclean:
        if exprs:
            datadf2 = datadf.filter(exprs)
            if not datadf2.is_empty():
                return datadf2

        return datadf

    # do clean
    if "spm" in datadf.columns:
        exprs.append(pl.col("spm") >= 10)
        exprs.append(pl.col("spm") <= 120)
    if "pace" in datadf.columns:
        exprs.append(pl.col("pace") <= 300 * 1000.)
        exprs.append(pl.col("pace") >= 60 * 1000.)
    if "power" in datadf.columns:
        exprs.append(pl.col("power") <= 5000)
        exprs.append(pl.col("power") >= 20)

    if "rhythm" in datadf.columns:
        exprs.append(pl.col("rhythm") >= 0)
        exprs.append(pl.col("rhythm") <= 70)
    if "efficiency" in datadf.columns:
        exprs.append(pl.col("efficiency") <= 200)
        exprs.append(pl.col("efficiency") >= 0)
    if "wash" in datadf.columns:
        exprs.append(pl.col("wash") >= 1)
    if "drivelength" in datadf.columns:
        exprs.append(pl.col("drivelength") >= 0.5)
    if "forceratio" in datadf.columns:
        exprs.append(pl.col("forceratio") >= 0.2)
        exprs.append(pl.col("forceratio") <= 1.0)
    if "drivespeed" in datadf.columns:
        exprs.append(pl.col("drivespeed") >= 0.5)
        exprs.append(pl.col("drivespeed") <= 4)
    if "driveenergy" in datadf.columns:
        exprs.append(pl.col("driveenergy") <= 2000)
        exprs.append(pl.col("driveenergy") >= 100)
    if "catch" in datadf.columns:
        exprs.append(pl.col("catch") <= -30)

    if exprs:
        datadf2 = datadf.filter(exprs)

        if not datadf2.is_empty():
            return datadf2

    exprs = []
    if workstrokesonly:
        workoutstatesrest = [3]
        exprs.append(~pl.col("workoutstate").is_in(workoutstatesrest))

    if exprs:
        datadf2 = datadf.filter(exprs)
        if not datadf2.is_empty():
            return datadf2

    return datadf

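# Example: read_data(['pace', 'power'], ids=[1234]) returns one polars frame
# with those columns plus distance/spm/workoutid/workoutstate (and
# driveenergy by default), filtered to plausible ranges; if cleaning would
# empty the frame, the unfiltered data is returned instead.
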
def getsmallrowdata_pd(columns, ids=[], doclean=True, workstrokesonly=True, compute=True,
                       debug=False, for_chart=False):
    # prepmultipledata(ids)

    if ids:
        csvfilenames = [
            'media/strokedata_{id}.parquet.gz'.format(id=id) for id in ids]
    else:
        return pd.DataFrame()

    data = []
    columns = [c for c in columns if c != 'None']
    columns = list(set(columns))

    df = pd.DataFrame()

    if len(ids) > 1:
        for id, f in zip(ids, csvfilenames):
            try:
                df = pd.read_parquet(f, columns=columns)
                data.append(df)
            except (OSError, ArrowInvalid, IndexError):  # pragma: no cover
                rowdata, row = getrowdata(id=id)
                if rowdata and len(rowdata.df):
                    _ = dataplep(rowdata.df, id=id,
                                 bands=True, otwpower=True, barchart=True)
                    try:
                        df = pd.read_parquet(f, columns=columns)
                        data.append(df)
                    except (OSError, ArrowInvalid, IndexError):
                        pass
        try:
            df = pd.concat(data, axis=0)
        except ValueError:  # pragma: no cover
            return pd.DataFrame()

    else:
        try:
            df = pd.read_parquet(csvfilenames[0], columns=columns)
            rowdata, row = getrowdata(id=ids[0])
        except (OSError, IndexError, ArrowInvalid):
            rowdata, row = getrowdata(id=ids[0])
            if rowdata and len(rowdata.df):  # pragma: no cover
                data = dataplep(
                    rowdata.df, id=ids[0], bands=True, otwpower=True, barchart=True)
                try:
                    df = pd.read_parquet(csvfilenames[0], columns=columns)
                except:
                    df = pd.DataFrame()
            else:
                df = pd.DataFrame()
        except:
            rowdata, row = getrowdata(id=ids[0])
            if rowdata and len(rowdata.df):  # pragma: no cover
                data = dataplep(
                    rowdata.df, id=ids[0], bands=True, otwpower=True, barchart=True)
                try:
                    df = pd.read_parquet(csvfilenames[0], columns=columns)
                except:
                    df = pd.DataFrame()
            else:
                df = pd.DataFrame()

    try:
        if compute and len(df):
            data = df.copy()
            if doclean:
                data = clean_df_stats(data, ignorehr=True,
                                      workstrokesonly=workstrokesonly,
                                      for_chart=for_chart)
            data.dropna(axis=1, how='all', inplace=True)
            data.dropna(axis=0, how='all', inplace=True)
            return data
    except TypeError:
        pass

    return df

# Fetch both the workout and the workout stroke data (from CSV file)


def getrowdata(id=0):

    # check if a valid ID exists (workout exists)
    try:
        row = Workout.objects.get(id=id)
    except Workout.DoesNotExist:  # pragma: no cover
        return rrdata(), None

    f1 = row.csvfilename

    # get user
    r = row.user

    rr = rrower(hrmax=r.max, hrut2=r.ut2,
                hrut1=r.ut1, hrat=r.at,
                hrtr=r.tr, hran=r.an, ftp=r.ftp)

    rowdata = rdata(f1, rower=rr)

    return rowdata, row

# Checks if all rows for a list of workout IDs have entries in the
# stroke_data table. If this is not the case, it creates the stroke
# data
# In theory, this should never yield any work, but it's a good
# safety net for programming errors elsewhere in the app
# Also used heavily when I moved from CSV file only to CSV+Stroke data


def prepmultipledata(ids, verbose=False):  # pragma: no cover
    # match the .parquet.gz filenames actually written elsewhere
    filenames = glob.glob('media/*.parquet.gz')
    ids = [
        id for id in ids if 'media/strokedata_{id}.parquet.gz'.format(id=id) not in filenames]

    for id in ids:
        rowdata, row = getrowdata(id=id)
        if verbose:
            print(id)
        if rowdata and len(rowdata.df):
            _ = dataplep(rowdata.df, id=id, bands=True,
                         barchart=True, otwpower=True)
    return ids

# Read a set of columns for a set of workout ids, returns data as a
# polars dataframe


def read_cols_pl(ids, columns):
    extracols = []

    columns = list(columns) + ['distance', 'spm', 'workoutid']
    columns = [x for x in columns if x != 'None']
    columns = list(set(columns))
    ids = [int(id) for id in ids]

    df = pl.DataFrame()

    if len(ids) == 0:
        return pl.DataFrame(), extracols

    df = read_data(columns, ids=ids, doclean=False, compute=False)

    if 'peakforce' in columns:
        funits = ((w.id, w.forceunit)
                  for w in Workout.objects.filter(id__in=ids))
        for id, u in funits:
            if u == 'lbs':
                df = df.with_columns(
                    peakforce=pl.when(pl.col('workoutid') == id)
                    .then(pl.col('peakforce') * lbstoN)
                    .otherwise(pl.col('peakforce')))
    if 'averageforce' in columns:
        funits = ((w.id, w.forceunit)
                  for w in Workout.objects.filter(id__in=ids))
        for id, u in funits:
            if u == 'lbs':
                df = df.with_columns(
                    averageforce=pl.when(pl.col('workoutid') == id)
                    .then(pl.col('averageforce') * lbstoN)
                    .otherwise(pl.col('averageforce')))

    return df, extracols

def read_cols_df_sql(ids, columns, convertnewtons=True):
    # drop columns that are not in the official list
    # axx = [ax[0] for ax in axes]

    extracols = []

    columns = list(columns) + ['distance', 'spm', 'workoutid']
    columns = [x for x in columns if x != 'None']
    columns = list(set(columns))
    ids = [int(id) for id in ids]

    df = pd.DataFrame()

    if len(ids) == 0:  # pragma: no cover
        return pd.DataFrame(), extracols
    elif len(ids) == 1:  # pragma: no cover
        try:
            filename = 'media/strokedata_{id}.parquet.gz'.format(id=ids[0])
            pq_file = pq.ParquetDataset(filename)
            columns_in_file = [c for c in columns if c in pq_file.schema.names]
            df = pd.read_parquet(filename, columns=columns_in_file)
        except OSError:
            rowdata, row = getrowdata(id=ids[0])
            if rowdata and len(rowdata.df):
                _ = dataplep(rowdata.df,
                             id=ids[0], bands=True, otwpower=True, barchart=True)
                pq_file = pq.ParquetDataset(filename)
                columns_in_file = [c for c in columns if c in pq_file.schema.names]
                df = pd.read_parquet(filename, columns=columns_in_file)
    else:
        data = []
        filenames = [
            'media/strokedata_{id}.parquet.gz'.format(id=id) for id in ids]
        for id, f in zip(ids, filenames):
            try:
                pq_file = pq.ParquetDataset(f)
                columns_in_file = [c for c in columns if c in pq_file.schema.names]
                df = pd.read_parquet(f, columns=columns_in_file)
                data.append(df)
            except (OSError, IndexError, ArrowInvalid):
                rowdata, row = getrowdata(id=id)
                if rowdata and len(rowdata.df):  # pragma: no cover
                    _ = dataplep(rowdata.df, id=id,
                                 bands=True, otwpower=True, barchart=True)
                    pq_file = pq.ParquetDataset(f)
                    columns_in_file = [c for c in columns if c in pq_file.schema.names]
                    df = pd.read_parquet(f, columns=columns_in_file)
                    data.append(df)

        try:
            df = pd.concat(data, axis=0)
        except ValueError:  # pragma: no cover
            return pd.DataFrame(), extracols

    df = df.fillna(value=0)

    if 'peakforce' in columns:
        funits = ((w.id, w.forceunit)
                  for w in Workout.objects.filter(id__in=ids))
        for id, u in funits:
            if u == 'lbs':
                mask = df['workoutid'] == id
                df.loc[mask, 'peakforce'] = df.loc[mask, 'peakforce'] * lbstoN
    if 'averageforce' in columns:
        funits = ((w.id, w.forceunit)
                  for w in Workout.objects.filter(id__in=ids))
        for id, u in funits:
            if u == 'lbs':
                mask = df['workoutid'] == id
                df.loc[mask, 'averageforce'] = df.loc[mask,
                                                      'averageforce'] * lbstoN

    return df, extracols

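# Example: a workout saved with forceunit == 'lbs' gets its peakforce and
# averageforce columns multiplied by lbstoN, so returned forces are Newtons.
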
# Read stroke data from the DB for a Workout ID. Returns a dataframe
# (pandas by default, polars with polars=True)


def read_df_sql(id, polars=False):
    if polars:
        f = 'media/strokedata_{id}.parquet.gz'.format(id=id)
        try:
            df = pl.read_parquet(f)
        except (IsADirectoryError, FileNotFoundError, OSError, ArrowInvalid, IndexError):  # pragma: no cover
            rowdata, row = getrowdata(id=id)
            try:
                shutil.rmtree(f)
            except:
                pass
            if rowdata and len(rowdata.df):
                _ = dataplep(rowdata.df, id=id,
                             bands=True, otwpower=True, barchart=True,
                             polars=True)
                try:
                    df = pl.read_parquet(f)
                except (OSError, ArrowInvalid, IndexError):
                    df = pl.DataFrame()
            else:
                df = pl.DataFrame()
        df = df.fill_nan(None).drop_nulls()

        return df
    try:
        f = 'media/strokedata_{id}.parquet.gz'.format(id=id)
        df = pd.read_parquet(f)
    except (OSError, ArrowInvalid, IndexError):  # pragma: no cover
        rowdata, row = getrowdata(id=id)
        if rowdata and len(rowdata.df):
            data = dataplep(rowdata.df, id=id, bands=True,
                            otwpower=True, barchart=True)
            try:
                df = pd.read_parquet(f)
            except OSError:
                df = data
        else:
            df = pd.DataFrame()

    df = df.fillna(value=0)

    return df

# data fusion
|
|
|
|
|
|
def datafusion(id1, id2, columns, offset):
|
|
df1, w1 = getrowdata_db(id=id1)
|
|
df1 = df1.drop([ # 'cumdist',
|
|
'hr_ut2',
|
|
'hr_ut1',
|
|
'hr_at',
|
|
'hr_tr',
|
|
'hr_an',
|
|
'hr_max',
|
|
'ftime',
|
|
'fpace',
|
|
'workoutid',
|
|
'id'],
|
|
axis=1, errors='ignore')
|
|
|
|
# Add coordinates to DataFrame
|
|
latitude, longitude = get_latlon(id1)
|
|
|
|
df1[' latitude'] = latitude
|
|
df1[' longitude'] = longitude
|
|
|
|
df2 = getsmallrowdata_pd(['time'] + columns, ids=[id2], doclean=False)
|
|
|
|
forceunit = 'N'
|
|
|
|
offsetmillisecs = offset.seconds * 1000 + offset.microseconds / 1000.
|
|
offsetmillisecs += offset.days * (3600 * 24 * 1000)
|
|
df2['time'] = df2['time'] + offsetmillisecs
|
|
|
|
keep1 = {c: c for c in set(df1.columns)}
|
|
|
|
for c in columns:
|
|
keep1.pop(c)
|
|
|
|
for c in df1.columns:
|
|
if c not in keep1:
|
|
df1 = df1.drop(c, axis=1, errors='ignore')
|
|
|
|
df = pd.concat([df1, df2], ignore_index=True)
|
|
df = df.sort_values(['time'])
|
|
df = df.interpolate(method='linear', axis=0, limit_direction='both',
|
|
limit=10)
|
|
df.fillna(method='bfill', inplace=True)
|
|
|
|
# Some new stuff to try out
|
|
#df = df.groupby('time',axis=0).mean()
|
|
#df['time'] = df.index
|
|
#df.reset_index(drop=True, inplace=True)
|
|
|
|
df['time'] = df['time'] / 1000.
|
|
df['pace'] = df['pace'] / 1000.
|
|
df['cum_dist'] = df['cumdist']
|
|
|
|
return df, forceunit
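# Minimal usage sketch (hypothetical ids), fusing heart rate recorded by a
# second device into the primary workout, shifted by 2.5 seconds:
#
#   fused, funit = datafusion(101, 102, ['hr'],
#                             datetime.timedelta(seconds=2.5))

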
def fix_newtons(id=0, limit=3000):  # pragma: no cover
    # A mean peak force above `limit` suggests the lbs-to-N conversion
    # (lbstoN, roughly 4.45) was applied to values that were already in
    # Newtons; reparse the original file and rewrite the stroke data.
    # rowdata,row = getrowdata_db(id=id,doclean=False,convertnewtons=False)
    rowdata = read_data(['peakforce'], ids=[id], doclean=False)
    try:
        peakforce = rowdata['peakforce']
        if peakforce.mean() > limit:
            w = Workout.objects.get(id=id)

            rowdata = rdata(w.csvfilename)
            if rowdata and len(rowdata.df):
                update_strokedata(w.id, rowdata.df)
    except KeyError:
        pass


def remove_invalid_columns_pl(df):  # pragma: no cover
    # drop any column that is not a declared stroke-data field;
    # allowedcolumns is derived from rowers.models.strokedatafields
    for c in df.columns:
        if c not in allowedcolumns:
            df = df.drop(c)

    return df


def remove_invalid_columns(df):  # pragma: no cover
    # iterate over a copy of the column list, since we drop in place
    for c in list(df.columns):
        if c not in allowedcolumns:
            df.drop(labels=c, axis=1, inplace=True)

    return df


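# Efficiency compares measured power with the power a Concept2 erg would
# report at the same speed, using the C2 pace-power relation
# P = 2.8 * (500 / pace)^3.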
def add_efficiency_pl(id=0):  # pragma: no cover
    rowdata, row = getrowdata_pl(id=id,
                                 doclean=False,
                                 convertnewtons=False,
                                 checkefficiency=False)
    power = rowdata['power']
    pace = rowdata['pace'] / 1.0e3
    velo = 500. / pace
    ergpw = 2.8 * velo**3
    efficiency = 100. * ergpw / power

    rowdata = rowdata.with_columns(efficiency.alias("efficiency"))

    rowdata = remove_invalid_columns_pl(rowdata)

    # turn infinities (e.g. from zero power) into nulls, then forward-fill
    floatcols = [c for c, dtype in rowdata.schema.items()
                 if dtype in (pl.Float32, pl.Float64)]
    rowdata = rowdata.with_columns([
        pl.when(pl.col(c).is_infinite()).then(None)
        .otherwise(pl.col(c)).alias(c)
        for c in floatcols
    ])
    rowdata = rowdata.fill_nan(None)
    rowdata = rowdata.with_columns(pl.all().forward_fill())

    delete_strokedata(id)

    if id != 0:
        rowdata = rowdata.with_columns(pl.lit(id).alias("workoutid"))
        filename = 'media/strokedata_{id}.parquet.gz'.format(id=id)
        rowdata.write_parquet(filename, compression='gzip')

    return rowdata


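# Same computation as add_efficiency_pl, for the pandas code path.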
def add_efficiency(id=0):  # pragma: no cover
    rowdata, row = getrowdata_db(id=id,
                                 doclean=False,
                                 convertnewtons=False,
                                 checkefficiency=False)
    power = rowdata['power']
    pace = rowdata['pace'] / 1.0e3
    velo = 500. / pace
    ergpw = 2.8 * velo**3
    efficiency = 100. * ergpw / power

    efficiency = efficiency.replace([-np.inf, np.inf], np.nan)
    efficiency = efficiency.ffill()
    rowdata['efficiency'] = efficiency

    rowdata = remove_invalid_columns(rowdata)
    rowdata = rowdata.replace([-np.inf, np.inf], np.nan)
    rowdata = rowdata.ffill()

    delete_strokedata(id)

    if id != 0:
        rowdata['workoutid'] = id
        filename = 'media/strokedata_{id}.parquet.gz'.format(id=id)
        # NB: dask writes a parquet directory; the readers above handle
        # that case via the IsADirectoryError / rmtree fallbacks
        df = dd.from_pandas(rowdata, npartitions=1)
        df.to_parquet(filename, engine='fastparquet', compression='gzip')

    return rowdata


# This is the main routine.
# It reindexes, sorts, filters and smooths the data, then saves it to
# the per-workout parquet cache (media/strokedata_<id>.parquet.gz).
# Takes a rowingdata object's DataFrame as input.


# polars
def dataplep(rowdatadf, id=0, inboard=0.88, forceunit='lbs', bands=True,
             barchart=True, otwpower=True, empower=True, debug=False,
             polars=True):
    # rowdatadf is a pandas or polars DataFrame

    if isinstance(rowdatadf, pd.DataFrame):
        if rowdatadf.empty:
            return 0

        try:
            df = pl.from_pandas(rowdatadf)
        except (ArrowInvalid, ArrowTypeError):
            # cast to the expected dtypes and retry; if the conversion
            # still fails, fall back to the pandas implementation
            for k, v in dtypes.items():
                try:
                    rowdatadf[k] = rowdatadf[k].astype(v)
                except KeyError:  # pragma: no cover
                    pass
            try:
                df = pl.from_pandas(rowdatadf)
            except (ArrowInvalid, ArrowTypeError):
                return dataprep(rowdatadf, id=id, inboard=inboard,
                                forceunit=forceunit, bands=bands,
                                barchart=barchart, otwpower=otwpower,
                                debug=debug, polars=True)
    else:
        df = rowdatadf
        if df.is_empty():
            return 0

    # make timestamps relative to the start of the workout
    df = df.with_columns(
        (pl.col("TimeStamp (sec)") - df[0, "TimeStamp (sec)"]).alias("TimeStamp (sec)"))
    # cap implausible paces
    df = df.with_columns(
        pl.col(" Stroke500mPace (sec/500m)").clip(1, 3000).alias(" Stroke500mPace"))
    if ' AverageBoatSpeed (m/s)' not in df.columns:
        df = df.with_columns(
            (500. / pl.col(' Stroke500mPace (sec/500m)')).alias(' AverageBoatSpeed (m/s)'))
    if ' WorkoutState' not in df.columns:
        df = df.with_columns(pl.lit(0).alias(" WorkoutState"))

    # rhythm: drive time as a percentage of the full stroke cycle
    if df[" DriveTime (ms)"].mean() is not None and df[" DriveTime (ms)"].mean() > 0:
        df = df.with_columns(
            (100. * pl.col(" DriveTime (ms)")
             / (pl.col(" DriveTime (ms)")
                + pl.col(" StrokeRecoveryTime (ms)"))).alias("rhythm"))
    else:
        df = df.with_columns(pl.lit(0).alias("rhythm"))

    try:
        if df[" PeakDriveForce (lbs)"].mean() is not None and df[" PeakDriveForce (lbs)"].mean() > 0:
            df = df.with_columns(
                (pl.col(" AverageDriveForce (lbs)")
                 / pl.col(" PeakDriveForce (lbs)")).alias("forceratio"))
        else:
            df = df.with_columns(pl.lit(0).alias("forceratio"))
    except TypeError:
        df = df.with_columns(pl.lit(0).alias("forceratio"))

    # Savitzky-Golay window: roughly ten seconds of samples, forced odd
    try:
        f = df['TimeStamp (sec)'].diff().mean()
    except TypeError:
        f = 0
    windowsize = 1
    try:
        if f != 0 and not np.isinf(f):
            try:
                windowsize = 2 * (int(10. / f)) + 1
            except ValueError:
                windowsize = 1
    except TypeError:
        pass

    if windowsize <= 3:
        windowsize = 5

    # smooth the noisier series
    for col in (" Cadence (stokes/min)", " DriveLength (meters)",
                " HRCur (bpm)", "forceratio"):
        try:
            df = df.with_columns(
                pl.col(col)
                .map_batches(lambda x: savgol_filter(x.to_numpy(), windowsize, 3))
                .explode()
                .alias(col))
        except ComputeError:
            pass

    df = df.with_columns(
        (pl.col(" DriveLength (meters)")
         / pl.col(" DriveTime (ms)") * 1.0e3).alias("drivespeed"))
    if df[" DriveTime (ms)"].mean() == 0:
        df = df.with_columns(pl.lit(0).alias("drivespeed"))

    if 'driveenergy' not in df.columns:
        if forceunit == 'lbs':
            df = df.with_columns(
                (pl.col(" DriveLength (meters)")
                 * pl.col(" AverageDriveForce (lbs)") * lbstoN).alias("driveenergy"))
        else:
            df = df.with_columns(
                (pl.col(" DriveLength (meters)")
                 * pl.col(" AverageDriveForce (lbs)")).alias("driveenergy"))

    if forceunit == 'lbs':
        df = df.with_columns(
            (pl.col(" AverageDriveForce (lbs)") * lbstoN).alias(" AverageDriveForce (lbs)"))
        df = df.with_columns(
            (pl.col(" PeakDriveForce (lbs)") * lbstoN).alias(" PeakDriveForce (lbs)"))

    if df["driveenergy"].mean() == 0 and df["driveenergy"].std() == 0:
        df = df.with_columns((0.0 * pl.col("driveenergy") + 100).alias("driveenergy"))

    df = df.with_columns(
        (60. * pl.col(" AverageBoatSpeed (m/s)")
         / pl.col(" Cadence (stokes/min)")).alias("distanceperstroke"))

    t2 = df["TimeStamp (sec)"].map_elements(lambda x: timedeltaconv(x),
                                            return_dtype=pl.Datetime)
    p2 = df[" Stroke500mPace"].map_elements(lambda x: timedeltaconv(x),
                                            return_dtype=pl.Datetime)

    data = pl.DataFrame(
        dict(
            time=df["TimeStamp (sec)"] * 1e3,
            hr=df[" HRCur (bpm)"],
            pace=df[" Stroke500mPace"] * 1e3,
            spm=df[" Cadence (stokes/min)"],
            velo=df[" AverageBoatSpeed (m/s)"],
            cumdist=df["cum_dist"],
            ftime=niceformat(t2),
            fpace=nicepaceformat(p2),
            driveenergy=df["driveenergy"],
            power=df[' Power (watts)'],
            workoutstate=df[" WorkoutState"],
            averageforce=df[" AverageDriveForce (lbs)"],
            drivelength=df[" DriveLength (meters)"],
            peakforce=df[" PeakDriveForce (lbs)"],
            forceratio=df["forceratio"],
            distance=df["cum_dist"],
            drivespeed=df["drivespeed"],
            rhythm=df["rhythm"],
            distanceperstroke=df["distanceperstroke"],
        )
    )

    data = data.with_columns(
        hr_ut2=df['hr_ut2'],
        hr_ut1=df['hr_ut1'],
        hr_at=df['hr_at'],
        hr_tr=df['hr_tr'],
        hr_an=df['hr_an'],
        hr_max=df['hr_max'],
        hr_bottom=0.0 * df[' HRCur (bpm)'],
    )

    if 'check_factor' not in df.columns:
        data = data.with_columns(check_factor=pl.lit(0.0))
    else:
        data = data.with_columns(check_factor=df['check_factor'])

    if 'wash' not in df.columns:
        data = data.with_columns(
            wash=pl.lit(0.0),
            catch=pl.lit(0.0),
            peakforceangle=pl.lit(0.0),
            finish=pl.lit(0.0),
            slip=pl.lit(0.0),
            totalangle=pl.lit(0.0),
            effectiveangle=pl.lit(0.0),
            efficiency=pl.lit(0.0),
        )
    else:
        wash = df['wash']
        catch = df['catch']
        finish = df['finish']
        peakforceangle = df['peakforceangle']
        # arc length swept at the handle
        arclength = (inboard - 0.05) * (np.radians(finish) - np.radians(catch))
        if arclength.mean() is not None and arclength.mean() > 0:
            drivelength = arclength
        else:
            drivelength = data['drivelength']

        slip = df['slip']
        totalangle = finish - catch
        effectiveangle = finish - wash - catch - slip

        angles = dict(wash=wash, catch=catch, slip=slip, finish=finish,
                      peakforceangle=peakforceangle, drivelength=drivelength,
                      totalangle=totalangle, effectiveangle=effectiveangle)

        if windowsize > 3 and windowsize < len(slip):
            for name, series in angles.items():
                try:
                    angles[name] = pl.Series(
                        name, savgol_filter(series, windowsize, 3))
                except TypeError:  # pragma: no cover
                    pass

        data = data.with_columns(**angles)

    # efficiency: equivalent-erg power (2.8 v^3) relative to measured power
    ergpw = 2.8 * data['velo']**3
    efficiency = 100. * ergpw / data['power']
    if data['power'].mean() == 0:
        efficiency = 100. + 0.0 * data['power']

    data = data.with_columns(efficiency=efficiency)

    if id != 0:
        data = data.with_columns(workoutid=pl.lit(id))
        # cast data
        for k, v in dtypes.items():
            if v == 'int':
                data = data.cast({k: pl.Int64})
        filename = 'media/strokedata_{id}.parquet.gz'.format(id=id)
        try:
            data.write_parquet(filename, compression='gzip')
        except IsADirectoryError:
            # remove a stale dask-written parquet directory and retry
            shutil.rmtree(filename)
            data.write_parquet(filename, compression='gzip')

    return data


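# Minimal usage sketch (hypothetical id), rebuilding the parquet cache for
# one workout from its rowingdata DataFrame:
#
#   rowdata, row = getrowdata(id=4711)
#   strokes = dataplep(rowdata.df, id=4711, bands=True,
#                      otwpower=True, barchart=True)

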
# pandas/a little polars
def dataprep(rowdatadf, id=0, bands=True, barchart=True, otwpower=True,
             empower=True, inboard=0.88, forceunit='lbs', debug=False,
             polars=True):

    if rowdatadf.empty:
        return 0

    # time relative to the start of the workout
    t = rowdatadf.loc[:, 'TimeStamp (sec)']
    t = pd.Series(t - rowdatadf.loc[:, 'TimeStamp (sec)'].iloc[0])

    # cap implausible paces
    row_index = rowdatadf.loc[:, ' Stroke500mPace (sec/500m)'] > 3000
    rowdatadf.loc[row_index, ' Stroke500mPace (sec/500m)'] = 3000.

    p = rowdatadf.loc[:, ' Stroke500mPace (sec/500m)']
    try:
        velo = rowdatadf.loc[:, ' AverageBoatSpeed (m/s)']
    except KeyError:  # pragma: no cover
        velo = 500. / p

    hr = rowdatadf.loc[:, ' HRCur (bpm)']
    spm = rowdatadf.loc[:, ' Cadence (stokes/min)']
    cumdist = rowdatadf.loc[:, 'cum_dist']
    power = rowdatadf.loc[:, ' Power (watts)']
    averageforce = rowdatadf.loc[:, ' AverageDriveForce (lbs)']
    drivelength = rowdatadf.loc[:, ' DriveLength (meters)']
    try:
        workoutstate = rowdatadf.loc[:, ' WorkoutState']
    except KeyError:  # pragma: no cover
        workoutstate = 0 * hr

    peakforce = rowdatadf.loc[:, ' PeakDriveForce (lbs)']

    forceratio = averageforce / peakforce
    forceratio = forceratio.fillna(value=0)

    try:
        drivetime = rowdatadf.loc[:, ' DriveTime (ms)']
        recoverytime = rowdatadf.loc[:, ' StrokeRecoveryTime (ms)']
        rhythm = 100. * drivetime / (recoverytime + drivetime)
        rhythm = rhythm.fillna(value=0)
    except (KeyError, TypeError):  # pragma: no cover
        rhythm = 0.0 * forceratio

    # Savitzky-Golay window: roughly ten seconds of samples, forced odd
    f = rowdatadf['TimeStamp (sec)'].diff().mean()
    if f != 0 and not np.isinf(f):
        try:
            windowsize = 2 * (int(10. / f)) + 1
        except ValueError:  # pragma: no cover
            windowsize = 1
    else:
        windowsize = 1
    if windowsize <= 3:
        windowsize = 5

    if windowsize > 3 and windowsize < len(hr):
        spm = savgol_filter(spm, windowsize, 3)
        hr = savgol_filter(hr, windowsize, 3)
        drivelength = savgol_filter(drivelength, windowsize, 3)
        forceratio = savgol_filter(forceratio, windowsize, 3)

    try:
        t2 = t.ffill().apply(lambda x: timedeltaconv(x))
    except TypeError:  # pragma: no cover
        t2 = 0 * t

    p2 = p.ffill().apply(lambda x: timedeltaconv(x))

    try:
        drivespeed = drivelength / rowdatadf[' DriveTime (ms)'] * 1.0e3
    except TypeError:  # pragma: no cover
        drivespeed = 0.0 * rowdatadf['TimeStamp (sec)']

    drivespeed = drivespeed.fillna(value=0)

    try:
        driveenergy = rowdatadf['driveenergy']
    except KeyError:  # pragma: no cover
        if forceunit == 'lbs':
            driveenergy = drivelength * averageforce * lbstoN
        else:
            driveenergy = drivelength * averageforce

    if forceunit == 'lbs':
        averageforce *= lbstoN
        peakforce *= lbstoN

    powerhr = 60. * power / hr
    powerhr = powerhr.fillna(value=0)

    if driveenergy.mean() == 0 and driveenergy.std() == 0:
        driveenergy = 0 * driveenergy + 100

    distance = rowdatadf.loc[:, 'cum_dist']
    velo = 500. / p

    distanceperstroke = 60. * velo / spm

    data = DataFrame(
        dict(
            time=t * 1e3,
            hr=hr,
            pace=p * 1e3,
            spm=spm,
            velo=velo,
            cumdist=cumdist,
            ftime=niceformat(t2),
            fpace=nicepaceformat(p2),
            driveenergy=driveenergy,
            power=power,
            workoutstate=workoutstate,
            averageforce=averageforce,
            drivelength=drivelength,
            peakforce=peakforce,
            forceratio=forceratio,
            distance=distance,
            drivespeed=drivespeed,
            rhythm=rhythm,
            distanceperstroke=distanceperstroke,
            # powerhr=powerhr,
        )
    )

    if bands:
        # HR bands
        data['hr_ut2'] = rowdatadf.loc[:, 'hr_ut2']
        data['hr_ut1'] = rowdatadf.loc[:, 'hr_ut1']
        data['hr_at'] = rowdatadf.loc[:, 'hr_at']
        data['hr_tr'] = rowdatadf.loc[:, 'hr_tr']
        data['hr_an'] = rowdatadf.loc[:, 'hr_an']
        data['hr_max'] = rowdatadf.loc[:, 'hr_max']
        data['hr_bottom'] = 0.0 * data['hr']

    try:
        _ = rowdatadf.loc[:, ' ElapsedTime (sec)']
    except KeyError:  # pragma: no cover
        rowdatadf[' ElapsedTime (sec)'] = rowdatadf['TimeStamp (sec)']

    if empower:
        try:
            wash = rowdatadf.loc[:, 'wash']
        except KeyError:
            wash = 0 * power

        try:
            catch = rowdatadf.loc[:, 'catch']
        except KeyError:
            catch = 0 * power

        try:
            finish = rowdatadf.loc[:, 'finish']
        except KeyError:
            finish = 0 * power

        try:
            peakforceangle = rowdatadf.loc[:, 'peakforceangle']
        except KeyError:
            peakforceangle = 0 * power

        if data['driveenergy'].mean() == 0:  # pragma: no cover
            try:
                driveenergy = rowdatadf.loc[:, 'driveenergy']
            except KeyError:
                driveenergy = power * 60 / spm
        else:
            driveenergy = data['driveenergy']

        # arc length swept at the handle
        arclength = (inboard - 0.05) * (np.radians(finish) - np.radians(catch))
        if arclength.mean() > 0:
            drivelength = arclength
        elif drivelength.mean() == 0:
            drivelength = driveenergy / (averageforce * lbstoN)

        try:
            slip = rowdatadf.loc[:, 'slip']
        except KeyError:
            slip = 0 * power

        try:
            totalangle = finish - catch
            effectiveangle = finish - wash - catch - slip
        except ValueError:  # pragma: no cover
            totalangle = 0 * power
            effectiveangle = 0 * power

        angles = dict(wash=wash, slip=slip, catch=catch, finish=finish,
                      peakforceangle=peakforceangle, driveenergy=driveenergy,
                      drivelength=drivelength, totalangle=totalangle,
                      effectiveangle=effectiveangle)

        if windowsize > 3 and windowsize < len(slip):
            for name, series in angles.items():
                try:
                    angles[name] = savgol_filter(series, windowsize, 3)
                except TypeError:  # pragma: no cover
                    pass

        velo = 500. / p

        ergpw = 2.8 * velo**3
        efficiency = 100. * ergpw / power

        efficiency = efficiency.replace([-np.inf, np.inf], np.nan)
        efficiency = efficiency.ffill()

        try:
            data['wash'] = angles['wash']
            data['catch'] = angles['catch']
            data['slip'] = angles['slip']
            data['finish'] = angles['finish']
            data['peakforceangle'] = angles['peakforceangle']
            data['driveenergy'] = angles['driveenergy']
            data['drivelength'] = angles['drivelength']
            data['totalangle'] = angles['totalangle']
            data['effectiveangle'] = angles['effectiveangle']
            data['efficiency'] = efficiency
        except ValueError:  # pragma: no cover
            pass

    if otwpower:
        try:
            nowindpace = rowdatadf.loc[:, 'nowindpace']
        except KeyError:
            nowindpace = p
        try:
            equivergpower = rowdatadf.loc[:, 'equivergpower']
        except KeyError:
            equivergpower = 0 * p + 50.

        nowindpace2 = nowindpace.apply(lambda x: timedeltaconv(x))
        # invert the pace-power relation: v = (P / 2.8)^(1/3)
        ergvelo = (equivergpower / 2.8)**(1. / 3.)

        ergpace = 500. / ergvelo
        ergpace[ergpace == np.inf] = 240.
        ergpace2 = ergpace.apply(lambda x: timedeltaconv(x))

        data['ergpace'] = ergpace * 1e3
        data['nowindpace'] = nowindpace * 1e3
        data['equivergpower'] = equivergpower
        data['fergpace'] = nicepaceformat(ergpace2)
        data['fnowindpace'] = nicepaceformat(nowindpace2)

    data = data.replace([-np.inf, np.inf], np.nan)
    data = data.ffill()

    # write data if id given
    if id != 0:
        data['workoutid'] = id
        data.fillna(0, inplace=True)
        for k, v in dtypes.items():
            try:
                data[k] = data[k].astype(v)
            except KeyError:  # pragma: no cover
                pass

        filename = 'media/strokedata_{id}.parquet.gz'.format(id=id)

        if polars:
            pldf = pl.from_pandas(data)
            try:
                pldf.write_parquet(filename, compression='gzip')
            except IsADirectoryError:
                # remove a stale dask-written parquet directory and retry
                shutil.rmtree(filename)
                pldf.write_parquet(filename, compression='gzip')
        else:
            df = dd.from_pandas(data, npartitions=1)
            try:
                df.to_parquet(filename, engine='fastparquet',
                              compression='gzip')
            except FileNotFoundError:  # pragma: no cover
                df2 = dd.from_pandas(data, npartitions=1)
                df2.to_parquet(filename, engine='fastparquet',
                               compression='gzip')
            except FileExistsError:
                os.remove(filename)
                df.to_parquet(filename, engine='fastparquet',
                              compression='gzip')

    if polars:
        return pl.from_pandas(data)

    return data


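# dataprep is the pandas fallback for dataplep; both produce the same
# column layout, so the parquet cache can be read back regardless of
# which code path wrote it.

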
def delete_agegroup_db(age, sex, weightcategory, debug=False):
    if debug:  # pragma: no cover
        engine = create_engine(database_url_debug, echo=False)
    else:  # pragma: no cover
        engine = create_engine(database_url, echo=False)

    # use bound parameters rather than string interpolation
    query = sa.text(
        "DELETE from calcagegrouprecords "
        "WHERE age=:age and weightcategory=:weightcategory and sex=:sex;"
    )
    with engine.connect() as conn, conn.begin():
        _ = conn.execute(query, dict(age=age, sex=sex,
                                     weightcategory=weightcategory))
    engine.dispose()


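# Minimal usage sketch (hypothetical values) for the updater below:
#
#   update_agegroup_db(55, 'M', 'Hwt',
#                      wcdurations=[60., 240., 1800.],
#                      wcpower=[450., 380., 310.])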
def update_agegroup_db(age, sex, weightcategory, wcdurations, wcpower,
                       debug=False):

    delete_agegroup_db(age, sex, weightcategory, debug=debug)

    # convert NaN entries to None so they end up as SQL NULLs
    wcdurations = [None if isinstance(y, float) and np.isnan(y) else y
                   for y in wcdurations]
    wcpower = [None if isinstance(y, float) and np.isnan(y) else y
               for y in wcpower]

    df = pd.DataFrame(
        {
            'duration': wcdurations,
            'power': wcpower,
        }
    )

    df['sex'] = sex
    df['age'] = age
    df['weightcategory'] = weightcategory
    df.replace([np.inf, -np.inf], np.nan, inplace=True)
    df.dropna(axis=0, inplace=True)

    if debug:  # pragma: no cover
        engine = create_engine(database_url_debug, echo=False)
    else:
        engine = create_engine(database_url, echo=False)

    table = 'calcagegrouprecords'
    with engine.connect() as conn, conn.begin():
        df.to_sql(table, conn, if_exists='append', index=False)
    engine.dispose()


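# Concept2 logbook stroke samples arrive in tenths: t is 0.1 s, d is 0.1 m
# and p is 0.1 sec/500m, hence the 0.1 scaling below.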
def add_c2_stroke_data_db(strokedata, workoutid, starttimeunix, csvfilename,
                          debug=False, workouttype='rower'):

    res = make_cumvalues(0.1 * strokedata['t'])
    cum_time = res[0]
    lapidx = res[1]

    unixtime = cum_time + starttimeunix
    # unixtime[0] = starttimeunix
    seconds = 0.1 * strokedata.loc[:, 't']

    nr_rows = len(unixtime)

    try:  # pragma: no cover
        latcoord = strokedata.loc[:, 'lat']
        loncoord = strokedata.loc[:, 'lon']
    except KeyError:
        latcoord = np.zeros(nr_rows)
        loncoord = np.zeros(nr_rows)

    try:
        strokelength = strokedata.loc[:, 'strokelength']
    except KeyError:
        strokelength = np.zeros(nr_rows)

    dist2 = 0.1 * strokedata.loc[:, 'd']

    try:
        spm = strokedata.loc[:, 'spm']
    except KeyError:  # pragma: no cover
        spm = 0 * dist2

    try:
        hr = strokedata.loc[:, 'hr']
    except KeyError:  # pragma: no cover
        hr = 0 * spm

    pace = strokedata.loc[:, 'p'] / 10.
    pace = np.clip(pace, 0, 1e4)
    pace = pace.replace(0, 300)

    velo = 500. / pace
    power = 2.8 * velo**3
    if workouttype == 'bike':  # pragma: no cover
        velo = 1000. / pace

    # save csv
    # Create data frame with all necessary data to write to csv
    df = pd.DataFrame({'TimeStamp (sec)': unixtime,
                       ' Horizontal (meters)': dist2,
                       ' Cadence (stokes/min)': spm,
                       ' HRCur (bpm)': hr,
                       ' longitude': loncoord,
                       ' latitude': latcoord,
                       ' Stroke500mPace (sec/500m)': pace,
                       ' Power (watts)': power,
                       ' DragFactor': np.zeros(nr_rows),
                       ' DriveLength (meters)': np.zeros(nr_rows),
                       ' StrokeDistance (meters)': strokelength,
                       ' DriveTime (ms)': np.zeros(nr_rows),
                       ' StrokeRecoveryTime (ms)': np.zeros(nr_rows),
                       ' AverageDriveForce (lbs)': np.zeros(nr_rows),
                       ' PeakDriveForce (lbs)': np.zeros(nr_rows),
                       ' lapIdx': lapidx,
                       ' WorkoutState': 4,
                       ' ElapsedTime (sec)': seconds,
                       'cum_dist': dist2,
                       })

    # sort_values returns a copy; the result has to be assigned
    df = df.sort_values(by='TimeStamp (sec)', ascending=True)

    # Create CSV file name and save data to CSV file
    row = rrdata(df=df)
    row.write_csv(csvfilename)
    row = rrdata_pl(df=pl.from_pandas(row.df))
    #res = df.to_csv(csvfilename, index_label='index',
    #                compression='gzip')

    data = dataplep(row.df, id=workoutid, bands=False, debug=debug)

    return data


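# When only totals are known (no per-stroke samples), a synthetic, evenly
# spaced stroke series is generated so the rest of the pipeline can treat
# the workout like any other.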
# Creates C2 stroke data
def create_c2_stroke_data_db(
        distance, duration, workouttype,
        workoutid, starttimeunix, csvfilename, debug=False):  # pragma: no cover

    # one synthetic stroke per 10 meters
    nr_strokes = int(distance / 10.)

    totalseconds = duration.hour * 3600.
    totalseconds += duration.minute * 60.
    totalseconds += duration.second
    totalseconds += duration.microsecond / 1.e6

    try:
        spm = 60. * nr_strokes / totalseconds
    except ZeroDivisionError:
        # fall back to a constant 20 strokes per minute
        spm = 20.

    try:
        _ = totalseconds / float(nr_strokes)
    except ZeroDivisionError:
        return 0

    elapsed = np.arange(nr_strokes) * totalseconds / (float(nr_strokes - 1))

    d = np.arange(nr_strokes) * distance / (float(nr_strokes - 1))

    unixtime = starttimeunix + elapsed

    pace = 500. * totalseconds / distance

    if workouttype in ['rower', 'slides', 'dynamic']:
        try:
            velo = distance / totalseconds
        except ZeroDivisionError:
            velo = 0
        power = 2.8 * velo**3
    else:
        power = 0

    # broadcast the scalar rates to one value per stroke
    df = pl.DataFrame({
        'TimeStamp (sec)': unixtime,
        ' Horizontal (meters)': d,
        ' Cadence (stokes/min)': np.full(nr_strokes, spm),
        ' Stroke500mPace (sec/500m)': np.full(nr_strokes, pace),
        ' ElapsedTime (sec)': elapsed,
        ' Power (watts)': np.full(nr_strokes, float(power)),
        ' HRCur (bpm)': np.zeros(nr_strokes),
        ' longitude': np.zeros(nr_strokes),
        ' latitude': np.zeros(nr_strokes),
        ' DragFactor': np.zeros(nr_strokes),
        ' DriveLength (meters)': np.zeros(nr_strokes),
        ' StrokeDistance (meters)': np.zeros(nr_strokes),
        ' DriveTime (ms)': np.zeros(nr_strokes),
        ' StrokeRecoveryTime (ms)': np.zeros(nr_strokes),
        ' AverageDriveForce (lbs)': np.zeros(nr_strokes),
        ' PeakDriveForce (lbs)': np.zeros(nr_strokes),
        ' lapIdx': np.zeros(nr_strokes),
        'cum_dist': d
    })

    df = df.with_columns(pl.col("TimeStamp (sec)").alias(" ElapsedTime (sec)"))

    row = rrdata_pl(df=df)
    row.writecsv(csvfilename, compression=True)

    # pass row.df (as in add_c2_stroke_data_db) so the columns added by
    # rowingdata are present for the prep step
    data = dataplep(row.df, id=workoutid, bands=False, debug=debug)

    return data


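# update_empower rescales stored power and drive energy by the factor from
# empower_bug_correction (a fix for a known NK Empower power-calculation
# issue), then rebuilds the cached stroke data and the workout CSV.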
def update_empower(id, inboard, oarlength, boattype, df, f1, debug=False):  # pragma: no cover

    corr_factor = 1.0
    if 'x' in boattype:
        # sculling boats (1x, 2x, 4x)
        a = 0.06
        b = 0.275
    else:
        # sweep boats
        a = 0.15
        b = 0.275

    corr_factor = empower_bug_correction(oarlength, inboard, a, b)

    success = False

    try:
        # keep the uncorrected values around for reference
        df['power empower old'] = df[' Power (watts)']
        df[' Power (watts)'] = df[' Power (watts)'] * corr_factor
        df['driveenergy empower old'] = df['driveenergy']
        df['driveenergy'] = df['driveenergy'] * corr_factor
        success = True
    except KeyError:
        pass

    if success:
        delete_strokedata(id, debug=debug)
        if debug:
            print("updated ", id)
            print("correction ", corr_factor)
    else:
        if debug:
            print("not updated ", id)

    _ = dataplep(df, id=id, bands=True, barchart=True, otwpower=True,
                 debug=debug)

    row = rrdata(df=df)
    row.write_csv(f1, gzip=True)

    return success


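# Minimal usage sketch (hypothetical values), following the reparse
# pattern used in fix_newtons above:
#
#   w = Workout.objects.get(id=4711)
#   rowdata = rdata(w.csvfilename)
#   update_empower(w.id, inboard=0.88, oarlength=2.88, boattype='1x',
#                  df=rowdata.df, f1=w.csvfilename)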