# rowsandall/rowers/dataroutines.py
from rowers.metrics import axes, calc_trimp, rowingmetrics, dtypes, metricsgroups, metricsdicts
from rowers.utils import lbstoN, wavg, dologging
from rowers.mytypes import otwtypes, otetypes, rowtypes
import glob
import rowingdata.tcxtools as tcxtools
from rowers.utils import totaltime_sec_to_string
from rowers.datautils import p0
from scipy import optimize
from rowers.utils import calculate_age
import datetime
from scipy.signal import savgol_filter
from rowers.opaque import encoder
from rowers.database import *
from rowers import mytypes
from rowsandall_app.settings import SITE_URL
import django_rq
from timezonefinder import TimezoneFinder
import rowers.datautils as datautils
import rowers.utils as utils
import sys
import sqlalchemy as sa
from sqlalchemy import create_engine
from django.conf import settings
import math
from fitparse.base import FitHeaderError
from fitparse import FitFile
import itertools
import numpy as np
import pandas as pd
from zipfile import BadZipFile
import zipfile
import os
from rowers.models import strokedatafields
import polars as pl
import polars.selectors as cs
from polars.exceptions import (
ColumnNotFoundError, SchemaError, ComputeError,
InvalidOperationError, ShapeError
)
from pandas.errors import IntCastingNaNError
from rowingdata import (
KinoMapParser,
ExcelTemplate,
TCXParser,
MysteryParser,
RowProParser,
RitmoTimeParser,
QuiskeParser,
RowPerfectParser,
CoxMateParser,
BoatCoachParser,
BoatCoachOTWParser,
BoatCoachAdvancedParser,
painsledDesktopParser,
speedcoachParser,
SpeedCoach2Parser,
ErgStickParser,
FITParser,
ErgDataParser,
HumonParser,
ETHParser,
NKLiNKLogbookParser,
HeroParser,
SmartRowParser,)
from rowingdata import make_cumvalues
# All the data preparation, data cleaning and data mangling should
# be defined here
from rowers.models import (
Workout, Team, CalcAgePerformance, C2WorldClassAgePerformance,
User
)
import pytz
from pytz.exceptions import UnknownTimeZoneError
import collections
import pendulum
from rowingdata import rowingdata as rrdata
from rowingdata import rowingdata_pl as rrdata_pl
from rowingdata import rower as rrower
import yaml
import shutil
from shutil import copyfile
from rowingdata import (
get_file_type, get_empower_rigging, get_empower_firmware
)
from pandas import DataFrame, Series
import dask.dataframe as dd
from dask.delayed import delayed
import pyarrow.parquet as pq
import pyarrow as pa
from pyarrow.lib import ArrowInvalid, ArrowTypeError
from django.utils import timezone
from django.utils.timezone import get_current_timezone
from django.urls import reverse
import requests
from django.core.exceptions import ValidationError
from time import strftime
import arrow
# Active timezone of the running Django process.
thetimezone = get_current_timezone()
# Column names the stroke-data storage is allowed to expose.
allowedcolumns = list(strokedatafields)
from rowsandall_app.settings_dev import use_sqlite
from rowsandall_app.settings_dev import DATABASES as DEV_DATABASES

# Build a SQLAlchemy URL from the Django default database settings.
# Missing keys become empty strings (the URL still formats), matching the
# old per-key try/except KeyError fallbacks.
_default_db = settings.DATABASES.get('default', {})
user = _default_db.get('USER', '')
password = _default_db.get('PASSWORD', '')
database_name = _default_db.get('NAME', '')
host = _default_db.get('HOST', '')
port = _default_db.get('PORT', '')
database_url = 'mysql://{user}:{password}@{host}:{port}/{database_name}'.format(
    user=user,
    password=password,
    database_name=database_name,
    host=host,
    port=port,
)
database_name_dev = DEV_DATABASES['default']['NAME']
if use_sqlite:
    # local development runs against the sqlite file instead of MySQL
    database_url = 'sqlite:///' + database_name_dev
# debug URL always mirrors the effective database URL
database_url_debug = database_url
# mapping the DB column names to the CSV file column names
# mapping the DB column names to the CSV file column names
# NOTE: the leading spaces and the 'stokes' spelling in the CSV headers are
# intentional — they must match the headers written by the CSV exporter
# byte-for-byte.
columndict = {
    'time': 'TimeStamp (sec)',
    'hr': ' HRCur (bpm)',
    'velo': ' AverageBoatSpeed (m/s)',
    'pace': ' Stroke500mPace (sec/500m)',
    'spm': ' Cadence (stokes/min)',
    'power': ' Power (watts)',
    'averageforce': ' AverageDriveForce (lbs)',
    'drivelength': ' DriveLength (meters)',
    'peakforce': ' PeakDriveForce (lbs)',
    'distance': ' Horizontal (meters)',
    'catch': 'catch',
    'finish': 'finish',
    'peakforceangle': 'peakforceangle',
    'wash': 'wash',
    'slip': 'slip',
    'workoutstate': ' WorkoutState',
    'cumdist': 'cum_dist',
    'check_factor': 'check_factor',
}
def remove_nulls_pl(data):
    """Clean a polars DataFrame of infinities, NaNs and nulls.

    Infinite values are nulled, gaps are forward- then backward-filled,
    only numeric columns are kept, columns that are entirely null (or still
    contain infinities) are dropped, and finally remaining null rows are
    dropped.  Returns the cleaned, collected DataFrame.
    """
    # log the hr column length before cleaning (debugging aid)
    for c in data.columns:
        if c=='hr':
            dologging('remove_nulls.log',"HR data len {f}".format(f=len(data[c])))
    # null out +/-inf in every column (keep_name preserves column names;
    # NOTE(review): keep_name() is the pre-0.19 polars spelling of
    # .name.keep() — pinned to the installed polars version)
    data = data.lazy().with_columns(
        pl.when(
            pl.all().is_infinite()
        ).then(None).otherwise(pl.all()).keep_name()
    )
    data = data.select(pl.all().forward_fill())
    data = data.select(pl.all().backward_fill())
    data = data.fill_nan(None)
    # keep numeric columns only and execute the lazy pipeline
    data = data.select(cs.by_dtype(pl.NUMERIC_DTYPES)).collect()
    # drop columns that still contain any infinity
    data = data[[s.name for s in data if not s.is_infinite().sum()]]
    # drop columns that are entirely null
    data = data[[s.name for s in data if not (s.null_count() == data.height)]]
    if not data.is_empty():
        try:
            data = data.drop_nulls()
        except:  # pragma: no cover
            pass
    # log the hr column length after cleaning
    for c in data.columns:
        if c=='hr':
            dologging('remove_nulls.log',"HR data len {f}".format(f=len(data[c])))
    return data
def get_video_data(w, groups=['basic'], mode='water'):
    """Build per-second series (boat speed, GPS track, metrics) for video overlay.

    w: Workout instance.  groups: metric groups to include.  mode: matched
    against each metric's 'mode' (plus 'both'/'basic').
    Returns (data, metrics, maxtime): data maps series name -> list of
    per-second values, metrics maps metric name -> display info, maxtime is
    the last timestamp (seconds) of the GPS track.
    NOTE(review): groups=['basic'] is a mutable default argument — it is
    never mutated here, but None would be the safer default.
    """
    modes = [mode, 'both', 'basic']
    columns = ['time', 'velo', 'spm']
    columns += [name for name, d in rowingmetrics if d['group']
                in groups and d['mode'] in modes]
    columns = list(set(columns))
    df = getsmallrowdata_pd(columns, ids=[w.id],
                            workstrokesonly=False, doclean=False, compute=False)
    # drop all-empty rows and columns before resampling
    df.dropna(axis=0, how='all', inplace=True)
    df.dropna(axis=1, how='all', inplace=True)
    # time is stored in ms; rebase to seconds from workout start
    df['time'] = (df['time']-df['time'].min())/1000.
    df.sort_values(by='time', inplace=True)
    df.set_index(pd.to_timedelta(df['time'], unit='s'), inplace=True)
    # one sample per wall-clock second, gaps forward-filled
    df2 = df.resample('1s').first().fillna(method='ffill')
    df2['time'] = df2.index.total_seconds()
    if 'pace' in columns:
        # pace is stored in ms/500m here (divided to seconds before
        # formatting as mm:ss.t strings)
        df2['pace'] = df2['pace']/1000.
        p = df2['pace']
        p = p.apply(lambda x: timedeltaconv(x))
        p = nicepaceformat(p)
        df2['pace'] = p
    df2['time'] = (df2['time']-df2['time'].min())
    df2 = df2.round(decimals=2)
    try:
        coordinates = get_latlon_time(w.id)
    except KeyError:  # pragma: no cover
        # no GPS track: substitute an all-zero track on the same time base
        nulseries = df['time']*0
        coordinates = pd.DataFrame({
            'time': df['time'],
            'latitude': nulseries,
            'longitude': nulseries,
        })
    coordinates.set_index(pd.to_timedelta(
        coordinates['time'], unit='s'), inplace=True)
    coordinates = coordinates.resample('1s').first().interpolate().fillna(method='ffill')
    #coordinates['time'] = coordinates['time']-coordinates['time'].min()
    df2 = pd.concat([df2, coordinates], axis=1)
    latitude = df2['latitude']
    longitude = df2['longitude']
    # boat speed rounded to two decimals via integer arithmetic
    try:
        boatspeed = (100*df2['velo'].fillna(method='ffill').fillna(method='bfill')).astype(int)/100.
    except IntCastingNaNError:
        boatspeed = 0.0*df2['longitude']
    # bundle data
    data = {
        'boatspeed': boatspeed.values.tolist(),
        'latitude': latitude.values.tolist(),
        'longitude': longitude.values.tolist(),
    }
    metrics = {}
    for c in columns:
        if c != 'time':
            try:
                if dict(rowingmetrics)[c]['numtype'] == 'integer':  # pragma: no cover
                    data[c] = df2[c].astype(int).tolist()
                else:
                    # round to the metric's configured significant figures
                    sigfigs = dict(rowingmetrics)[c]['sigfigs']
                    if (c != 'pace'):
                        try:
                            da = ((10**sigfigs)*df2[c]).astype(int)/(10**sigfigs)
                        except:
                            da = df2[c]
                    else:
                        # pace is already a formatted string at this point
                        da = df2[c]
                    data[c] = da.values.tolist()
                metrics[c] = {
                    'name': dict(rowingmetrics)[c]['verbose_name'],
                    'metric': c,
                    'unit': ''
                }
            except KeyError:  # pragma: no cover
                pass
    # the overlay chart uses 'boatspeed', not the raw 'velo' name
    metrics['boatspeed'] = metrics.pop('velo')
    # metrics['workperstroke'] = metrics.pop('driveenergy')
    metrics = collections.OrderedDict(sorted(metrics.items()))
    maxtime = coordinates['time'].max()
    data = pd.DataFrame(data)
    data.replace([np.inf, -np.inf], np.nan, inplace=True)
    data.dropna(inplace=True)
    # round-trip through polars to hand back plain python lists
    data = pl.from_pandas(data)
    data = data.to_dict(as_series=False)
    return data, metrics, maxtime
def polarization_index(df, rower):
    """Return the training polarization index of a power time series.

    df: pandas frame with 'time' (milliseconds) and 'power' columns (the
    frame is modified in place: a 'dt' column is added and NaN rows are
    dropped).  rower: object with pw_at / pw_an power-threshold attributes
    (aerobic and anaerobic thresholds, watts).

    Index = log10(100 * frac_high * frac_low / frac_mid) over time spent
    in the low / mid / high power zones.
    """
    # stroke duration in minutes (time is in ms; 60000 ms per minute)
    df['dt'] = df['time'].diff()/6.e4
    # first row has no diff; drop it (and any other NaN rows)
    df.dropna(axis=0, inplace=True)
    # guard against pauses and bad timestamps
    df['dt'] = df['dt'].clip(upper=4, lower=0)
    # coerce both thresholds consistently (the old code coerced pw_at only
    # in the low mask, and left a gap at power == pw_an that belonged to
    # no zone at all)
    pw_at = int(rower.pw_at)
    pw_an = int(rower.pw_an)
    masklow = (df['power'] > 0) & (df['power'] < pw_at)
    maskmid = (df['power'] >= pw_at) & (df['power'] < pw_an)
    maskhigh = (df['power'] >= pw_an)
    time_low_pw = df.loc[masklow, 'dt'].sum()
    time_mid_pw = df.loc[maskmid, 'dt'].sum()
    time_high_pw = df.loc[maskhigh, 'dt'].sum()
    total = time_low_pw + time_mid_pw + time_high_pw
    frac_low = time_low_pw/total
    frac_mid = time_mid_pw/total
    frac_high = time_high_pw/total
    index = math.log10(frac_high*100.*frac_low/frac_mid)
    return index
def get_latlon(id):
    """Return [latitude, longitude] Series for workout *id*.

    Falls back to all-zero series when the stroke data has no GPS columns,
    to empty float series when no usable frame exists, and to False when
    the workout does not exist.
    """
    try:
        workout = Workout.objects.get(id=id)
    except Workout.DoesNotExist:  # pragma: no cover
        return False
    rowdata = rdata(workout.csvfilename)
    if rowdata.df.empty:  # pragma: no cover
        return [pd.Series([], dtype='float'), pd.Series([], dtype='float')]
    try:
        try:
            lat = rowdata.df.loc[:, ' latitude']
            lon = rowdata.df.loc[:, ' longitude']
        except KeyError:
            # no GPS columns: zero-valued series on the same index
            lat = 0 * rowdata.df.loc[:, 'TimeStamp (sec)']
            lon = 0 * rowdata.df.loc[:, 'TimeStamp (sec)']
        return [lat, lon]
    except AttributeError:  # pragma: no cover
        return [pd.Series([], dtype='float'), pd.Series([], dtype='float')]
def get_latlon_time(id):
    """Return a DataFrame with 'time', 'latitude', 'longitude' for workout *id*.

    time is rebased to start at 0 seconds.  Returns False when the workout
    does not exist, a pair of empty Series when the stroke frame is empty
    (kept for backward compatibility), and an empty DataFrame when the
    frame has no usable attributes.  When GPS columns are absent, all-zero
    columns are synthesized.
    """
    try:
        w = Workout.objects.get(id=id)
    except Workout.DoesNotExist:  # pragma: no cover
        return False
    rowdata = rdata(w.csvfilename)
    if rowdata.df.empty:  # pragma: no cover
        return [pd.Series([], dtype='float'), pd.Series([], dtype='float')]
    try:
        try:
            _ = rowdata.df.loc[:, ' latitude']
            _ = rowdata.df.loc[:, ' longitude']
        except KeyError:  # pragma: no cover
            # synthesize zero GPS columns under the ' latitude'/' longitude'
            # names (leading space) that are read below — the previous
            # version wrote 'latitude'/'longitude' and then raised an
            # uncaught KeyError on the read
            rowdata.df[' latitude'] = 0 * rowdata.df.loc[:, 'TimeStamp (sec)']
            rowdata.df[' longitude'] = 0 * rowdata.df.loc[:, 'TimeStamp (sec)']
    except AttributeError:  # pragma: no cover
        return pd.DataFrame()
    df = pd.DataFrame({
        'time': rowdata.df['TimeStamp (sec)']-rowdata.df['TimeStamp (sec)'].min(),
        'latitude': rowdata.df[' latitude'],
        'longitude': rowdata.df[' longitude']
    })
    return df
def workout_has_latlon(id):
    """Return (has_gps, mean_latitude, mean_longitude) for workout *id*.

    A workout has GPS only when the track is non-zero and actually moves
    (non-zero standard deviation in both coordinates).
    """
    lat, lon = get_latlon(id)
    lat_mean = lat.mean()
    lon_mean = lon.mean()
    # an all-zero track means "no GPS data recorded"
    if lat_mean == 0 and lon_mean == 0:
        return False, lat_mean, lon_mean
    # a constant track is also not a real GPS trace
    if lat.std() > 0 and lon.std() > 0:
        return True, lat_mean, lon_mean
    return False, lat_mean, lon_mean
def get_workouts(ids, userid):  # pragma: no cover
    """Return the Workout objects from *ids* that belong to *userid*."""
    owned = [
        wid for wid in ids
        if int(Workout.objects.get(id=wid).user.user.id) == int(userid)
    ]
    return [Workout.objects.get(id=wid) for wid in owned]
def filter_df(datadf, fieldname, value, largerthan=True):
    """Null out values of *fieldname* on the wrong side of *value*.

    largerthan=True keeps values >= value (smaller ones become NaN);
    largerthan=False keeps values < value.  Modifies datadf in place and
    returns it; a missing column or non-comparable dtype is a no-op.
    """
    if fieldname not in datadf:
        return datadf
    try:
        if largerthan:
            bad = datadf[fieldname] < value
        else:
            bad = datadf[fieldname] >= value
        datadf.loc[bad, fieldname] = np.nan
    except TypeError:  # pragma: no cover
        pass
    return datadf
# joins workouts
def df_resample(datadf):
    """Downsample to at most one sample per whole second.

    Adds an integer 'timestamps' column (truncated seconds) to datadf and
    returns the rows that are the first sample within each second.
    Time stamps must be in seconds.
    """
    datadf['timestamps'] = datadf['TimeStamp (sec)'].astype('int')
    # newdf = datadf.groupby(['timestamps']).mean()
    # keep only the first row of every one-second bucket
    return datadf[~datadf.duplicated(['timestamps'])]
def clean_df_stats(datadf, workstrokesonly=True, ignorehr=True,
                   ignoreadvanced=False, for_chart=False):
    """Null out zeros and physically implausible values in a stroke frame.

    datadf: pandas frame of stroke metrics, possibly spanning several
    workouts (a 'workoutid' column is added when missing).
    workstrokesonly: drop rest strokes (workoutstate 3) when True or the
    string 'True'.
    ignorehr / ignoreadvanced: skip heart-rate / advanced-metric range
    checks; ignoreadvanced is also auto-enabled when any advanced column
    is flat or all-NaN (device did not record it).
    for_chart: return right after filtering, skipping the
    did-we-delete-too-much safeguard.

    Returns the cleaned frame, or the untouched original when cleaning
    would leave fewer than 2 rows (or under 1%) for any workout.
    """
    # clean data remove zeros and negative values
    # make sure a workoutid column exists for the before/after bookkeeping
    try:
        _ = datadf['workoutid'].unique()
    except KeyError:
        try:
            datadf['workoutid'] = 0
        except TypeError:  # pragma: no cover
            # polars frames reject item assignment; use with_columns
            datadf = datadf.with_columns(pl.lit(0).alias("workoutid"))
    # per-workout row counts before cleaning
    before = {}
    ids = datadf['workoutid'].unique()
    for workoutid in ids:
        before[workoutid] = len(datadf[datadf['workoutid'] == workoutid])
    data_orig = datadf.copy()
    # bring metrics which have negative values to positive domain
    if len(datadf) == 0:
        return datadf
    # catch angles are negative; negate so clip(lower=0) keeps them
    try:
        datadf['catch'] = -datadf['catch']
    except (KeyError, TypeError):
        pass
    try:
        datadf['peakforceangle'] = datadf['peakforceangle'] + 1000
    except (KeyError, TypeError):
        pass
    # shift hr so low readings survive the zero-to-NaN replacement below
    try:
        datadf['hr'] = datadf['hr'] + 10
    except (KeyError, TypeError):
        pass
    # protect 0 spm values from being nulled
    try:
        datadf['spm'] = datadf['spm'] + 1.0
    except (KeyError, TypeError):
        pass
    # protect 0 workoutstate values from being nulled
    try:
        datadf['workoutstate'] = datadf['workoutstate'] + 1
    except (KeyError, TypeError):
        pass
    try:
        datadf = datadf.clip(lower=0)
    except TypeError:
        pass
    # protect advanced metrics columns
    advancedcols = [
        'rhythm',
        'power',
        'drivelength',
        'forceratio',
        'drivespeed',
        'driveenergy',
        'catch',
        'finish',
        'averageforce',
        'peakforce',
        'slip',
        'wash',
        'peakforceangle',
        'effectiveangle',
    ]
    # a zero reading is treated as a missing sample
    datadf.replace(to_replace=0, value=np.nan, inplace=True)
    # datadf = datadf.map_partitions(lambda df:df.replace(to_replace=0,value=np.nan))
    # bring spm back to real values
    try:
        datadf['spm'] = datadf['spm'] - 1
    except (TypeError, KeyError):
        pass
    # bring workoutstate back to real values
    try:
        datadf['workoutstate'] = datadf['workoutstate'] - 1
    except (TypeError, KeyError):
        pass
    # return from positive domain to negative
    try:
        datadf['catch'] = -datadf['catch']
    except (KeyError, TypeError):
        pass
    try:
        datadf['peakforceangle'] = datadf['peakforceangle'] - 1000
    except (KeyError, TypeError):
        pass
    try:
        datadf['hr'] = datadf['hr'] - 10
    except (KeyError, TypeError):
        pass
    # clean data for useful ranges per column
    # NOTE(review): DataFrame.mask(mask, inplace=True) with a boolean Series
    # nulls data wherever the condition holds across the frame — confirm the
    # whole-row (rather than single-column) masking is intended
    if not ignorehr:  # pragma: no cover
        try:
            mask = datadf['hr'] < 30
            datadf.mask(mask, inplace=True)
        except (KeyError, TypeError):  # pragma: no cover
            pass
    try:
        mask = datadf['spm'] < 0
        datadf.mask(mask, inplace=True)
    except (KeyError, TypeError):
        pass
    try:
        mask = datadf['efficiency'] > 200.
        datadf.mask(mask, inplace=True)
    except (KeyError, TypeError):
        pass
    try:
        mask = datadf['spm'] < 10
        datadf.mask(mask, inplace=True)
    except (KeyError, TypeError):
        pass
    # pace appears to be stored in ms/500m: divided by 1000 and compared
    # against plausible seconds/500m bounds (60..300)
    try:
        mask = datadf['pace'] / 1000. > 300.
        datadf.mask(mask, inplace=True)
    except (KeyError, TypeError):
        pass
    try:
        mask = datadf['efficiency'] < 0.
        datadf.mask(mask, inplace=True)
    except (KeyError, TypeError):
        pass
    try:
        mask = datadf['pace'] / 1000. < 60.
        datadf.mask(mask, inplace=True)
    except (KeyError, TypeError):
        pass
    try:
        mask = datadf['power'] > 5000
        datadf.mask(mask, inplace=True)
    except (KeyError, TypeError):
        pass
    try:
        mask = datadf['spm'] > 120
        datadf.mask(mask, inplace=True)
    except (KeyError, TypeError):
        pass
    # wash below 1 degree is noise; only the wash column is nulled here
    try:
        mask = datadf['wash'] < 1
        datadf.loc[mask, 'wash'] = np.nan
    except (KeyError, TypeError):
        pass
    # try to guess ignoreadvanced
    # a flat or all-NaN advanced column means the device did not record it
    if not ignoreadvanced:
        for metric in advancedcols:
            try:
                sum = datadf[metric].std()  # noqa — shadows builtin 'sum'
                if sum == 0 or np.isnan(sum):
                    ignoreadvanced = True
            except KeyError:
                pass
    if not ignoreadvanced:
        try:
            mask = datadf['rhythm'] < 0
            datadf.mask(mask, inplace=True)
        except (KeyError, TypeError):
            pass
        try:
            mask = datadf['rhythm'] > 70
            datadf.mask(mask, inplace=True)
        except (KeyError, TypeError):
            pass
        try:
            mask = datadf['power'] < 20
            datadf.mask(mask, inplace=True)
        except (KeyError, TypeError):
            pass
        try:
            mask = datadf['drivelength'] < 0.5
            datadf.mask(mask, inplace=True)
        except (KeyError, TypeError):
            pass
        try:
            mask = datadf['forceratio'] < 0.2
            datadf.mask(mask, inplace=True)
        except (KeyError, TypeError):
            pass
        try:
            mask = datadf['forceratio'] > 1.0
            datadf.mask(mask, inplace=True)
        except (KeyError, TypeError):
            pass
        try:
            mask = datadf['drivespeed'] < 0.5
            datadf.mask(mask, inplace=True)
        except (KeyError, TypeError):
            pass
        try:
            mask = datadf['drivespeed'] > 4
            datadf.mask(mask, inplace=True)
        except (KeyError, TypeError):
            pass
        try:
            mask = datadf['driveenergy'] > 2000
            datadf.mask(mask, inplace=True)
        except (KeyError, TypeError):
            pass
        try:
            mask = datadf['driveenergy'] < 100
            datadf.mask(mask, inplace=True)
        except (KeyError, TypeError):
            pass
        try:
            mask = datadf['catch'] > -30.
            datadf.mask(mask, inplace=True)
        except (KeyError, TypeError):
            pass
    # workoutstateswork = [1, 4, 5, 8, 9, 6, 7]
    workoutstatesrest = [3]
    # workoutstatetransition = [0, 2, 10, 11, 12, 13]
    if workstrokesonly == 'True' or workstrokesonly is True:
        try:
            datadf = datadf[~datadf['workoutstate'].isin(workoutstatesrest)]
        except:
            pass
    after = {}
    if for_chart:  # pragma: no cover
        return datadf
    # safeguard: if cleaning deleted (almost) everything for any workout,
    # fall back to the original uncleaned frame
    for workoutid in data_orig['workoutid'].unique():
        after[workoutid] = len(
            datadf[datadf['workoutid'] == workoutid].dropna())
        ratio = float(after[workoutid])/float(before[workoutid])
        if ratio < 0.01 or after[workoutid] < 2:
            return data_orig
    return datadf  # pragma: no cover
def replace_zeros_with_nan(x):  # pragma: no cover
    """Map 0 to NaN; pass every other value through unchanged."""
    return x if x != 0 else np.nan
def clean_df_stats_pl(datadf, workstrokesonly=True, ignorehr=True,
                      ignoreadvanced=False, for_chart=False):  # pragma: no cover
    """Polars counterpart of clean_df_stats.

    Nulls zero readings, drops rows outside plausible physical ranges and
    (optionally) rest strokes.  Returns the cleaned frame, or the
    untouched original when cleaning fails or would leave fewer than 2
    rows (or under 1%) for any workout.  Parameters as in clean_df_stats.
    """
    # make sure a workoutid column exists for the before/after bookkeeping
    try:
        _ = datadf['workoutid'].unique()
    except KeyError:  # pragma: no cover
        try:
            datadf['workoutid'] = 0
        except TypeError:
            datadf = datadf.with_columns(pl.lit(0).alias("workoutid"))
    except ColumnNotFoundError:  # pragma: no cover
        datadf = datadf.with_columns(pl.lit(0).alias("workoutid"))
    # per-workout row counts before cleaning
    before = {}
    ids = list(datadf['workoutid'].unique())
    for workoutid in ids:
        before[workoutid] = len(datadf.filter(pl.col("workoutid") == workoutid))
    data_orig = datadf.clone()
    if len(datadf) == 0:  # pragma: no cover
        return data_orig
    # shift metrics with legitimate non-positive values into the positive
    # domain so the clip / zero-null steps below do not destroy them
    try:
        datadf = datadf.with_columns((-pl.col('catch')).alias('catch'))
    except (KeyError, TypeError):  # pragma: no cover
        pass
    except (ComputeError, InvalidOperationError, ColumnNotFoundError):
        return data_orig
    try:  # pragma: no cover
        datadf = datadf.with_columns((pl.col('peakforceangle')+1000).alias('peakforceangle'))
    except (KeyError, TypeError):
        pass
    except (ComputeError, InvalidOperationError, ColumnNotFoundError):
        return data_orig
    try:  # pragma: no cover
        datadf = datadf.with_columns((pl.col('hr')+10).alias('hr'))
    except (KeyError, TypeError):
        pass
    except (ComputeError, InvalidOperationError, ColumnNotFoundError):
        return data_orig
    # protect 0 spm values from being nulled
    try:  # pragma: no cover
        datadf = datadf.with_columns((pl.col('spm')+1.0).alias('spm'))
    except (KeyError, TypeError):
        pass
    except (ComputeError, InvalidOperationError, ColumnNotFoundError):
        return data_orig
    # protect 0 workoutstate values from being nulled
    try:  # pragma: no cover
        datadf = datadf.with_columns((pl.col('workoutstate')+1).alias('workoutstate'))
    except (KeyError, TypeError):
        pass
    except (ComputeError, InvalidOperationError, ColumnNotFoundError):
        return data_orig
    try:  # pragma: no cover
        datadf = datadf.select(pl.all().clip(lower_bound=0))
    except TypeError:
        pass
    except (ComputeError, InvalidOperationError, ColumnNotFoundError):
        return data_orig
    # advanced (force-curve) metrics columns
    advancedcols = [
        'rhythm',
        'power',
        'drivelength',
        'forceratio',
        'drivespeed',
        'driveenergy',
        'catch',
        'finish',
        'averageforce',
        'peakforce',
        'slip',
        'wash',
        'peakforceangle',
        'effectiveangle',
    ]  # pragma: no cover
    # a zero reading is a missing sample: null it out column by column.
    # .alias(col) writes the result back into the same column — the
    # previous version passed name=col, which created a column literally
    # called 'name' instead.
    for col in datadf.columns:  # pragma: no cover
        datadf = datadf.with_columns(
            pl.when(pl.col(col) == 0)
            .then(pl.lit(np.nan))
            .otherwise(pl.col(col))
            .alias(col)
        )
    # undo the positive-domain shifts
    try:  # pragma: no cover
        datadf = datadf.with_columns((pl.col('spm')-1.0).alias('spm'))
    except (TypeError, KeyError):
        pass
    try:  # pragma: no cover
        datadf = datadf.with_columns((pl.col('workoutstate')-1).alias('workoutstate'))
    except (TypeError, KeyError):
        pass
    try:  # pragma: no cover
        datadf = datadf.with_columns((-pl.col('catch')).alias('catch'))
    except (KeyError, TypeError):
        pass
    try:  # pragma: no cover
        datadf = datadf.with_columns((pl.col('peakforceangle')-1000).alias('peakforceangle'))
    except (KeyError, TypeError):
        pass
    try:
        # subtract the +10 shift again (the previous version added 10 a
        # second time here, leaving hr 20 bpm too high)
        datadf = datadf.with_columns((pl.col('hr')-10).alias('hr'))
    except (KeyError, TypeError):
        pass
    # clean data for useful ranges per column
    if not ignorehr:
        datadf = datadf.filter(pl.col("hr") >= 30)
    datadf = datadf.filter(
        pl.col("spm") >= 0,
        pl.col("spm") >= 10,
        pl.col("pace") <= 300*1000.,
        pl.col("pace") >= 60*1000,
        pl.col("power") <= 5000,
        pl.col("spm") <= 120,
    )
    # guess ignoreadvanced: a flat or all-null advanced column means the
    # device did not record it
    if not ignoreadvanced:
        for metric in advancedcols:
            try:
                spread = datadf[metric].std()
                if spread == 0 or np.isnan(spread):
                    ignoreadvanced = True
            except (KeyError, TypeError):
                pass
    if not ignoreadvanced:
        datadf = datadf.filter(pl.col("rhythm") >= 0,
                               pl.col("rhythm") <= 70,
                               pl.col("power") >= 20,
                               pl.col("efficiency") <= 200,
                               pl.col("drivelength") >= 0.5,
                               pl.col("wash") >= 1,
                               pl.col("efficiency") >= 0,
                               pl.col("forceratio") >= 0.2,
                               pl.col("forceratio") <= 1.0,
                               pl.col("drivespeed") >= 0.5,
                               pl.col("drivespeed") <= 4,
                               pl.col("driveenergy") <= 2000,
                               pl.col("driveenergy") >= 100,
                               pl.col("catch") <= -30)
    # workoutstateswork = [1, 4, 5, 8, 9, 6, 7]
    workoutstatesrest = [3]
    # workoutstatetransition = [0, 2, 10, 11, 12, 13]
    if workstrokesonly == 'True' or workstrokesonly is True:
        datadf = datadf.filter(~pl.col("workoutstate").is_in(workoutstatesrest))
    after = {}
    if for_chart:
        return datadf
    # safeguard: fall back to the original frame when cleaning removed
    # (almost) all strokes of any workout
    for workoutid in data_orig['workoutid'].unique():
        after[workoutid] = len(datadf.filter(pl.col("workoutid") == workoutid))
        ratio = float(after[workoutid])/float(before[workoutid])
        if ratio < 0.01 or after[workoutid] < 2:
            return data_orig
    return datadf
def getpartofday(row, r, startdatetime=None, timezone=''):
    """Classify a workout start as Morning/Afternoon/Evening/Night.

    row: parsed workout with .rowdatetime and .df (stroke frame); r: rower
    providing .defaulttimezone; startdatetime: overrides the hour when
    given.  The local hour is derived from the GPS position when the frame
    has ' latitude'/' longitude' columns, otherwise from the rower's
    default timezone.
    NOTE(review): the *timezone* parameter is accepted but never used here.
    """
    workoutstartdatetime = row.rowdatetime
    try:  # pragma: no cover
        # locate the workout from average GPS position
        latavg = row.df[' latitude'].mean()
        lonavg = row.df[' longitude'].mean()
        tf = TimezoneFinder()
        try:
            timezone_str = tf.timezone_at(lng=lonavg, lat=latavg)
        except (ValueError, OverflowError):  # pragma: no cover
            timezone_str = 'UTC'
        if timezone_str is None:  # pragma: no cover
            # no exact hit: fall back to nearest zone, then rower default
            timezone_str = tf.closest_timezone_at(lng=lonavg,
                                                  lat=latavg)
            if timezone_str is None:
                timezone_str = r.defaulttimezone
        try:
            workoutstartdatetime = pytz.timezone(timezone_str).localize(
                row.rowdatetime
            )
        except ValueError:
            # rowdatetime was already timezone-aware
            workoutstartdatetime = row.rowdatetime
    except KeyError:
        # no GPS columns: use the rower's configured timezone
        timezone_str = r.defaulttimezone
        workoutstartdatetime = row.rowdatetime
    h = workoutstartdatetime.astimezone(pytz.timezone(timezone_str)).hour
    if startdatetime is not None:
        # an explicit start datetime wins over the derived one
        h = startdatetime.hour
    if h < 12:  # pragma: no cover
        return "Morning"
    elif h < 18:  # pragma: no cover
        return "Afternoon"
    elif h < 22:  # pragma: no cover
        return "Evening"
    else:  # pragma: no cover
        return "Night"
    return None  # pragma: no cover
def getstatsfields():
    """Return (fieldlist, fielddict) of metrics usable for statistics.

    fielddict maps metric name -> verbose name for every rowing metric
    except the axis-like 'time', 'distance' and 'cumdist'; fieldlist is
    the list of its keys.
    """
    fielddict = {name: d['verbose_name'] for name, d in rowingmetrics}
    # time / distance / cumdist are axes, not statistics
    fielddict.pop('time')
    fielddict.pop('distance')
    fielddict.pop('cumdist')
    # (python 2 iteritems fallback removed — this codebase is python 3)
    fieldlist = list(fielddict)
    return fieldlist, fielddict
# A string representation for time deltas
def niceformat(values):
    """Format an iterable of time-delta values as mm:ss.t strings."""
    return [strfdelta(v) for v in values]
# A nice printable format for time delta values
def strfdelta(tdelta):
    """Format a time delta as 'mm:ss.t' (zero-padded, tenths of a second).

    Accepts datetime.timedelta / pandas.Timedelta (via .seconds and
    .microseconds) and numpy timedelta64 values in nanosecond resolution
    (via .view(np.int64)); anything else formats as '00:00.0'.
    """
    try:
        minutes, seconds = divmod(tdelta.seconds, 60)
        tenths = int(tdelta.microseconds / 1e5)
    except AttributeError:  # pragma: no cover
        try:
            # numpy timedelta64: raw value is nanoseconds
            minutes, seconds = divmod(tdelta.view(np.int64), 60e9)
            seconds, rest = divmod(seconds, 1e9)
            tenths = int(rest / 1e8)
            # divmod with float divisors yields floats; cast so the
            # zero-padded format below prints '02:05.3', not '2.0:5.0.3'
            minutes = int(minutes)
            seconds = int(seconds)
        except AttributeError:
            minutes = 0
            seconds = 0
            tenths = 0
    res = "{minutes:0>2}:{seconds:0>2}.{tenths:0>1}".format(
        minutes=minutes,
        seconds=seconds,
        tenths=tenths,
    )
    return res
def timedelta_to_seconds(tdelta):  # pragma: no cover
    """Convert an object with .minute/.second (datetime.time style) to seconds.

    NOTE(review): despite the name this reads .minute/.second, not
    timedelta fields — callers are expected to pass time-of-day objects.
    """
    return tdelta.minute * 60. + tdelta.second
# A nice printable format for pace values
def nicepaceformat(values):
    """Format an iterable of pace time-delta values as mm:ss.t strings."""
    return [strfdelta(v) for v in values]
# Convert seconds to a Time Delta value, replacing NaN with a 5:50 pace
def timedeltaconv(x):
    """Convert seconds to a timedelta; invalid values become a 5:50 pace.

    NaN, infinity, non-positive values and anything >= 175000 s map to the
    350-second (5:50/500m) placeholder.
    """
    if np.isfinite(x) and 0 < x < 175000:
        return datetime.timedelta(seconds=x)
    return datetime.timedelta(seconds=350.)
def paceformatsecs(values):
    """Format pace values given in seconds as mm:ss.t strings."""
    return [strfdelta(timedeltaconv(v)) for v in values]
def update_c2id_sql(id, c2id):
    """Store the Concept2 logbook id *c2id* on workout *id*; returns 1."""
    w = Workout.objects.get(id=id)
    w.uploadedtoc2 = c2id
    w.save()
    return 1
def getcpdata_sql(rower_id, table='cpdata'):
    """Fetch all CP-curve rows for *rower_id* from *table* as a DataFrame.

    The table name cannot be a bound SQL parameter, so it is interpolated
    (callers pass internal constants only); rower_id is bound to avoid
    SQL injection.  The previous version also leaked a raw DBAPI
    connection and never disposed the engine.
    """
    engine = create_engine(database_url, echo=False)
    try:
        query = sa.text('SELECT * from {table} WHERE user=:rower_id;'.format(
            table=table,
        ))
        df = pd.read_sql_query(query, engine, params={'rower_id': rower_id})
    finally:
        engine.dispose()
    return df
def deletecpdata_sql(rower_id, table='cpdata'):  # pragma: no cover
    """Delete all CP-curve rows for *rower_id* from *table*.

    rower_id is a bound parameter (avoids SQL injection); the table name
    is an internal constant and is interpolated.  Failures are logged to
    stdout and swallowed (best effort, e.g. a locked sqlite database).
    """
    engine = create_engine(database_url, echo=False)
    query = sa.text('DELETE from {table} WHERE user=:rower_id;'.format(
        table=table,
    ))
    with engine.connect() as conn, conn.begin():
        try:
            conn.execute(query, {'rower_id': rower_id})
        except Exception as e:
            print(Exception, e)
            print("Database locked")
    engine.dispose()
def updatecpdata_sql(rower_id, delta, cp, table='cpdata', distance=None,
                     debug=False):  # pragma: no cover
    """Replace the stored CP curve for *rower_id*.

    delta/cp: curve data columns; distance: optional pd.Series appended as
    a 'distance' column when non-empty.  The previous version used a
    pd.Series instance as the default argument (evaluated once at import,
    shared across calls) — None is used instead.
    """
    if distance is None:
        distance = pd.Series([], dtype='float')
    # delete-then-append implements the replace
    deletecpdata_sql(rower_id)
    df = pd.DataFrame(
        {
            'delta': delta,
            'cp': cp,
            'user': rower_id
        }
    )
    if not distance.empty:
        df['distance'] = distance
    engine = create_engine(database_url, echo=False)
    with engine.connect() as conn, conn.begin():
        df.to_sql(table, engine, if_exists='append', index=False)
    engine.dispose()
def get_workoutsummaries(userid, startdate):  # pragma: no cover
    """Summaries of all workouts of *userid* since *startdate*, newest first."""
    rower = User.objects.get(id=userid).rower
    summaries = workout_summary_to_df(rower, startdate=startdate)
    # the stroke-data download links are not useful in a summary table
    summaries.drop(['Stroke Data TCX', 'Stroke Data CSV'], axis=1, inplace=True)
    return summaries.sort_values('date', ascending=False)
def checkduplicates(r, workoutdate, workoutstartdatetime, workoutenddatetime):
    """Return True when rower *r* already has a workout overlapping the window.

    Candidates are non-duplicate workouts on *workoutdate* that do not
    start after the new workout ends; a candidate overlaps when its start
    plus its duration runs past the new workout's start.
    """
    candidates = Workout.objects.filter(
        user=r, date=workoutdate, duplicate=False
    ).exclude(startdatetime__gt=workoutenddatetime)
    for candidate in candidates:
        if candidate.startdatetime is None:
            continue
        dur = candidate.duration
        # duration is stored as a time-of-day value; convert to a timedelta
        length = datetime.timedelta(
            hours=dur.hour, minutes=dur.minute, seconds=dur.second)
        if candidate.startdatetime + length > workoutstartdatetime:
            return True
    return False
# file-format identifier (as used by the upload forms / get_file_type)
# -> rowingdata parser class that reads that format
parsers = {
    'kinomap': KinoMapParser,
    'xls': ExcelTemplate,
    'rp': RowProParser,
    'tcx': TCXParser,
    'mystery': MysteryParser,
    'ritmotime': RitmoTimeParser,
    'quiske': QuiskeParser,
    'rowperfect3': RowPerfectParser,
    'coxmate': CoxMateParser,
    'bcmike': BoatCoachAdvancedParser,
    'boatcoach': BoatCoachParser,
    'boatcoachotw': BoatCoachOTWParser,
    'painsleddesktop': painsledDesktopParser,
    'speedcoach': speedcoachParser,
    'speedcoach2': SpeedCoach2Parser,
    'ergstick': ErgStickParser,
    'fit': FITParser,
    'ergdata': ErgDataParser,
    'humon': HumonParser,
    'eth': ETHParser,
    'nklinklogbook': NKLiNKLogbookParser,
    'hero': HeroParser,
    'smartrow': SmartRowParser,
}
def get_startdate_time_zone(r, row, startdatetime=None, timezone=''):
    """Resolve a workout's start datetime, date, time and timezone.

    r: rower (provides .defaulttimezone); row: parsed workout with
    .rowdatetime and .df; startdatetime: optional explicit start (string
    or datetime); timezone: when non-empty, the resolved datetime is NOT
    converted to the derived zone (preserve_timezone).
    Returns (startdatetime_utc, 'YYYY-MM-DD', 'HH:MM:SS', timezone_str,
    partofday).
    NOTE(review): the *timezone* parameter shadows django.utils.timezone,
    so timezone.make_aware below only works when a module-like object is
    passed; with the default '' it raises AttributeError, which is caught.
    """
    preserve_timezone = False
    if timezone != '':
        preserve_timezone = True
    if isinstance(startdatetime, str):
        try:
            startdatetime = pendulum.parse(startdatetime)
        except:
            dologging('debuglog.log','Could not parse start date time '+startdatetime)
    if startdatetime is not None and startdatetime != '':
        try:
            # 'Ect/GMT' (sic) is this codebase's sentinel for "unknown"
            timezone_str = pendulum.instance(startdatetime).timezone.name
        except ValueError:  # pragma: no cover
            timezone_str = 'Ect/GMT'
    elif startdatetime == '':
        startdatetime = row.rowdatetime
    #else:
    #    startdatetime = row.rowdatetime
    try:
        _ = startdatetime.tzinfo
    except AttributeError:  # pragma: no cover
        startdatetime = row.rowdatetime
    partofday = getpartofday(row, r, startdatetime=startdatetime, timezone=timezone)
    if startdatetime.tzinfo is None or str(startdatetime.tzinfo) in ['tzutc()', 'Ect/GMT']:
        # naive or unknown zone: derive it from GPS, else rower default
        timezone_str = 'UTC'
        try:
            startdatetime = timezone.make_aware(startdatetime)
        except ValueError:  # pragma: no cover
            pass
        except AttributeError:
            pass
        try:
            latavg = row.df[' latitude'].mean()
            lonavg = row.df[' longitude'].mean()
            tf = TimezoneFinder()
            # a constant GPS track is not a usable position fix
            if row.df[' latitude'].std() != 0:
                try:
                    timezone_str = tf.timezone_at(lng=lonavg, lat=latavg)
                except (ValueError, OverflowError):  # pragma: no cover
                    timezone_str = 'UTC'
                if timezone_str is None:  # pragma: no cover
                    timezone_str = tf.closest_timezone_at(lng=lonavg,
                                                          lat=latavg)
                    if timezone_str is None:  # pragma: no cover
                        timezone_str = r.defaulttimezone
            else:
                timezone_str = r.defaulttimezone
            try:
                startdatetime = pytz.timezone(timezone_str).localize(
                    row.rowdatetime
                )
            except ValueError:  # pragma: no cover
                # already aware: convert instead of localizing
                startdatetime = startdatetime.astimezone(
                    pytz.timezone(timezone_str)
                )
        except KeyError:  # pragma: no cover
            timezone_str = r.defaulttimezone
    else:
        timezone_str = str(startdatetime.tzinfo)
    try:
        if not preserve_timezone:
            startdatetime = startdatetime.astimezone(pytz.timezone(timezone_str))
    except UnknownTimeZoneError:
        # timezone_str is a raw offset like "(TimeZone('+02:00'))": parse
        # the offset and map it onto the first pytz zone with that offset
        try:
            offset = timezone_str.strip("(TimeZone('')").split(":")
            hours = int(offset[0])
            minutes = int(offset[1])
            tz_offset = datetime.timedelta(hours=hours, minutes=minutes)
            tz = datetime.timezone(tz_offset)
            # NOTE(review): datetime here is the *module*, so
            # datetime.utcnow() raises AttributeError and this whole
            # fallback is swallowed by the except below — probably meant
            # datetime.datetime.utcnow(); confirm before relying on it
            utc_offset = tz.utcoffset(datetime.utcnow()).total_seconds()
            for zone in pytz.all_timezones:
                if pytz.timezone(zone).utcoffset(datetime.utcnow()).total_seconds() == utc_offset:
                    timezone_str = zone
                    break
        except Exception as e:
            pass
    startdatetime = startdatetime.astimezone(pytz.utc)
    startdate = startdatetime.strftime('%Y-%m-%d')
    starttime = startdatetime.strftime('%H:%M:%S')
    if timezone_str == 'tzutc()':
        timezone_str = 'UTC'  # pragma: no cover
    return startdatetime, startdate, starttime, timezone_str, partofday
def parsenonpainsled(fileformat, f2, summary, startdatetime='', empowerfirmware=None, inboard=None, oarlength=None):
    """Parse a non-painsled workout file with the matching parser class.

    fileformat: key into the module-level `parsers` dict; f2: file path;
    summary: pre-computed summary text (may be replaced here).
    Returns (row, hasrecognized, summary, fileformat); on failure
    (None, False, '', 'unknown').  The returned fileformat carries a
    'v<firmware>' suffix (e.g. 'speedcoach2v2.18', or 'vNone' for formats
    without firmware info — downstream code matches by substring).
    """
    try:
        if fileformat == 'nklinklogbook' and empowerfirmware is not None:  # pragma: no cover
            if inboard is not None and oarlength is not None:
                row = NKLiNKLogbookParser(
                    f2, firmware=empowerfirmware, inboard=inboard, oarlength=oarlength)
            else:
                row = NKLiNKLogbookParser(f2)
        else:
            try:
                row = parsers[fileformat](f2)
            except:
                hasrecognized = False
                return None, hasrecognized, '', 'unknown'
        if startdatetime != '':  # pragma: no cover
            # caller-provided start datetime overrides the file's own
            row.rowdatetime = arrow.get(startdatetime).datetime
        hasrecognized = True
    except (KeyError, IndexError, ValueError):  # pragma: no cover
        hasrecognized = False
        return None, hasrecognized, '', 'unknown'
    s = 'Parsenonpainsled, start date time = {startdatetime}'.format(
        startdatetime=startdatetime,
    )
    dologging('debuglog.log', s)
    # handle speed coach GPS 2
    if (fileformat == 'speedcoach2'):
        oarlength, inboard = get_empower_rigging(f2)
        empowerfirmware = get_empower_firmware(f2)
        if empowerfirmware != '':
            fileformat = fileformat+'v'+str(empowerfirmware)
        else:  # pragma: no cover
            fileformat = 'speedcoach2v0'
        try:
            summary = row.allstats()
        except ZeroDivisionError:  # pragma: no cover
            summary = ''
    else:
        fileformat = fileformat+'v'+str(empowerfirmware)
    # handle FIT
    if (fileformat == 'fit'):  # pragma: no cover
        try:
            s = FitSummaryData(f2)
            s.setsummary()
            summary = s.summarytext
        except Exception as e:
            pass
    hasrecognized = True
    return row, hasrecognized, summary, fileformat
def handle_nonpainsled(f2, fileformat, summary='', startdatetime='', empowerfirmware=None, impeller=False):
    """Parse a non-painsled file and write it out as a gzipped CSV.

    Returns (csvfilename, summary, oarlength, inboard, fileformat,
    impeller); (0, '', 0, 0, '', impeller) when the format is unsupported
    or not recognized.  The original upload file is deleted afterwards.
    """
    # default rigging used when the file carries none
    oarlength = 2.89
    inboard = 0.88
    hasrecognized = False
    row, hasrecognized, summary, fileformat = parsenonpainsled(fileformat, f2, summary, startdatetime=startdatetime,
                                                              empowerfirmware=empowerfirmware)
    # Handle c2log
    if (fileformat == 'c2log' or fileformat == 'rowprolog'):  # pragma: no cover
        return (0, '', 0, 0, '', impeller)
    if not hasrecognized:  # pragma: no cover
        return (0, '', 0, 0, '', impeller)
    f_to_be_deleted = f2
    # should delete file
    # derived CSV name: strip the 4-char extension, append 'o.csv'
    f2 = f2[:-4] + 'o.csv'
    row2 = rrdata(df=row.df)
    if 'quiske' in fileformat:
        row2.add_instroke_speed()
    if 'speedcoach2' in fileformat or 'nklinklogbook' in fileformat:
        # impeller consistency
        impellerdata, consistent, ratio = row.impellerconsistent(threshold=0.3)
        if impellerdata and consistent:
            impeller = True
        if impellerdata and not consistent:
            # inconsistent impeller: trust the GPS speed instead
            row2.use_gpsdata()
    if impeller:
        row2.use_impellerdata()
    row2.write_csv(f2, gzip=True)
    # os.remove(f2)
    # remove the original upload (it may have been gzipped in the meantime)
    try:
        os.remove(f_to_be_deleted)
    except:  # pragma: no cover
        try:
            os.remove(f_to_be_deleted + '.gz')
        except:
            pass
    return (f2, summary, oarlength, inboard, fileformat, impeller)
# Create new workout from file and store it in the database
# This routine should be used everywhere in views.py
def get_notes_from_fit(filename):
try:
fitfile = FitFile(filename, check_crc=False)
except FitHeaderError: # pragma: no cover
return ''
records = fitfile.messages
notes = ''
for record in records:
if record.name == 'session':
try:
notes = ' '.join(record.get_values()['description'].split())
except KeyError:
pass
return notes
def get_title_from_fit(filename):
try:
fitfile = FitFile(filename, check_crc=False)
except FitHeaderError: # pragma: no cover
return ''
records = fitfile.messages
title = ''
for record in records:
if record.name == 'workout':
try:
title = ' '.join(record.get_values()['wkt_name'].split())
except KeyError:
pass
return title
def get_workouttype_from_fit(filename, workouttype='water'):
try:
fitfile = FitFile(filename, check_crc=False)
except FitHeaderError: # pragma: no cover
return workouttype
records = fitfile.messages
fittype = 'rowing'
subsporttype = ''
for record in records:
if record.name in ['sport', 'lap','session']:
try:
fittype = record.get_values()['sport'].lower()
try:
subsporttype = record.get_values()['sub_sport'].lower()
except KeyError:
subsporttype = ''
except (KeyError, AttributeError): # pragma: no cover
pass
if subsporttype:
try:
workouttype = mytypes.fitmappinginv[subsporttype]
except KeyError:
pass
else:
try:
workouttype = mytypes.fitmappinginv[fittype]
except KeyError:
pass
return workouttype
def get_workouttype_from_tcx(filename, workouttype='water'):
tcxtype = 'rowing'
if workouttype in mytypes.otwtypes:
return workouttype
try: # pragma: no cover
d = tcxtools.tcx_getdict(filename)
try:
tcxtype = d['Activities']['Activity']['@Sport'].lower()
if tcxtype == 'other':
tcxtype = 'rowing'
except KeyError:
return workouttype
except TypeError: # pragma: no cover
pass
try: # pragma: no cover
workouttype = mytypes.garminmappinginv[tcxtype.upper()]
except KeyError: # pragma: no cover
return workouttype
return workouttype # pragma: no cover
# Create new workout from data frame and store it in the database
# This routine should be used everywhere in views.py and mailprocessing.py
# Currently there is code duplication
# A wrapper around the rowingdata class, with some error catching
def rdata(file, rower=rrower()):
try:
res = rrdata(csvfile=file, rower=rower)
except (IOError, IndexError): # pragma: no cover
try:
res = rrdata(csvfile=file + '.gz', rower=rower)
except (IOError, IndexError):
res = rrdata()
except:
res = rrdata()
except EOFError: # pragma: no cover
res = rrdata()
except: # pragma: no cover
res = rrdata()
return res
# Remove all stroke data for workout ID from database
def delete_strokedata(id, debug=False):
dirname = 'media/strokedata_{id}.parquet.gz'.format(id=id)
try:
shutil.rmtree(dirname)
except OSError:
try:
os.remove(dirname)
except FileNotFoundError:
pass
except FileNotFoundError: # pragma: no cover
pass
# Replace stroke data in DB with data from CSV file
def update_strokedata(id, df, debug=False):
delete_strokedata(id, debug=debug)
_ = dataplep(df, id=id, bands=True, barchart=True, otwpower=True)
# Test that all data are of a numerical time
def testdata(time, distance, pace, spm): # pragma: no cover
t1 = time.dtype in pl.NUMERIC_DTYPES
t2 = distance.dtype in pl.NUMERIC_DTYPES
t3 = pace.dtype in pl.NUMERIC_DTYPES
t4 = spm.dtype in pl.NUMERIC_DTYPES
return t1 and t2 and t3 and t4
# Get data from DB for one workout (fetches all data). If data
# is not in DB, read from CSV file (and create DB entry)
def getrowdata_db(id=0, doclean=False, convertnewtons=True,
checkefficiency=True, for_chart=False):
data = read_df_sql(id)
try:
data['deltat'] = data['time'].diff()
except KeyError: # pragma: no cover
data = pd.DataFrame()
if data.empty:
rowdata, row = getrowdata(id=id)
if not rowdata.empty: # pragma: no cover
data = dataplep(rowdata.df, id=id, bands=True,
barchart=True, otwpower=True)
else:
data = pd.DataFrame() # returning empty dataframe
else:
row = Workout.objects.get(id=id)
if checkefficiency is True and not data.empty:
try:
if data['efficiency'].mean() == 0 and data['power'].mean() != 0: # pragma: no cover
data = add_efficiency(id=id)
except KeyError: # pragma: no cover
data = add_efficiency(id=id)
if doclean: # pragma: no cover
data = clean_df_stats(data, ignorehr=True, for_chart=for_chart)
return data, row
def getrowdata_pl(id=0, doclean=False, convertnewtons=True,
checkefficiency=True, for_chart=False):
data = read_df_sql(id,polars=True)
try:
data = data.with_columns((pl.col('time').diff()).alias("deltat")) # data['time'].diff()
except KeyError: # pragma: no cover
data = pl.DataFrame()
if data.is_empty():
rowdata, row = getrowdata(id=id)
if not rowdata.empty: # pragma: no cover
data = dataplep(rowdata.df, id=id, bands=True,
barchart=True, otwpower=True, polars=True)
else:
data = pl.DataFrame() # returning empty dataframe
else:
row = Workout.objects.get(id=id)
if checkefficiency is True and not data.is_empty():
try:
if data['efficiency'].mean() == 0 and data['power'].mean() != 0: # pragma: no cover
data = add_efficiency_pl(id=id, polars=True)
except KeyError: # pragma: no cover
data = add_efficiency_pl(id=id)
if doclean: # pragma: no cover
data = clean_df_stats(data, ignorehr=True, for_chart=for_chart)
return data, row
def read_data(columns, ids=[], doclean=True, workstrokesonly=True, debug=False, for_chart=False, compute=True,
startenddict={}, driveenergy=True):
if ids:
csvfilenames = [
'media/strokedata_{id}.parquet.gz'.format(id=id) for id in ids]
else:
return pl.DataFrame()
data = []
columns = [c for c in columns if c != 'None'] + ['distance', 'spm', 'workoutid','workoutstate']
if driveenergy:
columns = columns + ['driveenergy']
columns = list(set(columns))
for id, f in zip(ids, csvfilenames):
if os.path.isfile(f):
try:
df = pl.scan_parquet(f)
except ComputeError:
rowdata, row = getrowdata(id=id)
try:
shutil.rmtree(f)
except:
pass
if rowdata and len(rowdata.df):
_ = dataplep(rowdata.df, id=id,
bands=True, otwpower=True, barchart=True,polars=True)
df = pl.scan_parquet(f)
if startenddict:
try:
startsecond, endsecond = startenddict[id]
df = df.filter(pl.col("time") >= 1.0e3*startsecond,
pl.col("time") <= 1.0e3*endsecond)
df = df.with_columns(time = pl.col("time")-1.0e3*startsecond)
if 'cumdist' in columns:
df = df.collect()
df = df.with_columns(cumdist = pl.col("cumdist")-df[0, "cumdist"]).lazy()
except KeyError:
pass
data.append(df)
else:
rowdata, row = getrowdata(id=id)
try:
shutil.rmtree(f)
except:
pass
if rowdata and len(rowdata.df):
_ = dataplep(rowdata.df, id=id,
bands=True, otwpower=True, barchart=True,
polars=True)
try:
df = pl.scan_parquet(f)
if startenddict:
try:
startsecond, endsecond = startenddict[id]
df = df.filter(pl.col("time") >= 1.0e3*startsecond,
pl.col("time") <= 1.0e3*endsecond)
df = df.with_columns(time = pl.col("time")-1.0e3*startsecond)
if 'cumdist' in columns:
df = df.collect()
df = df.with_columns(cumdist = pl.col("cumdist")-df[0, "cumdist"]).lazy()
except KeyError:
pass
data.append(df)
except ComputeError:
pass
try:
data = pl.collect_all(data)
except ComputeError:
return pl.DataFrame()
if len(data)==0:
return pl.DataFrame()
try:
datadf = pl.concat(data).select(columns)
except ColumnNotFoundError:
datadf = pl.concat(data)
existing_columns = [col for col in columns if col in datadf.columns]
datadf = datadf.select(existing_columns)
except ShapeError:
try:
data = [
df.select(columns)
for df in data]
except ColumnNotFoundError:
existing_columns = [col for col in columns if col in df.columns]
df = df.select(existing_columns)
# float columns
floatcolumns = []
intcolumns = []
stringcolumns = []
for c in columns:
try:
if metricsdicts[c]['numtype'] == 'float':
floatcolumns.append(c)
if metricsdicts[c]['numtype'] == 'integer':
intcolumns.append(c)
except KeyError:
if c[0] == 'f':
stringcolumns.append(c)
else:
intcolumns.append(c)
try:
data = [
df.with_columns(
cs.float().cast(pl.Float64)
).with_columns(
cs.integer().cast(pl.Int64)
).with_columns(
cs.by_name(intcolumns).cast(pl.Int64)
).with_columns(
cs.by_name(floatcolumns).cast(pl.Float64)
).with_columns(
cs.by_name(stringcolumns).cast(pl.String)
)
for df in data
]
except ComputeError:
pass
except ColumnNotFoundError:
pass
try:
datadf = pl.concat(data)
except SchemaError:
try:
data = [
df.with_columns(cs.integer().cast(pl.Float64)) for df in data
]
datadf = pl.concat(data)
except ShapeError:
return pl.DataFrame()
except SchemaError:
try:
data = [
df.select(columns)
for df in data]
except ColumnNotFoundError:
existing_columns = [col for col in columns if col in df.columns]
df = df.select(existing_columns)
# float columns
floatcolumns = []
intcolumns = []
stringcolumns = []
for c in columns:
try:
if metricsdicts[c]['numtype'] == 'float':
floatcolumns.append(c)
if metricsdicts[c]['numtype'] == 'integer':
intcolumns.append(c)
except KeyError:
if c[0] == 'f':
stringcolumns.append(c)
else:
intcolumns.append(c)
try:
data = [
df.with_columns(
cs.float().cast(pl.Float64)
).with_columns(
cs.integer().cast(pl.Int64)
).with_columns(
cs.by_name(intcolumns).cast(pl.Int64)
).with_columns(
cs.by_name(floatcolumns).cast(pl.Float64)
).with_columns(
cs.by_name(stringcolumns).cast(pl.String)
)
for df in data
]
except ComputeError:
pass
except ColumnNotFoundError:
pass
try:
datadf = pl.concat(data)
except SchemaError:
try:
data = [
df.with_columns(cs.integer().cast(pl.Float64)) for df in data
]
datadf = pl.concat(data)
except ShapeError:
return pl.DataFrame()
exprs = []
if workstrokesonly:
workoutstatesrest = [3]
exprs.append(~pl.col("workoutstate").is_in(workoutstatesrest))
# got data
if not doclean:
if exprs:
datadf2 = datadf.filter(exprs)
if not datadf2.is_empty():
return datadf2
return datadf
# do clean
if "spm" in datadf.columns:
exprs.append(pl.col("spm") >= 10 )
exprs.append(pl.col("spm") <= 120)
if "pace" in datadf.columns:
exprs.append(pl.col("pace") <= 300*1000.)
exprs.append(pl.col("pace") >= 60*1000.)
if "power" in datadf.columns:
exprs.append(pl.col("power") <= 5000)
exprs.append(pl.col("power")>=20)
if "rhythm" in datadf.columns:
exprs.append(pl.col("rhythm")>=0)
exprs.append(pl.col("rhythm")<=70)
if "efficiency" in datadf.columns:
exprs.append(pl.col("efficiency")<=200)
exprs.append(pl.col("efficiency")>=0)
if "wash" in datadf.columns:
exprs.append(pl.col("wash")>=1)
if "drivelength" in datadf.columns:
exprs.append(pl.col("drivelength")>=0.5)
if "forceratio" in datadf.columns:
exprs.append(pl.col("forceratio")>=0.2)
exprs.append(pl.col("forceratio")<=1.0)
if "drivespeed" in datadf.columns:
exprs.append(pl.col("drivespeed")>=0.5)
exprs.append(pl.col("drivespeed")<=4)
if "driveenergy" in datadf.columns:
exprs.append(pl.col("driveenergy")<=2000)
exprs.append(pl.col("driveenergy")>=100)
if "catch" in datadf.columns:
exprs.append(pl.col("catch")<=-30)
if exprs:
datadf2 = datadf.filter(exprs)
if not datadf2.is_empty():
return datadf2
exprs = []
if workstrokesonly:
workoutstatesrest = [3]
exprs.append(~pl.col("workoutstate").is_in(workoutstatesrest))
if exprs:
datadf2 = datadf.filter(exprs)
if not datadf2.is_empty():
return datadf2
return datadf
def getsmallrowdata_pd(columns, ids=[], doclean=True, workstrokesonly=True, compute=True,
debug=False, for_chart=False):
# prepmultipledata(ids)
if ids:
csvfilenames = [
'media/strokedata_{id}.parquet.gz'.format(id=id) for id in ids]
else:
return pd.DataFrame()
data = []
columns = [c for c in columns if c != 'None']
columns = list(set(columns))
df = pd.DataFrame()
if len(ids) > 1:
for id, f in zip(ids, csvfilenames):
try:
df = pd.read_parquet(f, columns=columns)
data.append(df)
except (OSError, ArrowInvalid, IndexError): # pragma: no cover
rowdata, row = getrowdata(id=id)
if rowdata and len(rowdata.df):
_ = dataplep(rowdata.df, id=id,
bands=True, otwpower=True, barchart=True)
try:
df = pd.read_parquet(f, columns=columns)
data.append(df)
except (OSError, ArrowInvalid, IndexError):
pass
try:
df = pd.concat(data, axis=0)
except ValueError: # pragma: no cover
return pd.DataFrame()
else:
try:
df = pd.read_parquet(csvfilenames[0], columns=columns)
rowdata, row = getrowdata(id=ids[0])
except (OSError, IndexError, ArrowInvalid):
rowdata, row = getrowdata(id=ids[0])
if rowdata and len(rowdata.df): # pragma: no cover
data = dataplep(
rowdata.df, id=ids[0], bands=True, otwpower=True, barchart=True)
try:
df = pd.read_parquet(csvfilenames[0], columns=columns)
except:
df = pd.DataFrame
else:
df = pd.DataFrame()
except:
rowdata, row = getrowdata(id=ids[0])
if rowdata and len(rowdata.df): # pragma: no cover
data = dataplep(
rowdata.df, id=ids[0], bands=True, otwpower=True, barchart=True)
try:
df = pd.read_parquet(csvfilenames[0], columns=columns)
except:
df = pd.DataFrame()
else:
df = pd.DataFrame()
try:
if compute and len(df):
data = df.copy()
if doclean:
data = clean_df_stats(data, ignorehr=True,
workstrokesonly=workstrokesonly,
for_chart=for_chart)
data.dropna(axis=1, how='all', inplace=True)
data.dropna(axis=0, how='all', inplace=True)
return data
except TypeError:
pass
return df
# Fetch both the workout and the workout stroke data (from CSV file)
def getrowdata(id=0):
# check if valid ID exists (workout exists)
try:
row = Workout.objects.get(id=id)
except Workout.DoesNotExist: # pragma: no cover
return rrdata(), None
f1 = row.csvfilename
# get user
r = row.user
rr = rrower(hrmax=r.max, hrut2=r.ut2,
hrut1=r.ut1, hrat=r.at,
hrtr=r.tr, hran=r.an, ftp=r.ftp)
rowdata = rdata(f1, rower=rr)
return rowdata, row
# Checks if all rows for a list of workout IDs have entries in the
# stroke_data table. If this is not the case, it creates the stroke
# data
# In theory, this should never yield any work, but it's a good
# safety net for programming errors elsewhere in the app
# Also used heavily when I moved from CSV file only to CSV+Stroke data
def prepmultipledata(ids, verbose=False): # pragma: no cover
filenames = glob.glob('media/*.parquet')
ids = [
id for id in ids if 'media/strokedata_{id}.parquet.gz'.format(id=id) not in filenames]
for id in ids:
rowdata, row = getrowdata(id=id)
if verbose:
print(id)
if rowdata and len(rowdata.df):
_ = dataplep(rowdata.df, id=id, bands=True,
barchart=True, otwpower=True)
return ids
# Read a set of columns for a set of workout ids, returns data as a
# pandas dataframe
def read_cols_pl(ids, columns):
extracols = []
columns = list(columns) + ['distance', 'spm', 'workoutid']
columns = [x for x in columns if x != 'None']
columns = list(set(columns))
ids = [int(id) for id in ids]
df = pl.DataFrame()
if len(ids) == 0:
return pl.DataFrame()
df = read_data(columns, ids=ids, doclean=False, compute=False)
if 'peakforce' in columns:
funits = ((w.id, w.forceunit)
for w in Workout.objects.filter(id__in=ids))
for id, u in funits:
if u == 'lbs':
df = df.with_columns(
peakforce=pl.when(pl.col('workoutid')==id)
.then(pl.col('peakforce') * lbstoN)
.otherwise(pl.col('peakforce')))
if 'averageforce' in columns:
funits = ((w.id, w.forceunit)
for w in Workout.objects.filter(id__in=ids))
for id, u in funits:
if u == 'lbs':
df = df.with_columns(
averageforce=pl.when(pl.col('workoutid')==id)
.then(pl.col('averageforce') * lbstoN)
.otherwise(pl.col('averageforce')))
return df, extracols
def read_cols_df_sql(ids, columns, convertnewtons=True):
# drop columns that are not in offical list
# axx = [ax[0] for ax in axes]
extracols = []
columns = list(columns) + ['distance', 'spm', 'workoutid']
columns = [x for x in columns if x != 'None']
columns = list(set(columns))
ids = [int(id) for id in ids]
df = pd.DataFrame()
if len(ids) == 0: # pragma: no cover
return pd.DataFrame(), extracols
elif len(ids) == 1: # pragma: no cover
try:
filename = 'media/strokedata_{id}.parquet.gz'.format(id=ids[0])
pq_file = pq.ParquetDataset(filename)
columns_in_file = [c for c in columns if c in pq_file.schema.names]
df = pd.read_parquet(filename, columns=columns_in_file)
except OSError:
rowdata, row = getrowdata(id=ids[0])
if rowdata and len(rowdata.df):
_ = dataplep(rowdata.df,
id=ids[0], bands=True, otwpower=True, barchart=True)
pq_file = pq.ParquetDataset(filename)
columns_in_file = [c for c in columns if c in pq_file.schema.names]
df = pd.read_parquet(filename, columns=columns_in_file)
else:
data = []
filenames = [
'media/strokedata_{id}.parquet.gz'.format(id=id) for id in ids]
for id, f in zip(ids, filenames):
try:
pq_file = pq.ParquetDataset(f)
columns_in_file = [c for c in columns if c in pq_file.schema.names]
df = pd.read_parquet(f, columns=columns_in_file)
data.append(df)
except (OSError, IndexError, ArrowInvalid):
rowdata, row = getrowdata(id=id)
if rowdata and len(rowdata.df): # pragma: no cover
_ = dataplep(rowdata.df, id=id,
bands=True, otwpower=True, barchart=True)
pq_file = pq.ParquetDataset(f)
columns_in_file = [c for c in columns if c in pq_file.schema.names]
df = pd.read_parquet(f, columns=columns_in_file)
data.append(df)
try:
df = pd.concat(data, axis=0)
except ValueError: # pragma: no cover
return pd.DataFrame(), extracols
df = df.fillna(value=0)
if 'peakforce' in columns:
funits = ((w.id, w.forceunit)
for w in Workout.objects.filter(id__in=ids))
for id, u in funits:
if u == 'lbs':
mask = df['workoutid'] == id
df.loc[mask, 'peakforce'] = df.loc[mask, 'peakforce'] * lbstoN
if 'averageforce' in columns:
funits = ((w.id, w.forceunit)
for w in Workout.objects.filter(id__in=ids))
for id, u in funits:
if u == 'lbs':
mask = df['workoutid'] == id
df.loc[mask, 'averageforce'] = df.loc[mask,
'averageforce'] * lbstoN
return df, extracols
# Read stroke data from the DB for a Workout ID. Returns a pandas dataframe
def read_df_sql(id, polars=False):
if polars:
try:
f = 'media/strokedata_{id}.parquet.gz'.format(id=id)
df = pl.read_parquet(f)
except (IsADirectoryError, FileNotFoundError, OSError, ArrowInvalid, IndexError): # pragma: no cover
rowdata, row = getrowdata(id=id)
try:
shutil.rmtree(f)
except:
pass
if rowdata and len(rowdata.df):
_ = dataplep(rowdata.df, id=id,
bands=True, otwpower=True, barchart=True,
polars=True)
try:
df = pl.read_parquet(f, columns=columns)
except (OSError, ArrowInvalid, IndexError):
pass
df = df.fill_nan(None).drop_nulls()
return df
try:
f = 'media/strokedata_{id}.parquet.gz'.format(id=id)
df = pd.read_parquet(f)
except (OSError, ArrowInvalid, IndexError): # pragma: no cover
rowdata, row = getrowdata(id=id)
if rowdata and len(rowdata.df):
data = dataplep(rowdata.df, id=id, bands=True,
otwpower=True, barchart=True)
try:
df = pd.read_parquet(f)
except OSError:
df = data
else:
df = pd.DataFrame()
df = df.fillna(value=0)
return df
# data fusion
def datafusion(id1, id2, columns, offset):
df1, w1 = getrowdata_db(id=id1)
df1 = df1.drop([ # 'cumdist',
'hr_ut2',
'hr_ut1',
'hr_at',
'hr_tr',
'hr_an',
'hr_max',
'ftime',
'fpace',
'workoutid',
'id'],
axis=1, errors='ignore')
# Add coordinates to DataFrame
latitude, longitude = get_latlon(id1)
df1[' latitude'] = latitude
df1[' longitude'] = longitude
df2 = getsmallrowdata_pd(['time'] + columns, ids=[id2], doclean=False)
forceunit = 'N'
offsetmillisecs = offset.seconds * 1000 + offset.microseconds / 1000.
offsetmillisecs += offset.days * (3600 * 24 * 1000)
df2['time'] = df2['time'] + offsetmillisecs
keep1 = {c: c for c in set(df1.columns)}
for c in columns:
keep1.pop(c)
for c in df1.columns:
if c not in keep1:
df1 = df1.drop(c, axis=1, errors='ignore')
df = pd.concat([df1, df2], ignore_index=True)
df = df.sort_values(['time'])
df = df.interpolate(method='linear', axis=0, limit_direction='both',
limit=10)
df.fillna(method='bfill', inplace=True)
# Some new stuff to try out
#df = df.groupby('time',axis=0).mean()
#df['time'] = df.index
#df.reset_index(drop=True, inplace=True)
df['time'] = df['time'] / 1000.
df['pace'] = df['pace'] / 1000.
df['cum_dist'] = df['cumdist']
return df, forceunit
def fix_newtons(id=0, limit=3000): # pragma: no cover
# rowdata,row = getrowdata_db(id=id,doclean=False,convertnewtons=False)
rowdata = read_data(['peakforce'], ids=[id], doclean=False)
try:
peakforce = rowdata['peakforce']
if peakforce.mean() > limit:
w = Workout.objects.get(id=id)
rowdata = rdata(w.csvfilename)
if rowdata and len(rowdata.df):
update_strokedata(w.id, rowdata.df)
except KeyError:
pass
def remove_invalid_columns_pl(df): # pragma: no cover
for c in df.get_columns():
if c not in allowedcolumns:
df = df.drop(c)
return df
def remove_invalid_columns(df): # pragma: no cover
for c in df.columns:
if c not in allowedcolumns:
df.drop(labels=c, axis=1, inplace=True)
return df
def add_efficiency_pl(id=0): # pragma: no cover
rowdata, row = getrowdata_pl(id=id,
doclean=False,
convertnewtons=False,
checkefficiency=False)
power = rowdata['power']
pace = rowdata['pace'] / 1.0e3
velo = 500. / pace
ergpw = 2.8 * velo**3
efficiency = 100. * ergpw / power
efficiency = efficiency.replace([-np.inf, np.inf], np.nan)
efficiency.fillna(method='ffill')
rowdata = rowdata.with_columns(pl.col(efficiency).alias("efficiency")) # ['efficiency'] = efficiency
rowdata = remove_invalid_columns_pl(rowdata)
rowdata = rowdata.replace([-np.inf, np.inf], np.nan)
rowdata = rowdata.fillna(method='ffill')
delete_strokedata(id)
if id != 0:
rowdata = rowdata.with_column(pl.lit(id).alias("workoutid"))
filename = 'media/strokedata_{id}.parquet.gz'.format(id=id)
rowdata.write_parquet(filename, compression='gzip')
return rowdata
def add_efficiency(id=0): # pragma: no cover
rowdata, row = getrowdata_db(id=id,
doclean=False,
convertnewtons=False,
checkefficiency=False)
power = rowdata['power']
pace = rowdata['pace'] / 1.0e3
velo = 500. / pace
ergpw = 2.8 * velo**3
efficiency = 100. * ergpw / power
efficiency = efficiency.replace([-np.inf, np.inf], np.nan)
efficiency.fillna(method='ffill')
rowdata['efficiency'] = efficiency
rowdata = remove_invalid_columns(rowdata)
rowdata = rowdata.replace([-np.inf, np.inf], np.nan)
rowdata = rowdata.fillna(method='ffill')
delete_strokedata(id)
if id != 0:
rowdata['workoutid'] = id
filename = 'media/strokedata_{id}.parquet.gz'.format(id=id)
df = dd.from_pandas(rowdata, npartitions=1)
df.to_parquet(filename, engine='fastparquet', compression='GZIP')
return rowdata
# This is the main routine.
# it reindexes, sorts, filters, and smooths the data, then
# saves it to the stroke_data table in the database
# Takes a rowingdata object's DataFrame as input
# polars
def dataplep(rowdatadf, id=0, inboard=0.88, forceunit='lbs', bands=True, barchart=True, otwpower=True,
empower=True, debug=False, polars=True
):
# rowdatadf is pd.DataFrame
if isinstance(rowdatadf, pd.DataFrame):
if rowdatadf.empty:
return 0
try:
df = pl.from_pandas(rowdatadf)
except (ArrowInvalid, ArrowTypeError):
for k, v in dtypes.items():
try:
rowdatadf[k] = rowdatadf[k].astype(v)
except KeyError: # pragma: no cover
pass
try:
df = pl.from_pandas(rowdatadf)
except (ArrowInvalid, ArrowTypeError):
return dataprep(rowdatadf, id=id, inboard=inboard, forceunit=forceunit, bands=bands, barchart=barchart,
otwpower=otwpower, debug=debug,polars=True)
else:
df = rowdatadf
if df.is_empty():
return 0
df = df.with_columns((pl.col("TimeStamp (sec)")-df[0, "TimeStamp (sec)"]).alias("TimeStamp (sec)"))
df = df.with_columns((pl.col(" Stroke500mPace (sec/500m)").clip(1,3000)).alias(" Stroke500mPace"))
if ' AverageBoatSpeed (m/s)' not in df.columns:
df = df.with_columns((500./pl.col(' Stroke500mPace (sec/500m)')).alias(' AverageBoatSpeed (m/s)'))
if ' WorkoutState' not in df.columns:
df = df.with_columns((pl.lit(0)).alias(" WorkoutState"))
if df[" DriveTime (ms)"].mean() is not None and df[" DriveTime (ms)"].mean() > 0:
df = df.with_columns((100.*pl.col(" DriveTime (ms)")/(pl.col(" DriveTime (ms)")+pl.col(" StrokeRecoveryTime (ms)"))).alias("rhythm"))
else:
df = df.with_columns((pl.lit(0)).alias("rhythm"))
try:
if df[" PeakDriveForce (lbs)"].mean() is not None and df[" PeakDriveForce (lbs)"].mean() > 0:
df = df.with_columns((pl.col(" AverageDriveForce (lbs)")/pl.col(" PeakDriveForce (lbs)")).alias("forceratio"))
else:
df = df.with_columns((pl.lit(0)).alias("forceratio"))
except TypeError:
df = df.with_columns((pl.lit(0)).alias("forceratio"))
try:
f = df['TimeStamp (sec)'].diff().mean()
except TypeError:
f = 0
windowsize = 1
try:
if f != 0 and not np.isinf(f):
try:
windowsize = 2 * (int(10. / (f))) + 1
except ValueError:
windowsize = 1
except TypeError:
pass
if windowsize <= 3:
windowsize = 5
try:
df = df.with_columns(
(pl.col(" Cadence (stokes/min)").map_batches(lambda x: savgol_filter(x.to_numpy(), windowsize, 3)).explode()
).alias(" Cadence (stokes/min)"))
except ComputeError as e:
pass
try:
df = df.with_columns(
(pl.col(" DriveLength (meters)").map_batches(lambda x: savgol_filter(x.to_numpy(), windowsize, 3)).explode()
).alias(" DriveLength (meters)"))
except ComputeError:
pass
try:
df = df.with_columns(
(pl.col(" HRCur (bpm)").map_batches(lambda x: savgol_filter(x.to_numpy(), windowsize, 3)).explode()
).alias(" HRCur (bpm)"))
except ComputeError:
pass
try:
df = df.with_columns((pl.col("forceratio").map_batches(lambda x: savgol_filter(x.to_numpy(), windowsize, 3)).explode()).alias("forceratio"))
except ComputeError:
pass
df = df.with_columns((pl.col(" DriveLength (meters)") / pl.col(" DriveTime (ms)") * 1.0e3).alias("drivespeed"))
if df[" DriveTime (ms)"].mean() == 0:
df = df.with_columns((pl.lit(0)).alias("drivespeed"))
if 'driveenergy' not in df.columns:
if forceunit == 'lbs':
df = df.with_columns((pl.col(" DriveLength (meters)") * pl.col(" AverageDriveForce (lbs)") * lbstoN).alias("driveenergy"))
else:
df = df.with_columns((pl.col(" DriveLength (meters)") * pl.col(" AverageDriveForce (lbs)")).alias("driveenergy"))
if forceunit == 'lbs':
df = df.with_columns((pl.col(" AverageDriveForce (lbs)") * lbstoN).alias(" AverageDriveForce (lbs)"))
df = df.with_columns((pl.col(" PeakDriveForce (lbs)") * lbstoN).alias(" PeakDriveForce (lbs)"))
if df["driveenergy"].mean() == 0 and df["driveenergy"].std() == 0:
df = df.with_columns((0.0*pl.col("driveenergy")+100).alias("driveenergy"))
df = df.with_columns((60. * pl.col(" AverageBoatSpeed (m/s)")/pl.col(" Cadence (stokes/min)")).alias("distanceperstroke"))
t2 = df["TimeStamp (sec)"].map_elements(lambda x: timedeltaconv(x), return_dtype=pl.Datetime)
p2 = df[" Stroke500mPace"].map_elements(lambda x: timedeltaconv(x), return_dtype=pl.Datetime)
data = pl.DataFrame(
dict(
time=df["TimeStamp (sec)"] * 1e3,
hr=df[" HRCur (bpm)"],
pace=df[" Stroke500mPace"] * 1e3,
spm=df[" Cadence (stokes/min)"],
velo=df[" AverageBoatSpeed (m/s)"],
cumdist=df["cum_dist"],
ftime=niceformat(t2),
fpace=nicepaceformat(p2),
driveenergy=df["driveenergy"],
power=df[' Power (watts)'],
workoutstate=df[" WorkoutState"],
averageforce=df[" AverageDriveForce (lbs)"],
drivelength=df[" DriveLength (meters)"],
peakforce=df[" PeakDriveForce (lbs)"],
forceratio=df["forceratio"],
distance=df["cum_dist"],
drivespeed=df["drivespeed"],
rhythm=df["rhythm"],
distanceperstroke=df["distanceperstroke"],
)
)
data = data.with_columns(
hr_ut2 = df['hr_ut2'],
hr_ut1 = df['hr_ut1'],
hr_at = df['hr_at'],
hr_tr = df['hr_tr'],
hr_an = df['hr_an'],
hr_max = df['hr_max'],
hr_bottom = 0.0*df[' HRCur (bpm)'],
)
if 'check_factor' not in df.columns:
data = data.with_columns(
check_factor = pl.lit(0.0),
)
else:
data = data.with_columns(
check_factor = df['check_factor'],
)
if 'wash' not in df.columns:
data = data.with_columns(
wash = pl.lit(0.0),
catch = pl.lit(0.0),
peakforceangle = pl.lit(0.0),
finish = pl.lit(0.0),
slip = pl.lit(0.0),
totalangle = pl.lit(0.0),
effectiveangle = pl.lit(0.0),
efficiency = pl.lit(0.0),
)
else:
wash = df['wash']
catch = df['catch']
finish = df['finish']
peakforceangle = df['peakforceangle']
arclength = (inboard - 0.05) * (np.radians(finish) - np.radians(catch))
if arclength.mean() is not None and arclength.mean() > 0:
drivelength = arclength
else:
drivelength = data['drivelength']
slip = df['slip']
totalangle = finish - catch
effectiveangle = finish - wash - catch - slip
if windowsize > 3 and windowsize < len(slip):
try:
wash = savgol_filter(wash, windowsize, 3)
except TypeError: # pragma: no cover
pass
try:
slip = savgol_filter(slip, windowsize, 3)
except TypeError: # pragma: no cover
pass
try:
catch = savgol_filter(catch, windowsize, 3)
except TypeError: # pragma: no cover
pass
try:
finish = savgol_filter(finish, windowsize, 3)
except TypeError: # pragma: no cover
pass
try:
peakforceangle = savgol_filter(peakforceangle, windowsize, 3)
except TypeError: # pragma: no cover
pass
try:
drivelength = savgol_filter(drivelength, windowsize, 3)
except TypeError: # pragma: no cover
pass
try:
totalangle = savgol_filter(totalangle, windowsize, 3)
except TypeError: # pragma: no cover
pass
try:
effectiveangle = savgol_filter(effectiveangle, windowsize, 3)
except TypeError: # pragma: no cover
pass
data = data.with_columns(
wash = wash,
catch = catch,
slip = slip,
finish = finish,
peakforceangle = peakforceangle,
drivelength = drivelength,
totalangle = totalangle,
effectiveangle = effectiveangle,
)
ergpw = 2.8*data['velo']**3
efficiency = 100. * ergpw / data['power']
if data['power'].mean() == 0:
efficiency = 100.+0.0*data['power']
data = data.with_columns(efficiency=efficiency)
if id != 0:
data = data.with_columns(
workoutid = pl.lit(id)
)
# cast data
for k, v in dtypes.items():
if v == 'int':
data = data.cast({k: pl.Int64})
filename = 'media/strokedata_{id}.parquet.gz'.format(id=id)
try:
data.write_parquet(filename, compression='gzip')
except IsADirectoryError:
shutil.rmtree(filename)
data.write_parquet(filename, compression='gzip')
return data
# pandas/a little polars
def dataprep(rowdatadf, id=0, bands=True, barchart=True, otwpower=True,
             empower=True, inboard=0.88, forceunit='lbs', debug=False, polars=True):
    """Build the per-stroke analysis/plot frame from a rowingdata-style DataFrame.

    Parameters
    ----------
    rowdatadf : pandas.DataFrame
        Raw stroke data with rowingdata column names (' Power (watts)', ...).
        Mutated in place (pace clipping, ElapsedTime backfill).
    id : int
        Workout id; when non-zero the result is tagged with a ``workoutid``
        column and written to ``media/strokedata_<id>.parquet.gz``.
    bands : bool
        Copy the ``hr_*`` heart-rate band columns into the result.
    barchart : bool
        Unused here; kept for interface compatibility with callers.
    otwpower : bool
        Add no-wind pace / equivalent-erg-power columns.
    empower : bool
        Add Empower oarlock metrics (catch, finish, slip, wash, angles, ...).
    inboard : float
        Inboard length in meters, used to derive arc length from oar angles.
    forceunit : str
        ``'lbs'`` converts the force columns to Newton.
    debug : bool
        Unused here; kept for interface compatibility.
    polars : bool
        When True, write/return polars frames; otherwise pandas (+dask write).

    Returns
    -------
    polars.DataFrame or pandas.DataFrame, or 0 when ``rowdatadf`` is empty.
    """
    if rowdatadf.empty:
        return 0
    t = rowdatadf.loc[:, 'TimeStamp (sec)']
    t = pd.Series(t - rowdatadf.loc[:, 'TimeStamp (sec)'].iloc[0])
    # clip absurd paces (> 3000 s/500m) that would break the plots
    row_index = rowdatadf.loc[:, ' Stroke500mPace (sec/500m)'] > 3000
    rowdatadf.loc[row_index, ' Stroke500mPace (sec/500m)'] = 3000.
    p = rowdatadf.loc[:, ' Stroke500mPace (sec/500m)']
    try:
        velo = rowdatadf.loc[:, ' AverageBoatSpeed (m/s)']
    except KeyError:  # pragma: no cover
        velo = 500./p
    hr = rowdatadf.loc[:, ' HRCur (bpm)']
    spm = rowdatadf.loc[:, ' Cadence (stokes/min)']
    cumdist = rowdatadf.loc[:, 'cum_dist']
    power = rowdatadf.loc[:, ' Power (watts)']
    averageforce = rowdatadf.loc[:, ' AverageDriveForce (lbs)']
    drivelength = rowdatadf.loc[:, ' DriveLength (meters)']
    try:
        workoutstate = rowdatadf.loc[:, ' WorkoutState']
    except KeyError:  # pragma: no cover
        workoutstate = 0 * hr
    peakforce = rowdatadf.loc[:, ' PeakDriveForce (lbs)']
    forceratio = averageforce / peakforce
    forceratio = forceratio.fillna(value=0)
    try:
        drivetime = rowdatadf.loc[:, ' DriveTime (ms)']
        recoverytime = rowdatadf.loc[:, ' StrokeRecoveryTime (ms)']
        # rhythm = drive fraction of the full stroke cycle, in percent
        rhythm = 100. * drivetime / (recoverytime + drivetime)
        rhythm = rhythm.fillna(value=0)
    except Exception:  # pragma: no cover
        rhythm = 0.0 * forceratio
    # smoothing window of roughly 10 seconds of samples (odd length for savgol)
    f = rowdatadf['TimeStamp (sec)'].diff().mean()
    if f != 0 and not np.isinf(f):
        try:
            windowsize = 2 * (int(10. / (f))) + 1
        except ValueError:  # pragma: no cover
            windowsize = 1
    else:
        windowsize = 1
    if windowsize <= 3:
        windowsize = 5
    if windowsize > 3 and windowsize < len(hr):
        spm = savgol_filter(spm, windowsize, 3)
        hr = savgol_filter(hr, windowsize, 3)
        drivelength = savgol_filter(drivelength, windowsize, 3)
        forceratio = savgol_filter(forceratio, windowsize, 3)
    try:
        t2 = t.fillna(method='ffill').apply(lambda x: timedeltaconv(x))
    except TypeError:  # pragma: no cover
        t2 = 0 * t
    p2 = p.fillna(method='ffill').apply(lambda x: timedeltaconv(x))
    try:
        drivespeed = drivelength / rowdatadf[' DriveTime (ms)'] * 1.0e3
    except TypeError:  # pragma: no cover
        drivespeed = 0.0 * rowdatadf['TimeStamp (sec)']
    drivespeed = drivespeed.fillna(value=0)
    try:
        driveenergy = rowdatadf['driveenergy']
    except KeyError:  # pragma: no cover
        if forceunit == 'lbs':
            driveenergy = drivelength * averageforce * lbstoN
        else:
            driveenergy = drivelength * averageforce
    if forceunit == 'lbs':
        averageforce *= lbstoN
        peakforce *= lbstoN
    powerhr = 60.*power/hr
    powerhr = powerhr.fillna(value=0)
    # constant-zero drive energy breaks downstream ratio plots; use a dummy
    if driveenergy.mean() == 0 and driveenergy.std() == 0:
        driveenergy = 0*driveenergy+100
    distance = rowdatadf.loc[:, 'cum_dist']
    velo = 500. / p
    distanceperstroke = 60. * velo / spm
    data = DataFrame(
        dict(
            time=t * 1e3,
            hr=hr,
            pace=p * 1e3,
            spm=spm,
            velo=velo,
            cumdist=cumdist,
            ftime=niceformat(t2),
            fpace=nicepaceformat(p2),
            driveenergy=driveenergy,
            power=power,
            workoutstate=workoutstate,
            averageforce=averageforce,
            drivelength=drivelength,
            peakforce=peakforce,
            forceratio=forceratio,
            distance=distance,
            drivespeed=drivespeed,
            rhythm=rhythm,
            distanceperstroke=distanceperstroke,
            # powerhr=powerhr,
        )
    )
    if bands:
        # HR bands
        data['hr_ut2'] = rowdatadf.loc[:, 'hr_ut2']
        data['hr_ut1'] = rowdatadf.loc[:, 'hr_ut1']
        data['hr_at'] = rowdatadf.loc[:, 'hr_at']
        data['hr_tr'] = rowdatadf.loc[:, 'hr_tr']
        data['hr_an'] = rowdatadf.loc[:, 'hr_an']
        data['hr_max'] = rowdatadf.loc[:, 'hr_max']
        data['hr_bottom'] = 0.0 * data['hr']
    try:
        _ = rowdatadf.loc[:, ' ElapsedTime (sec)']
    except KeyError:  # pragma: no cover
        rowdatadf[' ElapsedTime (sec)'] = rowdatadf['TimeStamp (sec)']
    if empower:
        # Empower oarlock angles; default to zeros when the columns are absent
        try:
            wash = rowdatadf.loc[:, 'wash']
        except KeyError:
            wash = 0 * power
        try:
            catch = rowdatadf.loc[:, 'catch']
        except KeyError:
            catch = 0 * power
        try:
            finish = rowdatadf.loc[:, 'finish']
        except KeyError:
            finish = 0 * power
        try:
            peakforceangle = rowdatadf.loc[:, 'peakforceangle']
        except KeyError:
            peakforceangle = 0 * power
        if data['driveenergy'].mean() == 0:  # pragma: no cover
            try:
                driveenergy = rowdatadf.loc[:, 'driveenergy']
            except KeyError:
                driveenergy = power * 60 / spm
        else:
            driveenergy = data['driveenergy']
        # handle arc length from oar angles; 0.05 m ~ offset to force point
        arclength = (inboard - 0.05) * (np.radians(finish) - np.radians(catch))
        if arclength.mean() > 0:
            drivelength = arclength
        elif drivelength.mean() == 0:
            # back out drive length from energy and force (4.44822 N per lbf);
            # NOTE(review): averageforce may already be in N here — confirm units
            drivelength = driveenergy / (averageforce * 4.44822)
        try:
            slip = rowdatadf.loc[:, 'slip']
        except KeyError:
            slip = 0 * power
        try:
            totalangle = finish - catch
            effectiveangle = finish - wash - catch - slip
        except ValueError:  # pragma: no cover
            totalangle = 0 * power
            effectiveangle = 0 * power
        # smooth the angle series; savgol raises TypeError on non-numeric input
        if windowsize > 3 and windowsize < len(slip):
            try:
                wash = savgol_filter(wash, windowsize, 3)
            except TypeError:  # pragma: no cover
                pass
            try:
                slip = savgol_filter(slip, windowsize, 3)
            except TypeError:  # pragma: no cover
                pass
            try:
                catch = savgol_filter(catch, windowsize, 3)
            except TypeError:  # pragma: no cover
                pass
            try:
                finish = savgol_filter(finish, windowsize, 3)
            except TypeError:  # pragma: no cover
                pass
            try:
                peakforceangle = savgol_filter(peakforceangle, windowsize, 3)
            except TypeError:  # pragma: no cover
                pass
            try:
                driveenergy = savgol_filter(driveenergy, windowsize, 3)
            except TypeError:  # pragma: no cover
                pass
            try:
                drivelength = savgol_filter(drivelength, windowsize, 3)
            except TypeError:  # pragma: no cover
                pass
            try:
                totalangle = savgol_filter(totalangle, windowsize, 3)
            except TypeError:  # pragma: no cover
                pass
            try:
                effectiveangle = savgol_filter(effectiveangle, windowsize, 3)
            except TypeError:  # pragma: no cover
                pass
        velo = 500. / p
        # efficiency = erg power at this boat speed vs measured power
        ergpw = 2.8 * velo**3
        efficiency = 100. * ergpw / power
        efficiency = efficiency.replace([-np.inf, np.inf], np.nan)
        # bug fix: fillna returns a new Series; the result was discarded before
        efficiency = efficiency.fillna(method='ffill')
        try:
            data['wash'] = wash
            data['catch'] = catch
            data['slip'] = slip
            data['finish'] = finish
            data['peakforceangle'] = peakforceangle
            data['driveenergy'] = driveenergy
            data['drivelength'] = drivelength
            data['totalangle'] = totalangle
            data['effectiveangle'] = effectiveangle
            data['efficiency'] = efficiency
        except ValueError:  # pragma: no cover
            pass
    if otwpower:
        try:
            nowindpace = rowdatadf.loc[:, 'nowindpace']
        except KeyError:
            nowindpace = p
        try:
            equivergpower = rowdatadf.loc[:, 'equivergpower']
        except KeyError:
            equivergpower = 0 * p + 50.
        nowindpace2 = nowindpace.apply(lambda x: timedeltaconv(x))
        ergvelo = (equivergpower / 2.8)**(1. / 3.)
        ergpace = 500. / ergvelo
        ergpace[ergpace == np.inf] = 240.
        ergpace2 = ergpace.apply(lambda x: timedeltaconv(x))
        data['ergpace'] = ergpace * 1e3
        data['nowindpace'] = nowindpace * 1e3
        data['equivergpower'] = equivergpower
        data['fergpace'] = nicepaceformat(ergpace2)
        data['fnowindpace'] = nicepaceformat(nowindpace2)
    data = data.replace([-np.inf, np.inf], np.nan)
    data = data.fillna(method='ffill')
    # write data if id given
    if id != 0:
        data['workoutid'] = id
        data.fillna(0, inplace=True)
        for k, v in dtypes.items():
            try:
                data[k] = data[k].astype(v)
            except KeyError:  # pragma: no cover
                pass
        filename = 'media/strokedata_{id}.parquet.gz'.format(id=id)
        if polars:
            pldf = pl.from_pandas(data)
            try:
                pldf.write_parquet(filename, compression='gzip')
            except IsADirectoryError:
                shutil.rmtree(filename)
                pldf.write_parquet(filename, compression='gzip')
        else:
            # dask conversion is only needed on the legacy fastparquet path
            df = dd.from_pandas(data, npartitions=1)
            try:
                df.to_parquet(filename, engine='fastparquet', compression='gzip')
            except FileNotFoundError:
                # NOTE(review): df is already a dask frame here, so re-wrapping
                # it looks wrong; preserved as-is on this legacy path
                df2 = dd.from_pandas(df, npartitions=1)
                df2.to_parquet(filename, engine='fastparquet', compression='gzip')
            except FileExistsError:
                os.remove(filename)
                df.to_parquet(filename, engine='fastparquet', compression='GZIP')
    if polars:
        pldf = pl.from_pandas(data)
        return pldf
    return data
def delete_agegroup_db(age, sex, weightcategory, debug=False):
    """Delete all calcagegrouprecords rows for one (age, sex, weightcategory) key.

    Parameters
    ----------
    age, sex, weightcategory : str
        Age-group key identifying the rows to remove.
    debug : bool
        Use the debug database URL instead of the production one.
    """
    if debug:  # pragma: no cover
        engine = create_engine(database_url_debug, echo=False)
    else:  # pragma: no cover
        engine = create_engine(database_url, echo=False)
    # Bind the values instead of interpolating them into the SQL text to
    # avoid SQL injection and quoting problems.
    query = sa.text(
        "DELETE from calcagegrouprecords "
        "WHERE age=:age and weightcategory=:weightcategory and sex=:sex;"
    )
    with engine.connect() as conn, conn.begin():
        _ = conn.execute(
            query,
            {'age': age, 'sex': sex, 'weightcategory': weightcategory},
        )
    engine.dispose()
def update_agegroup_db(age, sex, weightcategory, wcdurations, wcpower,
                       debug=False):
    """Replace the stored age-group records for one (age, sex, weightcategory)
    key with the given duration/power curve.

    Parameters
    ----------
    age, sex, weightcategory : str
        Age-group key; existing rows for this key are deleted first.
    wcdurations, wcpower : sequence
        Parallel sequences of durations and power values; NaN/inf entries
        are dropped before writing.
    debug : bool
        Use the debug database URL.
    """
    delete_agegroup_db(age, sex, weightcategory, debug=debug)
    # isinstance also catches numpy float scalars (np.float64 subclasses
    # float), which a type() identity check would miss
    wcdurations = [None if isinstance(y, float) and np.isnan(y) else y
                   for y in wcdurations]
    wcpower = [None if isinstance(y, float) and np.isnan(y) else y
               for y in wcpower]
    df = pd.DataFrame(
        {
            'duration': wcdurations,
            'power': wcpower,
        }
    )
    df['sex'] = sex
    df['age'] = age
    df['weightcategory'] = weightcategory
    df.replace([np.inf, -np.inf], np.nan, inplace=True)
    df.dropna(axis=0, inplace=True)
    if debug:  # pragma: no cover
        engine = create_engine(database_url_debug, echo=False)
    else:
        engine = create_engine(database_url, echo=False)
    table = 'calcagegrouprecords'
    with engine.connect() as conn, conn.begin():
        # write through the managed connection so the insert joins this
        # transaction (passing the engine would check out a second connection)
        df.to_sql(table, conn, if_exists='append', index=False)
    engine.dispose()
def add_c2_stroke_data_db(strokedata, workoutid, starttimeunix, csvfilename,
                          debug=False, workouttype='rower'):
    """Convert Concept2-logbook stroke data to the rowingdata CSV layout,
    write the CSV, and return the prepared stroke frame.

    Parameters
    ----------
    strokedata : pandas.DataFrame
        C2 stroke records; 't', 'd' and 'p' are stored in tenths
        (0.1 s / 0.1 m / 0.1 s per 500m).
    workoutid : int
        Workout id passed through to dataplep.
    starttimeunix : float
        Workout start as a unix timestamp; added to cumulative stroke time.
    csvfilename : str
        Destination CSV path.
    debug : bool
        Passed through to dataplep.
    workouttype : str
        'bike' doubles the speed basis (pace is per km, not per 500m).

    Returns
    -------
    The frame produced by dataplep.
    """
    res = make_cumvalues(0.1*strokedata['t'])
    cum_time = res[0]
    lapidx = res[1]
    unixtime = cum_time+starttimeunix
    # unixtime[0] = starttimeunix
    seconds = 0.1*strokedata.loc[:, 't']
    nr_rows = len(unixtime)
    try:  # pragma: no cover
        latcoord = strokedata.loc[:, 'lat']
        loncoord = strokedata.loc[:, 'lon']
    except KeyError:
        latcoord = np.zeros(nr_rows)
        loncoord = np.zeros(nr_rows)
    try:
        strokelength = strokedata.loc[:, 'strokelength']
    except KeyError:
        strokelength = np.zeros(nr_rows)
    dist2 = 0.1*strokedata.loc[:, 'd']
    try:
        spm = strokedata.loc[:, 'spm']
    except KeyError:  # pragma: no cover
        spm = 0*dist2
    try:
        hr = strokedata.loc[:, 'hr']
    except KeyError:  # pragma: no cover
        hr = 0*spm
    pace = strokedata.loc[:, 'p']/10.
    pace = np.clip(pace, 0, 1e4)
    # zero pace means no data; substitute a slow default (300 s/500m)
    pace = pace.replace(0, 300)
    velo = 500./pace
    # erg power model P = 2.8 * v^3; NOTE(review): computed from the rowing
    # velocity even for bikes — confirm that is intended
    power = 2.8*velo**3
    if workouttype == 'bike':  # pragma: no cover
        velo = 1000./pace
    # save csv
    # Create data frame with all necessary data to write to csv
    df = pd.DataFrame({'TimeStamp (sec)': unixtime,
                       ' Horizontal (meters)': dist2,
                       ' Cadence (stokes/min)': spm,
                       ' HRCur (bpm)': hr,
                       ' longitude': loncoord,
                       ' latitude': latcoord,
                       ' Stroke500mPace (sec/500m)': pace,
                       ' Power (watts)': power,
                       ' DragFactor': np.zeros(nr_rows),
                       ' DriveLength (meters)': np.zeros(nr_rows),
                       ' StrokeDistance (meters)': strokelength,
                       ' DriveTime (ms)': np.zeros(nr_rows),
                       ' StrokeRecoveryTime (ms)': np.zeros(nr_rows),
                       ' AverageDriveForce (lbs)': np.zeros(nr_rows),
                       ' PeakDriveForce (lbs)': np.zeros(nr_rows),
                       ' lapIdx': lapidx,
                       ' WorkoutState': 4,
                       ' ElapsedTime (sec)': seconds,
                       'cum_dist': dist2,
                       })
    # bug fix: sort_values returns a new frame; the result was discarded before
    df = df.sort_values(by='TimeStamp (sec)', ascending=True)
    # Create CSV file name and save data to CSV file
    row = rrdata(df=df)
    row.write_csv(csvfilename)
    row = rrdata_pl(df=pl.from_pandas(row.df))
    data = dataplep(row.df, id=workoutid, bands=False, debug=debug)
    return data
# Creates synthetic C2 stroke data from only (distance, duration) summary values
def create_c2_stroke_data_db(
        distance, duration, workouttype,
        workoutid, starttimeunix, csvfilename, debug=False):  # pragma: no cover
    """Create evenly-spaced synthetic stroke data for a C2 workout that only
    has summary values, write it as CSV, and return the prepared frame.

    Parameters
    ----------
    distance : float
        Total distance in meters; one synthetic stroke per 10 m.
    duration : datetime.time
        Total workout duration.
    workouttype : str
        Power is only estimated for 'rower'/'slides'/'dynamic'.
    workoutid, starttimeunix, csvfilename, debug
        Passed through to the CSV writer and dataplep.

    Returns
    -------
    The frame produced by dataplep, or 0 when too short to synthesize.
    """
    nr_strokes = int(distance/10.)
    totalseconds = duration.hour*3600.
    totalseconds += duration.minute*60.
    totalseconds += duration.second
    totalseconds += duration.microsecond/1.e6
    # Need at least two strokes to spread samples over the duration;
    # nr_strokes == 1 would divide by (nr_strokes - 1) == 0 below and fill
    # the frame with NaN, nr_strokes == 0 has nothing to write at all.
    if nr_strokes < 2:
        return 0
    try:
        spm = 60.*nr_strokes/totalseconds
    except ZeroDivisionError:
        # NOTE(review): 20*np.zeros(...) is identically zero; possibly
        # 20*np.ones was intended — preserved as-is
        spm = 20*np.zeros(nr_strokes)
    elapsed = np.arange(nr_strokes)*totalseconds/(float(nr_strokes-1))
    d = np.arange(nr_strokes)*distance/(float(nr_strokes-1))
    unixtime = starttimeunix + elapsed
    pace = 500.*totalseconds/distance
    if workouttype in ['rower', 'slides', 'dynamic']:
        try:
            velo = distance/totalseconds
        except ZeroDivisionError:
            velo = 0
        # erg power model P = 2.8 * v^3
        power = 2.8*velo**3
    else:
        power = 0
    df = pl.DataFrame({
        'TimeStamp (sec)': unixtime,
        ' Horizontal (meters)': d,
        ' Cadence (stokes/min)': spm,
        ' Stroke500mPace (sec/500m)': pace,
        ' ElapsedTime (sec)': elapsed,
        ' Power (watts)': power,
        ' HRCur (bpm)': np.zeros(nr_strokes),
        ' longitude': np.zeros(nr_strokes),
        ' latitude': np.zeros(nr_strokes),
        ' DragFactor': np.zeros(nr_strokes),
        ' DriveLength (meters)': np.zeros(nr_strokes),
        ' StrokeDistance (meters)': np.zeros(nr_strokes),
        ' DriveTime (ms)': np.zeros(nr_strokes),
        ' StrokeRecoveryTime (ms)': np.zeros(nr_strokes),
        ' AverageDriveForce (lbs)': np.zeros(nr_strokes),
        ' PeakDriveForce (lbs)': np.zeros(nr_strokes),
        ' lapIdx': np.zeros(nr_strokes),
        'cum_dist': d
    })
    df = df.with_columns((pl.col("TimeStamp (sec)")).alias(" ElapsedTime (sec)"))
    row = rrdata_pl(df=df)
    row.writecsv(csvfilename, compression=True)
    data = dataplep(df, id=workoutid, bands=False, debug=debug)
    return data
def update_empower(id, inboard, oarlength, boattype, df, f1, debug=False):  # pragma: no cover
    """Re-scale the Empower power/driveenergy columns of workout *id* by the
    rigging correction factor, then rewrite its stroke data and CSV.

    Returns True when the power columns existed and were corrected.
    """
    # NOTE(review): an 'x' in the boat type (1x/2x/4x) normally denotes
    # sculling boats; the original labelled this branch "sweep" — confirm
    # which (a, b) pair belongs to which rig before relying on the labels.
    rig_a, rig_b = (0.06, 0.275) if 'x' in boattype else (0.15, 0.275)
    corr_factor = empower_bug_correction(oarlength, inboard, rig_a, rig_b)
    try:
        # keep the uncorrected values around before scaling in place
        df['power empower old'] = df[' Power (watts)']
        df[' Power (watts)'] = df[' Power (watts)'] * corr_factor
        df['driveenergy empower old'] = df['driveenergy']
        df['driveenergy'] = df['driveenergy'] * corr_factor
        success = True
    except KeyError:
        success = False
    if success:
        # stale cached stroke data must go before dataplep regenerates it
        delete_strokedata(id, debug=debug)
        if debug:  # pragma: no cover
            print("updated ", id)
            print("correction ", corr_factor)
    elif debug:  # pragma: no cover
        print("not updated ", id)
    _ = dataplep(df, id=id, bands=True, barchart=True, otwpower=True, debug=debug)
    row = rrdata(df=df)
    row.write_csv(f1, gzip=True)
    return success