Private
Public Access
1
0
Files
rowsandall/rowers/datautils.py
2023-08-22 15:57:21 +02:00

471 lines
12 KiB
Python

import pandas as pd
import numpy as np
from scipy.interpolate import griddata
from scipy import optimize
from rowers.mytypes import otwtypes, otetypes, rowtypes
from rowers.models import Workout
# Default initial guess for the four-parameter critical-power fit in cpfit():
# two hyperbolic terms [amplitude1, amplitude2, tau1, tau2]
# (see fitfunc inside cpfit: p0/(1+x/p2) + p1/(1+x/p3)).
p0 = [190, 200, 33, 16000]
# RPE to TSS
# Lookup table: session RPE (1-10) -> approximate training stress score
# accumulated per hour of work (used by rscore_approx).
rpetotss = {
    1: 20,
    2: 30,
    3: 40,
    4: 50,
    5: 60,
    6: 70,
    7: 80,
    8: 100,
    9: 120,
    10: 140,
}
def rscore_approx(row):
    """Best-effort training-stress score for a workout row.

    Preference order:
      * a stored positive ``rscore`` is returned as-is;
      * ``rscore == 0``: fall back to ``hrtss`` when positive, else an
        RPE-based estimate (RPE-per-hour table times duration in hours);
      * ``rscore < 0`` is treated as a recompute flag: TRIMP/rScore are
        recomputed from the Workout, then the RPE estimate is returned.

    Returns 0 when no estimate can be made.
    """
    def _rpe_tss():
        # row['duration'] is a time-like object with hour/minute/second;
        # convert to hours and scale the per-hour TSS for this RPE.
        duration = row['duration']
        hours = (3600 * duration.hour + 60 * duration.minute
                 + duration.second) / 3600
        return rpetotss[row['rpe']] * hours

    if row['rscore'] > 0:
        return row['rscore']
    if row['rscore'] == 0:
        if row['hrtss'] > 0:
            return row['hrtss']
        # Guard against missing/zero RPE (the rscore < 0 branch already
        # guarded; the original could raise KeyError here on rpe == 0).
        if row['rpe'] and row['rpe'] > 0:
            return _rpe_tss()
        return 0
    if row['rscore'] < 0:
        # NOTE(review): `dataprep` is not among this module's visible
        # imports, and the recomputed trimp/hrtss/rscore/normp values are
        # discarded below — confirm intent (side effects only?).
        w = Workout.objects.get(id=row['id'])
        trimp, hrtss = dataprep.workout_trimp(w)
        rscore, normp = dataprep.workout_rscore(w)
        if row['rpe'] and row['rpe'] > 0:
            return _rpe_tss()
    return 0
def updatecp(delta, cpvalues, r, workouttype='water'):  # pragma: no cover
    """Merge new (duration, power) best-effort points into the stored
    critical-power curve, refit the model, and persist the parameters.

    Parameters:
        delta: pandas Series of effort durations.
        cpvalues: pandas Series of measured best power at those durations.
        r: profile object carrying p0..p3 / cpratio (on-the-water) or
           ep0..ep3 / ecpratio (erg) fields plus a Django-style save().
        workouttype: selects which parameter set applies (water types are
           listed in otwtypes).

    Returns 1 on completion.
    """
    # Pick the stored parameter set for this workout family.
    if workouttype in otwtypes:
        par0, par1, par2, par3 = r.p0, r.p1, r.p2, r.p3
    else:
        par0, par1, par2, par3 = r.ep0, r.ep1, r.ep2, r.ep3
    # Model-predicted CP at the same durations from the stored parameters.
    cp2 = par0 / (1 + delta / par2)
    cp2 += par1 / (1 + delta / par3)
    # BUGFIX: Series.append was removed in pandas 2.0 — use pd.concat.
    delta = pd.concat([delta, delta])
    cp = pd.concat([cpvalues, cp2])
    powerdf = pd.DataFrame({
        'Delta': delta,
        'CP': cp,
    })
    powerdf.dropna(axis=0, inplace=True)
    # For each duration keep only the highest CP (measured or modeled).
    powerdf.sort_values(['Delta', 'CP'], ascending=[1, 0], inplace=True)
    powerdf.drop_duplicates(subset='Delta', keep='first', inplace=True)
    res = cpfit(powerdf)
    fitted = res[0]
    if workouttype in otwtypes:
        r.p0 = fitted[0]
        r.p1 = fitted[1]
        r.p2 = fitted[2]
        r.p3 = fitted[3]
        r.cpratio = res[3]
    else:
        r.ep0 = fitted[0]
        r.ep1 = fitted[1]
        r.ep2 = fitted[2]
        r.ep3 = fitted[3]
        r.ecpratio = res[3]
    r.save()
    return 1
def cpfit(powerdf, fraclimit=0.0001, nmax=1000, p0=(190, 200, 33, 16000)):
    """Fit (Delta, CP) points to a two-term hyperbolic critical-power model.

    Model: CP(x) = |a1|/(1 + x/|t1|) + |a2|/(1 + x/|t2|).

    Parameters:
        powerdf: DataFrame with 'Delta' (duration) and 'CP' (power) columns.
        fraclimit: convergence threshold for the envelope-scaling loop.
        nmax: maximum iterations of that loop.
        p0: initial parameter guess [a1, a2, t1, t2] (defaults match the
            historical module-level constant, so existing callers are
            unaffected).

    Returns:
        (p1, fitt, fitpower, ratio): fitted parameters, a log-spaced time
        grid (100 points, 1..~10^4 s), the model evaluated on that grid,
        and the scale factor that lifts the fit above all data points.
    """
    def fitfunc(pars, x):
        return abs(
            pars[0])/(1+(x/abs(pars[2]))) + abs(pars[1])/(1+(x/abs(pars[3])))

    def errfunc(pars, x, y):
        return fitfunc(pars, x)-y

    p0 = list(p0)
    p1 = p0
    thesecs = powerdf['Delta']
    theavpower = powerdf['CP']
    if len(thesecs) >= 4:
        try:
            p1, _ier = optimize.leastsq(
                errfunc, p0[:], args=(thesecs, theavpower))
        except Exception:  # pragma: no cover  (narrowed from bare except)
            # Least-squares failed: fall back to scaling the default curve
            # so it passes through the mean data point.
            factor = fitfunc(p0, thesecs.mean())/theavpower.mean()
            p1 = [p0[0]/factor, p0[1]/factor, p0[2], p0[3]]
    else:
        # Too few points for a 4-parameter fit: scale the default curve.
        factor = fitfunc(p0, thesecs.mean())/theavpower.mean()
        p1 = [p0[0]/factor, p0[1]/factor, p0[2], p0[3]]
    p1 = [abs(p) for p in p1]
    # Evaluation grid: 100 log-spaced durations from 1 to ~10^4 seconds.
    fitt = pd.Series(10**(4*np.arange(100)/100.))
    fitpower = fitfunc(p1, fitt)
    fitpoints = fitfunc(p1, thesecs)
    fitpoints0 = fitpoints.copy()
    dd = fitpoints-theavpower
    ddmin = dd.min()
    frac = abs(ddmin)/fitpoints.mean()
    counter = 0
    # Scale the fitted curve up until it envelopes every data point
    # (to within fraclimit), tracking the total scale in `ratio`.
    while frac > fraclimit and counter < nmax:
        fitpoints = fitpoints*(fitpoints.mean()-ddmin)/(fitpoints.mean())
        dd = fitpoints-theavpower
        ddmin = dd.min()
        frac = abs(ddmin)/fitpoints.mean()
        counter += 1
    ratio = fitpoints.mean()/fitpoints0.mean()
    return p1, fitt, fitpower, ratio
def getlogarr(maxt):
    """Return a roughly log-spaced array of window durations up to ~maxt.

    Ten steps per decade; each value is 5 + int(10**exponent), and
    duplicates produced by integer truncation at small values are dropped.
    """
    decades = round(np.log10(maxt - 5))
    exponents = np.arange(10 * decades + 1) / 10.
    values = []
    for exponent in exponents:
        try:
            values.append(5 + int(10. ** exponent))
        except ValueError:  # pragma: no cover
            values.append(0)
    series = pd.Series(values, dtype='float')
    series.drop_duplicates(keep='first', inplace=True)
    return series.values
def getsinglecp(df):  # pragma: no cover
    """Compute the critical-power curve for one raw workout DataFrame.

    Expects 'TimeStamp (sec)' and ' Power (watts)' columns (note the
    leading space in the power column name) and returns getcp's
    (delta, cpvalue, avgpower) result for a single synthetic group.
    """
    total_secs = df['TimeStamp (sec)'].max() - df['TimeStamp (sec)'].min()
    # Fall back to a fixed horizon for a degenerate (zero-span) recording.
    maxt = 1.05 * total_secs if total_secs != 0 else 1000.
    logarr = getlogarr(maxt)
    t_start = df.loc[:, 'TimeStamp (sec)'].iloc[0]
    dfnew = pd.DataFrame({
        'time': 1000 * (df['TimeStamp (sec)'] - t_start),  # seconds -> ms
        'power': df[' Power (watts)']
    })
    dfnew['workoutid'] = 0
    return getcp(dfnew.groupby(['workoutid']), logarr)
def getcp_new(dfgrouped, logarr):  # pragma: no cover
    """Matrix-based critical-power curve per workout group.

    For each group (needing 'time' and 'power' columns, time presumably in
    milliseconds — TODO confirm against callers), resamples power onto a
    uniform grid, computes the best mean power for every window length via
    prefix sums, and interpolates onto the durations in ``logarr``.

    Returns (delta, cpvalue, avgpower): duration Series, best-power Series
    (NaN rows dropped, one best value kept per duration), and a dict of
    group id -> integer mean power (or '---' when the mean is NaN).
    """
    delta = []
    cpvalue = []
    avgpower = {}
    for id, group in dfgrouped:
        tt = group['time'].copy()
        ww = group['power'].copy()
        try:
            avgpower[id] = int(ww.mean())
        except ValueError:
            avgpower[id] = '---'
        tmax = tt.max()
        # Uniform resampling grid; coarser for very long workouts to keep
        # the (n+1)x(n+1) matrices below manageable.
        if tmax > 500000:
            newlen = int(tmax/2000.)
            newt = np.arange(newlen)*tmax/float(newlen)
            deltat = newt[1]-newt[0]
        else:
            newt = np.arange(0, tmax, 10.)
            deltat = 10.
        ww = griddata(tt.values,
                      ww.values,
                      newt, method='linear',
                      rescale=True)
        tt = pd.Series(newt, dtype='float')
        ww = pd.Series(ww, dtype='float')
        # Prefix sums with a leading 0 so G[c]-G[r] is the power summed
        # over samples (r, c].
        G = pd.Series(ww.cumsum(), dtype='float')
        G = pd.concat([pd.Series([0], dtype='float'), G])
        h = np.mgrid[0:len(tt)+1:1, 0:len(tt)+1:1]
        distances = pd.DataFrame(h[1]-h[0])  # window length c - r
        ones = 1+np.zeros(len(G))
        Ghor = np.outer(ones, G)
        Gver = np.outer(G, ones)
        Gdif = Ghor - Gver
        Gdif = np.tril(Gdif.T).T  # keep only the upper triangle (c >= r)
        Gdif = pd.DataFrame(Gdif)
        F = Gdif/distances  # mean power over every (r, c] window
        # BUGFIX: DataFrame.fillna(method=...) is deprecated — use ffill().
        F = F.ffill(axis=1).fillna(0)
        restime = []
        power = []
        for i in np.arange(0, len(tt)+1, 1):
            restime.append(deltat*i)
            diag = np.diag(F, i)  # all windows of exactly i samples
            power.append(diag.max())
        power[0] = power[1]  # a zero-length window is meaningless
        restime = np.array(restime)
        power = np.array(power)
        cpvalues = griddata(restime, power,
                            logarr, method='linear', fill_value=0)
        cpvalue.extend(cpvalues)
        delta.extend(logarr)
    df = pd.DataFrame({
        'delta': delta,
        'cpvalue': cpvalue
    })
    df.dropna(axis=0, how='any', inplace=True)
    # BUGFIX: original sorted by nonexistent column 'cp' and deduplicated
    # on nonexistent 'Delta' (guaranteed KeyError); use the real columns.
    df = df.sort_values(['delta', 'cpvalue'], ascending=[1, 0])
    df = df.drop_duplicates(subset='delta', keep='first')
    delta = df['delta']
    cpvalue = df['cpvalue']
    return delta, cpvalue, avgpower
def getcp(dfgrouped, logarr):
    """Build a critical-power (best mean power vs. duration) dataset.

    For each workout group (needing 'time' and 'power' columns; time is
    presumably in milliseconds given the 1.0e-3 scaling in
    getmaxwattinterval — TODO confirm), finds the best rolling-average
    power for every window size and interpolates those maxima onto the
    durations in ``logarr``.

    Returns:
        (delta, cpvalue, avgpower): Series of durations, Series of the
        corresponding best powers (NaN rows dropped), and a dict mapping
        group id to integer mean power (or '---' when the mean is NaN).
    """
    delta = []
    cpvalue = []
    avgpower = {}
    for id, group in dfgrouped:
        tt = group['time'].copy()
        ww = group['power'].copy()
        # Remove data where PM is repeating final power value
        # of an interval during the rest
        rolling_std = ww.rolling(window=4).std()
        # deltas = tt.diff()
        mask = rolling_std == 0
        ww.loc[mask] = 0
        # Discard implausibly high readings (> 2000 W) as sensor glitches.
        mask = ww > 2000
        ww.loc[mask] = 0
        # tmax = tt.max()
        try:
            # Mean over the cleaned series; NaN mean raises ValueError on int().
            avgpower[id] = int(ww.mean())
        except ValueError: # pragma: no cover
            avgpower[id] = '---'
        if not np.isnan(ww.mean()):
            length = len(ww)
            dt = []
            cpw = []
            # One best rolling average per window size -> O(n^2) overall.
            for i in range(length-2):
                deltat, wmax = getmaxwattinterval(tt, ww, i)
                if not np.isnan(deltat) and not np.isnan(wmax):
                    dt.append(deltat)
                    cpw.append(wmax)
            dt = pd.Series(dt, dtype='float')
            cpw = pd.Series(cpw, dtype='float')
            # Need at least a few points for a meaningful interpolation.
            if len(dt) > 2:
                cpvalues = griddata(dt.values,
                                    cpw.values,
                                    logarr, method='linear',
                                    rescale=True)
                for cpv in cpvalues:
                    cpvalue.append(cpv)
                for d in logarr:
                    delta.append(d)
    delta = pd.Series(delta, name='Delta', dtype='float')
    cpvalue = pd.Series(cpvalue, name='CP', dtype='float')
    cpdf = pd.DataFrame({
        'delta': delta,
        'cpvalue': cpvalue
    })
    # Drop durations where interpolation produced NaN (outside data range).
    cpdf.dropna(axis=0, how='any', inplace=True)
    delta = cpdf['delta']
    cpvalue = cpdf['cpvalue']
    return delta, cpvalue, avgpower
def getmaxwattinterval(tt, ww, i):
    """Best (i+2)-sample rolling-average power and its wall-clock duration.

    Returns (deltat, wmax): duration in seconds (time values are scaled by
    1.0e-3) and the mean power of the best window, or (0, 0) when no valid
    window exists or the window contains gaps of 30 s or more.
    """
    rolled = ww.rolling(i + 2).mean().dropna()
    if not len(rolled):  # pragma: no cover
        return 0, 0
    # now goes with # data points - should be fixed seconds
    best = rolled.idxmax(axis=0)
    try:
        end_t = tt.loc[best]
        start_t = tt.loc[best - i]
        gaps = tt.loc[best - i:best].diff().dropna()
        # Reject windows spanning a recording gap (>= 30 s between samples)
        # or containing missing timestamps.
        valid = 1.0e-3 * gaps.max() < 30. and not gaps.isnull().values.any()
        if valid:
            return 1.0e-3 * (end_t - start_t), rolled.loc[best]
    except KeyError:
        pass
    return 0, 0
def getfastest(df, thevalue, mode='distance'):
    """Find the fastest effort of a given distance or duration.

    Parameters:
        df: DataFrame with 'time' (presumably milliseconds — TODO confirm)
            and 'cumdist' (cumulative distance) columns.
        thevalue: target distance (same units as 'cumdist') in 'distance'
            mode, or target duration in minutes otherwise.
        mode: 'distance' or anything else for time mode.

    Returns:
        'distance' mode: (duration_s, start_s, end_s);
        time mode: (distance, start_s, end_s);
        0 when the workout is too short to contain the requested effort.
    """
    tt = df['time'].copy()
    dd = df['cumdist'].copy()
    tmax = tt.max()
    # Bail out early when the requested effort cannot fit in the workout.
    if mode == 'distance':  # pragma: no cover
        if dd.max() < thevalue:
            return 0
    else:  # pragma: no cover
        if tt.max() < thevalue:
            return 0
    # Resample cumulative distance onto a uniform 1000-point time grid.
    newlen = 1000
    newt = np.arange(newlen)*tmax/float(newlen)
    deltat = newt[1]-newt[0]
    dd = griddata(tt.values,
                  dd.values, newt, method='linear', rescale=True)
    tt = pd.Series(newt, dtype='float')
    dd = pd.Series(dd, dtype='float')
    # Leading 0 so G[c]-G[r] is the distance covered between samples r and c.
    G = pd.concat([pd.Series([0]), dd])
    ones = 1+np.zeros(len(G))
    Ghor = np.outer(ones, G)
    Gver = np.outer(G, ones)
    Gdif = Ghor-Gver
    Gdif = np.tril(Gdif.T).T  # keep only the upper triangle (c >= r)
    F = pd.DataFrame(Gdif)
    # BUGFIX: DataFrame.fillna(method=...) is deprecated — use ffill().
    F = F.ffill(axis=1).fillna(0)
    restime = []
    distance = []
    starttimes = []
    for i in np.arange(0, len(tt)+1, 1):
        restime.append(deltat*i)
        diag = np.diag(F, i)  # distance covered by every window of i samples
        distance.append(diag.max())
        starttimes.append(tt[np.argmax(diag)])  # where the best window starts
    distance[0] = distance[1]  # a zero-length window is meaningless
    restime = np.array(restime)
    distance = np.array(distance)
    starttimes = np.array(starttimes)
    if mode == 'distance':
        duration = griddata(distance, restime, [
            thevalue], method='linear', rescale=True)
        starttime = griddata(distance, starttimes, [
            thevalue], method='linear', rescale=True)
        endtime = starttime+duration
        return duration[0]/1000., starttime[0]/1000., endtime[0]/1000.
    else:  # pragma: no cover
        target_ms = thevalue*60*1000  # minutes -> milliseconds
        distance = griddata(restime, distance, [
            target_ms], method='linear', rescale=True)
        starttime = griddata(restime, starttimes, [
            target_ms], method='linear', rescale=True)
        duration = griddata(restime, restime, [
            target_ms], method='linear', rescale=True)
        endtime = starttime+duration
        return distance[0], starttime[0]/1000., endtime[0]/1000.