Private
Public Access
1
0
Files
rowsandall/rowers/datautils.py
2021-04-26 17:48:22 +02:00

475 lines
12 KiB
Python

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals
import pandas as pd
import numpy as np
from scipy.interpolate import griddata
from scipy import optimize
from rowers.mytypes import otwtypes,otetypes,rowtypes
# Default parameter vector for the critical power model; it is used as the
# starting guess of the fit in cpfit().
# An earlier choice was [500, 350, 10, 8000].
p0 = [190, 200, 33, 16000]

# RPE to TSS: lookup table keyed by RPE score (1-10).
rpetotss = {
    1: 20,
    2: 30,
    3: 40,
    4: 50,
    5: 60,
    6: 70,
    7: 80,
    8: 100,
    9: 120,
    10: 140,
}
def updatecp(delta, cpvalues, r, workouttype='water'):  # pragma: no cover
    """Refit the stored critical power parameters of ``r`` with new data.

    The power curve predicted by the parameters currently stored on ``r`` is
    evaluated at the durations in ``delta`` and merged with the newly measured
    ``cpvalues``.  For every duration only the highest power is kept, the
    merged curve is fitted with cpfit() and the result is written back to
    ``r``.

    Parameters
    ----------
    delta : pandas.Series
        Durations at which ``cpvalues`` were measured.
    cpvalues : pandas.Series
        Measured power values, one per entry of ``delta``.
    r : object
        Object exposing ``p0``..``p3`` / ``cpratio`` (used when
        ``workouttype`` is in ``otwtypes``) or ``ep0``..``ep3`` /
        ``ecpratio`` (otherwise) and a ``save()`` method.
        NOTE(review): presumably a rower model instance - confirm.
    workouttype : str
        Selects which of the two parameter sets on ``r`` is read and updated.

    Returns
    -------
    int
        Always 1.
    """
    onwater = workouttype in otwtypes
    if onwater:
        amp0, amp1, tau0, tau1 = r.p0, r.p1, r.p2, r.p3
    else:
        amp0, amp1, tau0, tau1 = r.ep0, r.ep1, r.ep2, r.ep3

    # Power curve according to the parameters stored so far
    modelcp = amp0/(1+delta/tau0)
    modelcp += amp1/(1+delta/tau1)

    # Series.append was removed in pandas 2.0; pd.concat is the equivalent.
    # Every duration appears twice: once with the measured value and once
    # with the model value.
    powerdf = pd.DataFrame({
        'Delta': pd.concat([delta, delta]),
        'CP': pd.concat([cpvalues, modelcp]),
    })
    powerdf.dropna(axis=0, inplace=True)

    # Sort so that the highest power per duration comes first, then keep it
    powerdf.sort_values(['Delta', 'CP'], ascending=[True, False], inplace=True)
    powerdf.drop_duplicates(subset='Delta', keep='first', inplace=True)

    res = cpfit(powerdf)
    fitted = res[0]
    ratio = res[3]

    if onwater:
        r.p0 = fitted[0]
        r.p1 = fitted[1]
        r.p2 = fitted[2]
        r.p3 = fitted[3]
        r.cpratio = ratio
    else:
        r.ep0 = fitted[0]
        r.ep1 = fitted[1]
        r.ep2 = fitted[2]
        r.ep3 = fitted[3]
        r.ecpratio = ratio

    r.save()

    return 1
def cpfit(powerdf, fraclimit=0.0001, nmax=1000):
    """Fit the four-parameter critical power model to duration/power data.

    The model is::

        P(t) = |a0| / (1 + t/|tau0|) + |a1| / (1 + t/|tau1|)

    with parameter vector ``[a0, a1, tau0, tau1]``.  The module-level ``p0``
    is the starting guess.  With fewer than four data points, or when the
    least-squares fit raises, the amplitudes of ``p0`` are rescaled so that
    the model evaluated at the mean duration equals the mean power.

    After fitting, the fitted values at the data durations are rescaled
    iteratively until the smallest difference between the scaled curve and
    the data, relative to the mean of the scaled curve, is at most
    ``fraclimit`` (or ``nmax`` iterations have been done).

    Parameters
    ----------
    powerdf : pandas.DataFrame
        Frame with columns ``'Delta'`` (durations) and ``'CP'`` (power).
    fraclimit : float
        Convergence threshold of the rescaling loop.
    nmax : int
        Maximum number of rescaling iterations.

    Returns
    -------
    tuple
        ``(p1, fitt, fitpower, ratio)``: the absolute values of the model
        parameters, 100 logarithmically spaced durations, the model power at
        those durations, and the mean of the rescaled curve divided by the
        mean of the unscaled fitted curve.
    """
    def fitfunc(pars, x):
        return (abs(pars[0])/(1+(x/abs(pars[2])))
                + abs(pars[1])/(1+(x/abs(pars[3]))))

    def errfunc(pars, x, y):
        return fitfunc(pars, x)-y

    thesecs = powerdf['Delta']
    theavpower = powerdf['CP']

    def scaled_start_values():
        # Scale the default amplitudes to the level of the data
        factor = fitfunc(p0, thesecs.mean())/theavpower.mean()
        return [p0[0]/factor, p0[1]/factor, p0[2], p0[3]]

    if len(thesecs) >= 4:
        try:
            p1, success = optimize.leastsq(
                errfunc, p0[:], args=(thesecs, theavpower)
            )
        # Best effort: fall back to the scaled defaults on any fitting
        # error, but let KeyboardInterrupt / SystemExit propagate.
        except Exception:  # pragma: no cover
            p1 = scaled_start_values()
    else:
        p1 = scaled_start_values()

    p1 = [abs(p) for p in p1]
    fitt = pd.Series(10**(4*np.arange(100)/100.))
    fitpower = fitfunc(p1, fitt)

    fitpoints = fitfunc(p1, thesecs)
    fitpoints0 = fitpoints.copy()

    ddmin = (fitpoints-theavpower).min()
    frac = abs(ddmin)/fitpoints.mean()

    counter = 0
    while frac > fraclimit and counter < nmax:
        # Shift the whole curve by a common factor so that its smallest
        # distance to the data moves towards zero
        meanfit = fitpoints.mean()
        fitpoints = fitpoints*(meanfit-ddmin)/meanfit
        ddmin = (fitpoints-theavpower).min()
        frac = abs(ddmin)/fitpoints.mean()
        counter += 1

    ratio = fitpoints.mean()/fitpoints0.mean()

    return p1, fitt, fitpower, ratio
def getlogarr(maxt):
    """Return logarithmically spaced integer values up to about ``maxt``.

    Exponents run from 0 to ``round(log10(maxt - 5))`` in steps of 0.1.  Each
    exponent ``e`` is turned into ``5 + int(10**e)``; duplicates produced by
    the integer truncation are removed, keeping the first occurrence.

    Returns a numpy array.
    """
    def _to_value(exponent):
        try:
            return 5 + int(10.**exponent)
        except ValueError:  # pragma: no cover
            return 0

    decades = round(np.log10(maxt - 5))
    exponents = np.arange(10*decades + 1)/10.

    values = pd.Series([_to_value(exponent) for exponent in exponents])

    return values.drop_duplicates(keep='first').values
def getsinglecp(df):  # pragma: no cover
    """Compute the critical power curve of a single workout.

    Parameters
    ----------
    df : pandas.DataFrame
        Workout data with the columns ``'TimeStamp (sec)'`` and
        ``' Power (watts)'``.

    Returns
    -------
    tuple
        ``(delta, cpvalue, avgpower)`` as returned by getcp().  The workout
        is registered under workout id 0, so ``avgpower`` has the key 0.
    """
    seconds = df['TimeStamp (sec)']
    thesecs = seconds.max()-seconds.min()

    # Evaluate slightly beyond the workout duration; use a fixed range when
    # the workout has zero duration
    if thesecs != 0:
        maxt = 1.05*thesecs
    else:
        maxt = 1000.

    logarr = getlogarr(maxt)

    dfnew = pd.DataFrame({
        # elapsed time since the first sample, multiplied by 1000
        'time': 1000*(seconds-seconds.iloc[0]),
        'power': df[' Power (watts)'],
    })
    dfnew['workoutid'] = 0

    # Group by the column name, not by a one-element list: with a list,
    # pandas >= 2 yields 1-tuples as group keys, which would change the keys
    # of avgpower from 0 to (0,).
    dfgrouped = dfnew.groupby('workoutid')

    delta, cpvalue, avgpower = getcp(dfgrouped, logarr)

    return delta, cpvalue, avgpower
def getcp_new(dfgrouped, logarr):  # pragma: no cover
    """Compute critical power values from a matrix of window averages.

    For every group the power is resampled on a regular time grid.  The
    average power of every window on that grid is taken from differences of
    the cumulative sum, the best average per window length is selected and
    the resulting curve is interpolated at the durations in ``logarr``.
    Over all groups, only the highest value per duration is returned.

    NOTE: this builds dense square matrices whose side is the number of
    resampled points, so memory use grows quadratically.

    Parameters
    ----------
    dfgrouped : iterable of (id, DataFrame)
        Groups (e.g. a pandas GroupBy) with the columns ``'time'`` and
        ``'power'``.
    logarr : array-like
        Durations at which the curve is evaluated, in the unit of ``'time'``.

    Returns
    -------
    tuple
        ``(delta, cpvalue, avgpower)``: two Series with the durations and the
        best power per duration, and a dict mapping each group id to its
        integer mean power (``'---'`` when the mean is not a number).
    """
    delta = []
    cpvalue = []
    avgpower = {}

    for workoutid, group in dfgrouped:
        tt = group['time'].copy()
        ww = group['power'].copy()

        try:
            avgpower[workoutid] = int(ww.mean())
        except ValueError:
            avgpower[workoutid] = '---'

        # Regular time grid: a fixed step of 10 for short workouts,
        # otherwise int(tmax/2000) equally spaced points
        tmax = tt.max()
        if tmax > 500000:
            newlen = int(tmax/2000.)
            newt = np.arange(newlen)*tmax/float(newlen)
            deltat = newt[1]-newt[0]
        else:
            newt = np.arange(0, tmax, 10.)
            deltat = 10.

        ww = pd.Series(griddata(tt.values,
                                ww.values,
                                newt, method='linear',
                                rescale=True))
        tt = pd.Series(newt)

        # Cumulative power with a leading zero: G[j]-G[i] is the sum over
        # the window i..j
        G = pd.concat([pd.Series([0]), ww.cumsum()])
        npoints = len(tt)+1

        h = np.mgrid[0:npoints:1, 0:npoints:1]
        distances = pd.DataFrame(h[1]-h[0])

        ones = 1+np.zeros(len(G))
        Gdif = np.outer(ones, G)-np.outer(G, ones)
        # keep only windows that run forward in time (upper triangle)
        Gdif = pd.DataFrame(np.tril(Gdif.T).T)

        # Window sum divided by window length gives the window average
        F = Gdif/distances
        F = F.ffill(axis=1).fillna(0)
        Fvalues = F.values

        # The i-th diagonal holds all windows of length i
        restime = deltat*np.arange(npoints)
        power = np.array([np.diag(Fvalues, i).max() for i in range(npoints)])
        # a zero-length window has no average; reuse the shortest real one
        power[0] = power[1]

        cpvalues = griddata(restime, power,
                            logarr, method='linear', fill_value=0)

        cpvalue.extend(cpvalues)
        delta.extend(logarr)

    df = pd.DataFrame({
        'delta': delta,
        'cpvalue': cpvalue,
    })
    df = df.dropna(axis=0, how='any')

    # Highest power per duration first, then keep one row per duration.
    # The column names must match the frame built above ('delta' and
    # 'cpvalue'); the previous names 'cp' and 'Delta' raised KeyError.
    df = df.sort_values(['delta', 'cpvalue'], ascending=[True, False])
    df = df.drop_duplicates(subset='delta', keep='first')

    delta = df['delta']
    cpvalue = df['cpvalue']

    return delta, cpvalue, avgpower
def getcp(dfgrouped, logarr):
    """Compute critical power values for a set of workouts.

    For every group the best rolling-average power is determined for each
    window size with getmaxwattinterval(), and the resulting (duration,
    power) curve is interpolated at the durations in ``logarr``.

    Parameters
    ----------
    dfgrouped : iterable of (id, DataFrame)
        Groups (e.g. a pandas GroupBy) with the columns ``'time'`` and
        ``'power'``.
    logarr : array-like
        Durations at which the curve is evaluated.

    Returns
    -------
    tuple
        ``(delta, cpvalue, avgpower)``: two Series with the durations and the
        interpolated power (rows with missing values removed), and a dict
        mapping each group id to its integer mean power (``'---'`` when the
        mean is not a number).
    """
    delta = []
    cpvalue = []
    avgpower = {}

    for workoutid, group in dfgrouped:
        tt = group['time'].copy()
        ww = group['power'].copy()

        # Remove data where PM is repeating final power value
        # of an interval during the rest
        ww.loc[ww.rolling(window=4).std() == 0] = 0

        # Discard power readings above 2000
        ww.loc[ww > 2000] = 0

        meanpower = ww.mean()
        try:
            avgpower[workoutid] = int(meanpower)
        except ValueError:  # pragma: no cover
            avgpower[workoutid] = '---'

        if np.isnan(meanpower):
            continue

        dt = []
        cpw = []
        for i in range(len(ww)-2):
            deltat, wmax = getmaxwattinterval(tt, ww, i)
            if not np.isnan(deltat) and not np.isnan(wmax):
                dt.append(deltat)
                cpw.append(wmax)

        dt = pd.Series(dt)
        cpw = pd.Series(cpw)

        # Interpolation needs a minimum number of points
        if len(dt) > 2:
            cpvalues = griddata(dt.values,
                                cpw.values,
                                logarr, method='linear',
                                rescale=True)

            cpvalue.extend(cpvalues)
            delta.extend(logarr)

    cpdf = pd.DataFrame({
        'delta': pd.Series(delta),
        'cpvalue': pd.Series(cpvalue),
    })
    cpdf.dropna(axis=0, how='any', inplace=True)

    return cpdf['delta'], cpdf['cpvalue'], avgpower
def getmaxwattinterval(tt, ww, i):
    """Find the window of ``i+2`` samples with the highest average power.

    Parameters
    ----------
    tt : pandas.Series
        Time stamps; the factor 1.0e-3 applied below suggests milliseconds
        (TODO confirm).  The index must support ``label - i`` arithmetic,
        i.e. be integer based.
    ww : pandas.Series
        Power values sharing the index of ``tt``.
    i : int
        Window parameter; the rolling mean uses ``i+2`` samples.

    Returns
    -------
    tuple
        ``(deltat, wmax)``: ``1.0e-3`` times the time difference between the
        label of the best window and the label ``i`` positions before it, and
        the rolling mean power of that window.  ``(0, 0)`` is returned when
        there are not enough samples, when a label is missing from ``tt``, or
        when no time step could be computed in the stretch or the largest
        step times 1.0e-3 is 30 or more.
    """
    w_roll = ww.rolling(i+2).mean().dropna()

    if not len(w_roll):  # pragma: no cover
        return 0, 0

    # now goes with # data points - should be fixed seconds
    # A Series has a single axis, so idxmax takes no axis argument (axis=1
    # is rejected by current pandas).
    indexmax = w_roll.idxmax()

    try:
        t_0 = tt.loc[indexmax]
        t_1 = tt.loc[indexmax-i]
        # dropna() removes the leading NaN of diff(), so no separate null
        # check is needed; an empty result has max() NaN and fails the test
        deltas = tt.loc[indexmax-i:indexmax].diff().dropna()
        if 1.0e-3*deltas.max() < 30.:
            deltat = 1.0e-3*(t_0-t_1)
            wmax = w_roll.loc[indexmax]
        else:
            wmax = 0
            deltat = 0
    except KeyError:
        wmax = 0
        deltat = 0

    return deltat, wmax
def getfastest(df, thevalue, mode='distance'):
    """Find the fastest stretch of a given distance or duration.

    The cumulative distance is resampled on 1000 equally spaced points
    between 0 and the largest time value.  For every window length on that
    grid the largest distance covered and the start of that window are
    determined, and the result is interpolated at the requested value.

    Parameters
    ----------
    df : pandas.DataFrame
        Workout data with the columns ``'time'`` and ``'cumdist'``.  The
        conversions below (division by 1000, multiplication by 60*1000)
        suggest ``'time'`` is in milliseconds - TODO confirm.
    thevalue : float
        With ``mode='distance'``: the distance to cover, in the unit of
        ``'cumdist'``.  Otherwise: a duration that is multiplied by 60*1000
        before being compared with the time grid.
    mode : str
        ``'distance'`` or anything else for the duration mode.

    Returns
    -------
    tuple or int
        ``mode='distance'``: ``(duration, starttime, endtime)``, all divided
        by 1000.  Other modes: ``(distance, starttime, endtime)`` with the
        times divided by 1000.  The integer 0 is returned when the workout
        does not reach ``thevalue``.
    """
    tt = df['time'].copy()
    dd = df['cumdist'].copy()

    tmax = tt.max()

    if mode == 'distance':  # pragma: no cover
        if dd.max() < thevalue:
            return 0
    else:  # pragma: no cover
        # NOTE(review): the time grid is later compared with
        # thevalue*60*1000, while this guard compares with thevalue itself;
        # looks like a unit mismatch - confirm before changing.
        if tt.max() < thevalue:
            return 0

    newlen = 1000
    newt = np.arange(newlen)*tmax/float(newlen)
    deltat = newt[1]-newt[0]

    dd = griddata(tt.values,
                  dd.values, newt, method='linear', rescale=True)

    tt = pd.Series(newt)
    dd = pd.Series(dd)

    # Distance with a leading zero: G[j]-G[i] is the distance covered in
    # the window i..j
    G = pd.concat([pd.Series([0]), dd])

    ones = 1+np.zeros(len(G))
    Gdif = np.outer(ones, G)-np.outer(G, ones)
    # keep only windows that run forward in time (upper triangle)
    Gdif = np.tril(Gdif.T).T

    F = pd.DataFrame(Gdif).ffill(axis=1).fillna(0).values

    restime = []
    distance = []
    starttimes = []

    # The i-th diagonal holds all windows of length i
    for i in range(len(tt)+1):
        window = np.diag(F, i)
        loc = np.argmax(window)
        restime.append(deltat*i)
        distance.append(window.max())
        starttimes.append(tt[loc])

    distance[0] = distance[1]

    restime = np.array(restime)
    distance = np.array(distance)
    starttimes = np.array(starttimes)

    if mode == 'distance':
        duration = griddata(distance, restime, [thevalue],
                            method='linear', rescale=True)
        starttime = griddata(distance, starttimes, [thevalue],
                             method='linear', rescale=True)
        endtime = starttime+duration

        return duration[0]/1000., starttime[0]/1000., endtime[0]/1000.

    # duration mode  # pragma: no cover
    target = thevalue*60*1000
    distance = griddata(restime, distance, [target],
                        method='linear', rescale=True)
    starttime = griddata(restime, starttimes, [target],
                         method='linear', rescale=True)
    duration = griddata(restime, restime, [target],
                        method='linear', rescale=True)
    endtime = starttime+duration

    return distance[0], starttime[0]/1000., endtime[0]/1000.