Private
Public Access
1
0
Files
rowsandall/rowers/datautils.py
2023-08-22 15:57:21 +02:00

471 lines
12 KiB
Python

import pandas as pd
import numpy as np
from scipy.interpolate import griddata
from scipy import optimize
from rowers.mytypes import otwtypes, otetypes, rowtypes
from rowers.models import Workout
# Default initial guess for the four-parameter critical-power fit in cpfit():
# two hyperbolic terms [amplitude1, amplitude2, tau1, tau2]
# (see fitfunc inside cpfit: p0/(1+x/p2) + p1/(1+x/p3)).
p0 = [190, 200, 33, 16000]
# RPE to TSS
# Lookup table: session RPE (1-10) -> approximate training stress score
# accumulated per hour of work (used by rscore_approx).
rpetotss = {
    1: 20,
    2: 30,
    3: 40,
    4: 50,
    5: 60,
    6: 70,
    7: 80,
    8: 100,
    9: 120,
    10: 140,
}
def rscore_approx(row):
    """Best-effort training-stress score for a workout row.

    Preference order:
      * a stored positive ``rscore`` is returned as-is;
      * ``rscore == 0``: fall back to ``hrtss`` when positive, else an
        RPE-based estimate (RPE-per-hour table times duration in hours);
      * ``rscore < 0`` is treated as a recompute flag: TRIMP/rScore are
        recomputed from the Workout, then the RPE estimate is returned.

    Returns 0 when no estimate can be made.
    """
    def _rpe_tss():
        # row['duration'] is a time-like object with hour/minute/second;
        # convert to hours and scale the per-hour TSS for this RPE.
        duration = row['duration']
        hours = (3600 * duration.hour + 60 * duration.minute
                 + duration.second) / 3600
        return rpetotss[row['rpe']] * hours

    if row['rscore'] > 0:
        return row['rscore']
    if row['rscore'] == 0:
        if row['hrtss'] > 0:
            return row['hrtss']
        # Guard against missing/zero RPE (the rscore < 0 branch already
        # guarded; the original could raise KeyError here on rpe == 0).
        if row['rpe'] and row['rpe'] > 0:
            return _rpe_tss()
        return 0
    if row['rscore'] < 0:
        # NOTE(review): `dataprep` is not among this module's visible
        # imports, and the recomputed trimp/hrtss/rscore/normp values are
        # discarded below — confirm intent (side effects only?).
        w = Workout.objects.get(id=row['id'])
        trimp, hrtss = dataprep.workout_trimp(w)
        rscore, normp = dataprep.workout_rscore(w)
        if row['rpe'] and row['rpe'] > 0:
            return _rpe_tss()
    return 0
def updatecp(delta, cpvalues, r, workouttype='water'):  # pragma: no cover
    """Merge new (duration, power) best-effort points into the stored
    critical-power curve, refit the model, and persist the parameters.

    Parameters:
        delta: pandas Series of effort durations.
        cpvalues: pandas Series of measured best power at those durations.
        r: profile object carrying p0..p3 / cpratio (on-the-water) or
           ep0..ep3 / ecpratio (erg) fields plus a Django-style save().
        workouttype: selects which parameter set applies (water types are
           listed in otwtypes).

    Returns 1 on completion.
    """
    # Pick the stored parameter set for this workout family.
    if workouttype in otwtypes:
        par0, par1, par2, par3 = r.p0, r.p1, r.p2, r.p3
    else:
        par0, par1, par2, par3 = r.ep0, r.ep1, r.ep2, r.ep3
    # Model-predicted CP at the same durations from the stored parameters.
    cp2 = par0 / (1 + delta / par2)
    cp2 += par1 / (1 + delta / par3)
    # BUGFIX: Series.append was removed in pandas 2.0 — use pd.concat.
    delta = pd.concat([delta, delta])
    cp = pd.concat([cpvalues, cp2])
    powerdf = pd.DataFrame({
        'Delta': delta,
        'CP': cp,
    })
    powerdf.dropna(axis=0, inplace=True)
    # For each duration keep only the highest CP (measured or modeled).
    powerdf.sort_values(['Delta', 'CP'], ascending=[1, 0], inplace=True)
    powerdf.drop_duplicates(subset='Delta', keep='first', inplace=True)
    res = cpfit(powerdf)
    fitted = res[0]
    if workouttype in otwtypes:
        r.p0 = fitted[0]
        r.p1 = fitted[1]
        r.p2 = fitted[2]
        r.p3 = fitted[3]
        r.cpratio = res[3]
    else:
        r.ep0 = fitted[0]
        r.ep1 = fitted[1]
        r.ep2 = fitted[2]
        r.ep3 = fitted[3]
        r.ecpratio = res[3]
    r.save()
    return 1
def cpfit(powerdf, fraclimit=0.0001, nmax=1000, p0=(190, 200, 33, 16000)):
    """Fit (Delta, CP) points to a two-term hyperbolic critical-power model.

    Model: CP(x) = |a1|/(1 + x/|t1|) + |a2|/(1 + x/|t2|).

    Parameters:
        powerdf: DataFrame with 'Delta' (duration) and 'CP' (power) columns.
        fraclimit: convergence threshold for the envelope-scaling loop.
        nmax: maximum iterations of that loop.
        p0: initial parameter guess [a1, a2, t1, t2] (defaults match the
            historical module-level constant, so existing callers are
            unaffected).

    Returns:
        (p1, fitt, fitpower, ratio): fitted parameters, a log-spaced time
        grid (100 points, 1..~10^4 s), the model evaluated on that grid,
        and the scale factor that lifts the fit above all data points.
    """
    def fitfunc(pars, x):
        return abs(
            pars[0])/(1+(x/abs(pars[2]))) + abs(pars[1])/(1+(x/abs(pars[3])))

    def errfunc(pars, x, y):
        return fitfunc(pars, x)-y

    p0 = list(p0)
    p1 = p0
    thesecs = powerdf['Delta']
    theavpower = powerdf['CP']
    if len(thesecs) >= 4:
        try:
            p1, _ier = optimize.leastsq(
                errfunc, p0[:], args=(thesecs, theavpower))
        except Exception:  # pragma: no cover  (narrowed from bare except)
            # Least-squares failed: fall back to scaling the default curve
            # so it passes through the mean data point.
            factor = fitfunc(p0, thesecs.mean())/theavpower.mean()
            p1 = [p0[0]/factor, p0[1]/factor, p0[2], p0[3]]
    else:
        # Too few points for a 4-parameter fit: scale the default curve.
        factor = fitfunc(p0, thesecs.mean())/theavpower.mean()
        p1 = [p0[0]/factor, p0[1]/factor, p0[2], p0[3]]
    p1 = [abs(p) for p in p1]
    # Evaluation grid: 100 log-spaced durations from 1 to ~10^4 seconds.
    fitt = pd.Series(10**(4*np.arange(100)/100.))
    fitpower = fitfunc(p1, fitt)
    fitpoints = fitfunc(p1, thesecs)
    fitpoints0 = fitpoints.copy()
    dd = fitpoints-theavpower
    ddmin = dd.min()
    frac = abs(ddmin)/fitpoints.mean()
    counter = 0
    # Scale the fitted curve up until it envelopes every data point
    # (to within fraclimit), tracking the total scale in `ratio`.
    while frac > fraclimit and counter < nmax:
        fitpoints = fitpoints*(fitpoints.mean()-ddmin)/(fitpoints.mean())
        dd = fitpoints-theavpower
        ddmin = dd.min()
        frac = abs(ddmin)/fitpoints.mean()
        counter += 1
    ratio = fitpoints.mean()/fitpoints0.mean()
    return p1, fitt, fitpower, ratio
def getlogarr(maxt):
    """Return a roughly log-spaced array of window durations up to ~maxt.

    Ten steps per decade; each value is 5 + int(10**exponent), and
    duplicates produced by integer truncation at small values are dropped.
    """
    decades = round(np.log10(maxt - 5))
    exponents = np.arange(10 * decades + 1) / 10.
    values = []
    for exponent in exponents:
        try:
            values.append(5 + int(10. ** exponent))
        except ValueError:  # pragma: no cover
            values.append(0)
    series = pd.Series(values, dtype='float')
    series.drop_duplicates(keep='first', inplace=True)
    return series.values
def getsinglecp(df):  # pragma: no cover
    """Compute the critical-power curve for one raw workout DataFrame.

    Expects 'TimeStamp (sec)' and ' Power (watts)' columns (note the
    leading space in the power column name) and returns getcp's
    (delta, cpvalue, avgpower) result for a single synthetic group.
    """
    total_secs = df['TimeStamp (sec)'].max() - df['TimeStamp (sec)'].min()
    # Fall back to a fixed horizon for a degenerate (zero-span) recording.
    maxt = 1.05 * total_secs if total_secs != 0 else 1000.
    logarr = getlogarr(maxt)
    t_start = df.loc[:, 'TimeStamp (sec)'].iloc[0]
    dfnew = pd.DataFrame({
        'time': 1000 * (df['TimeStamp (sec)'] - t_start),  # seconds -> ms
        'power': df[' Power (watts)']
    })
    dfnew['workoutid'] = 0
    return getcp(dfnew.groupby(['workoutid']), logarr)
def getcp_new(dfgrouped, logarr):  # pragma: no cover
    """Matrix-based critical-power curve per workout group.

    For each group (needing 'time' and 'power' columns, time presumably in
    milliseconds — TODO confirm against callers), resamples power onto a
    uniform grid, computes the best mean power for every window length via
    prefix sums, and interpolates onto the durations in ``logarr``.

    Returns (delta, cpvalue, avgpower): duration Series, best-power Series
    (NaN rows dropped, one best value kept per duration), and a dict of
    group id -> integer mean power (or '---' when the mean is NaN).
    """
    delta = []
    cpvalue = []
    avgpower = {}
    for id, group in dfgrouped:
        tt = group['time'].copy()
        ww = group['power'].copy()
        try:
            avgpower[id] = int(ww.mean())
        except ValueError:
            avgpower[id] = '---'
        tmax = tt.max()
        # Uniform resampling grid; coarser for very long workouts to keep
        # the (n+1)x(n+1) matrices below manageable.
        if tmax > 500000:
            newlen = int(tmax/2000.)
            newt = np.arange(newlen)*tmax/float(newlen)
            deltat = newt[1]-newt[0]
        else:
            newt = np.arange(0, tmax, 10.)
            deltat = 10.
        ww = griddata(tt.values,
                      ww.values,
                      newt, method='linear',
                      rescale=True)
        tt = pd.Series(newt, dtype='float')
        ww = pd.Series(ww, dtype='float')
        # Prefix sums with a leading 0 so G[c]-G[r] is the power summed
        # over samples (r, c].
        G = pd.Series(ww.cumsum(), dtype='float')
        G = pd.concat([pd.Series([0], dtype='float'), G])
        h = np.mgrid[0:len(tt)+1:1, 0:len(tt)+1:1]
        distances = pd.DataFrame(h[1]-h[0])  # window length c - r
        ones = 1+np.zeros(len(G))
        Ghor = np.outer(ones, G)
        Gver = np.outer(G, ones)
        Gdif = Ghor - Gver
        Gdif = np.tril(Gdif.T).T  # keep only the upper triangle (c >= r)
        Gdif = pd.DataFrame(Gdif)
        F = Gdif/distances  # mean power over every (r, c] window
        # BUGFIX: DataFrame.fillna(method=...) is deprecated — use ffill().
        F = F.ffill(axis=1).fillna(0)
        restime = []
        power = []
        for i in np.arange(0, len(tt)+1, 1):
            restime.append(deltat*i)
            diag = np.diag(F, i)  # all windows of exactly i samples
            power.append(diag.max())
        power[0] = power[1]  # a zero-length window is meaningless
        restime = np.array(restime)
        power = np.array(power)
        cpvalues = griddata(restime, power,
                            logarr, method='linear', fill_value=0)
        cpvalue.extend(cpvalues)
        delta.extend(logarr)
    df = pd.DataFrame({
        'delta': delta,
        'cpvalue': cpvalue
    })
    df.dropna(axis=0, how='any', inplace=True)
    # BUGFIX: original sorted by nonexistent column 'cp' and deduplicated
    # on nonexistent 'Delta' (guaranteed KeyError); use the real columns.
    df = df.sort_values(['delta', 'cpvalue'], ascending=[1, 0])
    df = df.drop_duplicates(subset='delta', keep='first')
    delta = df['delta']
    cpvalue = df['cpvalue']
    return delta, cpvalue, avgpower
def getcp(dfgrouped, logarr):
    """Build a critical-power (best mean power vs. duration) dataset.

    For each workout group (needing 'time' and 'power' columns; time is
    presumably in milliseconds given the 1.0e-3 scaling in
    getmaxwattinterval — TODO confirm), finds the best rolling-average
    power for every window size and interpolates those maxima onto the
    durations in ``logarr``.

    Returns:
        (delta, cpvalue, avgpower): Series of durations, Series of the
        corresponding best powers (NaN rows dropped), and a dict mapping
        group id to integer mean power (or '---' when the mean is NaN).
    """
    delta = []
    cpvalue = []
    avgpower = {}
    for id, group in dfgrouped:
        tt = group['time'].copy()
        ww = group['power'].copy()
        # Remove data where PM is repeating final power value
        # of an interval during the rest
        rolling_std = ww.rolling(window=4).std()
        # deltas = tt.diff()
        mask = rolling_std == 0
        ww.loc[mask] = 0
        # Discard implausibly high readings (> 2000 W) as sensor glitches.
        mask = ww > 2000
        ww.loc[mask] = 0
        # tmax = tt.max()
        try:
            # Mean over the cleaned series; NaN mean raises ValueError on int().
            avgpower[id] = int(ww.mean())
        except ValueError: # pragma: no cover
            avgpower[id] = '---'
        if not np.isnan(ww.mean()):
            length = len(ww)
            dt = []
            cpw = []
            # One best rolling average per window size -> O(n^2) overall.
            for i in range(length-2):
                deltat, wmax = getmaxwattinterval(tt, ww, i)
                if not np.isnan(deltat) and not np.isnan(wmax):
                    dt.append(deltat)
                    cpw.append(wmax)
            dt = pd.Series(dt, dtype='float')
            cpw = pd.Series(cpw, dtype='float')
            # Need at least a few points for a meaningful interpolation.
            if len(dt) > 2:
                cpvalues = griddata(dt.values,
                                    cpw.values,
                                    logarr, method='linear',
                                    rescale=True)
                for cpv in cpvalues:
                    cpvalue.append(cpv)
                for d in logarr:
                    delta.append(d)
    delta = pd.Series(delta, name='Delta', dtype='float')
    cpvalue = pd.Series(cpvalue, name='CP', dtype='float')
    cpdf = pd.DataFrame({
        'delta': delta,
        'cpvalue': cpvalue
    })
    # Drop durations where interpolation produced NaN (outside data range).
    cpdf.dropna(axis=0, how='any', inplace=True)
    delta = cpdf['delta']
    cpvalue = cpdf['cpvalue']
    return delta, cpvalue, avgpower
def getmaxwattinterval(tt, ww, i):
    """Best (i+2)-sample rolling-average power and its wall-clock duration.

    Returns (deltat, wmax): duration in seconds (time values are scaled by
    1.0e-3) and the mean power of the best window, or (0, 0) when no valid
    window exists or the window contains gaps of 30 s or more.
    """
    rolled = ww.rolling(i + 2).mean().dropna()
    if not len(rolled):  # pragma: no cover
        return 0, 0
    # now goes with # data points - should be fixed seconds
    best = rolled.idxmax(axis=0)
    try:
        end_t = tt.loc[best]
        start_t = tt.loc[best - i]
        gaps = tt.loc[best - i:best].diff().dropna()
        # Reject windows spanning a recording gap (>= 30 s between samples)
        # or containing missing timestamps.
        valid = 1.0e-3 * gaps.max() < 30. and not gaps.isnull().values.any()
        if valid:
            return 1.0e-3 * (end_t - start_t), rolled.loc[best]
    except KeyError:
        pass
    return 0, 0
def getfastest(df, thevalue, mode='distance'):
    """Find the fastest effort of a given distance or duration.

    Parameters:
        df: DataFrame with 'time' (presumably milliseconds — TODO confirm)
            and 'cumdist' (cumulative distance) columns.
        thevalue: target distance (same units as 'cumdist') in 'distance'
            mode, or target duration in minutes otherwise.
        mode: 'distance' or anything else for time mode.

    Returns:
        'distance' mode: (duration_s, start_s, end_s);
        time mode: (distance, start_s, end_s);
        0 when the workout is too short to contain the requested effort.
    """
    tt = df['time'].copy()
    dd = df['cumdist'].copy()
    tmax = tt.max()
    # Bail out early when the requested effort cannot fit in the workout.
    if mode == 'distance':  # pragma: no cover
        if dd.max() < thevalue:
            return 0
    else:  # pragma: no cover
        if tt.max() < thevalue:
            return 0
    # Resample cumulative distance onto a uniform 1000-point time grid.
    newlen = 1000
    newt = np.arange(newlen)*tmax/float(newlen)
    deltat = newt[1]-newt[0]
    dd = griddata(tt.values,
                  dd.values, newt, method='linear', rescale=True)
    tt = pd.Series(newt, dtype='float')
    dd = pd.Series(dd, dtype='float')
    # Leading 0 so G[c]-G[r] is the distance covered between samples r and c.
    G = pd.concat([pd.Series([0]), dd])
    ones = 1+np.zeros(len(G))
    Ghor = np.outer(ones, G)
    Gver = np.outer(G, ones)
    Gdif = Ghor-Gver
    Gdif = np.tril(Gdif.T).T  # keep only the upper triangle (c >= r)
    F = pd.DataFrame(Gdif)
    # BUGFIX: DataFrame.fillna(method=...) is deprecated — use ffill().
    F = F.ffill(axis=1).fillna(0)
    restime = []
    distance = []
    starttimes = []
    for i in np.arange(0, len(tt)+1, 1):
        restime.append(deltat*i)
        diag = np.diag(F, i)  # distance covered by every window of i samples
        distance.append(diag.max())
        starttimes.append(tt[np.argmax(diag)])  # where the best window starts
    distance[0] = distance[1]  # a zero-length window is meaningless
    restime = np.array(restime)
    distance = np.array(distance)
    starttimes = np.array(starttimes)
    if mode == 'distance':
        duration = griddata(distance, restime, [
            thevalue], method='linear', rescale=True)
        starttime = griddata(distance, starttimes, [
            thevalue], method='linear', rescale=True)
        endtime = starttime+duration
        return duration[0]/1000., starttime[0]/1000., endtime[0]/1000.
    else:  # pragma: no cover
        target_ms = thevalue*60*1000  # minutes -> milliseconds
        distance = griddata(restime, distance, [
            target_ms], method='linear', rescale=True)
        starttime = griddata(restime, starttimes, [
            target_ms], method='linear', rescale=True)
        duration = griddata(restime, restime, [
            target_ms], method='linear', rescale=True)
        endtime = starttime+duration
        return distance[0], starttime[0]/1000., endtime[0]/1000.