# rowsandall/rowers/dataprep.py

# All the data preparation, data cleaning and data mangling should
# be defined here
from rowers.models import Workout, User, Rower, StrokeData
from rowingdata import rowingdata as rrdata

from rowers.tasks import handle_sendemail_unrecognized
from rowers.tasks import handle_zip_file
import pytz
from rowingdata import rower as rrower
from rowingdata import main as rmain

from rowingdata import get_file_type, get_empower_rigging

from pandas import DataFrame, Series
from pytz import timezone as tz, utc
from django.utils import timezone
from time import strftime, strptime, mktime, time, daylight
import arrow
from django.utils.timezone import get_current_timezone

# Django's current time zone, looked up once when this module is imported.
thetimezone = get_current_timezone()
from rowingdata import (
    TCXParser, RowProParser, ErgDataParser,
    CoxMateParser,
    BoatCoachParser, RowPerfectParser, BoatCoachAdvancedParser,
    MysteryParser, BoatCoachOTWParser,
    painsledDesktopParser, speedcoachParser, ErgStickParser,
    SpeedCoach2Parser, FITParser, fitsummarydata,
    make_cumvalues,cumcpdata,
    summarydata, get_file_type,
)

from rowers.models import Team
from rowers.metrics import axes
from async_messages import messages as a_messages
import os
import zipfile
import pandas as pd
import numpy as np
import itertools
import math
from tasks import (
    handle_sendemail_unrecognized, handle_sendemail_breakthrough,
    handle_sendemail_hard, handle_updatecp,handle_updateergcp
)

from django.conf import settings
from sqlalchemy import create_engine
import sqlalchemy as sa
import sys

import utils
import datautils
from utils import lbstoN
from scipy.interpolate import griddata

from timezonefinder import TimezoneFinder

import django_rq
# RQ queues on which the functions in this module enqueue their tasks.
queue = django_rq.get_queue('default')
queuelow = django_rq.get_queue('low')
# NOTE(review): queuehigh is bound to the 'default' queue as well, so it is
# the same queue as `queue` -- confirm that this is intended.
queuehigh = django_rq.get_queue('default')


# Connection parameters are taken from Django's default database settings
# and assembled into the SQLAlchemy URL used by the raw-SQL helpers below.
user = settings.DATABASES['default']['USER']
password = settings.DATABASES['default']['PASSWORD']
database_name = settings.DATABASES['default']['NAME']
host = settings.DATABASES['default']['HOST']
port = settings.DATABASES['default']['PORT']

# NOTE(review): the credentials are inserted verbatim; a password containing
# URL-special characters would presumably need escaping -- confirm.
database_url = 'mysql://{user}:{password}@{host}:{port}/{database_name}'.format(
    user=user,
    password=password,
    database_name=database_name,
    host=host,
    port=port,
)

# Use SQLite local database when we're in debug mode
# (or when no database user is configured); NAME is then used as file path.
if settings.DEBUG or user == '':
    # database_url = 'sqlite:///db.sqlite3'
    database_url = 'sqlite:///' + database_name

# mapping the DB column names to the CSV file column names
# Keys are the database column names, values the CSV column names.
# new_workout_from_df() uses this dict to rename database-style columns
# to CSV-style columns.
# NOTE(review): the leading spaces and the 'stokes' spelling are part of the
# literal column names; presumably they mirror the headers written by the
# rowingdata package -- do not "correct" them without confirming.
columndict = {
    'time': 'TimeStamp (sec)',
    'hr': ' HRCur (bpm)',
    'pace': ' Stroke500mPace (sec/500m)',
    'spm': ' Cadence (stokes/min)',
    'power': ' Power (watts)',
    'averageforce': ' AverageDriveForce (lbs)',
    'drivelength': ' DriveLength (meters)',
    'peakforce': ' PeakDriveForce (lbs)',
    'distance': ' Horizontal (meters)',
    'catch': 'catch',
    'finish': 'finish',
    'peakforceangle': 'peakforceangle',
    'wash': 'wash',
    'slip': 'slip',
    'workoutstate': ' WorkoutState',
    'cumdist': 'cum_dist',
}

from scipy.signal import savgol_filter

import datetime


def get_latlon(id):
    """Return ``[latitude, longitude]`` series for the workout with this id.

    Returns ``False`` when no such workout exists. When the stroke data
    has no ``' latitude'``/``' longitude'`` columns, two all-zero series
    derived from the time stamp column are returned instead. When reading
    the data raises ``AttributeError``, two empty series are returned.
    """
    try:
        workout = Workout.objects.get(id=id)
    except Workout.DoesNotExist:
        return False

    rowdata = rdata(workout.csvfilename)

    try:
        frame = rowdata.df
        try:
            track = [frame.ix[:, ' latitude'], frame.ix[:, ' longitude']]
        except KeyError:
            # No position columns: substitute zeros of the same length.
            zero_lat = 0 * frame.ix[:, 'TimeStamp (sec)']
            zero_lon = 0 * frame.ix[:, 'TimeStamp (sec)']
            track = [zero_lat, zero_lon]
    except AttributeError:
        track = [pd.Series([]), pd.Series([])]

    return track


def get_workouts(ids, userid):
    """Return the workouts from ``ids`` that belong to the given user.

    Each id is looked up once; workouts whose owner's user id differs from
    ``userid`` are left out. The order of ``ids`` is preserved. A missing
    workout raises ``Workout.DoesNotExist``, as ``objects.get`` does.
    """
    owned = []
    for workout_id in ids:
        workout = Workout.objects.get(id=workout_id)
        if int(workout.user.user.id) == int(userid):
            # Keep the object we already fetched instead of querying the
            # database a second time for the same id.
            owned.append(workout)

    return owned


def filter_df(datadf, fieldname, value, largerthan=True):
    """Blank out (set to NaN) the entries of one column that fail a threshold.

    With ``largerthan`` true, entries smaller than ``value`` are blanked;
    otherwise entries greater than or equal to ``value`` are blanked.
    The frame is modified in place and also returned. A frame without the
    column is returned untouched.
    """
    try:
        column = datadf[fieldname]
    except KeyError:
        return datadf

    rejected = column < value if largerthan else column >= value
    datadf.loc[rejected, fieldname] = np.nan

    return datadf


def df_resample(datadf):
    """Average all rows that fall within the same whole second.

    The ``'TimeStamp (sec)'`` column (time stamps must be in seconds) is
    truncated to integers and stored on the input frame as a new
    ``'timestamps'`` column; the returned frame holds the per-second mean
    of every column and is indexed by that integer time stamp.
    """
    datadf['timestamps'] = datadf['TimeStamp (sec)'].astype('int')
    return datadf.groupby(['timestamps']).mean()


# (column, predicate) pairs; entries for which the predicate holds are
# replaced by NaN. These rules are always applied.
_BASIC_RANGE_RULES = (
    ('spm', lambda s: s < 10),
    ('pace', lambda s: s / 1000. > 300.),
    ('efficiency', lambda s: s < 0.),
    ('pace', lambda s: s / 1000. < 60.),
    ('spm', lambda s: s > 60),
    ('wash', lambda s: s < 1),
)

# Rules that are skipped when ``ignoreadvanced`` is set.
_ADVANCED_RANGE_RULES = (
    ('rhythm', lambda s: s < 5),
    ('rhythm', lambda s: s > 70),
    ('power', lambda s: s < 20),
    ('drivelength', lambda s: s < 0.5),
    ('forceratio', lambda s: s < 0.2),
    ('forceratio', lambda s: s > 1.0),
    ('drivespeed', lambda s: s < 0.5),
    ('drivespeed', lambda s: s > 4),
    ('driveenergy', lambda s: s > 2000),
    ('driveenergy', lambda s: s < 100),
    ('catch', lambda s: s > -30.),
)


def _transform_column(frame, column, func):
    # Replace a column by func(column); silently skip missing columns.
    try:
        frame[column] = func(frame[column])
    except KeyError:
        pass


def _blank_where(frame, column, predicate):
    # Set the entries of a column selected by predicate to NaN; silently
    # skip missing columns.
    try:
        mask = predicate(frame[column])
        frame.loc[mask, column] = np.nan
    except KeyError:
        pass


def clean_df_stats(datadf, workstrokesonly=True, ignorehr=True,
                   ignoreadvanced=False):
    """Return a cleaned copy of a stroke data frame for statistics.

    Zeros and negative values are replaced by NaN, values outside the
    per-column ranges coded in the rule tables above are replaced by NaN,
    and, when requested, rest strokes (workout state 3) are dropped.

    The input frame is not modified. (Previously the caller's frame could
    be left with ``catch`` negated and ``peakforceangle``/``hr`` shifted,
    because the temporary shifts were applied in place and then undone
    only on the clipped copy.)

    Parameters:
        datadf: frame with database-style column names; columns that are
            absent are skipped.
        workstrokesonly: ``True`` or the string ``'True'`` removes the
            rows whose ``workoutstate`` is a rest state.
        ignorehr: when false, heart rates below 30 are blanked as well.
        ignoreadvanced: when true, the advanced range rules are skipped.

    Returns:
        The cleaned data frame.
    """
    # Work on a private copy so the caller's data stays untouched.
    datadf = datadf.copy()

    # Bring metrics which legitimately hold zero or negative values into
    # the positive domain, so the generic clip/zero removal spares them.
    _transform_column(datadf, 'catch', lambda s: -s)
    _transform_column(datadf, 'peakforceangle', lambda s: s + 1000)
    _transform_column(datadf, 'hr', lambda s: s + 10)

    try:
        datadf = datadf.clip(lower=0)
    except TypeError:
        pass

    datadf.replace(to_replace=0, value=np.nan, inplace=True)

    # Return from the positive domain to the original one.
    _transform_column(datadf, 'catch', lambda s: -s)
    _transform_column(datadf, 'peakforceangle', lambda s: s - 1000)
    _transform_column(datadf, 'hr', lambda s: s - 10)

    # Clean data for useful ranges per column.
    if not ignorehr:
        _blank_where(datadf, 'hr', lambda s: s < 30)

    for column, predicate in _BASIC_RANGE_RULES:
        _blank_where(datadf, column, predicate)

    if not ignoreadvanced:
        for column, predicate in _ADVANCED_RANGE_RULES:
            _blank_where(datadf, column, predicate)

    workoutstatesrest = [3]

    if workstrokesonly in ('True', True):
        try:
            datadf = datadf[~datadf['workoutstate'].isin(workoutstatesrest)]
        except KeyError:
            # No workout state column: nothing to filter on.
            pass

    return datadf


def getstatsfields():
    """Return the StrokeData fields that are meaningful for statistics.

    Returns a tuple ``(fieldlist, fielddict)``: ``fielddict`` maps field
    name to verbose name for every StrokeData field except the excluded
    ones listed below, and ``fieldlist`` holds the remaining field names.
    A ``KeyError`` is raised when an excluded name is not a model field.
    """
    fielddict = {}
    for field in StrokeData._meta.get_fields():
        fielddict[field.name] = field.verbose_name

    # 'workoutid' and 'workoutstate' are deliberately kept.
    not_for_stats = (
        'ergpace',
        'hr_an',
        'hr_tr',
        'hr_at',
        'hr_ut2',
        'hr_ut1',
        'time',
        'distance',
        'nowindpace',
        'fnowindpace',
        'fergpace',
        'equivergpower',
        'fpace',
        'pace',
        'id',
        'ftime',
        'x_right',
        'hr_max',
        'hr_bottom',
        'cumdist',
    )
    for name in not_for_stats:
        fielddict.pop(name)

    fieldlist = list(fielddict)

    return fieldlist, fielddict


# A string representation for time deltas
def niceformat(values):
    """Format every time delta in ``values`` with strfdelta; return a list."""
    return [strfdelta(v) for v in values]

# A nice printable format for time delta values


def strfdelta(tdelta):
    """Format a time delta as ``MM:SS.t`` (minutes, seconds, tenths).

    Accepts objects exposing ``seconds``/``microseconds`` attributes (such
    as ``datetime.timedelta``) and numpy ``timedelta64`` scalars. For the
    former only the ``seconds`` and ``microseconds`` parts are used, so
    whole days are ignored and hours are folded into the minutes.

    The numpy branch now works in integer nanoseconds: previously it
    produced floats, which rendered as e.g. ``"2.0:5.0.3"``, and it read
    the raw integer assuming the scalar was already in nanoseconds.
    """
    try:
        minutes, seconds = divmod(tdelta.seconds, 60)
        tenths = int(tdelta.microseconds / 1e5)
    except AttributeError:
        # numpy scalar: normalise to nanoseconds, then split with integer
        # arithmetic so the fields format as whole numbers.
        nanoseconds = int(tdelta.astype('timedelta64[ns]').astype(np.int64))
        minutes, remainder = divmod(nanoseconds, 60 * 10 ** 9)
        seconds, remainder = divmod(remainder, 10 ** 9)
        tenths = remainder // 10 ** 8
    res = "{minutes:0>2}:{seconds:0>2}.{tenths:0>1}".format(
        minutes=minutes,
        seconds=seconds,
        tenths=tenths,
    )

    return res

# A nice printable format for pace values


def nicepaceformat(values):
    """Format every pace (a time delta) in ``values`` with strfdelta."""
    return [strfdelta(v) for v in values]

# Convert seconds to a Time Delta value, replacing NaN with a 5:50 pace


def timedeltaconv(x):
    """Convert a number of seconds into a ``datetime.timedelta``.

    Values that are not finite, not positive, or not below 175000 are
    replaced by 350 seconds (a 5:50 pace).
    """
    usable = np.isfinite(x) and 0 < x < 175000
    seconds = x if usable else 350.

    return datetime.timedelta(seconds=seconds)


def paceformatsecs(values):
    """Format paces given in seconds as strings.

    Each value goes through timedeltaconv (which substitutes a default for
    unusable values) and is then rendered by strfdelta.
    """
    return [strfdelta(timedeltaconv(v)) for v in values]

def getcpdata_sql(rower_id,table='cpdata'):
    """Read all rows of ``table`` that belong to ``rower_id``.

    Returns the query result as a pandas DataFrame. The engine is disposed
    once the query is done, also when the query raises. An earlier version
    additionally opened a raw connection that was never used or closed;
    that has been removed.

    NOTE(review): the table name and id are formatted into the SQL text,
    so both must come from trusted code, never from user input.
    """
    engine = create_engine(database_url, echo=False)
    query = sa.text('SELECT * from {table} WHERE user={rower_id};'.format(
        rower_id=rower_id,
        table=table,
        ))
    try:
        df = pd.read_sql_query(query, engine)
    finally:
        engine.dispose()

    return df

def deletecpdata_sql(rower_id,table='cpdata'):
    """Delete all rows of ``table`` that belong to ``rower_id``.

    Best effort: a failing DELETE is reported on stdout and otherwise
    ignored. The engine is always disposed, also when connecting fails.

    NOTE(review): the table name and id are formatted into the SQL text,
    so both must come from trusted code, never from user input.
    """
    engine = create_engine(database_url, echo=False)
    query = sa.text('DELETE from {table} WHERE user={rower_id};'.format(
        rower_id=rower_id,
        table=table,
        ))
    try:
        # The with-statement closes the connection, so no explicit close.
        with engine.connect() as conn, conn.begin():
            try:
                conn.execute(query)
            except Exception:
                # Parenthesised so the line is valid in Python 2 and 3.
                print("Database locked")
    finally:
        engine.dispose()


def updatecpdata_sql(rower_id,delta,cp,table='cpdata',distance=None):
    """Replace the stored CP rows of ``rower_id`` in ``table``.

    The user's existing rows in ``table`` are deleted, then one row per
    ``delta``/``cp`` pair is appended. ``distance`` is optional; when it
    is given and non-empty it is stored in a ``distance`` column.

    Fixes compared to the earlier version: the delete now targets the
    same ``table`` as the insert (it always hit the default table), and
    calling without ``distance`` no longer fails on a list default that
    has no ``.empty`` attribute.
    """
    deletecpdata_sql(rower_id, table=table)
    df = pd.DataFrame(
        {
            'delta':delta,
            'cp':cp,
            'user':rower_id
        }
    )

    if distance is not None and len(distance) > 0:
        df['distance'] = distance

    engine = create_engine(database_url, echo=False)
    try:
        with engine.connect() as conn, conn.begin():
            df.to_sql(table, engine, if_exists='append', index=False)
    finally:
        engine.dispose()


def runcpupdate(rower):
    """Schedule a CP update over the rower's recent on-water ranking pieces.

    Selects the rower's workouts of type 'water' flagged as ranking piece
    with a start time between 365 days ago and 5 days from now, and hands
    their ids to the handle_updatecp task (via ``.delay`` in DEBUG mode,
    via the default queue otherwise). Returns nothing.
    """
    window_start = timezone.now() - datetime.timedelta(days=365)
    window_end = timezone.now() + datetime.timedelta(days=5)

    ranking_pieces = Workout.objects.filter(
        user=rower,
        rankingpiece=True,
        workouttype='water',
        startdatetime__gte=window_start,
        startdatetime__lte=window_end,
    )
    workout_ids = [piece.id for piece in ranking_pieces]

    if not settings.DEBUG:
        queue.enqueue(handle_updatecp, rower.id, workout_ids)
    else:
        handle_updatecp.delay(rower.id, workout_ids, debug=True)

def fetchcperg(rower,theworkouts):
    """Return the stored erg CP data and schedule a recalculation.

    The rows currently in the 'ergcpdata' table for this rower are read
    first and returned as a DataFrame; afterwards the handle_updateergcp
    task is started with the CSV file names of ``theworkouts``.
    """
    # The ids are collected but not used further in this function.
    workout_ids = [int(w.id) for w in theworkouts]
    csv_names = [w.csvfilename for w in theworkouts]

    cpdf = getcpdata_sql(rower.id, table='ergcpdata')

    if not settings.DEBUG:
        queue.enqueue(handle_updateergcp, rower.id, csv_names)
    else:
        handle_updateergcp.delay(rower.id, csv_names, debug=True)

    return cpdf


def fetchcp(rower,theworkouts):
    """Return the stored CP curve plus the average power per workout.

    Returns ``(delta, cp, avgpower)``. ``avgpower`` maps workout id to the
    integer mean power taken from the stroke database. When CP rows exist
    for the rower, ``delta`` and ``cp`` are the corresponding columns;
    otherwise both are empty lists and a handle_updatecp task is started
    for the given workouts.
    """
    # get all power data from database (plus workoutid)
    workout_ids = [int(w.id) for w in theworkouts]
    strokes = getsmallrowdata_db(['power','workoutid','time'],
                                 ids=workout_ids)

    mean_power = strokes.groupby(['workoutid']).mean()['power'].astype(int)
    avgpower2 = dict(mean_power)

    cpdf = getcpdata_sql(rower.id)

    if cpdf.empty:
        # Nothing stored yet: trigger the calculation for next time.
        if settings.DEBUG:
            handle_updatecp.delay(rower.id, workout_ids, debug=True)
        else:
            queue.enqueue(handle_updatecp, rower.id, workout_ids)
        return [], [], avgpower2

    return cpdf['delta'], cpdf['cp'], avgpower2


# Processes painsled CSV file to database


def save_workout_database(f2, r, dosmooth=True, workouttype='rower',
                          dosummary=True, title='Workout',
                          workoutsource='unknown',
                          notes='', totaldist=0, totaltime=0,
                          summary='',
                          makeprivate=False,
                          oarlength=2.89, inboard=0.88,
                          forceunit='lbs',
                          consistencychecks=False):
    """Create a Workout from a painsled-style CSV file and store its strokes.

    Parameters:
        f2: path of the CSV file holding the stroke data.
        r: the Rower owning the workout; its heart rate and power zone
            settings are passed on to the rowingdata rower object.
        dosmooth: smooth the pace column and rewrite the CSV file.
        workouttype: for 'rower', 'dynamic' and 'slides' the power is
            recalculated; for 'water' the workout is tested for being a
            breakthrough or hard workout.
        dosummary: replace ``summary`` by ``row.allstats()``.
        totaldist, totaltime: when 0 they are derived from the data.
        makeprivate: store the workout with privacy 'hidden'.
        consistencychecks: report failed consistency checks to the user.
        The remaining arguments are stored on the Workout unchanged.

    Returns:
        ``(workout id, message)`` where message is ``None`` or a warning.
        ``(0, 'Error: CSV data file not found')`` when the data could not
        be loaded. When the mean sampling interval is below one second the
        data is resampled and the result of new_workout_from_df returned.

    Fixes compared to the earlier version: the "file not found" guard now
    runs before the data frame is first used; a ZeroDivisionError in the
    consistency check no longer leaves a variable unbound; and the
    notification emails are enqueued as tasks instead of being executed
    inline with their return value being enqueued.
    """
    message = None
    powerperc = 100 * np.array([r.pw_ut2,
                                r.pw_ut1,
                                r.pw_at,
                                r.pw_tr, r.pw_an]) / r.ftp

    # make workout and put in database
    rr = rrower(hrmax=r.max, hrut2=r.ut2,
                hrut1=r.ut1, hrat=r.at,
                hrtr=r.tr, hran=r.an, ftp=r.ftp,
                powerperc=powerperc, powerzones=r.powerzones)
    row = rdata(f2, rower=rr)

    # Must be checked before row.df is touched for the first time.
    if row == 0:
        return (0, 'Error: CSV data file not found')

    dtavg = row.df['TimeStamp (sec)'].diff().mean()

    if dtavg < 1:
        # More than one sample per second: average per second and start
        # over from the resampled frame.
        newdf = df_resample(row.df)
        try:
            os.remove(f2)
        except OSError:
            pass
        return new_workout_from_df(r, newdf,
                                   title=title)

    # Automatic repair (row.repair()) is currently disabled, so failed
    # checks are only reported to the user.
    try:
        checks = row.check_consistency()
        for key, value in checks.items():
            if not value and consistencychecks:
                a_messages.error(
                    r.user, 'Failed consistency check: ' + key + ', autocorrected')
    except ZeroDivisionError:
        pass

    if dosmooth:
        # auto smoothing
        pace = row.df[' Stroke500mPace (sec/500m)'].values
        velo = 500. / pace

        f = row.df['TimeStamp (sec)'].diff().mean()
        if f != 0 and not np.isnan(f):
            # odd window length derived from the mean sampling interval
            windowsize = 2 * (int(10. / (f))) + 1
        else:
            windowsize = 1
        if not 'originalvelo' in row.df:
            row.df['originalvelo'] = velo

        if windowsize > 3 and windowsize < len(velo):
            velo2 = savgol_filter(velo, windowsize, 3)
        else:
            velo2 = velo

        velo3 = pd.Series(velo2)
        velo3 = velo3.replace([-np.inf, np.inf], np.nan)
        velo3 = velo3.fillna(method='ffill')

        pace2 = 500. / abs(velo3)

        row.df[' Stroke500mPace (sec/500m)'] = pace2

        row.df = row.df.fillna(0)

        row.write_csv(f2, gzip=True)
        try:
            os.remove(f2)
        except OSError:
            pass

    # recalculate power data
    if workouttype == 'rower' or workouttype == 'dynamic' or workouttype == 'slides':
        try:
            row.erg_recalculatepower()
            row.write_csv(f2, gzip=True)
        except Exception:
            # best effort: keep the power values from the file
            pass

    averagehr = row.df[' HRCur (bpm)'].mean()
    maxhr = row.df[' HRCur (bpm)'].max()

    if totaldist == 0:
        totaldist = row.df['cum_dist'].max()
    if totaltime == 0:
        totaltime = row.df['TimeStamp (sec)'].max(
        ) - row.df['TimeStamp (sec)'].min()
        try:
            totaltime = totaltime + row.df.ix[0, ' ElapsedTime (sec)']
        except KeyError:
            pass

    if np.isnan(totaltime):
        totaltime = 0

    # Split the duration into clock fields, clamping each one to its
    # valid range and warning when clamping was needed.
    hours = int(totaltime / 3600.)
    if hours > 23:
        message = 'Warning: The workout duration was longer than 23 hours. '
        hours = 23

    minutes = int((totaltime - 3600. * hours) / 60.)
    if minutes > 59:
        minutes = 59
        if not message:
            message = 'Warning: there is something wrong with the workout duration'

    seconds = int(totaltime - 3600. * hours - 60. * minutes)
    if seconds > 59:
        seconds = 59
        if not message:
            message = 'Warning: there is something wrong with the workout duration'

    tenths = int(10 * (totaltime - 3600. * hours - 60. * minutes - seconds))
    if tenths > 9:
        tenths = 9
        if not message:
            message = 'Warning: there is something wrong with the workout duration'

    duration = "%s:%s:%s.%s" % (hours, minutes, seconds, tenths)

    if dosummary:
        summary = row.allstats()

    timezone_str = 'UTC'
    try:
        workoutstartdatetime = timezone.make_aware(row.rowdatetime)
    except ValueError:
        workoutstartdatetime = row.rowdatetime

    # Derive the time zone from the mean GPS position when available,
    # otherwise fall back to the rower's default time zone.
    try:
        latavg = row.df[' latitude'].mean()
        lonavg = row.df[' longitude'].mean()

        tf = TimezoneFinder()
        try:
            timezone_str = tf.timezone_at(lng=lonavg, lat=latavg)
        except ValueError:
            timezone_str = 'UTC'
        if timezone_str is None:
            timezone_str = tf.closest_timezone_at(lng=lonavg,
                                                  lat=latavg)
            if timezone_str is None:
                timezone_str = r.defaulttimezone
        try:
            workoutstartdatetime = pytz.timezone(timezone_str).localize(
                row.rowdatetime
            )
        except ValueError:
            workoutstartdatetime = workoutstartdatetime.astimezone(
                pytz.timezone(timezone_str)
            )
    except KeyError:
        timezone_str = r.defaulttimezone

    workoutdate = workoutstartdatetime.astimezone(
        pytz.timezone(timezone_str)
    ).strftime('%Y-%m-%d')
    workoutstarttime = workoutstartdatetime.astimezone(
        pytz.timezone(timezone_str)
    ).strftime('%H:%M:%S')

    if makeprivate:
        privacy = 'hidden'
    else:
        privacy = 'visible'

    # checking for inf values

    totaldist = np.nan_to_num(totaldist)
    maxhr = np.nan_to_num(maxhr)
    averagehr = np.nan_to_num(averagehr)

    # check for duplicate start times and duration
    ws = Workout.objects.filter(startdatetime=workoutstartdatetime,
                                distance=totaldist,
                                user=r)
    if (len(ws) != 0):
        message = "Warning: This workout probably already exists in the database"
        privacy = 'hidden'

    w = Workout(user=r, name=title, date=workoutdate,
                workouttype=workouttype,
                duration=duration, distance=totaldist,
                weightcategory=r.weightcategory,
                starttime=workoutstarttime,
                workoutsource=workoutsource,
                forceunit=forceunit,
                csvfilename=f2, notes=notes, summary=summary,
                maxhr=maxhr, averagehr=averagehr,
                startdatetime=workoutstartdatetime,
                inboard=inboard, oarlength=oarlength,
                timezone=timezone_str,
                privacy=privacy)

    w.save()

    isbreakthrough = False
    ishard = False
    if workouttype == 'water':
        df = getsmallrowdata_db(['power', 'workoutid', 'time'], ids=[w.id])
        if df['power'].mean():
            thesecs = totaltime
            maxt = 1.05 * thesecs
            if maxt > 0:
                logarr = datautils.getlogarr(maxt)
                dfgrouped = df.groupby(['workoutid'])
                delta, cpvalues, avgpower = datautils.getcp(dfgrouped, logarr)

                res, btvalues, res2 = utils.isbreakthrough(
                    delta, cpvalues, r.p0, r.p1, r.p2, r.p3, r.cpratio)
            else:
                res = 0
                res2 = 0
            if res:
                isbreakthrough = True
                res = datautils.updatecp(delta, cpvalues, r)
            if res2 and not isbreakthrough:
                ishard = True

    # submit email task to send email about breakthrough workout
    if isbreakthrough:
        a_messages.info(
            r.user, 'It looks like you have a new breakthrough workout')
        if settings.DEBUG and r.getemailnotifications:
            res = handle_sendemail_breakthrough.delay(w.id, r.user.email,
                                                      r.user.first_name,
                                                      r.user.last_name,
                                                      btvalues=btvalues.to_json())
        elif r.getemailnotifications:
            try:
                # Pass the task and its arguments separately so the queue
                # worker runs it; calling it here would run it inline.
                res = queuehigh.enqueue(
                    handle_sendemail_breakthrough,
                    w.id,
                    r.user.email,
                    r.user.first_name,
                    r.user.last_name,
                    btvalues=btvalues.to_json())
            except AttributeError:
                pass
        else:
            pass
    # submit email task to send email about hard workout
    if ishard:
        a_messages.info(r.user, 'That was a pretty hard workout')
        if settings.DEBUG and r.getemailnotifications:
            res = handle_sendemail_hard.delay(w.id, r.user.email,
                                              r.user.first_name,
                                              r.user.last_name,
                                              btvalues=btvalues.to_json())
        elif r.getemailnotifications:
            try:
                res = queuehigh.enqueue(
                    handle_sendemail_hard,
                    w.id,
                    r.user.email,
                    r.user.first_name,
                    r.user.last_name,
                    btvalues=btvalues.to_json())
            except AttributeError:
                pass
        else:
            pass

    # visible workouts are shared with all teams of the rower
    if privacy == 'visible':
        ts = Team.objects.filter(rower=r)
        for t in ts:
            w.team.add(t)

    # put stroke data in database
    res = dataprep(row.df, id=w.id, bands=True,
                   barchart=True, otwpower=True, empower=True, inboard=inboard)

    return (w.id, message)


def handle_nonpainsled(f2, fileformat, summary=''):
    """Convert a non-painsled stroke file into a painsled-compatible CSV.

    The parser matching ``fileformat`` reads ``f2``; the result is written
    (gzipped) to a new file named after ``f2`` with its last four
    characters replaced by ``'o.csv'``, and the original file is removed.

    Returns ``(new file name, summary, oarlength, inboard)``. The summary
    is replaced for 'speedcoach2' and 'fit' files when it can be derived;
    oar length and inboard are read from 'speedcoach2' files when
    possible and default to 2.89 and 0.88 otherwise.

    Raises ``ValueError`` for a file format without a parser. (Earlier
    this surfaced as an UnboundLocalError further down.)
    """
    parsers = {
        'rp': RowProParser,
        'tcx': TCXParser,
        'mystery': MysteryParser,
        'rowperfect3': RowPerfectParser,
        'ergdata': ErgDataParser,
        'coxmate': CoxMateParser,
        'bcmike': BoatCoachAdvancedParser,
        'boatcoach': BoatCoachParser,
        'boatcoachotw': BoatCoachOTWParser,
        'painsleddesktop': painsledDesktopParser,
        'speedcoach': speedcoachParser,
        'speedcoach2': SpeedCoach2Parser,
        'ergstick': ErgStickParser,
        'fit': FITParser,
    }

    oarlength = 2.89
    inboard = 0.88

    try:
        parser = parsers[fileformat]
    except (KeyError, TypeError):
        raise ValueError('Unsupported file format: %r' % (fileformat,))

    row = parser(f2)

    if fileformat == 'speedcoach2':
        # Rigging and summary are optional extras; keep the defaults when
        # they cannot be extracted.
        try:
            oarlength, inboard = get_empower_rigging(f2)
            summary = row.allstats()
        except Exception:
            pass
    elif fileformat == 'fit':
        # The FIT summary is optional as well.
        try:
            s = fitsummarydata(f2)
            s.setsummary()
            summary = s.summarytext
        except Exception:
            pass

    converted = f2[:-4] + 'o.csv'
    row.write_csv(converted, gzip=True)

    # Remove the uploaded original; fall back to the name with '.gz'
    # appended when the plain name cannot be removed.
    try:
        os.remove(f2)
    except OSError:
        os.remove(f2 + '.gz')

    return (converted, summary, oarlength, inboard)

# Create new workout from file and store it in the database
# This routine should be used everywhere in views.py and mailprocessing.py
# Currently there is code duplication


def new_workout_from_file(r, f2,
                          workouttype='rower',
                          title='Workout',
                          makeprivate=False,
                          notes=''):
    """Create a workout for rower ``r`` from an uploaded file.

    Returns ``(id, message, filename)``. ``id`` is 0 when the file was
    rejected (``message`` explains why) and -1 when a zip file was handed
    to the background zip handler; otherwise it is the id returned by
    save_workout_database. Rejected summary/corrupt files are deleted;
    unrecognized files are kept and mailed for inspection.
    """
    corrupt = "Rowsandall could not process this file. The extension is supported but the file seems corrupt. Contact info@rowsandall.com if you think this is incorrect."

    try:
        fileformat = get_file_type(f2)
    except IOError:
        os.remove(f2)
        return (0, corrupt, f2)

    message = None
    summary = ''
    oarlength = 2.89
    inboard = 0.88

    # Zip archives are unpacked and processed by a background task.
    if len(fileformat) == 3 and fileformat[0] == 'zip':
        title = os.path.basename(f2)
        if not settings.DEBUG:
            res = queuelow.enqueue(
                handle_zip_file,
                r.user.email,
                title,
                f2
            )
        else:
            res = handle_zip_file.delay(
                r.user.email, title, f2
            )

        return -1, message, f2

    # Formats that are recognized but cannot be turned into a workout:
    # logbook summaries, files without strokes and corrupted archives.
    rejections = (
        ('c2log', "This C2 logbook summary does not contain stroke data. Please download the Export Stroke Data file from the workout details on the C2 logbook."),
        ('nostrokes', "It looks like this file doesn't contain stroke data."),
        ('notgzip', corrupt),
        ('rowprolog', "This RowPro logbook summary does not contain stroke data. Please use the Stroke Data CSV file for the individual workout in your log."),
    )
    for rejected_format, explanation in rejections:
        if fileformat == rejected_format:
            os.remove(f2)
            return (0, explanation, f2)

    # Sometimes people try an unsupported file type.
    # Send an email to info@rowsandall.com with the file attached
    # for me to check if it is a bug, or a new file type
    # worth supporting
    if fileformat == 'unknown':
        message = "We couldn't recognize the file type"
        if not settings.DEBUG:
            res = queuehigh.enqueue(handle_sendemail_unrecognized,
                                    f2, r.user.email)
        else:
            res = handle_sendemail_unrecognized.delay(f2,
                                                      r.user.email)
        return (0, message, f2)

    # handle non-Painsled by converting it to painsled compatible CSV
    if fileformat != 'csv':
        try:
            converted = handle_nonpainsled(f2, fileformat, summary=summary)
            f2, summary, oarlength, inboard = converted
        except:
            message = 'Something went wrong: ' + str(sys.exc_info()[0])
            return (0, message, '')

    # FIT files bring their own summary.
    dosummary = (fileformat != 'fit')
    id, message = save_workout_database(f2, r,
                                        workouttype=workouttype,
                                        makeprivate=makeprivate,
                                        dosummary=dosummary,
                                        workoutsource=fileformat,
                                        summary=summary,
                                        inboard=inboard, oarlength=oarlength,
                                        title=title)

    return (id, message, f2)


def split_workout(r, parent, splitsecond, splitmode):
    """Split workout ``parent`` in two at ``splitsecond`` seconds.

    ``splitmode`` is searched for the options 'keep first', 'keep second',
    'keep original', 'firstprivate', 'secondprivate' and
    'originalprivate'. Kept parts are stored as new workouts; the parent
    is deleted unless 'keep original' is given or no part is kept.

    Returns ``(ids, messages)``: the ids of the created workouts (or of
    the parent when nothing would remain) and the collected messages.
    """
    def _tidy(part):
        # Sort by time, fill gaps, average duplicate time stamps and
        # bring pace back from milliseconds to seconds.
        part = part.sort_values(['time'])
        part = part.interpolate(method='linear', axis=0,
                                limit_direction='both', limit=10)
        part.fillna(method='bfill', inplace=True)

        part = part.groupby('time', axis=0).mean()
        part['time'] = part.index
        part.reset_index(drop=True, inplace=True)

        part['pace'] = part['pace'] / 1000.
        part.drop_duplicates(subset='time', inplace=True)
        return part

    data, _row = getrowdata_db(id=parent.id)
    latitude, longitude = get_latlon(parent.id)
    if not (latitude.empty or longitude.empty):
        data[' latitude'] = latitude
        data[' longitude'] = longitude

    # the database time column is divided by 1000 to get seconds
    data['time'] = data['time'] / 1000.

    data1 = _tidy(data[data['time'] <= splitsecond].copy())
    data2 = _tidy(data[data['time'] > splitsecond].copy())

    messages = []
    ids = []

    keep_first = 'keep first' in splitmode
    keep_second = 'keep second' in splitmode

    if keep_first:
        id, message = new_workout_from_df(
            r, data1,
            title=parent.name + ' (1)',
            parent=parent,
            setprivate='firstprivate' in splitmode,
            forceunit='N')
        messages.append(message)
        ids.append(id)

    if keep_second:
        # let the second part start at zero distance and zero time
        for column in ('cumdist', 'distance', 'time'):
            data2[column] = data2[column] - data2.ix[0, column]

        id, message = new_workout_from_df(
            r, data2,
            title=parent.name + ' (2)',
            parent=parent,
            setprivate='secondprivate' in splitmode,
            dt=datetime.timedelta(seconds=splitsecond),
            forceunit='N')
        messages.append(message)
        ids.append(id)

    if 'keep original' in splitmode:
        if 'originalprivate' in splitmode:
            parent.privacy = 'hidden'
            parent.save()
    elif keep_second or keep_first:
        parent.delete()
        messages.append('Deleted Workout: ' + parent.name)
    else:
        # deleting the parent would leave nothing behind
        messages.append('That would delete your workout')
        ids.append(parent.id)

    return ids, messages

# Create new workout from data frame and store it in the database
# This routine should be used everywhere in views.py and mailprocessing.py
# Currently there is code duplication


def new_workout_from_df(r, df,
                        title='New Workout',
                        parent=None,
                        setprivate=False,
                        forceunit='lbs',
                        dt=datetime.timedelta()):
    """Write a stroke data frame to a gzipped CSV and register it as a workout.

    Parameters:
      r          -- passed through to save_workout_database as the owner.
      df         -- stroke data using the internal column names; it is
                    copied, so the caller's frame is left untouched.
      title      -- title of the new workout.
      parent     -- optional workout whose oarlength, inboard, workouttype,
                    notes, privacy and start time are inherited.
      setprivate -- force the new workout to be private.
      forceunit  -- when 'N', 'peakforce' and 'averageforce' are divided by
                    lbstoN before the file is written.
      dt         -- offset added to parent.startdatetime (only used when a
                    parent is given).

    Returns the (id, message) tuple produced by save_workout_database.
    """
    # Local import: only needed to make the CSV file name unique.
    import uuid

    if parent:
        oarlength = parent.oarlength
        inboard = parent.inboard
        workouttype = parent.workouttype
        notes = parent.notes
        makeprivate = parent.privacy == 'hidden'
        startdatetime = parent.startdatetime + dt
    else:
        oarlength = 2.89
        inboard = 0.88
        workouttype = 'rower'
        notes = ''
        makeprivate = False
        startdatetime = timezone.now()

    if setprivate:
        makeprivate = True

    # A timestamp alone has one-second resolution, so two workouts created
    # in quick succession (e.g. both halves of a split) would share a file.
    # The random token keeps every file name distinct.
    timestr = strftime("%Y%m%d-%H%M%S")
    csvfilename = 'media/df_{timestr}_{token}.csv'.format(
        timestr=timestr,
        token=uuid.uuid4().hex[:8],
    )

    # Work on a copy: the unit conversion and the column rename below would
    # otherwise leak into the caller's data frame.
    df = df.copy()

    if forceunit == 'N':
        # change to lbs for now
        df['peakforce'] /= lbstoN
        df['averageforce'] /= lbstoN

    df.rename(columns=columndict, inplace=True)

    # Keep the relative time as elapsed time and shift the time stamps so
    # they start at the workout's start time.
    starttimeunix = arrow.get(startdatetime).timestamp
    df[' ElapsedTime (sec)'] = df['TimeStamp (sec)']
    df['TimeStamp (sec)'] = df['TimeStamp (sec)'] + starttimeunix

    row = rrdata(df=df)
    row.write_csv(csvfilename, gzip=True)

    id, message = save_workout_database(csvfilename, r,
                                        workouttype=workouttype,
                                        title=title,
                                        notes=notes,
                                        oarlength=oarlength,
                                        inboard=inboard,
                                        makeprivate=makeprivate,
                                        dosmooth=False,
                                        consistencychecks=False)

    return (id, message)


# Compare the data from the CSV file and the database
# Currently only calculates number of strokes. To be expanded with
# more elaborate testing if needed
def compare_data(id):
    """Compare the stroke count in the CSV file with the one in the database.

    Returns a tuple (test, ldb, lfile): ldb is the number of strokedata rows
    for workout *id*, lfile the number of rows read from its CSV file, and
    test is True only when both counts are equal and non-zero.

    A CSV file that cannot be read counts as 0 rows.  A failing database
    query prints "Database Locked" and also counts as 0 rows.
    """
    row = Workout.objects.get(id=id)
    f1 = row.csvfilename
    try:
        rowdata = rdata(f1)
        # rdata returns 0 on failure, which has no .df attribute
        l1 = len(rowdata.df)
    except AttributeError:
        l1 = 0

    # Default value so that a failed query cannot leave l2 unbound.
    l2 = 0

    engine = create_engine(database_url, echo=False)
    query = sa.text('SELECT COUNT(*) FROM strokedata WHERE workoutid={id};'.format(
        id=id,
    ))
    try:
        # The with statement closes the connection on exit.
        with engine.connect() as conn, conn.begin():
            try:
                res = conn.execute(query)
                l2 = res.fetchall()[0][0]
            except Exception:
                print("Database Locked")
    finally:
        engine.dispose()

    lfile = l1
    ldb = l2
    return l1 == l2 and l1 != 0, ldb, lfile

# Repair data for workouts where the CSV file is lost (or the DB entries
# don't exist)


def repair_data(verbose=False):
    ws = Workout.objects.all()
    for w in ws:
        if verbose:
            sys.stdout.write(".")
        test, ldb, lfile = compare_data(w.id)
        if not test:
            if verbose:
                print w.id, lfile, ldb
            try:
                rowdata = rdata(w.csvfilename)
                if rowdata and len(rowdata.df):
                    update_strokedata(w.id, rowdata.df)

            except IOError, AttributeError:
                pass

            if lfile == 0:
                # if not ldb - delete workout

                try:
                    data = read_df_sql(w.id)
                    try:
                        datalength = len(data)
                    except AttributeError:
                        datalength = 0

                    if datalength != 0:
                        data.rename(columns=columndict, inplace=True)
                        res = data.to_csv(w.csvfilename + '.gz',
                                          index_label='index',
                                          compression='gzip')
                        print 'adding csv file'
                    else:
                        print w.id, ' No stroke records anywhere'
                        w.delete()
                except:
                    print 'failed'
                    print str(sys.exc_info()[0])
                    pass

# A wrapper around the rowingdata class, with some error catching


def rdata(file, rower=rrower()):
    """Load a rowingdata object from *file*, falling back to *file* + '.gz'.

    Returns the rowingdata object, or 0 when neither file can be loaded.
    Callers test the result for truthiness before using it.
    """
    try:
        res = rrdata(csvfile=file, rower=rower)
    # A tuple is required here: "except IOError, IndexError" would only
    # catch IOError and bind it to the name IndexError.
    except (IOError, IndexError):
        try:
            res = rrdata(csvfile=file + '.gz', rower=rower)
        except Exception:
            # Any failure on the gzipped fallback means "no data".
            res = 0

    return res

# Remove all stroke data for workout ID from database


def delete_strokedata(id):
    """Delete every strokedata row whose workoutid equals *id*.

    A failing DELETE is not raised; "Database Locked" is printed instead.
    """
    db = create_engine(database_url, echo=False)
    statement = sa.text(
        'DELETE FROM strokedata WHERE workoutid={id};'.format(id=id)
    )
    with db.connect() as conn, conn.begin():
        try:
            conn.execute(statement)
        except:
            print("Database Locked")
    conn.close()
    db.dispose()

# Replace stroke data in DB with data from CSV file


def update_strokedata(id, df):
    """Drop the stored strokes of workout *id* and rebuild them from *df*."""
    delete_strokedata(id)
    # dataprep writes the prepared rows to the database because id is given;
    # its return value is not needed here.
    dataprep(df, id=id, bands=True, barchart=True, otwpower=True)

# Test that all data are of a numerical type


def testdata(time, distance, pace, spm):
    t1 = np.issubdtype(time, np.number)
    t2 = np.issubdtype(distance, np.number)
    t3 = np.issubdtype(pace, np.number)
    t4 = np.issubdtype(spm, np.number)

    return t1 and t2 and t3 and t4

# Get data from DB for one workout (fetches all data). If data
# is not in DB, read from CSV file (and create DB entry)


def getrowdata_db(id=0, doclean=False, convertnewtons=True):
    """Return (data, row) for workout *id*: its stroke data and the Workout.

    The stroke data are read from the strokedata table.  When the table has
    no rows for this workout they are rebuilt from the CSV file through
    dataprep, which also stores them in the database.  If no data can be
    obtained at all, an empty DataFrame is returned.

    Parameters:
      id             -- workout id.
      doclean        -- run clean_df_stats (ignoring heart rate) on the
                        result.
      convertnewtons -- currently unused.
    """
    data = read_df_sql(id)
    data['x_right'] = data['x_right'] / 1.0e6

    if data.empty:
        rowdata, row = getrowdata(id=id)
        data = None
        if rowdata:
            data = dataprep(rowdata.df, id=id, bands=True,
                            barchart=True, otwpower=True)
        # rdata may have failed, and dataprep returns 0 for an empty input;
        # normalise both cases to an empty DataFrame.
        if not isinstance(data, pd.DataFrame):
            data = pd.DataFrame()
    else:
        row = Workout.objects.get(id=id)

    # Only inspect the columns when they exist; an empty fallback frame has
    # no 'efficiency' or 'power' column.
    needsefficiency = (
        not data.empty
        and 'efficiency' in data.columns
        and 'power' in data.columns
        and data['efficiency'].mean() == 0
        and data['power'].mean() != 0
    )
    if needsefficiency:
        data = add_efficiency(id=id)

    if doclean and not data.empty:
        data = clean_df_stats(data, ignorehr=True)

    return data, row

# Fetch a subset of the data from the DB


def getsmallrowdata_db(columns, ids=[], doclean=True, workstrokesonly=True):
    """Return the requested strokedata columns for the given workout ids.

    prepmultipledata is called first so that every id has rows in the
    strokedata table.  With doclean=True the result is passed through
    clean_df_stats (heart rate ignored) before being returned.
    """
    prepmultipledata(ids)
    frame = read_cols_df_sql(ids, columns)

    if not doclean:
        return frame

    return clean_df_stats(frame, ignorehr=True,
                          workstrokesonly=workstrokesonly)

# Fetch both the workout and the workout stroke data (from CSV file)


def getrowdata(id=0):
    """Return (rowdata, workout) for workout *id*, reading the CSV file.

    rowdata is whatever rdata returns for the workout's csvfilename, using
    a rower object built from the owner's heart rate zones and ftp.
    """
    # Raises if no workout with this id exists.
    workout = Workout.objects.get(id=id)
    csvpath = workout.csvfilename

    owner = workout.user
    account = owner.user  # looked up as in the original flow; not used further

    zones = dict(
        hrmax=owner.max,
        hrut2=owner.ut2,
        hrut1=owner.ut1,
        hrat=owner.at,
        hrtr=owner.tr,
        hran=owner.an,
        ftp=owner.ftp,
    )
    profile = rrower(**zones)

    return rdata(csvpath, rower=profile), workout

# Checks if all rows for a list of workout IDs have entries in the
# stroke_data table. If this is not the case, it creates the stroke
# data
# In theory, this should never yield any work, but it's a good
# safety net for programming errors elsewhere in the app
# Also used heavily when I moved from CSV file only to CSV+Stroke data


def prepmultipledata(ids, verbose=False):
    """Create strokedata rows for every id in *ids* that has none yet.

    Returns the list of ids that were missing from the strokedata table.
    With verbose=True each of those ids is printed while it is processed.
    """
    engine = create_engine(database_url, echo=False)
    statement = sa.text('SELECT DISTINCT workoutid FROM strokedata')

    with engine.connect() as conn, conn.begin():
        fetched = conn.execute(statement).fetchall()
        # flatten the one-column rows into a plain list of workout ids
        known = list(itertools.chain.from_iterable(fetched))
    conn.close()
    engine.dispose()

    try:
        wanted = [int(item) for item in ids]
    except ValueError:
        # ids that cannot be converted are compared as they are
        wanted = ids

    missing = list(set(wanted) - set(known))
    for workoutid in missing:
        rowdata, row = getrowdata(id=workoutid)
        if verbose:
            print(workoutid)
        if rowdata and len(rowdata.df):
            # dataprep stores the prepared strokes because id is given
            dataprep(rowdata.df, id=workoutid, bands=True,
                     barchart=True, otwpower=True)
    return missing

# Read a set of columns for a set of workout ids, returns data as a
# pandas dataframe


def read_cols_df_sql(ids, columns, convertnewtons=True):
    """Read a set of strokedata columns for a set of workout ids.

    Parameters:
      ids            -- sequence of workout ids; an empty sequence selects
                        workoutid=0.
      columns        -- requested column names.  Names that are not fields
                        of StrokeData are ignored.  'distance', 'spm' and
                        'workoutid' are always added.  The caller's
                        sequence is not modified.
      convertnewtons -- currently unused.

    Returns a DataFrame with missing values replaced by 0.  'peakforce'
    and 'averageforce' are multiplied by lbstoN for workouts whose
    forceunit is 'lbs'.
    """
    # drop columns that are not in the official list
    official = set(f.name for f in StrokeData._meta.get_fields())

    # Build a new list instead of removing from the list being iterated:
    # removing in place skips elements and alters the caller's list.
    columns = [c for c in columns if c in official]
    columns = columns + ['distance', 'spm', 'workoutid']
    columns = [x for x in columns if x != 'None']
    columns = list(set(columns))
    cls = ', '.join(columns)

    if len(ids) == 0:
        query = sa.text('SELECT {columns} FROM strokedata WHERE workoutid=0'.format(
            columns=cls,
        ))
    elif len(ids) == 1:
        query = sa.text('SELECT {columns} FROM strokedata WHERE workoutid={id}'.format(
            id=ids[0],
            columns=cls,
        ))
    else:
        query = sa.text('SELECT {columns} FROM strokedata WHERE workoutid IN {ids}'.format(
            columns=cls,
            ids=tuple(ids),
        ))

    engine = create_engine(database_url, echo=False)
    try:
        df = pd.read_sql_query(query, engine)
    finally:
        engine.dispose()

    df = df.fillna(value=0)

    # Convert forces stored in lbs, one pass over the workouts for both
    # force columns.
    forcecolumns = [c for c in ('peakforce', 'averageforce') if c in columns]
    if forcecolumns:
        for w in Workout.objects.filter(id__in=ids):
            if w.forceunit != 'lbs':
                continue
            mask = df['workoutid'] == w.id
            for c in forcecolumns:
                df.loc[mask, c] = df.loc[mask, c] * lbstoN

    return df

# Read stroke data from the DB for a Workout ID. Returns a pandas dataframe


def read_df_sql(id):
    """Return all strokedata rows of workout *id* as a DataFrame.

    Missing values are replaced by 0.  When the workout's forceunit is
    'lbs', the 'peakforce' and 'averageforce' columns (if present) are
    multiplied by lbstoN.
    """
    engine = create_engine(database_url, echo=False)
    statement = sa.text(
        'SELECT * FROM strokedata WHERE workoutid={id}'.format(id=id)
    )
    df = pd.read_sql_query(statement, engine)
    engine.dispose()

    df = df.fillna(value=0)

    if Workout.objects.get(id=id).forceunit != 'lbs':
        return df

    for forcecolumn in ('peakforce', 'averageforce'):
        if forcecolumn in df.columns:
            df[forcecolumn] = df[forcecolumn] * lbstoN

    return df

# Get the necessary data from the strokedata table in the DB.
# For the flex plot


def smalldataprep(therows, xparam, yparam1, yparam2):
    """Collect the flex plot columns of several workouts from their CSV files.

    For each workout in *therows* the CSV file is run through dataprep and
    the columns xparam, yparam1, yparam2, 'distance' and 'spm' are kept.
    A yparam2 of 'None' is replaced by 'power'.  When a file cannot be
    read, the same name with '.gz' appended is tried; workouts for which
    both attempts raise IOError are skipped.
    """
    if yparam2 == 'None':
        yparam2 = 'power'

    wanted = (xparam, yparam1, yparam2, 'distance', 'spm')

    df = pd.DataFrame()
    for name in wanted:
        df[name] = []

    def subset(path, workout):
        # Prepare one file and keep only the wanted columns.
        prepared = dataprep(rrdata(path).df)
        selection = pd.DataFrame({name: prepared[name] for name in wanted})
        if workout.forceunit == 'lbs':
            # Force columns are only present when they were asked for.
            for forcecolumn in ('peakforce', 'averageforce'):
                try:
                    selection[forcecolumn] *= lbstoN
                except KeyError:
                    pass
        return selection

    for workout in therows:
        f1 = workout.csvfilename
        try:
            df = pd.concat([df, subset(f1, workout)], ignore_index=True)
        except IOError:
            try:
                df = pd.concat([df, subset(f1 + '.gz', workout)],
                               ignore_index=True)
            except IOError:
                pass

    return df

# data fusion


def datafusion(id1, id2, columns, offset):
    """Merge selected columns of workout id2 into the data of workout id1.

    Parameters:
      id1     -- workout providing the base data.
      id2     -- workout providing the columns listed in *columns*.
      columns -- list of strokedata column names taken from id2; they
                 replace the same columns of id1.  Columns that id1 does
                 not have, or that are listed twice, are accepted.
      offset  -- datetime.timedelta added to the time axis of id2.

    Returns (df, forceunit) where forceunit is always 'N'.
    """
    # Fetched only to fail early when one of the workouts does not exist.
    Workout.objects.get(id=id1)
    Workout.objects.get(id=id2)

    df1, w1 = getrowdata_db(id=id1)
    df1 = df1.drop([  # 'cumdist',
        'hr_ut2',
        'hr_ut1',
        'hr_at',
        'hr_tr',
        'hr_an',
        'hr_max',
        'ftime',
        'fpace',
        'workoutid',
        'id'],
        axis=1, errors='ignore')

    # Add coordinates to DataFrame
    latitude, longitude = get_latlon(id1)

    df1[' latitude'] = latitude
    df1[' longitude'] = longitude

    df2 = getsmallrowdata_db(['time'] + columns, ids=[id2], doclean=False)

    forceunit = 'N'

    # Shift the second workout by the offset, expressed in milliseconds.
    offsetmillisecs = offset.seconds * 1000 + offset.microseconds / 1000.
    offsetmillisecs += offset.days * (3600 * 24 * 1000)
    df2['time'] = df2['time'] + offsetmillisecs

    # Remove from workout 1 the columns that workout 2 will supply.  Only
    # columns actually present are dropped, so a missing or repeated name
    # in *columns* does not raise.
    replaced = [c for c in set(columns) if c in df1.columns]
    df1 = df1.drop(replaced, axis=1, errors='ignore')

    df = pd.concat([df1, df2], ignore_index=True)
    df = df.sort_values(['time'])
    df = df.interpolate(method='linear', axis=0, limit_direction='both',
                        limit=10)
    df.fillna(method='bfill', inplace=True)

    # Collapse rows that share a time value into their mean.
    df = df.groupby('time', axis=0).mean()
    df['time'] = df.index
    df.reset_index(drop=True, inplace=True)

    df['time'] = df['time'] / 1000.
    df['pace'] = df['pace'] / 1000.
    df['cum_dist'] = df['cumdist']

    return df, forceunit


def fix_newtons(id=0, limit=3000):
    # rowdata,row = getrowdata_db(id=id,doclean=False,convertnewtons=False)
    rowdata = getsmallrowdata_db(['peakforce'], ids=[id], doclean=False)
    try:
        #avgforce = rowdata['averageforce']
        peakforce = rowdata['peakforce']
        if peakforce.mean() > limit:
            w = Workout.objects.get(id=id)
            print "fixing ", id
            rowdata = rdata(w.csvfilename)
            if rowdata and len(rowdata.df):
                update_strokedata(w.id, rowdata.df)
    except KeyError:
        pass


def add_efficiency(id=0):
    """Compute the 'efficiency' column for workout *id* and store it.

    efficiency = 100 * (2.8 * velo**3) / power, with velo = 500 / pace and
    pace taken from the 'pace' column divided by 1e3.  Infinite values are
    replaced by NaN and then forward filled.  The workout's strokedata
    rows are deleted and, when id is not 0, rewritten including the new
    column.  Returns the updated DataFrame.
    """
    # NOTE(review): getrowdata_db itself calls add_efficiency when the
    # stored efficiency averages 0 while power does not, which looks like
    # unbounded mutual recursion for exactly the rows this function is
    # meant to repair -- confirm and break the cycle.
    rowdata, row = getrowdata_db(id=id, doclean=False, convertnewtons=False)
    power = rowdata['power']
    pace = rowdata['pace'] / 1.0e3
    velo = 500. / pace
    ergpw = 2.8 * velo**3
    efficiency = 100. * ergpw / power

    efficiency = efficiency.replace([-np.inf, np.inf], np.nan)
    # fillna returns a new Series; the result has to be kept.
    efficiency = efficiency.fillna(method='ffill')
    rowdata['efficiency'] = efficiency
    delete_strokedata(id)
    if id != 0:
        rowdata['workoutid'] = id
        engine = create_engine(database_url, echo=False)
        with engine.connect() as conn, conn.begin():
            rowdata.to_sql('strokedata', engine,
                           if_exists='append', index=False)
        conn.close()
        engine.dispose()
    return rowdata

# This is the main routine.
# it reindexes, sorts, filters, and smooths the data, then
# saves it to the stroke_data table in the database
# Takes a rowingdata object's DataFrame as input


def dataprep(rowdatadf, id=0, bands=True, barchart=True, otwpower=True,
             empower=True, inboard=0.88, forceunit='lbs'):
    """Turn a rowingdata DataFrame into the strokedata layout.

    Parameters:
      rowdatadf -- DataFrame of a rowingdata object.  It is re-indexed in
                   place, pace values above 3000 are capped, and an
                   ' ElapsedTime (sec)' column is added when missing.
      id        -- workout id; when not 0 the result is appended to the
                   strokedata table with this workoutid.
      bands     -- copy the heart rate band columns (hr_ut2 ... hr_max).
      barchart  -- add the 'x_right' column used for bar charts.
      otwpower  -- add the ergpace / nowindpace / equivergpower columns.
      empower   -- add the oarlock columns (wash, catch, slip, finish, ...)
                   and 'efficiency'.
      inboard   -- inboard value used to derive drive length from the
                   catch and finish angles.
      forceunit -- when 'lbs', the derived drive energy is multiplied by
                   lbstoN.

    Returns the prepared DataFrame, or 0 when rowdatadf is empty.
    """
    if rowdatadf.empty:
        return 0

    rowdatadf.set_index([range(len(rowdatadf))], inplace=True)
    # time relative to the first record
    t = rowdatadf.ix[:, 'TimeStamp (sec)']
    t = pd.Series(t - rowdatadf.ix[0, 'TimeStamp (sec)'])

    # cap the pace at 3000
    row_index = rowdatadf.ix[:, ' Stroke500mPace (sec/500m)'] > 3000
    rowdatadf.loc[row_index, ' Stroke500mPace (sec/500m)'] = 3000.

    p = rowdatadf.ix[:, ' Stroke500mPace (sec/500m)']
    hr = rowdatadf.ix[:, ' HRCur (bpm)']
    spm = rowdatadf.ix[:, ' Cadence (stokes/min)']
    cumdist = rowdatadf.ix[:, 'cum_dist']
    power = rowdatadf.ix[:, ' Power (watts)']
    averageforce = rowdatadf.ix[:, ' AverageDriveForce (lbs)']
    drivelength = rowdatadf.ix[:, ' DriveLength (meters)']
    try:
        workoutstate = rowdatadf.ix[:, ' WorkoutState']
    except KeyError:
        workoutstate = 0 * hr

    peakforce = rowdatadf.ix[:, ' PeakDriveForce (lbs)']

    forceratio = averageforce / peakforce
    forceratio = forceratio.fillna(value=0)

    # rhythm: drive time as a percentage of the full stroke time
    try:
        drivetime = rowdatadf.ix[:, ' DriveTime (ms)']
        recoverytime = rowdatadf.ix[:, ' StrokeRecoveryTime (ms)']
        rhythm = 100. * drivetime / (recoverytime + drivetime)
        rhythm = rhythm.fillna(value=0)
    except:
        rhythm = 0.0 * forceratio

    # smoothing window derived from the mean time step; always odd and
    # at least 5
    f = rowdatadf['TimeStamp (sec)'].diff().mean()
    if f != 0:
        windowsize = 2 * (int(10. / (f))) + 1
    else:
        windowsize = 1
    if windowsize <= 3:
        windowsize = 5

    if windowsize > 3 and windowsize < len(hr):
        spm = savgol_filter(spm, windowsize, 3)
        hr = savgol_filter(hr, windowsize, 3)
        drivelength = savgol_filter(drivelength, windowsize, 3)
        forceratio = savgol_filter(forceratio, windowsize, 3)

    try:
        t2 = t.fillna(method='ffill').apply(lambda x: timedeltaconv(x))
    except TypeError:
        t2 = 0 * t

    p2 = p.fillna(method='ffill').apply(lambda x: timedeltaconv(x))

    try:
        drivespeed = drivelength / rowdatadf[' DriveTime (ms)'] * 1.0e3
    except TypeError:
        drivespeed = 0.0 * rowdatadf['TimeStamp (sec)']

    drivespeed = drivespeed.fillna(value=0)

    # use the stored drive energy when available, otherwise derive it
    # from drive length and average force
    try:
        driveenergy = rowdatadf['driveenergy']
    except KeyError:
        if forceunit == 'lbs':
            driveenergy = drivelength * averageforce * lbstoN
        else:
            driveenergy = drivelength * averageforce

    distance = rowdatadf.ix[:, 'cum_dist']
    velo = 500. / p

    distanceperstroke = 60. * velo / spm

    data = DataFrame(
        dict(
            time=t * 1e3,
            hr=hr,
            pace=p * 1e3,
            spm=spm,
            cumdist=cumdist,
            ftime=niceformat(t2),
            fpace=nicepaceformat(p2),
            driveenergy=driveenergy,
            power=power,
            workoutstate=workoutstate,
            averageforce=averageforce,
            drivelength=drivelength,
            peakforce=peakforce,
            forceratio=forceratio,
            distance=distance,
            drivespeed=drivespeed,
            rhythm=rhythm,
            distanceperstroke=distanceperstroke,
        )
    )

    if bands:
        # HR bands
        data['hr_ut2'] = rowdatadf.ix[:, 'hr_ut2']
        data['hr_ut1'] = rowdatadf.ix[:, 'hr_ut1']
        data['hr_at'] = rowdatadf.ix[:, 'hr_at']
        data['hr_tr'] = rowdatadf.ix[:, 'hr_tr']
        data['hr_an'] = rowdatadf.ix[:, 'hr_an']
        data['hr_max'] = rowdatadf.ix[:, 'hr_max']
        data['hr_bottom'] = 0.0 * data['hr']

    # make sure an elapsed time column exists
    try:
        tel = rowdatadf.ix[:, ' ElapsedTime (sec)']
    except KeyError:
        rowdatadf[' ElapsedTime (sec)'] = rowdatadf['TimeStamp (sec)']

    if barchart:
        # time increments for bar chart
        time_increments = rowdatadf.ix[:, ' ElapsedTime (sec)'].diff()
        time_increments[0] = time_increments[1]
        # negative increments become 0
        time_increments = 0.5 * time_increments + 0.5 * np.abs(time_increments)
        x_right = (t2 + time_increments.apply(lambda x: timedeltaconv(x)))

        data['x_right'] = x_right

    if empower:
        # oarlock columns default to 0 when not present
        try:
            wash = rowdatadf.ix[:, 'wash']
        except KeyError:
            wash = 0 * power

        try:
            catch = rowdatadf.ix[:, 'catch']
        except KeyError:
            catch = 0 * power

        try:
            finish = rowdatadf.ix[:, 'finish']
        except KeyError:
            finish = 0 * power

        try:
            peakforceangle = rowdatadf.ix[:, 'peakforceangle']
        except KeyError:
            peakforceangle = 0 * power

        if data['driveenergy'].mean() == 0:
            try:
                driveenergy = rowdatadf.ix[:, 'driveenergy']
            except KeyError:
                driveenergy = power * 60 / spm
        else:
            driveenergy = data['driveenergy']

        # prefer the drive length derived from the angles; fall back to
        # energy / force when no drive length is available at all
        arclength = (inboard - 0.05) * (np.radians(finish) - np.radians(catch))
        if arclength.mean() > 0:
            drivelength = arclength
        elif drivelength.mean() == 0:
            drivelength = driveenergy / (averageforce * 4.44822)

        try:
            slip = rowdatadf.ix[:, 'slip']
        except KeyError:
            slip = 0 * power

        try:
            totalangle = finish - catch
            effectiveangle = finish - wash - catch - slip
        except ValueError:
            totalangle = 0 * power
            effectiveangle = 0 * power

        if windowsize > 3 and windowsize < len(slip):
            try:
                wash = savgol_filter(wash, windowsize, 3)
            except TypeError:
                pass
            try:
                slip = savgol_filter(slip, windowsize, 3)
            except TypeError:
                pass
            try:
                catch = savgol_filter(catch, windowsize, 3)
            except TypeError:
                pass
            try:
                finish = savgol_filter(finish, windowsize, 3)
            except TypeError:
                pass
            try:
                peakforceangle = savgol_filter(peakforceangle, windowsize, 3)
            except TypeError:
                pass
            try:
                driveenergy = savgol_filter(driveenergy, windowsize, 3)
            except TypeError:
                pass
            try:
                drivelength = savgol_filter(drivelength, windowsize, 3)
            except TypeError:
                pass
            try:
                totalangle = savgol_filter(totalangle, windowsize, 3)
            except TypeError:
                pass
            try:
                effectiveangle = savgol_filter(effectiveangle, windowsize, 3)
            except TypeError:
                pass

        velo = 500. / p

        ergpw = 2.8 * velo**3
        efficiency = 100. * ergpw / power

        efficiency = efficiency.replace([-np.inf, np.inf], np.nan)
        # fillna returns a new Series; the result has to be kept
        efficiency = efficiency.fillna(method='ffill')

        try:
            data['wash'] = wash
            data['catch'] = catch
            data['slip'] = slip
            data['finish'] = finish
            data['peakforceangle'] = peakforceangle
            data['driveenergy'] = driveenergy
            data['drivelength'] = drivelength
            data['totalangle'] = totalangle
            data['effectiveangle'] = effectiveangle
            data['efficiency'] = efficiency
        except ValueError:
            pass

    if otwpower:
        try:
            nowindpace = rowdatadf.ix[:, 'nowindpace']
        except KeyError:
            nowindpace = p
        try:
            equivergpower = rowdatadf.ix[:, 'equivergpower']
        except KeyError:
            equivergpower = 0 * p + 50.

        nowindpace2 = nowindpace.apply(lambda x: timedeltaconv(x))
        ergvelo = (equivergpower / 2.8)**(1. / 3.)

        ergpace = 500. / ergvelo
        ergpace[ergpace == np.inf] = 240.
        ergpace2 = ergpace.apply(lambda x: timedeltaconv(x))

        data['ergpace'] = ergpace * 1e3
        data['nowindpace'] = nowindpace * 1e3
        data['equivergpower'] = equivergpower
        data['fergpace'] = nicepaceformat(ergpace2)
        data['fnowindpace'] = nicepaceformat(nowindpace2)

    data = data.replace([-np.inf, np.inf], np.nan)
    data = data.fillna(method='ffill')

    # write data if id given
    if id != 0:
        data['workoutid'] = id
        engine = create_engine(database_url, echo=False)
        with engine.connect() as conn, conn.begin():
            data.to_sql('strokedata', engine, if_exists='append', index=False)
        conn.close()
        engine.dispose()
    return data