Private
Public Access
1
0
Files
rowsandall/rowers/dataprep.py
2016-12-21 22:41:33 +01:00

632 lines
17 KiB
Python

from rowers.models import Workout, User, Rower
from rowingdata import rowingdata as rrdata
from rowers.tasks import handle_sendemail_unrecognized
from rowingdata import rower as rrower
from rowingdata import main as rmain
from rowingdata import get_file_type
from pandas import DataFrame,Series
from pytz import timezone as tz,utc
from django.utils.timezone import get_current_timezone
thetimezone = get_current_timezone()
import pandas as pd
import numpy as np
import itertools
from django.conf import settings
from sqlalchemy import create_engine
import sqlalchemy as sa
import sys
user = settings.DATABASES['default']['USER']
password = settings.DATABASES['default']['PASSWORD']
database_name = settings.DATABASES['default']['NAME']
host = settings.DATABASES['default']['HOST']
port = settings.DATABASES['default']['PORT']
database_url = 'mysql://{user}:{password}@{host}:{port}/{database_name}'.format(
user=user,
password=password,
database_name=database_name,
host=host,
port=port,
)
if settings.DEBUG or user=='':
# database_url = 'sqlite:///db.sqlite3'
database_url = 'sqlite:///'+database_name
from scipy.signal import savgol_filter
import datetime
def niceformat(values):
out = []
for v in values:
formattedv = strfdelta(v)
out.append(formattedv)
return out
def strfdelta(tdelta):
try:
minutes,seconds = divmod(tdelta.seconds,60)
tenths = int(tdelta.microseconds/1e5)
except AttributeError:
minutes,seconds = divmod(tdelta.view(np.int64),60e9)
seconds,rest = divmod(seconds,1e9)
tenths = int(rest/1e8)
res = "{minutes:0>2}:{seconds:0>2}.{tenths:0>1}".format(
minutes=minutes,
seconds=seconds,
tenths=tenths,
)
return res
def nicepaceformat(values):
out = []
for v in values:
formattedv = strfdelta(v)
out.append(formattedv)
return out
def timedeltaconv(x):
if not np.isnan(x):
dt = datetime.timedelta(seconds=x)
else:
dt = datetime.timedelta(seconds=350.)
return dt
def new_workout_from_file(r,f2,
workouttype='rower',
title='Workout',
notes=''):
fileformat = get_file_type(f2)
summary = ''
# handle non-Painsled
if (fileformat != 'csv'):
# handle RowPro:
if (fileformat == 'rp'):
row = RowProParser(f2)
# handle TCX
if (fileformat == 'tcx'):
row = TCXParser(f2)
# handle Mystery
if (fileformat == 'mystery'):
row = MysteryParser(f2)
# handle TCX no HR
if (fileformat == 'tcxnohr'):
row = TCXParserNoHR(f2)
# handle ErgData
if (fileformat == 'ergdata'):
row = ErgDataParser(f2)
# handle BoatCoach
if (fileformat == 'boatcoach'):
row = BoatCoachParser(f2)
# handle painsled desktop
if (fileformat == 'painsleddesktop'):
row = painsledDesktopParser(f2)
# handle speed coach GPS
if (fileformat == 'speedcoach'):
row = speedcoachParser(f2)
# handle speed coach GPS 2
if (fileformat == 'speedcoach2'):
row = SpeedCoach2Parser(f2)
summary = row.allstats()
# handle ErgStick
if (fileformat == 'ergstick'):
row = ErgStickParser(f2)
# handle FIT
if (fileformat == 'fit'):
row = FITParser(f2)
s = fitsummarydata(f2)
s.setsummary()
summary = s.summarytext
f_to_be_deleted = f2
# should delete file
f2 = f2[:-4]+'o.csv'
row.write_csv(f2,gzip=True)
#os.remove(f2)
try:
os.remove(f_to_be_deleted)
except:
os.remove(f_to_be_deleted+'.gz')
# make workout and put in database
rr = rrower(hrmax=r.max,hrut2=r.ut2,
hrut1=r.ut1,hrat=r.at,
hrtr=r.tr,hran=r.an,ftp=r.ftp)
row = rdata(f2,rower=rr)
if row == 0:
return HttpResponse("Error: CSV Data File Not Found")
# auto smoothing
pace = row.df[' Stroke500mPace (sec/500m)'].values
velo = 500./pace
f = row.df['TimeStamp (sec)'].diff().mean()
windowsize = 2*(int(10./(f)))+1
if not 'originalvelo' in row.df:
row.df['originalvelo'] = velo
if windowsize > 3 and windowsize<len(velo):
velo2 = savgol_filter(velo,windowsize,3)
else:
velo2 = velo
velo3 = pd.Series(velo2)
velo3 = velo3.replace([-np.inf,np.inf],np.nan)
velo3 = velo3.fillna(method='ffill')
pace2 = 500./abs(velo3)
row.df[' Stroke500mPace (sec/500m)'] = pace2
row.df = row.df.fillna(0)
row.write_csv(f2,gzip=True)
try:
os.remove(f2)
except:
pass
# recalculate power data
if workouttype == 'rower' or workouttype == 'dynamic' or workouttype == 'slides':
try:
row.erg_recalculatepower()
row.write_csv(f2,gzip=True)
except:
pass
if fileformat != 'fit' and summary == '':
summary = row.summary()
summary += '\n'
summary += row.intervalstats_painsled()
averagehr = row.df[' HRCur (bpm)'].mean()
maxhr = row.df[' HRCur (bpm)'].max()
totaldist = row.df['cum_dist'].max()
totaltime = row.df['TimeStamp (sec)'].max()-row.df['TimeStamp (sec)'].min()
totaltime = totaltime+row.df.ix[0,' ElapsedTime (sec)']
hours = int(totaltime/3600.)
minutes = int((totaltime - 3600.*hours)/60.)
seconds = int(totaltime - 3600.*hours - 60.*minutes)
tenths = int(10*(totaltime - 3600.*hours - 60.*minutes - seconds))
duration = "%s:%s:%s.%s" % (hours,minutes,seconds,tenths)
workoutdate = row.rowdatetime.strftime('%Y-%m-%d')
workoutstarttime = row.rowdatetime.strftime('%H:%M:%S')
workoutstartdatetime = thetimezone.localize(row.rowdatetime).astimezone(utc)
# check for duplicate start times
ws = Workout.objects.filter(starttime=workoutstarttime,
user=r)
if (len(ws) != 0):
message = "Warning: This workout probably already exists in the database"
w = Workout(user=r,name=title,date=workoutdate,
workouttype=workouttype,
duration=duration,distance=totaldist,
weightcategory=r.weightcategory,
starttime=workoutstarttime,
csvfilename=f2,notes=notes,summary=summary,
maxhr=maxhr,averagehr=averagehr,
startdatetime=workoutstartdatetime)
w.save()
# put stroke data in database
res = dataprep(row.df,id=w.id,bands=True,barchart=True,otwpower=True,empower=True)
return True
def compare_data(id):
row = Workout.objects.get(id=id)
f1 = row.csvfilename
rowdata = rdata(f1)
try:
l1 = len(rowdata.df)
except AttributeError:
l1 = 0
engine = create_engine(database_url, echo=False)
query = sa.text('SELECT COUNT(*) FROM strokedata WHERE workoutid={id};'.format(
id=id,
))
with engine.connect() as conn, conn.begin():
try:
res = conn.execute(query)
l2 = res.fetchall()[0][0]
except:
print "Database Locked"
conn.close()
engine.dispose()
return l1==l2
def repair_data(verbose=False):
ws = Workout.objects.all()
for w in ws:
if verbose:
sys.stdout.write(".")
if not compare_data(w.id):
if verbose:
print w.id
try:
rowdata = rdata(w.csvfilename)
if rowdata:
update_strokedata(w.id,rowdata.df)
except IOError, AttributeError:
pass
def rdata(file,rower=rrower()):
try:
res = rrdata(file,rower=rower)
except IOError:
try:
res = rrdata(file+'.gz',rower=rower)
except IOError:
res = 0
return res
def delete_strokedata(id):
engine = create_engine(database_url, echo=False)
query = sa.text('DELETE FROM strokedata WHERE workoutid={id};'.format(
id=id,
))
with engine.connect() as conn, conn.begin():
try:
result = conn.execute(query)
except:
print "Database Locked"
conn.close()
engine.dispose()
def update_strokedata(id,df):
delete_strokedata(id)
rowdata = dataprep(df,id=id,bands=True,barchart=True,otwpower=True)
def testdata(time,distance,pace,spm):
t1 = np.issubdtype(time,np.number)
t2 = np.issubdtype(distance,np.number)
t3 = np.issubdtype(pace,np.number)
t4 = np.issubdtype(spm,np.number)
return t1 and t2 and t3 and t4
def getrowdata_db(id=0):
data = read_df_sql(id)
data['x_right'] = data['x_right']/1.0e6
if data.empty:
rowdata,row = getrowdata(id=id)
if rowdata:
data = dataprep(rowdata.df,id=id,bands=True,barchart=True,otwpower=True)
else:
data = pd.DataFrame() # returning empty dataframe
else:
row = Workout.objects.get(id=id)
return data,row
def getsmallrowdata_db(columns,ids=[]):
prepmultipledata(ids)
data = read_cols_df_sql(ids,columns)
return data
def getrowdata(id=0):
# check if valid ID exists (workout exists)
row = Workout.objects.get(id=id)
f1 = row.csvfilename
# get user
r = row.user
u = r.user
rr = rrower(hrmax=r.max,hrut2=r.ut2,
hrut1=r.ut1,hrat=r.at,
hrtr=r.tr,hran=r.an,ftp=r.ftp)
rowdata = rdata(f1,rower=rr)
return rowdata,row
def prepmultipledata(ids,verbose=False):
query = sa.text('SELECT DISTINCT workoutid FROM strokedata')
engine = create_engine(database_url, echo=False)
with engine.connect() as conn, conn.begin():
res = conn.execute(query)
res = list(itertools.chain.from_iterable(res.fetchall()))
conn.close()
engine.dispose()
try:
ids2 = [int(id) for id in ids]
except ValueError:
ids2 = ids
res = list(set(ids2)-set(res))
for id in res:
rowdata,row = getrowdata(id=id)
if verbose:
print id
if rowdata:
data = dataprep(rowdata.df,id=id,bands=True,barchart=True,otwpower=True)
return res
def read_cols_df_sql(ids,columns):
columns = list(columns)+['distance','spm']
columns = [x for x in columns if x != 'None']
columns = list(set(columns))
cls = ''
engine = create_engine(database_url, echo=False)
for column in columns:
cls += column+', '
cls = cls[:-2]
if len(ids) == 0:
query = sa.text('SELECT {columns} FROM strokedata WHERE workoutid=0'.format(
columns = cls,
))
elif len(ids) == 1:
query = sa.text('SELECT {columns} FROM strokedata WHERE workoutid={id}'.format(
id = ids[0],
columns = cls,
))
else:
query = sa.text('SELECT {columns} FROM strokedata WHERE workoutid IN {ids}'.format(
columns = cls,
ids = tuple(ids),
))
connection = engine.raw_connection()
df = pd.read_sql_query(query,engine)
engine.dispose()
return df
def read_df_sql(id):
engine = create_engine(database_url, echo=False)
df = pd.read_sql_query(sa.text('SELECT * FROM strokedata WHERE workoutid={id}'.format(
id=id)), engine)
engine.dispose()
return df
def smalldataprep(therows,xparam,yparam1,yparam2):
df = pd.DataFrame()
if yparam2 == 'None':
yparam2 = 'power'
df[xparam] = []
df[yparam1] = []
df[yparam2] = []
df['distance'] = []
df['spm'] = []
for workout in therows:
f1 = workout.csvfilename
try:
rowdata = dataprep(rrdata(f1).df)
rowdata = pd.DataFrame({xparam: rowdata[xparam],
yparam1: rowdata[yparam1],
yparam2: rowdata[yparam2],
'distance': rowdata['distance'],
'spm': rowdata['spm'],
}
)
df = pd.concat([df,rowdata],ignore_index=True)
except IOError:
try:
rowdata = dataprep(rrdata(f1+'.gz').df)
rowdata = pd.DataFrame({xparam: rowdata[xparam],
yparam1: rowdata[yparam1],
yparam2: rowdata[yparam2],
'distance': rowdata['distance'],
'spm': rowdata['spm'],
}
)
df = pd.concat([df,rowdata],ignore_index=True)
except IOError:
pass
return df
def dataprep(rowdatadf,id=0,bands=True,barchart=True,otwpower=True,
empower=True):
rowdatadf.set_index([range(len(rowdatadf))],inplace=True)
t = rowdatadf.ix[:,'TimeStamp (sec)']
t = pd.Series(t-rowdatadf.ix[0,'TimeStamp (sec)'])
row_index = rowdatadf.ix[:,' Stroke500mPace (sec/500m)'] > 3000
rowdatadf.loc[row_index,' Stroke500mPace (sec/500m)'] = 3000.
p = rowdatadf.ix[:,' Stroke500mPace (sec/500m)']
hr = rowdatadf.ix[:,' HRCur (bpm)']
spm = rowdatadf.ix[:,' Cadence (stokes/min)']
cumdist = rowdatadf.ix[:,'cum_dist']
power = rowdatadf.ix[:,' Power (watts)']
averageforce = rowdatadf.ix[:,' AverageDriveForce (lbs)']
drivelength = rowdatadf.ix[:,' DriveLength (meters)']
try:
workoutstate = rowdatadf.ix[:,' WorkoutState']
except KeyError:
workoutstate = 0*hr
peakforce = rowdatadf.ix[:,' PeakDriveForce (lbs)']
forceratio = averageforce/peakforce
forceratio = forceratio.fillna(value=0)
f = rowdatadf['TimeStamp (sec)'].diff().mean()
windowsize = 2*(int(10./(f)))+1
if windowsize <= 3:
windowsize = 5
if windowsize > 3 and windowsize<len(hr):
spm = savgol_filter(spm,windowsize,3)
hr = savgol_filter(hr,windowsize,3)
drivelength = savgol_filter(drivelength,windowsize,3)
forceratio = savgol_filter(forceratio,windowsize,3)
try:
t2 = t.fillna(method='ffill').apply(lambda x: timedeltaconv(x))
except TypeError:
t2 = 0*t
p2 = p.fillna(method='ffill').apply(lambda x: timedeltaconv(x))
try:
drivespeed = drivelength/rowdatadf[' DriveTime (ms)']*1.0e3
except TypeError:
drivespeed = 0.0*rowdatadf['TimeStam (sec)']
drivespeed = drivespeed.fillna(value=0)
driveenergy = drivelength*averageforce*4.44822
distance = rowdatadf.ix[:,'cum_dist']
data = DataFrame(
dict(
time = t*1e3,
hr = hr,
pace = p*1e3,
spm = spm,
cumdist = cumdist,
ftime = niceformat(t2),
fpace = nicepaceformat(p2),
driveenergy=driveenergy,
power=power,
workoutstate=workoutstate,
averageforce=averageforce,
drivelength=drivelength,
peakforce=peakforce,
forceratio=forceratio,
distance=distance,
drivespeed=drivespeed,
)
)
if bands:
# HR bands
data['hr_ut2'] = rowdatadf.ix[:,'hr_ut2']
data['hr_ut1'] = rowdatadf.ix[:,'hr_ut1']
data['hr_at'] = rowdatadf.ix[:,'hr_at']
data['hr_tr'] = rowdatadf.ix[:,'hr_tr']
data['hr_an'] = rowdatadf.ix[:,'hr_an']
data['hr_max'] = rowdatadf.ix[:,'hr_max']
data['hr_bottom'] = 0.0*data['hr']
if barchart:
# time increments for bar chart
time_increments = rowdatadf.ix[:,' ElapsedTime (sec)'].diff()
time_increments[0] = time_increments[1]
time_increments = 0.5*time_increments+0.5*np.abs(time_increments)
x_right = (t2+time_increments.apply(lambda x:timedeltaconv(x)))
data['x_right'] = x_right
if empower:
try:
wash = rowdatadf.ix[:,'wash']
catch = rowdatadf.ix[:,'catch']
finish = rowdatadf.ix[:,'finish']
peakforceangle = rowdatadf.ix[:,'peakforceangle']
driveenergy = rowdatadf.ix[:,'driveenergy']
drivelength = driveenergy/(averageforce*4.44822)
slip = rowdatadf.ix[:,'slip']
if windowsize > 3 and windowsize<len(slip):
wash = savgol_filter(wash,windowsize,3)
slip = savgol_filter(slip,windowsize,3)
catch = savgol_filter(catch,windowsize,3)
finish = savgol_filter(finish,windowsize,3)
peakforceangle = savgol_filter(peakforceangle,windowsize,3)
driveenergy = savgol_filter(driveenergy,windowsize,3)
drivelength = savgol_filter(drivelength,windowsize,3)
data['wash'] = wash
data['catch'] = catch
data['slip'] = slip
data['finish'] = finish
data['peakforceangle'] = peakforceangle
data['driveenergy'] = driveenergy
data['drivelength'] = drivelength
except KeyError:
pass
if otwpower:
try:
nowindpace = rowdatadf.ix[:,'nowindpace']
except KeyError:
nowindpace = p
try:
equivergpower = rowdatadf.ix[:,'equivergpower']
except KeyError:
equivergpower = 0*p+50.
nowindpace2 = nowindpace.apply(lambda x: timedeltaconv(x))
ergvelo = (equivergpower/2.8)**(1./3.)
ergpace = 500./ergvelo
ergpace[ergpace == np.inf] = 240.
ergpace2 = ergpace.apply(lambda x: timedeltaconv(x))
data['ergpace'] = ergpace*1e3
data['nowindpace'] = nowindpace*1e3
data['equivergpower'] = equivergpower
data['fergpace'] = nicepaceformat(ergpace2)
data['fnowindpace'] = nicepaceformat(nowindpace2)
data = data.replace([-np.inf,np.inf],np.nan)
data = data.fillna(method='ffill')
# write data if id given
if id != 0:
data['workoutid'] = id
engine = create_engine(database_url, echo=False)
with engine.connect() as conn, conn.begin():
data.to_sql('strokedata',engine,if_exists='append',index=False)
conn.close()
engine.dispose()
return data