Private
Public Access
1
0
Files
rowsandall/rowers/dataprepnodjango.py
Sander Roosendaal 7c40ad3df1 bug fix
2016-12-01 22:12:36 +01:00

390 lines
11 KiB
Python

from rowingdata import rowingdata as rrdata
from rowingdata import rower as rrower
from rowingdata import main as rmain
from pandas import DataFrame,Series
import pandas as pd
import numpy as np
import itertools
from sqlalchemy import create_engine
import sqlalchemy as sa
from rowsandall_app.settings import DATABASES
user = DATABASES['default']['USER']
password = DATABASES['default']['PASSWORD']
database_name = DATABASES['default']['NAME']
host = DATABASES['default']['HOST']
port = DATABASES['default']['PORT']
database_url = 'mysql://{user}:{password}@{host}:{port}/{database_name}'.format(
user=user,
password=password,
database_name=database_name,
host=host,
port=port,
)
database_url_debug = 'sqlite:///'+database_name
from scipy.signal import savgol_filter
import datetime
def niceformat(values):
out = []
for v in values:
formattedv = strfdelta(v)
out.append(formattedv)
return out
def strfdelta(tdelta):
try:
minutes,seconds = divmod(tdelta.seconds,60)
tenths = int(tdelta.microseconds/1e5)
except AttributeError:
minutes,seconds = divmod(tdelta.view(np.int64),60e9)
seconds,rest = divmod(seconds,1e9)
tenths = int(rest/1e8)
res = "{minutes:0>2}:{seconds:0>2}.{tenths:0>1}".format(
minutes=minutes,
seconds=seconds,
tenths=tenths,
)
return res
def nicepaceformat(values):
out = []
for v in values:
formattedv = strfdelta(v)
out.append(formattedv)
return out
def timedeltaconv(x):
if not np.isnan(x):
dt = datetime.timedelta(seconds=x)
else:
dt = datetime.timedelta(seconds=350.)
return dt
def rdata(file,rower=rrower()):
try:
res = rrdata(file,rower=rower)
except IOError:
res = 0
return res
def delete_strokedata(id,debug=True):
if debug:
engine = create_engine(database_url_debug, echo=False)
else:
engine = create_engine(database_url, echo=False)
query = sa.text('DELETE FROM strokedata WHERE workoutid={id};'.format(
id=id,
))
with engine.connect() as conn, conn.begin():
try:
result = conn.execute(query)
except:
print "Database Locked"
conn.close()
engine.dispose()
def update_strokedata(id,df,debug=True):
delete_strokedata(id)
rowdata = dataprep(df,id=id,bands=True,barchart=True,otwpower=True,
debug=debug)
def testdata(time,distance,pace,spm):
t1 = np.issubdtype(time,np.number)
t2 = np.issubdtype(distance,np.number)
t3 = np.issubdtype(pace,np.number)
t4 = np.issubdtype(spm,np.number)
return t1 and t2 and t3 and t4
def getsmallrowdata_db(columns,ids=[]):
prepmultipledata(ids)
data = read_cols_df_sql(ids,columns)
return data
def prepmultipledata(ids,verbose=False,debug=True):
query = sa.text('SELECT DISTINCT workoutid FROM strokedata')
if debug:
engine = create_engine(database_url_debug, echo=False)
else:
engine = create_engine(database_url, echo=False)
with engine.connect() as conn, conn.begin():
res = conn.execute(query)
res = list(itertools.chain.from_iterable(res.fetchall()))
conn.close()
engine.dispose()
res = list(set(ids)-set(res))
for id in res:
rowdata,row = getrowdata(id=id)
if verbose:
print id
if rowdata:
data = dataprep(rowdata.df,id=id,bands=True,barchart=True,otwpower=True)
return res
def read_cols_df_sql(ids,columns,debug=True):
columns = list(columns)+['distance','spm']
columns = [x for x in columns if x != 'None']
columns = list(set(columns))
cls = ''
if debug:
engine = create_engine(database_url_debug, echo=False)
else:
engine = create_engine(database_url, echo=False)
for column in columns:
cls += column+', '
cls = cls[:-2]
if len(ids) == 0:
query = sa.text('SELECT {columns} FROM strokedata WHERE workoutid=0'.format(
columns = cls,
))
elif len(ids) == 1:
query = sa.text('SELECT {columns} FROM strokedata WHERE workoutid={id}'.format(
id = ids[0],
columns = cls,
))
else:
query = sa.text('SELECT {columns} FROM strokedata WHERE workoutid IN {ids}'.format(
columns = cls,
ids = tuple(ids),
))
df = pd.read_sql_query(query,engine)
engine.dispose()
return df
def read_df_sql(id,debug=True):
if debug:
engine = create_engine(database_url_debug, echo=False)
else:
engine = create_engine(database_url, echo=False)
df = pd.read_sql_query(sa.text('SELECT * FROM strokedata WHERE workoutid={id}'.format(
id=id)), engine)
engine.dispose()
return df
def smalldataprep(therows,xparam,yparam1,yparam2):
df = pd.DataFrame()
if yparam2 == 'None':
yparam2 = 'power'
df[xparam] = []
df[yparam1] = []
df[yparam2] = []
df['distance'] = []
df['spm'] = []
for workout in therows:
f1 = workout.csvfilename
try:
rowdata = dataprep(rrdata(f1).df)
rowdata = pd.DataFrame({xparam: rowdata[xparam],
yparam1: rowdata[yparam1],
yparam2: rowdata[yparam2],
'distance': rowdata['distance'],
'spm': rowdata['spm'],
}
)
df = pd.concat([df,rowdata],ignore_index=True)
except IOError:
pass
return df
def dataprep(rowdatadf,id=0,bands=True,barchart=True,otwpower=True,
empower=True,debug=True):
rowdatadf.set_index([range(len(rowdatadf))],inplace=True)
t = rowdatadf.ix[:,'TimeStamp (sec)']
t = pd.Series(t-rowdatadf.ix[0,'TimeStamp (sec)'])
row_index = rowdatadf.ix[:,' Stroke500mPace (sec/500m)'] > 3000
rowdatadf.loc[row_index,' Stroke500mPace (sec/500m)'] = 3000.
p = rowdatadf.ix[:,' Stroke500mPace (sec/500m)']
hr = rowdatadf.ix[:,' HRCur (bpm)']
spm = rowdatadf.ix[:,' Cadence (stokes/min)']
cumdist = rowdatadf.ix[:,'cum_dist']
power = rowdatadf.ix[:,' Power (watts)']
averageforce = rowdatadf.ix[:,' AverageDriveForce (lbs)']
drivelength = rowdatadf.ix[:,' DriveLength (meters)']
try:
workoutstate = rowdatadf.ix[:,' WorkoutState']
except KeyError:
workoutstate = 0*hr
peakforce = rowdatadf.ix[:,' PeakDriveForce (lbs)']
forceratio = averageforce/peakforce
forceratio = forceratio.fillna(value=0)
f = rowdatadf['TimeStamp (sec)'].diff().mean()
windowsize = 2*(int(10./(f)))+1
if windowsize <= 3:
windowsize = 5
if windowsize > 3:
spm = savgol_filter(spm,windowsize,3)
hr = savgol_filter(hr,windowsize,3)
drivelength = savgol_filter(drivelength,windowsize,3)
forceratio = savgol_filter(forceratio,windowsize,3)
try:
t2 = t.fillna(method='ffill').apply(lambda x: timedeltaconv(x))
except TypeError:
t2 = 0*t
p2 = p.fillna(method='ffill').apply(lambda x: timedeltaconv(x))
drivespeed = drivelength/rowdatadf[' DriveTime (ms)']*1.0e3
drivespeed = drivespeed.fillna(value=0)
driveenergy = drivelength*averageforce*4.44822
distance = rowdatadf.ix[:,'cum_dist']
data = DataFrame(
dict(
time = t*1e3,
hr = hr,
pace = p*1e3,
spm = spm,
cumdist = cumdist,
ftime = niceformat(t2),
fpace = nicepaceformat(p2),
driveenergy=driveenergy,
power=power,
workoutstate=workoutstate,
averageforce=averageforce,
drivelength=drivelength,
peakforce=peakforce,
forceratio=forceratio,
distance=distance,
drivespeed=drivespeed,
)
)
if bands:
# HR bands
data['hr_ut2'] = rowdatadf.ix[:,'hr_ut2']
data['hr_ut1'] = rowdatadf.ix[:,'hr_ut1']
data['hr_at'] = rowdatadf.ix[:,'hr_at']
data['hr_tr'] = rowdatadf.ix[:,'hr_tr']
data['hr_an'] = rowdatadf.ix[:,'hr_an']
data['hr_max'] = rowdatadf.ix[:,'hr_max']
data['hr_bottom'] = 0.0*data['hr']
if barchart:
# time increments for bar chart
time_increments = rowdatadf.ix[:,' ElapsedTime (sec)'].diff()
time_increments[0] = time_increments[1]
time_increments = 0.5*time_increments+0.5*np.abs(time_increments)
x_right = (t2+time_increments.apply(lambda x:timedeltaconv(x)))
data['x_right'] = x_right
if empower:
try:
wash = rowdatadf.ix[:,'wash']
catch = rowdatadf.ix[:,'catch']
finish = rowdatadf.ix[:,'finish']
peakforceangle = rowdatadf.ix[:,'peakforceangle']
driveenergy = rowdatadf.ix[:,'driveenergy']
drivelength = driveenergy/(averageforce*4.44822)
slip = rowdatadf.ix[:,'slip']
if windowsize > 3:
wash = savgol_filter(wash,windowsize,3)
slip = savgol_filter(slip,windowsize,3)
catch = savgol_filter(catch,windowsize,3)
finish = savgol_filter(finish,windowsize,3)
peakforceangle = savgol_filter(peakforceangle,windowsize,3)
driveenergy = savgol_filter(driveenergy,windowsize,3)
drivelength = savgol_filter(drivelength,windowsize,3)
data['wash'] = wash
data['catch'] = catch
data['slip'] = slip
data['finish'] = finish
data['peakforceangle'] = peakforceangle
data['driveenergy'] = driveenergy
data['drivelength'] = drivelength
data['peakforce'] = peakforce
data['averageforce'] = averageforce
except KeyError:
pass
if otwpower:
try:
nowindpace = rowdatadf.ix[:,'nowindpace']
except KeyError:
nowindpace = p
try:
equivergpower = rowdatadf.ix[:,'equivergpower']
except KeyError:
equivergpower = 0*p+50.
nowindpace2 = nowindpace.apply(lambda x: timedeltaconv(x))
ergvelo = (equivergpower/2.8)**(1./3.)
ergpace = 500./ergvelo
ergpace[ergpace == np.inf] = 240.
ergpace2 = ergpace.apply(lambda x: timedeltaconv(x))
data['ergpace'] = ergpace*1e3
data['nowindpace'] = nowindpace*1e3
data['equivergpower'] = equivergpower
data['fergpace'] = nicepaceformat(ergpace2)
data['fnowindpace'] = nicepaceformat(nowindpace2)
data = data.replace([-np.inf,np.inf],np.nan)
data = data.fillna(method='ffill')
# write data if id given
if id != 0:
data['workoutid'] = id
if debug:
engine = create_engine(database_url_debug, echo=False)
else:
engine = create_engine(database_url, echo=False)
with engine.connect() as conn, conn.begin():
data.to_sql('strokedata',engine,if_exists='append',index=False)
conn.close()
engine.dispose()
return data