Private
Public Access
1
0
Files
rowsandall/rowers/dataprepnodjango.py
Sander Roosendaal a747d47fed bug fix
2020-10-19 18:28:34 +02:00

1324 lines
37 KiB
Python

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals
# This is Data prep used for testing purposes (no Django environment)
# Uses the debug SQLite database for stroke data
from rowingdata import rowingdata as rrdata
from rowingdata import make_cumvalues
from rowingdata import rower as rrower
from rowingdata import main as rmain
from rowingdata import empower_bug_correction,get_empower_rigging
from rowingdata.csvparsers import make_cumvalues_array
from time import strftime
from pandas import DataFrame,Series
import shutil
from shutil import copyfile
import pyarrow as pa
import pandas as pd
import numpy as np
import itertools
import dask.dataframe as dd
from dask.delayed import delayed
from sqlalchemy import create_engine
import sqlalchemy as sa
from rowsandall_app.settings import DATABASES
from rowsandall_app.settings_dev import DATABASES as DEV_DATABASES
from rowsandall_app.settings_dev import use_sqlite
from rowers.utils import lbstoN
def _default_db_setting(key):
    """Return DATABASES['default'][key], or '' when that entry is missing."""
    try:
        return DATABASES['default'][key]
    except KeyError:
        return ''


# Connection parameters of the default database; absent entries become ''.
user = _default_db_setting('USER')
password = _default_db_setting('PASSWORD')
database_name = _default_db_setting('NAME')
host = _default_db_setting('HOST')
port = _default_db_setting('PORT')

database_url = 'mysql://{user}:{password}@{host}:{port}/{database_name}'.format(
    user=user,
    password=password,
    database_name=database_name,
    host=host,
    port=port,
)

database_name_dev = DEV_DATABASES['default']['NAME']

# The debug URL only differs from the regular one when the development
# settings request SQLite.
if use_sqlite:
    database_url_debug = 'sqlite:///'+database_name_dev
else:
    database_url_debug = database_url
# mapping the DB column names to the CSV file column names
# Keys are the short column names; values are the CSV header strings.
# The header strings are matched literally elsewhere in this module, so their
# leading spaces and spelling (e.g. 'stokes/min') must be kept exactly as is.
columndict = {
    'time':'TimeStamp (sec)',
    'hr':' HRCur (bpm)',
    'velo': ' AverageBoatSpeed (m/s)',
    'pace':' Stroke500mPace (sec/500m)',
    'spm':' Cadence (stokes/min)',
    'power':' Power (watts)',
    'averageforce':' AverageDriveForce (lbs)',
    'drivelength':' DriveLength (meters)',
    'peakforce':' PeakDriveForce (lbs)',
    'distance':' Horizontal (meters)',
    'catch':'catch',
    'finish':'finish',
    'peakforceangle':'peakforceangle',
    'wash':'wash',
    # NOTE(review): 'slip' maps to 'wash', not 'slip' - confirm this is
    # intended and not a copy/paste slip.
    'slip':'wash',
    'workoutstate':' WorkoutState',
    'cumdist':'cum_dist',
}
from scipy.signal import savgol_filter
import datetime
def niceformat(values):
    """Return a list with every item of *values* formatted by strfdelta."""
    return [strfdelta(value) for value in values]
def strfdelta(tdelta):
    """Format a duration as ``MM:SS.t`` (minutes, seconds, tenths).

    Objects exposing ``.seconds`` and ``.microseconds`` (for example
    ``datetime.timedelta``) are formatted from those attributes.  Anything
    else is reinterpreted through ``.view(np.int64)`` and treated as an
    integer count of nanoseconds.

    The fallback path now uses integer arithmetic.  It previously carried
    floats into the format string and produced output such as
    ``1.0:35.0.3`` instead of ``01:35.3``.
    """
    try:
        minutes,seconds = divmod(tdelta.seconds,60)
        tenths = int(tdelta.microseconds/1e5)
    except AttributeError:
        # e.g. numpy.timedelta64: no .seconds attribute
        total_ns = int(tdelta.view(np.int64))
        minutes,remainder = divmod(total_ns,60*10**9)
        seconds,remainder = divmod(remainder,10**9)
        tenths = remainder//10**8

    res = "{minutes:0>2}:{seconds:0>2}.{tenths:0>1}".format(
        minutes=minutes,
        seconds=seconds,
        tenths=tenths,
    )

    return res
def nicepaceformat(values):
    """Return a list with every pace in *values* formatted by strfdelta."""
    return [strfdelta(pace) for pace in values]
def timedeltaconv(x):
    """Convert a number of seconds to a ``datetime.timedelta``.

    A NaN input is replaced by 350 seconds.
    """
    seconds = 350. if np.isnan(x) else x
    return datetime.timedelta(seconds=seconds)
def rdata(file,rower=rrower()):
    """Load a rowing CSV file, falling back to its ``.gz`` variant.

    Returns the object built by ``rrdata``; returns 0 when neither *file* nor
    ``file + '.gz'`` can be opened (IOError).  Note that the default *rower*
    is created once, when this function is defined.
    """
    try:
        return rrdata(csvfile=file,rower=rower)
    except IOError:
        pass

    # plain file not readable: try the gzipped copy
    try:
        return rrdata(csvfile=file+'.gz',rower=rower)
    except IOError:
        return 0
from rowers.utils import totaltime_sec_to_string
from rowers.metrics import dtypes
# Creates C2 stroke data
def create_c2_stroke_data_db(
        distance,duration,workouttype,
        workoutid,starttimeunix,csvfilename,debug=False):
    """Build evenly spaced stroke data from a workout summary.

    One stroke per 10 units of *distance* is generated, spread evenly over
    *duration* (an object with ``hour``, ``minute``, ``second`` and
    ``microsecond`` attributes).  The frame is written to *csvfilename* as
    gzipped CSV and passed to ``dataprep``.

    Returns what ``dataprep`` returns, or 0 when no strokes can be generated
    (``int(distance/10.) == 0``).

    A single generated stroke used to divide by ``nr_strokes - 1 == 0`` and
    produce NaN timestamps and distances; it now gets time 0 and distance 0.
    """
    nr_strokes = int(distance/10.)

    totalseconds = duration.hour*3600.
    totalseconds += duration.minute*60.
    totalseconds += duration.second
    totalseconds += duration.microsecond/1.e6

    try:
        spm = 60.*nr_strokes/totalseconds
    except ZeroDivisionError:
        # NOTE(review): 20*zeros is all zeros - confirm 20 spm was not meant
        spm = 20*np.zeros(nr_strokes)

    # Without strokes there is nothing to store.
    if nr_strokes == 0:
        return 0

    # Number of gaps between strokes; at least 1 so that a single stroke
    # does not divide by zero.
    intervals = float(max(nr_strokes-1,1))

    elapsed = np.arange(nr_strokes)*totalseconds/intervals
    d = np.arange(nr_strokes)*distance/intervals

    unixtime = starttimeunix + elapsed

    pace = 500.*totalseconds/distance

    if workouttype in ['rower','slides','dynamic']:
        try:
            velo = distance/totalseconds
        except ZeroDivisionError:
            velo = 0
        power = 2.8*velo**3
    else:
        power = 0

    df = pd.DataFrame({
        'TimeStamp (sec)': unixtime,
        ' Horizontal (meters)': d,
        ' Cadence (stokes/min)': spm,
        ' Stroke500mPace (sec/500m)':pace,
        ' ElapsedTime (sec)':elapsed,
        ' Power (watts)':power,
        ' HRCur (bpm)':np.zeros(nr_strokes),
        ' longitude':np.zeros(nr_strokes),
        ' latitude':np.zeros(nr_strokes),
        ' DragFactor':np.zeros(nr_strokes),
        ' DriveLength (meters)':np.zeros(nr_strokes),
        ' StrokeDistance (meters)':np.zeros(nr_strokes),
        ' DriveTime (ms)':np.zeros(nr_strokes),
        ' StrokeRecoveryTime (ms)':np.zeros(nr_strokes),
        ' AverageDriveForce (lbs)':np.zeros(nr_strokes),
        ' PeakDriveForce (lbs)':np.zeros(nr_strokes),
        ' lapIdx':np.zeros(nr_strokes),
        'cum_dist': d
    })

    # NOTE(review): this overwrites the elapsed time with the absolute
    # timestamp, as the original code did - confirm that is intended.
    df[' ElapsedTime (sec)'] = df['TimeStamp (sec)']

    df.to_csv(csvfilename,index_label='index',
              compression='gzip')

    data = dataprep(df,id=workoutid,bands=False,debug=debug)

    return data
# Saves C2 stroke data to CSV and database
def add_c2_stroke_data_db(strokedata,workoutid,starttimeunix,csvfilename,
                          debug=False,workouttype='rower'):
    """Convert C2 stroke data to the CSV layout, save it and run dataprep.

    *strokedata* must provide the columns 't', 'd' and 'p'; they are scaled
    by 0.1 here.  'lat'/'lon', 'strokelength', 'spm' and 'hr' are optional
    and replaced by zeros when absent.  The frame is written to *csvfilename*
    as gzipped CSV and handed to ``dataprep``.

    Returns the ``dataprep`` result, or 0 when ``dataprep`` raises.

    Changes: the rows are now really sorted by timestamp (the result of
    ``sort_values`` used to be discarded), and the bare ``except`` clauses
    are narrowed.
    """
    res = make_cumvalues(0.1*strokedata['t'])
    cum_time = res[0]
    lapidx = res[1]

    unixtime = cum_time+starttimeunix
    # unixtime[0] = starttimeunix
    seconds = 0.1*strokedata.loc[:,'t']

    nr_rows = len(unixtime)

    try:
        latcoord = strokedata.loc[:,'lat']
        loncoord = strokedata.loc[:,'lon']
    except KeyError:
        latcoord = np.zeros(nr_rows)
        loncoord = np.zeros(nr_rows)

    try:
        strokelength = strokedata.loc[:,'strokelength']
    except KeyError:
        strokelength = np.zeros(nr_rows)

    dist2 = 0.1*strokedata.loc[:,'d']

    try:
        spm = strokedata.loc[:,'spm']
    except KeyError:
        spm = 0*dist2

    try:
        hr = strokedata.loc[:,'hr']
    except KeyError:
        hr = 0*spm

    pace = strokedata.loc[:,'p']/10.
    pace = np.clip(pace,0,1e4)
    # a pace of 0 would give an infinite speed below
    pace = pace.replace(0,300)

    velo = 500./pace
    power = 2.8*velo**3
    if workouttype == 'bike':
        # NOTE(review): this velo is not used afterwards (power was computed
        # above) - kept as in the original.
        velo = 1000./pace

    # Create data frame with all necessary data to write to csv
    df = pd.DataFrame({'TimeStamp (sec)':unixtime,
                       ' Horizontal (meters)': dist2,
                       ' Cadence (stokes/min)':spm,
                       ' HRCur (bpm)':hr,
                       ' longitude':loncoord,
                       ' latitude':latcoord,
                       ' Stroke500mPace (sec/500m)':pace,
                       ' Power (watts)':power,
                       ' DragFactor':np.zeros(nr_rows),
                       ' DriveLength (meters)':np.zeros(nr_rows),
                       ' StrokeDistance (meters)':strokelength,
                       ' DriveTime (ms)':np.zeros(nr_rows),
                       ' StrokeRecoveryTime (ms)':np.zeros(nr_rows),
                       ' AverageDriveForce (lbs)':np.zeros(nr_rows),
                       ' PeakDriveForce (lbs)':np.zeros(nr_rows),
                       ' lapIdx':lapidx,
                       ' WorkoutState': 4,
                       ' ElapsedTime (sec)':seconds,
                       'cum_dist': dist2
                       })

    # sort_values returns a new frame; keep it
    df = df.sort_values(by='TimeStamp (sec)',ascending=True)

    # save data to the CSV file
    df.to_csv(csvfilename,index_label='index',
              compression='gzip')

    try:
        data = dataprep(df,id=workoutid,bands=False,debug=debug)
    except Exception:
        # best effort: callers get 0 when the stroke data cannot be prepared
        return 0

    return data
# Processes painsled CSV file to database
def save_workout_database(f2,r,dosmooth=True,workouttype='rower',
                          dosummary=True,title='Workout',
                          notes='',totaldist=0,totaltime=0,
                          workoutsource='unknown',
                          summary='',
                          makeprivate=False,
                          oarlength=2.89,inboard=0.88):
    """Read a painsled-style CSV file and store it as a workout.

    Reads *f2* through ``rdata``, optionally smooths the pace column, derives
    duration, distance and heart rate statistics, creates a ``Workout`` for
    rower *r* and stores the stroke data through ``dataprep``.

    Returns ``(workout id, message)``; ``(0, 'Error: CSV data file not
    found')`` when the file cannot be read.

    Changes: the "file not found" check now runs before the first use of the
    loaded data (it used to come after ``row.check_consistency()``, which
    fails on the integer 0), ``iteritems()`` became ``items()``, the
    deprecated ``fillna(method='ffill')`` became ``ffill()`` and bare
    ``except`` clauses are narrowed to ``Exception``.

    NOTE(review): ``os``, ``thetimezone``, ``utc``, ``Workout`` and ``Team``
    are not imported in the part of the module visible here; verify they are
    available before relying on this function.
    """
    message = None
    powerperc = 100*np.array([r.pw_ut2,
                              r.pw_ut1,
                              r.pw_at,
                              r.pw_tr,r.pw_an])/r.ftp

    # make workout and put in database
    rr = rrower(hrmax=r.max,hrut2=r.ut2,
                hrut1=r.ut1,hrat=r.at,
                hrtr=r.tr,hran=r.an,ftp=r.ftp,
                powerperc=powerperc,powerzones=r.powerzones)
    row = rdata(f2,rower=rr)

    # rdata returns 0 when neither the file nor its .gz variant exists
    if row == 0:
        return (0,'Error: CSV data file not found')

    checks = row.check_consistency()
    allchecks = 1
    for key,value in checks.items():
        if not value:
            allchecks = 0

    if not allchecks:
        #row.repair()
        pass

    if dosmooth:
        # auto smoothing
        pace = row.df[' Stroke500mPace (sec/500m)'].values
        velo = 500./pace

        f = row.df['TimeStamp (sec)'].diff().mean()
        if f !=0:
            windowsize = 2*(int(10./(f)))+1
        else:
            windowsize = 1
        if not 'originalvelo' in row.df:
            row.df['originalvelo'] = velo

        if windowsize > 3 and windowsize<len(velo):
            velo2 = savgol_filter(velo,windowsize,3)
        else:
            velo2 = velo

        velo3 = pd.Series(velo2)
        velo3 = velo3.replace([-np.inf,np.inf],np.nan)
        velo3 = velo3.ffill()

        pace2 = 500./abs(velo3)

        row.df[' Stroke500mPace (sec/500m)'] = pace2

        row.df = row.df.fillna(0)

        row.write_csv(f2,gzip=True)
        try:
            # NOTE(review): with `os` not imported this raises NameError,
            # which is swallowed here, so nothing is removed.
            os.remove(f2)
        except Exception:
            pass

    # recalculate power data
    if workouttype == 'rower' or workouttype == 'dynamic' or workouttype == 'slides':
        try:
            row.erg_recalculatepower()
            row.write_csv(f2,gzip=True)
        except Exception:
            pass

    averagehr = row.df[' HRCur (bpm)'].mean()
    maxhr = row.df[' HRCur (bpm)'].max()

    if totaldist == 0:
        totaldist = row.df['cum_dist'].max()
    if totaltime == 0:
        totaltime = row.df['TimeStamp (sec)'].max()-row.df['TimeStamp (sec)'].min()
        totaltime = totaltime+row.df.loc[0,' ElapsedTime (sec)']

    # Split the duration into clock fields, clamping each to its valid range.
    hours = int(totaltime/3600.)
    if hours>23:
        message = 'Warning: The workout duration was longer than 23 hours. '
        hours = 23

    minutes = int((totaltime - 3600.*hours)/60.)
    if minutes>59:
        minutes = 59
        if not message:
            message = 'Warning: there is something wrong with the workout duration'

    seconds = int(totaltime - 3600.*hours - 60.*minutes)
    if seconds > 59:
        seconds = 59
        if not message:
            message = 'Warning: there is something wrong with the workout duration'

    tenths = int(10*(totaltime - 3600.*hours - 60.*minutes - seconds))
    if tenths > 9:
        tenths = 9
        if not message:
            message = 'Warning: there is something wrong with the workout duration'

    duration = "%s:%s:%s.%s" % (hours,minutes,seconds,tenths)

    if dosummary:
        summary = row.summary()
        summary += '\n'
        summary += row.intervalstats()

    workoutdate = row.rowdatetime.strftime('%Y-%m-%d')
    workoutstarttime = row.rowdatetime.strftime('%H:%M:%S')
    workoutstartdatetime = thetimezone.localize(row.rowdatetime).astimezone(utc)

    if makeprivate:
        privacy = 'private'
    else:
        privacy = 'visible'

    # check for duplicate start times
    ws = Workout.objects.filter(startdatetime=workoutstartdatetime,
                                user=r)
    if (len(ws) != 0):
        message = "Warning: This workout probably already exists in the database"
        privacy = 'private'

    w = Workout(user=r,name=title,date=workoutdate,
                workouttype=workouttype,
                workoutsource=workoutsource,
                duration=duration,distance=totaldist,
                weightcategory=r.weightcategory,
                starttime=workoutstarttime,
                csvfilename=f2,notes=notes,summary=summary,
                maxhr=maxhr,averagehr=averagehr,
                startdatetime=workoutstartdatetime,
                inboard=inboard,oarlength=oarlength,
                privacy=privacy)

    w.save()

    if privacy == 'visible':
        ts = Team.objects.filter(rower=r)
        for t in ts:
            w.team.add(t)

    # put stroke data in database
    res = dataprep(row.df,id=w.id,bands=True,
                   barchart=True,otwpower=True,empower=True,inboard=inboard)

    return (w.id,message)
def handle_nonpainsled(f2,fileformat,summary=''):
    """Convert a non-painsled file into a painsled-compatible CSV.

    The parser is chosen from *fileformat*; the parsed data is written to a
    new file named after *f2* (last four characters replaced by ``o.csv``)
    and the source file is removed.

    Returns ``(new file name, summary, oarlength, inboard)``.
    """
    oarlength = 2.89
    inboard = 0.88

    if fileformat == 'rp':
        # RowPro
        row = RowProParser(f2)
    elif fileformat == 'tcx':
        # TCX
        row = TCXParser(f2)
    elif fileformat == 'mystery':
        # Mystery
        row = MysteryParser(f2)
    elif fileformat == 'rowperfect3':
        # RowPerfect
        row = RowPerfectParser(f2)
    elif fileformat == 'ergdata':
        # ErgData
        row = ErgDataParser(f2)
    elif fileformat == 'coxmate':
        # CoxMate
        row = CoxMateParser(f2)
    elif fileformat == 'bcmike':
        # Mike
        row = BoatCoachAdvancedParser(f2)
    elif fileformat == 'boatcoachotw':
        # BoatCoach OTW
        row = BoatCoachOTWParser(f2)
    elif fileformat == 'boatcoach':
        # BoatCoach
        row = BoatCoachParser(f2)
    elif fileformat == 'painsleddesktop':
        # painsled desktop
        row = painsledDesktopParser(f2)
    elif fileformat == 'speedcoach':
        # speed coach GPS
        row = speedcoachParser(f2)
    elif fileformat == 'speedcoach2':
        # speed coach GPS 2: also try to pick up rigging and statistics
        row = SpeedCoach2Parser(f2)
        try:
            oarlength,inboard = get_empower_rigging(f2)
            summary = row.allstats()
        except:
            pass
    elif fileformat == 'ergstick':
        # ErgStick
        row = ErgStickParser(f2)
    elif fileformat == 'fit':
        # FIT: the summary comes from the file itself
        row = FITParser(f2)
        s = fitsummarydata(f2)
        s.setsummary()
        summary = s.summarytext

    source_file = f2

    # should delete file
    f2 = f2[:-4]+'o.csv'
    row.write_csv(f2,gzip=True)

    #os.remove(f2)
    try:
        os.remove(source_file)
    except:
        # the source may only exist in gzipped form
        os.remove(source_file+'.gz')

    return (f2,summary,oarlength,inboard)
# Create new workout from file and store it in the database
# This routine should be used everywhere in views.py and mailprocessing.py
# Currently there is code duplication
def new_workout_from_file(r,f2,
                          workouttype='rower',
                          title='Workout',
                          makeprivate=False,
                          notes=''):
    """Create a workout for rower *r* from the uploaded file *f2*.

    The file type is detected with ``get_file_type``.  Zip archives are
    unpacked and every member is processed recursively; summary-only and
    unrecognised files are rejected with an explanatory message; other
    non-CSV formats are first converted by ``handle_nonpainsled``.

    Returns ``(workout id, message, file name)``; the id is 0 on failure.

    Changes: an empty zip archive no longer fails on unbound locals, the
    conversion error path no longer needs ``sys`` (the message text is the
    same) and catches ``Exception`` instead of everything.

    NOTE(review): ``get_file_type``, ``zipfile``, ``os``, ``settings``,
    ``queuehigh`` and ``handle_sendemail_unrecognized`` are not imported in
    the part of the module visible here.
    """
    message = None
    fileformat = get_file_type(f2)
    summary = ''
    oarlength = 2.89
    inboard = 0.88

    if len(fileformat)==3 and fileformat[0]=='zip':
        f_to_be_deleted = f2
        # Result reported when the archive has no members at all.
        id = 0
        message = 'The zip archive does not contain any files.'
        with zipfile.ZipFile(f2) as z:
            for fname in z.namelist():
                f3 = z.extract(fname,path='media/')
                # the result of the last member is the one returned
                id,message,f2 = new_workout_from_file(r,f3,
                                                      workouttype=workouttype,
                                                      makeprivate=makeprivate,
                                                      title = title,
                                                      notes='')
        os.remove(f_to_be_deleted)
        return id,message,f2

    # Some people try to upload Concept2 logbook summaries
    if fileformat == 'c2log':
        os.remove(f2)
        message = "This C2 logbook summary does not contain stroke data. Please download the Export Stroke Data file from the workout details on the C2 logbook."
        return (0,message,f2)

    if fileformat == 'nostrokes':
        os.remove(f2)
        message = "It looks like this file doesn't contain stroke data."
        return (0,message,f2)

    # Some people try to upload RowPro summary logs
    if fileformat == 'rowprolog':
        os.remove(f2)
        message = "This RowPro logbook summary does not contain stroke data. Please use the Stroke Data CSV file for the individual workout in your log."
        return (0,message,f2)

    # Sometimes people try an unsupported file type.
    # Send an email to info@rowsandall.com with the file attached
    # for me to check if it is a bug, or a new file type
    # worth supporting
    if fileformat == 'unknown':
        message = "We couldn't recognize the file type"
        if settings.DEBUG:
            res = handle_sendemail_unrecognized.delay(f2,
                                                      r.user.email)
        else:
            res = queuehigh.enqueue(handle_sendemail_unrecognized,
                                    f2,r.user.email)
        return (0,message,f2)

    # handle non-Painsled by converting it to painsled compatible CSV
    if (fileformat != 'csv'):
        try:
            f2,summary,oarlength,inboard = handle_nonpainsled(f2,
                                                              fileformat,
                                                              summary=summary)
        except Exception as e:
            # same text as str(sys.exc_info()[0]): the exception class
            errorstring = str(type(e))
            message = 'Something went wrong: '+errorstring
            return (0,message,'')

    # FIT and SpeedCoach 2 files bring their own summary
    dosummary = (fileformat != 'fit' and fileformat != 'speedcoach2')
    id,message = save_workout_database(f2,r,
                                       workouttype=workouttype,
                                       makeprivate=makeprivate,
                                       dosummary=dosummary,
                                       summary=summary,
                                       inboard=inboard,oarlength=oarlength,
                                       title=title)

    return (id,message,f2)
def delete_strokedata(id,debug=False):
    """Remove the stored parquet directory of workout *id*, if it exists."""
    target = 'media/strokedata_{id}.parquet.gz'.format(id=id)
    try:
        shutil.rmtree(target)
    except FileNotFoundError:
        # nothing stored for this workout
        return None
    return None
def update_strokedata(id,df,debug=False):
    """Replace the stored stroke data of workout *id* with data from *df*.

    The existing data is deleted first; the return value is whatever
    ``dataprep`` returns for *df*.
    """
    delete_strokedata(id,debug=debug)

    if debug:
        print("updating ",id)

    return dataprep(df,id=id,bands=True,barchart=True,otwpower=True,
                    debug=debug)
def update_empower(id, inboard, oarlength, boattype, df, f1, debug=False):
    """Apply the empower correction factor to power and drive energy.

    The factor comes from ``empower_bug_correction``; its ``a`` argument is
    0.06 when *boattype* contains 'x' and 0.15 otherwise.  The original
    values are kept in the '... empower old' columns.  The stroke data is
    re-generated with ``dataprep`` and *df* is written to *f1* in all cases.

    Returns True when both columns were corrected, False when a column was
    missing (KeyError).
    """
    # sweep when boattype contains 'x', scull otherwise
    a, b = (0.06, 0.275) if 'x' in boattype else (0.15, 0.275)

    corr_factor = empower_bug_correction(oarlength,inboard,a,b)

    corrected_columns = (
        ('power empower old',' Power (watts)'),
        ('driveenergy empower old','driveenergy'),
    )

    try:
        for backup,column in corrected_columns:
            df[backup] = df[column]
            df[column] = df[column] * corr_factor
    except KeyError:
        success = False
    else:
        success = True

    if success:
        delete_strokedata(id,debug=debug)
        if debug:
            print("updated ",id)
            print("correction ",corr_factor)
    elif debug:
        print("not updated ",id)

    rowdata = dataprep(df,id=id,bands=True,barchart=True,otwpower=True,
                       debug=debug)

    row = rrdata(df=df)
    row.write_csv(f1,gzip=True)

    return success
def testdata(time,distance,pace,spm):
t1 = np.issubdtype(time,np.number)
t2 = np.issubdtype(distance,np.number)
t3 = np.issubdtype(pace,np.number)
t4 = np.issubdtype(spm,np.number)
return t1 and t2 and t3 and t4
def getsmallrowdata_db(columns,ids=None,debug=False):
    """Read selected stroke data columns for the given workout ids.

    Column names equal to the string 'None' are ignored.  Each id is read
    from ``media/strokedata_<id>.parquet.gz``; files that are missing or
    cannot be read (OSError, ArrowInvalid) are skipped.

    Returns the data of all readable files in one DataFrame, or an empty
    DataFrame when there are no ids or no readable files.

    Changes: single-id and multi-id requests now go through the same code,
    so an invalid file is skipped in both cases (it used to raise for a
    single id), and the mutable default for *ids* is gone.
    """
    if ids is None:
        ids = []

    columns = [c for c in columns if c != 'None']

    frames = []
    for workout_id in ids:
        filename = 'media/strokedata_{id}.parquet.gz'.format(id=workout_id)
        try:
            frames.append(
                pd.read_parquet(filename,columns=columns,engine='pyarrow')
            )
        except (OSError,pa.lib.ArrowInvalid):
            # missing or unreadable stroke data: skip this workout
            continue

    if not frames:
        return pd.DataFrame()
    if len(frames) == 1:
        return frames[0]

    return pd.concat(frames,axis=0)
def update_workout_field_sql(workoutid,fieldname,value,debug=False):
    """Set column *fieldname* of workout *workoutid* to *value*.

    Runs an UPDATE on table ``rowers_workout`` using the debug database URL
    when *debug* is true and the regular one otherwise.  Always returns 1.

    *value* and *workoutid* are passed as bound parameters, so values
    containing quotes no longer break (or inject into) the statement.
    *fieldname* is still interpolated into the SQL text because identifiers
    cannot be bound; it must come from trusted code.
    """
    engine = create_engine(database_url_debug if debug else database_url,
                           echo=False)
    table = 'rowers_workout'

    # numpy scalars are converted to plain Python values for the DB driver
    if isinstance(value,np.generic):
        value = value.item()
    if isinstance(workoutid,np.generic):
        workoutid = workoutid.item()

    query = sa.text(
        "UPDATE %s SET %s = :value WHERE `id` = :workoutid;" % (table,fieldname)
    )

    try:
        with engine.connect() as conn, conn.begin():
            conn.execute(query,{'value':value,'workoutid':workoutid})
    finally:
        engine.dispose()

    return 1
def update_c2id_sql(id,c2id):
    """Store *c2id* in column ``uploadedtoc2`` of workout *id*.

    Runs an UPDATE on table ``rowers_workout`` of the regular database.
    Both values are passed as bound parameters instead of being formatted
    into the SQL text.  Always returns 1.
    """
    engine = create_engine(database_url, echo=False)
    table = 'rowers_workout'

    query = sa.text(
        "UPDATE %s SET uploadedtoc2 = :c2id WHERE `id` = :workoutid;" % table
    )

    try:
        with engine.connect() as conn, conn.begin():
            conn.execute(query,{'c2id':c2id,'workoutid':id})
    finally:
        engine.dispose()

    return 1
def fitnessmetric_to_sql(m,table='powertimefitnessmetric',debug=False,
                         doclean=False):
    """Insert the metric dictionary *m* as one row into *table*.

    Values whose string form is 'nan' or contains 'inf' are replaced by -1
    in *m* itself (the caller's dictionary is modified).  With *doclean* the
    rows of ``m['user_id']`` with negative PowerFourMin, PowerOneHour and
    PowerTwoK are deleted first, in the same transaction.  Always returns 1.

    Changes: the statements use named bound parameters.  The placeholder
    style used to be picked from ``use_sqlite`` even when the non-debug
    database was used, and ``user_id`` was formatted into the SQL text.
    The keys of *m* are used as column names and must be trusted.
    """
    # test if nan among values
    for key in m.keys():
        if str(m[key]) == 'nan':
            m[key] = -1
        if 'inf' in str(m[key]):
            m[key] = -1

    engine = create_engine(database_url_debug if debug else database_url,
                           echo=False)

    columns = ', '.join(m.keys())
    placeholders = ', '.join(':'+str(key) for key in m.keys())

    query = sa.text(
        "INSERT into %s ( %s ) Values (%s)" % (table, columns, placeholders)
    )
    query2 = sa.text(
        "DELETE FROM %s WHERE PowerFourMin < 0 and PowerOneHour < 0 and PowerTwoK < 0 and user_id = :user_id " % table
    )
    # looked up unconditionally, as before: a missing user_id is an error
    clean_params = {'user_id': m['user_id']}

    # numpy scalars are converted to plain Python values for the DB driver
    values = {
        str(key): (val.item() if isinstance(val,np.generic) else val)
        for key,val in m.items()
    }

    try:
        with engine.connect() as conn, conn.begin():
            if doclean:
                conn.execute(query2,clean_params)
            conn.execute(query,values)
    finally:
        engine.dispose()

    return 1
def read_cols_df_sql(ids,columns,debug=False):
    """Read the requested columns (plus 'distance' and 'spm') for *ids*.

    Column names equal to the string 'None' are dropped and duplicates are
    removed.  Each id is read from ``media/strokedata_<id>.parquet.gz``;
    files that cannot be opened (OSError) are skipped.

    Returns one DataFrame with the rows of all readable files, or an empty
    DataFrame when there are no ids or no readable files.  Previously an
    unreadable file raised UnboundLocalError (single id) or ValueError from
    ``pd.concat`` (several ids, none readable).
    """
    columns = list(columns)+['distance','spm']
    columns = [x for x in columns if x != 'None']
    columns = list(set(columns))

    ids = [int(id) for id in ids]

    frames = []
    for workout_id in ids:
        filename = 'media/strokedata_{id}.parquet.gz'.format(id=workout_id)
        try:
            frames.append(pd.read_parquet(filename,columns=columns))
        except OSError:
            # no stroke data stored for this workout
            continue

    if not frames:
        return pd.DataFrame()
    if len(frames) == 1:
        return frames[0]

    return pd.concat(frames,axis=0)
def read_df_sql(id,debug=False):
    """Read all stored stroke data of workout *id*, with NaN replaced by 0.

    Returns an empty DataFrame when the parquet file cannot be opened
    (OSError); this case used to fail with UnboundLocalError.
    """
    f = 'media/strokedata_{id}.parquet.gz'.format(id=id)
    try:
        df = pd.read_parquet(f)
    except OSError:
        return pd.DataFrame()

    return df.fillna(value=0)
def getcpdata_sql(rower_id,table='cpdata',debug=False):
    """Return all rows of *table* whose ``user`` column equals *rower_id*.

    The result is a DataFrame read with ``pd.read_sql_query``.

    Changes: *rower_id* is a bound parameter, the raw connection that was
    opened but never used or closed is gone, and the engine is disposed
    after the query.  *table* is still interpolated into the SQL text and
    must be trusted.
    """
    engine = create_engine(database_url_debug if debug else database_url,
                           echo=False)

    query = sa.text('SELECT * from {table} WHERE user=:rower_id;'.format(
        table=table,
    ))

    try:
        df = pd.read_sql_query(query, engine, params={'rower_id': rower_id})
    finally:
        engine.dispose()

    return df
def deletecpdata_sql(rower_id,table='cpdata',debug=False):
    """Delete all rows of *table* whose ``user`` column equals *rower_id*.

    Best effort: a failing statement is reported with a printed message and
    not raised.  *rower_id* is passed as a bound parameter; *table* is still
    interpolated into the SQL text and must be trusted.
    """
    engine = create_engine(database_url_debug if debug else database_url,
                           echo=False)

    query = sa.text('DELETE from {table} WHERE user=:rower_id;'.format(
        table=table,
    ))

    try:
        with engine.connect() as conn, conn.begin():
            try:
                conn.execute(query,{'rower_id': rower_id})
            except Exception:
                # message kept from the original implementation
                print("Database locked")
    finally:
        engine.dispose()
def delete_agegroup_db(age,sex,weightcategory,debug=False):
    """Delete the ``calcagegrouprecords`` rows of one age/sex/weight group.

    Best effort: a failing statement is reported with a printed message and
    not raised.

    The three values are passed as bound parameters.  They used to be
    formatted into the SQL text without quotes, which gives an invalid
    statement for string values; that error was then swallowed, so nothing
    was deleted.
    """
    engine = create_engine(database_url_debug if debug else database_url,
                           echo=False)

    query = sa.text(
        'DELETE from {table} WHERE age=:age and weightcategory = :weightcategory and sex=:sex;'.format(
            table='calcagegrouprecords'
        ))
    params = {
        'age': age,
        'weightcategory': weightcategory,
        'sex': sex,
    }

    try:
        with engine.connect() as conn, conn.begin():
            try:
                conn.execute(query,params)
            except Exception:
                # message kept from the original implementation
                print("Database locked")
    finally:
        engine.dispose()
def update_agegroup_db(age,sex,weightcategory,wcdurations,wcpower,
                       debug=False):
    """Replace the ``calcagegrouprecords`` rows of one age/sex/weight group.

    Existing rows of the group are deleted through ``delete_agegroup_db``.
    One row per (duration, power) pair is then appended; pairs containing
    NaN or an infinite value are dropped.

    The rows are now written through the connection opened here, so the
    write belongs to the transaction started with it.  The write used to go
    to the engine directly and left that connection unused.
    """
    delete_agegroup_db(age,sex,weightcategory,debug=debug)

    wcdurations = [None if isinstance(y,float) and np.isnan(y) else y
                   for y in wcdurations]
    wcpower = [None if isinstance(y,float) and np.isnan(y) else y
               for y in wcpower]

    df = pd.DataFrame(
        {
            'duration':wcdurations,
            'power':wcpower,
        }
    )

    df['sex'] = sex
    df['age'] = age
    df['weightcategory'] = weightcategory

    # rows with missing or infinite values are not stored
    df.replace([np.inf,-np.inf],np.nan,inplace=True)
    df.dropna(axis=0,inplace=True)

    engine = create_engine(database_url_debug if debug else database_url,
                           echo=False)

    table = 'calcagegrouprecords'
    try:
        with engine.connect() as conn, conn.begin():
            df.to_sql(table, conn, if_exists='append', index=False)
    finally:
        engine.dispose()
def updatecpdata_sql(rower_id,delta,cp,table='cpdata',distance=None,debug=False):
    """Replace the rows of *rower_id* in *table* with new values.

    Existing rows are removed through ``deletecpdata_sql``.  A frame with
    columns 'delta', 'cp' and 'user' (plus 'distance' when a non-empty
    *distance* Series is given) is then appended to *table*.

    Changes: the default for *distance* is None instead of a Series created
    once at definition time, and the rows are written through the
    connection opened here rather than the bare engine.
    """
    deletecpdata_sql(rower_id,table=table,debug=debug)

    df = pd.DataFrame(
        {
            'delta':delta,
            'cp':cp,
            'user':rower_id
        }
    )

    if distance is not None and not distance.empty:
        df['distance'] = distance

    engine = create_engine(database_url_debug if debug else database_url,
                           echo=False)

    try:
        with engine.connect() as conn, conn.begin():
            df.to_sql(table, conn, if_exists='append', index=False)
    finally:
        engine.dispose()
def smalldataprep(therows,xparam,yparam1,yparam2):
    """Collect a few prepared columns from several workouts in one frame.

    For each workout in *therows* the CSV file ``workout.csvfilename`` (or
    its ``.gz`` variant) is read and run through ``dataprep``.  The columns
    *xparam*, *yparam1*, *yparam2*, 'distance' and 'spm' are appended to the
    result.  A *yparam2* equal to the string 'None' is replaced by 'power'.
    Workouts whose file cannot be opened (IOError) are skipped.

    Changes: workouts for which ``dataprep`` returns no DataFrame (it
    returns 0 for empty data) are skipped instead of raising TypeError, and
    the duplicated read/concatenate code is merged.
    """
    df = pd.DataFrame()
    if yparam2 == 'None':
        yparam2 = 'power'

    df[xparam] = []
    df[yparam1] = []
    df[yparam2] = []
    df['distance'] = []
    df['spm'] = []

    for workout in therows:
        f1 = workout.csvfilename

        # try the plain file first, then the gzipped one
        rowdata = None
        for filename in (f1,f1+'.gz'):
            try:
                rowdata = dataprep(rrdata(csvfile=filename).df)
            except IOError:
                continue
            break

        if not isinstance(rowdata,pd.DataFrame):
            # file missing, or dataprep found no usable data
            continue

        selection = pd.DataFrame({xparam: rowdata[xparam],
                                  yparam1: rowdata[yparam1],
                                  yparam2: rowdata[yparam2],
                                  'distance': rowdata['distance'],
                                  'spm': rowdata['spm'],
                                  }
                                 )
        df = pd.concat([df,selection],ignore_index=True)

    return df
def dataprep(rowdatadf,id=0,bands=True,barchart=True,otwpower=True,
             empower=True,debug=False,inboard=0.88,forceunit='lbs'):
    """Derive the stroke data frame stored per workout from CSV-style data.

    *rowdatadf* uses the CSV column names (see ``columndict``).  The result
    uses the short names: time and pace scaled by 1e3, formatted time/pace
    strings, smoothed spm/hr/drive length/force ratio, drive energy, and
    the optional groups below.

    bands     copy the hr_* band columns (they must exist in *rowdatadf*)
    empower   add wash/catch/slip/finish/angle columns and efficiency
    otwpower  add nowindpace/equivergpower/ergpace columns
    forceunit 'lbs' scales the force columns by ``lbstoN``
    id        when not 0, the result is also written to
              ``media/strokedata_<id>.parquet.gz``

    Returns the prepared DataFrame, or 0 when *rowdatadf* is empty.

    Side effects on *rowdatadf*: paces above 3000 are clamped to 3000 and a
    missing ' ElapsedTime (sec)' column is added.

    Changes: the drive energy for a non-lbs force unit is now stored under
    the right name (``drivenergy`` typo); ``efficiency`` is only written by
    the otwpower step when the empower step has computed it (it used to
    raise NameError with empower=False); ``fillna(method='ffill')`` is
    replaced by ``ffill()``; the bare ``except`` is narrowed to ``Exception``.
    """
    if rowdatadf.empty:
        if debug:
            print("empty")
        return 0

    if debug:
        print("dataprep",id)

    # rowdatadf.set_index([range(len(rowdatadf))],inplace=True)
    # time relative to the first stroke
    t = rowdatadf.loc[:,'TimeStamp (sec)']
    t = pd.Series(t-rowdatadf.loc[:,'TimeStamp (sec)'].iloc[0])

    # clamp unrealistically slow paces
    row_index = rowdatadf.loc[:,' Stroke500mPace (sec/500m)'] > 3000
    rowdatadf.loc[row_index,' Stroke500mPace (sec/500m)'] = 3000.

    p = rowdatadf.loc[:,' Stroke500mPace (sec/500m)']
    try:
        velo = rowdatadf.loc[:,' AverageBoatSpeed (m/s)']
    except KeyError:
        velo = 500./p

    hr = rowdatadf.loc[:,' HRCur (bpm)']
    spm = rowdatadf.loc[:,' Cadence (stokes/min)']
    cumdist = rowdatadf.loc[:,'cum_dist']
    power = rowdatadf.loc[:,' Power (watts)']
    averageforce = rowdatadf.loc[:,' AverageDriveForce (lbs)']
    drivelength = rowdatadf.loc[:,' DriveLength (meters)']
    try:
        workoutstate = rowdatadf.loc[:,' WorkoutState']
    except KeyError:
        workoutstate = 0*hr

    peakforce = rowdatadf.loc[:,' PeakDriveForce (lbs)']

    forceratio = averageforce/peakforce
    forceratio = forceratio.fillna(value=0)

    try:
        drivetime = rowdatadf.loc[:,' DriveTime (ms)']
        recoverytime = rowdatadf.loc[:,' StrokeRecoveryTime (ms)']
        rhythm = 100.*drivetime/(recoverytime+drivetime)
        rhythm = rhythm.fillna(value=0)
    except Exception:
        rhythm = 0.0*forceratio

    # smoothing window derived from the mean sample interval
    f = rowdatadf['TimeStamp (sec)'].diff().mean()
    if f != 0:
        try:
            windowsize = 2*(int(10./(f)))+1
        except ValueError:
            # f is NaN, e.g. for a single row
            windowsize = 1
    else:
        windowsize = 1

    if windowsize <= 3:
        windowsize = 5

    if windowsize > 3 and windowsize<len(hr):
        spm = savgol_filter(spm,windowsize,3)
        hr = savgol_filter(hr,windowsize,3)
        drivelength = savgol_filter(drivelength,windowsize,3)
        forceratio = savgol_filter(forceratio,windowsize,3)

    try:
        t2 = t.ffill().apply(lambda x: timedeltaconv(x))
    except TypeError:
        t2 = 0*t

    p2 = p.ffill().apply(lambda x: timedeltaconv(x))

    try:
        drivespeed = drivelength/rowdatadf[' DriveTime (ms)']*1.0e3
    except KeyError:
        drivespeed = 0.0*rowdatadf['TimeStamp (sec)']
    except TypeError:
        drivespeed = 0.0*rowdatadf['TimeStamp (sec)']

    drivespeed = drivespeed.fillna(value=0)

    try:
        driveenergy = rowdatadf['driveenergy']
    except KeyError:
        if forceunit == 'lbs':
            driveenergy = drivelength*averageforce*lbstoN
        else:
            driveenergy = drivelength*averageforce

    distance = rowdatadf.loc[:,'cum_dist']
    velo = 500./p

    distanceperstroke = 60.*velo/spm

    if forceunit == 'lbs':
        # NOTE(review): in-place scaling; depending on the pandas version
        # this may also modify the columns of rowdatadf - confirm.
        averageforce *= lbstoN
        peakforce *= lbstoN

    data = DataFrame(
        dict(
            time=t * 1e3,
            hr=hr,
            pace=p * 1e3,
            spm=spm,
            velo=velo,
            cumdist=cumdist,
            ftime=niceformat(t2),
            fpace=nicepaceformat(p2),
            driveenergy=driveenergy,
            power=power,
            workoutstate=workoutstate,
            averageforce=averageforce,
            drivelength=drivelength,
            peakforce=peakforce,
            forceratio=forceratio,
            distance=distance,
            drivespeed=drivespeed,
            rhythm=rhythm,
            distanceperstroke=distanceperstroke,
        )
    )

    if bands:
        # HR bands
        data['hr_ut2'] = rowdatadf.loc[:,'hr_ut2']
        data['hr_ut1'] = rowdatadf.loc[:,'hr_ut1']
        data['hr_at'] = rowdatadf.loc[:,'hr_at']
        data['hr_tr'] = rowdatadf.loc[:,'hr_tr']
        data['hr_an'] = rowdatadf.loc[:,'hr_an']
        data['hr_max'] = rowdatadf.loc[:,'hr_max']
        data['hr_bottom'] = 0.0*data['hr']

    try:
        tel = rowdatadf.loc[:,' ElapsedTime (sec)']
    except KeyError:
        rowdatadf[' ElapsedTime (sec)'] = rowdatadf['TimeStamp (sec)']

    # only set by the empower step below
    efficiency = None

    if empower:
        try:
            wash = rowdatadf.loc[:,'wash']
        except KeyError:
            wash = 0*t

        try:
            catch = rowdatadf.loc[:,'catch']
        except KeyError:
            catch = 0*t

        try:
            finish = rowdatadf.loc[:,'finish']
        except KeyError:
            finish = 0*t

        try:
            peakforceangle = rowdatadf.loc[:,'peakforceangle']
        except KeyError:
            peakforceangle = 0*t

        if data['driveenergy'].mean() == 0:
            try:
                driveenergy = rowdatadf.loc[:,'driveenergy']
            except KeyError:
                driveenergy = power*60/spm
        else:
            driveenergy = data['driveenergy']

        arclength = (inboard-0.05)*(np.radians(finish)-np.radians(catch))
        if arclength.mean()>0:
            drivelength = arclength
        elif drivelength.mean() == 0:
            drivelength = driveenergy/(averageforce*4.44822)

        try:
            slip = rowdatadf.loc[:,'slip']
        except KeyError:
            slip = 0*t

        try:
            totalangle = finish-catch
            effectiveangle = finish-wash-catch-slip
        except ValueError:
            totalangle = 0*t
            effectiveangle = 0*t

        if windowsize > 3 and windowsize<len(slip):
            try:
                wash = savgol_filter(wash,windowsize,3)
            except TypeError:
                pass
            try:
                slip = savgol_filter(slip,windowsize,3)
            except TypeError:
                pass
            try:
                catch = savgol_filter(catch,windowsize,3)
            except TypeError:
                pass
            try:
                finish = savgol_filter(finish,windowsize,3)
            except TypeError:
                pass
            try:
                peakforceangle = savgol_filter(peakforceangle,windowsize,3)
            except TypeError:
                pass
            try:
                driveenergy = savgol_filter(driveenergy,windowsize,3)
            except TypeError:
                pass
            try:
                drivelength = savgol_filter(drivelength,windowsize,3)
            except TypeError:
                pass
            try:
                totalangle = savgol_filter(totalangle,windowsize,3)
            except TypeError:
                pass
            try:
                effectiveangle = savgol_filter(effectiveangle,windowsize,3)
            except TypeError:
                pass

        velo = 500./p

        ergpw = 2.8*velo**3
        efficiency = 100.*ergpw/power

        efficiency = efficiency.replace([-np.inf,np.inf],np.nan)
        # the result used to be discarded; the whole frame is forward
        # filled again further down
        efficiency = efficiency.ffill()

        try:
            data['wash'] = wash
            data['catch'] = catch
            data['slip'] = slip
            data['finish'] = finish
            data['peakforceangle'] = peakforceangle
            data['driveenergy'] = driveenergy
            data['drivelength'] = drivelength
            data['totalangle'] = totalangle
            data['effectiveangle'] = effectiveangle
            data['efficiency'] = efficiency
        except ValueError:
            pass

    if otwpower:
        try:
            nowindpace = rowdatadf.loc[:,'nowindpace']
        except KeyError:
            nowindpace = p
        try:
            equivergpower = rowdatadf.loc[:,'equivergpower']
        except KeyError:
            equivergpower = 0*p+50.

        nowindpace2 = nowindpace.apply(lambda x: timedeltaconv(x))
        ergvelo = (equivergpower/2.8)**(1./3.)

        ergpace = 500./ergvelo
        ergpace[ergpace == np.inf] = 240.
        ergpace2 = ergpace.apply(lambda x: timedeltaconv(x))

        data['ergpace'] = ergpace*1.e3
        data['nowindpace'] = nowindpace*1.e3
        data['equivergpower'] = equivergpower
        data['fergpace'] = nicepaceformat(ergpace2)
        data['fnowindpace'] = nicepaceformat(nowindpace2)
        if efficiency is not None:
            data['efficiency'] = efficiency

    data = data.replace([-np.inf,np.inf],np.nan)
    data = data.ffill()
    data.dropna(axis=0,inplace=True,how='all')
    data.dropna(axis=1,inplace=True,how='any')

    # write data if id given
    if id != 0:
        data['workoutid'] = id
        data.fillna(0,inplace=True)
        for k, v in dtypes.items():
            try:
                data[k] = data[k].astype(v)
            except KeyError:
                pass

        filename = 'media/strokedata_{id}.parquet.gz'.format(id=id)
        df = dd.from_pandas(data,npartitions=1)
        df.to_parquet(filename,engine='fastparquet',compression='GZIP')

    return data