Private
Public Access
1
0
Files
rowsandall/rowers/dataprepnodjango.py
Sander Roosendaal 6f55a975c1 some warnings
2021-04-27 15:02:30 +02:00

1047 lines
29 KiB
Python

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals
# This is Data prep used for testing purposes (no Django environment)
# Uses the debug SQLite database for stroke data
from rowingdata import rowingdata as rrdata
from rowingdata import make_cumvalues
from rowingdata import rower as rrower
from rowingdata import main as rmain
from rowingdata import empower_bug_correction,get_empower_rigging, get_file_type
from rowingdata.csvparsers import make_cumvalues_array
from time import strftime
from pandas import DataFrame,Series
import shutil
from shutil import copyfile
import pyarrow as pa
import pandas as pd
import numpy as np
import itertools
import dask.dataframe as dd
from dask.delayed import delayed
from sqlalchemy import create_engine
import sqlalchemy as sa
from rowsandall_app.settings import DATABASES
from rowsandall_app.settings_dev import DATABASES as DEV_DATABASES
from rowsandall_app.settings_dev import use_sqlite
from rowers.utils import lbstoN
import pytz
from timezonefinder import TimezoneFinder
# Resolve DB credentials from the Django settings dict.  Each key (and the
# 'default' alias itself) may be absent in stripped-down test settings, in
# which case we fall back to an empty string — same behavior as the old
# per-key try/except KeyError chain.
_default_db = DATABASES.get('default', {})
user = _default_db.get('USER', '')
password = _default_db.get('PASSWORD', '')
database_name = _default_db.get('NAME', '')
host = _default_db.get('HOST', '')
port = _default_db.get('PORT', '')
database_url = 'mysql://{user}:{password}@{host}:{port}/{database_name}'.format(
    user=user,
    password=password,
    database_name=database_name,
    host=host,
    port=port,
)
database_name_dev = DEV_DATABASES['default']['NAME']
# The debug URL points at the SQLite dev database when use_sqlite is set;
# in that case ALL access (debug or not) goes to SQLite.
database_url_debug = database_url
if use_sqlite:
    database_url_debug = 'sqlite:///' + database_name_dev
    database_url = database_url_debug
# mapping the DB column names to the CSV file column names
# NOTE: the leading spaces and the '(stokes/min)' spelling are intentional —
# they match the CSV headers constructed elsewhere in this file (see the
# DataFrame literals in create_c2_stroke_data_db / add_c2_stroke_data_db).
columndict = {
    'time':'TimeStamp (sec)',
    'hr':' HRCur (bpm)',
    'velo': ' AverageBoatSpeed (m/s)',
    'pace':' Stroke500mPace (sec/500m)',
    'spm':' Cadence (stokes/min)',
    'power':' Power (watts)',
    'averageforce':' AverageDriveForce (lbs)',
    'drivelength':' DriveLength (meters)',
    'peakforce':' PeakDriveForce (lbs)',
    'distance':' Horizontal (meters)',
    'catch':'catch',
    'finish':'finish',
    'peakforceangle':'peakforceangle',
    # 'slip' deliberately maps to the same CSV column as 'wash'
    'wash':'wash',
    'slip':'wash',
    'workoutstate':' WorkoutState',
    'cumdist':'cum_dist',
}
from scipy.signal import savgol_filter
import datetime
def niceformat(values):
    """Format an iterable of timedelta-like values as 'MM:SS.t' strings.

    Delegates per-element formatting to strfdelta; returns a list.
    """
    return [strfdelta(v) for v in values]
def strfdelta(tdelta):
    """Format a timedelta-like value as ``MM:SS.t`` (minutes:seconds.tenths).

    Accepts either a ``datetime.timedelta`` or a numpy ``timedelta64``
    scalar; the latter has no ``seconds`` attribute and is handled via its
    raw nanosecond count.
    """
    try:
        minutes, seconds = divmod(tdelta.seconds, 60)
        tenths = int(tdelta.microseconds/1e5)
    except AttributeError: # pragma: no cover
        # numpy timedelta64: reinterpret as integer nanoseconds
        minutes, seconds = divmod(tdelta.view(np.int64), 60e9)
        seconds, rest = divmod(seconds, 1e9)
        tenths = int(rest/1e8)
    # Cast to int so the zero-padding works: the numpy branch produces
    # floats, which previously rendered as e.g. "1.0:5.0.5".
    res = "{minutes:0>2}:{seconds:0>2}.{tenths:0>1}".format(
        minutes=int(minutes),
        seconds=int(seconds),
        tenths=tenths,
    )
    return res
def nicepaceformat(values):
    """Format an iterable of pace timedeltas as 'MM:SS.t' strings.

    Same formatting as niceformat; kept as a separate name because both
    are referenced independently by callers.
    """
    return [strfdelta(v) for v in values]
def timedeltaconv(x):
    """Convert a float number of seconds into a timedelta.

    NaN samples map to a 350 s placeholder value.
    """
    if np.isnan(x): # pragma: no cover
        return datetime.timedelta(seconds=350.)
    return datetime.timedelta(seconds=x)
def rdata(file, rower=None): # pragma: no cover
    """Load a rowingdata object from ``file`` (falling back to ``file``.gz).

    Returns 0 when neither file can be read.  ``rower`` defaults to a fresh
    rower profile per call; the old default ``rower=rrower()`` was evaluated
    once at import time and shared between all callers.
    """
    if rower is None:
        rower = rrower()
    try:
        return rrdata(csvfile=file, rower=rower)
    except IOError:
        try:
            return rrdata(csvfile=file + '.gz', rower=rower)
        except IOError:
            return 0
from rowers.utils import totaltime_sec_to_string
from rowers.metrics import dtypes
# Creates C2 stroke data
def create_c2_stroke_data_db(
        distance, duration, workouttype,
        workoutid, starttimeunix, csvfilename, debug=False): # pragma: no cover
    """Synthesize ~10 m resolution stroke data for a Concept2 summary result.

    Writes the synthetic strokes to ``csvfilename`` (gzipped CSV) and
    returns the DataFrame produced by :func:`dataprep`, or 0 when fewer
    than two strokes can be synthesized.  ``duration`` is a time-like
    object exposing hour/minute/second/microsecond.
    """
    nr_strokes = int(distance/10.)
    # the linear interpolations below divide by (nr_strokes - 1); with 0 or
    # 1 strokes they would raise / emit divide-by-zero warnings
    if nr_strokes < 2:
        return 0
    totalseconds = duration.hour*3600.
    totalseconds += duration.minute*60.
    totalseconds += duration.second
    totalseconds += duration.microsecond/1.e6
    try:
        spm = 60.*nr_strokes/totalseconds
    except ZeroDivisionError:
        # fall back to a constant 20 spm; the old code multiplied by
        # np.zeros, which silently produced 0 spm
        spm = 20. + np.zeros(nr_strokes)
    elapsed = np.arange(nr_strokes)*totalseconds/(float(nr_strokes-1))
    d = np.arange(nr_strokes)*distance/(float(nr_strokes-1))
    unixtime = starttimeunix + elapsed
    pace = 500.*totalseconds/distance
    if workouttype in ['rower', 'slides', 'dynamic']:
        try:
            velo = distance/totalseconds
        except ZeroDivisionError:
            velo = 0
        # standard erg power model P = 2.8 v^3
        power = 2.8*velo**3
    else:
        power = 0
    df = pd.DataFrame({
        'TimeStamp (sec)': unixtime,
        ' Horizontal (meters)': d,
        ' Cadence (stokes/min)': spm,
        ' Stroke500mPace (sec/500m)': pace,
        ' ElapsedTime (sec)': elapsed,
        ' Power (watts)': power,
        ' HRCur (bpm)': np.zeros(nr_strokes),
        ' longitude': np.zeros(nr_strokes),
        ' latitude': np.zeros(nr_strokes),
        ' DragFactor': np.zeros(nr_strokes),
        ' DriveLength (meters)': np.zeros(nr_strokes),
        ' StrokeDistance (meters)': np.zeros(nr_strokes),
        ' DriveTime (ms)': np.zeros(nr_strokes),
        ' StrokeRecoveryTime (ms)': np.zeros(nr_strokes),
        ' AverageDriveForce (lbs)': np.zeros(nr_strokes),
        ' PeakDriveForce (lbs)': np.zeros(nr_strokes),
        ' lapIdx': np.zeros(nr_strokes),
        'cum_dist': d
    })
    # NOTE(review): this overwrites elapsed time with the absolute
    # timestamp, mirroring the original behaviour — verify intended
    df[' ElapsedTime (sec)'] = df['TimeStamp (sec)']
    res = df.to_csv(csvfilename, index_label='index',
                    compression='gzip')
    data = dataprep(df, id=workoutid, bands=False, debug=debug)
    return data
# Saves C2 stroke data to CSV and database
def add_c2_stroke_data_db(strokedata, workoutid, starttimeunix, csvfilename,
                          debug=False, workouttype='rower'):
    """Convert Concept2 per-stroke data (tenths of sec/m units) to the CSV
    column layout, write it to ``csvfilename`` (gzipped) and run dataprep.

    Returns the prepared DataFrame, or 0 when dataprep fails.
    """
    res = make_cumvalues(0.1*strokedata['t'])
    cum_time = res[0]
    lapidx = res[1]
    unixtime = cum_time + starttimeunix
    # unixtime[0] = starttimeunix
    seconds = 0.1*strokedata.loc[:, 't']
    nr_rows = len(unixtime)
    try: # pragma: no cover
        latcoord = strokedata.loc[:, 'lat']
        loncoord = strokedata.loc[:, 'lon']
    except KeyError:
        latcoord = np.zeros(nr_rows)
        loncoord = np.zeros(nr_rows)
    try:
        strokelength = strokedata.loc[:, 'strokelength']
    except KeyError:
        strokelength = np.zeros(nr_rows)
    dist2 = 0.1*strokedata.loc[:, 'd']
    try:
        spm = strokedata.loc[:, 'spm']
    except KeyError: # pragma: no cover
        spm = 0*dist2
    try:
        hr = strokedata.loc[:, 'hr']
    except KeyError: # pragma: no cover
        hr = 0*spm
    # pace arrives in tenths of sec/500m; clip absurd values, then replace
    # zero pace with a 300 s/500m placeholder so velo stays finite
    pace = strokedata.loc[:, 'p']/10.
    pace = np.clip(pace, 0, 1e4)
    pace = pace.replace(0, 300)
    velo = 500./pace
    power = 2.8*velo**3
    if workouttype == 'bike': # pragma: no cover
        velo = 1000./pace
    # Create data frame with all necessary data to write to csv
    df = pd.DataFrame({'TimeStamp (sec)': unixtime,
                       ' Horizontal (meters)': dist2,
                       ' Cadence (stokes/min)': spm,
                       ' HRCur (bpm)': hr,
                       ' longitude': loncoord,
                       ' latitude': latcoord,
                       ' Stroke500mPace (sec/500m)': pace,
                       ' Power (watts)': power,
                       ' DragFactor': np.zeros(nr_rows),
                       ' DriveLength (meters)': np.zeros(nr_rows),
                       ' StrokeDistance (meters)': strokelength,
                       ' DriveTime (ms)': np.zeros(nr_rows),
                       ' StrokeRecoveryTime (ms)': np.zeros(nr_rows),
                       ' AverageDriveForce (lbs)': np.zeros(nr_rows),
                       ' PeakDriveForce (lbs)': np.zeros(nr_rows),
                       ' lapIdx': lapidx,
                       ' WorkoutState': 4,
                       ' ElapsedTime (sec)': seconds,
                       'cum_dist': dist2
                       })
    # BUG FIX: sort_values returns a new frame; the result was discarded
    df = df.sort_values(by='TimeStamp (sec)', ascending=True)
    # Create CSV file name and save data to CSV file
    res = df.to_csv(csvfilename, index_label='index',
                    compression='gzip')
    try:
        data = dataprep(df, id=workoutid, bands=False, debug=debug)
    except Exception: # pragma: no cover
        return 0
    return data
def handle_nonpainsled(f2, fileformat, summary=''): # pragma: no cover
    """Convert a workout file of any supported non-painsled format into a
    gzipped rowingdata CSV.

    Returns a tuple ``(csv_path, summary, oarlength, inboard)``; the
    original input file (or its .gz variant) is deleted.

    NOTE(review): the parser classes (RowProParser, TCXParser, ...),
    ``fitsummarydata`` and the ``os`` module are not imported in the part
    of the file visible here — presumably imported elsewhere; verify.
    If ``fileformat`` matches none of the branches below, ``row`` is
    undefined and the write at the end raises NameError.
    """
    # default Empower rigging; overridden for SpeedCoach2 files below
    oarlength = 2.89
    inboard = 0.88
    # handle RowPro:
    if (fileformat == 'rp'):
        row = RowProParser(f2)
    # handle TCX
    if (fileformat == 'tcx'):
        row = TCXParser(f2)
    # handle Mystery
    if (fileformat == 'mystery'):
        row = MysteryParser(f2)
    # handle RowPerfect
    if (fileformat == 'rowperfect3'):
        row = RowPerfectParser(f2)
    # handle ErgData
    if (fileformat == 'ergdata'):
        row = ErgDataParser(f2)
    # handle CoxMate
    if (fileformat == 'coxmate'):
        row = CoxMateParser(f2)
    # handle Mike
    if (fileformat == 'bcmike'):
        row = BoatCoachAdvancedParser(f2)
    # handle BoatCoach OTW
    if (fileformat == 'boatcoachotw'):
        row = BoatCoachOTWParser(f2)
    # handle BoatCoach
    if (fileformat == 'boatcoach'):
        row = BoatCoachParser(f2)
    # handle painsled desktop
    if (fileformat == 'painsleddesktop'):
        row = painsledDesktopParser(f2)
    # handle speed coach GPS
    if (fileformat == 'speedcoach'):
        row = speedcoachParser(f2)
    # handle speed coach GPS 2
    if (fileformat == 'speedcoach2'):
        row = SpeedCoach2Parser(f2)
        # best-effort: rigging/summary data may be missing from the file
        try:
            oarlength, inboard = get_empower_rigging(f2)
            summary = row.allstats()
        except:
            pass
    # handle ErgStick
    if (fileformat == 'ergstick'):
        row = ErgStickParser(f2)
    # handle FIT
    if (fileformat == 'fit'):
        row = FITParser(f2)
        s = fitsummarydata(f2)
        s.setsummary()
        summary = s.summarytext
    f_to_be_deleted = f2
    # should delete file
    # the converted CSV gets an 'o.csv' suffix in place of the original
    # 4-character extension
    f2 = f2[:-4]+'o.csv'
    row.write_csv(f2, gzip=True)
    #os.remove(f2)
    try:
        os.remove(f_to_be_deleted)
    except:
        # the uploaded file may have been stored gzipped
        os.remove(f_to_be_deleted+'.gz')
    return (f2, summary, oarlength, inboard)
def delete_strokedata(id, debug=False):
    """Remove the cached parquet directory for workout *id*, if present."""
    cache_path = 'media/strokedata_{}.parquet.gz'.format(id)
    try:
        shutil.rmtree(cache_path)
    except FileNotFoundError: # pragma: no cover
        # nothing cached for this workout — that's fine
        pass
def update_strokedata(id, df, debug=False):
    """Drop the cached parquet for workout *id* and rebuild it from *df*."""
    delete_strokedata(id, debug=debug)
    if debug: # pragma: no cover
        print("updating ", id)
    return dataprep(df, id=id, bands=True, barchart=True, otwpower=True,
                    debug=debug)
def update_empower(id, inboard, oarlength, boattype, df, f1, debug=False): # pragma: no cover
    """Apply the Empower power-bug correction factor to a workout DataFrame,
    refresh the cached stroke data and rewrite the CSV at ``f1``.

    Returns True when the power/driveenergy columns were present and
    corrected, False otherwise.  The cache and CSV are rebuilt either way.
    """
    corr_factor = 1.0
    # sweep boats are identified by an 'x'-free naming convention?
    # NOTE(review): the 'x' check and the a/b coefficients are taken on
    # faith from empower_bug_correction's contract — verify.
    if 'x' in boattype:
        # sweep
        a = 0.06
        b = 0.275
    else:
        # scull
        a = 0.15
        b = 0.275
    corr_factor = empower_bug_correction(oarlength, inboard, a, b)
    success = False
    try:
        # keep the uncorrected values for reference, then scale in place
        df['power empower old'] = df[' Power (watts)']
        df[' Power (watts)'] = df[' Power (watts)'] * corr_factor
        df['driveenergy empower old'] = df['driveenergy']
        df['driveenergy'] = df['driveenergy'] * corr_factor
        success = True
    except KeyError:
        pass
    if success:
        # invalidate the cached parquet so it is rebuilt below
        delete_strokedata(id, debug=debug)
        if debug: # pragma: no cover
            print("updated ", id)
            print("correction ", corr_factor)
    else:
        if debug: # pragma: no cover
            print("not updated ", id)
    rowdata = dataprep(df, id=id, bands=True, barchart=True, otwpower=True,
                       debug=debug)
    row = rrdata(df=df)
    row.write_csv(f1, gzip=True)
    return success
def testdata(time,distance,pace,spm): # pragma: no cover
t1 = np.issubdtype(time,np.number)
t2 = np.issubdtype(distance,np.number)
t3 = np.issubdtype(pace,np.number)
t4 = np.issubdtype(spm,np.number)
return t1 and t2 and t3 and t4
def getsmallrowdata_db(columns, ids=None, debug=False):
    """Read the requested columns for the given workout ids from the
    parquet cache (``media/strokedata_<id>.parquet.gz``).

    Missing or corrupt files are skipped; returns an empty DataFrame when
    nothing could be read.  ``ids`` previously defaulted to a shared
    mutable list; it now defaults to None.
    """
    if ids is None:
        ids = []
    csvfilenames = ['media/strokedata_{id}.parquet.gz'.format(id=id) for id in ids]
    data = []
    # 'None' is the UI placeholder for "no column selected"
    columns = [c for c in columns if c != 'None']
    df = pd.DataFrame()
    if len(ids) > 1: # pragma: no cover
        for id, f in zip(ids, csvfilenames):
            try:
                data.append(pd.read_parquet(f, columns=columns, engine='pyarrow'))
            except OSError:
                pass
            except pa.lib.ArrowInvalid:
                pass
        try:
            df = pd.concat(data, axis=0)
        except ValueError:
            # nothing could be read
            df = pd.DataFrame()
    elif len(ids) == 1:
        try:
            df = pd.read_parquet(csvfilenames[0], columns=columns, engine='pyarrow')
        except (OSError, IndexError): # pragma: no cover
            df = pd.DataFrame()
    else: # pragma: no cover
        df = pd.DataFrame()
    return df
def update_workout_field_sql(workoutid, fieldname, value, debug=False):
    """Set a single column of ``rowers_workout`` for one workout.

    ``value`` and ``workoutid`` are bound as SQL parameters to avoid
    injection/quoting bugs; ``fieldname`` cannot be bound and is still
    interpolated — it must come from trusted code, never user input.
    """
    if debug: # pragma: no cover
        engine = create_engine(database_url_debug, echo=False)
    else:
        engine = create_engine(database_url, echo=False)
    query = sa.text(
        "UPDATE rowers_workout SET {fieldname} = :value WHERE `id` = :workoutid".format(
            fieldname=fieldname,
        )
    )
    with engine.connect() as conn, conn.begin():
        conn.execute(query, {'value': value, 'workoutid': workoutid})
    engine.dispose()
    return 1
def update_c2id_sql(id, c2id): # pragma: no cover
    """Record the Concept2 logbook id for a workout.

    Values are bound as SQL parameters instead of %-interpolated strings.
    """
    engine = create_engine(database_url, echo=False)
    query = sa.text(
        "UPDATE rowers_workout SET uploadedtoc2 = :c2id WHERE `id` = :id"
    )
    with engine.connect() as conn, conn.begin():
        conn.execute(query, {'c2id': c2id, 'id': id})
    engine.dispose()
    return 1
def read_cols_df_sql(ids, columns, debug=False): # pragma: no cover
    """Load selected columns for the given workout ids from the parquet cache.

    Always also loads 'distance' and 'spm'.  Returns an empty DataFrame
    when no ids are given or no cache file could be read (previously an
    unreadable single id left ``df`` undefined and raised NameError, and
    an all-unreadable multi-id list raised ValueError from pd.concat).
    """
    columns = list(columns)+['distance', 'spm']
    columns = [x for x in columns if x != 'None']
    columns = list(set(columns))
    ids = [int(id) for id in ids]
    df = pd.DataFrame()
    if len(ids) == 1:
        try:
            filename = 'media/strokedata_{id}.parquet.gz'.format(id=ids[0])
            df = pd.read_parquet(filename, columns=columns)
        except OSError:
            pass
    elif len(ids) > 1:
        data = []
        filenames = ['media/strokedata_{id}.parquet.gz'.format(id=id) for id in ids]
        for id, f in zip(ids, filenames):
            try:
                data.append(pd.read_parquet(f, columns=columns))
            except OSError:
                pass
        if data:
            df = pd.concat(data, axis=0)
    return df
def read_df_sql(id, debug=False): # pragma: no cover
    """Read the full cached stroke DataFrame for workout *id*.

    Returns an empty DataFrame when the cache file is missing (previously
    the except branch left ``df`` undefined and raised NameError).
    """
    df = pd.DataFrame()
    try:
        f = 'media/strokedata_{id}.parquet.gz'.format(id=id)
        df = pd.read_parquet(f)
    except OSError:
        pass
    df = df.fillna(value=0)
    return df
def getcpdata_sql(rower_id, table='cpdata', debug=False): # pragma: no cover
    """Return the critical-power table rows for one user as a DataFrame.

    The user id is bound as a SQL parameter; ``table`` cannot be bound and
    must come from trusted code.  Also drops the previously-leaked unused
    raw connection and disposes the engine.
    """
    if debug: # pragma: no cover
        engine = create_engine(database_url_debug, echo=False)
    else:
        engine = create_engine(database_url, echo=False)
    query = sa.text('SELECT * from {table} WHERE user=:rower_id;'.format(
        table=table,
    ))
    df = pd.read_sql_query(query, engine, params={'rower_id': rower_id})
    engine.dispose()
    return df
def deletecpdata_sql(rower_id, table='cpdata', debug=False): # pragma: no cover
    """Delete all critical-power rows for one user.

    The user id is bound as a SQL parameter; ``table`` must be trusted.
    Failures (e.g. a locked SQLite database) are reported, not raised.
    """
    if debug: # pragma: no cover
        engine = create_engine(database_url_debug, echo=False)
    else:
        engine = create_engine(database_url, echo=False)
    query = sa.text('DELETE from {table} WHERE user=:rower_id;'.format(
        table=table,
    ))
    with engine.connect() as conn, conn.begin():
        try:
            conn.execute(query, {'rower_id': rower_id})
        except Exception: # pragma: no cover
            print("Database locked")
    engine.dispose()
def delete_agegroup_db(age, sex, weightcategory, debug=False):
    """Delete the cached age-group records for one (age, sex, weightcategory).

    Values are bound as SQL parameters — the old string interpolation left
    string values (sex, weightcategory) unquoted and was injectable.
    """
    if debug: # pragma: no cover
        engine = create_engine(database_url_debug, echo=False)
    else: # pragma: no cover
        engine = create_engine(database_url, echo=False)
    query = sa.text(
        'DELETE from calcagegrouprecords WHERE age=:age '
        'and weightcategory = :weightcategory and sex=:sex;'
    )
    with engine.connect() as conn, conn.begin():
        try:
            conn.execute(query, {
                'age': age,
                'weightcategory': weightcategory,
                'sex': sex,
            })
        except Exception: # pragma: no cover
            print("Database locked")
    engine.dispose()
def update_agegroup_db(age, sex, weightcategory, wcdurations, wcpower,
                       debug=False):
    """Replace the cached age-group records for one (age, sex, weightcategory)."""
    delete_agegroup_db(age, sex, weightcategory, debug=debug)

    def _nan_to_none(values):
        # plain-float NaNs become SQL NULLs
        return [None if type(v) is float and np.isnan(v) else v for v in values]

    df = pd.DataFrame({
        'duration': _nan_to_none(wcdurations),
        'power': _nan_to_none(wcpower),
    })
    df['sex'] = sex
    df['age'] = age
    df['weightcategory'] = weightcategory
    # infinities become NaN and are dropped along with any remaining NaNs
    df.replace([np.inf, -np.inf], np.nan, inplace=True)
    df.dropna(axis=0, inplace=True)
    if debug: # pragma: no cover
        engine = create_engine(database_url_debug, echo=False)
    else:
        engine = create_engine(database_url, echo=False)
    with engine.connect() as conn, conn.begin():
        df.to_sql('calcagegrouprecords', engine, if_exists='append', index=False)
        conn.close()
    engine.dispose()
def updatecpdata_sql(rower_id, delta, cp, table='cpdata', distance=None, debug=False):
    """Replace the stored critical-power curve for one user.

    ``distance`` is an optional Series of distances; the previous default
    (``pd.Series([], dtype='float')``) was a single module-level instance
    shared by every call — a mutable default argument.
    """
    deletecpdata_sql(rower_id, table=table, debug=debug)
    df = pd.DataFrame(
        {
            'delta': delta,
            'cp': cp,
            'user': rower_id
        }
    )
    if distance is not None and not distance.empty:
        df['distance'] = distance
    if debug: # pragma: no cover
        engine = create_engine(database_url_debug, echo=False)
    else:
        engine = create_engine(database_url, echo=False)
    with engine.connect() as conn, conn.begin():
        df.to_sql(table, engine, if_exists='append', index=False)
    engine.dispose()
def _small_slice(f1, xparam, yparam1, yparam2):
    # read one workout CSV and keep only the plotted columns
    rowdata = dataprep(rrdata(csvfile=f1).df)
    return pd.DataFrame({xparam: rowdata[xparam],
                         yparam1: rowdata[yparam1],
                         yparam2: rowdata[yparam2],
                         'distance': rowdata['distance'],
                         'spm': rowdata['spm'],
                         })

def smalldataprep(therows, xparam, yparam1, yparam2): # pragma: no cover
    """Concatenate the x/y plotting columns of several workouts into one
    DataFrame, skipping workouts whose CSV (or .gz variant) cannot be read.

    'None' as ``yparam2`` is the UI placeholder and falls back to 'power'.
    """
    df = pd.DataFrame()
    if yparam2 == 'None':
        yparam2 = 'power'
    df[xparam] = []
    df[yparam1] = []
    df[yparam2] = []
    df['distance'] = []
    df['spm'] = []
    for workout in therows:
        f1 = workout.csvfilename
        try:
            rowdata = _small_slice(f1, xparam, yparam1, yparam2)
        except IOError:
            try:
                rowdata = _small_slice(f1+'.gz', xparam, yparam1, yparam2)
            except IOError:
                continue
        df = pd.concat([df, rowdata], ignore_index=True)
    return df
def dataprep(rowdatadf, id=0, bands=True, barchart=True, otwpower=True,
             empower=True, debug=False, inboard=0.88, forceunit='lbs'):
    """Transform a raw stroke DataFrame (rowingdata CSV column layout, see
    ``columndict``) into the compact per-stroke DataFrame used by the
    plotting and DB layers.

    Parameters
    ----------
    rowdatadf : DataFrame with 'TimeStamp (sec)', ' Stroke500mPace ...'
        etc. columns.
    id : workout id; when non-zero the result is additionally written to
        ``media/strokedata_<id>.parquet.gz``.
    bands : copy the hr_* heart-rate band columns from the input.
    otwpower : add no-wind pace / equivalent erg power columns.
    empower : add Empower oarlock angle/efficiency columns.
    inboard : inboard length used to derive arc length from oar angles.
    forceunit : 'lbs' converts force columns to Newton via lbstoN.

    Returns the prepared DataFrame, or 0 for empty input.
    """
    if rowdatadf.empty: # pragma: no cover
        if debug: # pragma: no cover
            print("empty")
        return 0
    # elapsed time relative to the first sample
    t = rowdatadf.loc[:, 'TimeStamp (sec)']
    t = pd.Series(t-rowdatadf.loc[:, 'TimeStamp (sec)'].iloc[0])
    # cap absurd pace values at 3000 s/500m
    row_index = rowdatadf.loc[:, ' Stroke500mPace (sec/500m)'] > 3000
    rowdatadf.loc[row_index, ' Stroke500mPace (sec/500m)'] = 3000.
    p = rowdatadf.loc[:, ' Stroke500mPace (sec/500m)']
    try:
        velo = rowdatadf.loc[:, ' AverageBoatSpeed (m/s)']
    except KeyError:
        velo = 500./p
    hr = rowdatadf.loc[:, ' HRCur (bpm)']
    spm = rowdatadf.loc[:, ' Cadence (stokes/min)']
    cumdist = rowdatadf.loc[:, 'cum_dist']
    power = rowdatadf.loc[:, ' Power (watts)']
    averageforce = rowdatadf.loc[:, ' AverageDriveForce (lbs)']
    drivelength = rowdatadf.loc[:, ' DriveLength (meters)']
    try:
        workoutstate = rowdatadf.loc[:, ' WorkoutState']
    except KeyError: # pragma: no cover
        workoutstate = 0*hr
    peakforce = rowdatadf.loc[:, ' PeakDriveForce (lbs)']
    forceratio = averageforce/peakforce
    forceratio = forceratio.fillna(value=0)
    try:
        drivetime = rowdatadf.loc[:, ' DriveTime (ms)']
        recoverytime = rowdatadf.loc[:, ' StrokeRecoveryTime (ms)']
        # rhythm = drive fraction of the stroke cycle in percent
        rhythm = 100.*drivetime/(recoverytime+drivetime)
        rhythm = rhythm.fillna(value=0)
    except Exception: # pragma: no cover
        rhythm = 0.0*forceratio
    # Savitzky-Golay smoothing window covering roughly 10 s of samples
    # (forced odd, minimum 5)
    f = rowdatadf['TimeStamp (sec)'].diff().mean()
    if f != 0:
        try:
            windowsize = 2*(int(10./(f)))+1
        except ValueError: # pragma: no cover
            windowsize = 1
    else: # pragma: no cover
        windowsize = 1
    if windowsize <= 3: # pragma: no cover
        windowsize = 5
    if windowsize > 3 and windowsize < len(hr):
        spm = savgol_filter(spm, windowsize, 3)
        hr = savgol_filter(hr, windowsize, 3)
        drivelength = savgol_filter(drivelength, windowsize, 3)
        forceratio = savgol_filter(forceratio, windowsize, 3)
    try:
        t2 = t.fillna(method='ffill').apply(lambda x: timedeltaconv(x))
    except TypeError: # pragma: no cover
        t2 = 0*t
    p2 = p.fillna(method='ffill').apply(lambda x: timedeltaconv(x))
    try:
        drivespeed = drivelength/rowdatadf[' DriveTime (ms)']*1.0e3
    except KeyError: # pragma: no cover
        drivespeed = 0.0*rowdatadf['TimeStamp (sec)']
    except TypeError: # pragma: no cover
        drivespeed = 0.0*rowdatadf['TimeStamp (sec)']
    drivespeed = drivespeed.fillna(value=0)
    try:
        driveenergy = rowdatadf['driveenergy']
    except KeyError: # pragma: no cover
        if forceunit == 'lbs':
            driveenergy = drivelength*averageforce*lbstoN
        else: # pragma: no cover
            # BUG FIX: was assigned to misspelled 'drivenergy', leaving
            # driveenergy undefined (NameError) on this path
            driveenergy = drivelength*averageforce
    distance = rowdatadf.loc[:, 'cum_dist']
    velo = 500./p
    distanceperstroke = 60.*velo/spm
    if forceunit == 'lbs':
        averageforce *= lbstoN
        peakforce *= lbstoN
    data = DataFrame(
        dict(
            time=t * 1e3,
            hr=hr,
            pace=p * 1e3,
            spm=spm,
            velo=velo,
            cumdist=cumdist,
            ftime=niceformat(t2),
            fpace=nicepaceformat(p2),
            driveenergy=driveenergy,
            power=power,
            workoutstate=workoutstate,
            averageforce=averageforce,
            drivelength=drivelength,
            peakforce=peakforce,
            forceratio=forceratio,
            distance=distance,
            drivespeed=drivespeed,
            rhythm=rhythm,
            distanceperstroke=distanceperstroke,
        )
    )
    if bands:
        # HR bands
        data['hr_ut2'] = rowdatadf.loc[:, 'hr_ut2']
        data['hr_ut1'] = rowdatadf.loc[:, 'hr_ut1']
        data['hr_at'] = rowdatadf.loc[:, 'hr_at']
        data['hr_tr'] = rowdatadf.loc[:, 'hr_tr']
        data['hr_an'] = rowdatadf.loc[:, 'hr_an']
        data['hr_max'] = rowdatadf.loc[:, 'hr_max']
        data['hr_bottom'] = 0.0*data['hr']
    try:
        tel = rowdatadf.loc[:, ' ElapsedTime (sec)']
    except KeyError: # pragma: no cover
        rowdatadf[' ElapsedTime (sec)'] = rowdatadf['TimeStamp (sec)']
    if empower:
        # Empower oarlock angle data; any missing column falls back to zeros
        try:
            wash = rowdatadf.loc[:, 'wash']
        except KeyError:
            wash = 0*t
        try:
            catch = rowdatadf.loc[:, 'catch']
        except KeyError:
            catch = 0*t
        try:
            finish = rowdatadf.loc[:, 'finish']
        except KeyError:
            finish = 0*t
        try:
            peakforceangle = rowdatadf.loc[:, 'peakforceangle']
        except KeyError:
            peakforceangle = 0*t
        if data['driveenergy'].mean() == 0:
            try:
                driveenergy = rowdatadf.loc[:, 'driveenergy']
            except KeyError:
                driveenergy = power*60/spm
        else:
            driveenergy = data['driveenergy']
        # oar arc length from catch/finish angles (inboard minus 5 cm)
        arclength = (inboard-0.05)*(np.radians(finish)-np.radians(catch))
        if arclength.mean() > 0: # pragma: no cover
            drivelength = arclength
        elif drivelength.mean() == 0:
            # back out drive length from energy and force (lbs -> N factor)
            drivelength = driveenergy/(averageforce*4.44822)
        try:
            slip = rowdatadf.loc[:, 'slip']
        except KeyError:
            slip = 0*t
        try:
            totalangle = finish-catch
            effectiveangle = finish-wash-catch-slip
        except ValueError: # pragma: no cover
            totalangle = 0*t
            effectiveangle = 0*t
        if windowsize > 3 and windowsize < len(slip):
            # smooth the angle channels; non-numeric columns are skipped
            try:
                wash = savgol_filter(wash, windowsize, 3)
            except TypeError: # pragma: no cover
                pass
            try:
                slip = savgol_filter(slip, windowsize, 3)
            except TypeError: # pragma: no cover
                pass
            try:
                catch = savgol_filter(catch, windowsize, 3)
            except TypeError: # pragma: no cover
                pass
            try:
                finish = savgol_filter(finish, windowsize, 3)
            except TypeError: # pragma: no cover
                pass
            try:
                peakforceangle = savgol_filter(peakforceangle, windowsize, 3)
            except TypeError: # pragma: no cover
                pass
            try:
                driveenergy = savgol_filter(driveenergy, windowsize, 3)
            except TypeError: # pragma: no cover
                pass
            try:
                drivelength = savgol_filter(drivelength, windowsize, 3)
            except TypeError: # pragma: no cover
                pass
            try:
                totalangle = savgol_filter(totalangle, windowsize, 3)
            except TypeError: # pragma: no cover
                pass
            try:
                effectiveangle = savgol_filter(effectiveangle, windowsize, 3)
            except TypeError: # pragma: no cover
                pass
        # OTW efficiency = erg-model power at the boat's pace vs measured power
        velo = 500./p
        ergpw = 2.8*velo**3
        efficiency = 100.*ergpw/power
        efficiency = efficiency.replace([-np.inf, np.inf], np.nan)
        # BUG FIX: the result was discarded (fillna is not in-place)
        efficiency = efficiency.fillna(method='ffill')
        try:
            data['wash'] = wash
            data['catch'] = catch
            data['slip'] = slip
            data['finish'] = finish
            data['peakforceangle'] = peakforceangle
            data['driveenergy'] = driveenergy
            data['drivelength'] = drivelength
            data['totalangle'] = totalangle
            data['effectiveangle'] = effectiveangle
            data['efficiency'] = efficiency
        except ValueError: # pragma: no cover
            pass
    if otwpower:
        try:
            nowindpace = rowdatadf.loc[:, 'nowindpace']
        except KeyError:
            nowindpace = p
        try:
            equivergpower = rowdatadf.loc[:, 'equivergpower']
        except KeyError:
            equivergpower = 0*p+50.
        nowindpace2 = nowindpace.apply(lambda x: timedeltaconv(x))
        ergvelo = (equivergpower/2.8)**(1./3.)
        ergpace = 500./ergvelo
        ergpace[ergpace == np.inf] = 240.
        ergpace2 = ergpace.apply(lambda x: timedeltaconv(x))
        data['ergpace'] = ergpace*1.e3
        data['nowindpace'] = nowindpace*1.e3
        data['equivergpower'] = equivergpower
        data['fergpace'] = nicepaceformat(ergpace2)
        data['fnowindpace'] = nicepaceformat(nowindpace2)
        # BUG FIX: 'efficiency' only exists when the empower branch ran;
        # previously this raised NameError for empower=False
        if empower:
            data['efficiency'] = efficiency
    data = data.replace([-np.inf, np.inf], np.nan)
    data = data.fillna(method='ffill')
    data.dropna(axis=0, inplace=True, how='all')
    data.dropna(axis=1, inplace=True, how='any')
    # write data if id given
    if id != 0:
        data['workoutid'] = id
        data.fillna(0, inplace=True)
        for k, v in dtypes.items():
            try:
                data[k] = data[k].astype(v)
            except KeyError:
                pass
        filename = 'media/strokedata_{id}.parquet.gz'.format(id=id)
        df = dd.from_pandas(data, npartitions=1)
        df.to_parquet(filename, engine='fastparquet', compression='GZIP')
    return data