Private
Public Access
1
0

done with dataprep for now

This commit is contained in:
Sander Roosendaal
2021-01-18 17:21:25 +01:00
parent 504d3c2d1b
commit 31ff1d5888
2 changed files with 9 additions and 252 deletions

View File

@@ -2447,106 +2447,6 @@ def getsmallrowdata_db(columns, ids=[], doclean=True,workstrokesonly=True,comput
return df
def getsmallrowdata_db_dask(columns, ids=None, doclean=True, workstrokesonly=True, compute=True):
    """Load the requested stroke-data columns for one or more workouts via dask.

    Each workout's strokes are cached as ``media/strokedata_<id>.parquet.gz``.
    Missing parquet files are (re)generated by running ``getrowdata`` +
    ``dataprep`` for that workout, then read again.

    Parameters:
        columns: column names to load; the placeholder 'None' is dropped and
            duplicates are removed.
        ids: workout ids to load (default: no workouts).
        doclean: if True (and compute), run clean_df_stats and drop all-NaN
            columns / any-NaN rows.
        workstrokesonly: passed through to clean_df_stats.
        compute: if True, materialize the dask frame into pandas and return it;
            otherwise return the lazy dask DataFrame.

    Returns:
        A pandas DataFrame when compute=True, else a dask DataFrame.

    Raises:
        ValueError: if no workout ids are given (previously this crashed with
            a bare IndexError).
    """
    # Avoid the mutable-default-argument trap of the original signature.
    ids = list(ids) if ids else []
    if not ids:
        raise ValueError('getsmallrowdata_db_dask needs at least one workout id')
    csvfilenames = ['media/strokedata_{id}.parquet.gz'.format(id=id) for id in ids]
    columns = [c for c in columns if c != 'None']
    columns = list(set(columns))
    if len(ids) > 1:
        data = []
        for id, f in zip(ids, csvfilenames):
            try:
                df = dd.read_parquet(f, columns=columns)
                data.append(df)
            except OSError:
                # Parquet cache missing — rebuild it from the raw workout data.
                rowdata, row = getrowdata(id=id)
                if rowdata and len(rowdata.df):
                    # dataprep is called for its side effect of writing the
                    # parquet cache file — TODO confirm; its return value is
                    # deliberately unused here.
                    dataprep(rowdata.df, id=id, bands=True, otwpower=True, barchart=True)
                    df = dd.read_parquet(f, columns=columns)
                    data.append(df)
        df = dd.concat(data, axis=0)
    else:
        try:
            df = dd.read_parquet(csvfilenames[0], columns=columns)
        except OSError:
            rowdata, row = getrowdata(id=ids[0])
            if rowdata and len(rowdata.df):
                # Same cache-rebuild side effect as above — TODO confirm.
                dataprep(rowdata.df, id=ids[0], bands=True, otwpower=True, barchart=True)
                df = dd.read_parquet(csvfilenames[0], columns=columns)
    if compute:
        data = df.compute()
        if doclean:
            data = clean_df_stats(data, ignorehr=True,
                                  workstrokesonly=workstrokesonly)
            data.dropna(axis=1, how='all', inplace=True)
            data.dropna(axis=0, how='any', inplace=True)
        return data
    return df
def getsmallrowdata_db_old(columns, ids=None, doclean=True, workstrokesonly=True):
    """Load stroke-data columns for workouts from SQL, reading any columns not
    present in the DB ('extracols') from the workout's CSV file.

    Extra columns are only filled in for a single-workout request; they are
    backfilled and Savitzky-Golay smoothed before being added to the frame.

    Parameters:
        columns: column names to load.
        ids: workout ids (default: no workouts).
        doclean: if True, run clean_df_stats and drop all-NaN columns /
            any-NaN rows.
        workstrokesonly: passed through to clean_df_stats.

    Returns:
        A pandas DataFrame with the requested columns.
    """
    # Avoid the mutable-default-argument trap of the original signature.
    ids = list(ids) if ids else []
    prepmultipledata(ids)
    data, extracols = read_cols_df_sql(ids, columns)
    if extracols and len(ids) == 1:
        w = Workout.objects.get(id=ids[0])
        row = rdata(w.csvfilename)
        try:
            row.set_instroke_metrics()
        except (AttributeError, TypeError):
            pass
        try:
            # Mean sample interval (seconds) of the recording.
            f = row.df['TimeStamp (sec)'].diff().mean()
        except (AttributeError, KeyError):
            f = 0
        if f != 0 and not np.isnan(f):
            # Odd-length smoothing window spanning roughly 20 seconds of data
            # (savgol_filter requires an odd window).
            windowsize = 2 * (int(10. / (f))) + 1
        else:
            windowsize = 1
        for c in extracols:
            try:
                cdata = row.df[c]
                cdata.fillna(inplace=True, method='bfill')
                # Smoothing can fail (e.g. window too short for polyorder 3,
                # or duplicated rows from saving) — fall back to raw values.
                try:
                    cdata2 = savgol_filter(cdata.values, windowsize, 3)
                    data[c] = cdata2
                except ValueError:
                    data[c] = cdata
            except (KeyError, AttributeError):
                data[c] = 0
    if doclean:
        data = clean_df_stats(data, ignorehr=True,
                              workstrokesonly=workstrokesonly)
        data.dropna(axis=1, how='all', inplace=True)
        data.dropna(axis=0, how='any', inplace=True)
    return data
# Fetch both the workout and the workout stroke data (from CSV file)
@@ -2659,72 +2559,6 @@ def read_cols_df_sql(ids, columns, convertnewtons=True):
return df,extracols
def read_cols_df_sql_old(ids, columns, convertnewtons=True):
    """Read stroke-data columns for the given workouts straight from SQL.

    Columns that are not fields of the StrokeData model are split off and
    returned as 'extracols' for the caller to fill in from CSV.
    Force columns recorded in lbs are converted to Newtons.

    Parameters:
        ids: workout ids; coerced to int.
        columns: requested column names; 'None' placeholders are dropped and
            'distance', 'spm', 'workoutid' are always included.
        convertnewtons: present in the original signature but never consulted
            — forces are always converted. Kept for interface compatibility.

    Returns:
        (DataFrame, extracols) tuple.
    """
    prepmultipledata(ids)
    valid = [f.name for f in StrokeData._meta.get_fields()]
    # Split the request into DB-backed columns and the rest.
    extracols = [c for c in columns if c not in valid]
    keep = [c for c in columns if c in valid]
    keep = keep + ['distance', 'spm', 'workoutid']
    keep = list(set(x for x in keep if x != 'None'))
    ids = [int(id) for id in ids]
    if len(ids) == 0:
        # Early out before touching the database (the original leaked an
        # engine and an unused raw connection on this path).
        return pd.DataFrame(), extracols
    cls = ', '.join(keep)
    # NOTE(review): column names and ids are interpolated into the SQL text.
    # Injection exposure is limited — columns are filtered against StrokeData
    # field names and ids are cast to int above — but bound parameters would
    # be the safer idiom.
    if len(ids) == 1:
        query = sa.text('SELECT {columns} FROM strokedata WHERE workoutid={id} ORDER BY time ASC'.format(
            id=ids[0],
            columns=cls,
        ))
    else:
        query = sa.text('SELECT {columns} FROM strokedata WHERE workoutid IN {ids} ORDER BY time ASC'.format(
            columns=cls,
            ids=tuple(ids),
        ))
    engine = create_engine(database_url, echo=False)
    try:
        df = pd.read_sql_query(query, engine)
    finally:
        # Always release the pool, even if the query raises.
        engine.dispose()
    df = df.fillna(value=0)
    # Convert per-workout lbs force readings to Newtons.
    for forcecol in ('peakforce', 'averageforce'):
        if forcecol in keep:
            funits = ((w.id, w.forceunit)
                      for w in Workout.objects.filter(id__in=ids))
            for id, u in funits:
                if u == 'lbs':
                    mask = df['workoutid'] == id
                    df.loc[mask, forcecol] = df.loc[mask, forcecol] * lbstoN
    return df, extracols
def initiate_cp(r):
success = update_rolling_cp(r,otwtypes,'water')
@@ -2750,93 +2584,7 @@ def read_df_sql(id):
return df
def read_df_sql_old(id):
    """Read all stroke-data rows for one workout from SQL, ordered by time.

    NaNs are replaced by 0 and, when the workout's force unit is lbs, the
    force columns are converted to Newtons.

    Parameters:
        id: workout id.

    Returns:
        A pandas DataFrame of the workout's strokes.
    """
    engine = create_engine(database_url, echo=False)
    try:
        # Bound parameter instead of string interpolation — the original
        # formatted `id` straight into the SQL text.
        df = pd.read_sql_query(
            sa.text('SELECT * FROM strokedata WHERE workoutid=:id ORDER BY time ASC'),
            engine, params={'id': int(id)})
    finally:
        # Always release the pool, even if the query raises.
        engine.dispose()
    df = df.fillna(value=0)
    funit = Workout.objects.get(id=id).forceunit
    if funit == 'lbs':
        for forcecol in ('peakforce', 'averageforce'):
            try:
                df[forcecol] = df[forcecol] * lbstoN
            except KeyError:
                # Column not recorded for this workout — nothing to convert.
                pass
    return df
# Get the necessary data from the strokedata table in the DB.
# For the flex plot
def _smalldataprep_one(csvfile, xparam, yparam1, yparam2, forceunit):
    """Load one workout CSV (may raise IOError) and return the requested
    columns plus distance/spm, with lbs forces converted to Newtons."""
    rowdata = dataprep(rrdata(csvfile=csvfile).df)
    out = pd.DataFrame({xparam: rowdata[xparam],
                        yparam1: rowdata[yparam1],
                        yparam2: rowdata[yparam2],
                        'distance': rowdata['distance'],
                        'spm': rowdata['spm'],
                        })
    if forceunit == 'lbs':
        for forcecol in ('peakforce', 'averageforce'):
            try:
                out[forcecol] *= lbstoN
            except KeyError:
                pass
    return out
def smalldataprep(therows, xparam, yparam1, yparam2):
    """Build a small DataFrame for the flex plot: x/y parameters plus
    distance and spm for every workout in `therows`.

    For each workout the plain CSV filename is tried first, then the
    gzipped variant; workouts whose files cannot be read are skipped.
    A 'None' second y-parameter falls back to 'power'.

    The original duplicated the whole load/convert block for the '.gz'
    retry; that now lives in _smalldataprep_one.
    """
    df = pd.DataFrame()
    if yparam2 == 'None':
        yparam2 = 'power'
    # Pre-create the columns so an empty result still has the right shape.
    for col in (xparam, yparam1, yparam2, 'distance', 'spm'):
        df[col] = []
    for workout in therows:
        f1 = workout.csvfilename
        for candidate in (f1, f1 + '.gz'):
            try:
                rowdata = _smalldataprep_one(candidate, xparam, yparam1,
                                             yparam2, workout.forceunit)
            except IOError:
                continue
            df = pd.concat([df, rowdata], ignore_index=True)
            break
    return df
# data fusion

View File

@@ -76,6 +76,15 @@ class DataPrepTests(TestCase):
wmax = dataprep.check_marker(workouts[0])
self.assertTrue(wmax.rankingpiece)
def test_workouttype_fromfit(self):
    # A 3x250m interval FIT file must be classified as a 'Workout'.
    fit_file = 'rowers/tests/testdata/3x250m.fit'
    detected = dataprep.get_workouttype_from_fit(fit_file)
    self.assertEqual(detected, 'Workout')
def test_workouttype_fromtcx(self):
    # CrewNerd TCX data must be classified as an on-the-water ('water') row.
    tcx_file = 'rowers/tests/testdata/crewnerddata.tcx'
    detected = dataprep.get_workouttype_from_tcx(tcx_file)
    self.assertEqual(detected, 'water')
class InteractivePlotTests(TestCase):