From 31ff1d58883d9b3ea39cb4da201dd5f503facdd9 Mon Sep 17 00:00:00 2001 From: Sander Roosendaal Date: Mon, 18 Jan 2021 17:21:25 +0100 Subject: [PATCH] done with dataprep for now --- rowers/dataprep.py | 252 -------------------------------- rowers/tests/test_unit_tests.py | 9 ++ 2 files changed, 9 insertions(+), 252 deletions(-) diff --git a/rowers/dataprep.py b/rowers/dataprep.py index 5b23157b..ab951bba 100644 --- a/rowers/dataprep.py +++ b/rowers/dataprep.py @@ -2447,106 +2447,6 @@ def getsmallrowdata_db(columns, ids=[], doclean=True,workstrokesonly=True,comput return df -def getsmallrowdata_db_dask(columns, ids=[], doclean=True,workstrokesonly=True,compute=True): - # prepmultipledata(ids) - - csvfilenames = ['media/strokedata_{id}.parquet.gz'.format(id=id) for id in ids] - data = [] - columns = [c for c in columns if c != 'None'] - columns = list(set(columns)) - - if len(ids)>1: - for id,f in zip(ids,csvfilenames): - try: - #df = dd.read_parquet(f,columns=columns,engine='pyarrow') - df = dd.read_parquet(f,columns=columns) - data.append(df) - except OSError: - rowdata, row = getrowdata(id=id) - if rowdata and len(rowdata.df): - datadf = dataprep(rowdata.df,id=id,bands=True,otwpower=True,barchart=True) - # df = dd.read_parquet(f,columns=columns,engine='pyarrow') - df = dd.read_parquet(f,columns=columns) - data.append(df) - - df = dd.concat(data,axis=0) - # df = dd.concat(data,axis=0) - - else: - try: - df = dd.read_parquet(csvfilenames[0],columns=columns) - except OSError: - rowdata,row = getrowdata(id=ids[0]) - if rowdata and len(rowdata.df): - data = dataprep(rowdata.df,id=ids[0],bands=True,otwpower=True,barchart=True) - df = dd.read_parquet(csvfilenames[0],columns=columns) - # df = dd.read_parquet(csvfilenames[0], - # column=columns,engine='pyarrow', - # ) - - # df = df.loc[:,~df.columns.duplicated()] - - - - if compute: - data = df.compute() - if doclean: - data = clean_df_stats(data, ignorehr=True, - workstrokesonly=workstrokesonly) - data.dropna(axis=1,how='all',inplace=True) - data.dropna(axis=0,how='any',inplace=True) - return data - - return df - -def getsmallrowdata_db_old(columns, ids=[], doclean=True, workstrokesonly=True): - prepmultipledata(ids) - data,extracols = read_cols_df_sql(ids, columns) - if extracols and len(ids)==1: - w = Workout.objects.get(id=ids[0]) - row = rdata(w.csvfilename) - try: - row.set_instroke_metrics() - except (AttributeError,TypeError): - pass - - try: - f = row.df['TimeStamp (sec)'].diff().mean() - except (AttributeError,KeyError) as e: - f = 0 - - if f != 0 and not np.isnan(f): - windowsize = 2 * (int(10. / (f))) + 1 - else: - windowsize = 1 - for c in extracols: - try: - cdata = row.df[c] - cdata.fillna(inplace=True,method='bfill') - # This doesn't work because sometimes data are duplicated at save - try: - cdata2 = savgol_filter(cdata.values,windowsize,3) - data[c] = cdata2 - except ValueError: - data[c] = cdata - - - - except (KeyError, AttributeError): - data[c] = 0 - - - # convert newtons - - if doclean: - data = clean_df_stats(data, ignorehr=True, - workstrokesonly=workstrokesonly) - data.dropna(axis=1,how='all',inplace=True) - data.dropna(axis=0,how='any',inplace=True) - - - return data - # Fetch both the workout and the workout stroke data (from CSV file) @@ -2659,72 +2559,6 @@ def read_cols_df_sql(ids, columns, convertnewtons=True): return df,extracols -def read_cols_df_sql_old(ids, columns, convertnewtons=True): - # drop columns that are not in offical list - # axx = [ax[0] for ax in axes] - prepmultipledata(ids) - axx = [f.name for f in StrokeData._meta.get_fields()] - - extracols = [] - - columns2 = list(columns) - - for c in columns: - if not c in axx: - columns2.remove(c) - extracols.append(c) - - columns = list(columns2) + ['distance', 'spm', 'workoutid'] - columns = [x for x in columns if x != 'None'] - columns = list(set(columns)) - cls = '' - ids = [int(id) for id in ids] - engine = create_engine(database_url, echo=False) - - for column in columns: - cls += column + ', ' - cls = cls[:-2] - if len(ids) == 0: - return pd.DataFrame(),extracols - # query = sa.text('SELECT {columns} FROM strokedata WHERE workoutid=0'.format( - # columns=cls, - # )) - elif len(ids) == 1: - query = sa.text('SELECT {columns} FROM strokedata WHERE workoutid={id} ORDER BY time ASC'.format( - id=ids[0], - columns=cls, - )) - else: - query = sa.text('SELECT {columns} FROM strokedata WHERE workoutid IN {ids} ORDER BY time ASC'.format( - columns=cls, - ids=tuple(ids), - )) - - - connection = engine.raw_connection() - df = pd.read_sql_query(query, engine) - - - df = df.fillna(value=0) - - if 'peakforce' in columns: - funits = ((w.id, w.forceunit) - for w in Workout.objects.filter(id__in=ids)) - for id, u in funits: - if u == 'lbs': - mask = df['workoutid'] == id - df.loc[mask, 'peakforce'] = df.loc[mask, 'peakforce'] * lbstoN - if 'averageforce' in columns: - funits = ((w.id, w.forceunit) - for w in Workout.objects.filter(id__in=ids)) - for id, u in funits: - if u == 'lbs': - mask = df['workoutid'] == id - df.loc[mask, 'averageforce'] = df.loc[mask, - 'averageforce'] * lbstoN - - engine.dispose() - return df,extracols def initiate_cp(r): success = update_rolling_cp(r,otwtypes,'water') @@ -2750,93 +2584,7 @@ def read_df_sql(id): return df -def read_df_sql_old(id): - engine = create_engine(database_url, echo=False) - df = pd.read_sql_query(sa.text('SELECT * FROM strokedata WHERE workoutid={id} ORDER BY time ASC'.format( - id=id)), engine) - - engine.dispose() - df = df.fillna(value=0) - - funit = Workout.objects.get(id=id).forceunit - - if funit == 'lbs': - try: - df['peakforce'] = df['peakforce'] * lbstoN - except KeyError: - pass - - try: - df['averageforce'] = df['averageforce'] * lbstoN - except KeyError: - pass - - return df - -# Get the necessary data from the strokedata table in the DB. -# For the flex plot - - -def smalldataprep(therows, xparam, yparam1, yparam2): - df = pd.DataFrame() - if yparam2 == 'None': - yparam2 = 'power' - df[xparam] = [] - df[yparam1] = [] - df[yparam2] = [] - df['distance'] = [] - df['spm'] = [] - for workout in therows: - f1 = workout.csvfilename - - try: - rowdata = dataprep(rrdata(csvfile=f1).df) - - rowdata = pd.DataFrame({xparam: rowdata[xparam], - yparam1: rowdata[yparam1], - yparam2: rowdata[yparam2], - 'distance': rowdata['distance'], - 'spm': rowdata['spm'], - } - ) - if workout.forceunit == 'lbs': - try: - rowdata['peakforce'] *= lbstoN - except KeyError: - pass - - try: - rowdata['averageforce'] *= lbstoN - except KeyError: - pass - - df = pd.concat([df, rowdata], ignore_index=True) - except IOError: - try: - rowdata = dataprep(rrdata(csvfile=f1 + '.gz').df) - rowdata = pd.DataFrame({xparam: rowdata[xparam], - yparam1: rowdata[yparam1], - yparam2: rowdata[yparam2], - 'distance': rowdata['distance'], - 'spm': rowdata['spm'], - } - ) - if workout.forceunit == 'lbs': - try: - rowdata['peakforce'] *= lbstoN - except KeyError: - pass - - try: - rowdata['averageforce'] *= lbstoN - except KeyError: - pass - df = pd.concat([df, rowdata], ignore_index=True) - except IOError: - pass - - return df # data fusion diff --git a/rowers/tests/test_unit_tests.py b/rowers/tests/test_unit_tests.py index 01d9ddb0..bc1acac7 100644 --- a/rowers/tests/test_unit_tests.py +++ b/rowers/tests/test_unit_tests.py @@ -76,6 +76,15 @@ class DataPrepTests(TestCase): wmax = dataprep.check_marker(workouts[0]) self.assertTrue(wmax.rankingpiece) + def test_workouttype_fromfit(self): + filename = 'rowers/tests/testdata/3x250m.fit' + res = dataprep.get_workouttype_from_fit(filename) + self.assertEqual(res,'Workout') + + def test_workouttype_fromtcx(self): + filename = 'rowers/tests/testdata/crewnerddata.tcx' + res = dataprep.get_workouttype_from_tcx(filename) + self.assertEqual(res,'water') class InteractivePlotTests(TestCase):