def getrowdata_pl(id=0, doclean=False, convertnewtons=True,
                  checkefficiency=True, for_chart=False):
    """Return the stroke data of one workout as a polars DataFrame.

    The stored stroke data is read through ``read_df_sql(id, polars=True)``.
    When that yields nothing usable (no rows, or no ``time`` column) the
    data is rebuilt from the raw workout via ``getrowdata`` and ``dataprep``.

    Args:
        id: Workout ID.
        doclean: When true, pass the frame through ``clean_df_stats``.
        convertnewtons: Accepted for signature parity with the pandas
            variant; not used by this function.
        checkefficiency: When true, (re)compute the ``efficiency`` column
            if it is missing or all-zero while ``power`` is not.
        for_chart: Forwarded to ``clean_df_stats``.

    Returns:
        A ``(data, row)`` tuple: the polars DataFrame (possibly empty) and
        the workout row.
    """
    data = read_df_sql(id, polars=True)

    # polars raises ColumnNotFoundError (not KeyError) on a missing column,
    # so test for the column explicitly instead of relying on an except.
    if "time" in data.columns:
        data = data.with_columns(pl.col("time").diff().alias("deltat"))
    else:  # pragma: no cover
        data = pl.DataFrame()

    if data.is_empty():
        # NOTE(review): assumes getrowdata() returns an object exposing
        # `.empty` and `.df` -- confirm against its definition.
        rowdata, row = getrowdata(id=id)
        if not rowdata.empty:  # pragma: no cover
            data = dataprep(rowdata.df, id=id, bands=True,
                            barchart=True, otwpower=True, polars=True)
        else:
            data = pl.DataFrame()  # returning empty dataframe
    else:
        row = Workout.objects.get(id=id)

    if checkefficiency is True and not data.is_empty():
        columns = data.columns
        # A missing column takes the same path as an all-zero efficiency;
        # `or` short-circuits, so the means are only read when both exist.
        needs_efficiency = (
            "efficiency" not in columns
            or "power" not in columns
            or (data["efficiency"].mean() == 0
                and data["power"].mean() != 0)
        )
        if needs_efficiency:  # pragma: no cover
            # add_efficiency_pl only accepts `id`.
            data = add_efficiency_pl(id=id)

    if doclean:  # pragma: no cover
        # NOTE(review): presumably clean_df_stats accepts a polars frame
        # here -- confirm.
        data = clean_df_stats(data, ignorehr=True, for_chart=for_chart)

    return data, row
def remove_invalid_columns_pl(df):  # pragma: no cover
    """Return the polars DataFrame *df* without columns not in ``allowedcolumns``.

    Column *names* are compared against ``allowedcolumns``;
    ``df.get_columns()`` yields Series objects and cannot be used for a
    name lookup or passed to ``drop``.
    """
    invalid = [name for name in df.columns if name not in allowedcolumns]
    if invalid:
        df = df.drop(invalid)

    return df


def _finite_or_null(expr):  # pragma: no cover
    """Return *expr* with NaN and +/-inf replaced by null."""
    return pl.when(expr.is_finite()).then(expr).otherwise(None)


def add_efficiency_pl(id=0):  # pragma: no cover
    """Compute the ``efficiency`` column for a workout and store the result.

    polars counterpart of ``add_efficiency``. The frame is loaded with
    ``getrowdata_pl`` (efficiency check disabled, so the two functions do
    not recurse into each other), the efficiency is derived from the
    ``pace`` and ``power`` columns, non-finite values are nulled and
    forward-filled, and, for a non-zero *id*, the frame is written back to
    ``media/strokedata_{id}.parquet.gz``.

    Args:
        id: Workout ID. With ``id == 0`` nothing is written to disk.

    Returns:
        The updated polars DataFrame. If ``power`` or ``pace`` is missing
        the loaded frame is returned unchanged and stored data is left
        untouched.
    """
    rowdata, row = getrowdata_pl(id=id,
                                 doclean=False,
                                 convertnewtons=False,
                                 checkefficiency=False)

    # Without both inputs the efficiency cannot be computed; bail out
    # before deleting anything.
    if not {"power", "pace"}.issubset(rowdata.columns):
        return rowdata

    # NOTE(review): presumably pace is stored in ms per 500 m, hence the
    # division by 1e3 -- confirm.
    pace = pl.col("pace") / 1.0e3
    velo = 500. / pace
    ergpw = 2.8 * velo**3
    efficiency = 100. * ergpw / pl.col("power")

    # Zero pace/power produce inf or NaN; null them, then carry the last
    # valid value forward.
    rowdata = rowdata.with_columns(
        _finite_or_null(efficiency).forward_fill().alias("efficiency")
    )

    rowdata = remove_invalid_columns_pl(rowdata)

    # Same clean-up for the whole frame; is_finite only applies to floats.
    float_columns = [
        name for name, dtype in rowdata.schema.items()
        if dtype in (pl.Float32, pl.Float64)
    ]
    if float_columns:
        rowdata = rowdata.with_columns(
            [_finite_or_null(pl.col(name)).alias(name)
             for name in float_columns]
        )
    rowdata = rowdata.with_columns(pl.all().forward_fill())

    delete_strokedata(id)

    if id != 0:
        rowdata = rowdata.with_columns(pl.lit(id).alias("workoutid"))
        filename = 'media/strokedata_{id}.parquet.gz'.format(id=id)
        rowdata.write_parquet(filename, compression='gzip')

    return rowdata
@@ -490,7 +492,7 @@ def interactive_forcecurve(theworkouts): 'thresholdforce': thresholdforce, } - script, div = get_chart("/forcecurve", chart_data) + script, div = get_chart("/forcecurve", chart_data, debug=False) return script, div @@ -822,25 +824,27 @@ def interactive_histoall(theworkouts, histoparam, includereststrokes, ids = [int(w.id) for w in theworkouts] + columns = [name for name, d in metrics.rowingmetrics]+['spm', 'driveenergy', 'distance', 'workoutstate', 'workoutid'] + workstrokesonly = not includereststrokes - rowdata = dataprep.getsmallrowdata_db( - [histoparam], ids=ids, doclean=True, workstrokesonly=workstrokesonly) + rowdata = dataprep.getsmallrowdata_pl( + columns, ids=ids, doclean=True, workstrokesonly=workstrokesonly) - rowdata.dropna(axis=0, how='any', inplace=True) + rowdata = rowdata.fill_nan(None).drop_nulls() - rowdata = dataprep.filter_df(rowdata, 'spm', spmmin, largerthan=True) - rowdata = dataprep.filter_df(rowdata, 'spm', spmmax, largerthan=False) + #rowdata = dataprep.filter_df(rowdata, 'spm', spmmin, largerthan=True) + #rowdata = dataprep.filter_df(rowdata, 'spm', spmmax, largerthan=False) - rowdata = dataprep.filter_df( - rowdata, 'driveenergy', workmin, largerthan=True) - rowdata = dataprep.filter_df( - rowdata, 'driveenergy', workmax, largerthan=False) + #rowdata = dataprep.filter_df( + # rowdata, 'driveenergy', workmin, largerthan=True) + #rowdata = dataprep.filter_df( + # rowdata, 'driveenergy', workmax, largerthan=False) - if rowdata.empty: + if rowdata.is_empty(): return "", "No Valid Data Available" try: - histopwr = rowdata[histoparam].values + histopwr = rowdata[histoparam].to_numpy() except KeyError: return "", "No data" if len(histopwr) == 0: # pragma: no cover diff --git a/rowers/tests/testdata/testdata.tcx.gz b/rowers/tests/testdata/testdata.tcx.gz index b20a8764..ec58e900 100644 Binary files a/rowers/tests/testdata/testdata.tcx.gz and b/rowers/tests/testdata/testdata.tcx.gz differ