diff --git a/rowers/dataprep.py b/rowers/dataprep.py index 5983bb91..275f8684 100644 --- a/rowers/dataprep.py +++ b/rowers/dataprep.py @@ -29,6 +29,7 @@ import itertools import numpy as np import pandas as pd import polars as pl +from polars.exceptions import ColumnNotFoundError from zipfile import BadZipFile import zipfile import os @@ -422,7 +423,8 @@ def calculate_goldmedalstandard(rower, workout, recurrance=True): try: df = pl.read_parquet(cpfile) except: - df = getsmallrowdata_pl(['power'], ids=[workout.id]) + df = read_data(['power'], ids=[workout.id]) + df = remove_nulls_pl(df) background = True if settings.TESTING: background = False @@ -525,8 +527,9 @@ def setcp(workout, background=False, recurrance=True): except Exception as e: pass - strokesdf = getsmallrowdata_pl( + strokesdf = read_data( ['power', 'workoutid', 'time'], ids=[workout.id]) + strokesdf = remove_nulls_pl(strokesdf) if strokesdf.is_empty(): return pl.DataFrame({'delta': [], 'cp': []}), pl.Series(dtype=pl.Float64), pl.Series(dtype=pl.Float64) @@ -617,14 +620,10 @@ def update_wps(r, types, mode='water', asynchron=True): mode ) - df = getsmallrowdata_db(['time', 'driveenergy'], ids=ids) + df = read_data(['time', 'driveenergy'], ids=ids) try: - mask = df['driveenergy'] > 100 - except (KeyError, TypeError): - return False - try: - wps_median = int(df.loc[mask, 'driveenergy'].median()) + wps_median = int(df.filter(pl.col("driveenergy")>100)["driveenergy"].median()) if mode == 'water': r.median_wps = wps_median else: # pragma: no cover @@ -635,6 +634,8 @@ def update_wps(r, types, mode='water', asynchron=True): pass except OverflowError: pass + except ColumnNotFoundError: + pass return True diff --git a/rowers/dataroutines.py b/rowers/dataroutines.py index ce986ac3..90f50aeb 100644 --- a/rowers/dataroutines.py +++ b/rowers/dataroutines.py @@ -1488,90 +1488,6 @@ def getrowdata_pl(id=0, doclean=False, convertnewtons=True, return data, row -# Fetch a subset of the data from the DB - -def getsmallrowdata_pl(columns, ids=[], doclean=True, workstrokesonly=True, compute=True, - debug=False, for_chart=False): - if ids: - csvfilenames = [ - 'media/strokedata_{id}.parquet.gz'.format(id=id) for id in ids] - else: - return pl.DataFrame() - - data = [] - columns = [c for c in columns if c != 'None'] + ['distance', 'spm', 'workoutid'] - columns = list(set(columns)) - - df = pl.DataFrame() - if len(ids) > 1: - for id, f in zip(ids, csvfilenames): - try: - df = pl.read_parquet(f, columns=columns) - data.append(df) - except (IsADirectoryError, FileNotFoundError, OSError, ArrowInvalid, IndexError): # pragma: no cover - rowdata, row = getrowdata(id=id) - try: - shutil.rmtree(f) - except: - pass - if rowdata and len(rowdata.df): - _ = dataplep(rowdata.df, id=id, - bands=True, otwpower=True, barchart=True, - polars=True) - try: - df = pl.read_parquet(f, columns=columns) - data.append(df) - except (OSError, ArrowInvalid, IndexError): - pass - try: - df = pl.concat(data, rechunk=True) - except ValueError: # pragma: no cover - return pl.DataFrame() - except SchemaError: - df = pl.concat(data, rechunk=True, how='vertical_relaxed') - - - else: - try: - df = pl.read_parquet(csvfilenames[0], columns=columns) - rowdata, row = getrowdata(id=ids[0]) - except (OSError, IndexError, ArrowInvalid): - rowdata, row = getrowdata(id=ids[0]) - if rowdata and len(rowdata.df): # pragma: no cover - data = dataplep( - rowdata.df, id=ids[0], bands=True, otwpower=True, barchart=True) - try: - df = pl.read_parquet(csvfilenames[0], columns=columns) - except: - df = pl.DataFrame - else: - df = pl.DataFrame() - except: - rowdata, row = getrowdata(id=ids[0]) - if rowdata and len(rowdata.df): # pragma: no cover - data = dataplep( - rowdata.df, id=ids[0], bands=True, otwpower=True, barchart=True) - try: - df = pl.read_parquet(csvfilenames[0], columns=columns) - except: - df = pl.DataFrame() - else: - df = pl.DataFrame() - - if compute and len(df): - data = df.clone() - if doclean: - data = clean_df_stats_pl(data, ignorehr=True, - workstrokesonly=workstrokesonly, - for_chart=for_chart) - - - data = remove_nulls_pl(data) - - if not df.is_empty(): - df = df.fill_nan(None).drop_nulls() - - return df def read_data(columns, ids=[], doclean=True, workstrokesonly=True, debug=False, for_chart=False, compute=True): @@ -1603,6 +1519,8 @@ def read_data(columns, ids=[], doclean=True, workstrokesonly=True, debug=False, data.append(df) data = pl.collect_all(data) + if len(data)==0: + return pl.DataFrame() try: datadf = pl.concat(data).select(columns) @@ -1635,7 +1553,15 @@ def read_data(columns, ids=[], doclean=True, workstrokesonly=True, debug=False, for df in data ] - datadf = pl.concat(data) + try: + datadf = pl.concat(data) + except SchemaError: + data = [ + df.with_columns(cs.integer().cast(pl.Float64)) for df in data + ] + datadf = pl.concat(data) + + exprs = [] diff --git a/rowers/interactiveplots.py b/rowers/interactiveplots.py index 0098c007..06fe2637 100644 --- a/rowers/interactiveplots.py +++ b/rowers/interactiveplots.py @@ -1612,26 +1612,26 @@ def interactive_cum_flex_chart2(theworkouts, promember=0, try: datadf = datadf.with_columns(pl.col(xparam).alias("x1")) - except KeyError: # pragma: no cover + except (KeyError, ColumnNotFoundError): # pragma: no cover try: datadf = datadf.with_columns(pl.col("distance").alias("x1")) - except KeyError: + except (KeyError, ColumnNotFoundError): try: datadf = datadf.with_columns(pl.col('time').alias("x1")) - except KeyError: # pragma: no cover + except (KeyError, ColumnNotFoundError): # pragma: no cover return ['', '

No non-zero data in selection

', '', ''] try: datadf = datadf.with_columns(pl.col(yparam1).alias("y1")) - except KeyError: + except (KeyError, ColumnNotFoundError): try: datadf = datadf.with_columns(pl.col('pace').alias("y1")) - except KeyError: # pragma: no cover + except (KeyError, ColumnNotFoundError): # pragma: no cover return ['', '

No non-zero data in selection

', '', ''] if yparam2 != 'None': try: datadf = datadf.with_columns(pl.col(yparam2).alias("y2")) - except KeyError: # pragma: no cover + except (KeyError, ColumnNotFoundError): # pragma: no cover datadf = datadf.with_columns(pl.col("y1").alias("y2")) else: # pragma: no cover datadf = datadf.with_columns(pl.col("y1").alias("y2")) @@ -2099,14 +2099,11 @@ def interactive_multiple_compare_chart(ids, xparam, yparam, plottype='line', promember=0, workstrokesonly=True, labeldict=None, startenddict={}): - message = '' - errormessage = '' - - columns = [name for name, d in metrics.rowingmetrics] - columns_basic = [name for name, d in metrics.rowingmetrics if d['group'] == 'basic'] + columns = [xparam,yparam] + columns_basic = [xparam,yparam] add_columns = [ 'ftime', 'distance', 'fpace', - 'power', 'hr', 'spm', + 'spm', 'time', 'pace', 'workoutstate', 'workoutid' ] @@ -2122,47 +2119,37 @@ def interactive_multiple_compare_chart(ids, xparam, yparam, plottype='line', datadf = pd.DataFrame() if promember: - datadf = dataprep.getsmallrowdata_db(columns, ids=ids, doclean=doclean, - compute=compute, - workstrokesonly=workstrokesonly, for_chart=True) + datadf = dataprep.read_data(columns, ids=ids, doclean=doclean, + compute=compute, + workstrokesonly=workstrokesonly, for_chart=True) else: - datadf = dataprep.getsmallrowdata_db(columns_basic, ids=ids, doclean=doclean, - compute=compute, - workstrokesonly=workstrokesonly, for_chart=True) + datadf = dataprep.read_data(columns_basic, ids=ids, doclean=doclean, + compute=compute, + workstrokesonly=workstrokesonly, for_chart=True) - + + datadf = dataprep.remove_nulls_pl(datadf) # check if dataframe not empty - if datadf.empty: # pragma: no cover + if datadf.is_empty(): # pragma: no cover return ['

No non-zero data in selection

', ''] - datadf['workoutid'] = datadf['workoutid'].astype(int) - datadf.dropna(axis=1, how='all', inplace=True) - datadf.dropna(axis=0, how='all', inplace=True) + datadf = datadf.with_columns(pl.col("workoutid").cast(pl.UInt32).keep_name()) nrworkouts = len(ids) try: - tseconds = datadf.loc[:, 'time'] - except KeyError: # pragma: no cover + tseconds = datadf['time'] + except (KeyError, ColumnNotFoundError): # pragma: no cover try: - tseconds = datadf.loc[:, xparam] + tseconds = datadf[xparam] except: return ['

A chart data error occurred

', ''] - # check if dataframe not empty - if datadf.empty: # pragma: no cover - return ['

No non-zero data in selection

', ''] - - if (xparam == 'time'): - datadf[xparam] = datadf[xparam] - datadf[xparam].iloc[0] + datadf = datadf.with_columns((pl.col(xparam)-datadf[0,xparam]).alias(xparam)) - datadf = datadf.fillna(0) - datadf.replace([np.inf, -np.inf], np.nan, inplace=True) - datadf = datadf.fillna(0) - - data_dict = datadf.to_dict("records") + data_dict = datadf.to_dicts() metrics_list = [{'name': name, 'rowingmetrics':d } for name, d in metrics.rowingmetrics] @@ -2178,8 +2165,8 @@ def interactive_multiple_compare_chart(ids, xparam, yparam, plottype='line', 'workouts': workoutsdict, } - script, div = get_chart("/compare", chart_data) - return script, div, message, errormessage + script, div = get_chart("/compare", chart_data, debug=False) + return script, div def get_zones_report_pl(rower, startdate, enddate, trainingzones='hr', date_agg='week', yaxis='time'): @@ -2202,7 +2189,10 @@ def get_zones_report_pl(rower, startdate, enddate, trainingzones='hr', date_agg= df = dataprep.read_data(columns, ids=ids, workstrokesonly=False, doclean=False) df = dataprep.remove_nulls_pl(df) - df = df.with_columns((pl.col("time").diff().clip(0, 20*1.e3)).alias("deltat")).lazy() + try: + df = df.with_columns((pl.col("time").diff().clip(0, 20*1.e3)).alias("deltat")).lazy() + except ColumnNotFoundError: + pass hrzones = rower.hrzones powerzones = rower.powerzones diff --git a/rowers/tests/mocks.py b/rowers/tests/mocks.py index cfae2db4..617c7c38 100644 --- a/rowers/tests/mocks.py +++ b/rowers/tests/mocks.py @@ -301,7 +301,7 @@ def mocked_getrowdata_db(*args, **kwargs): return df,row def mocked_getrowdata_uh(*args, **kwargs): # pragma: no cover - df = pd.read_csv('rowers/tests/testdata/uhfull.csv') + df = pl.read_csv('rowers/tests/testdata/uhfull.csv') id = kwargs['id'] @@ -315,7 +315,7 @@ def mocked_getsmallrowdata_uh(*args, **kwargs): # pragma: no cover return df def mocked_getsmallrowdata_forfusion(*args, **kwargs): - df = pd.read_csv('rowers/tests/testdata/getrowdata_mock.csv') + df = pl.read_csv('rowers/tests/testdata/getrowdata_mock.csv') return df diff --git a/rowers/tests/test_analysis.py b/rowers/tests/test_analysis.py index f7101bb7..69d98787 100644 --- a/rowers/tests/test_analysis.py +++ b/rowers/tests/test_analysis.py @@ -169,7 +169,7 @@ class ForcecurveTest(TestCase): pass - @patch('rowers.dataprep.getsmallrowdata_db',side_effect = mocked_getempowerdata_db) + @patch('rowers.dataprep.read_data',side_effect = mocked_read_data) def test_forcecurve_plot(self, mocked_getsmallrowdata_db): login = self.c.login(username=self.u.username, password = self.password) self.assertTrue(login) @@ -600,9 +600,9 @@ class History(TestCase): pass @patch('rowers.dataprep.create_engine') - @patch('rowers.dataprep.getsmallrowdata_db',side_effect=mocked_getsmallrowdata_db) + @patch('rowers.dataprep.read_data',side_effect=mocked_read_data) def test_workouts_history(self, mocked_sqlalchemy, - mocked_getsmallrowdata_db): + mocked_read_data): login = self.c.login(username=self.u.username, password=self.password) self.assertTrue(login) @@ -916,7 +916,7 @@ class WorkoutStatsTestNew(TestCase): self.assertEqual(response.status_code,200) @patch('rowers.dataprep.create_engine') - @patch('rowers.dataprep.getsmallrowdata_db', side_effect=mocked_getsmallrowdata_db) + @patch('rowers.dataprep.read_data', side_effect=mocked_read_data) @patch('rowers.dataprep.read_cols_df_sql', side_effect=mocked_read_cols_df_sql) def test_analysis_data(self, mocked_sqlalchemy, @@ -1257,8 +1257,8 @@ class MarkerPerformanceTest(TestCase): self.assertRedirects(response, expected_url=expected_url, status_code=302,target_status_code=200) - @patch('rowers.dataprep.getsmallrowdata_db', side_effect=mocked_getsmallrowdata_uh) - def test_trainingzones_view(self,mocked_getsmallrowdata_db): + @patch('rowers.dataprep.read_data', side_effect=mocked_getsmallrowdata_uh) + def test_trainingzones_view(self,mocked_getsmallrowdata_uh): login = self.c.login(username=self.u.username,password=self.password) self.assertTrue(login) diff --git a/rowers/tests/test_emails.py b/rowers/tests/test_emails.py index 6523754b..a606263d 100644 --- a/rowers/tests/test_emails.py +++ b/rowers/tests/test_emails.py @@ -55,8 +55,8 @@ class EmailUpload(TestCase): @patch('rowers.dataprep.create_engine') - @patch('rowers.dataprep.getsmallrowdata_db',side_effect=mocked_getsmallrowdata_db) - def test_uploadapi(self,mocked_sqlalchemy,mocked_getsmallrowdata_db): + @patch('rowers.dataprep.read_data',side_effect=mocked_read_data) + def test_uploadapi(self,mocked_sqlalchemy,mocked_read_data): form_data = { 'title': 'test', 'workouttype':'rower', diff --git a/rowers/tests/testdata/testdata.tcx.gz b/rowers/tests/testdata/testdata.tcx.gz index 5bf43e1e..d8a32774 100644 Binary files a/rowers/tests/testdata/testdata.tcx.gz and b/rowers/tests/testdata/testdata.tcx.gz differ diff --git a/rowers/utils.py b/rowers/utils.py index 95e5fb78..7f96a94a 100644 --- a/rowers/utils.py +++ b/rowers/utils.py @@ -7,6 +7,7 @@ import math import numpy as np import pandas as pd import polars as pl +from polars.exceptions import ColumnNotFoundError import colorsys from django.conf import settings import collections diff --git a/rowers/views/analysisviews.py b/rowers/views/analysisviews.py index 86ed09d3..3d0323f6 100644 --- a/rowers/views/analysisviews.py +++ b/rowers/views/analysisviews.py @@ -571,7 +571,7 @@ def flexalldata(workouts, options): workstrokesonly = not includereststrokes columns = [xparam, yparam1, yparam2, 'spm', 'driveenergy', 'distance'] ids = [int(w.id) for w in workouts] - df = dataprep.getsmallrowdata_pl(columns, ids=ids, + df = dataprep.read_data(columns, ids=ids, workstrokesonly=workstrokesonly, doclean=True, ) @@ -928,9 +928,9 @@ def boxplotdata(workouts, options): ids = [w.id for w in workouts] # prepare data frame - datadf = getsmallrowdata_pl(fieldlist, ids) + datadf = dataprep.read_data(fieldlist, ids) - datadf = dataprep.clean_df_stats_pl(datadf, workstrokesonly=workstrokesonly) + datadf = dataprep.remove_nulls_pl(datadf) try: datadf = datadf.filter( @@ -2361,17 +2361,16 @@ def history_view_data(request, userid=0): ids = [w.id for w in g_workouts] - # columns = ['hr', 'power', 'time'] - columns = [name for name, d in metrics.rowingmetrics]+['workoutstate', 'workoutid'] + columns = ['hr', 'power', 'time', 'workoutstate', 'workoutid'] + + df = dataprep.read_data(columns, ids=ids) + df = dataprep.remove_nulls_pl(df) - df = getsmallrowdata_pl(columns, ids=ids) try: df = df.with_columns(pl.col('time').diff().clip(lower_bound=0).alias("deltat")) except KeyError: # pragma: no cover pass - df = dataprep.clean_df_stats_pl(df, workstrokesonly=True, - ignoreadvanced=True, ignorehr=False) totalmeters, totalhours, totalminutes, totalseconds = get_totals( g_workouts) @@ -2400,7 +2399,8 @@ def history_view_data(request, userid=0): whours=whours, wminutes=wminutes, wseconds=wseconds, ) - ddf = getsmallrowdata_pl(columns, ids=[w.id for w in a_workouts]) + ddf = dataprep.read_data(columns, ids=[w.id for w in a_workouts]) + ddf = dataprep.remove_nulls_pl(ddf) try: ddf = ddf.with_columns(pl.col("time").diff().clip(lower_bound=0).alias("deltat")) except KeyError: # pragma: no cover @@ -2421,7 +2421,7 @@ def history_view_data(request, userid=0): ddict['powermean'] = int(wavg(ddf, 'power', 'deltat')) try: ddict['powermax'] = int(ddf['power'].max()) - except KeyError: # pragma: no cover + except (KeyError, ColumnNotFoundError): # pragma: no cover ddict['powermax'] = 0 ddict['nrworkouts'] = a_workouts.count() listofdicts.append(ddict) @@ -2436,13 +2436,13 @@ def history_view_data(request, userid=0): try: totalsdict['powermean'] = int(wavg(df, 'power', 'deltat')) totalsdict['powermax'] = int(df['power'].max()) - except KeyError: # pragma: no cover + except (KeyError, ColumnNotFoundError): # pragma: no cover totalsdict['powermean'] = 0 totalsdict['powermax'] = 0 try: totalsdict['hrmean'] = int(wavg(df, 'hr', 'deltat')) totalsdict['hrmax'] = int(df['hr'].max()) - except KeyError: # pragma: no cover + except (KeyError, ColumnNotFoundError): # pragma: no cover totalsdict['hrmean'] = 0 totalsdict['hrmax'] = 0 @@ -2461,7 +2461,8 @@ def history_view_data(request, userid=0): a_workouts = g_workouts.filter(workouttype=typeselect) meters, hours, minutes, seconds = get_totals(a_workouts) totalseconds = 3600 * hours + 60 * minutes + seconds - ddf = getsmallrowdata_pl(columns, ids=[w.id for w in a_workouts]) + ddf = dataprep.read_data(columns, ids=[w.id for w in a_workouts]) + ddf = dataprep.remove_nulls_pl(ddf) if ddf.is_empty(): totalscript = "" totaldiv = "No data" diff --git a/rowers/views/statements.py b/rowers/views/statements.py index 991b440f..83e22708 100644 --- a/rowers/views/statements.py +++ b/rowers/views/statements.py @@ -16,7 +16,7 @@ from rowers.utils import ( from rowers.celery import result as celery_result from rowers.interactiveplots import * from scipy.interpolate import griddata -from rowers.dataprep import getsmallrowdata_db, getsmallrowdata_pl +from rowers.dataprep import getsmallrowdata_db, read_data from rowers.dataprep import timedeltaconv from scipy.special import lambertw from io import BytesIO