From a53d932c597242f983e9c9aefb22043523624bec Mon Sep 17 00:00:00 2001 From: Sander Roosendaal Date: Thu, 18 Apr 2024 19:44:33 +0200 Subject: [PATCH] passing tests --- rowers/dataprep.py | 32 +++++++++----------------- rowers/dataroutines.py | 26 +++++++++++++++++---- rowers/interactiveplots.py | 18 +++++++-------- rowers/tests/mocks.py | 6 ++--- rowers/tests/test_unit_tests.py | 2 +- rowers/tests/testdata/testdata.tcx.gz | Bin 4000 -> 4000 bytes rowers/utils.py | 6 +++-- rowers/views/analysisviews.py | 16 ++++++++----- 8 files changed, 59 insertions(+), 47 deletions(-) diff --git a/rowers/dataprep.py b/rowers/dataprep.py index f6de443b..5983bb91 100644 --- a/rowers/dataprep.py +++ b/rowers/dataprep.py @@ -529,7 +529,7 @@ def setcp(workout, background=False, recurrance=True): ['power', 'workoutid', 'time'], ids=[workout.id]) if strokesdf.is_empty(): - return pl.DataFrame({'delta': [], 'cp': []}), pd.Series(dtype='float'), pd.Series(dtype='float') + return pl.DataFrame({'delta': [], 'cp': []}), pl.Series(dtype=pl.Float64), pl.Series(dtype=pl.Float64) totaltime = strokesdf['time'].max() maxt = totaltime/1000. @@ -544,7 +544,7 @@ def setcp(workout, background=False, recurrance=True): elif os.path.exists(csvfilename+'.gz'): # pragma: no cover csvfile = csvfilename+'.gz' else: # pragma: no cover - return pd.DataFrame({'delta': [], 'cp': []}), pd.Series(dtype='float'), pd.Series(dtype='float') + return pl.DataFrame({'delta': [], 'cp': []}), pl.Series(dtype=pl.Float64), pl.Series(dtype=pl.Float64) csvfile = os.path.abspath(csvfile) @@ -558,7 +558,7 @@ def setcp(workout, background=False, recurrance=True): grpc.channel_ready_future(channel).result(timeout=10) except grpc.FutureTimeoutError: # pragma: no cover dologging('metrics.log','grpc channel time out in setcp') - return pd.DataFrame({'delta': [], 'cp': []}), pd.Series(dtype='float'), pd.Series(dtype='float') + return pl.DataFrame({'delta': [], 'cp': []}), pl.Series(dtype=pl.Float64), pl.Series(dtype=pl.Float64) stub = metrics_pb2_grpc.MetricsStub(channel) req = metrics_pb2.CPRequest(filename = csvfile, filetype = "CSV", tarr = logarr) @@ -567,7 +567,7 @@ def setcp(workout, background=False, recurrance=True): response = stub.GetCP(req, timeout=60) except Exception as e: dologging('metrics.log', traceback.format_exc()) - return pd.DataFrame({'delta': [], 'cp': []}), pd.Series(dtype='float'), pd.Series(dtype='float') + return pl.DataFrame({'delta': [], 'cp': []}), pl.Series(dtype=pl.Float64), pl.Series(dtype=pl.Float64) delta = pl.Series(np.array(response.delta)) cpvalues = pl.Series(np.array(response.power)) @@ -735,32 +735,22 @@ def fetchcp_new(rower, workouts): data = [] for workout in workouts: - cpfile = 'media/cpdata_{id}.parquet.gz'.format(id=workout.id) - try: - df, delta, cpvalues = setcp(workout) - df = pd.read_parquet(cpfile) - df['workout'] = str(workout) - df['url'] = workout.url() - data.append(df) - except: - # CP data file doesn't exist yet. has to be created - df, delta, cpvalues = setcp(workout) - df = df.with_columns((pl.lit(str(workout))).alias("workout")) - df = df.with_columns((pl.lit(workout.url())).alias("url")) + df, delta, cpvalues = setcp(workout) + df = df.drop('id') + df = df.with_columns((pl.lit(str(workout))).alias("workout")) + df = df.with_columns((pl.lit(workout.url())).alias("url")) + if not df.is_empty(): data.append(df) if len(data) == 0: - return pl.Series(dtype='float'), pl.Series(dtype='float'), 0, pl.Series(dtype='float'), pl.Series(dtype='float') + return pl.Series(dtype=pl.Float64), pl.Series(dtype=pl.Float64), 0, pl.Series(dtype=pl.Float64), pl.Series(dtype=pl.Float64) if len(data) > 1: df = pl.concat(data) - #df = df.to_pandas() - - try: df = df.group_by(pl.col("delta")).agg(pl.max("cp"), pl.max("workout"), pl.max("url")).sort("delta") except (KeyError, ColumnNotFoundError): # pragma: no cover - return pd.Series(dtype='float'), pd.Series(dtype='float'), 0, pd.Series(dtype='float'), pd.Series(dtype='float') + return pl.Series(dtype=pl.Float64), pl.Series(dtype=pl.Float64), 0, pl.Series(dtype=pl.Float64), pl.Series(dtype=pl.Float64) df = df.filter(pl.col("cp")>20) diff --git a/rowers/dataroutines.py b/rowers/dataroutines.py index dad0dfc6..9f9e571c 100644 --- a/rowers/dataroutines.py +++ b/rowers/dataroutines.py @@ -32,6 +32,7 @@ import zipfile import os from rowers.models import strokedatafields import polars as pl +import polars.selectors as cs from polars.exceptions import ( ColumnNotFoundError, SchemaError, ComputeError, InvalidOperationError @@ -176,6 +177,19 @@ columndict = { 'cumdist': 'cum_dist', } +def remove_nulls_pl(data): + data = data.lazy().fill_nan(None) + data = data.select(cs.by_dtype(pl.NUMERIC_DTYPES)).collect() + data = data[[s.name for s in data if not s.is_infinite().sum()]] + data = data[[s.name for s in data if not (s.null_count() == data.height)]] + if not data.is_empty(): + try: + data = data.drop_nulls() + except: + pass + + return data + def get_video_data(w, groups=['basic'], mode='water'): modes = [mode, 'both', 'basic'] @@ -798,14 +812,11 @@ def clean_df_stats_pl(datadf, workstrokesonly=True, ignorehr=True, datadf = datadf.filter( pl.col("spm") >=0, - pl.col("efficiency")<=200, pl.col("spm")>=10, pl.col("pace")<=300*1000., - pl.col("efficiency")>=0, pl.col("pace")>=60*1000, pl.col("power")<=5000, pl.col("spm")<=120, - pl.col("wash")>=1 ) @@ -823,7 +834,10 @@ def clean_df_stats_pl(datadf, workstrokesonly=True, ignorehr=True, datadf = datadf.filter(pl.col("rhythm")>=0, pl.col("rhythm")<=70, pl.col("power")>=20, + pl.col("efficiency")<=200, pl.col("drivelength")>=0.5, + pl.col("wash")>=1, + pl.col("efficiency")>=0, pl.col("forceratio")>=0.2, pl.col("forceratio")<=1.0, pl.col("drivespeed")>=0.5, @@ -833,6 +847,7 @@ def clean_df_stats_pl(datadf, workstrokesonly=True, ignorehr=True, pl.col("catch")<=-30) + # workoutstateswork = [1, 4, 5, 8, 9, 6, 7] workoutstatesrest = [3] # workoutstatetransition = [0, 2, 10, 11, 12, 13] @@ -850,6 +865,8 @@ def clean_df_stats_pl(datadf, workstrokesonly=True, ignorehr=True, if ratio < 0.01 or after[workoutid] < 2: return data_orig + + return datadf @@ -1541,9 +1558,8 @@ def getsmallrowdata_pl(columns, ids=[], doclean=True, workstrokesonly=True, comp workstrokesonly=workstrokesonly, for_chart=for_chart) - data = data.fill_nan(None).drop_nulls() - return data + data = remove_nulls_pl(data) if not df.is_empty(): df = df.fill_nan(None).drop_nulls() diff --git a/rowers/interactiveplots.py b/rowers/interactiveplots.py index 80517b36..c5d79779 100644 --- a/rowers/interactiveplots.py +++ b/rowers/interactiveplots.py @@ -1588,19 +1588,19 @@ def interactive_cum_flex_chart2(theworkouts, promember=0, except (KeyError, ColumnNotFoundError): yparam1 = 'None' - datadf = datadf.fill_nan(None).drop_nulls() + datadf = dataprep.remove_nulls_pl(datadf) # test if we have drive energy try: # pragma: no cover _ = datadf['driveenergy'].mean() - except KeyError: # pragma: no cover - datadf['driveenergy'] = 500. + except (KeyError, ColumnNotFoundError): # pragma: no cover + datadf = datadf.with_columns((pl.lit(500)).alias("driveenergy")) # test if we have power try: # pragma: no cover _ = datadf['power'].mean() - except KeyError: # pragma: no cover - datadf['power'] = 50. + except (KeyError, ColumnNotFoundError): # pragma: no cover + datadf = datadf.with_columns((pl.lit(50)).alias("power")) yparamname1 = axlabels[yparam1] if yparam2 != 'None': @@ -1687,8 +1687,8 @@ def interactive_flexchart_stacked(id, r, xparam='time', columns = [name for name, d in metrics.rowingmetrics] columns_basic = [name for name, d in metrics.rowingmetrics if d['group'] == 'basic'] - columns = columns + ['spm', 'driveenergy', 'distance'] - columns_basic = columns_basic + ['spm', 'driveenergy', 'distance'] + columns = columns + ['spm', 'driveenergy', 'distance','workoutid','workoutstate'] + columns_basic = columns_basic + ['spm', 'driveenergy', 'distance','workoutid','workoutstate'] rowdata = pd.DataFrame() row = Workout.objects.get(id=id) @@ -1763,7 +1763,7 @@ def interactive_flexchart_stacked(id, r, xparam='time', rowdata = rowdata.with_columns(y4=pl.col("time")) rowdata = rowdata.with_columns((pl.col("y4")).alias(yparam4)) - + # replace nans rowdata = rowdata.fill_nan(0) @@ -1783,7 +1783,7 @@ def interactive_flexchart_stacked(id, r, xparam='time', 'metrics': metrics_list, } - script, div = get_chart("/stacked", chart_data) + script, div = get_chart("/stacked", chart_data, debug=False) return script, div diff --git a/rowers/tests/mocks.py b/rowers/tests/mocks.py index 80a12eb1..012dddf2 100644 --- a/rowers/tests/mocks.py +++ b/rowers/tests/mocks.py @@ -272,7 +272,7 @@ def mocked_fetchcperg(*args, **kwargs): return df -import pandas as pd + def mocked_read_df_sql(id): # pragma: no cover df = pd.read_csv('rowers/tests/testdata/fake_strokedata.csv') @@ -310,7 +310,7 @@ def mocked_getrowdata_uh(*args, **kwargs): # pragma: no cover return df, row def mocked_getsmallrowdata_uh(*args, **kwargs): # pragma: no cover - df = pd.read_csv('rowers/tests/testdata/uhfull.csv') + df = pl.read_csv('rowers/tests/testdata/uhfull.csv') return df @@ -390,7 +390,7 @@ def mocked_read_cols_df_sql(*args, **kwargs): def mock_workout_summaries(*args, **kwargs): - df = pd.read_csv('rowers/tests/testdata/workout_summaries.csv') + df = pl.read_csv('rowers/tests/testdata/workout_summaries.csv') return df def mocked_read_df_cols_sql_multi(ids, columns, convertnewtons=True): # pragma: no cover diff --git a/rowers/tests/test_unit_tests.py b/rowers/tests/test_unit_tests.py index 5e57721c..113faeb6 100644 --- a/rowers/tests/test_unit_tests.py +++ b/rowers/tests/test_unit_tests.py @@ -582,7 +582,7 @@ class DataPrepTests(TestCase): age = dataprep.calculate_age(born,today=today) self.assertEqual(age,49) - @patch('rowers.dataprep.getsmallrowdata_db',side_effect=mocked_getsmallrowdata_uh) + @patch('rowers.dataprep.getsmallrowdata_pl',side_effect=mocked_getsmallrowdata_uh) def test_goldmedalstandard(self,mocked_getsmallrowdata_uh): maxvalue, delta = dataprep.calculate_goldmedalstandard(self.r,self.wuh_otw) records = CalcAgePerformance.objects.filter( diff --git a/rowers/tests/testdata/testdata.tcx.gz b/rowers/tests/testdata/testdata.tcx.gz index 132867e104e5cbf19a16c3afdbd4a693272e648a..664305075f62deffbed4cd82b8f8d786cd9359b3 100644 GIT binary patch delta 456 zcmV;(0XP1jAD|xxABzYGMOh({2QmW343RrXe`mK@?xmgAn|pU(_5Jz&V)6FvTXWo9 zoUYd9=yMDROH+ba)?>4K? z&hVDa*~RgK0C4fglgH1tr}$yD-t^u2Xqk@MtoC>3eSCJ*_1j;2qZ`=MAN7XJyUmNg z566VLa`}+#Ti`DUQ zy0vBh>g>g{Gy;1Je=nhvmsUFa623d`e>_<14!E;B)5qVKKVN=Y?Y=*FaFIBkU3}R0 zX_wxfi?g?zZvCP=IoUp~^l`O5e$n+^I&ZgoyAqX8#~s{%-M>1!xbm9ZKR-WN9d&*B z>6b6Y<+;B)S^Dnh*Q=8kmp}LS^ggCTuRNR&Tb^|PNtevda)PU$zus-NPMf`a+Hbqz z^3i^A{)6Y~rVoExo-Dh~GI7YCK0LZB_jT~NTOU7qc=*fH%x3%NZu_t4Eq?m?^v`9w yGk_lecK;tNu6(h3^4rIBcxQ`xwNB6BK|;B_c$l8XpXt(vcm4+o)fi>OfB^vEdj%!{ delta 456 zcmV;(0XP1jAD|xxABzYG=M^B42QmUv3UFTtvT*4 zPFHJlbauK(Uv7?GEl<16;`+lEZ|Lss6#Lz~Z#c{Sqki>f)xZDcUB6uC8@zIYcbnB` zXL!rz?BaMq0J!+$$>V3+Q~a=6Z~AV1v`j~BR{Oj2K0Z6@`t7g1(GBeBk9tGq-R9+A z5+C^Yu7A*-oL#Jzo5dUa>;3g?sSGy;1Je}AT#=m?hXMS$;)`yUS$y94g*&h+s&=FgX(R=e*H9$X}jXBQv# zecGkB=i=<`rdz-4PENK@D}7w8k6(6um(JVm-mXOD({Ts)U-z%hF0Q;L_s`EyR!3c* ze){E$ae3~qPL{s=`St4L<>k-)J-v_V&?^t;!NpZ42s zxO}u)7Z20Zc#$rBc;|l&JZ&t*fB^vW4g}l) diff --git a/rowers/utils.py b/rowers/utils.py index 7cb1413b..95e5fb78 100644 --- a/rowers/utils.py +++ b/rowers/utils.py @@ -440,17 +440,19 @@ def wavg(group, avg_name, weight_name): """ try: d = group[avg_name] - except KeyError: + except (KeyError, ColumnNotFoundError): return 0 try: w = group[weight_name] - except KeyError: + except (KeyError, ColumnNotFoundError): return d.mean() try: return (d * w).sum() / w.sum() except ZeroDivisionError: # pragma: no cover return d.mean() + return 0 + from string import Formatter def totaltime_sec_to_string(totaltime, shorten=False): diff --git a/rowers/views/analysisviews.py b/rowers/views/analysisviews.py index cbab1c80..86ed09d3 100644 --- a/rowers/views/analysisviews.py +++ b/rowers/views/analysisviews.py @@ -932,12 +932,16 @@ def boxplotdata(workouts, options): datadf = dataprep.clean_df_stats_pl(datadf, workstrokesonly=workstrokesonly) - datadf = datadf.filter( - pl.col("spm")>spmmin, - pl.col("spm")workmin, - pl.col("driveenergy")spmmin, + pl.col("spm")workmin, + pl.col("driveenergy")