Private
Public Access
1
0
This commit is contained in:
2024-04-20 17:14:22 +02:00
parent ffe295d8fc
commit 30bf61cbe1
10 changed files with 76 additions and 157 deletions

View File

@@ -29,6 +29,7 @@ import itertools
import numpy as np import numpy as np
import pandas as pd import pandas as pd
import polars as pl import polars as pl
from polars.exceptions import ColumnNotFoundError
from zipfile import BadZipFile from zipfile import BadZipFile
import zipfile import zipfile
import os import os
@@ -422,7 +423,8 @@ def calculate_goldmedalstandard(rower, workout, recurrance=True):
try: try:
df = pl.read_parquet(cpfile) df = pl.read_parquet(cpfile)
except: except:
df = getsmallrowdata_pl(['power'], ids=[workout.id]) df = read_data(['power'], ids=[workout.id])
df = remove_nulls_pl(df)
background = True background = True
if settings.TESTING: if settings.TESTING:
background = False background = False
@@ -525,8 +527,9 @@ def setcp(workout, background=False, recurrance=True):
except Exception as e: except Exception as e:
pass pass
strokesdf = getsmallrowdata_pl( strokesdf = read_data(
['power', 'workoutid', 'time'], ids=[workout.id]) ['power', 'workoutid', 'time'], ids=[workout.id])
strokesdf = remove_nulls_pl(strokesdf)
if strokesdf.is_empty(): if strokesdf.is_empty():
return pl.DataFrame({'delta': [], 'cp': []}), pl.Series(dtype=pl.Float64), pl.Series(dtype=pl.Float64) return pl.DataFrame({'delta': [], 'cp': []}), pl.Series(dtype=pl.Float64), pl.Series(dtype=pl.Float64)
@@ -617,14 +620,10 @@ def update_wps(r, types, mode='water', asynchron=True):
mode mode
) )
df = getsmallrowdata_db(['time', 'driveenergy'], ids=ids) df = read_data(['time', 'driveenergy'], ids=ids)
try: try:
mask = df['driveenergy'] > 100 wps_median = int(df.filter(pl.col("driveenergy")>100)["driveenergy"].median())
except (KeyError, TypeError):
return False
try:
wps_median = int(df.loc[mask, 'driveenergy'].median())
if mode == 'water': if mode == 'water':
r.median_wps = wps_median r.median_wps = wps_median
else: # pragma: no cover else: # pragma: no cover
@@ -635,6 +634,8 @@ def update_wps(r, types, mode='water', asynchron=True):
pass pass
except OverflowError: except OverflowError:
pass pass
except ColumnNotFoundError:
pass
return True return True

View File

@@ -1488,90 +1488,6 @@ def getrowdata_pl(id=0, doclean=False, convertnewtons=True,
return data, row return data, row
# Fetch a subset of the data from the DB
def getsmallrowdata_pl(columns, ids=None, doclean=True, workstrokesonly=True, compute=True,
                       debug=False, for_chart=False):
    """Read selected stroke-data columns for the given workout ids as a polars DataFrame.

    For each workout id the data is read from a per-workout parquet cache file
    (``media/strokedata_<id>.parquet.gz``). When a cache file is missing or
    unreadable, the raw row data is re-fetched via ``getrowdata`` and the cache
    is rebuilt through ``dataplep`` before retrying the read.

    Parameters:
        columns: column names to load; the literal string 'None' is dropped and
            'distance', 'spm' and 'workoutid' are always added.
        ids: workout ids to load; falsy (None or empty) returns an empty frame.
        doclean: if True (and ``compute``), run ``clean_df_stats_pl`` on a clone.
        workstrokesonly: forwarded to ``clean_df_stats_pl``.
        compute: gate for the cleaning step.
        debug: unused here; kept for interface compatibility with callers.
        for_chart: forwarded to ``clean_df_stats_pl``.

    Returns:
        pl.DataFrame with the requested columns; NaNs/nulls dropped row-wise.
        Returns an empty DataFrame when nothing could be read.
    """
    if not ids:
        return pl.DataFrame()
    csvfilenames = [
        'media/strokedata_{id}.parquet.gz'.format(id=id) for id in ids]
    data = []
    # Always include the grouping/support columns; deduplicate.
    columns = [c for c in columns if c != 'None'] + ['distance', 'spm', 'workoutid']
    columns = list(set(columns))
    df = pl.DataFrame()
    if len(ids) > 1:
        for id, f in zip(ids, csvfilenames):
            try:
                df = pl.read_parquet(f, columns=columns)
                data.append(df)
            except (IsADirectoryError, FileNotFoundError, OSError, ArrowInvalid, IndexError):  # pragma: no cover
                # Cache miss or corrupt cache: rebuild it from the raw row data.
                rowdata, row = getrowdata(id=id)
                try:
                    # Remove a stale directory left at the cache path, if any
                    # (best effort; ignore failures).
                    shutil.rmtree(f)
                except Exception:
                    pass
                if rowdata and len(rowdata.df):
                    # dataplep regenerates the parquet cache as a side effect.
                    _ = dataplep(rowdata.df, id=id,
                                 bands=True, otwpower=True, barchart=True,
                                 polars=True)
                    try:
                        df = pl.read_parquet(f, columns=columns)
                        data.append(df)
                    except (OSError, ArrowInvalid, IndexError):
                        pass
        try:
            df = pl.concat(data, rechunk=True)
        except ValueError:  # pragma: no cover
            # Nothing collected: pl.concat([]) raises ValueError.
            return pl.DataFrame()
        except SchemaError:
            # Per-workout files may disagree on integer vs float dtypes;
            # vertical_relaxed lets polars upcast instead of failing.
            df = pl.concat(data, rechunk=True, how='vertical_relaxed')
    else:
        try:
            df = pl.read_parquet(csvfilenames[0], columns=columns)
            rowdata, row = getrowdata(id=ids[0])
        except (OSError, IndexError, ArrowInvalid):
            # Known cache failures: rebuild the cache and retry once.
            rowdata, row = getrowdata(id=ids[0])
            if rowdata and len(rowdata.df):  # pragma: no cover
                data = dataplep(
                    rowdata.df, id=ids[0], bands=True, otwpower=True, barchart=True)
                try:
                    df = pl.read_parquet(csvfilenames[0], columns=columns)
                except Exception:
                    # BUG FIX: was `df = pl.DataFrame` (assigned the class,
                    # not an instance) — callers expecting a DataFrame would
                    # crash on attribute access.
                    df = pl.DataFrame()
            else:
                df = pl.DataFrame()
        except Exception:
            # Any other read failure: same rebuild-and-retry path.
            rowdata, row = getrowdata(id=ids[0])
            if rowdata and len(rowdata.df):  # pragma: no cover
                data = dataplep(
                    rowdata.df, id=ids[0], bands=True, otwpower=True, barchart=True)
                try:
                    df = pl.read_parquet(csvfilenames[0], columns=columns)
                except Exception:
                    df = pl.DataFrame()
            else:
                df = pl.DataFrame()
    if compute and len(df):
        # NOTE(review): the cleaned clone is never merged back into `df` —
        # presumably clean_df_stats_pl is wanted only for its side effects
        # here; confirm before relying on `doclean` affecting the result.
        data = df.clone()
        if doclean:
            data = clean_df_stats_pl(data, ignorehr=True,
                                     workstrokesonly=workstrokesonly,
                                     for_chart=for_chart)
            data = remove_nulls_pl(data)
    if not df.is_empty():
        # Normalize NaN to null, then drop any row containing a null.
        df = df.fill_nan(None).drop_nulls()
    return df
def read_data(columns, ids=[], doclean=True, workstrokesonly=True, debug=False, for_chart=False, compute=True): def read_data(columns, ids=[], doclean=True, workstrokesonly=True, debug=False, for_chart=False, compute=True):
@@ -1603,6 +1519,8 @@ def read_data(columns, ids=[], doclean=True, workstrokesonly=True, debug=False,
data.append(df) data.append(df)
data = pl.collect_all(data) data = pl.collect_all(data)
if len(data)==0:
return pl.DataFrame()
try: try:
datadf = pl.concat(data).select(columns) datadf = pl.concat(data).select(columns)
@@ -1635,7 +1553,15 @@ def read_data(columns, ids=[], doclean=True, workstrokesonly=True, debug=False,
for df in data for df in data
] ]
try:
datadf = pl.concat(data) datadf = pl.concat(data)
except SchemaError:
data = [
df.with_columns(cs.integer().cast(pl.Float64)) for df in data
]
datadf = pl.concat(data)
exprs = [] exprs = []

View File

@@ -1612,26 +1612,26 @@ def interactive_cum_flex_chart2(theworkouts, promember=0,
try: try:
datadf = datadf.with_columns(pl.col(xparam).alias("x1")) datadf = datadf.with_columns(pl.col(xparam).alias("x1"))
except KeyError: # pragma: no cover except (KeyError, ColumnNotFoundError): # pragma: no cover
try: try:
datadf = datadf.with_columns(pl.col("distance").alias("x1")) datadf = datadf.with_columns(pl.col("distance").alias("x1"))
except KeyError: except (KeyError, ColumnNotFoundError):
try: try:
datadf = datadf.with_columns(pl.col('time').alias("x1")) datadf = datadf.with_columns(pl.col('time').alias("x1"))
except KeyError: # pragma: no cover except (KeyError, ColumnNotFoundError): # pragma: no cover
return ['', '<p>No non-zero data in selection</p>', '', ''] return ['', '<p>No non-zero data in selection</p>', '', '']
try: try:
datadf = datadf.with_columns(pl.col(yparam1).alias("y1")) datadf = datadf.with_columns(pl.col(yparam1).alias("y1"))
except KeyError: except (KeyError, ColumnNotFoundError):
try: try:
datadf = datadf.with_columns(pl.col('pace').alias("y1")) datadf = datadf.with_columns(pl.col('pace').alias("y1"))
except KeyError: # pragma: no cover except (KeyError, ColumnNotFoundError): # pragma: no cover
return ['', '<p>No non-zero data in selection</p>', '', ''] return ['', '<p>No non-zero data in selection</p>', '', '']
if yparam2 != 'None': if yparam2 != 'None':
try: try:
datadf = datadf.with_columns(pl.col(yparam2).alias("y2")) datadf = datadf.with_columns(pl.col(yparam2).alias("y2"))
except KeyError: # pragma: no cover except (KeyError, ColumnNotFoundError): # pragma: no cover
datadf = datadf.with_columns(pl.col("y1").alias("y2")) datadf = datadf.with_columns(pl.col("y1").alias("y2"))
else: # pragma: no cover else: # pragma: no cover
datadf = datadf.with_columns(pl.col("y1").alias("y2")) datadf = datadf.with_columns(pl.col("y1").alias("y2"))
@@ -2099,14 +2099,11 @@ def interactive_multiple_compare_chart(ids, xparam, yparam, plottype='line',
promember=0, workstrokesonly=True, promember=0, workstrokesonly=True,
labeldict=None, startenddict={}): labeldict=None, startenddict={}):
message = '' columns = [xparam,yparam]
errormessage = '' columns_basic = [xparam,yparam]
columns = [name for name, d in metrics.rowingmetrics]
columns_basic = [name for name, d in metrics.rowingmetrics if d['group'] == 'basic']
add_columns = [ add_columns = [
'ftime', 'distance', 'fpace', 'ftime', 'distance', 'fpace',
'power', 'hr', 'spm', 'spm',
'time', 'pace', 'workoutstate', 'time', 'pace', 'workoutstate',
'workoutid' 'workoutid'
] ]
@@ -2122,47 +2119,37 @@ def interactive_multiple_compare_chart(ids, xparam, yparam, plottype='line',
datadf = pd.DataFrame() datadf = pd.DataFrame()
if promember: if promember:
datadf = dataprep.getsmallrowdata_db(columns, ids=ids, doclean=doclean, datadf = dataprep.read_data(columns, ids=ids, doclean=doclean,
compute=compute, compute=compute,
workstrokesonly=workstrokesonly, for_chart=True) workstrokesonly=workstrokesonly, for_chart=True)
else: else:
datadf = dataprep.getsmallrowdata_db(columns_basic, ids=ids, doclean=doclean, datadf = dataprep.read_data(columns_basic, ids=ids, doclean=doclean,
compute=compute, compute=compute,
workstrokesonly=workstrokesonly, for_chart=True) workstrokesonly=workstrokesonly, for_chart=True)
datadf = dataprep.remove_nulls_pl(datadf)
# check if dataframe not empty # check if dataframe not empty
if datadf.empty: # pragma: no cover if datadf.is_empty(): # pragma: no cover
return ['<p>No non-zero data in selection</p>', ''] return ['<p>No non-zero data in selection</p>', '']
datadf['workoutid'] = datadf['workoutid'].astype(int) datadf = datadf.with_columns(pl.col("workoutid").cast(pl.UInt32).keep_name())
datadf.dropna(axis=1, how='all', inplace=True)
datadf.dropna(axis=0, how='all', inplace=True)
nrworkouts = len(ids) nrworkouts = len(ids)
try: try:
tseconds = datadf.loc[:, 'time'] tseconds = datadf['time']
except KeyError: # pragma: no cover except (KeyError, ColumnNotFoundError): # pragma: no cover
try: try:
tseconds = datadf.loc[:, xparam] tseconds = datadf[xparam]
except: except:
return ['<p>A chart data error occurred</p>', ''] return ['<p>A chart data error occurred</p>', '']
# check if dataframe not empty
if datadf.empty: # pragma: no cover
return ['<p>No non-zero data in selection</p>', '']
if (xparam == 'time'): if (xparam == 'time'):
datadf[xparam] = datadf[xparam] - datadf[xparam].iloc[0] datadf = datadf.with_columns((pl.col(xparam)-datadf[0,xparam]).alias(xparam))
datadf = datadf.fillna(0) data_dict = datadf.to_dicts()
datadf.replace([np.inf, -np.inf], np.nan, inplace=True)
datadf = datadf.fillna(0)
data_dict = datadf.to_dict("records")
metrics_list = [{'name': name, 'rowingmetrics':d } for name, d in metrics.rowingmetrics] metrics_list = [{'name': name, 'rowingmetrics':d } for name, d in metrics.rowingmetrics]
@@ -2178,8 +2165,8 @@ def interactive_multiple_compare_chart(ids, xparam, yparam, plottype='line',
'workouts': workoutsdict, 'workouts': workoutsdict,
} }
script, div = get_chart("/compare", chart_data) script, div = get_chart("/compare", chart_data, debug=False)
return script, div, message, errormessage return script, div
def get_zones_report_pl(rower, startdate, enddate, trainingzones='hr', date_agg='week', def get_zones_report_pl(rower, startdate, enddate, trainingzones='hr', date_agg='week',
yaxis='time'): yaxis='time'):
@@ -2202,7 +2189,10 @@ def get_zones_report_pl(rower, startdate, enddate, trainingzones='hr', date_agg=
df = dataprep.read_data(columns, ids=ids, workstrokesonly=False, doclean=False) df = dataprep.read_data(columns, ids=ids, workstrokesonly=False, doclean=False)
df = dataprep.remove_nulls_pl(df) df = dataprep.remove_nulls_pl(df)
try:
df = df.with_columns((pl.col("time").diff().clip(0, 20*1.e3)).alias("deltat")).lazy() df = df.with_columns((pl.col("time").diff().clip(0, 20*1.e3)).alias("deltat")).lazy()
except ColumnNotFoundError:
pass
hrzones = rower.hrzones hrzones = rower.hrzones
powerzones = rower.powerzones powerzones = rower.powerzones

View File

@@ -301,7 +301,7 @@ def mocked_getrowdata_db(*args, **kwargs):
return df,row return df,row
def mocked_getrowdata_uh(*args, **kwargs): # pragma: no cover def mocked_getrowdata_uh(*args, **kwargs): # pragma: no cover
df = pd.read_csv('rowers/tests/testdata/uhfull.csv') df = pl.read_csv('rowers/tests/testdata/uhfull.csv')
id = kwargs['id'] id = kwargs['id']
@@ -315,7 +315,7 @@ def mocked_getsmallrowdata_uh(*args, **kwargs): # pragma: no cover
return df return df
def mocked_getsmallrowdata_forfusion(*args, **kwargs): def mocked_getsmallrowdata_forfusion(*args, **kwargs):
df = pd.read_csv('rowers/tests/testdata/getrowdata_mock.csv') df = pl.read_csv('rowers/tests/testdata/getrowdata_mock.csv')
return df return df

View File

@@ -169,7 +169,7 @@ class ForcecurveTest(TestCase):
pass pass
@patch('rowers.dataprep.getsmallrowdata_db',side_effect = mocked_getempowerdata_db) @patch('rowers.dataprep.read_data',side_effect = mocked_read_data)
def test_forcecurve_plot(self, mocked_getsmallrowdata_db): def test_forcecurve_plot(self, mocked_getsmallrowdata_db):
login = self.c.login(username=self.u.username, password = self.password) login = self.c.login(username=self.u.username, password = self.password)
self.assertTrue(login) self.assertTrue(login)
@@ -600,9 +600,9 @@ class History(TestCase):
pass pass
@patch('rowers.dataprep.create_engine') @patch('rowers.dataprep.create_engine')
@patch('rowers.dataprep.getsmallrowdata_db',side_effect=mocked_getsmallrowdata_db) @patch('rowers.dataprep.read_data',side_effect=mocked_read_data)
def test_workouts_history(self, mocked_sqlalchemy, def test_workouts_history(self, mocked_sqlalchemy,
mocked_getsmallrowdata_db): mocked_read_data):
login = self.c.login(username=self.u.username, password=self.password) login = self.c.login(username=self.u.username, password=self.password)
self.assertTrue(login) self.assertTrue(login)
@@ -916,7 +916,7 @@ class WorkoutStatsTestNew(TestCase):
self.assertEqual(response.status_code,200) self.assertEqual(response.status_code,200)
@patch('rowers.dataprep.create_engine') @patch('rowers.dataprep.create_engine')
@patch('rowers.dataprep.getsmallrowdata_db', side_effect=mocked_getsmallrowdata_db) @patch('rowers.dataprep.read_data', side_effect=mocked_read_data)
@patch('rowers.dataprep.read_cols_df_sql', side_effect=mocked_read_cols_df_sql) @patch('rowers.dataprep.read_cols_df_sql', side_effect=mocked_read_cols_df_sql)
def test_analysis_data(self, def test_analysis_data(self,
mocked_sqlalchemy, mocked_sqlalchemy,
@@ -1257,8 +1257,8 @@ class MarkerPerformanceTest(TestCase):
self.assertRedirects(response, expected_url=expected_url, status_code=302,target_status_code=200) self.assertRedirects(response, expected_url=expected_url, status_code=302,target_status_code=200)
@patch('rowers.dataprep.getsmallrowdata_db', side_effect=mocked_getsmallrowdata_uh) @patch('rowers.dataprep.read_data', side_effect=mocked_getsmallrowdata_uh)
def test_trainingzones_view(self,mocked_getsmallrowdata_db): def test_trainingzones_view(self,mocked_getsmallrowdata_uh):
login = self.c.login(username=self.u.username,password=self.password) login = self.c.login(username=self.u.username,password=self.password)
self.assertTrue(login) self.assertTrue(login)

View File

@@ -55,8 +55,8 @@ class EmailUpload(TestCase):
@patch('rowers.dataprep.create_engine') @patch('rowers.dataprep.create_engine')
@patch('rowers.dataprep.getsmallrowdata_db',side_effect=mocked_getsmallrowdata_db) @patch('rowers.dataprep.read_data',side_effect=mocked_read_data)
def test_uploadapi(self,mocked_sqlalchemy,mocked_getsmallrowdata_db): def test_uploadapi(self,mocked_sqlalchemy,mocked_read_data):
form_data = { form_data = {
'title': 'test', 'title': 'test',
'workouttype':'rower', 'workouttype':'rower',

Binary file not shown.

View File

@@ -7,6 +7,7 @@ import math
import numpy as np import numpy as np
import pandas as pd import pandas as pd
import polars as pl import polars as pl
from polars.exceptions import ColumnNotFoundError
import colorsys import colorsys
from django.conf import settings from django.conf import settings
import collections import collections

View File

@@ -571,7 +571,7 @@ def flexalldata(workouts, options):
workstrokesonly = not includereststrokes workstrokesonly = not includereststrokes
columns = [xparam, yparam1, yparam2, 'spm', 'driveenergy', 'distance'] columns = [xparam, yparam1, yparam2, 'spm', 'driveenergy', 'distance']
ids = [int(w.id) for w in workouts] ids = [int(w.id) for w in workouts]
df = dataprep.getsmallrowdata_pl(columns, ids=ids, df = dataprep.read_data(columns, ids=ids,
workstrokesonly=workstrokesonly, workstrokesonly=workstrokesonly,
doclean=True, doclean=True,
) )
@@ -928,9 +928,9 @@ def boxplotdata(workouts, options):
ids = [w.id for w in workouts] ids = [w.id for w in workouts]
# prepare data frame # prepare data frame
datadf = getsmallrowdata_pl(fieldlist, ids) datadf = dataprep.read_data(fieldlist, ids)
datadf = dataprep.clean_df_stats_pl(datadf, workstrokesonly=workstrokesonly) datadf = dataprep.remove_nulls_pl(datadf)
try: try:
datadf = datadf.filter( datadf = datadf.filter(
@@ -2361,17 +2361,16 @@ def history_view_data(request, userid=0):
ids = [w.id for w in g_workouts] ids = [w.id for w in g_workouts]
# columns = ['hr', 'power', 'time'] columns = ['hr', 'power', 'time', 'workoutstate', 'workoutid']
columns = [name for name, d in metrics.rowingmetrics]+['workoutstate', 'workoutid']
df = dataprep.read_data(columns, ids=ids)
df = dataprep.remove_nulls_pl(df)
df = getsmallrowdata_pl(columns, ids=ids)
try: try:
df = df.with_columns(pl.col('time').diff().clip(lower_bound=0).alias("deltat")) df = df.with_columns(pl.col('time').diff().clip(lower_bound=0).alias("deltat"))
except KeyError: # pragma: no cover except KeyError: # pragma: no cover
pass pass
df = dataprep.clean_df_stats_pl(df, workstrokesonly=True,
ignoreadvanced=True, ignorehr=False)
totalmeters, totalhours, totalminutes, totalseconds = get_totals( totalmeters, totalhours, totalminutes, totalseconds = get_totals(
g_workouts) g_workouts)
@@ -2400,7 +2399,8 @@ def history_view_data(request, userid=0):
whours=whours, whours=whours,
wminutes=wminutes, wseconds=wseconds, wminutes=wminutes, wseconds=wseconds,
) )
ddf = getsmallrowdata_pl(columns, ids=[w.id for w in a_workouts]) ddf = dataprep.read_data(columns, ids=[w.id for w in a_workouts])
ddf = dataprep.remove_nulls_pl(ddf)
try: try:
ddf = ddf.with_columns(pl.col("time").diff().clip(lower_bound=0).alias("deltat")) ddf = ddf.with_columns(pl.col("time").diff().clip(lower_bound=0).alias("deltat"))
except KeyError: # pragma: no cover except KeyError: # pragma: no cover
@@ -2421,7 +2421,7 @@ def history_view_data(request, userid=0):
ddict['powermean'] = int(wavg(ddf, 'power', 'deltat')) ddict['powermean'] = int(wavg(ddf, 'power', 'deltat'))
try: try:
ddict['powermax'] = int(ddf['power'].max()) ddict['powermax'] = int(ddf['power'].max())
except KeyError: # pragma: no cover except (KeyError, ColumnNotFoundError): # pragma: no cover
ddict['powermax'] = 0 ddict['powermax'] = 0
ddict['nrworkouts'] = a_workouts.count() ddict['nrworkouts'] = a_workouts.count()
listofdicts.append(ddict) listofdicts.append(ddict)
@@ -2436,13 +2436,13 @@ def history_view_data(request, userid=0):
try: try:
totalsdict['powermean'] = int(wavg(df, 'power', 'deltat')) totalsdict['powermean'] = int(wavg(df, 'power', 'deltat'))
totalsdict['powermax'] = int(df['power'].max()) totalsdict['powermax'] = int(df['power'].max())
except KeyError: # pragma: no cover except (KeyError, ColumnNotFoundError): # pragma: no cover
totalsdict['powermean'] = 0 totalsdict['powermean'] = 0
totalsdict['powermax'] = 0 totalsdict['powermax'] = 0
try: try:
totalsdict['hrmean'] = int(wavg(df, 'hr', 'deltat')) totalsdict['hrmean'] = int(wavg(df, 'hr', 'deltat'))
totalsdict['hrmax'] = int(df['hr'].max()) totalsdict['hrmax'] = int(df['hr'].max())
except KeyError: # pragma: no cover except (KeyError, ColumnNotFoundError): # pragma: no cover
totalsdict['hrmean'] = 0 totalsdict['hrmean'] = 0
totalsdict['hrmax'] = 0 totalsdict['hrmax'] = 0
@@ -2461,7 +2461,8 @@ def history_view_data(request, userid=0):
a_workouts = g_workouts.filter(workouttype=typeselect) a_workouts = g_workouts.filter(workouttype=typeselect)
meters, hours, minutes, seconds = get_totals(a_workouts) meters, hours, minutes, seconds = get_totals(a_workouts)
totalseconds = 3600 * hours + 60 * minutes + seconds totalseconds = 3600 * hours + 60 * minutes + seconds
ddf = getsmallrowdata_pl(columns, ids=[w.id for w in a_workouts]) ddf = dataprep.read_data(columns, ids=[w.id for w in a_workouts])
ddf = dataprep.remove_nulls_pl(ddf)
if ddf.is_empty(): if ddf.is_empty():
totalscript = "" totalscript = ""
totaldiv = "No data" totaldiv = "No data"

View File

@@ -16,7 +16,7 @@ from rowers.utils import (
from rowers.celery import result as celery_result from rowers.celery import result as celery_result
from rowers.interactiveplots import * from rowers.interactiveplots import *
from scipy.interpolate import griddata from scipy.interpolate import griddata
from rowers.dataprep import getsmallrowdata_db, getsmallrowdata_pl from rowers.dataprep import getsmallrowdata_db, read_data
from rowers.dataprep import timedeltaconv from rowers.dataprep import timedeltaconv
from scipy.special import lambertw from scipy.special import lambertw
from io import BytesIO from io import BytesIO