Private
Public Access
1
0
This commit is contained in:
2024-04-20 17:14:22 +02:00
parent ffe295d8fc
commit 30bf61cbe1
10 changed files with 76 additions and 157 deletions

View File

@@ -29,6 +29,7 @@ import itertools
import numpy as np
import pandas as pd
import polars as pl
from polars.exceptions import ColumnNotFoundError
from zipfile import BadZipFile
import zipfile
import os
@@ -422,7 +423,8 @@ def calculate_goldmedalstandard(rower, workout, recurrance=True):
try:
df = pl.read_parquet(cpfile)
except:
df = getsmallrowdata_pl(['power'], ids=[workout.id])
df = read_data(['power'], ids=[workout.id])
df = remove_nulls_pl(df)
background = True
if settings.TESTING:
background = False
@@ -525,8 +527,9 @@ def setcp(workout, background=False, recurrance=True):
except Exception as e:
pass
strokesdf = getsmallrowdata_pl(
strokesdf = read_data(
['power', 'workoutid', 'time'], ids=[workout.id])
strokesdf = remove_nulls_pl(strokesdf)
if strokesdf.is_empty():
return pl.DataFrame({'delta': [], 'cp': []}), pl.Series(dtype=pl.Float64), pl.Series(dtype=pl.Float64)
@@ -617,14 +620,10 @@ def update_wps(r, types, mode='water', asynchron=True):
mode
)
df = getsmallrowdata_db(['time', 'driveenergy'], ids=ids)
df = read_data(['time', 'driveenergy'], ids=ids)
try:
mask = df['driveenergy'] > 100
except (KeyError, TypeError):
return False
try:
wps_median = int(df.loc[mask, 'driveenergy'].median())
wps_median = int(df.filter(pl.col("driveenergy")>100)["driveenergy"].median())
if mode == 'water':
r.median_wps = wps_median
else: # pragma: no cover
@@ -635,6 +634,8 @@ def update_wps(r, types, mode='water', asynchron=True):
pass
except OverflowError:
pass
except ColumnNotFoundError:
pass
return True

View File

@@ -1488,90 +1488,6 @@ def getrowdata_pl(id=0, doclean=False, convertnewtons=True,
return data, row
# Fetch a subset of the data from the DB
def getsmallrowdata_pl(columns, ids=None, doclean=True, workstrokesonly=True, compute=True,
                       debug=False, for_chart=False):
    """Read a subset of stroke-data columns for the given workout ids as a polars DataFrame.

    Per-workout parquet caches (``media/strokedata_<id>.parquet.gz``) are read
    first; on a cache miss the raw data is re-fetched via ``getrowdata`` and the
    cache regenerated through ``dataplep`` before retrying the read.

    Args:
        columns: Column names to load; the literal string ``'None'`` is dropped
            and ``distance``/``spm``/``workoutid`` are always added.
        ids: Workout ids to load. Empty/``None`` returns an empty DataFrame.
        doclean: Run ``clean_df_stats_pl`` on the cloned data when ``compute``.
        workstrokesonly: Forwarded to ``clean_df_stats_pl``.
        compute: Enable the clean/compute pass on a clone of the data.
        debug: Unused here; kept for interface compatibility with callers.
        for_chart: Forwarded to ``clean_df_stats_pl``.

    Returns:
        A ``pl.DataFrame`` with NaNs/nulls dropped; empty on failure.
    """
    if not ids:
        return pl.DataFrame()
    ids = list(ids)
    csvfilenames = [f'media/strokedata_{id}.parquet.gz' for id in ids]
    # Deduplicate requested columns and force the always-needed ones in.
    columns = list(set([c for c in columns if c != 'None']
                       + ['distance', 'spm', 'workoutid']))
    data = []
    df = pl.DataFrame()
    if len(ids) > 1:
        for id, f in zip(ids, csvfilenames):
            try:
                df = pl.read_parquet(f, columns=columns)
                data.append(df)
            except (IsADirectoryError, FileNotFoundError, OSError, ArrowInvalid, IndexError):  # pragma: no cover
                # Cache miss or corrupt cache: drop it, rebuild from raw data, retry once.
                rowdata, row = getrowdata(id=id)
                try:
                    shutil.rmtree(f)
                except Exception:
                    pass
                if rowdata and len(rowdata.df):
                    _ = dataplep(rowdata.df, id=id,
                                 bands=True, otwpower=True, barchart=True,
                                 polars=True)
                    try:
                        df = pl.read_parquet(f, columns=columns)
                        data.append(df)
                    except (OSError, ArrowInvalid, IndexError):
                        pass
        try:
            df = pl.concat(data, rechunk=True)
        except ValueError:  # pragma: no cover
            # Nothing readable for any id.
            return pl.DataFrame()
        except SchemaError:
            # Mixed dtypes across cached files (e.g. int vs float columns).
            df = pl.concat(data, rechunk=True, how='vertical_relaxed')
    else:
        try:
            df = pl.read_parquet(csvfilenames[0], columns=columns)
            rowdata, row = getrowdata(id=ids[0])
        except Exception:
            # Single-id cache miss: rebuild the cache and retry once.
            # (Previously two duplicated except-branches; one of them assigned
            # the pl.DataFrame *class* instead of an instance — fixed here.)
            rowdata, row = getrowdata(id=ids[0])
            if rowdata and len(rowdata.df):  # pragma: no cover
                data = dataplep(
                    rowdata.df, id=ids[0], bands=True, otwpower=True, barchart=True)
                try:
                    df = pl.read_parquet(csvfilenames[0], columns=columns)
                except Exception:
                    df = pl.DataFrame()
            else:
                df = pl.DataFrame()
    if compute and len(df):
        # NOTE(review): the cleaned clone is never returned — only df is.
        # Preserved as-is in case clean_df_stats_pl has needed side effects;
        # confirm whether this pass is dead code.
        data = df.clone()
        if doclean:
            data = clean_df_stats_pl(data, ignorehr=True,
                                     workstrokesonly=workstrokesonly,
                                     for_chart=for_chart)
            data = remove_nulls_pl(data)
    if not df.is_empty():
        df = df.fill_nan(None).drop_nulls()
    return df
def read_data(columns, ids=[], doclean=True, workstrokesonly=True, debug=False, for_chart=False, compute=True):
@@ -1603,6 +1519,8 @@ def read_data(columns, ids=[], doclean=True, workstrokesonly=True, debug=False,
data.append(df)
data = pl.collect_all(data)
if len(data)==0:
return pl.DataFrame()
try:
datadf = pl.concat(data).select(columns)
@@ -1635,7 +1553,15 @@ def read_data(columns, ids=[], doclean=True, workstrokesonly=True, debug=False,
for df in data
]
datadf = pl.concat(data)
try:
datadf = pl.concat(data)
except SchemaError:
data = [
df.with_columns(cs.integer().cast(pl.Float64)) for df in data
]
datadf = pl.concat(data)
exprs = []

View File

@@ -1612,26 +1612,26 @@ def interactive_cum_flex_chart2(theworkouts, promember=0,
try:
datadf = datadf.with_columns(pl.col(xparam).alias("x1"))
except KeyError: # pragma: no cover
except (KeyError, ColumnNotFoundError): # pragma: no cover
try:
datadf = datadf.with_columns(pl.col("distance").alias("x1"))
except KeyError:
except (KeyError, ColumnNotFoundError):
try:
datadf = datadf.with_columns(pl.col('time').alias("x1"))
except KeyError: # pragma: no cover
except (KeyError, ColumnNotFoundError): # pragma: no cover
return ['', '<p>No non-zero data in selection</p>', '', '']
try:
datadf = datadf.with_columns(pl.col(yparam1).alias("y1"))
except KeyError:
except (KeyError, ColumnNotFoundError):
try:
datadf = datadf.with_columns(pl.col('pace').alias("y1"))
except KeyError: # pragma: no cover
except (KeyError, ColumnNotFoundError): # pragma: no cover
return ['', '<p>No non-zero data in selection</p>', '', '']
if yparam2 != 'None':
try:
datadf = datadf.with_columns(pl.col(yparam2).alias("y2"))
except KeyError: # pragma: no cover
except (KeyError, ColumnNotFoundError): # pragma: no cover
datadf = datadf.with_columns(pl.col("y1").alias("y2"))
else: # pragma: no cover
datadf = datadf.with_columns(pl.col("y1").alias("y2"))
@@ -2099,14 +2099,11 @@ def interactive_multiple_compare_chart(ids, xparam, yparam, plottype='line',
promember=0, workstrokesonly=True,
labeldict=None, startenddict={}):
message = ''
errormessage = ''
columns = [name for name, d in metrics.rowingmetrics]
columns_basic = [name for name, d in metrics.rowingmetrics if d['group'] == 'basic']
columns = [xparam,yparam]
columns_basic = [xparam,yparam]
add_columns = [
'ftime', 'distance', 'fpace',
'power', 'hr', 'spm',
'spm',
'time', 'pace', 'workoutstate',
'workoutid'
]
@@ -2122,47 +2119,37 @@ def interactive_multiple_compare_chart(ids, xparam, yparam, plottype='line',
datadf = pd.DataFrame()
if promember:
datadf = dataprep.getsmallrowdata_db(columns, ids=ids, doclean=doclean,
compute=compute,
workstrokesonly=workstrokesonly, for_chart=True)
datadf = dataprep.read_data(columns, ids=ids, doclean=doclean,
compute=compute,
workstrokesonly=workstrokesonly, for_chart=True)
else:
datadf = dataprep.getsmallrowdata_db(columns_basic, ids=ids, doclean=doclean,
compute=compute,
workstrokesonly=workstrokesonly, for_chart=True)
datadf = dataprep.read_data(columns_basic, ids=ids, doclean=doclean,
compute=compute,
workstrokesonly=workstrokesonly, for_chart=True)
datadf = dataprep.remove_nulls_pl(datadf)
# check if dataframe not empty
if datadf.empty: # pragma: no cover
if datadf.is_empty(): # pragma: no cover
return ['<p>No non-zero data in selection</p>', '']
datadf['workoutid'] = datadf['workoutid'].astype(int)
datadf.dropna(axis=1, how='all', inplace=True)
datadf.dropna(axis=0, how='all', inplace=True)
datadf = datadf.with_columns(pl.col("workoutid").cast(pl.UInt32).keep_name())
nrworkouts = len(ids)
try:
tseconds = datadf.loc[:, 'time']
except KeyError: # pragma: no cover
tseconds = datadf['time']
except (KeyError, ColumnNotFoundError): # pragma: no cover
try:
tseconds = datadf.loc[:, xparam]
tseconds = datadf[xparam]
except:
return ['<p>A chart data error occurred</p>', '']
# check if dataframe not empty
if datadf.empty: # pragma: no cover
return ['<p>No non-zero data in selection</p>', '']
if (xparam == 'time'):
datadf[xparam] = datadf[xparam] - datadf[xparam].iloc[0]
datadf = datadf.with_columns((pl.col(xparam)-datadf[0,xparam]).alias(xparam))
datadf = datadf.fillna(0)
datadf.replace([np.inf, -np.inf], np.nan, inplace=True)
datadf = datadf.fillna(0)
data_dict = datadf.to_dict("records")
data_dict = datadf.to_dicts()
metrics_list = [{'name': name, 'rowingmetrics':d } for name, d in metrics.rowingmetrics]
@@ -2178,8 +2165,8 @@ def interactive_multiple_compare_chart(ids, xparam, yparam, plottype='line',
'workouts': workoutsdict,
}
script, div = get_chart("/compare", chart_data)
return script, div, message, errormessage
script, div = get_chart("/compare", chart_data, debug=False)
return script, div
def get_zones_report_pl(rower, startdate, enddate, trainingzones='hr', date_agg='week',
yaxis='time'):
@@ -2202,7 +2189,10 @@ def get_zones_report_pl(rower, startdate, enddate, trainingzones='hr', date_agg=
df = dataprep.read_data(columns, ids=ids, workstrokesonly=False, doclean=False)
df = dataprep.remove_nulls_pl(df)
df = df.with_columns((pl.col("time").diff().clip(0, 20*1.e3)).alias("deltat")).lazy()
try:
df = df.with_columns((pl.col("time").diff().clip(0, 20*1.e3)).alias("deltat")).lazy()
except ColumnNotFoundError:
pass
hrzones = rower.hrzones
powerzones = rower.powerzones

View File

@@ -301,7 +301,7 @@ def mocked_getrowdata_db(*args, **kwargs):
return df,row
def mocked_getrowdata_uh(*args, **kwargs): # pragma: no cover
df = pd.read_csv('rowers/tests/testdata/uhfull.csv')
df = pl.read_csv('rowers/tests/testdata/uhfull.csv')
id = kwargs['id']
@@ -315,7 +315,7 @@ def mocked_getsmallrowdata_uh(*args, **kwargs): # pragma: no cover
return df
def mocked_getsmallrowdata_forfusion(*args, **kwargs):
    # Test stub for the small-row-data loader: always serves a fixed CSV fixture,
    # ignoring whatever columns/ids the caller requested.
    # NOTE(review): the two consecutive read_csv lines look like an unmarked
    # old/new diff pair from the pandas→polars migration; as literal code the
    # second (polars) read overwrites the first. Confirm only the pl line
    # should remain in the merged file.
    df = pd.read_csv('rowers/tests/testdata/getrowdata_mock.csv')
    df = pl.read_csv('rowers/tests/testdata/getrowdata_mock.csv')
    return df

View File

@@ -169,7 +169,7 @@ class ForcecurveTest(TestCase):
pass
@patch('rowers.dataprep.getsmallrowdata_db',side_effect = mocked_getempowerdata_db)
@patch('rowers.dataprep.read_data',side_effect = mocked_read_data)
def test_forcecurve_plot(self, mocked_getsmallrowdata_db):
login = self.c.login(username=self.u.username, password = self.password)
self.assertTrue(login)
@@ -600,9 +600,9 @@ class History(TestCase):
pass
@patch('rowers.dataprep.create_engine')
@patch('rowers.dataprep.getsmallrowdata_db',side_effect=mocked_getsmallrowdata_db)
@patch('rowers.dataprep.read_data',side_effect=mocked_read_data)
def test_workouts_history(self, mocked_sqlalchemy,
mocked_getsmallrowdata_db):
mocked_read_data):
login = self.c.login(username=self.u.username, password=self.password)
self.assertTrue(login)
@@ -916,7 +916,7 @@ class WorkoutStatsTestNew(TestCase):
self.assertEqual(response.status_code,200)
@patch('rowers.dataprep.create_engine')
@patch('rowers.dataprep.getsmallrowdata_db', side_effect=mocked_getsmallrowdata_db)
@patch('rowers.dataprep.read_data', side_effect=mocked_read_data)
@patch('rowers.dataprep.read_cols_df_sql', side_effect=mocked_read_cols_df_sql)
def test_analysis_data(self,
mocked_sqlalchemy,
@@ -1257,8 +1257,8 @@ class MarkerPerformanceTest(TestCase):
self.assertRedirects(response, expected_url=expected_url, status_code=302,target_status_code=200)
@patch('rowers.dataprep.getsmallrowdata_db', side_effect=mocked_getsmallrowdata_uh)
def test_trainingzones_view(self,mocked_getsmallrowdata_db):
@patch('rowers.dataprep.read_data', side_effect=mocked_getsmallrowdata_uh)
def test_trainingzones_view(self,mocked_getsmallrowdata_uh):
login = self.c.login(username=self.u.username,password=self.password)
self.assertTrue(login)

View File

@@ -55,8 +55,8 @@ class EmailUpload(TestCase):
@patch('rowers.dataprep.create_engine')
@patch('rowers.dataprep.getsmallrowdata_db',side_effect=mocked_getsmallrowdata_db)
def test_uploadapi(self,mocked_sqlalchemy,mocked_getsmallrowdata_db):
@patch('rowers.dataprep.read_data',side_effect=mocked_read_data)
def test_uploadapi(self,mocked_sqlalchemy,mocked_read_data):
form_data = {
'title': 'test',
'workouttype':'rower',

Binary file not shown.

View File

@@ -7,6 +7,7 @@ import math
import numpy as np
import pandas as pd
import polars as pl
from polars.exceptions import ColumnNotFoundError
import colorsys
from django.conf import settings
import collections

View File

@@ -571,7 +571,7 @@ def flexalldata(workouts, options):
workstrokesonly = not includereststrokes
columns = [xparam, yparam1, yparam2, 'spm', 'driveenergy', 'distance']
ids = [int(w.id) for w in workouts]
df = dataprep.getsmallrowdata_pl(columns, ids=ids,
df = dataprep.read_data(columns, ids=ids,
workstrokesonly=workstrokesonly,
doclean=True,
)
@@ -928,9 +928,9 @@ def boxplotdata(workouts, options):
ids = [w.id for w in workouts]
# prepare data frame
datadf = getsmallrowdata_pl(fieldlist, ids)
datadf = dataprep.read_data(fieldlist, ids)
datadf = dataprep.clean_df_stats_pl(datadf, workstrokesonly=workstrokesonly)
datadf = dataprep.remove_nulls_pl(datadf)
try:
datadf = datadf.filter(
@@ -2361,17 +2361,16 @@ def history_view_data(request, userid=0):
ids = [w.id for w in g_workouts]
# columns = ['hr', 'power', 'time']
columns = [name for name, d in metrics.rowingmetrics]+['workoutstate', 'workoutid']
columns = ['hr', 'power', 'time', 'workoutstate', 'workoutid']
df = dataprep.read_data(columns, ids=ids)
df = dataprep.remove_nulls_pl(df)
df = getsmallrowdata_pl(columns, ids=ids)
try:
df = df.with_columns(pl.col('time').diff().clip(lower_bound=0).alias("deltat"))
except KeyError: # pragma: no cover
pass
df = dataprep.clean_df_stats_pl(df, workstrokesonly=True,
ignoreadvanced=True, ignorehr=False)
totalmeters, totalhours, totalminutes, totalseconds = get_totals(
g_workouts)
@@ -2400,7 +2399,8 @@ def history_view_data(request, userid=0):
whours=whours,
wminutes=wminutes, wseconds=wseconds,
)
ddf = getsmallrowdata_pl(columns, ids=[w.id for w in a_workouts])
ddf = dataprep.read_data(columns, ids=[w.id for w in a_workouts])
ddf = dataprep.remove_nulls_pl(ddf)
try:
ddf = ddf.with_columns(pl.col("time").diff().clip(lower_bound=0).alias("deltat"))
except KeyError: # pragma: no cover
@@ -2421,7 +2421,7 @@ def history_view_data(request, userid=0):
ddict['powermean'] = int(wavg(ddf, 'power', 'deltat'))
try:
ddict['powermax'] = int(ddf['power'].max())
except KeyError: # pragma: no cover
except (KeyError, ColumnNotFoundError): # pragma: no cover
ddict['powermax'] = 0
ddict['nrworkouts'] = a_workouts.count()
listofdicts.append(ddict)
@@ -2436,13 +2436,13 @@ def history_view_data(request, userid=0):
try:
totalsdict['powermean'] = int(wavg(df, 'power', 'deltat'))
totalsdict['powermax'] = int(df['power'].max())
except KeyError: # pragma: no cover
except (KeyError, ColumnNotFoundError): # pragma: no cover
totalsdict['powermean'] = 0
totalsdict['powermax'] = 0
try:
totalsdict['hrmean'] = int(wavg(df, 'hr', 'deltat'))
totalsdict['hrmax'] = int(df['hr'].max())
except KeyError: # pragma: no cover
except (KeyError, ColumnNotFoundError): # pragma: no cover
totalsdict['hrmean'] = 0
totalsdict['hrmax'] = 0
@@ -2461,7 +2461,8 @@ def history_view_data(request, userid=0):
a_workouts = g_workouts.filter(workouttype=typeselect)
meters, hours, minutes, seconds = get_totals(a_workouts)
totalseconds = 3600 * hours + 60 * minutes + seconds
ddf = getsmallrowdata_pl(columns, ids=[w.id for w in a_workouts])
ddf = dataprep.read_data(columns, ids=[w.id for w in a_workouts])
ddf = dataprep.remove_nulls_pl(ddf)
if ddf.is_empty():
totalscript = ""
totaldiv = "No data"

View File

@@ -16,7 +16,7 @@ from rowers.utils import (
from rowers.celery import result as celery_result
from rowers.interactiveplots import *
from scipy.interpolate import griddata
from rowers.dataprep import getsmallrowdata_db, getsmallrowdata_pl
from rowers.dataprep import getsmallrowdata_db, read_data
from rowers.dataprep import timedeltaconv
from scipy.special import lambertw
from io import BytesIO