histo converted to polars
This commit is contained in:
@@ -1414,6 +1414,37 @@ def getrowdata_db(id=0, doclean=False, convertnewtons=True,
|
|||||||
|
|
||||||
return data, row
|
return data, row
|
||||||
|
|
||||||
|
def getrowdata_pl(id=0, doclean=False, convertnewtons=True,
                  checkefficiency=True, for_chart=False):
    """Return the stroke data of one workout as a polars DataFrame.

    The data is first read through ``read_df_sql(id, polars=True)``. When
    that yields nothing usable (empty frame or no ``time`` column) the
    workout is re-processed from its source through ``getrowdata`` and
    ``dataprep``.

    Args:
        id: Workout ID.
        doclean: When true, pass the result through ``clean_df_stats``.
        convertnewtons: Accepted for signature parity with
            ``getrowdata_db``; not used by this function.
        checkefficiency: When true, (re)compute the ``efficiency`` column
            if it is missing, or if it is all zero while power is not.
        for_chart: Forwarded to ``clean_df_stats``.

    Returns:
        A ``(data, row)`` tuple: the polars DataFrame (possibly empty) and
        the workout record it belongs to.
    """
    data = read_df_sql(id, polars=True)

    # read_df_sql may hand back a pandas frame on its primary read path;
    # normalise so that everything below can rely on the polars API.
    if isinstance(data, pd.DataFrame):
        data = pl.from_pandas(data)

    # Stroke data without a time axis is unusable: treat it as missing so
    # it gets rebuilt below. Checking the column list avoids depending on
    # the exception type polars raises for an unknown column (it is not
    # KeyError).
    if 'time' in data.columns:
        data = data.with_columns(pl.col('time').diff().alias("deltat"))
    else:  # pragma: no cover
        data = pl.DataFrame()

    if data.is_empty():
        rowdata, row = getrowdata(id=id)
        if not rowdata.empty:  # pragma: no cover
            data = dataprep(rowdata.df, id=id, bands=True,
                            barchart=True, otwpower=True, polars=True)
        else:
            data = pl.DataFrame()  # returning empty dataframe
    else:
        row = Workout.objects.get(id=id)

    if checkefficiency is True and not data.is_empty():
        columns = data.columns
        if 'efficiency' not in columns:  # pragma: no cover
            recompute = True
        else:
            # An all-zero efficiency next to non-zero power means the
            # column was never really computed. Without a power column
            # there is nothing to derive it from, so leave the data alone.
            recompute = (
                'power' in columns
                and data['efficiency'].mean() == 0
                and data['power'].mean() != 0
            )
        if recompute:  # pragma: no cover
            # add_efficiency_pl only takes the workout id.
            data = add_efficiency_pl(id=id)

    if doclean:  # pragma: no cover
        data = clean_df_stats(data, ignorehr=True, for_chart=for_chart)

    return data, row
|
||||||
|
|
||||||
# Fetch a subset of the data from the DB
|
# Fetch a subset of the data from the DB
|
||||||
|
|
||||||
def getsmallrowdata_pl(columns, ids=[], doclean=True, workstrokesonly=True, compute=True,
|
def getsmallrowdata_pl(columns, ids=[], doclean=True, workstrokesonly=True, compute=True,
|
||||||
@@ -1707,7 +1738,28 @@ def read_cols_df_sql(ids, columns, convertnewtons=True):
|
|||||||
# Read stroke data from the DB for a Workout ID. Returns a pandas dataframe
|
# Read stroke data from the DB for a Workout ID. Returns a pandas dataframe
|
||||||
|
|
||||||
|
|
||||||
def read_df_sql(id):
|
def read_df_sql(id, polars=False):
|
||||||
|
if polars:
|
||||||
|
try:
|
||||||
|
f = 'media/strokedata_{id}.parquet.gz'.format(id=id)
|
||||||
|
df = pd.read_parquet(f)
|
||||||
|
except (IsADirectoryError, FileNotFoundError, OSError, ArrowInvalid, IndexError): # pragma: no cover
|
||||||
|
rowdata, row = getrowdata(id=id)
|
||||||
|
try:
|
||||||
|
shutil.rmtree(f)
|
||||||
|
except:
|
||||||
|
pass
|
||||||
|
if rowdata and len(rowdata.df):
|
||||||
|
_ = dataprep(rowdata.df, id=id,
|
||||||
|
bands=True, otwpower=True, barchart=True,
|
||||||
|
polars=True)
|
||||||
|
try:
|
||||||
|
df = pl.read_parquet(f, columns=columns)
|
||||||
|
except (OSError, ArrowInvalid, IndexError):
|
||||||
|
pass
|
||||||
|
df = df.fill_nan(None).drop_nulls()
|
||||||
|
|
||||||
|
return df
|
||||||
try:
|
try:
|
||||||
f = 'media/strokedata_{id}.parquet.gz'.format(id=id)
|
f = 'media/strokedata_{id}.parquet.gz'.format(id=id)
|
||||||
df = pd.read_parquet(f)
|
df = pd.read_parquet(f)
|
||||||
@@ -1802,6 +1854,13 @@ def fix_newtons(id=0, limit=3000): # pragma: no cover
|
|||||||
pass
|
pass
|
||||||
|
|
||||||
|
|
||||||
|
def remove_invalid_columns_pl(df):  # pragma: no cover
    """Drop every column of a polars DataFrame that is not in ``allowedcolumns``.

    Args:
        df: polars DataFrame to filter.

    Returns:
        A DataFrame containing only the columns whose names appear in
        ``allowedcolumns``; the input is returned unchanged when all of
        its columns are allowed.
    """
    # ``df.columns`` gives the column *names*; ``df.get_columns()`` would
    # give Series objects, which can neither be compared against the
    # allowed names nor passed to ``drop``.
    invalid = [name for name in df.columns if name not in allowedcolumns]
    if invalid:
        df = df.drop(invalid)

    return df
|
||||||
|
|
||||||
def remove_invalid_columns(df): # pragma: no cover
|
def remove_invalid_columns(df): # pragma: no cover
|
||||||
for c in df.columns:
|
for c in df.columns:
|
||||||
if c not in allowedcolumns:
|
if c not in allowedcolumns:
|
||||||
@@ -1809,6 +1868,36 @@ def remove_invalid_columns(df): # pragma: no cover
|
|||||||
|
|
||||||
return df
|
return df
|
||||||
|
|
||||||
|
def add_efficiency_pl(id=0):  # pragma: no cover
    """Compute the ``efficiency`` column for a workout and store the result.

    Efficiency is ``100 * ergpw / power`` where ``ergpw = 2.8 * velo**3``
    and ``velo = 500 / (pace / 1000)``. Non-finite values (inf, NaN) in
    float columns are turned into nulls and then forward-filled. The
    existing stroke data is deleted and, for a non-zero ``id``, the
    updated frame is written to ``media/strokedata_<id>.parquet.gz``.

    Args:
        id: Workout ID.

    Returns:
        The polars DataFrame with the ``efficiency`` column added. If the
        workout data has no ``power`` or ``pace`` column the frame is
        returned unchanged and nothing is deleted or written.
    """
    rowdata, row = getrowdata_pl(id=id,
                                 doclean=False,
                                 convertnewtons=False,
                                 checkefficiency=False)

    # Nothing can be derived without both inputs; bail out before the
    # stored stroke data is touched.
    if 'power' not in rowdata.columns or 'pace' not in rowdata.columns:
        return rowdata

    # Build the computation as polars expressions rather than pandas-style
    # Series arithmetic followed by replace/fillna.
    pace = pl.col('pace') / 1.0e3
    velo = 500. / pace
    ergpw = 2.8 * velo**3
    efficiency = 100. * ergpw / pl.col('power')

    rowdata = rowdata.with_columns(efficiency.alias("efficiency"))

    rowdata = remove_invalid_columns_pl(rowdata)

    # polars equivalent of ``replace([-inf, inf], nan)`` + ``ffill``:
    # blank out non-finite floats, then forward-fill all nulls.
    float_columns = [
        name for name, dtype in zip(rowdata.columns, rowdata.dtypes)
        if dtype in (pl.Float32, pl.Float64)
    ]
    if float_columns:
        rowdata = rowdata.with_columns([
            pl.when(pl.col(name).is_finite())
            .then(pl.col(name))
            .otherwise(None)
            .alias(name)
            for name in float_columns
        ])
    rowdata = rowdata.fill_null(strategy='forward')

    delete_strokedata(id)

    if id != 0:
        rowdata = rowdata.with_columns(pl.lit(id).alias("workoutid"))
        filename = 'media/strokedata_{id}.parquet.gz'.format(id=id)
        rowdata.write_parquet(filename, compression='gzip')

    return rowdata
|
||||||
|
|
||||||
|
|
||||||
def add_efficiency(id=0): # pragma: no cover
|
def add_efficiency(id=0): # pragma: no cover
|
||||||
rowdata, row = getrowdata_db(id=id,
|
rowdata, row = getrowdata_db(id=id,
|
||||||
@@ -2144,6 +2233,9 @@ def dataprep(rowdatadf, id=0, bands=True, barchart=True, otwpower=True,
|
|||||||
os.remove(filename)
|
os.remove(filename)
|
||||||
df.to_parquet(filename, engine='fastparquet', compression='GZIP')
|
df.to_parquet(filename, engine='fastparquet', compression='GZIP')
|
||||||
|
|
||||||
|
if polars:
|
||||||
|
pldf = pl.from_pandas(data)
|
||||||
|
return pldf
|
||||||
|
|
||||||
return data
|
return data
|
||||||
|
|
||||||
|
|||||||
@@ -469,18 +469,20 @@ def interactive_forcecurve(theworkouts):
|
|||||||
|
|
||||||
columns = ['catch', 'slip', 'wash', 'finish', 'averageforce',
|
columns = ['catch', 'slip', 'wash', 'finish', 'averageforce',
|
||||||
'peakforceangle', 'peakforce', 'spm', 'distance',
|
'peakforceangle', 'peakforce', 'spm', 'distance',
|
||||||
'workoutstate', 'driveenergy', 'cumdist']
|
'workoutstate', 'driveenergy', 'cumdist', 'workoutid']
|
||||||
|
columns = columns + [name for name, d in metrics.rowingmetrics]
|
||||||
|
|
||||||
rowdata = dataprep.getsmallrowdata_db(columns, ids=ids,
|
|
||||||
|
rowdata = dataprep.getsmallrowdata_pl(columns, ids=ids,
|
||||||
workstrokesonly=False)
|
workstrokesonly=False)
|
||||||
|
|
||||||
rowdata.dropna(axis=1, how='all', inplace=True)
|
rowdata = rowdata.fill_nan(None).drop_nulls()
|
||||||
rowdata.dropna(axis=0, how='any', inplace=True)
|
|
||||||
|
|
||||||
if rowdata.empty:
|
|
||||||
|
if rowdata.is_empty():
|
||||||
return "", "No Valid Data Available"
|
return "", "No Valid Data Available"
|
||||||
|
|
||||||
data_dict = rowdata.to_dict("records")
|
data_dict = rowdata.to_dicts()
|
||||||
|
|
||||||
thresholdforce = 100. if 'x' in boattype else 200.
|
thresholdforce = 100. if 'x' in boattype else 200.
|
||||||
|
|
||||||
@@ -490,7 +492,7 @@ def interactive_forcecurve(theworkouts):
|
|||||||
'thresholdforce': thresholdforce,
|
'thresholdforce': thresholdforce,
|
||||||
}
|
}
|
||||||
|
|
||||||
script, div = get_chart("/forcecurve", chart_data)
|
script, div = get_chart("/forcecurve", chart_data, debug=False)
|
||||||
return script, div
|
return script, div
|
||||||
|
|
||||||
|
|
||||||
@@ -822,25 +824,27 @@ def interactive_histoall(theworkouts, histoparam, includereststrokes,
|
|||||||
|
|
||||||
ids = [int(w.id) for w in theworkouts]
|
ids = [int(w.id) for w in theworkouts]
|
||||||
|
|
||||||
|
columns = [name for name, d in metrics.rowingmetrics]+['spm', 'driveenergy', 'distance', 'workoutstate', 'workoutid']
|
||||||
|
|
||||||
workstrokesonly = not includereststrokes
|
workstrokesonly = not includereststrokes
|
||||||
rowdata = dataprep.getsmallrowdata_db(
|
rowdata = dataprep.getsmallrowdata_pl(
|
||||||
[histoparam], ids=ids, doclean=True, workstrokesonly=workstrokesonly)
|
columns, ids=ids, doclean=True, workstrokesonly=workstrokesonly)
|
||||||
|
|
||||||
rowdata.dropna(axis=0, how='any', inplace=True)
|
rowdata = rowdata.fill_nan(None).drop_nulls()
|
||||||
|
|
||||||
rowdata = dataprep.filter_df(rowdata, 'spm', spmmin, largerthan=True)
|
#rowdata = dataprep.filter_df(rowdata, 'spm', spmmin, largerthan=True)
|
||||||
rowdata = dataprep.filter_df(rowdata, 'spm', spmmax, largerthan=False)
|
#rowdata = dataprep.filter_df(rowdata, 'spm', spmmax, largerthan=False)
|
||||||
|
|
||||||
rowdata = dataprep.filter_df(
|
#rowdata = dataprep.filter_df(
|
||||||
rowdata, 'driveenergy', workmin, largerthan=True)
|
# rowdata, 'driveenergy', workmin, largerthan=True)
|
||||||
rowdata = dataprep.filter_df(
|
#rowdata = dataprep.filter_df(
|
||||||
rowdata, 'driveenergy', workmax, largerthan=False)
|
# rowdata, 'driveenergy', workmax, largerthan=False)
|
||||||
|
|
||||||
if rowdata.empty:
|
if rowdata.is_empty():
|
||||||
return "", "No Valid Data Available"
|
return "", "No Valid Data Available"
|
||||||
|
|
||||||
try:
|
try:
|
||||||
histopwr = rowdata[histoparam].values
|
histopwr = rowdata[histoparam].to_numpy()
|
||||||
except KeyError:
|
except KeyError:
|
||||||
return "", "No data"
|
return "", "No data"
|
||||||
if len(histopwr) == 0: # pragma: no cover
|
if len(histopwr) == 0: # pragma: no cover
|
||||||
|
|||||||
BIN
rowers/tests/testdata/testdata.tcx.gz
vendored
BIN
rowers/tests/testdata/testdata.tcx.gz
vendored
Binary file not shown.
Reference in New Issue
Block a user