Private
Public Access
1
0

histo converted to polars

This commit is contained in:
2024-04-08 20:50:08 +02:00
parent 6d549e3b8b
commit fd46732b6e
3 changed files with 116 additions and 20 deletions

View File

@@ -1414,6 +1414,37 @@ def getrowdata_db(id=0, doclean=False, convertnewtons=True,
return data, row
def getrowdata_pl(id=0, doclean=False, convertnewtons=True,
                  checkefficiency=True, for_chart=False):
    """Return the stroke data of a workout as a polars DataFrame, plus its row.

    The stored stroke data is read through ``read_df_sql(id, polars=True)``
    and a ``deltat`` column (difference between consecutive ``time`` values)
    is added. When nothing usable is stored, the data is rebuilt from the
    workout via ``getrowdata`` and ``dataprep``.

    Parameters:
        id: workout id to look up.
        doclean: when true, pass the result through ``clean_df_stats``.
        convertnewtons: accepted for signature parity; not referenced in
            this function.
        checkefficiency: when true, recompute the ``efficiency`` column via
            ``add_efficiency_pl`` if it is missing, or if its mean is zero
            while the mean ``power`` is not.
        for_chart: forwarded to ``clean_df_stats``.

    Returns:
        A ``(data, row)`` tuple.
    """
    data = read_df_sql(id, polars=True)

    # A polars frame does not raise KeyError for a missing column, so test
    # for the column explicitly instead of relying on an exception handler.
    if "time" in data.columns:
        data = data.with_columns(pl.col("time").diff().alias("deltat"))
    else:  # pragma: no cover
        data = pl.DataFrame()

    if data.is_empty():
        rowdata, row = getrowdata(id=id)
        if not rowdata.empty:  # pragma: no cover
            data = dataprep(rowdata.df, id=id, bands=True,
                            barchart=True, otwpower=True, polars=True)
        else:
            data = pl.DataFrame()  # returning empty dataframe
    else:
        row = Workout.objects.get(id=id)

    if checkefficiency is True and not data.is_empty():
        if "efficiency" in data.columns and "power" in data.columns:
            zero_efficiency = data["efficiency"].mean() == 0
            if zero_efficiency and data["power"].mean() != 0:  # pragma: no cover
                data = add_efficiency_pl(id=id)
        else:  # pragma: no cover
            # Same fallback the original reached through `except KeyError`.
            data = add_efficiency_pl(id=id)

    if doclean:  # pragma: no cover
        data = clean_df_stats(data, ignorehr=True, for_chart=for_chart)

    return data, row
# Fetch a subset of the data from the DB
def getsmallrowdata_pl(columns, ids=[], doclean=True, workstrokesonly=True, compute=True,
@@ -1707,7 +1738,28 @@ def read_cols_df_sql(ids, columns, convertnewtons=True):
# Read stroke data from the DB for a Workout ID. Returns a pandas dataframe by default; pass polars=True to use the polars code path
def read_df_sql(id):
def read_df_sql(id, polars=False):
if polars:
try:
f = 'media/strokedata_{id}.parquet.gz'.format(id=id)
df = pd.read_parquet(f)
except (IsADirectoryError, FileNotFoundError, OSError, ArrowInvalid, IndexError): # pragma: no cover
rowdata, row = getrowdata(id=id)
try:
shutil.rmtree(f)
except:
pass
if rowdata and len(rowdata.df):
_ = dataprep(rowdata.df, id=id,
bands=True, otwpower=True, barchart=True,
polars=True)
try:
df = pl.read_parquet(f, columns=columns)
except (OSError, ArrowInvalid, IndexError):
pass
df = df.fill_nan(None).drop_nulls()
return df
try:
f = 'media/strokedata_{id}.parquet.gz'.format(id=id)
df = pd.read_parquet(f)
@@ -1802,6 +1854,13 @@ def fix_newtons(id=0, limit=3000): # pragma: no cover
pass
def remove_invalid_columns_pl(df):  # pragma: no cover
    """Return ``df`` restricted to the columns named in ``allowedcolumns``.

    Filtering is done on column *names* (``df.columns``). Iterating
    ``df.get_columns()`` would yield Series objects, which can neither be
    compared against the allowed names nor passed to ``drop``.

    Parameters:
        df: polars DataFrame to filter.

    Returns:
        A polars DataFrame holding only the allowed columns, in their
        original order.
    """
    keep = [name for name in df.columns if name in allowedcolumns]
    return df.select(keep)
def remove_invalid_columns(df): # pragma: no cover
for c in df.columns:
if c not in allowedcolumns:
@@ -1809,6 +1868,36 @@ def remove_invalid_columns(df): # pragma: no cover
return df
def add_efficiency_pl(id=0):  # pragma: no cover
    """Recompute the ``efficiency`` column of a workout and store the result.

    Efficiency is ``100 * ergpw / power`` with ``ergpw = 2.8 * velo**3``,
    ``velo = 500 / pace`` and ``pace`` taken from the ``pace`` column divided
    by 1000. Infinite and NaN values are turned into nulls and then
    forward-filled. The stored stroke data for ``id`` is deleted and the
    cleaned frame is written to ``media/strokedata_{id}.parquet.gz``.

    Parameters:
        id: workout id; when non-zero it is also written into a
            ``workoutid`` column.

    Returns:
        The updated polars DataFrame. If the ``power`` or ``pace`` column is
        missing, the frame is returned unchanged and nothing is written.
    """
    rowdata, row = getrowdata_pl(id=id,
                                 doclean=False,
                                 convertnewtons=False,
                                 checkefficiency=False)

    # Without both inputs there is nothing to compute; leave stored data alone.
    if "power" not in rowdata.columns or "pace" not in rowdata.columns:
        return rowdata

    pace = pl.col("pace") / 1.0e3
    velo = 500. / pace
    ergpw = 2.8 * velo**3
    efficiency = 100. * ergpw / pl.col("power")

    # polars keeps NaN and null apart and only forward-fills nulls, so map
    # +/-inf and NaN to null first.
    efficiency = (
        pl.when(efficiency.is_infinite())
        .then(None)
        .otherwise(efficiency)
        .fill_nan(None)
        .forward_fill()
        .alias("efficiency")
    )
    rowdata = rowdata.with_columns(efficiency)

    rowdata = remove_invalid_columns_pl(rowdata)

    # Apply the same inf/NaN clean-up to every float column in the frame.
    float_cols = [
        name for name, dtype in rowdata.schema.items()
        if dtype in (pl.Float32, pl.Float64)
    ]
    if float_cols:
        rowdata = rowdata.with_columns([
            pl.when(pl.col(name).is_infinite())
            .then(None)
            .otherwise(pl.col(name))
            .fill_nan(None)
            .alias(name)
            for name in float_cols
        ])
    rowdata = rowdata.with_columns(pl.all().forward_fill())

    delete_strokedata(id)

    if id != 0:
        rowdata = rowdata.with_columns(pl.lit(id).alias("workoutid"))

    filename = 'media/strokedata_{id}.parquet.gz'.format(id=id)
    rowdata.write_parquet(filename, compression='gzip')

    return rowdata
def add_efficiency(id=0): # pragma: no cover
rowdata, row = getrowdata_db(id=id,
@@ -2144,7 +2233,10 @@ def dataprep(rowdatadf, id=0, bands=True, barchart=True, otwpower=True,
os.remove(filename)
df.to_parquet(filename, engine='fastparquet', compression='GZIP')
if polars:
pldf = pl.from_pandas(data)
return pldf
return data