Private
Public Access
1
0
This commit is contained in:
2024-04-08 19:02:28 +02:00
parent 69007e56be
commit fc96d44e72
3 changed files with 33 additions and 112 deletions

View File

@@ -773,55 +773,21 @@ def clean_df_stats_pl(datadf, workstrokesonly=True, ignorehr=True,
# clean data for useful ranges per column
if not ignorehr:
try:
datadf = datadf.filter(pl.col("hr")>=30)
except (KeyError, TypeError): # pragma: no cover
pass
datadf = datadf.filter(pl.col("hr")>=30)
try:
datadf = datadf.filter(pl.col("spm") >=0)
except (KeyError, TypeError):
pass
datadf = datadf.filter(
pl.col("spm") >=0,
pl.col("efficiency")<=200,
pl.col("spm")>=10,
pl.col("pace")<=300*1000.,
pl.col("efficiency")>=0,
pl.col("pace")>=60*1000,
pl.col("power")<=5000,
pl.col("spm")<=120,
pl.col("wash")>=1
)
try:
datadf = datadf.filter(pl.col("efficiency")<=200)
except (KeyError, TypeError):
pass
try:
datadf = datadf.filter(pl.col("spm")>=10)
except (KeyError, TypeError):
pass
try:
datadf = datadf.filter(pl.col("pace")<=300*1000.)
except (KeyError, TypeError):
pass
try:
datadf = datadf.filter(pl.col("efficiency")>=0)
except (KeyError, TypeError):
pass
try:
datadf = datadf.filter(pl.col("pace")>=60*1000)
except (KeyError, TypeError):
pass
try:
datadf = datadf.filter(pl.col("power")<=5000)
except (KeyError, TypeError):
pass
try:
datadf = datadf.filter(pl.col("spm")<=120)
except (KeyError, TypeError):
pass
try:
datadf = datadf.filter(pl.col("wash")>=1)
except (KeyError, TypeError):
pass
# try to guess ignoreadvanced
if not ignoreadvanced:
@@ -834,70 +800,25 @@ def clean_df_stats_pl(datadf, workstrokesonly=True, ignorehr=True,
pass
if not ignoreadvanced:
try:
datadf = datadf.filter(pl.col("rhythm")>=0)
except (KeyError, TypeError):
pass
datadf = datadf.filter(pl.col("rhythm")>=0,
pl.col("rhythm")<=70,
pl.col("power")>=20,
pl.col("drivelength")>=0.5,
pl.col("forceratio")>=0.2,
pl.col("forceratio")<=1.0,
pl.col("drivespeed")>=0.5,
pl.col("drivespeed")<=4,
pl.col("driveenergy")<=2000,
pl.col("driveenergy")>=100,
pl.col("catch")<=-30)
try:
datadf = datadf.filter(pl.col("rhythm")<=70)
except (KeyError, TypeError):
pass
try:
datadf = datadf.filter(pl.col("power")>=20)
except (KeyError, TypeError):
pass
try:
datadf = datadf.filter(pl.col("drivelength")>=0.5)
except (KeyError, TypeError):
pass
try:
datadf = datadf.filter(pl.col("forceratio")>=0.2)
except (KeyError, TypeError):
pass
try:
datadf = datadf.filter(pl.col("forceratio")<=1.0)
except (KeyError, TypeError):
pass
try:
datadf = datadf.filter(pl.col("drivespeed")>=0.5)
except (KeyError, TypeError):
pass
try:
datadf = datadf.filter(pl.col("drivespeed")<=4)
except (KeyError, TypeError):
pass
try:
datadf = datadf.filter(pl.col("driveenergy")<=2000)
except (KeyError, TypeError):
pass
try:
datadf = datadf.filter(pl.col("driveenergy")>=100)
except (KeyError, TypeError):
pass
try:
datadf = datadf.filter(pl.col("catch")<=-30)
except (KeyError, TypeError):
pass
# workoutstateswork = [1, 4, 5, 8, 9, 6, 7]
workoutstatesrest = [3]
# workoutstatetransition = [0, 2, 10, 11, 12, 13]
if workstrokesonly == 'True' or workstrokesonly is True:
try:
datadf = datadf.filter(~pl.col("workoutstate").is_in(workoutstatesrest))
except:
pass
datadf = datadf.filter(~pl.col("workoutstate").is_in(workoutstatesrest))
after = {}
@@ -1571,7 +1492,7 @@ def getsmallrowdata_pl(columns, ids=[], doclean=True, workstrokesonly=True, comp
return data
df = df.fill_nan(None).drop_nulls()
return df
@@ -2207,7 +2128,11 @@ def dataprep(rowdatadf, id=0, bands=True, barchart=True, otwpower=True,
df = dd.from_pandas(data, npartitions=1)
if polars:
pldf = pl.from_pandas(data)
pldf.write_parquet(filename, compression='gzip')
try:
pldf.write_parquet(filename, compression='gzip')
except IsADirectoryError:
shutil.rmtree(filename)
pldf.write_parquet(filename, compression='gzip')
else:
try:
df.to_parquet(filename, engine='fastparquet', compression='gzip')

View File

@@ -478,7 +478,7 @@ def interactive_forcecurve(theworkouts):
rowdata.dropna(axis=0, how='any', inplace=True)
if rowdata.empty:
return "", "No Valid Data Available", "", ""
return "", "No Valid Data Available"
data_dict = rowdata.to_dict("records")
@@ -2312,7 +2312,6 @@ def interactive_cum_flex_chart2(theworkouts, promember=0,
columns_basic = columns_basic + ['spm', 'driveenergy', 'distance', 'workoutstate']
datadf = pd.DataFrame()
start = timezone.now()
if promember:
datadf = dataprep.getsmallrowdata_pl(columns, ids=ids, doclean=True,
workstrokesonly=workstrokesonly, for_chart=True)
@@ -2320,7 +2319,6 @@ def interactive_cum_flex_chart2(theworkouts, promember=0,
datadf = dataprep.getsmallrowdata_pl(columns_basic, ids=ids, doclean=True,
workstrokesonly=workstrokesonly, for_chart=True)
print(timezone.now()-start)
try:
_ = datadf[yparam2]
except (KeyError, ColumnNotFoundError): # pragma: no cover
@@ -2331,9 +2329,7 @@ def interactive_cum_flex_chart2(theworkouts, promember=0,
except (KeyError, ColumnNotFoundError):
yparam1 = 'None'
datadf.drop_nulls()
#datadf.dropna(axis=1, how='all', inplace=True)
#datadf.dropna(axis=0, how='any', inplace=True)
datadf = datadf.fill_nan(None).drop_nulls()
# test if we have drive energy
try: # pragma: no cover

Binary file not shown.