Private
Public Access
1
0
This commit is contained in:
2024-04-08 19:02:28 +02:00
parent 69007e56be
commit fc96d44e72
3 changed files with 33 additions and 112 deletions

View File

@@ -773,55 +773,21 @@ def clean_df_stats_pl(datadf, workstrokesonly=True, ignorehr=True,
# clean data for useful ranges per column # clean data for useful ranges per column
if not ignorehr: if not ignorehr:
try: datadf = datadf.filter(pl.col("hr")>=30)
datadf = datadf.filter(pl.col("hr")>=30)
except (KeyError, TypeError): # pragma: no cover
pass
try:
datadf = datadf.filter(pl.col("spm") >=0) datadf = datadf.filter(
except (KeyError, TypeError): pl.col("spm") >=0,
pass pl.col("efficiency")<=200,
pl.col("spm")>=10,
pl.col("pace")<=300*1000.,
pl.col("efficiency")>=0,
pl.col("pace")>=60*1000,
pl.col("power")<=5000,
pl.col("spm")<=120,
pl.col("wash")>=1
)
try:
datadf = datadf.filter(pl.col("efficiency")<=200)
except (KeyError, TypeError):
pass
try:
datadf = datadf.filter(pl.col("spm")>=10)
except (KeyError, TypeError):
pass
try:
datadf = datadf.filter(pl.col("pace")<=300*1000.)
except (KeyError, TypeError):
pass
try:
datadf = datadf.filter(pl.col("efficiency")>=0)
except (KeyError, TypeError):
pass
try:
datadf = datadf.filter(pl.col("pace")>=60*1000)
except (KeyError, TypeError):
pass
try:
datadf = datadf.filter(pl.col("power")<=5000)
except (KeyError, TypeError):
pass
try:
datadf = datadf.filter(pl.col("spm")<=120)
except (KeyError, TypeError):
pass
try:
datadf = datadf.filter(pl.col("wash")>=1)
except (KeyError, TypeError):
pass
# try to guess ignoreadvanced # try to guess ignoreadvanced
if not ignoreadvanced: if not ignoreadvanced:
@@ -834,70 +800,25 @@ def clean_df_stats_pl(datadf, workstrokesonly=True, ignorehr=True,
pass pass
if not ignoreadvanced: if not ignoreadvanced:
try: datadf = datadf.filter(pl.col("rhythm")>=0,
datadf = datadf.filter(pl.col("rhythm")>=0) pl.col("rhythm")<=70,
except (KeyError, TypeError): pl.col("power")>=20,
pass pl.col("drivelength")>=0.5,
pl.col("forceratio")>=0.2,
pl.col("forceratio")<=1.0,
pl.col("drivespeed")>=0.5,
pl.col("drivespeed")<=4,
pl.col("driveenergy")<=2000,
pl.col("driveenergy")>=100,
pl.col("catch")<=-30)
try:
datadf = datadf.filter(pl.col("rhythm")<=70)
except (KeyError, TypeError):
pass
try:
datadf = datadf.filter(pl.col("power")>=20)
except (KeyError, TypeError):
pass
try:
datadf = datadf.filter(pl.col("drivelength")>=0.5)
except (KeyError, TypeError):
pass
try:
datadf = datadf.filter(pl.col("forceratio")>=0.2)
except (KeyError, TypeError):
pass
try:
datadf = datadf.filter(pl.col("forceratio")<=1.0)
except (KeyError, TypeError):
pass
try:
datadf = datadf.filter(pl.col("drivespeed")>=0.5)
except (KeyError, TypeError):
pass
try:
datadf = datadf.filter(pl.col("drivespeed")<=4)
except (KeyError, TypeError):
pass
try:
datadf = datadf.filter(pl.col("driveenergy")<=2000)
except (KeyError, TypeError):
pass
try:
datadf = datadf.filter(pl.col("driveenergy")>=100)
except (KeyError, TypeError):
pass
try:
datadf = datadf.filter(pl.col("catch")<=-30)
except (KeyError, TypeError):
pass
# workoutstateswork = [1, 4, 5, 8, 9, 6, 7] # workoutstateswork = [1, 4, 5, 8, 9, 6, 7]
workoutstatesrest = [3] workoutstatesrest = [3]
# workoutstatetransition = [0, 2, 10, 11, 12, 13] # workoutstatetransition = [0, 2, 10, 11, 12, 13]
if workstrokesonly == 'True' or workstrokesonly is True: if workstrokesonly == 'True' or workstrokesonly is True:
try: datadf = datadf.filter(~pl.col("workoutstate").is_in(workoutstatesrest))
datadf = datadf.filter(~pl.col("workoutstate").is_in(workoutstatesrest))
except:
pass
after = {} after = {}
@@ -1571,7 +1492,7 @@ def getsmallrowdata_pl(columns, ids=[], doclean=True, workstrokesonly=True, comp
return data return data
df = df.fill_nan(None).drop_nulls()
return df return df
@@ -2207,7 +2128,11 @@ def dataprep(rowdatadf, id=0, bands=True, barchart=True, otwpower=True,
df = dd.from_pandas(data, npartitions=1) df = dd.from_pandas(data, npartitions=1)
if polars: if polars:
pldf = pl.from_pandas(data) pldf = pl.from_pandas(data)
pldf.write_parquet(filename, compression='gzip') try:
pldf.write_parquet(filename, compression='gzip')
except IsADirectoryError:
shutil.rmtree(filename)
pldf.write_parquet(filename, compression='gzip')
else: else:
try: try:
df.to_parquet(filename, engine='fastparquet', compression='gzip') df.to_parquet(filename, engine='fastparquet', compression='gzip')

View File

@@ -478,7 +478,7 @@ def interactive_forcecurve(theworkouts):
rowdata.dropna(axis=0, how='any', inplace=True) rowdata.dropna(axis=0, how='any', inplace=True)
if rowdata.empty: if rowdata.empty:
return "", "No Valid Data Available", "", "" return "", "No Valid Data Available"
data_dict = rowdata.to_dict("records") data_dict = rowdata.to_dict("records")
@@ -2312,7 +2312,6 @@ def interactive_cum_flex_chart2(theworkouts, promember=0,
columns_basic = columns_basic + ['spm', 'driveenergy', 'distance', 'workoutstate'] columns_basic = columns_basic + ['spm', 'driveenergy', 'distance', 'workoutstate']
datadf = pd.DataFrame() datadf = pd.DataFrame()
start = timezone.now()
if promember: if promember:
datadf = dataprep.getsmallrowdata_pl(columns, ids=ids, doclean=True, datadf = dataprep.getsmallrowdata_pl(columns, ids=ids, doclean=True,
workstrokesonly=workstrokesonly, for_chart=True) workstrokesonly=workstrokesonly, for_chart=True)
@@ -2320,7 +2319,6 @@ def interactive_cum_flex_chart2(theworkouts, promember=0,
datadf = dataprep.getsmallrowdata_pl(columns_basic, ids=ids, doclean=True, datadf = dataprep.getsmallrowdata_pl(columns_basic, ids=ids, doclean=True,
workstrokesonly=workstrokesonly, for_chart=True) workstrokesonly=workstrokesonly, for_chart=True)
print(timezone.now()-start)
try: try:
_ = datadf[yparam2] _ = datadf[yparam2]
except (KeyError, ColumnNotFoundError): # pragma: no cover except (KeyError, ColumnNotFoundError): # pragma: no cover
@@ -2331,9 +2329,7 @@ def interactive_cum_flex_chart2(theworkouts, promember=0,
except (KeyError, ColumnNotFoundError): except (KeyError, ColumnNotFoundError):
yparam1 = 'None' yparam1 = 'None'
datadf.drop_nulls() datadf = datadf.fill_nan(None).drop_nulls()
#datadf.dropna(axis=1, how='all', inplace=True)
#datadf.dropna(axis=0, how='any', inplace=True)
# test if we have drive energy # test if we have drive energy
try: # pragma: no cover try: # pragma: no cover

Binary file not shown.