more pl
This commit is contained in:
@@ -773,55 +773,21 @@ def clean_df_stats_pl(datadf, workstrokesonly=True, ignorehr=True,
|
||||
|
||||
# clean data for useful ranges per column
|
||||
if not ignorehr:
|
||||
try:
|
||||
datadf = datadf.filter(pl.col("hr")>=30)
|
||||
except (KeyError, TypeError): # pragma: no cover
|
||||
pass
|
||||
datadf = datadf.filter(pl.col("hr")>=30)
|
||||
|
||||
try:
|
||||
datadf = datadf.filter(pl.col("spm") >=0)
|
||||
except (KeyError, TypeError):
|
||||
pass
|
||||
|
||||
datadf = datadf.filter(
|
||||
pl.col("spm") >=0,
|
||||
pl.col("efficiency")<=200,
|
||||
pl.col("spm")>=10,
|
||||
pl.col("pace")<=300*1000.,
|
||||
pl.col("efficiency")>=0,
|
||||
pl.col("pace")>=60*1000,
|
||||
pl.col("power")<=5000,
|
||||
pl.col("spm")<=120,
|
||||
pl.col("wash")>=1
|
||||
)
|
||||
|
||||
try:
|
||||
datadf = datadf.filter(pl.col("efficiency")<=200)
|
||||
except (KeyError, TypeError):
|
||||
pass
|
||||
|
||||
try:
|
||||
datadf = datadf.filter(pl.col("spm")>=10)
|
||||
except (KeyError, TypeError):
|
||||
pass
|
||||
|
||||
try:
|
||||
datadf = datadf.filter(pl.col("pace")<=300*1000.)
|
||||
except (KeyError, TypeError):
|
||||
pass
|
||||
|
||||
try:
|
||||
datadf = datadf.filter(pl.col("efficiency")>=0)
|
||||
except (KeyError, TypeError):
|
||||
pass
|
||||
|
||||
try:
|
||||
datadf = datadf.filter(pl.col("pace")>=60*1000)
|
||||
except (KeyError, TypeError):
|
||||
pass
|
||||
|
||||
try:
|
||||
datadf = datadf.filter(pl.col("power")<=5000)
|
||||
except (KeyError, TypeError):
|
||||
pass
|
||||
|
||||
try:
|
||||
datadf = datadf.filter(pl.col("spm")<=120)
|
||||
except (KeyError, TypeError):
|
||||
pass
|
||||
|
||||
try:
|
||||
datadf = datadf.filter(pl.col("wash")>=1)
|
||||
except (KeyError, TypeError):
|
||||
pass
|
||||
|
||||
# try to guess ignoreadvanced
|
||||
if not ignoreadvanced:
|
||||
@@ -834,70 +800,25 @@ def clean_df_stats_pl(datadf, workstrokesonly=True, ignorehr=True,
|
||||
pass
|
||||
|
||||
if not ignoreadvanced:
|
||||
try:
|
||||
datadf = datadf.filter(pl.col("rhythm")>=0)
|
||||
except (KeyError, TypeError):
|
||||
pass
|
||||
datadf = datadf.filter(pl.col("rhythm")>=0,
|
||||
pl.col("rhythm")<=70,
|
||||
pl.col("power")>=20,
|
||||
pl.col("drivelength")>=0.5,
|
||||
pl.col("forceratio")>=0.2,
|
||||
pl.col("forceratio")<=1.0,
|
||||
pl.col("drivespeed")>=0.5,
|
||||
pl.col("drivespeed")<=4,
|
||||
pl.col("driveenergy")<=2000,
|
||||
pl.col("driveenergy")>=100,
|
||||
pl.col("catch")<=-30)
|
||||
|
||||
try:
|
||||
datadf = datadf.filter(pl.col("rhythm")<=70)
|
||||
except (KeyError, TypeError):
|
||||
pass
|
||||
|
||||
try:
|
||||
datadf = datadf.filter(pl.col("power")>=20)
|
||||
except (KeyError, TypeError):
|
||||
pass
|
||||
|
||||
try:
|
||||
datadf = datadf.filter(pl.col("drivelength")>=0.5)
|
||||
except (KeyError, TypeError):
|
||||
pass
|
||||
|
||||
try:
|
||||
datadf = datadf.filter(pl.col("forceratio")>=0.2)
|
||||
except (KeyError, TypeError):
|
||||
pass
|
||||
|
||||
try:
|
||||
datadf = datadf.filter(pl.col("forceratio")<=1.0)
|
||||
except (KeyError, TypeError):
|
||||
pass
|
||||
|
||||
try:
|
||||
datadf = datadf.filter(pl.col("drivespeed")>=0.5)
|
||||
except (KeyError, TypeError):
|
||||
pass
|
||||
|
||||
try:
|
||||
datadf = datadf.filter(pl.col("drivespeed")<=4)
|
||||
except (KeyError, TypeError):
|
||||
pass
|
||||
|
||||
try:
|
||||
datadf = datadf.filter(pl.col("driveenergy")<=2000)
|
||||
except (KeyError, TypeError):
|
||||
pass
|
||||
|
||||
try:
|
||||
datadf = datadf.filter(pl.col("driveenergy")>=100)
|
||||
except (KeyError, TypeError):
|
||||
pass
|
||||
|
||||
try:
|
||||
datadf = datadf.filter(pl.col("catch")<=-30)
|
||||
except (KeyError, TypeError):
|
||||
pass
|
||||
|
||||
# workoutstateswork = [1, 4, 5, 8, 9, 6, 7]
|
||||
workoutstatesrest = [3]
|
||||
# workoutstatetransition = [0, 2, 10, 11, 12, 13]
|
||||
|
||||
if workstrokesonly == 'True' or workstrokesonly is True:
|
||||
try:
|
||||
datadf = datadf.filter(~pl.col("workoutstate").is_in(workoutstatesrest))
|
||||
except:
|
||||
pass
|
||||
datadf = datadf.filter(~pl.col("workoutstate").is_in(workoutstatesrest))
|
||||
|
||||
after = {}
|
||||
|
||||
@@ -1571,7 +1492,7 @@ def getsmallrowdata_pl(columns, ids=[], doclean=True, workstrokesonly=True, comp
|
||||
|
||||
return data
|
||||
|
||||
|
||||
df = df.fill_nan(None).drop_nulls()
|
||||
return df
|
||||
|
||||
|
||||
@@ -2207,7 +2128,11 @@ def dataprep(rowdatadf, id=0, bands=True, barchart=True, otwpower=True,
|
||||
df = dd.from_pandas(data, npartitions=1)
|
||||
if polars:
|
||||
pldf = pl.from_pandas(data)
|
||||
pldf.write_parquet(filename, compression='gzip')
|
||||
try:
|
||||
pldf.write_parquet(filename, compression='gzip')
|
||||
except IsADirectoryError:
|
||||
shutil.rmtree(filename)
|
||||
pldf.write_parquet(filename, compression='gzip')
|
||||
else:
|
||||
try:
|
||||
df.to_parquet(filename, engine='fastparquet', compression='gzip')
|
||||
|
||||
Reference in New Issue
Block a user