Private
Public Access
1
0

fixed dataplep

This commit is contained in:
2024-04-10 22:17:07 +02:00
parent 2a9beb7a25
commit eccb20b82f
2 changed files with 21 additions and 12 deletions

View File

@@ -569,6 +569,7 @@ def clean_df_stats(datadf, workstrokesonly=True, ignorehr=True,
except KeyError:
pass
if not ignoreadvanced:
try:
mask = datadf['rhythm'] < 0
@@ -1418,7 +1419,7 @@ def getrowdata_pl(id=0, doclean=False, convertnewtons=True,
checkefficiency=True, for_chart=False):
data = read_df_sql(id,polars=True)
try:
data = data.with_columns(pl.col(data['time'].diff()).alias("deltat")) # data['time'].diff()
data = data.with_columns((pl.col('time').diff()).alias("deltat")) # data['time'].diff()
except KeyError: # pragma: no cover
data = pl.DataFrame()
@@ -1781,7 +1782,7 @@ def read_df_sql(id, polars=False):
if polars:
try:
f = 'media/strokedata_{id}.parquet.gz'.format(id=id)
df = pd.read_parquet(f)
df = pl.read_parquet(f)
except (IsADirectoryError, FileNotFoundError, OSError, ArrowInvalid, IndexError): # pragma: no cover
rowdata, row = getrowdata(id=id)
try:
@@ -2018,11 +2019,15 @@ def dataplep(rowdatadf, id=0, inboard=0.88, forceunit='lbs', bands=True, barchar
if 'driveenergy' not in df.columns:
if forceunit == 'lbs':
df = df.with_columns((pl.col(" DriveLength (meters)") * pl.col(" AverageDriveForce (lbs)") * lbstoN).alias("driveenergy"))
df = df.with_columns((pl.col(" AverageDriveForce (lbs)") * lbstoN).alias(" AverageDriveForce (lbs)"))
df = df.with_columns((pl.col(" PeakDriveForce (lbs)") * lbstoN).alias(" PeakDriveForce (lbs)"))
else:
df = df.with_columns((pl.col(" DriveLength (meters)") * pl.col(" AverageDriveForce (lbs)")).alias("driveenergy"))
if forceunit == 'lbs':
df = df.with_columns((pl.col(" AverageDriveForce (lbs)") * lbstoN).alias(" AverageDriveForce (lbs)"))
df = df.with_columns((pl.col(" PeakDriveForce (lbs)") * lbstoN).alias(" PeakDriveForce (lbs)"))
if df["driveenergy"].mean() == 0 and df["driveenergy"].std() == 0:
df = df.with_columns((0.0*pl.col("driveenergy")+100).alias("driveenergy"))
@@ -2067,14 +2072,14 @@ def dataplep(rowdatadf, id=0, inboard=0.88, forceunit='lbs', bands=True, barchar
if 'wash' not in df.columns:
data = data.with_columns(
wash = pl.lit("0"),
catch = pl.lit("0"),
peakforceangle = pl.lit("0"),
finish = pl.lit("0"),
slip = pl.lit("0"),
totalangle = pl.lit("0"),
effectiveangle = pl.lit("0"),
efficiency = pl.lit("0"),
wash = pl.lit(0.0),
catch = pl.lit(0.0),
peakforceangle = pl.lit(0.0),
finish = pl.lit(0.0),
slip = pl.lit(0.0),
totalangle = pl.lit(0.0),
effectiveangle = pl.lit(0.0),
efficiency = pl.lit(0.0),
)
else:
wash = df['wash']
@@ -2145,6 +2150,10 @@ def dataplep(rowdatadf, id=0, inboard=0.88, forceunit='lbs', bands=True, barchar
data = data.with_columns(
workoutid = pl.lit(id)
)
# cast data
for k, v in dtypes.items():
if v == 'int':
data = data.cast({k: pl.Int64})
filename = 'media/strokedata_{id}.parquet.gz'.format(id=id)
try:
data.write_parquet(filename, compression='gzip')

Binary file not shown.