Private
Public Access
1
0
This commit is contained in:
2024-04-19 18:17:54 +02:00
parent 8dc0e7fb58
commit ac7c114649
3 changed files with 166 additions and 40 deletions

View File

@@ -1,4 +1,4 @@
from rowers.metrics import axes, calc_trimp, rowingmetrics, dtypes, metricsgroups
from rowers.metrics import axes, calc_trimp, rowingmetrics, dtypes, metricsgroups, metricsdicts
from rowers.utils import lbstoN, wavg, dologging
from rowers.mytypes import otwtypes, otetypes, rowtypes
import glob
@@ -35,7 +35,7 @@ import polars as pl
import polars.selectors as cs
from polars.exceptions import (
ColumnNotFoundError, SchemaError, ComputeError,
InvalidOperationError
InvalidOperationError, ShapeError
)
from rowingdata import (
@@ -178,10 +178,18 @@ columndict = {
}
def remove_nulls_pl(data):
data = data.lazy().fill_nan(None)
data = data.lazy().with_columns(
pl.when(
pl.all().is_infinite()
).then(None).otherwise(pl.all()).keep_name()
)
data = data.select(pl.all().forward_fill())
data = data.select(pl.all().backward_fill())
data = data.fill_nan(None)
data = data.select(cs.by_dtype(pl.NUMERIC_DTYPES)).collect()
data = data[[s.name for s in data if not s.is_infinite().sum()]]
data = data[[s.name for s in data if not (s.null_count() == data.height)]]
if not data.is_empty():
try:
data = data.drop_nulls()
@@ -1566,7 +1574,134 @@ def getsmallrowdata_pl(columns, ids=[], doclean=True, workstrokesonly=True, comp
return df
# (column, lower bound, upper bound) applied by the doclean pass in read_data;
# None leaves that side unbounded. Values mirror the previous inline checks.
_READ_DATA_CLEAN_LIMITS = [
    ("spm", 10, 120),
    ("pace", 60 * 1000., 300 * 1000.),
    ("power", 20, 5000),
    ("rhythm", 0, 70),
    ("efficiency", 0, 200),
    ("wash", 1, None),
    ("drivelength", 0.5, None),
    ("forceratio", 0.2, 1.0),
    ("drivespeed", 0.5, 4),
    ("driveenergy", 100, 2000),
    ("catch", None, -30),  # catch angle is negative; upper bound only
]


def _workstroke_exprs(workstrokesonly):
    """Return filter expressions that drop rest strokes (workoutstate == 3)."""
    if not workstrokesonly:
        return []
    return [~pl.col("workoutstate").is_in([3])]


def _clean_range_exprs(present_columns):
    """Return range-filter expressions for every cleanable column present."""
    exprs = []
    for name, lo, hi in _READ_DATA_CLEAN_LIMITS:
        if name in present_columns:
            if lo is not None:
                exprs.append(pl.col(name) >= lo)
            if hi is not None:
                exprs.append(pl.col(name) <= hi)
    return exprs


def _coerce_frames(frames, columns):
    """Select `columns` from each frame and unify numeric dtypes so frames
    with mismatched schemas can be concatenated."""
    floatcolumns, intcolumns = [], []
    for c in columns:
        try:
            numtype = metricsdicts[c]['numtype']
        except KeyError:
            # column has no metrics metadata; leave its dtype to the
            # generic float/integer casts below
            continue
        if numtype == 'float':
            floatcolumns.append(c)
        elif numtype == 'integer':
            intcolumns.append(c)
    return [
        df.select(columns).with_columns(
            cs.float().cast(pl.Float64)
        ).with_columns(
            cs.integer().cast(pl.Int64)
        ).with_columns(
            cs.by_name(intcolumns).cast(pl.Int64)
        ).with_columns(
            cs.by_name(floatcolumns).cast(pl.Float64)
        )
        for df in frames
    ]


def read_data(columns, ids=None, doclean=True, workstrokesonly=True, debug=False,
              for_chart=False, compute=True):
    """Load per-stroke data for the given workout ids from parquet files.

    Missing parquet files are regenerated via getrowdata/dataplep before
    being scanned.

    Parameters
    ----------
    columns : list of str
        Requested column names; the columns needed for filtering
        (distance, spm, workoutid, workoutstate, driveenergy) are always added
        and the literal string 'None' is dropped.
    ids : list, optional
        Workout ids; an empty/None value yields an empty DataFrame.
    doclean : bool
        When True, additionally apply the range filters in
        _READ_DATA_CLEAN_LIMITS.
    workstrokesonly : bool
        When True, drop rest strokes (workoutstate == 3).
    debug, for_chart, compute :
        Accepted for interface compatibility; not used in this function.

    Returns
    -------
    pl.DataFrame
        Concatenated stroke data, filtered as requested. Whenever a filter
        would leave no rows, falls back to a less filtered frame, ultimately
        to the unfiltered data.
    """
    if not ids:
        return pl.DataFrame()
    filenames = [
        'media/strokedata_{id}.parquet.gz'.format(id=id) for id in ids]
    # Deduplicate while preserving order so the output column order is stable
    # (the original list(set(...)) made it nondeterministic).
    wanted = [c for c in columns if c != 'None'] + [
        'distance', 'spm', 'workoutid', 'workoutstate', 'driveenergy']
    columns = list(dict.fromkeys(wanted))
    frames = []
    for workoutid, fname in zip(ids, filenames):
        if os.path.isfile(fname):
            frames.append(pl.scan_parquet(fname))
            continue
        rowdata, row = getrowdata(id=workoutid)
        # BUG FIX: the original called shutil.rmtree on a regular file, which
        # always raised (silently swallowed by a bare except) and therefore
        # never removed a stale file.
        try:
            os.remove(fname)
        except OSError:
            pass
        if rowdata and len(rowdata.df):
            _ = dataplep(rowdata.df, id=workoutid,
                         bands=True, otwpower=True, barchart=True,
                         polars=True)
            # BUG FIX: only scan after the file has been regenerated; the
            # original appended a stale/undefined df when rowdata was empty.
            frames.append(pl.scan_parquet(fname))
    if not frames:
        # nothing could be loaded or regenerated
        return pl.DataFrame()
    data = pl.collect_all(frames)
    try:
        datadf = pl.concat(data).select(columns)
    except (SchemaError, ShapeError):
        # Schemas disagree between files; coerce numeric dtypes and retry.
        datadf = pl.concat(_coerce_frames(data, columns))
    workexprs = _workstroke_exprs(workstrokesonly)
    if doclean:
        candidates = [workexprs + _clean_range_exprs(datadf.columns), workexprs]
    else:
        candidates = [workexprs]
    # Try the strictest filter first; fall back to weaker ones, and finally
    # to the unfiltered frame, whenever a filter would leave no rows.
    for exprs in candidates:
        if exprs:
            filtered = datadf.filter(exprs)
            if not filtered.is_empty():
                return filtered
    return datadf
def getsmallrowdata_db(columns, ids=[], doclean=True, workstrokesonly=True, compute=True,
debug=False, for_chart=False):
@@ -2105,7 +2240,6 @@ def dataplep(rowdatadf, id=0, inboard=0.88, forceunit='lbs', bands=True, barchar
df = df.with_columns((pl.col(" AverageDriveForce (lbs)") * lbstoN).alias(" AverageDriveForce (lbs)"))
df = df.with_columns((pl.col(" PeakDriveForce (lbs)") * lbstoN).alias(" PeakDriveForce (lbs)"))
if df["driveenergy"].mean() == 0 and df["driveenergy"].std() == 0:
df = df.with_columns((0.0*pl.col("driveenergy")+100).alias("driveenergy"))
@@ -2113,7 +2247,7 @@ def dataplep(rowdatadf, id=0, inboard=0.88, forceunit='lbs', bands=True, barchar
t2 = df["TimeStamp (sec)"].map_elements(lambda x: timedeltaconv(x), return_dtype=pl.Datetime)
p2 = df[" Stroke500mPace"].map_elements(lambda x: timedeltaconv(x), return_dtype=pl.Datetime)
data = pl.DataFrame(
dict(
time=df["TimeStamp (sec)"] * 1e3,