Private
Public Access
1
0

Some fixes after manual testing

This commit is contained in:
2024-05-03 16:56:01 +02:00
parent ada06da0fc
commit ebea0047cc
6 changed files with 61 additions and 18 deletions

View File

@@ -186,10 +186,12 @@ def remove_nulls_pl(data):
data = data.select(pl.all().forward_fill())
data = data.select(pl.all().backward_fill())
data = data.fill_nan(None)
data = data.select(cs.by_dtype(pl.NUMERIC_DTYPES)).collect()
data = data[[s.name for s in data if not s.is_infinite().sum()]]
data = data[[s.name for s in data if not (s.null_count() == data.height)]]
if not data.is_empty():
try:
data = data.drop_nulls()
@@ -207,6 +209,8 @@ def get_video_data(w, groups=['basic'], mode='water'):
columns = list(set(columns))
df = getsmallrowdata_pd(columns, ids=[w.id],
workstrokesonly=False, doclean=False, compute=False)
df.dropna(axis=0, how='all', inplace=True)
df.dropna(axis=1, how='all', inplace=True)
df['time'] = (df['time']-df['time'].min())/1000.
@@ -245,7 +249,6 @@ def get_video_data(w, groups=['basic'], mode='water'):
coordinates['time'] = coordinates['time']-coordinates['time'].min()
latitude = coordinates['latitude']
longitude = coordinates['longitude']
# bundle data
data = {
'boatspeed': boatspeed.values.tolist(),
@@ -263,7 +266,10 @@ def get_video_data(w, groups=['basic'], mode='water'):
else:
sigfigs = dict(rowingmetrics)[c]['sigfigs']
if (c != 'pace'):
da = ((10**sigfigs)*df2[c]).astype(int)/(10**sigfigs)
try:
da = ((10**sigfigs)*df2[c]).astype(int)/(10**sigfigs)
except:
da = df2[c]
else:
da = df2[c]
data[c] = da.values.tolist()
@@ -281,6 +287,14 @@ def get_video_data(w, groups=['basic'], mode='water'):
maxtime = coordinates['time'].max()
data = pd.DataFrame(data)
data.replace([np.inf, -np.inf], np.nan, inplace=True)
data.dropna(inplace=True)
data = pl.from_pandas(data)
data = data.to_dict(as_series=False)
return data, metrics, maxtime
@@ -1490,7 +1504,8 @@ def getrowdata_pl(id=0, doclean=False, convertnewtons=True,
def read_data(columns, ids=[], doclean=True, workstrokesonly=True, debug=False, for_chart=False, compute=True):
def read_data(columns, ids=[], doclean=True, workstrokesonly=True, debug=False, for_chart=False, compute=True,
startenddict={}):
if ids:
csvfilenames = [
'media/strokedata_{id}.parquet.gz'.format(id=id) for id in ids]
@@ -1504,6 +1519,17 @@ def read_data(columns, ids=[], doclean=True, workstrokesonly=True, debug=False,
for id, f in zip(ids, csvfilenames):
if os.path.isfile(f):
df = pl.scan_parquet(f)
if startenddict:
try:
startsecond, endsecond = startenddict[id]
df = df.filter(pl.col("time") >= 1.0e3*startsecond,
pl.col("time") <= 1.0e3*endsecond)
df = df.with_columns(time = pl.col("time")-1.0e3*startsecond)
if 'cumdist' in columns:
df = df.collect()
df = df.with_columns(cumdist = pl.col("cumdist")-df[0, "cumdist"]).lazy()
except KeyError:
pass
data.append(df)
else:
rowdata, row = getrowdata(id=id)
@@ -1516,6 +1542,17 @@ def read_data(columns, ids=[], doclean=True, workstrokesonly=True, debug=False,
bands=True, otwpower=True, barchart=True,
polars=True)
df = pl.scan_parquet(f)
if startenddict:
try:
startsecond, endsecond = startenddict[id]
df = df.filter(pl.col("time") >= 1.0e3*startsecond,
pl.col("time") <= 1.0e3*endsecond)
df = df.with_columns(time = pl.col("time")-1.0e3*startsecond)
if 'cumdist' in columns:
df = df.collect()
df = df.with_columns(cumdist = pl.col("cumdist")-df[0, "cumdist"]).lazy()
except KeyError:
pass
data.append(df)
data = pl.collect_all(data)