Private
Public Access
1
0

some more polars, passing test_uploads

This commit is contained in:
2024-04-18 08:31:08 +02:00
parent e510591dba
commit eb44623e24
7 changed files with 68 additions and 59 deletions

View File

@@ -8,6 +8,7 @@ from rowers.datautils import p0
from scipy import optimize
from rowers.utils import calculate_age
import datetime
import gzip
from scipy.signal import savgol_filter
from rowers.opaque import encoder
from rowers.database import *
@@ -27,6 +28,7 @@ from fitparse import FitFile
import itertools
import numpy as np
import pandas as pd
import polars as pl
from zipfile import BadZipFile
import zipfile
import os
@@ -219,18 +221,18 @@ def check_marker(workout):
ids.append(w.id)
gms.append(gmstandard)
df = pd.DataFrame({
df = pl.DataFrame({
'id': ids,
'gms': gms,
})
if df.empty: # pragma: no cover
if df.is_empty(): # pragma: no cover
workout.ranking = True
workout.save()
return workout
indexmax = df['gms'].idxmax()
theid = df.loc[indexmax, 'id']
theid = df.filter(pl.col("gms") == pl.col("gms").max())['id'][0]
wmax = Workout.objects.get(id=theid)
# gms_max = wmax.goldmedalstandard
@@ -326,7 +328,7 @@ def workout_summary_to_df(
goldstandarddurations.append(int(goldstandardduration))
rankingpieces.append(w.rankingpiece)
df = pd.DataFrame({
df = pl.DataFrame({
'ID': ids,
'date': startdatetimes,
'name': names,
@@ -420,19 +422,21 @@ def calculate_goldmedalstandard(rower, workout, recurrance=True):
try:
df = pl.read_parquet(cpfile)
except:
df = getsmallrowdata_pl(['power'], ids=[workout.id])
background = True
if settings.TESTING:
background = False
df, delta, cpvalues = setcp(workout, background=background)
if df.empty:
return 0, 0
df = pl.from_pandas(df)
if recurrance:
df, delta, cpvalues = setcp(workout, background=background)
if df.is_empty():
return 0, 0
else:
return 0,0
if df.is_empty() and recurrance: # pragma: no cover
df, delta, cpvalues = setcp(workout, recurrance=False, background=True)
if df.empty:
if df.is_empty():
return 0, 0
df = pl.from_pandas(df)
age = calculate_age(rower.birthdate, today=workout.date)
@@ -459,7 +463,7 @@ def calculate_goldmedalstandard(rower, workout, recurrance=True):
if getrecords: # pragma: no cover
durations = [1, 4, 30, 60]
distances = [100, 500, 1000, 2000, 5000, 6000, 10000, 21097, 42195]
df2 = pd.DataFrame(
df2 = pl.DataFrame(
list(
C2WorldClassAgePerformance.objects.filter(
sex=rower.sex,
@@ -467,7 +471,7 @@ def calculate_goldmedalstandard(rower, workout, recurrance=True):
).values()
)
)
jsondf = df2.to_json()
jsondf = df2.write_json()
_ = myqueue(queuelow, handle_getagegrouprecords,
jsondf, distances, durations, age, rower.sex, rower.weightcategory)
@@ -511,21 +515,21 @@ def calculate_goldmedalstandard(rower, workout, recurrance=True):
def setcp(workout, background=False, recurrance=True):
try:
filename = 'media/cpdata_{id}.parquet.gz'.format(id=workout.id)
df = pd.read_parquet(filename)
df = pl.read_parquet(filename)
if not df.empty:
if not df.is_empty():
# check dts
tarr = datautils.getlogarr(4000)
if df['delta'][0] in tarr:
return(df, df['delta'], df['cp'])
except:
except Exception as e:
pass
strokesdf = getsmallrowdata_db(
strokesdf = getsmallrowdata_pl(
['power', 'workoutid', 'time'], ids=[workout.id])
if strokesdf.empty:
return pd.DataFrame({'delta': [], 'cp': []}), pd.Series(dtype='float'), pd.Series(dtype='float')
if strokesdf.is_empty():
return pl.DataFrame({'delta': [], 'cp': []}), pd.Series(dtype='float'), pd.Series(dtype='float')
totaltime = strokesdf['time'].max()
maxt = totaltime/1000.
@@ -565,23 +569,27 @@ def setcp(workout, background=False, recurrance=True):
dologging('metrics.log', traceback.format_exc())
return pd.DataFrame({'delta': [], 'cp': []}), pd.Series(dtype='float'), pd.Series(dtype='float')
delta = pd.Series(np.array(response.delta))
cpvalues = pd.Series(np.array(response.power))
delta = pl.Series(np.array(response.delta))
cpvalues = pl.Series(np.array(response.power))
powermean = response.avgpower
df = pd.DataFrame({
df = pl.DataFrame({
'delta': delta,
'cp': cpvalues,
'id': workout.id,
})
df.to_parquet(filename, engine='fastparquet', compression='GZIP')
df = df.drop_nulls()
with gzip.open(filename, 'w') as f:
df.write_parquet(f)
#df.to_parquet(filename, engine='fastparquet', compression='GZIP')
if recurrance:
goldmedalstandard, goldmedalduration = calculate_goldmedalstandard(
workout.user, workout)
workout.user, workout, recurrance=False)
workout.goldmedalstandard = goldmedalstandard
workout.goldmedalduration = goldmedalduration
workout.save()
@@ -737,24 +745,24 @@ def fetchcp_new(rower, workouts):
except:
# CP data file doesn't exist yet. has to be created
df, delta, cpvalues = setcp(workout)
df['workout'] = str(workout)
df['url'] = workout.url()
df = df.with_columns((pl.lit(str(workout))).alias("workout"))
df = df.with_columns((pl.lit(workout.url())).alias("url"))
data.append(df)
if len(data) == 0:
return pd.Series(dtype='float'), pd.Series(dtype='float'), 0, pd.Series(dtype='float'), pd.Series(dtype='float')
return pl.Series(dtype='float'), pl.Series(dtype='float'), 0, pl.Series(dtype='float'), pl.Series(dtype='float')
if len(data) > 1:
df = pd.concat(data, axis=0)
df = pl.concat(data)
#df = df.to_pandas()
try:
df = df[df['cp'] == df.groupby(['delta'])['cp'].transform('max')]
except KeyError: # pragma: no cover
df = df.group_by(pl.col("delta")).agg(pl.max("cp"), pl.max("workout"), pl.max("url")).sort("delta")
except (KeyError, ColumnNotFoundError): # pragma: no cover
return pd.Series(dtype='float'), pd.Series(dtype='float'), 0, pd.Series(dtype='float'), pd.Series(dtype='float')
df = df.sort_values(['delta']).reset_index()
df = df[df['cp']>20]
df = df.filter(pl.col("cp")>20)
return df['delta'], df['cp'], 0, df['workout'], df['url']
@@ -810,16 +818,16 @@ def update_rolling_cp(r, types, mode='water', dosend=False):
delta, cp, avgpower, workoutnames, urls = fetchcp_new(r, workouts)
powerdf = pd.DataFrame({
powerdf = pl.DataFrame({
'Delta': delta,
'CP': cp,
})
powerdf = powerdf[powerdf['CP'] > 0]
powerdf.dropna(axis=0, inplace=True)
powerdf.sort_values(['Delta', 'CP'], ascending=[1, 0], inplace=True)
powerdf.drop_duplicates(subset='Delta', keep='first', inplace=True)
if powerdf.empty:
powerdf = powerdf.filter(pl.col("CP")>0)
powerdf = powerdf.fill_nan(None).drop_nulls().sort(["Delta", "CP"])
powerdf = powerdf.unique(subset=["Delta"], keep="first")
if powerdf.is_empty():
return False
res2 = datautils.cpfit(powerdf)
@@ -1062,7 +1070,7 @@ def checkbreakthrough(w, r):
workouttype = w.workouttype
if workouttype in rowtypes:
cpdf, delta, cpvalues = setcp(w)
if not cpdf.empty:
if not cpdf.is_empty():
if workouttype in otwtypes:
try:
res, btvalues, res2 = utils.isbreakthrough(