Private
Public Access
1
0

some testing, some pandas to polars in tasks.py

This commit is contained in:
2024-04-17 13:35:02 +02:00
parent 4e05799e35
commit af62267996
9 changed files with 35 additions and 507 deletions

View File

@@ -56,7 +56,7 @@ from scipy.signal import savgol_filter
from scipy.interpolate import griddata
import rowingdata
from rowingdata import make_cumvalues
from rowingdata import make_cumvalues, make_cumvalues_array
from uuid import uuid4
from rowingdata import rowingdata as rdata
@@ -115,6 +115,9 @@ tpapilocation = TP_API_LOCATION
from requests_oauthlib import OAuth1, OAuth1Session
import pandas as pd
import polars as pl
from polars.exceptions import ColumnNotFoundError
from django_rq import job
from django.utils import timezone
@@ -2583,53 +2586,6 @@ def handle_otwsetpower(self, f1, boattype, boatclass, coastalbrand, weightvalue,
return 1
@app.task
def handle_updateergcp(rower_id, workoutfilenames, debug=False, **kwargs):
    """Recompute the cumulative CP data from workout files and store it.

    Each entry of ``workoutfilenames`` is opened with ``rdata``; when the
    plain name cannot be read, the same name with ``.csv`` and then ``.gz``
    appended is tried. Files that cannot be opened under any of the three
    names are skipped. The workouts that did load are passed to
    ``rowingdata.cumcpdata`` and the resulting ``delta``, ``cp`` and
    ``distance`` columns are handed to ``updatecpdata_sql`` for the
    ``ergcpdata`` table.

    Always returns 1.
    """
    loaded = []
    for basename in workoutfilenames:
        # None means "use the name as given"; suffixes are only appended
        # once the previous candidate has failed to open.
        for suffix in (None, '.csv', '.gz'):
            candidate = basename if suffix is None else basename + suffix
            try:
                workout = rdata(csvfile=candidate)
            except IOError:  # pragma: no cover
                continue
            if workout != 0:
                loaded.append(workout)
            break

    cpdata = rowingdata.cumcpdata(loaded)
    # Lower-case the column labels so the lookups below match.
    cpdata.columns = cpdata.columns.str.lower()

    updatecpdata_sql(
        rower_id,
        cpdata['delta'],
        cpdata['cp'],
        table='ergcpdata',
        distance=cpdata['distance'],
        debug=debug,
    )

    return 1
@app.task
def handle_updatecp(rower_id, workoutids, debug=False, table='cpdata', **kwargs):
    """Recompute CP values for the given workouts and store them.

    Loads the ``power``, ``workoutid`` and ``time`` columns for
    ``workoutids`` through ``getsmallrowdata_db``, groups the rows per
    workout, lets ``datautils.getcp`` compute the curve and writes the
    result to ``table`` via ``updatecpdata_sql``.

    Returns 0 when no stroke data was found, 1 otherwise.
    """
    strokes = getsmallrowdata_db(
        ['power', 'workoutid', 'time'], ids=workoutids, debug=debug)
    if strokes.empty:  # pragma: no cover
        return 0

    # Largest 'time' value plus 5% headroom, divided by 1000
    # (presumably milliseconds to seconds -- confirm against the DB schema).
    horizon = 1.05*strokes['time'].max()/1000.
    durations = datautils.getlogarr(horizon)

    per_workout = strokes.groupby(['workoutid'])
    # The third value returned by getcp (average power) is not used here.
    delta, cpvalue, _avgpower = datautils.getcp(per_workout, durations)

    updatecpdata_sql(rower_id, delta, cpvalue, debug=debug, table=table)

    return 1
@app.task
def handle_makeplot(f1, f2, t, hrdata, plotnr, imagename,
@@ -3179,43 +3135,6 @@ def handle_sendemail_invite_reject(email, name, teamname, managername,
return 1
@app.task
def handle_setcp(strokesdf, filename, workoutid, debug=False, **kwargs):
    """Compute the CP values of one workout and write them to a parquet file.

    Any existing file at ``filename`` is removed first. When ``strokesdf``
    has rows, a non-zero mean ``power`` and a positive maximum ``time``,
    the values returned by ``datautils.getcp`` are written to ``filename``
    as a GZIP-compressed parquet file with columns ``delta``, ``cp`` and
    ``id`` (the latter filled with ``workoutid``).

    Returns 0 when ``strokesdf`` has no ``time`` column, 1 in every other
    case (including the cases where nothing is written).
    """
    # Drop a stale result so an old file never outlives this run.
    try:
        os.remove(filename)
    except FileNotFoundError:
        pass

    if strokesdf.empty:
        return 1  # pragma: no cover

    try:
        longest = strokesdf['time'].max()
    except KeyError:  # pragma: no cover
        return 0

    try:
        mean_power = strokesdf['power'].mean()
    except KeyError:  # pragma: no cover
        mean_power = 0

    if mean_power != 0:
        # 5% headroom on top of the largest 'time' value.
        horizon = 1.05 * longest
        if horizon > 0:
            durations = datautils.getlogarr(horizon)
            per_workout = strokesdf.groupby(['workoutid'])
            # Average power is returned by getcp but not stored.
            delta, cpvalues, _avgpower = datautils.getcp(
                per_workout, durations)
            result = pd.DataFrame({
                'delta': delta,
                'cp': cpvalues,
                'id': workoutid,
            })
            result.to_parquet(filename, engine='fastparquet',
                              compression='GZIP')

    return 1
@app.task
def handle_sendemail_invite_accept(email, name, teamname, managername,
@@ -3647,7 +3566,7 @@ def handle_c2_async_workout(alldata, userid, c2token, c2id, delaysec,
loncoord = np.zeros(nr_rows)
try:
strokelength = strokedata.loc[:, 'strokelength']
strokelength = strokedata.loc[:,'strokelength']
except: # pragma: no cover
strokelength = np.zeros(nr_rows)
@@ -3901,7 +3820,7 @@ def fetch_strava_workout(stravatoken, oauth_data, stravaid, csvfilename, userid,
pace[np.isinf(pace)] = 0.0
try:
strokedata = pd.DataFrame({'t': 10*t,
strokedata = pl.DataFrame({'t': 10*t,
'd': 10*d,
'p': 10*pace,
'spm': spm,
@@ -3947,18 +3866,18 @@ def fetch_strava_workout(stravatoken, oauth_data, stravaid, csvfilename, userid,
starttimeunix = arrow.get(rowdatetime).timestamp()
res = make_cumvalues(0.1*strokedata['t'])
cum_time = res[0]
lapidx = res[1]
res = make_cumvalues_array(0.1*strokedata['t'].to_numpy())
cum_time = pl.Series(res[0])
lapidx = pl.Series(res[1])
unixtime = cum_time+starttimeunix
seconds = 0.1*strokedata.loc[:, 't']
seconds = 0.1*strokedata['t']
nr_rows = len(unixtime)
try:
latcoord = strokedata.loc[:, 'lat']
loncoord = strokedata.loc[:, 'lon']
latcoord = strokedata['lat']
loncoord = strokedata['lon']
if latcoord.std() == 0 and loncoord.std() == 0 and workouttype == 'water': # pragma: no cover
workouttype = 'rower'
except: # pragma: no cover
@@ -3968,29 +3887,29 @@ def fetch_strava_workout(stravatoken, oauth_data, stravaid, csvfilename, userid,
workouttype = 'rower'
try:
strokelength = strokedata.loc[:, 'strokelength']
strokelength = strokedata['strokelength']
except: # pragma: no cover
strokelength = np.zeros(nr_rows)
dist2 = 0.1*strokedata.loc[:, 'd']
dist2 = 0.1*strokedata['d']
try:
spm = strokedata.loc[:, 'spm']
except KeyError: # pragma: no cover
spm = strokedata['spm']
except (KeyError, ColumnNotFoundError): # pragma: no cover
spm = 0*dist2
try:
hr = strokedata.loc[:, 'hr']
except KeyError: # pragma: no cover
hr = strokedata['hr']
except (KeyError, ColumnNotFoundError): # pragma: no cover
hr = 0*spm
pace = strokedata.loc[:, 'p']/10.
pace = strokedata['p']/10.
pace = np.clip(pace, 0, 1e4)
pace = pace.replace(0, 300)
pace = pl.Series(pace).replace(0, 300)
velo = 500./pace
try:
power = strokedata.loc[:, 'power']
power = strokedata['power']
except KeyError: # pragma: no cover
power = 2.8*velo**3
@@ -3999,7 +3918,7 @@ def fetch_strava_workout(stravatoken, oauth_data, stravaid, csvfilename, userid,
# save csv
# Create data frame with all necessary data to write to csv
df = pd.DataFrame({'TimeStamp (sec)': unixtime,
df = pl.DataFrame({'TimeStamp (sec)': unixtime,
' Horizontal (meters)': dist2,
' Cadence (stokes/min)': spm,
' HRCur (bpm)': hr,
@@ -4019,10 +3938,10 @@ def fetch_strava_workout(stravatoken, oauth_data, stravaid, csvfilename, userid,
'cum_dist': dist2,
})
df.sort_values(by='TimeStamp (sec)', ascending=True)
df.sort('TimeStamp (sec)')
row = rowingdata.rowingdata(df=df)
row.write_csv(csvfilename, gzip=False)
row = rowingdata.rowingdata_pl(df=df)
row.write_csv(csvfilename, compressed=False)
# summary = row.allstats()
# maxdist = df['cum_dist'].max()