Some testing; convert some pandas code to polars in tasks.py

2024-04-17 13:35:02 +02:00
parent 4e05799e35
commit af62267996
9 changed files with 35 additions and 507 deletions
--- a/rowers/tasks.py
+++ b/rowers/tasks.py
@@ -56,7 +56,7 @@ from scipy.signal import savgol_filter
 from scipy.interpolate import griddata

 import rowingdata
-from rowingdata import make_cumvalues
+from rowingdata import make_cumvalues, make_cumvalues_array
 from uuid import uuid4
 from rowingdata import rowingdata as rdata

@@ -115,6 +115,9 @@ tpapilocation = TP_API_LOCATION
 from requests_oauthlib import OAuth1, OAuth1Session

 import pandas as pd
+import polars as pl
+from polars.exceptions import ColumnNotFoundError
+

 from django_rq import job
 from django.utils import timezone
@@ -2583,53 +2586,6 @@ def handle_otwsetpower(self, f1, boattype, boatclass, coastalbrand, weightvalue,
    return 1


-@app.task
-def handle_updateergcp(rower_id, workoutfilenames, debug=False, **kwargs):
-    therows = []
-    for f1 in workoutfilenames:
-        try:
-            rowdata = rdata(csvfile=f1)
-        except IOError:  # pragma: no cover
-            try:
-                rowdata = rdata(csvfile=f1 + '.csv')
-            except IOError:
-                try:
-                    rowdata = rdata(csvfile=f1 + '.gz')
-                except IOError:
-                    rowdata = 0
-        if rowdata != 0:
-            therows.append(rowdata)
-
-    cpdata = rowingdata.cumcpdata(therows)
-    cpdata.columns = cpdata.columns.str.lower()
-
-    updatecpdata_sql(rower_id, cpdata['delta'], cpdata['cp'],
-                     table='ergcpdata', distance=cpdata['distance'],
-                     debug=debug)
-
-    return 1
-
-
-@app.task
-def handle_updatecp(rower_id, workoutids, debug=False, table='cpdata', **kwargs):
-    columns = ['power', 'workoutid', 'time']
-    df = getsmallrowdata_db(columns, ids=workoutids, debug=debug)
-
-    if df.empty:  # pragma: no cover
-        return 0
-
-    maxt = 1.05*df['time'].max()/1000.
-
-    logarr = datautils.getlogarr(maxt)
-
-    dfgrouped = df.groupby(['workoutid'])
-
-    delta, cpvalue, avgpower = datautils.getcp(dfgrouped, logarr)
-
-    updatecpdata_sql(rower_id, delta, cpvalue, debug=debug, table=table)
-
-    return 1
-

@app.task
 def handle_makeplot(f1, f2, t, hrdata, plotnr, imagename,
@@ -3179,43 +3135,6 @@ def handle_sendemail_invite_reject(email, name, teamname, managername,
    return 1


-@app.task
-def handle_setcp(strokesdf, filename, workoutid, debug=False, **kwargs):
-    try:
-        os.remove(filename)
-    except FileNotFoundError:
-        pass
-    if not strokesdf.empty:
-
-        try:
-            totaltime = strokesdf['time'].max()
-        except KeyError:  # pragma: no cover
-            return 0
-        try:
-            powermean = strokesdf['power'].mean()
-        except KeyError:  # pragma: no cover
-            powermean = 0
-
-        if powermean != 0:
-            thesecs = totaltime
-            maxt = 1.05 * thesecs
-
-            if maxt > 0:
-                logarr = datautils.getlogarr(maxt)
-                dfgrouped = strokesdf.groupby(['workoutid'])
-                delta, cpvalues, avgpower = datautils.getcp(dfgrouped, logarr)
-
-                df = pd.DataFrame({
-                    'delta': delta,
-                    'cp': cpvalues,
-                    'id': workoutid,
-                })
-                df.to_parquet(filename, engine='fastparquet',
-                              compression='GZIP')
-                return 1
-
-    return 1  # pragma: no cover
-

@app.task
 def handle_sendemail_invite_accept(email, name, teamname, managername,
@@ -3647,7 +3566,7 @@ def handle_c2_async_workout(alldata, userid, c2token, c2id, delaysec,
            loncoord = np.zeros(nr_rows)

        try:
-            strokelength = strokedata.loc[:, 'strokelength']
+            strokelength = strokedata.loc[:,'strokelength']
        except:  # pragma: no cover
            strokelength = np.zeros(nr_rows)

@@ -3901,7 +3820,7 @@ def fetch_strava_workout(stravatoken, oauth_data, stravaid, csvfilename, userid,
    pace[np.isinf(pace)] = 0.0

    try:
-        strokedata = pd.DataFrame({'t': 10*t,
+        strokedata = pl.DataFrame({'t': 10*t,
                                   'd': 10*d,
                                   'p': 10*pace,
                                   'spm': spm,
@@ -3947,18 +3866,18 @@ def fetch_strava_workout(stravatoken, oauth_data, stravaid, csvfilename, userid,

    starttimeunix = arrow.get(rowdatetime).timestamp()

-    res = make_cumvalues(0.1*strokedata['t'])
-    cum_time = res[0]
-    lapidx = res[1]
+    res = make_cumvalues_array(0.1*strokedata['t'].to_numpy())
+    cum_time = pl.Series(res[0])
+    lapidx = pl.Series(res[1])

    unixtime = cum_time+starttimeunix
-    seconds = 0.1*strokedata.loc[:, 't']
+    seconds = 0.1*strokedata['t']

    nr_rows = len(unixtime)

    try:
-        latcoord = strokedata.loc[:, 'lat']
-        loncoord = strokedata.loc[:, 'lon']
+        latcoord = strokedata['lat']
+        loncoord = strokedata['lon']
        if latcoord.std() == 0 and loncoord.std() == 0 and workouttype == 'water':  # pragma: no cover
            workouttype = 'rower'
    except:  # pragma: no cover
@@ -3968,29 +3887,29 @@ def fetch_strava_workout(stravatoken, oauth_data, stravaid, csvfilename, userid,
            workouttype = 'rower'

    try:
-        strokelength = strokedata.loc[:, 'strokelength']
+        strokelength = strokedata['strokelength']
    except:  # pragma: no cover
        strokelength = np.zeros(nr_rows)

-    dist2 = 0.1*strokedata.loc[:, 'd']
+    dist2 = 0.1*strokedata['d']

    try:
-        spm = strokedata.loc[:, 'spm']
-    except KeyError:  # pragma: no cover
+        spm = strokedata['spm']
+    except (KeyError, ColumnNotFoundError):  # pragma: no cover
        spm = 0*dist2

    try:
-        hr = strokedata.loc[:, 'hr']
-    except KeyError:  # pragma: no cover
+        hr = strokedata['hr']
+    except (KeyError, ColumnNotFoundError):  # pragma: no cover
        hr = 0*spm
-    pace = strokedata.loc[:, 'p']/10.
+    pace = strokedata['p']/10.
    pace = np.clip(pace, 0, 1e4)
-    pace = pace.replace(0, 300)
+    pace = pl.Series(pace).replace(0, 300)

    velo = 500./pace

    try:
-        power = strokedata.loc[:, 'power']
+        power = strokedata['power']
    except KeyError:  # pragma: no cover
        power = 2.8*velo**3

@@ -3999,7 +3918,7 @@ def fetch_strava_workout(stravatoken, oauth_data, stravaid, csvfilename, userid,

    # save csv
    # Create data frame with all necessary data to write to csv
-    df = pd.DataFrame({'TimeStamp (sec)': unixtime,
+    df = pl.DataFrame({'TimeStamp (sec)': unixtime,
                       ' Horizontal (meters)': dist2,
                       ' Cadence (stokes/min)': spm,
                       ' HRCur (bpm)': hr,
@@ -4019,10 +3938,10 @@ def fetch_strava_workout(stravatoken, oauth_data, stravaid, csvfilename, userid,
                       'cum_dist': dist2,
                       })

-    df.sort_values(by='TimeStamp (sec)', ascending=True)
+    df.sort('TimeStamp (sec)')

-    row = rowingdata.rowingdata(df=df)
-    row.write_csv(csvfilename, gzip=False)
+    row = rowingdata.rowingdata_pl(df=df)
+    row.write_csv(csvfilename, compressed=False)

    # summary = row.allstats()
    # maxdist = df['cum_dist'].max()