from __future__ import absolute_import from __future__ import division from __future__ import print_function from __future__ import unicode_literals # All the data preparation, data cleaning and data mangling should # be defined here from __future__ import unicode_literals, absolute_import from rowers.models import Workout, Team import pytz import collections from rowingdata import rowingdata as rrdata from rowingdata import rower as rrower import yaml import shutil from shutil import copyfile from rowingdata import ( get_file_type, get_empower_rigging,get_empower_firmware ) from rowers.tasks import handle_sendemail_unrecognized from rowers.tasks import handle_zip_file from pandas import DataFrame, Series import dask.dataframe as dd from dask.delayed import delayed import pyarrow.parquet as pq import pyarrow as pa from pyarrow.lib import ArrowInvalid from django.utils import timezone from django.utils.timezone import get_current_timezone from django_mailbox.models import Message,Mailbox,MessageAttachment from django.core.exceptions import ValidationError from time import strftime import arrow thetimezone = get_current_timezone() from rowingdata import ( TCXParser, RowProParser, ErgDataParser, CoxMateParser, BoatCoachParser, RowPerfectParser, BoatCoachAdvancedParser, ETHParser, MysteryParser, BoatCoachOTWParser,QuiskeParser, painsledDesktopParser, speedcoachParser, ErgStickParser, SpeedCoach2Parser, FITParser, fitsummarydata, RitmoTimeParser,KinoMapParser, make_cumvalues,cumcpdata,ExcelTemplate, summarydata, get_file_type, ) from rowingdata.csvparsers import HumonParser from rowers.metrics import axes,calc_trimp,rowingmetrics,dtypes,metricsgroups from rowers.models import strokedatafields #allowedcolumns = [item[0] for item in rowingmetrics] allowedcolumns = [key for key,value in strokedatafields.items()] #from async_messages import messages as a_messages import os import zipfile import pandas as pd import numpy as np import itertools from fitparse import FitFile import math from rowers.tasks import ( handle_sendemail_unrecognized, handle_sendemail_breakthrough, handle_sendemail_hard, handle_updatecp,handle_updateergcp, handle_calctrimp, ) from django.conf import settings from sqlalchemy import create_engine import sqlalchemy as sa import sys import rowers.utils as utils import rowers.datautils as datautils from rowers.utils import lbstoN,myqueue,is_ranking_piece,wavg from timezonefinder import TimezoneFinder import django_rq queue = django_rq.get_queue('default') queuelow = django_rq.get_queue('low') queuehigh = django_rq.get_queue('default') from rowsandall_app.settings import SITE_URL from rowers.mytypes import otwtypes,otetypes from rowers import mytypes from rowers.database import * from rowers.opaque import encoder # mapping the DB column names to the CSV file column names columndict = { 'time': 'TimeStamp (sec)', 'hr': ' HRCur (bpm)', 'velo': ' AverageBoatSpeed (m/s)', 'pace': ' Stroke500mPace (sec/500m)', 'spm': ' Cadence (stokes/min)', 'power': ' Power (watts)', 'averageforce': ' AverageDriveForce (lbs)', 'drivelength': ' DriveLength (meters)', 'peakforce': ' PeakDriveForce (lbs)', 'distance': ' Horizontal (meters)', 'catch': 'catch', 'finish': 'finish', 'peakforceangle': 'peakforceangle', 'wash': 'wash', 'slip': 'slip', 'workoutstate': ' WorkoutState', 'cumdist': 'cum_dist', } from scipy.signal import savgol_filter import datetime def get_video_data(w,groups=['basic'],mode='water'): modes = [mode,'both','basic'] columns = ['time','velo','spm'] columns += [name for name,d in rowingmetrics if d['group'] in groups and d['mode'] in modes] columns = list(set(columns)) df = getsmallrowdata_db(columns,ids=[w.id], workstrokesonly=False,doclean=False,compute=False) df['time'] = (df['time']-df['time'].min())/1000. df.sort_values(by='time',inplace=True) df.set_index(pd.to_timedelta(df['time'],unit='s'),inplace=True) df2 = df.resample('1s').first().fillna(method='ffill') df2['time'] = df2.index.total_seconds() if 'pace' in columns: df2['pace'] = df2['pace']/1000. p = df2['pace'] p = p.apply(lambda x:timedeltaconv(x)) p = nicepaceformat(p) df2['pace'] = p #mask = df2['time'] < delay #df2 = df2.mask(mask).dropna() df2['time'] = (df2['time']-df2['time'].min()) df2 = df2.round(decimals=2) boatspeed = (100*df2['velo']).astype(int)/100. try: coordinates = get_latlon_time(w.id) except KeyError: nulseries = df['time']*0 coordinates = pd.DataFrame({ 'time': df['time'], 'latitude': nulseries, 'longitude': nulseries, }) coordinates.set_index(pd.to_timedelta(coordinates['time'],unit='s'),inplace=True) coordinates = coordinates.resample('1s').mean().interpolate() #mask = coordinates['time'] < delay #coordinates = coordinates.mask(mask).dropna() coordinates['time'] = coordinates['time']-coordinates['time'].min() latitude = coordinates['latitude'] longitude = coordinates['longitude'] # bundle data data = { 'boatspeed':boatspeed.values.tolist(), 'latitude':latitude.values.tolist(), 'longitude':longitude.values.tolist(), } # metrics = { # 'boatspeed': { # 'unit': 'm/s', # 'metric': 'boatspeed', # 'name': 'Boat Speed' # }, # } metrics = {} for c in columns: if c != 'time': try: if dict(rowingmetrics)[c]['numtype'] == 'integer': data[c] = df2[c].astype(int).tolist() else: sigfigs = dict(rowingmetrics)[c]['sigfigs'] if (c != 'pace'): da = ((10**sigfigs)*df2[c]).astype(int)/(10**sigfigs) else: da = df2[c] data[c] = da.values.tolist() metrics[c] = { 'name': dict(rowingmetrics)[c]['verbose_name'], 'metric': c, 'unit': '' } except KeyError: pass metrics['boatspeed'] = metrics.pop('velo') # metrics['workperstroke'] = metrics.pop('driveenergy') metrics = collections.OrderedDict(sorted(metrics.items())) maxtime = coordinates['time'].max() return data, metrics, maxtime def polarization_index(df,rower): df['dt'] = df['time'].diff()/6.e4 # remove rest (spm<15) df.dropna(axis=0,inplace=True) df['dt'] = df['dt'].clip(upper=4,lower=0) masklow = (df['power']>0) & (df['power']=rower.pw_at) & (df['power']rower.pw_an) time_low_pw = df.loc[masklow,'dt'].sum() time_mid_pw = df.loc[maskmid,'dt'].sum() time_high_pw = df.loc[maskhigh,'dt'].sum() frac_low = time_low_pw/(time_low_pw+time_mid_pw+time_high_pw) frac_mid = time_mid_pw/(time_low_pw+time_mid_pw+time_high_pw) frac_high = time_high_pw/(time_low_pw+time_mid_pw+time_high_pw) index = math.log10(frac_high*100.*frac_low/frac_mid) return index def get_latlon(id): try: w = Workout.objects.get(id=id) except Workout.DoesNotExist: return False rowdata = rdata(w.csvfilename) if rowdata.df.empty: return [pd.Series([]), pd.Series([])] try: try: latitude = rowdata.df.loc[:, ' latitude'] longitude = rowdata.df.loc[:, ' longitude'] except KeyError: latitude = 0 * rowdata.df.loc[:, 'TimeStamp (sec)'] longitude = 0 * rowdata.df.loc[:, 'TimeStamp (sec)'] return [latitude, longitude] except AttributeError: return [pd.Series([]), pd.Series([])] return [pd.Series([]), pd.Series([])] def get_latlon_time(id): try: w = Workout.objects.get(id=id) except Workout.DoesNotExist: return False rowdata = rdata(w.csvfilename) if rowdata.df.empty: return [pd.Series([]), pd.Series([])] try: try: latitude = rowdata.df.loc[:, ' latitude'] longitude = rowdata.df.loc[:, ' longitude'] except KeyError: latitude = 0 * rowdata.df.loc[:, 'TimeStamp (sec)'] longitude = 0 * rowdata.df.loc[:, 'TimeStamp (sec)'] except AttributeError: return pd.DataFrame() df = pd.DataFrame({ 'time': rowdata.df['TimeStamp (sec)']-rowdata.df['TimeStamp (sec)'].min(), 'latitude': rowdata.df[' latitude'], 'longitude': rowdata.df[' longitude'] }) return df def workout_summary_to_df( rower, startdate=datetime.datetime(1970,1,1), enddate=timezone.now()+timezone.timedelta(days=1)): ws = Workout.objects.filter(user=rower).order_by("startdatetime") types = [] names = [] startdatetimes = [] timezones = [] distances = [] durations = [] weightcategories = [] adaptivetypes = [] weightvalues = [] notes = [] tcx_links = [] csv_links = [] rscores = [] trimps = [] for w in ws: types.append(w.workouttype) names.append(w.name) startdatetimes.append(w.startdatetime) timezones.append(w.timezone) distances.append(w.distance) durations.append(w.duration) weightcategories.append(w.weightcategory) adaptivetypes.append(w.adaptiveclass) weightvalues.append(w.weightvalue) notes.append(w.notes) tcx_link = SITE_URL+'/rowers/workout/{id}/emailtcx'.format( id=encoder.encode_hex(w.id) ) tcx_links.append(tcx_link) csv_link = SITE_URL+'/rowers/workout/{id}/emailcsv'.format( id=encoder.encode_hex(w.id) ) csv_links.append(csv_link) trimps.append(workout_trimp(w)[0]) rscore = workout_rscore(w) rscores.append(int(rscore[0])) df = pd.DataFrame({ 'name':names, 'date':startdatetimes, 'timezone':timezones, 'type':types, 'distance (m)':distances, 'duration ':durations, 'weight category':weightcategories, 'adaptive classification':adaptivetypes, 'weight (kg)':weightvalues, 'notes':notes, 'Stroke Data TCX':tcx_links, 'Stroke Data CSV':csv_links, 'TRIMP Training Load':trimps, 'TSS Training Load':rscores, }) return df def get_workouts(ids, userid): goodids = [] for id in ids: w = Workout.objects.get(id=id) if int(w.user.user.id) == int(userid): goodids.append(id) return [Workout.objects.get(id=id) for id in goodids] def filter_df(datadf, fieldname, value, largerthan=True): try: x = datadf[fieldname] except KeyError: return datadf try: if largerthan: mask = datadf[fieldname] < value else: mask = datadf[fieldname] >= value datadf.loc[mask, fieldname] = np.nan except TypeError: pass return datadf # joins workouts def join_workouts(r,ids,title='Joined Workout', parent=None, setprivate=False, forceunit='lbs',killparents=False): message = None summary = '' if parent: oarlength = parent.oarlength inboard = parent.inboard workouttype = parent.workouttype notes = parent.notes summary = parent.summary if parent.privacy == 'hidden': makeprivate = True else: makeprivate = False startdatetime = parent.startdatetime else: oarlength = 2.89 inboard = 0.88 workouttype = 'rower' notes = '' summary = '' makeprivate = False startdatetime = timezone.now() if setprivate == True and makeprivate == False: makeprivate = True elif setprivate == False and makeprivate == True: makeprivate = False # reorder in chronological order ws = Workout.objects.filter(id__in=ids).order_by("startdatetime") if not parent: parent = ws[0] oarlength = parent.oarlength inboard = parent.inboard workouttype = parent.workouttype notes = parent.notes summary = parent.summary files = [w.csvfilename for w in ws] row = rdata(files[0]) files = files[1:] while len(files): row2 = rdata(files[0]) if row2 != 0: row = row+row2 files = files[1:] timestr = strftime("%Y%m%d-%H%M%S") csvfilename = 'media/df_' + timestr + '.csv' row.write_csv(csvfilename,gzip=True) id, message = save_workout_database(csvfilename, r, workouttype=workouttype, title=title, notes=notes, oarlength=oarlength, inboard=inboard, makeprivate=makeprivate, dosmooth=False, consistencychecks=False) if killparents: for w in ws: w.delete() w = Workout.objects.get(id=id) w.duplicate = False w.save() if message is not None and "duplicate" in message: message = "" return (id, message) def df_resample(datadf): # time stamps must be in seconds timestamps = datadf['TimeStamp (sec)'].astype('int') datadf['timestamps'] = timestamps newdf = datadf.groupby(['timestamps']).mean() return newdf def clean_df_stats(datadf, workstrokesonly=True, ignorehr=True, ignoreadvanced=False): # clean data remove zeros and negative values before = len(datadf) data_orig = datadf.copy() # bring metrics which have negative values to positive domain if len(datadf)==0: return datadf try: datadf['catch'] = -datadf['catch'] except (KeyError,TypeError): pass try: datadf['peakforceangle'] = datadf['peakforceangle'] + 1000 except (KeyError,TypeError): pass try: datadf['hr'] = datadf['hr'] + 10 except (KeyError,TypeError): pass # protect 0 spm values from being nulled try: datadf['spm'] = datadf['spm'] + 1.0 except (KeyError,TypeError) as e: pass # protect 0 workoutstate values from being nulled try: datadf['workoutstate'] = datadf['workoutstate'] + 1 except (KeyError,TypeError) as e: pass try: datadf = datadf.clip(lower=0) except TypeError: pass # protect advanced metrics columns advancedcols = [ 'rhythm', 'power', 'drivelength', 'forceratio', 'drivespeed', 'driveenergy', 'catch', 'finish', 'averageforce', 'peakforce', 'slip', 'wash', 'peakforceangle', 'effectiveangle', ] datadf.replace(to_replace=0, value=np.nan, inplace=True) # datadf = datadf.map_partitions(lambda df:df.replace(to_replace=0,value=np.nan)) # bring spm back to real values try: datadf['spm'] = datadf['spm'] - 1 except (TypeError,KeyError) as e: pass # bring workoutstate back to real values try: datadf['workoutstate'] = datadf['workoutstate'] - 1 except (TypeError,KeyError) as e: pass # return from positive domain to negative try: datadf['catch'] = -datadf['catch'] except (KeyError,TypeError): pass try: datadf['peakforceangle'] = datadf['peakforceangle'] - 1000 except (KeyError,TypeError): pass try: datadf['hr'] = datadf['hr'] - 10 except (KeyError,TypeError): pass # clean data for useful ranges per column if not ignorehr: try: mask = datadf['hr'] < 30 datadf.mask(mask,inplace=True) except (KeyError,TypeError): pass try: mask = datadf['spm'] < 0 datadf.mask(mask,inplace=True) except (KeyError,TypeError): pass try: mask = datadf['efficiency'] > 200. datadf.mask(mask,inplace=True) except (KeyError,TypeError): pass try: mask = datadf['spm'] < 10 datadf.mask(mask,inplace=True) except (KeyError,TypeError): pass try: mask = datadf['pace'] / 1000. > 300. datadf.mask(mask,inplace=True) except (KeyError,TypeError): pass try: mask = datadf['efficiency'] < 0. datadf.mask(mask,inplace=True) except (KeyError,TypeError): pass try: mask = datadf['pace'] / 1000. < 60. datadf.mask(mask,inplace=True) except (KeyError,TypeError): pass try: mask = datadf['spm'] > 60 datadf.mask(mask,inplace=True) except (KeyError,TypeError): pass try: mask = datadf['wash'] < 1 datadf.loc[mask, 'wash'] = np.nan except (KeyError,TypeError): pass # try to guess ignoreadvanced if not ignoreadvanced: for metric in advancedcols: try: sum = datadf[metric].std() if sum == 0 or np.isnan(sum): ignoreadvanced = True except KeyError: pass if not ignoreadvanced: try: mask = datadf['rhythm'] < 0 datadf.mask(mask,inplace=True) except (KeyError,TypeError): pass try: mask = datadf['rhythm'] > 70 datadf.mask(mask,inplace=True) except (KeyError,TypeError): pass try: mask = datadf['power'] < 20 datadf.mask(mask,inplace=True) except (KeyError,TypeError): pass try: mask = datadf['drivelength'] < 0.5 datadf.mask(mask,inplace=True) except (KeyError,TypeError): pass try: mask = datadf['forceratio'] < 0.2 datadf.mask(mask,inplace=True) except (KeyError,TypeError): pass try: mask = datadf['forceratio'] > 1.0 datadf.mask(mask,inplace=True) except (KeyError,TypeError): pass try: mask = datadf['drivespeed'] < 0.5 datadf.mask(mask,inplace=True) except (KeyError,TypeError): pass try: mask = datadf['drivespeed'] > 4 datadf.mask(mask,inplace=True) except (KeyError,TypeError): pass try: mask = datadf['driveenergy'] > 2000 datadf.mask(mask,inplace=True) except (KeyError,TypeError): pass try: mask = datadf['driveenergy'] < 100 datadf.mask(mask,inplace=True) except (KeyError,TypeError): pass try: mask = datadf['catch'] > -30. datadf.mask(mask,inplace=True) except (KeyError,TypeError): pass workoutstateswork = [1, 4, 5, 8, 9, 6, 7] workoutstatesrest = [3] workoutstatetransition = [0, 2, 10, 11, 12, 13] if workstrokesonly == 'True' or workstrokesonly == True: try: datadf = datadf[~datadf['workoutstate'].isin(workoutstatesrest)] except: pass after = len(datadf.dropna()) ratio = float(after)/float(before) if ratio < 0.001 or after < 2: return data_orig return datadf def getpartofday(row,r): workoutstartdatetime = row.rowdatetime try: latavg = row.df[' latitude'].mean() lonavg = row.df[' longitude'].mean() tf = TimezoneFinder() try: timezone_str = tf.timezone_at(lng=lonavg, lat=latavg) except (ValueError,OverflowError): timezone_str = 'UTC' if timezone_str == None: timezone_str = tf.closest_timezone_at(lng=lonavg, lat=latavg) if timezone_str == None: timezone_str = r.defaulttimezone try: workoutstartdatetime = pytz.timezone(timezone_str).localize( row.rowdatetime ) except ValueError: workoutstartdatetime = row.rowdatetime except KeyError: timezone_str = r.defaulttimezone workoutstartdatetime = row.rowdatetime h = workoutstartdatetime.astimezone(pytz.timezone(timezone_str)).hour if h < 12: return "Morning" elif h < 18: return "Afternoon" elif h < 22: return "Evening" else: return "Night" return None def getstatsfields(): fielddict = {name:d['verbose_name'] for name,d in rowingmetrics} # fielddict.pop('ergpace') # fielddict.pop('hr_an') # fielddict.pop('hr_tr') # fielddict.pop('hr_at') # fielddict.pop('hr_ut2') # fielddict.pop('hr_ut1') fielddict.pop('time') fielddict.pop('distance') # fielddict.pop('nowindpace') # fielddict.pop('fnowindpace') # fielddict.pop('fergpace') # fielddict.pop('equivergpower') # fielddict.pop('workoutstate') # fielddict.pop('fpace') # fielddict.pop('pace') # fielddict.pop('id') # fielddict.pop('ftime') # fielddict.pop('x_right') # fielddict.pop('hr_max') # fielddict.pop('hr_bottom') fielddict.pop('cumdist') try: fieldlist = [field for field, value in fielddict.iteritems()] except AttributeError: fieldlist = [field for field, value in fielddict.items()] return fieldlist, fielddict # A string representation for time deltas def niceformat(values): out = [] for v in values: formattedv = strfdelta(v) out.append(formattedv) return out # A nice printable format for time delta values def strfdelta(tdelta): try: minutes, seconds = divmod(tdelta.seconds, 60) tenths = int(tdelta.microseconds / 1e5) except AttributeError: minutes, seconds = divmod(tdelta.view(np.int64), 60e9) seconds, rest = divmod(seconds, 1e9) tenths = int(rest / 1e8) res = "{minutes:0>2}:{seconds:0>2}.{tenths:0>1}".format( minutes=minutes, seconds=seconds, tenths=tenths, ) return res def timedelta_to_seconds(tdelta): return 60.*tdelta.minute+tdelta.second # A nice printable format for pace values def nicepaceformat(values): out = [] for v in values: formattedv = strfdelta(v) out.append(formattedv) return out # Convert seconds to a Time Delta value, replacing NaN with a 5:50 pace def timedeltaconv(x): if np.isfinite(x) and x != 0 and x > 0 and x < 175000: dt = datetime.timedelta(seconds=x) else: dt = datetime.timedelta(seconds=350.) return dt def paceformatsecs(values): out = [] for v in values: td = timedeltaconv(v) formattedv = strfdelta(td) out.append(formattedv) return out def update_c2id_sql(id,c2id): engine = create_engine(database_url, echo=False) table = 'rowers_workout' query = "UPDATE %s SET uploadedtoc2 = %s WHERE `id` = %s;" % (table,c2id,id) with engine.connect() as conn, conn.begin(): result = conn.execute(query) conn.close() engine.dispose() return 1 def fitnessmetric_to_sql(m,table='powertimefitnessmetric',debug=False): engine = create_engine(database_url, echo=False) columns = ', '.join(m.keys()) placeholders = ", ".join(["?"] * len(m)) query = "INSERT into %s ( %s ) Values (%s)" % (table, columns, placeholders) values = tuple(m[key] for key in m.keys()) with engine.connect() as conn, conn.begin(): result = conn.execute(query,values) conn.close() engine.dispose() return 1 def getcpdata_sql(rower_id,table='cpdata'): engine = create_engine(database_url, echo=False) query = sa.text('SELECT * from {table} WHERE user={rower_id};'.format( rower_id=rower_id, table=table, )) connection = engine.raw_connection() df = pd.read_sql_query(query, engine) return df def deletecpdata_sql(rower_id,table='cpdata'): engine = create_engine(database_url, echo=False) query = sa.text('DELETE from {table} WHERE user={rower_id};'.format( rower_id=rower_id, table=table, )) with engine.connect() as conn, conn.begin(): try: result = conn.execute(query) except: print("Database locked") conn.close() engine.dispose() def updatecpdata_sql(rower_id,delta,cp,table='cpdata',distance=[]): deletecpdata_sql(rower_id) df = pd.DataFrame( { 'delta':delta, 'cp':cp, 'user':rower_id } ) if not distance.empty: df['distance'] = distance engine = create_engine(database_url, echo=False) with engine.connect() as conn, conn.begin(): df.to_sql(table, engine, if_exists='append', index=False) conn.close() engine.dispose() def runcpupdate( rower,type='water', startdate=timezone.now()-datetime.timedelta(days=365), enddate=timezone.now()+datetime.timedelta(days=5) ): if type == 'water': theworkouts = Workout.objects.filter( user=rower,rankingpiece=True, workouttype='water', startdatetime__gte=startdate, startdatetime__lte=enddate ) table = 'cpdata' else: theworkouts = Workout.objects.filter( user=rower,rankingpiece=True, workouttype__in=[ 'rower', 'dynamic', 'slides' ], startdatetime__gte=startdate, startdatetime__lte=enddate ) table = 'cpergdata' theids = [w.id for w in theworkouts] job = myqueue( queue, handle_updatecp, rower.id, theids, table=table) return job def fetchcperg(rower,theworkouts): theids = [int(w.id) for w in theworkouts] thefilenames = [w.csvfilename for w in theworkouts] cpdf = getcpdata_sql(rower.id,table='ergcpdata') job = myqueue( queue, handle_updateergcp, rower.id, thefilenames) return cpdf def fetchcp(rower,theworkouts,table='cpdata'): # get all power data from database (plus workoutid) theids = [int(w.id) for w in theworkouts] columns = ['power','workoutid','time'] df = getsmallrowdata_db(columns,ids=theids) df.dropna(inplace=True,axis=0) if df.empty: avgpower2 = {} for id in theids: avgpower2[id] = 0 return pd.Series([]),pd.Series([]),avgpower2 try: dfgrouped = df.groupby(['workoutid']) except KeyError: avgpower2 = {} return pd.Series([]),pd.Series([]),avgpower2 try: avgpower2 = dict(dfgrouped.mean()['power'].astype(int)) except KeyError: avgpower2 = {} for id in theids: avgpower2[id] = 0 return pd.Series([]),pd.Series([]),avgpower2 cpdf = getcpdata_sql(rower.id,table=table) if not cpdf.empty: return cpdf['delta'],cpdf['cp'],avgpower2 else: job = myqueue(queue, handle_updatecp, rower.id, theids, table=table) return pd.Series([]),pd.Series([]),avgpower2 return pd.Series([]),pd.Series([]),avgpower2 # create a new workout from manually entered data def create_row_df(r,distance,duration,startdatetime,workouttype='rower', avghr=None,avgpwr=None,avgspm=None, rankingpiece = False, duplicate=False, title='Manual entry',notes='',weightcategory='hwt', adaptiveclass='None'): if duration is not None: totalseconds = duration.hour*3600. totalseconds += duration.minute*60. totalseconds += duration.second totalseconds += duration.microsecond/1.e6 else: totalseconds = 60. if distance is None: distance = 0 try: nr_strokes = int(distance/10.) except TypeError: nr_strokes = int(20.*totalseconds) if nr_strokes == 0: nr_strokes = 100 unixstarttime = arrow.get(startdatetime).timestamp if not avgspm: try: spm = 60.*nr_strokes/totalseconds except ZeroDivisionError: spm = 20. else: spm = avgspm step = totalseconds/float(nr_strokes) elapsed = np.arange(nr_strokes)*totalseconds/(float(nr_strokes-1)) dstep = distance/float(nr_strokes) d = np.arange(nr_strokes)*distance/(float(nr_strokes-1)) unixtime = unixstarttime + elapsed try: pace = 500.*totalseconds/distance except ZeroDivisionError: pace = 240. if workouttype in ['rower','slides','dynamic']: try: velo = distance/totalseconds except ZeroDivisionError: velo = 2.4 power = 2.8*velo**3 elif avgpwr is not None: power = avgpwr else: power = 0 if avghr is not None: hr = avghr else: hr = 0 df = pd.DataFrame({ 'TimeStamp (sec)': unixtime, ' Horizontal (meters)': d, ' Cadence (stokes/min)': spm, ' Stroke500mPace (sec/500m)':pace, ' ElapsedTime (sec)':elapsed, ' Power (watts)':power, ' HRCur (bpm)':hr, }) timestr = strftime("%Y%m%d-%H%M%S") csvfilename = 'media/df_' + timestr + '.csv' df[' ElapsedTime (sec)'] = df['TimeStamp (sec)'] row = rrdata(df=df) row.write_csv(csvfilename, gzip = True) id, message = save_workout_database(csvfilename, r, title=title, notes=notes, rankingpiece=rankingpiece, duplicate=duplicate, dosmooth=False, workouttype=workouttype, consistencychecks=False, weightcategory=weightcategory, adaptiveclass=adaptiveclass, totaltime=totalseconds) return (id, message) from rowers.utils import totaltime_sec_to_string # Processes painsled CSV file to database def save_workout_database(f2, r, dosmooth=True, workouttype='rower', boattype='1x', adaptiveclass='None', weightcategory='hwt', dosummary=True, title='Workout', workoutsource='unknown', notes='', totaldist=0, totaltime=0, rankingpiece=False, duplicate=False, summary='', makeprivate=False, oarlength=2.89, inboard=0.88, forceunit='lbs', consistencychecks=False, impeller=False): message = None powerperc = 100 * np.array([r.pw_ut2, r.pw_ut1, r.pw_at, r.pw_tr, r.pw_an]) / r.ftp # make workout and put in database rr = rrower(hrmax=r.max, hrut2=r.ut2, hrut1=r.ut1, hrat=r.at, hrtr=r.tr, hran=r.an, ftp=r.ftp, powerperc=powerperc, powerzones=r.powerzones) row = rdata(f2, rower=rr) if title is None or title == '': title = 'Workout' partofday = getpartofday(row,r) if partofday is not None: title = '{partofday} {workouttype}'.format( partofday=partofday, workouttype=workouttype, ) if row.df.empty: return (0, 'Error: CSV data file was empty') dtavg = row.df['TimeStamp (sec)'].diff().mean() if dtavg < 1: newdf = df_resample(row.df) try: os.remove(f2) except: pass return new_workout_from_df(r, newdf, title=title,boattype=boattype, workouttype=workouttype, workoutsource=workoutsource) try: checks = row.check_consistency() allchecks = 1 for key, value in checks.items(): if not value: allchecks = 0 except ZeroDivisionError: pass if not allchecks and consistencychecks: # row.repair() pass if row == 0: return (0, 'Error: CSV data file not found') try: lat = row.df[' latitude'] if lat.mean() != 0 and lat.std() != 0 and workouttype == 'rower': workouttype = 'water' except KeyError: pass if dosmooth: # auto smoothing pace = row.df[' Stroke500mPace (sec/500m)'].values velo = 500. / pace f = row.df['TimeStamp (sec)'].diff().mean() if f != 0 and not np.isnan(f): windowsize = 2 * (int(10. / (f))) + 1 else: windowsize = 1 if not 'originalvelo' in row.df: row.df['originalvelo'] = velo if windowsize > 3 and windowsize < len(velo): velo2 = savgol_filter(velo, windowsize, 3) else: velo2 = velo velo3 = pd.Series(velo2) velo3 = velo3.replace([-np.inf, np.inf], np.nan) velo3 = velo3.fillna(method='ffill') pace2 = 500. / abs(velo3) row.df[' Stroke500mPace (sec/500m)'] = pace2 row.df = row.df.fillna(0) row.write_csv(f2, gzip=True) try: os.remove(f2) except: pass # recalculate power data if workouttype == 'rower' or workouttype == 'dynamic' or workouttype == 'slides': try: row.erg_recalculatepower() row.write_csv(f2, gzip=True) except: pass averagehr = row.df[' HRCur (bpm)'].mean() maxhr = row.df[' HRCur (bpm)'].max() if totaldist == 0: totaldist = row.df['cum_dist'].max() if totaltime == 0: totaltime = row.df['TimeStamp (sec)'].max( ) - row.df['TimeStamp (sec)'].min() try: totaltime = totaltime + row.df.loc[:, ' ElapsedTime (sec)'].iloc[0] except KeyError: pass if np.isnan(totaltime): totaltime = 0 if dosummary: summary = row.allstats() timezone_str = 'UTC' try: workoutstartdatetime = timezone.make_aware(row.rowdatetime) except ValueError: workoutstartdatetime = row.rowdatetime try: latavg = row.df[' latitude'].mean() lonavg = row.df[' longitude'].mean() tf = TimezoneFinder() try: timezone_str = tf.timezone_at(lng=lonavg, lat=latavg) except ValueError: timezone_str = 'UTC' if timezone_str == None: timezone_str = tf.closest_timezone_at(lng=lonavg, lat=latavg) if timezone_str == None: timezone_str = r.defaulttimezone try: workoutstartdatetime = pytz.timezone(timezone_str).localize( row.rowdatetime ) except ValueError: workoutstartdatetime = workoutstartdatetime.astimezone( pytz.timezone(timezone_str) ) except KeyError: timezone_str = r.defaulttimezone duration = totaltime_sec_to_string(totaltime) workoutdate = workoutstartdatetime.astimezone( pytz.timezone(timezone_str) ).strftime('%Y-%m-%d') workoutstarttime = workoutstartdatetime.astimezone( pytz.timezone(timezone_str) ).strftime('%H:%M:%S') if makeprivate: privacy = 'hidden' else: privacy = 'visible' # checking for inf values totaldist = np.nan_to_num(totaldist) maxhr = np.nan_to_num(maxhr) averagehr = np.nan_to_num(averagehr) dragfactor = 0 if workouttype in otetypes: dragfactor = row.dragfactor t = datetime.datetime.strptime(duration,"%H:%M:%S.%f") delta = datetime.timedelta(hours=t.hour, minutes=t.minute, seconds=t.second) workoutenddatetime = workoutstartdatetime+delta # check for duplicate start times and duration ws = Workout.objects.filter(user=r,date=workoutdate,duplicate=False).exclude( startdatetime__gt=workoutenddatetime ) ws2 = [] for ww in ws: t = ww.duration delta = datetime.timedelta(hours=t.hour, minutes=t.minute, seconds=t.second) enddatetime = ww.startdatetime+delta if enddatetime > workoutstartdatetime: ws2.append(ww) if (len(ws2) != 0): message = "Warning: This workout overlaps with an existing one and was marked as a duplicate" duplicate = True # test title length if title is not None and len(title)>140: title = title[0:140] w = Workout(user=r, name=title, date=workoutdate, workouttype=workouttype, boattype=boattype, dragfactor=dragfactor, duration=duration, distance=totaldist, weightcategory=weightcategory, adaptiveclass=adaptiveclass, starttime=workoutstarttime, duplicate=duplicate, workoutsource=workoutsource, rankingpiece=rankingpiece, forceunit=forceunit, csvfilename=f2, notes=notes, summary=summary, maxhr=maxhr, averagehr=averagehr, startdatetime=workoutstartdatetime, inboard=inboard, oarlength=oarlength, timezone=timezone_str, privacy=privacy, impeller=impeller) try: w.save() except ValidationError: try: w.startdatetime = timezone.now() w.save() except ValidationError: return (0,'Unable to create your workout') if privacy == 'visible': ts = Team.objects.filter(rower=r) for t in ts: w.team.add(t) # put stroke data in database res = dataprep(row.df, id=w.id, bands=True, barchart=True, otwpower=True, empower=True, inboard=inboard) rscore,normp = workout_rscore(w) trimp,hrtss = workout_trimp(w) isbreakthrough = False ishard = False if workouttype == 'water': df = getsmallrowdata_db(['power', 'workoutid', 'time'], ids=[w.id]) try: powermean = df['power'].mean() except KeyError: powermean = 0 if powermean != 0: thesecs = totaltime maxt = 1.05 * thesecs if maxt > 0: logarr = datautils.getlogarr(maxt) dfgrouped = df.groupby(['workoutid']) delta, cpvalues, avgpower = datautils.getcp(dfgrouped, logarr) res, btvalues, res2 = utils.isbreakthrough( delta, cpvalues, r.p0, r.p1, r.p2, r.p3, r.cpratio) else: res = 0 res2 = 0 if res: isbreakthrough = True res = datautils.updatecp(delta, cpvalues, r) if res2 and not isbreakthrough: ishard = True # submit email task to send email about breakthrough workout if isbreakthrough: if r.getemailnotifications and not r.emailbounced: job = myqueue(queuehigh,handle_sendemail_breakthrough, w.id, r.user.email, r.user.first_name, r.user.last_name, btvalues=btvalues.to_json()) # submit email task to send email about breakthrough workout if ishard: if r.getemailnotifications and not r.emailbounced: job = myqueue(queuehigh,handle_sendemail_hard, w.id, r.user.email, r.user.first_name, r.user.last_name, btvalues=btvalues.to_json()) return (w.id, message) parsers = { 'kinomap': KinoMapParser, 'xls': ExcelTemplate, 'rp': RowProParser, 'tcx':TCXParser, 'mystery':MysteryParser, 'ritmotime':RitmoTimeParser, 'quiske': QuiskeParser, 'rowperfect3': RowPerfectParser, 'coxmate': CoxMateParser, 'bcmike': BoatCoachAdvancedParser, 'boatcoach': BoatCoachParser, 'boatcoachotw': BoatCoachOTWParser, 'painsleddesktop': painsledDesktopParser, 'speedcoach': speedcoachParser, 'speedcoach2': SpeedCoach2Parser, 'ergstick': ErgStickParser, 'fit': FITParser, 'ergdata': ErgDataParser, 'humon': HumonParser, 'eth': ETHParser, } def parsenonpainsled(fileformat,f2,summary): try: row = parsers[fileformat](f2) hasrecognized = True except (KeyError,IndexError,ValueError): hasrecognized = False return None, hasrecognized, '', 'unknown' # handle speed coach GPS 2 if (fileformat == 'speedcoach2'): oarlength, inboard = get_empower_rigging(f2) empowerfirmware = get_empower_firmware(f2) if empowerfirmware != '': fileformat = fileformat+'v'+str(empowerfirmware) else: fileformat = 'speedcoach2v0' try: summary = row.allstats() except ZeroDivisionError: summary = '' # handle FIT if (fileformat == 'fit'): try: s = fitsummarydata(f2) s.setsummary() summary = s.summarytext except: pass hasrecognized = True return row,hasrecognized,summary,fileformat def handle_nonpainsled(f2, fileformat, summary=''): oarlength = 2.89 inboard = 0.88 hasrecognized = False impeller = False row,hasrecognized,summary,fileformat = parsenonpainsled(fileformat,f2,summary) # Handle c2log if (fileformat == 'c2log' or fileformat == 'rowprolog'): return (0,'',0,0,'',impeller) if not hasrecognized: return (0,'',0,0,'',impeller) f_to_be_deleted = f2 # should delete file f2 = f2[:-4] + 'o.csv' row2 = rrdata(df = row.df) if 'speedcoach2' in fileformat: # impeller consistency impellerdata, consistent, ratio = row.impellerconsistent(threshold=0.3) if impellerdata and consistent: impeller = True if impellerdata and not consistent: row2.use_gpsdata() row2.write_csv(f2, gzip=True) # os.remove(f2) try: os.remove(f_to_be_deleted) except: try: os.remove(f_to_be_deleted + '.gz') except: pass return (f2, summary, oarlength, inboard, fileformat, impeller) # Create new workout from file and store it in the database # This routine should be used everywhere in views.py and mailprocessing.py # Currently there is code duplication def get_workouttype_from_fit(filename,workouttype='water'): fitfile = FitFile(filename,check_crc=False) records = fitfile.messages fittype = 'rowing' for record in records: if record.name in ['sport','lap']: fittype = record.get_values()['sport'].lower() try: workouttype = mytypes.fitmappinginv[fittype] except KeyError: return workouttype return workouttype import rowingdata.tcxtools as tcxtools def get_workouttype_from_tcx(filename,workouttype='water'): tcxtype = 'rowing' try: d = tcxtools.tcx_getdict(filename) try: tcxtype = d['Activities']['Activity']['@Sport'].lower() if tcxtype == 'other': tcxtype = 'rowing' except KeyError: return workouttype except TypeError: pass try: workouttype = mytypes.garminmappinginv[tcxtype.upper()] except KeyError: return workouttype return workouttype def new_workout_from_file(r, f2, workouttype='rower', workoutsource=None, title='Workout', boattype='1x', makeprivate=False, notes='', uploadoptions={'boattype':'1x','workouttype':'rower'}): message = "" impeller = False try: fileformat = get_file_type(f2) except (IOError,UnicodeDecodeError): os.remove(f2) message = "Rowsandall could not process this file. The extension is supported but the file seems corrupt. Contact info@rowsandall.com if you think this is incorrect." return (0, message, f2) summary = '' oarlength = 2.89 inboard = 0.88 # Save zip files to email box for further processing if len(fileformat) == 3 and fileformat[0] == 'zip': uploadoptions['fromuploadform'] = True bodyyaml = yaml.safe_dump(uploadoptions,default_flow_style=False) f_to_be_deleted = f2 impeller = False workoutsbox = Mailbox.objects.filter(name='workouts')[0] msg = Message(mailbox=workoutsbox, from_header=r.user.email, subject = title,body=bodyyaml) msg.save() f3 = 'media/mailbox_attachments/'+f2[6:] copyfile(f2,f3) f3 = f3[6:] a = MessageAttachment(message=msg,document=f3) a.save() message = "Zip file was stored for offline processing" return -1, message, f2 # Some people try to upload Concept2 logbook summaries if fileformat == 'c2log': os.remove(f2) message = "This summary does not contain stroke data. Use the files containing stroke by stroke data." return (0, message, f2) if fileformat == 'nostrokes': os.remove(f2) message = "It looks like this file doesn't contain stroke data." return (0, message, f2) if fileformat == 'kml': os.remove(f2) message = "KML files are not supported" return (0, message, f2) # Some people upload corrupted zip files if fileformat == 'notgzip': os.remove(f2) message = "Rowsandall could not process this file. The extension is supported but the file seems corrupt. Contact info@rowsandall.com if you think this is incorrect." return (0, message, f2) # Some people try to upload RowPro summary logs if fileformat == 'rowprolog': os.remove(f2) message = "This RowPro logbook summary does not contain stroke data. Please use the Stroke Data CSV file for the individual workout in your log." return (0, message, f2) # Sometimes people try an unsupported file type. # Send an email to info@rowsandall.com with the file attached # for me to check if it is a bug, or a new file type # worth supporting if fileformat == 'unknown': message = "We couldn't recognize the file type" extension = os.path.splitext(f2)[1] filename = os.path.splitext(f2)[0] if extension == '.gz': filename = os.path.splitext(filename)[0] extension2 = os.path.splitext(filename)[1]+extension extension = extension2 f4 = filename+'a'+extension copyfile(f2,f4) job = myqueue(queuehigh, handle_sendemail_unrecognized, f4, r.user.email) return (0, message, f2) if fileformat == 'att': # email attachment which can safely be ignored return (0, '', f2) # Get workout type from fit & tcx if (fileformat == 'fit'): workouttype = get_workouttype_from_fit(f2,workouttype=workouttype) if (fileformat == 'tcx'): workouttype = get_workouttype_from_tcx(f2,workouttype=workouttype) # handle non-Painsled by converting it to painsled compatible CSV if (fileformat != 'csv'): f2, summary, oarlength, inboard, fileformat, impeller = handle_nonpainsled( f2, fileformat, summary=summary ) if not f2: message = 'Something went wrong' return (0, message, '') dosummary = (fileformat != 'fit' and 'speedcoach2' not in fileformat) dosummary = dosummary or summary == '' if 'speedcoach2' in fileformat and workouttype == 'rower': workouttype = 'water' if workoutsource is None: workoutsource = fileformat id, message = save_workout_database( f2, r, notes=notes, workouttype=workouttype, weightcategory=r.weightcategory, adaptiveclass=r.adaptiveclass, boattype=boattype, makeprivate=makeprivate, dosummary=dosummary, workoutsource=workoutsource, summary=summary, inboard=inboard, oarlength=oarlength, title=title, forceunit='N', impeller=impeller, ) job = myqueue(queuehigh,handle_calctrimp,id,f2,r.ftp,r.sex,r.hrftp,r.max,r.rest) return (id, message, f2) def split_workout(r, parent, splitsecond, splitmode): data, row = getrowdata_db(id=parent.id) latitude, longitude = get_latlon(parent.id) if not latitude.empty and not longitude.empty: data[' latitude'] = latitude data[' longitude'] = longitude data['time'] = data['time'] / 1000. data1 = data[data['time'] <= splitsecond].copy() data2 = data[data['time'] > splitsecond].copy() data1 = data1.sort_values(['time']) data1 = data1.interpolate(method='linear', axis=0, limit_direction='both', limit=10) data1.fillna(method='bfill', inplace=True) # Some new stuff to try out data1 = data1.groupby('time', axis=0).mean() data1['time'] = data1.index data1.reset_index(drop=True, inplace=True) data2 = data2.sort_values(['time']) data2 = data2.interpolate(method='linear', axis=0, limit_direction='both', limit=10) data2.fillna(method='bfill', inplace=True) # Some new stuff to try out data2 = data2.groupby('time', axis=0).mean() data2['time'] = data2.index data2.reset_index(drop=True, inplace=True) data1['pace'] = data1['pace'] / 1000. data2['pace'] = data2['pace'] / 1000. data1.drop_duplicates(subset='time', inplace=True) data2.drop_duplicates(subset='time', inplace=True) messages = [] ids = [] if 'keep first' in splitmode: if 'firstprivate' in splitmode: setprivate = True else: setprivate = False id, message = new_workout_from_df(r, data1, title=parent.name + ' (1)', parent=parent, setprivate=setprivate, forceunit='N') messages.append(message) ids.append(encoder.encode_hex(id)) if 'keep second' in splitmode: data2['cumdist'] = data2['cumdist'] - data2.iloc[ 0, data2.columns.get_loc('cumdist') ] data2['distance'] = data2['distance'] - data2.iloc[ 0, data2.columns.get_loc('distance') ] data2['time'] = data2['time'] - data2.iloc[ 0, data2.columns.get_loc('time') ] if 'secondprivate' in splitmode: setprivate = True else: setprivate = False dt = datetime.timedelta(seconds=splitsecond) id, message = new_workout_from_df(r, data2, title=parent.name + ' (2)', parent=parent, setprivate=setprivate, dt=dt, forceunit='N') messages.append(message) ids.append(encoder.encode_hex(id)) if not 'keep original' in splitmode: if 'keep second' in splitmode or 'keep first' in splitmode: parent.delete() messages.append('Deleted Workout: ' + parent.name) else: messages.append('That would delete your workout') ids.append(encoder.encode_hex(parent.id)) elif 'originalprivate' in splitmode: parent.privacy = 'hidden' parent.save() return ids, messages # Create new workout from data frame and store it in the database # This routine should be used everywhere in views.py and mailprocessing.py # Currently there is code duplication def new_workout_from_df(r, df, title='New Workout', workoutsource='unknown', boattype='1x', workouttype='rower', parent=None, setprivate=False, forceunit='lbs', dt=datetime.timedelta()): message = None summary = '' if parent: oarlength = parent.oarlength inboard = parent.inboard workoutsource = parent.workoutsource workouttype = parent.workouttype boattype = parent.boattype notes = parent.notes summary = parent.summary if parent.privacy == 'hidden': makeprivate = True else: makeprivate = False startdatetime = parent.startdatetime + dt else: oarlength = 2.89 inboard = 0.88 notes = '' summary = '' makeprivate = False startdatetime = timezone.now() if setprivate: makeprivate = True timestr = strftime("%Y%m%d-%H%M%S") csvfilename = 'media/df_' + timestr + '.csv' if forceunit == 'N': # change to lbs for now df['peakforce'] /= lbstoN df['averageforce'] /= lbstoN df.rename(columns=columndict, inplace=True) #starttimeunix = mktime(startdatetime.utctimetuple()) starttimeunix = arrow.get(startdatetime).timestamp df[' ElapsedTime (sec)'] = df['TimeStamp (sec)'] df['TimeStamp (sec)'] = df['TimeStamp (sec)'] + starttimeunix row = rrdata(df=df) row.write_csv(csvfilename, gzip=True) # res = df.to_csv(csvfilename+'.gz',index_label='index', # compression='gzip') id, message = save_workout_database(csvfilename, r, workouttype=workouttype, boattype=boattype, title=title, workoutsource=workoutsource, notes=notes, oarlength=oarlength, inboard=inboard, makeprivate=makeprivate, dosmooth=False, consistencychecks=False) return (id, message) # A wrapper around the rowingdata class, with some error catching def rdata(file, rower=rrower()): try: res = rrdata(csvfile=file, rower=rower) except (IOError, IndexError): try: res = rrdata(csvfile=file + '.gz', rower=rower) except (IOError, IndexError): res = rrdata() except: res = rrdata() except EOFError: res = rrdata() except: res = rrdata() return res # Remove all stroke data for workout ID from database def delete_strokedata(id): dirname = 'media/strokedata_{id}.parquet.gz'.format(id=id) try: shutil.rmtree(dirname) except OSError: try: os.remove(dirname) except FileNotFoundError: pass except FileNotFoundError: pass # Replace stroke data in DB with data from CSV file def update_strokedata(id, df): delete_strokedata(id) rowdata = dataprep(df, id=id, bands=True, barchart=True, otwpower=True) # Test that all data are of a numerical time def testdata(time, distance, pace, spm): t1 = np.issubdtype(time, np.number) t2 = np.issubdtype(distance, np.number) t3 = np.issubdtype(pace, np.number) t4 = np.issubdtype(spm, np.number) return t1 and t2 and t3 and t4 # Get data from DB for one workout (fetches all data). If data # is not in DB, read from CSV file (and create DB entry) def getrowdata_db(id=0, doclean=False, convertnewtons=True, checkefficiency=True): data = read_df_sql(id) try: data['deltat'] = data['time'].diff() except KeyError: data = pd.DataFrame() if data.empty: rowdata, row = getrowdata(id=id) if not rowdata.empty: data = dataprep(rowdata.df, id=id, bands=True, barchart=True, otwpower=True) else: data = pd.DataFrame() # returning empty dataframe else: row = Workout.objects.get(id=id) if checkefficiency==True and not data.empty: try: if data['efficiency'].mean() == 0 and data['power'].mean() != 0: data = add_efficiency(id=id) except KeyError: data = add_efficiency(id=id) if doclean: data = clean_df_stats(data, ignorehr=True) return data, row # Fetch a subset of the data from the DB def getsmallrowdata_db(columns, ids=[], doclean=True,workstrokesonly=True,compute=True): # prepmultipledata(ids) if ids: csvfilenames = ['media/strokedata_{id}.parquet.gz'.format(id=id) for id in ids] else: return pd.DataFrame() data = [] columns = [c for c in columns if c != 'None'] columns = list(set(columns)) if len(ids)>1: for id,f in zip(ids,csvfilenames): try: #df = dd.read_parquet(f,columns=columns,engine='pyarrow') df = pd.read_parquet(f,columns=columns) data.append(df) except OSError: rowdata, row = getrowdata(id=id) if rowdata and len(rowdata.df): datadf = dataprep(rowdata.df,id=id,bands=True,otwpower=True,barchart=True) # df = dd.read_parquet(f,columns=columns,engine='pyarrow') df = pd.read_parquet(f,columns=columns) data.append(df) df = pd.concat(data,axis=0) # df = dd.concat(data,axis=0) else: try: df = pd.read_parquet(csvfilenames[0],columns=columns) except (OSError,ArrowInvalid): rowdata,row = getrowdata(id=ids[0]) if rowdata and len(rowdata.df): data = dataprep(rowdata.df,id=ids[0],bands=True,otwpower=True,barchart=True) df = pd.read_parquet(csvfilenames[0],columns=columns) # df = dd.read_parquet(csvfilenames[0], # column=columns,engine='pyarrow', # ) # df = df.loc[:,~df.columns.duplicated()] else: df = pd.DataFrame() if compute and len(df): data = df.copy() if doclean: data = clean_df_stats(data, ignorehr=True, workstrokesonly=workstrokesonly) data.dropna(axis=1,how='all',inplace=True) data.dropna(axis=0,how='any',inplace=True) return data return df def getsmallrowdata_db_dask(columns, ids=[], doclean=True,workstrokesonly=True,compute=True): # prepmultipledata(ids) csvfilenames = ['media/strokedata_{id}.parquet.gz'.format(id=id) for id in ids] data = [] columns = [c for c in columns if c != 'None'] columns = list(set(columns)) if len(ids)>1: for id,f in zip(ids,csvfilenames): try: #df = dd.read_parquet(f,columns=columns,engine='pyarrow') df = dd.read_parquet(f,columns=columns) data.append(df) except OSError: rowdata, row = getrowdata(id=id) if rowdata and len(rowdata.df): datadf = dataprep(rowdata.df,id=id,bands=True,otwpower=True,barchart=True) # df = dd.read_parquet(f,columns=columns,engine='pyarrow') df = dd.read_parquet(f,columns=columns) data.append(df) df = dd.concat(data,axis=0) # df = dd.concat(data,axis=0) else: try: df = dd.read_parquet(csvfilenames[0],columns=columns) except OSError: rowdata,row = getrowdata(id=ids[0]) if rowdata and len(rowdata.df): data = dataprep(rowdata.df,id=ids[0],bands=True,otwpower=True,barchart=True) df = dd.read_parquet(csvfilenames[0],columns=columns) # df = dd.read_parquet(csvfilenames[0], # column=columns,engine='pyarrow', # ) # df = df.loc[:,~df.columns.duplicated()] if compute: data = df.compute() if doclean: data = clean_df_stats(data, ignorehr=True, workstrokesonly=workstrokesonly) data.dropna(axis=1,how='all',inplace=True) data.dropna(axis=0,how='any',inplace=True) return data return df def getsmallrowdata_db_old(columns, ids=[], doclean=True, workstrokesonly=True): prepmultipledata(ids) data,extracols = read_cols_df_sql(ids, columns) if extracols and len(ids)==1: w = Workout.objects.get(id=ids[0]) row = rdata(w.csvfilename) try: row.set_instroke_metrics() except (AttributeError,TypeError): pass try: f = row.df['TimeStamp (sec)'].diff().mean() except (AttributeError,KeyError) as e: f = 0 if f != 0 and not np.isnan(f): windowsize = 2 * (int(10. / (f))) + 1 else: windowsize = 1 for c in extracols: try: cdata = row.df[c] cdata.fillna(inplace=True,method='bfill') # This doesn't work because sometimes data are duplicated at save try: cdata2 = savgol_filter(cdata.values,windowsize,3) data[c] = cdata2 except ValueError: data[c] = cdata except (KeyError, AttributeError): data[c] = 0 # convert newtons if doclean: data = clean_df_stats(data, ignorehr=True, workstrokesonly=workstrokesonly) data.dropna(axis=1,how='all',inplace=True) data.dropna(axis=0,how='any',inplace=True) return data # Fetch both the workout and the workout stroke data (from CSV file) def getrowdata(id=0): # check if valid ID exists (workout exists) try: row = Workout.objects.get(id=id) except Workout.DoesNotExist: return rrdata(),None f1 = row.csvfilename # get user r = row.user u = r.user rr = rrower(hrmax=r.max, hrut2=r.ut2, hrut1=r.ut1, hrat=r.at, hrtr=r.tr, hran=r.an, ftp=r.ftp) rowdata = rdata(f1, rower=rr) return rowdata, row # Checks if all rows for a list of workout IDs have entries in the # stroke_data table. If this is not the case, it creates the stroke # data # In theory, this should never yield any work, but it's a good # safety net for programming errors elsewhere in the app # Also used heavily when I moved from CSV file only to CSV+Stroke data import glob def prepmultipledata(ids, verbose=False): filenames = glob.glob('media/*.parquet') ids = [id for id in ids if 'media/strokedata_{id}.parquet.gz'.format(id=id) not in filenames] for id in ids: rowdata, row = getrowdata(id=id) if verbose: print(id) if rowdata and len(rowdata.df): data = dataprep(rowdata.df, id=id, bands=True, barchart=True, otwpower=True) return ids # Read a set of columns for a set of workout ids, returns data as a # pandas dataframe def read_cols_df_sql(ids, columns, convertnewtons=True): # drop columns that are not in offical list # axx = [ax[0] for ax in axes] extracols = [] columns = list(columns) + ['distance', 'spm', 'workoutid'] columns = [x for x in columns if x != 'None'] columns = list(set(columns)) ids = [int(id) for id in ids] if len(ids) == 0: return pd.DataFrame(),extracols elif len(ids) == 1: try: filename = 'media/strokedata_{id}.parquet.gz'.format(id=ids[0]) df = pd.read_parquet(filename,columns=columns) except OSError: rowdata,row = getrowdata(id=ids[0]) if rowdata and len(rowdata.df): datadf = dataprep(rowdata.df,id=ids[0],bands=True,otwpower=True,barchart=True) df = pd.read_parquet(filename,columns=columns) else: data = [] filenames = ['media/strokedata_{id}.parquet.gz'.format(id=id) for id in ids] for id,f in zip(ids,filenames): try: df = pd.read_parquet(f,columns=columns) data.append(df) except (OSError,IndexError): rowdata,row = getrowdata(id=id) if rowdata and len(rowdata.df): datadf = dataprep(rowdata.df,id=id,bands=True,otwpower=True,barchart=True) df = pd.read_parquet(f,columns=columns) data.append(df) df = pd.concat(data,axis=0) df = df.fillna(value=0) if 'peakforce' in columns: funits = ((w.id, w.forceunit) for w in Workout.objects.filter(id__in=ids)) for id, u in funits: if u == 'lbs': mask = df['workoutid'] == id df.loc[mask, 'peakforce'] = df.loc[mask, 'peakforce'] * lbstoN if 'averageforce' in columns: funits = ((w.id, w.forceunit) for w in Workout.objects.filter(id__in=ids)) for id, u in funits: if u == 'lbs': mask = df['workoutid'] == id df.loc[mask, 'averageforce'] = df.loc[mask, 'averageforce'] * lbstoN return df,extracols def read_cols_df_sql_old(ids, columns, convertnewtons=True): # drop columns that are not in offical list # axx = [ax[0] for ax in axes] prepmultipledata(ids) axx = [f.name for f in StrokeData._meta.get_fields()] extracols = [] columns2 = list(columns) for c in columns: if not c in axx: columns2.remove(c) extracols.append(c) columns = list(columns2) + ['distance', 'spm', 'workoutid'] columns = [x for x in columns if x != 'None'] columns = list(set(columns)) cls = '' ids = [int(id) for id in ids] engine = create_engine(database_url, echo=False) for column in columns: cls += column + ', ' cls = cls[:-2] if len(ids) == 0: return pd.DataFrame(),extracols # query = sa.text('SELECT {columns} FROM strokedata WHERE workoutid=0'.format( # columns=cls, # )) elif len(ids) == 1: query = sa.text('SELECT {columns} FROM strokedata WHERE workoutid={id} ORDER BY time ASC'.format( id=ids[0], columns=cls, )) else: query = sa.text('SELECT {columns} FROM strokedata WHERE workoutid IN {ids} ORDER BY time ASC'.format( columns=cls, ids=tuple(ids), )) connection = engine.raw_connection() df = pd.read_sql_query(query, engine) df = df.fillna(value=0) if 'peakforce' in columns: funits = ((w.id, w.forceunit) for w in Workout.objects.filter(id__in=ids)) for id, u in funits: if u == 'lbs': mask = df['workoutid'] == id df.loc[mask, 'peakforce'] = df.loc[mask, 'peakforce'] * lbstoN if 'averageforce' in columns: funits = ((w.id, w.forceunit) for w in Workout.objects.filter(id__in=ids)) for id, u in funits: if u == 'lbs': mask = df['workoutid'] == id df.loc[mask, 'averageforce'] = df.loc[mask, 'averageforce'] * lbstoN engine.dispose() return df,extracols # Read stroke data from the DB for a Workout ID. Returns a pandas dataframe def read_df_sql(id): try: f = 'media/strokedata_{id}.parquet.gz'.format(id=id) df = pd.read_parquet(f) except (OSError,ArrowInvalid): rowdata,row = getrowdata(id=id) if rowdata and len(rowdata.df): data = dataprep(rowdata.df,id=id,bands=True,otwpower=True,barchart=True) df = pd.read_parquet(f) else: df = pd.DataFrame() df = df.fillna(value=0) return df def read_df_sql_old(id): engine = create_engine(database_url, echo=False) df = pd.read_sql_query(sa.text('SELECT * FROM strokedata WHERE workoutid={id} ORDER BY time ASC'.format( id=id)), engine) engine.dispose() df = df.fillna(value=0) funit = Workout.objects.get(id=id).forceunit if funit == 'lbs': try: df['peakforce'] = df['peakforce'] * lbstoN except KeyError: pass try: df['averageforce'] = df['averageforce'] * lbstoN except KeyError: pass return df # Get the necessary data from the strokedata table in the DB. # For the flex plot def smalldataprep(therows, xparam, yparam1, yparam2): df = pd.DataFrame() if yparam2 == 'None': yparam2 = 'power' df[xparam] = [] df[yparam1] = [] df[yparam2] = [] df['distance'] = [] df['spm'] = [] for workout in therows: f1 = workout.csvfilename try: rowdata = dataprep(rrdata(csvfile=f1).df) rowdata = pd.DataFrame({xparam: rowdata[xparam], yparam1: rowdata[yparam1], yparam2: rowdata[yparam2], 'distance': rowdata['distance'], 'spm': rowdata['spm'], } ) if workout.forceunit == 'lbs': try: rowdata['peakforce'] *= lbstoN except KeyError: pass try: rowdata['averageforce'] *= lbstoN except KeyError: pass df = pd.concat([df, rowdata], ignore_index=True) except IOError: try: rowdata = dataprep(rrdata(csvfile=f1 + '.gz').df) rowdata = pd.DataFrame({xparam: rowdata[xparam], yparam1: rowdata[yparam1], yparam2: rowdata[yparam2], 'distance': rowdata['distance'], 'spm': rowdata['spm'], } ) if workout.forceunit == 'lbs': try: rowdata['peakforce'] *= lbstoN except KeyError: pass try: rowdata['averageforce'] *= lbstoN except KeyError: pass df = pd.concat([df, rowdata], ignore_index=True) except IOError: pass return df # data fusion def datafusion(id1, id2, columns, offset): workout1 = Workout.objects.get(id=id1) workout2 = Workout.objects.get(id=id2) df1, w1 = getrowdata_db(id=id1) df1 = df1.drop([ # 'cumdist', 'hr_ut2', 'hr_ut1', 'hr_at', 'hr_tr', 'hr_an', 'hr_max', 'ftime', 'fpace', 'workoutid', 'id'], 1, errors='ignore') # Add coordinates to DataFrame latitude, longitude = get_latlon(id1) df1[' latitude'] = latitude df1[' longitude'] = longitude df2 = getsmallrowdata_db(['time'] + columns, ids=[id2], doclean=False) forceunit = 'N' offsetmillisecs = offset.seconds * 1000 + offset.microseconds / 1000. offsetmillisecs += offset.days * (3600 * 24 * 1000) df2['time'] = df2['time'] + offsetmillisecs keep1 = {c: c for c in set(df1.columns)} for c in columns: keep1.pop(c) for c in df1.columns: if not c in keep1: df1 = df1.drop(c, 1, errors='ignore') df = pd.concat([df1, df2], ignore_index=True) df = df.sort_values(['time']) df = df.interpolate(method='linear', axis=0, limit_direction='both', limit=10) df.fillna(method='bfill', inplace=True) # Some new stuff to try out df = df.groupby('time', axis=0).mean() df['time'] = df.index df.reset_index(drop=True, inplace=True) df['time'] = df['time'] / 1000. df['pace'] = df['pace'] / 1000. df['cum_dist'] = df['cumdist'] return df, forceunit def fix_newtons(id=0, limit=3000): # rowdata,row = getrowdata_db(id=id,doclean=False,convertnewtons=False) rowdata = getsmallrowdata_db(['peakforce'], ids=[id], doclean=False) try: #avgforce = rowdata['averageforce'] peakforce = rowdata['peakforce'] if peakforce.mean() > limit: w = Workout.objects.get(id=id) print("fixing ", id) rowdata = rdata(w.csvfilename) if rowdata and len(rowdata.df): update_strokedata(w.id, rowdata.df) except KeyError: pass def remove_invalid_columns(df): for c in df.columns: if not c in allowedcolumns: df.drop(labels=c,axis=1,inplace=True) return df def add_efficiency(id=0): rowdata, row = getrowdata_db(id=id, doclean=False, convertnewtons=False, checkefficiency=False) power = rowdata['power'] pace = rowdata['pace'] / 1.0e3 velo = 500. / pace ergpw = 2.8 * velo**3 efficiency = 100. * ergpw / power efficiency = efficiency.replace([-np.inf, np.inf], np.nan) efficiency.fillna(method='ffill') rowdata['efficiency'] = efficiency rowdata = remove_invalid_columns(rowdata) rowdata = rowdata.replace([-np.inf, np.inf], np.nan) rowdata = rowdata.fillna(method='ffill') delete_strokedata(id) if id != 0: rowdata['workoutid'] = id filename = 'media/strokedata_{id}.parquet.gz'.format(id=id) df = dd.from_pandas(rowdata,npartitions=1) df.to_parquet(filename,engine='fastparquet',compression='GZIP') return rowdata # This is the main routine. # it reindexes, sorts, filters, and smooths the data, then # saves it to the stroke_data table in the database # Takes a rowingdata object's DataFrame as input def dataprep(rowdatadf, id=0, bands=True, barchart=True, otwpower=True, empower=True, inboard=0.88, forceunit='lbs'): if rowdatadf.empty: return 0 #rowdatadf.set_index([range(len(rowdatadf))], inplace=True) t = rowdatadf.loc[:, 'TimeStamp (sec)'] t = pd.Series(t - rowdatadf.loc[:, 'TimeStamp (sec)'].iloc[0]) row_index = rowdatadf.loc[:, ' Stroke500mPace (sec/500m)'] > 3000 rowdatadf.loc[row_index, ' Stroke500mPace (sec/500m)'] = 3000. p = rowdatadf.loc[:, ' Stroke500mPace (sec/500m)'] try: velo = rowdatadf.loc[:,' AverageBoatSpeed (m/s)'] except KeyError: velo = 500./p hr = rowdatadf.loc[:, ' HRCur (bpm)'] spm = rowdatadf.loc[:, ' Cadence (stokes/min)'] cumdist = rowdatadf.loc[:, 'cum_dist'] power = rowdatadf.loc[:, ' Power (watts)'] averageforce = rowdatadf.loc[:, ' AverageDriveForce (lbs)'] drivelength = rowdatadf.loc[:, ' DriveLength (meters)'] try: workoutstate = rowdatadf.loc[:, ' WorkoutState'] except KeyError: workoutstate = 0 * hr peakforce = rowdatadf.loc[:, ' PeakDriveForce (lbs)'] forceratio = averageforce / peakforce forceratio = forceratio.fillna(value=0) try: drivetime = rowdatadf.loc[:, ' DriveTime (ms)'] recoverytime = rowdatadf.loc[:, ' StrokeRecoveryTime (ms)'] rhythm = 100. * drivetime / (recoverytime + drivetime) rhythm = rhythm.fillna(value=0) except: rhythm = 0.0 * forceratio f = rowdatadf['TimeStamp (sec)'].diff().mean() if f != 0 and not np.isinf(f): try: windowsize = 2 * (int(10. / (f))) + 1 except ValueError: windowsize = 1 else: windowsize = 1 if windowsize <= 3: windowsize = 5 if windowsize > 3 and windowsize < len(hr): spm = savgol_filter(spm, windowsize, 3) hr = savgol_filter(hr, windowsize, 3) drivelength = savgol_filter(drivelength, windowsize, 3) forceratio = savgol_filter(forceratio, windowsize, 3) try: t2 = t.fillna(method='ffill').apply(lambda x: timedeltaconv(x)) except TypeError: t2 = 0 * t p2 = p.fillna(method='ffill').apply(lambda x: timedeltaconv(x)) try: drivespeed = drivelength / rowdatadf[' DriveTime (ms)'] * 1.0e3 except TypeError: drivespeed = 0.0 * rowdatadf['TimeStamp (sec)'] drivespeed = drivespeed.fillna(value=0) try: driveenergy = rowdatadf['driveenergy'] except KeyError: if forceunit == 'lbs': driveenergy = drivelength * averageforce * lbstoN else: drivenergy = drivelength * averageforce if forceunit == 'lbs': averageforce *= lbstoN peakforce *= lbstoN powerhr = 60.*power/hr powerhr = powerhr.fillna(value=0) if driveenergy.mean() == 0 and driveenergy.std() == 0: driveenergy = 0*driveenergy+100 distance = rowdatadf.loc[:, 'cum_dist'] velo = 500. / p distanceperstroke = 60. * velo / spm data = DataFrame( dict( time=t * 1e3, hr=hr, pace=p * 1e3, spm=spm, velo=velo, cumdist=cumdist, ftime=niceformat(t2), fpace=nicepaceformat(p2), driveenergy=driveenergy, power=power, workoutstate=workoutstate, averageforce=averageforce, drivelength=drivelength, peakforce=peakforce, forceratio=forceratio, distance=distance, drivespeed=drivespeed, rhythm=rhythm, distanceperstroke=distanceperstroke, # powerhr=powerhr, ) ) if bands: # HR bands data['hr_ut2'] = rowdatadf.loc[:, 'hr_ut2'] data['hr_ut1'] = rowdatadf.loc[:, 'hr_ut1'] data['hr_at'] = rowdatadf.loc[:, 'hr_at'] data['hr_tr'] = rowdatadf.loc[:, 'hr_tr'] data['hr_an'] = rowdatadf.loc[:, 'hr_an'] data['hr_max'] = rowdatadf.loc[:, 'hr_max'] data['hr_bottom'] = 0.0 * data['hr'] try: tel = rowdatadf.loc[:, ' ElapsedTime (sec)'] except KeyError: rowdatadf[' ElapsedTime (sec)'] = rowdatadf['TimeStamp (sec)'] if empower: try: wash = rowdatadf.loc[:, 'wash'] except KeyError: wash = 0 * power try: catch = rowdatadf.loc[:, 'catch'] except KeyError: catch = 0 * power try: finish = rowdatadf.loc[:, 'finish'] except KeyError: finish = 0 * power try: peakforceangle = rowdatadf.loc[:, 'peakforceangle'] except KeyError: peakforceangle = 0 * power if data['driveenergy'].mean() == 0: try: driveenergy = rowdatadf.loc[:, 'driveenergy'] except KeyError: driveenergy = power * 60 / spm else: driveenergy = data['driveenergy'] arclength = (inboard - 0.05) * (np.radians(finish) - np.radians(catch)) if arclength.mean() > 0: drivelength = arclength elif drivelength.mean() == 0: drivelength = driveenergy / (averageforce * 4.44822) try: slip = rowdatadf.loc[:, 'slip'] except KeyError: slip = 0 * power try: totalangle = finish - catch effectiveangle = finish - wash - catch - slip except ValueError: totalangle = 0 * power effectiveangle = 0 * power if windowsize > 3 and windowsize < len(slip): try: wash = savgol_filter(wash, windowsize, 3) except TypeError: pass try: slip = savgol_filter(slip, windowsize, 3) except TypeError: pass try: catch = savgol_filter(catch, windowsize, 3) except TypeError: pass try: finish = savgol_filter(finish, windowsize, 3) except TypeError: pass try: peakforceangle = savgol_filter(peakforceangle, windowsize, 3) except TypeError: pass try: driveenergy = savgol_filter(driveenergy, windowsize, 3) except TypeError: pass try: drivelength = savgol_filter(drivelength, windowsize, 3) except TypeError: pass try: totalangle = savgol_filter(totalangle, windowsize, 3) except TypeError: pass try: effectiveangle = savgol_filter(effectiveangle, windowsize, 3) except TypeError: pass velo = 500. / p ergpw = 2.8 * velo**3 efficiency = 100. * ergpw / power efficiency = efficiency.replace([-np.inf, np.inf], np.nan) efficiency.fillna(method='ffill') try: data['wash'] = wash data['catch'] = catch data['slip'] = slip data['finish'] = finish data['peakforceangle'] = peakforceangle data['driveenergy'] = driveenergy data['drivelength'] = drivelength data['totalangle'] = totalangle data['effectiveangle'] = effectiveangle data['efficiency'] = efficiency except ValueError: pass if otwpower: try: nowindpace = rowdatadf.loc[:, 'nowindpace'] except KeyError: nowindpace = p try: equivergpower = rowdatadf.loc[:, 'equivergpower'] except KeyError: equivergpower = 0 * p + 50. nowindpace2 = nowindpace.apply(lambda x: timedeltaconv(x)) ergvelo = (equivergpower / 2.8)**(1. / 3.) ergpace = 500. / ergvelo ergpace[ergpace == np.inf] = 240. ergpace2 = ergpace.apply(lambda x: timedeltaconv(x)) data['ergpace'] = ergpace * 1e3 data['nowindpace'] = nowindpace * 1e3 data['equivergpower'] = equivergpower data['fergpace'] = nicepaceformat(ergpace2) data['fnowindpace'] = nicepaceformat(nowindpace2) data = data.replace([-np.inf, np.inf], np.nan) data = data.fillna(method='ffill') # write data if id given if id != 0: data['workoutid'] = id data.fillna(0,inplace=True) for k, v in dtypes.items(): try: data[k] = data[k].astype(v) except KeyError: pass filename = 'media/strokedata_{id}.parquet.gz'.format(id=id) df = dd.from_pandas(data,npartitions=1) df.to_parquet(filename,engine='fastparquet',compression='GZIP') return data def workout_trimp(w): r = w.user if w.trimp > 0: return w.trimp,w.hrtss r = w.user ftp = float(r.ftp) if w.workouttype in otwtypes: ftp = ftp*(100.-r.otwslack)/100. if r.hrftp == 0: hrftp = (r.an+r.tr)/2. r.hrftp = int(hrftp) r.save() if w.averagehr is None: rowdata = rdata(w.csvfilename) try: avghr = rowdata.df[' HRCur (bpm)'].mean() maxhr = rowdata.df[' HRCur (bpm)'].max() except KeyError: avghr = None maxhr = None w.averagehr = avghr w.maxhr = maxhr w.save() job = myqueue( queuehigh, handle_calctrimp, w.id, w.csvfilename, ftp, r.sex, r.hrftp, r.max, r.rest) return 0,0 def workout_rscore(w): if w.rscore > 0: return w.rscore,w.normp r = w.user ftp = float(r.ftp) if w.workouttype in otwtypes: ftp = ftp*(100.-r.otwslack)/100. if r.hrftp == 0: hrftp = (r.an+r.tr)/2. r.hrftp = int(hrftp) r.save() job = myqueue( queuehigh, handle_calctrimp, w.id, w.csvfilename, ftp, r.sex, r.hrftp, r.max, r.rest) return 0,0 def workout_normv(w,pp=4.0): if w.normv > 0: return w.normv,w.normw r = w.user ftp = float(r.ftp) if w.workouttype in otwtypes: ftp = ftp*(100.-r.otwslack)/100. if r.hrftp == 0: hrftp = (r.an+r.tr)/2. r.hrftp = int(hrftp) r.save() job = myqueue( queuehigh, handle_calctrimp, w.id, w.csvfilename, ftp, r.sex, r.hrftp, r.max, r.rest) return 0,0