from __future__ import absolute_import from __future__ import division from __future__ import print_function from __future__ import unicode_literals # All the data preparation, data cleaning and data mangling should # be defined here from __future__ import unicode_literals, absolute_import from rowers.models import Workout, StrokeData,Team import pytz from rowingdata import rowingdata as rrdata from rowingdata import rower as rrower from shutil import copyfile from rowingdata import ( get_file_type, get_empower_rigging,get_empower_firmware ) from rowers.tasks import handle_sendemail_unrecognized from rowers.tasks import handle_zip_file from pandas import DataFrame, Series from django.utils import timezone from django.utils.timezone import get_current_timezone from django_mailbox.models import Message,Mailbox,MessageAttachment from django.core.exceptions import ValidationError from time import strftime import arrow thetimezone = get_current_timezone() from rowingdata import ( TCXParser, RowProParser, ErgDataParser, CoxMateParser, BoatCoachParser, RowPerfectParser, BoatCoachAdvancedParser, MysteryParser, BoatCoachOTWParser,QuiskeParser, painsledDesktopParser, speedcoachParser, ErgStickParser, SpeedCoach2Parser, FITParser, fitsummarydata, RitmoTimeParser,KinoMapParser, make_cumvalues,cumcpdata,ExcelTemplate, summarydata, get_file_type, ) from rowingdata.csvparsers import HumonParser from rowers.metrics import axes,calc_trimp,rowingmetrics from rowers.models import strokedatafields #allowedcolumns = [item[0] for item in rowingmetrics] allowedcolumns = [key for key,value in strokedatafields.items()] #from async_messages import messages as a_messages import os import zipfile import pandas as pd import numpy as np import itertools import math from rowers.tasks import ( handle_sendemail_unrecognized, handle_sendemail_breakthrough, handle_sendemail_hard, handle_updatecp,handle_updateergcp, handle_calctrimp, ) from django.conf import settings from sqlalchemy import create_engine import sqlalchemy as sa import sys import rowers.utils as utils import rowers.datautils as datautils from rowers.utils import lbstoN,myqueue,is_ranking_piece,wavg from timezonefinder import TimezoneFinder import django_rq queue = django_rq.get_queue('default') queuelow = django_rq.get_queue('low') queuehigh = django_rq.get_queue('default') from rowsandall_app.settings import SITE_URL from rowers.mytypes import otwtypes,otetypes from rowers.database import * from rowers.opaque import encoder # mapping the DB column names to the CSV file column names columndict = { 'time': 'TimeStamp (sec)', 'hr': ' HRCur (bpm)', 'velo': ' AverageBoatSpeed (m/s)', 'pace': ' Stroke500mPace (sec/500m)', 'spm': ' Cadence (stokes/min)', 'power': ' Power (watts)', 'averageforce': ' AverageDriveForce (lbs)', 'drivelength': ' DriveLength (meters)', 'peakforce': ' PeakDriveForce (lbs)', 'distance': ' Horizontal (meters)', 'catch': 'catch', 'finish': 'finish', 'peakforceangle': 'peakforceangle', 'wash': 'wash', 'slip': 'slip', 'workoutstate': ' WorkoutState', 'cumdist': 'cum_dist', } from scipy.signal import savgol_filter import datetime def polarization_index(df,rower): df['dt'] = df['time'].diff()/6.e4 # remove rest (spm<15) df.dropna(axis=0,inplace=True) df['dt'] = df['dt'].clip(upper=4,lower=0) masklow = (df['power']>0) & (df['power']=rower.pw_at) & (df['power']rower.pw_an) time_low_pw = df.loc[masklow,'dt'].sum() time_mid_pw = df.loc[maskmid,'dt'].sum() time_high_pw = df.loc[maskhigh,'dt'].sum() frac_low = time_low_pw/(time_low_pw+time_mid_pw+time_high_pw) frac_mid = time_mid_pw/(time_low_pw+time_mid_pw+time_high_pw) frac_high = time_high_pw/(time_low_pw+time_mid_pw+time_high_pw) index = math.log10(frac_high*100.*frac_low/frac_mid) return index def get_latlon(id): try: w = Workout.objects.get(id=id) except Workout.DoesNotExist: return False rowdata = rdata(w.csvfilename) if rowdata.df.empty: return [pd.Series([]), pd.Series([])] try: try: latitude = rowdata.df.loc[:, ' latitude'] longitude = rowdata.df.loc[:, ' longitude'] except KeyError: latitude = 0 * rowdata.df.loc[:, 'TimeStamp (sec)'] longitude = 0 * rowdata.df.loc[:, 'TimeStamp (sec)'] return [latitude, longitude] except AttributeError: return [pd.Series([]), pd.Series([])] return [pd.Series([]), pd.Series([])] def workout_summary_to_df( rower, startdate=datetime.datetime(1970,1,1), enddate=timezone.now()+timezone.timedelta(days=1)): ws = Workout.objects.filter(user=rower).order_by("startdatetime") types = [] names = [] startdatetimes = [] timezones = [] distances = [] durations = [] weightcategories = [] adaptivetypes = [] weightvalues = [] notes = [] tcx_links = [] csv_links = [] rscores = [] trimps = [] for w in ws: types.append(w.workouttype) names.append(w.name) startdatetimes.append(w.startdatetime) timezones.append(w.timezone) distances.append(w.distance) durations.append(w.duration) weightcategories.append(w.weightcategory) adaptivetypes.append(w.adaptiveclass) weightvalues.append(w.weightvalue) notes.append(w.notes) tcx_link = SITE_URL+'/rowers/workout/{id}/emailtcx'.format( id=encoder.encode_hex(w.id) ) tcx_links.append(tcx_link) csv_link = SITE_URL+'/rowers/workout/{id}/emailcsv'.format( id=encoder.encode_hex(w.id) ) csv_links.append(csv_link) trimps.append(workout_trimp(w)[0]) rscore = workout_rscore(w) rscores.append(int(rscore[0])) df = pd.DataFrame({ 'name':names, 'date':startdatetimes, 'timezone':timezones, 'type':types, 'distance (m)':distances, 'duration ':durations, 'weight category':weightcategories, 'adaptive classification':adaptivetypes, 'weight (kg)':weightvalues, 'notes':notes, 'Stroke Data TCX':tcx_links, 'Stroke Data CSV':csv_links, 'TRIMP Training Load':trimps, 'TSS Training Load':rscores, }) return df def get_workouts(ids, userid): goodids = [] for id in ids: w = Workout.objects.get(id=id) if int(w.user.user.id) == int(userid): goodids.append(id) return [Workout.objects.get(id=id) for id in goodids] def filter_df(datadf, fieldname, value, largerthan=True): try: x = datadf[fieldname] except KeyError: return datadf try: if largerthan: mask = datadf[fieldname] < value else: mask = datadf[fieldname] >= value datadf.loc[mask, fieldname] = np.nan except TypeError: pass return datadf # joins workouts def join_workouts(r,ids,title='Joined Workout', parent=None, setprivate=False, forceunit='lbs'): message = None summary = '' if parent: oarlength = parent.oarlength inboard = parent.inboard workouttype = parent.workouttype notes = parent.notes summary = parent.summary if parent.privacy == 'hidden': makeprivate = True else: makeprivate = False startdatetime = parent.startdatetime else: oarlength = 2.89 inboard = 0.88 workouttype = 'rower' notes = '' summary = '' makeprivate = False startdatetime = timezone.now() if setprivate == True and makeprivate == False: makeprivate = True elif setprivate == False and makeprivate == True: makeprivate = False # reorder in chronological order ws = Workout.objects.filter(id__in=ids).order_by("startdatetime") if not parent: parent = ws[0] oarlength = parent.oarlength inboard = parent.inboard workouttype = parent.workouttype notes = parent.notes summary = parent.summary files = [w.csvfilename for w in ws] row = rdata(files[0]) files = files[1:] while len(files): row2 = rdata(files[0]) if row2 != 0: row = row+row2 files = files[1:] timestr = strftime("%Y%m%d-%H%M%S") csvfilename = 'media/df_' + timestr + '.csv' row.write_csv(csvfilename,gzip=True) id, message = save_workout_database(csvfilename, r, workouttype=workouttype, title=title, notes=notes, oarlength=oarlength, inboard=inboard, makeprivate=makeprivate, dosmooth=False, consistencychecks=False) return (id, message) def df_resample(datadf): # time stamps must be in seconds timestamps = datadf['TimeStamp (sec)'].astype('int') datadf['timestamps'] = timestamps newdf = datadf.groupby(['timestamps']).mean() return newdf def clean_df_stats(datadf, workstrokesonly=True, ignorehr=True, ignoreadvanced=False): # clean data remove zeros and negative values # bring metrics which have negative values to positive domain if datadf.empty: return datadf try: datadf['catch'] = -datadf['catch'] except KeyError: pass try: datadf['peakforceangle'] = datadf['peakforceangle'] + 1000 except KeyError: pass try: datadf['hr'] = datadf['hr'] + 10 except KeyError: pass # protect 0 spm values from being nulled try: datadf['spm'] = datadf['spm'] + 1.0 except (KeyError,TypeError) as e: pass try: datadf = datadf.clip(lower=0) except TypeError: pass datadf.replace(to_replace=0, value=np.nan, inplace=True) # bring spm back to real values try: datadf['spm'] = datadf['spm'] - 1 except (TypeError,KeyError) as e: pass # return from positive domain to negative try: datadf['catch'] = -datadf['catch'] except KeyError: pass try: datadf['peakforceangle'] = datadf['peakforceangle'] - 1000 except KeyError: pass try: datadf['hr'] = datadf['hr'] - 10 except KeyError: pass # clean data for useful ranges per column if not ignorehr: try: mask = datadf['hr'] < 30 datadf.loc[mask, 'hr'] = np.nan except KeyError: pass try: mask = datadf['spm'] < 0 datadf.loc[mask,'spm'] = np.nan except KeyError: pass try: mask = datadf['efficiency'] > 200. datadf.loc[mask, 'efficiency'] = np.nan except KeyError: pass try: mask = datadf['spm'] < 10 datadf.loc[mask, 'spm'] = np.nan except KeyError: pass try: mask = datadf['pace'] / 1000. > 300. datadf.loc[mask, 'pace'] = np.nan except KeyError: pass try: mask = datadf['efficiency'] < 0. datadf.loc[mask, 'efficiency'] = np.nan except KeyError: pass try: mask = datadf['pace'] / 1000. < 60. datadf.loc[mask, 'pace'] = np.nan except KeyError: pass try: mask = datadf['spm'] > 60 datadf.loc[mask, 'spm'] = np.nan except KeyError: pass try: mask = datadf['wash'] < 1 datadf.loc[mask, 'wash'] = np.nan except KeyError: pass if not ignoreadvanced: try: mask = datadf['rhythm'] < 5 datadf.loc[mask, 'rhythm'] = np.nan except KeyError: pass try: mask = datadf['rhythm'] > 70 datadf.loc[mask, 'rhythm'] = np.nan except KeyError: pass try: mask = datadf['power'] < 20 datadf.loc[mask, 'power'] = np.nan except KeyError: pass try: mask = datadf['drivelength'] < 0.5 datadf.loc[mask, 'drivelength'] = np.nan except KeyError: pass try: mask = datadf['forceratio'] < 0.2 datadf.loc[mask, 'forceratio'] = np.nan except KeyError: pass try: mask = datadf['forceratio'] > 1.0 datadf.loc[mask, 'forceratio'] = np.nan except KeyError: pass try: mask = datadf['drivespeed'] < 0.5 datadf.loc[mask, 'drivespeed'] = np.nan except KeyError: pass try: mask = datadf['drivespeed'] > 4 datadf.loc[mask, 'drivespeed'] = np.nan except KeyError: pass try: mask = datadf['driveenergy'] > 2000 datadf.loc[mask, 'driveenergy'] = np.nan except KeyError: pass try: mask = datadf['driveenergy'] < 100 datadf.loc[mask, 'driveenergy'] = np.nan except KeyError: pass try: mask = datadf['catch'] > -30. datadf.loc[mask, 'catch'] = np.nan except KeyError: pass workoutstateswork = [1, 4, 5, 8, 9, 6, 7] workoutstatesrest = [3] workoutstatetransition = [0, 2, 10, 11, 12, 13] if workstrokesonly == 'True' or workstrokesonly == True: try: datadf = datadf[~datadf['workoutstate'].isin(workoutstatesrest)] except: pass return datadf def getstatsfields(): # Get field names and remove those that are not useful in stats fields = StrokeData._meta.get_fields() fielddict = {field.name: field.verbose_name for field in fields} # fielddict.pop('workoutid') fielddict.pop('ergpace') fielddict.pop('hr_an') fielddict.pop('hr_tr') fielddict.pop('hr_at') fielddict.pop('hr_ut2') fielddict.pop('hr_ut1') fielddict.pop('time') fielddict.pop('distance') fielddict.pop('nowindpace') fielddict.pop('fnowindpace') fielddict.pop('fergpace') fielddict.pop('equivergpower') # fielddict.pop('workoutstate') fielddict.pop('fpace') fielddict.pop('pace') fielddict.pop('id') fielddict.pop('ftime') fielddict.pop('x_right') fielddict.pop('hr_max') fielddict.pop('hr_bottom') fielddict.pop('cumdist') try: fieldlist = [field for field, value in fielddict.iteritems()] except AttributeError: fieldlist = [field for field, value in fielddict.items()] return fieldlist, fielddict # A string representation for time deltas def niceformat(values): out = [] for v in values: formattedv = strfdelta(v) out.append(formattedv) return out # A nice printable format for time delta values def strfdelta(tdelta): try: minutes, seconds = divmod(tdelta.seconds, 60) tenths = int(tdelta.microseconds / 1e5) except AttributeError: minutes, seconds = divmod(tdelta.view(np.int64), 60e9) seconds, rest = divmod(seconds, 1e9) tenths = int(rest / 1e8) res = "{minutes:0>2}:{seconds:0>2}.{tenths:0>1}".format( minutes=minutes, seconds=seconds, tenths=tenths, ) return res def timedelta_to_seconds(tdelta): return 60.*tdelta.minute+tdelta.second # A nice printable format for pace values def nicepaceformat(values): out = [] for v in values: formattedv = strfdelta(v) out.append(formattedv) return out # Convert seconds to a Time Delta value, replacing NaN with a 5:50 pace def timedeltaconv(x): if np.isfinite(x) and x != 0 and x > 0 and x < 175000: dt = datetime.timedelta(seconds=x) else: dt = datetime.timedelta(seconds=350.) return dt def paceformatsecs(values): out = [] for v in values: td = timedeltaconv(v) formattedv = strfdelta(td) out.append(formattedv) return out def fitnessmetric_to_sql(m,table='powertimefitnessmetric',debug=False): engine = create_engine(database_url, echo=False) columns = ', '.join(m.keys()) placeholders = ", ".join(["?"] * len(m)) query = "INSERT into %s ( %s ) Values (%s)" % (table, columns, placeholders) values = tuple(m[key] for key in m.keys()) with engine.connect() as conn, conn.begin(): result = conn.execute(query,values) conn.close() engine.dispose() return 1 def getcpdata_sql(rower_id,table='cpdata'): engine = create_engine(database_url, echo=False) query = sa.text('SELECT * from {table} WHERE user={rower_id};'.format( rower_id=rower_id, table=table, )) connection = engine.raw_connection() df = pd.read_sql_query(query, engine) return df def deletecpdata_sql(rower_id,table='cpdata'): engine = create_engine(database_url, echo=False) query = sa.text('DELETE from {table} WHERE user={rower_id};'.format( rower_id=rower_id, table=table, )) with engine.connect() as conn, conn.begin(): try: result = conn.execute(query) except: print("Database locked") conn.close() engine.dispose() def updatecpdata_sql(rower_id,delta,cp,table='cpdata',distance=[]): deletecpdata_sql(rower_id) df = pd.DataFrame( { 'delta':delta, 'cp':cp, 'user':rower_id } ) if not distance.empty: df['distance'] = distance engine = create_engine(database_url, echo=False) with engine.connect() as conn, conn.begin(): df.to_sql(table, engine, if_exists='append', index=False) conn.close() engine.dispose() def runcpupdate( rower,type='water', startdate=timezone.now()-datetime.timedelta(days=365), enddate=timezone.now()+datetime.timedelta(days=5) ): if type == 'water': theworkouts = Workout.objects.filter( user=rower,rankingpiece=True, workouttype='water', startdatetime__gte=startdate, startdatetime__lte=enddate ) table = 'cpdata' else: theworkouts = Workout.objects.filter( user=rower,rankingpiece=True, workouttype__in=[ 'rower', 'dynamic', 'slides' ], startdatetime__gte=startdate, startdatetime__lte=enddate ) table = 'cpergdata' theids = [w.id for w in theworkouts] job = myqueue( queue, handle_updatecp, rower.id, theids, table=table) return job def fetchcperg(rower,theworkouts): theids = [int(w.id) for w in theworkouts] thefilenames = [w.csvfilename for w in theworkouts] cpdf = getcpdata_sql(rower.id,table='ergcpdata') job = myqueue( queue, handle_updateergcp, rower.id, thefilenames) return cpdf def fetchcp(rower,theworkouts,table='cpdata'): # get all power data from database (plus workoutid) theids = [int(w.id) for w in theworkouts] columns = ['power','workoutid','time'] df = getsmallrowdata_db(columns,ids=theids) df.dropna(inplace=True,axis=0) if df.empty: avgpower2 = {} for id in theids: avgpower2[id] = 0 return pd.Series([]),pd.Series([]),avgpower2 try: dfgrouped = df.groupby(['workoutid']) except KeyError: avgpower2 = {} return pd.Series([]),pd.Series([]),avgpower2 try: avgpower2 = dict(dfgrouped.mean()['power'].astype(int)) except KeyError: avgpower2 = {} for id in theids: avgpower2[id] = 0 return pd.Series([]),pd.Series([]),avgpower2 cpdf = getcpdata_sql(rower.id,table=table) if not cpdf.empty: return cpdf['delta'],cpdf['cp'],avgpower2 else: job = myqueue(queue, handle_updatecp, rower.id, theids, table=table) return pd.Series([]),pd.Series([]),avgpower2 return pd.Series([]),pd.Series([]),avgpower2 # create a new workout from manually entered data def create_row_df(r,distance,duration,startdatetime,workouttype='rower', avghr=None,avgpwr=None,avgspm=None, rankingpiece = False, duplicate=False, title='Manual entry',notes='',weightcategory='hwt', adaptiveclass='None'): if duration is not None: totalseconds = duration.hour*3600. totalseconds += duration.minute*60. totalseconds += duration.second totalseconds += duration.microsecond/1.e6 else: totalseconds = 60. if distance is None: distance = 0 try: nr_strokes = int(distance/10.) except TypeError: nr_strokes = int(20.*totalseconds) if nr_strokes == 0: nr_strokes = 100 unixstarttime = arrow.get(startdatetime).timestamp if not avgspm: try: spm = 60.*nr_strokes/totalseconds except ZeroDivisionError: spm = 20. else: spm = avgspm step = totalseconds/float(nr_strokes) elapsed = np.arange(nr_strokes)*totalseconds/(float(nr_strokes-1)) dstep = distance/float(nr_strokes) d = np.arange(nr_strokes)*distance/(float(nr_strokes-1)) unixtime = unixstarttime + elapsed try: pace = 500.*totalseconds/distance except ZeroDivisionError: pace = 240. if workouttype in ['rower','slides','dynamic']: try: velo = distance/totalseconds except ZeroDivisionError: velo = 2.4 power = 2.8*velo**3 elif avgpwr is not None: power = avgpwr else: power = 0 if avghr is not None: hr = avghr else: hr = 0 df = pd.DataFrame({ 'TimeStamp (sec)': unixtime, ' Horizontal (meters)': d, ' Cadence (stokes/min)': spm, ' Stroke500mPace (sec/500m)':pace, ' ElapsedTime (sec)':elapsed, ' Power (watts)':power, ' HRCur (bpm)':hr, }) timestr = strftime("%Y%m%d-%H%M%S") csvfilename = 'media/df_' + timestr + '.csv' df[' ElapsedTime (sec)'] = df['TimeStamp (sec)'] row = rrdata(df=df) row.write_csv(csvfilename, gzip = True) id, message = save_workout_database(csvfilename, r, title=title, notes=notes, rankingpiece=rankingpiece, duplicate=duplicate, dosmooth=False, workouttype=workouttype, consistencychecks=False, weightcategory=weightcategory, adaptiveclass=adaptiveclass, totaltime=totalseconds) return (id, message) from rowers.utils import totaltime_sec_to_string # Processes painsled CSV file to database def save_workout_database(f2, r, dosmooth=True, workouttype='rower', boattype='1x', adaptiveclass='None', weightcategory='hwt', dosummary=True, title='Workout', workoutsource='unknown', notes='', totaldist=0, totaltime=0, rankingpiece=False, duplicate=False, summary='', makeprivate=False, oarlength=2.89, inboard=0.88, forceunit='lbs', consistencychecks=False): message = None powerperc = 100 * np.array([r.pw_ut2, r.pw_ut1, r.pw_at, r.pw_tr, r.pw_an]) / r.ftp # make workout and put in database rr = rrower(hrmax=r.max, hrut2=r.ut2, hrut1=r.ut1, hrat=r.at, hrtr=r.tr, hran=r.an, ftp=r.ftp, powerperc=powerperc, powerzones=r.powerzones) row = rdata(f2, rower=rr) if row.df.empty: return (0, 'Error: CSV data file was empty') dtavg = row.df['TimeStamp (sec)'].diff().mean() if dtavg < 1: newdf = df_resample(row.df) try: os.remove(f2) except: pass return new_workout_from_df(r, newdf, title=title,boattype=boattype, workouttype=workouttype, workoutsource=workoutsource) try: checks = row.check_consistency() allchecks = 1 for key, value in checks.items(): if not value: allchecks = 0 except ZeroDivisionError: pass if not allchecks and consistencychecks: # row.repair() pass if row == 0: return (0, 'Error: CSV data file not found') if dosmooth: # auto smoothing pace = row.df[' Stroke500mPace (sec/500m)'].values velo = 500. / pace f = row.df['TimeStamp (sec)'].diff().mean() if f != 0 and not np.isnan(f): windowsize = 2 * (int(10. / (f))) + 1 else: windowsize = 1 if not 'originalvelo' in row.df: row.df['originalvelo'] = velo if windowsize > 3 and windowsize < len(velo): velo2 = savgol_filter(velo, windowsize, 3) else: velo2 = velo velo3 = pd.Series(velo2) velo3 = velo3.replace([-np.inf, np.inf], np.nan) velo3 = velo3.fillna(method='ffill') pace2 = 500. / abs(velo3) row.df[' Stroke500mPace (sec/500m)'] = pace2 row.df = row.df.fillna(0) row.write_csv(f2, gzip=True) try: os.remove(f2) except: pass # recalculate power data if workouttype == 'rower' or workouttype == 'dynamic' or workouttype == 'slides': try: row.erg_recalculatepower() row.write_csv(f2, gzip=True) except: pass averagehr = row.df[' HRCur (bpm)'].mean() maxhr = row.df[' HRCur (bpm)'].max() if totaldist == 0: totaldist = row.df['cum_dist'].max() if totaltime == 0: totaltime = row.df['TimeStamp (sec)'].max( ) - row.df['TimeStamp (sec)'].min() try: totaltime = totaltime + row.df.loc[:, ' ElapsedTime (sec)'].iloc[0] except KeyError: pass if np.isnan(totaltime): totaltime = 0 if dosummary: summary = row.allstats() timezone_str = 'UTC' try: workoutstartdatetime = timezone.make_aware(row.rowdatetime) except ValueError: workoutstartdatetime = row.rowdatetime try: latavg = row.df[' latitude'].mean() lonavg = row.df[' longitude'].mean() tf = TimezoneFinder() try: timezone_str = tf.timezone_at(lng=lonavg, lat=latavg) except ValueError: timezone_str = 'UTC' if timezone_str == None: timezone_str = tf.closest_timezone_at(lng=lonavg, lat=latavg) if timezone_str == None: timezone_str = r.defaulttimezone try: workoutstartdatetime = pytz.timezone(timezone_str).localize( row.rowdatetime ) except ValueError: workoutstartdatetime = workoutstartdatetime.astimezone( pytz.timezone(timezone_str) ) except KeyError: timezone_str = r.defaulttimezone duration = totaltime_sec_to_string(totaltime) workoutdate = workoutstartdatetime.astimezone( pytz.timezone(timezone_str) ).strftime('%Y-%m-%d') workoutstarttime = workoutstartdatetime.astimezone( pytz.timezone(timezone_str) ).strftime('%H:%M:%S') if makeprivate: privacy = 'hidden' else: privacy = 'visible' # checking for inf values totaldist = np.nan_to_num(totaldist) maxhr = np.nan_to_num(maxhr) averagehr = np.nan_to_num(averagehr) dragfactor = 0 if workouttype in otetypes: dragfactor = row.dragfactor t = datetime.datetime.strptime(duration,"%H:%M:%S.%f") delta = datetime.timedelta(hours=t.hour, minutes=t.minute, seconds=t.second) workoutenddatetime = workoutstartdatetime+delta # check for duplicate start times and duration ws = Workout.objects.filter(user=r,date=workoutdate,duplicate=False).exclude( startdatetime__gt=workoutenddatetime ) ws2 = [] for ww in ws: t = ww.duration delta = datetime.timedelta(hours=t.hour, minutes=t.minute, seconds=t.second) enddatetime = ww.startdatetime+delta if enddatetime > workoutstartdatetime: ws2.append(ww) if (len(ws2) != 0): message = "Warning: This workout overlaps with an existing one and was marked as a duplicate" duplicate = True w = Workout(user=r, name=title, date=workoutdate, workouttype=workouttype, boattype=boattype, dragfactor=dragfactor, duration=duration, distance=totaldist, weightcategory=weightcategory, adaptiveclass=adaptiveclass, starttime=workoutstarttime, duplicate=duplicate, workoutsource=workoutsource, rankingpiece=rankingpiece, forceunit=forceunit, csvfilename=f2, notes=notes, summary=summary, maxhr=maxhr, averagehr=averagehr, startdatetime=workoutstartdatetime, inboard=inboard, oarlength=oarlength, timezone=timezone_str, privacy=privacy) try: w.save() except ValidationError: try: w.startdatetime = timezone.now() w.save() except ValidationError: return (0,'Unable to create your workout') if privacy == 'visible': ts = Team.objects.filter(rower=r) for t in ts: w.team.add(t) # put stroke data in database res = dataprep(row.df, id=w.id, bands=True, barchart=True, otwpower=True, empower=True, inboard=inboard) rscore,normp = workout_rscore(w) trimp,hrtss = workout_trimp(w) isbreakthrough = False ishard = False if workouttype == 'water': df = getsmallrowdata_db(['power', 'workoutid', 'time'], ids=[w.id]) try: powermean = df['power'].mean() except KeyError: powermean = 0 if powermean != 0: thesecs = totaltime maxt = 1.05 * thesecs if maxt > 0: logarr = datautils.getlogarr(maxt) dfgrouped = df.groupby(['workoutid']) delta, cpvalues, avgpower = datautils.getcp(dfgrouped, logarr) res, btvalues, res2 = utils.isbreakthrough( delta, cpvalues, r.p0, r.p1, r.p2, r.p3, r.cpratio) else: res = 0 res2 = 0 if res: isbreakthrough = True res = datautils.updatecp(delta, cpvalues, r) if res2 and not isbreakthrough: ishard = True # submit email task to send email about breakthrough workout if isbreakthrough: if r.getemailnotifications and not r.emailbounced: job = myqueue(queuehigh,handle_sendemail_breakthrough, w.id, r.user.email, r.user.first_name, r.user.last_name, btvalues=btvalues.to_json()) # submit email task to send email about breakthrough workout if ishard: if r.getemailnotifications and not r.emailbounced: job = myqueue(queuehigh,handle_sendemail_hard, w.id, r.user.email, r.user.first_name, r.user.last_name, btvalues=btvalues.to_json()) return (w.id, message) parsers = { 'kinomap': KinoMapParser, 'xls': ExcelTemplate, 'rp': RowProParser, 'tcx':TCXParser, 'mystery':MysteryParser, 'ritmotime':RitmoTimeParser, 'quiske': QuiskeParser, 'rowperfect3': RowPerfectParser, 'coxmate': CoxMateParser, 'bcmike': BoatCoachAdvancedParser, 'boatcoach': BoatCoachParser, 'boatcoachotw': BoatCoachOTWParser, 'painsleddesktop': painsledDesktopParser, 'speedcoach': speedcoachParser, 'speedcoach2': SpeedCoach2Parser, 'ergstick': ErgStickParser, 'fit': FITParser, 'ergdata': ErgDataParser, 'humon': HumonParser, } def parsenonpainsled(fileformat,f2,summary): try: row = parsers[fileformat](f2) hasrecognized = True except KeyError: hasrecognized = False return None, hasrecognized, '', 'unknown' # handle speed coach GPS 2 if (fileformat == 'speedcoach2'): oarlength, inboard = get_empower_rigging(f2) empowerfirmware = get_empower_firmware(f2) if empowerfirmware != '': fileformat = fileformat+'v'+str(empowerfirmware) else: fileformat = 'speedcoach2v0' try: summary = row.allstats() except ZeroDivisionError: summary = '' # handle FIT if (fileformat == 'fit'): try: s = fitsummarydata(f2) s.setsummary() summary = s.summarytext except: pass hasrecognized = True return row,hasrecognized,summary,fileformat def handle_nonpainsled(f2, fileformat, summary=''): oarlength = 2.89 inboard = 0.88 hasrecognized = False row,hasrecognized,summary,fileformat = parsenonpainsled(fileformat,f2,summary) # Handle c2log if (fileformat == 'c2log' or fileformat == 'rowprolog'): return (0,'',0,0,'') if not hasrecognized: return (0,'',0,0,'') f_to_be_deleted = f2 # should delete file f2 = f2[:-4] + 'o.csv' try: row2 = rrdata(df = row.df) row2.write_csv(f2, gzip=True) except: return (0,'',0,0,'') # os.remove(f2) try: os.remove(f_to_be_deleted) except: try: os.remove(f_to_be_deleted + '.gz') except: pass return (f2, summary, oarlength, inboard, fileformat) # Create new workout from file and store it in the database # This routine should be used everywhere in views.py and mailprocessing.py # Currently there is code duplication def new_workout_from_file(r, f2, workouttype='rower', workoutsource=None, title='Workout', boattype='1x', makeprivate=False, notes=''): message = None print('noot',notes) try: fileformat = get_file_type(f2) except (IOError,UnicodeDecodeError): os.remove(f2) message = "Rowsandall could not process this file. The extension is supported but the file seems corrupt. Contact info@rowsandall.com if you think this is incorrect." return (0, message, f2) summary = '' oarlength = 2.89 inboard = 0.88 if len(fileformat) == 3 and fileformat[0] == 'zip': f_to_be_deleted = f2 workoutsbox = Mailbox.objects.filter(name='workouts')[0] msg = Message(mailbox=workoutsbox, from_header=r.user.email, subject = title) msg.save() f3 = 'media/mailbox_attachments/'+f2[6:] copyfile(f2,f3) f3 = f3[6:] a = MessageAttachment(message=msg,document=f3) a.save() return -1, message, f2 # Some people try to upload Concept2 logbook summaries if fileformat == 'c2log': os.remove(f2) message = "This summary does not contain stroke data. Use the files containing stroke by stroke data." return (0, message, f2) if fileformat == 'nostrokes': os.remove(f2) message = "It looks like this file doesn't contain stroke data." return (0, message, f2) if fileformat == 'kml': os.remove(f2) message = "KML files are not supported" return (0, message, f2) # Some people upload corrupted zip files if fileformat == 'notgzip': os.remove(f2) message = "Rowsandall could not process this file. The extension is supported but the file seems corrupt. Contact info@rowsandall.com if you think this is incorrect." return (0, message, f2) # Some people try to upload RowPro summary logs if fileformat == 'rowprolog': os.remove(f2) message = "This RowPro logbook summary does not contain stroke data. Please use the Stroke Data CSV file for the individual workout in your log." return (0, message, f2) # Sometimes people try an unsupported file type. # Send an email to info@rowsandall.com with the file attached # for me to check if it is a bug, or a new file type # worth supporting if fileformat == 'unknown': message = "We couldn't recognize the file type" extension = os.path.splitext(f2)[1] filename = os.path.splitext(f2)[0] if extension == '.gz': filename = os.path.splitext(filename)[0] extension2 = os.path.splitext(filename)[1]+extension extension = extension2 f4 = filename+'a'+extension copyfile(f2,f4) job = myqueue(queuehigh, handle_sendemail_unrecognized, f4, r.user.email) return (0, message, f2) if fileformat == 'att': # email attachment which can safely be ignored return (0, '', f2) # handle non-Painsled by converting it to painsled compatible CSV if (fileformat != 'csv'): f2, summary, oarlength, inboard, fileformat = handle_nonpainsled( f2, fileformat, summary=summary ) if not f2: message = 'Something went wrong' return (0, message, '') dosummary = (fileformat != 'fit' and 'speedcoach2' not in fileformat) dosummary = dosummary or summary == '' if workoutsource is None: workoutsource = fileformat id, message = save_workout_database( f2, r, notes=notes, workouttype=workouttype, weightcategory=r.weightcategory, adaptiveclass=r.adaptiveclass, boattype=boattype, makeprivate=makeprivate, dosummary=dosummary, workoutsource=workoutsource, summary=summary, inboard=inboard, oarlength=oarlength, title=title ) return (id, message, f2) def split_workout(r, parent, splitsecond, splitmode): data, row = getrowdata_db(id=parent.id) latitude, longitude = get_latlon(parent.id) if not latitude.empty and not longitude.empty: data[' latitude'] = latitude data[' longitude'] = longitude data['time'] = data['time'] / 1000. data1 = data[data['time'] <= splitsecond].copy() data2 = data[data['time'] > splitsecond].copy() data1 = data1.sort_values(['time']) data1 = data1.interpolate(method='linear', axis=0, limit_direction='both', limit=10) data1.fillna(method='bfill', inplace=True) # Some new stuff to try out data1 = data1.groupby('time', axis=0).mean() data1['time'] = data1.index data1.reset_index(drop=True, inplace=True) data2 = data2.sort_values(['time']) data2 = data2.interpolate(method='linear', axis=0, limit_direction='both', limit=10) data2.fillna(method='bfill', inplace=True) # Some new stuff to try out data2 = data2.groupby('time', axis=0).mean() data2['time'] = data2.index data2.reset_index(drop=True, inplace=True) data1['pace'] = data1['pace'] / 1000. data2['pace'] = data2['pace'] / 1000. data1.drop_duplicates(subset='time', inplace=True) data2.drop_duplicates(subset='time', inplace=True) messages = [] ids = [] if 'keep first' in splitmode: if 'firstprivate' in splitmode: setprivate = True else: setprivate = False id, message = new_workout_from_df(r, data1, title=parent.name + ' (1)', parent=parent, setprivate=setprivate, forceunit='N') messages.append(message) ids.append(encoder.encode_hex(id)) if 'keep second' in splitmode: data2['cumdist'] = data2['cumdist'] - data2.iloc[ 0, data2.columns.get_loc('cumdist') ] data2['distance'] = data2['distance'] - data2.iloc[ 0, data2.columns.get_loc('distance') ] data2['time'] = data2['time'] - data2.iloc[ 0, data2.columns.get_loc('time') ] if 'secondprivate' in splitmode: setprivate = True else: setprivate = False dt = datetime.timedelta(seconds=splitsecond) id, message = new_workout_from_df(r, data2, title=parent.name + ' (2)', parent=parent, setprivate=setprivate, dt=dt, forceunit='N') messages.append(message) ids.append(encoder.encode_hex(id)) if not 'keep original' in splitmode: if 'keep second' in splitmode or 'keep first' in splitmode: parent.delete() messages.append('Deleted Workout: ' + parent.name) else: messages.append('That would delete your workout') ids.append(encoder.encode_hex(parent.id)) elif 'originalprivate' in splitmode: parent.privacy = 'hidden' parent.save() return ids, messages # Create new workout from data frame and store it in the database # This routine should be used everywhere in views.py and mailprocessing.py # Currently there is code duplication def new_workout_from_df(r, df, title='New Workout', workoutsource='unknown', boattype='1x', workouttype='rower', parent=None, setprivate=False, forceunit='lbs', dt=datetime.timedelta()): message = None summary = '' if parent: oarlength = parent.oarlength inboard = parent.inboard workoutsource = parent.workoutsource workouttype = parent.workouttype boattype = parent.boattype notes = parent.notes summary = parent.summary if parent.privacy == 'hidden': makeprivate = True else: makeprivate = False startdatetime = parent.startdatetime + dt else: oarlength = 2.89 inboard = 0.88 notes = '' summary = '' makeprivate = False startdatetime = timezone.now() if setprivate: makeprivate = True timestr = strftime("%Y%m%d-%H%M%S") csvfilename = 'media/df_' + timestr + '.csv' if forceunit == 'N': # change to lbs for now df['peakforce'] /= lbstoN df['averageforce'] /= lbstoN df.rename(columns=columndict, inplace=True) #starttimeunix = mktime(startdatetime.utctimetuple()) starttimeunix = arrow.get(startdatetime).timestamp df[' ElapsedTime (sec)'] = df['TimeStamp (sec)'] df['TimeStamp (sec)'] = df['TimeStamp (sec)'] + starttimeunix row = rrdata(df=df) row.write_csv(csvfilename, gzip=True) # res = df.to_csv(csvfilename+'.gz',index_label='index', # compression='gzip') id, message = save_workout_database(csvfilename, r, workouttype=workouttype, boattype=boattype, title=title, workoutsource=workoutsource, notes=notes, oarlength=oarlength, inboard=inboard, makeprivate=makeprivate, dosmooth=False, consistencychecks=False) return (id, message) # Compare the data from the CSV file and the database # Currently only calculates number of strokes. To be expanded with # more elaborate testing if needed def compare_data(id): row = Workout.objects.get(id=id) f1 = row.csvfilename try: rowdata = rdata(f1) l1 = len(rowdata.df) except AttributeError: rowdata = 0 l1 = 0 engine = create_engine(database_url, echo=False) query = sa.text('SELECT COUNT(*) FROM strokedata WHERE workoutid={id};'.format( id=id, )) with engine.connect() as conn, conn.begin(): try: res = conn.execute(query) l2 = res.fetchall()[0][0] except: print("Database Locked") conn.close() engine.dispose() lfile = l1 ldb = l2 return l1 == l2 and l1 != 0, ldb, lfile # Repair data for workouts where the CSV file is lost (or the DB entries # don't exist) def repair_data(verbose=False): ws = Workout.objects.all() for w in ws: if verbose: sys.stdout.write(".") test, ldb, lfile = compare_data(w.id) if not test: if verbose: print(w.id, lfile, ldb) try: rowdata = rdata(w.csvfilename) if rowdata and len(rowdata.df): update_strokedata(w.id, rowdata.df) except (IOError, AttributeError): pass if lfile == 0: # if not ldb - delete workout try: data = read_df_sql(w.id) try: datalength = len(data) except AttributeError: datalength = 0 if datalength != 0: data.rename(columns=columndict, inplace=True) res = data.to_csv(w.csvfilename + '.gz', index_label='index', compression='gzip') else: w.delete() except: pass # A wrapper around the rowingdata class, with some error catching def rdata(file, rower=rrower()): try: res = rrdata(csvfile=file, rower=rower) except (IOError, IndexError): try: res = rrdata(csvfile=file + '.gz', rower=rower) except (IOError, IndexError): res = rrdata() except: res = rrdata() except EOFError: res = rrdata() except: res = rrdata() return res # Remove all stroke data for workout ID from database def delete_strokedata(id): engine = create_engine(database_url, echo=False) query = sa.text('DELETE FROM strokedata WHERE workoutid={id};'.format( id=id, )) with engine.connect() as conn, conn.begin(): try: result = conn.execute(query) except: print("Database Locked") conn.close() engine.dispose() # Replace stroke data in DB with data from CSV file def update_strokedata(id, df): delete_strokedata(id) rowdata = dataprep(df, id=id, bands=True, barchart=True, otwpower=True) # Test that all data are of a numerical time def testdata(time, distance, pace, spm): t1 = np.issubdtype(time, np.number) t2 = np.issubdtype(distance, np.number) t3 = np.issubdtype(pace, np.number) t4 = np.issubdtype(spm, np.number) return t1 and t2 and t3 and t4 # Get data from DB for one workout (fetches all data). If data # is not in DB, read from CSV file (and create DB entry) def getrowdata_db(id=0, doclean=False, convertnewtons=True, checkefficiency=True): data = read_df_sql(id) data['x_right'] = data['x_right'] / 1.0e6 data['deltat'] = data['time'].diff() if data.empty: rowdata, row = getrowdata(id=id) if not rowdata.empty: data = dataprep(rowdata.df, id=id, bands=True, barchart=True, otwpower=True) else: data = pd.DataFrame() # returning empty dataframe else: row = Workout.objects.get(id=id) if not data.empty and data['efficiency'].mean() == 0 and data['power'].mean() != 0 and checkefficiency == True: data = add_efficiency(id=id) if doclean: data = clean_df_stats(data, ignorehr=True) return data, row # Fetch a subset of the data from the DB def getsmallrowdata_db(columns, ids=[], doclean=True, workstrokesonly=True): prepmultipledata(ids) data,extracols = read_cols_df_sql(ids, columns) if extracols and len(ids)==1: w = Workout.objects.get(id=ids[0]) row = rdata(w.csvfilename) try: row.set_instroke_metrics() except (AttributeError,TypeError): pass try: f = row.df['TimeStamp (sec)'].diff().mean() except (AttributeError,KeyError) as e: f = 0 if f != 0 and not np.isnan(f): windowsize = 2 * (int(10. / (f))) + 1 else: windowsize = 1 for c in extracols: try: cdata = row.df[c] cdata.fillna(inplace=True,method='bfill') # This doesn't work because sometimes data are duplicated at save try: cdata2 = savgol_filter(cdata.values,windowsize,3) data[c] = cdata2 except ValueError: data[c] = cdata except (KeyError, AttributeError): data[c] = 0 # convert newtons if doclean: data = clean_df_stats(data, ignorehr=True, workstrokesonly=workstrokesonly) data.dropna(axis=1,how='all',inplace=True) data.dropna(axis=0,how='any',inplace=True) return data # Fetch both the workout and the workout stroke data (from CSV file) def getrowdata(id=0): # check if valid ID exists (workout exists) row = Workout.objects.get(id=id) f1 = row.csvfilename # get user r = row.user u = r.user rr = rrower(hrmax=r.max, hrut2=r.ut2, hrut1=r.ut1, hrat=r.at, hrtr=r.tr, hran=r.an, ftp=r.ftp) rowdata = rdata(f1, rower=rr) return rowdata, row # Checks if all rows for a list of workout IDs have entries in the # stroke_data table. If this is not the case, it creates the stroke # data # In theory, this should never yield any work, but it's a good # safety net for programming errors elsewhere in the app # Also used heavily when I moved from CSV file only to CSV+Stroke data def prepmultipledata(ids, verbose=False): query = sa.text('SELECT DISTINCT workoutid FROM strokedata') engine = create_engine(database_url, echo=False) with engine.connect() as conn, conn.begin(): res = conn.execute(query) res = list(itertools.chain.from_iterable(res.fetchall())) conn.close() engine.dispose() try: ids2 = [int(id) for id in ids] except ValueError: ids2 = ids res = list(set(ids2) - set(res)) for id in res: rowdata, row = getrowdata(id=id) if verbose: print(id) if rowdata and len(rowdata.df): data = dataprep(rowdata.df, id=id, bands=True, barchart=True, otwpower=True) return res # Read a set of columns for a set of workout ids, returns data as a # pandas dataframe def read_cols_df_sql(ids, columns, convertnewtons=True): # drop columns that are not in offical list # axx = [ax[0] for ax in axes] prepmultipledata(ids) axx = [f.name for f in StrokeData._meta.get_fields()] extracols = [] columns2 = list(columns) for c in columns: if not c in axx: columns2.remove(c) extracols.append(c) columns = list(columns2) + ['distance', 'spm', 'workoutid'] columns = [x for x in columns if x != 'None'] columns = list(set(columns)) cls = '' ids = [int(id) for id in ids] engine = create_engine(database_url, echo=False) for column in columns: cls += column + ', ' cls = cls[:-2] if len(ids) == 0: return pd.DataFrame(),extracols # query = sa.text('SELECT {columns} FROM strokedata WHERE workoutid=0'.format( # columns=cls, # )) elif len(ids) == 1: query = sa.text('SELECT {columns} FROM strokedata WHERE workoutid={id}'.format( id=ids[0], columns=cls, )) else: query = sa.text('SELECT {columns} FROM strokedata WHERE workoutid IN {ids}'.format( columns=cls, ids=tuple(ids), )) connection = engine.raw_connection() df = pd.read_sql_query(query, engine) df = df.fillna(value=0) if 'peakforce' in columns: funits = ((w.id, w.forceunit) for w in Workout.objects.filter(id__in=ids)) for id, u in funits: if u == 'lbs': mask = df['workoutid'] == id df.loc[mask, 'peakforce'] = df.loc[mask, 'peakforce'] * lbstoN if 'averageforce' in columns: funits = ((w.id, w.forceunit) for w in Workout.objects.filter(id__in=ids)) for id, u in funits: if u == 'lbs': mask = df['workoutid'] == id df.loc[mask, 'averageforce'] = df.loc[mask, 'averageforce'] * lbstoN engine.dispose() return df,extracols # Read stroke data from the DB for a Workout ID. Returns a pandas dataframe def read_df_sql(id): engine = create_engine(database_url, echo=False) df = pd.read_sql_query(sa.text('SELECT * FROM strokedata WHERE workoutid={id}'.format( id=id)), engine) engine.dispose() df = df.fillna(value=0) funit = Workout.objects.get(id=id).forceunit if funit == 'lbs': try: df['peakforce'] = df['peakforce'] * lbstoN except KeyError: pass try: df['averageforce'] = df['averageforce'] * lbstoN except KeyError: pass return df # Get the necessary data from the strokedata table in the DB. # For the flex plot def smalldataprep(therows, xparam, yparam1, yparam2): df = pd.DataFrame() if yparam2 == 'None': yparam2 = 'power' df[xparam] = [] df[yparam1] = [] df[yparam2] = [] df['distance'] = [] df['spm'] = [] for workout in therows: f1 = workout.csvfilename try: rowdata = dataprep(rrdata(csvfile=f1).df) rowdata = pd.DataFrame({xparam: rowdata[xparam], yparam1: rowdata[yparam1], yparam2: rowdata[yparam2], 'distance': rowdata['distance'], 'spm': rowdata['spm'], } ) if workout.forceunit == 'lbs': try: rowdata['peakforce'] *= lbstoN except KeyError: pass try: rowdata['averageforce'] *= lbstoN except KeyError: pass df = pd.concat([df, rowdata], ignore_index=True) except IOError: try: rowdata = dataprep(rrdata(csvfile=f1 + '.gz').df) rowdata = pd.DataFrame({xparam: rowdata[xparam], yparam1: rowdata[yparam1], yparam2: rowdata[yparam2], 'distance': rowdata['distance'], 'spm': rowdata['spm'], } ) if workout.forceunit == 'lbs': try: rowdata['peakforce'] *= lbstoN except KeyError: pass try: rowdata['averageforce'] *= lbstoN except KeyError: pass df = pd.concat([df, rowdata], ignore_index=True) except IOError: pass return df # data fusion def datafusion(id1, id2, columns, offset): workout1 = Workout.objects.get(id=id1) workout2 = Workout.objects.get(id=id2) df1, w1 = getrowdata_db(id=id1) df1 = df1.drop([ # 'cumdist', 'hr_ut2', 'hr_ut1', 'hr_at', 'hr_tr', 'hr_an', 'hr_max', 'ftime', 'fpace', 'workoutid', 'id'], 1, errors='ignore') # Add coordinates to DataFrame latitude, longitude = get_latlon(id1) df1[' latitude'] = latitude df1[' longitude'] = longitude df2 = getsmallrowdata_db(['time'] + columns, ids=[id2], doclean=False) forceunit = 'N' offsetmillisecs = offset.seconds * 1000 + offset.microseconds / 1000. offsetmillisecs += offset.days * (3600 * 24 * 1000) df2['time'] = df2['time'] + offsetmillisecs keep1 = {c: c for c in set(df1.columns)} for c in columns: keep1.pop(c) for c in df1.columns: if not c in keep1: df1 = df1.drop(c, 1, errors='ignore') df = pd.concat([df1, df2], ignore_index=True) df = df.sort_values(['time']) df = df.interpolate(method='linear', axis=0, limit_direction='both', limit=10) df.fillna(method='bfill', inplace=True) # Some new stuff to try out df = df.groupby('time', axis=0).mean() df['time'] = df.index df.reset_index(drop=True, inplace=True) df['time'] = df['time'] / 1000. df['pace'] = df['pace'] / 1000. df['cum_dist'] = df['cumdist'] return df, forceunit def fix_newtons(id=0, limit=3000): # rowdata,row = getrowdata_db(id=id,doclean=False,convertnewtons=False) rowdata = getsmallrowdata_db(['peakforce'], ids=[id], doclean=False) try: #avgforce = rowdata['averageforce'] peakforce = rowdata['peakforce'] if peakforce.mean() > limit: w = Workout.objects.get(id=id) print("fixing ", id) rowdata = rdata(w.csvfilename) if rowdata and len(rowdata.df): update_strokedata(w.id, rowdata.df) except KeyError: pass def remove_invalid_columns(df): for c in df.columns: if not c in allowedcolumns: df.drop(labels=c,axis=1,inplace=True) return df def add_efficiency(id=0): rowdata, row = getrowdata_db(id=id, doclean=False, convertnewtons=False, checkefficiency=False) power = rowdata['power'] pace = rowdata['pace'] / 1.0e3 velo = 500. / pace ergpw = 2.8 * velo**3 efficiency = 100. * ergpw / power efficiency = efficiency.replace([-np.inf, np.inf], np.nan) efficiency.fillna(method='ffill') rowdata['efficiency'] = efficiency rowdata = remove_invalid_columns(rowdata) rowdata = rowdata.replace([-np.inf, np.inf], np.nan) rowdata = rowdata.fillna(method='ffill') delete_strokedata(id) if id != 0: rowdata['workoutid'] = id engine = create_engine(database_url, echo=False) with engine.connect() as conn, conn.begin(): rowdata.to_sql('strokedata', engine, if_exists='append', index=False) conn.close() engine.dispose() return rowdata # This is the main routine. # it reindexes, sorts, filters, and smooths the data, then # saves it to the stroke_data table in the database # Takes a rowingdata object's DataFrame as input def dataprep(rowdatadf, id=0, bands=True, barchart=True, otwpower=True, empower=True, inboard=0.88, forceunit='lbs'): if rowdatadf.empty: return 0 #rowdatadf.set_index([range(len(rowdatadf))], inplace=True) t = rowdatadf.loc[:, 'TimeStamp (sec)'] t = pd.Series(t - rowdatadf.loc[:, 'TimeStamp (sec)'].iloc[0]) row_index = rowdatadf.loc[:, ' Stroke500mPace (sec/500m)'] > 3000 rowdatadf.loc[row_index, ' Stroke500mPace (sec/500m)'] = 3000. p = rowdatadf.loc[:, ' Stroke500mPace (sec/500m)'] try: velo = rowdatadf.loc[:,' AverageBoatSpeed (m/s)'] except KeyError: velo = 500./p hr = rowdatadf.loc[:, ' HRCur (bpm)'] spm = rowdatadf.loc[:, ' Cadence (stokes/min)'] cumdist = rowdatadf.loc[:, 'cum_dist'] power = rowdatadf.loc[:, ' Power (watts)'] averageforce = rowdatadf.loc[:, ' AverageDriveForce (lbs)'] drivelength = rowdatadf.loc[:, ' DriveLength (meters)'] try: workoutstate = rowdatadf.loc[:, ' WorkoutState'] except KeyError: workoutstate = 0 * hr peakforce = rowdatadf.loc[:, ' PeakDriveForce (lbs)'] forceratio = averageforce / peakforce forceratio = forceratio.fillna(value=0) try: drivetime = rowdatadf.loc[:, ' DriveTime (ms)'] recoverytime = rowdatadf.loc[:, ' StrokeRecoveryTime (ms)'] rhythm = 100. * drivetime / (recoverytime + drivetime) rhythm = rhythm.fillna(value=0) except: rhythm = 0.0 * forceratio f = rowdatadf['TimeStamp (sec)'].diff().mean() if f != 0 and not np.isinf(f): try: windowsize = 2 * (int(10. / (f))) + 1 except ValueError: windowsize = 1 else: windowsize = 1 if windowsize <= 3: windowsize = 5 if windowsize > 3 and windowsize < len(hr): spm = savgol_filter(spm, windowsize, 3) hr = savgol_filter(hr, windowsize, 3) drivelength = savgol_filter(drivelength, windowsize, 3) forceratio = savgol_filter(forceratio, windowsize, 3) try: t2 = t.fillna(method='ffill').apply(lambda x: timedeltaconv(x)) except TypeError: t2 = 0 * t p2 = p.fillna(method='ffill').apply(lambda x: timedeltaconv(x)) try: drivespeed = drivelength / rowdatadf[' DriveTime (ms)'] * 1.0e3 except TypeError: drivespeed = 0.0 * rowdatadf['TimeStamp (sec)'] drivespeed = drivespeed.fillna(value=0) try: driveenergy = rowdatadf['driveenergy'] except KeyError: if forceunit == 'lbs': driveenergy = drivelength * averageforce * lbstoN else: drivenergy = drivelength * averageforce powerhr = 60.*power/hr powerhr = powerhr.fillna(value=0) if driveenergy.mean() == 0 and driveenergy.std() == 0: driveenergy = 0*driveenergy+100 distance = rowdatadf.loc[:, 'cum_dist'] velo = 500. / p distanceperstroke = 60. * velo / spm data = DataFrame( dict( time=t * 1e3, hr=hr, pace=p * 1e3, spm=spm, velo=velo, cumdist=cumdist, ftime=niceformat(t2), fpace=nicepaceformat(p2), driveenergy=driveenergy, power=power, workoutstate=workoutstate, averageforce=averageforce, drivelength=drivelength, peakforce=peakforce, forceratio=forceratio, distance=distance, drivespeed=drivespeed, rhythm=rhythm, distanceperstroke=distanceperstroke, # powerhr=powerhr, ) ) if bands: # HR bands data['hr_ut2'] = rowdatadf.loc[:, 'hr_ut2'] data['hr_ut1'] = rowdatadf.loc[:, 'hr_ut1'] data['hr_at'] = rowdatadf.loc[:, 'hr_at'] data['hr_tr'] = rowdatadf.loc[:, 'hr_tr'] data['hr_an'] = rowdatadf.loc[:, 'hr_an'] data['hr_max'] = rowdatadf.loc[:, 'hr_max'] data['hr_bottom'] = 0.0 * data['hr'] try: tel = rowdatadf.loc[:, ' ElapsedTime (sec)'] except KeyError: rowdatadf[' ElapsedTime (sec)'] = rowdatadf['TimeStamp (sec)'] if barchart: # time increments for bar chart time_increments = rowdatadf.loc[:, ' ElapsedTime (sec)'].diff() try: time_increments.iloc[0] = time_increments.iloc[1] except (KeyError, IndexError): time_increments.iloc[0] = 1. time_increments = 0.5 * time_increments + 0.5 * np.abs(time_increments) x_right = (t2 + time_increments.apply(lambda x: timedeltaconv(x))) data['x_right'] = x_right if empower: try: wash = rowdatadf.loc[:, 'wash'] except KeyError: wash = 0 * power try: catch = rowdatadf.loc[:, 'catch'] except KeyError: catch = 0 * power try: finish = rowdatadf.loc[:, 'finish'] except KeyError: finish = 0 * power try: peakforceangle = rowdatadf.loc[:, 'peakforceangle'] except KeyError: peakforceangle = 0 * power if data['driveenergy'].mean() == 0: try: driveenergy = rowdatadf.loc[:, 'driveenergy'] except KeyError: driveenergy = power * 60 / spm else: driveenergy = data['driveenergy'] arclength = (inboard - 0.05) * (np.radians(finish) - np.radians(catch)) if arclength.mean() > 0: drivelength = arclength elif drivelength.mean() == 0: drivelength = driveenergy / (averageforce * 4.44822) try: slip = rowdatadf.loc[:, 'slip'] except KeyError: slip = 0 * power try: totalangle = finish - catch effectiveangle = finish - wash - catch - slip except ValueError: totalangle = 0 * power effectiveangle = 0 * power if windowsize > 3 and windowsize < len(slip): try: wash = savgol_filter(wash, windowsize, 3) except TypeError: pass try: slip = savgol_filter(slip, windowsize, 3) except TypeError: pass try: catch = savgol_filter(catch, windowsize, 3) except TypeError: pass try: finish = savgol_filter(finish, windowsize, 3) except TypeError: pass try: peakforceangle = savgol_filter(peakforceangle, windowsize, 3) except TypeError: pass try: driveenergy = savgol_filter(driveenergy, windowsize, 3) except TypeError: pass try: drivelength = savgol_filter(drivelength, windowsize, 3) except TypeError: pass try: totalangle = savgol_filter(totalangle, windowsize, 3) except TypeError: pass try: effectiveangle = savgol_filter(effectiveangle, windowsize, 3) except TypeError: pass velo = 500. / p ergpw = 2.8 * velo**3 efficiency = 100. * ergpw / power efficiency = efficiency.replace([-np.inf, np.inf], np.nan) efficiency.fillna(method='ffill') try: data['wash'] = wash data['catch'] = catch data['slip'] = slip data['finish'] = finish data['peakforceangle'] = peakforceangle data['driveenergy'] = driveenergy data['drivelength'] = drivelength data['totalangle'] = totalangle data['effectiveangle'] = effectiveangle data['efficiency'] = efficiency except ValueError: pass if otwpower: try: nowindpace = rowdatadf.loc[:, 'nowindpace'] except KeyError: nowindpace = p try: equivergpower = rowdatadf.loc[:, 'equivergpower'] except KeyError: equivergpower = 0 * p + 50. nowindpace2 = nowindpace.apply(lambda x: timedeltaconv(x)) ergvelo = (equivergpower / 2.8)**(1. / 3.) ergpace = 500. / ergvelo ergpace[ergpace == np.inf] = 240. ergpace2 = ergpace.apply(lambda x: timedeltaconv(x)) data['ergpace'] = ergpace * 1e3 data['nowindpace'] = nowindpace * 1e3 data['equivergpower'] = equivergpower data['fergpace'] = nicepaceformat(ergpace2) data['fnowindpace'] = nicepaceformat(nowindpace2) data = data.replace([-np.inf, np.inf], np.nan) data = data.fillna(method='ffill') # write data if id given if id != 0: data['workoutid'] = id engine = create_engine(database_url, echo=False) with engine.connect() as conn, conn.begin(): data.to_sql('strokedata', engine, if_exists='append', index=False) conn.close() engine.dispose() return data def workout_trimp(w): r = w.user if w.trimp > 0: return w.trimp,w.hrtss r = w.user ftp = float(r.ftp) if w.workouttype in otwtypes: ftp = ftp*(100.-r.otwslack)/100. if r.hrftp == 0: hrftp = (r.an+r.tr)/2. r.hrftp = int(hrftp) r.save() if w.averagehr is None: rowdata = rdata(w.csvfilename) try: avghr = rowdata.df[' HRCur (bpm)'].mean() maxhr = rowdata.df[' HRCur (bpm)'].max() except KeyError: avghr = None maxhr = None w.averagehr = avghr w.maxhr = maxhr w.save() job = myqueue( queuehigh, handle_calctrimp, w.id, w.csvfilename, ftp, r.sex, r.hrftp, r.max, r.rest) return 0,0 def workout_rscore(w): if w.rscore > 0: return w.rscore,w.normp r = w.user ftp = float(r.ftp) if w.workouttype in otwtypes: ftp = ftp*(100.-r.otwslack)/100. if r.hrftp == 0: hrftp = (r.an+r.tr)/2. r.hrftp = int(hrftp) r.save() job = myqueue( queuehigh, handle_calctrimp, w.id, w.csvfilename, ftp, r.sex, r.hrftp, r.max, r.rest) return 0,0 def workout_normv(w,pp=4.0): if w.normv > 0: return w.normv,w.normw r = w.user ftp = float(r.ftp) if w.workouttype in otwtypes: ftp = ftp*(100.-r.otwslack)/100. if r.hrftp == 0: hrftp = (r.an+r.tr)/2. r.hrftp = int(hrftp) r.save() job = myqueue( queuehigh, handle_calctrimp, w.id, w.csvfilename, ftp, r.sex, r.hrftp, r.max, r.rest) return 0,0