# All the data preparation, data cleaning and data mangling should
# be defined here
from rowers.models import Workout, User, Rower, StrokeData
from rowingdata import rowingdata as rrdata
from rowers.tasks import handle_sendemail_unrecognized
from rowers.tasks import handle_zip_file
import pytz
from rowingdata import rower as rrower
from rowingdata import main as rmain
from rowingdata import get_file_type, get_empower_rigging
from pandas import DataFrame, Series
from pytz import timezone as tz, utc
from django.utils import timezone
from time import strftime, strptime, mktime, time, daylight
import arrow
from django.utils.timezone import get_current_timezone
thetimezone = get_current_timezone()
from rowingdata import (
    TCXParser, RowProParser, ErgDataParser, CoxMateParser, BoatCoachParser,
    RowPerfectParser, BoatCoachAdvancedParser, MysteryParser,
    BoatCoachOTWParser, painsledDesktopParser, speedcoachParser,
    ErgStickParser, SpeedCoach2Parser, FITParser, fitsummarydata,
    make_cumvalues, cumcpdata, summarydata, get_file_type,
)
from rowers.models import Team
from rowers.metrics import axes
from async_messages import messages as a_messages
import os
import zipfile
import pandas as pd
import numpy as np
import itertools
import math
from tasks import (
    handle_sendemail_unrecognized, handle_sendemail_breakthrough,
    handle_sendemail_hard, handle_updatecp, handle_updateergcp
)
from django.conf import settings
from sqlalchemy import create_engine
import sqlalchemy as sa
import sys
import utils
import datautils
from utils import lbstoN
from scipy.interpolate import griddata
from timezonefinder import TimezoneFinder
import django_rq

# RQ work queues.
# NOTE(review): queuehigh is bound to the 'default' queue, same as `queue` —
# presumably this was meant to be a dedicated 'high' queue; confirm.
queue = django_rq.get_queue('default')
queuelow = django_rq.get_queue('low')
queuehigh = django_rq.get_queue('default')

# Build a SQLAlchemy database URL from the Django DB settings so that the
# raw-SQL helpers below can talk to the same database as the ORM.
user = settings.DATABASES['default']['USER']
password = settings.DATABASES['default']['PASSWORD']
database_name = settings.DATABASES['default']['NAME']
host = settings.DATABASES['default']['HOST']
port = settings.DATABASES['default']['PORT']
database_url = 'mysql://{user}:{password}@{host}:{port}/{database_name}'.format(
    user=user,
    password=password,
    database_name=database_name,
    host=host,
    port=port,
)

# Use SQLite local database when we're in debug mode
if settings.DEBUG or user == '':
    # database_url = 'sqlite:///db.sqlite3'
    database_url = 'sqlite:///' + database_name

# mapping the DB column names to the CSV file column names
# (the leading spaces in the CSV names are significant — they match the
# painsled CSV header exactly, including the 'stokes' typo)
columndict = {
    'time': 'TimeStamp (sec)',
    'hr': ' HRCur (bpm)',
    'pace': ' Stroke500mPace (sec/500m)',
    'spm': ' Cadence (stokes/min)',
    'power': ' Power (watts)',
    'averageforce': ' AverageDriveForce (lbs)',
    'drivelength': ' DriveLength (meters)',
    'peakforce': ' PeakDriveForce (lbs)',
    'distance': ' Horizontal (meters)',
    'catch': 'catch',
    'finish': 'finish',
    'peakforceangle': 'peakforceangle',
    'wash': 'wash',
    'slip': 'slip',
    'workoutstate': ' WorkoutState',
    'cumdist': 'cum_dist',
}

from scipy.signal import savgol_filter
import datetime


def get_latlon(id):
    # type: (int) -> object
    """Return [latitude, longitude] Series for a workout's strokes.

    Returns False when the workout does not exist, and a pair of empty
    Series when the CSV file could not be read (rdata returned 0).
    Workouts without GPS columns get zero-filled Series instead.
    """
    try:
        w = Workout.objects.get(id=id)
    except Workout.DoesNotExist:
        return False
    rowdata = rdata(w.csvfilename)
    try:
        try:
            # NOTE(review): .ix is deprecated pandas API; works only on old pandas
            latitude = rowdata.df.ix[:, ' latitude']
            longitude = rowdata.df.ix[:, ' longitude']
        except KeyError:
            # no GPS columns — fall back to zeros aligned with the time stamps
            latitude = 0 * rowdata.df.ix[:, 'TimeStamp (sec)']
            longitude = 0 * rowdata.df.ix[:, 'TimeStamp (sec)']
        return [latitude, longitude]
    except AttributeError:
        # rdata() returned 0 (file missing/corrupt) — no .df attribute
        return [pd.Series([]), pd.Series([])]
    # NOTE(review): unreachable — both branches above already return
    return [pd.Series([]), pd.Series([])]


def get_workouts(ids, userid):
    """Return the Workout objects from `ids` that belong to `userid`."""
    goodids = []
    for id in ids:
        w = Workout.objects.get(id=id)
        if int(w.user.user.id) == int(userid):
            goodids.append(id)
    return [Workout.objects.get(id=id) for id in goodids]


def filter_df(datadf, fieldname, value, largerthan=True):
    """NaN-out values of `fieldname` outside the wanted range.

    With largerthan=True the values *below* `value` are blanked (i.e. only
    larger values are kept); with largerthan=False values >= `value` are
    blanked. Missing columns leave the frame untouched.
    """
    try:
        x = datadf[fieldname]
    except KeyError:
        return datadf
    if largerthan:
        mask = datadf[fieldname] < value
    else:
        mask = datadf[fieldname] >= value
    datadf.loc[mask, fieldname] = np.nan
    return datadf


def df_resample(datadf):
    """Resample the stroke frame to one row per whole second (mean values)."""
    # time stamps must be in seconds
    timestamps = datadf['TimeStamp (sec)'].astype('int')
    datadf['timestamps'] = timestamps
    newdf = datadf.groupby(['timestamps']).mean()
    return newdf
def clean_df_stats(datadf, workstrokesonly=True, ignorehr=True,
                   ignoreadvanced=False):
    """Clean a stroke dataframe for statistics.

    Zeros/negatives are replaced with NaN, then each metric is blanked
    outside its plausible physical range. When workstrokesonly is truthy
    (True or the string 'True'), strokes in rest workout states are dropped.

    NOTE(review): the original indentation was lost; the scope of the
    `ignorehr` and `ignoreadvanced` guards below is a reconstruction —
    confirm which range checks each guard was meant to cover.
    """
    # clean data remove zeros and negative values
    # bring metrics which have negative values to positive domain
    # (catch angles are negative, peakforceangle/hr may be near zero, so
    # shift them into the positive domain before clipping at zero)
    try:
        datadf['catch'] = -datadf['catch']
    except KeyError:
        pass
    try:
        datadf['peakforceangle'] = datadf['peakforceangle'] + 1000
    except KeyError:
        pass
    try:
        datadf['hr'] = datadf['hr'] + 10
    except KeyError:
        pass
    try:
        datadf = datadf.clip(lower=0)
    except TypeError:
        pass
    # zeros are "no data" — turn them into NaN so means/percentiles ignore them
    datadf.replace(to_replace=0, value=np.nan, inplace=True)
    # return from positive domain to negative
    try:
        datadf['catch'] = -datadf['catch']
    except KeyError:
        pass
    try:
        datadf['peakforceangle'] = datadf['peakforceangle'] - 1000
    except KeyError:
        pass
    try:
        datadf['hr'] = datadf['hr'] - 10
    except KeyError:
        pass
    # clean data for useful ranges per column
    if not ignorehr:
        try:
            mask = datadf['hr'] < 30
            datadf.loc[mask, 'hr'] = np.nan
        except KeyError:
            pass
    try:
        mask = datadf['spm'] < 10
        datadf.loc[mask, 'spm'] = np.nan
    except KeyError:
        pass
    # pace is stored in milliseconds per 500m here (divided by 1000 for seconds)
    try:
        mask = datadf['pace'] / 1000. > 300.
        datadf.loc[mask, 'pace'] = np.nan
    except KeyError:
        pass
    try:
        mask = datadf['efficiency'] < 0.
        datadf.loc[mask, 'efficiency'] = np.nan
    except KeyError:
        pass
    try:
        mask = datadf['pace'] / 1000. < 60.
        datadf.loc[mask, 'pace'] = np.nan
    except KeyError:
        pass
    try:
        mask = datadf['spm'] > 60
        datadf.loc[mask, 'spm'] = np.nan
    except KeyError:
        pass
    try:
        mask = datadf['wash'] < 1
        datadf.loc[mask, 'wash'] = np.nan
    except KeyError:
        pass
    # presumably only the rhythm checks are "advanced"; the remaining range
    # checks were reconstructed as unconditional — TODO confirm
    if not ignoreadvanced:
        try:
            mask = datadf['rhythm'] < 5
            datadf.loc[mask, 'rhythm'] = np.nan
        except KeyError:
            pass
        try:
            mask = datadf['rhythm'] > 70
            datadf.loc[mask, 'rhythm'] = np.nan
        except KeyError:
            pass
    try:
        mask = datadf['power'] < 20
        datadf.loc[mask, 'power'] = np.nan
    except KeyError:
        pass
    try:
        mask = datadf['drivelength'] < 0.5
        datadf.loc[mask, 'drivelength'] = np.nan
    except KeyError:
        pass
    try:
        mask = datadf['forceratio'] < 0.2
        datadf.loc[mask, 'forceratio'] = np.nan
    except KeyError:
        pass
    try:
        mask = datadf['forceratio'] > 1.0
        datadf.loc[mask, 'forceratio'] = np.nan
    except KeyError:
        pass
    try:
        mask = datadf['drivespeed'] < 0.5
        datadf.loc[mask, 'drivespeed'] = np.nan
    except KeyError:
        pass
    try:
        mask = datadf['drivespeed'] > 4
        datadf.loc[mask, 'drivespeed'] = np.nan
    except KeyError:
        pass
    try:
        mask = datadf['driveenergy'] > 2000
        datadf.loc[mask, 'driveenergy'] = np.nan
    except KeyError:
        pass
    try:
        mask = datadf['driveenergy'] < 100
        datadf.loc[mask, 'driveenergy'] = np.nan
    except KeyError:
        pass
    try:
        mask = datadf['catch'] > -30.
        datadf.loc[mask, 'catch'] = np.nan
    except KeyError:
        pass
    # PM workout-state codes: work vs rest vs transition strokes
    workoutstateswork = [1, 4, 5, 8, 9, 6, 7]
    workoutstatesrest = [3]
    workoutstatetransition = [0, 2, 10, 11, 12, 13]
    # workstrokesonly may arrive as the string 'True' from request parameters
    if workstrokesonly == 'True' or workstrokesonly == True:
        try:
            datadf = datadf[~datadf['workoutstate'].isin(workoutstatesrest)]
        except:
            pass
    return datadf
def getstatsfields():
    """Return (fieldlist, fielddict) of StrokeData fields useful for stats.

    fielddict maps field name -> verbose name with bookkeeping/derived
    fields removed; fieldlist is just its keys.
    """
    # Get field names and remove those that are not useful in stats
    fields = StrokeData._meta.get_fields()
    fielddict = {field.name: field.verbose_name for field in fields}
    # fielddict.pop('workoutid')
    # fielddict.pop('workoutstate')
    for unwanted in ('ergpace', 'hr_an', 'hr_tr', 'hr_at', 'hr_ut2',
                     'hr_ut1', 'time', 'distance', 'nowindpace',
                     'fnowindpace', 'fergpace', 'equivergpower', 'fpace',
                     'pace', 'id', 'ftime', 'x_right', 'hr_max',
                     'hr_bottom', 'cumdist'):
        fielddict.pop(unwanted)
    # list(dict) == the keys; portable replacement for iteritems() iteration
    fieldlist = list(fielddict)
    return fieldlist, fielddict


# A string representation for time deltas
def niceformat(values):
    """Format an iterable of timedelta-like values as mm:ss.t strings."""
    out = []
    for v in values:
        formattedv = strfdelta(v)
        out.append(formattedv)
    return out


# A nice printable format for time delta values
def strfdelta(tdelta):
    """Format a datetime.timedelta or numpy timedelta64 as 'mm:ss.t'."""
    try:
        minutes, seconds = divmod(tdelta.seconds, 60)
        tenths = int(tdelta.microseconds / 1e5)
    except AttributeError:
        # numpy timedelta64 has no .seconds — view it as nanoseconds.
        # BUG FIX: divmod with float divisors yields floats, which used to
        # format as e.g. '2.0:5.0.3'; cast to int for proper zero padding.
        minutes, seconds = divmod(tdelta.view(np.int64), 60e9)
        seconds, rest = divmod(seconds, 1e9)
        minutes = int(minutes)
        seconds = int(seconds)
        tenths = int(rest / 1e8)
    res = "{minutes:0>2}:{seconds:0>2}.{tenths:0>1}".format(
        minutes=minutes,
        seconds=seconds,
        tenths=tenths,
    )
    return res


# A nice printable format for pace values
def nicepaceformat(values):
    # identical contract to niceformat — delegate instead of duplicating
    return niceformat(values)


# Convert seconds to a Time Delta value, replacing NaN with a 5:50 pace
def timedeltaconv(x):
    """Return timedelta(seconds=x); out-of-range/NaN values become 350 s."""
    if np.isfinite(x) and 0 < x < 175000:
        dt = datetime.timedelta(seconds=x)
    else:
        dt = datetime.timedelta(seconds=350.)
    return dt
def paceformatsecs(values):
    """Format an iterable of pace values (seconds) as mm:ss.t strings."""
    out = []
    for v in values:
        td = timedeltaconv(v)
        formattedv = strfdelta(td)
        out.append(formattedv)
    return out


def getcpdata_sql(rower_id, table='cpdata'):
    """Read the critical-power curve for a rower from `table`.

    NOTE(review): the query is built by string formatting; rower_id comes
    from internal integer ids, but parameter binding would be safer.
    """
    engine = create_engine(database_url, echo=False)
    query = sa.text('SELECT * from {table} WHERE user={rower_id};'.format(
        rower_id=rower_id,
        table=table,
    ))
    df = pd.read_sql_query(query, engine)
    # BUG FIX: a raw_connection() used to be opened here and never closed
    engine.dispose()
    return df


def deletecpdata_sql(rower_id, table='cpdata'):
    """Delete all critical-power rows for a rower from `table`."""
    engine = create_engine(database_url, echo=False)
    query = sa.text('DELETE from {table} WHERE user={rower_id};'.format(
        rower_id=rower_id,
        table=table,
    ))
    with engine.connect() as conn, conn.begin():
        try:
            result = conn.execute(query)
        except:
            # best effort — SQLite may be locked by another worker
            print("Database locked")
        conn.close()
    engine.dispose()


def updatecpdata_sql(rower_id, delta, cp, table='cpdata', distance=None):
    """Replace the rower's critical-power rows in `table` with delta/cp.

    distance is optional; when given (non-empty Series/list) it is stored
    in an extra column.
    """
    # NOTE(review): always deletes from the default 'cpdata' table even when
    # writing to another table — confirm whether table=table was intended.
    deletecpdata_sql(rower_id)
    df = pd.DataFrame(
        {
            'delta': delta,
            'cp': cp,
            'user': rower_id
        }
    )
    # BUG FIX: the default used to be distance=[] followed by
    # `distance.empty`, which raised AttributeError whenever the
    # argument was omitted.
    if distance is not None and len(distance):
        df['distance'] = distance
    engine = create_engine(database_url, echo=False)
    with engine.connect() as conn, conn.begin():
        df.to_sql(table, engine, if_exists='append', index=False)
        conn.close()
    engine.dispose()


def runcpupdate(rower, type='water', startdate=None, enddate=None):
    """Queue a critical-power recomputation for `rower`.

    type 'water' uses on-the-water ranking pieces and the cpdata table;
    anything else uses erg workouts and cpergdata. Dates default to the
    last 365 days (through 5 days ahead), evaluated at call time.
    """
    # BUG FIX: the date defaults were previously computed once at module
    # import, so a long-running worker used a stale date window.
    if startdate is None:
        startdate = timezone.now() - datetime.timedelta(days=365)
    if enddate is None:
        enddate = timezone.now() + datetime.timedelta(days=5)
    if type == 'water':
        theworkouts = Workout.objects.filter(
            user=rower, rankingpiece=True,
            workouttype='water',
            startdatetime__gte=startdate,
            startdatetime__lte=enddate
        )
        table = 'cpdata'
    else:
        theworkouts = Workout.objects.filter(
            user=rower, rankingpiece=True,
            workouttype__in=[
                'rower',
                'dynamic',
                'slides'
            ],
            startdatetime__gte=startdate,
            startdatetime__lte=enddate
        )
        table = 'cpergdata'
    theids = [w.id for w in theworkouts]
    if settings.DEBUG:
        job = handle_updatecp.delay(rower.id, theids, debug=True, table=table)
    else:
        job = queue.enqueue(handle_updatecp, rower.id, theids, table=table)
    return job
def fetchcperg(rower, theworkouts):
    """Kick off an erg critical-power update and return the current data."""
    thefilenames = [w.csvfilename for w in theworkouts]
    cpdf = getcpdata_sql(rower.id, table='ergcpdata')
    if settings.DEBUG:
        res = handle_updateergcp.delay(rower.id, thefilenames, debug=True)
    else:
        res = queue.enqueue(handle_updateergcp, rower.id, thefilenames)
    return cpdf


def fetchcp(rower, theworkouts, table='cpdata'):
    """Return (delta, cp, avgpower-by-workout) for the given workouts.

    When no CP data exists yet, an update job is queued and empty
    delta/cp containers are returned.
    """
    # get all power data from database (plus workoutid)
    theids = [int(w.id) for w in theworkouts]
    columns = ['power', 'workoutid', 'time']
    df = getsmallrowdata_db(columns, ids=theids)
    if df.empty:
        avgpower2 = {}
        for id in theids:
            avgpower2[id] = 0
        return pd.Series([]), pd.Series([]), avgpower2
    dfgrouped = df.groupby(['workoutid'])
    avgpower2 = dict(dfgrouped.mean()['power'].astype(int))
    cpdf = getcpdata_sql(rower.id, table=table)
    if not cpdf.empty:
        return cpdf['delta'], cpdf['cp'], avgpower2
    # no CP curve yet — schedule a recomputation and return empties
    if settings.DEBUG:
        res = handle_updatecp.delay(rower.id, theids, debug=True, table=table)
    else:
        res = queue.enqueue(handle_updatecp, rower.id, theids, table=table)
    return [], [], avgpower2


# Processes painsled CSV file to database
def save_workout_database(f2, r, dosmooth=True, workouttype='rower',
                          dosummary=True, title='Workout',
                          workoutsource='unknown', notes='', totaldist=0,
                          totaltime=0, summary='', makeprivate=False,
                          oarlength=2.89, inboard=0.88, forceunit='lbs',
                          consistencychecks=False):
    """Create a Workout record (plus stroke data) from a painsled-style CSV.

    Returns (workout_id, message) where message is a warning string or None.
    Returns (0, error message) when the CSV could not be read.
    """
    message = None
    powerperc = 100 * np.array([r.pw_ut2, r.pw_ut1, r.pw_at,
                                r.pw_tr, r.pw_an]) / r.ftp
    # make workout and put in database
    rr = rrower(hrmax=r.max, hrut2=r.ut2, hrut1=r.ut1, hrat=r.at,
                hrtr=r.tr, hran=r.an, ftp=r.ftp, powerperc=powerperc,
                powerzones=r.powerzones)
    row = rdata(f2, rower=rr)
    # BUG FIX: this check used to sit after the first row.df access, where it
    # could never fire (rdata() returns 0 on failure, so row.df raised first)
    if row == 0:
        return (0, 'Error: CSV data file not found')
    # sub-second samples: collapse to 1 Hz and re-enter through the
    # dataframe path
    dtavg = row.df['TimeStamp (sec)'].diff().mean()
    if dtavg < 1:
        newdf = df_resample(row.df)
        try:
            os.remove(f2)
        except:
            pass
        return new_workout_from_df(r, newdf, title=title)
    # BUG FIX: allchecks is now initialised before the try block; it used to
    # be unbound (NameError) when check_consistency raised ZeroDivisionError
    allchecks = 1
    try:
        checks = row.check_consistency()
        for key, value in checks.iteritems():
            if not value:
                allchecks = 0
                if consistencychecks:
                    a_messages.error(
                        r.user,
                        'Failed consistency check: ' + key + ', autocorrected')
                else:
                    pass
                    # a_messages.error(r.user,'Failed consistency check: '+key+', not corrected')
    except ZeroDivisionError:
        pass
    if not allchecks and consistencychecks:
        # row.repair()
        pass
    if dosmooth:
        # auto smoothing of the boat velocity with a Savitzky-Golay filter
        pace = row.df[' Stroke500mPace (sec/500m)'].values
        velo = 500. / pace
        f = row.df['TimeStamp (sec)'].diff().mean()
        if f != 0 and not np.isnan(f):
            # ~10 s window, forced odd as savgol_filter requires
            windowsize = 2 * (int(10. / (f))) + 1
        else:
            windowsize = 1
        if not 'originalvelo' in row.df:
            row.df['originalvelo'] = velo
        if windowsize > 3 and windowsize < len(velo):
            velo2 = savgol_filter(velo, windowsize, 3)
        else:
            velo2 = velo
        velo3 = pd.Series(velo2)
        velo3 = velo3.replace([-np.inf, np.inf], np.nan)
        velo3 = velo3.fillna(method='ffill')
        pace2 = 500. / abs(velo3)
        row.df[' Stroke500mPace (sec/500m)'] = pace2
        row.df = row.df.fillna(0)
        row.write_csv(f2, gzip=True)
    # write_csv(gzip=True) leaves the plain file behind; best-effort removal
    try:
        os.remove(f2)
    except:
        pass
    # recalculate power data for erg workouts
    if workouttype == 'rower' or workouttype == 'dynamic' or workouttype == 'slides':
        try:
            row.erg_recalculatepower()
            row.write_csv(f2, gzip=True)
        except:
            pass
    averagehr = row.df[' HRCur (bpm)'].mean()
    maxhr = row.df[' HRCur (bpm)'].max()
    if totaldist == 0:
        totaldist = row.df['cum_dist'].max()
    if totaltime == 0:
        totaltime = row.df['TimeStamp (sec)'].max(
        ) - row.df['TimeStamp (sec)'].min()
        try:
            totaltime = totaltime + row.df.ix[0, ' ElapsedTime (sec)']
        except KeyError:
            pass
    if np.isnan(totaltime):
        totaltime = 0
    # break the duration into h:m:s.t, clamping each component and warning
    # when something is clearly off
    hours = int(totaltime / 3600.)
    if hours > 23:
        message = 'Warning: The workout duration was longer than 23 hours. '
        hours = 23
    minutes = int((totaltime - 3600. * hours) / 60.)
    if minutes > 59:
        minutes = 59
        if not message:
            message = 'Warning: there is something wrong with the workout duration'
    seconds = int(totaltime - 3600. * hours - 60. * minutes)
    if seconds > 59:
        seconds = 59
        if not message:
            message = 'Warning: there is something wrong with the workout duration'
    tenths = int(10 * (totaltime - 3600. * hours - 60. * minutes - seconds))
    if tenths > 9:
        tenths = 9
        if not message:
            message = 'Warning: there is something wrong with the workout duration'
    duration = "%s:%s:%s.%s" % (hours, minutes, seconds, tenths)
    if dosummary:
        summary = row.allstats()
    # resolve the workout's timezone from the GPS track when available
    timezone_str = 'UTC'
    try:
        workoutstartdatetime = timezone.make_aware(row.rowdatetime)
    except ValueError:
        workoutstartdatetime = row.rowdatetime
    try:
        latavg = row.df[' latitude'].mean()
        lonavg = row.df[' longitude'].mean()
        tf = TimezoneFinder()
        try:
            timezone_str = tf.timezone_at(lng=lonavg, lat=latavg)
        except ValueError:
            timezone_str = 'UTC'
        if timezone_str == None:
            timezone_str = tf.closest_timezone_at(lng=lonavg, lat=latavg)
        if timezone_str == None:
            timezone_str = r.defaulttimezone
        try:
            workoutstartdatetime = pytz.timezone(timezone_str).localize(
                row.rowdatetime
            )
        except ValueError:
            # already aware — just convert
            workoutstartdatetime = workoutstartdatetime.astimezone(
                pytz.timezone(timezone_str)
            )
    except KeyError:
        timezone_str = r.defaulttimezone
    workoutdate = workoutstartdatetime.astimezone(
        pytz.timezone(timezone_str)
    ).strftime('%Y-%m-%d')
    workoutstarttime = workoutstartdatetime.astimezone(
        pytz.timezone(timezone_str)
    ).strftime('%H:%M:%S')
    if makeprivate:
        privacy = 'hidden'
    else:
        privacy = 'visible'
    # checking for inf values
    totaldist = np.nan_to_num(totaldist)
    maxhr = np.nan_to_num(maxhr)
    averagehr = np.nan_to_num(averagehr)
    # check for duplicate start times and duration
    ws = Workout.objects.filter(startdatetime=workoutstartdatetime,
                                distance=totaldist,
                                user=r)
    if (len(ws) != 0):
        message = "Warning: This workout probably already exists in the database"
        privacy = 'hidden'
    w = Workout(user=r, name=title, date=workoutdate,
                workouttype=workouttype, duration=duration,
                distance=totaldist, weightcategory=r.weightcategory,
                starttime=workoutstarttime, workoutsource=workoutsource,
                forceunit=forceunit, csvfilename=f2, notes=notes,
                summary=summary, maxhr=maxhr, averagehr=averagehr,
                startdatetime=workoutstartdatetime, inboard=inboard,
                oarlength=oarlength, timezone=timezone_str,
                privacy=privacy)
    w.save()
    isbreakthrough = False
    ishard = False
    if workouttype == 'water':
        df = getsmallrowdata_db(['power', 'workoutid', 'time'], ids=[w.id])
        if df['power'].mean():
            thesecs = totaltime
            maxt = 1.05 * thesecs
            if maxt > 0:
                logarr = datautils.getlogarr(maxt)
                dfgrouped = df.groupby(['workoutid'])
                delta, cpvalues, avgpower = datautils.getcp(dfgrouped, logarr)
                res, btvalues, res2 = utils.isbreakthrough(
                    delta, cpvalues, r.p0, r.p1, r.p2, r.p3, r.cpratio)
            else:
                res = 0
                res2 = 0
            if res:
                isbreakthrough = True
                res = datautils.updatecp(delta, cpvalues, r)
            if res2 and not isbreakthrough:
                ishard = True
        # submit email task to send email about breakthrough workout
        if isbreakthrough:
            a_messages.info(
                r.user, 'It looks like you have a new breakthrough workout')
            if settings.DEBUG and r.getemailnotifications:
                res = handle_sendemail_breakthrough.delay(
                    w.id, r.user.email, r.user.first_name, r.user.last_name,
                    btvalues=btvalues.to_json())
            elif r.getemailnotifications:
                try:
                    # BUG FIX: enqueue used to be passed the *result* of
                    # calling the task (running it synchronously in the web
                    # process); the callable and its args are now passed
                    # separately as RQ expects
                    res = queuehigh.enqueue(
                        handle_sendemail_breakthrough, w.id, r.user.email,
                        r.user.first_name, r.user.last_name,
                        btvalues=btvalues.to_json())
                except AttributeError:
                    pass
            else:
                pass
        # submit email task to send email about a hard (non-BT) workout
        if ishard:
            a_messages.info(r.user, 'That was a pretty hard workout')
            if settings.DEBUG and r.getemailnotifications:
                res = handle_sendemail_hard.delay(
                    w.id, r.user.email, r.user.first_name, r.user.last_name,
                    btvalues=btvalues.to_json())
            elif r.getemailnotifications:
                try:
                    # BUG FIX: same enqueue-of-call-result fix as above
                    res = queuehigh.enqueue(
                        handle_sendemail_hard, w.id, r.user.email,
                        r.user.first_name, r.user.last_name,
                        btvalues=btvalues.to_json())
                except AttributeError:
                    pass
            else:
                pass
    if privacy == 'visible':
        ts = Team.objects.filter(rower=r)
        for t in ts:
            w.team.add(t)
    # put stroke data in database
    res = dataprep(row.df, id=w.id, bands=True, barchart=True,
                   otwpower=True, empower=True, inboard=inboard)
    return (w.id, message)
def handle_nonpainsled(f2, fileformat, summary=''):
    """Convert a non-painsled workout file to a painsled-compatible CSV.

    Returns (csvfilename, summary, oarlength, inboard). The original file
    is deleted (falling back to its .gz sibling).
    """
    oarlength = 2.89
    inboard = 0.88
    # One parser class per recognized file format. An unsupported format
    # raises KeyError here (the caller wraps this call in a broad except);
    # previously it fell through the if-chain and raised NameError instead.
    parsers = {
        'rp': RowProParser,
        'tcx': TCXParser,
        'mystery': MysteryParser,
        'rowperfect3': RowPerfectParser,
        'ergdata': ErgDataParser,
        'coxmate': CoxMateParser,
        'bcmike': BoatCoachAdvancedParser,
        'boatcoach': BoatCoachParser,
        'boatcoachotw': BoatCoachOTWParser,
        'painsleddesktop': painsledDesktopParser,
        'speedcoach': speedcoachParser,
        'speedcoach2': SpeedCoach2Parser,
        'ergstick': ErgStickParser,
        'fit': FITParser,
    }
    row = parsers[fileformat](f2)
    if fileformat == 'speedcoach2':
        # EmPower files may carry rigging data and a full summary
        try:
            oarlength, inboard = get_empower_rigging(f2)
            summary = row.allstats()
        except:
            pass
    if fileformat == 'fit':
        try:
            s = fitsummarydata(f2)
            s.setsummary()
            summary = s.summarytext
        except:
            pass
    f_to_be_deleted = f2
    # should delete file
    f2 = f2[:-4] + 'o.csv'
    row.write_csv(f2, gzip=True)
    # os.remove(f2)
    try:
        os.remove(f_to_be_deleted)
    except:
        os.remove(f_to_be_deleted + '.gz')
    return (f2, summary, oarlength, inboard)


# Create new workout from file and store it in the database
# This routine should be used everywhere in views.py and mailprocessing.py
# Currently there is code duplication
def new_workout_from_file(r, f2, workouttype='rower', title='Workout',
                          makeprivate=False, notes=''):
    """Detect the file type of f2 and store it as a workout for rower r.

    Returns (workout_id, message, filename); workout_id is 0 on failure
    and -1 when a zip archive was handed off to a background job.
    """
    message = None
    try:
        fileformat = get_file_type(f2)
    except IOError:
        os.remove(f2)
        message = "Rowsandall could not process this file. The extension is supported but the file seems corrupt. Contact info@rowsandall.com if you think this is incorrect."
        return (0, message, f2)
    summary = ''
    oarlength = 2.89
    inboard = 0.88
    # zip archives are unpacked asynchronously by a worker
    if len(fileformat) == 3 and fileformat[0] == 'zip':
        f_to_be_deleted = f2
        title = os.path.basename(f2)
        if settings.DEBUG:
            res = handle_zip_file.delay(
                r.user.email,
                title,
                f2
            )
        else:
            res = queuelow.enqueue(
                handle_zip_file,
                r.user.email,
                title,
                f2
            )
        return -1, message, f2
    # Some people try to upload Concept2 logbook summaries
    if fileformat == 'c2log':
        os.remove(f2)
        message = "This C2 logbook summary does not contain stroke data. Please download the Export Stroke Data file from the workout details on the C2 logbook."
        return (0, message, f2)
    if fileformat == 'nostrokes':
        os.remove(f2)
        message = "It looks like this file doesn't contain stroke data."
        return (0, message, f2)
    # Some people upload corrupted zip files
    if fileformat == 'notgzip':
        os.remove(f2)
        message = "Rowsandall could not process this file. The extension is supported but the file seems corrupt. Contact info@rowsandall.com if you think this is incorrect."
        return (0, message, f2)
    # Some people try to upload RowPro summary logs
    if fileformat == 'rowprolog':
        os.remove(f2)
        message = "This RowPro logbook summary does not contain stroke data. Please use the Stroke Data CSV file for the individual workout in your log."
        return (0, message, f2)
    # Sometimes people try an unsupported file type.
    # Send an email to info@rowsandall.com with the file attached
    # for me to check if it is a bug, or a new file type
    # worth supporting
    if fileformat == 'unknown':
        message = "We couldn't recognize the file type"
        if settings.DEBUG:
            res = handle_sendemail_unrecognized.delay(f2, r.user.email)
        else:
            res = queuehigh.enqueue(handle_sendemail_unrecognized,
                                    f2, r.user.email)
        return (0, message, f2)
    # handle non-Painsled by converting it to painsled compatible CSV
    if (fileformat != 'csv'):
        try:
            f2, summary, oarlength, inboard = handle_nonpainsled(
                f2, fileformat, summary=summary)
        except:
            errorstring = str(sys.exc_info()[0])
            message = 'Something went wrong: ' + errorstring
            return (0, message, '')
    # FIT files already carry their own summary text
    dosummary = (fileformat != 'fit')
    id, message = save_workout_database(f2, r, workouttype=workouttype,
                                        makeprivate=makeprivate,
                                        dosummary=dosummary,
                                        workoutsource=fileformat,
                                        summary=summary, inboard=inboard,
                                        oarlength=oarlength, title=title)
    return (id, message, f2)
def _regularize_segment(seg):
    """Sort a stroke-data segment by time, fill gaps and collapse duplicates.

    Helper for split_workout: linear interpolation (both directions,
    limited), backfill of leading NaNs, then averaging rows that share a
    time stamp.
    """
    seg = seg.sort_values(['time'])
    seg = seg.interpolate(method='linear', axis=0,
                          limit_direction='both', limit=10)
    seg.fillna(method='bfill', inplace=True)
    # Some new stuff to try out
    seg = seg.groupby('time', axis=0).mean()
    seg['time'] = seg.index
    seg.reset_index(drop=True, inplace=True)
    return seg


def split_workout(r, parent, splitsecond, splitmode):
    """Split `parent` at `splitsecond` into up to two new workouts.

    splitmode is a string containing any of 'keep first', 'keep second',
    'keep original', 'firstprivate', 'secondprivate', 'originalprivate'.
    Returns (ids, messages) for the created/kept workouts.
    """
    data, row = getrowdata_db(id=parent.id)
    latitude, longitude = get_latlon(parent.id)
    if not latitude.empty and not longitude.empty:
        data[' latitude'] = latitude
        data[' longitude'] = longitude
    # DB stores time in milliseconds; work in seconds here
    data['time'] = data['time'] / 1000.
    data1 = data[data['time'] <= splitsecond].copy()
    data2 = data[data['time'] > splitsecond].copy()
    # identical clean-up for both halves (was duplicated inline)
    data1 = _regularize_segment(data1)
    data2 = _regularize_segment(data2)
    data1['pace'] = data1['pace'] / 1000.
    data2['pace'] = data2['pace'] / 1000.
    data1.drop_duplicates(subset='time', inplace=True)
    data2.drop_duplicates(subset='time', inplace=True)
    messages = []
    ids = []
    if 'keep first' in splitmode:
        if 'firstprivate' in splitmode:
            setprivate = True
        else:
            setprivate = False
        id, message = new_workout_from_df(r, data1,
                                          title=parent.name + ' (1)',
                                          parent=parent,
                                          setprivate=setprivate,
                                          forceunit='N')
        messages.append(message)
        ids.append(id)
    if 'keep second' in splitmode:
        # rebase distance/time of the second half to start at zero
        data2['cumdist'] = data2['cumdist'] - data2.ix[0, 'cumdist']
        data2['distance'] = data2['distance'] - data2.ix[0, 'distance']
        data2['time'] = data2['time'] - data2.ix[0, 'time']
        if 'secondprivate' in splitmode:
            setprivate = True
        else:
            setprivate = False
        dt = datetime.timedelta(seconds=splitsecond)
        id, message = new_workout_from_df(r, data2,
                                          title=parent.name + ' (2)',
                                          parent=parent,
                                          setprivate=setprivate,
                                          dt=dt, forceunit='N')
        messages.append(message)
        ids.append(id)
    if not 'keep original' in splitmode:
        if 'keep second' in splitmode or 'keep first' in splitmode:
            parent.delete()
            messages.append('Deleted Workout: ' + parent.name)
        else:
            # refuse to delete when nothing was kept
            messages.append('That would delete your workout')
            ids.append(parent.id)
    elif 'originalprivate' in splitmode:
        parent.privacy = 'hidden'
        parent.save()
    return ids, messages


# Create new workout from data frame and store it in the database
# This routine should be used everywhere in views.py and mailprocessing.py
# Currently there is code duplication
def new_workout_from_df(r, df, title='New Workout', parent=None,
                        setprivate=False, forceunit='lbs',
                        dt=datetime.timedelta()):
    """Write `df` to a CSV file and store it as a workout for rower r.

    When `parent` is given, rigging, type, notes, summary, privacy and
    start time (shifted by `dt`) are inherited from it.
    Returns (workout_id, message).
    """
    message = None
    summary = ''
    if parent:
        oarlength = parent.oarlength
        inboard = parent.inboard
        workouttype = parent.workouttype
        notes = parent.notes
        summary = parent.summary
        if parent.privacy == 'hidden':
            makeprivate = True
        else:
            makeprivate = False
        startdatetime = parent.startdatetime + dt
    else:
        oarlength = 2.89
        inboard = 0.88
        workouttype = 'rower'
        notes = ''
        summary = ''
        makeprivate = False
        startdatetime = timezone.now()
    if setprivate:
        makeprivate = True
    timestr = strftime("%Y%m%d-%H%M%S")
    csvfilename = 'media/df_' + timestr + '.csv'
    if forceunit == 'N':
        # change to lbs for now
        # NOTE(review): raises KeyError if the force columns are absent — confirm
        df['peakforce'] /= lbstoN
        df['averageforce'] /= lbstoN
    df.rename(columns=columndict, inplace=True)
    # starttimeunix = mktime(startdatetime.utctimetuple())
    starttimeunix = arrow.get(startdatetime).timestamp
    df[' ElapsedTime (sec)'] = df['TimeStamp (sec)']
    df['TimeStamp (sec)'] = df['TimeStamp (sec)'] + starttimeunix
    row = rrdata(df=df)
    row.write_csv(csvfilename, gzip=True)
    # res = df.to_csv(csvfilename+'.gz',index_label='index',
    #                 compression='gzip')
    id, message = save_workout_database(csvfilename, r,
                                        workouttype=workouttype,
                                        title=title, notes=notes,
                                        oarlength=oarlength,
                                        inboard=inboard,
                                        makeprivate=makeprivate,
                                        dosmooth=False,
                                        consistencychecks=False)
    return (id, message)
timezone.now() if setprivate: makeprivate = True timestr = strftime("%Y%m%d-%H%M%S") csvfilename = 'media/df_' + timestr + '.csv' if forceunit == 'N': # change to lbs for now df['peakforce'] /= lbstoN df['averageforce'] /= lbstoN df.rename(columns=columndict, inplace=True) #starttimeunix = mktime(startdatetime.utctimetuple()) starttimeunix = arrow.get(startdatetime).timestamp df[' ElapsedTime (sec)'] = df['TimeStamp (sec)'] df['TimeStamp (sec)'] = df['TimeStamp (sec)'] + starttimeunix row = rrdata(df=df) row.write_csv(csvfilename, gzip=True) # res = df.to_csv(csvfilename+'.gz',index_label='index', # compression='gzip') id, message = save_workout_database(csvfilename, r, workouttype=workouttype, title=title, notes=notes, oarlength=oarlength, inboard=inboard, makeprivate=makeprivate, dosmooth=False, consistencychecks=False) return (id, message) # Compare the data from the CSV file and the database # Currently only calculates number of strokes. To be expanded with # more elaborate testing if needed def compare_data(id): row = Workout.objects.get(id=id) f1 = row.csvfilename try: rowdata = rdata(f1) l1 = len(rowdata.df) except AttributeError: rowdata = 0 l1 = 0 engine = create_engine(database_url, echo=False) query = sa.text('SELECT COUNT(*) FROM strokedata WHERE workoutid={id};'.format( id=id, )) with engine.connect() as conn, conn.begin(): try: res = conn.execute(query) l2 = res.fetchall()[0][0] except: print "Database Locked" conn.close() engine.dispose() lfile = l1 ldb = l2 return l1 == l2 and l1 != 0, ldb, lfile # Repair data for workouts where the CSV file is lost (or the DB entries # don't exist) def repair_data(verbose=False): ws = Workout.objects.all() for w in ws: if verbose: sys.stdout.write(".") test, ldb, lfile = compare_data(w.id) if not test: if verbose: print w.id, lfile, ldb try: rowdata = rdata(w.csvfilename) if rowdata and len(rowdata.df): update_strokedata(w.id, rowdata.df) except IOError, AttributeError: pass if lfile == 0: # if not ldb - delete 
workout try: data = read_df_sql(w.id) try: datalength = len(data) except AttributeError: datalength = 0 if datalength != 0: data.rename(columns=columndict, inplace=True) res = data.to_csv(w.csvfilename + '.gz', index_label='index', compression='gzip') print 'adding csv file' else: print w.id, ' No stroke records anywhere' w.delete() except: print 'failed' print str(sys.exc_info()[0]) pass # A wrapper around the rowingdata class, with some error catching def rdata(file, rower=rrower()): try: res = rrdata(csvfile=file, rower=rower) except IOError, IndexError: try: res = rrdata(csvfile=file + '.gz', rower=rower) except IOError, IndexError: res = 0 except: res = 0 return res # Remove all stroke data for workout ID from database def delete_strokedata(id): engine = create_engine(database_url, echo=False) query = sa.text('DELETE FROM strokedata WHERE workoutid={id};'.format( id=id, )) with engine.connect() as conn, conn.begin(): try: result = conn.execute(query) except: print "Database Locked" conn.close() engine.dispose() # Replace stroke data in DB with data from CSV file def update_strokedata(id, df): delete_strokedata(id) rowdata = dataprep(df, id=id, bands=True, barchart=True, otwpower=True) # Test that all data are of a numerical time def testdata(time, distance, pace, spm): t1 = np.issubdtype(time, np.number) t2 = np.issubdtype(distance, np.number) t3 = np.issubdtype(pace, np.number) t4 = np.issubdtype(spm, np.number) return t1 and t2 and t3 and t4 # Get data from DB for one workout (fetches all data). 
# Get data from DB for one workout (fetches all data). If data
# is not in DB, read from CSV file (and create DB entry)
def getrowdata_db(id=0, doclean=False, convertnewtons=True):
    """Return (stroke dataframe, Workout) for a workout id.

    Falls back to the CSV file (creating the DB rows) when the DB has no
    stroke data; returns an empty dataframe when neither source exists.
    """
    data = read_df_sql(id)
    # x_right is stored scaled by 1e6 in the DB
    data['x_right'] = data['x_right'] / 1.0e6
    if data.empty:
        rowdata, row = getrowdata(id=id)
        if rowdata:
            data = dataprep(rowdata.df, id=id, bands=True,
                            barchart=True, otwpower=True)
        else:
            data = pd.DataFrame()  # returning empty dataframe
    else:
        row = Workout.objects.get(id=id)
    # NOTE(review): raises KeyError when data ended up empty (no
    # 'efficiency' column) — confirm callers never hit that path
    if data['efficiency'].mean() == 0 and data['power'].mean() != 0:
        data = add_efficiency(id=id)
    if doclean:
        data = clean_df_stats(data, ignorehr=True)
    return data, row


# Fetch a subset of the data from the DB
def getsmallrowdata_db(columns, ids=[], doclean=True, workstrokesonly=True):
    """Read `columns` for the given workout ids, materializing DB rows first."""
    prepmultipledata(ids)
    data = read_cols_df_sql(ids, columns)
    # convert newtons
    if doclean:
        data = clean_df_stats(data, ignorehr=True,
                              workstrokesonly=workstrokesonly)
    return data


# Fetch both the workout and the workout stroke data (from CSV file)
def getrowdata(id=0):
    """Return (rowingdata object or 0, Workout) for a workout id."""
    # check if valid ID exists (workout exists)
    row = Workout.objects.get(id=id)
    f1 = row.csvfilename
    # get user
    r = row.user
    u = r.user
    rr = rrower(hrmax=r.max, hrut2=r.ut2, hrut1=r.ut1, hrat=r.at,
                hrtr=r.tr, hran=r.an, ftp=r.ftp)
    rowdata = rdata(f1, rower=rr)
    return rowdata, row


# Checks if all rows for a list of workout IDs have entries in the
# stroke_data table. If this is not the case, it creates the stroke
# data
# In theory, this should never yield any work, but it's a good
# safety net for programming errors elsewhere in the app
# Also used heavily when I moved from CSV file only to CSV+Stroke data
def prepmultipledata(ids, verbose=False):
    """Ensure every id in `ids` has strokedata rows; returns the ids filled."""
    query = sa.text('SELECT DISTINCT workoutid FROM strokedata')
    engine = create_engine(database_url, echo=False)
    with engine.connect() as conn, conn.begin():
        res = conn.execute(query)
        res = list(itertools.chain.from_iterable(res.fetchall()))
        conn.close()
    engine.dispose()
    try:
        ids2 = [int(id) for id in ids]
    except ValueError:
        ids2 = ids
    # ids that have no stroke rows yet
    res = list(set(ids2) - set(res))
    for id in res:
        rowdata, row = getrowdata(id=id)
        if verbose:
            print(id)
        if rowdata and len(rowdata.df):
            data = dataprep(rowdata.df, id=id, bands=True,
                            barchart=True, otwpower=True)
    return res


# Read a set of columns for a set of workout ids, returns data as a
# pandas dataframe
def read_cols_df_sql(ids, columns, convertnewtons=True):
    """Read `columns` (plus distance/spm/workoutid) for `ids` from strokedata.

    lbs force columns are converted to newtons per workout.
    """
    # drop columns that are not in offical list
    # axx = [ax[0] for ax in axes]
    axx = [f.name for f in StrokeData._meta.get_fields()]
    # BUG FIX: the original removed entries from `columns` while iterating
    # over it, which skips the element following every removal; build a
    # filtered list instead
    columns = [c for c in columns if c in axx]
    columns = list(columns) + ['distance', 'spm', 'workoutid']
    columns = [x for x in columns if x != 'None']
    columns = list(set(columns))
    ids = [int(id) for id in ids]
    engine = create_engine(database_url, echo=False)
    cls = ', '.join(columns)
    if len(ids) == 0:
        query = sa.text('SELECT {columns} FROM strokedata WHERE workoutid=0'.format(
            columns=cls,
        ))
    elif len(ids) == 1:
        # single-element tuples render as '(5,)' which is invalid SQL,
        # hence the special case
        query = sa.text('SELECT {columns} FROM strokedata WHERE workoutid={id}'.format(
            id=ids[0],
            columns=cls,
        ))
    else:
        query = sa.text('SELECT {columns} FROM strokedata WHERE workoutid IN {ids}'.format(
            columns=cls,
            ids=tuple(ids),
        ))
    # BUG FIX: an unused raw_connection() used to be opened here and leaked
    df = pd.read_sql_query(query, engine)
    df = df.fillna(value=0)
    if 'peakforce' in columns:
        funits = ((w.id, w.forceunit)
                  for w in Workout.objects.filter(id__in=ids))
        for id, u in funits:
            if u == 'lbs':
                mask = df['workoutid'] == id
                df.loc[mask, 'peakforce'] = df.loc[mask, 'peakforce'] * lbstoN
    if 'averageforce' in columns:
        funits = ((w.id, w.forceunit)
                  for w in Workout.objects.filter(id__in=ids))
        for id, u in funits:
            if u == 'lbs':
                mask = df['workoutid'] == id
                df.loc[mask, 'averageforce'] = df.loc[mask, 'averageforce'] * lbstoN
    engine.dispose()
    return df


# Read stroke data from the DB for a Workout ID. Returns a pandas dataframe
def read_df_sql(id):
    """Read all strokedata rows for one workout; lbs forces become newtons."""
    engine = create_engine(database_url, echo=False)
    df = pd.read_sql_query(sa.text('SELECT * FROM strokedata WHERE workoutid={id}'.format(
        id=id)), engine)
    engine.dispose()
    df = df.fillna(value=0)
    funit = Workout.objects.get(id=id).forceunit
    if funit == 'lbs':
        try:
            df['peakforce'] = df['peakforce'] * lbstoN
        except KeyError:
            pass
        try:
            df['averageforce'] = df['averageforce'] * lbstoN
        except KeyError:
            pass
    return df
# For the flex plot def smalldataprep(therows, xparam, yparam1, yparam2): df = pd.DataFrame() if yparam2 == 'None': yparam2 = 'power' df[xparam] = [] df[yparam1] = [] df[yparam2] = [] df['distance'] = [] df['spm'] = [] for workout in therows: f1 = workout.csvfilename try: rowdata = dataprep(rrdata(f1).df) rowdata = pd.DataFrame({xparam: rowdata[xparam], yparam1: rowdata[yparam1], yparam2: rowdata[yparam2], 'distance': rowdata['distance'], 'spm': rowdata['spm'], } ) if workout.forceunit == 'lbs': try: rowdata['peakforce'] *= lbstoN except KeyError: pass try: rowdata['averageforce'] *= lbstoN except KeyError: pass df = pd.concat([df, rowdata], ignore_index=True) except IOError: try: rowdata = dataprep(rrdata(f1 + '.gz').df) rowdata = pd.DataFrame({xparam: rowdata[xparam], yparam1: rowdata[yparam1], yparam2: rowdata[yparam2], 'distance': rowdata['distance'], 'spm': rowdata['spm'], } ) if workout.forceunit == 'lbs': try: rowdata['peakforce'] *= lbstoN except KeyError: pass try: rowdata['averageforce'] *= lbstoN except KeyError: pass df = pd.concat([df, rowdata], ignore_index=True) except IOError: pass return df # data fusion def datafusion(id1, id2, columns, offset): workout1 = Workout.objects.get(id=id1) workout2 = Workout.objects.get(id=id2) df1, w1 = getrowdata_db(id=id1) df1 = df1.drop([ # 'cumdist', 'hr_ut2', 'hr_ut1', 'hr_at', 'hr_tr', 'hr_an', 'hr_max', 'ftime', 'fpace', 'workoutid', 'id'], 1, errors='ignore') # Add coordinates to DataFrame latitude, longitude = get_latlon(id1) df1[' latitude'] = latitude df1[' longitude'] = longitude df2 = getsmallrowdata_db(['time'] + columns, ids=[id2], doclean=False) forceunit = 'N' offsetmillisecs = offset.seconds * 1000 + offset.microseconds / 1000. 
offsetmillisecs += offset.days * (3600 * 24 * 1000) df2['time'] = df2['time'] + offsetmillisecs keep1 = {c: c for c in set(df1.columns)} for c in columns: keep1.pop(c) for c in df1.columns: if not c in keep1: df1 = df1.drop(c, 1, errors='ignore') df = pd.concat([df1, df2], ignore_index=True) df = df.sort_values(['time']) df = df.interpolate(method='linear', axis=0, limit_direction='both', limit=10) df.fillna(method='bfill', inplace=True) # Some new stuff to try out df = df.groupby('time', axis=0).mean() df['time'] = df.index df.reset_index(drop=True, inplace=True) df['time'] = df['time'] / 1000. df['pace'] = df['pace'] / 1000. df['cum_dist'] = df['cumdist'] return df, forceunit def fix_newtons(id=0, limit=3000): # rowdata,row = getrowdata_db(id=id,doclean=False,convertnewtons=False) rowdata = getsmallrowdata_db(['peakforce'], ids=[id], doclean=False) try: #avgforce = rowdata['averageforce'] peakforce = rowdata['peakforce'] if peakforce.mean() > limit: w = Workout.objects.get(id=id) print "fixing ", id rowdata = rdata(w.csvfilename) if rowdata and len(rowdata.df): update_strokedata(w.id, rowdata.df) except KeyError: pass def add_efficiency(id=0): rowdata, row = getrowdata_db(id=id, doclean=False, convertnewtons=False) power = rowdata['power'] pace = rowdata['pace'] / 1.0e3 velo = 500. / pace ergpw = 2.8 * velo**3 efficiency = 100. * ergpw / power efficiency = efficiency.replace([-np.inf, np.inf], np.nan) efficiency.fillna(method='ffill') rowdata['efficiency'] = efficiency delete_strokedata(id) if id != 0: rowdata['workoutid'] = id engine = create_engine(database_url, echo=False) with engine.connect() as conn, conn.begin(): rowdata.to_sql('strokedata', engine, if_exists='append', index=False) conn.close() engine.dispose() return rowdata # This is the main routine. 
# it reindexes, sorts, filters, and smooths the data, then
# saves it to the stroke_data table in the database.
# Takes a rowingdata object's DataFrame as input.

def _smooth(series, windowsize):
    """Savitzky-Golay smooth a series; return it unchanged when the
    filter rejects the input (e.g. non-numeric data)."""
    try:
        return savgol_filter(series, windowsize, 3)
    except TypeError:
        return series


def dataprep(rowdatadf, id=0, bands=True, barchart=True, otwpower=True,
             empower=True, inboard=0.88, forceunit='lbs'):
    """Turn a rowingdata DataFrame into the strokedata table layout.

    Parameters
    ----------
    rowdatadf : DataFrame from a rowingdata object (mutated in place)
    id        : workout id; if non-zero the result is appended to the
                strokedata table
    bands     : include the rower's HR band columns
    barchart  : include x_right (right edge of each stroke's time bar)
    otwpower  : include on-the-water pace/power columns
    empower   : include Empower oarlock columns (wash, slip, angles, ...)
    inboard   : inboard length in meters, used to estimate drive length
    forceunit : 'lbs' or 'N'; unit of the force columns in rowdatadf

    Returns the prepared DataFrame, or 0 for empty input.
    """
    if rowdatadf.empty:
        return 0
    rowdatadf.set_index([range(len(rowdatadf))], inplace=True)
    t = rowdatadf.ix[:, 'TimeStamp (sec)']
    t = pd.Series(t - rowdatadf.ix[0, 'TimeStamp (sec)'])
    # clip absurd pace values at 3000 sec/500m
    row_index = rowdatadf.ix[:, ' Stroke500mPace (sec/500m)'] > 3000
    rowdatadf.loc[row_index, ' Stroke500mPace (sec/500m)'] = 3000.
    p = rowdatadf.ix[:, ' Stroke500mPace (sec/500m)']
    hr = rowdatadf.ix[:, ' HRCur (bpm)']
    spm = rowdatadf.ix[:, ' Cadence (stokes/min)']
    cumdist = rowdatadf.ix[:, 'cum_dist']
    power = rowdatadf.ix[:, ' Power (watts)']
    averageforce = rowdatadf.ix[:, ' AverageDriveForce (lbs)']
    drivelength = rowdatadf.ix[:, ' DriveLength (meters)']
    try:
        workoutstate = rowdatadf.ix[:, ' WorkoutState']
    except KeyError:
        workoutstate = 0 * hr
    peakforce = rowdatadf.ix[:, ' PeakDriveForce (lbs)']
    forceratio = averageforce / peakforce
    forceratio = forceratio.fillna(value=0)
    try:
        drivetime = rowdatadf.ix[:, ' DriveTime (ms)']
        recoverytime = rowdatadf.ix[:, ' StrokeRecoveryTime (ms)']
        rhythm = 100. * drivetime / (recoverytime + drivetime)
        rhythm = rhythm.fillna(value=0)
    except Exception:  # was a bare except; the columns may be missing
        rhythm = 0.0 * forceratio
    # choose an odd smoothing window of roughly 20 seconds of samples
    f = rowdatadf['TimeStamp (sec)'].diff().mean()
    if f != 0:
        windowsize = 2 * (int(10. / (f))) + 1
    else:
        windowsize = 1
    if windowsize <= 3:
        windowsize = 5
    if windowsize > 3 and windowsize < len(hr):
        spm = savgol_filter(spm, windowsize, 3)
        hr = savgol_filter(hr, windowsize, 3)
        drivelength = savgol_filter(drivelength, windowsize, 3)
        forceratio = savgol_filter(forceratio, windowsize, 3)
    try:
        t2 = t.fillna(method='ffill').apply(lambda x: timedeltaconv(x))
    except TypeError:
        t2 = 0 * t
    p2 = p.fillna(method='ffill').apply(lambda x: timedeltaconv(x))
    try:
        drivespeed = drivelength / rowdatadf[' DriveTime (ms)'] * 1.0e3
    except TypeError:
        drivespeed = 0.0 * rowdatadf['TimeStamp (sec)']
    drivespeed = drivespeed.fillna(value=0)
    try:
        driveenergy = rowdatadf['driveenergy']
    except KeyError:
        # estimate drive energy as force * length (lbs converted to N)
        if forceunit == 'lbs':
            driveenergy = drivelength * averageforce * lbstoN
        else:
            # BUG FIX: was misspelled 'drivenergy', which left
            # driveenergy undefined for non-lbs data (NameError below)
            driveenergy = drivelength * averageforce
    distance = rowdatadf.ix[:, 'cum_dist']
    velo = 500. / p
    distanceperstroke = 60. * velo / spm
    data = DataFrame(
        dict(
            time=t * 1e3,   # milliseconds
            hr=hr,
            pace=p * 1e3,   # ms per 500m
            spm=spm,
            cumdist=cumdist,
            ftime=niceformat(t2),
            fpace=nicepaceformat(p2),
            driveenergy=driveenergy,
            power=power,
            workoutstate=workoutstate,
            averageforce=averageforce,
            drivelength=drivelength,
            peakforce=peakforce,
            forceratio=forceratio,
            distance=distance,
            drivespeed=drivespeed,
            rhythm=rhythm,
            distanceperstroke=distanceperstroke,
        )
    )
    if bands:
        # HR bands
        data['hr_ut2'] = rowdatadf.ix[:, 'hr_ut2']
        data['hr_ut1'] = rowdatadf.ix[:, 'hr_ut1']
        data['hr_at'] = rowdatadf.ix[:, 'hr_at']
        data['hr_tr'] = rowdatadf.ix[:, 'hr_tr']
        data['hr_an'] = rowdatadf.ix[:, 'hr_an']
        data['hr_max'] = rowdatadf.ix[:, 'hr_max']
        data['hr_bottom'] = 0.0 * data['hr']
    # make sure an elapsed-time column exists (was a try/except probe
    # binding an unused variable)
    if ' ElapsedTime (sec)' not in rowdatadf.columns:
        rowdatadf[' ElapsedTime (sec)'] = rowdatadf['TimeStamp (sec)']
    if barchart:
        # time increments for bar chart; negative diffs clamped to zero
        time_increments = rowdatadf.ix[:, ' ElapsedTime (sec)'].diff()
        time_increments[0] = time_increments[1]
        time_increments = 0.5 * time_increments + 0.5 * np.abs(time_increments)
        x_right = (t2 + time_increments.apply(lambda x: timedeltaconv(x)))
        data['x_right'] = x_right
    if empower:
        # Empower oarlock channels; default to zero when absent
        try:
            wash = rowdatadf.ix[:, 'wash']
        except KeyError:
            wash = 0 * power
        try:
            catch = rowdatadf.ix[:, 'catch']
        except KeyError:
            catch = 0 * power
        try:
            finish = rowdatadf.ix[:, 'finish']
        except KeyError:
            finish = 0 * power
        try:
            peakforceangle = rowdatadf.ix[:, 'peakforceangle']
        except KeyError:
            peakforceangle = 0 * power
        if data['driveenergy'].mean() == 0:
            try:
                driveenergy = rowdatadf.ix[:, 'driveenergy']
            except KeyError:
                driveenergy = power * 60 / spm
        else:
            driveenergy = data['driveenergy']
        # arc swept by the oar between catch and finish angles
        # (inboard minus 0.05 m -- TODO confirm the 5 cm offset)
        arclength = (inboard - 0.05) * (np.radians(finish) - np.radians(catch))
        if arclength.mean() > 0:
            drivelength = arclength
        elif drivelength.mean() == 0:
            drivelength = driveenergy / (averageforce * 4.44822)
        try:
            slip = rowdatadf.ix[:, 'slip']
        except KeyError:
            slip = 0 * power
        try:
            totalangle = finish - catch
            effectiveangle = finish - wash - catch - slip
        except ValueError:
            totalangle = 0 * power
            effectiveangle = 0 * power
        if windowsize > 3 and windowsize < len(slip):
            wash = _smooth(wash, windowsize)
            slip = _smooth(slip, windowsize)
            catch = _smooth(catch, windowsize)
            finish = _smooth(finish, windowsize)
            peakforceangle = _smooth(peakforceangle, windowsize)
            driveenergy = _smooth(driveenergy, windowsize)
            drivelength = _smooth(drivelength, windowsize)
            totalangle = _smooth(totalangle, windowsize)
            effectiveangle = _smooth(effectiveangle, windowsize)
        velo = 500. / p
        ergpw = 2.8 * velo ** 3
        efficiency = 100. * ergpw / power
        efficiency = efficiency.replace([-np.inf, np.inf], np.nan)
        # BUG FIX: fillna is not in-place; the original discarded the result
        efficiency = efficiency.fillna(method='ffill')
        try:
            data['wash'] = wash
            data['catch'] = catch
            data['slip'] = slip
            data['finish'] = finish
            data['peakforceangle'] = peakforceangle
            data['driveenergy'] = driveenergy
            data['drivelength'] = drivelength
            data['totalangle'] = totalangle
            data['effectiveangle'] = effectiveangle
            data['efficiency'] = efficiency
        except ValueError:
            pass
    if otwpower:
        try:
            nowindpace = rowdatadf.ix[:, 'nowindpace']
        except KeyError:
            nowindpace = p
        try:
            equivergpower = rowdatadf.ix[:, 'equivergpower']
        except KeyError:
            equivergpower = 0 * p + 50.
        nowindpace2 = nowindpace.apply(lambda x: timedeltaconv(x))
        # invert P = 2.8 * v^3 to get equivalent erg pace
        ergvelo = (equivergpower / 2.8) ** (1. / 3.)
        ergpace = 500. / ergvelo
        ergpace[ergpace == np.inf] = 240.
        ergpace2 = ergpace.apply(lambda x: timedeltaconv(x))
        data['ergpace'] = ergpace * 1e3
        data['nowindpace'] = nowindpace * 1e3
        data['equivergpower'] = equivergpower
        data['fergpace'] = nicepaceformat(ergpace2)
        data['fnowindpace'] = nicepaceformat(nowindpace2)
    data = data.replace([-np.inf, np.inf], np.nan)
    data = data.fillna(method='ffill')
    # write data if id given
    if id != 0:
        data['workoutid'] = id
        engine = create_engine(database_url, echo=False)
        with engine.connect() as conn, conn.begin():
            data.to_sql('strokedata', engine, if_exists='append', index=False)
        engine.dispose()
    return data