# All the data preparation, data cleaning and data mangling should # be defined here from rowers.models import Workout, User, Rower,StrokeData from rowingdata import rowingdata as rrdata from rowers.tasks import handle_sendemail_unrecognized from rowers.tasks import handle_zip_file from rowingdata import rower as rrower from rowingdata import main as rmain from rowingdata import get_file_type,get_empower_rigging from pandas import DataFrame,Series from pytz import timezone as tz,utc from django.utils import timezone from time import strftime,strptime,mktime,time,daylight from django.utils.timezone import get_current_timezone thetimezone = get_current_timezone() from rowingdata import ( TCXParser,RowProParser,ErgDataParser,TCXParserNoHR, CoxMateParser, BoatCoachParser,RowPerfectParser,BoatCoachAdvancedParser, MysteryParser, painsledDesktopParser,speedcoachParser,ErgStickParser, SpeedCoach2Parser,FITParser,fitsummarydata, make_cumvalues, summarydata,get_file_type, ) from rowers.models import Team from rowers.metrics import axes from async_messages import messages as a_messages import os import zipfile import pandas as pd import numpy as np import itertools import math from tasks import handle_sendemail_unrecognized,handle_sendemail_breakthrough from django.conf import settings from sqlalchemy import create_engine import sqlalchemy as sa import sys import utils import datautils from utils import lbstoN from scipy.interpolate import griddata import django_rq queue = django_rq.get_queue('default') queuelow = django_rq.get_queue('low') queuehigh = django_rq.get_queue('default') user = settings.DATABASES['default']['USER'] password = settings.DATABASES['default']['PASSWORD'] database_name = settings.DATABASES['default']['NAME'] host = settings.DATABASES['default']['HOST'] port = settings.DATABASES['default']['PORT'] database_url = 'mysql://{user}:{password}@{host}:{port}/{database_name}'.format( user=user, password=password, database_name=database_name, host=host, 
port=port, ) # Use SQLite local database when we're in debug mode if settings.DEBUG or user=='': # database_url = 'sqlite:///db.sqlite3' database_url = 'sqlite:///'+database_name # mapping the DB column names to the CSV file column names columndict = { 'time':'TimeStamp (sec)', 'hr':' HRCur (bpm)', 'pace':' Stroke500mPace (sec/500m)', 'spm':' Cadence (stokes/min)', 'power':' Power (watts)', 'averageforce':' AverageDriveForce (lbs)', 'drivelength':' DriveLength (meters)', 'peakforce':' PeakDriveForce (lbs)', 'distance':' Horizontal (meters)', 'catch':'catch', 'finish':'finish', 'peakforceangle':'peakforceangle', 'wash':'wash', 'slip':'slip', 'workoutstate':' WorkoutState', } from scipy.signal import savgol_filter import datetime def get_latlon(id): try: w = Workout.objects.get(id=id) except Workout.DoesNotExist: return False rowdata = rdata(w.csvfilename) try: try: latitude = rowdata.df.ix[:,' latitude'] longitude = rowdata.df.ix[:,' longitude'] except KeyError: latitude = 0*rowdata.df.ix[:,'TimeStamp (sec)'] longitude = 0*rowdata.df.ix[:,'TimeStamp (sec)'] return [latitude,longitude] except AttributeError: return [pd.Series([]),pd.Series([])] return [pd.Series([]),pd.Series([])] def get_workouts(ids,userid): goodids = [] for id in ids: w = Workout.objects.get(id=id) if int(w.user.user.id) == int(userid): goodids.append(id) return [Workout.objects.get(id=id) for id in goodids] def filter_df(datadf,fieldname,value,largerthan=True): try: x = datadf[fieldname] except KeyError: return datadf if largerthan: mask = datadf[fieldname] < value else: mask = datadf[fieldname] >= value datadf.loc[mask,fieldname] = np.nan return datadf def df_resample(datadf): # time stamps must be in seconds timestamps = datadf['TimeStamp (sec)'].astype('int') datadf['timestamps'] = timestamps newdf = datadf.groupby(['timestamps']).mean() return newdf def clean_df_stats(datadf,workstrokesonly=True,ignorehr=True, ignoreadvanced=False): # clean data remove zeros and negative values # bring 
metrics which have negative values to positive domain try: datadf['catch'] = -datadf['catch'] except KeyError: pass try: datadf['peakforceangle'] = datadf['peakforceangle']+1000 except KeyError: pass try: datadf['hr'] = datadf['hr']+10 except KeyError: pass datadf=datadf.clip(lower=0) datadf.replace(to_replace=0,value=np.nan,inplace=True) # return from positive domain to negative try: datadf['catch'] = -datadf['catch'] except KeyError: pass try: datadf['peakforceangle'] = datadf['peakforceangle']-1000 except KeyError: pass try: datadf['hr'] = datadf['hr']-10 except KeyError: pass # clean data for useful ranges per column if not ignorehr: try: mask = datadf['hr'] < 30 datadf.loc[mask,'hr'] = np.nan except KeyError: pass try: mask = datadf['spm'] < 10 datadf.loc[mask,'spm'] = np.nan except KeyError: pass try: mask = datadf['pace']/1000. > 300. datadf.loc[mask,'pace'] = np.nan except KeyError: pass try: mask = datadf['efficiency'] < 0. datadf.loc[mask,'efficiency'] = np.nan except KeyError: pass try: mask = datadf['pace']/1000. < 60. 
datadf.loc[mask,'pace'] = np.nan except KeyError: pass try: mask = datadf['spm'] > 60 datadf.loc[mask,'spm'] = np.nan except KeyError: pass try: mask = datadf['wash'] < 1 datadf.loc[mask,'wash'] = np.nan except KeyError: pass if not ignoreadvanced: try: mask = datadf['rhythm'] < 5 datadf.loc[mask,'rhythm'] = np.nan except KeyError: pass try: mask = datadf['rhythm'] > 70 datadf.loc[mask,'rhythm'] = np.nan except KeyError: pass try: mask = datadf['power'] < 20 datadf.loc[mask,'power'] = np.nan except KeyError: pass try: mask = datadf['drivelength'] < 0.5 datadf.loc[mask,'drivelength'] = np.nan except KeyError: pass try: mask = datadf['forceratio'] < 0.2 datadf.loc[mask,'forceratio'] = np.nan except KeyError: pass try: mask = datadf['forceratio'] > 1.0 datadf.loc[mask,'forceratio'] = np.nan except KeyError: pass try: mask = datadf['drivespeed'] < 0.5 datadf.loc[mask,'drivespeed'] = np.nan except KeyError: pass try: mask = datadf['drivespeed'] > 4 datadf.loc[mask,'drivespeed'] = np.nan except KeyError: pass try: mask = datadf['driveenergy'] > 2000 datadf.loc[mask,'driveenergy'] = np.nan except KeyError: pass try: mask = datadf['driveenergy'] < 100 datadf.loc[mask,'driveenergy'] = np.nan except KeyError: pass try: mask = datadf['catch'] > -30. 
datadf.loc[mask,'catch'] = np.nan except KeyError: pass workoutstateswork = [1,4,5,8,9,6,7] workoutstatesrest = [3] workoutstatetransition = [0,2,10,11,12,13] if workstrokesonly=='True' or workstrokesonly==True: try: datadf = datadf[~datadf['workoutstate'].isin(workoutstatesrest)] except: pass return datadf def getstatsfields(): # Get field names and remove those that are not useful in stats fields = StrokeData._meta.get_fields() fielddict = {field.name:field.verbose_name for field in fields} #fielddict.pop('workoutid') fielddict.pop('ergpace') fielddict.pop('hr_an') fielddict.pop('hr_tr') fielddict.pop('hr_at') fielddict.pop('hr_ut2') fielddict.pop('hr_ut1') fielddict.pop('time') fielddict.pop('distance') fielddict.pop('nowindpace') fielddict.pop('fnowindpace') fielddict.pop('fergpace') fielddict.pop('equivergpower') # fielddict.pop('workoutstate') fielddict.pop('fpace') fielddict.pop('pace') fielddict.pop('id') fielddict.pop('ftime') fielddict.pop('x_right') fielddict.pop('hr_max') fielddict.pop('hr_bottom') fielddict.pop('cumdist') fieldlist = [field for field,value in fielddict.iteritems()] return fieldlist,fielddict # A string representation for time deltas def niceformat(values): out = [] for v in values: formattedv = strfdelta(v) out.append(formattedv) return out # A nice printable format for time delta values def strfdelta(tdelta): try: minutes,seconds = divmod(tdelta.seconds,60) tenths = int(tdelta.microseconds/1e5) except AttributeError: minutes,seconds = divmod(tdelta.view(np.int64),60e9) seconds,rest = divmod(seconds,1e9) tenths = int(rest/1e8) res = "{minutes:0>2}:{seconds:0>2}.{tenths:0>1}".format( minutes=minutes, seconds=seconds, tenths=tenths, ) return res # A nice printable format for pace values def nicepaceformat(values): out = [] for v in values: formattedv = strfdelta(v) out.append(formattedv) return out # Convert seconds to a Time Delta value, replacing NaN with a 5:50 pace def timedeltaconv(x): if np.isfinite(x) and x != 0 and x>0 and 
x<175000: dt = datetime.timedelta(seconds=x) else: dt = datetime.timedelta(seconds=350.) return dt def paceformatsecs(values): out = [] for v in values: td = timedeltaconv(v) formattedv = strfdelta(td) out.append(formattedv) return out # Processes painsled CSV file to database def save_workout_database(f2,r,dosmooth=True,workouttype='rower', dosummary=True,title='Workout', workoutsource='unknown', notes='',totaldist=0,totaltime=0, summary='', makeprivate=False, oarlength=2.89,inboard=0.88, consistencychecks=False): message = None powerperc = 100*np.array([r.pw_ut2, r.pw_ut1, r.pw_at, r.pw_tr,r.pw_an])/r.ftp # make workout and put in database rr = rrower(hrmax=r.max,hrut2=r.ut2, hrut1=r.ut1,hrat=r.at, hrtr=r.tr,hran=r.an,ftp=r.ftp, powerperc=powerperc,powerzones=r.powerzones) row = rdata(f2,rower=rr) dtavg = row.df['TimeStamp (sec)'].diff().mean() if dtavg < 1: newdf = df_resample(row.df) try: os.remove(f2) except: pass return new_workout_from_df(r,newdf, title=title) try: checks = row.check_consistency() allchecks = 1 for key,value in checks.iteritems(): if not value: allchecks = 0 if consistencychecks: a_messages.error(r.user,'Failed consistency check: '+key+', autocorrected') else: pass # a_messages.error(r.user,'Failed consistency check: '+key+', not corrected') except ZeroDivisionError: pass if not allchecks and consistencychecks: # row.repair() pass if row == 0: return (0,'Error: CSV data file not found') if dosmooth: # auto smoothing pace = row.df[' Stroke500mPace (sec/500m)'].values velo = 500./pace f = row.df['TimeStamp (sec)'].diff().mean() if f !=0 and not np.isnan(f): windowsize = 2*(int(10./(f)))+1 else: windowsize = 1 if not 'originalvelo' in row.df: row.df['originalvelo'] = velo if windowsize > 3 and windowsize23: message = 'Warning: The workout duration was longer than 23 hours. ' hours = 23 minutes = int((totaltime - 3600.*hours)/60.) 
if minutes>59: minutes = 59 if not message: message = 'Warning: there is something wrong with the workout duration' seconds = int(totaltime - 3600.*hours - 60.*minutes) if seconds > 59: seconds = 59 if not message: message = 'Warning: there is something wrong with the workout duration' tenths = int(10*(totaltime - 3600.*hours - 60.*minutes - seconds)) if tenths > 9: tenths = 9 if not message: message = 'Warning: there is something wrong with the workout duration' duration = "%s:%s:%s.%s" % (hours,minutes,seconds,tenths) if dosummary: summary = row.allstats() #summary = row.summary() #summary += '\n' #summary += row.intervalstats() workoutdate = row.rowdatetime.strftime('%Y-%m-%d') workoutstarttime = row.rowdatetime.strftime('%H:%M:%S') workoutstartdatetime = thetimezone.localize(row.rowdatetime).astimezone(utc) if makeprivate: privacy = 'hidden' else: privacy = 'visible' # check for duplicate start times and duration ws = Workout.objects.filter(startdatetime=workoutstartdatetime, distance=totaldist, user=r) if (len(ws) != 0): message = "Warning: This workout probably already exists in the database" privacy = 'hidden' # checking for inf values totaldist = np.nan_to_num(totaldist) maxhr = np.nan_to_num(maxhr) averagehr = np.nan_to_num(averagehr) w = Workout(user=r,name=title,date=workoutdate, workouttype=workouttype, duration=duration,distance=totaldist, weightcategory=r.weightcategory, starttime=workoutstarttime, workoutsource=workoutsource, csvfilename=f2,notes=notes,summary=summary, maxhr=maxhr,averagehr=averagehr, startdatetime=workoutstartdatetime, inboard=inboard,oarlength=oarlength, privacy=privacy) w.save() isbreakthrough = False if workouttype == 'water': df = getsmallrowdata_db(['power','workoutid','time'],ids=[w.id]) # delta,cpvalues,avgpower = datautils.getsinglecp(row.df) thesecs = totaltime maxt = 1.05*thesecs logarr = datautils.getlogarr(maxt) dfgrouped = df.groupby(['workoutid']) delta,cpvalues,avgpower = datautils.getcp(dfgrouped,logarr) res,btvalues 
= utils.isbreakthrough(delta,cpvalues,r.p0,r.p1,r.p2,r.p3,r.cpratio) if res: isbreakthrough = True res = datautils.updatecp(delta,cpvalues,r) # submit email task to send email about breakthrough workout if isbreakthrough: a_messages.info(r.user,'It looks like you have a new breakthrough workout') if settings.DEBUG and r.getemailnotifications: res = handle_sendemail_breakthrough.delay(w.id,r.user.email, r.user.first_name, r.user.last_name, btvalues=btvalues.to_json()) elif r.getemailnotifications: try: res = queuehigh.enqueue( handle_sendemail_breakthrough(w.id, r.user.email, r.user.first_name, r.user.last_name, btvalues=btvalues.to_json())) except AttributeError: pass else: pass if privacy == 'visible': ts = Team.objects.filter(rower=r) for t in ts: w.team.add(t) # put stroke data in database res = dataprep(row.df,id=w.id,bands=True, barchart=True,otwpower=True,empower=True,inboard=inboard) return (w.id,message) def handle_nonpainsled(f2,fileformat,summary=''): oarlength = 2.89 inboard = 0.88 # handle RowPro: if (fileformat == 'rp'): row = RowProParser(f2) # handle TCX if (fileformat == 'tcx'): row = TCXParser(f2) # handle Mystery if (fileformat == 'mystery'): row = MysteryParser(f2) # handle TCX no HR if (fileformat == 'tcxnohr'): row = TCXParserNoHR(f2) # handle RowPerfect if (fileformat == 'rowperfect3'): row = RowPerfectParser(f2) # handle ErgData if (fileformat == 'ergdata'): row = ErgDataParser(f2) # handle CoxMate if (fileformat == 'coxmate'): row = CoxMateParser(f2) # handle Mike if (fileformat == 'bcmike'): row = BoatCoachAdvancedParser(f2) # handle BoatCoach if (fileformat == 'boatcoach'): row = BoatCoachParser(f2) # handle painsled desktop if (fileformat == 'painsleddesktop'): row = painsledDesktopParser(f2) # handle speed coach GPS if (fileformat == 'speedcoach'): row = speedcoachParser(f2) # handle speed coach GPS 2 if (fileformat == 'speedcoach2'): row = SpeedCoach2Parser(f2) try: oarlength,inboard = get_empower_rigging(f2) summary = row.allstats() 
except: pass # handle ErgStick if (fileformat == 'ergstick'): row = ErgStickParser(f2) # handle FIT if (fileformat == 'fit'): row = FITParser(f2) try: s = fitsummarydata(f2) s.setsummary() summary = s.summarytext except: pass f_to_be_deleted = f2 # should delete file f2 = f2[:-4]+'o.csv' row.write_csv(f2,gzip=True) #os.remove(f2) try: os.remove(f_to_be_deleted) except: os.remove(f_to_be_deleted+'.gz') return (f2,summary,oarlength,inboard) # Create new workout from file and store it in the database # This routine should be used everywhere in views.py and mailprocessing.py # Currently there is code duplication def new_workout_from_file(r,f2, workouttype='rower', title='Workout', makeprivate=False, notes=''): message = None fileformat = get_file_type(f2) summary = '' oarlength = 2.89 inboard = 0.88 if len(fileformat)==3 and fileformat[0]=='zip': f_to_be_deleted = f2 title = os.path.basename(f2) if settings.DEBUG: res = handle_zip_file.delay( r.user.email,title,f2 ) else: res = queuelow.enqueue( handle_zip_file, r.user.email, title, f2 ) return -1,message,f2 # Some people try to upload Concept2 logbook summaries if fileformat == 'c2log': os.remove(f2) message = "This C2 logbook summary does not contain stroke data. Please download the Export Stroke Data file from the workout details on the C2 logbook." return (0,message,f2) if fileformat == 'nostrokes': os.remove(f2) message = "It looks like this file doesn't contain stroke data." return (0,message,f2) # Some people try to upload RowPro summary logs if fileformat == 'rowprolog': os.remove(f2) message = "This RowPro logbook summary does not contain stroke data. Please use the Stroke Data CSV file for the individual workout in your log." return (0,message,f2) # Sometimes people try an unsupported file type. 
# Send an email to info@rowsandall.com with the file attached # for me to check if it is a bug, or a new file type # worth supporting if fileformat == 'unknown': message = "We couldn't recognize the file type" if settings.DEBUG: res = handle_sendemail_unrecognized.delay(f2, r.user.email) else: res = queuehigh.enqueue(handle_sendemail_unrecognized, f2,r.user.email) return (0,message,f2) # handle non-Painsled by converting it to painsled compatible CSV if (fileformat != 'csv'): try: f2,summary,oarlength,inboard = handle_nonpainsled(f2, fileformat, summary=summary) except: errorstring = str(sys.exc_info()[0]) message = 'Something went wrong: '+errorstring return (0,message,'') dosummary = (fileformat != 'fit') id,message = save_workout_database(f2,r, workouttype=workouttype, makeprivate=makeprivate, dosummary=dosummary, workoutsource=fileformat, summary=summary, inboard=inboard,oarlength=oarlength, title=title) return (id,message,f2) def split_workout(r,parent,splitsecond,splitmode): data,row = getrowdata_db(id=parent.id) latitude,longitude = get_latlon(parent.id) if not latitude.empty and not longitude.empty: data[' latitude'] = latitude data[' longitude'] = longitude data['time'] = data['time']/1000. data1 = data[data['time']<=splitsecond].copy() data2 = data[data['time']>splitsecond].copy() data1 = data1.sort_values(['time']) data1 = data1.interpolate(method='linear',axis=0,limit_direction='both', limit=10) data1.fillna(method='bfill',inplace=True) # Some new stuff to try out data1 = data1.groupby('time',axis=0).mean() data1['time'] = data1.index data1.reset_index(drop=True,inplace=True) data2 = data2.sort_values(['time']) data2 = data2.interpolate(method='linear',axis=0,limit_direction='both', limit=10) data2.fillna(method='bfill',inplace=True) # Some new stuff to try out data2 = data2.groupby('time',axis=0).mean() data2['time'] = data2.index data2.reset_index(drop=True,inplace=True) data1['pace'] = data1['pace']/1000. data2['pace'] = data2['pace']/1000. 
data1.drop_duplicates(subset='time',inplace=True) data2.drop_duplicates(subset='time',inplace=True) messages = [] ids = [] if 'keep first' in splitmode: if 'firstprivate' in splitmode: setprivate = True else: setprivate = False id,message = new_workout_from_df(r,data1, title=parent.name+' (1)', parent=parent, setprivate=setprivate) messages.append(message) ids.append(id) if 'keep second' in splitmode: data2['cumdist'] = data2['cumdist'] - data2.ix[0,'cumdist'] data2['distance'] = data2['distance'] - data2.ix[0,'distance'] data2['time'] = data2['time'] - data2.ix[0,'time'] if 'secondprivate' in splitmode: setprivate = True else: setprivate = False dt = datetime.timedelta(seconds=splitsecond) id,message = new_workout_from_df(r,data2, title=parent.name+' (2)', parent=parent, setprivate=setprivate, dt=dt) messages.append(message) ids.append(id) if not 'keep original' in splitmode: if 'keep second' in splitmode or 'keep first' in splitmode: parent.delete() messages.append('Deleted Workout: '+parent.name) else: messages.append('That would delete your workout') ids.append(parent.id) elif 'originalprivate' in splitmode: parent.privacy = 'hidden' parent.save() return ids,messages # Create new workout from data frame and store it in the database # This routine should be used everywhere in views.py and mailprocessing.py # Currently there is code duplication def new_workout_from_df(r,df, title='New Workout', parent=None, setprivate=False, dt=datetime.timedelta()): message = None summary = '' if parent: oarlength = parent.oarlength inboard = parent.inboard workouttype = parent.workouttype notes=parent.notes summary=parent.summary if parent.privacy == 'hidden': makeprivate=True else: makeprivate=False startdatetime=parent.startdatetime+dt else: oarlength = 2.89 inboard = 0.88 workouttype = 'rower' notes='' summary='' makeprivate=False startdatetime = timezone.now() if setprivate: makeprivate=True timestr = strftime("%Y%m%d-%H%M%S") csvfilename ='media/df_'+timestr+'.csv' 
df.rename(columns = columndict,inplace=True) starttimeunix = mktime(startdatetime.utctimetuple()) df[' ElapsedTime (sec)'] = df['TimeStamp (sec)'] df['TimeStamp (sec)'] = df['TimeStamp (sec)']+starttimeunix row = rrdata(df=df) row.write_csv(csvfilename,gzip=True) #res = df.to_csv(csvfilename+'.gz',index_label='index', # compression='gzip') id,message = save_workout_database(csvfilename,r, workouttype=workouttype, title=title, notes=notes, oarlength=oarlength, inboard=inboard, makeprivate=makeprivate, dosmooth=False, consistencychecks=False) return (id,message) # Compare the data from the CSV file and the database # Currently only calculates number of strokes. To be expanded with # more elaborate testing if needed def compare_data(id): row = Workout.objects.get(id=id) f1 = row.csvfilename try: rowdata = rdata(f1) l1 = len(rowdata.df) except AttributeError: rowdata = 0 l1 = 0 engine = create_engine(database_url, echo=False) query = sa.text('SELECT COUNT(*) FROM strokedata WHERE workoutid={id};'.format( id=id, )) with engine.connect() as conn, conn.begin(): try: res = conn.execute(query) l2 = res.fetchall()[0][0] except: print "Database Locked" conn.close() engine.dispose() lfile = l1 ldb = l2 return l1==l2 and l1 != 0,ldb,lfile # Repair data for workouts where the CSV file is lost (or the DB entries # don't exist) def repair_data(verbose=False): ws = Workout.objects.all() for w in ws: if verbose: sys.stdout.write(".") test,ldb,lfile = compare_data(w.id) if not test: if verbose: print w.id,lfile,ldb try: rowdata = rdata(w.csvfilename) if rowdata and len(rowdata.df): update_strokedata(w.id,rowdata.df) except IOError, AttributeError: pass if lfile==0: # if not ldb - delete workout try: data = read_df_sql(w.id) try: datalength = len(data) except AttributeError: datalength = 0 if datalength != 0: data.rename(columns = columndict,inplace=True) res = data.to_csv(w.csvfilename+'.gz', index_label='index', compression='gzip') print 'adding csv file' else: print w.id,' No 
stroke records anywhere' w.delete() except: print 'failed' print str(sys.exc_info()[0]) pass # A wrapper around the rowingdata class, with some error catching def rdata(file,rower=rrower()): try: res = rrdata(csvfile=file,rower=rower) except IOError,IndexError: try: res = rrdata(csvfile=file+'.gz',rower=rower) except IOError,IndexError: res = 0 except: res = 0 return res # Remove all stroke data for workout ID from database def delete_strokedata(id): engine = create_engine(database_url, echo=False) query = sa.text('DELETE FROM strokedata WHERE workoutid={id};'.format( id=id, )) with engine.connect() as conn, conn.begin(): try: result = conn.execute(query) except: print "Database Locked" conn.close() engine.dispose() # Replace stroke data in DB with data from CSV file def update_strokedata(id,df): delete_strokedata(id) rowdata = dataprep(df,id=id,bands=True,barchart=True,otwpower=True) # Test that all data are of a numerical time def testdata(time,distance,pace,spm): t1 = np.issubdtype(time,np.number) t2 = np.issubdtype(distance,np.number) t3 = np.issubdtype(pace,np.number) t4 = np.issubdtype(spm,np.number) return t1 and t2 and t3 and t4 # Get data from DB for one workout (fetches all data). If data # is not in DB, read from CSV file (and create DB entry) def getrowdata_db(id=0,doclean=False,convertnewtons=True): data = read_df_sql(id) data['x_right'] = data['x_right']/1.0e6 if data.empty: rowdata,row = getrowdata(id=id) if rowdata: data = dataprep(rowdata.df,id=id,bands=True,barchart=True,otwpower=True) else: data = pd.DataFrame() # returning empty dataframe else: row = Workout.objects.get(id=id) if data['efficiency'].mean() == 0 and data['power'].mean() != 0: data = add_efficiency(id=id) if doclean: data = clean_df_stats(data,ignorehr=True) # these two lines seem redundant ?? 
#data['averageforce'] = data['averageforce'] #data['peakforce'] = data['peakforce'] return data,row # Fetch a subset of the data from the DB def getsmallrowdata_db(columns,ids=[],doclean=True,workstrokesonly=True, convertnewtons=False): prepmultipledata(ids) data = read_cols_df_sql(ids,columns) if convertnewtons: if 'peakforce' in columns: data['peakforce'] = data['peakforce']*lbstoN if 'averageforce' in columns: data['averageforce'] = data['averageforce']*lbstoN if doclean: data = clean_df_stats(data,ignorehr=True, workstrokesonly=workstrokesonly) return data # Fetch both the workout and the workout stroke data (from CSV file) def getrowdata(id=0,convertnewtons=True): # check if valid ID exists (workout exists) row = Workout.objects.get(id=id) f1 = row.csvfilename # get user r = row.user u = r.user rr = rrower(hrmax=r.max,hrut2=r.ut2, hrut1=r.ut1,hrat=r.at, hrtr=r.tr,hran=r.an,ftp=r.ftp) rowdata = rdata(f1,rower=rr) return rowdata,row # Checks if all rows for a list of workout IDs have entries in the # stroke_data table. 
# If this is not the case, it creates the stroke data.
# In theory, this should never yield any work, but it's a good
# safety net for programming errors elsewhere in the app
# Also used heavily when I moved from CSV file only to CSV+Stroke data
def prepmultipledata(ids,verbose=False):
    """Make sure every workout in `ids` has rows in the strokedata table.

    Workout ids that have no rows in `strokedata` are read from their CSV
    file (through `getrowdata`) and pushed to the table with `dataprep`.

    ids     -- iterable of workout ids; converted to int when possible
    verbose -- when true, print each id that is being processed

    Returns the list of ids for which stroke data was missing.
    """
    query = sa.text('SELECT DISTINCT workoutid FROM strokedata')
    engine = create_engine(database_url, echo=False)

    # dispose of the engine even when the query fails
    try:
        with engine.connect() as conn, conn.begin():
            rows = conn.execute(query).fetchall()
    finally:
        engine.dispose()

    known = set(itertools.chain.from_iterable(rows))

    try:
        wanted = [int(workoutid) for workoutid in ids]
    except ValueError:
        wanted = ids

    missing = list(set(wanted)-known)
    for workoutid in missing:
        rowdata,row = getrowdata(id=workoutid)
        if verbose:
            print(workoutid)
        # rdata() yields 0 when the CSV file could not be read
        if rowdata and len(rowdata.df):
            dataprep(rowdata.df,id=workoutid,bands=True,
                     barchart=True,otwpower=True)

    return missing


# Convert the force columns of a stroke data frame from lbs to Newton
def _forces_to_newtons(df):
    """Multiply 'peakforce' and 'averageforce' (when present) by lbstoN.

    The data frame is changed in place and also returned.
    """
    for column in ('peakforce','averageforce'):
        if column in df:
            df[column] = df[column]*lbstoN

    return df


# Run a SELECT on the stroke data DB and return the result as a data frame
def _query_strokedata(sql):
    """Execute the SQL string `sql` and return the rows as a DataFrame.

    A fresh engine is created for the query and always disposed of
    afterwards, also when the query raises.
    """
    engine = create_engine(database_url, echo=False)
    try:
        return pd.read_sql_query(sa.text(sql),engine)
    finally:
        engine.dispose()


# Read a set of columns for a set of workout ids, returns data as a
# pandas dataframe
def read_cols_df_sql(ids,columns,convertnewtons=True):
    """Read selected strokedata columns for a list of workout ids.

    ids            -- workout ids; they are cast to int before they are
                      put in the query (raises ValueError/TypeError for
                      values that are not integer-like)
    columns        -- requested column names; names that are not fields
                      of the StrokeData model (and the literal 'None')
                      are dropped. 'distance' and 'spm' are always
                      added. The list passed in is not modified.
    convertnewtons -- convert peakforce/averageforce from lbs to Newton

    Returns a DataFrame in which missing values are replaced by 0.
    An empty `ids` queries workoutid=0.
    """
    # drop columns that are not in the official list. Build a new list
    # instead of removing while iterating, which skipped elements.
    allowed = set(f.name for f in StrokeData._meta.get_fields())
    wanted = [c for c in columns if c in allowed and c != 'None']
    wanted = list(set(wanted+['distance','spm']))
    cls = ', '.join(wanted)

    # int() keeps arbitrary text out of the SQL string and avoids the
    # '1L' rendering of Python 2 longs in a formatted tuple
    idlist = [int(workoutid) for workoutid in ids]

    if len(idlist) == 0:
        where = 'workoutid=0'
    elif len(idlist) == 1:
        where = 'workoutid={id}'.format(id=idlist[0])
    else:
        where = 'workoutid IN ({ids})'.format(
            ids=', '.join(str(workoutid) for workoutid in idlist),
        )

    df = _query_strokedata(
        'SELECT {columns} FROM strokedata WHERE {where}'.format(
            columns=cls,
            where=where,
        ))

    df = df.fillna(value=0)

    if convertnewtons:
        _forces_to_newtons(df)

    return df


# Read stroke data from the DB for a Workout ID. Returns a pandas dataframe
def read_df_sql(id,convertnewtons=True):
    """Read all strokedata columns for one workout id.

    id             -- workout id, cast to int before it is put in the query
    convertnewtons -- convert peakforce/averageforce from lbs to Newton

    Returns a DataFrame in which missing values are replaced by 0.
    """
    df = _query_strokedata(
        'SELECT * FROM strokedata WHERE workoutid={id}'.format(id=int(id))
    )

    df = df.fillna(value=0)

    if convertnewtons:
        _forces_to_newtons(df)

    return df


# Get the necessary data from the strokedata table in the DB.
# For the flex plot
def smalldataprep(therows,xparam,yparam1,yparam2,convertnewtons=True):
    """Collect a few columns of several workouts in one data frame.

    therows        -- iterable of workouts (objects with `csvfilename`)
    xparam, yparam1, yparam2
                   -- column names to collect; a yparam2 of 'None'
                      is replaced by 'power'
    convertnewtons -- convert peakforce/averageforce from lbs to Newton

    Each workout is read from its CSV file, or from the gzipped file
    when the plain one cannot be read. Workouts that cannot be read or
    that contain no data are skipped. 'distance' and 'spm' are always
    part of the result.
    """
    if yparam2 == 'None':
        yparam2 = 'power'

    wanted = [xparam,yparam1,yparam2,'distance','spm']

    df = pd.DataFrame()
    for name in wanted:
        df[name] = []

    for workout in therows:
        f1 = workout.csvfilename
        for filename in (f1,f1+'.gz'):
            try:
                prepared = dataprep(rrdata(filename).df)
            except IOError:
                # try the gzipped file, or give up on this workout
                continue

            # dataprep returns 0 for an empty data frame
            if isinstance(prepared,pd.DataFrame):
                rowdata = pd.DataFrame(
                    {name: prepared[name] for name in wanted}
                )
                df = pd.concat([df,rowdata],ignore_index=True)
            break

    if convertnewtons:
        _forces_to_newtons(df)

    return df


# data fusion
def datafusion(id1,id2,columns,offset):
    """Merge columns of workout id2 into the stroke data of workout id1.

    id1     -- id of the workout that supplies the base data
    id2     -- id of the workout that supplies `columns`
    columns -- list of column names to take from workout id2; columns
               that do not exist in workout id1 are simply ignored there
    offset  -- timedelta-like object (days, seconds, microseconds) that
               is added to the time stamps of workout id2

    Returns a data frame sorted on time, with gaps interpolated
    linearly (at most 10 rows) and rows with equal time averaged.
    'time' and 'pace' are divided by 1000 and 'cumdist' is copied to
    'cum_dist'.
    """
    df1,w1 = getrowdata_db(id=id1)
    # 'cumdist' is kept on purpose, it is needed at the end
    df1 = df1.drop(['hr_ut2',
                    'hr_ut1',
                    'hr_at',
                    'hr_tr',
                    'hr_an',
                    'hr_max',
                    'ftime',
                    'fpace',
                    'workoutid',
                    'id'],
                   axis=1,errors='ignore')

    # Add coordinates to DataFrame
    latitude,longitude = get_latlon(id1)

    df1[' latitude'] = latitude
    df1[' longitude'] = longitude

    df2 = getsmallrowdata_db(['time']+columns,ids=[id2],doclean=False)

    offsetmillisecs = offset.seconds*1000+offset.microseconds/1000.
    offsetmillisecs += offset.days*(3600*24*1000)
    df2['time'] = df2['time']+offsetmillisecs

    # the fused columns come from the second workout only. A column
    # that is missing in the first workout is not an error.
    df1 = df1.drop(list(columns),axis=1,errors='ignore')

    df = pd.concat([df1,df2],ignore_index=True)
    df = df.sort_values(['time'])
    df = df.interpolate(method='linear',axis=0,limit_direction='both',
                        limit=10)
    df.fillna(method='bfill',inplace=True)

    # Some new stuff to try out
    df = df.groupby('time',axis=0).mean()
    df['time'] = df.index
    df.reset_index(drop=True,inplace=True)

    df['time'] = df['time']/1000.
    df['pace'] = df['pace']/1000.
    df['cum_dist'] = df['cumdist']

    return df


def fix_newtons(id=0,limit=3000):
    """Rebuild the stroke data of a workout with implausible peak forces.

    When the mean of the 'peakforce' column of workout `id` is larger
    than `limit`, the stroke data in the DB are replaced by the data
    from the CSV file of the workout.
    """
    rowdata = getsmallrowdata_db(['peakforce'],ids=[id],doclean=False)
    try:
        peakforce = rowdata['peakforce']
    except KeyError:
        return

    if peakforce.mean() > limit:
        w = Workout.objects.get(id=id)
        print("fixing  {0}".format(id))
        rowdata = rdata(w.csvfilename)
        # rdata() yields 0 when the CSV file could not be read
        if rowdata and len(rowdata.df):
            update_strokedata(w.id,rowdata.df)


def add_efficiency(id=0):
    """Calculate the 'efficiency' column and store it in the DB.

    Efficiency is 100 * (2.8 * velocity**3) / power, with velocity
    calculated as 500 / (pace/1000). Infinite values are replaced by
    NaN and then forward filled. The existing stroke data of the
    workout are deleted and, when `id` is not 0, written again
    including the new column.

    Returns the data frame with the 'efficiency' column.
    """
    # NOTE(review): convertnewtons=False is passed on, but the frame that
    # comes back is written to the DB again below - confirm that
    # getrowdata_db really leaves the force columns unconverted
    rowdata,row = getrowdata_db(id=id,doclean=False,convertnewtons=False)
    power = rowdata['power']
    pace = rowdata['pace']/1.0e3
    velo = 500./pace
    ergpw = 2.8*velo**3
    efficiency = 100.*ergpw/power

    efficiency = efficiency.replace([-np.inf,np.inf],np.nan)
    # fillna returns a new Series, the result has to be kept
    efficiency = efficiency.fillna(method='ffill')
    rowdata['efficiency'] = efficiency

    delete_strokedata(id)

    if id != 0:
        rowdata['workoutid'] = id
        engine = create_engine(database_url, echo=False)
        # dispose of the engine even when writing fails
        try:
            with engine.connect() as conn, conn.begin():
                rowdata.to_sql('strokedata',engine,
                               if_exists='append',index=False)
        finally:
            engine.dispose()

    return rowdata


# This is the main routine.
# it reindexes, sorts, filters, and smooths the data, then # saves it to the stroke_data table in the database # Takes a rowingdata object's DataFrame as input def dataprep(rowdatadf,id=0,bands=True,barchart=True,otwpower=True, empower=True,inboard=0.88): if rowdatadf.empty: return 0 rowdatadf.set_index([range(len(rowdatadf))],inplace=True) t = rowdatadf.ix[:,'TimeStamp (sec)'] t = pd.Series(t-rowdatadf.ix[0,'TimeStamp (sec)']) row_index = rowdatadf.ix[:,' Stroke500mPace (sec/500m)'] > 3000 rowdatadf.loc[row_index,' Stroke500mPace (sec/500m)'] = 3000. p = rowdatadf.ix[:,' Stroke500mPace (sec/500m)'] hr = rowdatadf.ix[:,' HRCur (bpm)'] spm = rowdatadf.ix[:,' Cadence (stokes/min)'] cumdist = rowdatadf.ix[:,'cum_dist'] power = rowdatadf.ix[:,' Power (watts)'] averageforce = rowdatadf.ix[:,' AverageDriveForce (lbs)'] drivelength = rowdatadf.ix[:,' DriveLength (meters)'] try: workoutstate = rowdatadf.ix[:,' WorkoutState'] except KeyError: workoutstate = 0*hr peakforce = rowdatadf.ix[:,' PeakDriveForce (lbs)'] forceratio = averageforce/peakforce forceratio = forceratio.fillna(value=0) try: drivetime = rowdatadf.ix[:,' DriveTime (ms)'] recoverytime = rowdatadf.ix[:,' StrokeRecoveryTime (ms)'] rhythm = 100.*drivetime/(recoverytime+drivetime) rhythm = rhythm.fillna(value=0) except: rhythm = 0.0*forceratio f = rowdatadf['TimeStamp (sec)'].diff().mean() if f != 0: windowsize = 2*(int(10./(f)))+1 else: windowsize = 1 if windowsize <= 3: windowsize = 5 if windowsize > 3 and windowsize0: drivelength = arclength elif drivelength.mean() == 0: drivelength = driveenergy/(averageforce*4.44822) try: slip = rowdatadf.ix[:,'slip'] except KeyError: slip = 0*power totalangle = finish-catch effectiveangle = finish-wash-catch-slip if windowsize > 3 and windowsize