# All the data preparation, data cleaning and data mangling should
# be defined here
from rowers.models import Workout, User, Rower, StrokeData
from rowingdata import rowingdata as rrdata
from rowers.tasks import handle_sendemail_unrecognized
from rowingdata import rower as rrower
from rowingdata import main as rmain
from rowingdata import get_file_type
from pandas import DataFrame, Series
from pytz import timezone as tz, utc
from django.utils.timezone import get_current_timezone

thetimezone = get_current_timezone()

import pandas as pd
import numpy as np
import itertools
from django.conf import settings
from sqlalchemy import create_engine
import sqlalchemy as sa
import sys

user = settings.DATABASES['default']['USER']
password = settings.DATABASES['default']['PASSWORD']
database_name = settings.DATABASES['default']['NAME']
host = settings.DATABASES['default']['HOST']
port = settings.DATABASES['default']['PORT']

database_url = 'mysql://{user}:{password}@{host}:{port}/{database_name}'.format(
    user=user,
    password=password,
    database_name=database_name,
    host=host,
    port=port,
)

# Use SQLite local database when we're in debug mode
if settings.DEBUG or user == '':
    # database_url = 'sqlite:///db.sqlite3'
    database_url = 'sqlite:///' + database_name

# mapping the DB column names to the CSV file column names
columndict = {
    'time': 'TimeStamp (sec)',
    'hr': ' HRCur (bpm)',
    'pace': ' Stroke500mPace (sec/500m)',
    'spm': ' Cadence (stokes/min)',
    'power': ' Power (watts)',
    'averageforce': ' AverageDriveForce (lbs)',
    'drivelength': ' DriveLength (meters)',
    'peakforce': ' PeakDriveForce (lbs)',
    'distance': ' Horizontal (meters)',
    'catch': 'catch',
    'finish': 'finish',
    'peakforceangle': 'peakforceangle',
    'wash': 'wash',
    # NOTE(review): 'slip' maps to the 'wash' column, exactly like the entry
    # above. This looks like a copy/paste slip ('slip' expected?) -- confirm
    # against the CSV writer before changing it.
    'slip': 'wash',
}

from scipy.signal import savgol_filter
import datetime

# (column, lowest accepted value, highest accepted value); None means the
# column has no bound on that side.  Readings outside the range become NaN.
_STAT_VALID_RANGES = (
    ('hr', 30, None),
    ('rhythm', 5, 70),
    ('power', 20, None),
    ('drivelength', 0.5, None),
    ('forceratio', 0.2, 1.0),
    ('spm', 10, 60),
    ('drivespeed', 0.5, 4),
    ('driveenergy', 100, 2000),
)

# Workout state value that marks rest strokes.  The original listing also
# named work states (1, 4, 5, 8, 9, 6, 7) and transition states
# (0, 2, 10, 11, 12, 13); neither group is used for filtering.
_WORKOUTSTATES_REST = [3]

# StrokeData fields that are left out of the statistics field list.
# 'workoutstate' is deliberately not in this list.
_STATS_EXCLUDED_FIELDS = (
    'workoutid',
    'ergpace',
    'hr_an',
    'hr_tr',
    'hr_at',
    'hr_ut2',
    'hr_ut1',
    'time',
    'distance',
    'nowindpace',
    'fnowindpace',
    'fergpace',
    'equivergpower',
    'fpace',
    'pace',
    'id',
    'ftime',
    'x_right',
    'hr_max',
    'hr_bottom',
    'cumdist',
)


def clean_df_stats(datadf, workstrokesonly=True):
    """Return a copy of *datadf* with implausible readings replaced by NaN.

    Steps, in order:

    1. Negative values are clipped to 0 and every 0 is then replaced by NaN.
    2. For each column listed in ``_STAT_VALID_RANGES`` that is present in
       the frame, values outside the accepted range are replaced by NaN.
       Columns that are absent are skipped.
    3. If *workstrokesonly* is ``True`` (or the string ``'True'``) and the
       frame has a ``workoutstate`` column, rows whose state is a rest state
       are dropped.

    The frame passed in is not modified: ``clip`` returns a new frame and
    all further changes are applied to that new frame.
    """
    # clean data remove zeros and negative values
    datadf = datadf.clip(lower=0)
    datadf.replace(to_replace=0, value=np.nan, inplace=True)

    # clean data for useful ranges per column
    for column, lowest, highest in _STAT_VALID_RANGES:
        if column not in datadf:
            continue
        if lowest is not None:
            datadf.loc[datadf[column] < lowest, column] = np.nan
        if highest is not None:
            datadf.loc[datadf[column] > highest, column] = np.nan

    # The flag may arrive as a real boolean or as the string 'True'.
    if workstrokesonly in (True, 'True'):
        # Frames without a workoutstate column are returned unfiltered.
        if 'workoutstate' in datadf:
            is_rest = datadf['workoutstate'].isin(_WORKOUTSTATES_REST)
            datadf = datadf[~is_rest]

    return datadf


def getstatsfields():
    """Return the StrokeData fields that are offered for statistics.

    Returns a tuple ``(fieldlist, fielddict)`` where *fielddict* maps each
    remaining field name to its ``verbose_name`` and *fieldlist* holds the
    keys of that dict.  Fields named in ``_STATS_EXCLUDED_FIELDS`` are
    removed; a name that the model does not define is ignored.
    """
    # Get field names and remove those that are not useful in stats
    fielddict = {
        field.name: field.verbose_name
        for field in StrokeData._meta.get_fields()
    }

    for name in _STATS_EXCLUDED_FIELDS:
        fielddict.pop(name, None)

    # list(dict) yields the keys on both Python 2 and Python 3.
    fieldlist = list(fielddict)

    return fieldlist, fielddict


# A string representation for time
# deltas
def niceformat(values):
    """Return a list with ``strfdelta`` applied to every item of *values*."""
    return [strfdelta(value) for value in values]


# A nice printable format for time delta values
def strfdelta(tdelta):
    """Format a time delta as ``MM:SS.t`` (minutes, seconds, tenths).

    *tdelta* is either an object exposing ``seconds`` and ``microseconds``
    attributes (for example ``datetime.timedelta``) or a numpy
    ``timedelta64`` value.  For the attribute-based form only ``seconds``
    and ``microseconds`` are read, so a ``days`` component is not included
    in the result.

    Raises ``AttributeError`` if *tdelta* supports neither form.
    """
    try:
        whole_seconds = tdelta.seconds
        microseconds = tdelta.microseconds
    except AttributeError:
        # numpy timedelta64: normalise to nanoseconds first so that the unit
        # of the incoming value does not matter, then stay in integer
        # arithmetic.  Dividing by float constants (60e9, 1e9) would turn
        # minutes and seconds into floats and render as "5.0:50.0.0".
        nanoseconds = int(
            tdelta.astype('timedelta64[ns]').astype(np.int64)
        )
        minutes, remainder = divmod(nanoseconds, 60 * 10**9)
        seconds, remainder = divmod(remainder, 10**9)
        tenths = remainder // 10**8
    else:
        minutes, seconds = divmod(whole_seconds, 60)
        tenths = int(microseconds / 1e5)

    return "{minutes:0>2}:{seconds:0>2}.{tenths:0>1}".format(
        minutes=minutes,
        seconds=seconds,
        tenths=tenths,
    )


# A nice printable format for pace values
def nicepaceformat(values):
    """Return a list of ``MM:SS.t`` strings, one per pace in *values*.

    Pace values are formatted exactly like any other time delta.
    """
    return niceformat(values)


# Number of seconds substituted for a missing (NaN) value: a 5:50 pace.
_DEFAULT_PACE_SECONDS = 350.


# Convert seconds to a Time Delta value, replacing NaN with a 5:50 pace
def timedeltaconv(x):
    """Return ``datetime.timedelta(seconds=x)``, using 350 s when *x* is NaN."""
    seconds = _DEFAULT_PACE_SECONDS if np.isnan(x) else x
    return datetime.timedelta(seconds=seconds)


# NOTE(review): the definition of new_workout_from_file that starts below runs
# past this line and is incomplete from here, so its text is kept exactly as
# found (not reformatted).
# Create new workout from file and store it in the database # This routine should be used everywhere in views.py and mailprocessing.py # Currently there is code duplication def new_workout_from_file(r,f2, workouttype='rower', title='Workout', notes=''): fileformat = get_file_type(f2) summary = '' # handle non-Painsled if (fileformat != 'csv'): # handle RowPro: if (fileformat == 'rp'): row = RowProParser(f2) # handle TCX if (fileformat == 'tcx'): row = TCXParser(f2) # handle Mystery if (fileformat == 'mystery'): row = MysteryParser(f2) # handle TCX no HR if (fileformat == 'tcxnohr'): row = TCXParserNoHR(f2) # handle ErgData if (fileformat == 'ergdata'): row = ErgDataParser(f2) # handle BoatCoach if (fileformat == 'boatcoach'): row = BoatCoachParser(f2) # handle painsled desktop if (fileformat == 'painsleddesktop'): row = painsledDesktopParser(f2) # handle speed coach GPS if (fileformat == 'speedcoach'): row = speedcoachParser(f2) # handle speed coach GPS 2 if (fileformat == 'speedcoach2'): row = SpeedCoach2Parser(f2) summary = row.allstats() # handle ErgStick if (fileformat == 'ergstick'): row =
ErgStickParser(f2) # handle FIT if (fileformat == 'fit'): row = FITParser(f2) s = fitsummarydata(f2) s.setsummary() summary = s.summarytext f_to_be_deleted = f2 # should delete file f2 = f2[:-4]+'o.csv' row.write_csv(f2,gzip=True) #os.remove(f2) try: os.remove(f_to_be_deleted) except: os.remove(f_to_be_deleted+'.gz') # make workout and put in database rr = rrower(hrmax=r.max,hrut2=r.ut2, hrut1=r.ut1,hrat=r.at, hrtr=r.tr,hran=r.an,ftp=r.ftp) row = rdata(f2,rower=rr) if row == 0: return HttpResponse("Error: CSV Data File Not Found") # auto smoothing pace = row.df[' Stroke500mPace (sec/500m)'].values velo = 500./pace f = row.df['TimeStamp (sec)'].diff().mean() windowsize = 2*(int(10./(f)))+1 if not 'originalvelo' in row.df: row.df['originalvelo'] = velo if windowsize > 3 and windowsize 3000 rowdatadf.loc[row_index,' Stroke500mPace (sec/500m)'] = 3000. p = rowdatadf.ix[:,' Stroke500mPace (sec/500m)'] hr = rowdatadf.ix[:,' HRCur (bpm)'] spm = rowdatadf.ix[:,' Cadence (stokes/min)'] cumdist = rowdatadf.ix[:,'cum_dist'] power = rowdatadf.ix[:,' Power (watts)'] averageforce = rowdatadf.ix[:,' AverageDriveForce (lbs)'] drivelength = rowdatadf.ix[:,' DriveLength (meters)'] try: workoutstate = rowdatadf.ix[:,' WorkoutState'] except KeyError: workoutstate = 0*hr peakforce = rowdatadf.ix[:,' PeakDriveForce (lbs)'] forceratio = averageforce/peakforce forceratio = forceratio.fillna(value=0) try: drivetime = rowdatadf.ix[:,' DriveTime (ms)'] recoverytime = rowdatadf.ix[:,' StrokeRecoveryTime (ms)'] rhythm = 100.*drivetime/(recoverytime+drivetime) rhythm = rhythm.fillna(value=0) except: rhythm = 0.0*forceratio f = rowdatadf['TimeStamp (sec)'].diff().mean() windowsize = 2*(int(10./(f)))+1 if windowsize <= 3: windowsize = 5 if windowsize > 3 and windowsize 3 and windowsize