from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

# Data prep used for testing purposes (no Django environment).
# Uses the debug SQLite database for stroke data.

from rowingdata import rowingdata as rrdata
from rowingdata import make_cumvalues
from rowingdata import rower as rrower
from rowingdata import main as rmain
from rowingdata import empower_bug_correction, get_empower_rigging, get_file_type
from rowingdata.csvparsers import make_cumvalues_array

from time import strftime
from pandas import DataFrame, Series
import os
import shutil
from shutil import copyfile
import pyarrow as pa
import pandas as pd
import numpy as np
import itertools
import dask.dataframe as dd
from dask.delayed import delayed
from sqlalchemy import create_engine
import sqlalchemy as sa

from rowsandall_app.settings import DATABASES
from rowsandall_app.settings_dev import DATABASES as DEV_DATABASES
from rowsandall_app.settings_dev import use_sqlite
from rowers.utils import lbstoN

import pytz
from timezonefinder import TimezoneFinder

# Database credentials, taken from the Django settings when available
try:
    user = DATABASES['default']['USER']
except KeyError:  # pragma: no cover
    user = ''

try:
    password = DATABASES['default']['PASSWORD']
except KeyError:  # pragma: no cover
    password = ''

try:
    database_name = DATABASES['default']['NAME']
except KeyError:  # pragma: no cover
    database_name = ''

try:
    host = DATABASES['default']['HOST']
except KeyError:  # pragma: no cover
    host = ''

try:
    port = DATABASES['default']['PORT']
except KeyError:  # pragma: no cover
    port = ''

database_url = 'mysql://{user}:{password}@{host}:{port}/{database_name}'.format(
    user=user,
    password=password,
    database_name=database_name,
    host=host,
    port=port,
)

database_name_dev = DEV_DATABASES['default']['NAME']
database_url_debug = database_url

if use_sqlite:
    database_url_debug = 'sqlite:///' + database_name_dev
    database_url = database_url_debug

# mapping the DB column names to the CSV file column names
columndict = {
    'time': 'TimeStamp (sec)',
    'hr': ' HRCur (bpm)',
    'velo': ' AverageBoatSpeed (m/s)',
    'pace': ' Stroke500mPace (sec/500m)',
    'spm': ' Cadence (stokes/min)',
    'power': ' Power (watts)',
    'averageforce': ' AverageDriveForce (lbs)',
    'drivelength': ' DriveLength (meters)',
    'peakforce': ' PeakDriveForce (lbs)',
    'distance': ' Horizontal (meters)',
    'catch': 'catch',
    'finish': 'finish',
    'peakforceangle': 'peakforceangle',
    'wash': 'wash',
    'slip': 'wash',
    'workoutstate': ' WorkoutState',
    'cumdist': 'cum_dist',
}

from scipy.signal import savgol_filter
import datetime


def niceformat(values):
    # format a list of timedelta-like values as "MM:SS.t" strings
    out = []
    for v in values:
        formattedv = strfdelta(v)
        out.append(formattedv)

    return out


def strfdelta(tdelta):
    # format a datetime.timedelta (or numpy timedelta64) as "MM:SS.t"
    try:
        minutes, seconds = divmod(tdelta.seconds, 60)
        tenths = int(tdelta.microseconds / 1e5)
    except AttributeError:  # pragma: no cover
        minutes, seconds = divmod(tdelta.view(np.int64), 60e9)
        seconds, rest = divmod(seconds, 1e9)
        tenths = int(rest / 1e8)

    res = "{minutes:0>2}:{seconds:0>2}.{tenths:0>1}".format(
        minutes=minutes,
        seconds=seconds,
        tenths=tenths,
    )

    return res


def nicepaceformat(values):
    # format a list of pace values as "MM:SS.t" strings
    out = []
    for v in values:
        formattedv = strfdelta(v)
        out.append(formattedv)

    return out


def timedeltaconv(x):
    # convert seconds to a timedelta, falling back to 350 s for NaN input
    if not np.isnan(x):
        dt = datetime.timedelta(seconds=x)
    else:  # pragma: no cover
        dt = datetime.timedelta(seconds=350.)

    return dt
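# Illustrative example of the formatting helpers above (a sketch, not part of
# the original module): timedeltaconv turns seconds into a timedelta and
# strfdelta renders it as "MM:SS.t".
#
#   >>> strfdelta(timedeltaconv(112.3))
#   '01:52.3'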
def rdata(file, rower=rrower()):  # pragma: no cover
    # read a rowingdata CSV, falling back to the gzipped version
    try:
        res = rrdata(csvfile=file, rower=rower)
    except IOError:
        try:
            res = rrdata(csvfile=file + '.gz', rower=rower)
        except IOError:
            res = 0

    return res


from rowers.utils import totaltime_sec_to_string
from rowers.metrics import dtypes


# Creates C2 stroke data
def create_c2_stroke_data_db(
        distance, duration, workouttype,
        workoutid, starttimeunix, csvfilename, debug=False):  # pragma: no cover

    # synthesize one sample per 10 meters, evenly spread over the duration
    nr_strokes = int(distance / 10.)

    totalseconds = duration.hour * 3600.
    totalseconds += duration.minute * 60.
    totalseconds += duration.second
    totalseconds += duration.microsecond / 1.e6

    try:
        spm = 60. * nr_strokes / totalseconds
    except ZeroDivisionError:
        spm = 20 * np.zeros(nr_strokes)

    try:
        step = totalseconds / float(nr_strokes)
    except ZeroDivisionError:
        return 0

    elapsed = np.arange(nr_strokes) * totalseconds / (float(nr_strokes - 1))
    dstep = distance / float(nr_strokes)
    d = np.arange(nr_strokes) * distance / (float(nr_strokes - 1))
    unixtime = starttimeunix + elapsed

    pace = 500. * totalseconds / distance

    if workouttype in ['rower', 'slides', 'dynamic']:
        try:
            velo = distance / totalseconds
        except ZeroDivisionError:
            velo = 0
        # erg pace-to-watts relation: watts = 2.8 * (speed in m/s) ** 3
        power = 2.8 * velo ** 3
    else:
        power = 0

    df = pd.DataFrame({
        'TimeStamp (sec)': unixtime,
        ' Horizontal (meters)': d,
        ' Cadence (stokes/min)': spm,
        ' Stroke500mPace (sec/500m)': pace,
        ' ElapsedTime (sec)': elapsed,
        ' Power (watts)': power,
        ' HRCur (bpm)': np.zeros(nr_strokes),
        ' longitude': np.zeros(nr_strokes),
        ' latitude': np.zeros(nr_strokes),
        ' DragFactor': np.zeros(nr_strokes),
        ' DriveLength (meters)': np.zeros(nr_strokes),
        ' StrokeDistance (meters)': np.zeros(nr_strokes),
        ' DriveTime (ms)': np.zeros(nr_strokes),
        ' StrokeRecoveryTime (ms)': np.zeros(nr_strokes),
        ' AverageDriveForce (lbs)': np.zeros(nr_strokes),
        ' PeakDriveForce (lbs)': np.zeros(nr_strokes),
        ' lapIdx': np.zeros(nr_strokes),
        'cum_dist': d
    })

    timestr = strftime("%Y%m%d-%H%M%S")
    df[' ElapsedTime (sec)'] = df['TimeStamp (sec)']

    res = df.to_csv(csvfilename, index_label='index',
                    compression='gzip')

    data = dataprep(df, id=workoutid, bands=False, debug=debug)

    return data
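# Worked example for the generator above (illustrative only, not from the
# original module): for distance=2000 m and a 0:07:00 duration (420 s) it
# creates nr_strokes = 200 evenly spaced samples with
# spm = 60*200/420 ≈ 28.6, pace = 500*420/2000 = 105 sec/500m and, for a
# 'rower' workout, power = 2.8*(2000/420)**3 ≈ 302 W on every sample.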
# Saves C2 stroke data to CSV and database
def add_c2_stroke_data_db(strokedata, workoutid, starttimeunix, csvfilename,
                          debug=False, workouttype='rower'):

    # stroke times arrive in tenths of seconds; build cumulative time and lap index
    res = make_cumvalues(0.1 * strokedata['t'])
    cum_time = res[0]
    lapidx = res[1]
    unixtime = cum_time + starttimeunix
    # unixtime[0] = starttimeunix
    seconds = 0.1 * strokedata.loc[:, 't']

    nr_rows = len(unixtime)

    try:  # pragma: no cover
        latcoord = strokedata.loc[:, 'lat']
        loncoord = strokedata.loc[:, 'lon']
    except:
        latcoord = np.zeros(nr_rows)
        loncoord = np.zeros(nr_rows)

    try:
        strokelength = strokedata.loc[:, 'strokelength']
    except:
        strokelength = np.zeros(nr_rows)

    dist2 = 0.1 * strokedata.loc[:, 'd']

    try:
        spm = strokedata.loc[:, 'spm']
    except KeyError:  # pragma: no cover
        spm = 0 * dist2

    try:
        hr = strokedata.loc[:, 'hr']
    except KeyError:  # pragma: no cover
        hr = 0 * spm

    pace = strokedata.loc[:, 'p'] / 10.
    pace = np.clip(pace, 0, 1e4)
    pace = pace.replace(0, 300)
    velo = 500. / pace
    power = 2.8 * velo ** 3

    if workouttype == 'bike':  # pragma: no cover
        velo = 1000. / pace

    # save csv
    # Create data frame with all necessary data to write to csv
    df = pd.DataFrame({
        'TimeStamp (sec)': unixtime,
        ' Horizontal (meters)': dist2,
        ' Cadence (stokes/min)': spm,
        ' HRCur (bpm)': hr,
        ' longitude': loncoord,
        ' latitude': latcoord,
        ' Stroke500mPace (sec/500m)': pace,
        ' Power (watts)': power,
        ' DragFactor': np.zeros(nr_rows),
        ' DriveLength (meters)': np.zeros(nr_rows),
        ' StrokeDistance (meters)': strokelength,
        ' DriveTime (ms)': np.zeros(nr_rows),
        ' StrokeRecoveryTime (ms)': np.zeros(nr_rows),
        ' AverageDriveForce (lbs)': np.zeros(nr_rows),
        ' PeakDriveForce (lbs)': np.zeros(nr_rows),
        ' lapIdx': lapidx,
        ' WorkoutState': 4,
        ' ElapsedTime (sec)': seconds,
        'cum_dist': dist2
    })

    df = df.sort_values(by='TimeStamp (sec)', ascending=True)

    timestr = strftime("%Y%m%d-%H%M%S")

    # Create CSV file name and save data to CSV file
    res = df.to_csv(csvfilename, index_label='index',
                    compression='gzip')

    try:
        data = dataprep(df, id=workoutid, bands=False, debug=debug)
    except:  # pragma: no cover
        return 0

    return data


def handle_nonpainsled(f2, fileformat, summary=''):  # pragma: no cover
    oarlength = 2.89
    inboard = 0.88

    # handle RowPro
    if (fileformat == 'rp'):
        row = RowProParser(f2)

    # handle TCX
    if (fileformat == 'tcx'):
        row = TCXParser(f2)

    # handle Mystery
    if (fileformat == 'mystery'):
        row = MysteryParser(f2)

    # handle RowPerfect
    if (fileformat == 'rowperfect3'):
        row = RowPerfectParser(f2)

    # handle ErgData
    if (fileformat == 'ergdata'):
        row = ErgDataParser(f2)

    # handle CoxMate
    if (fileformat == 'coxmate'):
        row = CoxMateParser(f2)

    # handle Mike
    if (fileformat == 'bcmike'):
        row = BoatCoachAdvancedParser(f2)

    # handle BoatCoach OTW
    if (fileformat == 'boatcoachotw'):
        row = BoatCoachOTWParser(f2)

    # handle BoatCoach
    if (fileformat == 'boatcoach'):
        row = BoatCoachParser(f2)

    # handle painsled desktop
    if (fileformat == 'painsleddesktop'):
        row = painsledDesktopParser(f2)

    # handle speed coach GPS
    if (fileformat == 'speedcoach'):
        row = speedcoachParser(f2)

    # handle speed coach GPS 2
    if (fileformat == 'speedcoach2'):
        row = SpeedCoach2Parser(f2)
        try:
            oarlength, inboard = get_empower_rigging(f2)
            summary = row.allstats()
        except:
            pass

    # handle ErgStick
    if (fileformat == 'ergstick'):
        row = ErgStickParser(f2)

    # handle FIT
    if (fileformat == 'fit'):
        row = FITParser(f2)
        s = fitsummarydata(f2)
        s.setsummary()
        summary = s.summarytext

    f_to_be_deleted = f2
    # should delete file
    f2 = f2[:-4] + 'o.csv'
    row.write_csv(f2, gzip=True)
    # os.remove(f2)
    try:
        os.remove(f_to_be_deleted)
    except:
        os.remove(f_to_be_deleted + '.gz')

    return (f2, summary, oarlength, inboard)


def delete_strokedata(id, debug=False):
    # remove the cached per-workout parquet data, if present
    dirname = 'media/strokedata_{id}.parquet.gz'.format(id=id)
    try:
        shutil.rmtree(dirname)
    except FileNotFoundError:  # pragma: no cover
        pass


def update_strokedata(id, df, debug=False):
    delete_strokedata(id, debug=debug)
    if debug:  # pragma: no cover
        print("updating ", id)

    rowdata = dataprep(df, id=id, bands=True, barchart=True, otwpower=True,
                       debug=debug)

    return rowdata
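# Illustrative usage of the stroke-data cache helpers above (a sketch, not
# part of the original module): per-workout stroke data is kept under
# 'media/strokedata_<id>.parquet.gz', so refreshing workout 1234 from a
# stroke DataFrame df amounts to
#
#   rowdata = update_strokedata(1234, df)
#
# which first removes any stale parquet data for that id and then re-runs
# dataprep on the new frame.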
def update_empower(id, inboard, oarlength, boattype, df, f1,
                   debug=False):  # pragma: no cover
    corr_factor = 1.0
    if 'x' in boattype:
        # sweep
        a = 0.06
        b = 0.275
    else:
        # scull
        a = 0.15
        b = 0.275

    corr_factor = empower_bug_correction(oarlength, inboard, a, b)

    success = False

    try:
        df['power empower old'] = df[' Power (watts)']
        df[' Power (watts)'] = df[' Power (watts)'] * corr_factor
        df['driveenergy empower old'] = df['driveenergy']
        df['driveenergy'] = df['driveenergy'] * corr_factor
        success = True
    except KeyError:
        pass

    if success:
        delete_strokedata(id, debug=debug)
        if debug:  # pragma: no cover
            print("updated ", id)
            print("correction ", corr_factor)
    else:
        if debug:  # pragma: no cover
            print("not updated ", id)

    rowdata = dataprep(df, id=id, bands=True, barchart=True, otwpower=True,
                       debug=debug)

    row = rrdata(df=df)
    row.write_csv(f1, gzip=True)

    return success


def testdata(time, distance, pace, spm):  # pragma: no cover
    # check that all four series have a numeric dtype
    t1 = np.issubdtype(time, np.number)
    t2 = np.issubdtype(distance, np.number)
    t3 = np.issubdtype(pace, np.number)
    t4 = np.issubdtype(spm, np.number)

    return t1 and t2 and t3 and t4


def getsmallrowdata_db(columns, ids=[], debug=False):
    # read the requested columns from the per-workout parquet files
    csvfilenames = ['media/strokedata_{id}.parquet.gz'.format(id=id) for id in ids]
    data = []
    columns = [c for c in columns if c != 'None']
    df = pd.DataFrame()

    if len(ids) > 1:  # pragma: no cover
        for id, f in zip(ids, csvfilenames):
            try:
                df = pd.read_parquet(f, columns=columns, engine='pyarrow')
                data.append(df)
            except OSError:
                pass
            except pa.lib.ArrowInvalid:
                pass
        try:
            df = pd.concat(data, axis=0)
        except ValueError:
            df = pd.DataFrame()
    elif len(ids) == 1:
        try:
            df = pd.read_parquet(csvfilenames[0], columns=columns, engine='pyarrow')
        except (OSError, IndexError):  # pragma: no cover
            df = pd.DataFrame()
    else:  # pragma: no cover
        df = pd.DataFrame()

    return df


def update_workout_field_sql(workoutid, fieldname, value, debug=False):
    if debug:  # pragma: no cover
        engine = create_engine(database_url_debug, echo=False)
    else:
        engine = create_engine(database_url, echo=False)

    table = 'rowers_workout'
    query = "UPDATE %s SET %s = '%s' WHERE `id` = %s;" % (table, fieldname, value, workoutid)

    with engine.connect() as conn, conn.begin():
        result = conn.execute(query)

    conn.close()
    engine.dispose()

    return 1


def update_c2id_sql(id, c2id):  # pragma: no cover
    engine = create_engine(database_url, echo=False)

    table = 'rowers_workout'
    query = "UPDATE %s SET uploadedtoc2 = %s WHERE `id` = %s;" % (table, c2id, id)

    with engine.connect() as conn, conn.begin():
        result = conn.execute(query)

    conn.close()
    engine.dispose()

    return 1


def read_cols_df_sql(ids, columns, debug=False):  # pragma: no cover
    columns = list(columns) + ['distance', 'spm']
    columns = [x for x in columns if x != 'None']
    columns = list(set(columns))
    ids = [int(id) for id in ids]

    if len(ids) == 0:
        return pd.DataFrame()
    elif len(ids) == 1:
        try:
            filename = 'media/strokedata_{id}.parquet.gz'.format(id=ids[0])
            df = pd.read_parquet(filename, columns=columns)
        except OSError:
            pass
    else:
        data = []
        filenames = ['media/strokedata_{id}.parquet.gz'.format(id=id) for id in ids]
        for id, f in zip(ids, filenames):
            try:
                df = pd.read_parquet(f, columns=columns)
                data.append(df)
            except OSError:
                pass
        df = pd.concat(data, axis=0)

    return df


def read_df_sql(id, debug=False):  # pragma: no cover
    try:
        f = 'media/strokedata_{id}.parquet.gz'.format(id=id)
        df = pd.read_parquet(f)
    except OSError:
        pass

    df = df.fillna(value=0)

    return df


def getcpdata_sql(rower_id, table='cpdata', debug=False):  # pragma: no cover
    if debug:  # pragma: no cover
        engine = create_engine(database_url_debug, echo=False)
    else:
        engine = create_engine(database_url, echo=False)

    query = sa.text('SELECT * from {table} WHERE user={rower_id};'.format(
        rower_id=rower_id,
        table=table,
    ))

    connection = engine.raw_connection()

    df = pd.read_sql_query(query, engine)

    return df


def deletecpdata_sql(rower_id, table='cpdata', debug=False):  # pragma: no cover
    if debug:  # pragma: no cover
        engine = create_engine(database_url_debug, echo=False)
    else:
        engine = create_engine(database_url, echo=False)

    query = sa.text('DELETE from {table} WHERE user={rower_id};'.format(
        rower_id=rower_id,
        table=table,
    ))

    with engine.connect() as conn, conn.begin():
        try:
            result = conn.execute(query)
        except:  # pragma: no cover
            print("Database locked")

    conn.close()
    engine.dispose()
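# Illustrative usage of the raw-SQL helpers above (a sketch, not from the
# original module; the field name is hypothetical): updating a single column
# of a workout row issues one UPDATE against rowers_workout, e.g.
#
#   update_workout_field_sql(1234, 'somefield', 1, debug=True)
#
# Note that these helpers interpolate values directly into the SQL string,
# so they are only intended for trusted, internally generated values.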
def delete_agegroup_db(age, sex, weightcategory, debug=False):
    if debug:  # pragma: no cover
        engine = create_engine(database_url_debug, echo=False)
    else:  # pragma: no cover
        engine = create_engine(database_url, echo=False)

    query = sa.text('DELETE from {table} WHERE age={age} and weightcategory = {weightcategory} and sex={sex};'.format(
        sex=sex,
        age=age,
        weightcategory=weightcategory,
        table='calcagegrouprecords'
    ))

    with engine.connect() as conn, conn.begin():
        try:
            result = conn.execute(query)
        except:  # pragma: no cover
            print("Database locked")

    conn.close()
    engine.dispose()


def update_agegroup_db(age, sex, weightcategory, wcdurations, wcpower,
                       debug=False):
    delete_agegroup_db(age, sex, weightcategory, debug=debug)

    wcdurations = [None if type(y) is float and np.isnan(y) else y for y in wcdurations]
    wcpower = [None if type(y) is float and np.isnan(y) else y for y in wcpower]

    df = pd.DataFrame(
        {
            'duration': wcdurations,
            'power': wcpower,
        }
    )

    df['sex'] = sex
    df['age'] = age
    df['weightcategory'] = weightcategory

    df.replace([np.inf, -np.inf], np.nan, inplace=True)
    df.dropna(axis=0, inplace=True)

    if debug:  # pragma: no cover
        engine = create_engine(database_url_debug, echo=False)
    else:
        engine = create_engine(database_url, echo=False)

    table = 'calcagegrouprecords'

    with engine.connect() as conn, conn.begin():
        df.to_sql(table, engine, if_exists='append', index=False)

    conn.close()
    engine.dispose()


def updatecpdata_sql(rower_id, delta, cp, table='cpdata',
                     distance=pd.Series([]), debug=False):
    deletecpdata_sql(rower_id, table=table, debug=debug)

    df = pd.DataFrame(
        {
            'delta': delta,
            'cp': cp,
            'user': rower_id
        }
    )

    if not distance.empty:
        df['distance'] = distance

    if debug:  # pragma: no cover
        engine = create_engine(database_url_debug, echo=False)
    else:
        engine = create_engine(database_url, echo=False)

    with engine.connect() as conn, conn.begin():
        df.to_sql(table, engine, if_exists='append', index=False)

    conn.close()
    engine.dispose()


def smalldataprep(therows, xparam, yparam1, yparam2):  # pragma: no cover
    df = pd.DataFrame()

    if yparam2 == 'None':
        yparam2 = 'power'

    df[xparam] = []
    df[yparam1] = []
    df[yparam2] = []
    df['distance'] = []
    df['spm'] = []

    for workout in therows:
        f1 = workout.csvfilename
        try:
            rowdata = dataprep(rrdata(csvfile=f1).df)
            rowdata = pd.DataFrame({
                xparam: rowdata[xparam],
                yparam1: rowdata[yparam1],
                yparam2: rowdata[yparam2],
                'distance': rowdata['distance'],
                'spm': rowdata['spm'],
            })
            df = pd.concat([df, rowdata], ignore_index=True)
        except IOError:
            try:
                rowdata = dataprep(rrdata(csvfile=f1 + '.gz').df)
                rowdata = pd.DataFrame({
                    xparam: rowdata[xparam],
                    yparam1: rowdata[yparam1],
                    yparam2: rowdata[yparam2],
                    'distance': rowdata['distance'],
                    'spm': rowdata['spm'],
                })
                df = pd.concat([df, rowdata], ignore_index=True)
            except IOError:
                pass

    return df
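# Illustrative note on dataprep below (a sketch, not from the original
# module; the file path is hypothetical): it takes the raw CSV-style frame
# (columns such as 'TimeStamp (sec)' and ' Stroke500mPace (sec/500m)', cf.
# columndict above) and derives plotting fields such as elapsed time,
# velocity, force ratio and stroke rhythm, e.g.
#
#   row = rrdata(csvfile='media/somefile.csv.gz')
#   prepped = dataprep(row.df, id=1234, debug=True)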
def dataprep(rowdatadf, id=0, bands=True, barchart=True, otwpower=True,
             empower=True, debug=False, inboard=0.88, forceunit='lbs'):
    if rowdatadf.empty:  # pragma: no cover
        if debug:  # pragma: no cover
            print("empty")
        return 0

    # rowdatadf.set_index([range(len(rowdatadf))],inplace=True)
    t = rowdatadf.loc[:, 'TimeStamp (sec)']
    t = pd.Series(t - rowdatadf.loc[:, 'TimeStamp (sec)'].iloc[0])

    row_index = rowdatadf.loc[:, ' Stroke500mPace (sec/500m)'] > 3000
    rowdatadf.loc[row_index, ' Stroke500mPace (sec/500m)'] = 3000.

    p = rowdatadf.loc[:, ' Stroke500mPace (sec/500m)']

    try:
        velo = rowdatadf.loc[:, ' AverageBoatSpeed (m/s)']
    except KeyError:
        velo = 500. / p

    hr = rowdatadf.loc[:, ' HRCur (bpm)']
    spm = rowdatadf.loc[:, ' Cadence (stokes/min)']
    cumdist = rowdatadf.loc[:, 'cum_dist']
    power = rowdatadf.loc[:, ' Power (watts)']
    averageforce = rowdatadf.loc[:, ' AverageDriveForce (lbs)']
    drivelength = rowdatadf.loc[:, ' DriveLength (meters)']

    try:
        workoutstate = rowdatadf.loc[:, ' WorkoutState']
    except KeyError:  # pragma: no cover
        workoutstate = 0 * hr

    peakforce = rowdatadf.loc[:, ' PeakDriveForce (lbs)']
    forceratio = averageforce / peakforce
    forceratio = forceratio.fillna(value=0)

    try:
        drivetime = rowdatadf.loc[:, ' DriveTime (ms)']
        recoverytime = rowdatadf.loc[:, ' StrokeRecoveryTime (ms)']
        rhythm = 100. * drivetime / (recoverytime + drivetime)
        rhythm = rhythm.fillna(value=0)
    except:  # pragma: no cover
        rhythm = 0.0 * forceratio

    f = rowdatadf['TimeStamp (sec)'].diff().mean()
    if f != 0:
        try:
            windowsize = 2 * (int(10. / (f))) + 1
        except ValueError:  # pragma: no cover
            windowsize = 1
    else:  # pragma: no cover
        windowsize = 1

    if windowsize <= 3:  # pragma: no cover
        windowsize = 5

    if windowsize > 3 and windowsize0:  # pragma: no cover
        drivelength = arclength
    elif drivelength.mean() == 0:
        drivelength = driveenergy / (averageforce * 4.44822)

    try:
        slip = rowdatadf.loc[:, 'slip']
    except KeyError:
        slip = 0 * t

    try:
        totalangle = finish - catch
        effectiveangle = finish - wash - catch - slip
    except ValueError:  # pragma: no cover
        totalangle = 0 * t
        effectiveangle = 0 * t

    if windowsize > 3 and windowsize