diff --git a/rowers/dataprepnodjango.py b/rowers/dataprepnodjango.py
deleted file mode 100644
index e59582ac..00000000
--- a/rowers/dataprepnodjango.py
+++ /dev/null
@@ -1,799 +0,0 @@
-from rowers.utils import totaltime_sec_to_string
-from rowers.metrics import dtypes
-import datetime
-from scipy.signal import savgol_filter
-import os
-
-# This is Data prep used for testing purposes (no Django environment)
-# Uses the debug SQLite database for stroke data
-from rowingdata import rowingdata as rrdata
-from rowingdata import make_cumvalues
-from rowingdata import rower as rrower
-from rowingdata import main as rmain
-from rowingdata import empower_bug_correction, get_empower_rigging, get_file_type
-from rowingdata.csvparsers import make_cumvalues_array
-from time import strftime
-from pandas import DataFrame, Series
-
-import shutil
-from shutil import copyfile
-import pyarrow as pa
-
-import pandas as pd
-import numpy as np
-import itertools
-import dask.dataframe as dd
-from dask.delayed import delayed
-
-from sqlalchemy import create_engine
-import sqlalchemy as sa
-
-from rowsandall_app.settings import DATABASES
-from rowsandall_app.settings_dev import DATABASES as DEV_DATABASES
-from rowsandall_app.settings_dev import use_sqlite
-
-from rowers.utils import lbstoN
-import pytz
-from timezonefinder import TimezoneFinder
-
-from rowingdata import (
-    RowProParser, TCXParser, MysteryParser, RowPerfectParser,
-    ErgDataParser, CoxMateParser, BoatCoachAdvancedParser, BoatCoachOTWParser,
-    BoatCoachParser, painsledDesktopParser, SpeedCoach2Parser, speedcoachParser,
-    ErgStickParser, FITParser, fitsummarydata
-)
-
-try:
-    user = DATABASES['default']['USER']
-except KeyError: # pragma: no cover
-    user = ''
-try:
-    password = DATABASES['default']['PASSWORD']
-except KeyError: # pragma: no cover
-    password = ''
-
-try:
-    database_name = DATABASES['default']['NAME']
-except KeyError: # pragma: no cover
-    database_name = ''
-try:
-    host = DATABASES['default']['HOST']
-except KeyError: # pragma: no cover
-    host = ''
-try:
-    port = DATABASES['default']['PORT']
-except KeyError: # pragma: no cover
-    port = ''
-
-database_url = 'mysql://{user}:{password}@{host}:{port}/{database_name}'.format(
-    user=user,
-    password=password,
-    database_name=database_name,
-    host=host,
-    port=port,
-)
-
-database_name_dev = DEV_DATABASES['default']['NAME']
-
-database_url_debug = database_url
-
-if use_sqlite:
-    database_url_debug = 'sqlite:///'+database_name_dev
-    database_url = database_url_debug
-
-
-# mapping the DB column names to the CSV file column names
-columndict = {
-    'time': 'TimeStamp (sec)',
-    'hr': ' HRCur (bpm)',
-    'velo': ' AverageBoatSpeed (m/s)',
-    'pace': ' Stroke500mPace (sec/500m)',
-    'spm': ' Cadence (stokes/min)',
-    'power': ' Power (watts)',
-    'averageforce': ' AverageDriveForce (lbs)',
-    'drivelength': ' DriveLength (meters)',
-    'peakforce': ' PeakDriveForce (lbs)',
-    'distance': ' Horizontal (meters)',
-    'catch': 'catch',
-    'finish': 'finish',
-    'peakforceangle': 'peakforceangle',
-    'wash': 'wash',
-    'slip': 'wash',
-    'workoutstate': ' WorkoutState',
-    'cumdist': 'cum_dist',
-}
-
-
-def niceformat(values):
-    out = []
-    for v in values:
-        formattedv = strfdelta(v)
-        out.append(formattedv)
-
-    return out
-
-
-def strfdelta(tdelta):
-    try:
-        minutes, seconds = divmod(tdelta.seconds, 60)
-        tenths = int(tdelta.microseconds/1e5)
-    except AttributeError: # pragma: no cover
-        minutes, seconds = divmod(tdelta.view(np.int64), 60e9)
-        seconds, rest = divmod(seconds, 1e9)
-        tenths = int(rest/1e8)
-    res = "{minutes:0>2}:{seconds:0>2}.{tenths:0>1}".format(
-        minutes=minutes,
-        seconds=seconds,
-        tenths=tenths,
-    )
-
-    return res
-
-
-def nicepaceformat(values):
-    out = []
-    for v in values:
-        formattedv = strfdelta(v)
-        out.append(formattedv)
-
-    return out
-
-
-def timedeltaconv(x):
-    if not np.isnan(x):
-        dt = datetime.timedelta(seconds=x)
-    else: # pragma: no cover
-        dt = datetime.timedelta(seconds=350.)
-
-    return dt
-
-
-def rdata(file, rower=rrower()): # pragma: no cover
-    try:
-        res = rrdata(csvfile=file, rower=rower)
-    except IOError:
-        try:
-            res = rrdata(csvfile=file+'.gz', rower=rower)
-        except IOError:
-            res = 0
-
-    return res
-
-
-
-# Saves C2 stroke data to CSV and database
-
-
-def handle_nonpainsled(f2, fileformat, summary=''): # pragma: no cover
-    oarlength = 2.89
-    inboard = 0.88
-    # handle RowPro:
-    if (fileformat == 'rp'):
-        row = RowProParser(f2)
-    # handle TCX
-    if (fileformat == 'tcx'):
-        row = TCXParser(f2)
-
-    # handle Mystery
-    if (fileformat == 'mystery'):
-        row = MysteryParser(f2)
-
-    # handle RowPerfect
-    if (fileformat == 'rowperfect3'):
-        row = RowPerfectParser(f2)
-
-    # handle ErgData
-    if (fileformat == 'ergdata'):
-        row = ErgDataParser(f2)
-
-    # handle CoxMate
-    if (fileformat == 'coxmate'):
-        row = CoxMateParser(f2)
-
-    # handle Mike
-    if (fileformat == 'bcmike'):
-        row = BoatCoachAdvancedParser(f2)
-
-    # handle BoatCoach OTW
-    if (fileformat == 'boatcoachotw'):
-        row = BoatCoachOTWParser(f2)
-
-    # handle BoatCoach
-    if (fileformat == 'boatcoach'):
-        row = BoatCoachParser(f2)
-
-    # handle painsled desktop
-    if (fileformat == 'painsleddesktop'):
-        row = painsledDesktopParser(f2)
-
-    # handle speed coach GPS
-    if (fileformat == 'speedcoach'):
-        row = speedcoachParser(f2)
-
-    # handle speed coach GPS 2
-    if (fileformat == 'speedcoach2'):
-        row = SpeedCoach2Parser(f2)
-        try:
-            oarlength, inboard = get_empower_rigging(f2)
-            summary = row.allstats()
-        except:
-            pass
-
-    # handle ErgStick
-    if (fileformat == 'ergstick'):
-        row = ErgStickParser(f2)
-
-    # handle FIT
-    if (fileformat == 'fit'):
-        row = FITParser(f2)
-        s = fitsummarydata(f2)
-        s.setsummary()
-        summary = s.summarytext
-
-    f_to_be_deleted = f2
-    # should delete file
-    f2 = f2[:-4]+'o.csv'
-    row.write_csv(f2, gzip=True)
-
-    # os.remove(f2)
-    try:
-        os.remove(f_to_be_deleted)
-    except:
-        os.remove(f_to_be_deleted+'.gz')
-
-    return (f2, summary, oarlength, inboard)
-
-
-def delete_strokedata(id, debug=False):
-    dirname = 'media/strokedata_{id}.parquet.gz'.format(id=id)
-    try:
-        shutil.rmtree(dirname)
-    except FileNotFoundError: # pragma: no cover
-        pass
-
-
-def update_strokedata(id, df, debug=False):
-    delete_strokedata(id, debug=debug)
-    if debug: # pragma: no cover
-        print("updating ", id)
-    rowdata = dataprep(df, id=id, bands=True, barchart=True, otwpower=True,
-                       debug=debug)
-
-    return rowdata
-
-
-
-def testdata(time, distance, pace, spm): # pragma: no cover
-    t1 = np.issubdtype(time, np.number)
-    t2 = np.issubdtype(distance, np.number)
-    t3 = np.issubdtype(pace, np.number)
-    t4 = np.issubdtype(spm, np.number)
-
-    return t1 and t2 and t3 and t4
-
-
-def getsmallrowdata_db(columns, ids=[], debug=False):
-    csvfilenames = [
-        'media/strokedata_{id}.parquet.gz'.format(id=id) for id in ids]
-    data = []
-    columns = [c for c in columns if c != 'None']
-
-    df = pd.DataFrame()
-
-    if len(ids) > 1: # pragma: no cover
-        for id, f in zip(ids, csvfilenames):
-            try:
-                df = pd.read_parquet(f, columns=columns, engine='pyarrow')
-                data.append(df)
-            except OSError:
-                pass
-            except pa.lib.ArrowInvalid:
-                pass
-
-        try:
-            df = pd.concat(data, axis=0)
-        except ValueError:
-            df = pd.DataFrame()
-    elif len(ids) == 1:
-        try:
-            df = pd.read_parquet(
-                csvfilenames[0], columns=columns, engine='pyarrow')
-        except (OSError, IndexError): # pragma: no cover
-            df = pd.DataFrame()
-    else: # pragma: no cover
-        df = pd.DataFrame()
-
-    return df
-
-
-def update_workout_field_sql(workoutid, fieldname, value, debug=False):
-    if debug: # pragma: no cover
-        engine = create_engine(database_url_debug, echo=False)
-    else:
-        engine = create_engine(database_url, echo=False)
-
-    table = 'rowers_workout'
-
-    query = "UPDATE %s SET %s = '%s' WHERE `id` = %s;" % (
-        table, fieldname, value, workoutid)
-
-    with engine.connect() as conn, conn.begin():
-        _ = conn.execute(query)
-
-    conn.close()
-    engine.dispose()
-
-    return 1
-
-
-def update_c2id_sql(id, c2id): # pragma: no cover
-    engine = create_engine(database_url, echo=False)
-    table = 'rowers_workout'
-
-    query = "UPDATE %s SET uploadedtoc2 = %s WHERE `id` = %s;" % (
-        table, c2id, id)
-
-    with engine.connect() as conn, conn.begin():
-        _ = conn.execute(query)
-
-    conn.close()
-    engine.dispose()
-
-    return 1
-
-
-def read_cols_df_sql(ids, columns, debug=False): # pragma: no cover
-    columns = list(columns)+['distance', 'spm']
-    columns = [x for x in columns if x != 'None']
-    columns = list(set(columns))
-
-    ids = [int(id) for id in ids]
-
-    if len(ids) == 0:
-        return pd.DataFrame()
-    elif len(ids) == 1:
-        try:
-            filename = 'media/strokedata_{id}.parquet.gz'.format(id=ids[0])
-            df = pd.read_parquet(filename, columns=columns)
-        except OSError:
-            pass
-    else:
-        data = []
-        filenames = [
-            'media/strokedata_{id}.parquet.gz'.format(id=id) for id in ids]
-        for id, f in zip(ids, filenames):
-            try:
-                df = pd.read_parquet(f, columns=columns)
-                data.append(df)
-            except OSError:
-                pass
-
-        df = pd.concat(data, axis=0)
-
-    return df
-
-
-def read_df_sql(id, debug=False): # pragma: no cover
-    try:
-        f = 'media/strokedata_{id}.parquet.gz'.format(id=id)
-        df = pd.read_parquet(f)
-    except OSError:
-        pass
-
-    df = df.fillna(value=0)
-
-    return df
-
-
-def getcpdata_sql(rower_id, table='cpdata', debug=False): # pragma: no cover
-    if debug: # pragma: no cover
-        engine = create_engine(database_url_debug, echo=False)
-    else:
-        engine = create_engine(database_url, echo=False)
-
-    query = sa.text('SELECT * from {table} WHERE user={rower_id};'.format(
-        rower_id=rower_id,
-        table=table,
-    ))
-    _ = engine.raw_connection()
-    df = pd.read_sql_query(query, engine)
-
-    return df
-
-
-def deletecpdata_sql(rower_id, table='cpdata', debug=False): # pragma: no cover
-    if debug: # pragma: no cover
-        engine = create_engine(database_url_debug, echo=False)
-    else:
-        engine = create_engine(database_url, echo=False)
-
-    query = sa.text('DELETE from {table} WHERE user={rower_id};'.format(
-        rower_id=rower_id,
-        table=table,
-    ))
-    with engine.connect() as conn, conn.begin():
-        try:
-            _ = conn.execute(query)
-        except: # pragma: no cover
-            print("Database locked")
-    conn.close()
-    engine.dispose()
-
-
-def delete_agegroup_db(age, sex, weightcategory, debug=False):
-    if debug: # pragma: no cover
-        engine = create_engine(database_url_debug, echo=False)
-    else: # pragma: no cover
-        engine = create_engine(database_url, echo=False)
-
-    query = sa.text('DELETE from {table} WHERE age={age} and weightcategory = {weightcategory} and sex={sex};'.format(
-        sex=sex,
-        age=age,
-        weightcategory=weightcategory,
-        table='calcagegrouprecords'
-    ))
-    with engine.connect() as conn, conn.begin():
-        try:
-            _ = conn.execute(query)
-        except: # pragma: no cover
-            print("Database locked")
-    conn.close()
-    engine.dispose()
-
-
-
-
-
-def updatecpdata_sql(rower_id, delta, cp, table='cpdata',
-                     distance=pd.Series([], dtype='float'), debug=False):
-    deletecpdata_sql(rower_id, table=table, debug=debug)
-    df = pd.DataFrame(
-        {
-            'delta': delta,
-            'cp': cp,
-            'user': rower_id
-        }
-    )
-
-    if not distance.empty:
-        df['distance'] = distance
-
-    if debug: # pragma: no cover
-        engine = create_engine(database_url_debug, echo=False)
-    else:
-        engine = create_engine(database_url, echo=False)
-
-    with engine.connect() as conn, conn.begin():
-        df.to_sql(table, engine, if_exists='append', index=False)
-    conn.close()
-    engine.dispose()
-
-
-def smalldataprep(therows, xparam, yparam1, yparam2): # pragma: no cover
-    df = pd.DataFrame()
-    if yparam2 == 'None':
-        yparam2 = 'power'
-    df[xparam] = []
-    df[yparam1] = []
-    df[yparam2] = []
-    df['distance'] = []
-    df['spm'] = []
-    for workout in therows:
-        f1 = workout.csvfilename
-
-        try:
-            rowdata = dataprep(rrdata(csvfile=f1).df)
-
-            rowdata = pd.DataFrame({xparam: rowdata[xparam],
-                                    yparam1: rowdata[yparam1],
-                                    yparam2: rowdata[yparam2],
-                                    'distance': rowdata['distance'],
-                                    'spm': rowdata['spm'],
-                                    }
-                                   )
-            df = pd.concat([df, rowdata], ignore_index=True)
-        except IOError:
-            try:
-                rowdata = dataprep(rrdata(csvfile=f1+'.gz').df)
-                rowdata = pd.DataFrame({xparam: rowdata[xparam],
-                                        yparam1: rowdata[yparam1],
-                                        yparam2: rowdata[yparam2],
-                                        'distance': rowdata['distance'],
-                                        'spm': rowdata['spm'],
-                                        }
-                                       )
-                df = pd.concat([df, rowdata], ignore_index=True)
-            except IOError:
-                pass
-
-    return df
-
-
-def dataprep(rowdatadf, id=0, bands=True, barchart=True, otwpower=True,
-             empower=True, debug=False, inboard=0.88, forceunit='lbs'):
-
-    if rowdatadf.empty: # pragma: no cover
-        if debug: # pragma: no cover
-            print("empty")
-        return 0
-
-    # rowdatadf.set_index([range(len(rowdatadf))],inplace=True)
-    t = rowdatadf.loc[:, 'TimeStamp (sec)']
-    t = pd.Series(t-rowdatadf.loc[:, 'TimeStamp (sec)'].iloc[0])
-
-    row_index = rowdatadf.loc[:, ' Stroke500mPace (sec/500m)'] > 3000
-    rowdatadf.loc[row_index, ' Stroke500mPace (sec/500m)'] = 3000.
-
-    p = rowdatadf.loc[:, ' Stroke500mPace (sec/500m)']
-    try:
-        velo = rowdatadf.loc[:, ' AverageBoatSpeed (m/s)']
-    except KeyError:
-        velo = 500./p
-
-    hr = rowdatadf.loc[:, ' HRCur (bpm)']
-    spm = rowdatadf.loc[:, ' Cadence (stokes/min)']
-    cumdist = rowdatadf.loc[:, 'cum_dist']
-
-    power = rowdatadf.loc[:, ' Power (watts)']
-    averageforce = rowdatadf.loc[:, ' AverageDriveForce (lbs)']
-    drivelength = rowdatadf.loc[:, ' DriveLength (meters)']
-    try:
-        workoutstate = rowdatadf.loc[:, ' WorkoutState']
-    except KeyError: # pragma: no cover
-        workoutstate = 0*hr
-
-    peakforce = rowdatadf.loc[:, ' PeakDriveForce (lbs)']
-
-    forceratio = averageforce/peakforce
-    forceratio = forceratio.fillna(value=0)
-
-    try:
-        drivetime = rowdatadf.loc[:, ' DriveTime (ms)']
-        recoverytime = rowdatadf.loc[:, ' StrokeRecoveryTime (ms)']
-        rhythm = 100.*drivetime/(recoverytime+drivetime)
-        rhythm = rhythm.fillna(value=0)
-    except: # pragma: no cover
-        rhythm = 0.0*forceratio
-
-    f = rowdatadf['TimeStamp (sec)'].diff().mean()
-    if f != 0:
-        try:
-            windowsize = 2*(int(10./(f)))+1
-        except ValueError: # pragma: no cover
-            windowsize = 1
-    else: # pragma: no cover
-        windowsize = 1
-    if windowsize <= 3: # pragma: no cover
-        windowsize = 5
-
-    if windowsize > 3 and windowsize < len(hr):
-        spm = savgol_filter(spm, windowsize, 3)
-        hr = savgol_filter(hr, windowsize, 3)
-        drivelength = savgol_filter(drivelength, windowsize, 3)
-        forceratio = savgol_filter(forceratio, windowsize, 3)
-
-    try:
-        t2 = t.fillna(method='ffill').apply(lambda x: timedeltaconv(x))
-    except TypeError: # pragma: no cover
-        t2 = 0*t
-
-    p2 = p.fillna(method='ffill').apply(lambda x: timedeltaconv(x))
-
-    try:
-        drivespeed = drivelength/rowdatadf[' DriveTime (ms)']*1.0e3
-    except KeyError: # pragma: no cover
-        drivespeed = 0.0*rowdatadf['TimeStamp (sec)']
-    except TypeError: # pragma: no cover
-        drivespeed = 0.0*rowdatadf['TimeStamp (sec)']
-
-    drivespeed = drivespeed.fillna(value=0)
-
-    try:
-        driveenergy = rowdatadf['driveenergy']
-    except KeyError: # pragma: no cover
-        if forceunit == 'lbs':
-            driveenergy = drivelength*averageforce*lbstoN
-        else: # pragma: no cover
-            driveenergy = drivelength*averageforce
-
-    distance = rowdatadf.loc[:, 'cum_dist']
-
-    velo = 500./p
-
-    distanceperstroke = 60.*velo/spm
-
-    if forceunit == 'lbs':
-        averageforce *= lbstoN
-        peakforce *= lbstoN
-
-    data = DataFrame(
-        dict(
-            time=t * 1e3,
-            hr=hr,
-            pace=p * 1e3,
-            spm=spm,
-            velo=velo,
-            cumdist=cumdist,
-            ftime=niceformat(t2),
-            fpace=nicepaceformat(p2),
-            driveenergy=driveenergy,
-            power=power,
-            workoutstate=workoutstate,
-            averageforce=averageforce,
-            drivelength=drivelength,
-            peakforce=peakforce,
-            forceratio=forceratio,
-            distance=distance,
-            drivespeed=drivespeed,
-            rhythm=rhythm,
-            distanceperstroke=distanceperstroke,
-        )
-    )
-
-    if bands:
-        # HR bands
-        data['hr_ut2'] = rowdatadf.loc[:, 'hr_ut2']
-        data['hr_ut1'] = rowdatadf.loc[:, 'hr_ut1']
-        data['hr_at'] = rowdatadf.loc[:, 'hr_at']
-        data['hr_tr'] = rowdatadf.loc[:, 'hr_tr']
-        data['hr_an'] = rowdatadf.loc[:, 'hr_an']
-        data['hr_max'] = rowdatadf.loc[:, 'hr_max']
-        data['hr_bottom'] = 0.0*data['hr']
-
-    try:
-        _ = rowdatadf.loc[:, ' ElapsedTime (sec)']
-    except KeyError: # pragma: no cover
-        rowdatadf[' ElapsedTime (sec)'] = rowdatadf['TimeStamp (sec)']
-
-    if empower:
-        try:
-            wash = rowdatadf.loc[:, 'wash']
-        except KeyError:
-            wash = 0*t
-
-        try:
-            catch = rowdatadf.loc[:, 'catch']
-        except KeyError:
-            catch = 0*t
-
-        try:
-            finish = rowdatadf.loc[:, 'finish']
-        except KeyError:
-            finish = 0*t
-
-        try:
-            peakforceangle = rowdatadf.loc[:, 'peakforceangle']
-        except KeyError:
-            peakforceangle = 0*t
-
-        if data['driveenergy'].mean() == 0:
-            try:
-                driveenergy = rowdatadf.loc[:, 'driveenergy']
-            except KeyError:
-                driveenergy = power*60/spm
-        else:
-            driveenergy = data['driveenergy']
-
-        arclength = (inboard-0.05)*(np.radians(finish)-np.radians(catch))
-        if arclength.mean() > 0: # pragma: no cover
-            drivelength = arclength
-        elif drivelength.mean() == 0:
-            drivelength = driveenergy/(averageforce*4.44822)
-
-        try:
-            slip = rowdatadf.loc[:, 'slip']
-        except KeyError:
-            slip = 0*t
-
-        try:
-            totalangle = finish-catch
-            effectiveangle = finish-wash-catch-slip
-        except ValueError: # pragma: no cover
-            totalangle = 0*t
-            effectiveangle = 0*t
-
-        if windowsize > 3 and windowsize < len(slip):
-            try:
-                wash = savgol_filter(wash, windowsize, 3)
-            except TypeError: # pragma: no cover
-                pass
-            try:
-                slip = savgol_filter(slip, windowsize, 3)
-            except TypeError: # pragma: no cover
-                pass
-            try:
-                catch = savgol_filter(catch, windowsize, 3)
-            except TypeError: # pragma: no cover
-                pass
-            try:
-                finish = savgol_filter(finish, windowsize, 3)
-            except TypeError: # pragma: no cover
-                pass
-            try:
-                peakforceangle = savgol_filter(peakforceangle, windowsize, 3)
-            except TypeError: # pragma: no cover
-                pass
-            try:
-                driveenergy = savgol_filter(driveenergy, windowsize, 3)
-            except TypeError: # pragma: no cover
-                pass
-            try:
-                drivelength = savgol_filter(drivelength, windowsize, 3)
-            except TypeError: # pragma: no cover
-                pass
-            try:
-                totalangle = savgol_filter(totalangle, windowsize, 3)
-            except TypeError: # pragma: no cover
-                pass
-            try:
-                effectiveangle = savgol_filter(effectiveangle, windowsize, 3)
-            except TypeError: # pragma: no cover
-                pass
-
-        velo = 500./p
-
-        ergpw = 2.8*velo**3
-        efficiency = 100.*ergpw/power
-
-        efficiency = efficiency.replace([-np.inf, np.inf], np.nan)
-        efficiency.fillna(method='ffill')
-
-        try:
-            data['wash'] = wash
-            data['catch'] = catch
-            data['slip'] = slip
-            data['finish'] = finish
-            data['peakforceangle'] = peakforceangle
-            data['driveenergy'] = driveenergy
-            data['drivelength'] = drivelength
-            data['totalangle'] = totalangle
-            data['effectiveangle'] = effectiveangle
-            data['efficiency'] = efficiency
-        except ValueError: # pragma: no cover
-            pass
-
-    if otwpower:
-        try:
-            nowindpace = rowdatadf.loc[:, 'nowindpace']
-        except KeyError:
-            nowindpace = p
-        try:
-            equivergpower = rowdatadf.loc[:, 'equivergpower']
-        except KeyError:
-            equivergpower = 0*p+50.
-
-        nowindpace2 = nowindpace.apply(lambda x: timedeltaconv(x))
-        ergvelo = (equivergpower/2.8)**(1./3.)
-
-        ergpace = 500./ergvelo
-        ergpace[ergpace == np.inf] = 240.
-        ergpace2 = ergpace.apply(lambda x: timedeltaconv(x))
-
-        data['ergpace'] = ergpace*1.e3
-        data['nowindpace'] = nowindpace*1.e3
-        data['equivergpower'] = equivergpower
-        data['fergpace'] = nicepaceformat(ergpace2)
-        data['fnowindpace'] = nicepaceformat(nowindpace2)
-        data['efficiency'] = efficiency
-
-    data = data.replace([-np.inf, np.inf], np.nan)
-    data = data.fillna(method='ffill')
-
-    data.dropna(axis=0, inplace=True, how='all')
-    data.dropna(axis=1, inplace=True, how='any')
-
-    # write data if id given
-    if id != 0:
-        data['workoutid'] = id
-        data.fillna(0, inplace=True)
-        for k, v in dtypes.items():
-            try:
-                data[k] = data[k].astype(v)
-            except KeyError:
-                pass
-
-        filename = 'media/strokedata_{id}.parquet.gz'.format(id=id)
-        df = dd.from_pandas(data, npartitions=1)
-        df.to_parquet(filename, engine='fastparquet', compression='GZIP')
-
-    return data