Private
Public Access
1
0

another batch of light comments

This commit is contained in:
Sander Roosendaal
2017-01-15 16:57:11 +01:00
parent e5810dbf62
commit 5f551528da
4 changed files with 127 additions and 86 deletions

View File

@@ -1,3 +1,5 @@
# All the data preparation, data cleaning and data mangling should
# be defined here
from rowers.models import Workout, User, Rower
from rowingdata import rowingdata as rrdata
@@ -37,11 +39,13 @@ database_url = 'mysql://{user}:{password}@{host}:{port}/{database_name}'.format(
port=port,
)
# Use SQLite local database when we're in debug mode
if settings.DEBUG or user=='':
# database_url = 'sqlite:///db.sqlite3'
database_url = 'sqlite:///'+database_name
# mapping the DB column names to the CSV file column names
columndict = {
'time':'TimeStamp (sec)',
'hr':' HRCur (bpm)',
@@ -63,6 +67,7 @@ from scipy.signal import savgol_filter
import datetime
# A string representation for time deltas
def niceformat(values):
out = []
for v in values:
@@ -71,6 +76,7 @@ def niceformat(values):
return out
# A nice printable format for time delta values
def strfdelta(tdelta):
try:
minutes,seconds = divmod(tdelta.seconds,60)
@@ -87,6 +93,7 @@ def strfdelta(tdelta):
return res
# A nice printable format for pace values
def nicepaceformat(values):
out = []
for v in values:
@@ -96,6 +103,7 @@ def nicepaceformat(values):
return out
# Convert seconds to a Time Delta value, replacing NaN with a 5:50 pace
def timedeltaconv(x):
if not np.isnan(x):
dt = datetime.timedelta(seconds=x)
@@ -105,6 +113,9 @@ def timedeltaconv(x):
return dt
# Create new workout from file and store it in the database
# This routine should be used everywhere in views.py and mailprocessing.py
# Currently there is code duplication
def new_workout_from_file(r,f2,
workouttype='rower',
title='Workout',
@@ -263,6 +274,9 @@ def new_workout_from_file(r,f2,
return True
# Compare the data from the CSV file and the database
# Currently only calculates number of strokes. To be expanded with
# more elaborate testing if needed
def compare_data(id):
row = Workout.objects.get(id=id)
f1 = row.csvfilename
@@ -288,6 +302,8 @@ def compare_data(id):
ldb = l2
return l1==l2,ldb,lfile
# Repair data for workouts where the CSV file is lost (or the DB entries
# don't exist)
def repair_data(verbose=False):
ws = Workout.objects.all()
for w in ws:
@@ -319,6 +335,7 @@ def repair_data(verbose=False):
print str(sys.exc_info()[0])
pass
# A wrapper around the rowingdata class, with some error catching
def rdata(file,rower=rrower()):
try:
res = rrdata(file,rower=rower)
@@ -330,6 +347,7 @@ def rdata(file,rower=rrower()):
return res
# Remove all stroke data for workout ID from database
def delete_strokedata(id):
engine = create_engine(database_url, echo=False)
query = sa.text('DELETE FROM strokedata WHERE workoutid={id};'.format(
@@ -343,10 +361,12 @@ def delete_strokedata(id):
conn.close()
engine.dispose()
# Replace stroke data in DB with data from CSV file
def update_strokedata(id,df):
delete_strokedata(id)
rowdata = dataprep(df,id=id,bands=True,barchart=True,otwpower=True)
# Test that all data are of a numerical type
def testdata(time,distance,pace,spm):
t1 = np.issubdtype(time,np.number)
t2 = np.issubdtype(distance,np.number)
@@ -355,6 +375,8 @@ def testdata(time,distance,pace,spm):
return t1 and t2 and t3 and t4
# Get data from DB for one workout (fetches all data). If data
# is not in DB, read from CSV file (and create DB entry)
def getrowdata_db(id=0):
data = read_df_sql(id)
data['x_right'] = data['x_right']/1.0e6
@@ -369,12 +391,14 @@ def getrowdata_db(id=0):
return data,row
# Fetch a subset of the stroke-data columns from the DB for a set of workouts
def getsmallrowdata_db(columns, ids=None):
    """Return a pandas DataFrame with the requested columns for the given workouts.

    columns -- iterable of stroke-data column names to fetch
    ids     -- list of workout ids (defaults to an empty list)

    First makes sure every workout id has stroke data in the DB
    (prepmultipledata), then reads just the requested columns.
    """
    # NOTE(review): the original used a mutable default (ids=[]), which is
    # shared across calls; use the None sentinel idiom instead.
    if ids is None:
        ids = []
    prepmultipledata(ids)
    data = read_cols_df_sql(ids, columns)
    return data
# Fetch both the workout and the workout stroke data (from CSV file)
def getrowdata(id=0):
# check if valid ID exists (workout exists)
@@ -395,7 +419,12 @@ def getrowdata(id=0):
return rowdata,row
# Checks if all rows for a list of workout IDs have entries in the
# stroke_data table. If this is not the case, it creates the stroke
# data
# In theory, this should never yield any work, but it's a good
# safety net for programming errors elsewhere in the app
# Also used heavily when I moved from CSV file only to CSV+Stroke data
def prepmultipledata(ids,verbose=False):
query = sa.text('SELECT DISTINCT workoutid FROM strokedata')
engine = create_engine(database_url, echo=False)
@@ -420,6 +449,8 @@ def prepmultipledata(ids,verbose=False):
data = dataprep(rowdata.df,id=id,bands=True,barchart=True,otwpower=True)
return res
# Read a set of columns for a set of workout ids, returns data as a
# pandas dataframe
def read_cols_df_sql(ids,columns):
columns = list(columns)+['distance','spm']
columns = [x for x in columns if x != 'None']
@@ -450,7 +481,7 @@ def read_cols_df_sql(ids,columns):
engine.dispose()
return df
# Read stroke data from the DB for a Workout ID. Returns a pandas dataframe
def read_df_sql(id):
engine = create_engine(database_url, echo=False)
@@ -460,10 +491,8 @@ def read_df_sql(id):
engine.dispose()
return df
# Get the necessary data from the strokedata table in the DB.
# For the flex plot
def smalldataprep(therows,xparam,yparam1,yparam2):
df = pd.DataFrame()
if yparam2 == 'None':
@@ -503,7 +532,10 @@ def smalldataprep(therows,xparam,yparam1,yparam2):
return df
# This is the main routine.
# it reindexes, sorts, filters, and smooths the data, then
# saves it to the stroke_data table in the database
# Takes a rowingdata object's DataFrame as input
def dataprep(rowdatadf,id=0,bands=True,barchart=True,otwpower=True,
empower=True):
rowdatadf.set_index([range(len(rowdatadf))],inplace=True)