Private
Public Access
1
0

another batch of light comments

This commit is contained in:
Sander Roosendaal
2017-01-15 16:57:11 +01:00
parent e5810dbf62
commit 5f551528da
4 changed files with 127 additions and 86 deletions

View File

@@ -1,3 +1,5 @@
# All the data preparation, data cleaning and data mangling should
# be defined here
from rowers.models import Workout, User, Rower
from rowingdata import rowingdata as rrdata
@@ -37,11 +39,13 @@ database_url = 'mysql://{user}:{password}@{host}:{port}/{database_name}'.format(
port=port,
)
# Use SQLite local database when we're in debug mode
if settings.DEBUG or user=='':
# database_url = 'sqlite:///db.sqlite3'
database_url = 'sqlite:///'+database_name
# mapping the DB column names to the CSV file column names
columndict = {
'time':'TimeStamp (sec)',
'hr':' HRCur (bpm)',
@@ -63,6 +67,7 @@ from scipy.signal import savgol_filter
import datetime
# A string representation for time deltas
def niceformat(values):
out = []
for v in values:
@@ -71,6 +76,7 @@ def niceformat(values):
return out
# A nice printable format for time delta values
def strfdelta(tdelta):
try:
minutes,seconds = divmod(tdelta.seconds,60)
@@ -87,6 +93,7 @@ def strfdelta(tdelta):
return res
# A nice printable format for pace values
def nicepaceformat(values):
out = []
for v in values:
@@ -96,6 +103,7 @@ def nicepaceformat(values):
return out
# Convert seconds to a Time Delta value, replacing NaN with a 5:50 pace
def timedeltaconv(x):
if not np.isnan(x):
dt = datetime.timedelta(seconds=x)
@@ -105,6 +113,9 @@ def timedeltaconv(x):
return dt
# Create new workout from file and store it in the database
# This routine should be used everywhere in views.py and mailprocessing.py
# Currently there is code duplication
def new_workout_from_file(r,f2,
workouttype='rower',
title='Workout',
@@ -263,6 +274,9 @@ def new_workout_from_file(r,f2,
return True
# Compare the data from the CSV file and the database
# Currently only calculates number of strokes. To be expanded with
# more elaborate testing if needed
def compare_data(id):
row = Workout.objects.get(id=id)
f1 = row.csvfilename
@@ -288,6 +302,8 @@ def compare_data(id):
ldb = l2
return l1==l2,ldb,lfile
# Repair data for workouts where the CSV file is lost (or the DB entries
# don't exist)
def repair_data(verbose=False):
ws = Workout.objects.all()
for w in ws:
@@ -319,6 +335,7 @@ def repair_data(verbose=False):
print str(sys.exc_info()[0])
pass
# A wrapper around the rowingdata class, with some error catching
def rdata(file,rower=rrower()):
try:
res = rrdata(file,rower=rower)
@@ -330,6 +347,7 @@ def rdata(file,rower=rrower()):
return res
# Remove all stroke data for workout ID from database
def delete_strokedata(id):
engine = create_engine(database_url, echo=False)
query = sa.text('DELETE FROM strokedata WHERE workoutid={id};'.format(
@@ -343,10 +361,12 @@ def delete_strokedata(id):
conn.close()
engine.dispose()
# Replace stroke data in DB with data from CSV file
def update_strokedata(id,df):
delete_strokedata(id)
rowdata = dataprep(df,id=id,bands=True,barchart=True,otwpower=True)
# Test that all data are of a numerical type
def testdata(time,distance,pace,spm):
t1 = np.issubdtype(time,np.number)
t2 = np.issubdtype(distance,np.number)
@@ -355,6 +375,8 @@ def testdata(time,distance,pace,spm):
return t1 and t2 and t3 and t4
# Get data from DB for one workout (fetches all data). If data
# is not in DB, read from CSV file (and create DB entry)
def getrowdata_db(id=0):
data = read_df_sql(id)
data['x_right'] = data['x_right']/1.0e6
@@ -369,12 +391,14 @@ def getrowdata_db(id=0):
return data,row
# Fetch a subset of the stroke-data columns from the DB for a set of workouts
def getsmallrowdata_db(columns, ids=None):
    """Return a pandas DataFrame with the requested columns for the given workouts.

    columns -- iterable of stroke-data column names to fetch
    ids     -- list of workout ids (defaults to an empty list)

    First makes sure every workout id has stroke data in the DB
    (prepmultipledata), then reads just the requested columns.
    """
    # NOTE(review): the original used a mutable default (ids=[]), which is
    # shared across calls; use the None sentinel idiom instead.
    if ids is None:
        ids = []
    prepmultipledata(ids)
    data = read_cols_df_sql(ids, columns)
    return data
# Fetch both the workout and the workout stroke data (from CSV file)
def getrowdata(id=0):
# check if valid ID exists (workout exists)
@@ -395,7 +419,12 @@ def getrowdata(id=0):
return rowdata,row
# Checks if all rows for a list of workout IDs have entries in the
# stroke_data table. If this is not the case, it creates the stroke
# data
# In theory, this should never yield any work, but it's a good
# safety net for programming errors elsewhere in the app
# Also used heavily when I moved from CSV file only to CSV+Stroke data
def prepmultipledata(ids,verbose=False):
query = sa.text('SELECT DISTINCT workoutid FROM strokedata')
engine = create_engine(database_url, echo=False)
@@ -420,6 +449,8 @@ def prepmultipledata(ids,verbose=False):
data = dataprep(rowdata.df,id=id,bands=True,barchart=True,otwpower=True)
return res
# Read a set of columns for a set of workout ids, returns data as a
# pandas dataframe
def read_cols_df_sql(ids,columns):
columns = list(columns)+['distance','spm']
columns = [x for x in columns if x != 'None']
@@ -450,7 +481,7 @@ def read_cols_df_sql(ids,columns):
engine.dispose()
return df
# Read stroke data from the DB for a Workout ID. Returns a pandas dataframe
def read_df_sql(id):
engine = create_engine(database_url, echo=False)
@@ -460,10 +491,8 @@ def read_df_sql(id):
engine.dispose()
return df
# Get the necessary data from the strokedata table in the DB.
# For the flex plot
def smalldataprep(therows,xparam,yparam1,yparam2):
df = pd.DataFrame()
if yparam2 == 'None':
@@ -503,7 +532,10 @@ def smalldataprep(therows,xparam,yparam1,yparam2):
return df
# This is the main routine.
# it reindexes, sorts, filters, and smooths the data, then
# saves it to the stroke_data table in the database
# Takes a rowingdata object's DataFrame as input
def dataprep(rowdatadf,id=0,bands=True,barchart=True,otwpower=True,
empower=True):
rowdatadf.set_index([range(len(rowdatadf))],inplace=True)