bug fix and additional filtering for spm<0

2018-03-02 10:37:24 +01:00
parent c1158747e3
commit 66452b44c0
4 changed files with 52 additions and 15 deletions
--- a/rowers/dataprep.py
+++ b/rowers/dataprep.py
@@ -263,6 +263,12 @@ def clean_df_stats(datadf, workstrokesonly=True, ignorehr=True,
        pass


+    # shift spm by +1 so real 0 spm values survive the 0 -> NaN replace below
+    try:
+        datadf['spm'] = datadf['spm'] + 1.0
+    except TypeError:
+        pass
+        
    try:
        datadf = datadf.clip(lower=0)
    except TypeError:
@@ -270,6 +276,11 @@ def clean_df_stats(datadf, workstrokesonly=True, ignorehr=True,

    datadf.replace(to_replace=0, value=np.nan, inplace=True)

+    # undo the +1 shift so spm is back on its original scale
+    try:
+        datadf['spm'] = datadf['spm'] - 1
+    except TypeError:
+        pass

    # return from positive domain to negative
    try:
@@ -295,6 +306,12 @@ def clean_df_stats(datadf, workstrokesonly=True, ignorehr=True,
        except KeyError:
            pass

+    try:
+        mask = datadf['spm'] < 0
+        datadf.loc[mask,'spm'] = np.nan
+    except KeyError:
+        pass
+        
    try:
        mask = datadf['efficiency'] > 200.
        datadf.loc[mask, 'efficiency'] = np.nan
@@ -1614,6 +1631,7 @@ def getsmallrowdata_db(columns, ids=[], doclean=True, workstrokesonly=True):
    if doclean:
        data = clean_df_stats(data, ignorehr=True,
                              workstrokesonly=workstrokesonly)
+        data.dropna(inplace=True,axis=0)

    return data