Bug fix and additional filtering for spm < 0
This commit is contained in:
@@ -263,6 +263,12 @@ def clean_df_stats(datadf, workstrokesonly=True, ignorehr=True,
|
||||
pass
|
||||
|
||||
|
||||
# shift spm by +1 so genuine 0 spm values are not nulled by the 0 -> NaN replace below
|
||||
try:
|
||||
datadf['spm'] = datadf['spm'] + 1.0
|
||||
except TypeError:
|
||||
pass
|
||||
|
||||
try:
|
||||
datadf = datadf.clip(lower=0)
|
||||
except TypeError:
|
||||
@@ -270,6 +276,11 @@ def clean_df_stats(datadf, workstrokesonly=True, ignorehr=True,
|
||||
|
||||
datadf.replace(to_replace=0, value=np.nan, inplace=True)
|
||||
|
||||
# undo the +1 shift applied before cleaning so spm holds its real values again
|
||||
try:
|
||||
datadf['spm'] = datadf['spm'] - 1
|
||||
except TypeError:
|
||||
pass
|
||||
|
||||
# return from positive domain to negative
|
||||
try:
|
||||
@@ -295,6 +306,12 @@ def clean_df_stats(datadf, workstrokesonly=True, ignorehr=True,
|
||||
except KeyError:
|
||||
pass
|
||||
|
||||
try:
|
||||
mask = datadf['spm'] < 0
|
||||
datadf.loc[mask,'spm'] = np.nan
|
||||
except KeyError:
|
||||
pass
|
||||
|
||||
try:
|
||||
mask = datadf['efficiency'] > 200.
|
||||
datadf.loc[mask, 'efficiency'] = np.nan
|
||||
@@ -1614,6 +1631,7 @@ def getsmallrowdata_db(columns, ids=[], doclean=True, workstrokesonly=True):
|
||||
if doclean:
|
||||
data = clean_df_stats(data, ignorehr=True,
|
||||
workstrokesonly=workstrokesonly)
|
||||
data.dropna(inplace=True,axis=0)
|
||||
|
||||
return data
|
||||
|
||||
|
||||
Reference in New Issue
Block a user