bla
@@ -19,6 +19,7 @@ certifi==2019.3.9
 cffi==1.12.2
 chardet==3.0.4
 Click==7.0
+cloudpickle==1.2.2
 colorama==0.4.1
 colorclass==2.2.0
 cookies==2.2.1
@@ -27,7 +28,7 @@ coreschema==0.0.4
 coverage==4.5.3
 cryptography==2.6.1
 cycler==0.10.0
-dask==1.1.4
+dask==2.6.0
 decorator==4.4.0
 defusedxml==0.5.0
 Django==2.1.7
@@ -39,7 +40,7 @@ django-cookie-law==2.0.1
 django-cors-headers==2.5.2
 django-countries==5.3.3
 django-datetime-widget==0.9.3
-django-debug-toolbar==1.11
+django-debug-toolbar==2.0
 django-extensions==2.1.6
 django-htmlmin==0.11.0
 django-leaflet==0.24.0
@@ -64,8 +65,10 @@ entrypoints==0.3
 execnet==1.5.0
 factory-boy==2.11.1
 Faker==1.0.4
+fastparquet==0.3.2
 fitparse==1.1.0
 Flask==1.0.2
+fsspec==0.5.2
 future==0.17.1
 geocoder==1.38.1
 geos==0.2.1
@@ -74,6 +77,7 @@ html5lib==1.0.1
 htmlmin==0.1.12
 HTMLParser==0.0.2
 httplib2==0.12.1
+hvplot==0.4.0
 icalendar==4.0.3
 idna==2.8
 image==1.5.27
@@ -99,10 +103,12 @@ jupyterlab-server==0.3.0
 keyring==18.0.0
 kiwisolver==1.0.1
 kombu==4.5.0
+llvmlite==0.30.0
 lxml==4.3.2
 Markdown==3.0.1
 MarkupSafe==1.1.1
 matplotlib==3.0.3
+minify==0.1.4
 MiniMockTest==0.5
 mistune==0.8.4
 mock==2.0.0
@@ -111,9 +117,11 @@ mpld3==0.3
 mysqlclient==1.4.2.post1
 nbconvert==5.4.1
 nbformat==4.4.0
+newrelic==5.2.1.129
 nose==1.3.7
 nose-parameterized==0.6.0
 notebook==5.7.6
+numba==0.46.0
 numpy==1.16.2
 oauth2==1.9.0.post1
 oauthlib==3.0.1
@@ -135,6 +143,7 @@ prompt-toolkit==2.0.9
 psycopg2==2.8.1
 ptyprocess==0.6.0
 py==1.8.0
+pyarrow==0.15.0
 pycparser==2.19
 Pygments==2.3.1
 pyparsing==2.3.1
@@ -160,7 +169,7 @@ ratelim==0.1.6
 redis==3.2.1
 requests==2.21.0
 requests-oauthlib==1.2.0
-rowingdata==2.5.4
+rowingdata==2.5.5
 rowingphysics==0.5.0
 rq==0.13.0
 scipy==1.2.1
@@ -179,7 +188,9 @@ terminado==0.8.1
 terminaltables==3.1.0
 testpath==0.4.2
 text-unidecode==1.2
+thrift==0.11.0
 timezonefinder==4.0.1
+toolz==0.10.0
 tornado==6.0.1
 tqdm==4.31.1
 traitlets==4.3.2
@@ -196,3 +207,4 @@ xlrd==1.2.0
 xmltodict==0.12.0
 yamjam==0.1.7
 yamllint==1.15.0
+yuicompressor==2.4.8
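Note: most of the new pins above are one coherent move to a dask + parquet pipeline: dask is bumped from 1.1.4 to 2.6.0 and gains its ecosystem (cloudpickle, fsspec, toolz, numba/llvmlite), with pyarrow 0.15.0 and fastparquet 0.3.2 as parquet engines; the rest (hvplot, minify, newrelic, thrift, yuicompressor) look like unrelated pins. A quick sanity check that the pinned stack imports together — a hedged sketch, not part of the commit, and the printed versions should simply match the pins above:

    # Sketch: confirm the newly pinned dask/parquet stack is importable.
    import dask
    import pyarrow
    import fastparquet
    import numba

    print(dask.__version__, pyarrow.__version__)  # expect 2.6.0 and 0.15.0 per the pins

The application-code changes below use that stack.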
@@ -4,7 +4,6 @@ from __future__ import print_function
 from __future__ import unicode_literals
 
 
-
 # All the data preparation, data cleaning and data mangling should
 # be defined here
 from __future__ import unicode_literals, absolute_import
@@ -26,6 +25,8 @@ from rowers.tasks import handle_sendemail_unrecognized
 from rowers.tasks import handle_zip_file
 
 from pandas import DataFrame, Series
+import dask.dataframe as dd
+from dask.delayed import delayed
 
 from django.utils import timezone
 from django.utils.timezone import get_current_timezone
@@ -349,7 +350,7 @@ def clean_df_stats(datadf, workstrokesonly=True, ignorehr=True,
     # clean data remove zeros and negative values
 
     # bring metrics which have negative values to positive domain
-    if datadf.empty:
+    if len(datadf)==0:
         return datadf
     try:
         datadf['catch'] = -datadf['catch']
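Note: the switch from datadf.empty to len(datadf)==0 is presumably for dask compatibility: pandas' cheap .empty property is not usefully available on a dask DataFrame, while len() works on both (on dask at the cost of a row count). A small pandas-only illustration — is_empty is a hypothetical helper, not from the module:

    import pandas as pd

    def is_empty(df):
        # Counts rows; agrees with pandas' df.empty for row-emptiness
        # and is assumed here to also work on dask DataFrames.
        return len(df) == 0

    print(is_empty(pd.DataFrame()), pd.DataFrame().empty)  # True True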
@@ -1771,9 +1772,32 @@ def getrowdata_db(id=0, doclean=False, convertnewtons=True,
 
 # Fetch a subset of the data from the DB
 
 
 def getsmallrowdata_db(columns, ids=[], doclean=True,workstrokesonly=True):
     prepmultipledata(ids)
+
+    csvfilenames = ['media/strokedata_{id}.parquet'.format(id=id) for id in ids]
+    data = []
+    columns = [c for c in columns if c != 'None']
+
+    for f in csvfilenames:
+        df = dd.read_parquet(f,columns=columns,engine='pyarrow')
+        data.append(df)
+
+    df = dd.concat(data,axis=0)
+    data = df.compute()
+    data = data.loc[:,~data.columns.duplicated()]
+    extracols = []
+    if doclean:
+        data = clean_df_stats(data, ignorehr=True,
+                              workstrokesonly=workstrokesonly)
+    data.dropna(axis=1,how='all',inplace=True)
+    data.dropna(axis=0,how='any',inplace=True)
+
+    return data
+
+def getsmallrowdata_db_old(columns, ids=[], doclean=True, workstrokesonly=True):
+    prepmultipledata(ids)
     data,extracols = read_cols_df_sql(ids, columns)
     if extracols and len(ids)==1:
         w = Workout.objects.get(id=ids[0])
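Note: the rewritten getsmallrowdata_db builds one lazy dd.read_parquet per workout file and concatenates everything before a single compute(). A self-contained sketch of that pattern, under the assumption that each workout lives in its own parquet file — file names and the watts column are illustrative fixtures, not the app's schema:

    # Sketch: lazily read several per-workout parquet files, concat, compute once.
    import pandas as pd
    import dask.dataframe as dd

    # Illustrative fixtures standing in for media/strokedata_<id>.parquet.
    for i in (1, 2):
        pd.DataFrame({'workoutid': [i] * 3,
                      'watts': [150.0 + i, 160.0 + i, 170.0 + i]}).to_parquet(
            'strokedata_{id}.parquet'.format(id=i), engine='pyarrow')

    parts = [dd.read_parquet('strokedata_{id}.parquet'.format(id=i),
                             columns=['workoutid', 'watts'], engine='pyarrow')
             for i in (1, 2)]
    combined = dd.concat(parts, axis=0)  # still lazy: no I/O yet
    data = combined.compute()            # one materialization into pandas
    print(data.shape)                    # (6, 2)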
@@ -1850,31 +1874,20 @@ def getrowdata(id=0):
 # safety net for programming errors elsewhere in the app
 # Also used heavily when I moved from CSV file only to CSV+Stroke data
 
+import glob
 
 def prepmultipledata(ids, verbose=False):
-    query = sa.text('SELECT DISTINCT workoutid FROM strokedata')
-    engine = create_engine(database_url, echo=False)
+    filenames = glob.glob('media/*.parquet')
+    ids = [id for id in ids if 'media/strokedata_{id}.parquet'.format(id=id) not in filenames]
 
-    with engine.connect() as conn, conn.begin():
-        res = conn.execute(query)
-        res = list(itertools.chain.from_iterable(res.fetchall()))
-    conn.close()
-    engine.dispose()
-
-    try:
-        ids2 = [int(id) for id in ids]
-    except ValueError:
-        ids2 = ids
-
-    res = list(set(ids2) - set(res))
-    for id in res:
+    for id in ids:
         rowdata, row = getrowdata(id=id)
         if verbose:
             print(id)
         if rowdata and len(rowdata.df):
             data = dataprep(rowdata.df, id=id, bands=True,
                             barchart=True, otwpower=True)
-    return res
+    return ids
 
 # Read a set of columns for a set of workout ids, returns data as a
 # pandas dataframe
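Note: prepmultipledata now derives its work list from the parquet cache on disk instead of a DISTINCT query over the strokedata table. A minimal standalone sketch of the glob filter — missing_ids is a hypothetical helper; the path pattern mirrors the media/strokedata_<id>.parquet convention above:

    # Sketch: keep only the ids whose parquet cache file is missing.
    import glob

    def missing_ids(ids, pattern='media/strokedata_{id}.parquet'):
        cached = set(glob.glob('media/*.parquet'))
        return [i for i in ids if pattern.format(id=i) not in cached]

    print(missing_ids([1, 2, 3]))  # with an empty media/ dir: [1, 2, 3]

One caveat worth noting: the membership test compares exact relative path strings, so it is sensitive to the working directory and, on Windows, to path separators.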
@@ -2292,19 +2305,6 @@ def dataprep(rowdatadf, id=0, bands=True, barchart=True, otwpower=True,
     except KeyError:
         rowdatadf[' ElapsedTime (sec)'] = rowdatadf['TimeStamp (sec)']
 
-    if barchart:
-        # time increments for bar chart
-        time_increments = rowdatadf.loc[:, ' ElapsedTime (sec)'].diff()
-        try:
-            time_increments.iloc[0] = time_increments.iloc[1]
-        except (KeyError, IndexError):
-            time_increments.iloc[0] = 1.
-
-        time_increments = 0.5 * time_increments + 0.5 * np.abs(time_increments)
-        x_right = (t2 + time_increments.apply(lambda x: timedeltaconv(x)))
-
-        data['x_right'] = x_right
-
     if empower:
         try:
             wash = rowdatadf.loc[:, 'wash']
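Note: in the deleted barchart block, 0.5 * x + 0.5 * np.abs(x) is just an elementwise max(x, 0) — negative ElapsedTime increments were being clamped to zero before computing the bar right edges:

    # Sketch: the deleted expression is equivalent to clamping at zero.
    import numpy as np

    x = np.array([-2.0, 0.0, 3.0])
    print(0.5 * x + 0.5 * np.abs(x))  # [0. 0. 3.]
    print(np.maximum(x, 0))           # identical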
@@ -2441,12 +2441,10 @@ def dataprep(rowdatadf, id=0, bands=True, barchart=True, otwpower=True,
     # write data if id given
     if id != 0:
         data['workoutid'] = id
+        filename = 'media/strokedata_{id}.parquet'.format(id=id)
+        # df = dd.from_pandas(data,npartitions=1)
+        data.to_parquet(filename,engine='pyarrow')
 
-        engine = create_engine(database_url, echo=False)
-        with engine.connect() as conn, conn.begin():
-            data.to_sql('strokedata', engine, if_exists='append', index=False)
-        conn.close()
-        engine.dispose()
     return data
 
 
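Note: the write side now mirrors the read side — dataprep persists each workout to its own parquet file instead of appending rows to a shared strokedata table. Unlike to_sql(..., if_exists='append'), rewriting the file is idempotent, which is what makes the glob-based cache in prepmultipledata safe to rebuild. A tiny sketch of that property (local file name is illustrative):

    # Sketch: per-workout parquet writes overwrite rather than append.
    import pandas as pd

    df = pd.DataFrame({'workoutid': [7, 7], 'watts': [150.0, 160.0]})
    df.to_parquet('strokedata_7.parquet', engine='pyarrow')
    df.to_parquet('strokedata_7.parquet', engine='pyarrow')  # idempotent rewrite
    print(len(pd.read_parquet('strokedata_7.parquet')))      # 2, not 4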