using dask
This commit is contained in:
@@ -1779,11 +1779,15 @@ def getsmallrowdata_db(columns, ids=[], doclean=True,workstrokesonly=True):
|
|||||||
data = []
|
data = []
|
||||||
columns = [c for c in columns if c != 'None']
|
columns = [c for c in columns if c != 'None']
|
||||||
|
|
||||||
|
if len(ids)>1:
|
||||||
for f in csvfilenames:
|
for f in csvfilenames:
|
||||||
df = dd.read_parquet(f,columns=columns,engine='pyarrow')
|
df = dd.read_parquet(f,columns=columns,engine='pyarrow')
|
||||||
data.append(df)
|
data.append(df)
|
||||||
|
|
||||||
|
|
||||||
df = dd.concat(data,axis=0)
|
df = dd.concat(data,axis=0)
|
||||||
|
else:
|
||||||
|
df = dd.read_parquet(csvfilenames[0],columns=columns,engine='pyarrow')
|
||||||
|
|
||||||
data = df.compute()
|
data = df.compute()
|
||||||
data = data.loc[:,~data.columns.duplicated()]
|
data = data.loc[:,~data.columns.duplicated()]
|
||||||
@@ -2443,7 +2447,7 @@ def dataprep(rowdatadf, id=0, bands=True, barchart=True, otwpower=True,
|
|||||||
data['workoutid'] = id
|
data['workoutid'] = id
|
||||||
filename = 'media/strokedata_{id}.parquet'.format(id=id)
|
filename = 'media/strokedata_{id}.parquet'.format(id=id)
|
||||||
# df = dd.from_pandas(data,npartitions=1)
|
# df = dd.from_pandas(data,npartitions=1)
|
||||||
data.to_parquet(filename,engine='pyarrow')
|
data.to_parquet(filename,engine='pyarrow',compression='gzip')
|
||||||
|
|
||||||
return data
|
return data
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user