Skip to content

Commit

Permalink
Rewrite of dask interface.
Browse files Browse the repository at this point in the history
* Consider data locality in distributing data.
* New interface that looks similar to the one with single node.
  • Loading branch information
Jiaming Yuan authored and trivialfis committed Sep 5, 2019
1 parent 52d44e0 commit b46bb59
Show file tree
Hide file tree
Showing 9 changed files with 278 additions and 253 deletions.
20 changes: 0 additions & 20 deletions demo/dask/README.md

This file was deleted.

42 changes: 0 additions & 42 deletions demo/dask/dask_gpu_demo.py

This file was deleted.

68 changes: 0 additions & 68 deletions demo/dask/dask_simple_demo.py

This file was deleted.

24 changes: 24 additions & 0 deletions demo/dask/demo.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
import xgboost as xgb
from xgboost.distributed import DaskDMatrix
from dask.distributed import Client
from dask.distributed import LocalCluster
from dask import dataframe as dd


if __name__ == '__main__':
cluster = LocalCluster(n_workers=4) # or use any other clusters
client = Client(cluster)

train_csv = dd.read_csv('./train.csv')
ad_feature = dd.read_csv('./adFeature.csv')
train_csv = train_csv.merge(ad_feature, how='left', on=['aid'])

label = train_csv['label']
train_csv = train_csv.drop(labels=['label'], axis=1)

dtrain = DaskDMatrix(train_csv, label)

bst = xgb.train({'verbosity': 3}, dtrain)

with open('result', 'w') as fd:
fd.write(str(bst))
1 change: 0 additions & 1 deletion python-package/xgboost/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@
from .core import DMatrix, Booster
from .training import train, cv
from . import rabit # noqa
from . import dask # noqa
from . import tracker # noqa
from .tracker import RabitTracker # noqa
try:
Expand Down
2 changes: 1 addition & 1 deletion python-package/xgboost/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -1040,7 +1040,7 @@ def __init__(self, params=None, cache=(), model_file=None):
"""
for d in cache:
if not isinstance(d, DMatrix):
raise TypeError('invalid cache item: {}'.format(type(d).__name__))
raise TypeError('invalid cache item: {}'.format(type(d).__name__), cache)
self._validate_features(d)

dmats = c_array(ctypes.c_void_p, [d.handle for d in cache])
Expand Down
121 changes: 0 additions & 121 deletions python-package/xgboost/dask.py

This file was deleted.

Loading

0 comments on commit b46bb59

Please sign in to comment.