Skip to content

Commit

Permalink
[dask] Dask Vector types for group, init_score, sample_weights (fixes #…
Browse files Browse the repository at this point in the history
  • Loading branch information
ffineis authored Jun 15, 2021
1 parent 9d9e9b8 commit 5af7eb7
Showing 1 changed file with 26 additions and 25 deletions.
51 changes: 26 additions & 25 deletions python-package/lightgbm/dask.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@

_DaskCollection = Union[dask_Array, dask_DataFrame, dask_Series]
_DaskMatrixLike = Union[dask_Array, dask_DataFrame]
+ _DaskVectorLike = Union[dask_Array, dask_Series]
_DaskPart = Union[np.ndarray, pd_DataFrame, pd_Series, ss.spmatrix]
_PredictionDtype = Union[Type[np.float32], Type[np.float64], Type[np.int32], Type[np.int64]]

Expand Down Expand Up @@ -214,9 +215,9 @@ def _train(
label: _DaskCollection,
params: Dict[str, Any],
model_factory: Type[LGBMModel],
- sample_weight: Optional[_DaskCollection] = None,
- init_score: Optional[_DaskCollection] = None,
- group: Optional[_DaskCollection] = None,
+ sample_weight: Optional[_DaskVectorLike] = None,
+ init_score: Optional[_DaskVectorLike] = None,
+ group: Optional[_DaskVectorLike] = None,
**kwargs: Any
) -> LGBMModel:
"""Inner train routine.
Expand All @@ -233,11 +234,11 @@ def _train(
Parameters passed to constructor of the local underlying model.
model_factory : lightgbm.LGBMClassifier, lightgbm.LGBMRegressor, or lightgbm.LGBMRanker class
Class of the local underlying model.
- sample_weight : Dask Array, Dask DataFrame, Dask Series of shape = [n_samples] or None, optional (default=None)
+ sample_weight : Dask Array or Dask Series of shape = [n_samples] or None, optional (default=None)
Weights of training data.
- init_score : Dask Array, Dask DataFrame, Dask Series of shape = [n_samples] or None, optional (default=None)
+ init_score : Dask Array or Dask Series of shape = [n_samples] or None, optional (default=None)
Init score of training data.
- group : Dask Array, Dask DataFrame, Dask Series of shape = [n_samples] or None, optional (default=None)
+ group : Dask Array or Dask Series or None, optional (default=None)
Group/query data.
Only used in the learning-to-rank task.
sum(group) = n_samples.
Expand Down Expand Up @@ -603,9 +604,9 @@ def _lgb_dask_fit(
model_factory: Type[LGBMModel],
X: _DaskMatrixLike,
y: _DaskCollection,
- sample_weight: Optional[_DaskCollection] = None,
- init_score: Optional[_DaskCollection] = None,
- group: Optional[_DaskCollection] = None,
+ sample_weight: Optional[_DaskVectorLike] = None,
+ init_score: Optional[_DaskVectorLike] = None,
+ group: Optional[_DaskVectorLike] = None,
**kwargs: Any
) -> "_DaskLGBMModel":
if not all((DASK_INSTALLED, PANDAS_INSTALLED, SKLEARN_INSTALLED)):
Expand Down Expand Up @@ -721,8 +722,8 @@ def fit(
self,
X: _DaskMatrixLike,
y: _DaskCollection,
- sample_weight: Optional[_DaskCollection] = None,
- init_score: Optional[_DaskCollection] = None,
+ sample_weight: Optional[_DaskVectorLike] = None,
+ init_score: Optional[_DaskVectorLike] = None,
**kwargs: Any
) -> "DaskLGBMClassifier":
"""Docstring is inherited from the lightgbm.LGBMClassifier.fit."""
Expand All @@ -738,9 +739,9 @@ def fit(
_base_doc = _lgbmmodel_doc_fit.format(
X_shape="Dask Array or Dask DataFrame of shape = [n_samples, n_features]",
y_shape="Dask Array, Dask DataFrame or Dask Series of shape = [n_samples]",
- sample_weight_shape="Dask Array, Dask DataFrame, Dask Series of shape = [n_samples] or None, optional (default=None)",
- init_score_shape="Dask Array, Dask DataFrame, Dask Series of shape = [n_samples] or None, optional (default=None)",
- group_shape="Dask Array, Dask DataFrame, Dask Series of shape = [n_samples] or None, optional (default=None)"
+ sample_weight_shape="Dask Array or Dask Series of shape = [n_samples] or None, optional (default=None)",
+ init_score_shape="Dask Array or Dask Series of shape = [n_samples] or None, optional (default=None)",
+ group_shape="Dask Array or Dask Series or None, optional (default=None)"
)

# DaskLGBMClassifier does not support evaluation data, or early stopping
Expand Down Expand Up @@ -871,8 +872,8 @@ def fit(
self,
X: _DaskMatrixLike,
y: _DaskCollection,
- sample_weight: Optional[_DaskCollection] = None,
- init_score: Optional[_DaskCollection] = None,
+ sample_weight: Optional[_DaskVectorLike] = None,
+ init_score: Optional[_DaskVectorLike] = None,
**kwargs: Any
) -> "DaskLGBMRegressor":
"""Docstring is inherited from the lightgbm.LGBMRegressor.fit."""
Expand All @@ -888,9 +889,9 @@ def fit(
_base_doc = _lgbmmodel_doc_fit.format(
X_shape="Dask Array or Dask DataFrame of shape = [n_samples, n_features]",
y_shape="Dask Array, Dask DataFrame or Dask Series of shape = [n_samples]",
- sample_weight_shape="Dask Array, Dask DataFrame, Dask Series of shape = [n_samples] or None, optional (default=None)",
- init_score_shape="Dask Array, Dask DataFrame, Dask Series of shape = [n_samples] or None, optional (default=None)",
- group_shape="Dask Array, Dask DataFrame, Dask Series of shape = [n_samples] or None, optional (default=None)"
+ sample_weight_shape="Dask Array or Dask Series of shape = [n_samples] or None, optional (default=None)",
+ init_score_shape="Dask Array or Dask Series of shape = [n_samples] or None, optional (default=None)",
+ group_shape="Dask Array or Dask Series or None, optional (default=None)"
)

# DaskLGBMRegressor does not support evaluation data, or early stopping
Expand Down Expand Up @@ -1003,9 +1004,9 @@ def fit(
self,
X: _DaskMatrixLike,
y: _DaskCollection,
- sample_weight: Optional[_DaskCollection] = None,
- init_score: Optional[_DaskCollection] = None,
- group: Optional[_DaskCollection] = None,
+ sample_weight: Optional[_DaskVectorLike] = None,
+ init_score: Optional[_DaskVectorLike] = None,
+ group: Optional[_DaskVectorLike] = None,
**kwargs: Any
) -> "DaskLGBMRanker":
"""Docstring is inherited from the lightgbm.LGBMRanker.fit."""
Expand All @@ -1022,9 +1023,9 @@ def fit(
_base_doc = _lgbmmodel_doc_fit.format(
X_shape="Dask Array or Dask DataFrame of shape = [n_samples, n_features]",
y_shape="Dask Array, Dask DataFrame or Dask Series of shape = [n_samples]",
- sample_weight_shape="Dask Array, Dask DataFrame, Dask Series of shape = [n_samples] or None, optional (default=None)",
- init_score_shape="Dask Array, Dask DataFrame, Dask Series of shape = [n_samples] or None, optional (default=None)",
- group_shape="Dask Array, Dask DataFrame, Dask Series of shape = [n_samples] or None, optional (default=None)"
+ sample_weight_shape="Dask Array or Dask Series of shape = [n_samples] or None, optional (default=None)",
+ init_score_shape="Dask Array or Dask Series of shape = [n_samples] or None, optional (default=None)",
+ group_shape="Dask Array or Dask Series or None, optional (default=None)"
)

# DaskLGBMRanker does not support evaluation data, or early stopping
Expand Down

0 comments on commit 5af7eb7

Please sign in to comment.