Merge pull request #5596 from bdice/branch-23.12-merge-23.10
Forward-merge branch-23.10 to branch-23.12
raydouglass authored Oct 2, 2023
2 parents c1c7347 + 99e0c36 commit 6d3598f
Showing 13 changed files with 218 additions and 39 deletions.
2 changes: 1 addition & 1 deletion ci/test_wheel.sh
@@ -13,7 +13,7 @@ if [[ "$(arch)" == "aarch64" ]]; then
fi

# Always install latest dask for testing
python -m pip install git+https://github.com/dask/dask.git@main git+https://github.com/dask/distributed.git@main git+https://github.com/rapidsai/dask-cuda.git@branch-23.12
python -m pip install git+https://github.com/dask/dask.git@2023.9.2 git+https://github.com/dask/distributed.git@2023.9.2 git+https://github.com/rapidsai/dask-cuda.git@branch-23.12

# echo to expand wildcard before adding `[extra]` requires for pip
python -m pip install $(echo ./dist/cuml*.whl)[test]
6 changes: 3 additions & 3 deletions conda/environments/all_cuda-118_arch-x86_64.yaml
@@ -16,12 +16,12 @@ dependencies:
- cupy>=12.0.0
- cxx-compiler
- cython>=3.0.0
- dask-core>=2023.7.1
- dask-core==2023.9.2
- dask-cuda==23.12.*
- dask-cudf==23.12.*
- dask-ml
- dask>=2023.7.1
- distributed>=2023.7.1
- dask==2023.9.2
- distributed==2023.9.2
- doxygen=1.9.1
- gcc_linux-64=11.*
- gmock>=1.13.0
6 changes: 3 additions & 3 deletions conda/environments/all_cuda-120_arch-x86_64.yaml
@@ -18,12 +18,12 @@ dependencies:
- cupy>=12.0.0
- cxx-compiler
- cython>=3.0.0
- dask-core>=2023.7.1
- dask-core==2023.9.2
- dask-cuda==23.12.*
- dask-cudf==23.12.*
- dask-ml
- dask>=2023.7.1
- distributed>=2023.7.1
- dask==2023.9.2
- distributed==2023.9.2
- doxygen=1.9.1
- gcc_linux-64=11.*
- gmock>=1.13.0
6 changes: 3 additions & 3 deletions conda/recipes/cuml/meta.yaml
@@ -76,9 +76,9 @@ requirements:
- cudf ={{ minor_version }}
- cupy >=12.0.0
- dask-cudf ={{ minor_version }}
- dask >=2023.7.1
- dask-core>=2023.7.1
- distributed >=2023.7.1
- dask ==2023.9.2
- dask-core==2023.9.2
- distributed ==2023.9.2
- joblib >=0.11
- libcuml ={{ version }}
- libcumlprims ={{ minor_version }}
12 changes: 12 additions & 0 deletions cpp/include/cuml/linear_model/qn_mg.hpp
@@ -21,12 +21,24 @@

#include <cumlprims/opg/matrix/data.hpp>
#include <cumlprims/opg/matrix/part_descriptor.hpp>
#include <vector>
using namespace MLCommon;

namespace ML {
namespace GLM {
namespace opg {

/**
* @brief Calculate unique class labels across multiple GPUs in a multi-node environment.
* @param[in] handle: the internal cuml handle object
* @param[in] input_desc: PartDescriptor object for the input
* @param[in] labels: labels data
* @returns host vector that stores the distinct labels
*/
std::vector<float> getUniquelabelsMG(const raft::handle_t& handle,
Matrix::PartDescriptor& input_desc,
std::vector<Matrix::Data<float>*>& labels);

/**
* @brief performs MNMG fit operation for the logistic regression using quasi newton methods
* @param[in] handle: the internal cuml handle object
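The new getUniquelabelsMG entry point returns the distinct labels as a host-side vector. As an illustration of how the Python layer consumes it (mirroring the .pyx change further down; the label values here are made up):

```python
# Illustrative only: turning the host vector returned by getUniquelabelsMG
# into the estimator's classes_ / _num_classes, as the Cython layer below does.
import numpy as np

c_classes_ = [1.0, 0.0, 2.0]  # hypothetical distinct labels gathered across ranks
classes_ = np.sort(list(c_classes_)).astype("float32")
print(classes_)       # [0. 1. 2.]
print(len(classes_))  # 3 -> _num_classes, which selects sigmoid vs. softmax
```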
6 changes: 6 additions & 0 deletions cpp/src/glm/qn/mg/qn_mg.cuh
@@ -103,6 +103,12 @@ inline void qn_fit_x_mg(const raft::handle_t& handle,
ML::GLM::opg::qn_fit_mg<T, decltype(loss)>(
handle, pams, loss, X, y, Z, w0_data, f, num_iters, n_samples, rank, n_ranks);
} break;
case QN_LOSS_SOFTMAX: {
ASSERT(C > 2, "qn_mg.cuh: softmax invalid C");
ML::GLM::detail::Softmax<T> loss(handle, D, C, pams.fit_intercept);
ML::GLM::opg::qn_fit_mg<T, decltype(loss)>(
handle, pams, loss, X, y, Z, w0_data, f, num_iters, n_samples, rank, n_ranks);
} break;
default: {
ASSERT(false, "qn_mg.cuh: unknown loss function type (id = %d).", pams.loss);
}
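The added QN_LOSS_SOFTMAX branch complements the existing binary logistic case. A small Python mirror of the resulting dispatch, purely illustrative and not part of this diff:

```python
# Sketch of the loss selection around qn_fit_x_mg: the logistic branch covers
# the binary case, the new softmax branch requires more than two classes.
def select_qn_loss(n_classes: int) -> str:
    if n_classes <= 2:
        return "sigmoid"   # QN_LOSS_LOGISTIC
    return "softmax"       # QN_LOSS_SOFTMAX, ASSERT(C > 2)

assert select_qn_loss(2) == "sigmoid"
assert select_qn_loss(5) == "softmax"
```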
66 changes: 55 additions & 11 deletions cpp/src/glm/qn_mg.cu
@@ -21,15 +21,59 @@
#include <cuml/linear_model/qn.h>
#include <cuml/linear_model/qn_mg.hpp>
#include <raft/core/comms.hpp>
#include <raft/core/device_mdarray.hpp>
#include <raft/core/error.hpp>
#include <raft/core/handle.hpp>
#include <raft/label/classlabels.cuh>
#include <raft/util/cudart_utils.hpp>
#include <vector>
using namespace MLCommon;

namespace ML {
namespace GLM {
namespace opg {

template <typename T>
std::vector<T> distinct_mg(const raft::handle_t& handle, T* y, size_t n)
{
cudaStream_t stream = handle.get_stream();
raft::comms::comms_t const& comm = raft::resource::get_comms(handle);
int rank = comm.get_rank();
int n_ranks = comm.get_size();

rmm::device_uvector<T> unique_y(0, stream);
raft::label::getUniquelabels(unique_y, y, n, stream);

rmm::device_uvector<size_t> recv_counts(n_ranks, stream);
auto send_count = raft::make_device_scalar<size_t>(handle, unique_y.size());
comm.allgather(send_count.data_handle(), recv_counts.data(), 1, stream);
comm.sync_stream(stream);

std::vector<size_t> recv_counts_host(n_ranks);
raft::copy(recv_counts_host.data(), recv_counts.data(), n_ranks, stream);

std::vector<size_t> displs(n_ranks);
size_t pos = 0;
for (int i = 0; i < n_ranks; ++i) {
displs[i] = pos;
pos += recv_counts_host[i];
}

rmm::device_uvector<T> recv_buff(displs.back() + recv_counts_host.back(), stream);
comm.allgatherv(
unique_y.data(), recv_buff.data(), recv_counts_host.data(), displs.data(), stream);
comm.sync_stream(stream);

rmm::device_uvector<T> global_unique_y(0, stream);
int n_distinct =
raft::label::getUniquelabels(global_unique_y, recv_buff.data(), recv_buff.size(), stream);

std::vector<T> global_unique_y_host(global_unique_y.size());
raft::copy(global_unique_y_host.data(), global_unique_y.data(), global_unique_y.size(), stream);

return global_unique_y_host;
}

template <typename T>
void qnFit_impl(const raft::handle_t& handle,
const qn_params& pams,
@@ -46,17 +90,6 @@ void qnFit_impl(const raft::handle_t& handle,
int rank,
int n_ranks)
{
switch (pams.loss) {
case QN_LOSS_LOGISTIC: {
RAFT_EXPECTS(
C == 2,
"qn_mg.cu: only the LOGISTIC loss is supported currently. The number of classes must be 2");
} break;
default: {
RAFT_EXPECTS(false, "qn_mg.cu: unknown loss function type (id = %d).", pams.loss);
}
}

auto X_simple = SimpleDenseMat<T>(X, N, D, X_col_major ? COL_MAJOR : ROW_MAJOR);

ML::GLM::opg::qn_fit_x_mg(handle,
@@ -113,6 +146,17 @@ void qnFit_impl(raft::handle_t& handle,
input_desc.uniqueRanks().size());
}

std::vector<float> getUniquelabelsMG(const raft::handle_t& handle,
Matrix::PartDescriptor& input_desc,
std::vector<Matrix::Data<float>*>& labels)
{
RAFT_EXPECTS(labels.size() == 1,
"getUniqueLabelsMG currently does not accept more than one data chunk");
Matrix::Data<float>* data_y = labels[0];
int n_rows = input_desc.totalElementsOwnedBy(input_desc.rank);
return distinct_mg<float>(handle, data_y->ptr, n_rows);
}

void qnFit(raft::handle_t& handle,
std::vector<Matrix::Data<float>*>& input_data,
Matrix::PartDescriptor& input_desc,
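distinct_mg gathers the per-rank unique labels into a single global set: each rank computes its local unique labels, the counts are allgathered so every rank can size and slice the receive buffer, an allgatherv concatenates the local sets, and a second unique pass yields the global labels. A single-process Python sketch with the collectives emulated by plain arrays (names and values are illustrative only):

```python
import numpy as np

def distinct_mg_emulated(per_rank_labels):
    # per-rank getUniquelabels
    local_unique = [np.unique(y) for y in per_rank_labels]

    # allgather of the per-rank unique counts
    recv_counts = [u.size for u in local_unique]

    # exclusive prefix sum -> displacements into the receive buffer
    displs, pos = [], 0
    for c in recv_counts:
        displs.append(pos)
        pos += c

    # allgatherv: every rank ends up with the same concatenated buffer
    recv_buff = np.empty(pos, dtype=local_unique[0].dtype)
    for u, d, c in zip(local_unique, displs, recv_counts):
        recv_buff[d:d + c] = u

    # second unique pass gives the global distinct labels
    return np.unique(recv_buff)

print(distinct_mg_emulated([np.array([0.0, 1.0, 1.0]), np.array([1.0, 2.0])]))  # [0. 1. 2.]
```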
6 changes: 3 additions & 3 deletions dependencies.yaml
@@ -175,10 +175,10 @@ dependencies:
- output_types: [conda, requirements, pyproject]
packages:
- cudf==23.12.*
- dask>=2023.7.1
- dask==2023.9.2
- dask-cuda==23.12.*
- dask-cudf==23.12.*
- distributed>=2023.7.1
- distributed==2023.9.2
- joblib>=0.11
- numba>=0.57
# TODO: Is scipy really a hard dependency, or should
@@ -192,7 +192,7 @@ dependencies:
- cupy>=12.0.0
- output_types: conda
packages:
- dask-core>=2023.7.1
- dask-core==2023.9.2
- output_types: pyproject
packages:
- *treelite_runtime
4 changes: 2 additions & 2 deletions python/README.md
@@ -70,8 +70,8 @@ Packages required for multigpu algorithms*:
- ucx-py version matching the cuML version
- dask-cudf version matching the cuML version
- nccl>=2.5
- dask>=2023.7.1
- distributed>=2023.7.1
- dask==2023.9.2
- distributed==2023.9.2

* this can be avoided with `--singlegpu` argument flag.

9 changes: 8 additions & 1 deletion python/cuml/dask/linear_model/logistic_regression.py
@@ -174,4 +174,11 @@ def _create_model(sessionId, datatype, **kwargs):
def _func_fit(f, data, n_rows, n_cols, partsToSizes, rank):
inp_X = concatenate([X for X, _ in data])
inp_y = concatenate([y for _, y in data])
return f.fit([(inp_X, inp_y)], n_rows, n_cols, partsToSizes, rank)
n_ranks = max([p[0] for p in partsToSizes]) + 1
aggregated_partsToSizes = [[i, 0] for i in range(n_ranks)]
for p in partsToSizes:
aggregated_partsToSizes[p[0]][1] += p[1]

return f.fit(
[(inp_X, inp_y)], n_rows, n_cols, aggregated_partsToSizes, rank
)
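A small worked example of the aggregation above, with made-up partition sizes: partsToSizes arrives as one (rank, n_rows) pair per partition, and fit now receives a single accumulated entry per rank.

```python
partsToSizes = [(0, 1000), (0, 500), (1, 750), (1, 250)]  # hypothetical partitions

n_ranks = max(p[0] for p in partsToSizes) + 1
aggregated_partsToSizes = [[i, 0] for i in range(n_ranks)]
for rank, size in partsToSizes:
    aggregated_partsToSizes[rank][1] += size

print(aggregated_partsToSizes)  # [[0, 1500], [1, 1000]]
```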
28 changes: 23 additions & 5 deletions python/cuml/linear_model/logistic_regression_mg.pyx
@@ -79,11 +79,18 @@ cdef extern from "cuml/linear_model/qn_mg.hpp" namespace "ML::GLM::opg" nogil:
float *f,
int *num_iters) except +

cdef vector[float] getUniquelabelsMG(
const handle_t& handle,
PartDescriptor &input_desc,
vector[floatData_t*] labels) except+


class LogisticRegressionMG(MGFitMixin, LogisticRegression):

def __init__(self, **kwargs):
super(LogisticRegressionMG, self).__init__(**kwargs)
if self.penalty != "l2" and self.penalty != "none":
assert False, "Currently only support 'l2' and 'none' penalty"

@property
@cuml.internals.api_base_return_array_skipall
@@ -102,8 +109,8 @@ class LogisticRegressionMG(MGFitMixin, LogisticRegression):

self.solver_model.coef_ = value

def prepare_for_fit(self, n_classes):
self.solver_model.qnparams = QNParams(
def create_qnparams(self):
return QNParams(
loss=self.loss,
penalty_l1=self.l1_strength,
penalty_l2=self.l2_strength,
@@ -118,8 +125,11 @@ class LogisticRegressionMG(MGFitMixin, LogisticRegression):
penalty_normalized=self.penalty_normalized
)

def prepare_for_fit(self, n_classes):
self.solver_model.qnparams = self.create_qnparams()

# modified
qnpams = self.qnparams.params
qnpams = self.solver_model.qnparams.params

# modified qnp
solves_classification = qnpams['loss'] in {
@@ -174,8 +184,14 @@ class LogisticRegressionMG(MGFitMixin, LogisticRegression):
cdef float objective32
cdef int num_iters

# TODO: calculate _num_classes at runtime
self._num_classes = 2
cdef vector[float] c_classes_
c_classes_ = getUniquelabelsMG(
handle_[0],
deref(<PartDescriptor*><uintptr_t>input_desc),
deref(<vector[floatData_t*]*><uintptr_t>y))
self.classes_ = np.sort(list(c_classes_)).astype('float32')

self._num_classes = len(self.classes_)
self.loss = "sigmoid" if self._num_classes <= 2 else "softmax"
self.prepare_for_fit(self._num_classes)
cdef uintptr_t mat_coef_ptr = self.coef_.ptr
Expand All @@ -194,6 +210,8 @@ class LogisticRegressionMG(MGFitMixin, LogisticRegression):
self._num_classes,
<float*> &objective32,
<int*> &num_iters)
else:
assert False, "dtypes other than float32 are currently not supported yet. See issue: https://github.com/rapidsai/cuml/issues/5589"

self.solver_model._calc_intercept()

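For context, a minimal end-to-end sketch of exercising this estimator through cuml.dask. The cluster setup, the cupy conversion step, and the example data are assumptions for illustration, not something established by this PR:

```python
import cupy as cp
import dask.array as da
import numpy as np
from dask.distributed import Client
from dask_cuda import LocalCUDACluster

from cuml.dask.linear_model import LogisticRegression

if __name__ == "__main__":
    cluster = LocalCUDACluster(n_workers=2)  # assumes two visible GPUs
    client = Client(cluster)

    # float32 only: the Cython path above rejects other dtypes.
    X = da.random.random((10_000, 20), chunks=(2_500, 20)).astype(np.float32)
    y = (da.random.random((10_000,), chunks=(2_500,)) > 0.5).astype(np.float32)

    # cuML's dask estimators generally expect GPU-backed chunks (assumption here).
    X, y = X.map_blocks(cp.asarray), y.map_blocks(cp.asarray)

    model = LogisticRegression(penalty="l2")  # only 'l2' and 'none' pass the new check
    model.fit(X, y)
    print(model.classes_)  # sorted unique labels gathered across workers, e.g. [0. 1.]

    client.close()
    cluster.close()
```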