Skip to content

Commit

Permalink
Merge branch 'branch-23.02' into fusedL2NN_cutlass
Browse files Browse the repository at this point in the history
  • Loading branch information
mdoijade authored Jan 25, 2023
2 parents a084d82 + 7c12b1e commit 8169088
Show file tree
Hide file tree
Showing 73 changed files with 3,451 additions and 1,260 deletions.
6 changes: 4 additions & 2 deletions cpp/bench/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,10 @@ if(BUILD_BENCH)
bench/main.cpp
)

ConfigureBench(NAME MATRIX_BENCH PATH bench/matrix/argmin.cu bench/main.cpp)
ConfigureBench(
NAME MATRIX_BENCH PATH bench/matrix/argmin.cu bench/matrix/gather.cu bench/matrix/select_k.cu
bench/main.cpp
)

ConfigureBench(
NAME RANDOM_BENCH PATH bench/random/make_blobs.cu bench/random/permute.cu bench/random/rng.cu
Expand All @@ -127,7 +130,6 @@ if(BUILD_BENCH)
bench/neighbors/knn/ivf_pq_int8_t_int64_t.cu
bench/neighbors/knn/ivf_pq_uint8_t_uint32_t.cu
bench/neighbors/refine.cu
bench/neighbors/selection.cu
bench/main.cpp
OPTIONAL
DIST
Expand Down
17 changes: 7 additions & 10 deletions cpp/bench/matrix/argmin.cu
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2022, NVIDIA CORPORATION.
* Copyright (c) 2022-2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand All @@ -17,10 +17,11 @@
#include <common/benchmark.hpp>
#include <raft/matrix/argmin.cuh>
#include <raft/random/rng.cuh>
#include <raft/util/itertools.hpp>

#include <rmm/device_uvector.hpp>

namespace raft::bench::linalg {
namespace raft::bench::matrix {

template <typename IdxT>
struct ArgminParams {
Expand Down Expand Up @@ -57,15 +58,11 @@ struct Argmin : public fixture {
raft::device_vector<OutT, IdxT> indices;
}; // struct Argmin

const std::vector<ArgminParams<int64_t>> argmin_inputs_i64{
{1000, 64}, {1000, 128}, {1000, 256}, {1000, 512}, {1000, 1024},
{10000, 64}, {10000, 128}, {10000, 256}, {10000, 512}, {10000, 1024},
{100000, 64}, {100000, 128}, {100000, 256}, {100000, 512}, {100000, 1024},
{1000000, 64}, {1000000, 128}, {1000000, 256}, {1000000, 512}, {1000000, 1024},
{10000000, 64}, {10000000, 128}, {10000000, 256}, {10000000, 512}, {10000000, 1024},
};
const std::vector<ArgminParams<int64_t>> argmin_inputs_i64 =
raft::util::itertools::product<ArgminParams<int64_t>>({1000, 10000, 100000, 1000000, 10000000},
{64, 128, 256, 512, 1024});

RAFT_BENCH_REGISTER((Argmin<float, uint32_t, int64_t>), "", argmin_inputs_i64);
RAFT_BENCH_REGISTER((Argmin<double, uint32_t, int64_t>), "", argmin_inputs_i64);

} // namespace raft::bench::linalg
} // namespace raft::bench::matrix
101 changes: 101 additions & 0 deletions cpp/bench/matrix/gather.cu
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
/*
* Copyright (c) 2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include <common/benchmark.hpp>
#include <raft/matrix/gather.cuh>
#include <raft/random/rng.cuh>
#include <raft/util/itertools.hpp>

#include <rmm/device_uvector.hpp>

namespace raft::bench::matrix {

template <typename IdxT>
struct GatherParams {
IdxT rows, cols, map_length;
};

template <typename IdxT>
inline auto operator<<(std::ostream& os, const GatherParams<IdxT>& p) -> std::ostream&
{
os << p.rows << "#" << p.cols << "#" << p.map_length;
return os;
}

template <typename T, typename MapT, typename IdxT, bool Conditional = false>
struct Gather : public fixture {
Gather(const GatherParams<IdxT>& p) : params(p) {}

void allocate_data(const ::benchmark::State& state) override
{
matrix = raft::make_device_matrix<T, IdxT>(handle, params.rows, params.cols);
map = raft::make_device_vector<MapT, IdxT>(handle, params.map_length);
out = raft::make_device_matrix<T, IdxT>(handle, params.map_length, params.cols);
stencil = raft::make_device_vector<T, IdxT>(handle, Conditional ? params.map_length : IdxT(0));

raft::random::RngState rng{1234};
raft::random::uniform(
rng, matrix.data_handle(), params.rows * params.cols, T(-1), T(1), stream);
raft::random::uniformInt(
handle, rng, map.data_handle(), params.map_length, (MapT)0, (MapT)params.rows);
if constexpr (Conditional) {
raft::random::uniform(rng, stencil.data_handle(), params.map_length, T(-1), T(1), stream);
}
handle.sync_stream(stream);
}

void run_benchmark(::benchmark::State& state) override
{
std::ostringstream label_stream;
label_stream << params;
state.SetLabel(label_stream.str());

loop_on_state(state, [this]() {
auto matrix_const_view = raft::make_device_matrix_view<const T, IdxT, row_major>(
matrix.data_handle(), matrix.extent(0), matrix.extent(1));
auto map_const_view =
raft::make_device_vector_view<const MapT, IdxT>(map.data_handle(), map.extent(0));
if constexpr (Conditional) {
auto stencil_const_view =
raft::make_device_vector_view<const T, IdxT>(stencil.data_handle(), stencil.extent(0));
auto pred_op = raft::plug_const_op(T(0.0), raft::greater_op());
raft::matrix::gather_if(
handle, matrix_const_view, out.view(), map_const_view, stencil_const_view, pred_op);
} else {
raft::matrix::gather(handle, matrix_const_view, map_const_view, out.view());
}
});
}

private:
GatherParams<IdxT> params;
raft::device_matrix<T, IdxT> matrix, out;
raft::device_vector<T, IdxT> stencil;
raft::device_vector<MapT, IdxT> map;
}; // struct Gather

template <typename T, typename MapT, typename IdxT>
using GatherIf = Gather<T, MapT, IdxT, true>;

const std::vector<GatherParams<int64_t>> gather_inputs_i64 =
raft::util::itertools::product<GatherParams<int64_t>>(
{1000000}, {10, 20, 50, 100, 200, 500}, {1000, 10000, 100000, 1000000});

RAFT_BENCH_REGISTER((Gather<float, uint32_t, int64_t>), "", gather_inputs_i64);
RAFT_BENCH_REGISTER((Gather<double, uint32_t, int64_t>), "", gather_inputs_i64);
RAFT_BENCH_REGISTER((GatherIf<float, uint32_t, int64_t>), "", gather_inputs_i64);
RAFT_BENCH_REGISTER((GatherIf<double, uint32_t, int64_t>), "", gather_inputs_i64);
} // namespace raft::bench::matrix
133 changes: 133 additions & 0 deletions cpp/bench/matrix/select_k.cu
Original file line number Diff line number Diff line change
@@ -0,0 +1,133 @@
/*
* Copyright (c) 2022-2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

/**
* TODO: reconsider how to organize shared test+bench files better
* Related Issue: https://github.com/rapidsai/raft/issues/1153
* (although this header does not depend on any gtest headers)
*/
#include "../../test/matrix/select_k.cuh"

#include <common/benchmark.hpp>

#include <raft/core/handle.hpp>
#include <raft/random/rng.cuh>
#include <raft/sparse/detail/utils.h>
#include <raft/util/cudart_utils.hpp>

#include <raft/matrix/detail/select_radix.cuh>
#include <raft/matrix/detail/select_warpsort.cuh>
#include <raft/matrix/select_k.cuh>

#include <rmm/device_uvector.hpp>
#include <rmm/mr/device/per_device_resource.hpp>
#include <rmm/mr/device/pool_memory_resource.hpp>

namespace raft::matrix {

using namespace raft::bench; // NOLINT

template <typename KeyT, typename IdxT, select::Algo Algo>
struct selection : public fixture {
explicit selection(const select::params& p)
: params_(p),
in_dists_(p.batch_size * p.len, stream),
in_ids_(p.batch_size * p.len, stream),
out_dists_(p.batch_size * p.k, stream),
out_ids_(p.batch_size * p.k, stream)
{
raft::sparse::iota_fill(in_ids_.data(), IdxT(p.batch_size), IdxT(p.len), stream);
raft::random::RngState state{42};
raft::random::uniform(handle, state, in_dists_.data(), in_dists_.size(), KeyT(-1.0), KeyT(1.0));
}

void run_benchmark(::benchmark::State& state) override // NOLINT
{
handle_t handle{stream};
using_pool_memory_res res;
try {
std::ostringstream label_stream;
label_stream << params_.batch_size << "#" << params_.len << "#" << params_.k;
state.SetLabel(label_stream.str());
loop_on_state(state, [this, &handle]() {
select::select_k_impl<KeyT, IdxT>(handle,
Algo,
in_dists_.data(),
in_ids_.data(),
params_.batch_size,
params_.len,
params_.k,
out_dists_.data(),
out_ids_.data(),
params_.select_min);
});
} catch (raft::exception& e) {
state.SkipWithError(e.what());
}
}

private:
const select::params params_;
rmm::device_uvector<KeyT> in_dists_, out_dists_;
rmm::device_uvector<IdxT> in_ids_, out_ids_;
};

const std::vector<select::params> kInputs{
{20000, 500, 1, true}, {20000, 500, 2, true}, {20000, 500, 4, true},
{20000, 500, 8, true}, {20000, 500, 16, true}, {20000, 500, 32, true},
{20000, 500, 64, true}, {20000, 500, 128, true}, {20000, 500, 256, true},

{1000, 10000, 1, true}, {1000, 10000, 2, true}, {1000, 10000, 4, true},
{1000, 10000, 8, true}, {1000, 10000, 16, true}, {1000, 10000, 32, true},
{1000, 10000, 64, true}, {1000, 10000, 128, true}, {1000, 10000, 256, true},

{100, 100000, 1, true}, {100, 100000, 2, true}, {100, 100000, 4, true},
{100, 100000, 8, true}, {100, 100000, 16, true}, {100, 100000, 32, true},
{100, 100000, 64, true}, {100, 100000, 128, true}, {100, 100000, 256, true},

{10, 1000000, 1, true}, {10, 1000000, 2, true}, {10, 1000000, 4, true},
{10, 1000000, 8, true}, {10, 1000000, 16, true}, {10, 1000000, 32, true},
{10, 1000000, 64, true}, {10, 1000000, 128, true}, {10, 1000000, 256, true},
};

#define SELECTION_REGISTER(KeyT, IdxT, A) \
namespace BENCHMARK_PRIVATE_NAME(selection) \
{ \
using SelectK = selection<KeyT, IdxT, select::Algo::A>; \
RAFT_BENCH_REGISTER(SelectK, #KeyT "/" #IdxT "/" #A, kInputs); \
}

SELECTION_REGISTER(float, int, kPublicApi); // NOLINT
SELECTION_REGISTER(float, int, kRadix8bits); // NOLINT
SELECTION_REGISTER(float, int, kRadix11bits); // NOLINT
SELECTION_REGISTER(float, int, kWarpAuto); // NOLINT
SELECTION_REGISTER(float, int, kWarpImmediate); // NOLINT
SELECTION_REGISTER(float, int, kWarpFiltered); // NOLINT
SELECTION_REGISTER(float, int, kWarpDistributed); // NOLINT
SELECTION_REGISTER(float, int, kWarpDistributedShm); // NOLINT

SELECTION_REGISTER(double, int, kRadix8bits); // NOLINT
SELECTION_REGISTER(double, int, kRadix11bits); // NOLINT
SELECTION_REGISTER(double, int, kWarpAuto); // NOLINT

SELECTION_REGISTER(double, size_t, kRadix8bits); // NOLINT
SELECTION_REGISTER(double, size_t, kRadix11bits); // NOLINT
SELECTION_REGISTER(double, size_t, kWarpImmediate); // NOLINT
SELECTION_REGISTER(double, size_t, kWarpFiltered); // NOLINT
SELECTION_REGISTER(double, size_t, kWarpDistributed); // NOLINT
SELECTION_REGISTER(double, size_t, kWarpDistributedShm); // NOLINT

} // namespace raft::matrix
Loading

0 comments on commit 8169088

Please sign in to comment.