From 424c09d465f3cfd72cb71ec639671387863b3dfa Mon Sep 17 00:00:00 2001 From: fis Date: Tue, 14 Jul 2020 11:12:11 +0800 Subject: [PATCH 1/3] Fix sketch size calculation. --- src/common/hist_util.cu | 10 +++++++--- tests/cpp/common/test_hist_util.cu | 12 ++++++++++++ 2 files changed, 19 insertions(+), 3 deletions(-) diff --git a/src/common/hist_util.cu b/src/common/hist_util.cu index fe1305d4bf99..d95e36d85c1f 100644 --- a/src/common/hist_util.cu +++ b/src/common/hist_util.cu @@ -167,8 +167,12 @@ size_t SketchBatchNumElements(size_t sketch_batch_num_elements, if (sketch_batch_num_elements == 0) { auto required_memory = RequiredMemory(num_rows, columns, nnz, num_cuts, has_weight); // use up to 80% of available space - sketch_batch_num_elements = (dh::AvailableMemory(device) - - required_memory * 0.8); + auto avail = dh::AvailableMemory(device) * 0.8; + if (required_memory > avail) { + sketch_batch_num_elements = avail / BytesPerElement(has_weight); + } else { + sketch_batch_num_elements = num_rows * columns; + } } return sketch_batch_num_elements; } @@ -196,7 +200,7 @@ void ProcessBatch(int device, const SparsePage &page, size_t begin, size_t end, size_t num_columns) { dh::XGBCachingDeviceAllocator alloc; const auto& host_data = page.data.ConstHostVector(); - dh::caching_device_vector<Entry> sorted_entries(host_data.begin() + begin, + dh::device_vector<Entry> sorted_entries(host_data.begin() + begin, host_data.begin() + end); thrust::sort(thrust::cuda::par(alloc), sorted_entries.begin(), sorted_entries.end(), detail::EntryCompareOp()); diff --git a/tests/cpp/common/test_hist_util.cu b/tests/cpp/common/test_hist_util.cu index 3ec49668a7eb..cb0fd827e7cf 100644 --- a/tests/cpp/common/test_hist_util.cu +++ b/tests/cpp/common/test_hist_util.cu @@ -48,6 +48,18 @@ TEST(HistUtil, DeviceSketch) { EXPECT_EQ(device_cuts.MinValues(), host_cuts.MinValues()); } +TEST(HistUtil, SketchBatchNumElements) { + size_t constexpr kCols = 10000; + int device; + dh::safe_cuda(cudaGetDevice(&device)); + 
auto avail = dh::AvailableMemory(device) * 0.8; + auto per_elem = detail::BytesPerElement(false); + auto avail_elem = avail / per_elem; + size_t rows = avail_elem / kCols * 10; + auto batch = detail::SketchBatchNumElements(0, rows, kCols, rows * kCols, device, 256, false); + ASSERT_EQ(batch, avail_elem); +} + TEST(HistUtil, DeviceSketchMemory) { int num_columns = 100; int num_rows = 1000; From fc34764241a3b6640ac0118d208555dccc1f5446 Mon Sep 17 00:00:00 2001 From: fis Date: Thu, 16 Jul 2020 12:54:26 +0800 Subject: [PATCH 2/3] Force size_t. --- tests/cpp/common/test_hist_util.cu | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/cpp/common/test_hist_util.cu b/tests/cpp/common/test_hist_util.cu index cb0fd827e7cf..3145e572a205 100644 --- a/tests/cpp/common/test_hist_util.cu +++ b/tests/cpp/common/test_hist_util.cu @@ -52,7 +52,7 @@ TEST(HistUtil, SketchBatchNumElements) { size_t constexpr kCols = 10000; int device; dh::safe_cuda(cudaGetDevice(&device)); - auto avail = dh::AvailableMemory(device) * 0.8; + auto avail = static_cast<size_t>(dh::AvailableMemory(device) * 0.8); auto per_elem = detail::BytesPerElement(false); auto avail_elem = avail / per_elem; size_t rows = avail_elem / kCols * 10; From 045a9b4b291a10085873fb81c83176b96297dc2c Mon Sep 17 00:00:00 2001 From: fis Date: Fri, 17 Jul 2020 07:37:36 +0800 Subject: [PATCH 3/3] Consider nnz. 
--- src/common/hist_util.cu | 5 +++-- src/common/hist_util.cuh | 3 ++- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/src/common/hist_util.cu b/src/common/hist_util.cu index d95e36d85c1f..f39b6af470e9 100644 --- a/src/common/hist_util.cu +++ b/src/common/hist_util.cu @@ -162,7 +162,8 @@ size_t RequiredMemory(bst_row_t num_rows, bst_feature_t num_columns, size_t nnz, } size_t SketchBatchNumElements(size_t sketch_batch_num_elements, - bst_row_t num_rows, size_t columns, size_t nnz, int device, + bst_row_t num_rows, bst_feature_t columns, + size_t nnz, int device, size_t num_cuts, bool has_weight) { if (sketch_batch_num_elements == 0) { auto required_memory = RequiredMemory(num_rows, columns, nnz, num_cuts, has_weight); @@ -171,7 +172,7 @@ size_t SketchBatchNumElements(size_t sketch_batch_num_elements, if (required_memory > avail) { sketch_batch_num_elements = avail / BytesPerElement(has_weight); } else { - sketch_batch_num_elements = num_rows * columns; + sketch_batch_num_elements = std::min(num_rows * static_cast<size_t>(columns), nnz); } } return sketch_batch_num_elements; diff --git a/src/common/hist_util.cuh b/src/common/hist_util.cuh index 94744513aa81..b0dbf1e6730d 100644 --- a/src/common/hist_util.cuh +++ b/src/common/hist_util.cuh @@ -100,7 +100,8 @@ inline size_t constexpr BytesPerElement(bool has_weight) { * directly if it's not 0. */ size_t SketchBatchNumElements(size_t sketch_batch_num_elements, - bst_row_t num_rows, size_t columns, size_t nnz, int device, + bst_row_t num_rows, bst_feature_t columns, + size_t nnz, int device, size_t num_cuts, bool has_weight); // Compute number of sample cuts needed on local node to maintain accuracy