Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix non-openmp build. #5566

Merged
merged 2 commits into from
Apr 20, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,8 @@ if (USE_CUDA)
message(STATUS "CUDA GEN_CODE: ${GEN_CODE}")
endif (USE_CUDA)

find_package(Threads REQUIRED)

if (USE_OPENMP)
if (APPLE)
# Require CMake 3.16+ on Mac OSX, as previous versions of CMake had trouble locating
Expand Down Expand Up @@ -128,7 +130,7 @@ foreach(lib rabit rabit_base rabit_empty rabit_mock rabit_mock_static)
# Explicitly link dmlc to rabit, so that configured header (build_config.h)
# from dmlc is correctly applied to rabit.
if (TARGET ${lib})
target_link_libraries(${lib} dmlc)
target_link_libraries(${lib} dmlc ${CMAKE_THREAD_LIBS_INIT})
endif (TARGET ${lib})
endforeach()

Expand All @@ -138,6 +140,7 @@ if (R_LIB)
endif (R_LIB)

# core xgboost
list(APPEND LINKED_LIBRARIES_PRIVATE Threads::Threads ${CMAKE_THREAD_LIBS_INIT})
add_subdirectory(${xgboost_SOURCE_DIR}/plugin)
add_subdirectory(${xgboost_SOURCE_DIR}/src)
target_link_libraries(objxgboost PUBLIC dmlc)
Expand Down
21 changes: 19 additions & 2 deletions Jenkinsfile
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,7 @@ pipeline {
parallel ([
'build-cpu': { BuildCPU() },
'build-cpu-rabit-mock': { BuildCPUMock() },
'build-cpu-non-omp': { BuildCPUNonOmp() },
'build-gpu-cuda9.0': { BuildCUDA(cuda_version: '9.0') },
'build-gpu-cuda10.0': { BuildCUDA(cuda_version: '10.0') },
'build-gpu-cuda10.1': { BuildCUDA(cuda_version: '10.1') },
Expand Down Expand Up @@ -215,12 +216,28 @@ def BuildCPUMock() {
sh """
${dockerRun} ${container_type} ${docker_binary} tests/ci_build/build_mock_cmake.sh
"""
echo 'Stashing rabit C++ test executable (xgboost)...'
echo 'Stashing rabit C++ test executable (xgboost)...'
stash name: 'xgboost_rabit_tests', includes: 'xgboost'
deleteDir()
}
}

def BuildCPUNonOmp() {
node('linux && cpu') {
unstash name: 'srcs'
echo "Build CPU without OpenMP"
def container_type = "cpu"
def docker_binary = "docker"
sh """
${dockerRun} ${container_type} ${docker_binary} tests/ci_build/build_via_cmake.sh -DUSE_OPENMP=OFF
"""
echo "Running Non-OpenMP C++ test..."
sh """
${dockerRun} ${container_type} ${docker_binary} build/testxgboost
"""
deleteDir()
}
}

def BuildCUDA(args) {
node('linux && cpu') {
Expand Down Expand Up @@ -259,7 +276,7 @@ def BuildJVMPackages(args) {
${docker_extra_params} ${dockerRun} ${container_type} ${docker_binary} tests/ci_build/build_jvm_packages.sh ${args.spark_version}
"""
echo 'Stashing XGBoost4J JAR...'
stash name: 'xgboost4j_jar', includes: 'jvm-packages/xgboost4j/target/*.jar,jvm-packages/xgboost4j-spark/target/*.jar,jvm-packages/xgboost4j-example/target/*.jar'
stash name: 'xgboost4j_jar', includes: "jvm-packages/xgboost4j/target/*.jar,jvm-packages/xgboost4j-spark/target/*.jar,jvm-packages/xgboost4j-example/target/*.jar"
deleteDir()
}
}
Expand Down
5 changes: 4 additions & 1 deletion src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -88,12 +88,15 @@ if (XGBOOST_BUILTIN_PREFETCH_PRESENT)
-DXGBOOST_BUILTIN_PREFETCH_PRESENT=1)
endif (XGBOOST_BUILTIN_PREFETCH_PRESENT)

find_package(Threads REQUIRED)
list(APPEND SRC_LIBS Threads::Threads ${CMAKE_THREAD_LIBS_INIT})

if (USE_OPENMP OR USE_CUDA) # CUDA requires OpenMP
find_package(OpenMP REQUIRED)
list(APPEND SRC_LIBS OpenMP::OpenMP_CXX)
set(LINKED_LIBRARIES_PRIVATE "${LINKED_LIBRARIES_PRIVATE};${SRC_LIBS}" PARENT_SCOPE)
target_link_libraries(objxgboost PRIVATE OpenMP::OpenMP_CXX)
endif (USE_OPENMP OR USE_CUDA)
set(LINKED_LIBRARIES_PRIVATE "${LINKED_LIBRARIES_PRIVATE};${SRC_LIBS}" PARENT_SCOPE)

# For MSVC: Call msvc_use_static_runtime() once again to completely
# replace /MD with /MT. See https://github.com/dmlc/xgboost/issues/4462
Expand Down
3 changes: 1 addition & 2 deletions src/common/column_matrix.h
Original file line number Diff line number Diff line change
Expand Up @@ -227,8 +227,7 @@ class ColumnMatrix {
/* missing values make sense only for column with type kDenseColumn,
and if no missing values were observed it could be handled much faster. */
if (noMissingValues) {
const int32_t nthread = omp_get_max_threads(); // NOLINT
#pragma omp parallel for num_threads(nthread)
#pragma omp parallel for num_threads(omp_get_max_threads())
for (omp_ulong rid = 0; rid < nrow; ++rid) {
const size_t ibegin = rid*nfeature;
const size_t iend = (rid+1)*nfeature;
Expand Down
6 changes: 4 additions & 2 deletions src/common/threading_utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -110,10 +110,12 @@ class BlockedSpace2d {

// Wrapper to implement nested parallelism with simple omp parallel for
template<typename Func>
void ParallelFor2d(const BlockedSpace2d& space, const int nthreads, Func func) {
void ParallelFor2d(const BlockedSpace2d& space, int nthreads, Func func) {
const size_t num_blocks_in_space = space.Size();
nthreads = std::min(nthreads, omp_get_max_threads());
nthreads = std::max(nthreads, 1);

#pragma omp parallel num_threads(nthreads)
#pragma omp parallel num_threads(nthreads)
{
size_t tid = omp_get_thread_num();
size_t chunck_size = num_blocks_in_space / nthreads + !!(num_blocks_in_space % nthreads);
Expand Down
13 changes: 2 additions & 11 deletions src/metric/rank_metric.cc
Original file line number Diff line number Diff line change
Expand Up @@ -189,12 +189,7 @@ struct EvalAuc : public Metric {
rec[j - gptr[group_id]] = {h_preds[j], j};
}

if (omp_in_parallel()) {
std::stable_sort(rec.begin(), rec.end(), common::CmpFirst);
} else {
XGBOOST_PARALLEL_SORT(rec.begin(), rec.end(), common::CmpFirst);
}

XGBOOST_PARALLEL_SORT(rec.begin(), rec.end(), common::CmpFirst);
// calculate AUC
double sum_pospair = 0.0;
double sum_npos = 0.0, sum_nneg = 0.0, buf_pos = 0.0, buf_neg = 0.0;
Expand Down Expand Up @@ -557,11 +552,7 @@ struct EvalAucPR : public Metric {
continue;
}

if (omp_in_parallel()) {
std::stable_sort(rec.begin(), rec.end(), common::CmpFirst);
} else {
XGBOOST_PARALLEL_SORT(rec.begin(), rec.end(), common::CmpFirst);
}
XGBOOST_PARALLEL_SORT(rec.begin(), rec.end(), common::CmpFirst);

// calculate AUC
double tp = 0.0, prevtp = 0.0, fp = 0.0, prevfp = 0.0, h = 0.0, a = 0.0, b = 0.0;
Expand Down
4 changes: 2 additions & 2 deletions tests/cpp/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -69,8 +69,8 @@ set_target_properties(
target_link_libraries(testxgboost
PRIVATE
${GTEST_LIBRARIES}
${LINKED_LIBRARIES_PRIVATE}
OpenMP::OpenMP_CXX)
${LINKED_LIBRARIES_PRIVATE})

target_compile_definitions(testxgboost PRIVATE ${XGBOOST_DEFINITIONS})
set_output_directory(testxgboost ${xgboost_BINARY_DIR})

Expand Down
14 changes: 12 additions & 2 deletions tests/cpp/common/test_threading_utils.cc
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,10 @@ TEST(ParallelFor2d, Test) {
return kDim2;
}, kGrainSize);

ParallelFor2d(space, 4, [&](size_t i, Range1d r) {
auto old = omp_get_max_threads();
omp_set_num_threads(4);

ParallelFor2d(space, omp_get_max_threads(), [&](size_t i, Range1d r) {
for (auto j = r.begin(); j < r.end(); ++j) {
matrix[i*kDim2 + j] += 1;
}
Expand All @@ -47,12 +50,17 @@ TEST(ParallelFor2d, Test) {
for (size_t i = 0; i < kDim1 * kDim2; i++) {
ASSERT_EQ(matrix[i], 1);
}

omp_set_num_threads(old);
}

TEST(ParallelFor2dNonUniform, Test) {
constexpr size_t kDim1 = 5;
constexpr size_t kGrainSize = 256;

auto old = omp_get_max_threads();
omp_set_num_threads(4);

// here are quite non-uniform distribution in space
// but ParallelFor2d should split them by blocks with max size = kGrainSize
// and process in balanced manner (optimal performance)
Expand All @@ -66,7 +74,7 @@ TEST(ParallelFor2dNonUniform, Test) {
working_space[i].resize(dim2[i], 0);
}

ParallelFor2d(space, 4, [&](size_t i, Range1d r) {
ParallelFor2d(space, omp_get_max_threads(), [&](size_t i, Range1d r) {
for (auto j = r.begin(); j < r.end(); ++j) {
working_space[i][j] += 1;
}
Expand All @@ -77,6 +85,8 @@ TEST(ParallelFor2dNonUniform, Test) {
ASSERT_EQ(working_space[i][j], 1);
}
}

omp_set_num_threads(old);
}

} // namespace common
Expand Down
5 changes: 4 additions & 1 deletion tests/cpp/helpers.cc
Original file line number Diff line number Diff line change
Expand Up @@ -301,9 +301,12 @@ std::unique_ptr<DMatrix> CreateSparsePageDMatrix(
batch_count++;
row_count += batch.Size();
}
#if defined(_OPENMP)
EXPECT_GE(batch_count, 2);
EXPECT_EQ(row_count, dmat->Info().num_row_);

#else
#warning "External memory doesn't work with Non-OpenMP build "
#endif // defined(_OPENMP)
return dmat;
}

Expand Down