Skip to content

Commit

Permalink
Drop single point model recovery.
Browse files Browse the repository at this point in the history
  • Loading branch information
trivialfis committed Sep 11, 2020
1 parent c92d751 commit 1dc2d2d
Show file tree
Hide file tree
Showing 20 changed files with 23 additions and 2,844 deletions.
2 changes: 1 addition & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -177,7 +177,7 @@ else()
-D_CRT_SECURE_NO_WARNINGS -D_CRT_SECURE_NO_DEPRECATE)
endif (MSVC)
endif(RABIT_MOCK)
foreach(lib rabit rabit_base rabit_mock rabit_mock_static)
foreach(lib rabit rabit_mock_static)
# Explicitly link dmlc to rabit, so that configured header (build_config.h)
# from dmlc is correctly applied to rabit.
if (TARGET ${lib})
Expand Down
14 changes: 0 additions & 14 deletions Jenkinsfile
Original file line number Diff line number Diff line change
Expand Up @@ -340,20 +340,6 @@ def TestPythonGPU(args) {
}
}

def TestCppRabit() {
node(nodeReq) {
unstash name: 'xgboost_rabit_tests'
unstash name: 'srcs'
echo "Test C++, rabit mock on"
def container_type = "cpu"
def docker_binary = "docker"
sh """
${dockerRun} ${container_type} ${docker_binary} tests/ci_build/runxgb.sh xgboost tests/ci_build/approx.conf.in
"""
deleteDir()
}
}

def TestCppGPU(args) {
def nodeReq = 'linux && mgpu'
def artifact_cuda_version = (args.artifact_cuda_version) ?: ref_cuda_ver
Expand Down
2 changes: 1 addition & 1 deletion R-package/src/Makevars.in
Original file line number Diff line number Diff line change
Expand Up @@ -22,4 +22,4 @@ PKG_LIBS = @OPENMP_CXXFLAGS@ @OPENMP_LIB@ @ENDIAN_FLAG@ @BACKTRACE_LIB@ -pthread
OBJECTS= ./xgboost_R.o ./xgboost_custom.o ./xgboost_assert.o ./init.o \
$(PKGROOT)/amalgamation/xgboost-all0.o $(PKGROOT)/amalgamation/dmlc-minimum0.o \
$(PKGROOT)/rabit/src/engine.o $(PKGROOT)/rabit/src/c_api.o \
$(PKGROOT)/rabit/src/allreduce_base.o $(PKGROOT)/rabit/src/allreduce_robust.o
$(PKGROOT)/rabit/src/allreduce_base.o
2 changes: 1 addition & 1 deletion R-package/src/Makevars.win
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,6 @@ PKG_LIBS = $(SHLIB_OPENMP_CXXFLAGS) $(SHLIB_PTHREAD_FLAGS)
OBJECTS= ./xgboost_R.o ./xgboost_custom.o ./xgboost_assert.o ./init.o \
$(PKGROOT)/amalgamation/xgboost-all0.o $(PKGROOT)/amalgamation/dmlc-minimum0.o \
$(PKGROOT)/rabit/src/engine.o $(PKGROOT)/rabit/src/c_api.o \
$(PKGROOT)/rabit/src/allreduce_base.o $(PKGROOT)/rabit/src/allreduce_robust.o
$(PKGROOT)/rabit/src/allreduce_base.o

$(OBJECTS) : xgblib
10 changes: 4 additions & 6 deletions rabit/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,15 +2,13 @@ cmake_minimum_required(VERSION 3.3)

find_package(Threads REQUIRED)

add_library(rabit src/allreduce_base.cc src/allreduce_robust.cc src/engine.cc src/c_api.cc)
add_library(rabit_mock_static src/allreduce_base.cc src/allreduce_robust.cc src/engine_mock.cc src/c_api.cc)
add_library(rabit_mock SHARED src/allreduce_base.cc src/allreduce_robust.cc src/engine_mock.cc src/c_api.cc)
add_library(rabit src/allreduce_base.cc src/engine.cc src/c_api.cc)
add_library(rabit_mock_static src/allreduce_base.cc src/engine_mock.cc src/c_api.cc)
target_link_libraries(rabit Threads::Threads dmlc)
target_link_libraries(rabit_mock_static Threads::Threads dmlc)
target_link_libraries(rabit_mock Threads::Threads dmlc)

set(rabit_libs rabit rabit_mock rabit_mock_static)
set_target_properties(rabit rabit_mock rabit_mock_static
set(rabit_libs rabit rabit_mock_static)
set_target_properties(rabit rabit_mock_static
PROPERTIES CXX_STANDARD 14
CXX_STANDARD_REQUIRED ON
POSITION_INDEPENDENT_CODE ON)
Expand Down
31 changes: 0 additions & 31 deletions rabit/src/CMakeLists.txt

This file was deleted.

6 changes: 0 additions & 6 deletions rabit/src/README.md

This file was deleted.

4 changes: 2 additions & 2 deletions rabit/src/allreduce_base.h
Original file line number Diff line number Diff line change
Expand Up @@ -518,9 +518,9 @@ class AllreduceBase : public IEngine {
//---- data structure related to model ----
// call sequence counter, records how many calls we made so far
// from last call to CheckPoint, LoadCheckPoint
int seq_counter; // NOLINT
int seq_counter{0}; // NOLINT
// version number of model
int version_number; // NOLINT
int version_number {0}; // NOLINT
// whether the job is running in hadoop
bool hadoop_mode; // NOLINT
//---- local data related to link ----
Expand Down
27 changes: 13 additions & 14 deletions rabit/src/allreduce_mock.h
Original file line number Diff line number Diff line change
Expand Up @@ -13,11 +13,11 @@
#include <sstream>
#include "rabit/internal/engine.h"
#include "rabit/internal/timer.h"
#include "allreduce_robust.h"
#include "allreduce_base.h"

namespace rabit {
namespace engine {
class AllreduceMock : public AllreduceRobust {
class AllreduceMock : public AllreduceBase {
public:
// constructor
AllreduceMock() {
Expand All @@ -30,7 +30,7 @@ class AllreduceMock : public AllreduceRobust {
// destructor
~AllreduceMock() override = default;
void SetParam(const char *name, const char *val) override {
AllreduceRobust::SetParam(name, val);
AllreduceBase::SetParam(name, val);
// additional parameters
if (!strcmp(name, "rabit_num_trial")) num_trial_ = atoi(val);
if (!strcmp(name, "DMLC_NUM_ATTEMPT")) num_trial_ = atoi(val);
Expand All @@ -51,7 +51,7 @@ class AllreduceMock : public AllreduceRobust {
const char *_caller = _CALLER) override {
this->Verify(MockKey(rank, version_number, seq_counter, num_trial_), "AllReduce");
double tstart = utils::GetTime();
AllreduceRobust::Allreduce(sendrecvbuf_, type_nbytes,
AllreduceBase::Allreduce(sendrecvbuf_, type_nbytes,
count, reducer, prepare_fun, prepare_arg,
_file, _line, _caller);
tsum_allreduce_ += utils::GetTime() - tstart;
Expand All @@ -62,7 +62,7 @@ class AllreduceMock : public AllreduceRobust {
const char *_caller = _CALLER) override {
this->Verify(MockKey(rank, version_number, seq_counter, num_trial_), "Allgather");
double tstart = utils::GetTime();
AllreduceRobust::Allgather(sendrecvbuf, total_size,
AllreduceBase::Allgather(sendrecvbuf, total_size,
slice_begin, slice_end,
size_prev_slice, _file, _line, _caller);
tsum_allgather_ += utils::GetTime() - tstart;
Expand All @@ -71,19 +71,19 @@ class AllreduceMock : public AllreduceRobust {
const char *_file = _FILE, const int _line = _LINE,
const char *_caller = _CALLER) override {
this->Verify(MockKey(rank, version_number, seq_counter, num_trial_), "Broadcast");
AllreduceRobust::Broadcast(sendrecvbuf_, total_size, root, _file, _line, _caller);
AllreduceBase::Broadcast(sendrecvbuf_, total_size, root, _file, _line, _caller);
}
int LoadCheckPoint(Serializable *global_model,
Serializable *local_model) override {
tsum_allreduce_ = 0.0;
tsum_allgather_ = 0.0;
time_checkpoint_ = utils::GetTime();
if (force_local_ == 0) {
return AllreduceRobust::LoadCheckPoint(global_model, local_model);
return AllreduceBase::LoadCheckPoint(global_model, local_model);
} else {
DummySerializer dum;
ComboSerializer com(global_model, local_model);
return AllreduceRobust::LoadCheckPoint(&dum, &com);
return AllreduceBase::LoadCheckPoint(&dum, &com);
}
}
void CheckPoint(const Serializable *global_model,
Expand All @@ -92,18 +92,17 @@ class AllreduceMock : public AllreduceRobust {
double tstart = utils::GetTime();
double tbet_chkpt = tstart - time_checkpoint_;
if (force_local_ == 0) {
AllreduceRobust::CheckPoint(global_model, local_model);
AllreduceBase::CheckPoint(global_model, local_model);
} else {
DummySerializer dum;
ComboSerializer com(global_model, local_model);
AllreduceRobust::CheckPoint(&dum, &com);
AllreduceBase::CheckPoint(&dum, &com);
}
time_checkpoint_ = utils::GetTime();
double tcost = utils::GetTime() - tstart;
if (report_stats_ != 0 && rank == 0) {
std::stringstream ss;
ss << "[v" << version_number << "] global_size=" << global_checkpoint_.length()
<< ",local_size=" << (local_chkpt_[0].length() + local_chkpt_[1].length())
ss << "[v" << version_number << "] global_size="
<< ",check_tcost="<< tcost <<" sec"
<< ",allreduce_tcost=" << tsum_allreduce_ << " sec"
<< ",allgather_tcost=" << tsum_allgather_ << " sec"
Expand All @@ -116,7 +115,7 @@ class AllreduceMock : public AllreduceRobust {

void LazyCheckPoint(const Serializable *global_model) override {
this->Verify(MockKey(rank, version_number, seq_counter, num_trial_), "LazyCheckPoint");
AllreduceRobust::LazyCheckPoint(global_model);
AllreduceBase::LazyCheckPoint(global_model);
}

protected:
Expand Down Expand Up @@ -186,7 +185,7 @@ class AllreduceMock : public AllreduceRobust {
if (mock_map_.count(key) != 0) {
num_trial_ += 1;
// data processing frameworks runs on shared process
error_("[%d]@@@Hit Mock Error:%s ", rank, name);
throw dmlc::Error(std::to_string(rank) + "@@@Hit Mock Error: " + name);
}
}
};
Expand Down
169 changes: 0 additions & 169 deletions rabit/src/allreduce_robust-inl.h

This file was deleted.

Loading

0 comments on commit 1dc2d2d

Please sign in to comment.