Skip to content

Commit

Permalink
fix conflicts
Browse files Browse the repository at this point in the history
  • Loading branch information
ZiyueXu77 committed Mar 15, 2024
1 parent 7ef48c8 commit 8405791
Show file tree
Hide file tree
Showing 5 changed files with 50 additions and 148 deletions.
24 changes: 1 addition & 23 deletions src/collective/aggregator.h
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,7 @@ void ApplyWithLabels(Context const*, MetaInfo const& info, HostDeviceVector<T>*

std::size_t size{};
if (collective::GetRank() == 0) {
size = result->Size();
size = result->Size();
}
collective::Broadcast(&size, sizeof(std::size_t), 0);

Expand All @@ -111,42 +111,20 @@ void ApplyWithLabels(Context const*, MetaInfo const& info, HostDeviceVector<T>*
}
// provide the vectors to the processor interface
// print vector size for rank 1

if (collective::GetRank() == 0) {
std::cout << "DATA size of gpairs: " << vector_gh.size() << std::endl;
}









}
// make broadcast call on the prepared data buffer
// (to local gRPC handler for further encryption)

//collective::Broadcast(gh_buffer, size_of_buffer, 0);

result->Resize(size);
collective::Broadcast(result->HostPointer(), size * sizeof(T), 0);
} else {
// clear text mode, broadcast the data directly
result->Resize(size);
collective::Broadcast(result->HostPointer(), size * sizeof(T), 0);
}


/*
// print 1 sample
if (is_gpair) {
std::cout << "Rank: " << collective::GetRank() << " after broadcast - g: "
<< reinterpret_cast<float*>(&result->HostVector()[0])[0] << " h: "
<< reinterpret_cast<float*>(&result->HostVector()[0])[1] << std::endl;
}
*/
} else {
std::forward<Function>(function)();
}
Expand Down
33 changes: 0 additions & 33 deletions src/common/quantile.cc
Original file line number Diff line number Diff line change
Expand Up @@ -387,39 +387,6 @@ void AddCutPoint(typename SketchType::SummaryContainer const &summary, int max_b
}
}

// Append the cut points of one column to `cuts` for the secure vertical pipeline.
//
// The number of cuts is synchronised across all workers with a max-Allreduce, so
// every worker must call this function for the same column (it is a collective).
//
// - summary: quantile summary of the column on this worker; `summary.size == 0`
//            is treated as an empty column.
// - max_bin: upper bound on the number of cuts taken from the summary.
// - cuts:    output; values are appended to `cuts->cut_values_`.
//
// A non-empty column contributes its own summary values (entry 0 is skipped, and
// after the first pushed value only strictly increasing values are kept).
// An empty column is filled with `required_cuts - 1` zeros as placeholders.
template <typename SketchType>
void AddCutPointSecure(typename SketchType::SummaryContainer const &summary, int max_bin,
                       HistogramCuts *cuts) {
  // Number of cuts this worker can provide from its own summary.
  size_t const local_cuts = std::min(summary.size, static_cast<size_t>(max_bin));

  // Sync the required number of cuts across all workers (maximum wins).
  size_t required_cuts = local_cuts;
  collective::Allreduce<collective::Operation::kMax>(&required_cuts, 1);

  auto &cut_values = cuts->cut_values_.HostVector();
  if (local_cuts > 0) {
    // The synced count may be larger than what the local summary holds; never
    // index past `summary.size`, otherwise we would read uninitialised entries.
    size_t const n_readable = std::min<size_t>(required_cuts, summary.size);
    // We use the min_value as the first (0th) element, hence starting from 1.
    for (size_t i = 1; i < n_readable; ++i) {
      bst_float cpt = summary.data[i].value;
      if (i == 1 || cpt > cut_values.back()) {
        cut_values.push_back(cpt);
      }
    }
  } else {
    // Empty column on this worker: fill the slots with zeros.
    for (size_t i = 1; i < required_cuts; ++i) {
      cut_values.push_back(0.0);
    }
  }
}


auto AddCategories(std::set<float> const &categories, HistogramCuts *cuts) {
if (std::any_of(categories.cbegin(), categories.cend(), InvalidCat)) {
InvalidCategory();
Expand Down
8 changes: 2 additions & 6 deletions src/learner.cc
Original file line number Diff line number Diff line change
Expand Up @@ -350,6 +350,7 @@ struct LearnerTrainParam : public XGBoostParameter<LearnerTrainParam> {
}
};


DMLC_REGISTER_PARAMETER(LearnerModelParamLegacy);
DMLC_REGISTER_PARAMETER(LearnerTrainParam);

Expand Down Expand Up @@ -492,12 +493,7 @@ class LearnerConfiguration : public Learner {

this->ConfigureMetrics(args);




std::cout<<"configure interface here?????????????????????????"<<std::endl;


std::cout<<"configure interface here???????????????"<<std::endl;

this->need_configuration_ = false;
if (ctx_.validate_parameters) {
Expand Down
132 changes: 47 additions & 85 deletions src/tree/hist/histogram.h
Original file line number Diff line number Diff line change
Expand Up @@ -76,54 +76,39 @@ class HistogramBuilder {
std::vector<bst_node_t> const &nodes_to_build,
common::RowSetCollection const &row_set_collection,
common::Span<GradientPair const> gpair_h, bool force_read_by_column) {







if ((collective::GetRank() == 0)) {
std::cout << "Current samples on nodes: " << std::endl;
// print info on all nodes
for (bst_node_t nit = 0; nit < row_set_collection.Size(); ++nit) {
auto size = row_set_collection[nit].Size();
std::cout << "Node " << nit << " has " << size << " rows." << std::endl;
// print the first and last indexes of the rows with iterator
if (size > 0) {
std::cout << "First index for node " << nit << " is " << *row_set_collection[nit].begin << " and last index is " << *(row_set_collection[nit].end-1) << std::endl;
}
}
std::cout << std::endl;

// print info on the nodes to build
for (auto nit = nodes_to_build.begin(); nit != nodes_to_build.end(); ++nit) {
std::cout << "Building local histogram for node ID: " << *nit << " with " << row_set_collection[*nit].Size() << " samples." << std::endl;
}
std::cout << std::endl;

std::cout << "Call interface to transmit the row set collection and gidx to the secure worker." << std::endl;
std::cout << "GHistIndexMatrix will not change: size of the ginidx: " << gidx.index.Size() << std::endl;
auto cut_ptrs = gidx.Cuts().Ptrs();
//auto cut_values = gidx.Cuts().Values();
//std::cout << "size of the cut points: " << cut_ptrs.size() << std::endl;
std::cout << "first sample falls to: [feature_id, slot #]: " << std::endl;
for (auto i = 0; i < cut_ptrs.size()-1; ++i) {
// std::cout << "feature " << i << " first cut at " << cut_ptrs[i] + 1 << " with value " << cut_values[cut_ptrs[i]+1] << "; ";
std::cout << "[" << gidx.GetGindex(0, i) << ", " << i << "] ";
}
std::cout << std::endl;
// Print out all kinds of information for interface integration
if ((collective::GetRank() == 0)) {
std::cout << "Current samples on nodes: " << std::endl;
// print info on all nodes
for (bst_node_t nit = 0; nit < row_set_collection.Size(); ++nit) {
auto size = row_set_collection[nit].Size();
std::cout << "Node " << nit << " has " << size << " rows." << std::endl;
// print the first and last indexes of the rows with iterator
if (size > 0) {
std::cout << "First index for node " << nit << " is " << *row_set_collection[nit].begin << " and last index is " << *(row_set_collection[nit].end-1) << std::endl;
}
}







std::cout << std::endl;
// print info on the nodes to build
for (auto nit = nodes_to_build.begin(); nit != nodes_to_build.end(); ++nit) {
std::cout << "Building local histogram for node ID: " << *nit << " with " << row_set_collection[*nit].Size() << " samples." << std::endl;
}
std::cout << std::endl;
std::cout << "Call interface to transmit the row set collection and gidx to the secure worker." << std::endl;
std::cout << "GHistIndexMatrix will not change: size of the ginidx: " << gidx.index.Size() << std::endl;
auto cut_ptrs = gidx.Cuts().Ptrs();
//auto cut_values = gidx.Cuts().Values();
//std::cout << "size of the cut points: " << cut_ptrs.size() << std::endl;
std::cout << "first sample falls to: [feature_id, slot #]: " << std::endl;
for (auto i = 0; i < cut_ptrs.size()-1; ++i) {
//std::cout << "feature " << i << " first cut at " << cut_ptrs[i] + 1 << " with value " << cut_values[cut_ptrs[i]+1] << "; ";
std::cout << "[" << gidx.GetGindex(0, i) << ", " << i << "] ";
}
std::cout << std::endl;
}
// Call the interface to transmit the row set collection and gidx to the secure worker

// Parallel processing by nodes and data in each node
bool print_once = true;
common::ParallelFor2d(space, this->n_threads_, [&](size_t nid_in_set, common::Range1d r) {
const auto tid = static_cast<unsigned>(omp_get_thread_num());
bst_node_t const nidx = nodes_to_build[nid_in_set];
Expand All @@ -133,19 +118,6 @@ class HistogramBuilder {
auto rid_set = common::RowSetCollection::Elem(elem.begin + start_of_row_set,
elem.begin + end_of_row_set, nidx);
auto hist = buffer_.GetInitializedHist(tid, nid_in_set);

// print info
//if ((collective::GetRank() == 0) && print_once ) {
//std::cout << "Sample of row set for node " << nidx << ": ";
//std::cout << "Size: " << row_set_collection[nidx].Size() << ", ";
//for (auto i = 0; i < 10; i++) {
// std::cout << rid_set.begin[i] << ", ";
//}
//std::cout << std::endl;
//print_once = false;
//}


if (rid_set.Size() != 0) {
common::BuildHist<any_missing>(gpair_h, rid_set, gidx, hist, force_read_by_column);
}
Expand Down Expand Up @@ -216,11 +188,6 @@ class HistogramBuilder {
if (page_idx == 0) {
// Add the local histogram cache to the parallel buffer before processing the first page.
auto n_nodes = nodes_to_build.size();

if ((collective::GetRank() == 0)) {
std::cout << "Building histogram for " << n_nodes << " nodes" << std::endl;
}

std::vector<common::GHistRow> target_hists(n_nodes);
for (size_t i = 0; i < n_nodes; ++i) {
auto const nidx = nodes_to_build[i];
Expand Down Expand Up @@ -266,11 +233,11 @@ class HistogramBuilder {

// Option 1: in theory the operation is AllGather, but with current system functionality,
// we use AllReduce to simulate the AllGather operation
//auto first_nidx = nodes_to_build.front();
//collective::Allreduce<collective::Operation::kSum>(
// reinterpret_cast<double *>(this->hist_[first_nidx].data()), n);

auto first_nidx = nodes_to_build.front();
collective::Allreduce<collective::Operation::kSum>(
reinterpret_cast<double *>(this->hist_[first_nidx].data()), n);

/*
// Option 2: use AllGather instead of AllReduce
// Collect the histogram entries from all nodes
// allocate memory for the received entries as a flat vector
Expand All @@ -290,10 +257,9 @@ class HistogramBuilder {
it++;
}
//std::cout << "hist_flat.size() = " << hist_flat.size() << std::endl;

if (collective::GetRank() == 0) {
std::cout << "---------------------CALL AllGather for node building-------------------- " << std::endl;
}
if (collective::GetRank() == 0) {
std::cout << "---------------CALL AllGather for node building-------------- " << std::endl;
}
// Perform AllGather
auto hist_entries = collective::Allgather(hist_flat);
Expand All @@ -316,7 +282,7 @@ class HistogramBuilder {
}
}
}

*/


}
Expand Down Expand Up @@ -385,10 +351,6 @@ class MultiHistogramBuilder {
linalg::MatrixView<GradientPair const> gpair, ExpandEntry const &best,
BatchParam const &param, bool force_read_by_column = false) {
auto n_targets = p_tree->NumTargets();


std::cout << "Root n_targets = " << n_targets << std::endl;

CHECK_EQ(gpair.Shape(1), n_targets);
CHECK_EQ(p_fmat->Info().num_row_, gpair.Shape(0));
CHECK_EQ(target_builders_.size(), n_targets);
Expand Down Expand Up @@ -429,16 +391,14 @@ class MultiHistogramBuilder {
std::vector<bst_node_t> nodes_to_sub(valid_candidates.size());
AssignNodes(p_tree, valid_candidates, nodes_to_build, nodes_to_sub);


// print index for nodes_to_build and nodes_to_sub
if (collective::GetRank() == 0) {
for (int i = 0; i < nodes_to_build.size(); i++) {
std::cout<< "Left-Right: nodes_to_build index " << nodes_to_build[i] << "; ";
std::cout<< "nodes_to_sub index " << nodes_to_sub[i] << std::endl;
}
for (int i = 0; i < nodes_to_build.size(); i++) {
std::cout<< "Left-Right: nodes_to_build index " << nodes_to_build[i] << "; ";
std::cout<< "nodes_to_sub index " << nodes_to_sub[i] << std::endl;
}
}


// use the first builder for getting number of valid nodes.
target_builders_.front().AddHistRows(p_tree, &nodes_to_build, &nodes_to_sub, true);
CHECK_GE(nodes_to_build.size(), nodes_to_sub.size());
Expand All @@ -455,9 +415,11 @@ class MultiHistogramBuilder {
CHECK_EQ(gpair.Shape(1), p_tree->NumTargets());
for (bst_target_t t = 0; t < p_tree->NumTargets(); ++t) {
auto t_gpair = gpair.Slice(linalg::All(), t);
if (collective::GetRank() == 0) {
std::cout<< "Total row count: " << p_fmat->Info().num_row_ << std::endl;
}

if (collective::GetRank() == 0) {
std::cout<< "Total row count: " << p_fmat->Info().num_row_ << std::endl;
}

CHECK_EQ(t_gpair.Shape(0), p_fmat->Info().num_row_);
this->target_builders_[t].BuildHist(page_idx, space, page,
partitioners[page_idx].Partitions(), nodes_to_build,
Expand Down
1 change: 0 additions & 1 deletion src/tree/updater_quantile_hist.cc
Original file line number Diff line number Diff line change
Expand Up @@ -440,7 +440,6 @@ class HistUpdater {
std::vector<CPUExpandEntry> const &valid_candidates,
linalg::MatrixView<GradientPair const> gpair) {
monitor_->Start(__func__);

this->histogram_builder_->BuildHistLeftRight(ctx_, p_fmat, p_tree, partitioner_,
valid_candidates, gpair, HistBatch(param_));
monitor_->Stop(__func__);
Expand Down

0 comments on commit 8405791

Please sign in to comment.