-
-
Notifications
You must be signed in to change notification settings - Fork 8.7k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
* Binary * MultiClass * LTR * Add documentation. This PR resolves a few issues: - Define a value for when the dataset is invalid, which can happen if the dataset is empty or contains only positive or only negative labels. - Define ROC-AUC for multi-class classification. - Define a weighted average value for the distributed setting. - Provide a correct implementation for the learning-to-rank task. The previous implementation was just binary classification with averaging across groups, which does not measure ordered learning to rank.
- Loading branch information
1 parent
1a73a28
commit 0f4d87c
Showing
26 changed files
with
1,607 additions
and
461 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,84 @@ | ||
/*! | ||
* Copyright 2021 by XGBoost Contributors | ||
*/ | ||
#ifndef XGBOOST_COMMON_RANKING_UTILS_H_ | ||
#define XGBOOST_COMMON_RANKING_UTILS_H_ | ||
|
||
#include <cub/cub.cuh> | ||
#include "xgboost/base.h" | ||
#include "device_helpers.cuh" | ||
#include "./math.h" | ||
|
||
namespace xgboost { | ||
namespace common { | ||
/**
 * \brief Number of cells in a discrete trapezoid cut from the strict upper triangle of
 *        an n x n grid (diagonal entries excluded).
 *
 * Counts the iterations of:
 *
 * \code
 *     for (size_t i = 0; i < h; ++i) {
 *       for (size_t j = i + 1; j < n; ++j) { ... }
 *     }
 * \endcode
 *
 * \param n Number of items (length of the base)
 * \param h Height of the trapezoid; clamped to n - 1.
 *
 * \return Number of (i, j) cells.  0 when there are fewer than 2 items or h is 0.
 */
XGBOOST_DEVICE inline size_t DiscreteTrapezoidArea(size_t n, size_t h) {
  // Fewer than two items (including an empty group) or zero height yields no pairs.
  // Handling it up front avoids unsigned wrap-around in `n - 1` and `h - 1` below.
  if (n <= 1 || h == 0) {
    return 0;
  }
  n -= 1;             // without diagonal entries
  h = h < n ? h : n;  // Specific for ranking: the height can not exceed the base.
  // Sum of the arithmetic series n, n - 1, ..., n - (h - 1).
  size_t total = ((n - (h - 1)) + n) * h / 2;
  return total;
}
|
||
/**
 * Used for mapping many groups of trapezoid-shaped computation onto CUDA blocks.  The
 * trapezoid must be in the upper right corner.
 *
 * Equivalent to loops like:
 *
 * \code
 *     for (size_t i = 0; i < h; ++i) {
 *       for (size_t j = i + 1; j < n; ++j) {
 *         do_something();
 *       }
 *     }
 * \endcode
 *
 * \param group_ptr             Group boundaries; the difference between consecutive
 *                              entries is used as the size of each group.
 * \param out_group_threads_ptr Output with the same length as group_ptr.  Entry 0 is
 *                              set to 0, entry k to the trapezoid area of group k - 1,
 *                              and the whole span is then replaced by its inclusive sum.
 * \param h                     Height passed to DiscreteTrapezoidArea for every group.
 *
 * \return The last element of the inclusive sum, copied back to the host.
 */
template <typename U>
inline size_t
SegmentedTrapezoidThreads(xgboost::common::Span<U> group_ptr,
                          xgboost::common::Span<size_t> out_group_threads_ptr,
                          size_t h) {
  CHECK_GE(group_ptr.size(), 1);
  CHECK_EQ(group_ptr.size(), out_group_threads_ptr.size());

  // One thread per entry of group_ptr: write the per-group area.
  dh::LaunchN(
      dh::CurrentDevice(), group_ptr.size(), [=] XGBOOST_DEVICE(size_t idx) {
        if (idx != 0) {
          size_t group_size =
              static_cast<size_t>(group_ptr[idx] - group_ptr[idx - 1]);
          out_group_threads_ptr[idx] = DiscreteTrapezoidArea(group_size, h);
        } else {
          // Leading zero so that the scan below produces offsets.
          out_group_threads_ptr[0] = 0;
        }
      });

  // In-place inclusive sum over the areas.
  auto *scan = out_group_threads_ptr.data();
  dh::InclusiveSum(scan, scan, out_group_threads_ptr.size());

  // Fetch the final element of the scan with a blocking copy.
  size_t total = 0;
  dh::safe_cuda(cudaMemcpy(
      &total, scan + out_group_threads_ptr.size() - 1,
      sizeof(total), cudaMemcpyDeviceToHost));
  return total;
}
|
||
/**
 * Called inside a kernel to obtain the (i, j) coordinate from a flat index into the
 * trapezoid grid.
 *
 * The flat index enumerates cells in the order of the loops
 *
 * \code
 *     for (size_t i = 0; i < h; ++i) {
 *       for (size_t j = i + 1; j < n; ++j) { ... }
 *     }
 * \endcode
 *
 * \param i_idx Flat index of the cell.
 * \param n     Number of items (length of the base).
 * \param out_i Receives the row index.
 * \param out_j Receives the column index (row index + 1 or larger for a valid i_idx).
 */
XGBOOST_DEVICE inline void UnravelTrapeziodIdx(size_t i_idx, size_t n,
                                               size_t *out_i, size_t *out_j) {
  double const idx = static_cast<double>(i_idx);
  double const N = static_cast<double>(n);
  double const half_base = N - 0.5;

  // Row r starts at flat index r * (2n - r - 1) / 2 = -0.5 r^2 + (n - 0.5) r.  Solving
  // that quadratic for (idx + 1) and rounding up gives the first row that starts after
  // idx; the row containing idx is the one before it.
  size_t const i = static_cast<size_t>(
      std::ceil(-(0.5 - N + std::sqrt(half_base * half_base + 2.0 * (-idx - 1.0)))) - 1.0);

  // Number of cells in all rows before row i.  Done in integer arithmetic so that the
  // value is exact; i * (2n - i - 1) is always even.
  size_t const n_elems = i * (2 * n - i - 1) / 2;

  *out_i = i;
  // Offset inside the row, shifted past the diagonal.
  *out_j = i_idx - n_elems + i + 1;
}
} // namespace common | ||
} // namespace xgboost | ||
#endif // XGBOOST_COMMON_RANKING_UTILS_H_ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.