Skip to content

Commit

Permalink
[coverage](test) improve test coverage (apache#26096)
Browse files Browse the repository at this point in the history
improve test coverage
  • Loading branch information
Gabriel39 authored and 胥剑旭 committed Dec 14, 2023
1 parent 61e4944 commit c988809
Show file tree
Hide file tree
Showing 11 changed files with 6,243 additions and 167 deletions.
14 changes: 4 additions & 10 deletions be/src/util/radix_sort.h
Original file line number Diff line number Diff line change
Expand Up @@ -248,13 +248,15 @@ struct RadixSort {
/// Transform the array and calculate the histogram.
/// NOTE This is slightly suboptimal. Look at https://github.com/powturbo/TurboHist
for (size_t i = 0; i < size; ++i) {
if (!Traits::Transform::transform_is_simple)
if (!Traits::Transform::transform_is_simple) {
Traits::extractKey(arr[i]) = bitsToKey(
Traits::Transform::forward(keyToBits(Traits::extractKey(arr[i]))));
}

for (size_t pass = 0; pass < NUM_PASSES; ++pass)
for (size_t pass = 0; pass < NUM_PASSES; ++pass) {
++histograms[pass * HISTOGRAM_SIZE +
getPart(pass, keyToBits(Traits::extractKey(arr[i])))];
}
}

{
Expand Down Expand Up @@ -297,12 +299,4 @@ struct RadixSort {
}
};

/// Helper functions for numeric types.
/// Use RadixSort with custom traits for complex types instead.

template <typename T>
void radixSortLSD(T* arr, size_t size) {
RadixSort<RadixSortNumTraits<T>>::executeLSD(arr, size);
}

} // namespace doris
34 changes: 13 additions & 21 deletions be/src/vec/common/radix_sort.h
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,6 @@ struct RadixSortIdentityTransform {
static constexpr bool transform_is_simple = true;

static KeyBits forward(KeyBits x) { return x; }
static KeyBits backward(KeyBits x) { return x; }
};

template <typename TElement>
Expand All @@ -131,7 +130,6 @@ struct RadixSortSignedTransform {
static constexpr bool transform_is_simple = true;

static KeyBits forward(KeyBits x) { return x ^ (KeyBits(1) << (sizeof(KeyBits) * 8 - 1)); }
static KeyBits backward(KeyBits x) { return x ^ (KeyBits(1) << (sizeof(KeyBits) * 8 - 1)); }
};

template <typename TElement>
Expand Down Expand Up @@ -293,13 +291,15 @@ struct RadixSort {
/// Transform the array and calculate the histogram.
/// NOTE This is slightly suboptimal. Look at https://github.com/powturbo/TurboHist
for (size_t i = 0; i < size; ++i) {
if (!Traits::Transform::transform_is_simple)
if (!Traits::Transform::transform_is_simple) {
Traits::extract_key(arr[i]) = bits_to_key(
Traits::Transform::forward(key_to_bits(Traits::extract_key(arr[i]))));
}

for (size_t pass = 0; pass < NUM_PASSES; ++pass)
for (size_t pass = 0; pass < NUM_PASSES; ++pass) {
++histograms[pass * HISTOGRAM_SIZE +
get_part(pass, key_to_bits(Traits::extract_key(arr[i])))];
}
}

{
Expand Down Expand Up @@ -328,15 +328,20 @@ struct RadixSort {
dest = reader[i];

/// On the last pass, we do the reverse transformation.
if (!Traits::Transform::transform_is_simple && pass == NUM_PASSES - 1)
Traits::extract_key(dest) = bits_to_key(Traits::Transform::backward(
key_to_bits(Traits::extract_key(reader[i]))));
if constexpr (!Traits::Transform::transform_is_simple) {
if (pass == NUM_PASSES - 1) {
Traits::extract_key(dest) = bits_to_key(Traits::Transform::backward(
key_to_bits(Traits::extract_key(reader[i]))));
}
}
}
}

/// If the number of passes is odd, the result array is in a temporary buffer. Copy it to the place of the original array.
/// NOTE: Sometimes it would be better to provide a non-destructive interface that does not modify the original array.
if (NUM_PASSES % 2) memcpy(arr, swap_buffer, size * sizeof(Element));
if (NUM_PASSES % 2) {
memcpy(arr, swap_buffer, size * sizeof(Element));
}

allocator.deallocate(swap_buffer, size * sizeof(Element));
}
Expand Down Expand Up @@ -373,16 +378,3 @@ struct RadixSort {
radix_sort_msd_internal_helper<NUM_PASSES - 1>(arr, size, limit);
}
};

/// Helper functions for numeric types.
/// Use RadixSort with custom traits for complex types instead.

template <typename T>
void radix_sort_lsd(T* arr, size_t size) {
RadixSort<RadixSortNumTraits<T>>::execute_lsd(arr, size);
}

template <typename T>
void radix_sort_msd(T* arr, size_t size, size_t limit) {
RadixSort<RadixSortNumTraits<T>>::execute_msd(arr, size, limit);
}
25 changes: 4 additions & 21 deletions be/src/vec/common/sip_hash.h
Original file line number Diff line number Diff line change
Expand Up @@ -170,13 +170,14 @@ class SipHash {
}

/// Feeds the object representation of x (sizeof(x) bytes starting at &x) into
/// the hash. When T is std::string the call throws doris::Exception with
/// INTERNAL_ERROR instead of hashing anything.
template <typename T>
    requires std::is_standard_layout_v<T>
void update(const T& x) {
    if constexpr (std::is_same_v<T, std::string>) {
        throw doris::Exception(doris::ErrorCode::INTERNAL_ERROR,
                               "String should not use SipHash!");
    } else {
        // Hash the raw bytes of the value as-is.
        const auto* raw_bytes = reinterpret_cast<const char*>(&x);
        update(raw_bytes, sizeof(x));
    }
}

/// Hashes the character contents of x: x.length() bytes starting at x.data().
void update(const std::string& x) {
    const char* bytes = x.data();
    update(bytes, x.length());
}

/// Get the result in some form. This can only be done once!

void get128(char* out) {
Expand Down Expand Up @@ -216,21 +217,3 @@ inline void sip_hash128(const char* data, const size_t size, char* out) {
hash.update(data, size);
hash.get128(out);
}

/// One-shot helper: feeds `size` bytes starting at `data` into a fresh SipHash
/// and returns the value produced by its get64().
inline doris::vectorized::UInt64 sip_hash64(const char* data, const size_t size) {
    SipHash hasher;
    hasher.update(data, size);
    const auto digest = hasher.get64();
    return digest;
}

/// One-shot helper for standard-layout values: passes x to SipHash::update on
/// a fresh hasher and returns the value produced by its get64().
template <typename T>
    requires std::is_standard_layout_v<T>
doris::vectorized::UInt64 sip_hash64(const T& x) {
    SipHash hasher;
    hasher.update(x);
    const auto digest = hasher.get64();
    return digest;
}

/// One-shot helper for strings: hashes the s.size() bytes starting at s.data()
/// with a fresh SipHash and returns the value produced by its get64().
inline doris::vectorized::UInt64 sip_hash64(const std::string& s) {
    SipHash hasher;
    hasher.update(s.data(), s.size());
    return hasher.get64();
}
7 changes: 0 additions & 7 deletions be/src/vec/core/sort_description.h
Original file line number Diff line number Diff line change
Expand Up @@ -41,13 +41,6 @@ struct SortColumnDescription {
nulls_direction(nulls_direction_) {}

SortColumnDescription() = default;

/// Two descriptions are equal when column_name, column_number, direction and
/// nulls_direction all compare equal. Fields are compared in that order and
/// the comparison stops at the first mismatch.
bool operator==(const SortColumnDescription& other) const {
    if (!(column_name == other.column_name)) {
        return false;
    }
    if (!(column_number == other.column_number)) {
        return false;
    }
    if (!(direction == other.direction)) {
        return false;
    }
    return nulls_direction == other.nulls_direction;
}

/// Negation of operator==.
bool operator!=(const SortColumnDescription& other) const {
    const bool is_equal = (*this == other);
    return !is_equal;
}
};

/// Description of the sorting rule for several columns.
Expand Down
2 changes: 1 addition & 1 deletion be/src/vec/exec/join/vhash_join_node.h
Original file line number Diff line number Diff line change
Expand Up @@ -386,7 +386,7 @@ class HashJoinNode final : public VJoinNodeBase {
template <int JoinOpType, typename Parent>
friend struct ProcessHashTableProbe;

void _init_short_circuit_for_probe() override {
void _init_short_circuit_for_probe() {
_short_circuit_for_probe =
(_has_null_in_build_side && _join_op == TJoinOp::NULL_AWARE_LEFT_ANTI_JOIN &&
!_is_mark_join) ||
Expand Down
5 changes: 0 additions & 5 deletions be/src/vec/exec/join/vjoin_node_base.h
Original file line number Diff line number Diff line change
Expand Up @@ -97,11 +97,6 @@ class VJoinNodeBase : public ExecNode {
// Materialize build relation. For HashJoin, it will build a hash table while a list of build blocks for NLJoin.
virtual Status _materialize_build_side(RuntimeState* state) = 0;

/// Default implementation of the overridable hook: resets both
/// _short_circuit_for_probe and _empty_right_table_need_probe_dispose to false.
virtual void _init_short_circuit_for_probe() {
    _empty_right_table_need_probe_dispose = false;
    _short_circuit_for_probe = false;
}

TJoinOp::type _join_op;
JoinOpVariants _join_op_variants;

Expand Down
101 changes: 0 additions & 101 deletions be/test/util/radix_sort_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -59,115 +59,14 @@ class RadixSortTest : public ::testing::Test {
}
};

/// Sorts a shuffled permutation of 1..num_values with radixSortLSD and checks
/// that the result is 1, 2, ..., num_values in ascending order.
TEST_F(RadixSortTest, TestUint32Sort) {
    constexpr size_t num_values = 10000;
    std::vector<uint32_t> data;
    data.reserve(num_values);
    // Fill with num_values, num_values - 1, ..., 1 and then shuffle.
    for (size_t i = 0; i < num_values; ++i) {
        data.push_back(static_cast<uint32_t>(num_values - i));
    }
    std::random_device rd;
    std::mt19937 g(rd());
    std::shuffle(data.begin(), data.end(), g);
    radixSortLSD(data.data(), data.size());
    // Verify the sorted output. The previous version assigned the expected
    // values into data here, so the test could never fail.
    for (size_t i = 0; i < num_values; ++i) {
        EXPECT_EQ(data[i], static_cast<uint32_t>(i + 1));
    }
}

/// Sorts a shuffled permutation of -4999..5000 with radixSortLSD and checks
/// that the result is in ascending order, negative values first.
TEST_F(RadixSortTest, TestInt32Sort) {
    constexpr size_t num_values = 10000;
    std::vector<int32_t> data;
    data.reserve(num_values);
    // Fill with 5000, 4999, ..., -4999 and then shuffle. The arithmetic is done
    // in int32_t so negative values do not rely on unsigned wrap-around.
    for (size_t i = 0; i < num_values; ++i) {
        data.push_back(static_cast<int32_t>(num_values - i) - 5000);
    }
    std::random_device rd;
    std::mt19937 g(rd());
    std::shuffle(data.begin(), data.end(), g);
    radixSortLSD(data.data(), data.size());
    // Verify the sorted output. The previous version assigned the expected
    // values into data here, so the test could never fail.
    for (size_t i = 0; i < num_values; ++i) {
        EXPECT_EQ(data[i], static_cast<int32_t>(i) + 1 - 5000);
    }
}

/// Returns true when a and b differ by strictly less than E.
bool compare_float_with_epsilon(float a, float b, float E) {
    const float distance = std::abs(a - b);
    return distance < E;
}

/// Sorts a shuffled float array (a run of evenly spaced values plus lowest,
/// max, +infinity and NaN) with radixSortLSD and checks every slot of the
/// result: lowest first, then the evenly spaced run, then max, infinity, NaN.
TEST_F(RadixSortTest, TestFloatSort) {
    constexpr size_t num_values = 10000;
    const float nan = std::numeric_limits<float>::quiet_NaN();
    const float max = std::numeric_limits<float>::max();
    const float min = std::numeric_limits<float>::lowest();
    const float infinity = std::numeric_limits<float>::infinity();

    // Regular values, followed by the four special values.
    std::vector<float> data;
    for (size_t k = 0; k < num_values; ++k) {
        data.push_back(1.0 * num_values - k - 5000 + 0.1);
    }
    data.push_back(nan);
    data.push_back(max);
    data.push_back(min);
    data.push_back(infinity);

    std::random_device seed_source;
    std::mt19937 engine(seed_source());
    std::shuffle(data.begin(), data.end(), engine);
    radixSortLSD(data.data(), data.size());

    // Slot 0 holds the lowest finite float.
    EXPECT_TRUE(compare_float_with_epsilon(data[0], min, 0.0000001));
    // Slots 1..num_values hold the regular values in ascending order.
    for (size_t pos = 1; pos <= num_values; ++pos) {
        EXPECT_TRUE(compare_float_with_epsilon(data[pos], 1.0 * pos - 5000 + 0.1, 0.0000001));
    }
    // The tail holds max, +infinity and NaN, in that order.
    EXPECT_TRUE(compare_float_with_epsilon(data[num_values + 1], max, 0.0000001));
    EXPECT_TRUE(std::isinf(data[num_values + 2]));
    EXPECT_TRUE(std::isnan(data[num_values + 3]));
}

/// Returns true when a and b differ by strictly less than E.
bool compare_double_with_epsilon(double a, double b, double E) {
    const double distance = std::abs(a - b);
    return distance < E;
}

/// Sorts a shuffled double array (a run of evenly spaced values plus lowest,
/// max, +infinity and NaN) with radixSortLSD and checks every slot of the
/// result: lowest first, then the evenly spaced run, then max, infinity, NaN.
TEST_F(RadixSortTest, TestDoubleSort) {
    constexpr size_t num_values = 10000;
    const double nan = std::numeric_limits<double>::quiet_NaN();
    const double max = std::numeric_limits<double>::max();
    const double min = std::numeric_limits<double>::lowest();
    const double infinity = std::numeric_limits<double>::infinity();

    // Regular values, followed by the four special values.
    std::vector<double> data;
    for (size_t k = 0; k < num_values; ++k) {
        data.push_back(num_values * 1.0 - k - 5000 + 0.1);
    }
    data.push_back(nan);
    data.push_back(max);
    data.push_back(min);
    data.push_back(infinity);

    std::random_device seed_source;
    std::mt19937 engine(seed_source());
    std::shuffle(data.begin(), data.end(), engine);
    radixSortLSD(data.data(), data.size());

    // Slot 0 holds the lowest finite double.
    EXPECT_TRUE(compare_double_with_epsilon(data[0], min, 0.0000001));
    // Slots 1..num_values hold the regular values in ascending order.
    for (size_t pos = 1; pos <= num_values; ++pos) {
        const double expected = 1.0 * pos - 5000 + 0.1;
        EXPECT_TRUE(compare_double_with_epsilon(data[pos], expected, 0.0000001));
    }
    // The tail holds max, +infinity and NaN, in that order.
    EXPECT_TRUE(compare_double_with_epsilon(data[num_values + 1], max, 0.0000001));
    EXPECT_TRUE(std::isinf(data[num_values + 2]));
    EXPECT_TRUE(std::isnan(data[num_values + 3]));
}

struct TestObject {
float d1;
float d2;
Expand Down
Loading

0 comments on commit c988809

Please sign in to comment.