[coverage](test) improve test coverage (apache#26096)

improve test coverage
XuJianxu · Dec 14, 2023 · c988809 · c988809
1 parent 61e4944
commit c988809
Show file tree

Hide file tree

Showing 11 changed files with 6,243 additions and 167 deletions.
diff --git a/be/src/util/radix_sort.h b/be/src/util/radix_sort.h
@@ -248,13 +248,15 @@ struct RadixSort {
  /// Transform the array and calculate the histogram.
  /// NOTE This is slightly suboptimal. Look at https://github.com/powturbo/TurboHist
  for (size_t i = 0; i < size; ++i) {
- if (!Traits::Transform::transform_is_simple)
+ if (!Traits::Transform::transform_is_simple) {
  Traits::extractKey(arr[i]) = bitsToKey(
  Traits::Transform::forward(keyToBits(Traits::extractKey(arr[i]))));
+ }
 
- for (size_t pass = 0; pass < NUM_PASSES; ++pass)
+ for (size_t pass = 0; pass < NUM_PASSES; ++pass) {
  ++histograms[pass * HISTOGRAM_SIZE +
  getPart(pass, keyToBits(Traits::extractKey(arr[i])))];
+ }
  }
 
  {
@@ -297,12 +299,4 @@ struct RadixSort {
  }
 };
 
-/// Helper functions for numeric types.
-/// Use RadixSort with custom traits for complex types instead.
-
-template <typename T>
-void radixSortLSD(T* arr, size_t size) {
- RadixSort<RadixSortNumTraits<T>>::executeLSD(arr, size);
-}
-
 } // namespace doris
diff --git a/be/src/vec/common/radix_sort.h b/be/src/vec/common/radix_sort.h
@@ -106,7 +106,6 @@ struct RadixSortIdentityTransform {
  static constexpr bool transform_is_simple = true;
 
  static KeyBits forward(KeyBits x) { return x; }
- static KeyBits backward(KeyBits x) { return x; }
 };
 
 template <typename TElement>
@@ -131,7 +130,6 @@ struct RadixSortSignedTransform {
  static constexpr bool transform_is_simple = true;
 
  static KeyBits forward(KeyBits x) { return x ^ (KeyBits(1) << (sizeof(KeyBits) * 8 - 1)); }
- static KeyBits backward(KeyBits x) { return x ^ (KeyBits(1) << (sizeof(KeyBits) * 8 - 1)); }
 };
 
 template <typename TElement>
@@ -293,13 +291,15 @@ struct RadixSort {
  /// Transform the array and calculate the histogram.
  /// NOTE This is slightly suboptimal. Look at https://github.com/powturbo/TurboHist
  for (size_t i = 0; i < size; ++i) {
- if (!Traits::Transform::transform_is_simple)
+ if (!Traits::Transform::transform_is_simple) {
  Traits::extract_key(arr[i]) = bits_to_key(
  Traits::Transform::forward(key_to_bits(Traits::extract_key(arr[i]))));
+ }
 
- for (size_t pass = 0; pass < NUM_PASSES; ++pass)
+ for (size_t pass = 0; pass < NUM_PASSES; ++pass) {
  ++histograms[pass * HISTOGRAM_SIZE +
  get_part(pass, key_to_bits(Traits::extract_key(arr[i])))];
+ }
  }
 
  {
@@ -328,15 +328,20 @@ struct RadixSort {
  dest = reader[i];
 
  /// On the last pass, we do the reverse transformation.
- if (!Traits::Transform::transform_is_simple && pass == NUM_PASSES - 1)
- Traits::extract_key(dest) = bits_to_key(Traits::Transform::backward(
- key_to_bits(Traits::extract_key(reader[i]))));
+ if constexpr (!Traits::Transform::transform_is_simple) {
+ if (pass == NUM_PASSES - 1) {
+ Traits::extract_key(dest) = bits_to_key(Traits::Transform::backward(
+ key_to_bits(Traits::extract_key(reader[i]))));
+ }
+ }
  }
  }
 
  /// If the number of passes is odd, the result array is in a temporary buffer. Copy it to the place of the original array.
  /// NOTE Sometimes it will be more optimal to provide non-destructive interface, that will not modify original array.
- if (NUM_PASSES % 2) memcpy(arr, swap_buffer, size * sizeof(Element));
+ if (NUM_PASSES % 2) {
+ memcpy(arr, swap_buffer, size * sizeof(Element));
+ }
 
  allocator.deallocate(swap_buffer, size * sizeof(Element));
  }
@@ -373,16 +378,3 @@ struct RadixSort {
  radix_sort_msd_internal_helper<NUM_PASSES - 1>(arr, size, limit);
  }
 };
-
-/// Helper functions for numeric types.
-/// Use RadixSort with custom traits for complex types instead.
-
-template <typename T>
-void radix_sort_lsd(T* arr, size_t size) {
- RadixSort<RadixSortNumTraits<T>>::execute_lsd(arr, size);
-}
-
-template <typename T>
-void radix_sort_msd(T* arr, size_t size, size_t limit) {
- RadixSort<RadixSortNumTraits<T>>::execute_msd(arr, size, limit);
-}
diff --git a/be/src/vec/common/sip_hash.h b/be/src/vec/common/sip_hash.h
@@ -170,13 +170,14 @@ class SipHash {
  }
 
  template <typename T>
- requires std::is_standard_layout_v<T>
  void update(const T& x) {
+ if constexpr (std::is_same_v<T, std::string>) {
+ throw doris::Exception(doris::ErrorCode::INTERNAL_ERROR,
+ "String should not use SipHash!");
+ }
  update(reinterpret_cast<const char*>(&x), sizeof(x));
  }
 
- void update(const std::string& x) { update(x.data(), x.length()); }
-
  /// Get the result in some form. This can only be done once!
 
  void get128(char* out) {
@@ -216,21 +217,3 @@ inline void sip_hash128(const char* data, const size_t size, char* out) {
  hash.update(data, size);
  hash.get128(out);
 }
-
-inline doris::vectorized::UInt64 sip_hash64(const char* data, const size_t size) {
- SipHash hash;
- hash.update(data, size);
- return hash.get64();
-}
-
-template <typename T>
- requires std::is_standard_layout_v<T>
-doris::vectorized::UInt64 sip_hash64(const T& x) {
- SipHash hash;
- hash.update(x);
- return hash.get64();
-}
-
-inline doris::vectorized::UInt64 sip_hash64(const std::string& s) {
- return sip_hash64(s.data(), s.size());
-}
diff --git a/be/src/vec/core/sort_description.h b/be/src/vec/core/sort_description.h
@@ -41,13 +41,6 @@ struct SortColumnDescription {
  nulls_direction(nulls_direction_) {}
 
  SortColumnDescription() = default;
-
- bool operator==(const SortColumnDescription& other) const {
- return column_name == other.column_name && column_number == other.column_number &&
- direction == other.direction && nulls_direction == other.nulls_direction;
- }
-
- bool operator!=(const SortColumnDescription& other) const { return !(*this == other); }
 };
 
 /// Description of the sorting rule for several columns.

diff --git a/be/src/vec/exec/join/vhash_join_node.h b/be/src/vec/exec/join/vhash_join_node.h
@@ -386,7 +386,7 @@ class HashJoinNode final : public VJoinNodeBase {
  template <int JoinOpType, typename Parent>
  friend struct ProcessHashTableProbe;
 
- void _init_short_circuit_for_probe() override {
+ void _init_short_circuit_for_probe() {
  _short_circuit_for_probe =
  (_has_null_in_build_side && _join_op == TJoinOp::NULL_AWARE_LEFT_ANTI_JOIN &&
  !_is_mark_join) ||

diff --git a/be/src/vec/exec/join/vjoin_node_base.h b/be/src/vec/exec/join/vjoin_node_base.h
@@ -97,11 +97,6 @@ class VJoinNodeBase : public ExecNode {
  // Materialize build relation. For HashJoin, it will build a hash table while a list of build blocks for NLJoin.
  virtual Status _materialize_build_side(RuntimeState* state) = 0;
 
- virtual void _init_short_circuit_for_probe() {
- _short_circuit_for_probe = false;
- _empty_right_table_need_probe_dispose = false;
- }
-
  TJoinOp::type _join_op;
  JoinOpVariants _join_op_variants;
 

diff --git a/be/test/util/radix_sort_test.cpp b/be/test/util/radix_sort_test.cpp
@@ -59,115 +59,14 @@ class RadixSortTest : public ::testing::Test {
  }
 };
 
-TEST_F(RadixSortTest, TestUint32Sort) {
- constexpr size_t num_values = 10000;
- std::vector<uint32_t> data;
- // generating random data
- for (size_t i = 0; i < num_values; ++i) {
- data.push_back(num_values - i);
- }
- std::random_device rd;
- std::mt19937 g(rd());
- std::shuffle(data.begin(), data.end(), g);
- radixSortLSD(data.data(), data.size());
- for (size_t i = 0; i < num_values; ++i) {
- data[i] = i + 1;
- }
-}
-
-TEST_F(RadixSortTest, TestInt32Sort) {
- constexpr size_t num_values = 10000;
- std::vector<int32_t> data;
- // generating random data
- for (size_t i = 0; i < num_values; ++i) {
- data.push_back(num_values - i - 5000);
- }
- std::random_device rd;
- std::mt19937 g(rd());
- std::shuffle(data.begin(), data.end(), g);
- radixSortLSD(data.data(), data.size());
- for (size_t i = 0; i < num_values; ++i) {
- data[i] = i + 1 - 5000;
- }
-}
-
 bool compare_float_with_epsilon(float a, float b, float E) {
  return std::abs(a - b) < E;
 }
 
-TEST_F(RadixSortTest, TestFloatSort) {
- constexpr size_t num_values = 10000;
- std::vector<float> data;
- // generating random data
- for (size_t i = 0; i < num_values; ++i) {
- data.push_back(1.0 * num_values - i - 5000 + 0.1);
- }
- float nan = std::numeric_limits<float>::quiet_NaN();
- float max = std::numeric_limits<float>::max();
- float min = std::numeric_limits<float>::lowest();
- float infinity = std::numeric_limits<float>::infinity();
- data.push_back(nan);
- data.push_back(max);
- data.push_back(min);
- data.push_back(infinity);
- std::random_device rd;
- std::mt19937 g(rd());
- std::shuffle(data.begin(), data.end(), g);
- radixSortLSD(data.data(), data.size());
- for (size_t i = 0; i < num_values + 4; ++i) {
- if (i == 0) {
- EXPECT_TRUE(compare_float_with_epsilon(data[i], min, 0.0000001));
- } else if (i == num_values + 1) {
- EXPECT_TRUE(compare_float_with_epsilon(data[i], max, 0.0000001));
- } else if (i == num_values + 2) {
- EXPECT_TRUE(std::isinf(data[i]));
- } else if (i == num_values + 3) {
- EXPECT_TRUE(std::isnan(data[i]));
- } else {
- EXPECT_TRUE(compare_float_with_epsilon(data[i], 1.0 * i - 5000 + 0.1, 0.0000001));
- }
- }
-}
-
 bool compare_double_with_epsilon(double a, double b, double E) {
  return std::abs(a - b) < E;
 }
 
-TEST_F(RadixSortTest, TestDoubleSort) {
- constexpr size_t num_values = 10000;
- std::vector<double> data;
- // generating random data
- for (size_t i = 0; i < num_values; ++i) {
- data.push_back(num_values * 1.0 - i - 5000 + 0.1);
- }
- double nan = std::numeric_limits<double>::quiet_NaN();
- double max = std::numeric_limits<double>::max();
- double min = std::numeric_limits<double>::lowest();
- double infinity = std::numeric_limits<double>::infinity();
- data.push_back(nan);
- data.push_back(max);
- data.push_back(min);
- data.push_back(infinity);
- std::random_device rd;
- std::mt19937 g(rd());
- std::shuffle(data.begin(), data.end(), g);
- radixSortLSD(data.data(), data.size());
- for (size_t i = 0; i < num_values + 4; ++i) {
- if (i == 0) {
- EXPECT_TRUE(compare_double_with_epsilon(data[i], min, 0.0000001));
- } else if (i == num_values + 1) {
- EXPECT_TRUE(compare_double_with_epsilon(data[i], max, 0.0000001));
- } else if (i == num_values + 2) {
- EXPECT_TRUE(std::isinf(data[i]));
- } else if (i == num_values + 3) {
- EXPECT_TRUE(std::isnan(data[i]));
- } else {
- double tmp = 1.0 * i - 5000 + 0.1;
- EXPECT_TRUE(compare_double_with_epsilon(data[i], tmp, 0.0000001));
- }
- }
-}
-
 struct TestObject {
  float d1;
  float d2;