diff --git a/be/src/util/bitmap_value.h b/be/src/util/bitmap_value.h index 0664735599a8ac..71edf62a8adb11 100644 --- a/be/src/util/bitmap_value.h +++ b/be/src/util/bitmap_value.h @@ -1172,10 +1172,11 @@ class BitmapValue { using SetContainer = phmap::flat_hash_set; // Construct an empty bitmap. - BitmapValue() : _type(EMPTY), _is_shared(false) {} + BitmapValue() : _sv(0), _bitmap(nullptr), _type(EMPTY), _is_shared(false) { _set.clear(); } // Construct a bitmap with one element. - explicit BitmapValue(uint64_t value) : _sv(value), _type(SINGLE), _is_shared(false) {} + explicit BitmapValue(uint64_t value) + : _sv(value), _bitmap(nullptr), _type(SINGLE), _is_shared(false) {} // Construct a bitmap from serialized data. explicit BitmapValue(const char* src) : _is_shared(false) { @@ -1199,7 +1200,7 @@ class BitmapValue { break; } - if (other._type != EMPTY) { + if (other._type == BITMAP) { _is_shared = true; // should also set other's state to shared, so that other bitmap value will // create a new bitmap when it wants to modify it. @@ -1229,6 +1230,10 @@ class BitmapValue { } BitmapValue& operator=(const BitmapValue& other) { + if (this == &other) { + return *this; + } + reset(); _type = other._type; switch (other._type) { case EMPTY: @@ -1244,7 +1249,7 @@ class BitmapValue { break; } - if (other._type != EMPTY) { + if (other._type == BITMAP) { _is_shared = true; // should also set other's state to shared, so that other bitmap value will // create a new bitmap when it wants to modify it. @@ -1265,6 +1270,7 @@ class BitmapValue { if (this == &other) { return *this; } + reset(); _type = other._type; switch (other._type) { @@ -1721,8 +1727,7 @@ class BitmapValue { BitmapValue& operator&=(const BitmapValue& rhs) { switch (rhs._type) { case EMPTY: - _type = EMPTY; - _bitmap.reset(); + reset(); // empty & any = empty break; case SINGLE: switch (_type) { @@ -1741,6 +1746,7 @@ class BitmapValue { _sv = rhs._sv; } _bitmap.reset(); + _is_shared = false; break; case SET: if (!_set.contains(rhs._sv)) { @@ -1797,6 +1803,7 @@ class BitmapValue { } _type = SET; _bitmap.reset(); + _is_shared = false; _convert_to_smaller_type(); break; case SET: @@ -1832,7 +1839,6 @@ class BitmapValue { case SINGLE: if (_sv == rhs._sv) { _type = EMPTY; - _bitmap.reset(); } else { add(rhs._sv); } @@ -2162,7 +2168,7 @@ class BitmapValue { // Return how many bytes are required to serialize this bitmap. // See BitmapTypeCode for the serialized format. - size_t getSizeInBytes() { + size_t getSizeInBytes() const { size_t res = 0; switch (_type) { case EMPTY: @@ -2613,12 +2619,13 @@ class BitmapValue { } } - void clear() { + void reset() { _type = EMPTY; - _bitmap.reset(); _sv = 0; + _set.clear(); + _is_shared = false; + _bitmap = nullptr; } - // Implement an iterator for convenience friend class BitmapValueIterator; typedef BitmapValueIterator b_iterator; diff --git a/be/src/vec/aggregate_functions/aggregate_function_bitmap.h b/be/src/vec/aggregate_functions/aggregate_function_bitmap.h index 418eef1b9b78d3..2d707dbebfd0e5 100644 --- a/be/src/vec/aggregate_functions/aggregate_function_bitmap.h +++ b/be/src/vec/aggregate_functions/aggregate_function_bitmap.h @@ -143,7 +143,7 @@ struct AggregateFunctionBitmapData { void reset() { is_first = true; - value.clear(); + value.reset(); // it's better to call reset function by self firstly. } BitmapValue& get() { return value; } diff --git a/be/src/vec/aggregate_functions/aggregate_function_bitmap_agg.h b/be/src/vec/aggregate_functions/aggregate_function_bitmap_agg.h index 382957302ec245..a4c08aefe2ad43 100644 --- a/be/src/vec/aggregate_functions/aggregate_function_bitmap_agg.h +++ b/be/src/vec/aggregate_functions/aggregate_function_bitmap_agg.h @@ -46,7 +46,7 @@ struct AggregateFunctionBitmapAggData { void add(const T& value_) { value.add(value_); } - void reset() { value.clear(); } + void reset() { value.reset(); } void merge(const AggregateFunctionBitmapAggData& other) { value |= other.value; } diff --git a/be/src/vec/data_types/data_type_bitmap.cpp b/be/src/vec/data_types/data_type_bitmap.cpp index ccb39b080a33ce..79800029eb83ca 100644 --- a/be/src/vec/data_types/data_type_bitmap.cpp +++ b/be/src/vec/data_types/data_type_bitmap.cpp @@ -114,9 +114,7 @@ void DataTypeBitMap::to_string(const IColumn& column, size_t row_num, BufferWrit ColumnPtr ptr = result.first; row_num = result.second; - auto& data = - const_cast(assert_cast(*ptr).get_element(row_num)); - + const auto& data = assert_cast(*ptr).get_element(row_num); std::string buffer(data.getSizeInBytes(), '0'); data.write_to(const_cast(buffer.data())); ostr.write(buffer.c_str(), buffer.size()); diff --git a/be/src/vec/data_types/data_type_bitmap.h b/be/src/vec/data_types/data_type_bitmap.h index 6fd6a7d110d277..b5afc1170732ad 100644 --- a/be/src/vec/data_types/data_type_bitmap.h +++ b/be/src/vec/data_types/data_type_bitmap.h @@ -30,6 +30,7 @@ #include "serde/data_type_bitmap_serde.h" #include "util/bitmap_value.h" #include "vec/columns/column_complex.h" +#include "vec/columns/column_const.h" #include "vec/core/field.h" #include "vec/core/types.h" #include "vec/data_types/data_type.h" @@ -92,7 +93,12 @@ class DataTypeBitMap : public IDataType { bool can_be_inside_low_cardinality() const override { return false; } std::string to_string(const IColumn& column, size_t row_num) const override { - return "BitMap()"; + auto result = check_column_const_set_readability(column, row_num); + ColumnPtr ptr = result.first; + row_num = result.second; + + const auto& data = assert_cast(*ptr).get_element(row_num); + return data.to_string(); } void to_string(const IColumn& column, size_t row_num, BufferWritable& ostr) const override; Status from_string(ReadBuffer& rb, IColumn* column) const override; diff --git a/be/src/vec/functions/function_bitmap.cpp b/be/src/vec/functions/function_bitmap.cpp index 2083ec8420b138..cc97fb01d8e5c4 100644 --- a/be/src/vec/functions/function_bitmap.cpp +++ b/be/src/vec/functions/function_bitmap.cpp @@ -578,7 +578,7 @@ struct BitmapAndNot { mid_data &= rvec[i]; res[i] = lvec[i]; res[i] -= mid_data; - mid_data.clear(); + mid_data.reset(); } } static void vector_scalar(const TData& lvec, const BitmapValue& rval, TData& res) { @@ -589,7 +589,7 @@ struct BitmapAndNot { mid_data &= rval; res[i] = lvec[i]; res[i] -= mid_data; - mid_data.clear(); + mid_data.reset(); } } static void scalar_vector(const BitmapValue& lval, const TData& rvec, TData& res) { @@ -600,7 +600,7 @@ struct BitmapAndNot { mid_data &= rvec[i]; res[i] = lval; res[i] -= mid_data; - mid_data.clear(); + mid_data.reset(); } } }; @@ -624,7 +624,7 @@ struct BitmapAndNotCount { mid_data = lvec[i]; mid_data &= rvec[i]; res[i] = lvec[i].andnot_cardinality(mid_data); - mid_data.clear(); + mid_data.reset(); } } static void scalar_vector(const BitmapValue& lval, const TData& rvec, ResTData* res) { @@ -634,7 +634,7 @@ struct BitmapAndNotCount { mid_data = lval; mid_data &= rvec[i]; res[i] = lval.andnot_cardinality(mid_data); - mid_data.clear(); + mid_data.reset(); } } static void vector_scalar(const TData& lvec, const BitmapValue& rval, ResTData* res) { @@ -644,7 +644,7 @@ struct BitmapAndNotCount { mid_data = lvec[i]; mid_data &= rval; res[i] = lvec[i].andnot_cardinality(mid_data); - mid_data.clear(); + mid_data.reset(); } } }; diff --git a/be/test/util/bitmap_value_test.cpp b/be/test/util/bitmap_value_test.cpp index 6271300ecbb0da..e7652199ab03f9 100644 --- a/be/test/util/bitmap_value_test.cpp +++ b/be/test/util/bitmap_value_test.cpp @@ -24,6 +24,7 @@ #include #include +#include "gtest/gtest.h" #include "gtest/gtest_pred_impl.h" #include "util/coding.h" @@ -422,7 +423,7 @@ TEST(BitmapValueTest, set) { bitmap_value.add(4294967297); EXPECT_EQ(bitmap_value.get_type_code(), BitmapTypeCode::SINGLE64); - bitmap_value.clear(); + bitmap_value.reset(); bitmap_value.add(10); EXPECT_EQ(bitmap_value.get_type_code(), BitmapTypeCode::SINGLE32); @@ -494,7 +495,7 @@ TEST(BitmapValueTest, add) { bitmap_value.add_many(values.data(), values.size()); EXPECT_EQ(bitmap_value.get_type_code(), BitmapTypeCode::BITMAP32); - bitmap_value.clear(); + bitmap_value.reset(); values.clear(); values.resize(31); std::iota(values.begin(), values.end(), 0); @@ -545,6 +546,39 @@ void check_bitmap_value_operator(const BitmapValue& left, const BitmapValue& rig EXPECT_EQ(copy.cardinality(), left_cardinality + right_cardinality - and_cardinality * 2); } +// '=' +TEST(BitmapValueTest, copy_operator) { + BitmapValue test_bitmap; + + std::vector values1(31); + BitmapValue bitmap; + values1.resize(128); + std::iota(values1.begin(), values1.begin() + 16, 0); + std::iota(values1.begin() + 16, values1.begin() + 32, 4294967297); + std::iota(values1.begin() + 32, values1.begin() + 64, 8589934594); + std::iota(values1.begin() + 64, values1.end(), 42949672970); + bitmap.add_many(values1.data(), values1.size()); + + test_bitmap = bitmap; //should be bitmap + EXPECT_EQ(test_bitmap.cardinality(), bitmap.cardinality()); + EXPECT_EQ(test_bitmap.to_string(), bitmap.to_string()); + + BitmapValue single(1); + test_bitmap = single; //should be single + EXPECT_EQ(test_bitmap.cardinality(), 1); + EXPECT_EQ(test_bitmap.cardinality(), single.cardinality()); + EXPECT_EQ(test_bitmap.to_string(), single.to_string()); + + BitmapValue empty; + test_bitmap = empty; // should be empty + EXPECT_TRUE(test_bitmap.empty()); + + BitmapValue bitmap2(bitmap); + EXPECT_EQ(bitmap2.to_string(), bitmap.to_string()); + bitmap2 = bitmap; + EXPECT_EQ(bitmap2.to_string(), bitmap.to_string()); +} + // '-=', '|=', '&=', '^=' TEST(BitmapValueTest, operators) { config::enable_set_in_bitmap_value = true; @@ -658,7 +692,7 @@ TEST(BitmapValueTest, write_read) { buffer.reset(new char[size]); bitmap_single.write_to(buffer.get()); - deserialized.clear(); + deserialized.reset(); deserialized.deserialize(buffer.get()); check_bitmap_equal(deserialized, bitmap_single); @@ -667,7 +701,7 @@ TEST(BitmapValueTest, write_read) { buffer.reset(new char[size]); bitmap_set.write_to(buffer.get()); - deserialized.clear(); + deserialized.reset(); deserialized.deserialize(buffer.get()); check_bitmap_equal(deserialized, bitmap_set); @@ -676,7 +710,7 @@ TEST(BitmapValueTest, write_read) { buffer.reset(new char[size]); bitmap.write_to(buffer.get()); - deserialized.clear(); + deserialized.reset(); deserialized.deserialize(buffer.get()); check_bitmap_equal(deserialized, bitmap); diff --git a/be/test/vec/aggregate_functions/agg_bitmap_test.cpp b/be/test/vec/aggregate_functions/agg_bitmap_test.cpp new file mode 100644 index 00000000000000..6ca85efe3210cb --- /dev/null +++ b/be/test/vec/aggregate_functions/agg_bitmap_test.cpp @@ -0,0 +1,92 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include +#include +#include +#include +#include + +#include +#include +#include + +#include "gtest/gtest_pred_impl.h" +#include "util/bitmap_value.h" +#include "vec/aggregate_functions/aggregate_function.h" +#include "vec/aggregate_functions/aggregate_function_simple_factory.h" +#include "vec/columns/column.h" +#include "vec/columns/column_complex.h" +#include "vec/columns/column_string.h" +#include "vec/columns/column_vector.h" +#include "vec/columns/columns_number.h" +#include "vec/common/string_ref.h" +#include "vec/core/field.h" +#include "vec/core/types.h" +#include "vec/data_types/data_type_bitmap.h" +#include "vec/data_types/data_type_decimal.h" +#include "vec/data_types/data_type_number.h" +#include "vec/data_types/data_type_string.h" + +const int agg_test_batch_size = 10; + +namespace doris::vectorized { +// declare function +void register_aggregate_function_bitmap(AggregateFunctionSimpleFactory& factory); + +TEST(AggBitmapTest, bitmap_union_test) { + std::string function_name = "bitmap_union"; + auto data_type = std::make_shared(); + // Prepare test data. + auto column_bitmap = data_type->create_column(); + for (int i = 0; i < agg_test_batch_size; i++) { + BitmapValue bitmap_value(i); + assert_cast(*column_bitmap).insert_value(bitmap_value); + } + + // Prepare test function and parameters. + AggregateFunctionSimpleFactory factory; + register_aggregate_function_bitmap(factory); + DataTypes data_types = {data_type}; + auto agg_function = factory.get(function_name, data_types); + agg_function->set_version(3); + std::unique_ptr memory(new char[agg_function->size_of_data()]); + AggregateDataPtr place = memory.get(); + agg_function->create(place); + + // Do aggregation. + const IColumn* column[1] = {column_bitmap.get()}; + for (int i = 0; i < agg_test_batch_size; i++) { + agg_function->add(place, column, i, nullptr); + } + + // Check result. + ColumnBitmap ans; + agg_function->insert_result_into(place, ans); + EXPECT_EQ(ans.size(), 1); + EXPECT_EQ(ans.get_element(0).cardinality(), agg_test_batch_size); + agg_function->destroy(place); + + auto dst = agg_function->create_serialize_column(); + agg_function->streaming_agg_serialize_to_column(column, dst, agg_test_batch_size, nullptr); + + for (size_t i = 0; i != agg_test_batch_size; ++i) { + EXPECT_EQ(std::to_string(i), assert_cast(*dst).get_element(i).to_string()); + } +} + +} // namespace doris::vectorized