Skip to content

Commit

Permalink
[bug](bitmap) fix bitmap value copy operator not call reset (apache#2…
Browse files Browse the repository at this point in the history
…6451)

when a empty bitmap assign to other bitmap
the other bitmap should reset self firstly, and then set empty type.
  • Loading branch information
zhangstar333 committed Nov 9, 2023
1 parent 0d83327 commit dc2a79a
Show file tree
Hide file tree
Showing 8 changed files with 130 additions and 23 deletions.
29 changes: 18 additions & 11 deletions be/src/util/bitmap_value.h
Original file line number Diff line number Diff line change
Expand Up @@ -1177,10 +1177,11 @@ class BitmapValue {
using SetContainer = phmap::flat_hash_set<T>;

// Construct an empty bitmap.
BitmapValue() : _type(EMPTY), _is_shared(false) {}
BitmapValue() : _sv(0), _bitmap(nullptr), _type(EMPTY), _is_shared(false) { _set.clear(); }

// Construct a bitmap with one element.
explicit BitmapValue(uint64_t value) : _sv(value), _type(SINGLE), _is_shared(false) {}
explicit BitmapValue(uint64_t value)
: _sv(value), _bitmap(nullptr), _type(SINGLE), _is_shared(false) {}

// Construct a bitmap from serialized data.
explicit BitmapValue(const char* src) : _is_shared(false) {
Expand All @@ -1204,7 +1205,7 @@ class BitmapValue {
break;
}

if (other._type != EMPTY) {
if (other._type == BITMAP) {
_is_shared = true;
// should also set other's state to shared, so that other bitmap value will
// create a new bitmap when it wants to modify it.
Expand Down Expand Up @@ -1234,6 +1235,10 @@ class BitmapValue {
}

BitmapValue& operator=(const BitmapValue& other) {
if (this == &other) {
return *this;
}
reset();
_type = other._type;
switch (other._type) {
case EMPTY:
Expand All @@ -1249,7 +1254,7 @@ class BitmapValue {
break;
}

if (other._type != EMPTY) {
if (other._type == BITMAP) {
_is_shared = true;
// should also set other's state to shared, so that other bitmap value will
// create a new bitmap when it wants to modify it.
Expand All @@ -1270,6 +1275,7 @@ class BitmapValue {
if (this == &other) {
return *this;
}
reset();

_type = other._type;
switch (other._type) {
Expand Down Expand Up @@ -1726,8 +1732,7 @@ class BitmapValue {
BitmapValue& operator&=(const BitmapValue& rhs) {
switch (rhs._type) {
case EMPTY:
_type = EMPTY;
_bitmap.reset();
reset(); // empty & any = empty
break;
case SINGLE:
switch (_type) {
Expand All @@ -1746,6 +1751,7 @@ class BitmapValue {
_sv = rhs._sv;
}
_bitmap.reset();
_is_shared = false;
break;
case SET:
if (!_set.contains(rhs._sv)) {
Expand Down Expand Up @@ -1802,6 +1808,7 @@ class BitmapValue {
}
_type = SET;
_bitmap.reset();
_is_shared = false;
_convert_to_smaller_type();
break;
case SET:
Expand Down Expand Up @@ -1837,7 +1844,6 @@ class BitmapValue {
case SINGLE:
if (_sv == rhs._sv) {
_type = EMPTY;
_bitmap.reset();
} else {
add(rhs._sv);
}
Expand Down Expand Up @@ -2167,7 +2173,7 @@ class BitmapValue {

// Return how many bytes are required to serialize this bitmap.
// See BitmapTypeCode for the serialized format.
size_t getSizeInBytes() {
size_t getSizeInBytes() const {
size_t res = 0;
switch (_type) {
case EMPTY:
Expand Down Expand Up @@ -2618,12 +2624,13 @@ class BitmapValue {
}
}

void clear() {
void reset() {
_type = EMPTY;
_bitmap.reset();
_sv = 0;
_set.clear();
_is_shared = false;
_bitmap = nullptr;
}

// Implement an iterator for convenience
friend class BitmapValueIterator;
typedef BitmapValueIterator b_iterator;
Expand Down
5 changes: 4 additions & 1 deletion be/src/vec/aggregate_functions/aggregate_function_bitmap.h
Original file line number Diff line number Diff line change
Expand Up @@ -141,7 +141,10 @@ struct AggregateFunctionBitmapData {

void read(BufferReadable& buf) { DataTypeBitMap::deserialize_as_stream(value, buf); }

void reset() { is_first = true; }
void reset() {
is_first = true;
value.reset(); // it's better to call reset function by self firstly.
}

BitmapValue& get() { return value; }
};
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ struct AggregateFunctionBitmapAggData {

void add(const T& value_) { value.add(value_); }

void reset() { value.clear(); }
void reset() { value.reset(); }

void merge(const AggregateFunctionBitmapAggData& other) { value |= other.value; }

Expand Down
4 changes: 1 addition & 3 deletions be/src/vec/data_types/data_type_bitmap.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -114,9 +114,7 @@ void DataTypeBitMap::to_string(const IColumn& column, size_t row_num, BufferWrit
ColumnPtr ptr = result.first;
row_num = result.second;

auto& data =
const_cast<BitmapValue&>(assert_cast<const ColumnBitmap&>(*ptr).get_element(row_num));

const auto& data = assert_cast<const ColumnBitmap&>(*ptr).get_element(row_num);
std::string buffer(data.getSizeInBytes(), '0');
data.write_to(const_cast<char*>(buffer.data()));
ostr.write(buffer.c_str(), buffer.size());
Expand Down
8 changes: 7 additions & 1 deletion be/src/vec/data_types/data_type_bitmap.h
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
#include "serde/data_type_bitmap_serde.h"
#include "util/bitmap_value.h"
#include "vec/columns/column_complex.h"
#include "vec/columns/column_const.h"
#include "vec/core/field.h"
#include "vec/core/types.h"
#include "vec/data_types/data_type.h"
Expand Down Expand Up @@ -92,7 +93,12 @@ class DataTypeBitMap : public IDataType {
bool can_be_inside_low_cardinality() const override { return false; }

std::string to_string(const IColumn& column, size_t row_num) const override {
return "BitMap()";
auto result = check_column_const_set_readability(column, row_num);
ColumnPtr ptr = result.first;
row_num = result.second;

const auto& data = assert_cast<const ColumnBitmap&>(*ptr).get_element(row_num);
return data.to_string();
}
void to_string(const IColumn& column, size_t row_num, BufferWritable& ostr) const override;
Status from_string(ReadBuffer& rb, IColumn* column) const override;
Expand Down
12 changes: 6 additions & 6 deletions be/src/vec/functions/function_bitmap.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -584,7 +584,7 @@ struct BitmapAndNot {
mid_data &= rvec[i];
res[i] = lvec[i];
res[i] -= mid_data;
mid_data.clear();
mid_data.reset();
}
}
static void vector_scalar(const TData& lvec, const BitmapValue& rval, TData& res) {
Expand All @@ -595,7 +595,7 @@ struct BitmapAndNot {
mid_data &= rval;
res[i] = lvec[i];
res[i] -= mid_data;
mid_data.clear();
mid_data.reset();
}
}
static void scalar_vector(const BitmapValue& lval, const TData& rvec, TData& res) {
Expand All @@ -606,7 +606,7 @@ struct BitmapAndNot {
mid_data &= rvec[i];
res[i] = lval;
res[i] -= mid_data;
mid_data.clear();
mid_data.reset();
}
}
};
Expand All @@ -630,7 +630,7 @@ struct BitmapAndNotCount {
mid_data = lvec[i];
mid_data &= rvec[i];
res[i] = lvec[i].andnot_cardinality(mid_data);
mid_data.clear();
mid_data.reset();
}
}
static void scalar_vector(const BitmapValue& lval, const TData& rvec, ResTData* res) {
Expand All @@ -640,7 +640,7 @@ struct BitmapAndNotCount {
mid_data = lval;
mid_data &= rvec[i];
res[i] = lval.andnot_cardinality(mid_data);
mid_data.clear();
mid_data.reset();
}
}
static void vector_scalar(const TData& lvec, const BitmapValue& rval, ResTData* res) {
Expand All @@ -650,7 +650,7 @@ struct BitmapAndNotCount {
mid_data = lvec[i];
mid_data &= rval;
res[i] = lvec[i].andnot_cardinality(mid_data);
mid_data.clear();
mid_data.reset();
}
}
};
Expand Down
1 change: 1 addition & 0 deletions be/test/util/bitmap_value_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
#include <cstdint>
#include <string>

#include "gtest/gtest.h"
#include "gtest/gtest_pred_impl.h"
#include "util/coding.h"

Expand Down
92 changes: 92 additions & 0 deletions be/test/vec/aggregate_functions/agg_bitmap_test.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

#include <glog/logging.h>
#include <gtest/gtest-message.h>
#include <gtest/gtest-param-test.h>
#include <gtest/gtest-test-part.h>
#include <stddef.h>

#include <memory>
#include <string>
#include <vector>

#include "gtest/gtest_pred_impl.h"
#include "util/bitmap_value.h"
#include "vec/aggregate_functions/aggregate_function.h"
#include "vec/aggregate_functions/aggregate_function_simple_factory.h"
#include "vec/columns/column.h"
#include "vec/columns/column_complex.h"
#include "vec/columns/column_string.h"
#include "vec/columns/column_vector.h"
#include "vec/columns/columns_number.h"
#include "vec/common/string_ref.h"
#include "vec/core/field.h"
#include "vec/core/types.h"
#include "vec/data_types/data_type_bitmap.h"
#include "vec/data_types/data_type_decimal.h"
#include "vec/data_types/data_type_number.h"
#include "vec/data_types/data_type_string.h"

const int agg_test_batch_size = 10;

namespace doris::vectorized {
// declare function
void register_aggregate_function_bitmap(AggregateFunctionSimpleFactory& factory);

TEST(AggBitmapTest, bitmap_union_test) {
std::string function_name = "bitmap_union";
auto data_type = std::make_shared<DataTypeBitMap>();
// Prepare test data.
auto column_bitmap = data_type->create_column();
for (int i = 0; i < agg_test_batch_size; i++) {
BitmapValue bitmap_value(i);
assert_cast<ColumnBitmap&>(*column_bitmap).insert_value(bitmap_value);
}

// Prepare test function and parameters.
AggregateFunctionSimpleFactory factory;
register_aggregate_function_bitmap(factory);
DataTypes data_types = {data_type};
auto agg_function = factory.get(function_name, data_types);
agg_function->set_version(3);
std::unique_ptr<char[]> memory(new char[agg_function->size_of_data()]);
AggregateDataPtr place = memory.get();
agg_function->create(place);

// Do aggregation.
const IColumn* column[1] = {column_bitmap.get()};
for (int i = 0; i < agg_test_batch_size; i++) {
agg_function->add(place, column, i, nullptr);
}

// Check result.
ColumnBitmap ans;
agg_function->insert_result_into(place, ans);
EXPECT_EQ(ans.size(), 1);
EXPECT_EQ(ans.get_element(0).cardinality(), agg_test_batch_size);
agg_function->destroy(place);

auto dst = agg_function->create_serialize_column();
agg_function->streaming_agg_serialize_to_column(column, dst, agg_test_batch_size, nullptr);

for (size_t i = 0; i != agg_test_batch_size; ++i) {
EXPECT_EQ(std::to_string(i), assert_cast<ColumnBitmap&>(*dst).get_element(i).to_string());
}
}

} // namespace doris::vectorized

0 comments on commit dc2a79a

Please sign in to comment.