From 1e2fdec2c14e4ef70eaafabb9edc0e2f3f08e027 Mon Sep 17 00:00:00 2001 From: Ti Chi Robot Date: Wed, 29 Mar 2023 13:18:54 +0800 Subject: [PATCH] Use size of filtered column to reserve memory for filter when result_size_hint < 0 (#6746) (#6752) close pingcap/tiflash#6730 --- dbms/src/Columns/ColumnAggregateFunction.cpp | 7 ++++++- dbms/src/Columns/ColumnArray.h | 1 + dbms/src/Columns/ColumnDecimal.cpp | 6 +++++- dbms/src/Columns/ColumnFixedString.cpp | 7 ++++++- dbms/src/Columns/ColumnVector.cpp | 7 ++++++- dbms/src/Columns/ColumnsCommon.cpp | 15 ++++++++------- dbms/src/Columns/IColumn.h | 2 +- 7 files changed, 33 insertions(+), 12 deletions(-) diff --git a/dbms/src/Columns/ColumnAggregateFunction.cpp b/dbms/src/Columns/ColumnAggregateFunction.cpp index d24a17a1256..3f87df6a176 100644 --- a/dbms/src/Columns/ColumnAggregateFunction.cpp +++ b/dbms/src/Columns/ColumnAggregateFunction.cpp @@ -14,6 +14,7 @@ #include #include +#include #include #include #include @@ -143,7 +144,11 @@ ColumnPtr ColumnAggregateFunction::filter(const Filter & filter, ssize_t result_ auto & res_data = res->getData(); if (result_size_hint) - res_data.reserve(result_size_hint > 0 ? result_size_hint : size); + { + if (result_size_hint < 0) + result_size_hint = countBytesInFilter(filter); + res_data.reserve(result_size_hint); + } for (size_t i = 0; i < size; ++i) if (filter[i]) diff --git a/dbms/src/Columns/ColumnArray.h b/dbms/src/Columns/ColumnArray.h index 85e78957f39..ce05b692dcf 100644 --- a/dbms/src/Columns/ColumnArray.h +++ b/dbms/src/Columns/ColumnArray.h @@ -87,6 +87,7 @@ class ColumnArray final : public COWPtrHelper void insertFrom(const IColumn & src_, size_t n) override; void insertDefault() override; void popBack(size_t n) override; + /// TODO: If result_size_hint < 0, make reserve() use the size of the filtered (result) column, not the source column, to avoid some OOM issues. 
ColumnPtr filter(const Filter & filt, ssize_t result_size_hint) const override; ColumnPtr permute(const Permutation & perm, size_t limit) const override; int compareAt(size_t n, size_t m, const IColumn & rhs_, int nan_direction_hint) const override; diff --git a/dbms/src/Columns/ColumnDecimal.cpp b/dbms/src/Columns/ColumnDecimal.cpp index fd99e5b0c32..d590acf5665 100644 --- a/dbms/src/Columns/ColumnDecimal.cpp +++ b/dbms/src/Columns/ColumnDecimal.cpp @@ -306,7 +306,11 @@ ColumnPtr ColumnDecimal::filter(const IColumn::Filter & filt, ssize_t result_ Container & res_data = res->getData(); if (result_size_hint) - res_data.reserve(result_size_hint > 0 ? result_size_hint : size); + { + if (result_size_hint < 0) + result_size_hint = countBytesInFilter(filt); + res_data.reserve(result_size_hint); + } const UInt8 * filt_pos = filt.data(); const UInt8 * filt_end = filt_pos + size; diff --git a/dbms/src/Columns/ColumnFixedString.cpp b/dbms/src/Columns/ColumnFixedString.cpp index 5d747b52ed5..847d2f623f9 100644 --- a/dbms/src/Columns/ColumnFixedString.cpp +++ b/dbms/src/Columns/ColumnFixedString.cpp @@ -13,6 +13,7 @@ // limitations under the License. #include +#include #include #include #include @@ -205,7 +206,11 @@ ColumnPtr ColumnFixedString::filter(const IColumn::Filter & filt, ssize_t result auto res = ColumnFixedString::create(n); if (result_size_hint) - res->chars.reserve(result_size_hint > 0 ? result_size_hint * n : chars.size()); + { + if (result_size_hint < 0) + result_size_hint = countBytesInFilter(filt); + res->chars.reserve(result_size_hint * n); + } const UInt8 * filt_pos = &filt[0]; const UInt8 * filt_end = filt_pos + col_size; diff --git a/dbms/src/Columns/ColumnVector.cpp b/dbms/src/Columns/ColumnVector.cpp index e69727d4a9f..1fd7fb6a01b 100644 --- a/dbms/src/Columns/ColumnVector.cpp +++ b/dbms/src/Columns/ColumnVector.cpp @@ -13,6 +13,7 @@ // limitations under the License. 
#include +#include #include #include #include @@ -220,7 +221,11 @@ ColumnPtr ColumnVector::filter(const IColumn::Filter & filt, ssize_t result_s Container & res_data = res->getData(); if (result_size_hint) - res_data.reserve(result_size_hint > 0 ? result_size_hint : size); + { + if (result_size_hint < 0) + result_size_hint = countBytesInFilter(filt); + res_data.reserve(result_size_hint); + } const UInt8 * filt_pos = &filt[0]; const UInt8 * filt_end = filt_pos + size; diff --git a/dbms/src/Columns/ColumnsCommon.cpp b/dbms/src/Columns/ColumnsCommon.cpp index 0cfa6eeae29..270166eeaec 100644 --- a/dbms/src/Columns/ColumnsCommon.cpp +++ b/dbms/src/Columns/ColumnsCommon.cpp @@ -126,9 +126,9 @@ struct ResultOffsetsBuilder : res_offsets(*res_offsets_) {} - void reserve(ssize_t result_size_hint, size_t src_size) + void reserve(size_t result_size_hint) { - res_offsets.reserve(result_size_hint > 0 ? result_size_hint : src_size); + res_offsets.reserve(result_size_hint); } void insertOne(size_t array_size) @@ -169,7 +169,7 @@ struct ResultOffsetsBuilder struct NoResultOffsetsBuilder { explicit NoResultOffsetsBuilder(IColumn::Offsets *) {} - void reserve(ssize_t, size_t) {} + void reserve(size_t) {} void insertOne(size_t) {} template @@ -200,11 +200,12 @@ void filterArraysImplGeneric( if (result_size_hint) { - result_offsets_builder.reserve(result_size_hint, size); - if (result_size_hint < 0) - res_elems.reserve(src_elems.size()); - else if (result_size_hint < 1000000000 && src_elems.size() < 1000000000) /// Avoid overflow. + result_size_hint = countBytesInFilter(filt); + + result_offsets_builder.reserve(result_size_hint); + + if (result_size_hint < 1000000000 && src_elems.size() < 1000000000) /// Avoid overflow. 
res_elems.reserve((result_size_hint * src_elems.size() + size - 1) / size); } diff --git a/dbms/src/Columns/IColumn.h b/dbms/src/Columns/IColumn.h index 3c764b1a6f8..4ad292d8f31 100644 --- a/dbms/src/Columns/IColumn.h +++ b/dbms/src/Columns/IColumn.h @@ -216,7 +216,7 @@ class IColumn : public COWPtr * Is used in WHERE and HAVING operations. * If result_size_hint > 0, then makes advance reserve(result_size_hint) for the result column; * if 0, then don't makes reserve(), - * otherwise (i.e. < 0), makes reserve() using size of source column. + * otherwise (i.e. < 0), makes reserve() using the size of the filtered (result) column. */ using Filter = PaddedPODArray; virtual Ptr filter(const Filter & filt, ssize_t result_size_hint) const = 0;