From 4b45fa18093f9b724e85331e7de03bd374f8a21e Mon Sep 17 00:00:00 2001 From: Liqi Geng Date: Tue, 7 Feb 2023 14:11:57 +0800 Subject: [PATCH] This is an automated cherry-pick of #6746 Signed-off-by: ti-chi-bot --- dbms/src/Columns/ColumnAggregateFunction.cpp | 10 +++- dbms/src/Columns/ColumnArray.h | 1 + dbms/src/Columns/ColumnDecimal.cpp | 6 ++- dbms/src/Columns/ColumnFixedString.cpp | 10 +++- dbms/src/Columns/ColumnVector.cpp | 11 +++- dbms/src/Columns/ColumnsCommon.cpp | 54 ++++++++++++++++++++ dbms/src/Columns/IColumn.h | 2 +- 7 files changed, 89 insertions(+), 5 deletions(-) diff --git a/dbms/src/Columns/ColumnAggregateFunction.cpp b/dbms/src/Columns/ColumnAggregateFunction.cpp index ea00aee2e5a..892a8ea0caf 100644 --- a/dbms/src/Columns/ColumnAggregateFunction.cpp +++ b/dbms/src/Columns/ColumnAggregateFunction.cpp @@ -1,7 +1,11 @@ #include +<<<<<<< HEAD #include #include #include +======= +#include +>>>>>>> dba70f9da8 (Use size of filtered column to reserve memory for filter when result_size_hint < 0 (#6746)) #include #include #include @@ -127,7 +131,11 @@ ColumnPtr ColumnAggregateFunction::filter(const Filter & filter, ssize_t result_ auto & res_data = res->getData(); if (result_size_hint) - res_data.reserve(result_size_hint > 0 ? result_size_hint : size); + { + if (result_size_hint < 0) + result_size_hint = countBytesInFilter(filter); + res_data.reserve(result_size_hint); + } for (size_t i = 0; i < size; ++i) if (filter[i]) diff --git a/dbms/src/Columns/ColumnArray.h b/dbms/src/Columns/ColumnArray.h index 2e54ad25f89..8cb04f05739 100644 --- a/dbms/src/Columns/ColumnArray.h +++ b/dbms/src/Columns/ColumnArray.h @@ -71,6 +71,7 @@ class ColumnArray final : public COWPtrHelper void insertFrom(const IColumn & src_, size_t n) override; void insertDefault() override; void popBack(size_t n) override; + /// TODO: If result_size_hint < 0, makes reserve() using size of filtered column, not source column to avoid some OOM issues. ColumnPtr filter(const Filter & filt, ssize_t result_size_hint) const override; ColumnPtr permute(const Permutation & perm, size_t limit) const override; int compareAt(size_t n, size_t m, const IColumn & rhs_, int nan_direction_hint) const override; diff --git a/dbms/src/Columns/ColumnDecimal.cpp b/dbms/src/Columns/ColumnDecimal.cpp index c6c0e880f54..65c22d007d6 100644 --- a/dbms/src/Columns/ColumnDecimal.cpp +++ b/dbms/src/Columns/ColumnDecimal.cpp @@ -244,7 +244,11 @@ ColumnPtr ColumnDecimal::filter(const IColumn::Filter & filt, ssize_t result_ Container & res_data = res->getData(); if (result_size_hint) - res_data.reserve(result_size_hint > 0 ? result_size_hint : size); + { + if (result_size_hint < 0) + result_size_hint = countBytesInFilter(filt); + res_data.reserve(result_size_hint); + } const UInt8 * filt_pos = filt.data(); const UInt8 * filt_end = filt_pos + size; diff --git a/dbms/src/Columns/ColumnFixedString.cpp b/dbms/src/Columns/ColumnFixedString.cpp index 378beff4f0a..0c558efc991 100644 --- a/dbms/src/Columns/ColumnFixedString.cpp +++ b/dbms/src/Columns/ColumnFixedString.cpp @@ -1,5 +1,9 @@ #include +<<<<<<< HEAD +======= +#include +>>>>>>> dba70f9da8 (Use size of filtered column to reserve memory for filter when result_size_hint < 0 (#6746)) #include #include #include @@ -192,7 +196,11 @@ ColumnPtr ColumnFixedString::filter(const IColumn::Filter & filt, ssize_t result auto res = ColumnFixedString::create(n); if (result_size_hint) - res->chars.reserve(result_size_hint > 0 ? result_size_hint * n : chars.size()); + { + if (result_size_hint < 0) + result_size_hint = countBytesInFilter(filt); + res->chars.reserve(result_size_hint * n); + } const UInt8 * filt_pos = &filt[0]; const UInt8 * filt_end = filt_pos + col_size; diff --git a/dbms/src/Columns/ColumnVector.cpp b/dbms/src/Columns/ColumnVector.cpp index 821b28e8040..1d11f3496c4 100644 --- a/dbms/src/Columns/ColumnVector.cpp +++ b/dbms/src/Columns/ColumnVector.cpp @@ -1,6 +1,11 @@ #include #include +<<<<<<< HEAD +======= +#include +#include +>>>>>>> dba70f9da8 (Use size of filtered column to reserve memory for filter when result_size_hint < 0 (#6746)) #include #include #include @@ -203,7 +208,11 @@ ColumnPtr ColumnVector::filter(const IColumn::Filter & filt, ssize_t result_s Container & res_data = res->getData(); if (result_size_hint) - res_data.reserve(result_size_hint > 0 ? result_size_hint : size); + { + if (result_size_hint < 0) + result_size_hint = countBytesInFilter(filt); + res_data.reserve(result_size_hint); + } const UInt8 * filt_pos = &filt[0]; const UInt8 * filt_end = filt_pos + size; diff --git a/dbms/src/Columns/ColumnsCommon.cpp b/dbms/src/Columns/ColumnsCommon.cpp index b7506f3b53f..f4200753a9b 100644 --- a/dbms/src/Columns/ColumnsCommon.cpp +++ b/dbms/src/Columns/ColumnsCommon.cpp @@ -121,10 +121,26 @@ namespace /// Implementation details of filterArraysImpl function, used as template parameter. /// Allow to build or not to build offsets array. +<<<<<<< HEAD struct ResultOffsetsBuilder { IColumn::Offsets & res_offsets; IColumn::Offset current_src_offset = 0; +======= +struct ResultOffsetsBuilder +{ + IColumn::Offsets & res_offsets; + IColumn::Offset current_src_offset = 0; + + explicit ResultOffsetsBuilder(IColumn::Offsets * res_offsets_) + : res_offsets(*res_offsets_) + {} + + void reserve(size_t result_size_hint) + { + res_offsets.reserve(result_size_hint); + } +>>>>>>> dba70f9da8 (Use size of filtered column to reserve memory for filter when result_size_hint < 0 (#6746)) explicit ResultOffsetsBuilder(IColumn::Offsets * res_offsets_) : res_offsets(*res_offsets_) {} @@ -133,11 +149,19 @@ namespace res_offsets.reserve(result_size_hint > 0 ? result_size_hint : src_size); } +<<<<<<< HEAD void insertOne(size_t array_size) { current_src_offset += array_size; res_offsets.push_back(current_src_offset); } +======= +struct NoResultOffsetsBuilder +{ + explicit NoResultOffsetsBuilder(IColumn::Offsets *) {} + void reserve(size_t) {} + void insertOne(size_t) {} +>>>>>>> dba70f9da8 (Use size of filtered column to reserve memory for filter when result_size_hint < 0 (#6746)) template void insertChunk( @@ -159,6 +183,7 @@ namespace { const auto res_offsets_pos = &res_offsets[offsets_size_old]; +<<<<<<< HEAD /// adjust offsets for (size_t i = 0; i < SIMD_BYTES; ++i) res_offsets_pos[i] -= diff_offset; @@ -166,6 +191,35 @@ namespace } current_src_offset += chunk_size; } +======= + if (result_size_hint) + { + if (result_size_hint < 0) + result_size_hint = countBytesInFilter(filt); + + result_offsets_builder.reserve(result_size_hint); + + if (result_size_hint < 1000000000 && src_elems.size() < 1000000000) /// Avoid overflow. + res_elems.reserve((result_size_hint * src_elems.size() + size - 1) / size); + } + + const UInt8 * filt_pos = filt.data(); + const auto * filt_end = filt_pos + size; + + const auto * offsets_pos = src_offsets.data(); + const auto * offsets_begin = offsets_pos; + + /// copy array ending at *end_offset_ptr + const auto copy_array = [&](const IColumn::Offset * offset_ptr) { + const auto arr_offset = offset_ptr == offsets_begin ? 0 : offset_ptr[-1]; + const auto arr_size = *offset_ptr - arr_offset; + + result_offsets_builder.insertOne(arr_size); + + const auto elems_size_old = res_elems.size(); + res_elems.resize(elems_size_old + arr_size); + inline_memcpy(&res_elems[elems_size_old], &src_elems[arr_offset], arr_size * sizeof(T)); +>>>>>>> dba70f9da8 (Use size of filtered column to reserve memory for filter when result_size_hint < 0 (#6746)) }; struct NoResultOffsetsBuilder diff --git a/dbms/src/Columns/IColumn.h b/dbms/src/Columns/IColumn.h index f46d646472d..9b73f2b6452 100644 --- a/dbms/src/Columns/IColumn.h +++ b/dbms/src/Columns/IColumn.h @@ -187,7 +187,7 @@ class IColumn : public COWPtr * Is used in WHERE and HAVING operations. * If result_size_hint > 0, then makes advance reserve(result_size_hint) for the result column; * if 0, then don't makes reserve(), - * otherwise (i.e. < 0), makes reserve() using size of source column. + * otherwise (i.e. < 0), makes reserve() using size of filtered column. */ using Filter = PaddedPODArray; virtual Ptr filter(const Filter & filt, ssize_t result_size_hint) const = 0;