Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use size of filtered column to reserve memory for filter when result_size_hint < 0 (#6746) #6750

Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 6 additions & 1 deletion dbms/src/Columns/ColumnAggregateFunction.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
#include <AggregateFunctions/AggregateFunctionState.h>
#include <Columns/ColumnAggregateFunction.h>
#include <Columns/ColumnsCommon.h>
#include <Common/HashTable/Hash.h>
#include <Common/SipHash.h>
#include <Common/typeid_cast.h>
Expand Down Expand Up @@ -129,7 +130,11 @@ ColumnPtr ColumnAggregateFunction::filter(const Filter & filter, ssize_t result_
auto & res_data = res->getData();

if (result_size_hint)
res_data.reserve(result_size_hint > 0 ? result_size_hint : size);
{
if (result_size_hint < 0)
result_size_hint = countBytesInFilter(filter);
res_data.reserve(result_size_hint);
}

for (size_t i = 0; i < size; ++i)
if (filter[i])
Expand Down
1 change: 1 addition & 0 deletions dbms/src/Columns/ColumnArray.h
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,7 @@ class ColumnArray final : public COWPtrHelper<IColumn, ColumnArray>
void insertFrom(const IColumn & src_, size_t n) override;
void insertDefault() override;
void popBack(size_t n) override;
/// TODO: If result_size_hint < 0, make reserve() use the size of the filtered column, not the source column, to avoid some OOM issues.
ColumnPtr filter(const Filter & filt, ssize_t result_size_hint) const override;
ColumnPtr permute(const Permutation & perm, size_t limit) const override;
int compareAt(size_t n, size_t m, const IColumn & rhs_, int nan_direction_hint) const override;
Expand Down
6 changes: 5 additions & 1 deletion dbms/src/Columns/ColumnDecimal.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -267,7 +267,11 @@ ColumnPtr ColumnDecimal<T>::filter(const IColumn::Filter & filt, ssize_t result_
Container & res_data = res->getData();

if (result_size_hint)
res_data.reserve(result_size_hint > 0 ? result_size_hint : size);
{
if (result_size_hint < 0)
result_size_hint = countBytesInFilter(filt);
res_data.reserve(result_size_hint);
}

const UInt8 * filt_pos = filt.data();
const UInt8 * filt_end = filt_pos + size;
Expand Down
7 changes: 6 additions & 1 deletion dbms/src/Columns/ColumnFixedString.cpp
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
#include <Columns/ColumnFixedString.h>
#include <Columns/ColumnsCommon.h>
#include <Common/Arena.h>
#include <Common/HashTable/Hash.h>
#include <Common/SipHash.h>
Expand Down Expand Up @@ -190,7 +191,11 @@ ColumnPtr ColumnFixedString::filter(const IColumn::Filter & filt, ssize_t result
auto res = ColumnFixedString::create(n);

if (result_size_hint)
res->chars.reserve(result_size_hint > 0 ? result_size_hint * n : chars.size());
{
if (result_size_hint < 0)
result_size_hint = countBytesInFilter(filt);
res->chars.reserve(result_size_hint * n);
}

const UInt8 * filt_pos = &filt[0];
const UInt8 * filt_end = filt_pos + col_size;
Expand Down
7 changes: 6 additions & 1 deletion dbms/src/Columns/ColumnVector.cpp
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
#include <Columns/ColumnVector.h>
#include <Columns/ColumnsCommon.h>
#include <Common/Arena.h>
#include <Common/Exception.h>
#include <Common/HashTable/Hash.h>
Expand Down Expand Up @@ -206,7 +207,11 @@ ColumnPtr ColumnVector<T>::filter(const IColumn::Filter & filt, ssize_t result_s
Container & res_data = res->getData();

if (result_size_hint)
res_data.reserve(result_size_hint > 0 ? result_size_hint : size);
{
if (result_size_hint < 0)
result_size_hint = countBytesInFilter(filt);
res_data.reserve(result_size_hint);
}

const UInt8 * filt_pos = &filt[0];
const UInt8 * filt_end = filt_pos + size;
Expand Down
15 changes: 8 additions & 7 deletions dbms/src/Columns/ColumnsCommon.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -112,9 +112,9 @@ struct ResultOffsetsBuilder
: res_offsets(*res_offsets_)
{}

void reserve(ssize_t result_size_hint, size_t src_size)
void reserve(size_t result_size_hint)
{
res_offsets.reserve(result_size_hint > 0 ? result_size_hint : src_size);
res_offsets.reserve(result_size_hint);
}

void insertOne(size_t array_size)
Expand Down Expand Up @@ -155,7 +155,7 @@ struct ResultOffsetsBuilder
struct NoResultOffsetsBuilder
{
explicit NoResultOffsetsBuilder(IColumn::Offsets *) {}
void reserve(ssize_t, size_t) {}
void reserve(size_t) {}
void insertOne(size_t) {}

template <size_t SIMD_BYTES>
Expand Down Expand Up @@ -186,11 +186,12 @@ void filterArraysImplGeneric(

if (result_size_hint)
{
result_offsets_builder.reserve(result_size_hint, size);

if (result_size_hint < 0)
res_elems.reserve(src_elems.size());
else if (result_size_hint < 1000000000 && src_elems.size() < 1000000000) /// Avoid overflow.
result_size_hint = countBytesInFilter(filt);

result_offsets_builder.reserve(result_size_hint);

if (result_size_hint < 1000000000 && src_elems.size() < 1000000000) /// Avoid overflow.
res_elems.reserve((result_size_hint * src_elems.size() + size - 1) / size);
}

Expand Down
2 changes: 1 addition & 1 deletion dbms/src/Columns/IColumn.h
Original file line number Diff line number Diff line change
Expand Up @@ -192,7 +192,7 @@ class IColumn : public COWPtr<IColumn>
* Is used in WHERE and HAVING operations.
* If result_size_hint > 0, then makes reserve(result_size_hint) in advance for the result column;
* if 0, then doesn't make reserve(),
* otherwise (i.e. < 0), makes reserve() using size of source column.
* otherwise (i.e. < 0), makes reserve() using size of filtered column.
*/
using Filter = PaddedPODArray<UInt8>;
virtual Ptr filter(const Filter & filt, ssize_t result_size_hint) const = 0;
Expand Down