Skip to content

Commit

Permalink
[improvement](page builder) avoid allocating big memory in ctor (#35493)
Browse files Browse the repository at this point in the history
## Proposed changes

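Issue Number: close #xxx

<!--Describe your changes.-->

Page builders no longer run their initial `reset()` (which reserves the page buffer) inside the constructor. The constructors become private, each builder gains an `init()` override that performs that setup, and callers obtain builders through a static `create(...)` call that returns a `Status`. `reset()` now returns `Status` as well, and its call sites wrap it in `RETURN_IF_ERROR`.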

## Further comments

If this is a relatively large or complex change, start the discussion
at [dev@doris.apache.org](mailto:dev@doris.apache.org) by explaining why
you chose this solution and which alternatives you considered.
  • Loading branch information
jacktengg authored May 28, 2024
1 parent 0e33ab4 commit d6279fa
Show file tree
Hide file tree
Showing 19 changed files with 159 additions and 63 deletions.
30 changes: 21 additions & 9 deletions be/src/olap/rowset/segment_v2/binary_dict_page.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -45,17 +45,26 @@ BinaryDictPageBuilder::BinaryDictPageBuilder(const PageBuilderOptions& options)
_finished(false),
_data_page_builder(nullptr),
_dict_builder(nullptr),
_encoding_type(DICT_ENCODING) {
_encoding_type(DICT_ENCODING) {}

Status BinaryDictPageBuilder::init() {
// initially use DICT_ENCODING
// TODO: the data page builder type can be created by Factory according to user config
_data_page_builder.reset(new BitshufflePageBuilder<FieldType::OLAP_FIELD_TYPE_INT>(options));
PageBuilder* data_page_builder_ptr = nullptr;
RETURN_IF_ERROR(BitshufflePageBuilder<FieldType::OLAP_FIELD_TYPE_INT>::create(
&data_page_builder_ptr, _options));
_data_page_builder.reset(data_page_builder_ptr);
PageBuilderOptions dict_builder_options;
dict_builder_options.data_page_size =
std::min(_options.data_page_size, _options.dict_page_size);
dict_builder_options.is_dict_page = true;
_dict_builder.reset(
new BinaryPlainPageBuilder<FieldType::OLAP_FIELD_TYPE_VARCHAR>(dict_builder_options));
reset();

PageBuilder* dict_builder_ptr = nullptr;
RETURN_IF_ERROR(BinaryPlainPageBuilder<FieldType::OLAP_FIELD_TYPE_VARCHAR>::create(
&dict_builder_ptr, dict_builder_options));
_dict_builder.reset(static_cast<BinaryPlainPageBuilder<FieldType::OLAP_FIELD_TYPE_VARCHAR>*>(
dict_builder_ptr));
return reset();
}

bool BinaryDictPageBuilder::is_page_full() {
Expand Down Expand Up @@ -148,18 +157,21 @@ OwnedSlice BinaryDictPageBuilder::finish() {
return _buffer.build();
}

void BinaryDictPageBuilder::reset() {
Status BinaryDictPageBuilder::reset() {
_finished = false;
_buffer.reserve(_options.data_page_size + BINARY_DICT_PAGE_HEADER_SIZE);
_buffer.resize(BINARY_DICT_PAGE_HEADER_SIZE);

if (_encoding_type == DICT_ENCODING && _dict_builder->is_page_full()) {
_data_page_builder.reset(
new BinaryPlainPageBuilder<FieldType::OLAP_FIELD_TYPE_VARCHAR>(_options));
PageBuilder* data_page_builder_ptr = nullptr;
RETURN_IF_ERROR(BinaryPlainPageBuilder<FieldType::OLAP_FIELD_TYPE_VARCHAR>::create(
&data_page_builder_ptr, _options));
_data_page_builder.reset(data_page_builder_ptr);
_encoding_type = PLAIN_ENCODING;
} else {
_data_page_builder->reset();
RETURN_IF_ERROR(_data_page_builder->reset());
}
return Status::OK();
}

size_t BinaryDictPageBuilder::count() const {
Expand Down
11 changes: 8 additions & 3 deletions be/src/olap/rowset/segment_v2/binary_dict_page.h
Original file line number Diff line number Diff line change
Expand Up @@ -57,17 +57,20 @@ enum { BINARY_DICT_PAGE_HEADER_SIZE = 4 };
// Data pages start with mode_ = DICT_ENCODING. When the size of the dictionary
// page goes beyond option_->dict_page_size, subsequent data pages switch
// to plain string pages automatically.
class BinaryDictPageBuilder : public PageBuilder {
class BinaryDictPageBuilder : public PageBuilderHelper<BinaryDictPageBuilder> {
public:
BinaryDictPageBuilder(const PageBuilderOptions& options);
using Self = BinaryDictPageBuilder;
friend class PageBuilderHelper<Self>;

Status init() override;

bool is_page_full() override;

Status add(const uint8_t* vals, size_t* count) override;

OwnedSlice finish() override;

void reset() override;
Status reset() override;

size_t count() const override;

Expand All @@ -80,6 +83,8 @@ class BinaryDictPageBuilder : public PageBuilder {
Status get_last_value(void* value) const override;

private:
BinaryDictPageBuilder(const PageBuilderOptions& options);

PageBuilderOptions _options;
bool _finished;

Expand Down
16 changes: 10 additions & 6 deletions be/src/olap/rowset/segment_v2/binary_plain_page.h
Original file line number Diff line number Diff line change
Expand Up @@ -44,12 +44,12 @@ namespace doris {
namespace segment_v2 {

template <FieldType Type>
class BinaryPlainPageBuilder : public PageBuilder {
class BinaryPlainPageBuilder : public PageBuilderHelper<BinaryPlainPageBuilder<Type>> {
public:
BinaryPlainPageBuilder(const PageBuilderOptions& options)
: _size_estimate(0), _options(options) {
reset();
}
using Self = BinaryPlainPageBuilder<Type>;
friend class PageBuilderHelper<Self>;

Status init() override { return reset(); }

bool is_page_full() override {
bool ret = false;
Expand Down Expand Up @@ -108,7 +108,7 @@ class BinaryPlainPageBuilder : public PageBuilder {
return _buffer.build();
}

void reset() override {
Status reset() override {
_offsets.clear();
_buffer.clear();
_buffer.reserve(_options.data_page_size == 0
Expand All @@ -117,6 +117,7 @@ class BinaryPlainPageBuilder : public PageBuilder {
_size_estimate = sizeof(uint32_t);
_finished = false;
_last_value_size = 0;
return Status::OK();
}

size_t count() const override { return _offsets.size(); }
Expand Down Expand Up @@ -151,6 +152,9 @@ class BinaryPlainPageBuilder : public PageBuilder {
inline Slice get(std::size_t idx) const { return (*this)[idx]; }

private:
BinaryPlainPageBuilder(const PageBuilderOptions& options)
: _size_estimate(0), _options(options) {}

void _copy_value_at(size_t idx, faststring* value) const {
size_t value_size =
(idx < _offsets.size() - 1) ? _offsets[idx + 1] - _offsets[idx] : _last_value_size;
Expand Down
12 changes: 9 additions & 3 deletions be/src/olap/rowset/segment_v2/binary_prefix_page.h
Original file line number Diff line number Diff line change
Expand Up @@ -41,22 +41,26 @@ namespace segment_v2 {
// Entry := SharedPrefixLength(vint), UnsharedLength(vint), Byte^UnsharedLength
// Trailer := NumEntry(uint32_t), RESTART_POINT_INTERVAL(uint8_t)
// RestartPointStartOffset(uint32_t)^NumRestartPoints,NumRestartPoints(uint32_t)
class BinaryPrefixPageBuilder : public PageBuilder {
class BinaryPrefixPageBuilder : public PageBuilderHelper<BinaryPrefixPageBuilder> {
public:
BinaryPrefixPageBuilder(const PageBuilderOptions& options) : _options(options) { reset(); }
using Self = BinaryPrefixPageBuilder;
friend class PageBuilderHelper<Self>;

Status init() override { return reset(); }

bool is_page_full() override { return size() >= _options.data_page_size; }

Status add(const uint8_t* vals, size_t* add_count) override;

OwnedSlice finish() override;

void reset() override {
Status reset() override {
_restart_points_offset.clear();
_last_entry.clear();
_count = 0;
_buffer.clear();
_finished = false;
return Status::OK();
}

uint64_t size() const override {
Expand Down Expand Up @@ -88,6 +92,8 @@ class BinaryPrefixPageBuilder : public PageBuilder {
}

private:
BinaryPrefixPageBuilder(const PageBuilderOptions& options) : _options(options) {}

PageBuilderOptions _options;
std::vector<uint32_t> _restart_points_offset;
faststring _first_entry;
Expand Down
16 changes: 10 additions & 6 deletions be/src/olap/rowset/segment_v2/bitshuffle_page.h
Original file line number Diff line number Diff line change
Expand Up @@ -85,12 +85,12 @@ void warn_with_bitshuffle_error(int64_t val);
// The header is followed by the bitshuffle-compressed element data.
//
template <FieldType Type>
class BitshufflePageBuilder : public PageBuilder {
class BitshufflePageBuilder : public PageBuilderHelper<BitshufflePageBuilder<Type>> {
public:
BitshufflePageBuilder(const PageBuilderOptions& options)
: _options(options), _count(0), _remain_element_capacity(0), _finished(false) {
reset();
}
using Self = BitshufflePageBuilder<Type>;
friend class PageBuilderHelper<Self>;

Status init() override { return reset(); }

bool is_page_full() override { return _remain_element_capacity == 0; }

Expand Down Expand Up @@ -150,7 +150,7 @@ class BitshufflePageBuilder : public PageBuilder {
return _finish(SIZE_OF_TYPE);
}

void reset() override {
Status reset() override {
auto block_size = _options.data_page_size;
_count = 0;
_data.clear();
Expand All @@ -161,6 +161,7 @@ class BitshufflePageBuilder : public PageBuilder {
_buffer.resize(BITSHUFFLE_PAGE_HEADER_SIZE);
_finished = false;
_remain_element_capacity = block_size / SIZE_OF_TYPE;
return Status::OK();
}

size_t count() const override { return _count; }
Expand All @@ -185,6 +186,9 @@ class BitshufflePageBuilder : public PageBuilder {
}

private:
BitshufflePageBuilder(const PageBuilderOptions& options)
: _options(options), _count(0), _remain_element_capacity(0), _finished(false) {}

OwnedSlice _finish(int final_size_of_type) {
_data.resize(final_size_of_type * _count);

Expand Down
2 changes: 1 addition & 1 deletion be/src/olap/rowset/segment_v2/column_writer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -718,7 +718,7 @@ Status ScalarColumnWriter::finish_current_page() {
// build data page body : encoded values + [nullmap]
std::vector<Slice> body;
OwnedSlice encoded_values = _page_builder->finish();
_page_builder->reset();
RETURN_IF_ERROR(_page_builder->reset());
body.push_back(encoded_values.slice());

OwnedSlice nullmap;
Expand Down
32 changes: 12 additions & 20 deletions be/src/olap/rowset/segment_v2/encoding_info.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -51,8 +51,7 @@ struct TypeEncodingTraits {};
template <FieldType type, typename CppType>
struct TypeEncodingTraits<type, PLAIN_ENCODING, CppType> {
static Status create_page_builder(const PageBuilderOptions& opts, PageBuilder** builder) {
*builder = new PlainPageBuilder<type>(opts);
return Status::OK();
return PlainPageBuilder<type>::create(builder, opts);
}
static Status create_page_decoder(const Slice& data, const PageDecoderOptions& opts,
PageDecoder** decoder) {
Expand All @@ -64,8 +63,7 @@ struct TypeEncodingTraits<type, PLAIN_ENCODING, CppType> {
template <FieldType type>
struct TypeEncodingTraits<type, PLAIN_ENCODING, Slice> {
static Status create_page_builder(const PageBuilderOptions& opts, PageBuilder** builder) {
*builder = new BinaryPlainPageBuilder<type>(opts);
return Status::OK();
return BinaryPlainPageBuilder<type>::create(builder, opts);
}
static Status create_page_decoder(const Slice& data, const PageDecoderOptions& opts,
PageDecoder** decoder) {
Expand All @@ -78,8 +76,7 @@ template <FieldType type, typename CppType>
struct TypeEncodingTraits<type, BIT_SHUFFLE, CppType,
typename std::enable_if<!std::is_same<CppType, Slice>::value>::type> {
static Status create_page_builder(const PageBuilderOptions& opts, PageBuilder** builder) {
*builder = new BitshufflePageBuilder<type>(opts);
return Status::OK();
return BitshufflePageBuilder<type>::create(builder, opts);
}
static Status create_page_decoder(const Slice& data, const PageDecoderOptions& opts,
PageDecoder** decoder) {
Expand All @@ -91,8 +88,7 @@ struct TypeEncodingTraits<type, BIT_SHUFFLE, CppType,
template <>
struct TypeEncodingTraits<FieldType::OLAP_FIELD_TYPE_BOOL, RLE, bool> {
static Status create_page_builder(const PageBuilderOptions& opts, PageBuilder** builder) {
*builder = new RlePageBuilder<FieldType::OLAP_FIELD_TYPE_BOOL>(opts);
return Status::OK();
return RlePageBuilder<FieldType::OLAP_FIELD_TYPE_BOOL>::create(builder, opts);
}
static Status create_page_decoder(const Slice& data, const PageDecoderOptions& opts,
PageDecoder** decoder) {
Expand All @@ -104,8 +100,7 @@ struct TypeEncodingTraits<FieldType::OLAP_FIELD_TYPE_BOOL, RLE, bool> {
template <FieldType type>
struct TypeEncodingTraits<type, DICT_ENCODING, Slice> {
static Status create_page_builder(const PageBuilderOptions& opts, PageBuilder** builder) {
*builder = new BinaryDictPageBuilder(opts);
return Status::OK();
return BinaryDictPageBuilder::create(builder, opts);
}
static Status create_page_decoder(const Slice& data, const PageDecoderOptions& opts,
PageDecoder** decoder) {
Expand All @@ -118,8 +113,7 @@ template <>
struct TypeEncodingTraits<FieldType::OLAP_FIELD_TYPE_DATE, FOR_ENCODING,
typename CppTypeTraits<FieldType::OLAP_FIELD_TYPE_DATE>::CppType> {
static Status create_page_builder(const PageBuilderOptions& opts, PageBuilder** builder) {
*builder = new FrameOfReferencePageBuilder<FieldType::OLAP_FIELD_TYPE_DATE>(opts);
return Status::OK();
return FrameOfReferencePageBuilder<FieldType::OLAP_FIELD_TYPE_DATE>::create(builder, opts);
}
static Status create_page_decoder(const Slice& data, const PageDecoderOptions& opts,
PageDecoder** decoder) {
Expand All @@ -132,8 +126,8 @@ template <>
struct TypeEncodingTraits<FieldType::OLAP_FIELD_TYPE_DATEV2, FOR_ENCODING,
typename CppTypeTraits<FieldType::OLAP_FIELD_TYPE_DATEV2>::CppType> {
static Status create_page_builder(const PageBuilderOptions& opts, PageBuilder** builder) {
*builder = new FrameOfReferencePageBuilder<FieldType::OLAP_FIELD_TYPE_DATEV2>(opts);
return Status::OK();
return FrameOfReferencePageBuilder<FieldType::OLAP_FIELD_TYPE_DATEV2>::create(builder,
opts);
}
static Status create_page_decoder(const Slice& data, const PageDecoderOptions& opts,
PageDecoder** decoder) {
Expand All @@ -146,8 +140,8 @@ template <>
struct TypeEncodingTraits<FieldType::OLAP_FIELD_TYPE_DATETIMEV2, FOR_ENCODING,
typename CppTypeTraits<FieldType::OLAP_FIELD_TYPE_DATETIMEV2>::CppType> {
static Status create_page_builder(const PageBuilderOptions& opts, PageBuilder** builder) {
*builder = new FrameOfReferencePageBuilder<FieldType::OLAP_FIELD_TYPE_DATETIMEV2>(opts);
return Status::OK();
return FrameOfReferencePageBuilder<FieldType::OLAP_FIELD_TYPE_DATETIMEV2>::create(builder,
opts);
}
static Status create_page_decoder(const Slice& data, const PageDecoderOptions& opts,
PageDecoder** decoder) {
Expand All @@ -161,8 +155,7 @@ template <FieldType type, typename CppType>
struct TypeEncodingTraits<type, FOR_ENCODING, CppType,
typename std::enable_if<std::is_integral<CppType>::value>::type> {
static Status create_page_builder(const PageBuilderOptions& opts, PageBuilder** builder) {
*builder = new FrameOfReferencePageBuilder<type>(opts);
return Status::OK();
return FrameOfReferencePageBuilder<type>::create(builder, opts);
}
static Status create_page_decoder(const Slice& data, const PageDecoderOptions& opts,
PageDecoder** decoder) {
Expand All @@ -174,8 +167,7 @@ struct TypeEncodingTraits<type, FOR_ENCODING, CppType,
template <FieldType type>
struct TypeEncodingTraits<type, PREFIX_ENCODING, Slice> {
static Status create_page_builder(const PageBuilderOptions& opts, PageBuilder** builder) {
*builder = new BinaryPrefixPageBuilder(opts);
return Status::OK();
return BinaryPrefixPageBuilder::create(builder, opts);
}
static Status create_page_decoder(const Slice& data, const PageDecoderOptions& opts,
PageDecoder** decoder) {
Expand Down
15 changes: 11 additions & 4 deletions be/src/olap/rowset/segment_v2/frame_of_reference_page.h
Original file line number Diff line number Diff line change
Expand Up @@ -27,11 +27,14 @@ namespace segment_v2 {

// Encodes a page using frame-of-reference coding
template <FieldType Type>
class FrameOfReferencePageBuilder : public PageBuilder {
class FrameOfReferencePageBuilder : public PageBuilderHelper<FrameOfReferencePageBuilder<Type>> {
public:
explicit FrameOfReferencePageBuilder(const PageBuilderOptions& options)
: _options(options), _count(0), _finished(false) {
using Self = FrameOfReferencePageBuilder<Type>;
friend class PageBuilderHelper<Self>;

Status init() override {
_encoder.reset(new ForEncoder<CppType>(&_buf));
return Status::OK();
}

bool is_page_full() override { return _encoder->len() >= _options.data_page_size; }
Expand All @@ -58,10 +61,11 @@ class FrameOfReferencePageBuilder : public PageBuilder {
return _buf.build();
}

void reset() override {
Status reset() override {
_count = 0;
_finished = false;
_encoder->clear();
return Status::OK();
}

size_t count() const override { return _count; }
Expand All @@ -85,6 +89,9 @@ class FrameOfReferencePageBuilder : public PageBuilder {
}

private:
explicit FrameOfReferencePageBuilder(const PageBuilderOptions& options)
: _options(options), _count(0), _finished(false) {}

typedef typename TypeTraits<Type>::CppType CppType;
PageBuilderOptions _options;
size_t _count;
Expand Down
2 changes: 1 addition & 1 deletion be/src/olap/rowset/segment_v2/indexed_column_writer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -118,7 +118,7 @@ Status IndexedColumnWriter::_finish_current_data_page(size_t& num_val) {

// IndexedColumn doesn't have NULLs, thus data page body only contains encoded values
OwnedSlice page_body = _data_page_builder->finish();
_data_page_builder->reset();
RETURN_IF_ERROR(_data_page_builder->reset());

PageFooterPB footer;
footer.set_type(DATA_PAGE);
Expand Down
Loading

0 comments on commit d6279fa

Please sign in to comment.