Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

v2 segment support string encode(#1766) #1816

Merged
merged 21 commits into from
Sep 30, 2019
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 20 additions & 3 deletions be/src/olap/rowset/segment_v2/binary_dict_page.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,16 @@ Slice BinaryDictPageBuilder::finish() {
Slice data_slice = _data_page_builder->finish();
_buffer.append(data_slice.data, data_slice.size);
encode_fixed32_le(&_buffer[0], _encoding_type);
return Slice(_buffer.data(), _buffer.size());

if (_encoding_type == DICT_ENCODING) {
size_t dict_offset = _buffer.size();
Slice dictionary_page;
get_dictionary_page(&dictionary_page);
wangbo marked this conversation as resolved.
Show resolved Hide resolved
_buffer.append(dictionary_page.data, dictionary_page.size);
put_fixed32_le(&_buffer, dict_offset);
}

return Slice(_buffer);
}

void BinaryDictPageBuilder::reset() {
Expand Down Expand Up @@ -148,7 +157,7 @@ BinaryDictPageDecoder::BinaryDictPageDecoder(Slice data, const PageDecoderOption
_data(data),
_options(options),
_data_page_decoder(nullptr),
_dict_decoder(options.dict_decoder),
_dict_decoder(nullptr),
_parsed(false),
_encoding_type(UNKNOWN_ENCODING) { }

Expand All @@ -162,8 +171,15 @@ Status BinaryDictPageDecoder::init() {
_encoding_type = static_cast<EncodingTypePB>(type);
_data.remove_prefix(BINARY_DICT_PAGE_HEADER_SIZE);
if (_encoding_type == DICT_ENCODING) {
DCHECK(_dict_decoder != nullptr) << "dict decoder pointer is nullptr";
size_t dict_offset = decode_fixed32_le((const uint8_t *)&_data[_data.get_size() - sizeof(uint32_t)]) - BINARY_DICT_PAGE_HEADER_SIZE;
size_t dict_size = _data.get_size() - dict_offset - sizeof(uint32_t);


Slice dictSlice(&_data[dict_offset], dict_size);
_data.size = dict_offset;

_data_page_decoder.reset(new BitShufflePageDecoder<OLAP_FIELD_TYPE_INT>(_data, _options));
_dict_decoder.reset(new BinaryPlainPageDecoder(dictSlice));
} else if (_encoding_type == PLAIN_ENCODING) {
DCHECK_EQ(_encoding_type, PLAIN_ENCODING);
_data_page_decoder.reset(new BinaryPlainPageDecoder(_data, _options));
Expand All @@ -173,6 +189,7 @@ Status BinaryDictPageDecoder::init() {
}

RETURN_IF_ERROR(_data_page_decoder->init());
RETURN_IF_ERROR(_dict_decoder->init());
wangbo marked this conversation as resolved.
Show resolved Hide resolved
_parsed = true;
return Status::OK();
}
Expand Down
17 changes: 17 additions & 0 deletions be/src/olap/rowset/segment_v2/encoding_info.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@

#include "olap/olap_common.h"
#include "olap/rowset/segment_v2/bitshuffle_page.h"
#include "olap/rowset/segment_v2/binary_dict_page.h"

namespace doris {
namespace segment_v2 {
Expand Down Expand Up @@ -54,6 +55,18 @@ struct TypeEncodingTraits<type, BIT_SHUFFLE> {
}
};

// Partial specialization of TypeEncodingTraits for DICT_ENCODING, templated
// over the field type. It supplies the factory functions that create the
// dictionary page builder and decoder.
template<FieldType type>
struct TypeEncodingTraits<type, DICT_ENCODING> {
// Allocates a BinaryDictPageBuilder configured with `opts` and stores the raw
// pointer in `*builder`. Always returns Status::OK().
// NOTE(review): the object is created with `new` and returned through a raw
// pointer; presumably the caller takes ownership -- confirm against callers.
static Status create_page_builder(const PageBuilderOptions& opts, PageBuilder** builder) {
*builder = new BinaryDictPageBuilder(opts);
return Status::OK();
// NOTE(review): the next two lines are review-UI text captured along with the
// diff, not C++ source.
wangbo marked this conversation as resolved.
Show resolved Hide resolved
}
// Allocates a BinaryDictPageDecoder over `data` with `opts` and stores the raw
// pointer in `*decoder`. Always returns Status::OK(); the decoder's init() is
// not called here.
// NOTE(review): as with the builder, ownership of the allocated object
// presumably passes to the caller -- confirm.
static Status create_page_decoder(const Slice& data, const PageDecoderOptions& opts, PageDecoder** decoder) {
*decoder = new BinaryDictPageDecoder(data, opts);
return Status::OK();
}
};

template<FieldType Type, EncodingTypePB Encoding>
struct EncodingTraits : TypeEncodingTraits<Type, Encoding> {
static const FieldType type = Type;
Expand Down Expand Up @@ -109,6 +122,10 @@ EncodingInfoResolver::EncodingInfoResolver() {
_add_map<OLAP_FIELD_TYPE_FLOAT, PLAIN_ENCODING>();
_add_map<OLAP_FIELD_TYPE_DOUBLE, BIT_SHUFFLE>();
_add_map<OLAP_FIELD_TYPE_DOUBLE, PLAIN_ENCODING>();
_add_map<OLAP_FIELD_TYPE_CHAR, DICT_ENCODING>();
_add_map<OLAP_FIELD_TYPE_CHAR, PLAIN_ENCODING>();
_add_map<OLAP_FIELD_TYPE_VARCHAR, DICT_ENCODING>();
_add_map<OLAP_FIELD_TYPE_VARCHAR, PLAIN_ENCODING>();
}

EncodingInfoResolver::~EncodingInfoResolver() {
Expand Down
3 changes: 3 additions & 0 deletions be/src/olap/rowset/segment_v2/segment_writer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,9 @@ Status SegmentWriter::init(uint32_t write_mbytes_per_sec) {
DCHECK(type_info != nullptr);

ColumnWriterOptions opts;
if (column.type() == OLAP_FIELD_TYPE_CHAR || column.type() == OLAP_FIELD_TYPE_VARCHAR) {
opts.encoding_type = DICT_ENCODING;
wangbo marked this conversation as resolved.
Show resolved Hide resolved
}
opts.compression_type = segment_v2::CompressionTypePB::LZ4F;
// now we create zone map for key columns
if (column.is_key()) {
Expand Down
2 changes: 2 additions & 0 deletions be/src/util/arena.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

#include "util/arena.h"
#include <assert.h>
#include "string.h"

namespace doris {

Expand Down Expand Up @@ -59,6 +60,7 @@ char* Arena::AllocateAligned(size_t bytes) {

char* Arena::AllocateNewBlock(size_t block_bytes) {
char* result = new char[block_bytes];
memset(result, 0, block_bytes);
wangbo marked this conversation as resolved.
Show resolved Hide resolved
blocks_.push_back(result);
memory_usage_.store(MemoryUsage() + block_bytes + sizeof(char*),
std::memory_order_relaxed);
Expand Down
32 changes: 3 additions & 29 deletions be/test/olap/rowset/segment_v2/binary_dict_page_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -48,23 +48,10 @@ class BinaryDictPageTest : public testing::Test {
ASSERT_EQ(slices.size(), page_builder.count());
ASSERT_FALSE(page_builder.is_page_full());

// construct dict page
Slice dict_slice;
Status status = page_builder.get_dictionary_page(&dict_slice);
ASSERT_TRUE(status.ok());
PageDecoderOptions dict_decoder_options;
std::shared_ptr<BinaryPlainPageDecoder> dict_page_decoder(
new BinaryPlainPageDecoder(dict_slice, dict_decoder_options));
status = dict_page_decoder->init();
ASSERT_TRUE(status.ok());
// because every slice is unique
ASSERT_EQ(slices.size(), dict_page_decoder->count());

// decode
PageDecoderOptions decoder_options;
decoder_options.dict_decoder = dict_page_decoder;
BinaryDictPageDecoder page_decoder(s, decoder_options);
status = page_decoder.init();
Status status = page_decoder.init();
ASSERT_TRUE(status.ok());
ASSERT_EQ(slices.size(), page_decoder.count());

Expand Down Expand Up @@ -131,32 +118,19 @@ class BinaryDictPageTest : public testing::Test {
page_builder.reset();
page_start_ids.push_back(count);

Slice dict_slice;
Status status = page_builder.get_dictionary_page(&dict_slice);
size_t data_size = total_size;
total_size += dict_slice.size;
ASSERT_TRUE(status.ok());
LOG(INFO) << "total size:" << total_size << ", data size:" << data_size
<< ", dict size:" << dict_slice.size
LOG(INFO) << "total size:" << total_size
<< " result page size:" << results.size();

// validate
// random 100 times to validate
srand(time(nullptr));
for (int i = 0; i < 100; ++i) {
int slice_index = random() % results.size();
//int slice_index = 1;
PageDecoderOptions dict_decoder_options;
std::shared_ptr<BinaryPlainPageDecoder> dict_page_decoder(
new BinaryPlainPageDecoder(dict_slice, dict_decoder_options));
status = dict_page_decoder->init();
ASSERT_TRUE(status.ok());

// decode
PageDecoderOptions decoder_options;
decoder_options.dict_decoder = dict_page_decoder;
BinaryDictPageDecoder page_decoder(results[slice_index], decoder_options);
status = page_decoder.init();
Status status = page_decoder.init();
ASSERT_TRUE(status.ok());

//check values
Expand Down
2 changes: 1 addition & 1 deletion be/test/olap/rowset/segment_v2/binary_plain_page_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ class BinaryPlainPageTest : public testing::Test {
PageDecoderType page_decoder(s, decoder_options);
Status status = page_decoder.init();
ASSERT_TRUE(status.ok());

//test1

size_t size = 3;
Expand Down
Loading