Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions be/src/runtime/exec_env.h
Original file line number Diff line number Diff line change
Expand Up @@ -291,6 +291,7 @@ class ExecEnv {
// Updates the static _s_tracking_memory flag to `tracking_memory`.
// The store uses std::memory_order_release.
// NOTE(review): readers presumably pair this with an acquire load — confirm at the read site.
static void set_tracking_memory(bool tracking_memory) {
_s_tracking_memory.store(tracking_memory, std::memory_order_release);
}
// Replaces the stored ORC memory pool pointer with `pool`.
// Only the raw pointer is assigned; this setter neither takes ownership of `pool`
// nor releases whatever _orc_memory_pool pointed to before.
void set_orc_memory_pool(orc::MemoryPool* pool) { _orc_memory_pool = pool; }
#endif
// Returns the raw pointer obtained from _load_stream_map_pool.get().
// NOTE(review): _load_stream_map_pool is presumably a smart pointer owned by ExecEnv,
// making the returned pointer non-owning — confirm against the member declaration.
LoadStreamMapPool* load_stream_map_pool() { return _load_stream_map_pool.get(); }

Expand Down
478 changes: 327 additions & 151 deletions be/src/vec/exec/format/orc/vorc_reader.cpp

Large diffs are not rendered by default.

37 changes: 26 additions & 11 deletions be/src/vec/exec/format/orc/vorc_reader.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,9 @@
#pragma once

#include <cctz/time_zone.h>
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

warning: 'cctz/time_zone.h' file not found [clang-diagnostic-error]

#include <cctz/time_zone.h>
         ^

#include <stddef.h>
#include <stdint.h>

#include <cstddef>
#include <cstdint>
#include <list>
#include <memory>
#include <orc/OrcFile.hh>
Expand Down Expand Up @@ -51,6 +51,8 @@
#include "vec/exec/format/format_common.h"
#include "vec/exec/format/generic_reader.h"
#include "vec/exec/format/table/transactional_hive_reader.h"
#include "vec/exprs/vliteral.h"
#include "vec/exprs/vslot_ref.h"

namespace doris {
class RuntimeState;
Expand Down Expand Up @@ -80,13 +82,6 @@ namespace doris::vectorized {

class ORCFileInputStream;

// Passive aggregate bundling a column name, an ORC predicate data type,
// a list of ORC literals and a SQL filter operator.
// NOTE(review): the per-field notes below are inferred from names and types only —
// confirm against the code that fills and consumes this struct.
struct OrcPredicate {
// presumably the name of the column the predicate applies to
std::string col_name;
// ORC-side data type tag for the predicate
orc::PredicateDataType data_type;
// literal operand(s); a vector, so more than one value can be carried
std::vector<orc::Literal> literals;
// filter operator
SQLFilterOp op;
};

struct LazyReadContext {
VExprContextSPtrs conjuncts;
bool can_lazy_read = false;
Expand Down Expand Up @@ -228,6 +223,8 @@ class OrcReader : public GenericReader {
RuntimeProfile::Counter* decode_value_time = nullptr;
RuntimeProfile::Counter* decode_null_map_time = nullptr;
RuntimeProfile::Counter* filter_block_time = nullptr;
RuntimeProfile::Counter* selected_row_group_count = nullptr;
RuntimeProfile::Counter* evaluated_row_group_count = nullptr;
};

class ORCFilterImpl : public orc::ORCFilter {
Expand Down Expand Up @@ -291,8 +288,23 @@ class OrcReader : public GenericReader {
bool* is_hive1_orc);
static bool _check_acid_schema(const orc::Type& type);
static const orc::Type& _remove_acid(const orc::Type& type);
bool _init_search_argument(
std::unordered_map<std::string, ColumnValueRangeType>* colname_to_value_range);
std::tuple<bool, orc::Literal, orc::PredicateDataType> _make_orc_literal(
const VSlotRef* slot_ref, const VLiteral* literal);
bool _check_slot_can_push_down(const VExprSPtr& expr);
bool _check_rest_children_can_push_down(const VExprSPtr& expr);
bool _check_expr_can_push_down(const VExprSPtr& expr);
bool _build_less_than(const VExprSPtr& expr,
std::unique_ptr<orc::SearchArgumentBuilder>& builder);
bool _build_less_than_equals(const VExprSPtr& expr,
std::unique_ptr<orc::SearchArgumentBuilder>& builder);
bool _build_equals(const VExprSPtr& expr, std::unique_ptr<orc::SearchArgumentBuilder>& builder);
bool _build_filter_in(const VExprSPtr& expr,
std::unique_ptr<orc::SearchArgumentBuilder>& builder);
bool _build_is_null(const VExprSPtr& expr,
std::unique_ptr<orc::SearchArgumentBuilder>& builder);
bool _build_search_argument(const VExprSPtr& expr,
std::unique_ptr<orc::SearchArgumentBuilder>& builder);
bool _init_search_argument(const VExprContextSPtrs& conjuncts);
void _init_bloom_filter(
std::unordered_map<std::string, ColumnValueRangeType>* colname_to_value_range);
void _init_system_properties();
Expand Down Expand Up @@ -578,11 +590,14 @@ class OrcReader : public GenericReader {
bool _is_hive1_orc_or_use_idx = false;

std::unordered_map<std::string, std::string> _col_name_to_file_col_name;
// TODO: check if we can remove _col_name_to_file_col_name_low_case
std::unordered_map<std::string, std::string> _col_name_to_file_col_name_low_case;
std::unordered_map<std::string, const orc::Type*> _type_map;
std::vector<const orc::Type*> _col_orc_type;
std::unique_ptr<ORCFileInputStream> _file_input_stream;
Statistics _statistics;
OrcProfile _orc_profile;
orc::ReaderMetrics _reader_metrics;

std::unique_ptr<orc::ColumnVectorBatch> _batch;
std::unique_ptr<orc::Reader> _reader;
Expand Down
Binary file added be/test/exec/test_data/orc_scanner/orders.orc
Binary file not shown.
29 changes: 11 additions & 18 deletions be/test/testutil/desc_tbl_builder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,20 +17,9 @@

#include "testutil/desc_tbl_builder.h"
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

warning: 'testutil/desc_tbl_builder.h' file not found [clang-diagnostic-error]

#include "testutil/desc_tbl_builder.h"
         ^


#include <glog/logging.h>
#include <gtest/gtest-message.h>
#include <gtest/gtest-test-part.h>
#include <gtest/gtest.h>

#include <vector>

#include "common/object_pool.h"
#include "common/status.h"
#include "gtest/gtest_pred_impl.h"
#include "runtime/define_primitive_type.h"
#include "runtime/descriptors.h"
#include "util/bit_util.h"

using std::vector;

namespace doris {

Expand All @@ -44,7 +33,7 @@ TupleDescBuilder& DescriptorTblBuilder::declare_tuple() {

// item_id of -1 indicates no itemTupleId
static TSlotDescriptor make_slot_descriptor(int id, int parent_id, const TypeDescriptor& type,
int slot_idx, int item_id) {
const std::string& name, int slot_idx, int item_id) {
int null_byte = slot_idx / 8;
int null_bit = slot_idx % 8;
TSlotDescriptor slot_desc;
Expand All @@ -58,6 +47,7 @@ static TSlotDescriptor make_slot_descriptor(int id, int parent_id, const TypeDes
slot_desc.__set_nullIndicatorBit(null_bit);
slot_desc.__set_slotIdx(slot_idx);
slot_desc.__set_isMaterialized(true);
slot_desc.__set_colName(name);
// if (item_id != -1) {
// slot_desc.__set_itemTupleId(item_id);
// }
Expand All @@ -78,24 +68,27 @@ DescriptorTbl* DescriptorTblBuilder::build() {
int tuple_id = 0;
int slot_id = 0;

for (int i = 0; i < _tuples_descs.size(); ++i) {
build_tuple(_tuples_descs[i]->slot_types(), &thrift_desc_tbl, &tuple_id, &slot_id);
for (auto& _tuples_desc : _tuples_descs) {
build_tuple(_tuples_desc->slot_types(), _tuples_desc->slot_names(), &thrift_desc_tbl,
&tuple_id, &slot_id);
}

Status status = DescriptorTbl::create(_obj_pool, thrift_desc_tbl, &desc_tbl);
EXPECT_TRUE(status.ok());
return desc_tbl;
}

TTupleDescriptor DescriptorTblBuilder::build_tuple(const vector<TypeDescriptor>& slot_types,
TTupleDescriptor DescriptorTblBuilder::build_tuple(const std::vector<TypeDescriptor>& slot_types,
const std::vector<std::string>& slot_names,
TDescriptorTable* thrift_desc_tbl,
int* next_tuple_id, int* slot_id) {
// We never materialize struct slots (there's no in-memory representation of structs,
// instead the materialized fields appear directly in the tuple), but array types can
// still have a struct item type. In this case, the array item tuple contains the
// "inlined" struct fields.
if (slot_types.size() == 1 && slot_types[0].type == TYPE_STRUCT) {
return build_tuple(slot_types[0].children, thrift_desc_tbl, next_tuple_id, slot_id);
return build_tuple(slot_types[0].children, slot_types[0].field_names, thrift_desc_tbl,
next_tuple_id, slot_id);
}

int tuple_id = *next_tuple_id;
Expand All @@ -111,7 +104,7 @@ TTupleDescriptor DescriptorTblBuilder::build_tuple(const vector<TypeDescriptor>&
// }

thrift_desc_tbl->slotDescriptors.push_back(
make_slot_descriptor(*slot_id, tuple_id, slot_types[i], i, item_id));
make_slot_descriptor(*slot_id, tuple_id, slot_types[i], slot_names[i], i, item_id));
thrift_desc_tbl->__isset.slotDescriptors = true;
++(*slot_id);
}
Expand Down
17 changes: 15 additions & 2 deletions be/test/testutil/desc_tbl_builder.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,15 +20,16 @@

#include <gen_cpp/Descriptors_types.h>
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

warning: 'gen_cpp/Descriptors_types.h' file not found [clang-diagnostic-error]

#include <gen_cpp/Descriptors_types.h>
         ^


#include <tuple>
#include <vector>

#include "common/object_pool.h"
#include "runtime/descriptors.h"
#include "runtime/types.h"

namespace doris {

class ObjectPool;
class TupleDescBuilder;
class DescriptorTbl;

// Aids in the construction of a DescriptorTbl by declaring tuples and slots
// associated with those tuples.
Expand All @@ -40,6 +41,7 @@ class DescriptorTbl;
// DescriptorTblBuilder builder;
// builder.declare_tuple() << TYPE_TINYINT << TYPE_TIMESTAMP; // gets TupleId 0
// builder.declare_tuple() << TYPE_FLOAT; // gets TupleId 1
// builder.declare_tuple() << std::make_tuple(TYPE_INT, "col1") << std::make_tuple(TYPE_STRING, "col2"); // declares slots that carry both a type and a column name
// DescriptorTbl desc_tbl = builder.build();
class DescriptorTblBuilder {
public:
Expand All @@ -57,20 +59,31 @@ class DescriptorTblBuilder {
std::vector<TupleDescBuilder*> _tuples_descs;

TTupleDescriptor build_tuple(const std::vector<TypeDescriptor>& slot_types,
const std::vector<std::string>& slot_names,
TDescriptorTable* thrift_desc_tbl, int* tuple_id, int* slot_id);
};

class TupleDescBuilder {
public:
using SlotType = std::tuple<TypeDescriptor, std::string>;
TupleDescBuilder& operator<<(const SlotType& slot) {
_slot_types.push_back(std::get<0>(slot));
_slot_names.push_back(std::get<1>(slot));
return *this;
}

TupleDescBuilder& operator<<(const TypeDescriptor& slot_type) {
_slot_types.push_back(slot_type);
_slot_names.emplace_back("");
return *this;
}

std::vector<TypeDescriptor> slot_types() const { return _slot_types; }
std::vector<std::string> slot_names() const { return _slot_names; }

private:
std::vector<TypeDescriptor> _slot_types;
std::vector<std::string> _slot_names;
};

} // end namespace doris
Expand Down
Loading