Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 23 additions & 9 deletions be/src/vec/exec/format/orc/vorc_reader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -390,7 +390,7 @@ Status OrcReader::_init_read_columns() {
std::vector<std::string> orc_cols;
std::vector<std::string> orc_cols_lower_case;
bool is_hive1_orc = false;
_init_orc_cols(root_type, orc_cols, orc_cols_lower_case, _type_map, &is_hive1_orc);
_init_orc_cols(root_type, orc_cols, orc_cols_lower_case, _type_map, &is_hive1_orc, false);

// In old version slot_name_to_schema_pos may not be set in _scan_params
// TODO, should be removed in 2.2 or later
Expand Down Expand Up @@ -451,7 +451,7 @@ Status OrcReader::_init_read_columns() {
void OrcReader::_init_orc_cols(const orc::Type& type, std::vector<std::string>& orc_cols,
std::vector<std::string>& orc_cols_lower_case,
std::unordered_map<std::string, const orc::Type*>& type_map,
bool* is_hive1_orc) const {
bool* is_hive1_orc, bool should_add_acid_prefix) const {
bool hive1_orc = true;
for (int i = 0; i < type.getSubtypeCount(); ++i) {
orc_cols.emplace_back(type.getFieldName(i));
Expand All @@ -461,11 +461,17 @@ void OrcReader::_init_orc_cols(const orc::Type& type, std::vector<std::string>&
}
orc_cols_lower_case.emplace_back(std::move(filed_name_lower_case));
auto file_name = type.getFieldName(i);
if (should_add_acid_prefix) {
file_name = fmt::format(
"{}.{}", TransactionalHive::ACID_COLUMN_NAMES[TransactionalHive::ROW_OFFSET],
file_name);
}
type_map.emplace(std::move(file_name), type.getSubtype(i));
if (_is_acid) {
const orc::Type* sub_type = type.getSubtype(i);
if (sub_type->getKind() == orc::TypeKind::STRUCT) {
_init_orc_cols(*sub_type, orc_cols, orc_cols_lower_case, type_map, is_hive1_orc);
_init_orc_cols(*sub_type, orc_cols, orc_cols_lower_case, type_map, is_hive1_orc,
true);
}
}
}
Expand Down Expand Up @@ -988,10 +994,6 @@ bool OrcReader::_build_search_argument(const VExprSPtr& expr,
}

bool OrcReader::_init_search_argument(const VExprContextSPtrs& conjuncts) {
if (!_enable_filter_by_min_max) {
return false;
}

// build search argument, if any expr can not be pushed down, return false
auto builder = orc::SearchArgumentFactory::newBuilder();
bool at_least_one_can_push_down = false;
Expand Down Expand Up @@ -1139,8 +1141,20 @@ Status OrcReader::set_fill_columns(

if (_lazy_read_ctx.conjuncts.empty()) {
_lazy_read_ctx.can_lazy_read = false;
} else {
_init_search_argument(_lazy_read_ctx.conjuncts);
} else if (_enable_filter_by_min_max) {
auto res = _init_search_argument(_lazy_read_ctx.conjuncts);
if (_state->query_options().check_orc_init_sargs_success && !res) {
std::stringstream ss;
for (const auto& conjunct : _lazy_read_ctx.conjuncts) {
ss << conjunct->root()->debug_string() << "\n";
}
std::string conjuncts_str = ss.str();
return Status::InternalError(
"Session variable check_orc_init_sargs_success is set, but "
"_init_search_argument returns false because all exprs can not be pushed "
"down:\n " +
conjuncts_str);
}
}
try {
_row_reader_options.range(_range_start_offset, _range_size);
Expand Down
2 changes: 1 addition & 1 deletion be/src/vec/exec/format/orc/vorc_reader.h
Original file line number Diff line number Diff line change
Expand Up @@ -285,7 +285,7 @@ class OrcReader : public GenericReader {
void _init_orc_cols(const orc::Type& type, std::vector<std::string>& orc_cols,
std::vector<std::string>& orc_cols_lower_case,
std::unordered_map<std::string, const orc::Type*>& type_map,
bool* is_hive1_orc) const;
bool* is_hive1_orc, bool should_add_acid_prefix) const;
static bool _check_acid_schema(const orc::Type& type);
static const orc::Type& _remove_acid(const orc::Type& type);

Expand Down
19 changes: 19 additions & 0 deletions fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java
Original file line number Diff line number Diff line change
Expand Up @@ -491,6 +491,8 @@ public class SessionVariable implements Serializable, Writable {

public static final String ENABLE_ORC_FILTER_BY_MIN_MAX = "enable_orc_filter_by_min_max";

public static final String CHECK_ORC_INIT_SARGS_SUCCESS = "check_orc_init_sargs_success";

public static final String INLINE_CTE_REFERENCED_THRESHOLD = "inline_cte_referenced_threshold";

public static final String ENABLE_CTE_MATERIALIZE = "enable_cte_materialize";
Expand Down Expand Up @@ -1900,6 +1902,14 @@ public void setEnableLeftZigZag(boolean enableLeftZigZag) {
needForward = true)
public boolean enableOrcFilterByMinMax = true;

// Session variable "check_orc_init_sargs_success" (default false).
// It is copied into TQueryOptions by toThrift() so the backend can read it.
// NOTE(review): intended as a verification switch -- when true, the BE ORC reader
// is expected to fail the query if no predicate could be turned into an ORC
// search argument; confirm against OrcReader::set_fill_columns.
@VariableMgr.VarAttr(
name = CHECK_ORC_INIT_SARGS_SUCCESS,
description = {"是否检查orc init sargs是否成功。默认为 false。",
"Whether to check whether orc init sargs is successful. "
+ "The default value is false."},
needForward = true)
public boolean checkOrcInitSargsSuccess = false;

@VariableMgr.VarAttr(
name = EXTERNAL_TABLE_ANALYZE_PART_NUM,
description = {"收集外表统计信息行数时选取的采样分区数,默认-1表示全部分区",
Expand Down Expand Up @@ -3440,6 +3450,14 @@ public void setEnableOrcFilterByMinMax(boolean enableOrcFilterByMinMax) {
this.enableOrcFilterByMinMax = enableOrcFilterByMinMax;
}

/**
 * Returns the current value of the {@code check_orc_init_sargs_success} session variable.
 *
 * @return the value stored in {@code checkOrcInitSargsSuccess}
 */
public boolean isCheckOrcInitSargsSuccess() {
return this.checkOrcInitSargsSuccess;
}

/**
 * Sets the {@code check_orc_init_sargs_success} session variable.
 *
 * @param checkOrcInitSargsSuccess new value to store in the field of the same name
 */
public void setCheckOrcInitSargsSuccess(boolean checkOrcInitSargsSuccess) {
this.checkOrcInitSargsSuccess = checkOrcInitSargsSuccess;
}

/**
 * Returns the value currently held in {@code sqlDialect}.
 *
 * @return the {@code sqlDialect} value, returned as-is
 */
public String getSqlDialect() {
return sqlDialect;
}
Expand Down Expand Up @@ -4033,6 +4051,7 @@ public TQueryOptions toThrift() {
tResult.setEnableOrcLazyMat(enableOrcLazyMat);
tResult.setEnableParquetFilterByMinMax(enableParquetFilterByMinMax);
tResult.setEnableOrcFilterByMinMax(enableOrcFilterByMinMax);
tResult.setCheckOrcInitSargsSuccess(checkOrcInitSargsSuccess);

tResult.setTruncateCharOrVarcharColumns(truncateCharOrVarcharColumns);
tResult.setEnableMemtableOnSinkNode(enableMemtableOnSinkNode);
Expand Down
1 change: 1 addition & 0 deletions gensrc/thrift/PaloInternalService.thrift
Original file line number Diff line number Diff line change
Expand Up @@ -392,6 +392,7 @@ struct TQueryOptions {
161: optional i64 low_memory_mode_buffer_limit = 33554432
162: optional bool dump_heap_profile_when_mem_limit_exceeded = false
163: optional bool inverted_index_compatible_read = false
164: optional bool check_orc_init_sargs_success = false

// For cloud, to control if the content would be written into file cache
// In write path, to control if the content would be written into file cache.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@
1 \N 1 \N
3 \N 1 \N

-- !predicate_full_acid_push_down --

-- !lazy_materialization_for_list_type --
["c", "a"]
["b", "c"]
Expand Down Expand Up @@ -77,6 +79,9 @@
1 \N 1 \N
3 \N 1 \N

-- !predicate_full_acid_push_down --
2 BB 20230101

-- !lazy_materialization_for_list_type --
["c", "a"]
["b", "c"]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -45,13 +45,23 @@ suite("test_hive_orc_predicate", "p0,external,hive,external_docker,external_dock

qt_predicate_null_aware_equal_in_rt """select * from table_a inner join table_b on table_a.age <=> table_b.age and table_b.id in (1,3) order by table_a.id;"""

// use check_orc_init_sargs_success to test full acid push down
sql """use `${catalog_name}`.`default`"""
if (hivePrefix == "hive3") {
sql """ set check_orc_init_sargs_success = true; """
}
qt_predicate_full_acid_push_down """ select * from orc_full_acid_par where value = 'BB' order by id;"""
sql """ set check_orc_init_sargs_success = false; """

sql """use `${catalog_name}`.`multi_catalog`"""
qt_lazy_materialization_for_list_type """ select l from complex_data_orc where id > 2 order by id; """
qt_lazy_materialization_for_map_type """ select m from complex_data_orc where id > 2 order by id; """
qt_lazy_materialization_for_list_and_map_type """ select * from complex_data_orc where id > 2 order by id; """
qt_lazy_materialization_for_list_type2 """select t_struct_nested from `${catalog_name}`.`default`.orc_all_types_t where t_int=3;"""

sql """drop catalog if exists ${catalog_name}"""
} finally {
sql """ set check_orc_init_sargs_success = false; """
}
}
}
Loading