From 5ec81d50f886907d867205e3be28c76b5f3ebe09 Mon Sep 17 00:00:00 2001 From: Jerry Hu Date: Wed, 15 Oct 2025 18:17:26 +0800 Subject: [PATCH] [fix](function) Crash caused by explode function (#56991) `VExplodeV2TableFunction::get_value` crashed when an input's `element_size` was less than `_cur_offset`, because `element_size - _cur_offset` was then not a valid row count for `insert_range_from`. ```text *** SIGABRT unknown detail explain (@0x3f800011a04) received by PID 72196 (TID 74961 OR 0x7bc6104c4700) from PID 72196; stack trace: *** 0# doris::signal::(anonymous namespace)::FailureSignalHandler(int, siginfo_t*, void*) at /root/doris/be/src/common/signal_handler.h:420 1# 0x00007FCD3B81AD10 in /lib64/libpthread.so.0 2# __GI_raise in /lib64/libc.so.6 3# __GI_abort in /lib64/libc.so.6 4# 0x000055583DEC0C0D in /root/doris/be/output/lib/doris_be 5# google::LogMessage::SendToLog() in /root/doris/be/output/lib/doris_be 6# google::LogMessage::Flush() in /root/doris/be/output/lib/doris_be 7# google::LogMessageFatal::~LogMessageFatal() in /root/doris/be/output/lib/doris_be 8# doris::vectorized::PODArray, 16ul, 15ul>::operator[](long) const at /root/doris/be/src/vec/common/pod_array.h:466 9# doris::vectorized::ColumnVector<(doris::PrimitiveType)2>::insert_range_from(doris::vectorized::IColumn const&, unsigned long, unsigned long) at /root/doris/be/src/vec/columns/column_vector.cpp:278 10# doris::vectorized::VExplodeV2TableFunction::get_value(doris::COW::mutable_ptr&, int) at /root/doris/be/src/vec/exprs/table_function/vexplode_v2.cpp:233 11# doris::pipeline::TableFunctionLocalState::get_expanded_block(doris::RuntimeState*, doris::vectorized::Block*, bool*) at /root/doris/be/src/pipeline/exec/table_function_operator.cpp:199 12# doris::pipeline::TableFunctionOperatorX::pull(doris::RuntimeState*, doris::vectorized::Block*, bool*) const at /root/doris/be/src/pipeline/exec/table_function_operator.h:119 13# doris::pipeline::StatefulOperatorX::get_block(doris::RuntimeState*, doris::vectorized::Block*, bool*) at /root/doris/be/src/pipeline/exec/operator.cpp:707 14# 
doris::pipeline::OperatorXBase::get_block_after_projects(doris::RuntimeState*, doris::vectorized::Block*, bool*) at /root/doris/be/src/pipeline/exec/operator.cpp:397 15# doris::pipeline::PipelineTask::execute(bool*) at /root/doris/be/src/pipeline/pipeline_task.cpp:532 16# doris::pipeline::TaskScheduler::_do_work(int) at /root/doris/be/src/pipeline/task_scheduler.cpp:146 17# doris::pipeline::TaskScheduler::start()::$_0::operator()() const at /root/doris/be/src/pipeline/task_scheduler.cpp:67 18# void std::__invoke_impl(std::__invoke_other, doris::pipeline::TaskScheduler::start()::$_0&) at /root/ldb_toolchain_taipan/bin/../lib/gcc/x86_64-pc-linux-gnu/15/include/g++-v15/bits/invoke.h:63 19# std::enable_if, void>::type std::__invoke_r(doris::pipeline::TaskScheduler::start()::$_0&) at /root/ldb_toolchain_taipan/bin/../lib/gcc/x86_64-pc-linux-gnu/15/include/g++-v15/bits/invoke.h:119 20# std::_Function_handler::_M_invoke(std::_Any_data const&) at /root/ldb_toolchain_taipan/bin/../lib/gcc/x86_64-pc-linux-gnu/15/include/g++-v15/bits/std_function.h:292 21# std::function::operator()() const at /root/ldb_toolchain_taipan/bin/../lib/gcc/x86_64-pc-linux-gnu/15/include/g++-v15/bits/std_function.h:593 22# doris::FunctionRunnable::run() at /root/doris/be/src/util/threadpool.cpp:60 23# doris::ThreadPool::dispatch_thread() at /root/doris/be/src/util/threadpool.cpp:614 24# void std::__invoke_impl(std::__invoke_memfun_deref, void (doris::ThreadPool::*&)(), doris::ThreadPool*&) at /root/ldb_toolchain_taipan/bin/../lib/gcc/x86_64-pc-linux-gnu/15/include/g++-v15/bits/invoke.h:76 25# std::__invoke_result::type std::__invoke(void (doris::ThreadPool::*&)(), doris::ThreadPool*&) at /root/ldb_toolchain_taipan/bin/../lib/gcc/x86_64-pc-linux-gnu/15/include/g++-v15/bits/invoke.h:98 26# void std::_Bind::__call(std::tuple<>&&, std::_Index_tuple<0ul>) at /root/ldb_toolchain_taipan/bin/../lib/gcc/x86_64-pc-linux-gnu/15/include/g++-v15/functional:515 27# void std::_Bind::operator()<, void>() at 
/root/ldb_toolchain_taipan/bin/../lib/gcc/x86_64-pc-linux-gnu/15/include/g++-v15/functional:600 28# void std::__invoke_impl&>(std::__invoke_other, std::_Bind&) at /root/ldb_toolchain_taipan/bin/../lib/gcc/x86_64-pc-linux-gnu/15/include/g++-v15/bits/invoke.h:63 29# std::enable_if&>, void>::type std::__invoke_r&>(std::_Bind&) at /root/ldb_toolchain_taipan/bin/../lib/gcc/x86_64-pc-linux-gnu/15/include/g++-v15/bits/invoke.h:119 30# std::_Function_handler >::_M_invoke(std::_Any_data const&) at /root/ldb_toolchain_taipan/bin/../lib/gcc/x86_64-pc-linux-gnu/15/include/g++-v15/bits/std_function.h:292 31# std::function::operator()() const at /root/ldb_toolchain_taipan/bin/../lib/gcc/x86_64-pc-linux-gnu/15/include/g++-v15/bits/std_function.h:593 32# doris::Thread::supervise_thread(void*) at /root/doris/be/src/util/thread.cpp:460 33# asan_thread_start(void*) in /root/doris/be/output/lib/doris_be 34# start_thread in /lib64/libpthread.so.0 35# __GI___clone in /lib64/libc.so.6 ``` Related PR: #xxx Problem Summary: None - Test - [ ] Regression test - [ ] Unit Test - [ ] Manual test (add detailed scripts or steps below) - [ ] No need to test or manual test. Explain why: - [ ] This is a refactor/code format and no logic has been changed. - [ ] Previous test can cover this change. - [ ] No code files have been changed. - [ ] Other reason - Behavior changed: - [ ] No. - [ ] Yes. - Does this need documentation? - [ ] No. - [ ] Yes. 
- [ ] Confirm the release note - [ ] Confirm test cases - [ ] Confirm document - [ ] Add branch pick label --- .../vec/exprs/table_function/vexplode_v2.cpp | 23 +++--- .../sql_functions/table_function/explode.out | 72 +++++++++++++++++++ .../table_function/explode.groovy | 25 +++++++ 3 files changed, 109 insertions(+), 11 deletions(-) diff --git a/be/src/vec/exprs/table_function/vexplode_v2.cpp b/be/src/vec/exprs/table_function/vexplode_v2.cpp index 44659dde806ad6..02864c58115d48 100644 --- a/be/src/vec/exprs/table_function/vexplode_v2.cpp +++ b/be/src/vec/exprs/table_function/vexplode_v2.cpp @@ -218,21 +218,22 @@ int VExplodeV2TableFunction::get_value(MutableColumnPtr& column, int max_step) { } else { nullmap_column->insert_many_defaults(max_step); } - } else { + } else if (element_size > _cur_offset) { + const auto current_insert_num = element_size - _cur_offset; nullable_column->get_nested_column_ptr()->insert_range_from( - *detail.nested_col, pos, element_size - _cur_offset); + *detail.nested_col, pos, current_insert_num); if (detail.nested_nullmap_data) { - for (int j = 0; j < element_size - _cur_offset; j++) { - if (detail.nested_nullmap_data[pos + j]) { - nullmap_column->insert_value(1); - } else { - nullmap_column->insert_value(0); - } - } + const auto old_size = nullmap_column->size(); + nullmap_column->resize(old_size + current_insert_num); + memcpy(nullmap_column->get_data().data() + old_size, + detail.nested_nullmap_data + pos, + current_insert_num * sizeof(UInt8)); } else { - nullmap_column->insert_many_defaults(element_size - _cur_offset); + nullmap_column->insert_many_defaults(current_insert_num); } - nullable_column->insert_many_defaults(max_step - (element_size - _cur_offset)); + nullable_column->insert_many_defaults(max_step - current_insert_num); + } else { + nullable_column->insert_many_defaults(max_step); } } } diff --git a/regression-test/data/query_p0/sql_functions/table_function/explode.out 
b/regression-test/data/query_p0/sql_functions/table_function/explode.out index 234119bea63555..1a6280bb84f736 100644 --- a/regression-test/data/query_p0/sql_functions/table_function/explode.out +++ b/regression-test/data/query_p0/sql_functions/table_function/explode.out @@ -675,3 +675,75 @@ 3 116 \N 115 \N 3 116 \N 116 \N +-- !test24 -- +a \N \N \N 4 +a \N \N \N 5 +a \N \N ef 1 +a \N 1 ab \N +a \N 2 cd \N +a \N 4 \N 2 +a \N 5 \N 3 +b \N \N \N 4 +b \N \N \N 5 +b \N \N ef 1 +b \N 1 ab \N +b \N 2 cd \N +b \N 4 \N 2 +b \N 5 \N 3 +c \N \N \N 4 +c \N \N \N 5 +c \N \N ef 1 +c \N 1 ab \N +c \N 2 cd \N +c \N 4 \N 2 +c \N 5 \N 3 +d \N \N \N 4 +d \N \N \N 5 +d \N \N ef 1 +d \N 1 ab \N +d \N 2 cd \N +d \N 4 \N 2 +d \N 5 \N 3 +e \N \N \N 4 +e \N \N \N 5 +e \N \N ef 1 +e \N 1 ab \N +e \N 2 cd \N +e \N 4 \N 2 +e \N 5 \N 3 +f \N \N \N 4 +f \N \N \N 5 +f \N \N ef 1 +f \N 1 ab \N +f \N 2 cd \N +f \N 4 \N 2 +f \N 5 \N 3 +g \N \N \N 4 +g \N \N \N 5 +g \N \N ef 1 +g \N 1 ab \N +g \N 2 cd \N +g \N 4 \N 2 +g \N 5 \N 3 +h \N \N \N 4 +h \N \N \N 5 +h \N \N ef 1 +h \N 1 ab \N +h \N 2 cd \N +h \N 4 \N 2 +h \N 5 \N 3 +i \N \N \N 4 +i \N \N \N 5 +i \N \N ef 1 +i \N 1 ab \N +i \N 2 cd \N +i \N 4 \N 2 +i \N 5 \N 3 +j \N \N \N 4 +j \N \N \N 5 +j \N \N ef 1 +j \N 1 ab \N +j \N 2 cd \N +j \N 4 \N 2 +j \N 5 \N 3 + diff --git a/regression-test/suites/query_p0/sql_functions/table_function/explode.groovy b/regression-test/suites/query_p0/sql_functions/table_function/explode.groovy index 76d797057045d8..88f7a85023fbd2 100644 --- a/regression-test/suites/query_p0/sql_functions/table_function/explode.groovy +++ b/regression-test/suites/query_p0/sql_functions/table_function/explode.groovy @@ -189,4 +189,29 @@ suite("explode") { qt_test22 "select id,e1,e2,e3 from array_test as a lateral view explode_variant_array(a.v_string['a'],a.v_int['a'],a.v_int['a']) tmp1 as e1,e2,e3;" qt_test23 "select id,e1,e2,e11,e12 from array_test as a lateral view explode_variant_array(a.v_int['a'],a.v_string['a']) tmp1 as e1,e2 
lateral view explode_variant_array(a.v_int['a'],a.v_string['a']) tmp2 as e11,e12;" + sql "DROP TABLE IF EXISTS array_test2;" + sql """ + CREATE TABLE `array_test2` ( + `v` varchar(10) + ) DISTRIBUTED BY RANDOM BUCKETS 1 + PROPERTIES ( + "replication_allocation" = "tag.location.default: 1" + ); + """ + + sql """ + insert into array_test2 values + ("a"), ("b"), ("c"), ("d"), ("e"), + ("f"), ("g"), ("h"), ("i"), ("j"); + """ + + sql "set batch_size = 16" + + qt_test24 """ + select + * + from array_test2 + lateral view explode([], [1, 2, null, 4, 5], ["ab", "cd", "ef"], [null, null, 1, 2, 3, 4, 5]) t2 as c0, c1, c2, c3 + order by 1,2,3,4,5; + """ }