From 1753172f892fb17e73b07f68ee7eaeb911afd0a1 Mon Sep 17 00:00:00 2001 From: camby Date: Mon, 6 Jan 2025 21:38:55 +0800 Subject: [PATCH] [fix](mem) heap-buffer-overflow for function convert_to (#46405) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ### What problem does this PR solve? Reproduce SQL with ASAN version: `select convert('装装装装装' using gbk);` Then be crashed: ``` ================================================================= ==1830466==ERROR: AddressSanitizer: heap-buffer-overflow on address 0x606002aeec20 at pc 0x560826fb3e66 bp 0x7fc3816a5890 sp 0x7fc3816a5058 WRITE of size 10 at 0x606002aeec20 thread T711 (brpc_light) ``` But if we use release version, we found the result is not correct, and the memory maybe already corrupted: ``` > select convert('装装装装装' using gbk); +---------------------------------------------------------+ | convert_to('装装装装装', 'gbk') | +---------------------------------------------------------+ | ~zhuangdang~zhuangdang~zhuangdang~zhuangdang~zhu | +---------------------------------------------------------+ 1 row in set (0.04 sec) ``` The correct answer should be: ``` > select convert('装装装装装' using gbk); +--------------------------------------+ | convert_to('装装装装装', 'gbk') | +--------------------------------------+ | ~zhuang~zhuang~zhuang~zhuang~zhuang | +--------------------------------------+ 1 row in set (0.06 sec) ``` --- be/src/vec/functions/function_string.h | 13 +++++++++---- .../nereids_function_p0/scalar_function/C.out | 3 +++ .../string_functions/test_string_function.out | Bin 4838 -> 4892 bytes .../scalar_function/C.groovy | 2 +- .../test_string_function.groovy | 2 ++ 5 files changed, 15 insertions(+), 5 deletions(-) diff --git a/be/src/vec/functions/function_string.h b/be/src/vec/functions/function_string.h index 195ef4af76b1d1..c71920c137f98b 100644 --- a/be/src/vec/functions/function_string.h +++ b/be/src/vec/functions/function_string.h @@ -3653,9 +3653,10 @@ class FunctionConvertTo : public IFunction { auto& res_offset = col_res->get_offsets(); auto& res_chars = col_res->get_chars(); res_offset.resize(input_rows_count); - // max pinyin size is 6, double of utf8 chinese word 3, add one char to set '~' - ColumnString::check_chars_length(str_chars.size() * 2 + input_rows_count, 0); - res_chars.resize(str_chars.size() * 2 + input_rows_count); + // max pinyin size is 6 + 1 (first '~') for utf8 chinese word 3 + size_t pinyin_size = (str_chars.size() + 2) / 3 * 7; + ColumnString::check_chars_length(pinyin_size, 0); + res_chars.resize(pinyin_size); size_t in_len = 0, out_len = 0; for (int i = 0; i < input_rows_count; ++i) { @@ -3696,7 +3697,11 @@ class FunctionConvertTo : public IFunction { } auto end = strchr(buf, ' '); - auto len = end != nullptr ? end - buf : MAX_PINYIN_LEN; + // max len for pinyin is 6 + int len = MAX_PINYIN_LEN; + if (end != nullptr && end - buf < MAX_PINYIN_LEN) { + len = end - buf; + } // set first char '~' just make sure all english word lower than chinese word *dest = 126; memcpy(dest + 1, buf, len); diff --git a/regression-test/data/nereids_function_p0/scalar_function/C.out b/regression-test/data/nereids_function_p0/scalar_function/C.out index e36fdc7f3742be..924ddf5b148105 100644 --- a/regression-test/data/nereids_function_p0/scalar_function/C.out +++ b/regression-test/data/nereids_function_p0/scalar_function/C.out @@ -1418,3 +1418,6 @@ Doris Doris -- !convert -- 1 1 +-- !convert_1 -- +~zhuang~zhuang~zhuang~zhuang~zhuang + diff --git a/regression-test/data/query_p0/sql_functions/string_functions/test_string_function.out b/regression-test/data/query_p0/sql_functions/string_functions/test_string_function.out index cadf5039794dd87ff4e1af559db1ccb3172a8b23..4af2997eda286b96c77e3edcc4991ab13d04bf13 100644 GIT binary patch delta 61 qcmaE+I!A580S<%Y{JgT%qLO$+1zlaPx~h!Q#JqHT)aG3rI$Qwt?-*?W delta 12 TcmbQE_Dprd0glbzIMlcRCHw^^ diff --git a/regression-test/suites/nereids_function_p0/scalar_function/C.groovy b/regression-test/suites/nereids_function_p0/scalar_function/C.groovy index bf072c9ad8cf74..0a14602eff9c17 100644 --- a/regression-test/suites/nereids_function_p0/scalar_function/C.groovy +++ b/regression-test/suites/nereids_function_p0/scalar_function/C.groovy @@ -197,5 +197,5 @@ suite("nereids_scalar_fn_C") { qt_bitmap_not_nullable "select count(kbitmap) from fn_test_bitmap_not_nullable" qt_char "select char(68, 111, 114, 105, 115), char(68, 111, 114, 105, 115 using utf8);" qt_convert "select convert(1 using gbk), convert(1, string);" - + qt_convert_1 "select convert('装装装装装' using gbk);" } diff --git a/regression-test/suites/query_p0/sql_functions/string_functions/test_string_function.groovy b/regression-test/suites/query_p0/sql_functions/string_functions/test_string_function.groovy index f5d32653c818b5..b8418a1a318f24 100644 --- a/regression-test/suites/query_p0/sql_functions/string_functions/test_string_function.groovy +++ b/regression-test/suites/query_p0/sql_functions/string_functions/test_string_function.groovy @@ -163,6 +163,8 @@ suite("test_string_function") { qt_sql "select right(\"Hello doris\", 120);" qt_sql "select right(\"Hello doris\", -6);" + qt_convert_1 "select convert('装装装装装' using gbk);" + sql """ drop table if exists left_right_test; """ sql """ create table left_right_test ( id INT NULL,