diff --git a/be/src/olap/task/index_builder.cpp b/be/src/olap/task/index_builder.cpp index 38a52d1d2118aa..c1c94a439c8a06 100644 --- a/be/src/olap/task/index_builder.cpp +++ b/be/src/olap/task/index_builder.cpp @@ -555,6 +555,13 @@ Status IndexBuilder::_add_nullable(const std::string& column_name, return Status::Error( "CLuceneError occured: {}", e.what()); } + // we should refresh nullmap for array + for (int row_id = 0; row_id < num_rows; row_id++) { + if (null_map && null_map[row_id] == 1) { + RETURN_IF_ERROR( + _inverted_index_builders[index_writer_sign]->add_array_nulls(row_id)); + } + } return Status::OK(); } diff --git a/regression-test/data/inverted_index_p0/array_contains/arr_null_test_data.csv b/regression-test/data/inverted_index_p0/array_contains/arr_null_test_data.csv new file mode 100644 index 00000000000000..2258751c9315d7 --- /dev/null +++ b/regression-test/data/inverted_index_p0/array_contains/arr_null_test_data.csv @@ -0,0 +1,200 @@ +0|NULL|NULL +1|[]|NULL +2|NULL|[] +3|[null, 'None', null, null, 'None', 'text5', null]|[] +4|[null, 'None', 'None', null, 'text4', null, 'None']|NULL +5|NULL|NULL +6|NULL|[] +7|NULL|NULL +8|[null, 'None', 'None', null, 'text4', 'None', null, 'text7', null]|[] +9|NULL|[] +10|['None', 'text1', 'None', 'text3', 'text4', null, 'text6', null]|['text0', 'None', null, 'text3', 'None'] +11|[null, 'None', null, 'None', null, 'text5', null, null, 'text8']|[null, null, null, 'text3', null, 'text5', null] +12|[]|['text0', 'text1', null, 'text3', 'text4', 'None', 'text6', null] +13|[]|[] +14|NULL|[null, null, null, 'None', 'None'] +15|NULL|NULL +16|NULL|NULL +17|NULL|['text0', null, null, null, 'text4', 'text5', null, null] +18|[null, 'None', 'None', 'None', 'None', null, 'text6', 'text7', null, null]|NULL +19|[]|[] +20|NULL|[] +21|NULL|NULL +22|NULL|NULL +23|NULL|NULL +24|NULL|[] +25|NULL|[] +26|NULL|['text0', 'text1', 'text2', 'text3', null, null, null] +27|[]|NULL +28|NULL|NULL +29|NULL|[] +30|[]|[null, null, null, 'text3', 'None', 'None', null, null, null, null] +31|NULL|NULL +32|NULL|NULL +33|[]|[] +34|['None', null, null, 'text3', 'None', null, null, null, 'text8']|[null, 'None', 'None', 'None', 'None', 'None', null] +35|[]|NULL +36|NULL|[null, 'None', null, 'text3', 'text4', null, 'text6', null, 'None', 'text9'] +37|['text0', null, 'text2', null, 'text4', 'text5']|NULL +38|[]|[null, null, null, 'None', 'text4'] +39|[]|['text0', 'text1', null, 'None', 'None'] +40|NULL|['None', null, 'text2', 'text3', 'text4', 'None', 'None', null, null, null] +41|[null, 'text1', 'text2', 'None', 'None', 'None', null, null, null]|[null, 'None', null, null, 'text4', 'text5', null, 'text7', null, 'None'] +42|NULL|NULL +43|NULL|NULL +44|NULL|[] +45|['None', null, null, 'None', null, 'None', 'None', 'None']|[] +46|[null, null, 'text2', null, null, null, 'text6', null, 'text8']|NULL +47|NULL|NULL +48|NULL|[] +49|NULL|NULL +50|['text0', null, 'None', null, null, null, null, 'None']|[] +51|NULL|NULL +52|NULL|['text0', 'None', 'None', 'None', 'text4'] +53|[]|NULL +54|['text0', 'None', 'text2', null, 'None', 'None', null, null]|NULL +55|['None', null, null, null, null, 'None', null]|[] +56|NULL|[] +57|['None', 'text1', null, null, null, null, 'None', null]|[null, null, 'None', 'text3', 'None', null, 'None', 'None'] +58|NULL|[] +59|NULL|NULL +60|[]|NULL +61|[null, 'text1', null, 'None', 'text4', null]|[] +62|NULL|NULL +63|[null, null, 'text2', null, 'None', null, null, null, null]|[null, 'text1', 'None', 'text3', null, null, 'text6'] +64|['text0', null, null, null, null, 'text5']|NULL +65|NULL|['text0', 'None', 'None', 'None', null, 'None', null, 'None', 'text8', 'None'] +66|NULL|NULL +67|[]|NULL +68|['None', null, null, 'text3', 'None', null, 'text6', 'None']|NULL +69|NULL|['text0', null, null, 'None', 'text4', 'text5', null] +70|['text0', null, null, null, 'text4']|[] +71|NULL|[null, 'text1', null, null, 'text4', 'text5'] +72|NULL|[] +73|NULL|[] +74|[null, 'None', null, 'None', null, null]|[null, 'text1', 'None', null, 'text4', null] +75|NULL|NULL +76|[]|[] +77|[]|[] +78|[null, null, 'None', 'None', null, null]|['None', 'text1', null, 'None', null, null, null, 'None', null] +79|NULL|[] +80|[]|[] +81|[]|NULL +82|NULL|NULL +83|['None', 'None', null, 'None', 'None', 'text5', null, null, null, 'text9']|['None', null, 'None', null, 'text4', 'None', 'None', 'text7', 'None'] +84|NULL|['None', 'None', 'text2', null, 'None', null, null, 'None'] +85|NULL|[null, null, 'text2', 'text3', null, 'text5'] +86|[]|['text0', null, null, 'text3', null, 'text5'] +87|NULL|['None', null, null, null, 'None'] +88|NULL|[] +89|[null, 'None', 'None', null, 'text4', null, 'text6']|[] +90|NULL|['None', null, 'None', null, 'None', 'text5', 'text6', 'text7', null] +91|NULL|[null, 'None', null, 'text3', null, null, 'text6', 'text7', 'None'] +92|NULL|['None', 'None', 'None', null, null, 'text5', null, null] +93|[]|[] +94|[null, null, 'None', 'text3', 'text4', null]|[] +95|NULL|[null, 'None', 'None', null, 'None', null, null, 'None'] +96|NULL|NULL +97|[]|NULL +98|[]|NULL +99|['text0', 'text1', null, 'text3', 'text4', null, null, null, null, 'None']|['None', 'text1', 'text2', 'text3', null, null, 'None', null] +100|NULL|NULL +101|NULL|[null, null, null, null, 'text4', null, 'None', 'None'] +102|[null, 'None', 'None', 'text3', 'None', null, null, null, null]|['None', null, null, 'text3', null] +103|NULL|NULL +104|NULL|[null, 'None', 'text2', null, 'None', null] +105|[null, null, 'text2', 'text3', null]|NULL +106|NULL|[] +107|NULL|['text0', null, null, null, 'None'] +108|NULL|NULL +109|NULL|[] +110|[null, 'text1', 'None', null, 'None', null, 'text6', 'text7']|[] +111|[null, null, 'None', 'text3', null, 'None']|[null, 'text1', null, null, 'None', null] +112|NULL|[null, 'None', null, null, 'text4', 'text5', 'text6', 'None'] +113|NULL|[] +114|NULL|['None', null, null, null, null, 'text5'] +115|[null, null, 'None', 'text3', null]|['text0', 'text1', null, null, null] +116|NULL|NULL +117|NULL|[] +118|['text0', null, null, null, 'None', 'text5', 'text6', 'None', 'text8']|NULL +119|[null, 'text1', 'text2', 'None', null, null, 'text6', 'None']|NULL +120|[null, 'None', null, 'None', null, null, null, null, null]|['text0', 'text1', 'None', 'None', null, 'None', null, null] +121|NULL|[null, null, 'text2', null, 'None'] +122|NULL|[] +123|['None', 'text1', 'None', 'None', null, 'text5']|[null, 'None', null, null, 'None', 'None', null] +124|[]|[null, 'None', null, null, null, null] +125|NULL|NULL +126|NULL|[] +127|NULL|NULL +128|NULL|[] +129|['text0', 'None', 'None', 'text3', null, 'text5']|['None', 'text1', 'text2', 'None', null, null, 'None', null, 'None'] +130|NULL|NULL +131|['text0', 'text1', 'text2', 'None', null, null, 'None']|NULL +132|NULL|NULL +133|NULL|NULL +134|[]|[] +135|NULL|NULL +136|NULL|[null, null, null, null, null, 'None', 'text6', null] +137|[]|NULL +138|NULL|[] +139|NULL|['text0', null, 'text2', null, 'text4', null, null, 'None', 'text8', null] +140|[]|[] +141|[]|[] +142|NULL|NULL +143|NULL|NULL +144|[]|['None', 'text1', null, null, 'text4', null, 'text6', null, null, 'None'] +145|[]|NULL +146|[]|['text0', 'None', null, null, null, 'None', null] +147|NULL|NULL +148|['None', 'None', null, 'None', 'text4', 'None']|['None', null, null, null, 'None'] +149|NULL|['None', null, null, 'None', 'None', null, null, 'text7'] +150|['text0', 'text1', 'None', 'text3', 'text4', 'None']|[] +151|[]|[] +152|NULL|NULL +153|NULL|NULL +154|[null, 'text1', null, null, null, 'None', 'None', 'text7']|[null, null, null, null, 'text4', 'text5', 'text6'] +155|NULL|['None', 'text1', 'text2', 'text3', 'None', null, 'None', 'text7', 'text8', 'None'] +156|[null, 'None', null, 'text3', null, 'text5', 'text6', null]|NULL +157|NULL|['None', 'None', 'text2', null, null] +158|[]|[] +159|NULL|[] +160|NULL|NULL +161|[]|[] +162|[null, 'None', null, 'None', 'text4']|NULL +163|NULL|NULL +164|['None', 'None', 'None', null, null, null, 'text6', null, null]|NULL +165|[]|NULL +166|NULL|[] +167|NULL|NULL +168|[]|[] +169|[]|['text0', 'text1', null, null, 'text4', null] +170|NULL|[null, null, 'None', 'None', 'None', 'text5', 'None', null, 'None', 'None'] +171|NULL|NULL +172|[]|NULL +173|NULL|[] +174|['None', 'text1', null, 'None', 'text4', null, null, 'None', 'text8']|[] +175|NULL|NULL +176|[]|NULL +177|NULL|[] +178|[]|[] +179|NULL|NULL +180|[null, null, null, null, 'text4', null, null, null, 'text8']|NULL +181|[]|[null, 'None', null, null, 'text4', null, 'text6', 'None'] +182|NULL|['None', 'text1', null, 'None', 'None', null, 'None', 'text7'] +183|NULL|[null, null, 'None', 'text3', null, 'None', null, 'None', null, null] +184|[null, 'None', null, 'None', null, 'text5', 'None', 'text7']|NULL +185|[]|NULL +186|NULL|[null, null, null, 'text3', 'text4'] +187|[null, null, 'None', 'text3', 'text4']|NULL +188|[]|[] +189|[null, null, 'text2', null, 'None', null, null, 'None', 'text8']|NULL +190|['None', null, 'None', null, 'None', 'text5', 'None', null, 'text8', null]|[] +191|NULL|NULL +192|NULL|NULL +193|['None', null, 'text2', 'None', null]|NULL +194|NULL|[] +195|[]|NULL +196|NULL|NULL +197|[null, 'text1', 'text2', 'None', 'text4', null, null]|[null, 'None', 'None', 'None', null, null, null, 'text7', null, null] +198|NULL|NULL +199|[]|[null, 'None', 'None', null, null, null, null, null, null, 'text9'] diff --git a/regression-test/suites/inverted_index_p0/array_contains/test_add_index_for_arr.groovy b/regression-test/suites/inverted_index_p0/array_contains/test_add_index_for_arr.groovy new file mode 100644 index 00000000000000..6f3e772dd08151 --- /dev/null +++ b/regression-test/suites/inverted_index_p0/array_contains/test_add_index_for_arr.groovy @@ -0,0 +1,155 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + + +suite("test_add_index_for_arr") { + // prepare test table + def timeout = 60000 + def delta_time = 1000 + def alter_res = "null" + def useTime = 0 + // here some variable to control inverted index query + sql """ set enable_profile=true""" + sql """ set enable_pipeline_x_engine=true;""" + sql """ set enable_inverted_index_query=false""" + sql """ set enable_common_expr_pushdown=true """ + + def wait_for_latest_op_on_table_finish = { table_name, OpTimeout -> + for(int t = delta_time; t <= OpTimeout; t += delta_time){ + alter_res = sql """SHOW ALTER TABLE COLUMN WHERE TableName = "${table_name}" ORDER BY CreateTime DESC LIMIT 1;""" + alter_res = alter_res.toString() + if(alter_res.contains("FINISHED")) { + sleep(3000) // wait change table state to normal + logger.info(table_name + " latest alter job finished, detail: " + alter_res) + break + } + useTime = t + sleep(delta_time) + } + assertTrue(useTime <= OpTimeout, "wait_for_latest_op_on_table_finish timeout") + } + + def wait_for_build_index_on_partition_finish = { table_name, OpTimeout -> + for(int t = delta_time; t <= OpTimeout; t += delta_time){ + alter_res = sql """SHOW BUILD INDEX WHERE TableName = "${table_name}";""" + def expected_finished_num = alter_res.size(); + def finished_num = 0; + for (int i = 0; i < expected_finished_num; i++) { + logger.info(table_name + " build index job state: " + alter_res[i][7] + i) + if (alter_res[i][7] == "FINISHED") { + ++finished_num; + } + } + if (finished_num == expected_finished_num) { + logger.info(table_name + " all build index jobs finished, detail: " + alter_res) + break + } + useTime = t + sleep(delta_time) + } + assertTrue(useTime <= OpTimeout, "wait_for_latest_build_index_on_partition_finish timeout") + } + + + sql "DROP TABLE IF EXISTS my_test_array" + // create table without any index + sql """ + CREATE TABLE IF NOT EXISTS my_test_array ( + `id` int(11) NULL, + `name` ARRAY NULL, + `description` ARRAY NULL, + ) + DUPLICATE KEY(`id`) + DISTRIBUTED BY HASH(`id`) BUCKETS 1 + properties("replication_num" = "1"); + """ + + def var_result = sql "show variables" + logger.info("show variales result: " + var_result ) + + // stream_load with csv data + streamLoad { + table "my_test_array" + + file "arr_null_test_data.csv" // import csv file + time 10000 // limit inflight 10s + set 'column_separator', '|' + set 'format', 'csv' + + // if declared a check callback, the default check condition will ignore. + // So you must check all condition + check { result, exception, startTime, endTime -> + if (exception != null) { + throw exception + } + log.info("Stream load result: ${result}".toString()) + def json = parseJson(result) + assertEquals(200, json.NumberTotalRows) + assertEquals(200, json.NumberLoadedRows) + assertTrue(json.LoadBytes > 0) + } + } + + // query without inverted index + // query rows with array_contains + def sql_query_name1 = sql "select id, name[1], description[1] from my_test_array where array_contains(name,'text7')" + // query rows with !array_contains + def sql_query_name2 = sql "select id, name[1], description[1] from my_test_array where !array_contains(name,'text7')" + + // add index for name + sql "ALTER TABLE my_test_array ADD INDEX name_idx (name) USING INVERTED;" + wait_for_latest_op_on_table_finish("my_test_array", timeout) + // build index for name that name data can using inverted index + if (!isCloudMode()) { + sql "BUILD INDEX name_idx ON my_test_array" + wait_for_build_index_on_partition_finish("my_test_array", timeout) + } + + // query with inverted index + sql "set enable_inverted_index_query=true" + // query rows with array_contains + def sql_query_name1_inverted = sql "select id, name[1], description[1] from my_test_array where array_contains(name,'text7')" + // query rows with !array_contains + def sql_query_name2_inverted = sql "select id, name[1], description[1] from my_test_array where !array_contains(name,'text7')" + + // check result for query without inverted index and with inverted index + def size1 = sql_query_name1.size(); + log.info("sql_query_name1 query without inverted index rows size: ${size1}") + for (int i = 0; i < sql_query_name1.size(); i++) { + assertEquals(sql_query_name1[i][0], sql_query_name1_inverted[i][0]) + assertEquals(sql_query_name1[i][1], sql_query_name1_inverted[i][1]) + assertEquals(sql_query_name1[i][2], sql_query_name1_inverted[i][2]) + } + def size2 = sql_query_name2.size(); + log.info("sql_query_name2 query without inverted index rows size: ${size2}") + for (int i = 0; i < sql_query_name2.size(); i++) { + assertEquals(sql_query_name2[i][0], sql_query_name2_inverted[i][0]) + assertEquals(sql_query_name2[i][1], sql_query_name2_inverted[i][1]) + assertEquals(sql_query_name2[i][2], sql_query_name2_inverted[i][2]) + } + + // drop index + // add index on column description + sql "drop index name_idx on my_test_array" + wait_for_latest_op_on_table_finish("my_test_array", timeout) + + def sql_query_name1_without_inverted = sql "select id, name[1], description[1] from my_test_array where array_contains(name,'text7')" + def sql_query_name2_without_inverted = sql "select id, name[1], description[1] from my_test_array where !array_contains(name,'text7')" + + assertEquals(sql_query_name1.size(), sql_query_name1_without_inverted.size()) + assertEquals(sql_query_name2.size(), sql_query_name2_without_inverted.size()) +}