-
Notifications
You must be signed in to change notification settings - Fork 3.2k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[fix](inverted index) fix variant index (#36163)
Some columns extracted from a variant do not support indexing, yet they are listed in TabletIndex. If such a column can obtain a TabletIndex, the corresponding index file will not be found when copying files, creating snapshots, or uploading files to S3. When a variant column is created, it has a TabletIndex, and every column extracted from the variant inherits that TabletIndex. A column extracted from the variant that does not support indexing should therefore not get a TabletIndex.
- Loading branch information
Showing
9 changed files
with
136 additions
and
11 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
10 changes: 10 additions & 0 deletions
10
regression-test/data/inverted_index_p0/test_variant_index_format_v1.out
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,10 @@ | ||
-- This file is automatically generated. You should know what you did if you want to edit this | ||
-- !sql -- | ||
\N | ||
\N | ||
\N | ||
4748 | ||
|
||
-- !sql -- | ||
4 | ||
|
105 changes: 105 additions & 0 deletions
105
regression-test/suites/inverted_index_p0/test_variant_index_format_v1.groovy
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,105 @@ | ||
// Licensed to the Apache Software Foundation (ASF) under one | ||
// or more contributor license agreements. See the NOTICE file | ||
// distributed with this work for additional information | ||
// regarding copyright ownership. The ASF licenses this file | ||
// to you under the Apache License, Version 2.0 (the | ||
// "License"); you may not use this file except in compliance | ||
// with the License. You may obtain a copy of the License at | ||
// | ||
// http://www.apache.org/licenses/LICENSE-2.0 | ||
// | ||
// Unless required by applicable law or agreed to in writing, | ||
// software distributed under the License is distributed on an | ||
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | ||
// KIND, either express or implied. See the License for the | ||
// specific language governing permissions and limitations | ||
// under the License. | ||
|
||
suite("test_variant_index_format_v1", "p0") { | ||
|
||
// Issue an HTTP GET against /api/calc_crc on the backend at ip:port for the given tablet id. | ||
// The value produced by the curl helper is handed back unchanged. | ||
def calc_file_crc_on_tablet = { ip, port, tablet -> | ||
    def crcUrl = String.format("http://%s:%s/api/calc_crc?tablet_id=%s", ip, port, tablet) | ||
    curl("GET", crcUrl) | ||
} | ||
// Apply one BE config item (key = value) through update_be_config on every backend | ||
// reported by getBackendIpHttpPort, logging the outcome of each call. | ||
// The tablet under test is located through its own BackendId, so updating only the | ||
// first backend in the map could miss the backend that actually hosts the tablet. | ||
def set_be_config = { key, value -> | ||
    def backendId_to_backendIP = [:] | ||
    def backendId_to_backendHttpPort = [:] | ||
    getBackendIpHttpPort(backendId_to_backendIP, backendId_to_backendHttpPort); | ||
|
||
    for (String backend_id : backendId_to_backendIP.keySet()) { | ||
        def (code, out, err) = update_be_config(backendId_to_backendIP.get(backend_id), backendId_to_backendHttpPort.get(backend_id), key, value) | ||
        logger.info("update config: code=" + code + ", out=" + out + ", err=" + err) | ||
    } | ||
} | ||
|
||
// Stream-load one JSON file (read line by line) into table_name and verify the load result. | ||
def load_json_data = {table_name, file_name -> | ||
    streamLoad { | ||
        table "${table_name}" | ||
|
||
        // http request header params for the stream load | ||
        set 'read_json_by_line', 'true' | ||
        set 'format', 'json' | ||
        set 'max_filter_ratio', '0.1' | ||
        file file_name // json file to import | ||
        time 10000 // limit inflight 10s | ||
|
||
        // Declaring a check callback means the default check condition is ignored, | ||
        // so every condition that matters has to be asserted here. | ||
        check { result, exception, startTime, endTime -> | ||
            if (exception != null) { | ||
                throw exception | ||
            } | ||
            logger.info("Stream load ${file_name} result: ${result}".toString()) | ||
            def loadResult = parseJson(result) | ||
            assertEquals("success", loadResult.Status.toLowerCase()) | ||
            // Not enforced: NumberTotalRows == NumberLoadedRows + NumberUnselectedRows | ||
            assertTrue(loadResult.NumberLoadedRows > 0) | ||
            assertTrue(loadResult.LoadBytes > 0) | ||
        } | ||
    } | ||
} | ||
|
||
// Scenario: a table with an inverted index on a variant column, stored with index format V1. | ||
// After eight stream loads the calc_crc API is called for the tablet and its version/rowset | ||
// numbers are checked, then two queries on sub-columns extracted from the variant are verified. | ||
def table_name = "github_events" | ||
sql """DROP TABLE IF EXISTS ${table_name}""" | ||
sql """ | ||
CREATE TABLE IF NOT EXISTS ${table_name} ( | ||
k bigint, | ||
v variant, | ||
INDEX idx_var(v) USING INVERTED PROPERTIES("parser" = "english") COMMENT '' | ||
) | ||
DUPLICATE KEY(`k`) | ||
DISTRIBUTED BY HASH(k) BUCKETS 1 | ||
properties("replication_num" = "1", "disable_auto_compaction" = "true", "inverted_index_storage_format" = "V1"); | ||
""" | ||
|
||
set_be_config.call("memory_limitation_per_thread_for_schema_change_bytes", "6294967296") | ||
try { | ||
    // Load the eight gharchive files in the same order as before; each load goes through load_json_data. | ||
    def json_files = ['2015-01-01-0.json', '2015-01-01-1.json', '2015-01-01-2.json', '2015-01-01-3.json', | ||
                      '2022-11-07-16.json', '2022-11-07-10.json', '2022-11-07-22.json', '2022-11-07-23.json'] | ||
    for (String json_file : json_files) { | ||
        load_json_data.call(table_name, """${getS3Url() + '/regression/gharchive.m/' + json_file}""") | ||
    } | ||
    def backendId_to_backendIP = [:] | ||
    def backendId_to_backendHttpPort = [:] | ||
    getBackendIpHttpPort(backendId_to_backendIP, backendId_to_backendHttpPort); | ||
|
||
    // The table has a single bucket, so the first tablet row is the one to inspect. | ||
    def tablets = sql_return_maparray """ show tablets from ${table_name}; """ | ||
    String tablet_id = tablets[0].TabletId | ||
    String backend_id = tablets[0].BackendId | ||
    String ip = backendId_to_backendIP.get(backend_id) | ||
    String port = backendId_to_backendHttpPort.get(backend_id) | ||
    def (code_0, out_0, err_0) = calc_file_crc_on_tablet(ip, port, tablet_id) | ||
    logger.info("Run calc_file_crc_on_tablet: code=" + code_0 + ", out=" + out_0 + ", err=" + err_0) | ||
    assertTrue(code_0 == 0) | ||
    assertTrue(out_0.contains("crc_value")) | ||
    assertTrue(out_0.contains("used_time_ms")) | ||
    // Parse the response once and check all three fields on the same object. | ||
    def crc_result = parseJson(out_0.trim()) | ||
    assertEquals("0", crc_result.start_version) | ||
    assertEquals("9", crc_result.end_version) | ||
    assertEquals("9", crc_result.rowset_count) | ||
|
||
    qt_sql """select cast(v["payload"]["pull_request"]["additions"] as int) from github_events where cast(v["repo"]["name"] as string) = 'xpressengine/xe-core' order by 1;""" | ||
    qt_sql """select count() from github_events where cast(v["repo"]["name"] as string) = 'xpressengine/xe-core'""" | ||
} finally { | ||
    // Put the limit back even when a load or an assertion fails, so the raised value does not leak into later suites. | ||
    set_be_config.call("memory_limitation_per_thread_for_schema_change_bytes", "2147483648") | ||
} | ||
} |