From 262f7eb4b0d18355f15f0759136845c760506e28 Mon Sep 17 00:00:00 2001 From: eldenmoon <15605149486@163.com> Date: Mon, 30 Sep 2024 01:51:12 +0800 Subject: [PATCH] [cloud](Variant) limit column size in MetaService In local mode we limit the column size in the commit_txn phase; in cloud mode we limit it in `write_schema_dict`, which runs in the commit_rowset phase. --- cloud/src/common/config.h | 5 +-- .../src/meta-service/meta_service_schema.cpp | 9 +++++ .../variant_p0/column_size_limit.groovy | 33 ++++++------------- 3 files changed, 22 insertions(+), 25 deletions(-) diff --git a/cloud/src/common/config.h b/cloud/src/common/config.h index 7caba826520fb3..a25aa2952ab1ca 100644 --- a/cloud/src/common/config.h +++ b/cloud/src/common/config.h @@ -135,8 +135,9 @@ CONF_mBool(snapshot_get_tablet_stats, "true"); // Value codec version CONF_mInt16(meta_schema_value_version, "1"); -// Limit kv size of Schema SchemaDictKeyList, default 10MB -CONF_mInt32(schema_dict_kv_size_limit, "10485760"); +// Limit kv size of Schema SchemaDictKeyList, default 5MB +CONF_mInt32(schema_dict_kv_size_limit, "5242880"); +CONF_mInt32(schema_dict_key_count_limit, "2048"); // For instance check interval CONF_Int64(reserved_buffer_days, "3"); diff --git a/cloud/src/meta-service/meta_service_schema.cpp b/cloud/src/meta-service/meta_service_schema.cpp index d99f026d051612..c34976c3bb695a 100644 --- a/cloud/src/meta-service/meta_service_schema.cpp +++ b/cloud/src/meta-service/meta_service_schema.cpp @@ -297,6 +297,15 @@ void write_schema_dict(MetaServiceCode& code, std::string& msg, const std::strin << ", reached the limited size threshold of SchemaDictKeyList " << config::schema_dict_kv_size_limit; msg = ss.str(); + return; + } + // Limit the count of dict keys + if (dict.column_dict_size() > config::schema_dict_key_count_limit) { + code = MetaServiceCode::KV_TXN_COMMIT_ERR; + ss << "Reached max column size limit " << config::schema_dict_key_count_limit + << ", txn_id=" << rowset_meta->txn_id(); + msg = ss.str(); + 
return; } // splitting large values (>90*1000) into multiple KVs cloud::put(txn, dict_key, dict_val, 0); diff --git a/regression-test/suites/variant_p0/column_size_limit.groovy b/regression-test/suites/variant_p0/column_size_limit.groovy index 70567d89c07d2a..08da6ef5ce2cef 100644 --- a/regression-test/suites/variant_p0/column_size_limit.groovy +++ b/regression-test/suites/variant_p0/column_size_limit.groovy @@ -16,17 +16,7 @@ // under the License. import groovy.json.JsonBuilder -suite("regression_test_variant_column_limit", "nonConcurrent"){ - def set_be_config = { key, value -> - String backend_id; - def backendId_to_backendIP = [:] - def backendId_to_backendHttpPort = [:] - getBackendIpHttpPort(backendId_to_backendIP, backendId_to_backendHttpPort); - - backend_id = backendId_to_backendIP.keySet()[0] - def (code, out, err) = update_be_config(backendId_to_backendIP.get(backend_id), backendId_to_backendHttpPort.get(backend_id), key, value) - logger.info("update config: code=" + code + ", out=" + out + ", err=" + err) - } +suite("regression_test_variant_column_limit"){ def table_name = "var_column_limit" sql "DROP TABLE IF EXISTS ${table_name}" sql """ @@ -38,21 +28,18 @@ suite("regression_test_variant_column_limit", "nonConcurrent"){ DISTRIBUTED BY HASH(k) BUCKETS 1 properties("replication_num" = "1", "disable_auto_compaction" = "false"); """ - try { - def jsonBuilder = new JsonBuilder() - def root = jsonBuilder { - // Generate 2049 fields - (1..2049).each { fieldNumber -> - "field$fieldNumber" fieldNumber - } + def jsonBuilder = new JsonBuilder() + def root = jsonBuilder { + // Generate 2049 fields + (1..2049).each { fieldNumber -> + "field$fieldNumber" fieldNumber } + } - String jsonString = jsonBuilder.toPrettyString() + String jsonString = jsonBuilder.toPrettyString() + test { sql """insert into ${table_name} values (1, '$jsonString')""" - } catch(Exception ex) { - logger.info("""INSERT INTO ${table_name} failed: """ + ex) - 
assertTrue(ex.toString().contains("Reached max column")); - } finally { + exception("Reached max column size limit") } sql """insert into ${table_name} values (1, '{"a" : 1, "b" : 2, "c" : 3}')"""