From bff0d2fd2e38c1c15a4eb20702eea3d0c6284797 Mon Sep 17 00:00:00 2001 From: Vladislav Shpilevoy Date: Tue, 3 Oct 2023 23:03:59 +0200 Subject: [PATCH] rebalancer: introduce rebalancer_mode Closes #432 @TarantoolBot document Title: vshard: rebalancer flag and mode So far it was impossible to specify which instance should run the rebalancer. It was always automatically assigned using some internal rules based on UUIDs. Now the users can choose: - Which specific instance should run the rebalancer. It can be a replica or a master - it won't matter. - In which replicaset the instance to run the rebalancer should be selected automatically. - Which instances and whole replicasets should not run the rebalancer even when it is selected automatically. For that there are 2 new options: `rebalancer = <boolean>` and `rebalancer_mode = <string>`. The `rebalancer` flag can be either omitted, or set to true, or false. It can be set for replicasets and for specific instances. There can be only one `rebalancer = true` in the whole config. But there can be many `rebalancer = false`. * `rebalancer = true` assigned to an instance means that this instance is guaranteed to run the rebalancer service on it. The instance role doesn't matter - it can be a replica or a master. It will run the rebalancer anyway. * `rebalancer = true` assigned to a replicaset means that the service will run only on the master of this replicaset. This can be combined with `master = 'auto'` on the given replicaset. * `rebalancer = false` assigned to an instance means that it will not run the rebalancer. * `rebalancer = false` assigned to a replicaset means that all the instances of this replicaset will not run the rebalancer. * `rebalancer = nil` (same as omitted, default) means that the instance/replicaset will be eligible to run the rebalancer only if `rebalancer_mode = 'auto'` is set and there are no `rebalancer = true` anywhere. The option `rebalancer_mode` should be specified in the root of the config. 
It can have one of these values: * `'auto'` - default. It means that the rebalancer service location is chosen automatically among all master instances in the cluster, excluding those which have `rebalancer = false` on them or on their replicaset. If there are any `rebalancer = true`, then this mode works the same as `'manual'`. * `'manual'`. The rebalancer will run only if there is at least one `rebalancer = true` in the config. And only on the given instance / replicaset (depending on the level at which the flag was specified - for a specific instance or for a whole replicaset). * `'off'`. The rebalancer will not run anywhere, regardless of all the `rebalancer = true/false` specified in the config. --- test/storage-luatest/rebalancer_test.lua | 59 ++++++++++++++++++++++++ test/unit-luatest/config_test.lua | 39 ++++++++++++++++ vshard/cfg.lua | 13 ++++++ vshard/storage/init.lua | 10 +++- 4 files changed, 120 insertions(+), 1 deletion(-) diff --git a/test/storage-luatest/rebalancer_test.lua b/test/storage-luatest/rebalancer_test.lua index 08353a81..caaa90eb 100644 --- a/test/storage-luatest/rebalancer_test.lua +++ b/test/storage-luatest/rebalancer_test.lua @@ -233,3 +233,62 @@ test_group.test_locate_with_flag = function(g) vtest.cluster_cfg(g, global_cfg) wait_rebalancer_on_instance(g, 'replica_1_a') end + +test_group.test_rebalancer_mode = function(g) + local new_cfg_template = table.deepcopy(cfg_template) + -- + -- Auto-mode won't ignore rebalancer flags. It can only make any difference + -- when the rebalancer is not specified explicitly. + -- + new_cfg_template.rebalancer_mode = 'auto' + new_cfg_template.sharding[1].rebalancer = nil + new_cfg_template.sharding[2].rebalancer = true + local new_global_cfg = vtest.config_new(new_cfg_template) + vtest.cluster_cfg(g, new_global_cfg) + wait_rebalancer_on_instance(g, 'replica_2_a') + -- + -- The rebalancer false-flags are taken into account. 
+ -- + new_cfg_template.sharding[1].rebalancer = false + new_cfg_template.sharding[2].rebalancer = false + new_global_cfg = vtest.config_new(new_cfg_template) + vtest.cluster_cfg(g, new_global_cfg) + wait_rebalancer_on_instance(g, 'replica_3_a') + -- + -- The flags don't matter when the rebalancer is off. + -- + new_cfg_template.rebalancer_mode = 'off' + new_global_cfg = vtest.config_new(new_cfg_template) + vtest.cluster_cfg(g, new_global_cfg) + wait_rebalancer_on_instance(g, nil) + -- + -- Manual with a rebalancer assigned explicitly to an instance. + -- + new_cfg_template.rebalancer_mode = 'manual' + new_cfg_template.sharding[2].rebalancer = nil + new_cfg_template.sharding[2].replicas.replica_2_b.rebalancer = true + new_global_cfg = vtest.config_new(new_cfg_template) + vtest.cluster_cfg(g, new_global_cfg) + wait_rebalancer_on_instance(g, 'replica_2_b') + -- + -- Manual with a rebalancer assigned explicitly to a replicaset. + -- + new_cfg_template.rebalancer_mode = 'manual' + new_cfg_template.sharding[2].replicas.replica_2_b.rebalancer = nil + new_cfg_template.sharding[3].rebalancer = true + new_global_cfg = vtest.config_new(new_cfg_template) + vtest.cluster_cfg(g, new_global_cfg) + wait_rebalancer_on_instance(g, 'replica_3_a') + -- + -- Manual with no explicitly assigned rebalancer means no rebalancer at all. + -- + new_cfg_template.sharding[3].rebalancer = nil + new_global_cfg = vtest.config_new(new_cfg_template) + vtest.cluster_cfg(g, new_global_cfg) + wait_rebalancer_on_instance(g, nil) + -- + -- Cleanup. 
+ -- + vtest.cluster_cfg(g, global_cfg) + wait_rebalancer_on_instance(g, 'replica_1_a') +end diff --git a/test/unit-luatest/config_test.lua b/test/unit-luatest/config_test.lua index 5c83d30b..45dd3acd 100644 --- a/test/unit-luatest/config_test.lua +++ b/test/unit-luatest/config_test.lua @@ -211,3 +211,42 @@ g.test_rebalancer_flag = function() replicaset_1.rebalancer = nil storage_1_a.rebalancer = nil end + +g.test_rebalancer_mode = function() + local storage_1_a = { + uri = 'storage:storage@127.0.0.1:3301', + name = 'storage_1_a', + } + local replicaset_1 = { + replicas = { + storage_1_a_uuid = storage_1_a, + }, + } + local config = { + sharding = { + storage_1_uuid = replicaset_1, + }, + } + t.assert(vcfg.check(config)) + + local function check_all_flag_combinations() + t.assert(vcfg.check(config)) + storage_1_a.rebalancer = true + t.assert(vcfg.check(config)) + storage_1_a.rebalancer = nil + replicaset_1.rebalancer = true + t.assert(vcfg.check(config)) + replicaset_1.rebalancer = false + t.assert(vcfg.check(config)) + replicaset_1.rebalancer = nil + storage_1_a.rebalancer = false + t.assert(vcfg.check(config)) + storage_1_a.rebalancer = nil + end + config.rebalancer_mode = 'auto' + check_all_flag_combinations() + config.rebalancer_mode = 'manual' + check_all_flag_combinations() + config.rebalancer_mode = 'off' + check_all_flag_combinations() +end diff --git a/vshard/cfg.lua b/vshard/cfg.lua index 74dd9e41..e746d789 100644 --- a/vshard/cfg.lua +++ b/vshard/cfg.lua @@ -208,6 +208,12 @@ local function check_discovery_mode(value) end end +local function check_rebalancer_mode(value) + if value ~= 'auto' and value ~= 'manual' and value ~= 'off' then + error("Expected 'auto', 'manual', or 'off' for rebalancer_mode") + end +end + local function check_sharding(sharding) local uuids = {} local uris = {} @@ -319,6 +325,13 @@ local cfg_template = { default = consts.DEFAULT_REBALANCER_MAX_SENDING, max = consts.REBALANCER_MAX_SENDING_MAX }, + rebalancer_mode = { + type = 
'string', + name = 'Rebalancer mode', + is_optional = true, + default = 'auto', + check = check_rebalancer_mode, + }, collect_bucket_garbage_interval = { name = 'Garbage bucket collect interval', is_deprecated = true, reason = 'Has no effect anymore' diff --git a/vshard/storage/init.lua b/vshard/storage/init.lua index dd601ae4..68bc9eff 100644 --- a/vshard/storage/init.lua +++ b/vshard/storage/init.lua @@ -3238,8 +3238,10 @@ end -- Find UUID of the instance which should run the rebalancer service. -- local function rebalancer_cfg_find_instance(cfg) + assert(cfg.rebalancer_mode ~= 'off') local target_uuid local is_assigned + local is_auto = cfg.rebalancer_mode == 'auto' for _, rs in pairs(cfg.sharding) do if rs.rebalancer == false then goto next_rs @@ -3253,7 +3255,7 @@ local function rebalancer_cfg_find_instance(cfg) end local ok = true ok = ok and not no_rebalancer - ok = ok and (replica.master or replica.rebalancer) + ok = ok and ((is_auto and replica.master) or replica.rebalancer) ok = ok and (not target_uuid or replica_uuid < target_uuid) ok = ok and (not is_assigned or is_rebalancer) if ok then @@ -3266,8 +3268,10 @@ local function rebalancer_cfg_find_instance(cfg) end local function rebalancer_cfg_find_replicaset(cfg) + assert(cfg.rebalancer_mode ~= 'off') local target_uuid local is_assigned + local is_auto = cfg.rebalancer_mode == 'auto' for rs_uuid, rs in pairs(cfg.sharding) do local is_rebalancer = rs.rebalancer local no_rebalancer = rs.rebalancer == false @@ -3278,6 +3282,7 @@ local function rebalancer_cfg_find_replicaset(cfg) local ok = true ok = ok and not no_rebalancer ok = ok and (rs.master == 'auto') + ok = ok and (is_auto or is_rebalancer) ok = ok and (not target_uuid or rs_uuid < target_uuid) ok = ok and (not is_assigned or is_rebalancer) if ok then @@ -3293,6 +3298,9 @@ local function rebalancer_is_needed() return false end local cfg = M.current_cfg + if cfg.rebalancer_mode == 'off' then + return false + end local this_replica_uuid = 
M.this_replica.uuid local this_replicaset_uuid = M.this_replicaset.uuid