Skip to content

Commit

Permalink
router: auto and manual enable/disable
Browse files Browse the repository at this point in the history
This patch introduces protecting router's API while its configuration
is not done as accessing these functions at that time is not safe and
can cause low level errors like 'bad arguments' or 'no such function'.

Now all non-trivial vshard.router functions are disabled until
`vshard.router.cfg` (or `vshard.router.new`) is finished and error is
raised in case of an attempt to access them.

Manual API's enabling/disabling is also introduced in this patch.

Closes #194
Closes #291

@TarantoolBot document
Title: vshard.router.enable/disable()
`vshard.router.disable()` makes most of the `vshard.router`
functions throw an error. As Lua exception, not via `nil, err`
pattern.

`vshard.router.enable()` reverts the disable.

`router_object:enable()/disable()`, where `router_object` is
the return value of `vshard.router.new()`, can also be used
for manual API access configuration for the specific non-static
router.

By default the router is enabled.

Additionally, the router is forcefully disabled automatically
until its configuration is finished and the instance finished
recovery (its `box.info.status` is `'running'`, for example).

Auto-disable protects from usage of vshard functions before the
router's global state is fully created.

Manual router's disabling helps to achieve the same for user's
application. For instance, a user might want to do some preparatory
work after `vshard.router.cfg` before the application is ready.
Then the flow would be:
```Lua
vshard.router.disable()
vshard.router.cfg(...)
-- Do your preparatory work here ...
vshard.router.enable()
```

The behavior of the router's API enabling/disabling is similar to the
storage's one.
  • Loading branch information
Serpentian authored and Gerold103 committed Jul 8, 2022
1 parent 20263e0 commit dd70cfb
Show file tree
Hide file tree
Showing 5 changed files with 208 additions and 24 deletions.
1 change: 1 addition & 0 deletions test/instances/router.lua
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ local helpers = require('test.luatest_helpers')
-- able to use the libs in server:exec() calls and not get upvalue errors if the
-- same lib is declared in the _test.lua file.
--
_G.ifiber = require('fiber')
_G.imsgpack = require('msgpack')
_G.ivtest = require('test.luatest_helpers.vtest')
_G.iwait_timeout = _G.ivtest.wait_timeout
Expand Down
5 changes: 4 additions & 1 deletion test/luatest_helpers/vtest.lua
Original file line number Diff line number Diff line change
Expand Up @@ -524,6 +524,7 @@ end

--
-- Create a new router in the cluster.
-- If no cfg was passed configuration should be done manually with server:exec
--
local function router_new(g, name, cfg)
if not g.cluster then
Expand All @@ -535,7 +536,9 @@ local function router_new(g, name, cfg)
g[name] = server
g.cluster:add_server(server)
server:start()
router_cfg(server, cfg)
if cfg then
router_cfg(server, cfg)
end
return server
end

Expand Down
101 changes: 101 additions & 0 deletions test/router-luatest/router_test.lua
Original file line number Diff line number Diff line change
Expand Up @@ -444,3 +444,104 @@ g.test_ssl = function(g)
vtest.router_cfg(g.router, cluster_cfg)
vtest.storage_wait_fullsync(g)
end

g.test_enable_disable = function(g)
--
-- gh-291: router enable/disable
--
local router = vtest.router_new(g, 'router_1')
-- do not allow router's configuration to complete
router:exec(function()
_G.ivshard.router.internal.errinj.ERRINJ_CFG_DELAY = true
end)
router:exec(function(cfg)
rawset(_G, 'fiber_static', ifiber.new(ivshard.router.cfg, cfg))
rawset(_G, 'fiber_new', ifiber.new(ivshard.router.new,
'new_router', cfg))
_G.fiber_static:set_joinable(true)
_G.fiber_new:set_joinable(true)
end, {cluster_cfg})

-- emulate unconfigured box
router:exec(function()
rawset(_G, 'old_box_cfg', box.cfg)
box.cfg = function(...) return _G.old_box_cfg(...) end
end)

-- check whether errors are not nil and their messages are equal to str
local assert_errors_equals = function(err1, err2, str)
t.assert(err1 and err2)
t.assert_equals(err1.message, err2.message)
t.assert_str_contains(err1.message, str)
end

local err1, err2 = router:exec(function()
rawset(_G, 'static_router', ivshard.router.internal.routers._static_router)
rawset(_G, 'new_router', ivshard.router.internal.routers.new_router)
local _, err_1 = pcall(_G.static_router.info, _G.static_router)
local _, err_2 = pcall(_G.new_router.info, _G.new_router)
return err_1, err_2
end)
assert_errors_equals(err1, err2, 'box seems not to be configured')

-- set box status to loading
router:exec(function()
box.cfg = _G.old_box_cfg
rawset(_G, 'old_box_info', box.info)
box.info = {status = 'loading'}
end)

local echo_func = function()
return router:exec(function(timeout)
local echo = function(router)
return pcall(router.callrw, router, 1, 'echo', {1},
{timeout = timeout})
end
local _, ret_val_1 = echo(_G.static_router)
local _, ret_val_2 = echo(_G.new_router)
return ret_val_1, ret_val_2
end, {vtest.wait_timeout})
end

err1, err2 = echo_func()
assert_errors_equals(err1, err2, 'instance status is "loading"')

-- restore proper box configuration
router:exec(function()
box.info = _G.old_box_info
end)

err1, err2 = echo_func()
assert_errors_equals(err1, err2, 'router is not configured')

-- unblock router's configuration and wait until it's finished
router:exec(function()
_G.ivshard.router.internal.errinj.ERRINJ_CFG_DELAY = false
_G.fiber_static:join()
_G.fiber_new:join()
end)

-- finally a success
local ret1, ret2 = echo_func()
t.assert_equals(ret1, ret2)
t.assert_equals(ret1, 1)

-- manual api disabling and enabling
router:exec(function()
_G.static_router:disable()
_G.new_router:disable()
end)
err1, err2 = echo_func()
assert_errors_equals(err1, err2, 'router is disabled explicitly')

router:exec(function()
_G.static_router:enable()
_G.new_router:enable()
end)
ret1, ret2 = echo_func()
t.assert_equals(ret1, ret2)
t.assert_equals(ret1, 1)

-- we don't want this server to interfere with subsequent tests
g.router_1:drop()
end
5 changes: 5 additions & 0 deletions vshard/error.lua
Original file line number Diff line number Diff line change
Expand Up @@ -178,6 +178,11 @@ local error_message_template = {
msg = 'Bucket %d is corrupted: %s',
args = {'bucket_id', 'reason'}
},
[35] = {
name = 'ROUTER_IS_DISABLED',
msg = 'Router is disabled: %s',
args = {'reason'}
},
}

--
Expand Down
120 changes: 97 additions & 23 deletions vshard/router/init.lua
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ if not M then
---------------- Common module attributes ----------------
errinj = {
ERRINJ_CFG = false,
ERRINJ_CFG_DELAY = false,
ERRINJ_FAILOVER_CHANGE_CFG = false,
ERRINJ_RELOAD = false,
ERRINJ_LONG_DISCOVERY = false,
Expand All @@ -52,6 +53,8 @@ if not M then
-- This counter is used to restart background fibers with
-- new reloaded code.
module_version = 0,
-- Flag whether box.info.status is acceptable.
is_loaded = false,

----------------------- Map-Reduce -----------------------
-- Storage Ref ID. It must be unique for each ref request
Expand Down Expand Up @@ -95,6 +98,16 @@ local ROUTER_TEMPLATE = {
-- when no timeout is specified.
--
sync_timeout = consts.DEFAULT_SYNC_TIMEOUT,
-- Flag whether router_cfg() is finished.
is_configured = false,
-- Flag whether the instance is enabled manually. It is true by default
-- for backward compatibility with old vshard.
is_enabled = true,
-- Reference to the function-proxy to most of the public functions. It
-- allows to avoid 'if's in each function by adding expensive
-- conditional checks in one rarely used version of the wrapper and no
-- checks into the other almost always used wrapper.
api_call_cache = nil,
}

local STATIC_ROUTER_NAME = '_static_router'
Expand Down Expand Up @@ -1211,6 +1224,9 @@ local function router_cfg(router, cfg, is_reload)
if not is_reload then
box.cfg(box_cfg)
log.info("Box has been configured")
while M.errinj.ERRINJ_CFG_DELAY do
lfiber.sleep(0.01)
end
end
-- Move connections from an old configuration to a new one.
-- It must be done with no yields to prevent usage both of not
Expand Down Expand Up @@ -1248,6 +1264,7 @@ local function router_cfg(router, cfg, is_reload)
end
discovery_set(router, vshard_cfg.discovery_mode)
master_search_set(router)
router.is_configured = true
end

--------------------------------------------------------------------------------
Expand Down Expand Up @@ -1573,6 +1590,60 @@ local function router_sync(router, timeout)
return true
end

--------------------------------------------------------------------------------
-- Public API protection
--------------------------------------------------------------------------------

local function router_api_call_safe(func, router, ...)
return func(router, ...)
end

--
-- Unsafe proxy is loaded with protections. But it is used rarely and only in
-- the beginning of instance's lifetime.
--
local function router_api_call_unsafe(func, router, ...)
-- box.info is quite expensive. Avoid calling it again when the instance
-- is finally loaded.
if not M.is_loaded then
if type(box.cfg) == 'function' then
local msg = 'box seems not to be configured'
return error(lerror.vshard(lerror.code.ROUTER_IS_DISABLED, msg))
end
local status = box.info.status
if status ~= 'running' then
local msg = ('instance status is "%s"'):format(status)
return error(lerror.vshard(lerror.code.ROUTER_IS_DISABLED, msg))
end
M.is_loaded = true
end
if not router.is_configured then
local msg = 'router is not configured'
return error(lerror.vshard(lerror.code.ROUTER_IS_DISABLED, msg))
end
if not router.is_enabled then
local msg = 'router is disabled explicitly'
return error(lerror.vshard(lerror.code.ROUTER_IS_DISABLED, msg))
end
router.api_call_cache = router_api_call_safe
return func(router, ...)
end

local function router_make_api(func)
return function(router, ...)
return router.api_call_cache(func, router, ...)
end
end

local function router_enable(router)
router.is_enabled = true
end

local function router_disable(router)
router.is_enabled = false
router.api_call_cache = router_api_call_unsafe
end

if M.errinj.ERRINJ_RELOAD then
error('Error injection: reload')
end
Expand All @@ -1584,29 +1655,31 @@ end
local router_mt = {
__index = {
cfg = function(router, cfg) return router_cfg(router, cfg, false) end,
info = router_info;
buckets_info = router_buckets_info;
call = router_call;
callro = router_callro;
callbro = router_callbro;
callrw = router_callrw;
callre = router_callre;
callbre = router_callbre;
map_callrw = router_map_callrw,
route = router_route;
routeall = router_routeall;
bucket_id = router_bucket_id,
bucket_id_strcrc32 = router_bucket_id_strcrc32,
bucket_id_mpcrc32 = router_bucket_id_mpcrc32,
bucket_count = router_bucket_count;
sync = router_sync;
bootstrap = cluster_bootstrap;
bucket_discovery = bucket_discovery;
discovery_wakeup = discovery_wakeup;
master_search_wakeup = master_search_wakeup,
discovery_set = discovery_set,
_route_map_clear = route_map_clear,
_bucket_reset = bucket_reset,
info = router_make_api(router_info),
buckets_info = router_make_api(router_buckets_info),
call = router_make_api(router_call),
callro = router_make_api(router_callro),
callbro = router_make_api(router_callbro),
callrw = router_make_api(router_callrw),
callre = router_make_api(router_callre),
callbre = router_make_api(router_callbre),
map_callrw = router_make_api(router_map_callrw),
route = router_make_api(router_route),
routeall = router_make_api(router_routeall),
bucket_id = router_make_api(router_bucket_id),
bucket_id_strcrc32 = router_make_api(router_bucket_id_strcrc32),
bucket_id_mpcrc32 = router_make_api(router_bucket_id_mpcrc32),
bucket_count = router_make_api(router_bucket_count),
sync = router_make_api(router_sync),
bootstrap = router_make_api(cluster_bootstrap),
bucket_discovery = router_make_api(bucket_discovery),
discovery_wakeup = router_make_api(discovery_wakeup),
master_search_wakeup = router_make_api(master_search_wakeup),
discovery_set = router_make_api(discovery_set),
_route_map_clear = router_make_api(route_map_clear),
_bucket_reset = router_make_api(bucket_reset),
disable = router_disable,
enable = router_enable,
}
}

Expand Down Expand Up @@ -1650,6 +1723,7 @@ local function router_new(name, cfg)
end
local router = table.deepcopy(ROUTER_TEMPLATE)
setmetatable(router, router_mt)
router.api_call_cache = router_api_call_unsafe
router.name = name
M.routers[name] = router
local ok, err = pcall(router_cfg, router, cfg)
Expand Down

0 comments on commit dd70cfb

Please sign in to comment.