Skip to content

Commit

Permalink
replicaset: introduce name validation
Browse files Browse the repository at this point in the history
In case of name identification, no UUID may be passed at all, so we
cannot verify only UUID, when connecting to storage. It seems impossible
to extend the current net.box greeting by exposing net_box.conn.name to
it, as iproto greeting doesn't have enough free space to save 64 bit
instance name. So, we should deal with name validation on vshard side.

Let's validate name only during connection establishing, not on every
reconnect, as it's done for UUID now. The motivation for that, is the
fact, that it's not cheap to validate name.

Closes tarantool#426

NO_DOC=internal
  • Loading branch information
Serpentian committed Dec 6, 2023
1 parent f54c07e commit ad6a3de
Show file tree
Hide file tree
Showing 6 changed files with 70 additions and 10 deletions.
12 changes: 10 additions & 2 deletions test/replicaset-luatest/replicaset_3_test.lua
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ local fiber = require('fiber')
local t = require('luatest')
local vreplicaset = require('vshard.replicaset')
local vtest = require('test.luatest_helpers.vtest')
local vutil = require('vshard.util')

local small_timeout_opts = {timeout = 0.05}
local timeout_opts = {timeout = vtest.wait_timeout}
Expand Down Expand Up @@ -245,6 +246,7 @@ test_group.test_locate_master_when_no_conn_object = function(g)
end

test_group.test_named_replicaset = function(g)
t.run_only_if(vutil.version_is_at_least(3, 0, 0, 'entrypoint', 0, 0))
local new_cfg_template = table.deepcopy(cfg_template)
new_cfg_template.identification_mode = 'name_as_key'
new_cfg_template.sharding['replicaset'] = new_cfg_template.sharding[1]
Expand All @@ -265,9 +267,15 @@ test_group.test_named_replicaset = function(g)
t.assert_equals(rs.id, rs.name)
t.assert_equals(replica_1_a.id, replica_1_a.name)

-- Just to be sure, that it works.
local uuid_a = g.replica_1_a:instance_uuid()
-- Name is not set, name mismatch error.
local ret, err = rs:callrw('get_uuid', {}, timeout_opts)
t.assert_equals(err.name, 'INSTANCE_NAME_MISMATCH')
t.assert_equals(ret, nil)

-- Set name, everything works from now on.
g.replica_1_a:exec(function() box.cfg{instance_name = 'replica_1_a'} end)
local uuid_a = g.replica_1_a:instance_uuid()
ret, err = rs:callrw('get_uuid', {}, timeout_opts)
t.assert_equals(err, nil)
t.assert_equals(ret, uuid_a)

Expand Down
8 changes: 4 additions & 4 deletions test/storage/storage.result
Original file line number Diff line number Diff line change
Expand Up @@ -1014,16 +1014,16 @@ vshard.storage.internal.errinj.ERRINJ_RECOVERY_PAUSE = false
--
-- Internal info function.
--
vshard.storage._call('info')
vshard.storage._call('info').is_master
---
- is_master: true
- true
...
_ = test_run:switch('storage_1_b')
---
...
vshard.storage._call('info')
vshard.storage._call('info').is_master
---
- is_master: false
- false
...
--
-- gh-123, gh-298: storage auto-enable/disable depending on instance state.
Expand Down
4 changes: 2 additions & 2 deletions test/storage/storage.test.lua
Original file line number Diff line number Diff line change
Expand Up @@ -327,9 +327,9 @@ vshard.storage.internal.errinj.ERRINJ_RECOVERY_PAUSE = false
--
-- Internal info function.
--
vshard.storage._call('info')
vshard.storage._call('info').is_master
_ = test_run:switch('storage_1_b')
vshard.storage._call('info')
vshard.storage._call('info').is_master

--
-- gh-123, gh-298: storage auto-enable/disable depending on instance state.
Expand Down
5 changes: 5 additions & 0 deletions vshard/error.lua
Original file line number Diff line number Diff line change
Expand Up @@ -202,6 +202,11 @@ local error_message_template = {
msg = 'Bucket %s update is invalid: %s',
args = {'bucket_id', 'reason'},
},
[40] = {
name = 'INSTANCE_NAME_MISMATCH',
msg = 'Mismatch server name: expected "%s", but got "%s"',
args = {'expected_name', 'actual_name'},
}
}

--
Expand Down
50 changes: 48 additions & 2 deletions vshard/replicaset.lua
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,12 @@ local function netbox_on_connect(conn)
-- If a replica's connection has revived, then unset
-- replica.down_ts - it is not down anymore.
replica.down_ts = nil
if conn.name then
-- Name is checked during the first call to the instance.
local opts = {is_async = true}
conn.name = conn:call('vshard.storage._call', {'info'}, opts)
conn.name_cond:broadcast()
end
if replica.uuid and conn.peer_uuid ~= replica.uuid and
-- XXX: Zero UUID means not a real Tarantool instance. It
-- is likely to be a cartridge.remote-control server,
Expand Down Expand Up @@ -225,6 +231,13 @@ local function replicaset_connect_to_replica(replicaset, replica)
conn:on_connect(netbox_on_connect)
conn.on_disconnect_ref = netbox_on_disconnect
conn:on_disconnect(netbox_on_disconnect)
-- Whether name validation is needed.
conn.name = replica.id == replica.name
if conn.name then
-- Replica call can be done before the connection is established.
-- Create fiber condition to wait for future object appearance.
conn.name_cond = fiber.cond()
end
replica.conn = conn
end
return conn
Expand Down Expand Up @@ -402,6 +415,40 @@ local function replica_on_success_request(replica)
end
end

--
-- Validate instance_name if neeed and make a call.
--
local function replica_conn_call(replica, func, args, opts)
local conn = replica.conn
if conn.name then
if conn.name == true and
not fiber_cond_wait(conn.name_cond, opts.timeout) then
-- Connection have not been established.
return nil, lerror.timeout()
end
-- It's future object now.
assert(type(conn.name) == 'userdata')
conn.name_cond = nil
-- It's the first call to instance, validate name.
local res, err = util.future_wait(conn.name, opts.timeout)
if res == nil then
-- Failed to get instance name, e.g. timeout error.
conn.name:discard()
conn:close()
return nil, err
end
if res[1].name ~= replica.name then
conn:close()
return nil, lerror.vshard(lerror.code.INSTANCE_NAME_MISMATCH,
replica.name, res[1].name)
end
-- Everything is all right.
conn.name = nil
end

return pcall(conn.call, conn, func, args, opts)
end

--
-- Call a function on a replica using its connection. The typical
-- usage is calls under storage.call, because of which there
Expand All @@ -419,9 +466,8 @@ end
local function replica_call(replica, func, args, opts)
assert(opts and opts.timeout)
replica.activity_ts = fiber_clock()
local conn = replica.conn
local net_status, storage_status, retval, error_object =
pcall(conn.call, conn, func, args, opts)
replica_conn_call(replica, func, args, opts)
if not net_status then
-- Do not increase replica's network timeout, if the
-- requested one was less, than network's one. For
Expand Down
1 change: 1 addition & 0 deletions vshard/storage/init.lua
Original file line number Diff line number Diff line change
Expand Up @@ -3468,6 +3468,7 @@ end
local function storage_service_info()
return {
is_master = this_is_master(),
name = box.info.name
}
end

Expand Down

0 comments on commit ad6a3de

Please sign in to comment.