diff --git a/test/instances/router.lua b/test/instances/router.lua
index 48373e8b..c220836f 100755
--- a/test/instances/router.lua
+++ b/test/instances/router.lua
@@ -9,6 +9,7 @@ local helpers = require('test.luatest_helpers')
 _G.ifiber = require('fiber')
 _G.imsgpack = require('msgpack')
 _G.ivtest = require('test.luatest_helpers.vtest')
+_G.ivutil = require('vshard.util')
 _G.iwait_timeout = _G.ivtest.wait_timeout
 
 -- Do not load entire vshard into the global namespace to catch errors when code
diff --git a/test/router-luatest/router_test.lua b/test/router-luatest/router_test.lua
index 9c77b66c..09bfd03c 100644
--- a/test/router-luatest/router_test.lua
+++ b/test/router-luatest/router_test.lua
@@ -545,3 +545,48 @@ g.test_enable_disable = function(g)
     -- we don't want this server to interfere with subsequent tests
     g.router_1:drop()
 end
+
+g.test_explicit_fiber_kill = function(g)
+    --
+    -- Kill fibers and wait until they're dead. Without waiting the fiber can
+    -- die during request, which will cause an exception to be thrown.
+    --
+    local rs_uuids = {g.replica_1_a:replicaset_uuid(),
+                      g.replica_2_a:replicaset_uuid()}
+
+    g.router:exec(function(uuids)
+        for id, f in pairs(ifiber.info()) do
+            if f.name:endswith('(net.box)') then
+                ifiber.kill(id)
+            end
+        end
+
+        local replicasets = ivshard.router.static.replicasets
+        local conn_1 = replicasets[uuids[1]].master.conn
+        local conn_2 = replicasets[uuids[2]].master.conn
+        assert(conn_1 and conn_2)
+
+        -- Keep yielding until both connections report a killed fiber. The
+        -- wait is bounded so that a missed kill fails the test, not hangs it.
+        local deadline = ifiber.clock() + iwait_timeout
+        while not (ivutil.conn_fiber_killed(conn_1) and
+                   ivutil.conn_fiber_killed(conn_2)) do
+            assert(ifiber.clock() < deadline, 'fibers are still alive')
+            ifiber.yield()
+        end
+    end, {rs_uuids})
+
+    -- check that all replicasets are accessible and restore connections
+    local bids = {vtest.storage_first_bucket(g.replica_1_a),
+                  vtest.storage_first_bucket(g.replica_2_a)}
+
+    for _, bid in pairs(bids) do
+        local res, err = g.router:exec(function(bucket_id)
+            return ivshard.router.callrw(bucket_id, 'echo', {1},
+                                         {timeout = iwait_timeout})
+        end, {bid})
+
+        t.assert_equals(err, nil, 'no error')
+        t.assert_equals(res, 1, 'good result')
+    end
+end
diff --git a/vshard/replicaset.lua b/vshard/replicaset.lua
index 613fe508..6bae68a9 100644
--- a/vshard/replicaset.lua
+++ b/vshard/replicaset.lua
@@ -170,7 +170,8 @@ end
 --
 local function replicaset_connect_to_replica(replicaset, replica)
     local conn = replica.conn
-    if not conn or conn.state == 'closed' then
+    -- gh-341: additionally check if connection's fiber was explicitly killed
+    if not conn or conn.state == 'closed' or util.conn_fiber_killed(conn) then
         conn = netbox.connect(replica.uri, {
             reconnect_after = consts.RECONNECT_TIMEOUT,
             wait_connected = false
diff --git a/vshard/util.lua b/vshard/util.lua
index c34712ba..f57e19d7 100644
--- a/vshard/util.lua
+++ b/vshard/util.lua
@@ -286,6 +286,24 @@ local function fiber_is_self_canceled()
     return not pcall(fiber.testcancel)
 end
 
+--
+-- Check if connection's fiber was explicitly killed: the connection is in an
+-- error state and its error says that the fiber is cancelled.
+--
+local function conn_fiber_killed(conn)
+    if conn.state ~= 'error_reconnect' and conn.state ~= 'error' then
+        return false
+    end
+    local err = conn.error
+    if err == nil then
+        -- Nothing to inspect, and indexing nil below would raise.
+        return false
+    end
+    local msg = 'fiber is cancelled'
+    -- Match both the error value itself and its 'message' field.
+    return err == msg or err.message == msg
+end
+
 --
 -- Get min tuple from the index with the given key.
 --
@@ -357,6 +375,7 @@ return {
     table_extend = table_extend,
     fiber_cond_wait = fiber_cond_wait,
     fiber_is_self_canceled = fiber_is_self_canceled,
+    conn_fiber_killed = conn_fiber_killed,
     index_min = index_min,
     index_has = index_has,
     feature = feature,