diff --git a/apisix/balancer.lua b/apisix/balancer.lua index 2540f2ff2d1c..4be56fcc28aa 100644 --- a/apisix/balancer.lua +++ b/apisix/balancer.lua @@ -161,8 +161,12 @@ local function pick_server(route, ctx) version = version .. "#" .. checker.status_ver end - local server_picker = lrucache_server_picker(key, version, - create_server_picker, up_conf, checker) + -- the same picker will be used in the whole request, especially during the retry + local server_picker = ctx.server_picker + if not server_picker then + server_picker = lrucache_server_picker(key, version, + create_server_picker, up_conf, checker) + end if not server_picker then return nil, "failed to fetch server picker" end diff --git a/apisix/balancer/chash.lua b/apisix/balancer/chash.lua index df1568a5dc5f..f9dbdbbb470b 100644 --- a/apisix/balancer/chash.lua +++ b/apisix/balancer/chash.lua @@ -22,6 +22,9 @@ local str_gsub = string.gsub local pairs = pairs +local CONSISTENT_POINTS = 160 -- points per server, taken from `resty.chash` + + local _M = {} @@ -62,27 +65,59 @@ end function _M.new(up_nodes, upstream) local str_null = str_char(0) + local nodes_count = 0 + local safe_limit = 0 local servers, nodes = {}, {} for serv, weight in pairs(up_nodes) do local id = str_gsub(serv, ":", str_null) + nodes_count = nodes_count + 1 + safe_limit = safe_limit + weight servers[id] = serv nodes[id] = weight end + safe_limit = safe_limit * CONSISTENT_POINTS local picker = resty_chash:new(nodes) return { upstream = upstream, get = function (ctx) local id - if ctx.balancer_try_count > 1 and ctx.chash_last_server_index then - id, ctx.chash_last_server_index = picker:next(ctx.chash_last_server_index) + if ctx.balancer_tried_servers then + if ctx.balancer_tried_servers_count == nodes_count then + return nil, "all upstream servers tried" + end + + -- the 'safe_limit' is a best effort limit to prevent infinite loop caused by bug + for i = 1, safe_limit do + id, ctx.chash_last_server_index = picker:next(ctx.chash_last_server_index) + if not ctx.balancer_tried_servers[servers[id]] then + break + end + end else local chash_key = fetch_chash_hash_key(ctx, upstream) id, ctx.chash_last_server_index = picker:find(chash_key) end -- core.log.warn("chash id: ", id, " val: ", servers[id]) return servers[id] + end, + after_balance = function (ctx, before_retry) + if not before_retry then + if ctx.balancer_tried_servers then + core.tablepool.release("balancer_tried_servers", ctx.balancer_tried_servers) + ctx.balancer_tried_servers = nil + end + + return nil + end + + if not ctx.balancer_tried_servers then + ctx.balancer_tried_servers = core.tablepool.fetch("balancer_tried_servers", 0, 2) + end + + ctx.balancer_tried_servers[ctx.balancer_server] = true + ctx.balancer_tried_servers_count = (ctx.balancer_tried_servers_count or 0) + 1 end } end diff --git a/t/admin/balancer.t b/t/admin/balancer.t index b9a76c5f3487..d1b9027faad0 100644 --- a/t/admin/balancer.t +++ b/t/admin/balancer.t @@ -52,6 +52,8 @@ add_block_preprocessor(sub { for _, key in ipairs(keys) do ngx.say("host: ", key, " count: ", res[key]) end + + ctx.server_picker = nil end _EOC_ $block->set_value("init_by_lua_block", $init_by_lua_block); diff --git a/t/node/chash-balance.t b/t/node/chash-balance.t index 02eee5f5adc0..dfde2aaffee5 100644 --- a/t/node/chash-balance.t +++ b/t/node/chash-balance.t @@ -496,3 +496,63 @@ GET /t --- error_code_like: ^(?:50\d)$ --- error_log failed to find valid upstream server, no valid upstream node + + + +=== TEST 13: set route(ensure retry can try every node) +--- config + location /t { + content_by_lua_block { + local t = require("lib.test_admin").test + local code, body = t('/apisix/admin/routes/1', + ngx.HTTP_PUT, + [[{ + "uri": "/server_port", + "upstream": { + "key": "arg_device_id", + "type": "chash", + "nodes": { + "127.0.0.1:1979": 1000, + "127.0.0.1:1980": 1 + } + } + }]] + ) + + if code >= 300 then + ngx.status = code + end + ngx.say(body) + } + } +--- request +GET /t +--- response_body +passed +--- no_error_log +[error] + + + +=== TEST 14: hit routes +--- config + location /t { + content_by_lua_block { + local http = require "resty.http" + local uri = "http://127.0.0.1:" .. ngx.var.server_port + .. "/server_port?device_id=1" + + local httpc = http.new() + local res, err = httpc:request_uri(uri, {method = "GET"}) + if not res then + ngx.say(err) + return + end + + ngx.say(res.status) + } + } +--- request +GET /t +--- response_body +200 diff --git a/t/node/healthcheck.t b/t/node/healthcheck.t index d9bacb86fb9c..9f77a4311090 100644 --- a/t/node/healthcheck.t +++ b/t/node/healthcheck.t @@ -488,7 +488,7 @@ qr{.*http://127.0.0.1:1960/server_port.* .*http://127.0.0.1:1961/server_port.* .*http://127.0.0.1:1961/server_port.* .*http://127.0.0.1:1961/server_port.* -.*http://127.0.0.1:1960/server_port.*} +.*http://127.0.0.1:1961/server_port.*} --- timeout: 10