feat: Add option retry_timeout that like nginx's proxy_next_upstream_…

…timeout (#4574) Co-authored-by: zhuo.chen <zhuo.chen@upai.com>
apache · Jul 14, 2021 · 307c7f6 · 307c7f6
1 parent 5061221
commit 307c7f6
Show file tree

Hide file tree

Showing 9 changed files with 98 additions and 3 deletions.
diff --git a/apisix/balancer.lua b/apisix/balancer.lua
@@ -23,6 +23,7 @@ local enable_keepalive = balancer.enable_keepalive
 local set_more_tries   = balancer.set_more_tries
 local get_last_failure = balancer.get_last_failure
 local set_timeouts     = balancer.set_timeouts
+local ngx_now          = ngx.now
 
 
 local module_name = "balancer"
@@ -161,6 +162,9 @@ local function set_balancer_opts(route, ctx)
     end
 
     if retries > 0 then
+        if up_conf.retry_timeout and up_conf.retry_timeout > 0 then
+            ctx.proxy_retry_deadline = ngx_now() + up_conf.retry_timeout
+        end
         local ok, err = set_more_tries(retries)
         if not ok then
             core.log.error("could not set upstream retries: ", err)
@@ -293,6 +297,10 @@ function _M.run(route, ctx)
         set_balancer_opts(route, ctx)
 
     else
+        if ctx.proxy_retry_deadline and ctx.proxy_retry_deadline < ngx_now() then
+            core.log.error("proxy retry timeout, retry count: ", ctx.balancer_try_count or 0)
+            return core.response.exit(502)
+        end
         -- retry
         server, err = pick_server(route, ctx)
         if not server then

diff --git a/apisix/schema_def.lua b/apisix/schema_def.lua
@@ -353,6 +353,10 @@ local upstream_schema = {
             type = "integer",
             minimum = 0,
         },
+        retry_timeout = {
+            type = "integer",
+            minimum = 0,
+        },
         timeout = timeout_def,
         tls = {
             type = "object",

diff --git a/docs/en/latest/admin-api.md b/docs/en/latest/admin-api.md
@@ -546,6 +546,7 @@ In addition to the basic complex equalization algorithm selection, APISIX's Upst
 |key             |optional|This option is only valid if the `type` is `chash`. Find the corresponding node `id` according to `hash_on` and `key`. When `hash_on` is set as `vars`, `key` is the required parameter, for now, it support nginx built-in variables like `uri, server_name, server_addr, request_uri, remote_port, remote_addr, query_string, host, hostname, arg_***`, `arg_***` is arguments in the request line, [Nginx variables list](http://nginx.org/en/docs/varindex.html). When `hash_on` is set as `header`, `key` is the required parameter, and `header name` is customized. When `hash_on` is set to `cookie`, `key` is the required parameter, and `cookie name` is customized. When `hash_on` is set to `consumer`, `key` does not need to be set. In this case, the `key` adopted by the hash algorithm is the `consumer_name` authenticated. If the specified `hash_on` and `key` can not fetch values, it will be fetch `remote_addr` by default.|
 |checks          |optional|Configure the parameters of the health check. For details, refer to [health-check](health-check.md).|
 |retries         |optional|Pass the request to the next upstream using the underlying Nginx retry mechanism, the retry mechanism is enabled by default and set the number of retries according to the number of available backend nodes. If `retries` option is explicitly set, it will override the default value. `0` means disable retry mechanism.|
+|retry_timeout   |optional|Limits the amount of time that retries can be continued, and do not continue retries if the previous request and retry requests have taken too long. `0` means disable retry timeout mechanism.|
 |timeout         |optional| Set the timeout for connecting, sending and receiving messages. |
 |name            |optional|Identifies upstream names|
 |desc            |optional|upstream usage scenarios, and more.|

diff --git a/docs/en/latest/plugins/traffic-split.md b/docs/en/latest/plugins/traffic-split.md
@@ -59,7 +59,7 @@ Note: The ratio between each upstream may not so accurate since the drawback of
 | weighted_upstreams.weight      | integer       | optional    | weight = 1   |  | The traffic is divided according to the `weight` value, and the roundrobin algorithm is used to divide multiple `weight`. |
 
 Currently, in the configuration of `weighted_upstreams.upstream`, the unsupported fields are:
-service_name, discovery_type, checks, retries, desc, scheme, labels, create_time and update_time. But you can use `weighted_upstreams.upstream_id` to bind the `upstream` object to achieve their functions.
+service_name, discovery_type, checks, retries, retry_timeout, desc, scheme, labels, create_time and update_time. But you can use `weighted_upstreams.upstream_id` to bind the `upstream` object to achieve their functions.
 
 The traffic-split plugin is mainly composed of two parts: `match` and `weighted_upstreams`. `match` is a custom conditional rule, and `weighted_upstreams` is upstream configuration information. If you configure `match` and `weighted_upstreams` information, then after the `match` rule is verified, it will be based on the `weight` value in `weighted_upstreams`; the ratio of traffic between each upstream in the plugin will be guided, otherwise, all traffic will be directly Reach the `upstream` configured on `route` or `service`. Of course, you can also configure only the `weighted_upstreams` part, which will directly guide the traffic ratio between each upstream in the plugin based on the `weight` value in `weighted_upstreams`.
 

diff --git a/docs/zh/latest/admin-api.md b/docs/zh/latest/admin-api.md
@@ -551,6 +551,7 @@ APISIX 的 Upstream 除了基本的负载均衡算法选择外，还支持对上
 | key            | 条件必需                           | 匹配类型       | 该选项只有类型是 `chash` 才有效。根据 `key` 来查找对应的 node `id`，相同的 `key` 在同一个对象中，永远返回相同 id，目前支持的 Nginx 内置变量有 `uri, server_name, server_addr, request_uri, remote_port, remote_addr, query_string, host, hostname, arg_***`，其中 `arg_***` 是来自 URL 的请求参数，[Nginx 变量列表](http://nginx.org/en/docs/varindex.html) |                                                  |
 | checks         | 可选                               | health_checker | 配置健康检查的参数，详细可参考[health-check](health-check.md)                                                                                                                                                                                                                                                                                               |                                                  |
 | retries        | 可选                               | 整型           | 使用底层的 Nginx 重试机制将请求传递给下一个上游，默认启用重试且次数为后端可用的 node 数量。如果指定了具体重试次数，它将覆盖默认值。`0` 代表不启用重试机制。                                                                                                                                                                                                 |                                                  |
+| retry_timeout  | 可选                               | 整型           | 限制是否继续重试的时间，若之前的请求和重试请求花费太多时间就不再继续重试。`0` 代表不启用重试超时机制。                                                                                                                                                                                                 |                                                  |
 | timeout        | 可选                               | 超时时间对象   | 设置连接、发送消息、接收消息的超时时间                                                                                                                                                                                                                                                                                                                      |                                                  |
 | hash_on        | 可选                               | 辅助           | `hash_on` 支持的类型有 `vars`（Nginx 内置变量），`header`（自定义 header），`cookie`，`consumer`，默认值为 `vars`                                                                                                                                                                                                                                           |
 | name           | 可选                               | 辅助           | 标识上游服务名称、使用场景等。                                                                                                                                                                                                                                                                                                                              |                                                  |

diff --git a/docs/zh/latest/plugins/traffic-split.md b/docs/zh/latest/plugins/traffic-split.md
@@ -59,7 +59,7 @@ traffic-split 插件使用户可以逐步引导各个上游之间的流量百分
 | weighted_upstreams.weight      | integer | 可选   |   weight = 1     |        | 根据 `weight` 值做流量划分，多个 weight 之间使用 roundrobin 算法划分。|
 
 目前在 `weighted_upstreams.upstream` 的配置中，不支持的字段有：
-service_name、discovery_type、checks、retries、desc、scheme、labels、create_time 和 update_time。但是你可以通过 `weighted_upstreams.upstream_id` 绑定 `upstream` 对象来实现他们。
+service_name、discovery_type、checks、retries、retry_timeout、desc、scheme、labels、create_time 和 update_time。但是你可以通过 `weighted_upstreams.upstream_id` 绑定 `upstream` 对象来实现他们。
 
 traffic-split 插件主要由 `match` 和 `weighted_upstreams` 两部分组成，`match` 是自定义的条件规则，`weighted_upstreams` 是 upstream 的配置信息。如果配置 `match` 和 `weighted_upstreams` 信息，那么在 `match` 规则校验通过后，会根据 `weighted_upstreams` 中的 `weight` 值；引导插件中各个 upstream 之间的流量比例，否则，所有流量直接到达 `route` 或 `service` 上配置的 `upstream`。当然你也可以只配置 `weighted_upstreams` 部分，这样会直接根据 `weighted_upstreams` 中的 `weight` 值，引导插件中各个 upstream 之间的流量比例。
 

diff --git a/t/admin/upstream3.t b/t/admin/upstream3.t
@@ -61,3 +61,33 @@ __DATA__
     }
 --- response_body
 {"action":"get","count":0,"node":{"dir":true,"key":"/apisix/upstreams","nodes":{}}}
+
+
+
+=== TEST 2: retry_timeout is -1 (INVALID)
+--- config
+    location /t {
+        content_by_lua_block {
+            local t = require("lib.test_admin").test
+            local code, body = t('/apisix/admin/upstreams/a-b-c-ABC_0123',
+                ngx.HTTP_PUT,
+                [[{
+                    "nodes": {
+                        "127.0.0.1:8080": 1,
+                        "127.0.0.1:8090": 1
+                    },
+                    "retry_timeout": -1,
+                    "type": "roundrobin"
+                }]]
+            )
+            if code >= 300 then
+                ngx.status = code
+            end
+            ngx.print(body)
+        }
+    }
+--- request
+GET /t
+--- error_code: 400
+--- response_body
+{"error_msg":"invalid configuration: property \"retry_timeout\" validation failed: expected -1 to be greater than 0"}
diff --git a/t/lib/server.lua b/t/lib/server.lua
@@ -356,7 +356,11 @@ end
 
 function _M.mysleep()
     ngx.sleep(tonumber(ngx.var.arg_seconds))
-    ngx.say(ngx.var.arg_seconds)
+    if ngx.var.arg_abort then
+        ngx.exit(ngx.ERROR)
+    else
+        ngx.say(ngx.var.arg_seconds)
+    end
 end
 
 

diff --git a/t/node/upstream-retries.t b/t/node/upstream-retries.t
@@ -267,3 +267,50 @@ qr/proxy request to 127.0.0.1:1
 proxy request to 127.0.0.2:1
 |proxy request to 127.0.0.2:1
 proxy request to 127.0.0.1:1/
+
+
+
+=== TEST 13: stop proxy to next upstream by retry_timeout
+--- config
+    location /t {
+        content_by_lua_block {
+            local t = require("lib.test_admin").test
+            local code, body = t('/apisix/admin/routes/1',
+                 ngx.HTTP_PUT,
+                 [[{
+                        "upstream": {
+                            "nodes": {
+                                "127.0.0.1:1980": 100,
+                                "127.0.0.1:1981": 100,
+                                "127.0.0.1:1982": 100
+                            },
+                            "retries": 10,
+                            "retry_timeout": 2,
+                            "type": "roundrobin"
+                        },
+                        "uri": "/mysleep"
+                }]]
+                )
+
+            if code ~= 200 then
+                ngx.say(body)
+                return
+            end
+            local http = require "resty.http"
+            local httpc = http.new()
+            local uri = "http://127.0.0.1:" .. ngx.var.server_port
+                        .. "/mysleep?abort=true&seconds=1"
+            local res, err = httpc:request_uri(uri)
+            if not res then
+                ngx.say(err)
+                return
+            end
+            ngx.status = res.status
+            ngx.say(res.status)
+        }
+    }
+--- request
+GET /t
+--- error_code: 502
+--- error_log eval
+qr/proxy retry timeout, retry count: 2/