Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

change(server-info): use a new approach(keepalive) to report DP info #6202

Merged
merged 23 commits into from
Feb 25, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 16 additions & 0 deletions apisix/core/etcd.lua
Original file line number Diff line number Diff line change
Expand Up @@ -219,6 +219,7 @@ local function set(key, value, ttl)
return nil, grant_err
end
res, err = etcd_cli:set(prefix .. key, value, {prev_kv = true, lease = data.body.ID})
res.body.lease_id = data.body.ID
else
res, err = etcd_cli:set(prefix .. key, value, {prev_kv = true})
end
Expand Down Expand Up @@ -372,4 +373,19 @@ function _M.server_version()
end


--- Send a keepalive for `id` through an etcd client obtained from `new()`.
-- The caller visible in this change passes the lease ID that was recorded
-- when the server-info key was written with a TTL.
-- @param id  identifier forwarded unchanged to the client's `keepalive` method
-- @return the client's keepalive result and nil on success;
--         nil and an error value when the client cannot be created or the
--         keepalive call yields a falsy result
function _M.keepalive(id)
    -- new() yields three values; the middle one is not needed here
    local cli, _, cli_err = new()
    if not cli then
        return nil, cli_err
    end

    local resp, ka_err = cli:keepalive(id)
    if resp then
        return resp, nil
    end

    return nil, ka_err
end


return _M
96 changes: 73 additions & 23 deletions apisix/plugins/server-info.lua
Original file line number Diff line number Diff line change
Expand Up @@ -26,27 +26,20 @@ local type = type

local load_time = os.time()
local plugin_name = "server-info"
local default_report_interval = 60
local default_report_ttl = 7200
local default_report_ttl = 60
local lease_id

local schema = {
type = "object",
}
local attr_schema = {
type = "object",
properties = {
report_interval = {
type = "integer",
description = "server info reporting interval (unit: second)",
default = default_report_interval,
minimum = 60,
maximum = 3600,
},
report_ttl = {
type = "integer",
description = "live time for server info in etcd",
default = default_report_ttl,
minimum = 3600,
minimum = 3,
maximum = 86400,
}
}
Expand Down Expand Up @@ -94,16 +87,15 @@ local function uninitialized_server_info()
hostname = core.utils.gethostname(),
id = core.id.get(),
version = core.version.VERSION,
up_time = ngx_time() - boot_time,
boot_time = boot_time,
last_report_time = -1,
tokers marked this conversation as resolved.
Show resolved Hide resolved
}
end


local function get()
local data, err = internal_status:get("server_info")
if err ~= nil then
core.log.error("get error: ", err)
return nil, err
end

Expand All @@ -113,10 +105,10 @@ local function get()

local server_info, err = core.json.decode(data)
if not server_info then
core.log.error("failed to decode server_info: ", err)
return nil, err
end

server_info.up_time = ngx_time() - server_info.boot_time
return server_info
end

Expand All @@ -132,11 +124,37 @@ local function get_server_info()
end


local function set(key, value, ttl)
local res_new, err = core.etcd.set(key, value, ttl)
if not res_new then
core.log.error("failed to set server_info: ", err)
return nil, err
end

if not res_new.body.lease_id then
core.log.error("failed to get lease_id: ", err)
return nil, err
end

lease_id = res_new.body.lease_id
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

should we check if lease_id is not nil?


-- set or update lease_id
local ok, err = internal_status:set("lease_id", lease_id)
if not ok then
core.log.error("failed to set lease_id to shdict: ", err)
return nil, err
end

return true
end


local function report(premature, report_ttl)
if premature then
return
end

-- get apisix node info
local server_info, err = get()
if not server_info then
core.log.error("failed to get server_info: ", err)
Expand All @@ -158,12 +176,47 @@ local function report(premature, report_ttl)
end
end

server_info.last_report_time = ngx_time()

-- get inside etcd data, if not exist, create it
local key = "/data_plane/server_info/" .. server_info.id
local ok, err = core.etcd.set(key, server_info, report_ttl)
local res, err = core.etcd.get(key)
if not res or (res.status ~= 200 and res.status ~= 404) then
core.log.error("failed to get server_info from etcd: ", err)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

if err is not nil, why can we go ahead?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

When an APISIX node is first initialized, etcd does not have the server-info data yet, and I think the get function will report an error, so I let the code continue and set the info in etcd. Maybe using `if not res` would be better.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No, if get method reports error when no data, you should check the error type and decide whether returning or going ahead.

return
end

if not res.body.node then
local ok, err = set(key, server_info, report_ttl)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's strange that get returns nil, err while set returns status, table...
Can we use nil, err in both cases?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Got it.

if not ok then
core.log.error("failed to set server_info to etcd: ", err)
return
end

return
end

local ok = core.table.deep_eq(server_info, res.body.node.value)
-- not equal, update it
if not ok then
core.log.error("failed to report server info to etcd: ", err)
local ok, err = set(key, server_info, report_ttl)
if not ok then
core.log.error("failed to set server_info to etcd: ", err)
return
end

return
end

-- get lease_id from ngx dict
lease_id, err = internal_status:get("lease_id")
if not lease_id then
core.log.error("failed to get lease_id from shdict: ", err)
return
end
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Missing a return?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Already add return code block


-- call keepalive
local res, err = core.etcd.keepalive(lease_id)
if not res then
core.log.error("send heartbeat failed: ", err)
return
end

Expand Down Expand Up @@ -203,8 +256,6 @@ end


function _M.init()
core.log.info("server info: ", core.json.delay_encode(get()))

if core.config ~= require("apisix.core.config_etcd") then
-- we don't need to report server info if etcd is not in use.
return
Expand All @@ -218,12 +269,12 @@ function _M.init()
end

local report_ttl = attr and attr.report_ttl or default_report_ttl
local report_interval = attr and attr.report_interval or default_report_interval
local start_at = ngx_time()

local fn = function()
local now = ngx_time()
if now - start_at >= report_interval then
-- If ttl remaining time is less than half, then flush the ttl
if now - start_at >= (report_ttl / 2) then
start_at = now
report(nil, report_ttl)
end
Expand All @@ -239,8 +290,7 @@ function _M.init()

timers.register_timer("plugin#server-info", fn, true)

core.log.info("timer created to report server info, interval: ",
report_interval)
core.log.info("timer update the server info ttl, current ttl: ", report_ttl)
end


Expand Down
3 changes: 1 addition & 2 deletions conf/config-default.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -438,8 +438,7 @@ plugin_attr:
ip: 127.0.0.1
port: 9091
server-info:
report_interval: 60 # server info report interval (unit: second)
report_ttl: 3600 # live time for server info in etcd (unit: second)
report_ttl: 60 # live time for server info in etcd (unit: second)
dubbo-proxy:
upstream_multiplex_count: 32
request-id:
Expand Down
12 changes: 3 additions & 9 deletions docs/en/latest/plugins/server-info.md
Original file line number Diff line number Diff line change
Expand Up @@ -38,9 +38,7 @@ The meaning of each item in server information is following:

| Name | Type | Description |
|---------|------|-------------|
| up_time | integer | Elapsed time (in seconds) since APISIX instance was launched, value will be reset when you hot updating APISIX but is kept for intact if you just reloading APISIX. |
| boot_time | integer | Bootstrap time (UNIX timestamp) of the APISIX instance. The value is reset when you hot-update APISIX but is kept intact if you only reload APISIX. |
| last_report_time | integer | Last reporting time (UNIX timestamp). |
| id | string | APISIX instance id. |
| etcd_version | string | The etcd cluster version that APISIX is using, value will be `"unknown"` if the network (to etcd) is partitioned. |
| version | string | APISIX version. |
Expand Down Expand Up @@ -75,16 +73,14 @@ We can change the report configurations in the `plugin_attr` section of `conf/co

| Name | Type | Default | Description |
| ------------ | ------ | -------- | -------------------------------------------------------------------- |
| report_interval | integer | 60 | the interval to report server info to etcd (unit: second, maximum: 3600, minimum: 60). |
| report_ttl | integer | 7200 | the live time for server info in etcd (unit: second, maximum: 86400, minimum: 3600). |
| report_ttl | integer | 60 | the live time for server info in etcd (unit: second, maximum: 86400, minimum: 3). |

Here is an example, which modifies the `report_interval` to 10 minutes and sets the `report_ttl` to one hour.
Here is an example, which modifies the `report_ttl` to one minute.

```yaml
plugin_attr:
server-info:
report_interval: 600,
report_ttl: 3600
report_ttl: 60
```

## Test Plugin
Expand All @@ -95,8 +91,6 @@ After enabling this plugin, you can access these data through the plugin Control
$ curl http://127.0.0.1:9090/v1/server_info -s | jq .
{
"etcd_version": "3.5.0",
"up_time": 9460,
"last_report_time": 1608531519,
"id": "b7ce1c5c-b1aa-4df7-888a-cbe403f3e948",
"hostname": "fedora32",
"version": "2.1",
Expand Down
23 changes: 8 additions & 15 deletions docs/zh/latest/plugins/server-info.md
Original file line number Diff line number Diff line change
Expand Up @@ -39,14 +39,12 @@ title: server-info
服务信息中每一项的含义如下:

| 名称 | 类型 | 描述 |
| ---------------- | ------- | ----------------------------------------------------------------------------------------------------------------------- |
| up_time | integer | APISIX 服务实例当前的运行时间(单位:秒), 如果对 APISIX 进行热更新操作,该值将被重置;普通的 reload 操作不会影响该值。 |
| boot_time | integer | APISIX 服务实例的启动时间(UNIX 时间戳),如果对 APIISIX 进行热更新操作,该值将被重置;普通的 reload 操作不会影响该值。 |
| last_report_time | integer | 最近一次服务信息上报的时间 (UNIX 时间戳)。 |
| ---------------- | ------- | --------------------------------------------------------------------------------------------------------------------- |
| boot_time | integer | APISIX 服务实例的启动时间(UNIX 时间戳),如果对 APISIX 进行热更新操作,该值将被重置;普通的 reload 操作不会影响该值。 |
| id | string | APISIX 服务实例 id 。 |
| etcd_version | string | etcd 集群的版本信息,如果 APISIX 和 etcd 集群之间存在网络分区,该值将设置为 `"unknown"`。 |
| etcd_version | string | etcd 集群的版本信息,如果 APISIX 和 etcd 集群之间存在网络分区,该值将设置为 `"unknown"`。 |
| version | string | APISIX 版本信息。 |
| hostname | string | APISIX 所部署的机器或 pod 的主机名信息。 |
| hostname | string | APISIX 所部署的机器或 pod 的主机名信息。 |

## 插件属性

Expand Down Expand Up @@ -76,18 +74,15 @@ plugins: # plugin list
我们可以在 `conf/config.yaml` 文件的 `plugin_attr` 一节中修改上报配置。

| 名称 | 类型 | 默认值 | 描述 |
| --------------- | ------- | ------ | ------------------------------------------------------------------ |
| report_interval | integer | 60 | 上报服务信息至 etcd 的间隔(单位:秒,最大值:3600,最小值:60) |
| report_ttl | integer | 7200 | etcd 中服务信息保存的 TTL(单位:秒,最大值:86400,最小值:3600) |
| --------------- | ------- | ------ | --------------------------------------------------------------- |
| report_ttl | integer | 60 | etcd 中服务信息保存的 TTL(单位:秒,最大值:86400,最小值:3) |

下面的例子将 `report_interval` 修改成了 10 分钟,并将 `report_ttl` 修改成了 1
小时:
下面的例子将 `report_ttl` 修改成了 1 分钟:

```yaml
plugin_attr:
server-info:
report_interval: 600
report_ttl: 3600
report_ttl: 60
```

## 测试插件
Expand All @@ -98,8 +93,6 @@ plugin_attr:
$ curl http://127.0.0.1:9090/v1/server_info -s | jq .
{
"etcd_version": "3.5.0",
"up_time": 9460,
"last_report_time": 1608531519,
"id": "b7ce1c5c-b1aa-4df7-888a-cbe403f3e948",
"hostname": "fedora32",
"version": "2.1",
Expand Down
Loading