fix(runloop): use upstream status code in passive health check (#10325)
We expect the passive unhealthy health check on HTTP status codes to mark a target as "down" (not reachable or proxiable) based only on the HTTP status code returned by that target; it should not be affected by the final HTTP status code that a Kong plugin sets and returns to the client.

So we change the passive health check implementation to read the Nginx variable `$upstream_status`, which is not changed by any plugin.
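For illustration, here is a minimal sketch (not part of the change; the handler shape and log message are hypothetical) of why the two values can diverge: a plugin that calls `ngx.exit(200)` in its `header_filter` phase, as the integration test below does through the `pre-function` plugin, rewrites the status returned to the client, while Nginx's `$upstream_status` still records what the target actually answered.

```lua
-- Sketch of two phases of a hypothetical plugin handler (illustrative only).

local SketchHandler = {}

-- header_filter: a plugin (e.g. pre-function) can overwrite the status the
-- client will see; $upstream_status is not affected by this.
function SketchHandler:header_filter(conf)
  ngx.exit(200)
end

-- log: ngx.status now holds the plugin-set code, while the Nginx variable
-- still carries what the target actually returned (e.g. "500").
function SketchHandler:log(conf)
  ngx.log(ngx.NOTICE, "client saw ", ngx.status,
          " but the upstream answered ", tostring(ngx.var.upstream_status))
end

return SketchHandler
```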

FTI-4841
Fix #10281

(cherry picked from commit dbe8d94)
oowl authored and bungle committed Mar 21, 2023
1 parent e517284 commit cf5665e
Showing 3 changed files with 74 additions and 2 deletions.
3 changes: 3 additions & 0 deletions CHANGELOG.md
@@ -99,6 +99,9 @@
[#10352](https://github.com/Kong/kong/pull/10352)
- Fix an issue where validation of regex routes may be skipped when the old-fashioned config is used for DB-less Kong.
[#10348](https://github.com/Kong/kong/pull/10348)
- Fix an issue where the balancer passive healthcheck would use the wrong status code when Kong changes the status code
  from the upstream in the `header_filter` phase.
[#10325](https://github.com/Kong/kong/pull/10325)

## 3.2.0

7 changes: 5 additions & 2 deletions kong/runloop/handler.lua
@@ -11,7 +11,7 @@ local certificate = require "kong.runloop.certificate"
local concurrency = require "kong.concurrency"
local lrucache = require "resty.lrucache"
local marshall = require "kong.cache.marshall"
-local ktls = require("resty.kong.tls")
+local ktls = require "resty.kong.tls"

local PluginsIterator = require "kong.runloop.plugins_iterator"
local instrumentation = require "kong.tracing.instrumentation"
@@ -1513,7 +1513,10 @@ return {
-- Report HTTP status for health checks
local balancer_data = ctx.balancer_data
if balancer_data and balancer_data.balancer_handle then
-local status = ngx.status
+-- https://nginx.org/en/docs/http/ngx_http_upstream_module.html#variables
+-- because of the way Nginx reports the upstream_status variable, it may be
+-- a string or a number, so we need to parse it to get the status code
+local status = tonumber(sub(var.upstream_status or "", -3)) or ngx.status
if status == 504 then
balancer_data.balancer.report_timeout(balancer_data.balancer_handle)
else
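For context on the `sub(var.upstream_status or "", -3)` expression above: per the linked Nginx documentation, `$upstream_status` holds a comma- and colon-separated list of status codes when several upstream peers were contacted for one request, and it can be empty when no upstream was reached, so taking the last three characters extracts the status of the final attempt, and the handler falls back to `ngx.status` when nothing usable is there. A standalone sketch of that parsing in plain Lua (the sample values are made up):

```lua
local sub = string.sub

-- $upstream_status may be a single code or a list when Nginx retried peers.
local samples = {
  "200",        -- single upstream response
  "502, 200",   -- first peer failed, the retry succeeded
  "",           -- no upstream response at all (request short-circuited)
}

for _, upstream_status in ipairs(samples) do
  -- the last three characters belong to the final attempt; tonumber() returns
  -- nil for the empty case, which is where the handler falls back to ngx.status
  local status = tonumber(sub(upstream_status, -3))
  print(string.format("%q -> %s", upstream_status, tostring(status)))
end
```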
66 changes: 66 additions & 0 deletions spec/02-integration/05-proxy/10-balancer/01-healthchecks_spec.lua
@@ -1183,6 +1183,72 @@
end
end)

it("perform passive health checks in downstream status code was changed", function()

for nfails = 1, 3 do

bu.begin_testcase_setup(strategy, bp)
-- configure healthchecks
local upstream_name, upstream_id = bu.add_upstream(bp, {
healthchecks = bu.healthchecks_config {
passive = {
unhealthy = {
http_failures = nfails,
}
}
}
})
local port1 = bu.add_target(bp, upstream_id, localhost)
local port2 = bu.add_target(bp, upstream_id, localhost)
local api_host, service_id = bu.add_api(bp, upstream_name)
bp.plugins:insert({
name = "pre-function",
service = { id = service_id },
config = {
header_filter = {
[[
ngx.exit(200)
]],
},
}
})

bu.end_testcase_setup(strategy, bp)

local requests = bu.SLOTS * 2 -- go round the balancer twice

-- setup target servers:
-- server2 will only respond for part of the test,
-- then server1 will take over.
local server2_oks = math.floor(requests / 4)
local server1 = https_server.new(port1, localhost)
local server2 = https_server.new(port2, localhost)
server1:start()
server2:start()

-- Go hit them with our test requests
local client_oks1, client_fails1 = bu.client_requests(bu.SLOTS, api_host)
assert(bu.direct_request(localhost, port2, "/unhealthy"))
local client_oks2, client_fails2 = bu.client_requests(bu.SLOTS, api_host)

local client_oks = client_oks1 + client_oks2
local client_fails = client_fails1 + client_fails2

-- collect server results; hitcount
local count1 = server1:shutdown()
local count2 = server2:shutdown()

-- verify
assert.are.equal(requests - server2_oks - nfails, count1.ok)
assert.are.equal(server2_oks, count2.ok)
assert.are.equal(0, count1.fail)
assert.are.equal(nfails, count2.fail)

assert.are.equal(client_oks, requests)
assert.are.equal(0, client_fails)
end
end)

it("threshold for health checks", function()
local fixtures = {
dns_mock = helpers.dns_mock.new()
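A rough, standalone sketch of the accounting behind the assertions in the new passive health check test above (plain Lua; the slot count is only illustrative, since `bu.SLOTS` is defined by the balancer test helpers):

```lua
-- Illustrative numbers only; bu.SLOTS' real value comes from the test helpers.
local SLOTS  = 10
local nfails = 2                              -- one of the loop's values

local requests    = SLOTS * 2                 -- two rounds of client requests
local server2_oks = math.floor(requests / 4)  -- port2 answers ~half of round one

-- port2 serves server2_oks successes plus nfails failures before the passive
-- healthchecker marks it down; port1 absorbs every remaining request.
local port1_ok   = requests - server2_oks - nfails
local port2_ok   = server2_oks
local port2_fail = nfails

print(("port1 ok=%d, port2 ok=%d, port2 fail=%d"):format(port1_ok, port2_ok, port2_fail))
-- with SLOTS = 10 and nfails = 2: port1 ok=13, port2 ok=5, port2 fail=2
```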
