Skip to content
This repository has been archived by the owner on Aug 7, 2021. It is now read-only.

Commit

Permalink
perf(prometheus) use lua-resty-counter on hotpath counter increments
Browse files Browse the repository at this point in the history
Previously, counters in the prometheus plugin were implemented with shm
counters. Multiple shm operations happen on every request, and each
requires an shm-level lock across all workers. The lock overhead is
especially noticeable when running Kong with a large number of cores.

To improve performance, the prometheus plugin now uses [lua-resty-counter](https://github.com/kong/lua-resty-counter)
for counter and histogram metrics. It replaces per-request shm
operations with cheaper worker-level Lua numbers that are synced to shm periodically.

From #69
For #61
For #43
  • Loading branch information
fffonion authored and hbagdi committed Dec 4, 2019
1 parent adf3de9 commit fd844dc
Show file tree
Hide file tree
Showing 4 changed files with 50 additions and 19 deletions.
1 change: 1 addition & 0 deletions kong-prometheus-plugin-0.6.0-1.rockspec
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ description = {
}

dependencies = {
"lua-resty-counter >= 0.2.0",
--"kong >= 0.13.0",
}

Expand Down
24 changes: 21 additions & 3 deletions kong/plugins/prometheus/exporter.lua
Original file line number Diff line number Diff line change
@@ -1,11 +1,16 @@
local counter = require "resty.counter"

local find = string.find
local select = select

local DEFAULT_BUCKETS = { 1, 2, 5, 7, 10, 15, 20, 25, 30, 40, 50, 60, 70,
80, 90, 100, 200, 300, 400, 500, 1000,
2000, 5000, 10000, 30000, 60000 }
local metrics = {}
-- prometheus.lua instance
local prometheus
-- lua-resty-counter instance
local counter_instance


local function init()
Expand Down Expand Up @@ -58,6 +63,15 @@ local function init()
{"type", "service", "route"})
end

-- Per-worker setup: creates a lua-resty-counter instance (passing
-- "prometheus_metrics" as its first argument), stores it in the module-level
-- `counter_instance`, and hands it to the prometheus.lua instance.
-- Raises, via `error`, any error value returned by `counter.new`.
local function init_worker()
  -- second argument is the sync interval: once every second
  local instance, err = counter.new("prometheus_metrics", 1)
  -- the module-level handle is updated before the error check
  counter_instance = instance
  if err then
    error(err)
  end
  prometheus:set_resty_counter(instance)
end

local function log(message)
if not metrics then
Expand Down Expand Up @@ -156,12 +170,16 @@ local function collect()
{res.workers_lua_vms[i].pid})
end

-- force a manual sync of the counter's local state so that integration tests work
counter_instance:sync()

prometheus:collect()
end


return {
init = init,
log = log,
collect = collect,
init = init,
init_worker = init_worker,
log = log,
collect = collect,
}
4 changes: 4 additions & 0 deletions kong/plugins/prometheus/handler.lua
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,10 @@ local PrometheusHandler = {
VERSION = "0.6.0",
}

-- init_worker handler: forwards to `prometheus.init_worker()`.
-- The single argument is unused, hence named `_`.
-- NOTE(review): `prometheus` is defined outside this hunk; presumably it is
-- the exporter module -- confirm against the require at the top of the file.
function PrometheusHandler:init_worker(_)
prometheus.init_worker()
end


function PrometheusHandler:log(_)
local message = basic_serializer.serialize(ngx)
Expand Down
40 changes: 24 additions & 16 deletions kong/plugins/prometheus/prometheus.lua
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,6 @@
-- https://github.com/knyar/nginx-lua-prometheus
-- Released under MIT license.


-- Default set of latency buckets, 5ms to 10s:
local DEFAULT_BUCKETS = {0.005, 0.01, 0.02, 0.03, 0.05, 0.075, 0.1, 0.2, 0.3,
0.4, 0.5, 0.75, 1, 1.5, 2, 3, 4, 5, 10}
Expand Down Expand Up @@ -177,7 +176,22 @@ function Gauge:inc(value, label_values)
self.prometheus:log_error(err)
return
end
self.prometheus:inc(self.name, self.label_names, label_values, value or 1)
local key = full_metric_name(self.name, self.label_names, label_values)

local newval, err = self.dict:incr(key, value)
if newval then
return
end
-- Yes, this looks like a race, so I guess we might under-report some values
-- when multiple workers simultaneously try to create the same metric.
-- Hopefully this does not happen too often (shared dictionary does not get
-- reset during configuration reload).
if err == "not found" then
self:set_key(key, value)
return
end
-- Unexpected error
self:log_error_kv(key, value, err)
end

local Histogram = Metric:new()
Expand Down Expand Up @@ -338,6 +352,8 @@ function Prometheus.init(dict_name, prefix)
"Please define the dictionary using `lua_shared_dict`.")
return self
end
-- by default, resty_counter falls back to the shdict
self.resty_counter = self.dict
self.help = {}
if prefix then
self.prefix = prefix
Expand All @@ -356,6 +372,11 @@ function Prometheus.init(dict_name, prefix)
return self
end

-- Enable the use of lua-resty-counter for Counter and Histogram metrics by
-- storing `counter` in `self.resty_counter`.
-- @param counter the object to store; intended to be a lua-resty-counter
--   instance.
-- NOTE(review): `Prometheus:inc` calls `self.resty_counter:incr(key, value)`,
-- so `counter` presumably has to provide that method -- confirm.
function Prometheus:set_resty_counter(counter)
self.resty_counter = counter
end

function Prometheus:log_error(...)
ngx.log(ngx.ERR, ...)
self.dict:incr("nginx_metric_errors_total", 1)
Expand Down Expand Up @@ -493,20 +514,7 @@ function Prometheus:inc(name, label_names, label_values, value)
local key = full_metric_name(name, label_names, label_values)
if value == nil then value = 1 end

local newval, err = self.dict:incr(key, value)
if newval then
return
end
-- Yes, this looks like a race, so I guess we might under-report some values
-- when multiple workers simultaneously try to create the same metric.
-- Hopefully this does not happen too often (shared dictionary does not get
-- reset during configuration reload).
if err == "not found" then
self:set_key(key, value)
return
end
-- Unexpected error
self:log_error_kv(key, value, err)
self.resty_counter:incr(key, value)
end

-- Set the current value of a gauge to `value`
Expand Down

0 comments on commit fd844dc

Please sign in to comment.