Skip to content

Commit

Permalink
Bug fix in buffer pool calculation and headroom checking
Browse files Browse the repository at this point in the history
- Take the number of lanes, instead of the speed, into account when determining whether a port has doubled pipeline latency
  For speeds other than 400G, eg 100G, it's possible that some 100G ports have 8 lanes and others have 4 lanes
  In this case, we need to add "8_lane" to the profile name to indicate whether the profile is for 8 lane ports or normal ports
- Take advantage of Lua tables used as sets to represent the profile reference count, which also makes the code more maintainable
- Take deviation into account when checking the headroom against the limit

Signed-off-by: Stephen Sun <stephens@nvidia.com>
  • Loading branch information
stephenxs committed Jun 17, 2021
1 parent 5847af3 commit d8cfaa3
Show file tree
Hide file tree
Showing 5 changed files with 138 additions and 72 deletions.
40 changes: 34 additions & 6 deletions cfgmgr/buffer_check_headroom_mellanox.lua
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,25 @@ local port = KEYS[1]
local input_profile_name = ARGV[1]
local input_profile_size = ARGV[2]
local new_pg = ARGV[3]
local accumulative_size = 0

local function is_port_with_8lanes(lanes)
-- On Spectrum 3, ports with 8 lanes have doubled pipeline latency.
-- 'lanes' is the comma-separated lane list from the PORT table (or nil);
-- the lane count is one more than the number of commas.
if not lanes then
    return false
end
local lane_count = 1
for _ in string.gmatch(lanes, ",") do
    lane_count = lane_count + 1
end
return lane_count == 8
end

-- Initialize the accumulative size with 4096
-- This is to absorb the possible deviation
local accumulative_size = 4096

local appl_db = "0"
local state_db = "6"
local config_db = "4"

local ret_true = {}
local ret = {}
Expand All @@ -20,7 +35,18 @@ table.insert(ret_true, "result:true")

default_ret = ret_true

local speed = redis.call('HGET', 'PORT|' .. port, 'speed')
-- Connect to CONFIG_DB
redis.call('SELECT', config_db)

local lanes

-- On SPC3 switches, we need to know whether it is an 8-lane port because such ports have extra pipeline latency
local is_spc3 = false
local platform = redis.call('HGET', 'DEVICE_METADATA|localhost', 'platform')
if platform and string.sub(platform, 1, 16) == "x86_64-mlnx_msn4" then
is_spc3 = true
lanes = redis.call('HGET', 'PORT|' .. port, 'lanes')
end

-- Fetch the threshold from STATE_DB
redis.call('SELECT', state_db)
Expand All @@ -31,11 +57,12 @@ if max_headroom_size == nil then
end

local asic_keys = redis.call('KEYS', 'ASIC_TABLE*')
local pipeline_delay = tonumber(redis.call('HGET', asic_keys[1], 'pipeline_latency'))
if speed == 400000 then
pipeline_delay = pipeline_delay * 2 - 1
local pipeline_latency = tonumber(redis.call('HGET', asic_keys[1], 'pipeline_latency'))
if is_spc3 and is_port_with_8lanes(lanes) then
-- The pipeline latency should be adjusted accordingly for ports with 2 buffer units
pipeline_latency = pipeline_latency * 2 - 1
end
accumulative_size = accumulative_size + 2 * pipeline_delay * 1024
accumulative_size = accumulative_size + 2 * pipeline_latency * 1024

-- Fetch all keys in BUFFER_PG according to the port
redis.call('SELECT', appl_db)
Expand Down Expand Up @@ -95,6 +122,7 @@ end

if max_headroom_size > accumulative_size then
table.insert(ret, "result:true")
table.insert(ret, "debug:Accumulative headroom on port " .. accumulative_size .. ", the maximum available headroom " .. max_headroom_size)
else
table.insert(ret, "result:false")
table.insert(ret, "debug:Accumulative headroom on port " .. accumulative_size .. " exceeds the maximum available headroom which is " .. max_headroom_size)
Expand Down
6 changes: 4 additions & 2 deletions cfgmgr/buffer_headroom_mellanox.lua
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
-- ARGV[2] - cable length
-- ARGV[3] - port mtu
-- ARGV[4] - gearbox delay
-- ARGV[5] - whether the port is an 8-lane port

-- parameters retried from databases:
-- From CONFIG_DB.LOSSLESS_TRAFFIC_PATTERN
Expand All @@ -26,6 +27,7 @@ local port_speed = tonumber(ARGV[1])
local cable_length = tonumber(string.sub(ARGV[2], 1, -2))
local port_mtu = tonumber(ARGV[3])
local gearbox_delay = tonumber(ARGV[4])
local is_8lane = ARGV[5]

local appl_db = "0"
local config_db = "4"
Expand Down Expand Up @@ -100,8 +102,8 @@ local xon_value
local headroom_size
local speed_overhead

-- Adjustment for 400G
if port_speed == 400000 then
-- Adjustment for 8-lane port
if is_8lane ~= nil and is_8lane == "true" then
pipeline_latency = 37 * 1024
speed_overhead = port_mtu
else
Expand Down
135 changes: 78 additions & 57 deletions cfgmgr/buffer_pool_mellanox.lua
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,12 @@ local appl_db = "0"
local config_db = "4"
local state_db = "6"

local lossypg_reserved = 19 * 1024
local lossypg_reserved_400g = 37 * 1024
-- Number of 400G ports
local port_count_400g = 0
-- Number of lossy PG on 400G ports
local lossypg_400g = 0
-- Number of ports with 8 lanes (whose pipeline latency should be doubled)
local port_count_8lanes = 0
-- Number of lossy PG on ports with 8 lanes
local lossypg_8lanes = 0

local is_spc3 = false

local result = {}
local profiles = {}
Expand All @@ -20,16 +20,7 @@ local total_port = 0
local mgmt_pool_size = 256 * 1024
local egress_mirror_headroom = 10 * 1024

-- Look up a buffer profile reference in the global 'profiles' list.
-- 'ref' is a reference string of the form "[<profile table key>]";
-- each entry of 'profiles' is a pair {name, reference_count}.
-- Returns the 1-based index of the matching entry, or 0 if not found.
local function find_profile(ref)
-- Remove the surrounding square brackets, then find the name in the list
local name = string.sub(ref, 2, -2)
for i = 1, #profiles, 1 do
if profiles[i][1] == name then
return i
end
end
-- 0 signals that the referenced profile is not (yet) known
return 0
end
local port_set_8lanes = {}

local function iterate_all_items(all_items)
table.sort(all_items)
Expand All @@ -43,9 +34,13 @@ local function iterate_all_items(all_items)
port = string.match(all_items[i], "Ethernet%d+")
if port ~= nil then
local range = string.match(all_items[i], "Ethernet%d+:([^%s]+)$")
local profile = redis.call('HGET', all_items[i], 'profile')
local index = find_profile(profile)
if index == 0 then
local profile_name = redis.call('HGET', all_items[i], 'profile')
if not profile_name then
return 1
end
profile_name = string.sub(profile_name, 2, -2)
local profile_ref_count = profiles[profile_name]
if profile_ref_count == nil then
-- Indicate an error in case the referenced profile hasn't been inserted or has been removed
-- It's possible when the orchagent is busy
-- The buffermgrd will take care of it and retry later
Expand All @@ -57,13 +52,9 @@ local function iterate_all_items(all_items)
else
size = 1 + tonumber(string.sub(range, -1)) - tonumber(string.sub(range, 1, 1))
end
profiles[index][2] = profiles[index][2] + size
local speed = redis.call('HGET', 'PORT_TABLE:'..port, 'speed')
if speed == '400000' then
if profile == '[BUFFER_PROFILE_TABLE:ingress_lossy_profile]' then
lossypg_400g = lossypg_400g + size
end
port_count_400g = port_count_400g + 1
profiles[profile_name] = profile_ref_count + size
if is_spc3 and port_set_8lanes[port] and profile_name == 'BUFFER_PROFILE_TABLE:ingress_lossy_profile' then
lossypg_8lanes = lossypg_8lanes + size
end
end
end
Expand All @@ -77,6 +68,31 @@ local ports_table = redis.call('KEYS', 'PORT|*')

total_port = #ports_table

-- Initialize the port_set_8lanes set: map each port name to true/false
-- depending on whether it is an 8-lane port, and count the 8-lane ports.
-- Only done on Spectrum-3 (SPC3) platforms, identified by the platform
-- string prefix; on other platforms is_spc3 stays false and the set stays empty.
local platform = redis.call('HGET', 'DEVICE_METADATA|localhost', 'platform')
if platform and string.sub(platform, 1, 16) == "x86_64-mlnx_msn4" then
is_spc3 = true
local lanes
local number_of_lanes
local port
for i = 1, total_port, 1 do
-- Load lanes from PORT table
lanes = redis.call('HGET', ports_table[i], 'lanes')
if lanes then
local _
-- Count lanes as (number of commas in the lane list) + 1
_, number_of_lanes = string.gsub(lanes, ",", ",")
number_of_lanes = number_of_lanes + 1
-- Strip the "PORT|" prefix (5 chars) from the CONFIG_DB key to get the port name
port = string.sub(ports_table[i], 6, -1)
if (number_of_lanes == 8) then
port_set_8lanes[port] = true
port_count_8lanes = port_count_8lanes + 1
else
port_set_8lanes[port] = false
end
end
end
end

local egress_lossless_pool_size = redis.call('HGET', 'BUFFER_POOL|egress_lossless_pool', 'size')

-- Whether shared headroom pool is enabled?
Expand All @@ -97,13 +113,30 @@ else
shp_size = 0
end

-- Fetch mmu_size
redis.call('SELECT', state_db)
local mmu_size = tonumber(redis.call('HGET', 'BUFFER_MAX_PARAM_TABLE|global', 'mmu_size'))
if mmu_size == nil then
mmu_size = tonumber(egress_lossless_pool_size)
end
local asic_keys = redis.call('KEYS', 'ASIC_TABLE*')
local cell_size = tonumber(redis.call('HGET', asic_keys[1], 'cell_size'))
local pipeline_latency = tonumber(redis.call('HGET', asic_keys[1], 'pipeline_latency'))

local lossypg_reserved = pipeline_latency * 1024
local lossypg_reserved_8lanes = (2 * pipeline_latency - 1) * 1024

-- Align mmu_size at cell size boundary, otherwise the sdk will complain and the syncd will fail
local number_of_cells = math.floor(mmu_size / cell_size)
local ceiling_mmu_size = number_of_cells * cell_size

-- Switch to APPL_DB
redis.call('SELECT', appl_db)

-- Fetch names of all profiles and insert them into the look up table
local all_profiles = redis.call('KEYS', 'BUFFER_PROFILE*')
for i = 1, #all_profiles, 1 do
table.insert(profiles, {all_profiles[i], 0})
profiles[all_profiles[i]] = 0
end

-- Fetch all the PGs
Expand All @@ -122,56 +155,44 @@ local statistics = {}
-- Fetch sizes of all of the profiles, accumulate them
local accumulative_occupied_buffer = 0
local accumulative_xoff = 0
for i = 1, #profiles, 1 do
if profiles[i][1] ~= "BUFFER_PROFILE_TABLE_KEY_SET" and profiles[i][1] ~= "BUFFER_PROFILE_TABLE_DEL_SET" then
local size = tonumber(redis.call('HGET', profiles[i][1], 'size'))

for name in pairs(profiles) do
if name ~= "BUFFER_PROFILE_TABLE_KEY_SET" and name ~= "BUFFER_PROFILE_TABLE_DEL_SET" then
local size = tonumber(redis.call('HGET', name, 'size'))
if size ~= nil then
if profiles[i][1] == "BUFFER_PROFILE_TABLE:ingress_lossy_profile" then
if name == "BUFFER_PROFILE_TABLE:ingress_lossy_profile" then
size = size + lossypg_reserved
end
if profiles[i][1] == "BUFFER_PROFILE_TABLE:egress_lossy_profile" then
profiles[i][2] = total_port
if name == "BUFFER_PROFILE_TABLE:egress_lossy_profile" then
profiles[name] = total_port
end
if size ~= 0 then
if shp_enabled and shp_size == 0 then
local xon = tonumber(redis.call('HGET', profiles[i][1], 'xon'))
local xoff = tonumber(redis.call('HGET', profiles[i][1], 'xoff'))
local xon = tonumber(redis.call('HGET', name, 'xon'))
local xoff = tonumber(redis.call('HGET', name, 'xoff'))
if xon ~= nil and xoff ~= nil and xon + xoff > size then
accumulative_xoff = accumulative_xoff + (xon + xoff - size) * profiles[i][2]
accumulative_xoff = accumulative_xoff + (xon + xoff - size) * profiles[name]
end
end
accumulative_occupied_buffer = accumulative_occupied_buffer + size * profiles[i][2]
accumulative_occupied_buffer = accumulative_occupied_buffer + size * profiles[name]
end
table.insert(statistics, {profiles[i][1], size, profiles[i][2]})
table.insert(statistics, {name, size, profiles[name]})
end
end
end

-- Extra lossy xon buffer for 400G port
local lossypg_extra_for_400g = (lossypg_reserved_400g - lossypg_reserved) * lossypg_400g
accumulative_occupied_buffer = accumulative_occupied_buffer + lossypg_extra_for_400g
-- Extra lossy xon buffer for ports with 8 lanes
local lossypg_extra_for_8lanes = (lossypg_reserved_8lanes - lossypg_reserved) * lossypg_8lanes
accumulative_occupied_buffer = accumulative_occupied_buffer + lossypg_extra_for_8lanes

-- Accumulate sizes for management PGs
local accumulative_management_pg = (total_port - port_count_400g) * lossypg_reserved + port_count_400g * lossypg_reserved_400g
local accumulative_management_pg = (total_port - port_count_8lanes) * lossypg_reserved + port_count_8lanes * lossypg_reserved_8lanes
accumulative_occupied_buffer = accumulative_occupied_buffer + accumulative_management_pg

-- Accumulate sizes for egress mirror and management pool
local accumulative_egress_mirror_overhead = total_port * egress_mirror_headroom
accumulative_occupied_buffer = accumulative_occupied_buffer + accumulative_egress_mirror_overhead + mgmt_pool_size

-- Fetch mmu_size
redis.call('SELECT', state_db)
local mmu_size = tonumber(redis.call('HGET', 'BUFFER_MAX_PARAM_TABLE|global', 'mmu_size'))
if mmu_size == nil then
mmu_size = tonumber(egress_lossless_pool_size)
end
local asic_keys = redis.call('KEYS', 'ASIC_TABLE*')
local cell_size = tonumber(redis.call('HGET', asic_keys[1], 'cell_size'))

-- Align mmu_size at cell size boundary, otherwise the sdk will complain and the syncd will fail
local number_of_cells = math.floor(mmu_size / cell_size)
local ceiling_mmu_size = number_of_cells * cell_size

-- Switch to CONFIG_DB
redis.call('SELECT', config_db)

Expand Down Expand Up @@ -238,13 +259,13 @@ table.insert(result, "debug:accumulative size:" .. accumulative_occupied_buffer)
for i = 1, #statistics do
table.insert(result, "debug:" .. statistics[i][1] .. ":" .. statistics[i][2] .. ":" .. statistics[i][3])
end
table.insert(result, "debug:extra_400g:" .. (lossypg_reserved_400g - lossypg_reserved) .. ":" .. lossypg_400g .. ":" .. port_count_400g)
table.insert(result, "debug:extra_8lanes:" .. (lossypg_reserved_8lanes - lossypg_reserved) .. ":" .. lossypg_8lanes .. ":" .. port_count_8lanes)
table.insert(result, "debug:mgmt_pool:" .. mgmt_pool_size)
table.insert(result, "debug:accumulative_mgmt_pg:" .. accumulative_management_pg)
table.insert(result, "debug:egress_mirror:" .. accumulative_egress_mirror_overhead)
table.insert(result, "debug:shp_enabled:" .. tostring(shp_enabled))
table.insert(result, "debug:shp_size:" .. shp_size)
table.insert(result, "debug:accumulative xoff:" .. accumulative_xoff)
table.insert(result, "debug:total port:" .. total_port)
table.insert(result, "debug:total port:" .. total_port .. " ports with 8 lanes:" .. port_count_8lanes)

return result
Loading

0 comments on commit d8cfaa3

Please sign in to comment.