Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Dynamic Buffer Calc][Mellanox] Bug fixes and enhancements for the lua plugins for buffer pool calculation and headroom checking #1781

Merged
merged 4 commits into from
Jun 28, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
35 changes: 29 additions & 6 deletions cfgmgr/buffer_check_headroom_mellanox.lua
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,25 @@ local port = KEYS[1]
local input_profile_name = ARGV[1]
local input_profile_size = ARGV[2]
local new_pg = ARGV[3]
local accumulative_size = 0

local function is_port_with_8lanes(lanes)
    -- On Spectrum 3, ports with 8 lanes have doubled pipeline latency.
    -- The 'lanes' field is a comma-separated list, so the port carries
    -- (separator count + 1) lanes; a missing field means the port has none.
    if not lanes then
        return false
    end
    local lane_count = 1
    for _ in string.gmatch(lanes, ",") do
        lane_count = lane_count + 1
    end
    return lane_count == 8
end

-- Initialize the accumulative size with 4096
-- This is to absorb the possible deviation
local accumulative_size = 4096

local appl_db = "0"
local state_db = "6"
local config_db = "4"

local ret_true = {}
local ret = {}
Expand All @@ -20,7 +35,13 @@ table.insert(ret_true, "result:true")

default_ret = ret_true

local speed = redis.call('HGET', 'PORT|' .. port, 'speed')
-- Connect to CONFIG_DB
redis.call('SELECT', config_db)

local lanes

-- We need to know whether it's an 8-lane port because it has extra pipeline latency
lanes = redis.call('HGET', 'PORT|' .. port, 'lanes')

-- Fetch the threshold from STATE_DB
redis.call('SELECT', state_db)
Expand All @@ -31,11 +52,12 @@ if max_headroom_size == nil then
end

local asic_keys = redis.call('KEYS', 'ASIC_TABLE*')
local pipeline_delay = tonumber(redis.call('HGET', asic_keys[1], 'pipeline_latency'))
if speed == 400000 then
pipeline_delay = pipeline_delay * 2 - 1
local pipeline_latency = tonumber(redis.call('HGET', asic_keys[1], 'pipeline_latency'))
if is_port_with_8lanes(lanes) then
-- The pipeline latency should be adjusted accordingly for ports with 2 buffer units
pipeline_latency = pipeline_latency * 2 - 1
end
accumulative_size = accumulative_size + 2 * pipeline_delay * 1024
accumulative_size = accumulative_size + 2 * pipeline_latency * 1024

-- Fetch all keys in BUFFER_PG according to the port
redis.call('SELECT', appl_db)
Expand Down Expand Up @@ -95,6 +117,7 @@ end

if max_headroom_size > accumulative_size then
table.insert(ret, "result:true")
table.insert(ret, "debug:Accumulative headroom on port " .. accumulative_size .. ", the maximum available headroom " .. max_headroom_size)
else
table.insert(ret, "result:false")
table.insert(ret, "debug:Accumulative headroom on port " .. accumulative_size .. " exceeds the maximum available headroom which is " .. max_headroom_size)
Expand Down
8 changes: 5 additions & 3 deletions cfgmgr/buffer_headroom_mellanox.lua
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
-- ARGV[2] - cable length
-- ARGV[3] - port mtu
-- ARGV[4] - gearbox delay
-- ARGV[5] - lane count of the ports on which the profile will be applied

-- parameters retried from databases:
-- From CONFIG_DB.LOSSLESS_TRAFFIC_PATTERN
Expand All @@ -26,6 +27,7 @@ local port_speed = tonumber(ARGV[1])
local cable_length = tonumber(string.sub(ARGV[2], 1, -2))
local port_mtu = tonumber(ARGV[3])
local gearbox_delay = tonumber(ARGV[4])
local is_8lane = (ARGV[5] == "8")

local appl_db = "0"
local config_db = "4"
Expand Down Expand Up @@ -100,9 +102,9 @@ local xon_value
local headroom_size
local speed_overhead

-- Adjustment for 400G
if port_speed == 400000 then
pipeline_latency = 37 * 1024
-- Adjustment for 8-lane port
if is_8lane ~= nil and is_8lane then
pipeline_latency = pipeline_latency * 2 - 1024
speed_overhead = port_mtu
else
speed_overhead = 0
Expand Down
169 changes: 109 additions & 60 deletions cfgmgr/buffer_pool_mellanox.lua
Original file line number Diff line number Diff line change
Expand Up @@ -5,34 +5,31 @@ local appl_db = "0"
local config_db = "4"
local state_db = "6"

local lossypg_reserved = 19 * 1024
local lossypg_reserved_400g = 37 * 1024
-- Number of 400G ports
local port_count_400g = 0
-- Number of lossy PG on 400G ports
local lossypg_400g = 0
-- Number of ports with 8 lanes (whose pipeline latency should be doubled)
local port_count_8lanes = 0
-- Number of lossy PG on ports with 8 lanes
local lossypg_8lanes = 0

-- Private headroom
local private_headroom = 10 * 1024

local result = {}
local profiles = {}
local lossless_profiles = {}

local total_port = 0

local mgmt_pool_size = 256 * 1024
local egress_mirror_headroom = 10 * 1024

local function find_profile(ref)
-- Remove the surrounding square bracket and the find in the list
local name = string.sub(ref, 2, -2)
for i = 1, #profiles, 1 do
if profiles[i][1] == name then
return i
end
end
return 0
end
-- The set of ports with 8 lanes
local port_set_8lanes = {}
-- Number of ports with lossless profiles
local lossless_port_count = 0

local function iterate_all_items(all_items)
local function iterate_all_items(all_items, check_lossless)
table.sort(all_items)
local lossless_ports = {}
local port
local fvpairs
for i = 1, #all_items, 1 do
Expand All @@ -43,9 +40,13 @@ local function iterate_all_items(all_items)
port = string.match(all_items[i], "Ethernet%d+")
if port ~= nil then
local range = string.match(all_items[i], "Ethernet%d+:([^%s]+)$")
local profile = redis.call('HGET', all_items[i], 'profile')
local index = find_profile(profile)
if index == 0 then
local profile_name = redis.call('HGET', all_items[i], 'profile')
if not profile_name then
return 1
stephenxs marked this conversation as resolved.
Show resolved Hide resolved
end
profile_name = string.sub(profile_name, 2, -2)
local profile_ref_count = profiles[profile_name]
if profile_ref_count == nil then
-- Indicate an error in case the referenced profile hasn't been inserted or has been removed
-- It's possible when the orchagent is busy
-- The buffermgrd will take care of it and retry later
Expand All @@ -57,13 +58,15 @@ local function iterate_all_items(all_items)
else
size = 1 + tonumber(string.sub(range, -1)) - tonumber(string.sub(range, 1, 1))
end
profiles[index][2] = profiles[index][2] + size
local speed = redis.call('HGET', 'PORT_TABLE:'..port, 'speed')
if speed == '400000' then
if profile == '[BUFFER_PROFILE_TABLE:ingress_lossy_profile]' then
lossypg_400g = lossypg_400g + size
profiles[profile_name] = profile_ref_count + size
if port_set_8lanes[port] and profile_name == 'BUFFER_PROFILE_TABLE:ingress_lossy_profile' then
lossypg_8lanes = lossypg_8lanes + size
end
if check_lossless and lossless_profiles[profile_name] then
if lossless_ports[port] == nil then
lossless_port_count = lossless_port_count + 1
lossless_ports[port] = true
end
port_count_400g = port_count_400g + 1
end
end
end
Expand All @@ -77,6 +80,27 @@ local ports_table = redis.call('KEYS', 'PORT|*')

total_port = #ports_table

-- Initialize the port_set_8lanes set
local lanes
local number_of_lanes
local port
for i = 1, total_port, 1 do
-- Load lanes from PORT table
lanes = redis.call('HGET', ports_table[i], 'lanes')
if lanes then
local _
_, number_of_lanes = string.gsub(lanes, ",", ",")
number_of_lanes = number_of_lanes + 1
port = string.sub(ports_table[i], 6, -1)
if (number_of_lanes == 8) then
port_set_8lanes[port] = true
port_count_8lanes = port_count_8lanes + 1
else
port_set_8lanes[port] = false
end
end
end

local egress_lossless_pool_size = redis.call('HGET', 'BUFFER_POOL|egress_lossless_pool', 'size')

-- Whether shared headroom pool is enabled?
Expand All @@ -97,22 +121,45 @@ else
shp_size = 0
end

-- Fetch mmu_size
redis.call('SELECT', state_db)
local mmu_size = tonumber(redis.call('HGET', 'BUFFER_MAX_PARAM_TABLE|global', 'mmu_size'))
if mmu_size == nil then
mmu_size = tonumber(egress_lossless_pool_size)
end
local asic_keys = redis.call('KEYS', 'ASIC_TABLE*')
local cell_size = tonumber(redis.call('HGET', asic_keys[1], 'cell_size'))
local pipeline_latency = tonumber(redis.call('HGET', asic_keys[1], 'pipeline_latency'))

local lossypg_reserved = pipeline_latency * 1024
local lossypg_reserved_8lanes = (2 * pipeline_latency - 1) * 1024

-- Align mmu_size at cell size boundary, otherwise the sdk will complain and the syncd will fail
local number_of_cells = math.floor(mmu_size / cell_size)
local ceiling_mmu_size = number_of_cells * cell_size

-- Switch to APPL_DB
redis.call('SELECT', appl_db)

-- Fetch names of all profiles and insert them into the look up table
local all_profiles = redis.call('KEYS', 'BUFFER_PROFILE*')
for i = 1, #all_profiles, 1 do
table.insert(profiles, {all_profiles[i], 0})
if all_profiles[i] ~= "BUFFER_PROFILE_TABLE_KEY_SET" and all_profiles[i] ~= "BUFFER_PROFILE_TABLE_DEL_SET" then
local xoff = redis.call('HGET', all_profiles[i], 'xoff')
if xoff then
lossless_profiles[all_profiles[i]] = true
end
profiles[all_profiles[i]] = 0
end
end

-- Fetch all the PGs
local all_pgs = redis.call('KEYS', 'BUFFER_PG*')
local all_tcs = redis.call('KEYS', 'BUFFER_QUEUE*')

local fail_count = 0
fail_count = fail_count + iterate_all_items(all_pgs)
fail_count = fail_count + iterate_all_items(all_tcs)
fail_count = fail_count + iterate_all_items(all_pgs, true)
fail_count = fail_count + iterate_all_items(all_tcs, false)
if fail_count > 0 then
return {}
end
Expand All @@ -122,56 +169,55 @@ local statistics = {}
-- Fetch sizes of all of the profiles, accumulate them
local accumulative_occupied_buffer = 0
local accumulative_xoff = 0
for i = 1, #profiles, 1 do
if profiles[i][1] ~= "BUFFER_PROFILE_TABLE_KEY_SET" and profiles[i][1] ~= "BUFFER_PROFILE_TABLE_DEL_SET" then
local size = tonumber(redis.call('HGET', profiles[i][1], 'size'))

for name in pairs(profiles) do
if name ~= "BUFFER_PROFILE_TABLE_KEY_SET" and name ~= "BUFFER_PROFILE_TABLE_DEL_SET" then
local size = tonumber(redis.call('HGET', name, 'size'))
if size ~= nil then
if profiles[i][1] == "BUFFER_PROFILE_TABLE:ingress_lossy_profile" then
if name == "BUFFER_PROFILE_TABLE:ingress_lossy_profile" then
size = size + lossypg_reserved
end
if profiles[i][1] == "BUFFER_PROFILE_TABLE:egress_lossy_profile" then
profiles[i][2] = total_port
if name == "BUFFER_PROFILE_TABLE:egress_lossy_profile" then
profiles[name] = total_port
end
if size ~= 0 then
if shp_enabled and shp_size == 0 then
local xon = tonumber(redis.call('HGET', profiles[i][1], 'xon'))
local xoff = tonumber(redis.call('HGET', profiles[i][1], 'xoff'))
local xon = tonumber(redis.call('HGET', name, 'xon'))
local xoff = tonumber(redis.call('HGET', name, 'xoff'))
if xon ~= nil and xoff ~= nil and xon + xoff > size then
accumulative_xoff = accumulative_xoff + (xon + xoff - size) * profiles[i][2]
accumulative_xoff = accumulative_xoff + (xon + xoff - size) * profiles[name]
end
end
accumulative_occupied_buffer = accumulative_occupied_buffer + size * profiles[i][2]
accumulative_occupied_buffer = accumulative_occupied_buffer + size * profiles[name]
end
table.insert(statistics, {profiles[i][1], size, profiles[i][2]})
table.insert(statistics, {name, size, profiles[name]})
end
end
end

-- Extra lossy xon buffer for 400G port
local lossypg_extra_for_400g = (lossypg_reserved_400g - lossypg_reserved) * lossypg_400g
accumulative_occupied_buffer = accumulative_occupied_buffer + lossypg_extra_for_400g
-- Extra lossy xon buffer for ports with 8 lanes
local lossypg_extra_for_8lanes = (lossypg_reserved_8lanes - lossypg_reserved) * lossypg_8lanes
accumulative_occupied_buffer = accumulative_occupied_buffer + lossypg_extra_for_8lanes

-- Accumulate sizes for private headrooms
local accumulative_private_headroom = 0
if shp_enabled then
accumulative_private_headroom = lossless_port_count * private_headroom
accumulative_occupied_buffer = accumulative_occupied_buffer + accumulative_private_headroom
accumulative_xoff = accumulative_xoff - accumulative_private_headroom
if accumulative_xoff < 0 then
accumulative_xoff = 0
end
end

-- Accumulate sizes for management PGs
local accumulative_management_pg = (total_port - port_count_400g) * lossypg_reserved + port_count_400g * lossypg_reserved_400g
local accumulative_management_pg = (total_port - port_count_8lanes) * lossypg_reserved + port_count_8lanes * lossypg_reserved_8lanes
accumulative_occupied_buffer = accumulative_occupied_buffer + accumulative_management_pg

-- Accumulate sizes for egress mirror and management pool
local accumulative_egress_mirror_overhead = total_port * egress_mirror_headroom
accumulative_occupied_buffer = accumulative_occupied_buffer + accumulative_egress_mirror_overhead + mgmt_pool_size

-- Fetch mmu_size
redis.call('SELECT', state_db)
local mmu_size = tonumber(redis.call('HGET', 'BUFFER_MAX_PARAM_TABLE|global', 'mmu_size'))
if mmu_size == nil then
mmu_size = tonumber(egress_lossless_pool_size)
end
local asic_keys = redis.call('KEYS', 'ASIC_TABLE*')
local cell_size = tonumber(redis.call('HGET', asic_keys[1], 'cell_size'))

-- Align mmu_size at cell size boundary, otherwise the sdk will complain and the syncd will fail
local number_of_cells = math.floor(mmu_size / cell_size)
local ceiling_mmu_size = number_of_cells * cell_size

-- Switch to CONFIG_DB
redis.call('SELECT', config_db)

Expand Down Expand Up @@ -238,13 +284,16 @@ table.insert(result, "debug:accumulative size:" .. accumulative_occupied_buffer)
for i = 1, #statistics do
table.insert(result, "debug:" .. statistics[i][1] .. ":" .. statistics[i][2] .. ":" .. statistics[i][3])
end
table.insert(result, "debug:extra_400g:" .. (lossypg_reserved_400g - lossypg_reserved) .. ":" .. lossypg_400g .. ":" .. port_count_400g)
table.insert(result, "debug:extra_8lanes:" .. (lossypg_reserved_8lanes - lossypg_reserved) .. ":" .. lossypg_8lanes .. ":" .. port_count_8lanes)
table.insert(result, "debug:mgmt_pool:" .. mgmt_pool_size)
if shp_enabled then
table.insert(result, "debug:accumulative_private_headroom:" .. accumulative_private_headroom)
table.insert(result, "debug:accumulative xoff:" .. accumulative_xoff)
end
table.insert(result, "debug:accumulative_mgmt_pg:" .. accumulative_management_pg)
table.insert(result, "debug:egress_mirror:" .. accumulative_egress_mirror_overhead)
table.insert(result, "debug:shp_enabled:" .. tostring(shp_enabled))
table.insert(result, "debug:shp_size:" .. shp_size)
table.insert(result, "debug:accumulative xoff:" .. accumulative_xoff)
table.insert(result, "debug:total port:" .. total_port)
table.insert(result, "debug:total port:" .. total_port .. " ports with 8 lanes:" .. port_count_8lanes)

return result
Loading