Skip to content

Commit

Permalink
Bug fix in buffer pool calculation and headroom checking
Browse files Browse the repository at this point in the history
- Take the number of lanes, instead of the speed, into account when determining whether a port has doubled pipeline latency
  For speeds other than 400G, eg 100G, it's possible that some 100G ports have 8 lanes and others have 4 lanes
  In this case, we need to add "8_lane" to the profile name to indicate whether the profile is for 8 lane ports or normal ports
- Take advantage of Lua tables used as sets to represent the profile reference count, which also makes the code more maintainable
- Take deviation into account when checking the headroom against the limit

Signed-off-by: Stephen Sun <stephens@nvidia.com>
  • Loading branch information
stephenxs committed Jun 17, 2021
1 parent 5847af3 commit d8cfaa3
Show file tree
Hide file tree
Showing 5 changed files with 138 additions and 72 deletions.
40 changes: 34 additions & 6 deletions cfgmgr/buffer_check_headroom_mellanox.lua
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,25 @@ local port = KEYS[1]
local input_profile_name = ARGV[1]
local input_profile_size = ARGV[2]
local new_pg = ARGV[3]
local accumulative_size = 0

local function is_port_with_8lanes(lanes)
-- On Spectrum 3, ports with 8 lanes have doubled pipeline latency.
-- 'lanes' is the comma-separated lane list from the PORT table (or nil);
-- the lane count is one more than the number of commas.
if not lanes then
    return false
end
local lane_count = 1
for _ in string.gmatch(lanes, ",") do
    lane_count = lane_count + 1
end
return lane_count == 8
end

-- Initialize the accumulative size with 4096
-- This is to absorb the possible deviation
local accumulative_size = 4096

local appl_db = "0"
local state_db = "6"
local config_db = "4"

local ret_true = {}
local ret = {}
Expand All @@ -20,7 +35,18 @@ table.insert(ret_true, "result:true")

default_ret = ret_true

local speed = redis.call('HGET', 'PORT|' .. port, 'speed')
-- Connect to CONFIG_DB
redis.call('SELECT', config_db)

local lanes

-- On SPC3 switches, we need to know whether it is an 8-lane port because such ports have extra pipeline latency
local is_spc3 = false
local platform = redis.call('HGET', 'DEVICE_METADATA|localhost', 'platform')
if platform and string.sub(platform, 1, 16) == "x86_64-mlnx_msn4" then
is_spc3 = true
lanes = redis.call('HGET', 'PORT|' .. port, 'lanes')
end

-- Fetch the threshold from STATE_DB
redis.call('SELECT', state_db)
Expand All @@ -31,11 +57,12 @@ if max_headroom_size == nil then
end

local asic_keys = redis.call('KEYS', 'ASIC_TABLE*')
local pipeline_delay = tonumber(redis.call('HGET', asic_keys[1], 'pipeline_latency'))
if speed == 400000 then
pipeline_delay = pipeline_delay * 2 - 1
local pipeline_latency = tonumber(redis.call('HGET', asic_keys[1], 'pipeline_latency'))
if is_spc3 and is_port_with_8lanes(lanes) then
-- The pipeline latency should be adjusted accordingly for ports with 2 buffer units
pipeline_latency = pipeline_latency * 2 - 1
end
accumulative_size = accumulative_size + 2 * pipeline_delay * 1024
accumulative_size = accumulative_size + 2 * pipeline_latency * 1024

-- Fetch all keys in BUFFER_PG according to the port
redis.call('SELECT', appl_db)
Expand Down Expand Up @@ -95,6 +122,7 @@ end

if max_headroom_size > accumulative_size then
table.insert(ret, "result:true")
table.insert(ret, "debug:Accumulative headroom on port " .. accumulative_size .. ", the maximum available headroom " .. max_headroom_size)
else
table.insert(ret, "result:false")
table.insert(ret, "debug:Accumulative headroom on port " .. accumulative_size .. " exceeds the maximum available headroom which is " .. max_headroom_size)
Expand Down
6 changes: 4 additions & 2 deletions cfgmgr/buffer_headroom_mellanox.lua
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
-- ARGV[2] - cable length
-- ARGV[3] - port mtu
-- ARGV[4] - gearbox delay
-- ARGV[5] - whether the port is an 8-lane port

-- parameters retried from databases:
-- From CONFIG_DB.LOSSLESS_TRAFFIC_PATTERN
Expand All @@ -26,6 +27,7 @@ local port_speed = tonumber(ARGV[1])
local cable_length = tonumber(string.sub(ARGV[2], 1, -2))
local port_mtu = tonumber(ARGV[3])
local gearbox_delay = tonumber(ARGV[4])
local is_8lane = ARGV[5]

local appl_db = "0"
local config_db = "4"
Expand Down Expand Up @@ -100,8 +102,8 @@ local xon_value
local headroom_size
local speed_overhead

-- Adjustment for 400G
if port_speed == 400000 then
-- Adjustment for 8-lane port
if is_8lane ~= nil and is_8lane == "true" then
pipeline_latency = 37 * 1024
speed_overhead = port_mtu
else
Expand Down
135 changes: 78 additions & 57 deletions cfgmgr/buffer_pool_mellanox.lua
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,12 @@ local appl_db = "0"
local config_db = "4"
local state_db = "6"

local lossypg_reserved = 19 * 1024
local lossypg_reserved_400g = 37 * 1024
-- Number of 400G ports
local port_count_400g = 0
-- Number of lossy PG on 400G ports
local lossypg_400g = 0
-- Number of ports with 8 lanes (whose pipeline latency should be doubled)
local port_count_8lanes = 0
-- Number of lossy PG on ports with 8 lanes
local lossypg_8lanes = 0

local is_spc3 = false

local result = {}
local profiles = {}
Expand All @@ -20,16 +20,7 @@ local total_port = 0
local mgmt_pool_size = 256 * 1024
local egress_mirror_headroom = 10 * 1024

-- Look up a buffer profile reference in the global 'profiles' list.
-- 'ref' is a reference string of the form "[<profile table key>]";
-- each entry of 'profiles' is a pair {name, reference_count}.
-- Returns the 1-based index of the matching entry, or 0 if not found.
local function find_profile(ref)
-- Remove the surrounding square brackets, then find the name in the list
local name = string.sub(ref, 2, -2)
for i = 1, #profiles, 1 do
if profiles[i][1] == name then
return i
end
end
-- 0 signals that the referenced profile is not (yet) known
return 0
end
local port_set_8lanes = {}

local function iterate_all_items(all_items)
table.sort(all_items)
Expand All @@ -43,9 +34,13 @@ local function iterate_all_items(all_items)
port = string.match(all_items[i], "Ethernet%d+")
if port ~= nil then
local range = string.match(all_items[i], "Ethernet%d+:([^%s]+)$")
local profile = redis.call('HGET', all_items[i], 'profile')
local index = find_profile(profile)
if index == 0 then
local profile_name = redis.call('HGET', all_items[i], 'profile')
if not profile_name then
return 1
end
profile_name = string.sub(profile_name, 2, -2)
local profile_ref_count = profiles[profile_name]
if profile_ref_count == nil then
-- Indicate an error in case the referenced profile hasn't been inserted or has been removed
-- It's possible when the orchagent is busy
-- The buffermgrd will take care of it and retry later
Expand All @@ -57,13 +52,9 @@ local function iterate_all_items(all_items)
else
size = 1 + tonumber(string.sub(range, -1)) - tonumber(string.sub(range, 1, 1))
end
profiles[index][2] = profiles[index][2] + size
local speed = redis.call('HGET', 'PORT_TABLE:'..port, 'speed')
if speed == '400000' then
if profile == '[BUFFER_PROFILE_TABLE:ingress_lossy_profile]' then
lossypg_400g = lossypg_400g + size
end
port_count_400g = port_count_400g + 1
profiles[profile_name] = profile_ref_count + size
if is_spc3 and port_set_8lanes[port] and profile_name == 'BUFFER_PROFILE_TABLE:ingress_lossy_profile' then
lossypg_8lanes = lossypg_8lanes + size
end
end
end
Expand All @@ -77,6 +68,31 @@ local ports_table = redis.call('KEYS', 'PORT|*')

total_port = #ports_table

-- Initialize the port_set_8lanes set: map each port name to true/false
-- depending on whether it is an 8-lane port, and count the 8-lane ports.
-- Only done on Spectrum-3 (SPC3) platforms, identified by the platform
-- string prefix; on other platforms is_spc3 stays false and the set stays empty.
local platform = redis.call('HGET', 'DEVICE_METADATA|localhost', 'platform')
if platform and string.sub(platform, 1, 16) == "x86_64-mlnx_msn4" then
is_spc3 = true
local lanes
local number_of_lanes
local port
for i = 1, total_port, 1 do
-- Load lanes from PORT table
lanes = redis.call('HGET', ports_table[i], 'lanes')
if lanes then
local _
-- Count lanes as (number of commas in the lane list) + 1
_, number_of_lanes = string.gsub(lanes, ",", ",")
number_of_lanes = number_of_lanes + 1
-- Strip the "PORT|" prefix (5 chars) from the CONFIG_DB key to get the port name
port = string.sub(ports_table[i], 6, -1)
if (number_of_lanes == 8) then
port_set_8lanes[port] = true
port_count_8lanes = port_count_8lanes + 1
else
port_set_8lanes[port] = false
end
end
end
end

local egress_lossless_pool_size = redis.call('HGET', 'BUFFER_POOL|egress_lossless_pool', 'size')

-- Whether shared headroom pool is enabled?
Expand All @@ -97,13 +113,30 @@ else
shp_size = 0
end

-- Fetch mmu_size
redis.call('SELECT', state_db)
local mmu_size = tonumber(redis.call('HGET', 'BUFFER_MAX_PARAM_TABLE|global', 'mmu_size'))
if mmu_size == nil then
mmu_size = tonumber(egress_lossless_pool_size)
end
local asic_keys = redis.call('KEYS', 'ASIC_TABLE*')
local cell_size = tonumber(redis.call('HGET', asic_keys[1], 'cell_size'))
local pipeline_latency = tonumber(redis.call('HGET', asic_keys[1], 'pipeline_latency'))

local lossypg_reserved = pipeline_latency * 1024
local lossypg_reserved_8lanes = (2 * pipeline_latency - 1) * 1024

-- Align mmu_size at cell size boundary, otherwise the sdk will complain and the syncd will fail
local number_of_cells = math.floor(mmu_size / cell_size)
local ceiling_mmu_size = number_of_cells * cell_size

-- Switch to APPL_DB
redis.call('SELECT', appl_db)

-- Fetch names of all profiles and insert them into the look up table
local all_profiles = redis.call('KEYS', 'BUFFER_PROFILE*')
for i = 1, #all_profiles, 1 do
table.insert(profiles, {all_profiles[i], 0})
profiles[all_profiles[i]] = 0
end

-- Fetch all the PGs
Expand All @@ -122,56 +155,44 @@ local statistics = {}
-- Fetch sizes of all of the profiles, accumulate them
local accumulative_occupied_buffer = 0
local accumulative_xoff = 0
for i = 1, #profiles, 1 do
if profiles[i][1] ~= "BUFFER_PROFILE_TABLE_KEY_SET" and profiles[i][1] ~= "BUFFER_PROFILE_TABLE_DEL_SET" then
local size = tonumber(redis.call('HGET', profiles[i][1], 'size'))

for name in pairs(profiles) do
if name ~= "BUFFER_PROFILE_TABLE_KEY_SET" and name ~= "BUFFER_PROFILE_TABLE_DEL_SET" then
local size = tonumber(redis.call('HGET', name, 'size'))
if size ~= nil then
if profiles[i][1] == "BUFFER_PROFILE_TABLE:ingress_lossy_profile" then
if name == "BUFFER_PROFILE_TABLE:ingress_lossy_profile" then
size = size + lossypg_reserved
end
if profiles[i][1] == "BUFFER_PROFILE_TABLE:egress_lossy_profile" then
profiles[i][2] = total_port
if name == "BUFFER_PROFILE_TABLE:egress_lossy_profile" then
profiles[name] = total_port
end
if size ~= 0 then
if shp_enabled and shp_size == 0 then
local xon = tonumber(redis.call('HGET', profiles[i][1], 'xon'))
local xoff = tonumber(redis.call('HGET', profiles[i][1], 'xoff'))
local xon = tonumber(redis.call('HGET', name, 'xon'))
local xoff = tonumber(redis.call('HGET', name, 'xoff'))
if xon ~= nil and xoff ~= nil and xon + xoff > size then
accumulative_xoff = accumulative_xoff + (xon + xoff - size) * profiles[i][2]
accumulative_xoff = accumulative_xoff + (xon + xoff - size) * profiles[name]
end
end
accumulative_occupied_buffer = accumulative_occupied_buffer + size * profiles[i][2]
accumulative_occupied_buffer = accumulative_occupied_buffer + size * profiles[name]
end
table.insert(statistics, {profiles[i][1], size, profiles[i][2]})
table.insert(statistics, {name, size, profiles[name]})
end
end
end

-- Extra lossy xon buffer for 400G port
local lossypg_extra_for_400g = (lossypg_reserved_400g - lossypg_reserved) * lossypg_400g
accumulative_occupied_buffer = accumulative_occupied_buffer + lossypg_extra_for_400g
-- Extra lossy xon buffer for ports with 8 lanes
local lossypg_extra_for_8lanes = (lossypg_reserved_8lanes - lossypg_reserved) * lossypg_8lanes
accumulative_occupied_buffer = accumulative_occupied_buffer + lossypg_extra_for_8lanes

-- Accumulate sizes for management PGs
local accumulative_management_pg = (total_port - port_count_400g) * lossypg_reserved + port_count_400g * lossypg_reserved_400g
local accumulative_management_pg = (total_port - port_count_8lanes) * lossypg_reserved + port_count_8lanes * lossypg_reserved_8lanes
accumulative_occupied_buffer = accumulative_occupied_buffer + accumulative_management_pg

-- Accumulate sizes for egress mirror and management pool
local accumulative_egress_mirror_overhead = total_port * egress_mirror_headroom
accumulative_occupied_buffer = accumulative_occupied_buffer + accumulative_egress_mirror_overhead + mgmt_pool_size

-- Fetch mmu_size
redis.call('SELECT', state_db)
local mmu_size = tonumber(redis.call('HGET', 'BUFFER_MAX_PARAM_TABLE|global', 'mmu_size'))
if mmu_size == nil then
mmu_size = tonumber(egress_lossless_pool_size)
end
local asic_keys = redis.call('KEYS', 'ASIC_TABLE*')
local cell_size = tonumber(redis.call('HGET', asic_keys[1], 'cell_size'))

-- Align mmu_size at cell size boundary, otherwise the sdk will complain and the syncd will fail
local number_of_cells = math.floor(mmu_size / cell_size)
local ceiling_mmu_size = number_of_cells * cell_size

-- Switch to CONFIG_DB
redis.call('SELECT', config_db)

Expand Down Expand Up @@ -238,13 +259,13 @@ table.insert(result, "debug:accumulative size:" .. accumulative_occupied_buffer)
for i = 1, #statistics do
table.insert(result, "debug:" .. statistics[i][1] .. ":" .. statistics[i][2] .. ":" .. statistics[i][3])
end
table.insert(result, "debug:extra_400g:" .. (lossypg_reserved_400g - lossypg_reserved) .. ":" .. lossypg_400g .. ":" .. port_count_400g)
table.insert(result, "debug:extra_8lanes:" .. (lossypg_reserved_8lanes - lossypg_reserved) .. ":" .. lossypg_8lanes .. ":" .. port_count_8lanes)
table.insert(result, "debug:mgmt_pool:" .. mgmt_pool_size)
table.insert(result, "debug:accumulative_mgmt_pg:" .. accumulative_management_pg)
table.insert(result, "debug:egress_mirror:" .. accumulative_egress_mirror_overhead)
table.insert(result, "debug:shp_enabled:" .. tostring(shp_enabled))
table.insert(result, "debug:shp_size:" .. shp_size)
table.insert(result, "debug:accumulative xoff:" .. accumulative_xoff)
table.insert(result, "debug:total port:" .. total_port)
table.insert(result, "debug:total port:" .. total_port .. " ports with 8 lanes:" .. port_count_8lanes)

return result
Loading

0 comments on commit d8cfaa3

Please sign in to comment.