From f3f2c752df0131d46fe6814b4b0ebe2954ef87b5 Mon Sep 17 00:00:00 2001 From: Stephen Sun Date: Thu, 10 Jun 2021 10:49:01 +0800 Subject: [PATCH 1/4] Bug fix in buffer pool calculation and headroom checking - Take number of lanes instead of speed into account when determining whether it has doubled pipeline latency For speeds other than 400G, eg 100G, it's possible that some 100G ports have 8 lanes and others have 4 lanes In this case, we need to add "8_lane" to the profile name to indicate whether the profile is for 8 lane ports or normal ports This is for Mellanox platform only - Take advantage of "set" feature of the lua to present the profile referencing count, which also makes the code more maintainable - Take deviation into account when checking the headroom against the limit - Take private headroom into account when shared headroom pool is enabled Signed-off-by: Stephen Sun --- cfgmgr/buffer_check_headroom_mellanox.lua | 40 ++++- cfgmgr/buffer_headroom_mellanox.lua | 8 +- cfgmgr/buffer_pool_mellanox.lua | 175 ++++++++++++++-------- cfgmgr/buffermgrdyn.cpp | 28 +++- cfgmgr/buffermgrdyn.h | 9 +- tests/test_buffer_dynamic.py | 13 +- 6 files changed, 191 insertions(+), 82 deletions(-) diff --git a/cfgmgr/buffer_check_headroom_mellanox.lua b/cfgmgr/buffer_check_headroom_mellanox.lua index 510a967b73..08f955fe4c 100644 --- a/cfgmgr/buffer_check_headroom_mellanox.lua +++ b/cfgmgr/buffer_check_headroom_mellanox.lua @@ -7,10 +7,25 @@ local port = KEYS[1] local input_profile_name = ARGV[1] local input_profile_size = ARGV[2] local new_pg = ARGV[3] -local accumulative_size = 0 + +local function is_port_with_8lanes(lanes) + -- On Spectrum 3, ports with 8 lanes have doubled pipeline latency + local number_of_lanes = 0 + if lanes then + local _ + _, number_of_lanes = string.gsub(lanes, ",", ",") + number_of_lanes = number_of_lanes + 1 + end + return number_of_lanes == 8 +end + +-- Initialize the accumulative size with 4096 +-- This is to absorb the possible deviation +local accumulative_size = 4096 local appl_db = "0" local state_db = "6" +local config_db = "4" local ret_true = {} local ret = {} @@ -20,7 +35,18 @@ table.insert(ret_true, "result:true") default_ret = ret_true -local speed = redis.call('HGET', 'PORT|' .. port, 'speed') +-- Connect to CONFIG_DB +redis.call('SELECT', config_db) + +local lanes + +-- On SPC3 switch, we need to know whether it's a 8-lane port because it has extra pipeline latency +local is_spc3 = false +local platform = redis.call('HGET', 'DEVICE_METADATA|localhost', 'platform') +if platform and string.sub(platform, 1, 16) == "x86_64-mlnx_msn4" then + is_spc3 = true + lanes = redis.call('HGET', 'PORT|' .. port, 'lanes') +end -- Fetch the threshold from STATE_DB redis.call('SELECT', state_db) @@ -31,11 +57,12 @@ if max_headroom_size == nil then end local asic_keys = redis.call('KEYS', 'ASIC_TABLE*') -local pipeline_delay = tonumber(redis.call('HGET', asic_keys[1], 'pipeline_latency')) -if speed == 400000 then - pipeline_delay = pipeline_delay * 2 - 1 +local pipeline_latency = tonumber(redis.call('HGET', asic_keys[1], 'pipeline_latency')) +if is_spc3 and is_port_with_8lanes(lanes) then + -- The pipeline latency should be adjusted accordingly for ports with 2 buffer units + pipeline_latency = pipeline_latency * 2 - 1 end -accumulative_size = accumulative_size + 2 * pipeline_delay * 1024 +accumulative_size = accumulative_size + 2 * pipeline_latency * 1024 -- Fetch all keys in BUFFER_PG according to the port redis.call('SELECT', appl_db) @@ -95,6 +122,7 @@ end if max_headroom_size > accumulative_size then table.insert(ret, "result:true") + table.insert(ret, "debug:Accumulative headroom on port " .. accumulative_size .. ", the maximum available headroom " .. max_headroom_size) else table.insert(ret, "result:false") table.insert(ret, "debug:Accumulative headroom on port " .. accumulative_size .. " exceeds the maximum available headroom which is " .. max_headroom_size) diff --git a/cfgmgr/buffer_headroom_mellanox.lua b/cfgmgr/buffer_headroom_mellanox.lua index 2c95814008..0a40f03a86 100644 --- a/cfgmgr/buffer_headroom_mellanox.lua +++ b/cfgmgr/buffer_headroom_mellanox.lua @@ -3,6 +3,7 @@ -- ARGV[2] - cable length -- ARGV[3] - port mtu -- ARGV[4] - gearbox delay +-- ARGV[5] - whether the port is an 8-lane port -- parameters retried from databases: -- From CONFIG_DB.LOSSLESS_TRAFFIC_PATTERN @@ -26,6 +27,7 @@ local port_speed = tonumber(ARGV[1]) local cable_length = tonumber(string.sub(ARGV[2], 1, -2)) local port_mtu = tonumber(ARGV[3]) local gearbox_delay = tonumber(ARGV[4]) +local is_8lane = ARGV[5] local appl_db = "0" local config_db = "4" @@ -100,9 +102,9 @@ local xon_value local headroom_size local speed_overhead --- Adjustment for 400G -if port_speed == 400000 then - pipeline_latency = 37 * 1024 +-- Adjustment for 8-lane port +if is_8lane ~= nil and is_8lane == "true" then + pipeline_latency = pipeline_latency * 2 - 1024 speed_overhead = port_mtu else speed_overhead = 0 diff --git a/cfgmgr/buffer_pool_mellanox.lua b/cfgmgr/buffer_pool_mellanox.lua index f27316d2b7..f43109c168 100644 --- a/cfgmgr/buffer_pool_mellanox.lua +++ b/cfgmgr/buffer_pool_mellanox.lua @@ -5,34 +5,33 @@ local appl_db = "0" local config_db = "4" local state_db = "6" -local lossypg_reserved = 19 * 1024 -local lossypg_reserved_400g = 37 * 1024 --- Number of 400G ports -local port_count_400g = 0 --- Number of lossy PG on 400G ports -local lossypg_400g = 0 +-- Number of ports with 8 lanes (whose pipeline latency should be doubled) +local port_count_8lanes = 0 +-- Number of lossy PG on ports with 8 lanes +local lossypg_8lanes = 0 + +local is_spc3 = false + +-- Private headrom +local private_headroom = 10 * 1024 local result = {} local profiles = {} +local lossless_profiles = {} local total_port = 0 local mgmt_pool_size = 256 * 1024 local egress_mirror_headroom = 10 * 1024 -local function find_profile(ref) - -- Remove the surrounding square bracket and the find in the list - local name = string.sub(ref, 2, -2) - for i = 1, #profiles, 1 do - if profiles[i][1] == name then - return i - end - end - return 0 -end +-- The set of ports with 8 lanes +local port_set_8lanes = {} +-- Number of ports with lossless profiles +local lossless_port_count = 0 -local function iterate_all_items(all_items) +local function iterate_all_items(all_items, check_lossless) table.sort(all_items) + local lossless_ports = {} local port local fvpairs for i = 1, #all_items, 1 do @@ -43,9 +42,13 @@ local function iterate_all_items(all_items) port = string.match(all_items[i], "Ethernet%d+") if port ~= nil then local range = string.match(all_items[i], "Ethernet%d+:([^%s]+)$") - local profile = redis.call('HGET', all_items[i], 'profile') - local index = find_profile(profile) - if index == 0 then + local profile_name = redis.call('HGET', all_items[i], 'profile') + if not profile_name then + return 1 + end + profile_name = string.sub(profile_name, 2, -2) + local profile_ref_count = profiles[profile_name] + if profile_ref_count == nil then -- Indicate an error in case the referenced profile hasn't been inserted or has been removed -- It's possible when the orchagent is busy -- The buffermgrd will take care of it and retry later @@ -57,13 +60,15 @@ local function iterate_all_items(all_items) else size = 1 + tonumber(string.sub(range, -1)) - tonumber(string.sub(range, 1, 1)) end - profiles[index][2] = profiles[index][2] + size - local speed = redis.call('HGET', 'PORT_TABLE:'..port, 'speed') - if speed == '400000' then - if profile == '[BUFFER_PROFILE_TABLE:ingress_lossy_profile]' then - lossypg_400g = lossypg_400g + size + profiles[profile_name] = profile_ref_count + size + if is_spc3 and port_set_8lanes[port] and profile_name == 'BUFFER_PROFILE_TABLE:ingress_lossy_profile' then + lossypg_8lanes = lossypg_8lanes + size + end + if check_lossless and lossless_profiles[profile_name] then + if lossless_ports[port] == nil then + lossless_port_count = lossless_port_count + 1 + lossless_ports[port] = true end - port_count_400g = port_count_400g + 1 end end end @@ -77,6 +82,31 @@ local ports_table = redis.call('KEYS', 'PORT|*') total_port = #ports_table +-- Initialize the port_set_8lanes set +local platform = redis.call('HGET', 'DEVICE_METADATA|localhost', 'platform') +if platform and string.sub(platform, 1, 16) == "x86_64-mlnx_msn4" then + is_spc3 = true + local lanes + local number_of_lanes + local port + for i = 1, total_port, 1 do + -- Load lanes from PORT table + lanes = redis.call('HGET', ports_table[i], 'lanes') + if lanes then + local _ + _, number_of_lanes = string.gsub(lanes, ",", ",") + number_of_lanes = number_of_lanes + 1 + port = string.sub(ports_table[i], 6, -1) + if (number_of_lanes == 8) then + port_set_8lanes[port] = true + port_count_8lanes = port_count_8lanes + 1 + else + port_set_8lanes[port] = false + end + end + end +end + local egress_lossless_pool_size = redis.call('HGET', 'BUFFER_POOL|egress_lossless_pool', 'size') -- Whether shared headroom pool is enabled? @@ -97,13 +127,36 @@ else shp_size = 0 end +-- Fetch mmu_size +redis.call('SELECT', state_db) +local mmu_size = tonumber(redis.call('HGET', 'BUFFER_MAX_PARAM_TABLE|global', 'mmu_size')) +if mmu_size == nil then + mmu_size = tonumber(egress_lossless_pool_size) +end +local asic_keys = redis.call('KEYS', 'ASIC_TABLE*') +local cell_size = tonumber(redis.call('HGET', asic_keys[1], 'cell_size')) +local pipeline_latency = tonumber(redis.call('HGET', asic_keys[1], 'pipeline_latency')) + +local lossypg_reserved = pipeline_latency * 1024 +local lossypg_reserved_8lanes = (2 * pipeline_latency - 1) * 1024 + +-- Align mmu_size at cell size boundary, otherwise the sdk will complain and the syncd will fail +local number_of_cells = math.floor(mmu_size / cell_size) +local ceiling_mmu_size = number_of_cells * cell_size + -- Switch to APPL_DB redis.call('SELECT', appl_db) -- Fetch names of all profiles and insert them into the look up table local all_profiles = redis.call('KEYS', 'BUFFER_PROFILE*') for i = 1, #all_profiles, 1 do - table.insert(profiles, {all_profiles[i], 0}) + if all_profiles[i] ~= "BUFFER_PROFILE_TABLE_KEY_SET" and all_profiles[i] ~= "BUFFER_PROFILE_TABLE_DEL_SET" then + local xoff = redis.call('HGET', all_profiles[i], 'xoff') + if xoff then + lossless_profiles[all_profiles[i]] = true + end + profiles[all_profiles[i]] = 0 + end end -- Fetch all the PGs @@ -111,8 +164,8 @@ local all_pgs = redis.call('KEYS', 'BUFFER_PG*') local all_tcs = redis.call('KEYS', 'BUFFER_QUEUE*') local fail_count = 0 -fail_count = fail_count + iterate_all_items(all_pgs) -fail_count = fail_count + iterate_all_items(all_tcs) +fail_count = fail_count + iterate_all_items(all_pgs, true) +fail_count = fail_count + iterate_all_items(all_tcs, false) if fail_count > 0 then return {} end @@ -122,56 +175,55 @@ local statistics = {} -- Fetch sizes of all of the profiles, accumulate them local accumulative_occupied_buffer = 0 local accumulative_xoff = 0 -for i = 1, #profiles, 1 do - if profiles[i][1] ~= "BUFFER_PROFILE_TABLE_KEY_SET" and profiles[i][1] ~= "BUFFER_PROFILE_TABLE_DEL_SET" then - local size = tonumber(redis.call('HGET', profiles[i][1], 'size')) + +for name in pairs(profiles) do + if name ~= "BUFFER_PROFILE_TABLE_KEY_SET" and name ~= "BUFFER_PROFILE_TABLE_DEL_SET" then + local size = tonumber(redis.call('HGET', name, 'size')) if size ~= nil then - if profiles[i][1] == "BUFFER_PROFILE_TABLE:ingress_lossy_profile" then + if name == "BUFFER_PROFILE_TABLE:ingress_lossy_profile" then size = size + lossypg_reserved end - if profiles[i][1] == "BUFFER_PROFILE_TABLE:egress_lossy_profile" then - profiles[i][2] = total_port + if name == "BUFFER_PROFILE_TABLE:egress_lossy_profile" then + profiles[name] = total_port end if size ~= 0 then if shp_enabled and shp_size == 0 then - local xon = tonumber(redis.call('HGET', profiles[i][1], 'xon')) - local xoff = tonumber(redis.call('HGET', profiles[i][1], 'xoff')) + local xon = tonumber(redis.call('HGET', name, 'xon')) + local xoff = tonumber(redis.call('HGET', name, 'xoff')) if xon ~= nil and xoff ~= nil and xon + xoff > size then - accumulative_xoff = accumulative_xoff + (xon + xoff - size) * profiles[i][2] + accumulative_xoff = accumulative_xoff + (xon + xoff - size) * profiles[name] end end - accumulative_occupied_buffer = accumulative_occupied_buffer + size * profiles[i][2] + accumulative_occupied_buffer = accumulative_occupied_buffer + size * profiles[name] end - table.insert(statistics, {profiles[i][1], size, profiles[i][2]}) + table.insert(statistics, {name, size, profiles[name]}) end end end --- Extra lossy xon buffer for 400G port -local lossypg_extra_for_400g = (lossypg_reserved_400g - lossypg_reserved) * lossypg_400g -accumulative_occupied_buffer = accumulative_occupied_buffer + lossypg_extra_for_400g +-- Extra lossy xon buffer for ports with 8 lanes +local lossypg_extra_for_8lanes = (lossypg_reserved_8lanes - lossypg_reserved) * lossypg_8lanes +accumulative_occupied_buffer = accumulative_occupied_buffer + lossypg_extra_for_8lanes + +-- Accumulate sizes for private headrooms +local accumulative_private_headroom = 0 +if shp_enabled then + accumulative_private_headroom = lossless_port_count * private_headroom + accumulative_occupied_buffer = accumulative_occupied_buffer + accumulative_private_headroom + accumulative_xoff = accumulative_xoff - accumulative_private_headroom + if accumulative_xoff < 0 then + accumulative_xoff = 0 + end +end -- Accumulate sizes for management PGs -local accumulative_management_pg = (total_port - port_count_400g) * lossypg_reserved + port_count_400g * lossypg_reserved_400g +local accumulative_management_pg = (total_port - port_count_8lanes) * lossypg_reserved + port_count_8lanes * lossypg_reserved_8lanes accumulative_occupied_buffer = accumulative_occupied_buffer + accumulative_management_pg -- Accumulate sizes for egress mirror and management pool local accumulative_egress_mirror_overhead = total_port * egress_mirror_headroom accumulative_occupied_buffer = accumulative_occupied_buffer + accumulative_egress_mirror_overhead + mgmt_pool_size --- Fetch mmu_size -redis.call('SELECT', state_db) -local mmu_size = tonumber(redis.call('HGET', 'BUFFER_MAX_PARAM_TABLE|global', 'mmu_size')) -if mmu_size == nil then - mmu_size = tonumber(egress_lossless_pool_size) -end -local asic_keys = redis.call('KEYS', 'ASIC_TABLE*') -local cell_size = tonumber(redis.call('HGET', asic_keys[1], 'cell_size')) - --- Align mmu_size at cell size boundary, otherwise the sdk will complain and the syncd will fail -local number_of_cells = math.floor(mmu_size / cell_size) -local ceiling_mmu_size = number_of_cells * cell_size - -- Switch to CONFIG_DB redis.call('SELECT', config_db) @@ -238,13 +290,16 @@ table.insert(result, "debug:accumulative size:" .. accumulative_occupied_buffer) for i = 1, #statistics do table.insert(result, "debug:" .. statistics[i][1] .. ":" .. statistics[i][2] .. ":" .. statistics[i][3]) end -table.insert(result, "debug:extra_400g:" .. (lossypg_reserved_400g - lossypg_reserved) .. ":" .. lossypg_400g .. ":" .. port_count_400g) +table.insert(result, "debug:extra_8lanes:" .. (lossypg_reserved_8lanes - lossypg_reserved) .. ":" .. lossypg_8lanes .. ":" .. port_count_8lanes) table.insert(result, "debug:mgmt_pool:" .. mgmt_pool_size) +if shp_enabled then + table.insert(result, "debug:accumulative_private_headroom:" .. accumulative_private_headroom) + table.insert(result, "debug:accumulative xoff:" .. accumulative_xoff) +end table.insert(result, "debug:accumulative_mgmt_pg:" .. accumulative_management_pg) table.insert(result, "debug:egress_mirror:" .. accumulative_egress_mirror_overhead) table.insert(result, "debug:shp_enabled:" .. tostring(shp_enabled)) table.insert(result, "debug:shp_size:" .. shp_size) -table.insert(result, "debug:accumulative xoff:" .. accumulative_xoff) -table.insert(result, "debug:total port:" .. total_port) +table.insert(result, "debug:total port:" .. total_port .. " ports with 8 lanes:" .. port_count_8lanes) return result diff --git a/cfgmgr/buffermgrdyn.cpp b/cfgmgr/buffermgrdyn.cpp index 33ef6c3e5e..967743ff25 100644 --- a/cfgmgr/buffermgrdyn.cpp +++ b/cfgmgr/buffermgrdyn.cpp @@ -29,6 +29,7 @@ using namespace swss; BufferMgrDynamic::BufferMgrDynamic(DBConnector *cfgDb, DBConnector *stateDb, DBConnector *applDb, const vector &tables, shared_ptr> gearboxInfo = nullptr) : Orch(tables), + m_platform(), m_applDb(applDb), m_cfgPortTable(cfgDb, CFG_PORT_TABLE_NAME), m_cfgCableLenTable(cfgDb, CFG_PORT_CABLE_LEN_TABLE_NAME), @@ -68,6 +69,8 @@ BufferMgrDynamic::BufferMgrDynamic(DBConnector *cfgDb, DBConnector *stateDb, DBC string bufferpoolPluginName = "buffer_pool_" + platform + ".lua"; string checkHeadroomPluginName = "buffer_check_headroom_" + platform + ".lua"; + m_platform = platform; + try { string headroomLuaScript = swss::loadLuaScript(headroomPluginName); @@ -242,7 +245,7 @@ string BufferMgrDynamic::parseObjectNameFromReference(const string &reference) return parseObjectNameFromKey(objName, 1); } -string BufferMgrDynamic::getDynamicProfileName(const string &speed, const string &cable, const string &mtu, const string &threshold, const string &gearbox_model) +string BufferMgrDynamic::getDynamicProfileName(const string &speed, const string &cable, const string &mtu, const string &threshold, const string &gearbox_model, bool is_8lane) { string buffer_profile_key; @@ -265,6 +268,14 @@ string BufferMgrDynamic::getDynamicProfileName(const string &speed, const string buffer_profile_key = buffer_profile_key + "_" + gearbox_model; } + if (is_8lane && m_platform == "mellanox") + { + if (speed != "400000") + { + buffer_profile_key = buffer_profile_key + "_8lane"; + } + } + return buffer_profile_key + "_profile"; } @@ -285,6 +296,7 @@ void BufferMgrDynamic::calculateHeadroomSize(buffer_profile_t &headroom) argv.emplace_back(headroom.cable_length); argv.emplace_back(headroom.port_mtu); argv.emplace_back(m_identifyGearboxDelay); + argv.emplace_back(headroom.is_8lane ? "true" : "false"); try { @@ -571,7 +583,7 @@ void BufferMgrDynamic::updateBufferPgToDb(const string &key, const string &profi } // We have to check the headroom ahead of applying them -task_process_status BufferMgrDynamic::allocateProfile(const string &speed, const string &cable_len, const string &mtu, const string &threshold, const string &gearbox_model, string &profile_name) +task_process_status BufferMgrDynamic::allocateProfile(const string &speed, const string &cable_len, const string &mtu, const string &threshold, const string &gearbox_model, bool is_8lane, string &profile_name) { // Create record in BUFFER_PROFILE table @@ -595,6 +607,7 @@ task_process_status BufferMgrDynamic::allocateProfile(const string &speed, const profile.cable_length = cable_len; profile.port_mtu = mtu; profile.gearbox_model = gearbox_model; + profile.is_8lane = is_8lane; // Call vendor-specific lua plugin to calculate the xon, xoff, xon_offset, size // Pay attention, the threshold can contain valid value @@ -796,6 +809,7 @@ task_process_status BufferMgrDynamic::refreshPgsForPort(const string &port, cons { port_info_t &portInfo = m_portInfoLookup[port]; string &gearbox_model = portInfo.gearbox_model; + bool is8lane = portInfo.is_8lane; bool isHeadroomUpdated = false; buffer_pg_lookup_t &portPgs = m_portPgLookup[port]; set profilesToBeReleased; @@ -834,8 +848,8 @@ task_process_status BufferMgrDynamic::refreshPgsForPort(const string &port, cons { threshold = m_defaultThreshold; } - newProfile = getDynamicProfileName(speed, cable_length, mtu, threshold, gearbox_model); - auto rc = allocateProfile(speed, cable_length, mtu, threshold, gearbox_model, newProfile); + newProfile = getDynamicProfileName(speed, cable_length, mtu, threshold, gearbox_model, is8lane); + auto rc = allocateProfile(speed, cable_length, mtu, threshold, gearbox_model, is8lane, newProfile); if (task_process_status::task_success != rc) return rc; @@ -1369,6 +1383,12 @@ task_process_status BufferMgrDynamic::handlePortTable(KeyOpFieldsValuesTuple &tu for (auto i : kfvFieldsValues(tuple)) { + if (fvField(i) == "lanes") + { + auto &lanes = fvValue(i); + portInfo.is_8lane = (count(lanes.begin(), lanes.end(), ',') + 1 == 8); + } + if (fvField(i) == "speed" && fvValue(i) != portInfo.speed) { speed_updated = true; diff --git a/cfgmgr/buffermgrdyn.h b/cfgmgr/buffermgrdyn.h index 1a68305909..8c6ac21a4a 100644 --- a/cfgmgr/buffermgrdyn.h +++ b/cfgmgr/buffermgrdyn.h @@ -53,6 +53,7 @@ typedef struct { std::string cable_length; std::string port_mtu; std::string gearbox_model; + bool is_8lane; // APPL_DB.BUFFER_PROFILE fields std::string name; @@ -93,7 +94,7 @@ typedef struct { std::string cable_length; std::string mtu; std::string gearbox_model; -// std::string profile_name; + bool is_8lane; } port_info_t; //TODO: @@ -129,6 +130,8 @@ class BufferMgrDynamic : public Orch typedef std::map buffer_table_handler_map; typedef std::pair buffer_handler_pair; + std::string m_platform; + buffer_table_handler_map m_bufferTableHandlerMap; bool m_portInitDone; @@ -215,7 +218,7 @@ class BufferMgrDynamic : public Orch void transformReference(std::string &name); std::string parseObjectNameFromKey(const std::string &key, size_t pos/* = 1*/); std::string parseObjectNameFromReference(const std::string &reference); - std::string getDynamicProfileName(const std::string &speed, const std::string &cable, const std::string &mtu, const std::string &threshold, const std::string &gearbox_model); + std::string getDynamicProfileName(const std::string &speed, const std::string &cable, const std::string &mtu, const std::string &threshold, const std::string &gearbox_model, bool is_8lane); inline bool isNonZero(const std::string &value) const { return !value.empty() && value != "0"; @@ -230,7 +233,7 @@ class BufferMgrDynamic : public Orch void calculateHeadroomSize(buffer_profile_t &headroom); void checkSharedBufferPoolSize(bool force_update_during_initialization); void recalculateSharedBufferPool(); - task_process_status allocateProfile(const std::string &speed, const std::string &cable, const std::string &mtu, const std::string &threshold, const std::string &gearbox_model, std::string &profile_name); + task_process_status allocateProfile(const std::string &speed, const std::string &cable, const std::string &mtu, const std::string &threshold, const std::string &gearbox_model, bool is_8lane, std::string &profile_name); void releaseProfile(const std::string &profile_name); bool isHeadroomResourceValid(const std::string &port, const buffer_profile_t &profile, const std::string &new_pg); void refreshSharedHeadroomPool(bool enable_state_updated_by_ratio, bool enable_state_updated_by_size); diff --git a/tests/test_buffer_dynamic.py b/tests/test_buffer_dynamic.py index 0d831f7035..07d7703706 100644 --- a/tests/test_buffer_dynamic.py +++ b/tests/test_buffer_dynamic.py @@ -477,11 +477,12 @@ def test_sharedHeadroomPool(self, dvs, testlog): self.app_db.wait_for_field_match('BUFFER_PROFILE_TABLE', expectedProfile, profileInApplDb) self.check_new_profile_in_asic_db(dvs, expectedProfile) - # check ingress_lossless_pool between appldb and asicdb - # there are only two lossless PGs configured on one port. - # hence the shared headroom pool size should be pg xoff * 2 / over subscribe ratio (2) = xoff. + # Check ingress_lossless_pool between appldb and asicdb + # There are only two lossless PGs configured on one port. + # Hence the shared headroom pool size should be (pg xoff * 2 - private headroom) / over subscribe ratio (2) = xoff - private_headroom / 2. ingress_lossless_pool_in_appldb = self.app_db.get_entry('BUFFER_POOL_TABLE', 'ingress_lossless_pool') - shp_size = profileInApplDb['xoff'] + private_headroom = 10 * 1024 + shp_size = str(int(profileInApplDb['xoff']) - int(private_headroom / 2)) ingress_lossless_pool_in_appldb['xoff'] = shp_size # toggle shared headroom pool, it requires some time to update pools time.sleep(20) @@ -514,8 +515,8 @@ def test_sharedHeadroomPool(self, dvs, testlog): # remove size configuration, new over subscribe ratio takes effect ingress_lossless_pool_in_configdb['xoff'] = '0' self.config_db.update_entry('BUFFER_POOL', 'ingress_lossless_pool', ingress_lossless_pool_in_configdb) - # shp size: pg xoff * 2 / over subscribe ratio (4) = pg xoff / 2 - shp_size = str(int(int(profileInApplDb['xoff']) / 2)) + # shp size: (pg xoff * 2 - private headroom) / over subscribe ratio (4) + shp_size = str(int((2 * int(profileInApplDb['xoff']) - private_headroom) / 4)) time.sleep(30) ingress_lossless_pool_in_appldb['xoff'] = shp_size self.app_db.wait_for_field_match('BUFFER_POOL_TABLE', 'ingress_lossless_pool', ingress_lossless_pool_in_appldb) From 467b8cbb9d468b82f9c4432acc4c76411f2f89dd Mon Sep 17 00:00:00 2001 From: Stephen Sun Date: Tue, 22 Jun 2021 14:09:41 +0800 Subject: [PATCH 2/4] Address comments: store number of lanes instead of whether it's an 8-lane port Signed-off-by: Stephen Sun --- cfgmgr/buffer_headroom_mellanox.lua | 6 +++--- cfgmgr/buffermgrdyn.cpp | 20 ++++++++++---------- cfgmgr/buffermgrdyn.h | 8 ++++---- 3 files changed, 17 insertions(+), 17 deletions(-) diff --git a/cfgmgr/buffer_headroom_mellanox.lua b/cfgmgr/buffer_headroom_mellanox.lua index 0a40f03a86..dc23076971 100644 --- a/cfgmgr/buffer_headroom_mellanox.lua +++ b/cfgmgr/buffer_headroom_mellanox.lua @@ -3,7 +3,7 @@ -- ARGV[2] - cable length -- ARGV[3] - port mtu -- ARGV[4] - gearbox delay --- ARGV[5] - whether the port is an 8-lane port +-- ARGV[5] - lane count of the ports on which the profile will be applied -- parameters retried from databases: -- From CONFIG_DB.LOSSLESS_TRAFFIC_PATTERN @@ -27,7 +27,7 @@ local port_speed = tonumber(ARGV[1]) local cable_length = tonumber(string.sub(ARGV[2], 1, -2)) local port_mtu = tonumber(ARGV[3]) local gearbox_delay = tonumber(ARGV[4]) -local is_8lane = ARGV[5] +local is_8lane = (ARGV[5] == "8") local appl_db = "0" local config_db = "4" @@ -103,7 +103,7 @@ local headroom_size local speed_overhead -- Adjustment for 8-lane port -if is_8lane ~= nil and is_8lane == "true" then +if is_8lane ~= nil and is_8lane then pipeline_latency = pipeline_latency * 2 - 1024 speed_overhead = port_mtu else diff --git a/cfgmgr/buffermgrdyn.cpp b/cfgmgr/buffermgrdyn.cpp index 967743ff25..34a688f3dd 100644 --- a/cfgmgr/buffermgrdyn.cpp +++ b/cfgmgr/buffermgrdyn.cpp @@ -245,7 +245,7 @@ string BufferMgrDynamic::parseObjectNameFromReference(const string &reference) return parseObjectNameFromKey(objName, 1); } -string BufferMgrDynamic::getDynamicProfileName(const string &speed, const string &cable, const string &mtu, const string &threshold, const string &gearbox_model, bool is_8lane) +string BufferMgrDynamic::getDynamicProfileName(const string &speed, const string &cable, const string &mtu, const string &threshold, const string &gearbox_model, long lane_count) { string buffer_profile_key; @@ -268,9 +268,9 @@ string BufferMgrDynamic::getDynamicProfileName(const string &speed, const string buffer_profile_key = buffer_profile_key + "_" + gearbox_model; } - if (is_8lane && m_platform == "mellanox") + if (m_platform == "mellanox") { - if (speed != "400000") + if ((speed != "400000") && (lane_count == 8)) { buffer_profile_key = buffer_profile_key + "_8lane"; } @@ -296,7 +296,7 @@ void BufferMgrDynamic::calculateHeadroomSize(buffer_profile_t &headroom) argv.emplace_back(headroom.cable_length); argv.emplace_back(headroom.port_mtu); argv.emplace_back(m_identifyGearboxDelay); - argv.emplace_back(headroom.is_8lane ? "true" : "false"); + argv.emplace_back(to_string(headroom.lane_count)); try { @@ -583,7 +583,7 @@ void BufferMgrDynamic::updateBufferPgToDb(const string &key, const string &profi } // We have to check the headroom ahead of applying them -task_process_status BufferMgrDynamic::allocateProfile(const string &speed, const string &cable_len, const string &mtu, const string &threshold, const string &gearbox_model, bool is_8lane, string &profile_name) +task_process_status BufferMgrDynamic::allocateProfile(const string &speed, const string &cable_len, const string &mtu, const string &threshold, const string &gearbox_model, long lane_count, string &profile_name) { // Create record in BUFFER_PROFILE table @@ -607,7 +607,7 @@ task_process_status BufferMgrDynamic::allocateProfile(const string &speed, const profile.cable_length = cable_len; profile.port_mtu = mtu; profile.gearbox_model = gearbox_model; - profile.is_8lane = is_8lane; + profile.lane_count = lane_count; // Call vendor-specific lua plugin to calculate the xon, xoff, xon_offset, size // Pay attention, the threshold can contain valid value @@ -809,7 +809,7 @@ task_process_status BufferMgrDynamic::refreshPgsForPort(const string &port, cons { port_info_t &portInfo = m_portInfoLookup[port]; string &gearbox_model = portInfo.gearbox_model; - bool is8lane = portInfo.is_8lane; + long laneCount = portInfo.lane_count; bool isHeadroomUpdated = false; buffer_pg_lookup_t &portPgs = m_portPgLookup[port]; set profilesToBeReleased; @@ -848,8 +848,8 @@ task_process_status BufferMgrDynamic::refreshPgsForPort(const string &port, cons { threshold = m_defaultThreshold; } - newProfile = getDynamicProfileName(speed, cable_length, mtu, threshold, gearbox_model, is8lane); - auto rc = allocateProfile(speed, cable_length, mtu, threshold, gearbox_model, is8lane, newProfile); + newProfile = getDynamicProfileName(speed, cable_length, mtu, threshold, gearbox_model, laneCount); + auto rc = allocateProfile(speed, cable_length, mtu, threshold, gearbox_model, laneCount, newProfile); if (task_process_status::task_success != rc) return rc; @@ -1386,7 +1386,7 @@ task_process_status BufferMgrDynamic::handlePortTable(KeyOpFieldsValuesTuple &tu if (fvField(i) == "lanes") { auto &lanes = fvValue(i); - portInfo.is_8lane = (count(lanes.begin(), lanes.end(), ',') + 1 == 8); + portInfo.lane_count = count(lanes.begin(), lanes.end(), ',') + 1; } if (fvField(i) == "speed" && fvValue(i) != portInfo.speed) diff --git a/cfgmgr/buffermgrdyn.h b/cfgmgr/buffermgrdyn.h index 8c6ac21a4a..c14d506dab 100644 --- a/cfgmgr/buffermgrdyn.h +++ b/cfgmgr/buffermgrdyn.h @@ -53,7 +53,7 @@ typedef struct { std::string cable_length; std::string port_mtu; std::string gearbox_model; - bool is_8lane; + long lane_count; // APPL_DB.BUFFER_PROFILE fields std::string name; @@ -94,7 +94,7 @@ typedef struct { std::string cable_length; std::string mtu; std::string gearbox_model; - bool is_8lane; + long lane_count; } port_info_t; //TODO: @@ -218,7 +218,7 @@ class BufferMgrDynamic : public Orch void transformReference(std::string &name); std::string parseObjectNameFromKey(const std::string &key, size_t pos/* = 1*/); std::string parseObjectNameFromReference(const std::string &reference); - std::string getDynamicProfileName(const std::string &speed, const std::string &cable, const std::string &mtu, const std::string &threshold, const std::string &gearbox_model, bool is_8lane); + std::string getDynamicProfileName(const std::string &speed, const std::string &cable, const std::string &mtu, const std::string &threshold, const std::string &gearbox_model, long lane_count); inline bool isNonZero(const std::string &value) const { return !value.empty() && value != "0"; @@ -233,7 +233,7 @@ class BufferMgrDynamic : public Orch void calculateHeadroomSize(buffer_profile_t &headroom); void checkSharedBufferPoolSize(bool force_update_during_initialization); void recalculateSharedBufferPool(); - task_process_status allocateProfile(const std::string &speed, const std::string &cable, const std::string &mtu, const std::string &threshold, const std::string &gearbox_model, bool is_8lane, std::string &profile_name); + task_process_status allocateProfile(const std::string &speed, const std::string &cable, const std::string &mtu, const std::string &threshold, const std::string &gearbox_model, long lane_count, std::string &profile_name); void releaseProfile(const std::string &profile_name); bool isHeadroomResourceValid(const std::string &port, const buffer_profile_t &profile, const std::string &new_pg); void refreshSharedHeadroomPool(bool enable_state_updated_by_ratio, bool enable_state_updated_by_size); From 031f64283bf2f34cc25640211b3e4e21e6d0aba4 Mon Sep 17 00:00:00 2001 From: Stephen Sun Date: Wed, 23 Jun 2021 11:11:45 +0000 Subject: [PATCH 3/4] Read and check the number of lanes regardless of the ASIC type Signed-off-by: Stephen Sun --- cfgmgr/buffer_check_headroom_mellanox.lua | 11 ++----- cfgmgr/buffer_pool_mellanox.lua | 40 ++++++++++------------- 2 files changed, 20 insertions(+), 31 deletions(-) diff --git a/cfgmgr/buffer_check_headroom_mellanox.lua b/cfgmgr/buffer_check_headroom_mellanox.lua index 08f955fe4c..d700862448 100644 --- a/cfgmgr/buffer_check_headroom_mellanox.lua +++ b/cfgmgr/buffer_check_headroom_mellanox.lua @@ -40,13 +40,8 @@ redis.call('SELECT', config_db) local lanes --- On SPC3 switch, we need to know whether it's a 8-lane port because it has extra pipeline latency -local is_spc3 = false -local platform = redis.call('HGET', 'DEVICE_METADATA|localhost', 'platform') -if platform and string.sub(platform, 1, 16) == "x86_64-mlnx_msn4" then - is_spc3 = true - lanes = redis.call('HGET', 'PORT|' .. port, 'lanes') -end +-- We need to know whether it's a 8-lane port because it has extra pipeline latency +lanes = redis.call('HGET', 'PORT|' .. port, 'lanes') -- Fetch the threshold from STATE_DB redis.call('SELECT', state_db) @@ -58,7 +53,7 @@ end local asic_keys = redis.call('KEYS', 'ASIC_TABLE*') local pipeline_latency = tonumber(redis.call('HGET', asic_keys[1], 'pipeline_latency')) -if is_spc3 and is_port_with_8lanes(lanes) then +if is_port_with_8lanes(lanes) then -- The pipeline latency should be adjusted accordingly for ports with 2 buffer units pipeline_latency = pipeline_latency * 2 - 1 end diff --git a/cfgmgr/buffer_pool_mellanox.lua b/cfgmgr/buffer_pool_mellanox.lua index f43109c168..76316936bc 100644 --- a/cfgmgr/buffer_pool_mellanox.lua +++ b/cfgmgr/buffer_pool_mellanox.lua @@ -10,8 +10,6 @@ local port_count_8lanes = 0 -- Number of lossy PG on ports with 8 lanes local lossypg_8lanes = 0 -local is_spc3 = false - -- Private headrom local private_headroom = 10 * 1024 @@ -61,7 +59,7 @@ local function iterate_all_items(all_items, check_lossless) size = 1 + tonumber(string.sub(range, -1)) - tonumber(string.sub(range, 1, 1)) end profiles[profile_name] = profile_ref_count + size - if is_spc3 and port_set_8lanes[port] and profile_name == 'BUFFER_PROFILE_TABLE:ingress_lossy_profile' then + if port_set_8lanes[port] and profile_name == 'BUFFER_PROFILE_TABLE:ingress_lossy_profile' then lossypg_8lanes = lossypg_8lanes + size end if check_lossless and lossless_profiles[profile_name] then @@ -83,26 +81,22 @@ local ports_table = redis.call('KEYS', 'PORT|*') total_port = #ports_table -- Initialize the port_set_8lanes set -local platform = redis.call('HGET', 'DEVICE_METADATA|localhost', 'platform') -if platform and string.sub(platform, 1, 16) == "x86_64-mlnx_msn4" then - is_spc3 = true - local lanes - local number_of_lanes - local port - for i = 1, total_port, 1 do - -- Load lanes from PORT table - lanes = redis.call('HGET', ports_table[i], 'lanes') - if lanes then - local _ - _, number_of_lanes = string.gsub(lanes, ",", ",") - number_of_lanes = number_of_lanes + 1 - port = string.sub(ports_table[i], 6, -1) - if (number_of_lanes == 8) then - port_set_8lanes[port] = true - port_count_8lanes = port_count_8lanes + 1 - else - port_set_8lanes[port] = false - end +local lanes +local number_of_lanes +local port +for i = 1, total_port, 1 do + -- Load lanes from PORT table + lanes = redis.call('HGET', ports_table[i], 'lanes') + if lanes then + local _ + _, number_of_lanes = string.gsub(lanes, ",", ",") + number_of_lanes = number_of_lanes + 1 + port = string.sub(ports_table[i], 6, -1) + if (number_of_lanes == 8) then + port_set_8lanes[port] = true + port_count_8lanes = port_count_8lanes + 1 + else + port_set_8lanes[port] = false end end end From ea93f122dadc0ffce6c8fd750051c4c16fcd9a01 Mon Sep 17 00:00:00 2001 From: Stephen Sun Date: Wed, 23 Jun 2021 12:19:51 +0000 Subject: [PATCH 4/4] Add some explaination regarding the "8lane" in the profile name Signed-off-by: Stephen Sun --- cfgmgr/buffermgrdyn.cpp | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/cfgmgr/buffermgrdyn.cpp b/cfgmgr/buffermgrdyn.cpp index 34a688f3dd..ff99e28636 100644 --- a/cfgmgr/buffermgrdyn.cpp +++ b/cfgmgr/buffermgrdyn.cpp @@ -272,6 +272,16 @@ string BufferMgrDynamic::getDynamicProfileName(const string &speed, const string { if ((speed != "400000") && (lane_count == 8)) { + // On Mellanox platform, ports with 8 lanes have different(double) xon value then other ports + // For ports at speed other than 400G can have + // - 8 lanes, double xon + // - other number of lanes, normal xon + // So they can not share the same buffer profiles. + // An extra "_8lane" is added to the name of buffer profiles to distinguish both scenarios + // Eg. + // - A 100G port with 8 lanes will use buffer profile "pg_profile_100000_5m_8lane_profile" + // - A 100G port with 4 lanes will use buffer profile "pg_profile_100000_5m_profile" + // Currently, 400G ports can only have 8 lanes. So we don't add this to the profile buffer_profile_key = buffer_profile_key + "_8lane"; } }