From 9d66054e5b8c336d6354048fa74ee0df61b49f47 Mon Sep 17 00:00:00 2001 From: DavidZagury <32644413+DavidZagury@users.noreply.github.com> Date: Wed, 5 May 2021 17:25:40 +0300 Subject: [PATCH] [teammgrd]: Create unique LACP key for port-channel (#1660) Fix issue - Azure/sonic-buildimage#4009 When a member port is added to port-channel, create a unique LACP key. When adding a member port to port-channel set the LACP key to a unique number. The number is extracted from the port-channel name and will be the number at the end of the port-channel name with an additional digit at the beginning in order to make sure that this number will be unique in the system. Why I did it If LACP key is not set, then the peer will not be able to distinguish the ports which are connected to different port-channels, as it will receive the LACP key as 0 for all the ports from different port-channels. How I verified it I configure a SONiC switch to have two port-channels and on a second switch, I created one port-channel for both links between the switches. On the second switch only one of the ports comes up in the PO and the other one stayed down. --- cfgmgr/teammgr.cpp | 52 +++++++++++++++++++++++++++++++++ cfgmgr/teammgr.h | 1 + tests/test_portchannel.py | 61 +++++++++++++++++++++++++++++++++++++++ 3 files changed, 114 insertions(+) diff --git a/cfgmgr/teammgr.cpp b/cfgmgr/teammgr.cpp index cdff7013a07..db1df6884c8 100644 --- a/cfgmgr/teammgr.cpp +++ b/cfgmgr/teammgr.cpp @@ -504,6 +504,52 @@ bool TeamMgr::removeLag(const string &alias) return true; } +// Port-channel names are in the pattern of "PortChannel####" +// +// The LACP key could be generated in 3 ways based on the value in config DB: +// 1. "auto" - LACP key is extracted from the port-channel name and is set to be the number at the end of the port-channel name +// We are adding 1 at the beginning to avoid LACP key collisions between similar LACP keys e.g. PortChannel10 and PortChannel010. +// 2. n - LACP key will be n. +// 3. "" - LACP key will be 0 - exists for backward compatibility. +uint16_t TeamMgr::generateLacpKey(const string& lag) +{ + vector fvs; + m_cfgLagTable.get(lag, fvs); + + auto it = find_if(fvs.begin(), fvs.end(), [](const FieldValueTuple& fv) + { + return fv.first == "lacp_key"; + }); + string lacp_key; + if (it != fvs.end()) + { + lacp_key = it->second; + if (!lacp_key.empty()) + { + try + { + if (lacp_key == "auto") + { + return static_cast(std::stoul("1" + lag.substr(lag.find_first_of("0123456789")))); + } + else + { + return static_cast(std::stoul(lacp_key)); + } + } + catch (const std::exception& e) + { + SWSS_LOG_THROW("Failed to parse LACP key %s for port channel %s", lacp_key.c_str(), lag.c_str()); + } + } + else + { + return 0; + } + } + return 0; +} + // Once a port is enslaved into a port channel, the port's MTU will // be inherited from the master's MTU while the port's admin status // will still be controlled separately. @@ -520,11 +566,17 @@ task_process_status TeamMgr::addLagMember(const string &lag, const string &membe stringstream cmd; string res; + uint16_t keyId = generateLacpKey(lag); // Set admin down LAG member (required by teamd) and enslave it // ip link set dev down; + // teamdctl port config update { "lacp_key": , "link_watch": { "name": "ethtool" } }; // teamdctl port add ; cmd << IP_CMD << " link set dev " << shellquote(member) << " down; "; + cmd << TEAMDCTL_CMD << " " << shellquote(lag) << " port config update " << shellquote(member) + << " '{\"lacp_key\":" + << keyId + << ",\"link_watch\": {\"name\": \"ethtool\"} }'; "; cmd << TEAMDCTL_CMD << " " << shellquote(lag) << " port add " << shellquote(member); if (exec(cmd.str(), res) != 0) diff --git a/cfgmgr/teammgr.h b/cfgmgr/teammgr.h index 0c0ff62579a..3941d1d5a01 100644 --- a/cfgmgr/teammgr.h +++ b/cfgmgr/teammgr.h @@ -56,6 +56,7 @@ class TeamMgr : public Orch bool checkPortIffUp(const std::string &); bool isPortStateOk(const std::string&); bool isLagStateOk(const std::string&); + uint16_t generateLacpKey(const std::string&); }; } diff --git a/tests/test_portchannel.py b/tests/test_portchannel.py index 4e03a81aacd..d03e8611230 100644 --- a/tests/test_portchannel.py +++ b/tests/test_portchannel.py @@ -2,6 +2,7 @@ import re import json import pytest +import itertools from swsscommon import swsscommon @@ -89,6 +90,66 @@ def test_Portchannel(self, dvs, testlog): lagms = lagmtbl.getKeys() assert len(lagms) == 0 + def test_Portchannel_lacpkey(self, dvs, testlog): + portchannelNamesAuto = [("PortChannel001", "Ethernet0", 1001), + ("PortChannel002", "Ethernet4", 1002), + ("PortChannel2", "Ethernet8", 12), + ("PortChannel000", "Ethernet12", 1000)] + + portchannelNames = [("PortChannel0003", "Ethernet16", 0), + ("PortChannel0004", "Ethernet20", 0), + ("PortChannel0005", "Ethernet24", 564)] + + self.cdb = swsscommon.DBConnector(4, dvs.redis_sock, 0) + + # Create PortChannels + tbl = swsscommon.Table(self.cdb, "PORTCHANNEL") + fvs = swsscommon.FieldValuePairs( + [("admin_status", "up"), ("mtu", "9100"), ("oper_status", "up"), ("lacp_key", "auto")]) + + for portchannel in portchannelNamesAuto: + tbl.set(portchannel[0], fvs) + + fvs_no_lacp_key = swsscommon.FieldValuePairs( + [("admin_status", "up"), ("mtu", "9100"), ("oper_status", "up")]) + tbl.set(portchannelNames[0][0], fvs_no_lacp_key) + + fvs_empty_lacp_key = swsscommon.FieldValuePairs( + [("admin_status", "up"), ("mtu", "9100"), ("oper_status", "up"), ("lacp_key", "")]) + tbl.set(portchannelNames[1][0], fvs_empty_lacp_key) + + fvs_set_number_lacp_key = swsscommon.FieldValuePairs( + [("admin_status", "up"), ("mtu", "9100"), ("oper_status", "up"), ("lacp_key", "564")]) + tbl.set(portchannelNames[2][0], fvs_set_number_lacp_key) + time.sleep(1) + + # Add members to PortChannels + tbl = swsscommon.Table(self.cdb, "PORTCHANNEL_MEMBER") + fvs = swsscommon.FieldValuePairs([("NULL", "NULL")]) + + for portchannel in itertools.chain(portchannelNames, portchannelNamesAuto): + tbl.set(portchannel[0] + "|" + portchannel[1], fvs) + time.sleep(1) + + # TESTS here that LACP key is valid and equls to the expected LACP key + # The expected LACP key in the number at the end of the Port-Channel name with a prefix '1' + for portchannel in itertools.chain(portchannelNames, portchannelNamesAuto): + (exit_code, output) = dvs.runcmd("teamdctl " + portchannel[0] + " state dump") + port_state_dump = json.loads(output) + lacp_key = port_state_dump["ports"][portchannel[1]]["runner"]["actor_lacpdu_info"]["key"] + assert lacp_key == portchannel[2] + + # remove PortChannel members + tbl = swsscommon.Table(self.cdb, "PORTCHANNEL_MEMBER") + for portchannel in itertools.chain(portchannelNames, portchannelNamesAuto): + tbl._del(portchannel[0] + "|" + portchannel[1]) + time.sleep(1) + + # remove PortChannel + tbl = swsscommon.Table(self.cdb, "PORTCHANNEL") + for portchannel in itertools.chain(portchannelNames, portchannelNamesAuto): + tbl._del(portchannel[0]) + time.sleep(1) def test_Portchannel_oper_down(self, dvs, testlog):