Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add CLI configuration options for teamd retry count feature #2642

Merged
merged 28 commits into from
Jun 2, 2023
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
Show all changes
28 commits
Select commit Hold shift + click to select a range
42b20c1
Add CLI configuration options for teamd retry count feature
saiarcot895 Feb 1, 2023
b42b635
Add test for error case from teamd when it's not running
saiarcot895 Feb 1, 2023
e9e9af0
Fix up test cases
saiarcot895 Feb 1, 2023
f834b8a
Add some error handling if teamdctl doesn't exist
saiarcot895 Feb 7, 2023
bd40c1b
Merge commit 'd433b2f954e446db7a655e882a7274cd5bce3a50' into teamd-re…
saiarcot895 Apr 20, 2023
7fc5ebd
Add probe functionality and sending current LACPDU packet functionality
saiarcot895 Apr 26, 2023
b5b372b
Check to see if the retry count feature is enabled before doing a get…
saiarcot895 May 4, 2023
c3c6b2e
Add option to only send probe packets or only change retry count
saiarcot895 May 4, 2023
ad54c4c
Call the teamd retry count script if doing a warm-reboot
saiarcot895 May 4, 2023
fc9195f
Fix pycheck errors, and disable scapy's IPv6 and verbose mode
saiarcot895 May 15, 2023
44a6712
Make teamd retry count support optional
saiarcot895 May 15, 2023
5aa89b5
Address review comments, and restructure code to increase code coverage
saiarcot895 May 17, 2023
1a4e17f
Address some review comments
saiarcot895 May 17, 2023
bbad1e3
Replace tabs with spaces
saiarcot895 May 17, 2023
5acd304
Verify that expected keys are present in the data returned from teamdctl
saiarcot895 May 17, 2023
0f4f822
Merge commit '7d2ca0b' into teamd-retry-count-cli
saiarcot895 May 18, 2023
e6acfe0
Fix TimeoutExpired undefined error
saiarcot895 May 18, 2023
9e2d7a3
Add ability to mock subprocess calls (at a limited level)
saiarcot895 May 19, 2023
65c1bdb
Return an actual subprocess object, and add a test for checking timeout
saiarcot895 May 19, 2023
edea4ce
Change variable syntax
saiarcot895 May 22, 2023
f76e1e9
Fix set being accessed with an index
saiarcot895 May 22, 2023
e547f50
Add option to warm-reboot script to control if teamd retry count is r…
saiarcot895 May 23, 2023
a5966f2
Move the teamd retry count check to before orchagent
saiarcot895 May 23, 2023
1670cb7
Move retry count script start to be prior to point-of-no-return
saiarcot895 May 23, 2023
ee72908
Set executable bit
saiarcot895 May 23, 2023
c719f32
Address PR comments
saiarcot895 May 30, 2023
01852c6
Change to case-insensitive string contains check
saiarcot895 May 30, 2023
18fba7c
Make sure the global abort variable is used
saiarcot895 Jun 2, 2023
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
57 changes: 57 additions & 0 deletions config/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -2255,6 +2255,63 @@ def del_portchannel_member(ctx, portchannel_name, port_name):
except JsonPatchConflict:
ctx.fail("Invalid or nonexistent portchannel or interface. Please ensure existence of portchannel member.")

# 'retry-count' subgroup of the portchannel group; invoked by name as 'retry-count'.
@portchannel.group(cls=clicommon.AbbreviationGroup, name='retry-count')
@click.pass_context
def portchannel_retry_count(ctx):
    # Container group only: it does no work itself, subcommands attach to it.
    pass

@portchannel_retry_count.command('get')
@click.argument('portchannel_name', metavar='<portchannel_name>', required=True)
@click.pass_context
def get_portchannel_retry_count(ctx, portchannel_name):
    """Get the retry count for a port channel"""
    # NOTE: the docstring above is the click help text; keep it short and stable.
    #
    # Flow: optionally validate the name and its presence in the config DB, then
    # ask teamdctl for the 'runner.retry_count' state item and echo its output.
    # Every failure path ends in ctx.fail(), which aborts the command.
    db = ValidatedConfigDBConnector(ctx.obj['db'])

    if ADHOC_VALIDATION:
        # Don't proceed if the port channel name is not valid
        if is_portchannel_name_valid(portchannel_name) is False:
            ctx.fail("{} is invalid!, name should have prefix '{}' and suffix '{}'"
                     .format(portchannel_name, CFG_PORTCHANNEL_PREFIX, CFG_PORTCHANNEL_NO))

        # Don't proceed if the port channel does not exist
        if is_portchannel_present_in_db(db, portchannel_name) is False:
            ctx.fail("{} is not present.".format(portchannel_name))

    # Only the calls that can actually raise live inside the try block.
    try:
        proc = subprocess.Popen(
            ["teamdctl", portchannel_name, "state", "item", "get", "runner.retry_count"],
            text=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        # Bound the wait so an unresponsive teamdctl cannot hang the CLI forever.
        output, err = proc.communicate(timeout=10)
    except FileNotFoundError:
        ctx.fail("Unable to get the retry count: teamdctl could not be run")
    except subprocess.TimeoutExpired:
        # TimeoutExpired can only come from communicate(), so proc is bound here.
        # Kill the child and reap it so no zombie process or open pipe is left.
        proc.kill()
        proc.communicate()
        ctx.fail("Unable to get the retry count: teamdctl call timed out")

    if proc.returncode != 0:
        ctx.fail("Unable to get the retry count: {}".format(err.strip()))
    click.echo(output.strip())

@portchannel_retry_count.command('set')
@click.argument('portchannel_name', metavar='<portchannel_name>', required=True)
@click.argument('retry_count', metavar='<retry_count>', required=True, type=click.IntRange(3,10))
@click.pass_context
def set_portchannel_retry_count(ctx, portchannel_name, retry_count):
    """Set the retry count for a port channel"""
    # NOTE: the docstring above is the click help text; keep it short and stable.
    #
    # retry_count has already been range-checked (3..10) by click.IntRange.
    # Every failure path ends in ctx.fail(), which aborts the command.

    # Don't proceed if the port channel name is not valid
    # NOTE(review): here the name check runs regardless of ADHOC_VALIDATION,
    # which may differ from sibling commands - confirm which is intended.
    if is_portchannel_name_valid(portchannel_name) is False:
        ctx.fail("{} is invalid!, name should have prefix '{}' and suffix '{}'"
                 .format(portchannel_name, CFG_PORTCHANNEL_PREFIX, CFG_PORTCHANNEL_NO))

    db = ValidatedConfigDBConnector(ctx.obj['db'])

    if ADHOC_VALIDATION:
        # Don't proceed if the port channel does not exist
        if is_portchannel_present_in_db(db, portchannel_name) is False:
            ctx.fail("{} is not present.".format(portchannel_name))

    # Only the calls that can actually raise live inside the try block.
    try:
        proc = subprocess.Popen(
            ["teamdctl", portchannel_name, "state", "item", "set", "runner.retry_count", str(retry_count)],
            text=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        # Bound the wait so an unresponsive teamdctl cannot hang the CLI forever.
        # stdout is captured but not used; only stderr is reported on failure.
        _, err = proc.communicate(timeout=10)
    except FileNotFoundError:
        ctx.fail("Unable to set the retry count: teamdctl could not be run")
    except subprocess.TimeoutExpired:
        # TimeoutExpired can only come from communicate(), so proc is bound here.
        # Kill the child and reap it so no zombie process or open pipe is left.
        proc.kill()
        proc.communicate()
        ctx.fail("Unable to set the retry count: teamdctl call timed out")

    if proc.returncode != 0:
        ctx.fail("Unable to set the retry count: {}".format(err.strip()))


#
# 'mirror_session' group ('config mirror_session ...')
Expand Down
146 changes: 146 additions & 0 deletions scripts/teamd_increase_retry_count.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,146 @@
#!/usr/bin/python3

import subprocess
import json
from scapy.all import *
import scapy.contrib.lacp
import os
import re
import sys

from swsscommon.swsscommon import DBConnector, Table

# Keyed by the first dot-separated component of a peer's SONiC version string.
# main() treats a peer as supporting the retry count feature when the second
# component, parsed as an integer, is >= the value stored here.
MIN_TAG_FOR_EACH_VERSION = {
    "20220531": 500
}

class LACPRetryCount(Packet):
    # Scapy packet layout for an LACPDU that carries two extra retry-count
    # sections (types 0x80 and 0x81) between the collector and terminator
    # sections. The order of fields_desc is the on-wire byte order, so entries
    # must not be reordered.
    name = "LACPRetryCount"
    fields_desc = [
        # Version byte; defaults to 0xf1.
        ByteField("version", 0xf1),
        # Actor section (type 1, declared length 20).
        ByteField("actor_type", 1),
        ByteField("actor_length", 20),
        ShortField("actor_system_priority", 0),
        MACField("actor_system", None),
        ShortField("actor_key", 0),
        ShortField("actor_port_priority", 0),
        ShortField("actor_port_number", 0),
        ByteField("actor_state", 0),
        XStrFixedLenField("actor_reserved", "", 3),
        # Partner section (type 2, declared length 20); mirrors the actor section.
        ByteField("partner_type", 2),
        ByteField("partner_length", 20),
        ShortField("partner_system_priority", 0),
        MACField("partner_system", None),
        ShortField("partner_key", 0),
        ShortField("partner_port_priority", 0),
        ShortField("partner_port_number", 0),
        ByteField("partner_state", 0),
        XStrFixedLenField("partner_reserved", "", 3),
        # Collector section (type 3, declared length 16).
        ByteField("collector_type", 3),
        ByteField("collector_length", 16),
        ShortField("collector_max_delay", 0),
        XStrFixedLenField("collector_reserved", "", 12),
        # Actor retry count section (type 0x80, declared length 4).
        ByteField("actor_retry_count_type", 0x80),
        ByteField("actor_retry_count_length", 4),
        ByteField("actor_retry_count", 0),
        XStrFixedLenField("actor_retry_count_reserved", "", 1),
        # Partner retry count section (type 0x81, declared length 4).
        ByteField("partner_retry_count_type", 0x81),
        ByteField("partner_retry_count_length", 4),
        ByteField("partner_retry_count", 0),
        XStrFixedLenField("partner_retry_count_reserved", "", 1),
        # Terminator section (type 0, length 0) followed by 42 reserved bytes.
        ByteField("terminator_type", 0),
        ByteField("terminator_length", 0),
        XStrFixedLenField("reserved", "", 42),
    ]

# Register LACPRetryCount as a payload layer of SlowProtocol for subtype 1.
bind_layers(scapy.contrib.lacp.SlowProtocol, LACPRetryCount, subtype=1)

def getPortChannelConfig(portChannelName):
    """Run `teamdctl <portChannelName> state dump` and return its stdout parsed as JSON."""
    command = ["teamdctl", portChannelName, "state", "dump"]
    # stdout/stderr are both captured; the exit status is not inspected here.
    result = subprocess.run(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    return json.loads(result.stdout)

def getLldpNeighbors():
    """Run `lldpctl -f json` and return its stdout parsed as JSON."""
    command = ["lldpctl", "-f", "json"]
    # stdout/stderr are both captured; the exit status is not inspected here.
    result = subprocess.run(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    return json.loads(result.stdout)

def craftLacpPacket(portChannelConfig, portName, *, actorRetryCount=5, partnerRetryCount=3):
    """Build an Ethernet / SlowProtocol / LACPRetryCount packet for one member port.

    Parameters:
        portChannelConfig: parsed port channel state; this function reads
            ["ports"][portName]["ifinfo"]["dev_addr"] and the
            "actor_lacpdu_info" / "partner_lacpdu_info" dicts under
            ["ports"][portName]["runner"].
        portName: member port whose state is copied into the packet.
        actorRetryCount: value for the actor_retry_count field (default 5,
            the value that used to be hard-coded).
        partnerRetryCount: value for the partner_retry_count field (default 3,
            the value that used to be hard-coded).

    Returns:
        The stacked scapy packet; nothing is sent by this function.

    Raises:
        KeyError: if any of the keys listed above is missing.
    """
    portConfig = portChannelConfig["ports"][portName]
    runnerInfo = portConfig["runner"]
    actorConfig = runnerInfo["actor_lacpdu_info"]
    partnerConfig = runnerInfo["partner_lacpdu_info"]

    # Destination is the Slow Protocols multicast address; 0x8809 is the
    # Slow Protocols EtherType. The source is the member port's own MAC.
    l2 = Ether(dst="01:80:c2:00:00:02", src=portConfig["ifinfo"]["dev_addr"], type=0x8809)
    l3 = scapy.contrib.lacp.SlowProtocol(subtype=0x01)
    l4 = LACPRetryCount()

    # (packet field suffix, key in the *_lacpdu_info dict); the same mapping
    # applies to both the actor and the partner side.
    fieldToKey = (
        ("system_priority", "system_priority"),
        ("system", "system"),
        ("key", "key"),
        ("port_priority", "port_priority"),
        ("port_number", "port"),
        ("state", "state"),
    )
    for side, info in (("actor", actorConfig), ("partner", partnerConfig)):
        for fieldSuffix, infoKey in fieldToKey:
            setattr(l4, "{}_{}".format(side, fieldSuffix), info[infoKey])

    l4.actor_retry_count = actorRetryCount
    l4.partner_retry_count = partnerRetryCount
    return l2 / l3 / l4

def getPortChannels():
    """Return the keys of the PORTCHANNEL table in CONFIG_DB as a list."""
    # Keep the connector bound to a local name while the Table is in use.
    connector = DBConnector("CONFIG_DB", 0)
    keys = Table(connector, "PORTCHANNEL").getKeys()
    return list(keys)

def _lldpInterfacesByName(lldpInfo):
    """Return {interface name: LLDP entry} from parsed `lldpctl -f json` output.

    Accepts "interface" as a list of single-key objects or as a single object,
    and returns an empty dict when it is absent. The first entry wins when an
    interface name appears more than once.
    """
    interfaces = lldpInfo.get("lldp", {}).get("interface", {})
    if isinstance(interfaces, dict):
        return interfaces
    byName = {}
    for entry in interfaces:
        for name, info in entry.items():
            byName.setdefault(name, info)
    return byName


def _versionSupportsRetryCount(sonicVersion):
    """Return True if the SONiC version string indicates retry count support.

    A version supports the feature when it contains "teamd-retry-count", or
    when its first dot-separated component is a key of
    MIN_TAG_FOR_EACH_VERSION and its second component is an integer >= the
    stored minimum. A missing or non-numeric second component counts as
    "not supported" instead of raising.
    """
    if "teamd-retry-count" in sonicVersion:
        return True
    components = sonicVersion.split(".")
    minTag = MIN_TAG_FOR_EACH_VERSION.get(components[0])
    if minTag is None or len(components) < 2:
        return False
    try:
        return int(components[1]) >= minTag
    except ValueError:
        return False


def main():
    """Raise the teamd retry count on port channels whose peer supports it.

    Exits with status 1 unless run with an effective UID of 0. For every port
    channel returned by getPortChannels(), member ports are examined in turn
    until one yields a verdict from its LLDP neighbor description; ports
    without usable LLDP data are skipped with a warning. When the peer is
    judged to support the feature, `config portchannel retry-count set
    <name> 5` is run.
    """
    if os.geteuid() != 0:
        print("Root privileges required for this operation")
        sys.exit(1)

    portChannels = getPortChannels()
    if not portChannels:
        return

    # The LLDP neighbor table does not depend on the port channel being
    # examined, so query it once instead of once per port channel.
    lldpInterfaces = _lldpInterfacesByName(getLldpNeighbors())

    for portChannel in portChannels:
        config = getPortChannelConfig(portChannel)
        peerSupportsFeature = None
        for portName in config["ports"]:
            chassis = (lldpInterfaces.get(portName) or {}).get("chassis")
            if not chassis:
                print("WARNING: No LLDP info available for {}; skipping".format(portName))
                continue
            # The first key under "chassis" is taken as the peer name.
            peerName, peerInfo = next(iter(chassis.items()))
            if not isinstance(peerInfo, dict) or "descr" not in peerInfo:
                print("WARNING: No peer description available via LLDP for {}; skipping".format(portName))
                continue
            if "SONiC" not in peerInfo["descr"]:
                print("WARNING: Peer device is not a SONiC device; skipping")
                peerSupportsFeature = False
                break
            sonicVersionMatch = re.search(r"SONiC Software Version: SONiC\.(.*?)(?: - |$)", peerInfo["descr"])
            if not sonicVersionMatch:
                print("WARNING: Unable to get SONiC version info for peer device; skipping")
                continue
            sonicVersion = sonicVersionMatch.group(1)
            peerSupportsFeature = _versionSupportsRetryCount(sonicVersion)
            if peerSupportsFeature:
                print("SUCCESS: Peer device {} is running version of SONiC ({}) with teamd retry count feature".format(peerName, sonicVersion))
            else:
                print("WARNING: Peer device {} is running version of SONiC ({}) without teamd retry count feature; skipping".format(peerName, sonicVersion))
            # One member port with a parseable version decides for the whole port channel.
            break

        if peerSupportsFeature:
            retryCountChangeProcess = subprocess.run(["config", "portchannel", "retry-count", "set", portChannel, "5"])
            # NOTE(review): packets are sent only when the config command exits
            # non-zero - presumably a fallback for when the local set fails;
            # confirm this condition is intended.
            if retryCountChangeProcess.returncode != 0:
                for portName in config["ports"]:
                    sendp(craftLacpPacket(config, portName), iface=portName)

# Run main() only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()
1 change: 1 addition & 0 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -138,6 +138,7 @@
'scripts/soft-reboot',
'scripts/storyteller',
'scripts/syseeprom-to-json',
'scripts/teamd_increase_retry_count.py',
'scripts/tempershow',
'scripts/tunnelstat',
'scripts/update_json.py',
Expand Down
70 changes: 70 additions & 0 deletions tests/portchannel_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -268,6 +268,76 @@ def test_delete_portchannel_which_is_member_of_a_vlan(self):
assert result.exit_code != 0
assert "PortChannel1001 has vlan Vlan4000 configured, remove vlan membership to proceed" in result.output

def test_get_invalid_portchannel_retry_count(self):
    """'retry-count get' must reject a name that is not a valid port channel name."""
    cli_runner = CliRunner()
    database = Db()
    context_obj = {'db': database.cfgdb}
    retry_count_group = config.config.commands["portchannel"].commands["retry-count"]

    # "Ethernet48" is an interface name, not a port channel name
    outcome = cli_runner.invoke(retry_count_group.commands["get"], ["Ethernet48"], obj=context_obj)
    print(outcome.exit_code)
    print(outcome.output)
    assert outcome.exit_code != 0
    assert "Error: Ethernet48 is invalid!" in outcome.output

def test_set_invalid_portchannel_retry_count(self):
    """'retry-count set' must reject a name that is not a valid port channel name."""
    cli_runner = CliRunner()
    database = Db()
    context_obj = {'db': database.cfgdb}
    retry_count_group = config.config.commands["portchannel"].commands["retry-count"]

    # "Ethernet48" is an interface name, not a port channel name
    outcome = cli_runner.invoke(retry_count_group.commands["set"], ["Ethernet48", "5"], obj=context_obj)
    print(outcome.exit_code)
    print(outcome.output)
    assert outcome.exit_code != 0
    assert "Error: Ethernet48 is invalid!" in outcome.output

def test_get_non_existing_portchannel_retry_count(self):
    """'retry-count get' must fail for a port channel that has not been created."""
    cli_runner = CliRunner()
    database = Db()
    context_obj = {'db': database.cfgdb}
    retry_count_group = config.config.commands["portchannel"].commands["retry-count"]

    # PortChannel0005 is a well-formed name with no matching port channel
    outcome = cli_runner.invoke(retry_count_group.commands["get"], ["PortChannel0005"], obj=context_obj)
    print(outcome.exit_code)
    print(outcome.output)
    assert outcome.exit_code != 0
    assert "Error: PortChannel0005 is not present." in outcome.output

def test_set_non_existing_portchannel_retry_count(self):
    """'retry-count set' must fail for a port channel that has not been created."""
    cli_runner = CliRunner()
    database = Db()
    context_obj = {'db': database.cfgdb}
    retry_count_group = config.config.commands["portchannel"].commands["retry-count"]

    # PortChannel0005 is a well-formed name with no matching port channel
    outcome = cli_runner.invoke(retry_count_group.commands["set"], ["PortChannel0005", "5"], obj=context_obj)
    print(outcome.exit_code)
    print(outcome.output)
    assert outcome.exit_code != 0
    assert "Error: PortChannel0005 is not present." in outcome.output

def test_get_portchannel_retry_count(self):
    """'retry-count get' on PortChannel1001 exits non-zero in the test environment."""
    cli_runner = CliRunner()
    database = Db()
    context_obj = {'db': database.cfgdb}
    retry_count_group = config.config.commands["portchannel"].commands["retry-count"]

    outcome = cli_runner.invoke(retry_count_group.commands["get"], ["PortChannel1001"], obj=context_obj)
    # a failure is expected: the actual teamd process is not running during testing
    print(outcome.exit_code)
    assert outcome.exit_code != 0

def test_set_portchannel_retry_count(self):
    """'retry-count set' on PortChannel1001 exits non-zero in the test environment."""
    cli_runner = CliRunner()
    database = Db()
    context_obj = {'db': database.cfgdb}
    retry_count_group = config.config.commands["portchannel"].commands["retry-count"]

    outcome = cli_runner.invoke(retry_count_group.commands["set"], ["PortChannel1001", "5"], obj=context_obj)
    # a failure is expected: the actual teamd process is not running during testing
    print(outcome.exit_code)
    assert outcome.exit_code != 0

@classmethod
def teardown_class(cls):
os.environ['UTILITIES_UNIT_TESTING'] = "0"
Expand Down