From fd1b8033bb62bf06edbca606dac2bfe170054aae Mon Sep 17 00:00:00 2001 From: Arun Saravanan Balachandran <52521751+ArunSaravananBalachandran@users.noreply.github.com> Date: Tue, 26 Jan 2021 21:13:16 +0000 Subject: [PATCH] [pcieutil] Add 'pcie-aer' sub-command to display AER stats (#1169) - What I did Add new "pcie-aer" sub-command in pcieutil to display the AER stats. "pcieutil pcie-aer" has four sub-commands - 'all', 'correctable', 'fatal' and 'non-fatal'. 'all' command displays the AER stats for all severities. 'correctable', 'fatal' and 'non-fatal' commands display the AER stats of respective severity. 'device', 'no-zero' options for pcie-aer sub commands ``` root@sonic:/home/admin# pcieutil pcie-aer Usage: pcieutil pcie-aer [OPTIONS] COMMAND [ARGS]... Display PCIe AER status Options: --help Show this message and exit. Commands: all Show all PCIe AER attributes correctable Show PCIe AER correctable attributes fatal Show PCIe AER fatal attributes non-fatal Show PCIe AER non-fatal attributes root@sonic:/home/admin# root@sonic:/home/admin# pcieutil pcie-aer all --help Usage: pcieutil pcie-aer all [OPTIONS] Show all PCIe AER attributes Options: -d, --device <BUS>:<DEV>.<FN> Display stats only for the specified device -nz, --no-zero Display non-zero AER stats --help Show this message and exit. root@sonic:/home/admin# ``` Depends on: Azure/sonic-platform-daemons#100 - How I did it Add new functions in pcieutil, to implement sub-commands for retrieving AER stats from STATE_DB and output it in tabular format. 
---
 pcieutil/main.py                | 180 ++++++++++++++++++++++++++
 tests/mock_tables/state_db.json |  98 +++++++++++++++
 tests/pcieutil_test.py          | 205 ++++++++++++++++++++++++++++++++
 3 files changed, 483 insertions(+)
 create mode 100644 tests/pcieutil_test.py

diff --git a/pcieutil/main.py b/pcieutil/main.py
index de671b38a2..3a8ca57194 100644
--- a/pcieutil/main.py
+++ b/pcieutil/main.py
@@ -7,10 +7,15 @@
 
 try:
     import os
+    import re
     import sys
+    from collections import OrderedDict
 
     import click
     from sonic_py_common import device_info, logger
+    from swsssdk import SonicV2Connector
+    from tabulate import tabulate
+    import utilities_common.cli as clicommon
 except ImportError as e:
     raise ImportError("%s - required module not found" % str(e))
 
@@ -105,6 +110,181 @@ def show():
         click.echo("bus:dev.fn %s:%s.%s - dev_id=0x%s, %s" % (Bus, Dev, Fn, Id, Name))
 
 
+# PCIe AER stats helpers
+
+aer_fields = {
+    "correctable": ['RxErr', 'BadTLP', 'BadDLLP', 'Rollover', 'Timeout', 'NonFatalErr', 'CorrIntErr', 'HeaderOF', 'TOTAL_ERR_COR'],
+    "fatal": ['Undefined', 'DLP', 'SDES', 'TLP', 'FCP', 'CmpltTO', 'CmpltAbrt', 'UnxCmplt', 'RxOF', 'MalfTLP', 'ECRC', 'UnsupReq',
+              'ACSViol', 'UncorrIntErr', 'BlockedTLP', 'AtomicOpBlocked', 'TLPBlockedErr', 'TOTAL_ERR_FATAL'],
+    "non_fatal": ['Undefined', 'DLP', 'SDES', 'TLP', 'FCP', 'CmpltTO', 'CmpltAbrt', 'UnxCmplt', 'RxOF', 'MalfTLP', 'ECRC', 'UnsupReq',
+                  'ACSViol', 'UncorrIntErr', 'BlockedTLP', 'AtomicOpBlocked', 'TLPBlockedErr', 'TOTAL_ERR_NONFATAL']
+}
+
+
+class PcieDevice(click.ParamType):
+    '''Click parameter type for a PCIe device address written in hex as
+    <Bus>:<Dev>.<Fn>; accepted values are normalized to "%02x:%02x.%d"'''
+    name = "<Bus>:<Dev>.<Fn>"
+
+    def convert(self, value, param, ctx):
+        '''Validate value and return it in normalized form, or fail the parse'''
+        # Anchored with '$' so that trailing garbage (e.g. "00:01.0zz") is rejected
+        match = re.match(r'([0-9A-Fa-f]{1,2}):([0-9A-Fa-f]{1,2})\.([0-9A-Fa-f])$', value)
+
+        if not match:
+            self.fail('{} is not in <Bus>:<Dev>.<Fn> format'.format(value), param, ctx)
+
+        Bus, Dev, Fn = [int(val, 16) for val in match.groups()]
+        if Bus > 255:
+            self.fail('Invalid Bus number', param, ctx)
+
+        if Dev > 31:
+            self.fail('Invalid Dev number', param, ctx)
+
+        if Fn > 7:
+            self.fail('Invalid Fn number', param, ctx)
+
+        return "%02x:%02x.%d" % (Bus, Dev, Fn)
+
+
+_pcie_aer_click_options = [
+    click.Option(['-d', '--device', 'device_key'],
+                 type=PcieDevice(),
+                 help="Display stats only for the specified device"),
+    click.Option(['-v', '--verbose'],
+                 is_flag=True,
+                 help="Display all stats")
+]
+
+
+class PcieAerCommand(click.Command):
+    '''This subclass of click.Command provides common options, help
+    and short help text for PCIe AER commands'''
+
+    def __init__(self, *args, **kwargs):
+        super(PcieAerCommand, self).__init__(*args, **kwargs)
+        # Copy the list so commands do not share one mutable params list
+        self.params = list(_pcie_aer_click_options)
+
+    def format_help_text(self, ctx, formatter):
+        formatter.write_paragraph()
+        with formatter.indentation():
+            formatter.write_text("Show {} PCIe AER attributes".format(self.name.replace("_", "-")))
+            formatter.write_text("(Default: Display only non-zero attributes)")
+
+    def get_short_help_str(self, limit=45):
+        return "Show {} PCIe AER attributes".format(self.name.replace("_", "-"))
+
+
+def pcie_aer_display(ctx, severity):
+    '''Print the AER counters of one severity from STATE_DB as a grid table
+
+    ctx      - click context; its params 'device_key' and 'verbose' are read
+    severity - key of aer_fields: "correctable", "fatal" or "non_fatal"
+    '''
+    device_key = ctx.params['device_key']
+    no_zero = not ctx.params['verbose']
+    header = ["AER - " + severity.upper().replace("_", "")]
+    fields = aer_fields[severity]
+    pcie_dev_list = []
+    dev_found = False
+
+    statedb = SonicV2Connector()
+    statedb.connect(statedb.STATE_DB)
+
+    table = OrderedDict()
+    for field in fields:
+        table[field] = [field]
+
+    if device_key:
+        pcie_dev_list = ["PCIE_DEVICE|%s" % device_key]
+    else:
+        keys = statedb.keys(statedb.STATE_DB, "PCIE_DEVICE|*")
+        if keys:
+            pcie_dev_list = sorted(keys)
+
+    for pcie_dev_key in pcie_dev_list:
+        aer_attribute = statedb.get_all(statedb.STATE_DB, pcie_dev_key)
+        if not aer_attribute:
+            continue
+
+        if device_key:
+            dev_found = True
+
+        if no_zero and all(val == '0' for key, val in aer_attribute.items() if key.startswith(severity)):
+            continue
+
+        pcie_dev = pcie_dev_key.split("|")[1]
+        # Fall back to 'NA' (as done for counters) when the entry has no 'id'
+        Id = aer_attribute.get('id', 'NA')
+
+        # Tabulate Header
+        device_name = "%s\n%s" % (pcie_dev, Id)
+        header.append(device_name)
+
+        # Tabulate Row
+        for field in fields:
+            key = severity + "|" + field
+            table[field].append(aer_attribute.get(key, 'NA'))
+
+    if device_key and not dev_found:
+        # Context.exit() expects an integer status, so print the message here
+        click.echo("Device not found in DB", err=True)
+        ctx.exit(1)
+
+    # Strip fields with no non-zero value
+    if no_zero:
+        for field in fields:
+            if all(val == '0' for val in table[field][1:]):
+                del table[field]
+
+    if not (no_zero and (len(header) == 1)):
+        if ctx.obj:
+            click.echo("")
+
+        click.echo(tabulate(list(table.values()), header, tablefmt="grid"))
+        ctx.obj = True
+
+
+# Show PCIe AER status
+@cli.group(cls=clicommon.AliasedGroup)
+@click.pass_context
+def pcie_aer(ctx):
+    '''Display PCIe AER status'''
+    # Set True to insert a line between severities in 'all' context
+    ctx.obj = False
+
+
+@pcie_aer.command(cls=PcieAerCommand)
+@click.pass_context
+def correctable(ctx, device_key, verbose):
+    '''Show correctable PCIe AER attributes'''
+    pcie_aer_display(ctx, "correctable")
+
+
+@pcie_aer.command(cls=PcieAerCommand)
+@click.pass_context
+def fatal(ctx, device_key, verbose):
+    '''Show fatal PCIe AER attributes'''
+    pcie_aer_display(ctx, "fatal")
+
+
+@pcie_aer.command(cls=PcieAerCommand)
+@click.pass_context
+def non_fatal(ctx, device_key, verbose):
+    '''Show non-fatal PCIe AER attributes'''
+    pcie_aer_display(ctx, "non_fatal")
+
+
+@pcie_aer.command(name='all', cls=PcieAerCommand)
+@click.pass_context
+def all_errors(ctx, device_key, verbose):
+    '''Show all PCIe AER attributes'''
+    pcie_aer_display(ctx, "correctable")
+    pcie_aer_display(ctx, "fatal")
+    pcie_aer_display(ctx, "non_fatal")
+
+
 # Show PCIE Vender ID and Device ID
 @cli.command()
 def check():
diff --git a/tests/mock_tables/state_db.json b/tests/mock_tables/state_db.json index 
cd92dda583..c15a1b353d 100644 --- a/tests/mock_tables/state_db.json +++ b/tests/mock_tables/state_db.json @@ -455,5 +455,103 @@ "CHASSIS_MIDPLANE_TABLE|LINE-CARD1": { "ip_address": "192.168.1.2", "access": "False" + }, + "PCIE_DEVICE|00:01.0": { + "correctable|BadDLLP": "0", + "correctable|BadTLP": "0", + "correctable|BadTLP": "1", + "correctable|CorrIntErr": "0", + "correctable|HeaderOF": "0", + "correctable|NonFatalErr": "0", + "correctable|Rollover": "0", + "correctable|RxErr": "0", + "correctable|TOTAL_ERR_COR": "0", + "correctable|TOTAL_ERR_COR": "1", + "correctable|Timeout": "0", + "fatal|ACSViol": "0", + "fatal|AtomicOpBlocked": "0", + "fatal|BlockedTLP": "0", + "fatal|CmpltAbrt": "0", + "fatal|CmpltTO": "0", + "fatal|DLP": "0", + "fatal|ECRC": "0", + "fatal|FCP": "0", + "fatal|MalfTLP": "0", + "fatal|RxOF": "0", + "fatal|SDES": "0", + "fatal|TLP": "0", + "fatal|TLPBlockedErr": "0", + "fatal|TOTAL_ERR_FATAL": "0", + "fatal|UncorrIntErr": "0", + "fatal|Undefined": "0", + "fatal|UnsupReq": "0", + "fatal|UnxCmplt": "0", + "id": "0x0001", + "non_fatal|ACSViol": "0", + "non_fatal|AtomicOpBlocked": "0", + "non_fatal|BlockedTLP": "0", + "non_fatal|CmpltAbrt": "0", + "non_fatal|CmpltTO": "0", + "non_fatal|DLP": "0", + "non_fatal|ECRC": "0", + "non_fatal|FCP": "0", + "non_fatal|MalfTLP": "1", + "non_fatal|RxOF": "0", + "non_fatal|SDES": "0", + "non_fatal|TLP": "0", + "non_fatal|TLPBlockedErr": "0", + "non_fatal|TOTAL_ERR_NONFATAL": "1", + "non_fatal|UncorrIntErr": "0", + "non_fatal|Undefined": "0", + "non_fatal|UnsupReq": "0", + "non_fatal|UnxCmplt": "0" + }, + "PCIE_DEVICE|01:00.0": { + "correctable|BadDLLP": "0", + "correctable|BadTLP": "0", + "correctable|CorrIntErr": "0", + "correctable|HeaderOF": "0", + "correctable|NonFatalErr": "0", + "correctable|Rollover": "0", + "correctable|RxErr": "1", + "correctable|TOTAL_ERR_COR": "1", + "correctable|Timeout": "0", + "fatal|ACSViol": "0", + "fatal|AtomicOpBlocked": "0", + "fatal|BlockedTLP": "0", + "fatal|CmpltAbrt": 
"0", + "fatal|CmpltTO": "0", + "fatal|DLP": "0", + "fatal|ECRC": "0", + "fatal|FCP": "0", + "fatal|MalfTLP": "0", + "fatal|RxOF": "0", + "fatal|SDES": "0", + "fatal|TLP": "0", + "fatal|TLPBlockedErr": "0", + "fatal|TOTAL_ERR_FATAL": "0", + "fatal|UncorrIntErr": "0", + "fatal|Undefined": "0", + "fatal|UnsupReq": "0", + "fatal|UnxCmplt": "0", + "id": "0x0002", + "non_fatal|ACSViol": "0", + "non_fatal|AtomicOpBlocked": "0", + "non_fatal|BlockedTLP": "0", + "non_fatal|CmpltAbrt": "0", + "non_fatal|CmpltTO": "0", + "non_fatal|DLP": "0", + "non_fatal|ECRC": "0", + "non_fatal|FCP": "0", + "non_fatal|MalfTLP": "0", + "non_fatal|RxOF": "0", + "non_fatal|SDES": "0", + "non_fatal|TLP": "0", + "non_fatal|TLPBlockedErr": "0", + "non_fatal|TOTAL_ERR_NONFATAL": "0", + "non_fatal|UncorrIntErr": "0", + "non_fatal|Undefined": "0", + "non_fatal|UnsupReq": "0", + "non_fatal|UnxCmplt": "0" } } diff --git a/tests/pcieutil_test.py b/tests/pcieutil_test.py new file mode 100644 index 0000000000..cee1feec88 --- /dev/null +++ b/tests/pcieutil_test.py @@ -0,0 +1,205 @@ +import sys +import os +from unittest import mock + +from click.testing import CliRunner + +test_path = os.path.dirname(os.path.abspath(__file__)) +modules_path = os.path.dirname(test_path) +sys.path.insert(0, modules_path) + +import pcieutil.main as pcieutil + +pcieutil_pcie_aer_correctable_output = """\ ++---------------------+-----------+-----------+ +| AER - CORRECTABLE | 00:01.0 | 01:00.0 | +| | 0x0001 | 0x0002 | ++=====================+===========+===========+ +| RxErr | 0 | 1 | ++---------------------+-----------+-----------+ +| BadTLP | 1 | 0 | ++---------------------+-----------+-----------+ +| TOTAL_ERR_COR | 1 | 1 | ++---------------------+-----------+-----------+ +""" + +pcieutil_pcie_aer_nonfatal_output = """\ ++--------------------+-----------+ +| AER - NONFATAL | 00:01.0 | +| | 0x0001 | ++====================+===========+ +| MalfTLP | 1 | ++--------------------+-----------+ +| TOTAL_ERR_NONFATAL | 1 | 
++--------------------+-----------+ +""" + +pcieutil_pcie_aer_correctable_verbose_output = """\ ++---------------------+-----------+-----------+ +| AER - CORRECTABLE | 00:01.0 | 01:00.0 | +| | 0x0001 | 0x0002 | ++=====================+===========+===========+ +| RxErr | 0 | 1 | ++---------------------+-----------+-----------+ +| BadTLP | 1 | 0 | ++---------------------+-----------+-----------+ +| BadDLLP | 0 | 0 | ++---------------------+-----------+-----------+ +| Rollover | 0 | 0 | ++---------------------+-----------+-----------+ +| Timeout | 0 | 0 | ++---------------------+-----------+-----------+ +| NonFatalErr | 0 | 0 | ++---------------------+-----------+-----------+ +| CorrIntErr | 0 | 0 | ++---------------------+-----------+-----------+ +| HeaderOF | 0 | 0 | ++---------------------+-----------+-----------+ +| TOTAL_ERR_COR | 1 | 1 | ++---------------------+-----------+-----------+ +""" + +pcieutil_pcie_aer_fatal_verbose_output = """\ ++-----------------+-----------+-----------+ +| AER - FATAL | 00:01.0 | 01:00.0 | +| | 0x0001 | 0x0002 | ++=================+===========+===========+ +| Undefined | 0 | 0 | ++-----------------+-----------+-----------+ +| DLP | 0 | 0 | ++-----------------+-----------+-----------+ +| SDES | 0 | 0 | ++-----------------+-----------+-----------+ +| TLP | 0 | 0 | ++-----------------+-----------+-----------+ +| FCP | 0 | 0 | ++-----------------+-----------+-----------+ +| CmpltTO | 0 | 0 | ++-----------------+-----------+-----------+ +| CmpltAbrt | 0 | 0 | ++-----------------+-----------+-----------+ +| UnxCmplt | 0 | 0 | ++-----------------+-----------+-----------+ +| RxOF | 0 | 0 | ++-----------------+-----------+-----------+ +| MalfTLP | 0 | 0 | ++-----------------+-----------+-----------+ +| ECRC | 0 | 0 | ++-----------------+-----------+-----------+ +| UnsupReq | 0 | 0 | ++-----------------+-----------+-----------+ +| ACSViol | 0 | 0 | ++-----------------+-----------+-----------+ +| UncorrIntErr | 0 | 0 | 
++-----------------+-----------+-----------+ +| BlockedTLP | 0 | 0 | ++-----------------+-----------+-----------+ +| AtomicOpBlocked | 0 | 0 | ++-----------------+-----------+-----------+ +| TLPBlockedErr | 0 | 0 | ++-----------------+-----------+-----------+ +| TOTAL_ERR_FATAL | 0 | 0 | ++-----------------+-----------+-----------+ +""" + +pcieutil_pcie_aer_nonfatal_verbose_output = """\ ++--------------------+-----------+-----------+ +| AER - NONFATAL | 00:01.0 | 01:00.0 | +| | 0x0001 | 0x0002 | ++====================+===========+===========+ +| Undefined | 0 | 0 | ++--------------------+-----------+-----------+ +| DLP | 0 | 0 | ++--------------------+-----------+-----------+ +| SDES | 0 | 0 | ++--------------------+-----------+-----------+ +| TLP | 0 | 0 | ++--------------------+-----------+-----------+ +| FCP | 0 | 0 | ++--------------------+-----------+-----------+ +| CmpltTO | 0 | 0 | ++--------------------+-----------+-----------+ +| CmpltAbrt | 0 | 0 | ++--------------------+-----------+-----------+ +| UnxCmplt | 0 | 0 | ++--------------------+-----------+-----------+ +| RxOF | 0 | 0 | ++--------------------+-----------+-----------+ +| MalfTLP | 1 | 0 | ++--------------------+-----------+-----------+ +| ECRC | 0 | 0 | ++--------------------+-----------+-----------+ +| UnsupReq | 0 | 0 | ++--------------------+-----------+-----------+ +| ACSViol | 0 | 0 | ++--------------------+-----------+-----------+ +| UncorrIntErr | 0 | 0 | ++--------------------+-----------+-----------+ +| BlockedTLP | 0 | 0 | ++--------------------+-----------+-----------+ +| AtomicOpBlocked | 0 | 0 | ++--------------------+-----------+-----------+ +| TLPBlockedErr | 0 | 0 | ++--------------------+-----------+-----------+ +| TOTAL_ERR_NONFATAL | 1 | 0 | ++--------------------+-----------+-----------+ +""" + +pcieutil_pcie_aer_correctable_dev_output = """\ ++---------------------+-----------+ +| AER - CORRECTABLE | 00:01.0 | +| | 0x0001 | ++=====================+===========+ +| BadTLP | 
1 | ++---------------------+-----------+ +| TOTAL_ERR_COR | 1 | ++---------------------+-----------+ +""" + +class TestPcieUtil(object): + @classmethod + def setup_class(cls): + print("SETUP") + os.environ["UTILITIES_UNIT_TESTING"] = "1" + + def test_aer_all(self): + runner = CliRunner() + result = runner.invoke(pcieutil.cli.commands["pcie-aer"].commands["all"], []) + assert result.output == (pcieutil_pcie_aer_correctable_output + "\n" + + pcieutil_pcie_aer_nonfatal_output) + + def test_aer_correctable(self): + runner = CliRunner() + result = runner.invoke(pcieutil.cli.commands["pcie-aer"].commands["correctable"], []) + assert result.output == pcieutil_pcie_aer_correctable_output + + def test_aer_fatal(self): + runner = CliRunner() + result = runner.invoke(pcieutil.cli.commands["pcie-aer"].commands["fatal"], []) + assert result.output == "" + + def test_aer_non_fatal(self): + runner = CliRunner() + result = runner.invoke(pcieutil.cli.commands["pcie-aer"].commands["non-fatal"], []) + assert result.output == pcieutil_pcie_aer_nonfatal_output + + def test_aer_option_verbose(self): + runner = CliRunner() + result = runner.invoke(pcieutil.cli.commands["pcie-aer"].commands["correctable"], ["-v"]) + assert result.output == pcieutil_pcie_aer_correctable_verbose_output + + result = runner.invoke(pcieutil.cli.commands["pcie-aer"].commands["fatal"], ["-v"]) + assert result.output == pcieutil_pcie_aer_fatal_verbose_output + + result = runner.invoke(pcieutil.cli.commands["pcie-aer"].commands["non-fatal"], ["-v"]) + assert result.output == pcieutil_pcie_aer_nonfatal_verbose_output + + def test_aer_option_device(self): + runner = CliRunner() + result = runner.invoke(pcieutil.cli.commands["pcie-aer"].commands["correctable"], ["-d", "0:1.0"]) + assert result.output == pcieutil_pcie_aer_correctable_dev_output + + @classmethod + def teardown_class(cls): + print("TEARDOWN") + os.environ["UTILITIES_UNIT_TESTING"] = "0"