Skip to content

Commit

Permalink
[pcieutil] Add 'pcie-aer' sub-command to display AER stats (#1169)
Browse files Browse the repository at this point in the history
- What I did
Add new "pcie-aer" sub-command in pcieutil to display the AER stats.
"pcieutil pcie-aer" has four sub-commands - 'all', 'correctable', 'fatal' and 'non-fatal'.

'all' command displays the AER stats for all severities.
'correctable', 'fatal' and 'non-fatal' commands display the AER stats of respective severity.
Each pcie-aer sub-command accepts the 'device' and 'no-zero' options.

```
root@sonic:/home/admin# pcieutil pcie-aer
Usage: pcieutil pcie-aer [OPTIONS] COMMAND [ARGS]...

  Display PCIe AER status

Options:
  --help  Show this message and exit.

Commands:
  all          Show all PCIe AER attributes
  correctable  Show PCIe AER correctable attributes
  fatal        Show PCIe AER fatal attributes
  non-fatal    Show PCIe AER non-fatal attributes
root@sonic:/home/admin#
root@sonic:/home/admin# pcieutil pcie-aer all --help
Usage: pcieutil pcie-aer all [OPTIONS]

  Show all PCIe AER attributes

Options:
  -d, --device <BUS>:<DEV>.<FN>  Display stats only for the specified device
  -nz, --no-zero                 Display non-zero AER stats
  --help                         Show this message and exit.
root@sonic:/home/admin#
```
Depends on: sonic-net/sonic-platform-daemons#100

- How I did it
Added new functions in pcieutil that implement sub-commands for retrieving AER stats from STATE_DB and displaying them in tabular format.
  • Loading branch information
ArunSaravananBalachandran authored Jan 26, 2021
1 parent 47f412b commit 41e62c6
Show file tree
Hide file tree
Showing 3 changed files with 470 additions and 0 deletions.
167 changes: 167 additions & 0 deletions pcieutil/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,15 @@

try:
import os
import re
import sys
from collections import OrderedDict

import click
from sonic_py_common import device_info, logger
from swsssdk import SonicV2Connector
from tabulate import tabulate
import utilities_common.cli as clicommon
except ImportError as e:
raise ImportError("%s - required module not found" % str(e))

Expand Down Expand Up @@ -105,6 +110,168 @@ def show():
click.echo("bus:dev.fn %s:%s.%s - dev_id=0x%s, %s" % (Bus, Dev, Fn, Id, Name))


# PCIe AER stats helpers

# Counter names shared by the two uncorrectable severities ("fatal" and
# "non_fatal"); only their trailing TOTAL_* counter differs.
_uncorrectable_aer_fields = (
    'Undefined', 'DLP', 'SDES', 'TLP', 'FCP', 'CmpltTO', 'CmpltAbrt',
    'UnxCmplt', 'RxOF', 'MalfTLP', 'ECRC', 'UnsupReq', 'ACSViol',
    'UncorrIntErr', 'BlockedTLP', 'AtomicOpBlocked', 'TLPBlockedErr',
)

# AER counter names for every severity, in the order the rows are displayed.
# pcie_aer_display looks each one up in STATE_DB as "<severity>|<name>".
aer_fields = {
    "correctable": [
        'RxErr', 'BadTLP', 'BadDLLP', 'Rollover', 'Timeout', 'NonFatalErr',
        'CorrIntErr', 'HeaderOF', 'TOTAL_ERR_COR',
    ],
    "fatal": list(_uncorrectable_aer_fields) + ['TOTAL_ERR_FATAL'],
    "non_fatal": list(_uncorrectable_aer_fields) + ['TOTAL_ERR_NONFATAL'],
}


class PcieDevice(click.ParamType):
    '''Click parameter type for a PCIe device address.

    Accepts a hexadecimal <Bus>:<Dev>.<Fn> string and converts it to the
    zero-padded lower-case form "bb:dd.f".'''
    name = "<Bus>:<Dev>.<Fn>"

    # The trailing \Z makes the whole value match; without it, input with
    # trailing junk (e.g. "00:01.0xyz") was silently accepted as "00:01.0".
    _address_re = re.compile(r'([0-9A-Fa-f]{1,2}):([0-9A-Fa-f]{1,2})\.([0-9A-Fa-f])\Z')

    def convert(self, value, param, ctx):
        '''Validate value and return it in normalized "bb:dd.f" form.

        Calls self.fail() (which raises click's bad-parameter error) when the
        value is not in <Bus>:<Dev>.<Fn> format or when the device or function
        number is out of range.'''
        match = self._address_re.match(value)
        if not match:
            self.fail('{} is not in <Bus>:<Dev>.<Fn> format'.format(value), param, ctx)

        # The bus needs no range check: two hex digits cannot exceed 0xff.
        bus, dev, fn = [int(part, 16) for part in match.groups()]

        if dev > 31:
            self.fail('Invalid Dev number', param, ctx)

        if fn > 7:
            self.fail('Invalid Fn number', param, ctx)

        return "%02x:%02x.%d" % (bus, dev, fn)


# Options shared by every 'pcie-aer' sub-command; PcieAerCommand installs this
# list as the command's parameters.
_pcie_aer_click_options = [
    # Restrict the output to one device; the parsed value reaches the command
    # callback under the name 'device_key'.
    click.Option(
        param_decls=['-d', '--device', 'device_key'],
        type=PcieDevice(),
        help="Display stats only for the specified device",
    ),
    # Flag that switches from the default view to showing every counter.
    click.Option(
        param_decls=['-v', '--verbose'],
        is_flag=True,
        help="Display all stats",
    ),
]


class PcieAerCommand(click.Command):
    '''This subclass of click.Command provides common options, help
    and short help text for PCIe AER commands'''

    def __init__(self, *args, **kwargs):
        super(PcieAerCommand, self).__init__(*args, **kwargs)
        # Give each command its own list so that changing one command's
        # params can never leak into the module-level list or its siblings.
        self.params = list(_pcie_aer_click_options)

    def _summary(self):
        '''Return the one-line description built from the command name.'''
        # The name may contain underscores (e.g. "non_fatal"); display it
        # with dashes.
        return "Show {} PCIe AER attributes".format(self.name.replace("_", "-"))

    def format_help_text(self, ctx, formatter):
        '''Write the summary and the default-behaviour note into the help page.'''
        formatter.write_paragraph()
        with formatter.indentation():
            formatter.write_text(self._summary())
            formatter.write_text("(Default: Display only non-zero attributes)")

    def get_short_help_str(self, limit=45):
        '''Return the summary shown in the parent group's command listing.

        limit is accepted (with the same default as click.Command) for
        signature compatibility; the summary is returned untruncated.'''
        return self._summary()


def pcie_aer_display(ctx, severity):
    '''Print a grid table of the PCIe AER counters of one severity.

    ctx      -- click context of the running sub-command; its 'device_key'
                and 'verbose' params select the device and the level of
                detail, and ctx.obj records whether a table was already
                printed so consecutive tables are separated by a blank line.
    severity -- a key of aer_fields: "correctable", "fatal" or "non_fatal".

    The counters are read from the PCIE_DEVICE|* entries of STATE_DB. Unless
    'verbose' is set, devices and counters whose values are all '0' are left
    out, and nothing is printed when no device remains. If a specific device
    was requested but has no entry, an error is reported and the command
    exits with status 1.'''
    device_key = ctx.params['device_key']
    no_zero = not ctx.params['verbose']
    fields = aer_fields[severity]
    # Match on "<severity>|" so that one severity name can never be mistaken
    # for the prefix of another.
    key_prefix = severity + "|"
    header = ["AER - " + severity.upper().replace("_", "")]

    statedb = SonicV2Connector()
    statedb.connect(statedb.STATE_DB)

    # One row per counter; the first cell of each row is the counter name.
    table = OrderedDict((field, [field]) for field in fields)

    if device_key:
        pcie_dev_list = ["PCIE_DEVICE|%s" % device_key]
    else:
        pcie_dev_list = sorted(statedb.keys(statedb.STATE_DB, "PCIE_DEVICE|*") or [])

    dev_found = False
    for pcie_dev_key in pcie_dev_list:
        aer_attribute = statedb.get_all(statedb.STATE_DB, pcie_dev_key)
        if not aer_attribute:
            continue

        dev_found = True

        if no_zero and all(val == '0' for key, val in aer_attribute.items()
                           if key.startswith(key_prefix)):
            continue

        pcie_dev = pcie_dev_key.split("|", 1)[1]

        # Tabulate Header: device address above its id. A missing 'id' field
        # is shown as 'NA' instead of aborting with a KeyError.
        header.append("%s\n%s" % (pcie_dev, aer_attribute.get('id', 'NA')))

        # Tabulate Row
        for field in fields:
            table[field].append(aer_attribute.get(key_prefix + field, 'NA'))

    if device_key and not dev_found:
        # Context.exit() expects an exit code, so print the message
        # explicitly instead of passing it as the code.
        click.echo("Device not found in DB", err=True)
        ctx.exit(1)

    # Strip fields with no non-zero value
    if no_zero:
        for field in fields:
            if all(val == '0' for val in table[field][1:]):
                del table[field]

    # In the default (non-verbose) mode, print nothing when no device
    # qualified for the table.
    if not (no_zero and len(header) == 1):
        if ctx.obj:
            click.echo("")

        click.echo(tabulate(list(table.values()), header, tablefmt="grid"))
        ctx.obj = True


# Show PCIe AER status
@cli.group(cls=clicommon.AliasedGroup)
@click.pass_context
def pcie_aer(ctx):
    '''Display PCIe AER status'''
    # ctx.obj records whether pcie_aer_display has already printed a table;
    # it is set to True after the first table so that the 'all' command gets
    # a blank line between severities. Start with "nothing printed yet".
    ctx.obj = False


@pcie_aer.command(cls=PcieAerCommand)
@click.pass_context
def correctable(ctx, device_key, verbose):
    '''Show correctable PCIe AER attributes'''
    # device_key and verbose are not used directly here; pcie_aer_display
    # reads both options from ctx.params.
    pcie_aer_display(ctx, severity="correctable")


@pcie_aer.command(cls=PcieAerCommand)
@click.pass_context
def fatal(ctx, device_key, verbose):
    '''Show fatal PCIe AER attributes'''
    # device_key and verbose are not used directly here; pcie_aer_display
    # reads both options from ctx.params.
    pcie_aer_display(ctx, severity="fatal")


@pcie_aer.command(cls=PcieAerCommand)
@click.pass_context
def non_fatal(ctx, device_key, verbose):
    '''Show non-fatal PCIe AER attributes'''
    # device_key and verbose are not used directly here; pcie_aer_display
    # reads both options from ctx.params.
    pcie_aer_display(ctx, severity="non_fatal")


@pcie_aer.command(name='all', cls=PcieAerCommand)
@click.pass_context
def all_errors(ctx, device_key, verbose):
    '''Show all PCIe AER attributes'''
    # One table per severity, in this fixed order; pcie_aer_display reads the
    # device_key and verbose options from ctx.params.
    for severity in ("correctable", "fatal", "non_fatal"):
        pcie_aer_display(ctx, severity)


# Show PCIe Vendor ID and Device ID
@cli.command()
def check():
Expand Down
98 changes: 98 additions & 0 deletions tests/mock_tables/state_db.json
Original file line number Diff line number Diff line change
Expand Up @@ -455,5 +455,103 @@
"CHASSIS_MIDPLANE_TABLE|LINE-CARD1": {
"ip_address": "192.168.1.2",
"access": "False"
},
"PCIE_DEVICE|00:01.0": {
"correctable|BadDLLP": "0",
"correctable|BadTLP": "1",
"correctable|CorrIntErr": "0",
"correctable|HeaderOF": "0",
"correctable|NonFatalErr": "0",
"correctable|Rollover": "0",
"correctable|RxErr": "0",
"correctable|TOTAL_ERR_COR": "1",
"correctable|Timeout": "0",
"fatal|ACSViol": "0",
"fatal|AtomicOpBlocked": "0",
"fatal|BlockedTLP": "0",
"fatal|CmpltAbrt": "0",
"fatal|CmpltTO": "0",
"fatal|DLP": "0",
"fatal|ECRC": "0",
"fatal|FCP": "0",
"fatal|MalfTLP": "0",
"fatal|RxOF": "0",
"fatal|SDES": "0",
"fatal|TLP": "0",
"fatal|TLPBlockedErr": "0",
"fatal|TOTAL_ERR_FATAL": "0",
"fatal|UncorrIntErr": "0",
"fatal|Undefined": "0",
"fatal|UnsupReq": "0",
"fatal|UnxCmplt": "0",
"id": "0x0001",
"non_fatal|ACSViol": "0",
"non_fatal|AtomicOpBlocked": "0",
"non_fatal|BlockedTLP": "0",
"non_fatal|CmpltAbrt": "0",
"non_fatal|CmpltTO": "0",
"non_fatal|DLP": "0",
"non_fatal|ECRC": "0",
"non_fatal|FCP": "0",
"non_fatal|MalfTLP": "1",
"non_fatal|RxOF": "0",
"non_fatal|SDES": "0",
"non_fatal|TLP": "0",
"non_fatal|TLPBlockedErr": "0",
"non_fatal|TOTAL_ERR_NONFATAL": "1",
"non_fatal|UncorrIntErr": "0",
"non_fatal|Undefined": "0",
"non_fatal|UnsupReq": "0",
"non_fatal|UnxCmplt": "0"
},
"PCIE_DEVICE|01:00.0": {
"correctable|BadDLLP": "0",
"correctable|BadTLP": "0",
"correctable|CorrIntErr": "0",
"correctable|HeaderOF": "0",
"correctable|NonFatalErr": "0",
"correctable|Rollover": "0",
"correctable|RxErr": "1",
"correctable|TOTAL_ERR_COR": "1",
"correctable|Timeout": "0",
"fatal|ACSViol": "0",
"fatal|AtomicOpBlocked": "0",
"fatal|BlockedTLP": "0",
"fatal|CmpltAbrt": "0",
"fatal|CmpltTO": "0",
"fatal|DLP": "0",
"fatal|ECRC": "0",
"fatal|FCP": "0",
"fatal|MalfTLP": "0",
"fatal|RxOF": "0",
"fatal|SDES": "0",
"fatal|TLP": "0",
"fatal|TLPBlockedErr": "0",
"fatal|TOTAL_ERR_FATAL": "0",
"fatal|UncorrIntErr": "0",
"fatal|Undefined": "0",
"fatal|UnsupReq": "0",
"fatal|UnxCmplt": "0",
"id": "0x0002",
"non_fatal|ACSViol": "0",
"non_fatal|AtomicOpBlocked": "0",
"non_fatal|BlockedTLP": "0",
"non_fatal|CmpltAbrt": "0",
"non_fatal|CmpltTO": "0",
"non_fatal|DLP": "0",
"non_fatal|ECRC": "0",
"non_fatal|FCP": "0",
"non_fatal|MalfTLP": "0",
"non_fatal|RxOF": "0",
"non_fatal|SDES": "0",
"non_fatal|TLP": "0",
"non_fatal|TLPBlockedErr": "0",
"non_fatal|TOTAL_ERR_NONFATAL": "0",
"non_fatal|UncorrIntErr": "0",
"non_fatal|Undefined": "0",
"non_fatal|UnsupReq": "0",
"non_fatal|UnxCmplt": "0"
}
}
Loading

0 comments on commit 41e62c6

Please sign in to comment.