Skip to content

Commit

Permalink
Refactor Pcied and add unittest (sonic-net#189)
Browse files Browse the repository at this point in the history
Description
Refactor pcied and add unit tests.

Motivation and Context
Added unit tests to increase pmon unit test coverage.

How Has This Been Tested?
Built with unit tests enabled, then ran pcied manually on a DUT to verify it.
  • Loading branch information
sujinmkang authored Jun 17, 2021
1 parent eb8a223 commit 2fc05b2
Show file tree
Hide file tree
Showing 10 changed files with 542 additions and 115 deletions.
2 changes: 2 additions & 0 deletions sonic-pcied/pytest.ini
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
[pytest]
addopts = --cov=scripts --cov-report html --cov-report term --cov-report xml --junitxml=test-results.xml -vv
265 changes: 150 additions & 115 deletions sonic-pcied/scripts/pcied
Original file line number Diff line number Diff line change
Expand Up @@ -5,30 +5,62 @@
PCIe device monitoring daemon for SONiC
"""

try:
import os
import signal
import sys
import threading
import os
import signal
import sys
import threading

from sonic_py_common import daemon_base, device_info
from swsscommon import swsscommon
except ImportError as e:
raise ImportError(str(e) + " - required module not found")
from sonic_py_common import daemon_base, device_info
from swsscommon import swsscommon

#
# Constants ====================================================================
#

# TODO: Once we no longer support Python 2, we can eliminate this and get the
# name using the 'name' field (e.g., `signal.SIGINT.name`) starting with Python 3.5
# Maps every `signal` module attribute whose name starts with 'SIG' and has no
# underscore to that name; the underscore filter leaves out non-signal
# constants such as SIG_DFL and SIG_IGN.
SIGNALS_TO_NAMES_DICT = dict((getattr(signal, n), n)
for n in dir(signal) if n.startswith('SIG') and '_' not in n)

# Log identifier handed to DaemonPcied in main().
SYSLOG_IDENTIFIER = "pcied"

PCIE_RESULT_REGEX = "PCIe Device Checking All Test"
PCIE_TABLE_NAME = "PCIE_STATUS"
# Table that receives one row per PCIe device (its id and AER counters).
PCIE_DEVICE_TABLE_NAME = "PCIE_DEVICE"

PCIE_CONF_FILE = 'pcie.yaml'
# Table that receives the overall PASSED/FAILED result of the PCIe check.
PCIE_STATUS_TABLE_NAME = "PCIE_DEVICES"

# Timeout, in seconds, of the stop-event wait that precedes each PCIe check.
PCIED_MAIN_THREAD_SLEEP_SECS = 60
REDIS_HOSTIP = "127.0.0.1"

# NOTE(review): presumably an exit code for a missing PCIe configuration file;
# it is not referenced by the visible code - confirm before relying on it.
PCIEUTIL_CONF_FILE_ERROR = 1
# Passed to sys.exit() when no PCIe utility implementation could be loaded.
PCIEUTIL_LOAD_ERROR = 2

# PCIe utility object shared by the daemon methods; assigned in
# DaemonPcied.__init__().
platform_pcieutil = None

# Value returned by main(); the signal handler sets it to 128 + signal number
# when a fatal signal is caught.
exit_code = 0

# wrapper functions to call the platform api
def load_platform_pcieutil():
    """Instantiate the PCIe utility object for the running platform.

    The platform-specific ``sonic_platform.pcie.Pcie`` class is tried first.
    If it cannot be imported, the generic
    ``sonic_platform_base.sonic_pcie.pcie_common.PcieUtil`` is used instead.

    Returns:
        The PCIe utility instance, or None when neither implementation can
        be imported.
    """
    # This is a module-level function, so there is no `self` to log through.
    # Referencing `self` here raised NameError exactly when an import failed,
    # so log through a standalone logger bound to the daemon's identifier.
    from sonic_py_common import logger
    log = logger.Logger(SYSLOG_IDENTIFIER)

    (platform_path, _) = device_info.get_paths_to_platform_and_hwsku_dirs()

    try:
        from sonic_platform.pcie import Pcie
        return Pcie(platform_path)
    except ImportError as e:
        log.log_error("Failed to load platform Pcie module. Error : {}".format(str(e)), True)

    # Only reached when the platform-specific module is unavailable.
    try:
        from sonic_platform_base.sonic_pcie.pcie_common import PcieUtil
        return PcieUtil(platform_path)
    except ImportError as e:
        log.log_error("Failed to load default PcieUtil module. Error : {}".format(str(e)), True)

    return None

def read_id_file(device_name):
    """Read the contents of a PCIe device's sysfs ``device`` file.

    Args:
        device_name: Device address in "bus:dev.fn" form (e.g. "00:01.0");
            it is appended to the fixed "0000:" prefix of the sysfs path.

    Returns:
        The stripped file contents, or None when the file cannot be read.
    """
    dev_id_path = '/sys/bus/pci/devices/0000:%s/device' % device_name

    # Open directly instead of testing os.path.exists() first: the sysfs node
    # can disappear between the check and the open, and an unhandled error
    # here would take the whole daemon down.
    try:
        with open(dev_id_path, 'r') as fd:
            return fd.read().strip()
    except (IOError, OSError):
        return None

#
# Daemon =======================================================================
Expand All @@ -39,142 +71,145 @@ class DaemonPcied(daemon_base.DaemonBase):
def __init__(self, log_identifier):
    """Set up daemon state, load the PCIe utility and open the STATE_DB tables.

    Args:
        log_identifier: Identifier forwarded to the DaemonBase constructor.

    Exits the process with PCIEUTIL_LOAD_ERROR when load_platform_pcieutil()
    returns None.
    """
    global platform_pcieutil

    super(DaemonPcied, self).__init__(log_identifier)

    self.timeout = PCIED_MAIN_THREAD_SLEEP_SECS
    self.stop_event = threading.Event()

    # Every attribute that __del__ reads must exist before the first point
    # at which construction can abort (the sys.exit() below).
    self.state_db = None
    self.device_table = None
    self.status_table = None
    self.table = None
    self.resultInfo = []
    self.device_name = None
    self.aer_stats = {}

    platform_pcieutil = load_platform_pcieutil()
    if platform_pcieutil is None:
        sys.exit(PCIEUTIL_LOAD_ERROR)

    # Connect to STATE_DB and create pcie device table
    self.state_db = daemon_base.db_connect("STATE_DB")
    self.device_table = swsscommon.Table(self.state_db, PCIE_DEVICE_TABLE_NAME)
    self.status_table = swsscommon.Table(self.state_db, PCIE_STATUS_TABLE_NAME)

def __del__(self):
    """Delete every key from the device table and then the status table."""
    # __init__ can exit before the tables are created, so either attribute
    # may be missing or still None; getattr keeps the finalizer from raising
    # AttributeError in that case.
    tables = (
        getattr(self, 'device_table', None),
        getattr(self, 'status_table', None),
    )
    for table in tables:
        if table:
            for key in table.getKeys():
                table._del(key)

# load aer-fields into statedb
def update_aer_to_statedb(self):
    """Write the AER counters held in self.aer_stats to the device table.

    self.aer_stats is read as a mapping of category -> {field: value}. Each
    counter is stored under the field name "<category>|<field>" in the row
    keyed by self.device_name. Nothing is written when there are no counters.
    """
    if self.aer_stats is None:
        # The device name lives on the instance; this method has no
        # `device_name` argument, so the bare name raised NameError here.
        self.log_debug("PCIe device {} has no AER Stats".format(self.device_name))
        return

    aer_fields = {}

    for key, fv in self.aer_stats.items():
        for field, value in fv.items():
            aer_fields["{}|{}".format(key, field)] = value

    if aer_fields:
        formatted_fields = swsscommon.FieldValuePairs(list(aer_fields.items()))
        self.device_table.set(self.device_name, formatted_fields)
    else:
        self.log_debug("PCIe device {} has no AER attriutes".format(self.device_name))

# Check the PCIe AER Stats
def check_n_update_pcie_aer_stats(self, Bus, Dev, Fn):
    """Record the id and AER statistics of one PCIe function in STATE_DB.

    Args:
        Bus: Bus number (integer).
        Dev: Device number (integer).
        Fn: Function number (integer).

    The three numbers are formatted into self.device_name as "bb:dd.f".
    When the device's sysfs id cannot be read, nothing is written and
    self.aer_stats is left empty.
    """
    self.device_name = "%02x:%02x.%d" % (Bus, Dev, Fn)
    # Reset first so counters of the previously processed device are never
    # attributed to this one.
    self.aer_stats = {}

    dev_id = read_id_file(self.device_name)
    if dev_id is None:
        return

    self.device_table.set(self.device_name, [('id', dev_id)])
    self.aer_stats = platform_pcieutil.get_pcie_aer_stats(bus=Bus, dev=Dev, func=Fn)
    self.update_aer_to_statedb()


# Update the PCIe devices status to DB
def update_pcie_devices_status_db(self, err):
    """Log the overall PCIe check result and store it in the status table.

    Args:
        err: Failure indicator; any truthy value is reported as "FAILED",
            a falsy value as "PASSED".
    """
    if err:
        pcie_status = "FAILED"
        self.log_error("PCIe device status check : {}".format(pcie_status))
    else:
        pcie_status = "PASSED"
        self.log_info("PCIe device status check : {}".format(pcie_status))

    # The result is stored as field 'status' of the row keyed "status".
    fvs = swsscommon.FieldValuePairs([('status', pcie_status)])
    self.status_table.set("status", fvs)

# Check the PCIe devices
def check_pcie_devices(self):
    """Run the platform PCIe check and publish the outcome to STATE_DB.

    Every entry whose "result" is "Failed" is logged and counted. For every
    other entry the bus/dev/fn strings are parsed as hexadecimal and the
    device's AER statistics are refreshed. The failure count is then passed
    to update_pcie_devices_status_db(). Nothing is published when the
    platform check returns None.
    """
    self.resultInfo = platform_pcieutil.get_pcie_check()
    if self.resultInfo is None:
        return

    err = 0
    for result in self.resultInfo:
        if result["result"] == "Failed":
            self.log_warning("PCIe Device: " + result["name"] + " Not Found")
            err += 1
            continue

        Bus = int(result["bus"], 16)
        Dev = int(result["dev"], 16)
        Fn = int(result["fn"], 16)
        # update AER-attributes to DB
        self.check_n_update_pcie_aer_stats(Bus, Dev, Fn)

    # update PCIe Device Status to DB
    self.update_pcie_devices_status_db(err)

# Override signal handler from DaemonBase
def signal_handler(self, sig, frame):
    """Handle a signal delivered to the daemon.

    Args:
        sig: Number of the received signal.
        frame: Current stack frame (unused).

    SIGINT and SIGTERM set the module-level exit_code to 128 + sig and set
    the stop event. SIGHUP and every other signal are only logged.
    """
    global exit_code

    FATAL_SIGNALS = [signal.SIGINT, signal.SIGTERM]
    NONFATAL_SIGNALS = [signal.SIGHUP]

    # Fall back to the raw number for signals the lookup table has no name
    # for, so that logging an unexpected signal can never raise KeyError.
    sig_name = SIGNALS_TO_NAMES_DICT.get(sig, sig)

    if sig in FATAL_SIGNALS:
        self.log_info("Caught signal '{}' - exiting...".format(sig_name))
        exit_code = 128 + sig  # Make sure we exit with a non-zero code so that supervisor will try to restart us
        self.stop_event.set()
    elif sig in NONFATAL_SIGNALS:
        self.log_info("Caught signal '{}' - ignoring...".format(sig_name))
    else:
        self.log_warning("Caught unhandled signal '{}' - ignoring...".format(sig_name))

# Main daemon logic
def run(self):
    """Run one iteration of the daemon.

    Waits up to self.timeout seconds on the stop event, then performs one
    PCIe device check.

    Returns:
        False if the stop event was set during the wait (no check is done),
        True after a check has been performed.
    """
    if self.stop_event.wait(self.timeout):
        # We received a fatal signal
        return False

    self.check_pcie_devices()

    return True
#
# Main =========================================================================
#


def main():
    """Create the daemon and iterate it until run() reports a stop.

    Returns:
        The module-level exit_code, which the signal handler sets when a
        fatal signal is caught.
    """
    pcied = DaemonPcied(SYSLOG_IDENTIFIER)

    pcied.log_info("Starting up...")

    # run() returns False once the stop event has been set.
    while pcied.run():
        pass

    pcied.log_info("Shutting down...")

    return exit_code

if __name__ == '__main__':
    # Propagate main()'s return value as the process exit status; main() must
    # be invoked exactly once.
    sys.exit(main())
2 changes: 2 additions & 0 deletions sonic-pcied/setup.cfg
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
[aliases]
test=pytest
12 changes: 12 additions & 0 deletions sonic-pcied/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,19 @@
'scripts/pcied',
],
setup_requires=[
'pytest-runner',
'wheel'
],
install_requires=[
'enum34; python_version < "3.4"',
'sonic-py-common',
],
tests_requires=[
'mock>=2.0.0; python_version < "3.3"',
'pytest',
'pytest-cov',
'sonic-platform-common'
],
classifiers=[
'Development Status :: 4 - Beta',
'Environment :: No Input/Output (Daemon)',
Expand All @@ -29,4 +40,5 @@
'Topic :: System :: Hardware',
],
keywords='sonic SONiC PCIe pcie PCIED pcied',
test_suite='setup.get_test_suite'
)
Empty file added sonic-pcied/tests/__init__.py
Empty file.
Loading

0 comments on commit 2fc05b2

Please sign in to comment.