From a71c72b2b45be79a27458678690b1780195d9c66 Mon Sep 17 00:00:00 2001 From: shlomibitton <60430976+shlomibitton@users.noreply.github.com> Date: Mon, 12 Oct 2020 20:51:11 +0300 Subject: [PATCH] [show] Add 'show' CLI for system-health feature (#971) * Add 'show' CLI for system-health feature Signed-off-by: Shlomi Bitton * Add unit test for 'system-health' feature, add support for testing in 'show' script, Fix comments Signed-off-by: Shlomi Bitton * Fix additional comments * Fix comments * Update Command-Reference.md Add a CLI reference for system-health feature. * Fix LGTM alerts * Fix comment Change 'Ignore' to 'Ignored' * Update Command-Reference.md Fix example output * Update Command-Reference.md * Change 'summary' output and adapt test and reference to the new change * Update main.py * Fix multiline output for expected output * keep output aligned * Fix import for unit testing after community change * Add clicommon for @cli.group after community change * Align changes in the feature to the CLI on commit https://github.com/Azure/sonic-buildimage/pull/4835/commits/8ea2ab58981dbc98ab9854b756ab4d1cb82055b3 Signed-off-by: Shlomi Bitton * Update main.py * Move new group CLI into a separate file * Organize imports per PEP8 standards * Organize imports per PEP8 standards * Reformat docstring for readability --- doc/Command-Reference.md | 185 ++++++++++++++++++++ show/main.py | 5 +- show/system_health.py | 212 +++++++++++++++++++++++ tests/system_health_test.py | 332 ++++++++++++++++++++++++++++++++++++ 4 files changed, 733 insertions(+), 1 deletion(-) create mode 100644 show/system_health.py create mode 100644 tests/system_health_test.py diff --git a/doc/Command-Reference.md b/doc/Command-Reference.md index 80ec70a850c9..ae1cf7f22654 100644 --- a/doc/Command-Reference.md +++ b/doc/Command-Reference.md @@ -108,6 +108,7 @@ * [System State](#system-state) * [Processes](#processes) * [Services & Memory](#services--memory) +* [System-Health](#System-Health) * [VLAN & 
FDB](#vlan--fdb) * [VLAN](#vlan) * [VLAN show commands](#vlan-show-commands) @@ -5940,6 +5941,190 @@ NOTE: This command is not working. It crashes as follows. A bug ticket is opened Go Back To [Beginning of the document](#) or [Beginning of this section](#System-State) +Go Back To [Beginning of the document](#) or [Beginning of this section](#System-Health) + +### System-Health + +These commands are used to monitor the system's currently running services and hardware state. + +**show system-health summary** + +This command displays the current status of 'Services' and 'Hardware' under monitoring. +If any of the elements under each of these two sections is 'Not OK', a corresponding message will appear under the relevant section. + +- Usage: + ``` + show system-health summary + ``` + +- Example: + ``` + admin@sonic:~$ show system-health summary + System status summary + + System status LED red + Services: + Status: Not OK + Not Running: 'telemetry', 'sflowmgrd' + Hardware: + Status: OK + ``` + ``` + admin@sonic:~$ show system-health summary + System status summary + + System status LED green + Services: + Status: OK + Hardware: + Status: OK + ``` + +**show system-health monitor-list** + +This command displays a list of all current 'Services' and 'Hardware' being monitored, along with their status and type.
+ +- Usage: + ``` + show system-health monitor-list + ``` + +- Example: + ``` + admin@sonic:~$ show system-health monitor-list + System services and devices monitor list + + Name Status Type + -------------- -------- ---------- + telemetry Not OK Process + orchagent Not OK Process + neighsyncd OK Process + vrfmgrd OK Process + dialout_client OK Process + zebra OK Process + rsyslog OK Process + snmpd OK Process + redis_server OK Process + intfmgrd OK Process + vxlanmgrd OK Process + lldpd_monitor OK Process + portsyncd OK Process + var-log OK Filesystem + lldpmgrd OK Process + syncd OK Process + sonic OK System + buffermgrd OK Process + portmgrd OK Process + staticd OK Process + bgpd OK Process + lldp_syncd OK Process + bgpcfgd OK Process + snmp_subagent OK Process + root-overlay OK Filesystem + fpmsyncd OK Process + sflowmgrd OK Process + vlanmgrd OK Process + nbrmgrd OK Process + PSU 2 OK PSU + psu_1_fan_1 OK Fan + psu_2_fan_1 OK Fan + fan11 OK Fan + fan10 OK Fan + fan12 OK Fan + ASIC OK ASIC + fan1 OK Fan + PSU 1 OK PSU + fan3 OK Fan + fan2 OK Fan + fan5 OK Fan + fan4 OK Fan + fan7 OK Fan + fan6 OK Fan + fan9 OK Fan + fan8 OK Fan + ``` + +**show system-health detail** + +This command displays the current status of 'Services' and 'Hardware' under monitoring. +If any of the elements under each of these two sections is 'Not OK', a corresponding message will appear under the relevant section. +In addition, it displays a list of all current 'Services' and 'Hardware' being monitored and a list of ignored elements.
+ +- Usage: + ``` + show system-health detail + ``` + +- Example: + ``` + admin@sonic:~$ show system-health detail + System status summary + + System status LED red + Services: + Status: Not OK + Not Running: 'telemetry', 'orchagent' + Hardware: + Status: OK + + System services and devices monitor list + + Name Status Type + -------------- -------- ---------- + telemetry Not OK Process + orchagent Not OK Process + neighsyncd OK Process + vrfmgrd OK Process + dialout_client OK Process + zebra OK Process + rsyslog OK Process + snmpd OK Process + redis_server OK Process + intfmgrd OK Process + vxlanmgrd OK Process + lldpd_monitor OK Process + portsyncd OK Process + var-log OK Filesystem + lldpmgrd OK Process + syncd OK Process + sonic OK System + buffermgrd OK Process + portmgrd OK Process + staticd OK Process + bgpd OK Process + lldp_syncd OK Process + bgpcfgd OK Process + snmp_subagent OK Process + root-overlay OK Filesystem + fpmsyncd OK Process + sflowmgrd OK Process + vlanmgrd OK Process + nbrmgrd OK Process + PSU 2 OK PSU + psu_1_fan_1 OK Fan + psu_2_fan_1 OK Fan + fan11 OK Fan + fan10 OK Fan + fan12 OK Fan + ASIC OK ASIC + fan1 OK Fan + PSU 1 OK PSU + fan3 OK Fan + fan2 OK Fan + fan5 OK Fan + fan4 OK Fan + fan7 OK Fan + fan6 OK Fan + fan9 OK Fan + fan8 OK Fan + + System services and devices ignore list + + Name Status Type + ----------- -------- ------ + psu.voltage Ignored Device + ``` +Go Back To [Beginning of the document](#) or [Beginning of this section](#System-Health) ## VLAN & FDB diff --git a/show/main.py b/show/main.py index 1a5c6fa82eb0..04f5731d4e45 100755 --- a/show/main.py +++ b/show/main.py @@ -17,12 +17,15 @@ import mlnx import utilities_common.cli as clicommon import vlan +import system_health + from sonic_py_common import device_info from swsssdk import ConfigDBConnector, SonicV2Connector from tabulate import tabulate from utilities_common.db import Db import utilities_common.multi_asic as multi_asic_util + # Global Variables PLATFORM_JSON = 
'platform.json' HWSKU_JSON = 'hwsku.json' @@ -126,6 +129,7 @@ def cli(ctx): cli.add_command(interfaces.interfaces) cli.add_command(kube.kubernetes) cli.add_command(vlan.vlan) +cli.add_command(system_health.system_health) # # 'vrf' command ("show vrf") @@ -2431,6 +2435,5 @@ def tunnel(): click.echo(tabulate(table, header)) - if __name__ == '__main__': cli() diff --git a/show/system_health.py b/show/system_health.py new file mode 100644 index 000000000000..b1319922fa5d --- /dev/null +++ b/show/system_health.py @@ -0,0 +1,212 @@ +import os +import sys + +import click +from tabulate import tabulate +import utilities_common.cli as clicommon + +# +# 'system-health' command ("show system-health") +# +@click.group(name='system-health', cls=clicommon.AliasedGroup) +def system_health(): + """SONiC command line - 'show system-health' command""" + return + +@system_health.command() +def summary(): + """Show system-health summary information""" + # Mock the redis for unit test purposes # + try: + if os.environ["UTILITIES_UNIT_TESTING"] == "1": + modules_path = os.path.join(os.path.dirname(__file__), "..") + sys.path.insert(0, modules_path) + from tests.system_health_test import MockerManager + from tests.system_health_test import MockerChassis + HealthCheckerManager = MockerManager + Chassis = MockerChassis + except Exception: + # Normal run... 
# + if os.geteuid(): + click.echo("Root privileges are required for this operation") + return + from health_checker.manager import HealthCheckerManager + from sonic_platform.chassis import Chassis + + manager = HealthCheckerManager() + if not manager.config.config_file_exists(): + click.echo("System health configuration file not found, exit...") + return + chassis = Chassis() + state, stat = manager.check(chassis) + if state == HealthCheckerManager.STATE_BOOTING: + click.echo("System is currently booting...") + return + if state == HealthCheckerManager.STATE_RUNNING: + chassis.initizalize_system_led() + led = chassis.get_status_led() + click.echo("System status summary\n\n System status LED " + led) + services_list = [] + fs_list = [] + device_list =[] + for category, elements in stat.items(): + for element in elements: + if elements[element]['status'] != "OK": + if 'Running' in elements[element]['message']: + services_list.append(element) + elif 'Accessible' in elements[element]['message']: + fs_list.append(element) + else: + device_list.append(elements[element]['message']) + if len(services_list) or len(fs_list): + click.echo(" Services:\n Status: Not OK") + else: + click.echo(" Services:\n Status: OK") + if len(services_list): + services_list_string = str(services_list) + click.echo(" Not Running: " + services_list_string.replace("[", "").replace(']', "")) + if len(fs_list): + fs_list_string = str(fs_list) + click.echo(" Not Accessible: " + fs_list_string.replace("[", "").replace(']', "")) + if len(device_list): + click.echo(" Hardware:\n Status: Not OK") + click.echo(" Reasons: " + device_list.pop()) + while len(device_list): + click.echo("\t " + device_list.pop()) + else: + click.echo(" Hardware:\n Status: OK") + +@system_health.command() +def detail(): + """Show system-health detail information""" + # Mock the redis for unit test purposes # + try: + if os.environ["UTILITIES_UNIT_TESTING"] == "1": + modules_path = os.path.join(os.path.dirname(__file__), "..") 
+ sys.path.insert(0, modules_path) + from tests.system_health_test import MockerManager + from tests.system_health_test import MockerChassis + HealthCheckerManager = MockerManager + Chassis = MockerChassis + except Exception: + # Normal run... # + if os.geteuid(): + click.echo("Root privileges are required for this operation") + return + from health_checker.manager import HealthCheckerManager + from sonic_platform.chassis import Chassis + + manager = HealthCheckerManager() + if not manager.config.config_file_exists(): + click.echo("System health configuration file not found, exit...") + return + chassis = Chassis() + state, stat = manager.check(chassis) + if state == HealthCheckerManager.STATE_BOOTING: + click.echo("System is currently booting...") + return + if state == HealthCheckerManager.STATE_RUNNING: + #summary output + chassis.initizalize_system_led() + led = chassis.get_status_led() + click.echo("System status summary\n\n System status LED " + led) + services_list = [] + fs_list = [] + device_list =[] + for category, elements in stat.items(): + for element in elements: + if elements[element]['status'] != "OK": + if 'Running' in elements[element]['message']: + services_list.append(element) + elif 'Accessible' in elements[element]['message']: + fs_list.append(element) + else: + device_list.append(elements[element]['message']) + if len(services_list) or len(fs_list): + click.echo(" Services:\n Status: Not OK") + else: + click.echo(" Services:\n Status: OK") + if len(services_list): + services_list_string = str(services_list) + click.echo(" Not Running: " + services_list_string.replace("[", "").replace(']', "")) + if len(fs_list): + fs_list_string = str(fs_list) + click.echo(" Not Accessible: " + fs_list_string.replace("[", "").replace(']', "")) + if len(device_list): + click.echo(" Hardware:\n Status: Not OK") + click.echo(" Reasons: " + device_list.pop()) + while len(device_list): + click.echo("\t " + device_list.pop()) + else: + click.echo(" Hardware:\n 
Status: OK") + + click.echo('\nSystem services and devices monitor list\n') + header = ['Name', 'Status', 'Type'] + table = [] + for category, elements in stat.items(): + for element in sorted(elements.items(), key=lambda (x, y): y['status']): + entry = [] + entry.append(element[0]) + entry.append(element[1]['status']) + entry.append(element[1]['type']) + table.append(entry) + click.echo(tabulate(table, header)) + click.echo('\nSystem services and devices ignore list\n') + table = [] + if manager.config.ignore_services: + for element in manager.config.ignore_services: + entry = [] + entry.append(element) + entry.append("Ignored") + entry.append("Service") + table.append(entry) + if manager.config.ignore_devices: + for element in manager.config.ignore_devices: + entry = [] + entry.append(element) + entry.append("Ignored") + entry.append("Device") + table.append(entry) + click.echo(tabulate(table, header)) + +@system_health.command() +def monitor_list(): + """Show system-health monitored services and devices name list""" + # Mock the redis for unit test purposes # + try: + if os.environ["UTILITIES_UNIT_TESTING"] == "1": + modules_path = os.path.join(os.path.dirname(__file__), "..") + sys.path.insert(0, modules_path) + from tests.system_health_test import MockerManager + from tests.system_health_test import MockerChassis + HealthCheckerManager = MockerManager + Chassis = MockerChassis + except Exception: + # Normal run... 
# + if os.geteuid(): + click.echo("Root privileges are required for this operation") + return + from health_checker.manager import HealthCheckerManager + from sonic_platform.chassis import Chassis + + manager = HealthCheckerManager() + if not manager.config.config_file_exists(): + click.echo("System health configuration file not found, exit...") + return + chassis = Chassis() + state, stat = manager.check(chassis) + if state == HealthCheckerManager.STATE_BOOTING: + click.echo("System is currently booting...") + return + if state == HealthCheckerManager.STATE_RUNNING: + click.echo('\nSystem services and devices monitor list\n') + header = ['Name', 'Status', 'Type'] + table = [] + for category, elements in stat.items(): + for element in sorted(elements.items(), key=lambda (x, y): y['status']): + entry = [] + entry.append(element[0]) + entry.append(element[1]['status']) + entry.append(element[1]['type']) + table.append(entry) + click.echo(tabulate(table, header)) diff --git a/tests/system_health_test.py b/tests/system_health_test.py new file mode 100644 index 000000000000..8e2a39c5857a --- /dev/null +++ b/tests/system_health_test.py @@ -0,0 +1,332 @@ +#! 
/usr/bin/python -u + +import sys +import os + +import click +from click.testing import CliRunner +import mock_tables.dbconnector + +test_path = os.path.dirname(os.path.abspath(__file__)) +modules_path = os.path.dirname(test_path) +scripts_path = os.path.join(modules_path, "scripts") +sys.path.insert(0, modules_path) + +class MockerConfig(object): + ignore_devices = [] + ignore_services = [] + first_time = True + + def config_file_exists(self): + if MockerConfig.first_time: + MockerConfig.first_time = False + return False + else: + return True + +class MockerManager(object): + STATE_BOOTING = 'booting' + STATE_RUNNING = 'running' + counter = 0 + + def __init__(self): + self.config = MockerConfig() + + def check(self, chassis): + if MockerManager.counter == 0: + state = MockerManager.STATE_BOOTING + stats = {} + elif MockerManager.counter == 1: + state = MockerManager.STATE_RUNNING + stats = {'Services': {'neighsyncd': {'status': 'OK', 'message': '', 'type': 'Process'}, 'vrfmgrd': {'status': 'OK', 'message': '', 'type': 'Process'}, 'telemetry': {'status': 'Not OK', 'message': 'telemetry is not Running', 'type': 'Process'}, 'dialout_client': {'status': 'OK', 'message': '', 'type': 'Process'}, 'zebra': {'status': 'OK', 'message': '', 'type': 'Process'}, 'rsyslog': {'status': 'OK', 'message': '', 'type': 'Process'}, 'snmpd': {'status': 'OK', 'message': '', 'type': 'Process'}, 'redis_server': {'status': 'OK', 'message': '', 'type': 'Process'}, 'intfmgrd': {'status': 'OK', 'message': '', 'type': 'Process'}, 'orchagent': {'status': 'OK', 'message': '', 'type': 'Process'}, 'vxlanmgrd': {'status': 'OK', 'message': '', 'type': 'Process'}, 'lldpd_monitor': {'status': 'OK', 'message': '', 'type': 'Process'}, 'portsyncd': {'status': 'OK', 'message': '', 'type': 'Process'}, 'var-log': {'status': 'OK', 'message': '', 'type': 'Filesystem'}, 'lldpmgrd': {'status': 'OK', 'message': '', 'type': 'Process'}, 'syncd': {'status': 'OK', 'message': '', 'type': 'Process'}, 'sonic': 
{'status': 'OK', 'message': '', 'type': 'System'}, 'buffermgrd': {'status': 'OK', 'message': '', 'type': 'Process'}, 'portmgrd': {'status': 'OK', 'message': '', 'type': 'Process'}, 'staticd': {'status': 'OK', 'message': '', 'type': 'Process'}, 'bgpd': {'status': 'OK', 'message': '', 'type': 'Process'}, 'lldp_syncd': {'status': 'OK', 'message': '', 'type': 'Process'}, 'bgpcfgd': {'status': 'OK', 'message': '', 'type': 'Process'}, 'snmp_subagent': {'status': 'Not OK', 'message': 'snmp_subagent is not Running', 'type': 'Process'}, 'root-overlay': {'status': 'OK', 'message': '', 'type': 'Filesystem'}, 'fpmsyncd': {'status': 'OK', 'message': '', 'type': 'Process'}, 'sflowmgrd': {'status': 'OK', 'message': '', 'type': 'Process'}, 'vlanmgrd': {'status': 'OK', 'message': '', 'type': 'Process'}, 'nbrmgrd': {'status': 'OK', 'message': '', 'type': 'Process'}}, 'Hardware': {'psu_1_fan_1': {'status': 'OK', 'message': '', 'type': 'Fan'}, 'psu_2_fan_1': {'status': 'OK', 'message': '', 'type': 'Fan'}, 'PSU 1': {'status': 'OK', 'message': '', 'type': 'PSU'}, 'fan10': {'status': 'OK', 'message': '', 'type': 'Fan'}, 'PSU 2': {'status': 'OK', 'message': '', 'type': 'PSU'}, 'ASIC': {'status': 'OK', 'message': '', 'type': 'ASIC'}, 'fan1': {'status': 'OK', 'message': '', 'type': 'Fan'}, 'fan3': {'status': 'OK', 'message': '', 'type': 'Fan'}, 'fan2': {'status': 'OK', 'message': '', 'type': 'Fan'}, 'fan5': {'status': 'OK', 'message': '', 'type': 'Fan'}, 'fan4': {'status': 'OK', 'message': '', 'type': 'Fan'}, 'fan7': {'status': 'OK', 'message': '', 'type': 'Fan'}, 'fan6': {'status': 'OK', 'message': '', 'type': 'Fan'}, 'fan9': {'status': 'OK', 'message': '', 'type': 'Fan'}, 'fan8': {'status': 'OK', 'message': '', 'type': 'Fan'}}} + elif MockerManager.counter == 2: + state = MockerManager.STATE_RUNNING + stats = {'Services': {'neighsyncd': {'status': 'OK', 'message': '', 'type': 'Process'}, 'vrfmgrd': {'status': 'OK', 'message': '', 'type': 'Process'}, 'telemetry': {'status': 'OK', 
'message': '', 'type': 'Process'}, 'dialout_client': {'status': 'OK', 'message': '', 'type': 'Process'}, 'zebra': {'status': 'OK', 'message': '', 'type': 'Process'}, 'rsyslog': {'status': 'OK', 'message': '', 'type': 'Process'}, 'snmpd': {'status': 'OK', 'message': '', 'type': 'Process'}, 'redis_server': {'status': 'OK', 'message': '', 'type': 'Process'}, 'intfmgrd': {'status': 'OK', 'message': '', 'type': 'Process'}, 'orchagent': {'status': 'OK', 'message': '', 'type': 'Process'}, 'vxlanmgrd': {'status': 'OK', 'message': '', 'type': 'Process'}, 'lldpd_monitor': {'status': 'OK', 'message': '', 'type': 'Process'}, 'portsyncd': {'status': 'OK', 'message': '', 'type': 'Process'}, 'var-log': {'status': 'OK', 'message': '', 'type': 'Filesystem'}, 'lldpmgrd': {'status': 'OK', 'message': '', 'type': 'Process'}, 'syncd': {'status': 'OK', 'message': '', 'type': 'Process'}, 'sonic': {'status': 'OK', 'message': '', 'type': 'System'}, 'buffermgrd': {'status': 'OK', 'message': '', 'type': 'Process'}, 'portmgrd': {'status': 'OK', 'message': '', 'type': 'Process'}, 'staticd': {'status': 'OK', 'message': '', 'type': 'Process'}, 'bgpd': {'status': 'OK', 'message': '', 'type': 'Process'}, 'lldp_syncd': {'status': 'OK', 'message': '', 'type': 'Process'}, 'bgpcfgd': {'status': 'OK', 'message': '', 'type': 'Process'}, 'snmp_subagent': {'status': 'OK', 'message': '', 'type': 'Process'}, 'root-overlay': {'status': 'OK', 'message': '', 'type': 'Filesystem'}, 'fpmsyncd': {'status': 'OK', 'message': '', 'type': 'Process'}, 'sflowmgrd': {'status': 'OK', 'message': '', 'type': 'Process'}, 'vlanmgrd': {'status': 'OK', 'message': '', 'type': 'Process'}, 'nbrmgrd': {'status': 'OK', 'message': '', 'type': 'Process'}}, 'Hardware': {'psu_1_fan_1': {'status': 'OK', 'message': '', 'type': 'Fan'}, 'psu_2_fan_1': {'status': 'OK', 'message': '', 'type': 'Fan'}, 'PSU 1': {'status': 'OK', 'message': '', 'type': 'PSU'}, 'fan10': {'status': 'OK', 'message': '', 'type': 'Fan'}, 'PSU 2': {'status': 'OK', 
'message': '', 'type': 'PSU'}, 'ASIC': {'status': 'OK', 'message': '', 'type': 'ASIC'}, 'fan1': {'status': 'OK', 'message': '', 'type': 'Fan'}, 'fan3': {'status': 'OK', 'message': '', 'type': 'Fan'}, 'fan2': {'status': 'OK', 'message': '', 'type': 'Fan'}, 'fan5': {'status': 'OK', 'message': '', 'type': 'Fan'}, 'fan4': {'status': 'OK', 'message': '', 'type': 'Fan'}, 'fan7': {'status': 'OK', 'message': '', 'type': 'Fan'}, 'fan6': {'status': 'OK', 'message': '', 'type': 'Fan'}, 'fan9': {'status': 'OK', 'message': '', 'type': 'Fan'}, 'fan8': {'status': 'OK', 'message': '', 'type': 'Fan'}}} + elif MockerManager.counter == 3: + state = MockerManager.STATE_RUNNING + stats = {'Services': {'neighsyncd': {'status': 'OK', 'message': '', 'type': 'Process'}, 'vrfmgrd': {'status': 'OK', 'message': '', 'type': 'Process'}, 'telemetry': {'status': 'Not OK', 'message': 'telemetry is not Running', 'type': 'Process'}, 'dialout_client': {'status': 'OK', 'message': '', 'type': 'Process'}, 'zebra': {'status': 'OK', 'message': '', 'type': 'Process'}, 'rsyslog': {'status': 'OK', 'message': '', 'type': 'Process'}, 'snmpd': {'status': 'OK', 'message': '', 'type': 'Process'}, 'redis_server': {'status': 'OK', 'message': '', 'type': 'Process'}, 'intfmgrd': {'status': 'OK', 'message': '', 'type': 'Process'}, 'orchagent': {'status': 'OK', 'message': '', 'type': 'Process'}, 'vxlanmgrd': {'status': 'OK', 'message': '', 'type': 'Process'}, 'lldpd_monitor': {'status': 'OK', 'message': '', 'type': 'Process'}, 'portsyncd': {'status': 'OK', 'message': '', 'type': 'Process'}, 'var-log': {'status': 'OK', 'message': '', 'type': 'Filesystem'}, 'lldpmgrd': {'status': 'OK', 'message': '', 'type': 'Process'}, 'syncd': {'status': 'OK', 'message': '', 'type': 'Process'}, 'sonic': {'status': 'OK', 'message': '', 'type': 'System'}, 'buffermgrd': {'status': 'OK', 'message': '', 'type': 'Process'}, 'portmgrd': {'status': 'OK', 'message': '', 'type': 'Process'}, 'staticd': {'status': 'OK', 'message': '', 'type': 
'Process'}, 'bgpd': {'status': 'OK', 'message': '', 'type': 'Process'}, 'lldp_syncd': {'status': 'OK', 'message': '', 'type': 'Process'}, 'bgpcfgd': {'status': 'OK', 'message': '', 'type': 'Process'}, 'snmp_subagent': {'status': 'OK', 'message': '', 'type': 'Process'}, 'root-overlay': {'status': 'OK', 'message': '', 'type': 'Filesystem'}, 'fpmsyncd': {'status': 'OK', 'message': '', 'type': 'Process'}, 'sflowmgrd': {'status': 'Not OK', 'message': 'sflowmgrd is not Running', 'type': 'Process'}, 'vlanmgrd': {'status': 'OK', 'message': '', 'type': 'Process'}, 'nbrmgrd': {'status': 'OK', 'message': '', 'type': 'Process'}}, 'Hardware': {'PSU 2': {'status': 'OK', 'message': '', 'type': 'PSU'}, 'psu_1_fan_1': {'status': 'OK', 'message': '', 'type': 'Fan'}, 'psu_2_fan_1': {'status': 'OK', 'message': '', 'type': 'Fan'}, 'fan11': {'status': 'OK', 'message': '', 'type': 'Fan'}, 'fan10': {'status': 'OK', 'message': '', 'type': 'Fan'}, 'fan12': {'status': 'OK', 'message': '', 'type': 'Fan'}, 'ASIC': {'status': 'OK', 'message': '', 'type': 'ASIC'}, 'fan1': {'status': 'OK', 'message': '', 'type': 'Fan'}, 'PSU 1': {'status': 'OK', 'message': '', 'type': 'PSU'}, 'fan3': {'status': 'OK', 'message': '', 'type': 'Fan'}, 'fan2': {'status': 'OK', 'message': '', 'type': 'Fan'}, 'fan5': {'status': 'OK', 'message': '', 'type': 'Fan'}, 'fan4': {'status': 'OK', 'message': '', 'type': 'Fan'}, 'fan7': {'status': 'OK', 'message': '', 'type': 'Fan'}, 'fan6': {'status': 'OK', 'message': '', 'type': 'Fan'}, 'fan9': {'status': 'OK', 'message': '', 'type': 'Fan'}, 'fan8': {'status': 'OK', 'message': '', 'type': 'Fan'}}} + elif MockerManager.counter == 4: + state = MockerManager.STATE_RUNNING + stats = {'Services': {'neighsyncd': {'status': 'OK', 'message': '', 'type': 'Process'}, 'vrfmgrd': {'status': 'OK', 'message': '', 'type': 'Process'}, 'telemetry': {'status': 'Not OK', 'message': 'telemetry is not Running', 'type': 'Process'}, 'dialout_client': {'status': 'OK', 'message': '', 'type': 
'Process'}, 'zebra': {'status': 'OK', 'message': '', 'type': 'Process'}, 'rsyslog': {'status': 'OK', 'message': '', 'type': 'Process'}, 'snmpd': {'status': 'OK', 'message': '', 'type': 'Process'}, 'redis_server': {'status': 'OK', 'message': '', 'type': 'Process'}, 'intfmgrd': {'status': 'OK', 'message': '', 'type': 'Process'}, 'orchagent': {'status': 'OK', 'message': '', 'type': 'Process'}, 'vxlanmgrd': {'status': 'OK', 'message': '', 'type': 'Process'}, 'lldpd_monitor': {'status': 'OK', 'message': '', 'type': 'Process'}, 'portsyncd': {'status': 'OK', 'message': '', 'type': 'Process'}, 'var-log': {'status': 'OK', 'message': '', 'type': 'Filesystem'}, 'lldpmgrd': {'status': 'OK', 'message': '', 'type': 'Process'}, 'syncd': {'status': 'OK', 'message': '', 'type': 'Process'}, 'sonic': {'status': 'OK', 'message': '', 'type': 'System'}, 'buffermgrd': {'status': 'OK', 'message': '', 'type': 'Process'}, 'portmgrd': {'status': 'OK', 'message': '', 'type': 'Process'}, 'staticd': {'status': 'OK', 'message': '', 'type': 'Process'}, 'bgpd': {'status': 'OK', 'message': '', 'type': 'Process'}, 'lldp_syncd': {'status': 'OK', 'message': '', 'type': 'Process'}, 'bgpcfgd': {'status': 'OK', 'message': '', 'type': 'Process'}, 'snmp_subagent': {'status': 'OK', 'message': '', 'type': 'Process'}, 'root-overlay': {'status': 'OK', 'message': '', 'type': 'Filesystem'}, 'fpmsyncd': {'status': 'OK', 'message': '', 'type': 'Process'}, 'sflowmgrd': {'status': 'Not OK', 'message': 'sflowmgrd is not Running', 'type': 'Process'}, 'vlanmgrd': {'status': 'OK', 'message': '', 'type': 'Process'}, 'nbrmgrd': {'status': 'OK', 'message': '', 'type': 'Process'}}, 'Hardware': {'PSU 2': {'status': 'Not OK', 'message': 'Failed to get voltage minimum threshold data for PSU 2', 'type': 'PSU'}, 'psu_1_fan_1': {'status': 'OK', 'message': '', 'type': 'Fan'}, 'psu_2_fan_1': {'status': 'OK', 'message': '', 'type': 'Fan'}, 'fan11': {'status': 'OK', 'message': '', 'type': 'Fan'}, 'fan10': {'status': 'OK', 'message': 
'', 'type': 'Fan'}, 'fan12': {'status': 'OK', 'message': '', 'type': 'Fan'}, 'ASIC': {'status': 'OK', 'message': '', 'type': 'ASIC'}, 'fan1': {'status': 'OK', 'message': '', 'type': 'Fan'}, 'PSU 1': {'status': 'Not OK', 'message': 'Failed to get voltage minimum threshold data for PSU 1', 'type': 'PSU'}, 'fan3': {'status': 'OK', 'message': '', 'type': 'Fan'}, 'fan2': {'status': 'OK', 'message': '', 'type': 'Fan'}, 'fan5': {'status': 'OK', 'message': '', 'type': 'Fan'}, 'fan4': {'status': 'OK', 'message': '', 'type': 'Fan'}, 'fan7': {'status': 'OK', 'message': '', 'type': 'Fan'}, 'fan6': {'status': 'OK', 'message': '', 'type': 'Fan'}, 'fan9': {'status': 'OK', 'message': '', 'type': 'Fan'}, 'fan8': {'status': 'OK', 'message': '', 'type': 'Fan'}}} + elif MockerManager.counter == 5: + state = MockerManager.STATE_RUNNING + stats = {'Services': {'neighsyncd': {'status': 'OK', 'message': '', 'type': 'Process'}, 'vrfmgrd': {'status': 'OK', 'message': '', 'type': 'Process'}, 'telemetry': {'status': 'Not OK', 'message': 'telemetry is not Running', 'type': 'Process'}, 'dialout_client': {'status': 'OK', 'message': '', 'type': 'Process'}, 'zebra': {'status': 'OK', 'message': '', 'type': 'Process'}, 'rsyslog': {'status': 'OK', 'message': '', 'type': 'Process'}, 'snmpd': {'status': 'OK', 'message': '', 'type': 'Process'}, 'redis_server': {'status': 'OK', 'message': '', 'type': 'Process'}, 'intfmgrd': {'status': 'OK', 'message': '', 'type': 'Process'}, 'orchagent': {'status': 'OK', 'message': '', 'type': 'Process'}, 'vxlanmgrd': {'status': 'OK', 'message': '', 'type': 'Process'}, 'lldpd_monitor': {'status': 'OK', 'message': '', 'type': 'Process'}, 'portsyncd': {'status': 'OK', 'message': '', 'type': 'Process'}, 'var-log': {'status': 'OK', 'message': '', 'type': 'Filesystem'}, 'lldpmgrd': {'status': 'OK', 'message': '', 'type': 'Process'}, 'syncd': {'status': 'OK', 'message': '', 'type': 'Process'}, 'sonic': {'status': 'OK', 'message': '', 'type': 'System'}, 'buffermgrd': {'status': 
'OK', 'message': '', 'type': 'Process'}, 'portmgrd': {'status': 'OK', 'message': '', 'type': 'Process'}, 'staticd': {'status': 'OK', 'message': '', 'type': 'Process'}, 'bgpd': {'status': 'OK', 'message': '', 'type': 'Process'}, 'lldp_syncd': {'status': 'OK', 'message': '', 'type': 'Process'}, 'bgpcfgd': {'status': 'OK', 'message': '', 'type': 'Process'}, 'snmp_subagent': {'status': 'OK', 'message': '', 'type': 'Process'}, 'root-overlay': {'status': 'OK', 'message': '', 'type': 'Filesystem'}, 'fpmsyncd': {'status': 'OK', 'message': '', 'type': 'Process'}, 'sflowmgrd': {'status': 'Not OK', 'message': 'sflowmgrd is not Running', 'type': 'Process'}, 'vlanmgrd': {'status': 'OK', 'message': '', 'type': 'Process'}, 'nbrmgrd': {'status': 'OK', 'message': '', 'type': 'Process'}}, 'Hardware': {'PSU 2': {'status': 'OK', 'message': '', 'type': 'PSU'}, 'psu_1_fan_1': {'status': 'OK', 'message': '', 'type': 'Fan'}, 'psu_2_fan_1': {'status': 'OK', 'message': '', 'type': 'Fan'}, 'fan11': {'status': 'OK', 'message': '', 'type': 'Fan'}, 'fan10': {'status': 'OK', 'message': '', 'type': 'Fan'}, 'fan12': {'status': 'OK', 'message': '', 'type': 'Fan'}, 'ASIC': {'status': 'OK', 'message': '', 'type': 'ASIC'}, 'fan1': {'status': 'OK', 'message': '', 'type': 'Fan'}, 'PSU 1': {'status': 'OK', 'message': '', 'type': 'PSU'}, 'fan3': {'status': 'OK', 'message': '', 'type': 'Fan'}, 'fan2': {'status': 'OK', 'message': '', 'type': 'Fan'}, 'fan5': {'status': 'OK', 'message': '', 'type': 'Fan'}, 'fan4': {'status': 'OK', 'message': '', 'type': 'Fan'}, 'fan7': {'status': 'OK', 'message': '', 'type': 'Fan'}, 'fan6': {'status': 'OK', 'message': '', 'type': 'Fan'}, 'fan9': {'status': 'OK', 'message': '', 'type': 'Fan'}, 'fan8': {'status': 'OK', 'message': '', 'type': 'Fan'}}} + else: + state = MockerManager.STATE_RUNNING + stats = {} + MockerManager.counter += 1 + + return state, stats + +class MockerChassis(object): + counter = 0 + + def initizalize_system_led(self): + return + + def 
get_status_led(self): + if MockerChassis.counter == 1: + MockerChassis.counter += 1 + return "green" + else: + MockerChassis.counter += 1 + return "red" + +import show.main as show + +class TestHealth(object): + @classmethod + def setup_class(cls): + print("SETUP") + os.environ["PATH"] += os.pathsep + scripts_path + os.environ["UTILITIES_UNIT_TESTING"] = "1" + + def test_health_summary(self): + runner = CliRunner() + result = runner.invoke(show.cli.commands["system-health"].commands["summary"]) + click.echo(result.output) + expected = """\ +System health configuration file not found, exit... +""" + assert result.output == expected + result = runner.invoke(show.cli.commands["system-health"].commands["summary"]) + click.echo(result.output) + expected = """\ +System is currently booting... +""" + assert result.output == expected + result = runner.invoke(show.cli.commands["system-health"].commands["summary"]) + expected = """\ +System status summary + + System status LED red + Services: + Status: Not OK + Not Running: 'telemetry', 'snmp_subagent' + Hardware: + Status: OK +""" + click.echo(result.output) + assert result.output == expected + result = runner.invoke(show.cli.commands["system-health"].commands["summary"]) + click.echo(result.output) + expected = """\ +System status summary + + System status LED green + Services: + Status: OK + Hardware: + Status: OK +""" + assert result.output == expected + + def test_health_monitor(self): + runner = CliRunner() + result = runner.invoke(show.cli.commands["system-health"].commands["monitor-list"]) + click.echo(result.output) + expected = """ +System services and devices monitor list + +Name Status Type +-------------- -------- ---------- +telemetry Not OK Process +sflowmgrd Not OK Process +neighsyncd OK Process +vrfmgrd OK Process +dialout_client OK Process +zebra OK Process +rsyslog OK Process +snmpd OK Process +redis_server OK Process +intfmgrd OK Process +orchagent OK Process +vxlanmgrd OK Process +lldpd_monitor OK 
Process +portsyncd OK Process +var-log OK Filesystem +lldpmgrd OK Process +syncd OK Process +sonic OK System +buffermgrd OK Process +portmgrd OK Process +staticd OK Process +vlanmgrd OK Process +lldp_syncd OK Process +bgpcfgd OK Process +snmp_subagent OK Process +root-overlay OK Filesystem +fpmsyncd OK Process +bgpd OK Process +nbrmgrd OK Process +fan12 OK Fan +psu_1_fan_1 OK Fan +psu_2_fan_1 OK Fan +fan11 OK Fan +fan10 OK Fan +PSU 2 OK PSU +ASIC OK ASIC +fan1 OK Fan +PSU 1 OK PSU +fan3 OK Fan +fan2 OK Fan +fan5 OK Fan +fan4 OK Fan +fan7 OK Fan +fan6 OK Fan +fan9 OK Fan +fan8 OK Fan +""" + assert result.output == expected + + def test_health_detail(self): + runner = CliRunner() + result = runner.invoke(show.cli.commands["system-health"].commands["detail"]) + click.echo(result.output) + expected = """\ +System status summary + + System status LED red + Services: + Status: Not OK + Not Running: 'telemetry', 'sflowmgrd' + Hardware: + Status: Not OK + Reasons: Failed to get voltage minimum threshold data for PSU 1 + Failed to get voltage minimum threshold data for PSU 2 + +System services and devices monitor list + +Name Status Type +-------------- -------- ---------- +telemetry Not OK Process +sflowmgrd Not OK Process +neighsyncd OK Process +vrfmgrd OK Process +dialout_client OK Process +zebra OK Process +rsyslog OK Process +snmpd OK Process +redis_server OK Process +intfmgrd OK Process +orchagent OK Process +vxlanmgrd OK Process +lldpd_monitor OK Process +portsyncd OK Process +var-log OK Filesystem +lldpmgrd OK Process +syncd OK Process +sonic OK System +buffermgrd OK Process +portmgrd OK Process +staticd OK Process +vlanmgrd OK Process +lldp_syncd OK Process +bgpcfgd OK Process +snmp_subagent OK Process +root-overlay OK Filesystem +fpmsyncd OK Process +bgpd OK Process +nbrmgrd OK Process +PSU 2 Not OK PSU +PSU 1 Not OK PSU +fan12 OK Fan +psu_1_fan_1 OK Fan +psu_2_fan_1 OK Fan +fan11 OK Fan +fan10 OK Fan +ASIC OK ASIC +fan1 OK Fan +fan3 OK Fan +fan2 OK Fan +fan5 OK 
Fan +fan4 OK Fan +fan7 OK Fan +fan6 OK Fan +fan9 OK Fan +fan8 OK Fan + +System services and devices ignore list + +Name Status Type +------ -------- ------ +""" + assert result.output == expected + MockerConfig.ignore_devices.insert(0, "psu.voltage") + result = runner.invoke(show.cli.commands["system-health"].commands["detail"]) + click.echo(result.output) + expected = """\ +System status summary + + System status LED red + Services: + Status: Not OK + Not Running: 'telemetry', 'sflowmgrd' + Hardware: + Status: OK + +System services and devices monitor list + +Name Status Type +-------------- -------- ---------- +telemetry Not OK Process +sflowmgrd Not OK Process +neighsyncd OK Process +vrfmgrd OK Process +dialout_client OK Process +zebra OK Process +rsyslog OK Process +snmpd OK Process +redis_server OK Process +intfmgrd OK Process +orchagent OK Process +vxlanmgrd OK Process +lldpd_monitor OK Process +portsyncd OK Process +var-log OK Filesystem +lldpmgrd OK Process +syncd OK Process +sonic OK System +buffermgrd OK Process +portmgrd OK Process +staticd OK Process +vlanmgrd OK Process +lldp_syncd OK Process +bgpcfgd OK Process +snmp_subagent OK Process +root-overlay OK Filesystem +fpmsyncd OK Process +bgpd OK Process +nbrmgrd OK Process +fan12 OK Fan +psu_1_fan_1 OK Fan +psu_2_fan_1 OK Fan +fan11 OK Fan +fan10 OK Fan +PSU 2 OK PSU +ASIC OK ASIC +fan1 OK Fan +PSU 1 OK PSU +fan3 OK Fan +fan2 OK Fan +fan5 OK Fan +fan4 OK Fan +fan7 OK Fan +fan6 OK Fan +fan9 OK Fan +fan8 OK Fan + +System services and devices ignore list + +Name Status Type +----------- -------- ------ +psu.voltage Ignored Device +""" + assert result.output == expected + + @classmethod + def teardown_class(cls): + print("TEARDOWN") + os.environ["PATH"] = os.pathsep.join(os.environ["PATH"].split(os.pathsep)[:-1]) + os.environ["UTILITIES_UNIT_TESTING"] = "0" +