From a56133bcac3f5d93ba7e5243acc75488bcdf3c17 Mon Sep 17 00:00:00 2001 From: sujinmkang Date: Tue, 16 Aug 2022 15:48:11 -0700 Subject: [PATCH 01/12] Add hardware reboot cause as actual reboot cause for soft reboot failed --- scripts/determine-reboot-cause | 39 +++++++++++++++++++++++++++------- 1 file changed, 31 insertions(+), 8 deletions(-) diff --git a/scripts/determine-reboot-cause b/scripts/determine-reboot-cause index 1408ad0e2952..1e28356491a3 100755 --- a/scripts/determine-reboot-cause +++ b/scripts/determine-reboot-cause @@ -181,18 +181,41 @@ def main(): # any additional_reboot_info it will be stored as a "comment" in REBOOT_CAUSE_HISTORY_FILE additional_reboot_info = "N/A" - # Check if the previous reboot was warm/fast reboot by testing whether there is "fast|fastfast|warm" in /proc/cmdline + # 1. Check if the previous reboot was warm/fast reboot by testing whether there is "fast|fastfast|warm" in /proc/cmdline proc_cmdline_reboot_cause = find_proc_cmdline_reboot_cause() - # If /proc/cmdline does not indicate reboot cause, check if the previous reboot was caused by hardware - if proc_cmdline_reboot_cause is None: - previous_reboot_cause = find_hardware_reboot_cause() - if previous_reboot_cause.startswith(REBOOT_CAUSE_NON_HARDWARE): - # If the reboot cause is non-hardware, get the reboot cause from REBOOT_CAUSE_FILE - previous_reboot_cause = find_software_reboot_cause() + # 2. Check if the previous reboot was caused by hardware + # If yes, the hardware reboot cause will be treated as the reboot cause + hardware_reboot_cause = find_hardware_reboot_cause() + + # 3. If there is a REBOOT_CAUSE_FILE, it will contain any software-related + # reboot info. We will use it as the previous cause. + software_reboot_cause = find_software_reboot_cause() + + # The main decision logic of the reboot cause: + # If there is a reboot cause indicated by /proc/cmdline, it should be warmreboot/fastreboot + # the software_reboot_cause which is the content of /hosts/reboot-cause/reboot-cause.txt + # will be treated as the reboot cause + # Elif there is a reboot cause indicated by platform API, + # the hardware_reboot_cause will be treated as the reboot cause + # Else the software_reboot_cause will be treated as the reboot cause + if proc_cmdline_reboot_cause is not None: + if not hardware_reboot_cause.startswith(REBOOT_CAUSE_NON_HARDWARE): + # Add the hardware_reboot_cause as actual reboot cause + previous_reboot_cause = hardware_reboot_cause + additional_reboot_info = software_reboot_cause + else: + previous_reboot_cause = software_reboot_cause + elif hardware_reboot_cause is not None: + # Check if any software reboot was issued before this hardware reboot happened + if software_reboot_cause is not REBOOT_CAUSE_UNKNOWN: + previous_reboot_cause = hardware_reboot_cause + additional_reboot_info = software_reboot_cause + else: + previous_reboot_cause = hardware_reboot_cause else: # Get the reboot cause from REBOOT_CAUSE_FILE - previous_reboot_cause = find_software_reboot_cause() + previous_reboot_cause = software_reboot_cause # Current time reboot_cause_gen_time = str(datetime.datetime.now().strftime('%Y_%m_%d_%H_%M_%S')) From 25379d36740b75c3d43633ed13070aa18c02b381 Mon Sep 17 00:00:00 2001 From: sujinmkang Date: Tue, 16 Aug 2022 17:59:41 -0700 Subject: [PATCH 02/12] Add unit test case --- scripts/determine-reboot-cause | 43 ++++++++++++++++------------ tests/determine-reboot-cause_test.py | 33 +++++++++++++++++++++ 2 files changed, 57 insertions(+), 19 deletions(-) diff --git a/scripts/determine-reboot-cause b/scripts/determine-reboot-cause index 1e28356491a3..4320b41a7a02 100755 --- a/scripts/determine-reboot-cause +++ b/scripts/determine-reboot-cause @@ -158,25 +158,7 @@ def get_reboot_cause_dict(previous_reboot_cause, comment, gen_time): return reboot_cause_dict - -def main(): - # Configure logger to log all messages INFO level and higher - sonic_logger.set_min_log_priority_info() - - sonic_logger.log_info("Starting up...") - - if not os.geteuid() == 0: - sonic_logger.log_error("User {} does not have permission to execute".format(pwd.getpwuid(os.getuid()).pw_name)) - sys.exit("This utility must be run as root") - - # Create REBOOT_CAUSE_DIR if it doesn't exist - if not os.path.exists(REBOOT_CAUSE_DIR): - os.makedirs(REBOOT_CAUSE_DIR) - - # Remove stale PREVIOUS_REBOOT_CAUSE_FILE if it exists - if os.path.exists(PREVIOUS_REBOOT_CAUSE_FILE): - os.remove(PREVIOUS_REBOOT_CAUSE_FILE) - +def determin_reboot_cause(): # This variable is kept for future-use purpose. When proc_cmd_line/vendor/software provides # any additional_reboot_info it will be stored as a "comment" in REBOOT_CAUSE_HISTORY_FILE additional_reboot_info = "N/A" @@ -217,6 +199,29 @@ def main(): # Get the reboot cause from REBOOT_CAUSE_FILE previous_reboot_cause = software_reboot_cause + return previous_reboot_cause, additional_reboot_info + + +def main(): + # Configure logger to log all messages INFO level and higher + sonic_logger.set_min_log_priority_info() + + sonic_logger.log_info("Starting up...") + + if not os.geteuid() == 0: + sonic_logger.log_error("User {} does not have permission to execute".format(pwd.getpwuid(os.getuid()).pw_name)) + sys.exit("This utility must be run as root") + + # Create REBOOT_CAUSE_DIR if it doesn't exist + if not os.path.exists(REBOOT_CAUSE_DIR): + os.makedirs(REBOOT_CAUSE_DIR) + + # Remove stale PREVIOUS_REBOOT_CAUSE_FILE if it exists + if os.path.exists(PREVIOUS_REBOOT_CAUSE_FILE): + os.remove(PREVIOUS_REBOOT_CAUSE_FILE) + + previous_reboot_cause, additional_reboot_info = determin_reboot_cause() + # Current time reboot_cause_gen_time = str(datetime.datetime.now().strftime('%Y_%m_%d_%H_%M_%S')) diff --git a/tests/determine-reboot-cause_test.py b/tests/determine-reboot-cause_test.py index 7d22a512f8ee..743823dc5e26 100644 --- a/tests/determine-reboot-cause_test.py +++ b/tests/determine-reboot-cause_test.py @@ -117,3 +117,36 @@ def test_get_reboot_cause_dict_user(self): def test_get_reboot_cause_dict_kernel_panic(self): reboot_cause_dict = determine_reboot_cause.get_reboot_cause_dict(REBOOT_CAUSE_KERNEL_PANIC, "", GEN_TIME_KERNEL_PANIC) assert reboot_cause_dict == EXPECTED_KERNEL_PANIC_REBOOT_CAUSE_DICT + + def test_determine_reboot_cause_hardware(self): + with mock.patch("determine_reboot_cause.find_proc_cmdline_reboot_cause", return_value="Unknown"): + with mock.patch("determine_reboot_cause.find_software_reboot_cause", return_value="Power Cycle"): + with mock.patch("determine_reboot_cause.find_hardware_reboot_cause", return_value="Unknown"): + previous_reboot_cause, additional_info = determine_reboot_cause.determine_reboot_cause() + assert previous_reboot_cause == "Power Cycle" + assert additional_info == "N/A" + + def test_determine_reboot_cause_software(self): + with mock.patch("determine_reboot_cause.find_proc_cmdline_reboot_cause", return_value="Unknown"): + with mock.patch("determine_reboot_cause.find_software_reboot_cause", return_value="Unknown"): + with mock.patch("determine_reboot_cause.find_hardware_reboot_cause", return_value=EXPECTED_FIND_SOFTWARE_REBOOT_CAUSE_USER): + previous_reboot_cause, additional_info = determine_reboot_cause.determine_reboot_cause() + assert previous_reboot_cause == EXPECTED_FIND_SOFTWARE_REBOOT_CAUSE_USER + assert additional_info == "N/A" + + def test_determine_reboot_cause_cmdline(self): + with mock.patch("determine_reboot_cause.find_proc_cmdline_reboot_cause", return_value=EXPECTED_PARSE_WARMFAST_REBOOT_FROM_PROC_CMDLINE): + with mock.patch("determine_reboot_cause.find_software_reboot_cause", return_value="Unknown"): + with mock.patch("determine_reboot_cause.find_hardware_reboot_cause", return_value=EXPECTED_FIND_SOFTWARE_REBOOT_CAUSE_USER): + previous_reboot_cause, additional_info = determine_reboot_cause.determine_reboot_cause() + assert previous_reboot_cause == EXPECTED_FIND_SOFTWARE_REBOOT_CAUSE_USER + assert additional_info == "N/A" + + def test_determine_reboot_cause_cmdline_hardware(self): + with mock.patch("determine_reboot_cause.find_proc_cmdline_reboot_cause", return_value=EXPECTED_PARSE_WARMFAST_REBOOT_FROM_PROC_CMDLINE): + with mock.patch("determine_reboot_cause.find_software_reboot_cause", return_value=REBOOT_CAUSE_WATCHDOG): + with mock.patch("determine_reboot_cause.find_hardware_reboot_cause", return_value=EXPECTED_FIND_SOFTWARE_REBOOT_CAUSE_USER): + previous_reboot_cause, additional_info = determine_reboot_cause.determine_reboot_cause() + assert previous_reboot_cause == REBOOT_CAUSE_WATCHDOG + assert additional_info == EXPECTED_FIND_SOFTWARE_REBOOT_CAUSE_USER + From 1ba611fc1a6dbcf20afed34920834e6aeadca7e7 Mon Sep 17 00:00:00 2001 From: sujinmkang Date: Tue, 16 Aug 2022 18:07:25 -0700 Subject: [PATCH 03/12] fix typo --- scripts/determine-reboot-cause | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/determine-reboot-cause b/scripts/determine-reboot-cause index 4320b41a7a02..6252947663e1 100755 --- a/scripts/determine-reboot-cause +++ b/scripts/determine-reboot-cause @@ -158,7 +158,7 @@ def get_reboot_cause_dict(previous_reboot_cause, comment, gen_time): return reboot_cause_dict -def determin_reboot_cause(): +def determine_reboot_cause(): # This variable is kept for future-use purpose. When proc_cmd_line/vendor/software provides # any additional_reboot_info it will be stored as a "comment" in REBOOT_CAUSE_HISTORY_FILE additional_reboot_info = "N/A" @@ -220,7 +220,7 @@ def main(): if os.path.exists(PREVIOUS_REBOOT_CAUSE_FILE): os.remove(PREVIOUS_REBOOT_CAUSE_FILE) - previous_reboot_cause, additional_reboot_info = determin_reboot_cause() + previous_reboot_cause, additional_reboot_info = determine_reboot_cause() # Current time reboot_cause_gen_time = str(datetime.datetime.now().strftime('%Y_%m_%d_%H_%M_%S')) From 7c8003db45ca118b3f4113f406b356b17653c663 Mon Sep 17 00:00:00 2001 From: sujinmkang Date: Tue, 16 Aug 2022 18:29:43 -0700 Subject: [PATCH 04/12] fix the reboot-cause regix for test --- tests/determine-reboot-cause_test.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/tests/determine-reboot-cause_test.py b/tests/determine-reboot-cause_test.py index 743823dc5e26..e976469c8bde 100644 --- a/tests/determine-reboot-cause_test.py +++ b/tests/determine-reboot-cause_test.py @@ -128,24 +128,24 @@ def test_determine_reboot_cause_hardware(self): def test_determine_reboot_cause_software(self): with mock.patch("determine_reboot_cause.find_proc_cmdline_reboot_cause", return_value="Unknown"): - with mock.patch("determine_reboot_cause.find_software_reboot_cause", return_value="Unknown"): - with mock.patch("determine_reboot_cause.find_hardware_reboot_cause", return_value=EXPECTED_FIND_SOFTWARE_REBOOT_CAUSE_USER): + with mock.patch("determine_reboot_cause.find_software_reboot_cause", return_value=EXPECTED_FIND_SOFTWARE_REBOOT_CAUSE_USER): + with mock.patch("determine_reboot_cause.find_hardware_reboot_cause", return_value="Unknown"): previous_reboot_cause, additional_info = determine_reboot_cause.determine_reboot_cause() assert previous_reboot_cause == EXPECTED_FIND_SOFTWARE_REBOOT_CAUSE_USER assert additional_info == "N/A" def test_determine_reboot_cause_cmdline(self): with mock.patch("determine_reboot_cause.find_proc_cmdline_reboot_cause", return_value=EXPECTED_PARSE_WARMFAST_REBOOT_FROM_PROC_CMDLINE): - with mock.patch("determine_reboot_cause.find_software_reboot_cause", return_value="Unknown"): - with mock.patch("determine_reboot_cause.find_hardware_reboot_cause", return_value=EXPECTED_FIND_SOFTWARE_REBOOT_CAUSE_USER): + with mock.patch("determine_reboot_cause.find_software_reboot_cause", return_value=EXPECTED_FIND_SOFTWARE_REBOOT_CAUSE_USER): + with mock.patch("determine_reboot_cause.find_hardware_reboot_cause", return_value="Unknown"): previous_reboot_cause, additional_info = determine_reboot_cause.determine_reboot_cause() assert previous_reboot_cause == EXPECTED_FIND_SOFTWARE_REBOOT_CAUSE_USER assert additional_info == "N/A" def test_determine_reboot_cause_cmdline_hardware(self): with mock.patch("determine_reboot_cause.find_proc_cmdline_reboot_cause", return_value=EXPECTED_PARSE_WARMFAST_REBOOT_FROM_PROC_CMDLINE): - with mock.patch("determine_reboot_cause.find_software_reboot_cause", return_value=REBOOT_CAUSE_WATCHDOG): - with mock.patch("determine_reboot_cause.find_hardware_reboot_cause", return_value=EXPECTED_FIND_SOFTWARE_REBOOT_CAUSE_USER): + with mock.patch("determine_reboot_cause.find_software_reboot_cause", return_value=EXPECTED_FIND_SOFTWARE_REBOOT_CAUSE_USER): + with mock.patch("determine_reboot_cause.find_hardware_reboot_cause", return_value=REBOOT_CAUSE_WATCHDOG): previous_reboot_cause, additional_info = determine_reboot_cause.determine_reboot_cause() assert previous_reboot_cause == REBOOT_CAUSE_WATCHDOG assert additional_info == EXPECTED_FIND_SOFTWARE_REBOOT_CAUSE_USER From 53ad7cdd37d56f4c454d6a5f135a305b9c655894 Mon Sep 17 00:00:00 2001 From: sujinmkang Date: Tue, 16 Aug 2022 18:51:49 -0700 Subject: [PATCH 05/12] fix the mock patch function --- tests/determine-reboot-cause_test.py | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/tests/determine-reboot-cause_test.py b/tests/determine-reboot-cause_test.py index e976469c8bde..509c32337e6c 100644 --- a/tests/determine-reboot-cause_test.py +++ b/tests/determine-reboot-cause_test.py @@ -119,33 +119,33 @@ def test_get_reboot_cause_dict_kernel_panic(self): assert reboot_cause_dict == EXPECTED_KERNEL_PANIC_REBOOT_CAUSE_DICT def test_determine_reboot_cause_hardware(self): - with mock.patch("determine_reboot_cause.find_proc_cmdline_reboot_cause", return_value="Unknown"): - with mock.patch("determine_reboot_cause.find_software_reboot_cause", return_value="Power Cycle"): - with mock.patch("determine_reboot_cause.find_hardware_reboot_cause", return_value="Unknown"): + with mock.patch("determine_reboot_cause.determine_reboot_cause.find_proc_cmdline_reboot_cause", return_value="Unknown"): + with mock.patch("determine_reboot_cause.determine_reboot_cause.find_software_reboot_cause", return_value="Power Cycle"): + with mock.patch("determine_reboot_cause.determine_reboot_cause.find_hardware_reboot_cause", return_value="Unknown"): previous_reboot_cause, additional_info = determine_reboot_cause.determine_reboot_cause() assert previous_reboot_cause == "Power Cycle" assert additional_info == "N/A" def test_determine_reboot_cause_software(self): - with mock.patch("determine_reboot_cause.find_proc_cmdline_reboot_cause", return_value="Unknown"): - with mock.patch("determine_reboot_cause.find_software_reboot_cause", return_value=EXPECTED_FIND_SOFTWARE_REBOOT_CAUSE_USER): - with mock.patch("determine_reboot_cause.find_hardware_reboot_cause", return_value="Unknown"): + with mock.patch("determine_reboot_cause.determine_reboot_cause.find_proc_cmdline_reboot_cause", return_value="Unknown"): + with mock.patch("determine_reboot_cause.determine_reboot_cause.find_software_reboot_cause", return_value=EXPECTED_FIND_SOFTWARE_REBOOT_CAUSE_USER): + with mock.patch("determine_reboot_cause.determine_reboot_cause.find_hardware_reboot_cause", return_value="Unknown"): previous_reboot_cause, additional_info = determine_reboot_cause.determine_reboot_cause() assert previous_reboot_cause == EXPECTED_FIND_SOFTWARE_REBOOT_CAUSE_USER assert additional_info == "N/A" def test_determine_reboot_cause_cmdline(self): - with mock.patch("determine_reboot_cause.find_proc_cmdline_reboot_cause", return_value=EXPECTED_PARSE_WARMFAST_REBOOT_FROM_PROC_CMDLINE): - with mock.patch("determine_reboot_cause.find_software_reboot_cause", return_value=EXPECTED_FIND_SOFTWARE_REBOOT_CAUSE_USER): - with mock.patch("determine_reboot_cause.find_hardware_reboot_cause", return_value="Unknown"): + with mock.patch("determine_reboot_cause.determine_reboot_cause.find_proc_cmdline_reboot_cause", return_value=EXPECTED_PARSE_WARMFAST_REBOOT_FROM_PROC_CMDLINE): + with mock.patch("determine_reboot_cause.determine_reboot_cause.find_software_reboot_cause", return_value=EXPECTED_FIND_SOFTWARE_REBOOT_CAUSE_USER): + with mock.patch("determine_reboot_cause.determine_reboot_cause.find_hardware_reboot_cause", return_value="Unknown"): previous_reboot_cause, additional_info = determine_reboot_cause.determine_reboot_cause() assert previous_reboot_cause == EXPECTED_FIND_SOFTWARE_REBOOT_CAUSE_USER assert additional_info == "N/A" def test_determine_reboot_cause_cmdline_hardware(self): - with mock.patch("determine_reboot_cause.find_proc_cmdline_reboot_cause", return_value=EXPECTED_PARSE_WARMFAST_REBOOT_FROM_PROC_CMDLINE): - with mock.patch("determine_reboot_cause.find_software_reboot_cause", return_value=EXPECTED_FIND_SOFTWARE_REBOOT_CAUSE_USER): - with mock.patch("determine_reboot_cause.find_hardware_reboot_cause", return_value=REBOOT_CAUSE_WATCHDOG): + with mock.patch("determine_reboot_cause.determine_reboot_cause.find_proc_cmdline_reboot_cause", return_value=EXPECTED_PARSE_WARMFAST_REBOOT_FROM_PROC_CMDLINE): + with mock.patch("determine_reboot_cause.determine_reboot_cause.find_software_reboot_cause", return_value=EXPECTED_FIND_SOFTWARE_REBOOT_CAUSE_USER): + with mock.patch("determine_reboot_cause.determine_reboot_cause.find_hardware_reboot_cause", return_value=REBOOT_CAUSE_WATCHDOG): previous_reboot_cause, additional_info = determine_reboot_cause.determine_reboot_cause() assert previous_reboot_cause == REBOOT_CAUSE_WATCHDOG assert additional_info == EXPECTED_FIND_SOFTWARE_REBOOT_CAUSE_USER From 8543ddf2072a5a9b4f7707d01b1d9103ad3d5020 Mon Sep 17 00:00:00 2001 From: sujinmkang Date: Wed, 17 Aug 2022 16:42:57 -0700 Subject: [PATCH 06/12] update the reboot cause logic and update the unit test --- scripts/determine-reboot-cause | 31 +++++++++--------- tests/determine-reboot-cause_test.py | 47 ++++++++++++++++++++-------- 2 files changed, 50 insertions(+), 28 deletions(-) diff --git a/scripts/determine-reboot-cause b/scripts/determine-reboot-cause index 6252947663e1..b4d01d0ffc1f 100755 --- a/scripts/determine-reboot-cause +++ b/scripts/determine-reboot-cause @@ -175,28 +175,29 @@ def determine_reboot_cause(): software_reboot_cause = find_software_reboot_cause() # The main decision logic of the reboot cause: - # If there is a reboot cause indicated by /proc/cmdline, it should be warmreboot/fastreboot + # If there is a valid hardware reboot cause indicated by platform API, + # check the software reboot cause to add additional rebot cause. + # If there is a reboot cause indicated by /proc/cmdline, and/or warmreboot/fastreboot/softreboot # the software_reboot_cause which is the content of /hosts/reboot-cause/reboot-cause.txt - # will be treated as the reboot cause - # Elif there is a reboot cause indicated by platform API, - # the hardware_reboot_cause will be treated as the reboot cause + # will be treated as the additional reboot cause + # Elif there is a cmdline reboot cause, + # the software_reboot_cause will be treated as the reboot cause if it's not unknown + # otherwise, the cmdline_reboot_cause will be treated as the reboot cause if it's not none # Else the software_reboot_cause will be treated as the reboot cause - if proc_cmdline_reboot_cause is not None: - if not hardware_reboot_cause.startswith(REBOOT_CAUSE_NON_HARDWARE): - # Add the hardware_reboot_cause as actual reboot cause - previous_reboot_cause = hardware_reboot_cause - additional_reboot_info = software_reboot_cause - else: - previous_reboot_cause = software_reboot_cause - elif hardware_reboot_cause is not None: + if not hardware_reboot_cause.startswith(REBOOT_CAUSE_NON_HARDWARE): + previous_reboot_cause = hardware_reboot_cause # Check if any software reboot was issued before this hardware reboot happened if software_reboot_cause is not REBOOT_CAUSE_UNKNOWN: - previous_reboot_cause = hardware_reboot_cause additional_reboot_info = software_reboot_cause + elif proc_cmdline_reboot_cause is not None: + additional_reboot_info = proc_cmdline_reboot_cause + elif proc_cmdline_reboot_cause is not None: + if software_reboot_cause is not REBOOT_CAUSE_UNKNOWN: + # Get the reboot cause from REBOOT_CAUSE_FILE + previous_reboot_cause = software_reboot_cause else: - previous_reboot_cause = hardware_reboot_cause + previous_reboot_cause = proc_cmdline_reboot_cause else: - # Get the reboot cause from REBOOT_CAUSE_FILE previous_reboot_cause = software_reboot_cause return previous_reboot_cause, additional_reboot_info diff --git a/tests/determine-reboot-cause_test.py b/tests/determine-reboot-cause_test.py index 509c32337e6c..a2ddea3f38b9 100644 --- a/tests/determine-reboot-cause_test.py +++ b/tests/determine-reboot-cause_test.py @@ -54,11 +54,16 @@ GEN_TIME_KERNEL_PANIC = "2021_3_28_13_48_49" +REBOOT_CAUSE_UNKNOWN = "Unknown" +REBOOT_CAUSE_NON_HARDWARE = "Non-Hardware" +EXPECTED_NON_HARDWARE_REBOOT_CAUSE = {REBOOT_CAUSE_NON_HARDWARE, "N/A"} +REBOOT_CAUSE_HARDWARE_OTHER = "Hardware - Other" +EXPECTED_HARDWARE_REBOOT_CAUSE = {REBOOT_CAUSE_HARDWARE_OTHER, ""} + EXPECTED_PARSE_WARMFAST_REBOOT_FROM_PROC_CMDLINE = "warm-reboot" EXPECTED_FIND_SOFTWARE_REBOOT_CAUSE_USER = "User issued 'warm-reboot' command [User: admin, Time: Mon Nov 2 22:37:45 UTC 2020]" EXPECTED_FIND_FIRSTBOOT_VERSION = " (First boot of SONiC version 20191130.52)" EXPECTED_FIND_SOFTWARE_REBOOT_CAUSE_FIRSTBOOT = "Unknown (First boot of SONiC version 20191130.52)" -EXPECTED_HARDWARE_REBOOT_CAUSE = {"warm-reboot", ""} EXPECTED_WATCHDOG_REBOOT_CAUSE_DICT = {'comment': '', 'gen_time': '2020_10_22_03_15_08', 'cause': 'Watchdog', 'user': 'N/A', 'time': 'N/A'} EXPECTED_USER_REBOOT_CAUSE_DICT = {'comment': '', 'gen_time': '2020_10_22_03_14_07', 'cause': 'reboot', 'user': 'admin', 'time': 'Thu Oct 22 03:11:08 UTC 2020'} @@ -119,33 +124,49 @@ def test_get_reboot_cause_dict_kernel_panic(self): assert reboot_cause_dict == EXPECTED_KERNEL_PANIC_REBOOT_CAUSE_DICT def test_determine_reboot_cause_hardware(self): - with mock.patch("determine_reboot_cause.determine_reboot_cause.find_proc_cmdline_reboot_cause", return_value="Unknown"): - with mock.patch("determine_reboot_cause.determine_reboot_cause.find_software_reboot_cause", return_value="Power Cycle"): - with mock.patch("determine_reboot_cause.determine_reboot_cause.find_hardware_reboot_cause", return_value="Unknown"): - previous_reboot_cause, additional_info = determine_reboot_cause.determine_reboot_cause() - assert previous_reboot_cause == "Power Cycle" - assert additional_info == "N/A" + with mock.patch("determine_reboot_cause.find_proc_cmdline_reboot_cause", return_value=None): + with mock.patch("determine_reboot_cause.find_software_reboot_cause", return_value=REBOOT_CAUSE_UNKNOWN): + with mock.patch("determine_reboot_cause.find_hardware_reboot_cause", return_value=EXPECTED_HARDWARE_REBOOT_CAUSE): + previous_reboot_cause, additional_reboot_info = determine_reboot_cause.determine_reboot_cause() + assert previous_reboot_cause == EXPECTED_HARDWARE_REBOOT_CAUSE + assert additional_reboot_info == "N/A" def test_determine_reboot_cause_software(self): - with mock.patch("determine_reboot_cause.determine_reboot_cause.find_proc_cmdline_reboot_cause", return_value="Unknown"): + with mock.patch("determine_reboot_cause.determine_reboot_cause.find_proc_cmdline_reboot_cause", return_value=None): with mock.patch("determine_reboot_cause.determine_reboot_cause.find_software_reboot_cause", return_value=EXPECTED_FIND_SOFTWARE_REBOOT_CAUSE_USER): - with mock.patch("determine_reboot_cause.determine_reboot_cause.find_hardware_reboot_cause", return_value="Unknown"): + with mock.patch("determine_reboot_cause.determine_reboot_cause.find_hardware_reboot_cause", return_value=EXPECTED_NON_HARDWARE_REBOOT_CAUSE): previous_reboot_cause, additional_info = determine_reboot_cause.determine_reboot_cause() assert previous_reboot_cause == EXPECTED_FIND_SOFTWARE_REBOOT_CAUSE_USER assert additional_info == "N/A" - def test_determine_reboot_cause_cmdline(self): + def test_determine_reboot_cause_cmdline_software(self): with mock.patch("determine_reboot_cause.determine_reboot_cause.find_proc_cmdline_reboot_cause", return_value=EXPECTED_PARSE_WARMFAST_REBOOT_FROM_PROC_CMDLINE): with mock.patch("determine_reboot_cause.determine_reboot_cause.find_software_reboot_cause", return_value=EXPECTED_FIND_SOFTWARE_REBOOT_CAUSE_USER): - with mock.patch("determine_reboot_cause.determine_reboot_cause.find_hardware_reboot_cause", return_value="Unknown"): + with mock.patch("determine_reboot_cause.determine_reboot_cause.find_hardware_reboot_cause", return_value=EXPECTED_NON_HARDWARE_REBOOT_CAUSE): previous_reboot_cause, additional_info = determine_reboot_cause.determine_reboot_cause() assert previous_reboot_cause == EXPECTED_FIND_SOFTWARE_REBOOT_CAUSE_USER assert additional_info == "N/A" + def test_determine_reboot_cause_cmdline_no_software(self): + with mock.patch("determine_reboot_cause.determine_reboot_cause.find_proc_cmdline_reboot_cause", return_value=EXPECTED_PARSE_WARMFAST_REBOOT_FROM_PROC_CMDLINE): + with mock.patch("determine_reboot_cause.determine_reboot_cause.find_software_reboot_cause", return_value=REBOOT_CAUSE_UNKNOWN): + with mock.patch("determine_reboot_cause.determine_reboot_cause.find_hardware_reboot_cause", return_value=EXPECTED_NON_HARDWARE_REBOOT_CAUSE): + previous_reboot_cause, additional_info = determine_reboot_cause.determine_reboot_cause() + assert previous_reboot_cause == EXPECTED_PARSE_WARMFAST_REBOOT_FROM_PROC_CMDLINE + assert additional_info == "N/A" + def test_determine_reboot_cause_cmdline_hardware(self): with mock.patch("determine_reboot_cause.determine_reboot_cause.find_proc_cmdline_reboot_cause", return_value=EXPECTED_PARSE_WARMFAST_REBOOT_FROM_PROC_CMDLINE): - with mock.patch("determine_reboot_cause.determine_reboot_cause.find_software_reboot_cause", return_value=EXPECTED_FIND_SOFTWARE_REBOOT_CAUSE_USER): - with mock.patch("determine_reboot_cause.determine_reboot_cause.find_hardware_reboot_cause", return_value=REBOOT_CAUSE_WATCHDOG): + with mock.patch("determine_reboot_cause.determine_reboot_cause.find_software_reboot_cause", return_value=REBOOT_CAUSE_UNKNOWN): + with mock.patch("determine_reboot_cause.determine_reboot_cause.find_hardware_reboot_cause", return_value=EXPECTED_HARDWARE_REBOOT_CAUSE): + previous_reboot_cause, additional_info = determine_reboot_cause.determine_reboot_cause() + assert previous_reboot_cause == EXPECTED_HARDWARE_REBOOT_CAUSE + assert additional_info == EXPECTED_PARSE_WARMFAST_REBOOT_FROM_PROC_CMDLINE + + def test_determine_reboot_cause_software_hardware(self): + with mock.patch("determine_reboot_cause.find_proc_cmdline_reboot_cause", return_value=EXPECTED_PARSE_WARMFAST_REBOOT_FROM_PROC_CMDLINE): + with mock.patch("determine_reboot_cause.find_software_reboot_cause", return_value=EXPECTED_FIND_SOFTWARE_REBOOT_CAUSE_USER): + with mock.patch("determine_reboot_cause.find_hardware_reboot_cause", return_value=EXPECTED_HARDWARE_REBOOT_CAUSE): previous_reboot_cause, additional_info = determine_reboot_cause.determine_reboot_cause() assert previous_reboot_cause == REBOOT_CAUSE_WATCHDOG assert additional_info == EXPECTED_FIND_SOFTWARE_REBOOT_CAUSE_USER From 8a630bb72589add6b123ff686bc9e1c47a692425 Mon Sep 17 00:00:00 2001 From: sujinmkang Date: Wed, 17 Aug 2022 18:35:35 -0700 Subject: [PATCH 07/12] fix mock patch --- tests/determine-reboot-cause_test.py | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/tests/determine-reboot-cause_test.py b/tests/determine-reboot-cause_test.py index a2ddea3f38b9..5dff482ef696 100644 --- a/tests/determine-reboot-cause_test.py +++ b/tests/determine-reboot-cause_test.py @@ -132,33 +132,33 @@ def test_determine_reboot_cause_hardware(self): assert additional_reboot_info == "N/A" def test_determine_reboot_cause_software(self): - with mock.patch("determine_reboot_cause.determine_reboot_cause.find_proc_cmdline_reboot_cause", return_value=None): - with mock.patch("determine_reboot_cause.determine_reboot_cause.find_software_reboot_cause", return_value=EXPECTED_FIND_SOFTWARE_REBOOT_CAUSE_USER): - with mock.patch("determine_reboot_cause.determine_reboot_cause.find_hardware_reboot_cause", return_value=EXPECTED_NON_HARDWARE_REBOOT_CAUSE): + with mock.patch("determine_reboot_cause.find_proc_cmdline_reboot_cause", return_value=None): + with mock.patch("determine_reboot_cause.find_software_reboot_cause", return_value=EXPECTED_FIND_SOFTWARE_REBOOT_CAUSE_USER): + with mock.patch("determine_reboot_cause.find_hardware_reboot_cause", return_value=EXPECTED_NON_HARDWARE_REBOOT_CAUSE): previous_reboot_cause, additional_info = determine_reboot_cause.determine_reboot_cause() assert previous_reboot_cause == EXPECTED_FIND_SOFTWARE_REBOOT_CAUSE_USER assert additional_info == "N/A" def test_determine_reboot_cause_cmdline_software(self): - with mock.patch("determine_reboot_cause.determine_reboot_cause.find_proc_cmdline_reboot_cause", return_value=EXPECTED_PARSE_WARMFAST_REBOOT_FROM_PROC_CMDLINE): - with mock.patch("determine_reboot_cause.determine_reboot_cause.find_software_reboot_cause", return_value=EXPECTED_FIND_SOFTWARE_REBOOT_CAUSE_USER): - with mock.patch("determine_reboot_cause.determine_reboot_cause.find_hardware_reboot_cause", return_value=EXPECTED_NON_HARDWARE_REBOOT_CAUSE): + with mock.patch("determine_reboot_cause.find_proc_cmdline_reboot_cause", return_value=EXPECTED_PARSE_WARMFAST_REBOOT_FROM_PROC_CMDLINE): + with mock.patch("determine_reboot_cause.find_software_reboot_cause", return_value=EXPECTED_FIND_SOFTWARE_REBOOT_CAUSE_USER): + with mock.patch("determine_reboot_cause.find_hardware_reboot_cause", return_value=EXPECTED_NON_HARDWARE_REBOOT_CAUSE): previous_reboot_cause, additional_info = determine_reboot_cause.determine_reboot_cause() assert previous_reboot_cause == EXPECTED_FIND_SOFTWARE_REBOOT_CAUSE_USER assert additional_info == "N/A" def test_determine_reboot_cause_cmdline_no_software(self): - with mock.patch("determine_reboot_cause.determine_reboot_cause.find_proc_cmdline_reboot_cause", return_value=EXPECTED_PARSE_WARMFAST_REBOOT_FROM_PROC_CMDLINE): - with mock.patch("determine_reboot_cause.determine_reboot_cause.find_software_reboot_cause", return_value=REBOOT_CAUSE_UNKNOWN): - with mock.patch("determine_reboot_cause.determine_reboot_cause.find_hardware_reboot_cause", return_value=EXPECTED_NON_HARDWARE_REBOOT_CAUSE): + with mock.patch("determine_reboot_cause.find_proc_cmdline_reboot_cause", return_value=EXPECTED_PARSE_WARMFAST_REBOOT_FROM_PROC_CMDLINE): + with mock.patch("determine_reboot_cause.find_software_reboot_cause", return_value=REBOOT_CAUSE_UNKNOWN): + with mock.patch("determine_reboot_cause.find_hardware_reboot_cause", return_value=EXPECTED_NON_HARDWARE_REBOOT_CAUSE): previous_reboot_cause, additional_info = determine_reboot_cause.determine_reboot_cause() assert previous_reboot_cause == EXPECTED_PARSE_WARMFAST_REBOOT_FROM_PROC_CMDLINE assert additional_info == "N/A" def test_determine_reboot_cause_cmdline_hardware(self): - with mock.patch("determine_reboot_cause.determine_reboot_cause.find_proc_cmdline_reboot_cause", return_value=EXPECTED_PARSE_WARMFAST_REBOOT_FROM_PROC_CMDLINE): - with mock.patch("determine_reboot_cause.determine_reboot_cause.find_software_reboot_cause", return_value=REBOOT_CAUSE_UNKNOWN): - with mock.patch("determine_reboot_cause.determine_reboot_cause.find_hardware_reboot_cause", return_value=EXPECTED_HARDWARE_REBOOT_CAUSE): + with mock.patch("determine_reboot_cause.find_proc_cmdline_reboot_cause", return_value=EXPECTED_PARSE_WARMFAST_REBOOT_FROM_PROC_CMDLINE): + with mock.patch("determine_reboot_cause.find_software_reboot_cause", return_value=REBOOT_CAUSE_UNKNOWN): + with mock.patch("determine_reboot_cause.find_hardware_reboot_cause", return_value=EXPECTED_HARDWARE_REBOOT_CAUSE): previous_reboot_cause, additional_info = determine_reboot_cause.determine_reboot_cause() assert previous_reboot_cause == EXPECTED_HARDWARE_REBOOT_CAUSE assert additional_info == EXPECTED_PARSE_WARMFAST_REBOOT_FROM_PROC_CMDLINE From ef86b53345905f08b2182ae71749183f6f531245 Mon Sep 17 00:00:00 2001 From: sujinmkang Date: Thu, 18 Aug 2022 14:10:08 -0700 Subject: [PATCH 08/12] Fix startswith Attribute error --- scripts/determine-reboot-cause | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/determine-reboot-cause b/scripts/determine-reboot-cause index b4d01d0ffc1f..6d2c0acf0192 100755 --- a/scripts/determine-reboot-cause +++ b/scripts/determine-reboot-cause @@ -184,7 +184,7 @@ def determine_reboot_cause(): # the software_reboot_cause will be treated as the reboot cause if it's not unknown # otherwise, the cmdline_reboot_cause will be treated as the reboot cause if it's not none # Else the software_reboot_cause will be treated as the reboot cause - if not hardware_reboot_cause.startswith(REBOOT_CAUSE_NON_HARDWARE): + if REBOOT_CAUSE_NON_HARDWARE not in hardware_reboot_cause: previous_reboot_cause = hardware_reboot_cause # Check if any software reboot was issued before this hardware reboot happened if software_reboot_cause is not REBOOT_CAUSE_UNKNOWN: From e47d8312d57b9fdf792bf4bcd7e1276484f1acfe Mon Sep 17 00:00:00 2001 From: sujinmkang Date: Thu, 18 Aug 2022 15:40:48 -0700 Subject: [PATCH 09/12] fix the wrong expected result comparision --- tests/determine-reboot-cause_test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/determine-reboot-cause_test.py b/tests/determine-reboot-cause_test.py index 5dff482ef696..e495ef10b731 100644 --- a/tests/determine-reboot-cause_test.py +++ b/tests/determine-reboot-cause_test.py @@ -168,6 +168,6 @@ def test_determine_reboot_cause_software_hardware(self): with mock.patch("determine_reboot_cause.find_software_reboot_cause", return_value=EXPECTED_FIND_SOFTWARE_REBOOT_CAUSE_USER): with mock.patch("determine_reboot_cause.find_hardware_reboot_cause", return_value=EXPECTED_HARDWARE_REBOOT_CAUSE): previous_reboot_cause, additional_info = determine_reboot_cause.determine_reboot_cause() - assert previous_reboot_cause == REBOOT_CAUSE_WATCHDOG + assert previous_reboot_cause == EXPECTED_HARDWARE_REBOOT_CAUSE assert additional_info == EXPECTED_FIND_SOFTWARE_REBOOT_CAUSE_USER From 0dcc7fe35d6cdcd6be3d19ed16ab2246c3fbe794 Mon Sep 17 00:00:00 2001 From: sujinmkang Date: Thu, 18 Aug 2022 15:50:29 -0700 Subject: [PATCH 10/12] remove the empty bracket if no hardware reboot cause minor --- scripts/determine-reboot-cause | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/scripts/determine-reboot-cause b/scripts/determine-reboot-cause index 6d2c0acf0192..e4d8a38c9f7a 100755 --- a/scripts/determine-reboot-cause +++ b/scripts/determine-reboot-cause @@ -122,7 +122,12 @@ def find_hardware_reboot_cause(): else: sonic_logger.log_info("No reboot cause found from platform api") - hardware_reboot_cause = "{} ({})".format(hardware_reboot_cause_major, hardware_reboot_cause_minor) + hardware_reboot_cause_minor_str = "" + if hardware_reboot_cause_minor: + hardware_reboot_cause_minor_str = " ({})".format(hardware_reboot_cause_minor) + + hardware_reboot_cause = hardware_reboot_cause_major + hardware_reboot_cause_minor_str + return hardware_reboot_cause From 8720561aa7c382e70e746c11673d2c03148508ea Mon Sep 17 00:00:00 2001 From: sujinmkang Date: Thu, 18 Aug 2022 15:59:53 -0700 Subject: [PATCH 11/12] Fix and add hardware reboot cause determination tests --- tests/determine-reboot-cause_test.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/tests/determine-reboot-cause_test.py b/tests/determine-reboot-cause_test.py index e495ef10b731..b4c07af4683b 100644 --- a/tests/determine-reboot-cause_test.py +++ b/tests/determine-reboot-cause_test.py @@ -109,7 +109,12 @@ def test_find_proc_cmdline_reboot_cause(self): def test_find_hardware_reboot_cause(self): with mock.patch("determine_reboot_cause.get_reboot_cause_from_platform", return_value=("Powerloss", None)): result = determine_reboot_cause.find_hardware_reboot_cause() - assert result == "Powerloss (None)" + assert result == "Powerloss" + + def test_find_hardware_reboot_cause_with_minor(self): + with mock.patch("determine_reboot_cause.get_reboot_cause_from_platform", return_value=("Powerloss", "under-voltage")): + result = determine_reboot_cause.find_hardware_reboot_cause() + assert result == "Powerloss (under-voltage)" def test_get_reboot_cause_dict_watchdog(self): reboot_cause_dict = determine_reboot_cause.get_reboot_cause_dict(REBOOT_CAUSE_WATCHDOG, "", GEN_TIME_WATCHDOG) From f9af7aee3468786d941ac38ebdd4c9654f559ef6 Mon Sep 17 00:00:00 2001 From: Yevhen Fastiuk Date: Mon, 22 Aug 2022 00:47:53 +0300 Subject: [PATCH 12/12] [CLI] Move hostname, mgmt interface/vrf config to hostcfgd (#2) This PR depends on https://github.com/Azure/sonic-utilities/pull/2173 #### Why I did it To be able to configure the management interface and hostname standalone by changing database config at runtime. From the CLI perspective fo view, the following behavior is the same. But now you have two ways of configuring it: CLI, directly through the database. #### How I did it Moved configuration part of the interface and hostname to "hostcfgd". #### How to verify it * Built an image * Flash it to the switch * Run CLI commands ``` # Set IP address: verify address is set on the iface sudo config interface ip add eth0 10.210.25.127/22 10.210.24.1 ip address show eth0 # 2: eth0: mtu 1500 qdisc mq state UP group default qlen 1000 # link/ether 98:03:9b:a2:be:80 brd ff:ff:ff:ff:ff:ff # inet 10.210.25.127/22 brd 10.210.27.255 scope global eth0 # valid_lft forever preferred_lft forever # inet6 fe80::9a03:9bff:fea2:be80/64 scope link # valid_lft forever preferred_lft forever # Remove IP address: verify you received address form DHCP sudo config interface ip remove eth0 10.210.25.127/22 ip address show eth0 # 2: eth0: mtu 1500 qdisc mq state UP group default qlen 1000 # link/ether 98:03:9b:a2:be:80 brd ff:ff:ff:ff:ff:ff # inet 10.210.25.127/22 brd 10.210.27.255 scope global eth0 # valid_lft forever preferred_lft forever # inet6 fe80::9a03:9bff:fea2:be80/64 scope link # valid_lft forever preferred_lft forever # Enable/disable mgmt VRF ip address show mgmt # Device "mgmt" does not exist. sudo config vrf add mgmt ip address show mgmt # 72: mgmt: mtu 65575 qdisc noqueue state UP group default qlen 1000 # link/ether fa:9b:ad:7b:1e:83 brd ff:ff:ff:ff:ff:ff sudo config vrf del mgmt ip address show mgmt # Device "mgmt" does not exist. # Setting the hostname admin@r-anaconda-27:~$ sudo config hostname bla # Login / Logout admin@bla:~$ ``` #### Description for the changelog * Moved management interface configuration to hostcfgd. * Moved management VRF configuration to hostcfgd. * Moved hostname configuration to hostcfgd. #### Submodules PR's : | Repo | PR title | State | | ----------------- | ----------------- | ----------------- | | sonic-utilities | [[CLI] Move hostname, mgmt interface/vrf config to hostcfgd](https://github.com/Azure/sonic-utilities/pull/2173) | ![GitHub issue/pull request detail](https://img.shields.io/github/pulls/detail/state/Azure/sonic-utilities/2173) | --- scripts/hostcfgd | 173 ++++++++++++++++++++++++++++++-- tests/hostcfgd/hostcfgd_test.py | 75 ++++++++++++++ tests/hostcfgd/test_vectors.py | 47 ++++++++- 3 files changed, 283 insertions(+), 12 deletions(-) diff --git a/scripts/hostcfgd b/scripts/hostcfgd index a82a630bfc0a..30ee0a28bd52 100755 --- a/scripts/hostcfgd +++ b/scripts/hostcfgd @@ -12,6 +12,7 @@ import re import jinja2 from sonic_py_common import device_info from swsscommon.swsscommon import ConfigDBConnector, DBConnector, Table +from swsscommon import swsscommon # FILE PAM_AUTH_CONF = "/etc/pam.d/common-auth-sonic" @@ -1253,6 +1254,143 @@ class PamLimitsCfg(object): "modify pam_limits config file failed with exception: {}" .format(e)) +class DeviceMetaCfg(object): + """ + DeviceMetaCfg Config Daemon + Handles changes in DEVICE_METADATA table. + 1) Handle hostname change + """ + + def __init__(self): + self.hostname = '' + + def load(self, dev_meta={}): + # Get hostname initial + self.hostname = dev_meta.get('localhost', {}).get('hostname', '') + syslog.syslog(syslog.LOG_DEBUG, f'Initial hostname: {self.hostname}') + + def hostname_update(self, data): + """ + Apply hostname handler. + + Args: + data: Read table's key's data. + """ + syslog.syslog(syslog.LOG_DEBUG, 'DeviceMetaCfg: hostname update') + new_hostname = data.get('hostname') + + # Restart hostname-config service when hostname was changed. + # Empty not allowed + if new_hostname and new_hostname != self.hostname: + syslog.syslog(syslog.LOG_INFO, 'DeviceMetaCfg: Set new hostname: {}' + .format(new_hostname)) + self.hostname = new_hostname + try: + run_cmd('sudo service hostname-config restart', True, True) + except subprocess.CalledProcessError as e: + syslog.syslog(syslog.LOG_ERR, 'DeviceMetaCfg: Failed to set new' + ' hostname: {}'.format(e)) + return + + run_cmd('sudo monit reload') + else: + msg = 'Hostname was not updated: ' + msg += 'Already set up' if new_hostname else 'Empty not allowed' + syslog.syslog(syslog.LOG_ERR, msg) + + +class MgmtIfaceCfg(object): + """ + MgmtIfaceCfg Config Daemon + Handles changes in MGMT_INTERFACE, MGMT_VRF_CONFIG tables. + 1) Handle change of interface ip + 2) Handle change of management VRF state + """ + + def __init__(self): + self.iface_config_data = {} + self.mgmt_vrf_enabled = '' + + def load(self, mgmt_iface={}, mgmt_vrf={}): + # Get initial data + self.iface_config_data = mgmt_iface + self.mgmt_vrf_enabled = mgmt_vrf.get('mgmtVrfEnabled', '') + syslog.syslog(syslog.LOG_DEBUG, + f'Initial mgmt interface conf: {self.iface_config_data}') + syslog.syslog(syslog.LOG_DEBUG, + f'Initial mgmt VRF state: {self.mgmt_vrf_enabled}') + + def update_mgmt_iface(self, iface, key, data): + """Handle update management interface config + """ + syslog.syslog(syslog.LOG_DEBUG, 'MgmtIfaceCfg: mgmt iface update') + + # Restart management interface service when config was changed + if data != self.iface_config_data.get(key): + cfg = {key: data} + syslog.syslog(syslog.LOG_INFO, f'MgmtIfaceCfg: Set new interface ' + f'config {cfg} for {iface}') + try: + run_cmd('sudo systemctl restart interfaces-config', True, True) + run_cmd('sudo systemctl restart ntp-config', True, True) + except subprocess.CalledProcessError: + syslog.syslog(syslog.LOG_ERR, f'Failed to restart management ' + 'interface services') + return + + self.iface_config_data[key] = data + + def update_mgmt_vrf(self, data): + """Handle update management VRF state + """ + syslog.syslog(syslog.LOG_DEBUG, 'MgmtIfaceCfg: mgmt vrf state update') + + # Restart mgmt vrf services when mgmt vrf config was changed. + # Empty not allowed. + enabled = data.get('mgmtVrfEnabled', '') + if not enabled or enabled == self.mgmt_vrf_enabled: + return + + syslog.syslog(syslog.LOG_INFO, f'Set mgmt vrf state {enabled}') + + # Restart related vrfs services + try: + run_cmd('service ntp stop', True, True) + run_cmd('systemctl restart interfaces-config', True, True) + run_cmd('service ntp start', True, True) + except subprocess.CalledProcessError: + syslog.syslog(syslog.LOG_ERR, f'Failed to restart management vrf ' + 'services') + return + + # Remove mgmt if route + if enabled == 'true': + """ + The regular expression for grep in below cmd is to match eth0 line + in /proc/net/route, sample file: + $ cat /proc/net/route + Iface Destination Gateway Flags RefCnt Use + eth0 00000000 01803B0A 0003 0 0 + #################### Line break here #################### + Metric Mask MTU Window IRTT + 202 00000000 0 0 0 + """ + try: + run_cmd(r"""cat /proc/net/route | grep -E \"eth0\s+""" + r"""00000000\s+[0-9A-Z]+\s+[0-9]+\s+[0-9]+\s+[0-9]+""" + r"""\s+202\" | wc -l""", + True, True) + except subprocess.CalledProcessError: + syslog.syslog(syslog.LOG_ERR, 'MgmtIfaceCfg: Could not delete ' + 'eth0 route') + return + + run_cmd("ip -4 route del default dev eth0 metric 202", False) + + # Update cache + self.mgmt_vrf_enabled = enabled + + class HostConfigDaemon: def __init__(self): # Just a sanity check to verify if the CONFIG_DB has been initialized @@ -1284,7 +1422,6 @@ class HostConfigDaemon: self.is_multi_npu = device_info.is_multi_npu() # Initialize AAACfg - self.hostname_cache="" self.aaacfg = AaaCfg() # Initialize PasswHardening @@ -1294,6 +1431,12 @@ class HostConfigDaemon: self.pamLimitsCfg = PamLimitsCfg(self.config_db) self.pamLimitsCfg.update_config_file() + # Initialize DeviceMetaCfg + self.devmetacfg = DeviceMetaCfg() + + # Initialize MgmtIfaceCfg + self.mgmtifacecfg = MgmtIfaceCfg() + def load(self, init_data): features = init_data['FEATURE'] aaa = init_data['AAA'] @@ -1306,6 +1449,9 @@ class HostConfigDaemon: ntp_global = init_data['NTP'] kdump = init_data['KDUMP'] passwh = init_data['PASSW_HARDENING'] + dev_meta = init_data.get(swsscommon.CFG_DEVICE_METADATA_TABLE_NAME, {}) + mgmt_ifc = init_data.get(swsscommon.CFG_MGMT_INTERFACE_TABLE_NAME, {}) + mgmt_vrf = init_data.get(swsscommon.CFG_MGMT_VRF_CONFIG_TABLE_NAME, {}) self.feature_handler.sync_state_field(features) self.aaacfg.load(aaa, tacacs_global, tacacs_server, radius_global, radius_server) @@ -1313,14 +1459,11 @@ class HostConfigDaemon: self.ntpcfg.load(ntp_global, ntp_server) self.kdumpCfg.load(kdump) self.passwcfg.load(passwh) - - dev_meta = self.config_db.get_table('DEVICE_METADATA') - if 'localhost' in dev_meta: - if 'hostname' in dev_meta['localhost']: - self.hostname_cache = dev_meta['localhost']['hostname'] + self.devmetacfg.load(dev_meta) + self.mgmtifacecfg.load(mgmt_ifc, mgmt_vrf) # Update AAA with the hostname - self.aaacfg.hostname_update(self.hostname_cache) + self.aaacfg.hostname_update(self.devmetacfg.hostname) def __get_intf_name(self, key): if isinstance(key, tuple) and key: @@ -1370,6 +1513,10 @@ class HostConfigDaemon: mgmt_intf_name = self.__get_intf_name(key) self.aaacfg.handle_radius_source_intf_ip_chg(mgmt_intf_name) self.aaacfg.handle_radius_nas_ip_chg(mgmt_intf_name) + self.mgmtifacecfg.update_mgmt_iface(mgmt_intf_name, key, data) + + def mgmt_vrf_handler(self, key, op, data): + self.mgmtifacecfg.update_mgmt_vrf(data) def lpbk_handler(self, key, op, data): key = ConfigDBConnector.deserialize_key(key) @@ -1409,6 +1556,10 @@ class HostConfigDaemon: syslog.syslog(syslog.LOG_INFO, 'Kdump handler...') self.kdumpCfg.kdump_update(key, data) + def device_metadata_handler(self, key, op, data): + syslog.syslog(syslog.LOG_INFO, 'DeviceMeta handler...') + self.devmetacfg.hostname_update(data) + def wait_till_system_init_done(self): # No need to print the output in the log file so using the "--quiet" # flag @@ -1448,6 +1599,14 @@ class HostConfigDaemon: self.config_db.subscribe('PORTCHANNEL_INTERFACE', make_callback(self.portchannel_intf_handler)) self.config_db.subscribe('INTERFACE', make_callback(self.phy_intf_handler)) + # Handle DEVICE_MEATADATA changes + self.config_db.subscribe(swsscommon.CFG_DEVICE_METADATA_TABLE_NAME, + make_callback(self.device_metadata_handler)) + + # Handle MGMT_VRF_CONFIG changes + self.config_db.subscribe(swsscommon.CFG_MGMT_VRF_CONFIG_TABLE_NAME, + make_callback(self.mgmt_vrf_handler)) + syslog.syslog(syslog.LOG_INFO, "Waiting for systemctl to finish initialization") self.wait_till_system_init_done() diff --git a/tests/hostcfgd/hostcfgd_test.py b/tests/hostcfgd/hostcfgd_test.py index 786bd1c8f2a9..eae97368a0e2 100644 --- a/tests/hostcfgd/hostcfgd_test.py +++ b/tests/hostcfgd/hostcfgd_test.py @@ -7,6 +7,7 @@ from sonic_py_common.general import load_module_from_source from unittest import TestCase, mock +from .test_vectors import HOSTCFG_DAEMON_INIT_CFG_DB from .test_vectors import HOSTCFGD_TEST_VECTOR, HOSTCFG_DAEMON_CFG_DB from tests.common.mock_configdb import MockConfigDb, MockDBConnector @@ -357,3 +358,77 @@ def test_kdump_event(self): call('sonic-kdump-config --num_dumps 3', shell=True), call('sonic-kdump-config --memory 0M-2G:256M,2G-4G:320M,4G-8G:384M,8G-:448M', shell=True)] mocked_subprocess.check_call.assert_has_calls(expected, any_order=True) + + def test_devicemeta_event(self): + """ + Test handling DEVICE_METADATA events. + 1) Hostname reload + """ + MockConfigDb.set_config_db(HOSTCFG_DAEMON_CFG_DB) + MockConfigDb.event_queue = [(swsscommon.CFG_DEVICE_METADATA_TABLE_NAME, + 'localhost')] + daemon = hostcfgd.HostConfigDaemon() + daemon.aaacfg = mock.MagicMock() + daemon.iptables = mock.MagicMock() + daemon.passwcfg = mock.MagicMock() + daemon.load(HOSTCFG_DAEMON_INIT_CFG_DB) + daemon.register_callbacks() + with mock.patch('hostcfgd.subprocess') as mocked_subprocess: + popen_mock = mock.Mock() + attrs = {'communicate.return_value': ('output', 'error')} + popen_mock.configure_mock(**attrs) + mocked_subprocess.Popen.return_value = popen_mock + + try: + daemon.start() + except TimeoutError: + pass + + expected = [ + call('sudo service hostname-config restart', shell=True), + call('sudo monit reload', shell=True) + ] + mocked_subprocess.check_call.assert_has_calls(expected, + any_order=True) + + def test_mgmtiface_event(self): + """ + Test handling mgmt events. + 1) Management interface setup + 2) Management vrf setup + """ + MockConfigDb.set_config_db(HOSTCFG_DAEMON_CFG_DB) + MockConfigDb.event_queue = [ + (swsscommon.CFG_MGMT_INTERFACE_TABLE_NAME, 'eth0|1.2.3.4/24'), + (swsscommon.CFG_MGMT_VRF_CONFIG_TABLE_NAME, 'vrf_global') + ] + daemon = hostcfgd.HostConfigDaemon() + daemon.register_callbacks() + daemon.aaacfg = mock.MagicMock() + daemon.iptables = mock.MagicMock() + daemon.passwcfg = mock.MagicMock() + daemon.load(HOSTCFG_DAEMON_INIT_CFG_DB) + with mock.patch('hostcfgd.subprocess') as mocked_subprocess: + popen_mock = mock.Mock() + attrs = {'communicate.return_value': ('output', 'error')} + popen_mock.configure_mock(**attrs) + mocked_subprocess.Popen.return_value = popen_mock + + try: + daemon.start() + except TimeoutError: + pass + + expected = [ + call('sudo systemctl restart interfaces-config', shell=True), + call('sudo systemctl restart ntp-config', shell=True), + call('service ntp stop', shell=True), + call('systemctl restart interfaces-config', shell=True), + call('service ntp start', shell=True), + call('cat /proc/net/route | grep -E \\"eth0\\s+00000000' + '\\s+[0-9A-Z]+\\s+[0-9]+\\s+[0-9]+\\s+[0-9]+\\s+202\\" | ' + 'wc -l', shell=True), + call('ip -4 route del default dev eth0 metric 202', shell=True) + ] + mocked_subprocess.check_call.assert_has_calls(expected, + any_order=True) diff --git a/tests/hostcfgd/test_vectors.py b/tests/hostcfgd/test_vectors.py index 43754252c0e3..c9c47a35af07 100644 --- a/tests/hostcfgd/test_vectors.py +++ b/tests/hostcfgd/test_vectors.py @@ -19,7 +19,7 @@ "enabled": "false", "num_dumps": "3", "memory": "0M-2G:256M,2G-4G:320M,4G-8G:384M,8G-:448M" - } + } }, "FEATURE": { "dhcp_relay": { @@ -118,7 +118,7 @@ "enabled": "false", "num_dumps": "3", "memory": "0M-2G:256M,2G-4G:320M,4G-8G:384M,8G-:448M" - } + } }, "FEATURE": { "dhcp_relay": { @@ -235,7 +235,7 @@ "enabled": "false", "num_dumps": "3", "memory": "0M-2G:256M,2G-4G:320M,4G-8G:384M,8G-:448M" - } + } }, "FEATURE": { "dhcp_relay": { @@ -331,7 +331,7 @@ "enabled": "false", "num_dumps": "3", "memory": "0M-2G:256M,2G-4G:320M,4G-8G:384M,8G-:448M" - } + } }, "FEATURE": { "dhcp_relay": { @@ -431,7 +431,7 @@ "enabled": "false", "num_dumps": "3", "memory": "0M-2G:256M,2G-4G:320M,4G-8G:384M,8G-:448M" - } + } }, "FEATURE": { "dhcp_relay": { @@ -507,6 +507,28 @@ ] ] +HOSTCFG_DAEMON_INIT_CFG_DB = { + "FEATURE": {}, + "AAA": {}, + "TACPLUS": {}, + "TACPLUS_SERVER": {}, + "RADIUS": {}, + "RADIUS_SERVER": {}, + "PASSW_HARDENING": {}, + "KDUMP": {}, + "NTP": {}, + "NTP_SERVER": {}, + "LOOPBACK_INTERFACE": {}, + "DEVICE_METADATA": { + "localhost": { + "hostname": "old-hostname" + } + }, + "MGMT_INTERFACE": {}, + "MGMT_VRF_CONFIG": {} +} + + HOSTCFG_DAEMON_CFG_DB = { "FEATURE": { "dhcp_relay": { @@ -538,6 +560,12 @@ "status": "enabled" }, }, + "AAA": {}, + "TACPLUS": {}, + "TACPLUS_SERVER": {}, + "RADIUS": {}, + "RADIUS_SERVER": {}, + "PASSW_HARDENING": {}, "KDUMP": { "config": { @@ -562,6 +590,15 @@ "localhost": { "subtype": "DualToR", "type": "ToRRouter", + "hostname": "SomeNewHostname" + } + }, + "MGMT_INTERFACE": { + "eth0|1.2.3.4/24": {} + }, + "MGMT_VRF_CONFIG": { + "vrf_global": { + 'mgmtVrfEnabled': 'true' } } }