From 82992ccd5b5271a8d9aa02cf723997e4a539e429 Mon Sep 17 00:00:00 2001 From: Deepak Singhal <115033986+deepak-singhal0408@users.noreply.github.com> Date: Fri, 1 Dec 2023 16:22:30 -0800 Subject: [PATCH] Disable systemd auto-restart of dependent services for spineRouters (#91) --- scripts/hostcfgd | 15 ++++- tests/hostcfgd/hostcfgd_test.py | 16 +++-- tests/hostcfgd/test_vectors.py | 108 +++++++++++++++++++++++++++++--- 3 files changed, 123 insertions(+), 16 deletions(-) diff --git a/scripts/hostcfgd b/scripts/hostcfgd index d2a0e768..a6cd8690 100644 --- a/scripts/hostcfgd +++ b/scripts/hostcfgd @@ -454,7 +454,20 @@ class FeatureHandler(object): Returns: None. """ - restart_field_str = "always" if "enabled" in feature_config.auto_restart else "no" + # As per the current code(due to various dependencies) SWSS service stop/start also stops/starts the dependent services(syncd, teamd, bgpd etc) + # There is an issue seen of syncd service getting stopped twice upon a critical process crash in syncd service due to above reason. + # Also early start of syncd service has traffic impact on VOQ chassis. + # to fix the above issue, we are disabling the auto restart of syncd service as it will be started by swss service. + # This change can be extended to other dependent services as well in future and also on pizza box platforms. + + device_type = self._device_config.get('DEVICE_METADATA', {}).get('localhost', {}).get('type') + is_dependent_service = feature_config.name in ['syncd', 'gbsyncd'] + if device_type == 'SpineRouter' and is_dependent_service: + syslog.syslog(syslog.LOG_INFO, "Skipped setting Restart field in systemd for {}".format(feature_config.name)) + restart_field_str = "no" + else: + restart_field_str = "always" if "enabled" in feature_config.auto_restart else "no" + feature_systemd_config = "[Service]\nRestart={}\n".format(restart_field_str) feature_names, feature_suffixes = self.get_multiasic_feature_instances(feature_config) diff --git a/tests/hostcfgd/hostcfgd_test.py b/tests/hostcfgd/hostcfgd_test.py index 8d8a28c4..6e7214ec 100644 --- a/tests/hostcfgd/hostcfgd_test.py +++ b/tests/hostcfgd/hostcfgd_test.py @@ -51,7 +51,7 @@ def checks_config_table(self, feature_table, expected_table): return True if not ddiff else False - def checks_systemd_config_file(self, feature_table, feature_systemd_name_map=None): + def checks_systemd_config_file(self, device_type, feature_table, feature_systemd_name_map=None): """Checks whether the systemd configuration file of each feature was created or not and whether the `Restart=` field in the file is set correctly or not. @@ -68,6 +68,7 @@ def checks_systemd_config_file(self, feature_table, feature_systemd_name_map=Non 'auto_restart.conf') for feature_name in feature_table: + is_dependent_feature = True if feature_name in ['syncd', 'gbsyncd'] else False auto_restart_status = feature_table[feature_name].get('auto_restart', 'disabled') if "enabled" in auto_restart_status: auto_restart_status = "enabled" @@ -83,7 +84,10 @@ def checks_systemd_config_file(self, feature_table, feature_systemd_name_map=Non with open(feature_systemd_config_file_path) as systemd_config_file: status = systemd_config_file.read().strip() - assert status == '[Service]\nRestart={}'.format(truth_table[auto_restart_status]) + if device_type == 'SpineRouter' and is_dependent_feature: + assert status == '[Service]\nRestart=no' + else: + assert status == '[Service]\nRestart={}'.format(truth_table[auto_restart_status]) def get_state_db_set_calls(self, feature_table): """Returns a Mock call objects which recorded the `set` calls to `FEATURE` table in `STATE_DB`. @@ -140,6 +144,7 @@ def test_sync_state_field(self, test_scenario_name, config_data, fs): device_config = {} device_config['DEVICE_METADATA'] = MockConfigDb.CONFIG_DB['DEVICE_METADATA'] device_config.update(config_data['device_runtime_metadata']) + device_type = MockConfigDb.CONFIG_DB['DEVICE_METADATA']['localhost']['type'] feature_handler = hostcfgd.FeatureHandler(MockConfigDb(), feature_state_table_mock, device_config, False) @@ -166,13 +171,13 @@ def test_sync_state_field(self, test_scenario_name, config_data, fs): feature_table_state_db_calls = self.get_state_db_set_calls(feature_table) - self.checks_systemd_config_file(config_data['config_db']['FEATURE'], feature_systemd_name_map) + self.checks_systemd_config_file(device_type, config_data['config_db']['FEATURE'], feature_systemd_name_map) mocked_subprocess.check_call.assert_has_calls(config_data['enable_feature_subprocess_calls'], any_order=True) mocked_subprocess.check_call.assert_has_calls(config_data['daemon_reload_subprocess_call'], any_order=True) feature_state_table_mock.set.assert_has_calls(feature_table_state_db_calls) - self.checks_systemd_config_file(config_data['config_db']['FEATURE'], feature_systemd_name_map) + self.checks_systemd_config_file(device_type, config_data['config_db']['FEATURE'], feature_systemd_name_map) @parameterized.expand(HOSTCFGD_TEST_VECTOR) @patchfs @@ -204,6 +209,7 @@ def test_handler(self, test_scenario_name, config_data, fs): device_config = {} device_config['DEVICE_METADATA'] = MockConfigDb.CONFIG_DB['DEVICE_METADATA'] device_config.update(config_data['device_runtime_metadata']) + device_type = MockConfigDb.CONFIG_DB['DEVICE_METADATA']['localhost']['type'] feature_handler = hostcfgd.FeatureHandler(MockConfigDb(), feature_state_table_mock, device_config, False) feature_handler.enable_delayed_service = True @@ -217,7 +223,7 @@ def test_handler(self, test_scenario_name, config_data, fs): feature_names, _ = feature_handler.get_multiasic_feature_instances(feature) feature_systemd_name_map[feature_name] = feature_names - self.checks_systemd_config_file(config_data['config_db']['FEATURE'], feature_systemd_name_map) + self.checks_systemd_config_file(device_type, config_data['config_db']['FEATURE'], feature_systemd_name_map) mocked_subprocess.check_call.assert_has_calls(config_data['enable_feature_subprocess_calls'], any_order=True) mocked_subprocess.check_call.assert_has_calls(config_data['daemon_reload_subprocess_call'], diff --git a/tests/hostcfgd/test_vectors.py b/tests/hostcfgd/test_vectors.py index 09be2444..a05009c3 100644 --- a/tests/hostcfgd/test_vectors.py +++ b/tests/hostcfgd/test_vectors.py @@ -576,8 +576,14 @@ "auto_restart": "enabled", "high_mem_alert": "disabled" }, - - + "syncd": { + "state": "enabled", + "delayed": "False", + "has_global_scope": "True", + "has_per_asic_scope": "True", + "auto_restart": "enabled", + "high_mem_alert": "disabled" + }, }, }, "expected_config_db": { @@ -606,12 +612,23 @@ "high_mem_alert": "disabled", "state": "enabled" }, + "syncd": { + "auto_restart": "enabled", + "has_global_scope": "True", + "has_per_asic_scope": "True", + "delayed": "False", + "high_mem_alert": "disabled", + "state": "enabled" + }, }, }, "enable_feature_subprocess_calls": [ call(["sudo", "systemctl", "stop", "bgp.service"]), call(["sudo", "systemctl", "disable", "bgp.service"]), call(["sudo", "systemctl", "mask", "bgp.service"]), + call(["sudo", "systemctl", "start", "syncd.service"]), + call(["sudo", "systemctl", "enable", "syncd.service"]), + call(["sudo", "systemctl", "unmask", "syncd.service"]), ], "daemon_reload_subprocess_call": [ call(["sudo", "systemctl", "daemon-reload"]), @@ -671,8 +688,14 @@ "auto_restart": "enabled", "high_mem_alert": "disabled" }, - - + "syncd": { + "state": "enabled", + "delayed": "False", + "has_global_scope": "True", + "has_per_asic_scope": "True", + "auto_restart": "enabled", + "high_mem_alert": "disabled" + }, }, }, "expected_config_db": { @@ -701,12 +724,23 @@ "high_mem_alert": "disabled", "state": "enabled" }, + "syncd": { + "auto_restart": "enabled", + "has_global_scope": "True", + "has_per_asic_scope": "True", + "delayed": "False", + "high_mem_alert": "disabled", + "state": "enabled" + }, }, }, "enable_feature_subprocess_calls": [ call(["sudo", "systemctl", "stop", "bgp.service"]), call(["sudo", "systemctl", "disable", "bgp.service"]), call(["sudo", "systemctl", "mask", "bgp.service"]), + call(["sudo", "systemctl", "start", "syncd.service"]), + call(["sudo", "systemctl", "enable", "syncd.service"]), + call(["sudo", "systemctl", "unmask", "syncd.service"]), ], "daemon_reload_subprocess_call": [ call(["sudo", "systemctl", "daemon-reload"]), @@ -766,8 +800,14 @@ "auto_restart": "enabled", "high_mem_alert": "disabled" }, - - + "syncd": { + "state": "enabled", + "delayed": "False", + "has_global_scope": "True", + "has_per_asic_scope": "True", + "auto_restart": "enabled", + "high_mem_alert": "disabled" + }, }, }, "expected_config_db": { @@ -796,6 +836,14 @@ "high_mem_alert": "disabled", "state": "enabled" }, + "syncd": { + "auto_restart": "enabled", + "has_global_scope": "True", + "has_per_asic_scope": "True", + "delayed": "False", + "high_mem_alert": "disabled", + "state": "enabled" + }, }, }, "enable_feature_subprocess_calls": [ @@ -805,6 +853,9 @@ call(["sudo", "systemctl", "start", "teamd.service"]), call(["sudo", "systemctl", "enable", "teamd.service"]), call(["sudo", "systemctl", "unmask", "teamd.service"]), + call(["sudo", "systemctl", "start", "syncd.service"]), + call(["sudo", "systemctl", "enable", "syncd.service"]), + call(["sudo", "systemctl", "unmask", "syncd.service"]), ], "daemon_reload_subprocess_call": [ @@ -865,8 +916,14 @@ "auto_restart": "enabled", "high_mem_alert": "disabled" }, - - + "syncd": { + "state": "enabled", + "delayed": "False", + "has_global_scope": "True", + "has_per_asic_scope": "True", + "auto_restart": "enabled", + "high_mem_alert": "disabled" + }, }, }, "expected_config_db": { @@ -895,6 +952,14 @@ "high_mem_alert": "disabled", "state": "enabled" }, + "syncd": { + "auto_restart": "enabled", + "has_global_scope": "True", + "has_per_asic_scope": "True", + "delayed": "False", + "high_mem_alert": "disabled", + "state": "enabled" + }, }, }, "enable_feature_subprocess_calls": [ @@ -904,6 +969,9 @@ call(["sudo", "systemctl", "start", "teamd.service"]), call(["sudo", "systemctl", "enable", "teamd.service"]), call(["sudo", "systemctl", "unmask", "teamd.service"]), + call(["sudo", "systemctl", "start", "syncd.service"]), + call(["sudo", "systemctl", "enable", "syncd.service"]), + call(["sudo", "systemctl", "unmask", "syncd.service"]), ], "daemon_reload_subprocess_call": [ @@ -965,8 +1033,14 @@ "auto_restart": "enabled", "high_mem_alert": "disabled" }, - - + "syncd": { + "state": "enabled", + "delayed": "False", + "has_global_scope": "False", + "has_per_asic_scope": "True", + "auto_restart": "enabled", + "high_mem_alert": "disabled" + }, }, }, "expected_config_db": { @@ -995,6 +1069,14 @@ "high_mem_alert": "disabled", "state": "enabled" }, + "syncd": { + "auto_restart": "enabled", + "has_global_scope": "False", + "has_per_asic_scope": "True", + "delayed": "False", + "high_mem_alert": "disabled", + "state": "enabled" + }, }, }, "enable_feature_subprocess_calls": [ @@ -1016,6 +1098,12 @@ call(["sudo", "systemctl", "stop", "lldp@1.service"]), call(["sudo", "systemctl", "disable", "lldp@1.service"]), call(["sudo", "systemctl", "mask", "lldp@1.service"]), + call(["sudo", "systemctl", "start", "syncd@0.service"]), + call(["sudo", "systemctl", "enable", "syncd@0.service"]), + call(["sudo", "systemctl", "unmask", "syncd@0.service"]), + call(["sudo", "systemctl", "start", "syncd@1.service"]), + call(["sudo", "systemctl", "enable", "syncd@1.service"]), + call(["sudo", "systemctl", "unmask", "syncd@1.service"]), ], "daemon_reload_subprocess_call": [