From 586b1e9ec1511e1b0d77b936b1c758f42c530d28 Mon Sep 17 00:00:00 2001 From: Deepak Singhal <115033986+deepak-singhal0408@users.noreply.github.com> Date: Tue, 14 Nov 2023 13:45:12 -0800 Subject: [PATCH] Disable systemd auto-restart of dependent services for spineRouters (#83) --- scripts/featured | 15 ++++- tests/featured/featured_test.py | 17 +++-- tests/featured/test_vectors.py | 115 ++++++++++++++++++++++++++++---- 3 files changed, 127 insertions(+), 20 deletions(-) diff --git a/scripts/featured b/scripts/featured index 1966e60e..d66e8b10 100644 --- a/scripts/featured +++ b/scripts/featured @@ -319,7 +319,20 @@ class FeatureHandler(object): Returns: None. """ - restart_field_str = "always" if "enabled" in feature_config.auto_restart else "no" + # As per the current code(due to various dependencies) SWSS service stop/start also stops/starts the dependent services(syncd, teamd, bgpd etc) + # There is an issue seen of syncd service getting stopped twice upon a critical process crash in syncd service due to above reason. + # Also early start of syncd service has traffic impact on VOQ chassis. + # to fix the above issue, we are disabling the auto restart of syncd service as it will be started by swss service. + # This change can be extended to other dependent services as well in future and also on pizza box platforms. + + device_type = self._device_config.get('DEVICE_METADATA', {}).get('localhost', {}).get('type') + is_dependent_service = feature_config.name in ['syncd', 'gbsyncd'] + if device_type == 'SpineRouter' and is_dependent_service: + syslog.syslog(syslog.LOG_INFO, "Skipped setting Restart field in systemd for {}".format(feature_config.name)) + restart_field_str = "no" + else: + restart_field_str = "always" if "enabled" in feature_config.auto_restart else "no" + feature_systemd_config = "[Service]\nRestart={}\n".format(restart_field_str) feature_names, feature_suffixes = self.get_multiasic_feature_instances(feature_config) diff --git a/tests/featured/featured_test.py b/tests/featured/featured_test.py index e8a38809..8ee04181 100644 --- a/tests/featured/featured_test.py +++ b/tests/featured/featured_test.py @@ -54,7 +54,7 @@ def checks_config_table(self, feature_table, expected_table): return True if not ddiff else False - def checks_systemd_config_file(self, feature_table, feature_systemd_name_map=None): + def checks_systemd_config_file(self, device_type, feature_table, feature_systemd_name_map=None): """Checks whether the systemd configuration file of each feature was created or not and whether the `Restart=` field in the file is set correctly or not. @@ -71,6 +71,7 @@ def checks_systemd_config_file(self, feature_table, feature_systemd_name_map=Non 'auto_restart.conf') for feature_name in feature_table: + is_dependent_feature = True if feature_name in ['syncd', 'gbsyncd'] else False auto_restart_status = feature_table[feature_name].get('auto_restart', 'disabled') if "enabled" in auto_restart_status: auto_restart_status = "enabled" @@ -86,7 +87,10 @@ def checks_systemd_config_file(self, feature_table, feature_systemd_name_map=Non with open(feature_systemd_config_file_path) as systemd_config_file: status = systemd_config_file.read().strip() - assert status == '[Service]\nRestart={}'.format(truth_table[auto_restart_status]) + if device_type == 'SpineRouter' and is_dependent_feature: + assert status == '[Service]\nRestart=no' + else: + assert status == '[Service]\nRestart={}'.format(truth_table[auto_restart_status]) def get_state_db_set_calls(self, feature_table): """Returns a Mock call objects which recorded the `set` calls to `FEATURE` table in `STATE_DB`. @@ -143,6 +147,7 @@ def test_sync_state_field(self, test_scenario_name, config_data, fs): device_config = {} device_config['DEVICE_METADATA'] = MockConfigDb.CONFIG_DB['DEVICE_METADATA'] device_config.update(config_data['device_runtime_metadata']) + device_type = MockConfigDb.CONFIG_DB['DEVICE_METADATA']['localhost']['type'] feature_handler = featured.FeatureHandler(MockConfigDb(), feature_state_table_mock, device_config, False) @@ -169,13 +174,13 @@ def test_sync_state_field(self, test_scenario_name, config_data, fs): feature_table_state_db_calls = self.get_state_db_set_calls(feature_table) - self.checks_systemd_config_file(config_data['config_db']['FEATURE'], feature_systemd_name_map) + self.checks_systemd_config_file(device_type, config_data['config_db']['FEATURE'], feature_systemd_name_map) mocked_subprocess.check_call.assert_has_calls(config_data['enable_feature_subprocess_calls'], any_order=True) mocked_subprocess.check_call.assert_has_calls(config_data['daemon_reload_subprocess_call'], any_order=True) feature_state_table_mock.set.assert_has_calls(feature_table_state_db_calls) - self.checks_systemd_config_file(config_data['config_db']['FEATURE'], feature_systemd_name_map) + self.checks_systemd_config_file(device_type, config_data['config_db']['FEATURE'], feature_systemd_name_map) @parameterized.expand(FEATURED_TEST_VECTOR) @patchfs @@ -207,6 +212,7 @@ def test_handler(self, test_scenario_name, config_data, fs): device_config = {} device_config['DEVICE_METADATA'] = MockConfigDb.CONFIG_DB['DEVICE_METADATA'] device_config.update(config_data['device_runtime_metadata']) + device_type = MockConfigDb.CONFIG_DB['DEVICE_METADATA']['localhost']['type'] feature_handler = featured.FeatureHandler(MockConfigDb(), feature_state_table_mock, device_config, False) feature_handler.is_delayed_enabled = True @@ -220,7 +226,7 @@ def test_handler(self, test_scenario_name, config_data, fs): feature_names, _ = feature_handler.get_multiasic_feature_instances(feature) feature_systemd_name_map[feature_name] = feature_names - self.checks_systemd_config_file(config_data['config_db']['FEATURE'], feature_systemd_name_map) + self.checks_systemd_config_file(device_type, config_data['config_db']['FEATURE'], feature_systemd_name_map) mocked_subprocess.check_call.assert_has_calls(config_data['enable_feature_subprocess_calls'], any_order=True) mocked_subprocess.check_call.assert_has_calls(config_data['daemon_reload_subprocess_call'], @@ -467,4 +473,3 @@ def test_portinit_timeout(self, mock_syslog, get_runtime): call(['sudo', 'systemctl', 'enable', 'telemetry.service']), call(['sudo', 'systemctl', 'start', 'telemetry.service'])] mocked_subprocess.check_call.assert_has_calls(expected) - diff --git a/tests/featured/test_vectors.py b/tests/featured/test_vectors.py index fd916eb0..bc0d0cac 100644 --- a/tests/featured/test_vectors.py +++ b/tests/featured/test_vectors.py @@ -532,8 +532,14 @@ "auto_restart": "enabled", "high_mem_alert": "disabled" }, - - + "syncd": { + "state": "enabled", + "delayed": "False", + "has_global_scope": "True", + "has_per_asic_scope": "True", + "auto_restart": "enabled", + "high_mem_alert": "disabled" + }, }, }, "expected_config_db": { @@ -562,12 +568,24 @@ "high_mem_alert": "disabled", "state": "enabled" }, + "syncd": { + "auto_restart": "enabled", + "has_global_scope": "True", + "has_per_asic_scope": "True", + "delayed": "False", + "high_mem_alert": "disabled", + "state": "enabled" + }, }, }, "enable_feature_subprocess_calls": [ call(["sudo", "systemctl", "stop", "bgp.service"]), call(["sudo", "systemctl", "disable", "bgp.service"]), call(["sudo", "systemctl", "mask", "bgp.service"]), + call(["sudo", "systemctl", "unmask", "syncd.service"]), + call(["sudo", "systemctl", "enable", "syncd.service"]), + call(["sudo", "systemctl", "start", "syncd.service"]), + ], "daemon_reload_subprocess_call": [ call(["sudo", "systemctl", "daemon-reload"]), @@ -620,8 +638,14 @@ "auto_restart": "enabled", "high_mem_alert": "disabled" }, - - + "syncd": { + "state": "enabled", + "delayed": "False", + "has_global_scope": "True", + "has_per_asic_scope": "True", + "auto_restart": "enabled", + "high_mem_alert": "disabled" + }, }, }, "expected_config_db": { @@ -650,12 +674,24 @@ "high_mem_alert": "disabled", "state": "enabled" }, + "syncd": { + "auto_restart": "enabled", + "has_global_scope": "True", + "has_per_asic_scope": "True", + "delayed": "False", + "high_mem_alert": "disabled", + "state": "enabled" + }, }, }, "enable_feature_subprocess_calls": [ call(["sudo", "systemctl", "stop", "bgp.service"]), call(["sudo", "systemctl", "disable", "bgp.service"]), call(["sudo", "systemctl", "mask", "bgp.service"]), + call(["sudo", "systemctl", "unmask", "syncd.service"]), + call(["sudo", "systemctl", "enable", "syncd.service"]), + call(["sudo", "systemctl", "start", "syncd.service"]), + ], "daemon_reload_subprocess_call": [ call(["sudo", "systemctl", "daemon-reload"]), @@ -708,8 +744,14 @@ "auto_restart": "enabled", "high_mem_alert": "disabled" }, - - + "syncd": { + "state": "enabled", + "delayed": "False", + "has_global_scope": "True", + "has_per_asic_scope": "True", + "auto_restart": "enabled", + "high_mem_alert": "disabled" + }, }, }, "expected_config_db": { @@ -738,6 +780,14 @@ "high_mem_alert": "disabled", "state": "enabled" }, + "syncd": { + "auto_restart": "enabled", + "has_global_scope": "True", + "has_per_asic_scope": "True", + "delayed": "False", + "high_mem_alert": "disabled", + "state": "enabled" + }, }, }, "enable_feature_subprocess_calls": [ @@ -747,7 +797,10 @@ call(["sudo", "systemctl", "start", "teamd.service"]), call(["sudo", "systemctl", "enable", "teamd.service"]), call(["sudo", "systemctl", "unmask", "teamd.service"]), - + call(["sudo", "systemctl", "unmask", "syncd.service"]), + call(["sudo", "systemctl", "enable", "syncd.service"]), + call(["sudo", "systemctl", "start", "syncd.service"]), + ], "daemon_reload_subprocess_call": [ call(["sudo", "systemctl", "daemon-reload"]), @@ -800,8 +853,14 @@ "auto_restart": "enabled", "high_mem_alert": "disabled" }, - - + "syncd": { + "state": "enabled", + "delayed": "False", + "has_global_scope": "True", + "has_per_asic_scope": "True", + "auto_restart": "enabled", + "high_mem_alert": "disabled" + }, }, }, "expected_config_db": { @@ -830,6 +889,14 @@ "high_mem_alert": "disabled", "state": "enabled" }, + "syncd": { + "auto_restart": "enabled", + "has_global_scope": "True", + "has_per_asic_scope": "True", + "delayed": "False", + "high_mem_alert": "disabled", + "state": "enabled" + }, }, }, "enable_feature_subprocess_calls": [ @@ -839,7 +906,10 @@ call(["sudo", "systemctl", "start", "teamd.service"]), call(["sudo", "systemctl", "enable", "teamd.service"]), call(["sudo", "systemctl", "unmask", "teamd.service"]), - + call(["sudo", "systemctl", "unmask", "syncd.service"]), + call(["sudo", "systemctl", "enable", "syncd.service"]), + call(["sudo", "systemctl", "start", "syncd.service"]), + ], "daemon_reload_subprocess_call": [ call(["sudo", "systemctl", "daemon-reload"]), @@ -893,8 +963,14 @@ "auto_restart": "enabled", "high_mem_alert": "disabled" }, - - + "syncd": { + "state": "enabled", + "delayed": "False", + "has_global_scope": "False", + "has_per_asic_scope": "True", + "auto_restart": "enabled", + "high_mem_alert": "disabled" + }, }, }, "expected_config_db": { @@ -923,6 +999,14 @@ "high_mem_alert": "disabled", "state": "enabled" }, + "syncd": { + "auto_restart": "enabled", + "has_global_scope": "False", + "has_per_asic_scope": "True", + "delayed": "False", + "high_mem_alert": "disabled", + "state": "enabled" + }, }, }, "enable_feature_subprocess_calls": [ @@ -944,7 +1028,12 @@ call(["sudo", "systemctl", "stop", "lldp@1.service"]), call(["sudo", "systemctl", "disable", "lldp@1.service"]), call(["sudo", "systemctl", "mask", "lldp@1.service"]), - + call(["sudo", "systemctl", "start", "syncd@0.service"]), + call(["sudo", "systemctl", "enable", "syncd@0.service"]), + call(["sudo", "systemctl", "unmask", "syncd@0.service"]), + call(["sudo", "systemctl", "start", "syncd@1.service"]), + call(["sudo", "systemctl", "enable", "syncd@1.service"]), + call(["sudo", "systemctl", "unmask", "syncd@1.service"]), ], "daemon_reload_subprocess_call": [ call(["sudo", "systemctl", "daemon-reload"]),