From f74afe20d60190ece96e88e90af2358030a81731 Mon Sep 17 00:00:00 2001 From: Abhishek Dosi Date: Thu, 10 Sep 2020 19:43:45 +0000 Subject: [PATCH 1/6] Enhanced Feature Table state enable/disbale for multi-asic platforms. In Multi-asic for some features we can service per asic so we need to get list of all services. Also updated logic to return if any one of systemctl command return failure and make sure syslog of feature getting enable/disable only come when all commads are sucessful. Moved the service list get api from sonic-util to sonic-py-common Signed-off-by: Abhishek Dosi --- files/image_config/hostcfgd/hostcfgd | 109 +++++++++++------- .../sonic_py_common/device_info.py | 18 +++ 2 files changed, 86 insertions(+), 41 deletions(-) diff --git a/files/image_config/hostcfgd/hostcfgd b/files/image_config/hostcfgd/hostcfgd index acfc3d0c8055..6292186adda1 100755 --- a/files/image_config/hostcfgd/hostcfgd +++ b/files/image_config/hostcfgd/hostcfgd @@ -10,6 +10,7 @@ import copy import jinja2 import ipaddr as ipaddress from swsssdk import ConfigDBConnector +from sonic_py_common import device_info # FILE PAM_AUTH_CONF = "/etc/pam.d/common-auth-sonic" @@ -42,45 +43,6 @@ def obfuscate(data): return data -def update_feature_state(feature_name, state, has_timer): - feature_suffixes = ["service"] + (["timer"] if ast.literal_eval(has_timer) else []) - if state == "enabled": - start_cmds = [] - for suffix in feature_suffixes: - start_cmds.append("sudo systemctl unmask {}.{}".format(feature_name, suffix)) - # If feature has timer associated with it, start/enable corresponding systemd .timer unit - # otherwise, start/enable corresponding systemd .service unit - start_cmds.append("sudo systemctl enable {}.{}".format(feature_name, feature_suffixes[-1])) - start_cmds.append("sudo systemctl start {}.{}".format(feature_name, feature_suffixes[-1])) - for cmd in start_cmds: - syslog.syslog(syslog.LOG_INFO, "Running cmd: '{}'".format(cmd)) - try: - subprocess.check_call(cmd, shell=True) - except subprocess.CalledProcessError as err: - syslog.syslog(syslog.LOG_ERR, "'{}' failed. RC: {}, output: {}" - .format(err.cmd, err.returncode, err.output)) - continue - syslog.syslog(syslog.LOG_INFO, "Feature '{}.{}' is enabled and started" - .format(feature_name, feature_suffixes[-1])) - elif state == "disabled": - stop_cmds = [] - for suffix in reversed(feature_suffixes): - stop_cmds.append("sudo systemctl stop {}.{}".format(feature_name, suffix)) - stop_cmds.append("sudo systemctl disable {}.{}".format(feature_name, suffix)) - stop_cmds.append("sudo systemctl mask {}.{}".format(feature_name, suffix)) - for cmd in stop_cmds: - syslog.syslog(syslog.LOG_INFO, "Running cmd: '{}'".format(cmd)) - try: - subprocess.check_call(cmd, shell=True) - except subprocess.CalledProcessError as err: - syslog.syslog(syslog.LOG_ERR, "'{}' failed. RC: {}, output: {}" - .format(err.cmd, err.returncode, err.output)) - continue - syslog.syslog(syslog.LOG_INFO, "Feature '{}' is stopped and disabled".format(feature_name)) - else: - syslog.syslog(syslog.LOG_ERR, "Unexpected state value '{}' for feature '{}'" - .format(state, feature_name)) - class Iptables(object): def __init__(self): @@ -279,9 +241,12 @@ class HostConfigDaemon: lpbk_table = self.config_db.get_table('LOOPBACK_INTERFACE') self.iptables = Iptables() self.iptables.load(lpbk_table) + self.host_feature_list = [] + self.namespace_feature_list = [] def update_all_feature_states(self): feature_table = self.config_db.get_table('FEATURE') + generated_services_list, generated_multi_instance_services_list = device_info.get_sonic_generated_services() for feature_name in feature_table.keys(): if not feature_name: syslog.syslog(syslog.LOG_WARNING, "Feature is None") @@ -292,7 +257,69 @@ class HostConfigDaemon: syslog.syslog(syslog.LOG_WARNING, "Eanble state of feature '{}' is None".format(feature_name)) continue - update_feature_state(feature_name, state, feature_table[feature_name]['has_timer']) + if feature_name + '.service' in generated_services_list: + self.host_feature_list.append(feature_name) + + if feature_name + '.service' in generated_multi_instance_services_list: + self.namespace_feature_list.append(feature_name) + + for feature_name in feature_table.keys(): + self.update_feature_state(feature_name, state, feature_table[feature_name]['has_timer']) + + def update_feature_state(self, feature_name, state, has_timer): + # Create feature name suffix depending feature is running in host or namespace or in both + feature_name_suffix_list = (([feature_name] if feature_name in self.host_feature_list else []) + + ([(feature_name + '@' + str(asic_inst)) for asic_inst in range(device_info.get_num_npus()) + if feature_name in self.namespace_feature_list])) + + if not feature_name_suffix_list: + syslog.syslog(syslog.LOG_ERR, "Feature '{}' service not available" + .format(feature_name)) + + feature_suffixes = ["service"] + (["timer"] if ast.literal_eval(has_timer) else []) + + if state == "enabled": + start_cmds = [] + for feature_name_suffix in feature_name_suffix_list: + for suffix in feature_suffixes: + start_cmds.append("sudo systemctl unmask {}.{}".format(feature_name_suffix, suffix)) + # If feature has timer associated with it, start/enable corresponding systemd .timer unit + # otherwise, start/enable corresponding systemd .service unit + start_cmds.append("sudo systemctl enable {}.{}".format(feature_name_suffix, feature_suffixes[-1])) + start_cmds.append("sudo systemctl start {}.{}".format(feature_name_suffix, feature_suffixes[-1])) + for cmd in start_cmds: + syslog.syslog(syslog.LOG_INFO, "Running cmd: '{}'".format(cmd)) + try: + subprocess.check_call(cmd, shell=True) + except subprocess.CalledProcessError as err: + syslog.syslog(syslog.LOG_ERR, "'{}' failed. RC: {}, output: {}" + .format(err.cmd, err.returncode, err.output)) + syslog.syslog(syslog.LOG_ERR, "Feature '{}.{}' failed to be enabled and started" + .format(feature_name, feature_suffixes[-1])) + return + syslog.syslog(syslog.LOG_INFO, "Feature '{}.{}' is enabled and started" + .format(feature_name, feature_suffixes[-1])) + elif state == "disabled": + stop_cmds = [] + for feature_name_suffix in feature_name_suffix_list: + for suffix in reversed(feature_suffixes): + stop_cmds.append("sudo systemctl stop {}.{}".format(feature_name_suffix, suffix)) + stop_cmds.append("sudo systemctl disable {}.{}".format(feature_name_suffix, suffix)) + stop_cmds.append("sudo systemctl mask {}.{}".format(feature_name_suffix, suffix)) + for cmd in stop_cmds: + syslog.syslog(syslog.LOG_INFO, "Running cmd: '{}'".format(cmd)) + try: + subprocess.check_call(cmd, shell=True) + except subprocess.CalledProcessError as err: + syslog.syslog(syslog.LOG_ERR, "'{}' failed. RC: {}, output: {}" + .format(err.cmd, err.returncode, err.output)) + syslog.syslog(syslog.LOG_ERR, "Feature '{}' failed to be stopped and disabled".format(feature_name)) + return + syslog.syslog(syslog.LOG_INFO, "Feature '{}' is stopped and disabled".format(feature_name)) + else: + syslog.syslog(syslog.LOG_ERR, "Unexpected state value '{}' for feature '{}'" + .format(state, feature_name)) + def aaa_handler(self, key, data): self.aaacfg.aaa_update(key, data) @@ -334,7 +361,7 @@ class HostConfigDaemon: syslog.syslog(syslog.LOG_WARNING, "Enable state of feature '{}' is None".format(feature_name)) return - update_feature_state(feature_name, state, feature_table[feature_name]['has_timer']) + self.update_feature_state(feature_name, state, feature_table[feature_name]['has_timer']) def start(self): # Update all feature states once upon starting diff --git a/src/sonic-py-common/sonic_py_common/device_info.py b/src/sonic-py-common/sonic_py_common/device_info.py index f6b25de55ab5..09f0525134b4 100644 --- a/src/sonic-py-common/sonic_py_common/device_info.py +++ b/src/sonic-py-common/sonic_py_common/device_info.py @@ -20,6 +20,7 @@ PORT_CONFIG_FILE = "port_config.ini" PLATFORM_JSON_FILE = "platform.json" +SONIC_GENERATED_SERVICE_PATH = '/etc/sonic/generated_services.conf' # Multi-NPU constants # TODO: Move Multi-ASIC-related functions and constants to a "multi_asic.py" module NPU_NAME_PREFIX = "asic" @@ -420,3 +421,20 @@ def get_system_routing_stack(): raise OSError("Cannot detect routing stack") return result + +def get_sonic_generated_services(): + if not os.path.isfile(SONIC_GENERATED_SERVICE_PATH): + return None + generated_services_list = [] + generated_multi_instance_services = [] + with open(SONIC_GENERATED_SERVICE_PATH) as generated_service_file: + for line in generated_service_file: + if '@' in line: + line = line.replace('@', '') + if is_multi_npu(): + generated_multi_instance_services.append(line.rstrip('\n')) + else: + generated_services_list.append(line.rstrip('\n')) + else: + generated_services_list.append(line.rstrip('\n')) + return generated_services_list, generated_multi_instance_services From 23649733d24a15dfd31ad58997c6ff8118394022 Mon Sep 17 00:00:00 2001 From: Abhishek Dosi Date: Thu, 10 Sep 2020 20:03:35 +0000 Subject: [PATCH 2/6] Make sure to retun None for both service list in case of error. Signed-off-by: Abhishek Dosi --- src/sonic-py-common/sonic_py_common/device_info.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/sonic-py-common/sonic_py_common/device_info.py b/src/sonic-py-common/sonic_py_common/device_info.py index 09f0525134b4..f172feeda153 100644 --- a/src/sonic-py-common/sonic_py_common/device_info.py +++ b/src/sonic-py-common/sonic_py_common/device_info.py @@ -424,7 +424,7 @@ def get_system_routing_stack(): def get_sonic_generated_services(): if not os.path.isfile(SONIC_GENERATED_SERVICE_PATH): - return None + return None, None generated_services_list = [] generated_multi_instance_services = [] with open(SONIC_GENERATED_SERVICE_PATH) as generated_service_file: From 524eb462711c6268894db1d3d4e68bb5bdfc8cd9 Mon Sep 17 00:00:00 2001 From: Abhishek Dosi Date: Thu, 10 Sep 2020 21:21:40 +0000 Subject: [PATCH 3/6] Return empty list as fail condition Signed-off-by: Abhishek Dosi --- src/sonic-py-common/sonic_py_common/device_info.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/sonic-py-common/sonic_py_common/device_info.py b/src/sonic-py-common/sonic_py_common/device_info.py index f172feeda153..2ba5425fe80e 100644 --- a/src/sonic-py-common/sonic_py_common/device_info.py +++ b/src/sonic-py-common/sonic_py_common/device_info.py @@ -423,10 +423,10 @@ def get_system_routing_stack(): return result def get_sonic_generated_services(): - if not os.path.isfile(SONIC_GENERATED_SERVICE_PATH): - return None, None generated_services_list = [] generated_multi_instance_services = [] + if not os.path.isfile(SONIC_GENERATED_SERVICE_PATH): + return generated_services_list, generated_multi_instance_services with open(SONIC_GENERATED_SERVICE_PATH) as generated_service_file: for line in generated_service_file: if '@' in line: From 383aecdafac3be65eecb227ce60dbbe474a3e8a0 Mon Sep 17 00:00:00 2001 From: Abhishek Dosi Date: Sat, 19 Sep 2020 01:35:53 +0000 Subject: [PATCH 4/6] Address Review Comments. Made init_cfg.json.j2 knowledegable of Feature service is global scope or per asic scope Signed-off-by: Abhishek Dosi --- files/build_templates/init_cfg.json.j2 | 2 + files/image_config/hostcfgd/hostcfgd | 52 ++++++++----------- .../sonic_py_common/device_info.py | 18 ------- 3 files changed, 25 insertions(+), 47 deletions(-) diff --git a/files/build_templates/init_cfg.json.j2 b/files/build_templates/init_cfg.json.j2 index 7126f2648d74..07ff9b1b3a72 100644 --- a/files/build_templates/init_cfg.json.j2 +++ b/files/build_templates/init_cfg.json.j2 @@ -39,6 +39,8 @@ "{{feature}}": { "state": "{{state}}", "has_timer" : {{has_timer | lower()}}, + "has_global_scope": {% if feature + '.service' in installer_services.split(' ') %}true{% else %}false{% endif %}, + "has_per_asic_scope": {% if feature + '@.service' in installer_services.split(' ') %}true{% else %}false{% endif %}, "auto_restart": "{{autorestart}}", "high_mem_alert": "disabled" }{% if not loop.last %},{% endif -%} diff --git a/files/image_config/hostcfgd/hostcfgd b/files/image_config/hostcfgd/hostcfgd index 6292186adda1..f12de33ff3b6 100755 --- a/files/image_config/hostcfgd/hostcfgd +++ b/files/image_config/hostcfgd/hostcfgd @@ -241,42 +241,22 @@ class HostConfigDaemon: lpbk_table = self.config_db.get_table('LOOPBACK_INTERFACE') self.iptables = Iptables() self.iptables.load(lpbk_table) - self.host_feature_list = [] - self.namespace_feature_list = [] - def update_all_feature_states(self): - feature_table = self.config_db.get_table('FEATURE') - generated_services_list, generated_multi_instance_services_list = device_info.get_sonic_generated_services() - for feature_name in feature_table.keys(): - if not feature_name: - syslog.syslog(syslog.LOG_WARNING, "Feature is None") - continue - - state = feature_table[feature_name]['state'] - if not state: - syslog.syslog(syslog.LOG_WARNING, "Eanble state of feature '{}' is None".format(feature_name)) - continue - - if feature_name + '.service' in generated_services_list: - self.host_feature_list.append(feature_name) - - if feature_name + '.service' in generated_multi_instance_services_list: - self.namespace_feature_list.append(feature_name) - - for feature_name in feature_table.keys(): - self.update_feature_state(feature_name, state, feature_table[feature_name]['has_timer']) + def update_feature_state(self, feature_name, state, feature_table): + has_timer = ast.literal_eval(feature_table[feature_name]['has_timer']) + has_global_scope = ast.literal_eval(feature_table[feature_name]['has_global_scope']) + has_per_asic_scope = ast.literal_eval(feature_table[feature_name]['has_per_asic_scope']) - def update_feature_state(self, feature_name, state, has_timer): - # Create feature name suffix depending feature is running in host or namespace or in both - feature_name_suffix_list = (([feature_name] if feature_name in self.host_feature_list else []) + + # Create feature name suffix depending feature is running in host or namespace or in both + feature_name_suffix_list = (([feature_name] if has_global_scope or not device_info.is_multi_npu() else []) + ([(feature_name + '@' + str(asic_inst)) for asic_inst in range(device_info.get_num_npus()) - if feature_name in self.namespace_feature_list])) + if has_per_asic_scope and device_info.is_multi_npu()])) if not feature_name_suffix_list: syslog.syslog(syslog.LOG_ERR, "Feature '{}' service not available" .format(feature_name)) - feature_suffixes = ["service"] + (["timer"] if ast.literal_eval(has_timer) else []) + feature_suffixes = ["service"] + (["timer"] if has_timer else []) if state == "enabled": start_cmds = [] @@ -321,6 +301,20 @@ class HostConfigDaemon: .format(state, feature_name)) + def update_all_feature_states(self): + feature_table = self.config_db.get_table('FEATURE') + for feature_name in feature_table.keys(): + if not feature_name: + syslog.syslog(syslog.LOG_WARNING, "Feature is None") + continue + + state = feature_table[feature_name]['state'] + if not state: + syslog.syslog(syslog.LOG_WARNING, "Eanble state of feature '{}' is None".format(feature_name)) + continue + + self.update_feature_state(feature_name, state, feature_table) + def aaa_handler(self, key, data): self.aaacfg.aaa_update(key, data) @@ -361,7 +355,7 @@ class HostConfigDaemon: syslog.syslog(syslog.LOG_WARNING, "Enable state of feature '{}' is None".format(feature_name)) return - self.update_feature_state(feature_name, state, feature_table[feature_name]['has_timer']) + self.update_feature_state(feature_name, state, feature_table) def start(self): # Update all feature states once upon starting diff --git a/src/sonic-py-common/sonic_py_common/device_info.py b/src/sonic-py-common/sonic_py_common/device_info.py index 2ba5425fe80e..f6b25de55ab5 100644 --- a/src/sonic-py-common/sonic_py_common/device_info.py +++ b/src/sonic-py-common/sonic_py_common/device_info.py @@ -20,7 +20,6 @@ PORT_CONFIG_FILE = "port_config.ini" PLATFORM_JSON_FILE = "platform.json" -SONIC_GENERATED_SERVICE_PATH = '/etc/sonic/generated_services.conf' # Multi-NPU constants # TODO: Move Multi-ASIC-related functions and constants to a "multi_asic.py" module NPU_NAME_PREFIX = "asic" @@ -421,20 +420,3 @@ def get_system_routing_stack(): raise OSError("Cannot detect routing stack") return result - -def get_sonic_generated_services(): - generated_services_list = [] - generated_multi_instance_services = [] - if not os.path.isfile(SONIC_GENERATED_SERVICE_PATH): - return generated_services_list, generated_multi_instance_services - with open(SONIC_GENERATED_SERVICE_PATH) as generated_service_file: - for line in generated_service_file: - if '@' in line: - line = line.replace('@', '') - if is_multi_npu(): - generated_multi_instance_services.append(line.rstrip('\n')) - else: - generated_services_list.append(line.rstrip('\n')) - else: - generated_services_list.append(line.rstrip('\n')) - return generated_services_list, generated_multi_instance_services From 084e4abc41cfc435b48af15d083142878d958c32 Mon Sep 17 00:00:00 2001 From: Abhishek Dosi Date: Sat, 19 Sep 2020 19:26:54 +0000 Subject: [PATCH 5/6] Fix merge conflict --- files/image_config/hostcfgd/hostcfgd | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/files/image_config/hostcfgd/hostcfgd b/files/image_config/hostcfgd/hostcfgd index 0d8c2f3dd0ea..73795dbdd150 100755 --- a/files/image_config/hostcfgd/hostcfgd +++ b/files/image_config/hostcfgd/hostcfgd @@ -245,7 +245,7 @@ class HostConfigDaemon: def update_feature_state(self, feature_name, state, feature_table): has_timer = ast.literal_eval(feature_table[feature_name].get('has_timer', 'False')) has_global_scope = ast.literal_eval(feature_table[feature_name].get('has_global_scope', 'True')) - has_per_asic_scope = ast.literal_eval(feature_table[feature_name]('has_per_asic_scope', 'False')) + has_per_asic_scope = ast.literal_eval(feature_table[feature_name].get('has_per_asic_scope', 'False')) # Create feature name suffix depending feature is running in host or namespace or in both feature_name_suffix_list = (([feature_name] if has_global_scope or not device_info.is_multi_npu() else []) + From 855658f81e5ce11bdde362c642508b4e706f80d6 Mon Sep 17 00:00:00 2001 From: Abhishek Dosi Date: Sat, 19 Sep 2020 19:29:58 +0000 Subject: [PATCH 6/6] Address Review Comment. Signed-off-by: Abhishek Dosi --- files/image_config/hostcfgd/hostcfgd | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/files/image_config/hostcfgd/hostcfgd b/files/image_config/hostcfgd/hostcfgd index 73795dbdd150..2b505ee5a0ee 100755 --- a/files/image_config/hostcfgd/hostcfgd +++ b/files/image_config/hostcfgd/hostcfgd @@ -241,6 +241,7 @@ class HostConfigDaemon: lpbk_table = self.config_db.get_table('LOOPBACK_INTERFACE') self.iptables = Iptables() self.iptables.load(lpbk_table) + self.is_multi_npu = device_info.is_multi_npu() def update_feature_state(self, feature_name, state, feature_table): has_timer = ast.literal_eval(feature_table[feature_name].get('has_timer', 'False')) @@ -248,9 +249,9 @@ class HostConfigDaemon: has_per_asic_scope = ast.literal_eval(feature_table[feature_name].get('has_per_asic_scope', 'False')) # Create feature name suffix depending feature is running in host or namespace or in both - feature_name_suffix_list = (([feature_name] if has_global_scope or not device_info.is_multi_npu() else []) + + feature_name_suffix_list = (([feature_name] if has_global_scope or not self.is_multi_npu else []) + ([(feature_name + '@' + str(asic_inst)) for asic_inst in range(device_info.get_num_npus()) - if has_per_asic_scope and device_info.is_multi_npu()])) + if has_per_asic_scope and self.is_multi_npu])) if not feature_name_suffix_list: syslog.syslog(syslog.LOG_ERR, "Feature '{}' service not available"