From c4ab4abe5433d8be6df034a9424d8c4128827c8b Mon Sep 17 00:00:00 2001 From: liangxin1300 Date: Wed, 3 Nov 2021 09:26:22 +0800 Subject: [PATCH] Dev: sbd: Adjust timeout related values * Consolidate sbd timeout related methods/constants/formulas into class SBDTimeout * Adjust stonith-timeout value, formulas are: stonith-timeout >= 1.2 * (pcmk_delay_max + msgwait) # for disk-based sbd stonith-timeout >= 1.2 * max(stonith_watchdog_timeout, 2*SBD_WATCHDOG_TIMEOUT) # for disk-less sbd stonith-timeout >= max(STONITH_TIMEOUT_DEFAULT, token+consensus) # for all situations * Adjust SBD_DELAY_START value, formulas are: SBD_DELAY_START = no # for non virtualization environment or non-2node cluster, which is the system default SBD_DELAY_START >= (token + consensus + pcmk_delay_max + msgwait) # for disk-based sbd SBD_DELAY_START >= (token + consensus + 2*SBD_WATCHDOG_TIMEOUT) # for disk-less sbd * pcmk_delay_max=30 # only for the single stonith device in the 2-node cluster without qdevice pcmk_delay_max deletion # only for the single stonith device, not in the 2-node cluster without qdevice --- crmsh/bootstrap.py | 57 +++-- crmsh/constants.py | 2 + crmsh/corosync.py | 39 ++++ crmsh/crash_test/utils.py | 18 +- crmsh/sbd.py | 436 ++++++++++++++++++++++++-------------- crmsh/utils.py | 62 ++++++ 6 files changed, 424 insertions(+), 190 deletions(-) diff --git a/crmsh/bootstrap.py b/crmsh/bootstrap.py index 7fd74532c6..8f7148ee9c 100644 --- a/crmsh/bootstrap.py +++ b/crmsh/bootstrap.py @@ -33,7 +33,7 @@ from . import tmpfiles from . import lock from . import userdir -from .constants import SSH_OPTION, QDEVICE_HELP_INFO, CRM_MON_ONE_SHOT +from .constants import SSH_OPTION, QDEVICE_HELP_INFO, CRM_MON_ONE_SHOT, STONITH_TIMEOUT_DEFAULT from . import ocfs2 from . import qdevice from . import log @@ -63,13 +63,14 @@ BOOTH_DIR = "/etc/booth" BOOTH_CFG = "/etc/booth/booth.conf" BOOTH_AUTH = "/etc/booth/authkey" +SBD_SYSTEMD_DELAY_START_DIR = "/etc/systemd/system/sbd.service.d" FILES_TO_SYNC = (BOOTH_DIR, corosync.conf(), COROSYNC_AUTH, CSYNC2_CFG, CSYNC2_KEY, "/etc/ctdb/nodes", "/etc/drbd.conf", "/etc/drbd.d", "/etc/ha.d/ldirectord.cf", "/etc/lvm/lvm.conf", "/etc/multipath.conf", "/etc/samba/smb.conf", SYSCONFIG_NFS, SYSCONFIG_PCMK, SYSCONFIG_SBD, PCMK_REMOTE_AUTH, WATCHDOG_CFG, - PROFILES_FILE, CRM_CFG) - + PROFILES_FILE, CRM_CFG, SBD_SYSTEMD_DELAY_START_DIR) INIT_STAGES = ("ssh", "ssh_remote", "csync2", "csync2_remote", "corosync", "sbd", "cluster", "ocfs2", "admin", "qdevice") + class QdevicePolicy(Enum): QDEVICE_RELOAD = 0 QDEVICE_RESTART = 1 @@ -698,12 +699,14 @@ def start_pacemaker(node_list=[]): Start pacemaker service with wait time for sbd When node_list set, start pacemaker service in parallel """ - from .sbd import SBDManager + from .sbd import SBDTimeout pacemaker_start_msg = "Starting pacemaker" - if utils.package_is_installed("sbd") and \ + # not _context means not in init or join process + if not _context and \ + utils.package_is_installed("sbd") and \ utils.service_is_enabled("sbd.service") and \ - SBDManager.is_delay_start(): - pacemaker_start_msg += "(waiting for sbd {}s)".format(SBDManager.get_suitable_sbd_systemd_timeout()) + SBDTimeout.is_sbd_delay_start(): + pacemaker_start_msg += "(delaying start of sbd for {}s)".format(SBDTimeout.get_sbd_delay_start_sec_from_sysconfig()) with logger_utils.status_long(pacemaker_start_msg): utils.start_service("pacemaker.service", enable=True, node_list=node_list) @@ -1237,7 +1240,7 @@ def init_cluster(): rsc_defaults rsc-options: resource-stickiness=1 migration-threshold=3 """) - _context.sbd_manager.configure_sbd_resource() + _context.sbd_manager.configure_sbd_resource_and_properties() def init_admin(): @@ -1334,20 +1337,17 @@ def init_qdevice(): utils.disable_service("corosync-qdevice.service") return if _context.stage == "qdevice": - from .sbd import SBDManager + from .sbd import SBDManager, SBDTimeout utils.check_all_nodes_reachable() using_diskless_sbd = SBDManager.is_using_diskless_sbd() _context.qdevice_reload_policy = evaluate_qdevice_quorum_effect(QDEVICE_ADD, using_diskless_sbd) # add qdevice after diskless sbd started if using_diskless_sbd: res = SBDManager.get_sbd_value_from_config("SBD_WATCHDOG_TIMEOUT") - if res: - sbd_watchdog_timeout = max(int(res), SBDManager.SBD_WATCHDOG_TIMEOUT_DEFAULT_WITH_QDEVICE) - else: - sbd_watchdog_timeout = SBDManager.SBD_WATCHDOG_TIMEOUT_DEFAULT_WITH_QDEVICE - stonith_timeout = SBDManager.calculate_stonith_timeout(sbd_watchdog_timeout) - SBDManager.update_configuration({"SBD_WATCHDOG_TIMEOUT": str(sbd_watchdog_timeout)}) - invokerc("crm configure property stonith-watchdog-timeout=-1 stonith-timeout={}s".format(stonith_timeout)) + if not res or int(res) < SBDTimeout.SBD_WATCHDOG_TIMEOUT_DEFAULT_WITH_QDEVICE: + sbd_watchdog_timeout_qdevice = SBDTimeout.SBD_WATCHDOG_TIMEOUT_DEFAULT_WITH_QDEVICE + SBDManager.update_configuration({"SBD_WATCHDOG_TIMEOUT": str(sbd_watchdog_timeout_qdevice)}) + utils.set_property(stonith_timeout=SBDTimeout.get_stonith_timeout()) logger.info("""Configure Qdevice/Qnetd:""") qdevice_inst = _context.qdevice_inst @@ -1822,6 +1822,14 @@ def update_nodeid(nodeid, node=None): # attempt to join the cluster failed) init_cluster_local() + if utils.service_is_active("sbd.service"): + from .sbd import SBDTimeout + SBDTimeout.adjust_sbd_timeout_related_cluster_configuration() + else: + value = get_stonith_timeout_generally_expected() + if value: + utils.set_property_conditionally("stonith-timeout", value) + with logger_utils.status_long("Reloading cluster configuration"): if ipv6_flag and not is_unicast: @@ -1927,6 +1935,10 @@ def remove_node_from_cluster(): """ Remove node from running cluster and the corosync / pacemaker configuration. """ + if utils.service_is_active("sbd.service"): + from .sbd import SBDTimeout + SBDTimeout.adjust_sbd_timeout_related_cluster_configuration(removing=True) + node = _context.cluster_node set_cluster_node_ip() @@ -2392,4 +2404,17 @@ def bootstrap_arbitrator(context): logger.info("Enabling and starting the booth arbitrator service") utils.start_service("booth@booth", enable=True) + +def get_stonith_timeout_generally_expected(): + """ + Adjust stonith-timeout for all scenarios, formula is: + + stonith-timeout >= max(STONITH_TIMEOUT_DEFAULT, token+consensus) + """ + stonith_enabled = utils.get_property("stonith-enabled") + # When stonith disabled, return + if utils.is_boolean_false(stonith_enabled): + return None + + return max(STONITH_TIMEOUT_DEFAULT, corosync.token_and_consensus_timeout()) # EOF diff --git a/crmsh/constants.py b/crmsh/constants.py index 95224e7478..d0afb8c798 100644 --- a/crmsh/constants.py +++ b/crmsh/constants.py @@ -524,4 +524,6 @@ """ STANDBY_NV_RE = r'(= 1.2 * (pcmk_delay_max + msgwait) # for disk-based sbd + stonith-timeout >= 1.2 * max (stonith_watchdog_timeout, 2*SBD_WATCHDOG_TIMEOUT) # for disk-less sbd + + And above value will compare with max(STONITH_TIMEOUT_DEFAULT, token+consensus), + then return the larger one + """ + if self.disk_based: + value_from_sbd = int(1.2*(self.pcmk_delay_max + self.msgwait)) + else: + value_from_sbd = int(1.2*max(self.stonith_watchdog_timeout, 2*self.sbd_watchdog_timeout)) + + value = max(value_from_sbd, bootstrap.get_stonith_timeout_generally_expected()) + logger.debug("Result of SBDTimeout.get_stonith_timeout_expected %d", value) + return value + + @classmethod + def get_stonith_timeout(cls): + cls_inst = cls() + cls_inst._load_configurations() + return cls_inst.get_stonith_timeout_expected() + + def get_sbd_delay_start_expected(self): + """ + Get the value for SBD_DELAY_START, formulas are: + + SBD_DELAY_START >= (token + consensus + pcmk_delay_max + msgwait) # for disk-based sbd + SBD_DELAY_START >= (token + consensus + 2*SBD_WATCHDOG_TIMEOUT) # for disk-less sbd + """ + token_and_consensus_timeout = corosync.token_and_consensus_timeout() + if self.disk_based: + value = token_and_consensus_timeout + self.pcmk_delay_max + self.msgwait + else: + value = token_and_consensus_timeout + 2*self.sbd_watchdog_timeout + return value + + @staticmethod + def get_sbd_delay_start_sec_from_sysconfig(): + """ + Get suitable systemd start timeout for sbd.service + """ + # TODO 5ms, 5us, 5s, 5m, 5h are also valid for sbd sysconfig + value = SBDManager.get_sbd_value_from_config("SBD_DELAY_START") + if utils.is_boolean_true(value): + return 2*SBDTimeout.get_sbd_watchdog_timeout() + return int(value) + + @staticmethod + def is_sbd_delay_start(): + """ + Check if SBD_DELAY_START is not no or not set + """ + res = SBDManager.get_sbd_value_from_config("SBD_DELAY_START") + return res and res != "no" + + def adjust_systemd_start_timeout(self): + """ + Adjust start timeout for sbd when set SBD_DELAY_START + """ + sbd_delay_start_value = SBDManager.get_sbd_value_from_config("SBD_DELAY_START") + if sbd_delay_start_value == "no": + return + + cmd = "systemctl show -p TimeoutStartUSec sbd --value" + out = utils.get_stdout_or_raise_error(cmd) + start_timeout = utils.get_systemd_timeout_start_in_sec(out) + if start_timeout >= int(sbd_delay_start_value): + return + + utils.mkdirp(SBD_SYSTEMD_DELAY_START_DIR) + sbd_delay_start_file = "{}/sbd_delay_start.conf".format(SBD_SYSTEMD_DELAY_START_DIR) + utils.str2file("[Service]\nTimeoutSec={}".format(int(1.2*int(sbd_delay_start_value))), sbd_delay_start_file) + bootstrap.csync2_update(SBD_SYSTEMD_DELAY_START_DIR) + utils.cluster_run_cmd("systemctl daemon-reload") + + def adjust_stonith_timeout(self): + """ + Adjust stonith-timeout property + """ + utils.set_property_conditionally("stonith-timeout", self.get_stonith_timeout_expected()) + + def adjust_pcmk_delay_max(self): + """ + Adjust pcmk_delay_max parameter for sbd ra + """ + # TODO this function should be outside of sbd.py, to adjust any fence device + + if not utils.has_resource_configured(SBDManager.SBD_RA): + return + + if self.two_node_without_qdevice: + cmd = "crm resource param {} set pcmk_delay_max {}s".format(SBDManager.SBD_RA_ID, self.pcmk_delay_max) + else: + cmd = "crm resource param {} delete pcmk_delay_max".format(SBDManager.SBD_RA_ID) + utils.get_stdout_or_raise_error(cmd) + + def adjust_sbd_delay_start(self): + """ + Adjust SBD_DELAY_START in /etc/sysconfig/sbd + """ + run_time_value = str(self.sbd_delay_start_value_expected) + config_value = self.sbd_delay_start_value_from_config + if run_time_value == config_value: + return + if run_time_value == "no" \ + or (not re.search(r'\d+', config_value)) \ + or (int(run_time_value) > int(config_value)): + SBDManager.update_configuration({"SBD_DELAY_START": run_time_value}) + + @classmethod + def adjust_sbd_timeout_related_cluster_configuration(cls, removing=False): + """ + Adjust sbd timeout related configurations + """ + cls_inst = cls(removing=removing) + cls_inst._load_configurations() + + message = "Adjusting sbd related timeout values for 2-node cluster" + with logger_utils.status_long(message): + cls_inst.adjust_sbd_delay_start() + cls_inst.adjust_pcmk_delay_max() + cls_inst.adjust_stonith_timeout() + cls_inst.adjust_systemd_start_timeout() + + class SBDManager(object): """ Class to manage sbd configuration and services @@ -26,14 +274,11 @@ class SBDManager(object): specify here will be destroyed. """ SBD_WARNING = "Not configuring SBD - STONITH will be disabled." - DISKLESS_SBD_WARNING = """Diskless SBD requires cluster with three or more nodes. -If you want to use diskless SBD for two-nodes cluster, should be combined with QDevice.""" + DISKLESS_SBD_WARNING = "Diskless SBD requires cluster with three or more nodes. If you want to use diskless SBD for 2-node cluster, should be combined with QDevice." PARSE_RE = "[; ]" DISKLESS_CRM_CMD = "crm configure property stonith-enabled=true stonith-watchdog-timeout={} stonith-timeout={}" - - SBD_WATCHDOG_TIMEOUT_DEFAULT = 5 - SBD_WATCHDOG_TIMEOUT_DEFAULT_S390 = 15 - SBD_WATCHDOG_TIMEOUT_DEFAULT_WITH_QDEVICE = 35 + SBD_RA = "stonith:external/sbd" + SBD_RA_ID = "stonith-sbd" def __init__(self, context): """ @@ -46,14 +291,9 @@ def __init__(self, context): self.diskless_sbd = context.diskless_sbd self._sbd_devices = None self._watchdog_inst = None - self._stonith_timeout = 60 - if context.is_s390: - self._sbd_watchdog_timeout = self.SBD_WATCHDOG_TIMEOUT_DEFAULT_S390 - else: - self._sbd_watchdog_timeout = self.SBD_WATCHDOG_TIMEOUT_DEFAULT - self._stonith_watchdog_timeout = -1 self._context = context self._delay_start = False + self.timeout_inst = None @staticmethod def _get_device_uuid(dev, node=None): @@ -66,18 +306,6 @@ def _get_device_uuid(dev, node=None): raise ValueError("Cannot find sbd device UUID for {}".format(dev)) return res.group(1) - @staticmethod - def _get_sbd_msgwait(dev): - """ - Get msgwait for sbd device - """ - out = utils.get_stdout_or_raise_error("sbd -d {} dump".format(dev)) - # Format like "Timeout (msgwait) : 30" - res = re.search("\(msgwait\)\s+:\s+(\d+)", out) - if not res: - raise ValueError("Cannot get sbd msgwait for {}".format(dev)) - return int(res.group(1)) - def _compare_device_uuid(self, dev, node_list): """ Compare local sbd device UUID with other node's sbd device UUID @@ -156,37 +384,22 @@ def _get_sbd_device(self): dev_list = self._get_sbd_device_interactive() self._sbd_devices = dev_list - def _adjust_sbd_watchdog_timeout_for_s390(self): - """ - Correct watchdog timeout if less than s390 default - """ - if self._context.is_s390 and self._sbd_watchdog_timeout < self.SBD_WATCHDOG_TIMEOUT_DEFAULT_S390: - logger.warning("sbd_watchdog_timeout is set to {} for s390, it was {}".format(self.SBD_WATCHDOG_TIMEOUT_DEFAULT_S390, self._sbd_watchdog_timeout)) - self._sbd_watchdog_timeout = self.SBD_WATCHDOG_TIMEOUT_DEFAULT_S390 - def _initialize_sbd(self): """ Initialize SBD parameters according to profiles.yml, or the crmsh defined defaulst as the last resort. This covers both disk-based-sbd, and diskless-sbd scenarios. - For diskless-sbd, set _sbd_watchdog_timeout then return; + For diskless-sbd, set sbd_watchdog_timeout then return; For disk-based-sbd, also calculate the msgwait value, then initialize the SBD device. """ logger.info("Initializing {}SBD".format("diskless " if self.diskless_sbd else "")) - - if "sbd.watchdog_timeout" in self._context.profiles_dict: - self._sbd_watchdog_timeout = self._context.profiles_dict["sbd.watchdog_timeout"] - self._adjust_sbd_watchdog_timeout_for_s390() + self.timeout_inst = SBDTimeout(self._context) + self.timeout_inst.set_sbd_watchdog_timeout() if self.diskless_sbd: + self.timeout_inst.adjust_sbd_watchdog_timeout_with_diskless_and_qdevice() return - sbd_msgwait_default = int(self._sbd_watchdog_timeout) * 2 - sbd_msgwait = sbd_msgwait_default - if "sbd.msgwait" in self._context.profiles_dict: - sbd_msgwait = self._context.profiles_dict["sbd.msgwait"] - if int(sbd_msgwait) < sbd_msgwait_default: - logger.warning("sbd msgwait is set to {}, it was {}".format(sbd_msgwait_default, sbd_msgwait)) - sbd_msgwait = sbd_msgwait_default - opt = "-4 {} -1 {}".format(sbd_msgwait, self._sbd_watchdog_timeout) + self.timeout_inst.set_sbd_msgwait() + opt = "-4 {} -1 {}".format(self.timeout_inst.sbd_msgwait, self.timeout_inst.sbd_watchdog_timeout) for dev in self._sbd_devices: rc, _, err = bootstrap.invoke("sbd {} -d {} create".format(opt, dev)) @@ -198,43 +411,15 @@ def _update_sbd_configuration(self): Update /etc/sysconfig/sbd """ shutil.copyfile(self.SYSCONFIG_SBD_TEMPLATE, SYSCONFIG_SBD) - self._adjust_sbd_watchdog_timeout_with_diskless_and_qdevice() - if utils.detect_virt(): - self._delay_start = True sbd_config_dict = { - "SBD_PACEMAKER": "yes", - "SBD_STARTMODE": "always", - "SBD_DELAY_START": "yes" if self._delay_start else "no", - "SBD_WATCHDOG_DEV": self._watchdog_inst.watchdog_device_name + "SBD_WATCHDOG_DEV": self._watchdog_inst.watchdog_device_name, + "SBD_WATCHDOG_TIMEOUT": str(self.timeout_inst.sbd_watchdog_timeout) } - if self._sbd_watchdog_timeout > 0: - sbd_config_dict["SBD_WATCHDOG_TIMEOUT"] = str(self._sbd_watchdog_timeout) if self._sbd_devices: sbd_config_dict["SBD_DEVICE"] = ';'.join(self._sbd_devices) utils.sysconfig_set(SYSCONFIG_SBD, **sbd_config_dict) bootstrap.csync2_update(SYSCONFIG_SBD) - def _adjust_sbd_watchdog_timeout_with_diskless_and_qdevice(self): - """ - When using diskless SBD with Qdevice, adjust value of sbd_watchdog_timeout - """ - if not self.diskless_sbd: - return - # add sbd after qdevice started - if utils.is_qdevice_configured() and utils.service_is_active("corosync-qdevice.service"): - qdevice_sync_timeout = utils.get_qdevice_sync_timeout() - if self._sbd_watchdog_timeout <= qdevice_sync_timeout: - watchdog_timeout_with_qdevice = qdevice_sync_timeout + 5 - logger.warning("sbd_watchdog_timeout is set to {} for qdevice, it was {}".format(watchdog_timeout_with_qdevice, self._sbd_watchdog_timeout)) - self._sbd_watchdog_timeout = watchdog_timeout_with_qdevice - self._stonith_timeout = self.calculate_stonith_timeout(self._sbd_watchdog_timeout) - # add sbd and qdevice together from beginning - elif self._context.qdevice_inst: - if self._sbd_watchdog_timeout < self.SBD_WATCHDOG_TIMEOUT_DEFAULT_WITH_QDEVICE: - logger.warning("sbd_watchdog_timeout is set to {} for qdevice, it was {}".format(self.SBD_WATCHDOG_TIMEOUT_DEFAULT_WITH_QDEVICE, self._sbd_watchdog_timeout)) - self._sbd_watchdog_timeout = self.SBD_WATCHDOG_TIMEOUT_DEFAULT_WITH_QDEVICE - self._stonith_timeout = self.calculate_stonith_timeout(self._sbd_watchdog_timeout) - def _get_sbd_device_from_config(self): """ Gets currently configured SBD device, i.e. what's in /etc/sysconfig/sbd @@ -245,44 +430,6 @@ def _get_sbd_device_from_config(self): else: return None - @staticmethod - def is_delay_start(): - """ - Check if SBD_DELAY_START is yes - """ - res = SBDManager.get_sbd_value_from_config("SBD_DELAY_START") - return utils.is_boolean_true(res) - - @staticmethod - def get_sbd_watchdog_timeout(): - """ - Get SBD_WATCHDOG_TIMEOUT from /etc/sysconfig/sbd - """ - res = SBDManager.get_sbd_value_from_config("SBD_WATCHDOG_TIMEOUT") - if not res: - raise ValueError("Cannot get the value of SBD_WATCHDOG_TIMEOUT") - return int(res) - - @staticmethod - def get_sbd_start_timeout_threshold(): - """ - Get sbd start timeout threshold - TimeoutStartUSec of sbd shouldn't less than this value - """ - dev_list = SBDManager.get_sbd_device_from_config() - if not dev_list: - return int(SBDManager.get_sbd_watchdog_timeout() * 2) - else: - return int(SBDManager._get_sbd_msgwait(dev_list[0])) - - @staticmethod - def get_suitable_sbd_systemd_timeout(): - """ - Get suitable systemd start timeout for sbd.service - """ - timeout_value = SBDManager.get_sbd_start_timeout_threshold() - return int(timeout_value * 1.2) - def _restart_cluster_and_configure_sbd_ra(self): """ Try to configure sbd resource, restart cluster on needed @@ -291,14 +438,14 @@ def _restart_cluster_and_configure_sbd_ra(self): logger.info("Restarting cluster service") utils.cluster_run_cmd("crm cluster restart") bootstrap.wait_for_cluster() - self.configure_sbd_resource() + self.configure_sbd_resource_and_properties() else: logger.warning("To start sbd.service, need to restart cluster service manually on each node") if self.diskless_sbd: - cmd = self.DISKLESS_CRM_CMD.format(self._stonith_watchdog_timeout, str(self._stonith_timeout)+"s") + cmd = self.DISKLESS_CRM_CMD.format(self.timeout_inst.stonith_watchdog_timeout, SBDTimeout.get_stonith_timeout()) logger.warning("Then run \"{}\" on any node".format(cmd)) else: - self.configure_sbd_resource() + self.configure_sbd_resource_and_properties() def _enable_sbd_service(self): """ @@ -312,29 +459,6 @@ def _enable_sbd_service(self): # in init process bootstrap.invoke("systemctl enable sbd.service") - def _adjust_systemd(self): - """ - Adjust start timeout for sbd when set SBD_DELAY_START - """ - if not self.is_delay_start(): - return - - # TimeoutStartUSec default is 1min 30s, need to parse as seconds - cmd = "systemctl show -p TimeoutStartUSec sbd --value" - out = utils.get_stdout_or_raise_error(cmd) - res_seconds = re.search("(\d+)s", out) - default_start_timeout = int(res_seconds.group(1)) if res_seconds else 0 - res_min = re.search("(\d+)min", out) - default_start_timeout += 60 * int(res_min.group(1)) if res_min else 0 - if default_start_timeout >= self.get_sbd_start_timeout_threshold(): - return - - systemd_sbd_dir = "/etc/systemd/system/sbd.service.d" - utils.mkdirp(systemd_sbd_dir) - sbd_delay_start_file = "{}/sbd_delay_start.conf".format(systemd_sbd_dir) - utils.str2file("[Service]\nTimeoutSec={}".format(self.get_suitable_sbd_systemd_timeout()), sbd_delay_start_file) - utils.get_stdout_or_raise_error("systemctl daemon-reload") - def _warn_diskless_sbd(self, peer=None): """ Give warning when configuring diskless sbd @@ -370,26 +494,28 @@ def sbd_init(self): self._initialize_sbd() self._update_sbd_configuration() self._enable_sbd_service() - self._adjust_systemd() - def configure_sbd_resource(self): + def configure_sbd_resource_and_properties(self): """ - Configure stonith-sbd resource and stonith-enabled property + Configure stonith-sbd resource and related properties """ if not utils.package_is_installed("sbd") or \ not utils.service_is_enabled("sbd.service") or \ - utils.has_resource_configured("stonith:external/sbd"): + utils.has_resource_configured(self.SBD_RA): return + # disk-based sbd if self._get_sbd_device_from_config(): - if not bootstrap.invokerc("crm configure primitive stonith-sbd stonith:external/sbd pcmk_delay_max=30s"): - utils.fatal("Can't create stonith-sbd primitive") - if not bootstrap.invokerc("crm configure property stonith-enabled=true"): - utils.fatal("Can't enable STONITH for SBD") + utils.get_stdout_or_raise_error("crm configure primitive {} {}".format(self.SBD_RA_ID, self.SBD_RA)) + utils.set_property(stonith_enabled="true") + # disk-less sbd else: - cmd = self.DISKLESS_CRM_CMD.format(self._stonith_watchdog_timeout, str(self._stonith_timeout)+"s") - if not bootstrap.invokerc(cmd): - utils.fatal("Can't enable STONITH for diskless SBD") + cmd = self.DISKLESS_CRM_CMD.format(self.timeout_inst.stonith_watchdog_timeout, constants.STONITH_TIMEOUT_DEFAULT) + utils.get_stdout_or_raise_error(cmd) + + # in sbd stage + if self._context.cluster_is_running: + SBDTimeout.adjust_sbd_timeout_related_cluster_configuration() def join_sbd(self, peer_host): """ @@ -411,7 +537,6 @@ def join_sbd(self, peer_host): self._verify_sbd_device(dev_list, [peer_host]) else: self._warn_diskless_sbd(peer_host) - self._adjust_systemd() logger.info("Got {}SBD configuration".format("" if dev_list else "diskless ")) bootstrap.invoke("systemctl enable sbd.service") @@ -454,13 +579,6 @@ def update_configuration(sbd_config_dict): utils.sysconfig_set(SYSCONFIG_SBD, **sbd_config_dict) bootstrap.csync2_update(SYSCONFIG_SBD) - @staticmethod - def calculate_stonith_timeout(sbd_watchdog_timeout): - """ - Calculate stonith timeout - """ - return int(sbd_watchdog_timeout * 2 * 1.2) - @staticmethod def get_sbd_value_from_config(key): """ diff --git a/crmsh/utils.py b/crmsh/utils.py index 2d3a5c0bf0..51dd8b6849 100644 --- a/crmsh/utils.py +++ b/crmsh/utils.py @@ -2973,4 +2973,66 @@ def is_quorate(peer=None): return res.group(1) == "Yes" else: raise ValueError("Failed to get quorate status from corosync-quorumtool") + + +def is_2node_cluster_without_qdevice(removing=False): + """ + Check if current cluster has two nodes without qdevice + """ + current_num = len(list_cluster_nodes()) + remove_num = 1 if removing else 0 + qdevice_num = 1 if is_qdevice_configured() else 0 + return (current_num - remove_num + qdevice_num) == 2 + + +def get_pcmk_delay_max(two_node_without_qdevice=False): + """ + Get value of pcmk_delay_max + """ + if service_is_active("pacemaker.service") and two_node_without_qdevice: + return constants.PCMK_DELAY_MAX + return 0 + + +def get_property(name): + """ + Get cluster properties + """ + cmd = "crm configure get_property " + name + rc, stdout, _ = get_stdout_stderr(cmd) + return stdout if rc == 0 else None + + +def set_property(**kwargs): + """ + Set cluster properties + """ + set_str = "" + for key, value in kwargs.items(): + set_str += "{}={} ".format(key, value) + cmd = "crm configure property " + set_str.strip().replace('_', '-') + get_stdout_or_raise_error(cmd) + + +def set_property_conditionally(property_name, value_from_calculation): + """ + Set cluster property if calculated value is larger then current cib value + """ + _value = get_property(property_name) + value_from_cib = int(_value.strip('s')) if _value else 0 + if value_from_cib < value_from_calculation: + cmd = "crm configure property {}={}".format(property_name, value_from_calculation) + get_stdout_or_raise_error(cmd) + + +def get_systemd_timeout_start_in_sec(time_res): + """ + Get the TimeoutStartUSec value in second unit + The origin format was like: 1min 30s + """ + res_seconds = re.search("(\d+)s", time_res) + start_timeout = int(res_seconds.group(1)) if res_seconds else 0 + res_min = re.search("(\d+)min", time_res) + start_timeout += 60 * int(res_min.group(1)) if res_min else 0 + return start_timeout # vim:ts=4:sw=4:et: