diff --git a/crmsh/bootstrap.py b/crmsh/bootstrap.py index 7fd74532c6..800c2ac3d1 100644 --- a/crmsh/bootstrap.py +++ b/crmsh/bootstrap.py @@ -33,7 +33,7 @@ from . import tmpfiles from . import lock from . import userdir -from .constants import SSH_OPTION, QDEVICE_HELP_INFO, CRM_MON_ONE_SHOT +from .constants import SSH_OPTION, QDEVICE_HELP_INFO, CRM_MON_ONE_SHOT, STONITH_TIMEOUT_DEFAULT from . import ocfs2 from . import qdevice from . import log @@ -63,13 +63,14 @@ BOOTH_DIR = "/etc/booth" BOOTH_CFG = "/etc/booth/booth.conf" BOOTH_AUTH = "/etc/booth/authkey" +SBD_SYSTEMD_DELAY_START_DIR = "/etc/systemd/system/sbd.service.d" FILES_TO_SYNC = (BOOTH_DIR, corosync.conf(), COROSYNC_AUTH, CSYNC2_CFG, CSYNC2_KEY, "/etc/ctdb/nodes", "/etc/drbd.conf", "/etc/drbd.d", "/etc/ha.d/ldirectord.cf", "/etc/lvm/lvm.conf", "/etc/multipath.conf", "/etc/samba/smb.conf", SYSCONFIG_NFS, SYSCONFIG_PCMK, SYSCONFIG_SBD, PCMK_REMOTE_AUTH, WATCHDOG_CFG, - PROFILES_FILE, CRM_CFG) - + PROFILES_FILE, CRM_CFG, SBD_SYSTEMD_DELAY_START_DIR) INIT_STAGES = ("ssh", "ssh_remote", "csync2", "csync2_remote", "corosync", "sbd", "cluster", "ocfs2", "admin", "qdevice") + class QdevicePolicy(Enum): QDEVICE_RELOAD = 0 QDEVICE_RESTART = 1 @@ -698,12 +699,14 @@ def start_pacemaker(node_list=[]): Start pacemaker service with wait time for sbd When node_list set, start pacemaker service in parallel """ - from .sbd import SBDManager + from .sbd import SBDTimeout pacemaker_start_msg = "Starting pacemaker" - if utils.package_is_installed("sbd") and \ + # not _context means not in init or join process + if not _context and \ + utils.package_is_installed("sbd") and \ utils.service_is_enabled("sbd.service") and \ - SBDManager.is_delay_start(): - pacemaker_start_msg += "(waiting for sbd {}s)".format(SBDManager.get_suitable_sbd_systemd_timeout()) + SBDTimeout.is_sbd_delay_start(): + pacemaker_start_msg += "(delaying start of sbd for {}s)".format(SBDTimeout.get_sbd_delay_start_sec_from_sysconfig()) with 
logger_utils.status_long(pacemaker_start_msg): utils.start_service("pacemaker.service", enable=True, node_list=node_list) @@ -1237,7 +1240,7 @@ def init_cluster(): rsc_defaults rsc-options: resource-stickiness=1 migration-threshold=3 """) - _context.sbd_manager.configure_sbd_resource() + _context.sbd_manager.configure_sbd_resource_and_properties() def init_admin(): @@ -1334,20 +1337,17 @@ def init_qdevice(): utils.disable_service("corosync-qdevice.service") return if _context.stage == "qdevice": - from .sbd import SBDManager + from .sbd import SBDManager, SBDTimeout utils.check_all_nodes_reachable() using_diskless_sbd = SBDManager.is_using_diskless_sbd() _context.qdevice_reload_policy = evaluate_qdevice_quorum_effect(QDEVICE_ADD, using_diskless_sbd) # add qdevice after diskless sbd started if using_diskless_sbd: res = SBDManager.get_sbd_value_from_config("SBD_WATCHDOG_TIMEOUT") - if res: - sbd_watchdog_timeout = max(int(res), SBDManager.SBD_WATCHDOG_TIMEOUT_DEFAULT_WITH_QDEVICE) - else: - sbd_watchdog_timeout = SBDManager.SBD_WATCHDOG_TIMEOUT_DEFAULT_WITH_QDEVICE - stonith_timeout = SBDManager.calculate_stonith_timeout(sbd_watchdog_timeout) - SBDManager.update_configuration({"SBD_WATCHDOG_TIMEOUT": str(sbd_watchdog_timeout)}) - invokerc("crm configure property stonith-watchdog-timeout=-1 stonith-timeout={}s".format(stonith_timeout)) + if not res or int(res) < SBDTimeout.SBD_WATCHDOG_TIMEOUT_DEFAULT_WITH_QDEVICE: + sbd_watchdog_timeout_qdevice = SBDTimeout.SBD_WATCHDOG_TIMEOUT_DEFAULT_WITH_QDEVICE + SBDManager.update_configuration({"SBD_WATCHDOG_TIMEOUT": str(sbd_watchdog_timeout_qdevice)}) + utils.set_property(stonith_timeout=SBDTimeout.get_stonith_timeout()) logger.info("""Configure Qdevice/Qnetd:""") qdevice_inst = _context.qdevice_inst @@ -1822,6 +1822,14 @@ def update_nodeid(nodeid, node=None): # attempt to join the cluster failed) init_cluster_local() + if utils.service_is_active("sbd.service"): + from .sbd import SBDTimeout + 
SBDTimeout.adjust_sbd_timeout_related_cluster_configuration() + else: + value = get_stonith_timeout_generally_expected() + if value: + utils.set_property_conditionally("stonith-timeout", value) + with logger_utils.status_long("Reloading cluster configuration"): if ipv6_flag and not is_unicast: @@ -1927,6 +1935,10 @@ def remove_node_from_cluster(): """ Remove node from running cluster and the corosync / pacemaker configuration. """ + if utils.service_is_active("sbd.service"): + from .sbd import SBDTimeout + SBDTimeout.adjust_sbd_timeout_related_cluster_configuration(removing=True) + node = _context.cluster_node set_cluster_node_ip() @@ -2392,4 +2404,17 @@ def bootstrap_arbitrator(context): logger.info("Enabling and starting the booth arbitrator service") utils.start_service("booth@booth", enable=True) + +def get_stonith_timeout_generally_expected(): + """ + Adjust stonith-timeout for all scenarios, formula is: + + stonith-timeout = STONITH_TIMEOUT_DEFAULT + token + consensus + """ + stonith_enabled = utils.get_property("stonith-enabled") + # When stonith disabled, return + if utils.is_boolean_false(stonith_enabled): + return None + + return STONITH_TIMEOUT_DEFAULT + corosync.token_and_consensus_timeout() # EOF diff --git a/crmsh/constants.py b/crmsh/constants.py index 95224e7478..d0afb8c798 100644 --- a/crmsh/constants.py +++ b/crmsh/constants.py @@ -524,4 +524,6 @@ """ STANDBY_NV_RE = r'( int(sbd_delay_start_value): + return + + utils.mkdirp(SBD_SYSTEMD_DELAY_START_DIR) + sbd_delay_start_file = "{}/sbd_delay_start.conf".format(SBD_SYSTEMD_DELAY_START_DIR) + utils.str2file("[Service]\nTimeoutSec={}".format(int(1.2*int(sbd_delay_start_value))), sbd_delay_start_file) + bootstrap.csync2_update(SBD_SYSTEMD_DELAY_START_DIR) + utils.cluster_run_cmd("systemctl daemon-reload") + + def adjust_stonith_timeout(self): + """ + Adjust stonith-timeout property + """ + utils.set_property_conditionally("stonith-timeout", self.get_stonith_timeout_expected()) + + def 
adjust_pcmk_delay_max(self): + """ + Adjust pcmk_delay_max parameter for sbd ra + """ + # TODO this function should be outside of sbd.py, to adjust any fence device + + if not utils.has_resource_configured(SBDManager.SBD_RA): + return + + if self.two_node_without_qdevice: + cmd = "crm resource param {} set pcmk_delay_max {}s".format(SBDManager.SBD_RA_ID, self.pcmk_delay_max) + else: + cmd = "crm resource param {} delete pcmk_delay_max".format(SBDManager.SBD_RA_ID) + utils.get_stdout_or_raise_error(cmd) + + def adjust_sbd_delay_start(self): + """ + Adjust SBD_DELAY_START in /etc/sysconfig/sbd + """ + expected_value = str(self.sbd_delay_start_value_expected) + config_value = self.sbd_delay_start_value_from_config + if expected_value == config_value: + return + if expected_value == "no" \ + or (not re.search(r'\d+', config_value)) \ + or (int(expected_value) > int(config_value)): + SBDManager.update_configuration({"SBD_DELAY_START": expected_value}) + + @classmethod + def adjust_sbd_timeout_related_cluster_configuration(cls, removing=False): + """ + Adjust sbd timeout related configurations + """ + cls_inst = cls(removing=removing) + cls_inst._load_configurations() + + message = "Adjusting sbd related timeout values for 2-node cluster" + with logger_utils.status_long(message): + cls_inst.adjust_sbd_delay_start() + cls_inst.adjust_pcmk_delay_max() + cls_inst.adjust_stonith_timeout() + cls_inst.adjust_systemd_start_timeout() + + class SBDManager(object): """ Class to manage sbd configuration and services @@ -26,14 +273,11 @@ class SBDManager(object): specify here will be destroyed. """ SBD_WARNING = "Not configuring SBD - STONITH will be disabled." - DISKLESS_SBD_WARNING = """Diskless SBD requires cluster with three or more nodes. -If you want to use diskless SBD for two-nodes cluster, should be combined with QDevice.""" + DISKLESS_SBD_WARNING = "Diskless SBD requires cluster with three or more nodes. 
If you want to use diskless SBD for 2-node cluster, should be combined with QDevice." PARSE_RE = "[; ]" DISKLESS_CRM_CMD = "crm configure property stonith-enabled=true stonith-watchdog-timeout={} stonith-timeout={}" - - SBD_WATCHDOG_TIMEOUT_DEFAULT = 5 - SBD_WATCHDOG_TIMEOUT_DEFAULT_S390 = 15 - SBD_WATCHDOG_TIMEOUT_DEFAULT_WITH_QDEVICE = 35 + SBD_RA = "stonith:external/sbd" + SBD_RA_ID = "stonith-sbd" def __init__(self, context): """ @@ -46,14 +290,9 @@ def __init__(self, context): self.diskless_sbd = context.diskless_sbd self._sbd_devices = None self._watchdog_inst = None - self._stonith_timeout = 60 - if context.is_s390: - self._sbd_watchdog_timeout = self.SBD_WATCHDOG_TIMEOUT_DEFAULT_S390 - else: - self._sbd_watchdog_timeout = self.SBD_WATCHDOG_TIMEOUT_DEFAULT - self._stonith_watchdog_timeout = -1 self._context = context self._delay_start = False + self.timeout_inst = None @staticmethod def _get_device_uuid(dev, node=None): @@ -66,18 +305,6 @@ def _get_device_uuid(dev, node=None): raise ValueError("Cannot find sbd device UUID for {}".format(dev)) return res.group(1) - @staticmethod - def _get_sbd_msgwait(dev): - """ - Get msgwait for sbd device - """ - out = utils.get_stdout_or_raise_error("sbd -d {} dump".format(dev)) - # Format like "Timeout (msgwait) : 30" - res = re.search("\(msgwait\)\s+:\s+(\d+)", out) - if not res: - raise ValueError("Cannot get sbd msgwait for {}".format(dev)) - return int(res.group(1)) - def _compare_device_uuid(self, dev, node_list): """ Compare local sbd device UUID with other node's sbd device UUID @@ -156,37 +383,22 @@ def _get_sbd_device(self): dev_list = self._get_sbd_device_interactive() self._sbd_devices = dev_list - def _adjust_sbd_watchdog_timeout_for_s390(self): - """ - Correct watchdog timeout if less than s390 default - """ - if self._context.is_s390 and self._sbd_watchdog_timeout < self.SBD_WATCHDOG_TIMEOUT_DEFAULT_S390: - logger.warning("sbd_watchdog_timeout is set to {} for s390, it was 
{}".format(self.SBD_WATCHDOG_TIMEOUT_DEFAULT_S390, self._sbd_watchdog_timeout)) - self._sbd_watchdog_timeout = self.SBD_WATCHDOG_TIMEOUT_DEFAULT_S390 - def _initialize_sbd(self): """ Initialize SBD parameters according to profiles.yml, or the crmsh defined defaulst as the last resort. This covers both disk-based-sbd, and diskless-sbd scenarios. - For diskless-sbd, set _sbd_watchdog_timeout then return; + For diskless-sbd, set sbd_watchdog_timeout then return; For disk-based-sbd, also calculate the msgwait value, then initialize the SBD device. """ logger.info("Initializing {}SBD".format("diskless " if self.diskless_sbd else "")) - - if "sbd.watchdog_timeout" in self._context.profiles_dict: - self._sbd_watchdog_timeout = self._context.profiles_dict["sbd.watchdog_timeout"] - self._adjust_sbd_watchdog_timeout_for_s390() + self.timeout_inst = SBDTimeout(self._context) + self.timeout_inst.set_sbd_watchdog_timeout() if self.diskless_sbd: + self.timeout_inst.adjust_sbd_watchdog_timeout_with_diskless_and_qdevice() return - sbd_msgwait_default = int(self._sbd_watchdog_timeout) * 2 - sbd_msgwait = sbd_msgwait_default - if "sbd.msgwait" in self._context.profiles_dict: - sbd_msgwait = self._context.profiles_dict["sbd.msgwait"] - if int(sbd_msgwait) < sbd_msgwait_default: - logger.warning("sbd msgwait is set to {}, it was {}".format(sbd_msgwait_default, sbd_msgwait)) - sbd_msgwait = sbd_msgwait_default - opt = "-4 {} -1 {}".format(sbd_msgwait, self._sbd_watchdog_timeout) + self.timeout_inst.set_sbd_msgwait() + opt = "-4 {} -1 {}".format(self.timeout_inst.sbd_msgwait, self.timeout_inst.sbd_watchdog_timeout) for dev in self._sbd_devices: rc, _, err = bootstrap.invoke("sbd {} -d {} create".format(opt, dev)) @@ -198,43 +410,15 @@ def _update_sbd_configuration(self): Update /etc/sysconfig/sbd """ shutil.copyfile(self.SYSCONFIG_SBD_TEMPLATE, SYSCONFIG_SBD) - self._adjust_sbd_watchdog_timeout_with_diskless_and_qdevice() - if utils.detect_virt(): - self._delay_start = True 
sbd_config_dict = { - "SBD_PACEMAKER": "yes", - "SBD_STARTMODE": "always", - "SBD_DELAY_START": "yes" if self._delay_start else "no", - "SBD_WATCHDOG_DEV": self._watchdog_inst.watchdog_device_name + "SBD_WATCHDOG_DEV": self._watchdog_inst.watchdog_device_name, + "SBD_WATCHDOG_TIMEOUT": str(self.timeout_inst.sbd_watchdog_timeout) } - if self._sbd_watchdog_timeout > 0: - sbd_config_dict["SBD_WATCHDOG_TIMEOUT"] = str(self._sbd_watchdog_timeout) if self._sbd_devices: sbd_config_dict["SBD_DEVICE"] = ';'.join(self._sbd_devices) utils.sysconfig_set(SYSCONFIG_SBD, **sbd_config_dict) bootstrap.csync2_update(SYSCONFIG_SBD) - def _adjust_sbd_watchdog_timeout_with_diskless_and_qdevice(self): - """ - When using diskless SBD with Qdevice, adjust value of sbd_watchdog_timeout - """ - if not self.diskless_sbd: - return - # add sbd after qdevice started - if utils.is_qdevice_configured() and utils.service_is_active("corosync-qdevice.service"): - qdevice_sync_timeout = utils.get_qdevice_sync_timeout() - if self._sbd_watchdog_timeout <= qdevice_sync_timeout: - watchdog_timeout_with_qdevice = qdevice_sync_timeout + 5 - logger.warning("sbd_watchdog_timeout is set to {} for qdevice, it was {}".format(watchdog_timeout_with_qdevice, self._sbd_watchdog_timeout)) - self._sbd_watchdog_timeout = watchdog_timeout_with_qdevice - self._stonith_timeout = self.calculate_stonith_timeout(self._sbd_watchdog_timeout) - # add sbd and qdevice together from beginning - elif self._context.qdevice_inst: - if self._sbd_watchdog_timeout < self.SBD_WATCHDOG_TIMEOUT_DEFAULT_WITH_QDEVICE: - logger.warning("sbd_watchdog_timeout is set to {} for qdevice, it was {}".format(self.SBD_WATCHDOG_TIMEOUT_DEFAULT_WITH_QDEVICE, self._sbd_watchdog_timeout)) - self._sbd_watchdog_timeout = self.SBD_WATCHDOG_TIMEOUT_DEFAULT_WITH_QDEVICE - self._stonith_timeout = self.calculate_stonith_timeout(self._sbd_watchdog_timeout) - def _get_sbd_device_from_config(self): """ Gets currently configured SBD device, i.e. 
what's in /etc/sysconfig/sbd @@ -245,44 +429,6 @@ def _get_sbd_device_from_config(self): else: return None - @staticmethod - def is_delay_start(): - """ - Check if SBD_DELAY_START is yes - """ - res = SBDManager.get_sbd_value_from_config("SBD_DELAY_START") - return utils.is_boolean_true(res) - - @staticmethod - def get_sbd_watchdog_timeout(): - """ - Get SBD_WATCHDOG_TIMEOUT from /etc/sysconfig/sbd - """ - res = SBDManager.get_sbd_value_from_config("SBD_WATCHDOG_TIMEOUT") - if not res: - raise ValueError("Cannot get the value of SBD_WATCHDOG_TIMEOUT") - return int(res) - - @staticmethod - def get_sbd_start_timeout_threshold(): - """ - Get sbd start timeout threshold - TimeoutStartUSec of sbd shouldn't less than this value - """ - dev_list = SBDManager.get_sbd_device_from_config() - if not dev_list: - return int(SBDManager.get_sbd_watchdog_timeout() * 2) - else: - return int(SBDManager._get_sbd_msgwait(dev_list[0])) - - @staticmethod - def get_suitable_sbd_systemd_timeout(): - """ - Get suitable systemd start timeout for sbd.service - """ - timeout_value = SBDManager.get_sbd_start_timeout_threshold() - return int(timeout_value * 1.2) - def _restart_cluster_and_configure_sbd_ra(self): """ Try to configure sbd resource, restart cluster on needed @@ -291,14 +437,14 @@ def _restart_cluster_and_configure_sbd_ra(self): logger.info("Restarting cluster service") utils.cluster_run_cmd("crm cluster restart") bootstrap.wait_for_cluster() - self.configure_sbd_resource() + self.configure_sbd_resource_and_properties() else: logger.warning("To start sbd.service, need to restart cluster service manually on each node") if self.diskless_sbd: - cmd = self.DISKLESS_CRM_CMD.format(self._stonith_watchdog_timeout, str(self._stonith_timeout)+"s") + cmd = self.DISKLESS_CRM_CMD.format(self.timeout_inst.stonith_watchdog_timeout, SBDTimeout.get_stonith_timeout()) logger.warning("Then run \"{}\" on any node".format(cmd)) else: - self.configure_sbd_resource() + 
self.configure_sbd_resource_and_properties() def _enable_sbd_service(self): """ @@ -312,29 +458,6 @@ def _enable_sbd_service(self): # in init process bootstrap.invoke("systemctl enable sbd.service") - def _adjust_systemd(self): - """ - Adjust start timeout for sbd when set SBD_DELAY_START - """ - if not self.is_delay_start(): - return - - # TimeoutStartUSec default is 1min 30s, need to parse as seconds - cmd = "systemctl show -p TimeoutStartUSec sbd --value" - out = utils.get_stdout_or_raise_error(cmd) - res_seconds = re.search("(\d+)s", out) - default_start_timeout = int(res_seconds.group(1)) if res_seconds else 0 - res_min = re.search("(\d+)min", out) - default_start_timeout += 60 * int(res_min.group(1)) if res_min else 0 - if default_start_timeout >= self.get_sbd_start_timeout_threshold(): - return - - systemd_sbd_dir = "/etc/systemd/system/sbd.service.d" - utils.mkdirp(systemd_sbd_dir) - sbd_delay_start_file = "{}/sbd_delay_start.conf".format(systemd_sbd_dir) - utils.str2file("[Service]\nTimeoutSec={}".format(self.get_suitable_sbd_systemd_timeout()), sbd_delay_start_file) - utils.get_stdout_or_raise_error("systemctl daemon-reload") - def _warn_diskless_sbd(self, peer=None): """ Give warning when configuring diskless sbd @@ -370,26 +493,28 @@ def sbd_init(self): self._initialize_sbd() self._update_sbd_configuration() self._enable_sbd_service() - self._adjust_systemd() - def configure_sbd_resource(self): + def configure_sbd_resource_and_properties(self): """ - Configure stonith-sbd resource and stonith-enabled property + Configure stonith-sbd resource and related properties """ if not utils.package_is_installed("sbd") or \ not utils.service_is_enabled("sbd.service") or \ - utils.has_resource_configured("stonith:external/sbd"): + utils.has_resource_configured(self.SBD_RA): return + # disk-based sbd if self._get_sbd_device_from_config(): - if not bootstrap.invokerc("crm configure primitive stonith-sbd stonith:external/sbd pcmk_delay_max=30s"): - utils.fatal("Can't 
create stonith-sbd primitive") - if not bootstrap.invokerc("crm configure property stonith-enabled=true"): - utils.fatal("Can't enable STONITH for SBD") + utils.get_stdout_or_raise_error("crm configure primitive {} {}".format(self.SBD_RA_ID, self.SBD_RA)) + utils.set_property(stonith_enabled="true") + # disk-less sbd else: - cmd = self.DISKLESS_CRM_CMD.format(self._stonith_watchdog_timeout, str(self._stonith_timeout)+"s") - if not bootstrap.invokerc(cmd): - utils.fatal("Can't enable STONITH for diskless SBD") + cmd = self.DISKLESS_CRM_CMD.format(self.timeout_inst.stonith_watchdog_timeout, constants.STONITH_TIMEOUT_DEFAULT) + utils.get_stdout_or_raise_error(cmd) + + # in sbd stage + if self._context.cluster_is_running: + SBDTimeout.adjust_sbd_timeout_related_cluster_configuration() def join_sbd(self, peer_host): """ @@ -411,7 +536,6 @@ def join_sbd(self, peer_host): self._verify_sbd_device(dev_list, [peer_host]) else: self._warn_diskless_sbd(peer_host) - self._adjust_systemd() logger.info("Got {}SBD configuration".format("" if dev_list else "diskless ")) bootstrap.invoke("systemctl enable sbd.service") @@ -454,13 +578,6 @@ def update_configuration(sbd_config_dict): utils.sysconfig_set(SYSCONFIG_SBD, **sbd_config_dict) bootstrap.csync2_update(SYSCONFIG_SBD) - @staticmethod - def calculate_stonith_timeout(sbd_watchdog_timeout): - """ - Calculate stonith timeout - """ - return int(sbd_watchdog_timeout * 2 * 1.2) - @staticmethod def get_sbd_value_from_config(key): """ diff --git a/crmsh/utils.py b/crmsh/utils.py index 92c09ba749..543afe06e2 100644 --- a/crmsh/utils.py +++ b/crmsh/utils.py @@ -2975,6 +2975,25 @@ def is_quorate(peer=None): raise ValueError("Failed to get quorate status from corosync-quorumtool") +def is_2node_cluster_without_qdevice(removing=False): + """ + Check if current cluster has two nodes without qdevice + """ + current_num = len(list_cluster_nodes()) + remove_num = 1 if removing else 0 + qdevice_num = 1 if is_qdevice_configured() else 0 + return 
(current_num - remove_num + qdevice_num) == 2 + + +def get_pcmk_delay_max(two_node_without_qdevice=False): + """ + Get value of pcmk_delay_max + """ + if service_is_active("pacemaker.service") and two_node_without_qdevice: + return constants.PCMK_DELAY_MAX + return 0 + + def get_property(name): """ Get cluster properties @@ -3004,4 +3023,27 @@ def check_no_quorum_policy_with_dlm(): res = get_property("no-quorum-policy") if not res or res != "freeze": logger.warning("The DLM cluster best practice suggests to set the cluster property \"no-quorum-policy=freeze\"") + + +def set_property_conditionally(property_name, value_from_calculation): + """ + Set cluster property if calculated value is larger than current cib value + """ + _value = get_property(property_name) + value_from_cib = int(_value.strip('s')) if _value else 0 + if value_from_cib < value_from_calculation: + cmd = "crm configure property {}={}".format(property_name, value_from_calculation) + get_stdout_or_raise_error(cmd) + + +def get_systemd_timeout_start_in_sec(time_res): + """ + Get the TimeoutStartUSec value in second unit + The origin format was like: 1min 30s + """ + res_seconds = re.search("(\d+)s", time_res) + start_timeout = int(res_seconds.group(1)) if res_seconds else 0 + res_min = re.search("(\d+)min", time_res) + start_timeout += 60 * int(res_min.group(1)) if res_min else 0 + return start_timeout # vim:ts=4:sw=4:et: diff --git a/data-manifest b/data-manifest index 2c016a9204..60c085965b 100644 --- a/data-manifest +++ b/data-manifest @@ -68,7 +68,8 @@ test/evaltest.sh test/features/bootstrap_bugs.feature test/features/bootstrap_init_join_remove.feature test/features/bootstrap_options.feature -test/features/bootstrap_sbd.feature +test/features/bootstrap_sbd_delay.feature +test/features/bootstrap_sbd_normal.feature test/features/configure_bugs.feature test/features/constraints_bugs.feature test/features/environment.py diff --git a/test/features/bootstrap_sbd_delay.feature 
b/test/features/bootstrap_sbd_delay.feature new file mode 100644 index 0000000000..be7db7db93 --- /dev/null +++ b/test/features/bootstrap_sbd_delay.feature @@ -0,0 +1,231 @@ +@sbd +Feature: configure sbd delay start correctly + + Tag @clean means need to stop cluster service if the service is available + + @clean + Scenario: disk-based SBD with small sbd_watchdog_timeout + Given Run "test -f /etc/crm/profiles.yml" OK + Given Yaml "default:corosync.totem.token" value is "5000" + Given Yaml "default:sbd.watchdog_timeout" value is "15" + + Given Has disk "/dev/sda1" on "hanode1" + Given Cluster service is "stopped" on "hanode1" + When Run "crm cluster init -s /dev/sda1 -y" on "hanode1" + Then Cluster service is "started" on "hanode1" + And Service "sbd" is "started" on "hanode1" + And Resource "stonith-sbd" type "external/sbd" is "Started" + And SBD option "SBD_DELAY_START" value is "no" + And SBD option "SBD_WATCHDOG_TIMEOUT" value is "15" + And SBD option "msgwait" value for "/dev/sda1" is "30" + # calculated and set by sbd RA + And Cluster property "stonith-timeout" is "43" + And Parameter "pcmk_delay_max" not configured in "stonith-sbd" + + Given Has disk "/dev/sda1" on "hanode2" + Given Cluster service is "stopped" on "hanode2" + When Run "crm cluster join -c hanode1 -y" on "hanode2" + Then Cluster service is "started" on "hanode2" + And Service "sbd" is "started" on "hanode2" + # SBD_DELAY_START >= (token + consensus + pcmk_delay_max + msgwait) # for disk-based sbd + And SBD option "SBD_DELAY_START" value is "71" + And SBD option "SBD_WATCHDOG_TIMEOUT" value is "15" + And SBD option "msgwait" value for "/dev/sda1" is "30" + # value_from_sbd >= 1.2 * (pcmk_delay_max + msgwait) # for disk-based sbd + # stonith_timeout >= max(value_from_sbd, constants.STONITH_TIMEOUT_DEFAULT) + token + consensus + And Cluster property "stonith-timeout" is "83" + And Parameter "pcmk_delay_max" configured in "stonith-sbd" + + Given Has disk "/dev/sda1" on "hanode3" + Given Cluster 
service is "stopped" on "hanode3" + When Run "crm cluster join -c hanode1 -y" on "hanode3" + Then Cluster service is "started" on "hanode3" + And Service "sbd" is "started" on "hanode3" + # SBD_DELAY_START >= (token + consensus + pcmk_delay_max + msgwait) # for disk-based sbd + # runtime value is "41", we keep the larger one here + And SBD option "SBD_DELAY_START" value is "71" + And SBD option "SBD_WATCHDOG_TIMEOUT" value is "15" + And SBD option "msgwait" value for "/dev/sda1" is "30" + # value_from_sbd >= 1.2 * (pcmk_delay_max + msgwait) # for disk-based sbd + # stonith_timeout >= max(value_from_sbd, constants.STONITH_TIMEOUT_DEFAULT) + token + consensus + # runtime value is "71", we keep the larger one here + And Cluster property "stonith-timeout" is "83" + And Parameter "pcmk_delay_max" not configured in "stonith-sbd" + + When Run "crm cluster remove hanode3 -y" on "hanode1" + Then Cluster service is "stopped" on "hanode3" + And Service "sbd" is "stopped" on "hanode3" + And SBD option "SBD_DELAY_START" value is "71" + And SBD option "SBD_WATCHDOG_TIMEOUT" value is "15" + And SBD option "msgwait" value for "/dev/sda1" is "30" + And Cluster property "stonith-timeout" is "83" + And Parameter "pcmk_delay_max" configured in "stonith-sbd" + + @clean + Scenario: disk-less SBD with small sbd_watchdog_timeout + Given Run "test -f /etc/crm/profiles.yml" OK + Given Yaml "default:corosync.totem.token" value is "5000" + Given Yaml "default:sbd.watchdog_timeout" value is "15" + + Given Cluster service is "stopped" on "hanode1" + When Run "crm cluster init -S -y" on "hanode1" + Then Cluster service is "started" on "hanode1" + And SBD option "SBD_DELAY_START" value is "no" + And SBD option "SBD_WATCHDOG_TIMEOUT" value is "15" + And Cluster property "stonith-timeout" is "60" + + Given Cluster service is "stopped" on "hanode2" + When Run "crm cluster join -c hanode1 -y" on "hanode2" + Then Cluster service is "started" on "hanode2" + # SBD_DELAY_START >= (token + consensus + 
2*SBD_WATCHDOG_TIMEOUT) # for disk-less sbd + And SBD option "SBD_DELAY_START" value is "41" + And SBD option "SBD_WATCHDOG_TIMEOUT" value is "15" + # stonith-timeout >= 1.2 * max(stonith_watchdog_timeout, 2*SBD_WATCHDOG_TIMEOUT) # for disk-less sbd + # stonith_timeout >= max(value_from_sbd, constants.STONITH_TIMEOUT_DEFAULT) + token + consensus + And Cluster property "stonith-timeout" is "71" + + Given Cluster service is "stopped" on "hanode3" + When Run "crm cluster join -c hanode1 -y" on "hanode3" + Then Cluster service is "started" on "hanode3" + And SBD option "SBD_DELAY_START" value is "41" + And SBD option "SBD_WATCHDOG_TIMEOUT" value is "15" + And Cluster property "stonith-timeout" is "71" + + When Run "crm cluster remove hanode3 -y" on "hanode1" + Then Cluster service is "stopped" on "hanode3" + And SBD option "SBD_DELAY_START" value is "41" + And SBD option "SBD_WATCHDOG_TIMEOUT" value is "15" + And Cluster property "stonith-timeout" is "71" + + @clean + Scenario: disk-based SBD with big sbd_watchdog_timeout + When Run "sed -i 's/watchdog_timeout: 15/watchdog_timeout: 60/' /etc/crm/profiles.yml" on "hanode1" + Given Yaml "default:corosync.totem.token" value is "5000" + Given Yaml "default:sbd.watchdog_timeout" value is "60" + + Given Has disk "/dev/sda1" on "hanode1" + Given Cluster service is "stopped" on "hanode1" + When Run "crm cluster init -s /dev/sda1 -y" on "hanode1" + Then Cluster service is "started" on "hanode1" + And Service "sbd" is "started" on "hanode1" + And Resource "stonith-sbd" type "external/sbd" is "Started" + And SBD option "SBD_DELAY_START" value is "no" + And SBD option "SBD_WATCHDOG_TIMEOUT" value is "60" + And SBD option "msgwait" value for "/dev/sda1" is "120" + # calculated and set by sbd RA + And Cluster property "stonith-timeout" is "172" + And Parameter "pcmk_delay_max" not configured in "stonith-sbd" + + Given Has disk "/dev/sda1" on "hanode2" + Given Cluster service is "stopped" on "hanode2" + When Run "crm cluster join -c 
hanode1 -y" on "hanode2" + Then Cluster service is "started" on "hanode2" + And Service "sbd" is "started" on "hanode2" + # SBD_DELAY_START >= (token + consensus + pcmk_delay_max + msgwait) # for disk-based sbd + And SBD option "SBD_DELAY_START" value is "161" + And SBD option "SBD_WATCHDOG_TIMEOUT" value is "60" + And SBD option "msgwait" value for "/dev/sda1" is "120" + # stonith-timeout >= 1.2 * (pcmk_delay_max + msgwait) # for disk-based sbd + # stonith_timeout >= max(value_from_sbd, constants.STONITH_TIMEOUT_DEFAULT) + token + consensus + And Cluster property "stonith-timeout" is "191" + And Parameter "pcmk_delay_max" configured in "stonith-sbd" + # since SBD_DELAY_START value(161s) > default systemd startup value(1min 30s) + And Run "test -f /etc/systemd/system/sbd.service.d/sbd_delay_start.conf" OK + # 1.2*SBD_DELAY_START + And Run "grep 'TimeoutSec=193' /etc/systemd/system/sbd.service.d/sbd_delay_start.conf" OK + + Given Has disk "/dev/sda1" on "hanode3" + Given Cluster service is "stopped" on "hanode3" + When Run "crm cluster join -c hanode1 -y" on "hanode3" + Then Cluster service is "started" on "hanode3" + And Service "sbd" is "started" on "hanode3" + And SBD option "SBD_DELAY_START" value is "161" + And SBD option "SBD_WATCHDOG_TIMEOUT" value is "60" + And SBD option "msgwait" value for "/dev/sda1" is "120" + And Cluster property "stonith-timeout" is "191" + And Parameter "pcmk_delay_max" not configured in "stonith-sbd" + And Run "test -f /etc/systemd/system/sbd.service.d/sbd_delay_start.conf" OK + And Run "grep 'TimeoutSec=193' /etc/systemd/system/sbd.service.d/sbd_delay_start.conf" OK + + When Run "crm cluster remove hanode3 -y" on "hanode1" + Then Cluster service is "stopped" on "hanode3" + And Service "sbd" is "stopped" on "hanode3" + And SBD option "SBD_DELAY_START" value is "161" + And SBD option "SBD_WATCHDOG_TIMEOUT" value is "60" + And SBD option "msgwait" value for "/dev/sda1" is "120" + And Cluster property "stonith-timeout" is "191" + And 
Parameter "pcmk_delay_max" configured in "stonith-sbd" + And Run "test -f /etc/systemd/system/sbd.service.d/sbd_delay_start.conf" OK + And Run "grep 'TimeoutSec=193' /etc/systemd/system/sbd.service.d/sbd_delay_start.conf" OK + When Run "sed -i 's/watchdog_timeout: 60/watchdog_timeout: 15/g' /etc/crm/profiles.yml" on "hanode1" + + @clean + Scenario: Add sbd via stage on a running cluster + Given Run "test -f /etc/crm/profiles.yml" OK + Given Yaml "default:corosync.totem.token" value is "5000" + Given Yaml "default:sbd.watchdog_timeout" value is "15" + + Given Has disk "/dev/sda1" on "hanode1" + Given Has disk "/dev/sda1" on "hanode2" + Given Cluster service is "stopped" on "hanode1" + Given Cluster service is "stopped" on "hanode2" + When Run "crm cluster init -y" on "hanode1" + Then Cluster service is "started" on "hanode1" + When Run "crm cluster join -c hanode1 -y" on "hanode2" + Then Cluster service is "started" on "hanode2" + + When Run "crm cluster init sbd -s /dev/sda1 -y" on "hanode1" + Then Service "sbd" is "started" on "hanode1" + Then Service "sbd" is "started" on "hanode2" + And SBD option "SBD_DELAY_START" value is "71" + And SBD option "SBD_WATCHDOG_TIMEOUT" value is "15" + And SBD option "msgwait" value for "/dev/sda1" is "30" + And Cluster property "stonith-timeout" is "83" + And Parameter "pcmk_delay_max" configured in "stonith-sbd" + + @clean + Scenario: Add disk-based sbd with qdevice + Given Run "test -f /etc/crm/profiles.yml" OK + Given Yaml "default:corosync.totem.token" value is "5000" + Given Yaml "default:sbd.watchdog_timeout" value is "15" + Given Has disk "/dev/sda1" on "hanode1" + Given Has disk "/dev/sda1" on "hanode2" + Given Cluster service is "stopped" on "hanode1" + Given Cluster service is "stopped" on "hanode2" + + When Run "crm cluster init -s /dev/sda1 --qnetd-hostname=qnetd-node -y" on "hanode1" + Then Cluster service is "started" on "hanode1" + When Run "crm cluster join -c hanode1 -y" on "hanode2" + Then Cluster service is 
"started" on "hanode2" + And Service "corosync-qdevice" is "started" on "hanode1" + And Service "corosync-qdevice" is "started" on "hanode2" + And Service "sbd" is "started" on "hanode1" + And Service "sbd" is "started" on "hanode2" + + And SBD option "SBD_DELAY_START" value is "41" + And SBD option "SBD_WATCHDOG_TIMEOUT" value is "15" + And SBD option "msgwait" value for "/dev/sda1" is "30" + And Cluster property "stonith-timeout" is "71" + And Parameter "pcmk_delay_max" not configured in "stonith-sbd" + + @clean + Scenario: Add disk-less sbd with qdevice + Given Run "test -f /etc/crm/profiles.yml" OK + Given Yaml "default:corosync.totem.token" value is "5000" + Given Yaml "default:sbd.watchdog_timeout" value is "15" + Given Cluster service is "stopped" on "hanode1" + Given Cluster service is "stopped" on "hanode2" + + When Run "crm cluster init -S --qnetd-hostname=qnetd-node -y" on "hanode1" + Then Cluster service is "started" on "hanode1" + When Run "crm cluster join -c hanode1 -y" on "hanode2" + Then Cluster service is "started" on "hanode2" + And Service "corosync-qdevice" is "started" on "hanode1" + And Service "corosync-qdevice" is "started" on "hanode2" + And Service "sbd" is "started" on "hanode1" + And Service "sbd" is "started" on "hanode2" + + And SBD option "SBD_DELAY_START" value is "81" + And SBD option "SBD_WATCHDOG_TIMEOUT" value is "35" + And Cluster property "stonith-timeout" is "95" + And Cluster property "stonith-watchdog-timeout" is "-1" diff --git a/test/features/bootstrap_sbd.feature b/test/features/bootstrap_sbd_normal.feature similarity index 99% rename from test/features/bootstrap_sbd.feature rename to test/features/bootstrap_sbd_normal.feature index 33fa4ffc7c..8bf63d9f4c 100644 --- a/test/features/bootstrap_sbd.feature +++ b/test/features/bootstrap_sbd_normal.feature @@ -1,4 +1,4 @@ -@bootstrap +@sbd Feature: crmsh bootstrap sbd management Tag @clean means need to stop cluster service if the service is available diff --git 
a/test/features/steps/step_implementation.py b/test/features/steps/step_implementation.py index d72fba390a..5e2213f8e7 100644 --- a/test/features/steps/step_implementation.py +++ b/test/features/steps/step_implementation.py @@ -2,8 +2,9 @@ import time import os import datetime +import yaml from behave import given, when, then -from crmsh import corosync, parallax +from crmsh import corosync, parallax, sbd from crmsh import utils as crmutils from utils import check_cluster_state, check_service_state, online, run_command, me, \ run_command_local_or_remote, file_in_archive @@ -35,6 +36,24 @@ def step_impl(context, nodelist): assert online(context, nodelist) is True +@given('Run "{cmd}" OK') +def step_impl(context, cmd): + rc, _, = run_command(context, cmd) + assert rc == 0 + + +@then('Run "{cmd}" OK') +def step_impl(context, cmd): + rc, _, = run_command(context, cmd) + assert rc == 0 + + +@when('Run "{cmd}" OK') +def step_impl(context, cmd): + rc, _, = run_command(context, cmd) + assert rc == 0 + + @given('IP "{addr}" is belong to "{iface}"') def step_impl(context, addr, iface): cmd = 'ip address show dev {}'.format(iface) @@ -318,3 +337,44 @@ def step_impl(context, res_id, node): def step_impl(context, res_id, node): rc, out, err = crmutils.get_stdout_stderr("crm_mon -1") assert re.search(r'\*\s+{}\s+.*Started\s+{}'.format(res_id, node), out) is not None + + +@then('SBD option "{key}" value is "{value}"') +def step_impl(context, key, value): + res = sbd.SBDManager.get_sbd_value_from_config(key) + assert res == value + + +@then('SBD option "{key}" value for "{dev}" is "{value}"') +def step_impl(context, key, dev, value): + res = sbd.SBDTimeout.get_sbd_msgwait(dev) + assert res == int(value) + + +@then('Cluster property "{key}" is "{value}"') +def step_impl(context, key, value): + res = crmutils.get_property(key) + assert res is not None and str(res) == value + + +@then('Parameter "{param_name}" not configured in "{res_id}"') +def step_impl(context, param_name, 
res_id): + _, out = run_command(context, "crm configure show {}".format(res_id)) + result = re.search("params {}=".format(param_name), out) + assert result is None + + +@then('Parameter "{param_name}" configured in "{res_id}"') +def step_impl(context, param_name, res_id): + _, out = run_command(context, "crm configure show {}".format(res_id)) + result = re.search("params {}=".format(param_name), out) + assert result is not None + + +@given('Yaml "{path}" value is "{value}"') +def step_impl(context, path, value): + yaml_file = "/etc/crm/profiles.yml" + with open(yaml_file) as f: + data = yaml.load(f, Loader=yaml.SafeLoader) + sec_name, key = path.split(':') + assert str(data[sec_name][key]) == str(value) diff --git a/test/unittests/test_bootstrap.py b/test/unittests/test_bootstrap.py index 2ce23d974a..76e1fa2057 100644 --- a/test/unittests/test_bootstrap.py +++ b/test/unittests/test_bootstrap.py @@ -263,17 +263,18 @@ def tearDownClass(cls): @mock.patch('crmsh.log.LoggerUtils.status_long') @mock.patch('crmsh.utils.start_service') - @mock.patch('crmsh.sbd.SBDManager.get_suitable_sbd_systemd_timeout') - @mock.patch('crmsh.sbd.SBDManager.is_delay_start') + @mock.patch('crmsh.sbd.SBDTimeout.get_sbd_delay_start_sec_from_sysconfig') + @mock.patch('crmsh.sbd.SBDTimeout.is_sbd_delay_start') @mock.patch('crmsh.utils.service_is_enabled') @mock.patch('crmsh.utils.package_is_installed') def test_start_pacemaker(self, mock_installed, mock_enabled, mock_delay_start, mock_timeout, mock_start, mock_long): + bootstrap._context = None mock_installed.return_value = True mock_enabled.return_value = True mock_delay_start.return_value = True mock_timeout.return_value = 60 bootstrap.start_pacemaker() - mock_long.assert_called_once_with('Starting pacemaker(waiting for sbd 60s)') + mock_long.assert_called_once_with('Starting pacemaker(delaying start of sbd for 60s)') mock_start.assert_called_once_with('pacemaker.service', enable=True, node_list=[]) 
@mock.patch('crmsh.bootstrap.configure_local_ssh_key') diff --git a/test/unittests/test_corosync.py b/test/unittests/test_corosync.py index bceabcbaff..6d053e7a6c 100644 --- a/test/unittests/test_corosync.py +++ b/test/unittests/test_corosync.py @@ -245,6 +245,36 @@ def test_is_unicast(mock_get_value): mock_get_value.assert_called_once_with("totem.transport") +@mock.patch('crmsh.corosync.get_corosync_value_dict') +def test_token_and_consensus_timeout(mock_get_dict): + mock_get_dict.return_value = {"token": 10, "consensus": 12} + assert corosync.token_and_consensus_timeout() == 22 + + +@mock.patch('crmsh.corosync.get_corosync_value') +def test_get_corosync_value_dict(mock_get_value): + mock_get_value.side_effect = ["10000", None] + res = corosync.get_corosync_value_dict() + assert res == {"token": 10, "consensus": 12} + + +@mock.patch('crmsh.corosync.get_value') +@mock.patch('crmsh.utils.get_stdout_or_raise_error') +def test_get_corosync_value_raise(mock_run, mock_get_value): + mock_run.side_effect = ValueError + mock_get_value.return_value = None + assert corosync.get_corosync_value("xxx") is None + mock_run.assert_called_once_with("corosync-cmapctl xxx") + mock_get_value.assert_called_once_with("xxx") + + +@mock.patch('crmsh.utils.get_stdout_or_raise_error') +def test_get_corosync_value(mock_run): + mock_run.return_value = "totem.token = 10000" + assert corosync.get_corosync_value("totem.token") == "10000" + mock_run.assert_called_once_with("corosync-cmapctl totem.token") + + class TestCorosyncParser(unittest.TestCase): def test_parse(self): p = Parser(F1) diff --git a/test/unittests/test_crashtest_utils.py b/test/unittests/test_crashtest_utils.py index 2e2849228a..8c4de7b3ea 100644 --- a/test/unittests/test_crashtest_utils.py +++ b/test/unittests/test_crashtest_utils.py @@ -62,14 +62,14 @@ def tearDownClass(cls): Global tearDown. 
""" - @mock.patch('crmsh.crash_test.utils.get_property') + @mock.patch('crmsh.crash_test.utils.crmshutils.get_property') def test_fence_enabled_false(self, mock_get_property): mock_get_property.return_value = None res = self.fence_info_inst.fence_enabled self.assertEqual(res, False) mock_get_property.assert_called_once_with("stonith-enabled") - @mock.patch('crmsh.crash_test.utils.get_property') + @mock.patch('crmsh.crash_test.utils.crmshutils.get_property') def test_fence_enabled_true(self, mock_get_property): mock_get_property.return_value = "True" res = self.fence_info_inst.fence_enabled @@ -77,7 +77,7 @@ def test_fence_enabled_true(self, mock_get_property): mock_get_property.assert_called_once_with("stonith-enabled") @mock.patch('crmsh.crash_test.utils.msg_error') - @mock.patch('crmsh.crash_test.utils.get_property') + @mock.patch('crmsh.crash_test.utils.crmshutils.get_property') def test_fence_action_none(self, mock_get_property, mock_error): mock_get_property.return_value = None res = self.fence_info_inst.fence_action @@ -85,21 +85,21 @@ def test_fence_action_none(self, mock_get_property, mock_error): mock_get_property.assert_called_once_with("stonith-action") mock_error.assert_called_once_with('Cluster property "stonith-action" should be reboot|off|poweroff') - @mock.patch('crmsh.crash_test.utils.get_property') + @mock.patch('crmsh.crash_test.utils.crmshutils.get_property') def test_fence_action(self, mock_get_property): mock_get_property.return_value = "reboot" res = self.fence_info_inst.fence_action self.assertEqual(res, "reboot") mock_get_property.assert_called_once_with("stonith-action") - @mock.patch('crmsh.crash_test.utils.get_property') + @mock.patch('crmsh.crash_test.utils.crmshutils.get_property') def test_fence_timeout(self, mock_get_property): mock_get_property.return_value = "60s" res = self.fence_info_inst.fence_timeout self.assertEqual(res, "60") mock_get_property.assert_called_once_with("stonith-timeout") - 
@mock.patch('crmsh.crash_test.utils.get_property') + @mock.patch('crmsh.crash_test.utils.crmshutils.get_property') def test_fence_timeout_default(self, mock_get_property): mock_get_property.return_value = None res = self.fence_info_inst.fence_timeout @@ -381,20 +381,6 @@ def test_online_nodes(self, mock_run): self.assertEqual(res, ["15sp2-1", "15sp2-2"]) mock_run.assert_called_once_with("crm_mon -1") - @mock.patch('crmsh.crash_test.utils.crmshutils.get_stdout_stderr') - def test_get_property_none(self, mock_run): - mock_run.return_value = (1, None, "error") - res = utils.get_property("test") - self.assertEqual(res, None) - mock_run.assert_called_once_with("crm configure get_property test") - - @mock.patch('crmsh.crash_test.utils.crmshutils.get_stdout_stderr') - def test_get_property(self, mock_run): - mock_run.return_value = (0, "data", None) - res = utils.get_property("test") - self.assertEqual(res, "data") - mock_run.assert_called_once_with("crm configure get_property test") - @mock.patch('crmsh.crash_test.utils.online_nodes') def test_peer_node_list_empty(self, mock_online): mock_online.return_value = None diff --git a/test/unittests/test_sbd.py b/test/unittests/test_sbd.py index 03320c850d..4f583d1c64 100644 --- a/test/unittests/test_sbd.py +++ b/test/unittests/test_sbd.py @@ -11,6 +11,275 @@ from crmsh import sbd +class TestSBDTimeout(unittest.TestCase): + """ + Unitary tests for crmsh.sbd.SBDTimeout + """ + + @classmethod + def setUpClass(cls): + """ + Global setUp. + """ + + def setUp(self): + """ + Test setUp. + """ + _dict = {"sbd.watchdog_timeout": 5, "sbd.msgwait": 10} + _inst_q = mock.Mock() + self.sbd_timeout_inst = sbd.SBDTimeout(mock.Mock(profiles_dict=_dict, is_s390=True, qdevice_inst=_inst_q)) + self.sbd_timeout_inst_removing = sbd.SBDTimeout(mock.Mock(), True) + + def tearDown(self): + """ + Test tearDown. + """ + + @classmethod + def tearDownClass(cls): + """ + Global tearDown. 
+ """ + + @mock.patch('logging.Logger.warning') + def test_set_sbd_watchdog_timeout(self, mock_warn): + self.sbd_timeout_inst.set_sbd_watchdog_timeout() + mock_warn.assert_called_once_with("sbd_watchdog_timeout is set to %d for s390, it was %d", sbd.SBDTimeout.SBD_WATCHDOG_TIMEOUT_DEFAULT_S390, 5) + + @mock.patch('logging.Logger.warning') + def test_set_sbd_msgwait(self, mock_warn): + self.sbd_timeout_inst.sbd_watchdog_timeout = 15 + self.sbd_timeout_inst.set_sbd_msgwait() + mock_warn.assert_called_once_with("sbd msgwait is set to %d, it was %d", 30, 10) + + @mock.patch('logging.Logger.warning') + @mock.patch('crmsh.utils.get_qdevice_sync_timeout') + @mock.patch('crmsh.utils.service_is_active') + @mock.patch('crmsh.utils.is_qdevice_configured') + def test_adjust_sbd_watchdog_timeout_with_diskless_and_qdevice_sbd_stage(self, mock_is_configured, mock_is_active, mock_get_sync, mock_warn): + mock_is_configured.return_value = True + mock_is_active.return_value = True + mock_get_sync.return_value = 15 + self.sbd_timeout_inst.sbd_watchdog_timeout = 5 + self.sbd_timeout_inst.adjust_sbd_watchdog_timeout_with_diskless_and_qdevice() + mock_warn.assert_called_once_with("sbd_watchdog_timeout is set to 20 for qdevice, it was 5") + + @mock.patch('logging.Logger.warning') + @mock.patch('crmsh.utils.is_qdevice_configured') + def test_adjust_sbd_watchdog_timeout_with_diskless_and_qdevice_all(self, mock_is_configured, mock_warn): + mock_is_configured.return_value = False + self.sbd_timeout_inst.sbd_watchdog_timeout = 5 + self.sbd_timeout_inst.adjust_sbd_watchdog_timeout_with_diskless_and_qdevice() + mock_warn.assert_called_once_with("sbd_watchdog_timeout is set to 35 for qdevice, it was 5") + + @mock.patch('crmsh.utils.get_stdout_or_raise_error') + def test_get_sbd_msgwait_exception(self, mock_run): + mock_run.return_value = "data" + with self.assertRaises(ValueError) as err: + sbd.SBDTimeout.get_sbd_msgwait("/dev/sda1") + self.assertEqual("Cannot get sbd msgwait for /dev/sda1", 
str(err.exception)) + mock_run.assert_called_once_with("sbd -d /dev/sda1 dump") + + @mock.patch('crmsh.utils.get_stdout_or_raise_error') + def test_get_sbd_msgwait(self, mock_run): + mock_run.return_value = """ + Timeout (loop) : 1 + Timeout (msgwait) : 10 + ==Header on disk /dev/sda1 is dumped + """ + res = sbd.SBDTimeout.get_sbd_msgwait("/dev/sda1") + assert res == 10 + mock_run.assert_called_once_with("sbd -d /dev/sda1 dump") + + @mock.patch('crmsh.sbd.SBDManager.get_sbd_value_from_config') + def test_get_sbd_watchdog_timeout_exception(self, mock_get): + mock_get.return_value = None + with self.assertRaises(ValueError) as err: + sbd.SBDTimeout.get_sbd_watchdog_timeout() + self.assertEqual("Cannot get the value of SBD_WATCHDOG_TIMEOUT", str(err.exception)) + mock_get.assert_called_once_with("SBD_WATCHDOG_TIMEOUT") + + @mock.patch('crmsh.sbd.SBDManager.get_sbd_value_from_config') + def test_get_sbd_watchdog_timeout(self, mock_get): + mock_get.return_value = 5 + res = sbd.SBDTimeout.get_sbd_watchdog_timeout() + assert res == 5 + mock_get.assert_called_once_with("SBD_WATCHDOG_TIMEOUT") + + @mock.patch('crmsh.utils.service_is_active') + def test_get_stonith_watchdog_timeout_return(self, mock_active): + mock_active.return_value = False + res = sbd.SBDTimeout.get_stonith_watchdog_timeout() + assert res == sbd.SBDTimeout.STONITH_WATCHDOG_TIMEOUT_DEFAULT + mock_active.assert_called_once_with("pacemaker.service") + + @mock.patch('crmsh.utils.get_property') + @mock.patch('crmsh.utils.service_is_active') + def test_get_stonith_watchdog_timeout(self, mock_active, mock_get_property): + mock_active.return_value = True + mock_get_property.return_value = "60s" + res = sbd.SBDTimeout.get_stonith_watchdog_timeout() + assert res == 60 + mock_active.assert_called_once_with("pacemaker.service") + + @mock.patch('logging.Logger.debug') + @mock.patch('crmsh.sbd.SBDManager.get_sbd_value_from_config') + @mock.patch('crmsh.utils.detect_virt') + 
@mock.patch('crmsh.sbd.SBDTimeout.get_sbd_delay_start_expected') + @mock.patch('crmsh.utils.get_pcmk_delay_max') + @mock.patch('crmsh.sbd.SBDTimeout.get_sbd_msgwait') + @mock.patch('crmsh.sbd.SBDManager.get_sbd_device_from_config') + @mock.patch('crmsh.utils.is_2node_cluster_without_qdevice') + def test_load_configurations(self, mock_2node, mock_get_sbd_dev, mock_get_msgwait, mock_pcmk_delay, mock_delay_expected, mock_detect, mock_get_sbd_value, mock_debug): + mock_2node.return_value = True + mock_debug.return_value = False + mock_get_sbd_value.return_value = "no" + mock_get_sbd_dev.return_value = ["/dev/sda1"] + mock_get_msgwait.return_value = 30 + mock_pcmk_delay.return_value = 30 + + self.sbd_timeout_inst._load_configurations() + + mock_2node.assert_called_once_with(False) + mock_get_sbd_dev.assert_called_once_with() + mock_get_msgwait.assert_called_once_with("/dev/sda1") + mock_pcmk_delay.assert_called_once_with(True) + + @mock.patch('logging.Logger.debug') + @mock.patch('crmsh.sbd.SBDManager.get_sbd_value_from_config') + @mock.patch('crmsh.utils.detect_virt') + @mock.patch('crmsh.sbd.SBDTimeout.get_sbd_delay_start_expected') + @mock.patch('crmsh.sbd.SBDTimeout.get_stonith_watchdog_timeout') + @mock.patch('crmsh.sbd.SBDTimeout.get_sbd_watchdog_timeout') + @mock.patch('crmsh.sbd.SBDManager.get_sbd_device_from_config') + @mock.patch('crmsh.utils.is_2node_cluster_without_qdevice') + def test_load_configurations_diskless(self, mock_2node, mock_get_sbd_dev, mock_get_watchdog_timeout, mock_get_stonith_watchdog_timeout, mock_delay_expected, mock_detect, mock_get_sbd_value, mock_debug): + mock_2node.return_value = True + mock_debug.return_value = False + mock_get_sbd_value.return_value = "no" + mock_get_sbd_dev.return_value = [] + mock_get_watchdog_timeout.return_value = 30 + mock_get_stonith_watchdog_timeout.return_value = 30 + + self.sbd_timeout_inst._load_configurations() + + mock_2node.assert_called_once_with(False) + mock_get_sbd_dev.assert_called_once_with() + 
mock_get_watchdog_timeout.assert_called_once_with() + mock_get_stonith_watchdog_timeout.assert_called_once_with() + + @mock.patch('crmsh.corosync.token_and_consensus_timeout') + @mock.patch('logging.Logger.debug') + def test_get_stonith_timeout_expected(self, mock_debug, mock_general): + self.sbd_timeout_inst.disk_based = True + self.sbd_timeout_inst.pcmk_delay_max = 30 + self.sbd_timeout_inst.msgwait = 30 + mock_general.return_value = 11 + res = self.sbd_timeout_inst.get_stonith_timeout_expected() + assert res == 83 + + @mock.patch('crmsh.corosync.token_and_consensus_timeout') + @mock.patch('logging.Logger.debug') + def test_get_stonith_timeout_expected_diskless(self, mock_debug, mock_general): + self.sbd_timeout_inst.disk_based = False + self.sbd_timeout_inst.stonith_watchdog_timeout = -1 + self.sbd_timeout_inst.sbd_watchdog_timeout = 15 + mock_general.return_value = 11 + res = self.sbd_timeout_inst.get_stonith_timeout_expected() + assert res == 71 + + @mock.patch('crmsh.corosync.token_and_consensus_timeout') + def test_get_sbd_delay_start_expected(self, mock_corosync): + mock_corosync.return_value = 30 + self.sbd_timeout_inst.disk_based = True + self.sbd_timeout_inst.pcmk_delay_max = 30 + self.sbd_timeout_inst.msgwait = 30 + res = self.sbd_timeout_inst.get_sbd_delay_start_expected() + assert res == 90 + + @mock.patch('crmsh.corosync.token_and_consensus_timeout') + def test_get_sbd_delay_start_expected_diskless(self, mock_corosync): + mock_corosync.return_value = 30 + self.sbd_timeout_inst.disk_based = False + self.sbd_timeout_inst.sbd_watchdog_timeout = 30 + res = self.sbd_timeout_inst.get_sbd_delay_start_expected() + assert res == 90 + + @mock.patch('crmsh.sbd.SBDManager.get_sbd_value_from_config') + def test_is_sbd_delay_start(self, mock_get_sbd_value): + mock_get_sbd_value.return_value = "100" + assert sbd.SBDTimeout.is_sbd_delay_start() is True + mock_get_sbd_value.assert_called_once_with("SBD_DELAY_START") + + 
@mock.patch('crmsh.sbd.SBDManager.update_configuration') + def test_adjust_sbd_delay_start_return(self, mock_update): + self.sbd_timeout_inst.sbd_delay_start_value_expected = 100 + self.sbd_timeout_inst.sbd_delay_start_value_from_config = "100" + self.sbd_timeout_inst.adjust_sbd_delay_start() + mock_update.assert_not_called() + + @mock.patch('crmsh.sbd.SBDManager.update_configuration') + def test_adjust_sbd_delay_start(self, mock_update): + self.sbd_timeout_inst.sbd_delay_start_value_expected = 100 + self.sbd_timeout_inst.sbd_delay_start_value_from_config = "no" + self.sbd_timeout_inst.adjust_sbd_delay_start() + mock_update.assert_called_once_with({"SBD_DELAY_START": "100"}) + + @mock.patch('crmsh.utils.get_stdout_or_raise_error') + @mock.patch('crmsh.sbd.SBDManager.get_sbd_value_from_config') + def test_adjust_systemd_start_timeout_no_delay_start_no(self, mock_get_sbd_value, mock_run): + mock_get_sbd_value.return_value = "no" + self.sbd_timeout_inst.adjust_systemd_start_timeout() + mock_run.assert_not_called() + + @mock.patch('crmsh.utils.mkdirp') + @mock.patch('crmsh.utils.get_systemd_timeout_start_in_sec') + @mock.patch('crmsh.utils.get_stdout_or_raise_error') + @mock.patch('crmsh.sbd.SBDManager.get_sbd_value_from_config') + def test_adjust_systemd_start_timeout_no_delay_start_return(self, mock_get_sbd_value, mock_run, mock_get_systemd_sec, mock_mkdirp): + mock_get_sbd_value.return_value = "10" + mock_run.return_value = "1min 30s" + mock_get_systemd_sec.return_value = 90 + self.sbd_timeout_inst.adjust_systemd_start_timeout() + mock_run.assert_called_once_with("systemctl show -p TimeoutStartUSec sbd --value") + mock_get_systemd_sec.assert_called_once_with("1min 30s") + mock_mkdirp.assert_not_called() + + @mock.patch('crmsh.utils.cluster_run_cmd') + @mock.patch('crmsh.bootstrap.csync2_update') + @mock.patch('crmsh.utils.str2file') + @mock.patch('crmsh.utils.mkdirp') + @mock.patch('crmsh.utils.get_systemd_timeout_start_in_sec') + 
@mock.patch('crmsh.utils.get_stdout_or_raise_error') + @mock.patch('crmsh.sbd.SBDManager.get_sbd_value_from_config') + def test_adjust_systemd_start_timeout_no_delay_start(self, mock_get_sbd_value, mock_run, mock_get_systemd_sec, mock_mkdirp, mock_str2file, mock_csync2, mock_cluster_run): + mock_get_sbd_value.return_value = "100" + mock_run.return_value = "1min 30s" + mock_get_systemd_sec.return_value = 90 + self.sbd_timeout_inst.adjust_systemd_start_timeout() + mock_run.assert_called_once_with("systemctl show -p TimeoutStartUSec sbd --value") + mock_get_systemd_sec.assert_called_once_with("1min 30s") + mock_mkdirp.assert_called_once_with(bootstrap.SBD_SYSTEMD_DELAY_START_DIR) + mock_str2file.assert_called_once_with('[Service]\nTimeoutSec=120', '/etc/systemd/system/sbd.service.d/sbd_delay_start.conf') + mock_csync2.assert_called_once_with(bootstrap.SBD_SYSTEMD_DELAY_START_DIR) + mock_cluster_run.assert_called_once_with("systemctl daemon-reload") + + @mock.patch('crmsh.sbd.SBDTimeout.get_sbd_watchdog_timeout') + @mock.patch('crmsh.sbd.SBDManager.get_sbd_value_from_config') + def test_get_sbd_delay_start_sec_from_sysconfig_yes(self, mock_get_sbd_value, mock_get_sbd_timeout): + mock_get_sbd_value.return_value = "yes" + mock_get_sbd_timeout.return_value = 30 + assert sbd.SBDTimeout.get_sbd_delay_start_sec_from_sysconfig() == 60 + mock_get_sbd_value.assert_called_once_with("SBD_DELAY_START") + + @mock.patch('crmsh.sbd.SBDManager.get_sbd_value_from_config') + def test_get_sbd_delay_start_sec_from_sysconfig(self, mock_get_sbd_value): + mock_get_sbd_value.return_value = "30" + assert sbd.SBDTimeout.get_sbd_delay_start_sec_from_sysconfig() == 30 + mock_get_sbd_value.assert_called_once_with("SBD_DELAY_START") + + class TestSBDManager(unittest.TestCase): """ Unitary tests for crmsh.sbd.SBDManager @@ -189,19 +458,27 @@ def test_get_sbd_device_from_interactive(self, mock_interactive): def test_get_sbd_device_diskless(self): self.sbd_inst_diskless._get_sbd_device() - def 
test_initialize_sbd_return(self): + @mock.patch('crmsh.sbd.SBDTimeout') + @mock.patch('logging.Logger.info') + def test_initialize_sbd_return(self, mock_info, mock_sbd_timeout): + mock_inst = mock.Mock() + mock_sbd_timeout.return_value = mock_inst + mock_inst.set_sbd_watchdog_timeout = mock.Mock() + mock_inst.adjust_sbd_watchdog_timeout_with_diskless_and_qdevice = mock.Mock() self.sbd_inst_diskless._context = mock.Mock(profiles_dict={}) self.sbd_inst_diskless._initialize_sbd() + mock_info.assert_called_once_with("Initializing diskless SBD") + mock_inst.adjust_sbd_watchdog_timeout_with_diskless_and_qdevice.assert_called_once_with() - @mock.patch('logging.Logger.warning') - @mock.patch('crmsh.sbd.SBDManager._adjust_sbd_watchdog_timeout_for_s390') @mock.patch('crmsh.utils.fatal') @mock.patch('crmsh.bootstrap.invoke') - def test_initialize_sbd(self, mock_invoke, mock_error, mock_adjust_s390, mock_warn): - self.sbd_inst._context = mock.Mock(profiles_dict={ - "sbd.msgwait": 9, - "sbd.watchdog_timeout": 5 - }) + @mock.patch('crmsh.sbd.SBDTimeout') + @mock.patch('logging.Logger.info') + def test_initialize_sbd(self, mock_info, mock_sbd_timeout, mock_invoke, mock_error): + mock_inst = mock.Mock(sbd_msgwait=10, sbd_watchdog_timeout=5) + mock_sbd_timeout.return_value = mock_inst + mock_inst.set_sbd_watchdog_timeout = mock.Mock() + mock_inst.set_sbd_msgwait = mock.Mock() self.sbd_inst._sbd_devices = ["/dev/sdb1", "/dev/sdc1"] mock_invoke.side_effect = [(True, None, None), (False, None, "error")] mock_error.side_effect = ValueError @@ -213,24 +490,20 @@ def test_initialize_sbd(self, mock_invoke, mock_error, mock_adjust_s390, mock_wa mock.call("sbd -4 10 -1 5 -d /dev/sdb1 create"), mock.call("sbd -4 10 -1 5 -d /dev/sdc1 create") ]) - mock_warn.assert_called_once_with("sbd msgwait is set to 10, it was 9") mock_error.assert_called_once_with("Failed to initialize SBD device /dev/sdc1: error") - @mock.patch('crmsh.utils.detect_virt') @mock.patch('crmsh.bootstrap.csync2_update') 
@mock.patch('crmsh.utils.sysconfig_set') - @mock.patch('crmsh.sbd.SBDManager._adjust_sbd_watchdog_timeout_with_diskless_and_qdevice') @mock.patch('shutil.copyfile') - def test_update_configuration(self, mock_copy, mock_determine, mock_sysconfig, mock_update, mock_detect): + def test_update_configuration(self, mock_copy, mock_sysconfig, mock_update): self.sbd_inst._sbd_devices = ["/dev/sdb1", "/dev/sdc1"] self.sbd_inst._watchdog_inst = mock.Mock(watchdog_device_name="/dev/watchdog") - mock_detect.return_value = True + self.sbd_inst.timeout_inst = mock.Mock(sbd_watchdog_timeout=15) - self.sbd_inst._sbd_watchdog_timeout = 30 self.sbd_inst._update_sbd_configuration() mock_copy.assert_called_once_with("/usr/share/fillup-templates/sysconfig.sbd", "/etc/sysconfig/sbd") - mock_sysconfig.assert_called_once_with("/etc/sysconfig/sbd", SBD_PACEMAKER='yes', SBD_STARTMODE='always', SBD_DELAY_START='yes', SBD_WATCHDOG_DEV='/dev/watchdog', SBD_DEVICE='/dev/sdb1;/dev/sdc1', SBD_WATCHDOG_TIMEOUT="30") + mock_sysconfig.assert_called_once_with("/etc/sysconfig/sbd", SBD_WATCHDOG_DEV='/dev/watchdog', SBD_DEVICE='/dev/sdb1;/dev/sdc1', SBD_WATCHDOG_TIMEOUT="15") mock_update.assert_called_once_with("/etc/sysconfig/sbd") @mock.patch('crmsh.bootstrap.utils.parse_sysconfig') @@ -331,7 +604,7 @@ def test_sbd_init(self, mock_package, mock_watchdog, mock_get_device, mock_initi mock_warn.assert_called_once_with() mock_enable_sbd.assert_called_once_with() - @mock.patch('crmsh.sbd.SBDManager.configure_sbd_resource') + @mock.patch('crmsh.sbd.SBDManager.configure_sbd_resource_and_properties') @mock.patch('crmsh.bootstrap.wait_for_cluster') @mock.patch('crmsh.utils.cluster_run_cmd') @mock.patch('logging.Logger.info') @@ -344,17 +617,20 @@ def test_restart_cluster_on_needed_no_ra_running(self, mock_ra_running, mock_sta mock_wait.assert_called_once_with() mock_config_sbd_ra.assert_called_once_with() + @mock.patch('crmsh.sbd.SBDTimeout.get_stonith_timeout') @mock.patch('logging.Logger.warning') 
@mock.patch('crmsh.utils.has_resource_running') - def test_restart_cluster_on_needed_diskless(self, mock_ra_running, mock_warn): + def test_restart_cluster_on_needed_diskless(self, mock_ra_running, mock_warn, mock_get_timeout): mock_ra_running.return_value = True + mock_get_timeout.return_value = 60 + self.sbd_inst_diskless.timeout_inst = mock.Mock(stonith_watchdog_timeout=-1) self.sbd_inst_diskless._restart_cluster_and_configure_sbd_ra() mock_warn.assert_has_calls([ mock.call("To start sbd.service, need to restart cluster service manually on each node"), - mock.call("Then run \"crm configure property stonith-enabled=true stonith-watchdog-timeout=-1 stonith-timeout=60s\" on any node") + mock.call("Then run \"crm configure property stonith-enabled=true stonith-watchdog-timeout=-1 stonith-timeout=60\" on any node") ]) - @mock.patch('crmsh.sbd.SBDManager.configure_sbd_resource') + @mock.patch('crmsh.sbd.SBDManager.configure_sbd_resource_and_properties') @mock.patch('logging.Logger.warning') @mock.patch('crmsh.utils.has_resource_running') def test_restart_cluster_on_needed(self, mock_ra_running, mock_warn, mock_config_sbd_ra): @@ -381,103 +657,33 @@ def test_enable_sbd_service_restart(self, mock_cluster_run, mock_restart): ]) mock_restart.assert_called_once_with() - @mock.patch('logging.Logger.warning') - @mock.patch('crmsh.sbd.SBDManager.configure_sbd_resource') - @mock.patch('crmsh.utils.has_resource_running') - @mock.patch('crmsh.utils.cluster_run_cmd') - def test_enable_sbd_service(self, mock_cluster_run, mock_ra_running, mock_config_sbd_ra, mock_warn): - self.sbd_inst._context = mock.Mock(cluster_is_running=True) - mock_ra_running.return_value = True - - self.sbd_inst._enable_sbd_service() - - mock_cluster_run.assert_has_calls([ - mock.call("systemctl enable sbd.service"), - ]) - mock_ra_running.assert_called_once_with() - mock_config_sbd_ra.assert_called_once_with() - mock_warn.assert_called_once_with("To start sbd.service, need to restart cluster service 
manually on each node") - @mock.patch('crmsh.utils.package_is_installed') - def test_configure_sbd_resource_not_installed(self, mock_package): + def test_configure_sbd_resource_and_properties_not_installed(self, mock_package): mock_package.return_value = False - self.sbd_inst.configure_sbd_resource() + self.sbd_inst.configure_sbd_resource_and_properties() mock_package.assert_called_once_with("sbd") - @mock.patch('crmsh.utils.fatal') - @mock.patch('crmsh.bootstrap.invokerc') - @mock.patch('crmsh.sbd.SBDManager._get_sbd_device_from_config') - @mock.patch('crmsh.utils.has_resource_configured') - @mock.patch('crmsh.utils.service_is_enabled') - @mock.patch('crmsh.utils.package_is_installed') - def test_configure_sbd_resource_error_primitive(self, mock_package, mock_enabled, mock_ra_configured, mock_get_device, mock_invoke, mock_error): - mock_package.return_value = True - mock_enabled.return_value = True - mock_ra_configured.return_value = False - mock_get_device.return_value = ["/dev/sdb1"] - mock_invoke.return_value = False - mock_error.side_effect = ValueError - - with self.assertRaises(ValueError): - self.sbd_inst.configure_sbd_resource() - - mock_package.assert_called_once_with("sbd") - mock_enabled.assert_called_once_with("sbd.service") - mock_ra_configured.assert_called_once_with("stonith:external/sbd") - mock_get_device.assert_called_once_with() - mock_invoke.assert_called_once_with("crm configure primitive stonith-sbd stonith:external/sbd pcmk_delay_max=30s") - mock_error.assert_called_once_with("Can't create stonith-sbd primitive") - - @mock.patch('crmsh.utils.fatal') - @mock.patch('crmsh.bootstrap.invokerc') - @mock.patch('crmsh.sbd.SBDManager._get_sbd_device_from_config') - @mock.patch('crmsh.utils.has_resource_configured') - @mock.patch('crmsh.utils.service_is_enabled') - @mock.patch('crmsh.utils.package_is_installed') - def test_configure_sbd_resource_error_property(self, mock_package, mock_enabled, mock_ra_configured, mock_get_device, mock_invoke, 
mock_error): - mock_package.return_value = True - mock_enabled.return_value = True - mock_ra_configured.return_value = False - mock_get_device.return_value = ["/dev/sdb1"] - mock_invoke.side_effect = [True, False] - mock_error.side_effect = ValueError - - with self.assertRaises(ValueError): - self.sbd_inst.configure_sbd_resource() - - mock_package.assert_called_once_with("sbd") - mock_enabled.assert_called_once_with("sbd.service") - mock_ra_configured.assert_called_once_with("stonith:external/sbd") - mock_get_device.assert_called_once_with() - mock_invoke.assert_has_calls([ - mock.call("crm configure primitive stonith-sbd stonith:external/sbd pcmk_delay_max=30s"), - mock.call("crm configure property stonith-enabled=true") - ]) - mock_error.assert_called_once_with("Can't enable STONITH for SBD") - - @mock.patch('crmsh.utils.fatal') - @mock.patch('crmsh.bootstrap.invokerc') - @mock.patch('crmsh.sbd.SBDManager._get_sbd_device_from_config') + @mock.patch('crmsh.sbd.SBDTimeout.adjust_sbd_timeout_related_cluster_configuration') + @mock.patch('crmsh.utils.set_property') + @mock.patch('crmsh.utils.get_stdout_or_raise_error') @mock.patch('crmsh.utils.has_resource_configured') @mock.patch('crmsh.utils.service_is_enabled') @mock.patch('crmsh.utils.package_is_installed') - def test_configure_sbd_resource_diskless(self, mock_package, mock_enabled, mock_ra_configured, mock_get_device, mock_invoke, mock_error): + def test_configure_sbd_resource_and_properties(self, mock_package, mock_enabled, mock_configured, mock_run, mock_set_property, sbd_adjust): mock_package.return_value = True mock_enabled.return_value = True - mock_ra_configured.return_value = False - mock_get_device.return_value = None - mock_invoke.return_value = False - mock_error.side_effect = ValueError + mock_configured.return_value = False + self.sbd_inst._context = mock.Mock(cluster_is_running=True) + self.sbd_inst._get_sbd_device_from_config = mock.Mock() + self.sbd_inst._get_sbd_device_from_config.return_value = 
["/dev/sda1"] - with self.assertRaises(ValueError): - self.sbd_inst_diskless.configure_sbd_resource() + self.sbd_inst.configure_sbd_resource_and_properties() mock_package.assert_called_once_with("sbd") mock_enabled.assert_called_once_with("sbd.service") - mock_get_device.assert_called_once_with() - mock_invoke.assert_called_once_with("crm configure property stonith-enabled=true stonith-watchdog-timeout=-1 stonith-timeout=60s") - mock_error.assert_called_once_with("Can't enable STONITH for diskless SBD") - mock_ra_configured.assert_called_once_with("stonith:external/sbd") + mock_configured.assert_called_once_with(sbd.SBDManager.SBD_RA) + mock_run.assert_called_once_with("crm configure primitive {} {}".format(sbd.SBDManager.SBD_RA_ID, sbd.SBDManager.SBD_RA)) + mock_set_property.assert_called_once_with(stonith_enabled="true") @mock.patch('crmsh.utils.package_is_installed') def test_join_sbd_config_not_installed(self, mock_package): @@ -512,6 +718,7 @@ def test_join_sbd_config_disabled(self, mock_package, mock_exists, mock_enabled, mock_invoke.assert_called_once_with("systemctl disable sbd.service") mock_enabled.assert_called_once_with("sbd.service", "node1") + @mock.patch('crmsh.utils.sysconfig_set') @mock.patch('logging.Logger.info') @mock.patch('crmsh.sbd.SBDManager._verify_sbd_device') @mock.patch('crmsh.sbd.SBDManager._get_sbd_device_from_config') @@ -520,7 +727,7 @@ def test_join_sbd_config_disabled(self, mock_package, mock_exists, mock_enabled, @mock.patch('crmsh.utils.service_is_enabled') @mock.patch('os.path.exists') @mock.patch('crmsh.utils.package_is_installed') - def test_join_sbd(self, mock_package, mock_exists, mock_enabled, mock_invoke, mock_watchdog, mock_get_device, mock_verify, mock_status): + def test_join_sbd(self, mock_package, mock_exists, mock_enabled, mock_invoke, mock_watchdog, mock_get_device, mock_verify, mock_status, mock_set): mock_package.return_value = True mock_exists.return_value = True mock_enabled.return_value = True @@ -541,6 +748,7 
@@ def test_join_sbd(self, mock_package, mock_exists, mock_enabled, mock_invoke, mo mock_watchdog.assert_called_once_with(peer_host="node1") mock_watchdog_inst.join_watchdog.assert_called_once_with() + @mock.patch('crmsh.utils.sysconfig_set') @mock.patch('logging.Logger.info') @mock.patch('crmsh.sbd.SBDManager._warn_diskless_sbd') @mock.patch('crmsh.sbd.SBDManager._get_sbd_device_from_config') @@ -549,7 +757,7 @@ def test_join_sbd(self, mock_package, mock_exists, mock_enabled, mock_invoke, mo @mock.patch('crmsh.utils.service_is_enabled') @mock.patch('os.path.exists') @mock.patch('crmsh.utils.package_is_installed') - def test_join_sbd_diskless(self, mock_package, mock_exists, mock_enabled, mock_invoke, mock_watchdog, mock_get_device, mock_warn, mock_status): + def test_join_sbd_diskless(self, mock_package, mock_exists, mock_enabled, mock_invoke, mock_watchdog, mock_get_device, mock_warn, mock_status, mock_set): mock_package.return_value = True mock_exists.return_value = True mock_enabled.return_value = True @@ -628,46 +836,6 @@ def test_get_device_uuid(self, mock_run): self.assertEqual(res, "a2e9a92c-cc72-4ef9-ac55-ccc342f3546b") mock_run.assert_called_once_with("sbd -d /dev/sda1 dump", remote="node1") - @mock.patch('crmsh.utils.is_qdevice_configured') - def test_adjust_sbd_watchdog_timeout_with_diskless_and_qdevice_return(self, mock_qdevice_configured): - self.sbd_inst._adjust_sbd_watchdog_timeout_with_diskless_and_qdevice() - mock_qdevice_configured.assert_not_called() - - @mock.patch('logging.Logger.warning') - @mock.patch('crmsh.sbd.SBDManager.calculate_stonith_timeout') - @mock.patch('crmsh.utils.get_qdevice_sync_timeout') - @mock.patch('crmsh.utils.service_is_active') - @mock.patch('crmsh.utils.is_qdevice_configured') - def test_adjust_sbd_watchdog_timeout_with_diskless_and_qdevice_after_qdevice(self, mock_qdevice_configured, mock_active, mock_get_qsync_timeout, mock_cal_timeout, mock_warn): - mock_qdevice_configured.return_value = True - 
mock_active.return_value = True - mock_get_qsync_timeout.return_value = 30 - self.sbd_inst_diskless._sbd_watchdog_timeout = 5 - mock_cal_timeout.return_value = 70 - - self.sbd_inst_diskless._adjust_sbd_watchdog_timeout_with_diskless_and_qdevice() - - mock_qdevice_configured.assert_called_once_with() - mock_active.assert_called_once_with("corosync-qdevice.service") - mock_get_qsync_timeout.assert_called_once_with() - mock_cal_timeout.assert_called_once_with(35) - mock_warn.assert_called_once_with("sbd_watchdog_timeout is set to 35 for qdevice, it was 5") - - @mock.patch('logging.Logger.warning') - @mock.patch('crmsh.sbd.SBDManager.calculate_stonith_timeout') - @mock.patch('crmsh.utils.is_qdevice_configured') - def test_adjust_sbd_watchdog_timeout_with_diskless_and_qdevice(self, mock_qdevice_configured, mock_cal_timeout, mock_warn): - self.sbd_inst_diskless._context = mock.Mock(qdevice_inst=mock.Mock()) - mock_qdevice_configured.return_value = False - mock_cal_timeout.return_value = 10 - self.sbd_inst_diskless._sbd_watchdog_timeout = 5 - - self.sbd_inst_diskless._adjust_sbd_watchdog_timeout_with_diskless_and_qdevice() - - mock_qdevice_configured.assert_called_once_with() - mock_cal_timeout.assert_called_once_with(sbd.SBDManager.SBD_WATCHDOG_TIMEOUT_DEFAULT_WITH_QDEVICE) - mock_warn.assert_called_once_with("sbd_watchdog_timeout is set to 35 for qdevice, it was 5") - @mock.patch('crmsh.sbd.SBDManager._get_sbd_device_from_config') @mock.patch('crmsh.utils.service_is_active') @mock.patch('crmsh.bootstrap.Context') @@ -712,122 +880,3 @@ def test_update_configuration_static(self, mock_config_set, mock_csync2): self.sbd_inst.update_configuration(sbd_config_dict) mock_config_set.assert_called_once_with(bootstrap.SYSCONFIG_SBD, **sbd_config_dict) mock_csync2.assert_called_once_with(bootstrap.SYSCONFIG_SBD) - - def test_calculate_stonith_timeout(self): - res = self.sbd_inst.calculate_stonith_timeout(5) - assert res == 12 - - @mock.patch('crmsh.sbd.SBDManager.is_delay_start') - 
@mock.patch('crmsh.utils.get_stdout_or_raise_error') - def test_adjust_systemd_no_delay(self, mock_run, mock_delay_start): - mock_delay_start.return_value = False - self.sbd_inst._adjust_systemd() - mock_delay_start.assert_called_once_with() - mock_run.assert_not_called() - - @mock.patch('crmsh.sbd.SBDManager.get_sbd_start_timeout_threshold') - @mock.patch('crmsh.utils.mkdirp') - @mock.patch('crmsh.utils.get_stdout_or_raise_error') - @mock.patch('crmsh.sbd.SBDManager.is_delay_start') - def test_adjust_systemd_return(self, mock_delay_start, mock_run, mock_dirp, mock_threshold): - mock_threshold.return_value = 10 - mock_delay_start.return_value = True - mock_run.return_value = "1min 30s" - - self.sbd_inst._adjust_systemd() - - mock_run.assert_called_once_with("systemctl show -p TimeoutStartUSec sbd --value") - mock_threshold.assert_called_once_with() - mock_dirp.assert_not_called() - - @mock.patch('crmsh.utils.str2file') - @mock.patch('crmsh.utils.mkdirp') - @mock.patch('crmsh.utils.get_stdout_or_raise_error') - @mock.patch('crmsh.sbd.SBDManager.get_suitable_sbd_systemd_timeout') - @mock.patch('crmsh.sbd.SBDManager.get_sbd_start_timeout_threshold') - @mock.patch('crmsh.sbd.SBDManager.is_delay_start') - def test_adjust_systemd(self, mock_delay_start, mock_threshold, mock_systemd_timeout, mock_run, mock_dirp, mock_str2file): - mock_delay_start.return_value = True - mock_threshold.return_value = 120 - mock_systemd_timeout.return_value = 144 - mock_run.return_value = "1min 30s" - - self.sbd_inst._adjust_systemd() - - mock_run.assert_has_calls([ - mock.call("systemctl show -p TimeoutStartUSec sbd --value"), - mock.call("systemctl daemon-reload") - ]) - mock_dirp.assert_called_once_with("/etc/systemd/system/sbd.service.d") - mock_str2file.assert_called_once_with('[Service]\nTimeoutSec=144', '/etc/systemd/system/sbd.service.d/sbd_delay_start.conf') - mock_delay_start.assert_called_once_with() - mock_threshold.assert_called_once_with() - 
mock_systemd_timeout.assert_called_once_with() - - @mock.patch('crmsh.utils.get_stdout_or_raise_error') - def test_get_sbd_msgwait_exception(self, mock_run): - mock_run.return_value = "data" - with self.assertRaises(ValueError) as err: - sbd.SBDManager._get_sbd_msgwait("/dev/sda1") - self.assertEqual("Cannot get sbd msgwait for /dev/sda1", str(err.exception)) - - @mock.patch('crmsh.utils.get_stdout_or_raise_error') - def test_get_sbd_msgwait(self, mock_run): - mock_run.return_value = """ - Timeout (allocate) : 2 - Timeout (loop) : 1 - Timeout (msgwait) : 30 - ==Header on disk /dev/sda1 is dumped - """ - res = sbd.SBDManager._get_sbd_msgwait("/dev/sda1") - self.assertEqual(res, 30) - mock_run.assert_called_once_with("sbd -d /dev/sda1 dump") - - @mock.patch('crmsh.sbd.SBDManager.get_sbd_start_timeout_threshold') - def test_get_suitable_sbd_systemd_timeout(self, mock_threshold): - mock_threshold.return_value = 10 - res = sbd.SBDManager.get_suitable_sbd_systemd_timeout() - self.assertEqual(res, 12) - mock_threshold.assert_called_once_with() - - @mock.patch('logging.Logger.warning') - def test_adjust_sbd_watchdog_timeout_for_s390(self, mock_warn): - self.sbd_inst._context = mock.Mock(is_s390=True) - self.sbd_inst._sbd_watchdog_timeout = 10 - self.sbd_inst._adjust_sbd_watchdog_timeout_for_s390() - mock_warn.assert_called_once_with("sbd_watchdog_timeout is set to 15 for s390, it was 10") - - @mock.patch('crmsh.sbd.SBDManager.get_sbd_value_from_config') - def test_get_sbd_watchdog_timeout_exception(self, mock_get_value): - mock_get_value.return_value = None - with self.assertRaises(ValueError) as err: - sbd.SBDManager.get_sbd_watchdog_timeout() - self.assertEqual("Cannot get the value of SBD_WATCHDOG_TIMEOUT", str(err.exception)) - mock_get_value.assert_called_once_with("SBD_WATCHDOG_TIMEOUT") - - @mock.patch('crmsh.sbd.SBDManager.get_sbd_value_from_config') - def test_get_sbd_watchdog_timeout(self, mock_get_value): - mock_get_value.return_value = 10 - res = 
sbd.SBDManager.get_sbd_watchdog_timeout() - self.assertEqual(res, 10) - mock_get_value.assert_called_once_with("SBD_WATCHDOG_TIMEOUT") - - @mock.patch('crmsh.sbd.SBDManager.get_sbd_watchdog_timeout') - @mock.patch('crmsh.sbd.SBDManager.get_sbd_device_from_config') - def test_get_sbd_start_timeout_threshold_diskless(self, mock_get_device, mock_get_timeout): - mock_get_device.return_value = None - mock_get_timeout.return_value = 10 - res = sbd.SBDManager.get_sbd_start_timeout_threshold() - self.assertEqual(res, 20) - mock_get_device.assert_called_once_with() - mock_get_timeout.assert_called_once_with() - - @mock.patch('crmsh.sbd.SBDManager._get_sbd_msgwait') - @mock.patch('crmsh.sbd.SBDManager.get_sbd_device_from_config') - def test_get_sbd_start_timeout_threshold(self, mock_get_device, mock_get_msgwait): - mock_get_device.return_value = ["/dev/sdb1"] - mock_get_msgwait.return_value = 10 - res = sbd.SBDManager.get_sbd_start_timeout_threshold() - self.assertEqual(res, 10) - mock_get_device.assert_called_once_with() - mock_get_msgwait.assert_called_once_with("/dev/sdb1") diff --git a/test/unittests/test_utils.py b/test/unittests/test_utils.py index fccd4423fc..75978fdfaa 100644 --- a/test/unittests/test_utils.py +++ b/test/unittests/test_utils.py @@ -1635,3 +1635,27 @@ def test_check_no_quorum_policy_with_dlm(mock_dlm, mock_get_property, mock_warn) mock_dlm.assert_called_once_with() mock_get_property.assert_called_once_with("no-quorum-policy") mock_warn.assert_called_once_with('The DLM cluster best practice suggests to set the cluster property "no-quorum-policy=freeze"') + + +@mock.patch('crmsh.utils.is_qdevice_configured') +@mock.patch('crmsh.utils.list_cluster_nodes') +def test_is_2node_cluster_without_qdevice(mock_list, mock_is_qdevice): + mock_list.return_value = ["node1", "node2"] + mock_is_qdevice.return_value = False + res = utils.is_2node_cluster_without_qdevice() + assert res is True + mock_list.assert_called_once_with() + 
mock_is_qdevice.assert_called_once_with() + + +def test_get_systemd_timeout_start_in_sec(): + res = utils.get_systemd_timeout_start_in_sec("1min 31s") + assert res == 91 + + +@mock.patch('crmsh.utils.get_stdout_or_raise_error') +@mock.patch('crmsh.utils.get_property') +def test_set_property_conditionally(mock_get_property, mock_run): + mock_get_property.return_value = "100s" + utils.set_property_conditionally("stonith-timeout", 101) + mock_run.assert_called_once_with("crm configure property stonith-timeout=101")