
Commit

Dev: behave: Add functional test for previous changes and cases
liangxin1300 committed Dec 2, 2021
1 parent c4ab4ab commit b7f8bec
Showing 4 changed files with 292 additions and 3 deletions.
3 changes: 2 additions & 1 deletion data-manifest
@@ -68,7 +68,8 @@ test/evaltest.sh
test/features/bootstrap_bugs.feature
test/features/bootstrap_init_join_remove.feature
test/features/bootstrap_options.feature
test/features/bootstrap_sbd.feature
test/features/bootstrap_sbd_delay.feature
test/features/bootstrap_sbd_normal.feature
test/features/configure_bugs.feature
test/features/constraints_bugs.feature
test/features/environment.py
228 changes: 228 additions & 0 deletions test/features/bootstrap_sbd_delay.feature
@@ -0,0 +1,228 @@
@sbd
Feature: configure sbd delay start correctly

Tag @clean means the cluster service needs to be stopped if it is available

@clean
Scenario: disk-based SBD with small sbd_watchdog_timeout
Given Run "test -f /etc/crm/profiles.yml" OK
Given Yaml "default:corosync.totem.token" value is "5000"
Given Yaml "default:sbd.watchdog_timeout" value is "15"

Given Has disk "/dev/sda1" on "hanode1"
Given Cluster service is "stopped" on "hanode1"
When Run "crm cluster init -s /dev/sda1 -y" on "hanode1"
Then Cluster service is "started" on "hanode1"
And Service "sbd" is "started" on "hanode1"
And Resource "stonith-sbd" type "external/sbd" is "Started"
And SBD option "SBD_DELAY_START" value is "no"
And SBD option "SBD_WATCHDOG_TIMEOUT" value is "15"
And SBD option "msgwait" value for "/dev/sda1" is "30"
# calculated and set by sbd RA
And Cluster property "stonith-timeout" is "43"
And Parameter "pcmk_delay_max" not configured in "stonith-sbd"

Given Has disk "/dev/sda1" on "hanode2"
Given Cluster service is "stopped" on "hanode2"
When Run "crm cluster join -c hanode1 -y" on "hanode2"
Then Cluster service is "started" on "hanode2"
And Service "sbd" is "started" on "hanode2"
# SBD_DELAY_START >= (token + consensus + pcmk_delay_max + msgwait) # for disk-based sbd
And SBD option "SBD_DELAY_START" value is "71"
And SBD option "SBD_WATCHDOG_TIMEOUT" value is "15"
And SBD option "msgwait" value for "/dev/sda1" is "30"
# stonith-timeout >= 1.2 * (pcmk_delay_max + msgwait) # for disk-based sbd
And Cluster property "stonith-timeout" is "72"
And Parameter "pcmk_delay_max" configured in "stonith-sbd"

Given Has disk "/dev/sda1" on "hanode3"
Given Cluster service is "stopped" on "hanode3"
When Run "crm cluster join -c hanode1 -y" on "hanode3"
Then Cluster service is "started" on "hanode3"
And Service "sbd" is "started" on "hanode3"
# SBD_DELAY_START >= (token + consensus + pcmk_delay_max + msgwait) # for disk-based sbd
# runtime value is "41", we keep the larger one here
And SBD option "SBD_DELAY_START" value is "71"
And SBD option "SBD_WATCHDOG_TIMEOUT" value is "15"
And SBD option "msgwait" value for "/dev/sda1" is "30"
# stonith-timeout >= 1.2 * (pcmk_delay_max + msgwait) # for disk-based sbd
# runtime value is "36", we keep ther larger one here
And Cluster property "stonith-timeout" is "72"
And Parameter "pcmk_delay_max" not configured in "stonith-sbd"

When Run "crm cluster remove hanode3 -y" on "hanode1"
Then Cluster service is "stopped" on "hanode3"
And Service "sbd" is "stopped" on "hanode3"
And SBD option "SBD_DELAY_START" value is "71"
And SBD option "SBD_WATCHDOG_TIMEOUT" value is "15"
And SBD option "msgwait" value for "/dev/sda1" is "30"
And Cluster property "stonith-timeout" is "72"
And Parameter "pcmk_delay_max" configured in "stonith-sbd"
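Note: the expected numbers above follow directly from the formulas in the comments. A minimal sketch of the arithmetic, assuming corosync's default consensus = 1.2 * token, msgwait = 2 * watchdog_timeout, and the pcmk_delay_max of 30s implied by the expected values:

```python
# Disk-based SBD timing math behind this scenario (illustrative only).
token = 5                        # seconds, from corosync.totem.token = 5000 ms
consensus = 1.2 * token          # 6 s, corosync default (1.2 * token)
watchdog_timeout = 15            # from sbd.watchdog_timeout
msgwait = 2 * watchdog_timeout   # 30 s, matches the msgwait check above
pcmk_delay_max = 30              # assumed value set on stonith-sbd for 2 nodes

# SBD_DELAY_START >= token + consensus + pcmk_delay_max + msgwait
sbd_delay_start = int(token + consensus + pcmk_delay_max + msgwait)   # 71
# stonith-timeout >= 1.2 * (pcmk_delay_max + msgwait)
stonith_timeout = int(1.2 * (pcmk_delay_max + msgwait))               # 72
```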

@clean
Scenario: disk-less SBD with small sbd_watchdog_timeout
Given Run "test -f /etc/crm/profiles.yml" OK
Given Yaml "default:corosync.totem.token" value is "5000"
Given Yaml "default:sbd.watchdog_timeout" value is "15"

Given Cluster service is "stopped" on "hanode1"
When Run "crm cluster init -S -y" on "hanode1"
Then Cluster service is "started" on "hanode1"
And SBD option "SBD_DELAY_START" value is "no"
And SBD option "SBD_WATCHDOG_TIMEOUT" value is "15"
And Cluster property "stonith-timeout" is "60"

Given Cluster service is "stopped" on "hanode2"
When Run "crm cluster join -c hanode1 -y" on "hanode2"
Then Cluster service is "started" on "hanode2"
# SBD_DELAY_START >= (token + consensus + 2*SBD_WATCHDOG_TIMEOUT) # for disk-less sbd
And SBD option "SBD_DELAY_START" value is "41"
And SBD option "SBD_WATCHDOG_TIMEOUT" value is "15"
# stonith-timeout >= 1.2 * max(stonith_watchdog_timeout, 2*SBD_WATCHDOG_TIMEOUT) # for disk-less sbd
# stonith-timeout >= max(STONITH_TIMEOUT_DEFAULT, token+consensus) # for ALL situations
And Cluster property "stonith-timeout" is "60"

Given Cluster service is "stopped" on "hanode3"
When Run "crm cluster join -c hanode1 -y" on "hanode3"
Then Cluster service is "started" on "hanode3"
And SBD option "SBD_DELAY_START" value is "41"
And SBD option "SBD_WATCHDOG_TIMEOUT" value is "15"
And Cluster property "stonith-timeout" is "60"

When Run "crm cluster remove hanode3 -y" on "hanode1"
Then Cluster service is "stopped" on "hanode3"
And SBD option "SBD_DELAY_START" value is "41"
And SBD option "SBD_WATCHDOG_TIMEOUT" value is "15"
And Cluster property "stonith-timeout" is "60"
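Note: the disk-less numbers come out the same way. A sketch under the same assumptions, with STONITH_TIMEOUT_DEFAULT = 60 inferred from the expected "60" above:

```python
# Disk-less SBD timing math behind this scenario (illustrative only).
token, consensus = 5, 6          # seconds; consensus assumed 1.2 * token
watchdog_timeout = 15
STONITH_TIMEOUT_DEFAULT = 60     # assumed default, inferred from the checks

# SBD_DELAY_START >= token + consensus + 2*SBD_WATCHDOG_TIMEOUT
sbd_delay_start = token + consensus + 2 * watchdog_timeout            # 41
# stonith-timeout is the larger of the disk-less formula and the
# global floor max(STONITH_TIMEOUT_DEFAULT, token + consensus)
stonith_timeout = max(int(1.2 * 2 * watchdog_timeout),                # 36
                      STONITH_TIMEOUT_DEFAULT,                        # 60
                      token + consensus)                              # 11
```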

@clean
Scenario: disk-based SBD with big sbd_watchdog_timeout
When Run "sed -i 's/watchdog_timeout: 15/watchdog_timeout: 60/' /etc/crm/profiles.yml" on "hanode1"
Given Yaml "default:corosync.totem.token" value is "5000"
Given Yaml "default:sbd.watchdog_timeout" value is "60"

Given Has disk "/dev/sda1" on "hanode1"
Given Cluster service is "stopped" on "hanode1"
When Run "crm cluster init -s /dev/sda1 -y" on "hanode1"
Then Cluster service is "started" on "hanode1"
And Service "sbd" is "started" on "hanode1"
And Resource "stonith-sbd" type "external/sbd" is "Started"
And SBD option "SBD_DELAY_START" value is "no"
And SBD option "SBD_WATCHDOG_TIMEOUT" value is "60"
And SBD option "msgwait" value for "/dev/sda1" is "120"
# calculated and set by sbd RA
And Cluster property "stonith-timeout" is "172"
And Parameter "pcmk_delay_max" not configured in "stonith-sbd"

Given Has disk "/dev/sda1" on "hanode2"
Given Cluster service is "stopped" on "hanode2"
When Run "crm cluster join -c hanode1 -y" on "hanode2"
Then Cluster service is "started" on "hanode2"
And Service "sbd" is "started" on "hanode2"
# SBD_DELAY_START >= (token + consensus + pcmk_delay_max + msgwait) # for disk-based sbd
And SBD option "SBD_DELAY_START" value is "161"
And SBD option "SBD_WATCHDOG_TIMEOUT" value is "60"
And SBD option "msgwait" value for "/dev/sda1" is "120"
# stonith-timeout >= 1.2 * (pcmk_delay_max + msgwait) # for disk-based sbd
And Cluster property "stonith-timeout" is "180"
And Parameter "pcmk_delay_max" configured in "stonith-sbd"
# since SBD_DELAY_START value (161s) > default systemd startup value (1min 30s)
And Run "test -f /etc/systemd/system/sbd.service.d/sbd_delay_start.conf" OK
# 1.2*SBD_DELAY_START
And Run "grep 'TimeoutSec=193' /etc/systemd/system/sbd.service.d/sbd_delay_start.conf" OK

Given Has disk "/dev/sda1" on "hanode3"
Given Cluster service is "stopped" on "hanode3"
When Run "crm cluster join -c hanode1 -y" on "hanode3"
Then Cluster service is "started" on "hanode3"
And Service "sbd" is "started" on "hanode3"
And SBD option "SBD_DELAY_START" value is "161"
And SBD option "SBD_WATCHDOG_TIMEOUT" value is "60"
And SBD option "msgwait" value for "/dev/sda1" is "120"
And Cluster property "stonith-timeout" is "180"
And Parameter "pcmk_delay_max" not configured in "stonith-sbd"
And Run "test -f /etc/systemd/system/sbd.service.d/sbd_delay_start.conf" OK
And Run "grep 'TimeoutSec=193' /etc/systemd/system/sbd.service.d/sbd_delay_start.conf" OK

When Run "crm cluster remove hanode3 -y" on "hanode1"
Then Cluster service is "stopped" on "hanode3"
And Service "sbd" is "stopped" on "hanode3"
And SBD option "SBD_DELAY_START" value is "161"
And SBD option "SBD_WATCHDOG_TIMEOUT" value is "60"
And SBD option "msgwait" value for "/dev/sda1" is "120"
And Cluster property "stonith-timeout" is "180"
And Parameter "pcmk_delay_max" configured in "stonith-sbd"
And Run "test -f /etc/systemd/system/sbd.service.d/sbd_delay_start.conf" OK
And Run "grep 'TimeoutSec=193' /etc/systemd/system/sbd.service.d/sbd_delay_start.conf" OK
When Run "sed -i 's/watchdog_timeout: 60/watchdog_timeout: 15/g' /etc/crm/profiles.yml" on "hanode1"

@clean
Scenario: Add sbd via stage on a running cluster
Given Run "test -f /etc/crm/profiles.yml" OK
Given Yaml "default:corosync.totem.token" value is "5000"
Given Yaml "default:sbd.watchdog_timeout" value is "15"

Given Has disk "/dev/sda1" on "hanode1"
Given Has disk "/dev/sda1" on "hanode2"
Given Cluster service is "stopped" on "hanode1"
Given Cluster service is "stopped" on "hanode2"
When Run "crm cluster init -y" on "hanode1"
Then Cluster service is "started" on "hanode1"
When Run "crm cluster join -c hanode1 -y" on "hanode2"
Then Cluster service is "started" on "hanode2"

When Run "crm cluster init sbd -s /dev/sda1 -y" on "hanode1"
Then Service "sbd" is "started" on "hanode1"
Then Service "sbd" is "started" on "hanode2"
And SBD option "SBD_DELAY_START" value is "71"
And SBD option "SBD_WATCHDOG_TIMEOUT" value is "15"
And SBD option "msgwait" value for "/dev/sda1" is "30"
And Cluster property "stonith-timeout" is "72"
And Parameter "pcmk_delay_max" configured in "stonith-sbd"

@clean
Scenario: Add disk-based sbd with qdevice
Given Run "test -f /etc/crm/profiles.yml" OK
Given Yaml "default:corosync.totem.token" value is "5000"
Given Yaml "default:sbd.watchdog_timeout" value is "15"
Given Has disk "/dev/sda1" on "hanode1"
Given Has disk "/dev/sda1" on "hanode2"
Given Cluster service is "stopped" on "hanode1"
Given Cluster service is "stopped" on "hanode2"

When Run "crm cluster init -s /dev/sda1 --qnetd-hostname=qnetd-node -y" on "hanode1"
Then Cluster service is "started" on "hanode1"
When Run "crm cluster join -c hanode1 -y" on "hanode2"
Then Cluster service is "started" on "hanode2"
And Service "corosync-qdevice" is "started" on "hanode1"
And Service "corosync-qdevice" is "started" on "hanode2"
And Service "sbd" is "started" on "hanode1"
And Service "sbd" is "started" on "hanode2"

And SBD option "SBD_DELAY_START" value is "41"
And SBD option "SBD_WATCHDOG_TIMEOUT" value is "15"
And SBD option "msgwait" value for "/dev/sda1" is "30"
And Cluster property "stonith-timeout" is "60"
And Parameter "pcmk_delay_max" not configured in "stonith-sbd"
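Note: the smaller values here are consistent with pcmk_delay_max being left unconfigured once qdevice provides the tie-breaking vote. With it dropped from the sums, the same disk-based formulas give 41 and the 60s floor; a sketch under that assumption:

```python
# Disk-based SBD with qdevice: pcmk_delay_max assumed to drop out.
token, consensus = 5, 6
watchdog_timeout = 15
msgwait = 2 * watchdog_timeout                                        # 30
pcmk_delay_max = 0                                                    # not configured

sbd_delay_start = token + consensus + pcmk_delay_max + msgwait        # 41
stonith_timeout = max(int(1.2 * (pcmk_delay_max + msgwait)), 60)      # 60
```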

@clean
Scenario: Add disk-less sbd with qdevice
Given Run "test -f /etc/crm/profiles.yml" OK
Given Yaml "default:corosync.totem.token" value is "5000"
Given Yaml "default:sbd.watchdog_timeout" value is "15"
Given Cluster service is "stopped" on "hanode1"
Given Cluster service is "stopped" on "hanode2"

When Run "crm cluster init -S --qnetd-hostname=qnetd-node -y" on "hanode1"
Then Cluster service is "started" on "hanode1"
When Run "crm cluster join -c hanode1 -y" on "hanode2"
Then Cluster service is "started" on "hanode2"
And Service "corosync-qdevice" is "started" on "hanode1"
And Service "corosync-qdevice" is "started" on "hanode2"
And Service "sbd" is "started" on "hanode1"
And Service "sbd" is "started" on "hanode2"

And SBD option "SBD_DELAY_START" value is "81"
And SBD option "SBD_WATCHDOG_TIMEOUT" value is "35"
And Cluster property "stonith-timeout" is "84"
And Cluster property "stonith-watchdog-timeout" is "-1"
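Note: in the disk-less + qdevice case the scenario expects SBD_WATCHDOG_TIMEOUT to be raised to 35s (presumably to cover qdevice's sync time; the rule itself is not visible in this diff). Given that value, the familiar disk-less formulas reproduce the remaining numbers:

```python
# Disk-less SBD with qdevice (illustrative; the 35 s value is taken
# from the expected checks above, not derived here).
token, consensus = 5, 6
watchdog_timeout = 35

sbd_delay_start = token + consensus + 2 * watchdog_timeout            # 81
stonith_timeout = int(1.2 * 2 * watchdog_timeout)                     # 84
# stonith-watchdog-timeout = -1 lets pacemaker derive the watchdog
# timeout from SBD_WATCHDOG_TIMEOUT automatically.
```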
test/features/bootstrap_sbd_normal.feature (renamed from test/features/bootstrap_sbd.feature)
@@ -1,4 +1,4 @@
@bootstrap
@sbd
Feature: crmsh bootstrap sbd management

Tag @clean means the cluster service needs to be stopped if it is available
62 changes: 61 additions & 1 deletion test/features/steps/step_implementation.py
@@ -2,8 +2,9 @@
import time
import os
import datetime
import yaml
from behave import given, when, then
from crmsh import corosync, parallax
from crmsh import corosync, parallax, sbd
from crmsh import utils as crmutils
from utils import check_cluster_state, check_service_state, online, run_command, me, \
run_command_local_or_remote, file_in_archive
@@ -35,6 +36,24 @@ def step_impl(context, nodelist):
assert online(context, nodelist) is True


@given('Run "{cmd}" OK')
def step_impl(context, cmd):
rc, _ = run_command(context, cmd)
assert rc == 0


@then('Run "{cmd}" OK')
def step_impl(context, cmd):
rc, _ = run_command(context, cmd)
assert rc == 0


@when('Run "{cmd}" OK')
def step_impl(context, cmd):
rc, _ = run_command(context, cmd)
assert rc == 0


@given('IP "{addr}" is belong to "{iface}"')
def step_impl(context, addr, iface):
cmd = 'ip address show dev {}'.format(iface)
@@ -318,3 +337,44 @@ def step_impl(context, res_id, node):
def step_impl(context, res_id, node):
rc, out, err = crmutils.get_stdout_stderr("crm_mon -1")
assert re.search(r'\*\s+{}\s+.*Started\s+{}'.format(res_id, node), out) is not None


@then('SBD option "{key}" value is "{value}"')
def step_impl(context, key, value):
res = sbd.SBDManager.get_sbd_value_from_config(key)
assert res == value
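This step delegates to sbd.SBDManager.get_sbd_value_from_config. A minimal sketch of an equivalent lookup, assuming the conventional /etc/sysconfig/sbd location and shell-style KEY=value lines (not crmsh's actual implementation):

```python
def get_sbd_value(key, path="/etc/sysconfig/sbd"):
    """Return the value of a KEY=value line in the sysconfig file, or None."""
    with open(path) as f:
        for line in f:
            line = line.strip()
            if line.startswith(key + "="):
                # strip optional surrounding quotes
                return line.split("=", 1)[1].strip('"\'')
    return None
```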


@then('SBD option "{key}" value for "{dev}" is "{value}"')
def step_impl(context, key, dev, value):
res = sbd.SBDTimeout.get_sbd_msgwait(dev)
assert res == int(value)


@then('Cluster property "{key}" is "{value}"')
def step_impl(context, key, value):
res = crmutils.get_property(key)
assert res is not None and str(res) == value


@then('Parameter "{param_name}" not configured in "{res_id}"')
def step_impl(context, param_name, res_id):
_, out = run_command(context, "crm configure show {}".format(res_id))
result = re.search("params {}=".format(param_name), out)
assert result is None


@then('Parameter "{param_name}" configured in "{res_id}"')
def step_impl(context, param_name, res_id):
_, out = run_command(context, "crm configure show {}".format(res_id))
result = re.search("params {}=".format(param_name), out)
assert result is not None


@given('Yaml "{path}" value is "{value}"')
def step_impl(context, path, value):
yaml_file = "/etc/crm/profiles.yml"
with open(yaml_file) as f:
data = yaml.load(f, Loader=yaml.SafeLoader)
sec_name, key = path.split(':')
assert str(data[sec_name][key]) == str(value)
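The "section:key" path maps directly onto the two-level layout of /etc/crm/profiles.yml. A hypothetical excerpt showing the structure this step assumes, with the keys queried by the scenarios above:

```python
import yaml

# Hypothetical profiles.yml excerpt; the real file may contain more sections.
sample = """
default:
  corosync.totem.token: 5000
  sbd.watchdog_timeout: 15
"""
data = yaml.safe_load(sample)
sec_name, key = "default:corosync.totem.token".split(":")
assert str(data[sec_name][key]) == "5000"
```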
