Support --all or specific node to manage cluster and nodes #797

Merged: 8 commits, Nov 16, 2021
43 changes: 30 additions & 13 deletions crmsh/bootstrap.py
@@ -33,7 +33,7 @@
from . import tmpfiles
from . import lock
from . import userdir
from .constants import SSH_OPTION, QDEVICE_HELP_INFO
from .constants import SSH_OPTION, QDEVICE_HELP_INFO, CRM_MON_ONE_SHOT
from . import ocfs2
from . import qdevice
from . import log
@@ -363,36 +363,52 @@ def wait_for_resource(message, resource):
sleep(1)


def wait_for_cluster():
with logger_utils.status_long("Waiting for cluster"):
def wait_for_cluster(message="Waiting for cluster", node_list=[]):
"""
Wait for local node or specific node(s) online
"""
# Sleep here since just after pacemaker.service starts, crm_mon might not be ready
sleep(2)
# Check if already online
if is_online(node_list):
return

with logger_utils.status_long(message):
while True:
_rc, out, _err = utils.get_stdout_stderr("crm_mon -1")
if is_online(out):
if is_online(node_list):
break
status_progress()
sleep(2)
# Sleep here because when do_stop calls wait_for_cluster just after the nodes come online,
# some nodes might hang in a pending state if we do not wait
sleep(2)


def get_cluster_node_hostname():
"""
Get the hostname of the cluster node
"""
peer_node = None
if _context.cluster_node:
if _context and _context.cluster_node:
rc, out, err = utils.get_stdout_stderr("ssh {} {} crm_node --name".format(SSH_OPTION, _context.cluster_node))
if rc != 0:
utils.fatal(err)
peer_node = out
return peer_node


def is_online(crm_mon_txt):
def is_online(node_list=[]):
"""
Check whether local node is online
Check whether the local node, or specific node(s), are online
Besides that, during the join process, check whether the init node is online
"""
if not re.search("Online: .* {} ".format(utils.this_node()), crm_mon_txt):
return False
_list = node_list if node_list else [utils.this_node()]
crm_mon_txt = utils.get_stdout_or_raise_error(CRM_MON_ONE_SHOT, remote=_list[0])
# Make sure all the nodes are online
# TODO: what about nodes that have been shut down?
for node in _list:
if not re.search(r'Online:\s+\[.*{}\s+.*'.format(node), crm_mon_txt):
return False

# if peer_node is None, this is in the init process
peer_node = get_cluster_node_hostname()
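
The per-node check above hinges on the "Online: [ ... ]" line printed by crm_mon -1. As a small self-contained illustration, using a made-up crm_mon excerpt (the sample text is an assumption, not output captured for this PR):

    import re

    # Hypothetical "crm_mon -1" excerpt; real output may differ in detail.
    crm_mon_txt = "Node List:\n  * Online: [ alpha bravo ]\n"

    for node in ["alpha", "bravo", "charlie"]:
        online = bool(re.search(r'Online:\s+\[.*{}\s+.*'.format(node), crm_mon_txt))
        print(node, online)   # alpha True, bravo True, charlie False
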
@@ -677,9 +693,10 @@ def init_cluster_local():
wait_for_cluster()


def start_pacemaker():
def start_pacemaker(node_list=[]):
"""
Start pacemaker service with wait time for sbd
When node_list is set, start the pacemaker service on those nodes in parallel
"""
from .sbd import SBDManager
pacemaker_start_msg = "Starting pacemaker"
@@ -688,7 +705,7 @@ def start_pacemaker():
SBDManager.is_delay_start():
pacemaker_start_msg += "(waiting for sbd {}s)".format(SBDManager.get_suitable_sbd_systemd_timeout())
with logger_utils.status_long(pacemaker_start_msg):
utils.start_service("pacemaker.service", enable=True)
utils.start_service("pacemaker.service", enable=True, node_list=node_list)
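
With the new node_list parameter, start_pacemaker() can bring up pacemaker.service on several nodes at once. The diff does not show how utils.start_service() implements this, so the following is only a generic sketch of the parallel pattern; the helper name, ssh options and error handling are assumptions, not crmsh's API:

    import subprocess
    from concurrent.futures import ThreadPoolExecutor

    def start_service_parallel(service, node_list):
        """Start a systemd service on every node in node_list concurrently via ssh."""
        def _start(node):
            cmd = ["ssh", "-o", "StrictHostKeyChecking=no", node,
                   "systemctl", "start", service]
            return node, subprocess.run(cmd, capture_output=True).returncode

        with ThreadPoolExecutor(max_workers=max(len(node_list), 1)) as pool:
            for node, rc in pool.map(_start, node_list):
                if rc != 0:
                    raise IOError("Failed to start {} on {}".format(service, node))

    # start_service_parallel("pacemaker.service", ["node1", "node2"])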


def install_tmp(tmpfile, to):
@@ -1263,7 +1280,7 @@ def evaluate_qdevice_quorum_effect(mode, diskless_sbd=False):
elif mode == QDEVICE_REMOVE:
actual_votes -= 1

if utils.is_quorate(expected_votes, actual_votes) and not diskless_sbd:
if utils.calculate_quorate_status(expected_votes, actual_votes) and not diskless_sbd:
# safe to use reload
return QdevicePolicy.QDEVICE_RELOAD
elif utils.has_resource_running():
23 changes: 23 additions & 0 deletions crmsh/constants.py
@@ -501,4 +501,27 @@
YELLOW = '\033[33m'
GREEN = '\033[32m'
END = '\033[0m'


CIB_QUERY = "cibadmin -Q"
CIB_REPLACE = "cibadmin -R -X '{xmlstr}'"
CIB_RAW_FILE = "/var/lib/pacemaker/cib/cib.xml"
XML_NODE_PATH = "/cib/configuration/nodes/node"
XML_STATUS_PATH = "/cib/status/node_state"
XML_NODE_QUERY_STANDBY_PATH = "//nodes/node[@id='{node_id}']/instance_attributes/nvpair[@name='standby']/@value"
XML_STATUS_QUERY_STANDBY_PATH = "//status/node_state[@id='{node_id}']/transient_attributes/instance_attributes/nvpair[@name='standby']/@value"
STANDBY_TEMPLATE = """
<instance_attributes id="nodes-{node_id}">
<nvpair id="nodes-{node_id}-standby" name="standby" value="{value}"/>
</instance_attributes>
"""
STANDBY_TEMPLATE_REBOOT = """
<transient_attributes id="{node_id}">
<instance_attributes id="status-{node_id}">
<nvpair id="status-{node_id}-standby" name="standby" value="{value}"/>
</instance_attributes>
</transient_attributes>
"""
STANDBY_NV_RE = r'(<nvpair.*{node_id}.*name="standby".*)value="{value}"(.*)'
CRM_MON_ONE_SHOT = "crm_mon -1"
# vim:ts=4:sw=4:et:
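
These standby templates and the nvpair regex are plain string templates. A minimal sketch of how they might be filled in follows; the node id, the import, and the re.sub replacement shape are assumptions for illustration, not the call sites merged in this PR:

    # Sketch only: assumes crmsh is importable and that these constants are
    # combined roughly this way somewhere in the standby code path.
    import re
    from crmsh.constants import STANDBY_TEMPLATE, STANDBY_NV_RE

    node_id = "node1"                     # hypothetical node id
    standby_xml = STANDBY_TEMPLATE.format(node_id=node_id, value="on")
    # standby_xml now contains (roughly):
    #   <instance_attributes id="nodes-node1">
    #     <nvpair id="nodes-node1-standby" name="standby" value="on"/>
    #   </instance_attributes>

    # Flipping an existing nvpair's value with the regex constant:
    updated = re.sub(STANDBY_NV_RE.format(node_id=node_id, value="on"),
                     r'\1value="off"\2', standby_xml)
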
171 changes: 117 additions & 54 deletions crmsh/ui_cluster.py
@@ -29,13 +29,60 @@ def parse_options(parser, args):
options, args = parser.parse_known_args(list(args))
except:
return None, None
if options.help:
if hasattr(options, 'help') and options.help:
parser.print_help()
return None, None
utils.check_space_option_value(options)
return options, args


def parse_option_for_nodes(context, *args):
"""
Parse option for nodes
Return a node list
"""
action_type = context.get_command_name()
action_target = "node" if action_type in ["standby", "online"] else "cluster service"
action = "{} {}".format(action_type, action_target)
usage_template = """
Specify node(s) on which to {action}.
If no nodes are specified, {action} on the local node.
If --all is specified, {action} on all nodes."""
addtion_usage = ""
if action_type == "standby":
usage_template += """
\n\nAdditionally, you may specify a lifetime for the standby---if set to
"reboot", the node will be back online once it reboots. "forever" will
keep the node in standby after reboot. The lifetime defaults to
"forever"."""
addtion_usage = " [lifetime]"

parser = ArgParser(description=usage_template.format(action=action),
usage="{} [--all | <node>... ]{}".format(action_type, addtion_usage),
add_help=False,
formatter_class=RawDescriptionHelpFormatter)
parser.add_argument("-h", "--help", action="store_true", dest="help", help="Show this help message")
parser.add_argument("--all", help="To {} on all nodes".format(action), action="store_true", dest="all")

options, args = parse_options(parser, args)
if options is None or args is None:
raise utils.TerminateSubCommand
if options.all and args:
context.fatal_error("Should either use --all or specific node(s)")

# return local node
if not options.all and not args:
return [utils.this_node()]
member_list = utils.list_cluster_nodes()
if not member_list:
context.fatal_error("Cannot get the node list from cluster")
for node in args:
if node not in member_list:
context.fatal_error("Node \"{}\" is not a cluster node".format(node))
# return node list
return member_list if options.all else args
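
In isolation, the option handling above reduces to a small decision table: --all and explicit nodes are mutually exclusive, no argument means the local node, and explicit nodes must be cluster members. A standalone sketch of the same logic with stand-ins for the crmsh utilities (the member list and node names are made up, so this is not the crmsh code path itself):

    from argparse import ArgumentParser

    def pick_nodes(argv, member_list, local_node):
        parser = ArgumentParser(add_help=False)
        parser.add_argument("--all", action="store_true", dest="all")
        options, args = parser.parse_known_args(argv)
        if options.all and args:
            raise ValueError("Should either use --all or specific node(s)")
        if not options.all and not args:
            return [local_node]                 # default: act on the local node
        for node in args:
            if node not in member_list:
                raise ValueError('Node "{}" is not a cluster node'.format(node))
        return member_list if options.all else args

    members = ["alpha", "bravo", "charlie"]
    print(pick_nodes([], members, "alpha"))          # ['alpha']
    print(pick_nodes(["--all"], members, "alpha"))   # ['alpha', 'bravo', 'charlie']
    print(pick_nodes(["bravo"], members, "alpha"))   # ['bravo']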


def _remove_completer(args):
try:
n = utils.list_cluster_nodes()
@@ -91,78 +138,94 @@ def __init__(self):
self._inventory_target = None

@command.skill_level('administrator')
def do_start(self, context):
def do_start(self, context, *args):
'''
Starts the cluster services on this node
Starts the cluster services on all nodes or specific node(s)
'''
try:
if utils.service_is_active("pacemaker.service"):
logger.info("Cluster services already started")
return
bootstrap.start_pacemaker()
if utils.is_qdevice_configured():
utils.start_service("corosync-qdevice")
logger.info("Cluster services started")
except IOError as err:
context.fatal_error(str(err))

# TODO: optionally start services on all nodes or specific node
node_list = parse_option_for_nodes(context, *args)
for node in node_list[:]:
if utils.service_is_active("pacemaker.service", remote_addr=node):
logger.info("Cluster services already started on {}".format(node))
node_list.remove(node)
Review thread:

Member: Pacemaker.service being active doesn't necessarily mean corosync-qdevice.service is active as well, right? Should corosync-qdevice be checked for the full set of nodes as well?

Collaborator Author: When using bootstrap to set up a cluster with qdevice, the qdevice service will be enabled, so after a reboot corosync-qdevice will be started. @zzhou1 What do you think?

Contributor (zzhou1, Nov 16, 2021): "node_list.remove(node)" can be wrong in the use case of a normal stop/start, e.g. when pacemaker.service is up but qdevice.service is not in the meantime. Not necessary in the case of reboot.

Collaborator Author: Improved in #898.

if not node_list:
return

if utils.is_qdevice_configured():
utils.start_service("corosync-qdevice", node_list=node_list)
Review thread:

Member: Although there are no dependencies between corosync-qdevice and pacemaker, it'd be better to perform the start of corosync-qdevice before pacemaker.

Collaborator Author: bootstrap.start_pacemaker here starts pacemaker.service; according to the dependency, corosync.service will be started first, so starting qdevice afterwards does make sense. Otherwise, starting qdevice without corosync started will fail :)

Member (gao-yan, Nov 16, 2021): Given that corosync-qdevice.service has these defined as well:

    Wants=corosync.service
    After=corosync.service

I'd expect a pure start of corosync-qdevice.service to resolve the dependency as well.

Contributor: I can see the point of moving the corosync-qdevice.service start operation before pacemaker. There is no harm in having multiple systemctl starts, in theory.

Collaborator Author: Done

bootstrap.start_pacemaker(node_list)
for node in node_list:
logger.info("Cluster services started on {}".format(node))

@command.skill_level('administrator')
def do_stop(self, context):
def do_stop(self, context, *args):
'''
Stops the cluster services on this node
Stops the cluster services on all nodes or specific node(s)
'''
try:
if not utils.service_is_active("corosync.service"):
logger.info("Cluster services already stopped")
return
if utils.service_is_active("corosync-qdevice"):
utils.stop_service("corosync-qdevice")
utils.stop_service("corosync")
logger.info("Cluster services stopped")
except IOError as err:
context.fatal_error(str(err))

# TODO: optionally stop services on all nodes or specific node
node_list = parse_option_for_nodes(context, *args)
for node in node_list[:]:
if not utils.service_is_active("corosync.service", remote_addr=node):
if utils.service_is_active("sbd.service", remote_addr=node):
utils.stop_service("corosync", remote_addr=node)
logger.info("Cluster services stopped on {}".format(node))
else:
logger.info("Cluster services already stopped on {}".format(node))
node_list.remove(node)
if not node_list:
return

bootstrap.wait_for_cluster("Waiting for {} online".format(' '.join(node_list)), node_list)
Review thread:

Member: Is there a special purpose to inserting this into the stop procedure and waiting for the listed nodes to be online?

Collaborator Author: Yes.
  - Just after crm cluster start --all, the status of all nodes is UNCLEAN; if crm cluster stop --all is executed at this time, I found the stop process hangs, and on some nodes the output of crm_mon shows all nodes' status as pending.
  - If all nodes are already Online, the check at the top of wait_for_cluster returns early and the "Waiting for ... online" line stays quiet.

Contributor (zzhou1, Nov 16, 2021): Good point.

I can understand the wait experience in bootstrap. However, this makes me think it is probably not necessary to force the user to wait for crm cluster start, and the same for crm cluster stop; I would rather remove that kind of wait for both of them.

It makes sense to let crm cluster stop abort directly if the criteria are not met. That is reasonable error handling, rather than having the sysadmin really wait in front of the screen.

For scripts that do want to wait, before we implement a '--wait' option, the following example steps could help:

    crm cluster start
    crm wait_for_startup
    crm cluster stop

Well, my idea is debatable as a different flavor of user experience. It is not a critical one, I think.


# When dlm is configured and quorum is lost, before stopping the cluster service we should
# set enable_quorum_fencing=0 and enable_quorum_lockspace=0 in the dlm config options
if utils.is_dlm_configured(node_list[0]) and not utils.is_quorate(node_list[0]):
logger.debug("Quorum is lost; Set enable_quorum_fencing=0 and enable_quorum_lockspace=0 for dlm")
utils.set_dlm_option(peer=node_list[0], enable_quorum_fencing=0, enable_quorum_lockspace=0)
Review thread:

Member: IIUC, both enable_quorum_fencing and enable_quorum_lockspace need to be disabled for an inquorate dlm to gracefully stop? Could there be any risks?

Reply: Yes, both. In my opinion there isn't any risk, because this action is only triggered by the command "crm cluster stop". Before this patch, the cluster (via dlm_controld) denied any further action until quorum was true; after this patch, the cluster will directly stop, and there is no other behavior.

Member: The biggest concern is, given that this cluster partition is inquorate, there might be a quorate partition still standing... For example, this partition has been split and is inquorate, but somehow hasn't been fenced. If we are shutting down this cluster partition, and these settings make the inquorate partition able to acquire access to the lockspace and corrupt data, that would be a disaster. We should never sacrifice data integrity even for graceful shutdown...

Reply: enable_quorum_lockspace is disabled; this lets dlm lockspace related operations keep going when cluster quorum is lost.

Reply: Your comment makes sense to me.

Member (gao-yan, Nov 16, 2021): Thinking about it on the more safe/paranoid side: even if the user has confirmed to proceed, allowing this simultaneously on multiple nodes is more like opening a Pandora's box... Right after that, during stop, this cluster partition might split apart into even more partitions...

If we go for it, we could ask the user once, but we'd better proceed with this specific procedure in a serialized way: set_dlm_option -> stop dlm/pacemaker for only one node at a time, and proceed to another node only after it has succeeded on this one.

Contributor: My previous point concerns the last standing single-node situation. Given that the node is inquorate already, the proposed behavior for crm cluster stop (i.e. crmsh -> set_dlm_option -> stop dlm/pacemaker) is just a "fencing" operation to protect data integrity, though not necessarily STONITH at the node level to do a reboot.

For the situation with multiple inquorate nodes, i.e. multiple partitions, the code here does have a problem for the '--all' situation, simply because set_dlm_option only applies to one node. Not sure if it is simple enough to address in this PR, or whether to open an issue to clarify this in another PR.

Reply: set_dlm_option is implemented via "dlm_tool set_config", which only runs on a single node at a time.

Contributor (zzhou1, Nov 23, 2021): Fine. Given the above, when all nodes are inquorate, what are the suggested graceful shutdown steps internally for "stop --all"? My reading of the code is that it only changes the current local node and does not repeat the same on other nodes.

The situation is more interesting, in theory, for a big cluster where some nodes are quorate and some are not. Agreed, it is a transient corner case. What are the suggested internal steps for "stop --all"? @zhaohem

Contributor (zzhou1, Nov 23, 2021): Maybe crmsh should never operate a cluster in the transient state at all by default, and ask the user to answer Y/n, or use --force?


# Stop pacemaker first; it ensures the cluster keeps quorum until corosync is stopped
utils.stop_service("pacemaker", node_list=node_list)
# Then stop qdevice if it is active
if utils.service_is_active("corosync-qdevice.service", node_list[0]):
utils.stop_service("corosync-qdevice.service", node_list=node_list)
# Last, stop corosync
utils.stop_service("corosync", node_list=node_list)

for node in node_list:
logger.info("Cluster services stopped on {}".format(node))
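
The dlm discussion above suggests serializing the shutdown: relax the quorum options and stop services on one node at a time, moving on only after the previous node has stopped. The following is a rough sketch of that reviewer proposal, with the crmsh helpers passed in as callables; it is not the code merged in this PR, and qdevice handling is omitted for brevity:

    def stop_cluster_serialized(node_list, set_dlm_option, stop_service,
                                is_dlm_configured, is_quorate):
        """Stop cluster services node by node, relaxing dlm quorum options first
        where needed. The helper callables mirror the signatures used above."""
        for node in node_list:
            if is_dlm_configured(node) and not is_quorate(node):
                # Only relax quorum handling right before stopping this node,
                # never cluster-wide in one shot.
                set_dlm_option(peer=node, enable_quorum_fencing=0,
                               enable_quorum_lockspace=0)
            stop_service("pacemaker", remote_addr=node)
            stop_service("corosync", remote_addr=node)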

@command.skill_level('administrator')
def do_restart(self, context):
def do_restart(self, context, *args):
'''
Restarts the cluster services on all nodes or specific node(s)
'''
parse_option_for_nodes(context, *args)
self.do_stop(context, *args)
self.do_start(context, *args)

def _enable_disable_common(self, context, *args):
'''
Restarts the cluster services on this node
Common part for enable and disable
'''
self.do_stop(context)
self.do_start(context)
node_list = parse_option_for_nodes(context, *args)
action = context.get_command_name()
utils.cluster_run_cmd("systemctl {} pacemaker.service".format(action), node_list)
if utils.is_qdevice_configured():
utils.cluster_run_cmd("systemctl {} corosync-qdevice.service".format(action), node_list)
Review thread:

Member: Technically maybe it should be able to disable corosync-qdevice.service even if qdevice is not configured?

Collaborator Author: corosync-qdevice.service will not be started if it is not configured, right? So I think checking whether it is configured and then doing the action does no harm?

Member: I guess it depends on how is_qdevice_configured() does the check. Of course one could remove the qdevice configuration from corosync.conf before stopping the running corosync-qdevice.service...

Contributor: Good point. I think I would naturally expect crm cluster stop to stop corosync-qdevice.service even if corosync.conf has no qdevice.

Collaborator Author (liangxin1300, Dec 1, 2021): PR to improve this: #895. It checks whether corosync-qdevice.service is available, not whether it is configured. Stopping qdevice should not have this issue, since I check whether the service is active and only then stop it.

for node in node_list:
logger.info("Cluster services %s on %s", action+'d', node)

@command.skill_level('administrator')
def do_enable(self, context):
def do_enable(self, context, *args):
'''
Enable the cluster services on this node
'''
try:
utils.enable_service("pacemaker.service")
if utils.is_qdevice_configured():
utils.enable_service("corosync-qdevice.service")
logger.info("Cluster services enabled")
except IOError as err:
context.fatal_error(str(err))

# TODO: optionally enable services on all nodes or specific node
self._enable_disable_common(context, *args)

@command.skill_level('administrator')
def do_disable(self, context):
def do_disable(self, context, *args):
'''
Disable the cluster services on this node
'''
try:
utils.disable_service("pacemaker.service")
if utils.is_qdevice_configured():
utils.disable_service("corosync-qdevice.service")
logger.info("Cluster services disabled")
except IOError as err:
context.fatal_error(str(err))

# TODO: optionally disable services on all nodes or specific node
self._enable_disable_common(context, *args)

def _args_implicit(self, context, args, name):
'''
@@ -663,7 +726,7 @@ def do_wait_for_startup(self, context, timeout='10'):
@command.skill_level('expert')
def do_run(self, context, cmd, *nodes):
'''
Execute the given command on all nodes/specific node, report outcome
Execute the given command on all nodes/specific node(s), report outcome
'''
try:
import parallax