DRAFT: Feature: Non-privileged hacluster

This enables creating the cluster under the hacluster user.
It requires some preparation. To use it, do the following.

Move the admin tools to /usr/bin so that they are on the non-root user's PATH:

$ sudo mv /sbin/crm* /sbin/cibadmin /sbin/stonith \
        /sbin/corosync-keygen /sbin/corosync-cfgtool /usr/bin/
$ sudo cp /sbin/csync2 /sbin/sbd /usr/bin/ # csync2 and sbd must also stay in /sbin/, hence cp

$ sudo chmod 770 /etc/pacemaker/
$ sudo chown hacluster:haclient -R /etc/csync2 /etc/corosync \
        /usr/share/doc/packages/corosync \
        /etc/sysconfig /var/lib/csync2 \
        /etc/crm /etc/lvm /etc/samba
$ sudo chmod -R 777 /etc/csync2 /etc/corosync \
        /usr/share/doc/packages/corosync \
        /etc/sysconfig /var/lib/csync2 \
        /etc/crm /etc/lvm /etc/samba
Set permissions on /run so that the lock can be used.
Pay attention:
  1) this is not recursive
  2) the permissions are reset to the defaults after a reboot
$ sudo chown hacluster:haclient /run
$ sudo chmod 777 /run
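
One way to make the /run permissions survive reboots (a sketch, assuming
systemd-tmpfiles is in use; this is not part of the commit) is a
tmpfiles.d rule that reapplies the owner and mode at boot:

$ echo 'z /run 0777 hacluster haclient -' | \
        sudo tee /etc/tmpfiles.d/hacluster-run.conf
$ sudo systemd-tmpfiles --create /etc/tmpfiles.d/hacluster-run.conf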

Additionally, set up csync2 to run as hacluster:
add 'User=hacluster' to the '[Service]' section of its unit file.

$ echo 'User=hacluster' | sudo tee -a /usr/lib/systemd/system/csync2@.service
  (a plain 'sudo echo ... >>' would not work here: the redirection runs
  in the unprivileged shell)
$ sudo systemctl daemon-reload
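
A variant that survives package updates (again a sketch, not part of this
commit) is a drop-in override instead of editing the unit under /usr/lib:

$ sudo mkdir -p /etc/systemd/system/csync2@.service.d
$ printf '[Service]\nUser=hacluster\n' | \
        sudo tee /etc/systemd/system/csync2@.service.d/user.conf
$ sudo systemctl daemon-reload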

$ su - hacluster

Now you can run the usual routines as hacluster:

$ crm cluster init ...
$ crm cluster join ...
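
For example (an illustrative invocation; the interface name and peer
hostname are assumptions, not prescribed by this commit):

$ crm cluster init -y -i eth0           # on the first node
$ crm cluster join -y -c node1 -i eth0  # on each joining node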
Aleksei Burlakov committed Aug 18, 2022
1 parent 5acffe4 commit d35e38c
Showing 5 changed files with 58 additions and 53 deletions.
81 changes: 40 additions & 41 deletions crmsh/bootstrap.py
@@ -582,7 +582,7 @@ def init_firewall_suse(tcp, udp):

def init_firewall_firewalld(tcp, udp):
has_firewalld = utils.service_is_active("firewalld")
-cmdbase = 'firewall-cmd --zone=public --permanent ' if has_firewalld else 'firewall-offline-cmd --zone=public '
+cmdbase = 'sudo firewall-cmd --zone=public --permanent ' if has_firewalld else 'sudo firewall-offline-cmd --zone=public '

def cmd(args):
if not invokerc(cmdbase + args):
@@ -595,7 +595,7 @@ def cmd(args):
cmd("--add-port={}/udp".format(p))

if has_firewalld:
if not invokerc("firewall-cmd --reload"):
if not invokerc("sudo firewall-cmd --reload"):
utils.fatal("Failed to reload firewall configuration.")

def init_firewall_ufw(tcp, udp):
@@ -652,6 +652,7 @@ def init_cluster_local():
firewall_open_corosync_ports()

# reset password, but only if it's not already set
+#FIXME! What to do with it? Do we need hacluster if there is already another user.
_rc, outp = utils.get_stdout("passwd -S hacluster")
ps = outp.strip().split()[1]
pass_msg = ""
@@ -724,7 +725,7 @@ def append_unique(fromfile, tofile, remote=None, from_local=False):
"""
if not utils.check_file_content_included(fromfile, tofile, remote=remote, source_local=from_local):
if from_local and remote:
-append_to_remote_file(fromfile, remote, tofile)
+append_to_remote_file(fromfile, "hacluster", remote, tofile)
else:
append(fromfile, tofile, remote=remote)

@@ -760,8 +761,7 @@ def init_ssh():
Configure passwordless SSH.
"""
utils.start_service("sshd.service", enable=True)
-for user in USER_LIST:
-configure_ssh_key(user)
+configure_ssh_key("hacluster")

# If not use -N/--nodes option
if not _context.node_list:
@@ -771,7 +771,7 @@
node_list = _context.node_list
# Swap public ssh key between remote node and local
for node in node_list:
-swap_public_ssh_key(node, add=True)
+swap_public_ssh_key(node, user="hacluster", add=True)
if utils.service_is_active("pacemaker.service", node):
utils.fatal("Cluster is currently active on {} - can't run".format(node))
# Swap public ssh key between one remote node and other remote nodes
@@ -831,18 +831,18 @@ def configure_ssh_key(user="root", remote=None):

cmd = ""
private_key, public_key, authorized_file = key_files(user).values()
-if not utils.detect_file(private_key, remote=remote):
+if not utils.detect_file(private_key, user=user, remote=remote):
logger.info("SSH key for {} does not exist, hence generate it now".format(user))
cmd = "ssh-keygen -q -f {} -C 'Cluster Internal on {}' -N ''".format(private_key, remote if remote else utils.this_node())
-elif not utils.detect_file(public_key, remote=remote):
+elif not utils.detect_file(public_key, user=user, remote=remote):
cmd = "ssh-keygen -y -f {} > {}".format(private_key, public_key)
if cmd:
-cmd = utils.add_su(cmd, user)
-utils.get_stdout_or_raise_error(cmd, remote=remote)
+#cmd = utils.add_su(cmd, user) # no more
+utils.get_stdout_or_raise_error(cmd, user=user, remote=remote)

-if not utils.detect_file(authorized_file, remote=remote):
+if not utils.detect_file(authorized_file, user=user, remote=remote):
cmd = "touch {}".format(authorized_file)
-utils.get_stdout_or_raise_error(cmd, remote=remote)
+utils.get_stdout_or_raise_error(cmd, user=user, remote=remote)

append_unique(public_key, authorized_file, remote=remote)

@@ -851,13 +851,13 @@ def init_ssh_remote():
"""
Called by ha-cluster-join
"""
authorized_keys_file = "/root/.ssh/authorized_keys"
authorized_keys_file = "/var/lib/heartbeat/cores/hacluster/.ssh/authorized_keys"
if not os.path.exists(authorized_keys_file):
open(authorized_keys_file, 'w').close()
authkeys = open(authorized_keys_file, "r+")
authkeys_data = authkeys.read()
for key in ("id_rsa", "id_dsa", "id_ecdsa", "id_ed25519"):
fn = os.path.join("/root/.ssh", key)
fn = os.path.join("/var/lib/heartbeat/cores/hacluster/.ssh", key)
if not os.path.exists(fn):
continue
keydata = open(fn + ".pub").read()
@@ -881,11 +881,11 @@ def copy_ssh_key(source_key, user, remote_node):
utils.fatal("{}\n{}".format(str(err), err_details_string))


-def append_to_remote_file(fromfile, remote_node, tofile):
+def append_to_remote_file(fromfile, user, remote_node, tofile):
"""
Append content of fromfile to tofile on remote_node
"""
cmd = "cat {} | ssh {} root@{} 'cat >> {}'".format(fromfile, SSH_OPTION, remote_node, tofile)
cmd = "cat {} | ssh {} {}@{} 'cat >> {}'".format(fromfile, SSH_OPTION, user, remote_node, tofile)
utils.get_stdout_or_raise_error(cmd)


@@ -923,11 +923,11 @@ def csync2_update(path):
If there was a conflict, use '-f' to force this side to win
'''
invoke("csync2 -rm {}".format(path))
if invokerc("csync2 -rxv {}".format(path)):
invoke("sudo csync2 -rm {}".format(path))
if invokerc("sudo csync2 -rxv {}".format(path)):
return
invoke("csync2 -rf {}".format(path))
if not invokerc("csync2 -rxv {}".format(path)):
invoke("sudo csync2 -rf {}".format(path))
if not invokerc("sudo csync2 -rxv {}".format(path)):
logger.warning("{} was not synced".format(path))


@@ -1380,15 +1380,14 @@ def join_ssh(seed_host):
utils.fatal("No existing IP/hostname specified (use -c option)")

utils.start_service("sshd.service", enable=True)
-for user in USER_LIST:
-configure_ssh_key(user)
-swap_public_ssh_key(seed_host, user)
+configure_ssh_key("hacluster")
+swap_public_ssh_key(seed_host, "hacluster")

# This makes sure the seed host has its own SSH keys in its own
# authorized_keys file (again, to help with the case where the
# user has done manual initial setup without the assistance of
# ha-cluster-init).
rc, _, err = invoke("ssh {} root@{} crm cluster init -i {} ssh_remote".format(SSH_OPTION, seed_host, _context.default_nic_list[0]))
rc, _, err = invoke("ssh {} hacluster@{} crm cluster init -i {} ssh_remote".format(SSH_OPTION, seed_host, _context.default_nic_list[0]))
if not rc:
utils.fatal("Can't invoke crm cluster init -i {} ssh_remote on {}: {}".format(_context.default_nic_list[0], seed_host, err))

@@ -1409,10 +1408,10 @@ def swap_public_ssh_key(remote_node, user="root", add=False):
if user == "root":
copy_ssh_key(public_key, user, remote_node)
else:
-append_to_remote_file(public_key, remote_node, authorized_file)
+append_to_remote_file(public_key, user, remote_node, authorized_file)

if add:
-configure_ssh_key(remote=remote_node)
+configure_ssh_key(user, remote_node)

try:
# Fetch public key file from remote_node
@@ -1438,11 +1437,11 @@ def fetch_public_key_from_remote_node(node, user="root"):
home_dir = userdir.gethomedir(user)
for key in ("id_rsa", "id_ecdsa", "id_ed25519", "id_dsa"):
public_key_file = "{}/.ssh/{}.pub".format(home_dir, key)
cmd = "ssh {} root@{} 'test -f {}'".format(SSH_OPTION, node, public_key_file)
cmd = "ssh {} {}@{} 'test -f {}'".format(SSH_OPTION, user, node, public_key_file)
if not invokerc(cmd):
continue
_, temp_public_key_file = tmpfiles.create()
cmd = "scp {} root@{}:{} {}".format(SSH_OPTION, node, public_key_file, temp_public_key_file)
cmd = "scp {} {}@{}:{} {}".format(SSH_OPTION, user, node, public_key_file, temp_public_key_file)
rc, _, err = invoke(cmd)
if not rc:
utils.fatal("Failed to run \"{}\": {}".format(cmd, err))
@@ -1469,7 +1468,7 @@ def join_csync2(seed_host):
# If we *were* updating /etc/hosts, the next line would have "\"$hosts_line\"" as
# the last arg (but this requires re-enabling this functionality in ha-cluster-init)
cmd = "crm cluster init -i {} csync2_remote {}".format(_context.default_nic_list[0], utils.this_node())
rc, _, err = invoke("ssh {} root@{} {}".format(SSH_OPTION, seed_host, cmd))
rc, _, err = invoke("ssh {} hacluster@{} {}".format(SSH_OPTION, seed_host, cmd))
if not rc:
utils.fatal("Can't invoke \"{}\" on {}: {}".format(cmd, seed_host, err))

@@ -1479,7 +1478,7 @@
# invoke scp root@seed_host:/etc/hosts $tmp_conf \
# || error "Can't retrieve /etc/hosts from seed_host"
# install_tmp $tmp_conf /etc/hosts
rc, _, err = invoke("scp root@%s:'/etc/csync2/{csync2.cfg,key_hagroup}' /etc/csync2" % (seed_host))
rc, _, err = invoke("scp hacluster@%s:'/etc/csync2/{csync2.cfg,key_hagroup}' /etc/csync2" % (seed_host))
if not rc:
utils.fatal("Can't retrieve csync2 config from {}: {}".format(seed_host, err))

@@ -1494,7 +1493,7 @@
# they haven't gone to all nodes in the cluster, which means a
# subseqent join of another node can fail its sync of corosync.conf
# when it updates expected_votes. Grrr...
-if not invokerc('ssh {} root@{} "csync2 -rm /; csync2 -rxv || csync2 -rf / && csync2 -rxv"'.format(SSH_OPTION, seed_host)):
+if not invokerc('ssh {} hacluster@{} "csync2 -rm /; csync2 -rxv || csync2 -rf / && csync2 -rxv"'.format(SSH_OPTION, seed_host)):
print("")
logger.warning("csync2 run failed - some files may not be sync'd")

@@ -1515,7 +1514,7 @@ def join_ssh_merge(_cluster_node):
utils.get_stdout_or_raise_error("ssh {} {} true".format(SSH_OPTION, utils.this_node()))

known_hosts_new = set()
cat_cmd = "[ -e /root/.ssh/known_hosts ] && cat /root/.ssh/known_hosts || true"
cat_cmd = "[ -e /var/lib/heartbeat/cores/hacluster/.ssh/known_hosts ] && cat /var/lib/heartbeat/cores/hacluster/.ssh/known_hosts || true"
logger_utils.log_only_to_file("parallax.call {} : {}".format(hosts, cat_cmd))
results = parallax.parallax_call(hosts, cat_cmd, strict=False)
for host, result in results:
@@ -1528,7 +1527,7 @@
hoststxt = "\n".join(sorted(known_hosts_new))
tmpf = utils.str2tmp(hoststxt)
logger_utils.log_only_to_file("parallax.copy {} : {}".format(hosts, hoststxt))
results = parallax.parallax_copy(hosts, tmpf, "/root/.ssh/known_hosts", strict=False)
results = parallax.parallax_copy(hosts, tmpf, "/var/lib/heartbeat/cores/hacluster/.ssh/known_hosts", strict=False)
for host, result in results:
if isinstance(result, parallax.Error):
logger.warning("scp to {} failed ({}), known_hosts update may be incomplete".format(host, str(result)))
@@ -1620,7 +1619,7 @@ def setup_passwordless_with_other_nodes(init_node):
Should fetch the node list from init node, then swap the key
"""
# Fetch cluster nodes list
cmd = "ssh {} root@{} crm_node -l".format(SSH_OPTION, init_node)
cmd = "ssh {} hacluster@{} crm_node -l".format(SSH_OPTION, init_node)
rc, out, err = utils.get_stdout_stderr(cmd)
if rc != 0:
utils.fatal("Can't fetch cluster nodes list from {}: {}".format(init_node, err))
@@ -1643,7 +1642,7 @@
cluster_nodes_list.append(tokens[1])

# Filter out init node from cluster_nodes_list
cmd = "ssh {} root@{} hostname".format(SSH_OPTION, init_node)
cmd = "ssh {} hacluster@{} hostname".format(SSH_OPTION, init_node)
rc, out, err = utils.get_stdout_stderr(cmd)
if rc != 0:
utils.fatal("Can't fetch hostname of {}: {}".format(init_node, err))
@@ -1705,7 +1704,7 @@ def update_nodeid(nodeid, node=None):
# that yet, so the following crawling horror takes a punt on the seed
# node being up, then asks it for a list of mountpoints...
if _context.cluster_node:
_rc, outp, _ = utils.get_stdout_stderr("ssh {} root@{} 'cibadmin -Q --xpath \"//primitive\"'".format(SSH_OPTION, seed_host))
_rc, outp, _ = utils.get_stdout_stderr("ssh {} hacluster@{} 'cibadmin -Q --xpath \"//primitive\"'".format(SSH_OPTION, seed_host))
if outp:
xml = etree.fromstring(outp)
mountpoints = xml.xpath(' and '.join(['//primitive[@class="ocf"',
@@ -1747,7 +1746,7 @@ def update_nodeid(nodeid, node=None):
except corosync.IPAlreadyConfiguredError as e:
logger.warning(e)
csync2_update(corosync.conf())
invoke("ssh {} root@{} corosync-cfgtool -R".format(SSH_OPTION, seed_host))
invoke("ssh {} hacluster@{} corosync-cfgtool -R".format(SSH_OPTION, seed_host))

_context.sbd_manager.join_sbd(seed_host)

@@ -1892,7 +1891,7 @@ def remove_node_from_cluster():
qdevice.QDevice.remove_qdevice_db([node])

# delete configuration files from the node to be removed
-rc, _, err = invoke('ssh {} root@{} "bash -c \\\"rm -f {}\\\""'.format(SSH_OPTION, node, " ".join(_context.rm_list)))
+rc, _, err = invoke('ssh {} hacluster@{} "bash -c \\\"rm -f {}\\\""'.format(SSH_OPTION, node, " ".join(_context.rm_list)))
if not rc:
utils.fatal("Deleting the configuration files failed: {}".format(err))

@@ -2038,7 +2037,7 @@ def bootstrap_add(context):
logger.info("Adding node {} to cluster".format(node))
cmd = "crm cluster join{} -c {}{}".format(" -y" if _context.yes_to_all else "", utils.this_node(), options)
logger.info("Running command on {}: {}".format(node, cmd))
utils.ext_cmd_nosudo("ssh{} root@{} {} '{}'".format("" if _context.yes_to_all else " -t", node, SSH_OPTION, cmd))
utils.ext_cmd_nosudo("ssh{} hacluster@{} {} '{}'".format("" if _context.yes_to_all else " -t", node, SSH_OPTION, cmd))


def bootstrap_join(context):
@@ -2312,9 +2311,9 @@ def bootstrap_init_geo(context):

def geo_fetch_config(node):
# TODO: clean this up
logger.info("Retrieving configuration - This may prompt for root@%s:" % (node))
logger.info("Retrieving configuration - This may prompt for hacluster@%s:" % (node))
tmpdir = tmpfiles.create_dir()
rc, _, err = invoke("scp -oStrictHostKeyChecking=no root@{}:'{}/*' {}/".format(node, BOOTH_DIR, tmpdir))
rc, _, err = invoke("scp -oStrictHostKeyChecking=no hacluster@{}:'{}/*' {}/".format(node, BOOTH_DIR, tmpdir))
if not rc:
utils.fatal("Failed to retrieve configuration: {}".format(err))
try:
2 changes: 1 addition & 1 deletion crmsh/lock.py
@@ -114,7 +114,7 @@ def _run(self, cmd):
"""
Run command on remote node
"""
cmd = "ssh {} root@{} \"{}\"".format(self.SSH_OPTION, self.remote_node, cmd)
cmd = "ssh {} hacluster@{} \"{}\"".format(self.SSH_OPTION, self.remote_node, cmd)
rc, out, err = utils.get_stdout_stderr(cmd)
if rc == self.SSH_EXIT_ERR:
raise SSHError(err)
10 changes: 5 additions & 5 deletions crmsh/sbd.py
@@ -412,7 +412,7 @@ def _initialize_sbd(self):
for dev in self._sbd_devices:
if self.no_overwrite and SBDManager.has_sbd_device_already_initialized(dev):
continue
rc, _, err = bootstrap.invoke("sbd {} -d {} create".format(opt, dev))
rc, _, err = bootstrap.invoke("sudo sbd {} -d {} create".format(opt, dev))
if not rc:
utils.fatal("Failed to initialize SBD device {}: {}".format(dev, err))

@@ -471,7 +471,7 @@ def _enable_sbd_service(self):
self._restart_cluster_and_configure_sbd_ra()
else:
# in init process
bootstrap.invoke("systemctl enable sbd.service")
bootstrap.invoke("sudo systemctl enable sbd.service")

def _warn_diskless_sbd(self, peer=None):
"""
@@ -502,7 +502,7 @@ def sbd_init(self):
self._watchdog_inst.init_watchdog()
self._get_sbd_device()
if not self._sbd_devices and not self.diskless_sbd:
bootstrap.invoke("systemctl disable sbd.service")
bootstrap.invoke("sudo systemctl disable sbd.service")
return
self._warn_diskless_sbd()
self._initialize_sbd()
@@ -542,7 +542,7 @@ def join_sbd(self, peer_host):
if not utils.package_is_installed("sbd"):
return
if not os.path.exists(SYSCONFIG_SBD) or not utils.service_is_enabled("sbd.service", peer_host):
bootstrap.invoke("systemctl disable sbd.service")
bootstrap.invoke("sudo systemctl disable sbd.service")
return
self._watchdog_inst = Watchdog(peer_host=peer_host)
self._watchdog_inst.join_watchdog()
@@ -552,7 +552,7 @@
else:
self._warn_diskless_sbd(peer_host)
logger.info("Got {}SBD configuration".format("" if dev_list else "diskless "))
bootstrap.invoke("systemctl enable sbd.service")
bootstrap.invoke("sudo systemctl enable sbd.service")

@classmethod
def verify_sbd_device(cls):
16 changes: 11 additions & 5 deletions crmsh/utils.py
@@ -2116,7 +2116,7 @@ def check_ssh_passwd_need(host, user="root"):
"""
ssh_options = "-o StrictHostKeyChecking=no -o EscapeChar=none -o ConnectTimeout=15"
ssh_cmd = "ssh {} -T -o Batchmode=yes {} true".format(ssh_options, host)
-ssh_cmd = add_su(ssh_cmd, user)
+# ssh_cmd = add_su(ssh_cmd, user)
rc, _, _ = get_stdout_stderr(ssh_cmd)
return rc != 0

@@ -2533,6 +2533,12 @@ def _do_action(self, action_type):
raise ValueError("status_type should be {}".format('/'.join(list(self.ACTION_MAP.values()))))

cmd = "systemctl {} {}".format(action_type, self.service_name)
+if action_type not in ["is-active", "is-enabled", "list-unit-files"]:
+cmd = "sudo " + cmd
+
+if len(self.node_list) == 1 and self.node_list[0] == this_node():
+self.node_list= [] # the else: case below
+
if self.node_list:
cluster_run_cmd(cmd, self.node_list)
return True, None
@@ -2675,12 +2681,12 @@ def calculate_quorate_status(expected_votes, actual_votes):
return int(actual_votes)/int(expected_votes) > 0.5


-def get_stdout_or_raise_error(cmd, remote=None, success_val_list=[0], no_raise=False):
+def get_stdout_or_raise_error(cmd, user="hacluster", remote=None, success_val_list=[0], no_raise=False):
"""
Common function to get stdout from cmd or raise exception
"""
if remote:
cmd = "ssh {} root@{} \"{}\"".format(SSH_OPTION, remote, cmd)
cmd = "ssh {} {}@{} \"{}\"".format(SSH_OPTION, user, remote, cmd)
rc, out, err = get_stdout_stderr(cmd, no_reg=True)
if rc not in success_val_list and not no_raise:
raise ValueError("Failed to run \"{}\": {}".format(cmd, err))
@@ -3112,15 +3118,15 @@ def has_dup_value(_list):
return _list and len(_list) != len(set(_list))


-def detect_file(_file, remote=None):
+def detect_file(_file, user="hacluster", remote=None):
"""
Detect if file exists, support both local and remote
"""
rc = False
if not remote:
rc = os.path.exists(_file)
else:
cmd = "ssh {} root@{} 'test -f {}'".format(SSH_OPTION, remote, _file)
cmd = "ssh {} {}@{} 'test -f {}'".format(SSH_OPTION, user, remote, _file)
code, _, _ = get_stdout_stderr(cmd)
rc = code == 0
return rc
2 changes: 1 addition & 1 deletion crmsh/watchdog.py
@@ -8,7 +8,7 @@ class Watchdog(object):
"""
Class to find valid watchdog device name
"""
QUERY_CMD = "sbd query-watchdog"
QUERY_CMD = "sudo sbd query-watchdog"
DEVICE_FIND_REGREX = "\[[0-9]+\] (/dev/.*)\n.*\nDriver: (.*)"

def __init__(self, _input=None, peer_host=None):
