From d35e38cc3bdef77222db0a5bd1691e87cbd50ec6 Mon Sep 17 00:00:00 2001
From: Aleksei Burlakov
Date: Mon, 15 Aug 2022 16:50:19 +0200
Subject: [PATCH] DRAFT: Feature: Non-privileged hacluster

It enables creating the cluster under the hacluster user. It requires some
preparations. To use it, do:

$ sudo mv /sbin/crm* /sbin/cibadmin /sbin/stonith \
    /sbin/corosync-keygen /sbin/corosync-cfgtool /usr/bin/
$ sudo cp /sbin/csync2 /sbin/sbd /usr/bin/  # sbd and csync2 must still exist in /sbin/
$ sudo chmod 770 /etc/pacemaker/
$ sudo chown hacluster:haclient -R /etc/csync2 /etc/corosync \
    /usr/share/doc/packages/corosync \
    /etc/sysconfig /var/lib/csync2 \
    /etc/crm /etc/lvm /etc/samba
$ sudo chmod 777 -R /etc/csync2 /etc/corosync \
    /usr/share/doc/packages/corosync \
    /etc/sysconfig /var/lib/csync2 \
    /etc/crm /etc/lvm /etc/samba

Permissions on /run are needed to create the lock. Pay attention:
1) this is not recursive
2) the permissions are reset to the defaults after a reboot
$ sudo chown hacluster:haclient /run
$ sudo chmod 777 /run

Besides that, set up csync2 to run as hacluster by adding 'User=hacluster'
to the '[Service]' section:
$ echo 'User=hacluster' | sudo tee -a /usr/lib/systemd/system/csync2@.service
$ sudo systemctl daemon-reload

$ su - hacluster

Now you can run the usual routines as hacluster:
$ crm cluster init ...
$ crm cluster join ...
---
 crmsh/bootstrap.py | 81 +++++++++++++++++++++++-----------------------
 crmsh/lock.py      |  2 +-
 crmsh/sbd.py       | 10 +++---
 crmsh/utils.py     | 16 ++++++---
 crmsh/watchdog.py  |  2 +-
 5 files changed, 58 insertions(+), 53 deletions(-)

diff --git a/crmsh/bootstrap.py b/crmsh/bootstrap.py
index d0d62d27c7..ef97f93689 100644
--- a/crmsh/bootstrap.py
+++ b/crmsh/bootstrap.py
@@ -582,7 +582,7 @@ def init_firewall_suse(tcp, udp):
 
 def init_firewall_firewalld(tcp, udp):
     has_firewalld = utils.service_is_active("firewalld")
-    cmdbase = 'firewall-cmd --zone=public --permanent ' if has_firewalld else 'firewall-offline-cmd --zone=public '
+    cmdbase = 'sudo firewall-cmd --zone=public --permanent ' if has_firewalld else 'sudo firewall-offline-cmd --zone=public '
 
     def cmd(args):
         if not invokerc(cmdbase + args):
@@ -595,7 +595,7 @@ def cmd(args):
         cmd("--add-port={}/udp".format(p))
 
     if has_firewalld:
-        if not invokerc("firewall-cmd --reload"):
+        if not invokerc("sudo firewall-cmd --reload"):
             utils.fatal("Failed to reload firewall configuration.")
 
 def init_firewall_ufw(tcp, udp):
@@ -652,6 +652,7 @@ def init_cluster_local():
     firewall_open_corosync_ports()
 
     # reset password, but only if it's not already set
+    #FIXME! What to do with it? Do we need hacluster if there is already another user.
     _rc, outp = utils.get_stdout("passwd -S hacluster")
     ps = outp.strip().split()[1]
     pass_msg = ""
@@ -724,7 +725,7 @@ def append_unique(fromfile, tofile, remote=None, from_local=False):
     """
     if not utils.check_file_content_included(fromfile, tofile, remote=remote, source_local=from_local):
         if from_local and remote:
-            append_to_remote_file(fromfile, remote, tofile)
+            append_to_remote_file(fromfile, "hacluster", remote, tofile)
         else:
             append(fromfile, tofile, remote=remote)
 
@@ -760,8 +761,7 @@ def init_ssh():
     Configure passwordless SSH.
""" utils.start_service("sshd.service", enable=True) - for user in USER_LIST: - configure_ssh_key(user) + configure_ssh_key("hacluster") # If not use -N/--nodes option if not _context.node_list: @@ -771,7 +771,7 @@ def init_ssh(): node_list = _context.node_list # Swap public ssh key between remote node and local for node in node_list: - swap_public_ssh_key(node, add=True) + swap_public_ssh_key(node, user="hacluster", add=True) if utils.service_is_active("pacemaker.service", node): utils.fatal("Cluster is currently active on {} - can't run".format(node)) # Swap public ssh key between one remote node and other remote nodes @@ -831,18 +831,18 @@ def configure_ssh_key(user="root", remote=None): cmd = "" private_key, public_key, authorized_file = key_files(user).values() - if not utils.detect_file(private_key, remote=remote): + if not utils.detect_file(private_key, user=user, remote=remote): logger.info("SSH key for {} does not exist, hence generate it now".format(user)) cmd = "ssh-keygen -q -f {} -C 'Cluster Internal on {}' -N ''".format(private_key, remote if remote else utils.this_node()) - elif not utils.detect_file(public_key, remote=remote): + elif not utils.detect_file(public_key, user=user, remote=remote): cmd = "ssh-keygen -y -f {} > {}".format(private_key, public_key) if cmd: - cmd = utils.add_su(cmd, user) - utils.get_stdout_or_raise_error(cmd, remote=remote) + #cmd = utils.add_su(cmd, user) # no more + utils.get_stdout_or_raise_error(cmd, user=user, remote=remote) - if not utils.detect_file(authorized_file, remote=remote): + if not utils.detect_file(authorized_file, user=user, remote=remote): cmd = "touch {}".format(authorized_file) - utils.get_stdout_or_raise_error(cmd, remote=remote) + utils.get_stdout_or_raise_error(cmd, user=user, remote=remote) append_unique(public_key, authorized_file, remote=remote) @@ -851,13 +851,13 @@ def init_ssh_remote(): """ Called by ha-cluster-join """ - authorized_keys_file = "/root/.ssh/authorized_keys" + authorized_keys_file = "/var/lib/heartbeat/cores/hacluster/.ssh/authorized_keys" if not os.path.exists(authorized_keys_file): open(authorized_keys_file, 'w').close() authkeys = open(authorized_keys_file, "r+") authkeys_data = authkeys.read() for key in ("id_rsa", "id_dsa", "id_ecdsa", "id_ed25519"): - fn = os.path.join("/root/.ssh", key) + fn = os.path.join("/var/lib/heartbeat/cores/hacluster/.ssh", key) if not os.path.exists(fn): continue keydata = open(fn + ".pub").read() @@ -881,11 +881,11 @@ def copy_ssh_key(source_key, user, remote_node): utils.fatal("{}\n{}".format(str(err), err_details_string)) -def append_to_remote_file(fromfile, remote_node, tofile): +def append_to_remote_file(fromfile, user, remote_node, tofile): """ Append content of fromfile to tofile on remote_node """ - cmd = "cat {} | ssh {} root@{} 'cat >> {}'".format(fromfile, SSH_OPTION, remote_node, tofile) + cmd = "cat {} | ssh {} {}@{} 'cat >> {}'".format(fromfile, SSH_OPTION, user, remote_node, tofile) utils.get_stdout_or_raise_error(cmd) @@ -923,11 +923,11 @@ def csync2_update(path): If there was a conflict, use '-f' to force this side to win ''' - invoke("csync2 -rm {}".format(path)) - if invokerc("csync2 -rxv {}".format(path)): + invoke("sudo csync2 -rm {}".format(path)) + if invokerc("sudo csync2 -rxv {}".format(path)): return - invoke("csync2 -rf {}".format(path)) - if not invokerc("csync2 -rxv {}".format(path)): + invoke("sudo csync2 -rf {}".format(path)) + if not invokerc("sudo csync2 -rxv {}".format(path)): logger.warning("{} was not synced".format(path)) @@ -1380,15 
+1380,14 @@ def join_ssh(seed_host): utils.fatal("No existing IP/hostname specified (use -c option)") utils.start_service("sshd.service", enable=True) - for user in USER_LIST: - configure_ssh_key(user) - swap_public_ssh_key(seed_host, user) + configure_ssh_key("hacluster") + swap_public_ssh_key(seed_host, "hacluster") # This makes sure the seed host has its own SSH keys in its own # authorized_keys file (again, to help with the case where the # user has done manual initial setup without the assistance of # ha-cluster-init). - rc, _, err = invoke("ssh {} root@{} crm cluster init -i {} ssh_remote".format(SSH_OPTION, seed_host, _context.default_nic_list[0])) + rc, _, err = invoke("ssh {} hacluster@{} crm cluster init -i {} ssh_remote".format(SSH_OPTION, seed_host, _context.default_nic_list[0])) if not rc: utils.fatal("Can't invoke crm cluster init -i {} ssh_remote on {}: {}".format(_context.default_nic_list[0], seed_host, err)) @@ -1409,10 +1408,10 @@ def swap_public_ssh_key(remote_node, user="root", add=False): if user == "root": copy_ssh_key(public_key, user, remote_node) else: - append_to_remote_file(public_key, remote_node, authorized_file) + append_to_remote_file(public_key, user, remote_node, authorized_file) if add: - configure_ssh_key(remote=remote_node) + configure_ssh_key(user, remote_node) try: # Fetch public key file from remote_node @@ -1438,11 +1437,11 @@ def fetch_public_key_from_remote_node(node, user="root"): home_dir = userdir.gethomedir(user) for key in ("id_rsa", "id_ecdsa", "id_ed25519", "id_dsa"): public_key_file = "{}/.ssh/{}.pub".format(home_dir, key) - cmd = "ssh {} root@{} 'test -f {}'".format(SSH_OPTION, node, public_key_file) + cmd = "ssh {} {}@{} 'test -f {}'".format(SSH_OPTION, user, node, public_key_file) if not invokerc(cmd): continue _, temp_public_key_file = tmpfiles.create() - cmd = "scp {} root@{}:{} {}".format(SSH_OPTION, node, public_key_file, temp_public_key_file) + cmd = "scp {} {}@{}:{} {}".format(SSH_OPTION, user, node, public_key_file, temp_public_key_file) rc, _, err = invoke(cmd) if not rc: utils.fatal("Failed to run \"{}\": {}".format(cmd, err)) @@ -1469,7 +1468,7 @@ def join_csync2(seed_host): # If we *were* updating /etc/hosts, the next line would have "\"$hosts_line\"" as # the last arg (but this requires re-enabling this functionality in ha-cluster-init) cmd = "crm cluster init -i {} csync2_remote {}".format(_context.default_nic_list[0], utils.this_node()) - rc, _, err = invoke("ssh {} root@{} {}".format(SSH_OPTION, seed_host, cmd)) + rc, _, err = invoke("ssh {} hacluster@{} {}".format(SSH_OPTION, seed_host, cmd)) if not rc: utils.fatal("Can't invoke \"{}\" on {}: {}".format(cmd, seed_host, err)) @@ -1479,7 +1478,7 @@ def join_csync2(seed_host): # invoke scp root@seed_host:/etc/hosts $tmp_conf \ # || error "Can't retrieve /etc/hosts from seed_host" # install_tmp $tmp_conf /etc/hosts - rc, _, err = invoke("scp root@%s:'/etc/csync2/{csync2.cfg,key_hagroup}' /etc/csync2" % (seed_host)) + rc, _, err = invoke("scp hacluster@%s:'/etc/csync2/{csync2.cfg,key_hagroup}' /etc/csync2" % (seed_host)) if not rc: utils.fatal("Can't retrieve csync2 config from {}: {}".format(seed_host, err)) @@ -1494,7 +1493,7 @@ def join_csync2(seed_host): # they haven't gone to all nodes in the cluster, which means a # subseqent join of another node can fail its sync of corosync.conf # when it updates expected_votes. Grrr... 
- if not invokerc('ssh {} root@{} "csync2 -rm /; csync2 -rxv || csync2 -rf / && csync2 -rxv"'.format(SSH_OPTION, seed_host)): + if not invokerc('ssh {} hacluster@{} "csync2 -rm /; csync2 -rxv || csync2 -rf / && csync2 -rxv"'.format(SSH_OPTION, seed_host)): print("") logger.warning("csync2 run failed - some files may not be sync'd") @@ -1515,7 +1514,7 @@ def join_ssh_merge(_cluster_node): utils.get_stdout_or_raise_error("ssh {} {} true".format(SSH_OPTION, utils.this_node())) known_hosts_new = set() - cat_cmd = "[ -e /root/.ssh/known_hosts ] && cat /root/.ssh/known_hosts || true" + cat_cmd = "[ -e /var/lib/heartbeat/cores/hacluster/.ssh/known_hosts ] && cat /var/lib/heartbeat/cores/hacluster/.ssh/known_hosts || true" logger_utils.log_only_to_file("parallax.call {} : {}".format(hosts, cat_cmd)) results = parallax.parallax_call(hosts, cat_cmd, strict=False) for host, result in results: @@ -1528,7 +1527,7 @@ def join_ssh_merge(_cluster_node): hoststxt = "\n".join(sorted(known_hosts_new)) tmpf = utils.str2tmp(hoststxt) logger_utils.log_only_to_file("parallax.copy {} : {}".format(hosts, hoststxt)) - results = parallax.parallax_copy(hosts, tmpf, "/root/.ssh/known_hosts", strict=False) + results = parallax.parallax_copy(hosts, tmpf, "/var/lib/heartbeat/cores/hacluster/.ssh/known_hosts", strict=False) for host, result in results: if isinstance(result, parallax.Error): logger.warning("scp to {} failed ({}), known_hosts update may be incomplete".format(host, str(result))) @@ -1620,7 +1619,7 @@ def setup_passwordless_with_other_nodes(init_node): Should fetch the node list from init node, then swap the key """ # Fetch cluster nodes list - cmd = "ssh {} root@{} crm_node -l".format(SSH_OPTION, init_node) + cmd = "ssh {} hacluster@{} crm_node -l".format(SSH_OPTION, init_node) rc, out, err = utils.get_stdout_stderr(cmd) if rc != 0: utils.fatal("Can't fetch cluster nodes list from {}: {}".format(init_node, err)) @@ -1643,7 +1642,7 @@ def setup_passwordless_with_other_nodes(init_node): cluster_nodes_list.append(tokens[1]) # Filter out init node from cluster_nodes_list - cmd = "ssh {} root@{} hostname".format(SSH_OPTION, init_node) + cmd = "ssh {} hacluster@{} hostname".format(SSH_OPTION, init_node) rc, out, err = utils.get_stdout_stderr(cmd) if rc != 0: utils.fatal("Can't fetch hostname of {}: {}".format(init_node, err)) @@ -1705,7 +1704,7 @@ def update_nodeid(nodeid, node=None): # that yet, so the following crawling horror takes a punt on the seed # node being up, then asks it for a list of mountpoints... 
if _context.cluster_node: - _rc, outp, _ = utils.get_stdout_stderr("ssh {} root@{} 'cibadmin -Q --xpath \"//primitive\"'".format(SSH_OPTION, seed_host)) + _rc, outp, _ = utils.get_stdout_stderr("ssh {} hacluster@{} 'cibadmin -Q --xpath \"//primitive\"'".format(SSH_OPTION, seed_host)) if outp: xml = etree.fromstring(outp) mountpoints = xml.xpath(' and '.join(['//primitive[@class="ocf"', @@ -1747,7 +1746,7 @@ def update_nodeid(nodeid, node=None): except corosync.IPAlreadyConfiguredError as e: logger.warning(e) csync2_update(corosync.conf()) - invoke("ssh {} root@{} corosync-cfgtool -R".format(SSH_OPTION, seed_host)) + invoke("ssh {} hacluster@{} corosync-cfgtool -R".format(SSH_OPTION, seed_host)) _context.sbd_manager.join_sbd(seed_host) @@ -1892,7 +1891,7 @@ def remove_node_from_cluster(): qdevice.QDevice.remove_qdevice_db([node]) # delete configuration files from the node to be removed - rc, _, err = invoke('ssh {} root@{} "bash -c \\\"rm -f {}\\\""'.format(SSH_OPTION, node, " ".join(_context.rm_list))) + rc, _, err = invoke('ssh {} hacluster@{} "bash -c \\\"rm -f {}\\\""'.format(SSH_OPTION, node, " ".join(_context.rm_list))) if not rc: utils.fatal("Deleting the configuration files failed: {}".format(err)) @@ -2038,7 +2037,7 @@ def bootstrap_add(context): logger.info("Adding node {} to cluster".format(node)) cmd = "crm cluster join{} -c {}{}".format(" -y" if _context.yes_to_all else "", utils.this_node(), options) logger.info("Running command on {}: {}".format(node, cmd)) - utils.ext_cmd_nosudo("ssh{} root@{} {} '{}'".format("" if _context.yes_to_all else " -t", node, SSH_OPTION, cmd)) + utils.ext_cmd_nosudo("ssh{} hacluster@{} {} '{}'".format("" if _context.yes_to_all else " -t", node, SSH_OPTION, cmd)) def bootstrap_join(context): @@ -2312,9 +2311,9 @@ def bootstrap_init_geo(context): def geo_fetch_config(node): # TODO: clean this up - logger.info("Retrieving configuration - This may prompt for root@%s:" % (node)) + logger.info("Retrieving configuration - This may prompt for hacluster@%s:" % (node)) tmpdir = tmpfiles.create_dir() - rc, _, err = invoke("scp -oStrictHostKeyChecking=no root@{}:'{}/*' {}/".format(node, BOOTH_DIR, tmpdir)) + rc, _, err = invoke("scp -oStrictHostKeyChecking=no hacluster@{}:'{}/*' {}/".format(node, BOOTH_DIR, tmpdir)) if not rc: utils.fatal("Failed to retrieve configuration: {}".format(err)) try: diff --git a/crmsh/lock.py b/crmsh/lock.py index 5a01f38f0a..4575227ca6 100644 --- a/crmsh/lock.py +++ b/crmsh/lock.py @@ -114,7 +114,7 @@ def _run(self, cmd): """ Run command on remote node """ - cmd = "ssh {} root@{} \"{}\"".format(self.SSH_OPTION, self.remote_node, cmd) + cmd = "ssh {} hacluster@{} \"{}\"".format(self.SSH_OPTION, self.remote_node, cmd) rc, out, err = utils.get_stdout_stderr(cmd) if rc == self.SSH_EXIT_ERR: raise SSHError(err) diff --git a/crmsh/sbd.py b/crmsh/sbd.py index aa62caae8f..1631d18145 100644 --- a/crmsh/sbd.py +++ b/crmsh/sbd.py @@ -412,7 +412,7 @@ def _initialize_sbd(self): for dev in self._sbd_devices: if self.no_overwrite and SBDManager.has_sbd_device_already_initialized(dev): continue - rc, _, err = bootstrap.invoke("sbd {} -d {} create".format(opt, dev)) + rc, _, err = bootstrap.invoke("sudo sbd {} -d {} create".format(opt, dev)) if not rc: utils.fatal("Failed to initialize SBD device {}: {}".format(dev, err)) @@ -471,7 +471,7 @@ def _enable_sbd_service(self): self._restart_cluster_and_configure_sbd_ra() else: # in init process - bootstrap.invoke("systemctl enable sbd.service") + bootstrap.invoke("sudo systemctl enable sbd.service") 
def _warn_diskless_sbd(self, peer=None): """ @@ -502,7 +502,7 @@ def sbd_init(self): self._watchdog_inst.init_watchdog() self._get_sbd_device() if not self._sbd_devices and not self.diskless_sbd: - bootstrap.invoke("systemctl disable sbd.service") + bootstrap.invoke("sudo systemctl disable sbd.service") return self._warn_diskless_sbd() self._initialize_sbd() @@ -542,7 +542,7 @@ def join_sbd(self, peer_host): if not utils.package_is_installed("sbd"): return if not os.path.exists(SYSCONFIG_SBD) or not utils.service_is_enabled("sbd.service", peer_host): - bootstrap.invoke("systemctl disable sbd.service") + bootstrap.invoke("sudo systemctl disable sbd.service") return self._watchdog_inst = Watchdog(peer_host=peer_host) self._watchdog_inst.join_watchdog() @@ -552,7 +552,7 @@ def join_sbd(self, peer_host): else: self._warn_diskless_sbd(peer_host) logger.info("Got {}SBD configuration".format("" if dev_list else "diskless ")) - bootstrap.invoke("systemctl enable sbd.service") + bootstrap.invoke("sudo systemctl enable sbd.service") @classmethod def verify_sbd_device(cls): diff --git a/crmsh/utils.py b/crmsh/utils.py index d4877710ff..df4685739c 100644 --- a/crmsh/utils.py +++ b/crmsh/utils.py @@ -2116,7 +2116,7 @@ def check_ssh_passwd_need(host, user="root"): """ ssh_options = "-o StrictHostKeyChecking=no -o EscapeChar=none -o ConnectTimeout=15" ssh_cmd = "ssh {} -T -o Batchmode=yes {} true".format(ssh_options, host) - ssh_cmd = add_su(ssh_cmd, user) + # ssh_cmd = add_su(ssh_cmd, user) rc, _, _ = get_stdout_stderr(ssh_cmd) return rc != 0 @@ -2533,6 +2533,12 @@ def _do_action(self, action_type): raise ValueError("status_type should be {}".format('/'.join(list(self.ACTION_MAP.values())))) cmd = "systemctl {} {}".format(action_type, self.service_name) + if action_type not in ["is-active", "is-enabled", "list-unit-files"]: + cmd = "sudo " + cmd + + if len(self.node_list) == 1 and self.node_list[0] == this_node(): + self.node_list= [] # the else: case below + if self.node_list: cluster_run_cmd(cmd, self.node_list) return True, None @@ -2675,12 +2681,12 @@ def calculate_quorate_status(expected_votes, actual_votes): return int(actual_votes)/int(expected_votes) > 0.5 -def get_stdout_or_raise_error(cmd, remote=None, success_val_list=[0], no_raise=False): +def get_stdout_or_raise_error(cmd, user="hacluster", remote=None, success_val_list=[0], no_raise=False): """ Common function to get stdout from cmd or raise exception """ if remote: - cmd = "ssh {} root@{} \"{}\"".format(SSH_OPTION, remote, cmd) + cmd = "ssh {} {}@{} \"{}\"".format(SSH_OPTION, user, remote, cmd) rc, out, err = get_stdout_stderr(cmd, no_reg=True) if rc not in success_val_list and not no_raise: raise ValueError("Failed to run \"{}\": {}".format(cmd, err)) @@ -3112,7 +3118,7 @@ def has_dup_value(_list): return _list and len(_list) != len(set(_list)) -def detect_file(_file, remote=None): +def detect_file(_file, user="hacluster", remote=None): """ Detect if file exists, support both local and remote """ @@ -3120,7 +3126,7 @@ def detect_file(_file, remote=None): if not remote: rc = os.path.exists(_file) else: - cmd = "ssh {} root@{} 'test -f {}'".format(SSH_OPTION, remote, _file) + cmd = "ssh {} {}@{} 'test -f {}'".format(SSH_OPTION, user, remote, _file) code, _, _ = get_stdout_stderr(cmd) rc = code == 0 return rc diff --git a/crmsh/watchdog.py b/crmsh/watchdog.py index 1da0ec6a0b..ad486ee16c 100644 --- a/crmsh/watchdog.py +++ b/crmsh/watchdog.py @@ -8,7 +8,7 @@ class Watchdog(object): """ Class to find valid watchdog device name """ - QUERY_CMD 
= "sbd query-watchdog" + QUERY_CMD = "sudo sbd query-watchdog" DEVICE_FIND_REGREX = "\[[0-9]+\] (/dev/.*)\n.*\nDriver: (.*)" def __init__(self, _input=None, peer_host=None):