From e5f90bc27594a1e428c820d1adc45987853d7ea7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Niklas=20Hamb=C3=BCchen?= Date: Tue, 8 May 2018 17:21:13 +0200 Subject: [PATCH] hetzner: Add `partitioningScript`. This allows for custom partitioning that Anaconda Kickstart / blivet cannot do. --- nix/hetzner.nix | 156 +++++++++++++++++++++++++++++++++++- nixops/backends/__init__.py | 4 + nixops/backends/hetzner.py | 86 +++++++++++++++++--- 3 files changed, 233 insertions(+), 13 deletions(-) diff --git a/nix/hetzner.nix b/nix/hetzner.nix index 237282050..e662bba21 100644 --- a/nix/hetzner.nix +++ b/nix/hetzner.nix @@ -90,12 +90,166 @@ with lib; part swap --recommended --label=swap --fstype=swap --ondisk=vda part / --fstype=ext4 --label=root --grow --ondisk=vda ''; - type = types.lines; + type = types.nullOr types.lines; description = '' Specify layout of partitions and file systems using Anacondas Kickstart format. For possible options and commands, please have a look at: + + If the Kickstart is not sufficient for your partitioning needs, + consider the "partitioningScript" option instead. + + The "partitions" and "partitioningScript" options are mutually exclusive. + ''; + }; + + partitioningScript = mkOption { + type = types.nullOr types.lines; + default = null; + example = '' + # Example custom partitioningScript + # that creates an ext4 with external journal, across two RAID1s + # (one on HDDs, one on SSDs). + + set -x + set -euo pipefail + + # If the SSD doesn't support the RZAT (Return Zero After Trim) feature, + # we can't do the `lazy_journal_init=1` journal creation speedup + # below, so fail early in that case. + # Note that as per + # https://github.com/torvalds/linux/blob/e64f638483a21105c7ce330d543fa1f1c35b5bc7/drivers/ata/libata-core.c#L4242-L4250 + # TRIM in general is optional and thus this would be unsafe, + # but the kernel announces RZAT only for a whitelist of devices + # that are known to execute TRIM when requested. 
+ # + # Note that this is probably not needed when the ext4 journal is on top + # of an mdadm RAID (because that one likely guarantees to read zeros from + # freshly initialised RAID arrays), but we keep it in here just in case + # somebody wants to run the journal NOT on top of a RAID. + # + # TODO Fall back to slow `lazy_journal_init=0` if RZAT isn't supported. + if hdparm -I /dev/sda | grep -i 'Deterministic read ZEROs after TRIM'; then echo "RZAT supported, can use lazy_journal_init=1 safely"; else echo "RZAT not supported on /dev/sda, cannot use lazy_journal_init=1 safely, exiting" 1>&2; exit 1; fi + if hdparm -I /dev/sdb | grep -i 'Deterministic read ZEROs after TRIM'; then echo "RZAT supported, can use lazy_journal_init=1 safely"; else echo "RZAT not supported on /dev/sdb, cannot use lazy_journal_init=1 safely, exiting" 1>&2; exit 1; fi + + # Stop RAID devices if running, otherwise we can't modify the disks below. + test -b /dev/md0 && mdadm --stop /dev/md0 + test -b /dev/md1 && mdadm --stop /dev/md1 + + # Zero out SSDs with TRIM command, so that `lazy_journal_init=1` can be safely used below. + blkdiscard /dev/sda + blkdiscard /dev/sdb + + # Create BIOS boot partition and main partition for each SSD and HDD. + # Note Hetzner does use BIOS, not UEFI. + # We use GPT because these disks could be too large for MSDOS partitions (e.g. 10TB disks).
+ parted --script -a optimal /dev/sda -- mklabel gpt mkpart primary 1MiB 2MiB set 1 bios_grub on mkpart primary 2MiB '100%' + parted --script -a optimal /dev/sdb -- mklabel gpt mkpart primary 1MiB 2MiB set 1 bios_grub on mkpart primary 2MiB '100%' + parted --script -a optimal /dev/sdc -- mklabel gpt mkpart primary 1MiB 2MiB set 1 bios_grub on mkpart primary 2MiB '100%' + parted --script -a optimal /dev/sdd -- mklabel gpt mkpart primary 1MiB 2MiB set 1 bios_grub on mkpart primary 2MiB '100%' + + # Now /dev/sd*1 is the BIOS boot partition, /dev/sd*2 is the one data partition + + # Reload partition table so Linux can see the changes + partprobe + + # Wait for all devices to exist + udevadm settle --timeout=5 --exit-if-exists=/dev/sda1 + udevadm settle --timeout=5 --exit-if-exists=/dev/sda2 + udevadm settle --timeout=5 --exit-if-exists=/dev/sdb1 + udevadm settle --timeout=5 --exit-if-exists=/dev/sdb2 + udevadm settle --timeout=5 --exit-if-exists=/dev/sdc1 + udevadm settle --timeout=5 --exit-if-exists=/dev/sdc2 + udevadm settle --timeout=5 --exit-if-exists=/dev/sdd1 + udevadm settle --timeout=5 --exit-if-exists=/dev/sdd2 + + # --run makes mdadm not prompt the user for confirmation + mdadm --create --run --verbose /dev/md0 --level=1 --raid-devices=2 /dev/sda2 /dev/sdb2 + mdadm --create --run --verbose /dev/md1 --level=1 --raid-devices=2 /dev/sdc2 /dev/sdd2 + + # Disable RAID recovery. We don't want this to slow down machine provisioning + # in the Hetzner rescue mode. It can run in normal operation after reboot. + echo 0 > /proc/sys/dev/raid/speed_limit_max + + # `lazy_journal_init=1` to not have to zero the device; + # we use ATA TRIM with RZAT support to guarantee the device + # is already zeroed; see comment further up about the safety of that. 
+ mke2fs -F -L rootjournal -O journal_dev -E lazy_journal_init=1 /dev/md0 + mkfs.ext4 -F -L root -J device=/dev/md0 /dev/md1 + ''; + description = '' + Script to run after booting into the Hetzner rescue mode + to manually create partitions. + + Note as of writing, Hetzner uses BIOS, not UEFI, so if you want + to use GPT partition tables (which you need in case you want to + make partitions larger than 2 TiB) you will likely have to make + a BIOS boot partition + (). + + Where possible, use the simpler "partitions" option instead of this option. + + The "partitions" and "partitioningScript" options are mutually exclusive. + + If you use this option, you must set "partitions = null", + you must set "filesystemInfo" to an accurate representation + of the partitions your script creates, + and you must set "mountScript" to mount the created target + root partition at /mnt. + ''; + }; + + mountScript = mkOption { + type = types.nullOr types.lines; + default = null; + example = '' + # Example mountScript matching the example for partitioningScript, + # that creates an ext4 with external journal, across two RAID1s + # (one on HDDs, one on SSDs). + + set -e + mount -o data=journal /dev/md1 /mnt + ''; + description = '' + Script to run after booting into the Hetzner rescue mode, + and after formatting, to mount the root filesystem at /mnt. + + This option is required when "partitioningScript" is used. + ''; + }; + + filesystemInfo = mkOption { + type = types.nullOr types.attrs; + default = null; + example = { + # Example filesystemInfo matching the example for partitioningScript, + # that creates an ext4 with external journal, across two RAID1s + # (one on HDDs, one on SSDs). 
+ swapDevices = []; + boot.loader.grub.devices = [ + "/dev/sda" + "/dev/sdb" + "/dev/sdc" + "/dev/sdd" + ]; + fileSystems = { + "/" = { + fsType = "ext4"; + label = "root"; + options = [ + "journal_path=/dev/disk/by-label/rootjournal" + "data=journal" + "errors=remount-ro" + ]; + }; + }; + }; + description = '' + Override the filesystem info obtained from the machine after partitioning. + + This option is required when "partitioningScript" is used, but can also + be set if the filesystem info obtained via "partitions" is not what you need. ''; }; }; diff --git a/nixops/backends/__init__.py b/nixops/backends/__init__.py index 3642ebc45..3d4bde59a 100644 --- a/nixops/backends/__init__.py +++ b/nixops/backends/__init__.py @@ -195,6 +195,10 @@ def reboot_sync(self, hard=False): # manner, we compare the output of `last reboot` before and after # the reboot. Once the output has changed, the reboot is done. def get_last_reboot_output(): + # Note `last reboot` does not exist on older OSs like + # the Hetzner rescue system, but that doesn't matter much + # because all we care about is when the output of the + # command invocation changes. return self.run_command('last reboot --time-format iso | head -n1', capture_stdout=True).rstrip() pre_reboot_last_reboot_output = get_last_reboot_output() diff --git a/nixops/backends/hetzner.py b/nixops/backends/hetzner.py index fbfff76a0..2955176b0 100644 --- a/nixops/backends/hetzner.py +++ b/nixops/backends/hetzner.py @@ -13,7 +13,7 @@ from nixops.util import attr_property, create_key_pair, xml_expr_to_python from nixops.ssh_util import SSHCommandFailed from nixops.backends import MachineDefinition, MachineState -from nixops.nix_expr import nix2py +from nixops.nix_expr import nix2py, py2nix # This is set to True by tests/hetzner-backend.nix. 
If it's in effect, no # attempt is made to connect to the real Robot API and the API calls only @@ -65,7 +65,24 @@ def __init__(self, xml, config): assert type(self.robot_pass) is str self.partitions = config["hetzner"]["partitions"] - assert type(self.partitions) is str + assert self.partitions is None or type(self.partitions) is str + + self.partitioning_script = config["hetzner"]["partitioningScript"] + assert self.partitioning_script is None or type(self.partitioning_script) is str + + self.mount_script = config["hetzner"]["mountScript"] + assert self.mount_script is None or type(self.mount_script) is str + + fs_info_py = config["hetzner"]["filesystemInfo"] + assert fs_info_py is None or type(fs_info_py) is dict + # If it's None, we want to keep it None, not turn it into null, + # because the code further down checks for None in order to determine + # if it was set at all. + self.fs_info = py2nix(fs_info_py) if fs_info_py is not None else None + + assert (self.partitions is None) != (self.partitioning_script is None) + assert (self.partitioning_script is None) or (self.fs_info is not None) + assert (self.partitioning_script is None) == (self.mount_script is None) class HetznerState(MachineState): @@ -82,10 +99,12 @@ def get_type(cls): robot_admin_user = attr_property("hetzner.robotUser", None) robot_admin_pass = attr_property("hetzner.robotPass", None) partitions = attr_property("hetzner.partitions", None) + partitioning_script = attr_property("hetzner.partitioningScript", None) + mount_script = attr_property("hetzner.mountScript", None) + fs_info = attr_property("hetzner.fsInfo", None) just_installed = attr_property("hetzner.justInstalled", False, bool) rescue_passwd = attr_property("hetzner.rescuePasswd", None) - fs_info = attr_property("hetzner.fsInfo", None) net_info = attr_property("hetzner.networkInfo", None, 'json') hw_info = attr_property("hetzner.hardwareInfo", None) @@ -228,13 +247,27 @@ def _bootstrap_rescue_for_existing_system(self): self.run_command("cat
>> /etc/motd", stdin_string=fullmsg) self.log_end("done.") - def _bootstrap_rescue(self, install, partitions): + def _bootstrap_rescue(self, + install, + partitions, + partitioning_script=None, + mount_script=None, + fs_info=None): """ Bootstrap everything needed in order to get Nix and the partitioner usable in the rescue system. The keyword arguments are only for partitioning, see reboot_rescue() for description, if not given we will only mount based on information provided in self.partitions. + + When installing, exactly one of `partitions` and `partitioning_script` + must be given as non-None value; when only mounting, both may be None. + If `partitioning_script` is given, `fs_info` must not be None. + `mount_script` must be given exactly when `partitioning_script` is given. """ + assert (not install) or ((partitions is None) != (partitioning_script is None)) + assert (partitioning_script is None) or (fs_info is not None) + assert (partitioning_script is None) == (mount_script is None) + self.log_start("building Nix bootstrap installer... ") expr = os.path.join(self.depl.expr_path, "hetzner-bootstrap.nix") bootstrap_out = subprocess.check_output(["nix-build", expr, @@ -288,8 +321,16 @@ def _bootstrap_rescue(self, install, partitions): if install: self.log_start("partitioning disks... ") try: - out = self.run_command("nixpart -p -", capture_stdout=True, - stdin_string=partitions) + if partitions is not None: + out = self.run_command("nixpart -p -", capture_stdout=True, + stdin_string=partitions) + # Note, `nixpart` already mounts the target / at /mnt + else: + assert partitioning_script is not None + assert mount_script is not None + self.run_command("bash", stdin_string=partitioning_script) + # Mount target / at /mnt + self.run_command("bash", stdin_string=mount_script) except SSHCommandFailed as cmd: # Exit code 100 is when the partitioner requires a reboot.
if cmd.exitcode == 100: @@ -302,10 +343,18 @@ def _bootstrap_rescue(self, install, partitions): # This is the *only* place to set self.partitions unless we have # implemented a way to repartition the system! self.partitions = partitions - self.fs_info = out + self.partitioning_script = partitioning_script + self.mount_script = mount_script + # If the user has provided a manual fs_info, use that one, otherwise + # use the one obtained from nixpart. + self.fs_info = out if fs_info is None else fs_info else: self.log_start("mounting filesystems... ") - self.run_command("nixpart -m -", stdin_string=self.partitions) + if self.partitions is not None: + self.run_command("nixpart -m -", stdin_string=self.partitions) + else: + assert (mount_script or self.mount_script) is not None + self.run_command("bash", stdin_string=mount_script or self.mount_script) self.log_end("done.") if not install: @@ -338,14 +387,19 @@ def reboot(self, hard=False, reset=True): else: MachineState.reboot(self, hard=hard, reset=reset) - def reboot_rescue(self, install=False, partitions=None, bootstrap=True, + def reboot_rescue(self, install=False, partitions=None, + partitioning_script=None, + mount_script=None, + fs_info=None, + bootstrap=True, hard=False): """ Use the Robot to activate the rescue system and reboot the system. By default, only mount partitions and do not partition or wipe anything. On installation, both 'installed' has to be set to True and partitions - should contain a Kickstart configuration, otherwise it's read from + should contain a Kickstart configuration (or partitioning_script + should be given), otherwise it's read from self.partitions if available (which it shouldn't if you're not doing something nasty).
""" @@ -369,7 +423,11 @@ def reboot_rescue(self, install=False, partitions=None, bootstrap=True, self.state = self.RESCUE self.ssh.reset() if bootstrap: - self._bootstrap_rescue(install, partitions) + self._bootstrap_rescue(install, + partitions=partitions, + partitioning_script=partitioning_script, + mount_script=mount_script, + fs_info=fs_info) def _install_base_system(self): self.log_start("creating missing directories... ") @@ -621,7 +679,11 @@ def create(self, defn, check, allow_reboot, allow_recreate): if not self.vm_id: self.log("installing machine...") - self.reboot_rescue(install=True, partitions=defn.partitions) + self.reboot_rescue(install=True, + partitions=defn.partitions, + partitioning_script=defn.partitioning_script, + mount_script=defn.mount_script, + fs_info=defn.fs_info) self._install_base_system() self._detect_hardware() server = self._get_server_by_ip(self.main_ipv4)