Skip to content

Commit

Permalink
hetzner: Add partitioningScript.
Browse files Browse the repository at this point in the history
This allows for custom partitioning that Anaconda Kickstart / blivet
cannot do.
  • Loading branch information
nh2 committed May 26, 2018
1 parent 7836009 commit 3ca62f2
Show file tree
Hide file tree
Showing 2 changed files with 244 additions and 13 deletions.
171 changes: 170 additions & 1 deletion nix/hetzner.nix
Original file line number Diff line number Diff line change
Expand Up @@ -90,12 +90,181 @@ with lib;
part swap --recommended --label=swap --fstype=swap --ondisk=vda
part / --fstype=ext4 --label=root --grow --ondisk=vda
'';
type = types.lines;
type = types.nullOr types.lines;
description = ''
Specify layout of partitions and file systems using Anaconda's Kickstart
format. For possible options and commands, please have a look at:
<link xlink:href="http://fedoraproject.org/wiki/Anaconda/Kickstart"/>
If Kickstart is not sufficient for your partitioning needs,
consider the <option>partitioningScript</option> option instead.
The <option>partitions</option> and <option>partitioningScript</option>
options are mutually exclusive.
'';
};

partitioningScript = mkOption {
type = types.nullOr types.lines;
default = null;
example = ''
# Example custom partitioningScript
# that creates an ext4 with external journal, across two RAID1s
# (one on HDDs, one on SSDs).
set -x
set -euo pipefail
# If the SSD doesn't support the RZAT (Return Zero After Trim) feature,
# we can't do the `lazy_journal_init=1` journal creation speedup
# below, so fail early in that case.
# Note that as per
# https://github.com/torvalds/linux/blob/e64f638483a21105c7ce330d543fa1f1c35b5bc7/drivers/ata/libata-core.c#L4242-L4250
# TRIM in general is optional and thus this would be unsafe,
# but the kernel announces RZAT only for a whitelist of devices
# that are known to execute TRIM when requested.
#
# Note that this is probably not needed when the ext4 journal is on top
# of an mdadm RAID (because that one likely guarantees to read zeros from
# freshly initialised RAID arrays, but I haven't checked that this really
# works), but we keep it in here just in case it doesn't work or
# somebody wants to run the journal NOT on top of a RAID.
#
# TODO Fall back to slow `lazy_journal_init=0` if RZAT isn't supported.
if hdparm -I /dev/sda | grep -i 'Deterministic read ZEROs after TRIM'; then echo "RZAT supported, can use lazy_journal_init=1 safely"; else echo "RZAT not supported on /dev/sda, cannot use lazy_journal_init=1 safely, exiting" 1>&2; exit 1; fi
if hdparm -I /dev/sdb | grep -i 'Deterministic read ZEROs after TRIM'; then echo "RZAT supported, can use lazy_journal_init=1 safely"; else echo "RZAT not supported on /dev/sdb, cannot use lazy_journal_init=1 safely, exiting" 1>&2; exit 1; fi
# Stop RAID devices if running, otherwise we can't modify the disks below.
test -b /dev/md0 && mdadm --stop /dev/md0
test -b /dev/md1 && mdadm --stop /dev/md1
# Zero out SSDs with TRIM command, so that `lazy_journal_init=1` can be safely used below.
blkdiscard /dev/sda
blkdiscard /dev/sdb
# Create BIOS boot partition and main partition for each SSD and HDD.
# Note Hetzner does use BIOS, not UEFI.
# We use GPT because these disks could be too large for MSDOS partitions (e.g. 10TB disks).
parted --script -a optimal /dev/sda -- mklabel gpt mkpart primary 1MiB 2MiB set 1 bios_grub on mkpart primary 2MiB '100%'
parted --script -a optimal /dev/sdb -- mklabel gpt mkpart primary 1MiB 2MiB set 1 bios_grub on mkpart primary 2MiB '100%'
parted --script -a optimal /dev/sdc -- mklabel gpt mkpart primary 1MiB 2MiB set 1 bios_grub on mkpart primary 2MiB '100%'
parted --script -a optimal /dev/sdd -- mklabel gpt mkpart primary 1MiB 2MiB set 1 bios_grub on mkpart primary 2MiB '100%'
# Now /dev/sd*1 is the BIOS boot partition, /dev/sd*2 is the one data partition
# Reload partition table so Linux can see the changes
partprobe
# Wait for all devices to exist
udevadm settle --timeout=5 --exit-if-exists=/dev/sda1
udevadm settle --timeout=5 --exit-if-exists=/dev/sda2
udevadm settle --timeout=5 --exit-if-exists=/dev/sdb1
udevadm settle --timeout=5 --exit-if-exists=/dev/sdb2
udevadm settle --timeout=5 --exit-if-exists=/dev/sdc1
udevadm settle --timeout=5 --exit-if-exists=/dev/sdc2
udevadm settle --timeout=5 --exit-if-exists=/dev/sdd1
udevadm settle --timeout=5 --exit-if-exists=/dev/sdd2
# --run makes mdadm not prompt the user for confirmation
mdadm --create --run --verbose /dev/md0 --level=1 --raid-devices=2 /dev/sda2 /dev/sdb2
mdadm --create --run --verbose /dev/md1 --level=1 --raid-devices=2 /dev/sdc2 /dev/sdd2
# Wipe filesystem signatures that might be on the RAID from some
# possibly existing older use of the disks.
# It's not clear to me *why* it is needed, but I have certainly
# observed that it is needed because ext4 labels magically survive
# mdadm RAID re-creations.
# See
# https://serverfault.com/questions/911370/why-does-mdadm-zero-superblock-preserve-file-system-information
wipefs -a /dev/md0
wipefs -a /dev/md1
# Disable RAID recovery. We don't want this to slow down machine provisioning
# in the Hetzner rescue mode. It can run in normal operation after reboot.
echo 0 > /proc/sys/dev/raid/speed_limit_max
# `lazy_journal_init=1` to not have to zero the device;
# we use ATA TRIM with RZAT support to guarantee the device
# is already zeroed; see comment further up about the safety of that.
mke2fs -F -L rootjournal -O journal_dev -E lazy_journal_init=1 /dev/md0
mkfs.ext4 -F -L root -J device=/dev/md0 /dev/md1
'';
description = ''
Script to run after booting into the Hetzner rescue mode
to manually create partitions.
Note as of writing, Hetzner uses BIOS, not UEFI, so if you want
to use GPT partition tables (which you need in case you want to
make partitions larger than 2 TiB) you will likely have to make
a BIOS boot partition
(<link xlink:href="http://fedoraproject.org/wiki/Anaconda/Kickstart"/>).
Where possible, use the simpler <option>partitions</option> option instead of this option.
The <option>partitions</option> and <option>partitioningScript</option>
options are mutually exclusive.
If you use this option, you must set "partitions = null",
you must set "filesystemInfo" to an accurate representation
of the partitions your script creates,
and you must set "mountScript" to mount the created target
root partition at /mnt.
'';
};

mountScript = mkOption {
type = types.nullOr types.lines;
default = null;
example = ''
# Example mountScript matching the example for partitioningScript,
# that creates an ext4 with external journal, across two RAID1s
# (one on HDDs, one on SSDs).
set -e
mount -o data=journal /dev/md1 /mnt
'';
description = ''
Script to run after booting into the Hetzner rescue mode,
and after formatting, to mount the root filesystem at /mnt.
This option is required when "partitioningScript" is used.
'';
};

filesystemInfo = mkOption {
type = types.nullOr types.attrs;
default = null;
example = literalExample ''
{
# Example filesystemInfo matching the example for partitioningScript,
# that creates an ext4 with external journal, across two RAID1s
# (one on HDDs, one on SSDs).
swapDevices = [];
boot.loader.grub.devices = [
"/dev/sda"
"/dev/sdb"
"/dev/sdc"
"/dev/sdd"
];
fileSystems = {
"/" = {
fsType = "ext4";
label = "root";
options = [
"journal_path=/dev/disk/by-label/rootjournal"
"data=journal"
"errors=remount-ro"
];
};
};
}
'';
description = ''
Override the filesystem info obtained from the machine after partitioning.
This option is required when "partitioningScript" is used, but can also
be set if the filesystem info obtained via <option>partitions</option> is not what you need.
'';
};
};
Expand Down
86 changes: 74 additions & 12 deletions nixops/backends/hetzner.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
from nixops.util import attr_property, create_key_pair, xml_expr_to_python
from nixops.ssh_util import SSHCommandFailed
from nixops.backends import MachineDefinition, MachineState
from nixops.nix_expr import nix2py
from nixops.nix_expr import nix2py, py2nix

# This is set to True by tests/hetzner-backend.nix. If it's in effect, no
# attempt is made to connect to the real Robot API and the API calls only
Expand Down Expand Up @@ -65,7 +65,24 @@ def __init__(self, xml, config):
assert type(self.robot_pass) is str

self.partitions = config["hetzner"]["partitions"]
assert type(self.partitions) is str
assert self.partitions is None or type(self.partitions) is str

self.partitioning_script = config["hetzner"]["partitioningScript"]
assert self.partitioning_script is None or type(self.partitioning_script) is str

self.mount_script = config["hetzner"]["mountScript"]
assert self.mount_script is None or type(self.mount_script) is str

fs_info_py = config["hetzner"]["filesystemInfo"]
assert fs_info_py is None or type(fs_info_py) is dict
# If it's None, we want to keep it None, not turn it into null,
# because the code further down checks for None in order to determine
# if it was set at all.
self.fs_info = py2nix(fs_info_py) if fs_info_py is not None else None

# Cross-option invariants for the partitioning settings:
#  - exactly one of `partitions` and `partitioningScript` is set;
#  - a custom partitioning script requires an explicit `filesystemInfo`;
#  - `mountScript` is set exactly when `partitioningScript` is set.
# The last check used to compare `mount_script` with itself, which is
# always true, so a missing mount script was never detected here.
assert (self.partitions is None) != (self.partitioning_script is None)
assert (self.partitioning_script is None) or (self.fs_info is not None)
assert (self.partitioning_script is None) == (self.mount_script is None)


class HetznerState(MachineState):
Expand All @@ -82,10 +99,12 @@ def get_type(cls):
robot_admin_user = attr_property("hetzner.robotUser", None)
robot_admin_pass = attr_property("hetzner.robotPass", None)
partitions = attr_property("hetzner.partitions", None)
partitioning_script = attr_property("hetzner.partitioningScript", None)
mount_script = attr_property("hetzner.mountScript", None)
fs_info = attr_property("hetzner.fsInfo", None)

just_installed = attr_property("hetzner.justInstalled", False, bool)
rescue_passwd = attr_property("hetzner.rescuePasswd", None)
fs_info = attr_property("hetzner.fsInfo", None)
net_info = attr_property("hetzner.networkInfo", None, 'json')
hw_info = attr_property("hetzner.hardwareInfo", None)

Expand Down Expand Up @@ -228,13 +247,27 @@ def _bootstrap_rescue_for_existing_system(self):
self.run_command("cat >> /etc/motd", stdin_string=fullmsg)
self.log_end("done.")

def _bootstrap_rescue(self, install, partitions):
def _bootstrap_rescue(self,
install,
partitions,
partitioning_script=None,
mount_script=None,
fs_info=None):
"""
Bootstrap everything needed in order to get Nix and the partitioner
usable in the rescue system. The keyword arguments are only for
partitioning, see reboot_rescue() for description, if not given we will
only mount based on information provided in self.partitions.
Exactly one of `partitions` and `partitioning_script` must be given as
non-None value.
If `partitioning_script` is given, `fs_info` must not be None.
`mount_script` must be given exactly when `partitioning_script` is given.
"""
assert (partitions is None) != (partitioning_script is None)
assert (partitioning_script is None) or (fs_info is not None)
assert (partitioning_script is None) == (mount_script is None)

self.log_start("building Nix bootstrap installer... ")
expr = os.path.join(self.depl.expr_path, "hetzner-bootstrap.nix")
bootstrap_out = subprocess.check_output(["nix-build", expr,
Expand Down Expand Up @@ -288,8 +321,16 @@ def _bootstrap_rescue(self, install, partitions):
if install:
self.log_start("partitioning disks... ")
try:
out = self.run_command("nixpart -p -", capture_stdout=True,
stdin_string=partitions)
if partitions is not None:
out = self.run_command("nixpart -p -", capture_stdout=True,
stdin_string=partitions)
# Note, `nixpart` already mounts the target / at /mnt
else:
assert partitioning_script is not None
assert mount_script is not None
self.run_command("bash", stdin_string=partitioning_script)
# Mount target / at /mnt
self.run_command("bash", stdin_string=mount_script)
except SSHCommandFailed as cmd:
# Exit code 100 is when the partitioner requires a reboot.
if cmd.exitcode == 100:
Expand All @@ -302,10 +343,18 @@ def _bootstrap_rescue(self, install, partitions):
# This is the *only* place to set self.partitions unless we have
# implemented a way to repartition the system!
self.partitions = partitions
self.fs_info = out
self.partitioning_script = partitioning_script
self.mount_script = mount_script
# If the user has provided a manual fs_info, use that one, otherwise
# use the one obtained from nixpart.
self.fs_info = out if fs_info is None else fs_info
else:
self.log_start("mounting filesystems... ")
self.run_command("nixpart -m -", stdin_string=self.partitions)
if partitions is not None:
self.run_command("nixpart -m -", stdin_string=self.partitions)
else:
assert mount_script is not None
self.run_command("bash", stdin_string=mount_script)
self.log_end("done.")

if not install:
Expand Down Expand Up @@ -338,14 +387,19 @@ def reboot(self, hard=False, reset=True):
else:
MachineState.reboot(self, hard=hard, reset=reset)

def reboot_rescue(self, install=False, partitions=None, bootstrap=True,
def reboot_rescue(self, install=False, partitions=None,
partitioning_script=None,
mount_script=None,
fs_info=None,
bootstrap=True,
hard=False):
"""
Use the Robot to activate the rescue system and reboot the system. By
default, only mount partitions and do not partition or wipe anything.
On installation, both 'install' has to be set to True and partitions
should contain a Kickstart configuration, otherwise it's read from
should contain a Kickstart configuration (or partitioning_script
should be given), otherwise it's read from
self.partitions if available (which it shouldn't if you're not doing
something nasty).
"""
Expand All @@ -369,7 +423,11 @@ def reboot_rescue(self, install=False, partitions=None, bootstrap=True,
self.state = self.RESCUE
self.ssh.reset()
if bootstrap:
self._bootstrap_rescue(install, partitions)
self._bootstrap_rescue(install,
partitions=partitions,
partitioning_script=partitioning_script,
mount_script=mount_script,
fs_info=fs_info)

def _install_base_system(self):
self.log_start("creating missing directories... ")
Expand Down Expand Up @@ -621,7 +679,11 @@ def create(self, defn, check, allow_reboot, allow_recreate):

if not self.vm_id:
self.log("installing machine...")
self.reboot_rescue(install=True, partitions=defn.partitions)
self.reboot_rescue(install=True,
partitions=defn.partitions,
partitioning_script=defn.partitioning_script,
mount_script=defn.mount_script,
fs_info=defn.fs_info)
self._install_base_system()
self._detect_hardware()
server = self._get_server_by_ip(self.main_ipv4)
Expand Down

0 comments on commit 3ca62f2

Please sign in to comment.