diff --git a/nixops/backends/__init__.py b/nixops/backends/__init__.py index 8c30d63c4..578ea5f3a 100644 --- a/nixops/backends/__init__.py +++ b/nixops/backends/__init__.py @@ -170,7 +170,7 @@ def backup(self, defn, backup_id): """Make backup of persistent disks, if possible.""" self.warn("don't know how to make backup of disks for machine ‘{0}’".format(self.name)) - def reboot(self, hard=False): + def reboot(self, hard=False, reset=True): """Reboot this machine.""" self.log("rebooting...") if self.state == self.RESCUE: @@ -182,16 +182,33 @@ def reboot(self, hard=False): reboot_command = "systemctl reboot" self.run_command(reboot_command, check=False) self.state = self.STARTING - self.ssh.reset() + if reset: + self.ssh.reset() def reboot_sync(self, hard=False): """Reboot this machine and wait until it's up again.""" - self.reboot(hard=hard) - self.log_start("waiting for the machine to finish rebooting...") - nixops.util.wait_for_tcp_port(self.get_ssh_name(), self.ssh_port, open=False, callback=lambda: self.log_continue(".")) - self.log_continue("[down]") - nixops.util.wait_for_tcp_port(self.get_ssh_name(), self.ssh_port, callback=lambda: self.log_continue(".")) - self.log_end("[up]") + + # To check when the machine has finished rebooting in a race-free + # manner, we compare the output of `last reboot` before and after + # the reboot. Once the output has changed, the reboot is done. + def get_last_reboot_output(): + return self.run_command('last reboot --time-format iso | head -n1', capture_stdout=True).rstrip() + + pre_reboot_last_reboot_output = get_last_reboot_output() + + # We set reset=False so that we can continue running `last reboot` + # remotely; when the reboot happens, our SSH connection will be reset + # by the remote side instead. + self.reboot(hard=hard, reset=False) + + self.log_start("waiting for reboot to complete...") + while True: + last_reboot_output = get_last_reboot_output() + if last_reboot_output != pre_reboot_last_reboot_output: + break + self.log_continue(".") + self.log_end("done.") + self.state = self.UP self.ssh_pinged = True self._ssh_pinged_this_time = True diff --git a/nixops/backends/azure_vm.py b/nixops/backends/azure_vm.py index 349c3a68b..3fc8eea7a 100644 --- a/nixops/backends/azure_vm.py +++ b/nixops/backends/azure_vm.py @@ -948,14 +948,15 @@ def after_activation(self, defn): self._delete_encryption_key(d_id) - def reboot(self, hard=False): + def reboot(self, hard=False, reset=True): if hard: self.log("sending hard reset to Azure machine...") self.cmc().virtual_machines.restart(self.resource_group, self.machine_name) self.state = self.STARTING - self.ssh.reset() + if reset: + self.ssh.reset() else: - MachineState.reboot(self, hard=hard) + MachineState.reboot(self, hard=hard, reset=reset) self.ssh_pinged = False def start(self): diff --git a/nixops/backends/ec2.py b/nixops/backends/ec2.py index c543a9738..ff47f141d 100644 --- a/nixops/backends/ec2.py +++ b/nixops/backends/ec2.py @@ -1407,7 +1407,7 @@ def _check(self, res): res.messages.append(" * {0} - {1}".format(e.not_before, e.not_after)) - def reboot(self, hard=False): + def reboot(self, hard=False, reset=True): self.log("rebooting EC2 machine...") instance = self._get_instance() instance.reboot() diff --git a/nixops/backends/gce.py b/nixops/backends/gce.py index 0ceef31db..f72098781 100644 --- a/nixops/backends/gce.py +++ b/nixops/backends/gce.py @@ -530,13 +530,13 @@ def create_node(self, defn): self.on_host_maintenance = defn.on_host_maintenance - def reboot(self, hard=False): + def reboot(self, hard=False, reset=True): if hard: self.log("sending hard reset to GCE machine...") self.node().reboot() self.state = self.STARTING else: - MachineState.reboot(self, hard=hard) + MachineState.reboot(self, hard=hard, reset=reset) def start(self): diff --git a/nixops/backends/hetzner.py b/nixops/backends/hetzner.py index 320c1accd..126a7d6e7 100644 --- a/nixops/backends/hetzner.py +++ b/nixops/backends/hetzner.py @@ -311,16 +311,17 @@ def _bootstrap_rescue(self, install, partitions): self.run_command(cmd) self.log_end("done.") - def reboot(self, hard=False): + def reboot(self, hard=False, reset=True): if hard: self.log_start("sending hard reset to robot... ") server = self._get_server_by_ip(self.main_ipv4) server.reboot('hard') self.log_end("done.") self.state = self.STARTING - self.ssh.reset() + if reset: + self.ssh.reset() else: - MachineState.reboot(self, hard=hard) + MachineState.reboot(self, hard=hard, reset=reset) def reboot_rescue(self, install=False, partitions=None, bootstrap=True, hard=False):