diff --git a/pkg/daemon/daemon.go b/pkg/daemon/daemon.go
index 4666bc196d..4be30657d9 100644
--- a/pkg/daemon/daemon.go
+++ b/pkg/daemon/daemon.go
@@ -319,9 +319,15 @@ func (dn *Daemon) bootstrapNode() error {
 		return err
 	}
 	dn.node = node
+	if _, err := os.Stat("/etc/defaults/rhcos/reboot-needed"); err == nil {
+		if err := os.Remove("/etc/defaults/rhcos/reboot-needed"); err != nil {
+			return err
+		}
+	}
 	if err := dn.CheckStateOnBoot(); err != nil {
 		return err
 	}
+
 	// finished syncing node for the first time
 	dn.booting = false
 	return nil
@@ -330,6 +336,8 @@ func (dn *Daemon) bootstrapNode() error {
 func (dn *Daemon) handleErr(err error, key interface{}) {
 	if err == nil {
 		dn.queue.Forget(key)
+		// This is a workaround to have the controller keep calling the syncHandler for the MCD reboot signaling
+		dn.queue.AddAfter(key, 5*time.Minute)
 		return
 	}
 
@@ -400,6 +408,19 @@ func (dn *Daemon) syncNode(key string) error {
 				return err
 			}
 		}
+		// FIXME: this is a temporary workaround, remove once an MCD signaling system is in place
+		if _, err := os.Stat("/etc/defaults/rhcos/reboot-needed"); err == nil {
+			dn.catchIgnoreSIGTERM()
+			if err := dn.drainNode(); err != nil {
+				dn.cancelSIGTERM()
+				return err
+			}
+			if err := dn.reboot("rebooting due to another component signaling"); err != nil {
+				// reboot failed: undo catchIgnoreSIGTERM, mirroring the drain failure path above
+				dn.cancelSIGTERM()
+				return err
+			}
+		}
 		glog.V(2).Infof("Node %s is already synced", node.Name)
 	}
 	return nil
diff --git a/pkg/daemon/update.go b/pkg/daemon/update.go
index 26d815fa20..144713df9d 100644
--- a/pkg/daemon/update.go
+++ b/pkg/daemon/update.go
@@ -94,6 +94,47 @@ func getNodeRef(node *corev1.Node) *corev1.ObjectReference {
 	}
 }
 
+// drainNode drains this node, retrying with exponential backoff (5 steps starting at 10s).
+// It returns nil without draining when onceFrom is set, and an error once all retries fail.
+func (dn *Daemon) drainNode() error {
+	// Skip draining of the node when we're not cluster driven
+	if dn.onceFrom != "" {
+		return nil
+	}
+
+	glog.Info("Update prepared; draining the node")
+	dn.recorder.Eventf(getNodeRef(dn.node), corev1.EventTypeNormal, "Drain", "Draining node to update config.")
+
+	backoff := wait.Backoff{
+		Steps:    5,
+		Duration: 10 * time.Second,
+		Factor:   2,
+	}
+	var lastErr error
+	if err := wait.ExponentialBackoff(backoff, func() (bool, error) {
+		err := drain.Drain(dn.kubeClient, []*corev1.Node{dn.node}, &drain.DrainOptions{
+			DeleteLocalData:    true,
+			Force:              true,
+			GracePeriodSeconds: 600,
+			IgnoreDaemonsets:   true,
+		})
+		if err == nil {
+			return true, nil
+		}
+		lastErr = err
+		glog.Infof("Draining failed with: %v, retrying", err)
+		return false, nil
+
+	}); err != nil {
+		if err == wait.ErrWaitTimeout {
+			return errors.Wrapf(lastErr, "failed to drain node (%d tries): %v", backoff.Steps, err)
+		}
+		return errors.Wrap(err, "failed to drain node")
+	}
+	glog.Info("Node successfully drained")
+	return nil
+}
+
 // updateOSAndReboot is the last step in an update(), and it can also
 // be called as a special case for the "bootstrap pivot".
 func (dn *Daemon) updateOSAndReboot(newConfig *mcfgv1.MachineConfig) error {
@@ -101,39 +142,8 @@ func (dn *Daemon) updateOSAndReboot(newConfig *mcfgv1.MachineConfig) error {
 		return err
 	}
 
-	// Skip draining of the node when we're not cluster driven
-	if dn.onceFrom == "" {
-		glog.Info("Update prepared; draining the node")
-
-		dn.recorder.Eventf(getNodeRef(dn.node), corev1.EventTypeNormal, "Drain", "Draining node to update config.")
-
-		backoff := wait.Backoff{
-			Steps:    5,
-			Duration: 10 * time.Second,
-			Factor:   2,
-		}
-		var lastErr error
-		if err := wait.ExponentialBackoff(backoff, func() (bool, error) {
-			err := drain.Drain(dn.kubeClient, []*corev1.Node{dn.node}, &drain.DrainOptions{
-				DeleteLocalData:    true,
-				Force:              true,
-				GracePeriodSeconds: 600,
-				IgnoreDaemonsets:   true,
-			})
-			if err == nil {
-				return true, nil
-			}
-			lastErr = err
-			glog.Infof("Draining failed with: %v, retrying", err)
-			return false, nil
-
-		}); err != nil {
-			if err == wait.ErrWaitTimeout {
-				return errors.Wrapf(lastErr, "failed to drain node (%d tries): %v", backoff.Steps, err)
-			}
-			return errors.Wrap(err, "failed to drain node")
-		}
-		glog.Info("Node successfully drained")
+	if err := dn.drainNode(); err != nil {
+		return err
 	}
 
 	if err := dn.writePendingState(newConfig); err != nil {