From 4bd204d3e851eb2fa6cb80460fcb5b82ab0c96dc Mon Sep 17 00:00:00 2001
From: Colin Walters <walters@verbum.org>
Date: Thu, 27 Aug 2020 23:03:24 +0000
Subject: [PATCH] server: Target the spec configuration if we have at least one
 node

The CI cluster hit an issue where a pull secret was broken, which
led to a deadlock: the MCO failed to drain nodes on the old config
because the other nodes still on the old config couldn't schedule
the pod.

It just generally makes sense for new nodes to use the new config;
do so as long as at least one node has successfully joined the
cluster at that config.  This way we still avoid breaking
the cluster (and scaleup) with a bad config.
---
 pkg/server/cluster_server.go | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/pkg/server/cluster_server.go b/pkg/server/cluster_server.go
index 3be2d4237a..3492d037d2 100644
--- a/pkg/server/cluster_server.go
+++ b/pkg/server/cluster_server.go
@@ -65,7 +65,16 @@ func (cs *clusterServer) GetConfig(cr poolRequest) (*runtime.RawExtension, error
 		return nil, fmt.Errorf("could not fetch pool. err: %v", err)
 	}
 
-	currConf := mp.Status.Configuration.Name
+	// For new nodes, we roll out the latest if at least one node has successfully updated.
+	// This avoids deadlocks in situations where the old configuration broke somehow
+	// (e.g. pull secret expired)
+	// and also avoids provisioning a new node, only to update it not long thereafter.
+	var currConf string
+	if mp.Status.UpdatedMachineCount > 0 {
+		currConf = mp.Spec.Configuration.Name
+	} else {
+		currConf = mp.Status.Configuration.Name
+	}
 
 	mc, err := cs.machineClient.MachineConfigs().Get(context.TODO(), currConf, metav1.GetOptions{})
 	if err != nil {