Skip to content

Commit 4eea23c

Browse files
author
Per Goncalves da Silva
committed
mitigate upgrade-e2e flakiness
Signed-off-by: Per Goncalves da Silva <pegoncal@redhat.com>
1 parent b26fb46 commit 4eea23c

File tree

1 file changed

+57
-32
lines changed

1 file changed

+57
-32
lines changed

test/upgrade-e2e/post_upgrade_test.go

Lines changed: 57 additions & 32 deletions
Original file line number | Diff line number | Diff line change
@@ -32,29 +32,13 @@ func TestClusterExtensionAfterOLMUpgrade(t *testing.T) {
3232
ctx := context.Background()
3333
defer utils.CollectTestArtifacts(t, artifactName, c, cfg)
3434

35-
managerLabelSelector := labels.Set{"control-plane": "operator-controller-controller-manager"}
35+
// wait for catalogd deployment to finish
36+
t.Log("Wait for catalogd deployment to be ready")
37+
catalogdManagerPod := waitForDeployment(t, ctx, "catalogd-controller-manager")
3638

37-
t.Log("Checking that the controller-manager deployment is updated")
38-
require.EventuallyWithT(t, func(ct *assert.CollectT) {
39-
var managerDeployments appsv1.DeploymentList
40-
assert.NoError(ct, c.List(ctx, &managerDeployments, client.MatchingLabelsSelector{Selector: managerLabelSelector.AsSelector()}))
41-
assert.Len(ct, managerDeployments.Items, 1)
42-
managerDeployment := managerDeployments.Items[0]
43-
44-
assert.True(ct,
45-
managerDeployment.Status.UpdatedReplicas == *managerDeployment.Spec.Replicas &&
46-
managerDeployment.Status.Replicas == *managerDeployment.Spec.Replicas &&
47-
managerDeployment.Status.AvailableReplicas == *managerDeployment.Spec.Replicas &&
48-
managerDeployment.Status.ReadyReplicas == *managerDeployment.Spec.Replicas,
49-
)
50-
}, time.Minute, time.Second)
51-
52-
var managerPods corev1.PodList
53-
t.Log("Waiting for only one controller-manager Pod to remain")
54-
require.EventuallyWithT(t, func(ct *assert.CollectT) {
55-
assert.NoError(ct, c.List(ctx, &managerPods, client.MatchingLabelsSelector{Selector: managerLabelSelector.AsSelector()}))
56-
assert.Len(ct, managerPods.Items, 1)
57-
}, time.Minute, time.Second)
39+
// wait for operator-controller deployment to finish
40+
t.Log("Wait for operator-controller deployment to be ready")
41+
managerPod := waitForDeployment(t, ctx, "operator-controller-controller-manager")
5842

5943
t.Log("Reading logs to make sure that ClusterExtension was reconciled by operator-controller before we update it")
6044
// Make sure that after we upgrade OLM itself we can still reconcile old objects without any changes
@@ -64,30 +48,40 @@ func TestClusterExtensionAfterOLMUpgrade(t *testing.T) {
6448
"reconcile ending",
6549
fmt.Sprintf(`ClusterExtension=%q`, testClusterExtensionName),
6650
}
67-
found, err := watchPodLogsForSubstring(logCtx, &managerPods.Items[0], "manager", substrings...)
51+
found, err := watchPodLogsForSubstring(logCtx, managerPod, "manager", substrings...)
6852
require.NoError(t, err)
6953
require.True(t, found)
7054

71-
t.Log("Checking that the ClusterCatalog is serving")
55+
t.Log("Checking that the ClusterCatalog is unpacked")
7256
require.EventuallyWithT(t, func(ct *assert.CollectT) {
7357
var clusterCatalog catalogd.ClusterCatalog
7458
assert.NoError(ct, c.Get(ctx, types.NamespacedName{Name: testClusterCatalogName}, &clusterCatalog))
59+
60+
// check serving condition
7561
cond := apimeta.FindStatusCondition(clusterCatalog.Status.Conditions, catalogd.TypeServing)
76-
if !assert.NotNil(ct, cond) {
62+
assert.NotNil(ct, cond)
63+
assert.Equal(ct, metav1.ConditionTrue, cond.Status)
64+
assert.Equal(ct, catalogd.ReasonAvailable, cond.Reason)
65+
66+
// mitigation for upgrade-e2e flakiness caused by the following bug
67+
// https://github.com/operator-framework/operator-controller/issues/1626
68+
// wait until the unpack time is later than the catalogd controller pod creation time
69+
cond = apimeta.FindStatusCondition(clusterCatalog.Status.Conditions, catalogd.TypeProgressing)
70+
if cond == nil {
7771
return
7872
}
7973
assert.Equal(ct, metav1.ConditionTrue, cond.Status)
80-
assert.Equal(ct, catalogd.ReasonAvailable, cond.Reason)
74+
assert.Equal(ct, catalogd.ReasonSucceeded, cond.Reason)
75+
76+
assert.True(ct, clusterCatalog.Status.LastUnpacked.After(catalogdManagerPod.CreationTimestamp.Time))
8177
}, time.Minute, time.Second)
8278

8379
t.Log("Checking that the ClusterExtension is installed")
8480
var clusterExtension ocv1.ClusterExtension
8581
require.EventuallyWithT(t, func(ct *assert.CollectT) {
8682
assert.NoError(ct, c.Get(ctx, types.NamespacedName{Name: testClusterExtensionName}, &clusterExtension))
8783
cond := apimeta.FindStatusCondition(clusterExtension.Status.Conditions, ocv1.TypeInstalled)
88-
if !assert.NotNil(ct, cond) {
89-
return
90-
}
84+
assert.NotNil(ct, cond)
9185
assert.Equal(ct, metav1.ConditionTrue, cond.Status)
9286
assert.Equal(ct, ocv1.ReasonSucceeded, cond.Reason)
9387
assert.Contains(ct, cond.Message, "Installed bundle")
@@ -107,16 +101,47 @@ func TestClusterExtensionAfterOLMUpgrade(t *testing.T) {
107101
require.EventuallyWithT(t, func(ct *assert.CollectT) {
108102
assert.NoError(ct, c.Get(ctx, types.NamespacedName{Name: testClusterExtensionName}, &clusterExtension))
109103
cond := apimeta.FindStatusCondition(clusterExtension.Status.Conditions, ocv1.TypeInstalled)
110-
if !assert.NotNil(ct, cond) {
111-
return
112-
}
104+
assert.NotNil(ct, cond)
113105
assert.Equal(ct, ocv1.ReasonSucceeded, cond.Reason)
114106
assert.Contains(ct, cond.Message, "Installed bundle")
115107
assert.Equal(ct, ocv1.BundleMetadata{Name: "test-operator.1.0.1", Version: "1.0.1"}, clusterExtension.Status.Install.Bundle)
116108
assert.NotEqual(ct, previousVersion, clusterExtension.Status.Install.Bundle.Version)
117109
}, time.Minute, time.Second)
118110
}
119111

112+
// waitForDeployment checks that the updated deployment with the given control-plane label
113+
// has reached the desired number of replicas and that the number of pods matches that number
114+
// i.e. no old pods remain. It will return a pointer to the first pod. This is only necessary
115+
// to facilitate the mitigation put in place for https://github.com/operator-framework/operator-controller/issues/1626
116+
func waitForDeployment(t *testing.T, ctx context.Context, controlPlaneLabel string) *corev1.Pod {
117+
deploymentLabelSelector := labels.Set{"control-plane": controlPlaneLabel}.AsSelector()
118+
119+
t.Log("Checking that the deployment is updated")
120+
var desiredNumReplicas int32
121+
require.EventuallyWithT(t, func(ct *assert.CollectT) {
122+
var managerDeployments appsv1.DeploymentList
123+
assert.NoError(ct, c.List(ctx, &managerDeployments, client.MatchingLabelsSelector{Selector: deploymentLabelSelector}))
124+
assert.Len(ct, managerDeployments.Items, 1)
125+
managerDeployment := managerDeployments.Items[0]
126+
127+
assert.True(ct,
128+
managerDeployment.Status.UpdatedReplicas == *managerDeployment.Spec.Replicas &&
129+
managerDeployment.Status.Replicas == *managerDeployment.Spec.Replicas &&
130+
managerDeployment.Status.AvailableReplicas == *managerDeployment.Spec.Replicas &&
131+
managerDeployment.Status.ReadyReplicas == *managerDeployment.Spec.Replicas,
132+
)
133+
desiredNumReplicas = *managerDeployment.Spec.Replicas
134+
}, time.Minute, time.Second)
135+
136+
var managerPods corev1.PodList
137+
t.Logf("Ensure the number of remaining pods equal the desired number of replicas (%d)", desiredNumReplicas)
138+
require.EventuallyWithT(t, func(ct *assert.CollectT) {
139+
assert.NoError(ct, c.List(ctx, &managerPods, client.MatchingLabelsSelector{Selector: deploymentLabelSelector}))
140+
assert.Len(ct, managerPods.Items, 1)
141+
}, time.Minute, time.Second)
142+
return &managerPods.Items[0]
143+
}
144+
120145
func watchPodLogsForSubstring(ctx context.Context, pod *corev1.Pod, container string, substrings ...string) (bool, error) {
121146
podLogOpts := corev1.PodLogOptions{
122147
Follow: true,

0 commit comments

Comments
 (0)