Skip to content

Commit

Permalink
Wait for the webhook service to be listening before advertising the J…
Browse files Browse the repository at this point in the history
…obset replica as ready.
  • Loading branch information
mbobrovskyi committed Jun 21, 2024
1 parent 58d5da2 commit 3c8290b
Show file tree
Hide file tree
Showing 2 changed files with 50 additions and 23 deletions.
23 changes: 20 additions & 3 deletions main.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,9 @@ limitations under the License.
package main

import (
"errors"
"flag"
"net/http"
"os"

// Import all Kubernetes client auth plugins (e.g. Azure, GCP, OIDC, etc.)
Expand Down Expand Up @@ -136,7 +138,7 @@ func main() {
// Controllers who register after manager starts will start directly.
go setupControllers(mgr, certsReady)

setupHealthzAndReadyzCheck(mgr)
setupHealthzAndReadyzCheck(mgr, certsReady)

setupLog.Info("starting manager")
if err := mgr.Start(ctx); err != nil {
Expand Down Expand Up @@ -186,14 +188,29 @@ func setupControllers(mgr ctrl.Manager, certsReady chan struct{}) {
//+kubebuilder:scaffold:builder
}

func setupHealthzAndReadyzCheck(mgr ctrl.Manager) {
func setupHealthzAndReadyzCheck(mgr ctrl.Manager, certsReady <-chan struct{}) {
defer setupLog.Info("both healthz and readyz check are finished and configured")

if err := mgr.AddHealthzCheck("healthz", healthz.Ping); err != nil {
setupLog.Error(err, "unable to set up health check")
os.Exit(1)
}
if err := mgr.AddReadyzCheck("readyz", healthz.Ping); err != nil {

// Wait for the webhook server to be listening before advertising the
// Jobset deployment replica as ready. This allows users to wait with sending
// the first requests, requiring webhooks, until the Jobset deployment is
// available, so that the early requests are not rejected during the Jobset's
// startup. We wrap the call to GetWebhookServer in a closure to delay calling
// the function, otherwise a not fully-initialized webhook server (without
// ready certs) fails the start of the manager.
if err := mgr.AddReadyzCheck("readyz", func(req *http.Request) error {
select {
case <-certsReady:
return mgr.GetWebhookServer().StartedChecker()(req)
default:
return errors.New("certificates are not ready")
}
}); err != nil {
setupLog.Error(err, "unable to set up ready check")
os.Exit(1)
}
Expand Down
50 changes: 30 additions & 20 deletions test/e2e/suite_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,18 +15,21 @@ package e2e

import (
"context"
"fmt"
"testing"
"time"

"github.com/google/go-cmp/cmp/cmpopts"
"github.com/onsi/ginkgo/v2"
"github.com/onsi/gomega"
appsv1 "k8s.io/api/apps/v1"
corev1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/types"
"k8s.io/client-go/kubernetes/scheme"
"sigs.k8s.io/controller-runtime/pkg/client"
"sigs.k8s.io/controller-runtime/pkg/client/config"

jobset "sigs.k8s.io/jobset/api/jobset/v1alpha2"
testutils "sigs.k8s.io/jobset/pkg/util/testing"
//+kubebuilder:scaffold:imports
)

Expand Down Expand Up @@ -59,27 +62,34 @@ var _ = ginkgo.BeforeSuite(func() {
gomega.Expect(err).NotTo(gomega.HaveOccurred())
gomega.Expect(k8sClient).NotTo(gomega.BeNil())

JobSetReadyForTesting(k8sClient)
jobSetReadyForTesting(k8sClient)
})

func JobSetReadyForTesting(client client.Client) {
func jobSetReadyForTesting(k8sClient client.Client) {
ginkgo.By("waiting for resources to be ready for testing")
// To verify that webhooks are ready, let's create a simple jobset.
js := testutils.MakeJobSet("js", "default").
ReplicatedJob(testutils.MakeReplicatedJob("rjob").
Job(testutils.MakeJobTemplate("job", "default").
PodSpec(testutils.TestPodSpec).Obj()).
Obj()).Obj()

// Once the creation succeeds, that means the webhooks are ready
// and we can begin testing.
gomega.Eventually(func() error {
return client.Create(context.Background(), js)
deploymentKey := types.NamespacedName{Namespace: "jobset-system", Name: "jobset-controller-manager"}
deployment := &appsv1.Deployment{}
pods := &corev1.PodList{}
gomega.Eventually(func(g gomega.Gomega) error {
// Get controller-manager deployment.
g.Expect(k8sClient.Get(ctx, deploymentKey, deployment)).To(gomega.Succeed())
// Get pods matches for controller-manager deployment.
g.Expect(k8sClient.List(ctx, pods, client.InNamespace(deploymentKey.Namespace), client.MatchingLabels(deployment.Spec.Selector.MatchLabels))).To(gomega.Succeed())
for _, pod := range pods.Items {
for _, cs := range pod.Status.ContainerStatuses {
// To make sure that we don't have restarts of controller-manager.
// If we have that's mean that something went wrong, and there is
// no needs to continue trying check availability.
if cs.RestartCount > 0 {
return gomega.StopTrying(fmt.Sprintf("%q in %q has restarted %d times", cs.Name, pod.Name, cs.RestartCount))
}
}
}
// To verify that webhooks are ready, checking is deployment have condition Available=True.
g.Expect(deployment.Status.Conditions).To(gomega.ContainElement(gomega.BeComparableTo(
appsv1.DeploymentCondition{Type: appsv1.DeploymentAvailable, Status: corev1.ConditionTrue},
cmpopts.IgnoreFields(appsv1.DeploymentCondition{}, "Reason", "Message", "LastUpdateTime", "LastTransitionTime")),
))
return nil
}, timeout, interval).Should(gomega.Succeed())

// Delete this jobset before beginning tests.
gomega.Expect(client.Delete(ctx, js))
gomega.Eventually(func() error {
return client.Get(ctx, types.NamespacedName{Name: js.Name, Namespace: js.Namespace}, &jobset.JobSet{})
}).ShouldNot(gomega.Succeed())
}

0 comments on commit 3c8290b

Please sign in to comment.