Skip to content

Commit

Permalink
Wait for the webhook service to be listening before advertising the J…
Browse files Browse the repository at this point in the history
…obset replica as ready.
  • Loading branch information
mbobrovskyi committed Jun 20, 2024
1 parent 58d5da2 commit 09d3972
Show file tree
Hide file tree
Showing 2 changed files with 47 additions and 23 deletions.
23 changes: 20 additions & 3 deletions main.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,9 @@ limitations under the License.
package main

import (
"errors"
"flag"
"net/http"
"os"

// Import all Kubernetes client auth plugins (e.g. Azure, GCP, OIDC, etc.)
Expand Down Expand Up @@ -136,7 +138,7 @@ func main() {
// Controllers who register after manager starts will start directly.
go setupControllers(mgr, certsReady)

setupHealthzAndReadyzCheck(mgr)
setupHealthzAndReadyzCheck(mgr, certsReady)

setupLog.Info("starting manager")
if err := mgr.Start(ctx); err != nil {
Expand Down Expand Up @@ -186,14 +188,29 @@ func setupControllers(mgr ctrl.Manager, certsReady chan struct{}) {
//+kubebuilder:scaffold:builder
}

func setupHealthzAndReadyzCheck(mgr ctrl.Manager) {
func setupHealthzAndReadyzCheck(mgr ctrl.Manager, certsReady <-chan struct{}) {
defer setupLog.Info("both healthz and readyz check are finished and configured")

if err := mgr.AddHealthzCheck("healthz", healthz.Ping); err != nil {
setupLog.Error(err, "unable to set up health check")
os.Exit(1)
}
if err := mgr.AddReadyzCheck("readyz", healthz.Ping); err != nil {

// Wait for the webhook server to be listening before advertising the
// Jobset replica as ready. This allows users to wait with sending the first
// requests, requiring webhooks, until the Jobset deployment is available, so
// that the early requests are not rejected during the Jobset's startup.
// We wrap the call to GetWebhookServer in a closure to delay calling
// the function, otherwise a not fully-initialized webhook server (without
// ready certs) fails the start of the manager.
if err := mgr.AddReadyzCheck("readyz", func(req *http.Request) error {
select {
case <-certsReady:
return mgr.GetWebhookServer().StartedChecker()(req)
default:
return errors.New("certificates are not ready")
}
}); err != nil {
setupLog.Error(err, "unable to set up ready check")
os.Exit(1)
}
Expand Down
47 changes: 27 additions & 20 deletions test/e2e/suite_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,18 +15,22 @@ package e2e

import (
"context"
"fmt"
"testing"
"time"

"github.com/google/go-cmp/cmp/cmpopts"
"github.com/onsi/ginkgo/v2"
"github.com/onsi/gomega"
appsv1 "k8s.io/api/apps/v1"
corev1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/types"
"k8s.io/client-go/kubernetes/scheme"
"k8s.io/utils/ptr"
"sigs.k8s.io/controller-runtime/pkg/client"
"sigs.k8s.io/controller-runtime/pkg/client/config"

jobset "sigs.k8s.io/jobset/api/jobset/v1alpha2"
testutils "sigs.k8s.io/jobset/pkg/util/testing"
//+kubebuilder:scaffold:imports
)

Expand Down Expand Up @@ -59,27 +63,30 @@ var _ = ginkgo.BeforeSuite(func() {
gomega.Expect(err).NotTo(gomega.HaveOccurred())
gomega.Expect(k8sClient).NotTo(gomega.BeNil())

JobSetReadyForTesting(k8sClient)
jobSetReadyForTesting(k8sClient)
})

func JobSetReadyForTesting(client client.Client) {
func jobSetReadyForTesting(k8sClient client.Client) {
ginkgo.By("waiting for resources to be ready for testing")
// To verify that webhooks are ready, let's create a simple jobset.
js := testutils.MakeJobSet("js", "default").
ReplicatedJob(testutils.MakeReplicatedJob("rjob").
Job(testutils.MakeJobTemplate("job", "default").
PodSpec(testutils.TestPodSpec).Obj()).
Obj()).Obj()

// Once the creation succeeds, that means the webhooks are ready
// and we can begin testing.
gomega.Eventually(func() error {
return client.Create(context.Background(), js)
deploymentKey := types.NamespacedName{Namespace: "jobset-system", Name: "jobset-controller-manager"}
deployment := &appsv1.Deployment{}
pods := &corev1.PodList{}
gomega.EventuallyWithOffset(1, func(g gomega.Gomega) error {
g.Expect(k8sClient.Get(ctx, deploymentKey, deployment)).To(gomega.Succeed())
g.Expect(deployment.Status.Conditions).To(gomega.ContainElement(gomega.BeComparableTo(
appsv1.DeploymentCondition{Type: appsv1.DeploymentAvailable, Status: corev1.ConditionTrue},
cmpopts.IgnoreFields(appsv1.DeploymentCondition{}, "Reason", "Message", "LastUpdateTime", "LastTransitionTime")),
))
g.Expect(k8sClient.List(ctx, pods, client.InNamespace(deploymentKey.Namespace), client.MatchingLabels(deployment.Spec.Selector.MatchLabels))).To(gomega.Succeed())
for _, pod := range pods.Items {
for _, cs := range pod.Status.ContainerStatuses {
g.Expect(ptr.Deref(cs.Started, false)).To(gomega.BeTrue())
g.Expect(cs.Ready).To(gomega.BeTrue())
if cs.RestartCount > 0 {
return gomega.StopTrying(fmt.Sprintf("%q in %q has restarted %d times", cs.Name, pod.Name, cs.RestartCount))
}
}
}
return nil
}, timeout, interval).Should(gomega.Succeed())

// Delete this jobset before beginning tests.
gomega.Expect(client.Delete(ctx, js))
gomega.Eventually(func() error {
return client.Get(ctx, types.NamespacedName{Name: js.Name, Namespace: js.Namespace}, &jobset.JobSet{})
}).ShouldNot(gomega.Succeed())
}

0 comments on commit 09d3972

Please sign in to comment.