Skip to content

Commit

Permalink
Merge pull request #608 from mbobrovskyi/feature/wait-for-the-webhook…
Browse files Browse the repository at this point in the history
…-service-to-be-listening-before-advertising-the-jobset-replica-as-ready

Wait for the webhook service to be listening before advertising the Jobset replica as ready.
  • Loading branch information
k8s-ci-robot authored Jun 24, 2024
2 parents 58d5da2 + 3c8290b commit cc4890b
Show file tree
Hide file tree
Showing 2 changed files with 50 additions and 23 deletions.
23 changes: 20 additions & 3 deletions main.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,9 @@ limitations under the License.
package main

import (
"errors"
"flag"
"net/http"
"os"

// Import all Kubernetes client auth plugins (e.g. Azure, GCP, OIDC, etc.)
Expand Down Expand Up @@ -136,7 +138,7 @@ func main() {
// Controllers who register after manager starts will start directly.
go setupControllers(mgr, certsReady)

setupHealthzAndReadyzCheck(mgr)
setupHealthzAndReadyzCheck(mgr, certsReady)

setupLog.Info("starting manager")
if err := mgr.Start(ctx); err != nil {
Expand Down Expand Up @@ -186,14 +188,29 @@ func setupControllers(mgr ctrl.Manager, certsReady chan struct{}) {
//+kubebuilder:scaffold:builder
}

func setupHealthzAndReadyzCheck(mgr ctrl.Manager) {
func setupHealthzAndReadyzCheck(mgr ctrl.Manager, certsReady <-chan struct{}) {
defer setupLog.Info("both healthz and readyz check are finished and configured")

if err := mgr.AddHealthzCheck("healthz", healthz.Ping); err != nil {
setupLog.Error(err, "unable to set up health check")
os.Exit(1)
}
if err := mgr.AddReadyzCheck("readyz", healthz.Ping); err != nil {

// Wait for the webhook server to be listening before advertising the
// Jobset deployment replica as ready. This allows users to wait with sending
// the first requests, requiring webhooks, until the Jobset deployment is
// available, so that the early requests are not rejected during the Jobset's
// startup. We wrap the call to GetWebhookServer in a closure to delay calling
// the function, otherwise a not fully-initialized webhook server (without
// ready certs) fails the start of the manager.
if err := mgr.AddReadyzCheck("readyz", func(req *http.Request) error {
select {
case <-certsReady:
return mgr.GetWebhookServer().StartedChecker()(req)
default:
return errors.New("certificates are not ready")
}
}); err != nil {
setupLog.Error(err, "unable to set up ready check")
os.Exit(1)
}
Expand Down
50 changes: 30 additions & 20 deletions test/e2e/suite_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,18 +15,21 @@ package e2e

import (
"context"
"fmt"
"testing"
"time"

"github.com/google/go-cmp/cmp/cmpopts"
"github.com/onsi/ginkgo/v2"
"github.com/onsi/gomega"
appsv1 "k8s.io/api/apps/v1"
corev1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/types"
"k8s.io/client-go/kubernetes/scheme"
"sigs.k8s.io/controller-runtime/pkg/client"
"sigs.k8s.io/controller-runtime/pkg/client/config"

jobset "sigs.k8s.io/jobset/api/jobset/v1alpha2"
testutils "sigs.k8s.io/jobset/pkg/util/testing"
//+kubebuilder:scaffold:imports
)

Expand Down Expand Up @@ -59,27 +62,34 @@ var _ = ginkgo.BeforeSuite(func() {
gomega.Expect(err).NotTo(gomega.HaveOccurred())
gomega.Expect(k8sClient).NotTo(gomega.BeNil())

JobSetReadyForTesting(k8sClient)
jobSetReadyForTesting(k8sClient)
})

func JobSetReadyForTesting(client client.Client) {
func jobSetReadyForTesting(k8sClient client.Client) {
ginkgo.By("waiting for resources to be ready for testing")
// To verify that webhooks are ready, let's create a simple jobset.
js := testutils.MakeJobSet("js", "default").
ReplicatedJob(testutils.MakeReplicatedJob("rjob").
Job(testutils.MakeJobTemplate("job", "default").
PodSpec(testutils.TestPodSpec).Obj()).
Obj()).Obj()

// Once the creation succeeds, that means the webhooks are ready
// and we can begin testing.
gomega.Eventually(func() error {
return client.Create(context.Background(), js)
deploymentKey := types.NamespacedName{Namespace: "jobset-system", Name: "jobset-controller-manager"}
deployment := &appsv1.Deployment{}
pods := &corev1.PodList{}
gomega.Eventually(func(g gomega.Gomega) error {
// Get controller-manager deployment.
g.Expect(k8sClient.Get(ctx, deploymentKey, deployment)).To(gomega.Succeed())
// Get pods matches for controller-manager deployment.
g.Expect(k8sClient.List(ctx, pods, client.InNamespace(deploymentKey.Namespace), client.MatchingLabels(deployment.Spec.Selector.MatchLabels))).To(gomega.Succeed())
for _, pod := range pods.Items {
for _, cs := range pod.Status.ContainerStatuses {
// To make sure that we don't have restarts of controller-manager.
// If we have that's mean that something went wrong, and there is
// no needs to continue trying check availability.
if cs.RestartCount > 0 {
return gomega.StopTrying(fmt.Sprintf("%q in %q has restarted %d times", cs.Name, pod.Name, cs.RestartCount))
}
}
}
// To verify that webhooks are ready, checking is deployment have condition Available=True.
g.Expect(deployment.Status.Conditions).To(gomega.ContainElement(gomega.BeComparableTo(
appsv1.DeploymentCondition{Type: appsv1.DeploymentAvailable, Status: corev1.ConditionTrue},
cmpopts.IgnoreFields(appsv1.DeploymentCondition{}, "Reason", "Message", "LastUpdateTime", "LastTransitionTime")),
))
return nil
}, timeout, interval).Should(gomega.Succeed())

// Delete this jobset before beginning tests.
gomega.Expect(client.Delete(ctx, js))
gomega.Eventually(func() error {
return client.Get(ctx, types.NamespacedName{Name: js.Name, Namespace: js.Namespace}, &jobset.JobSet{})
}).ShouldNot(gomega.Succeed())
}

0 comments on commit cc4890b

Please sign in to comment.