Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Wait for the webhook service to be listening before advertising the Jobset replica as ready. #608

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 20 additions & 3 deletions main.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,9 @@ limitations under the License.
package main

import (
"errors"
"flag"
"net/http"
"os"

// Import all Kubernetes client auth plugins (e.g. Azure, GCP, OIDC, etc.)
Expand Down Expand Up @@ -136,7 +138,7 @@ func main() {
// Controllers who register after manager starts will start directly.
go setupControllers(mgr, certsReady)

setupHealthzAndReadyzCheck(mgr)
setupHealthzAndReadyzCheck(mgr, certsReady)
danielvegamyhre marked this conversation as resolved.
Show resolved Hide resolved

setupLog.Info("starting manager")
if err := mgr.Start(ctx); err != nil {
Expand Down Expand Up @@ -186,14 +188,29 @@ func setupControllers(mgr ctrl.Manager, certsReady chan struct{}) {
//+kubebuilder:scaffold:builder
}

func setupHealthzAndReadyzCheck(mgr ctrl.Manager) {
func setupHealthzAndReadyzCheck(mgr ctrl.Manager, certsReady <-chan struct{}) {
defer setupLog.Info("both healthz and readyz check are finished and configured")

if err := mgr.AddHealthzCheck("healthz", healthz.Ping); err != nil {
setupLog.Error(err, "unable to set up health check")
os.Exit(1)
}
if err := mgr.AddReadyzCheck("readyz", healthz.Ping); err != nil {

// Wait for the webhook server to be listening before advertising the
// Jobset deployment replica as ready. This lets users hold off on sending
// their first webhook-dependent requests until the Jobset deployment is
// available, so that early requests are not rejected during Jobset's
// startup. The call to GetWebhookServer is wrapped in a closure to defer
// it; otherwise a webhook server that is not yet fully initialized (its
// certs are not ready) would cause the manager to fail to start.
if err := mgr.AddReadyzCheck("readyz", func(req *http.Request) error {
select {
case <-certsReady:
return mgr.GetWebhookServer().StartedChecker()(req)
default:
return errors.New("certificates are not ready")
}
}); err != nil {
setupLog.Error(err, "unable to set up ready check")
os.Exit(1)
}
Expand Down
50 changes: 30 additions & 20 deletions test/e2e/suite_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,18 +15,21 @@ package e2e

import (
"context"
"fmt"
"testing"
"time"

"github.com/google/go-cmp/cmp/cmpopts"
"github.com/onsi/ginkgo/v2"
"github.com/onsi/gomega"
appsv1 "k8s.io/api/apps/v1"
corev1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/types"
"k8s.io/client-go/kubernetes/scheme"
"sigs.k8s.io/controller-runtime/pkg/client"
"sigs.k8s.io/controller-runtime/pkg/client/config"

jobset "sigs.k8s.io/jobset/api/jobset/v1alpha2"
testutils "sigs.k8s.io/jobset/pkg/util/testing"
//+kubebuilder:scaffold:imports
)

Expand Down Expand Up @@ -59,27 +62,34 @@ var _ = ginkgo.BeforeSuite(func() {
gomega.Expect(err).NotTo(gomega.HaveOccurred())
gomega.Expect(k8sClient).NotTo(gomega.BeNil())

JobSetReadyForTesting(k8sClient)
jobSetReadyForTesting(k8sClient)
})

func JobSetReadyForTesting(client client.Client) {
func jobSetReadyForTesting(k8sClient client.Client) {
ginkgo.By("waiting for resources to be ready for testing")
// To verify that webhooks are ready, let's create a simple jobset.
js := testutils.MakeJobSet("js", "default").
ReplicatedJob(testutils.MakeReplicatedJob("rjob").
Job(testutils.MakeJobTemplate("job", "default").
PodSpec(testutils.TestPodSpec).Obj()).
Obj()).Obj()

// Once the creation succeeds, that means the webhooks are ready
// and we can begin testing.
gomega.Eventually(func() error {
return client.Create(context.Background(), js)
deploymentKey := types.NamespacedName{Namespace: "jobset-system", Name: "jobset-controller-manager"}
deployment := &appsv1.Deployment{}
pods := &corev1.PodList{}
gomega.Eventually(func(g gomega.Gomega) error {
// Get controller-manager deployment.
g.Expect(k8sClient.Get(ctx, deploymentKey, deployment)).To(gomega.Succeed())
mbobrovskyi marked this conversation as resolved.
Show resolved Hide resolved
// List the pods that match the controller-manager deployment's selector.
g.Expect(k8sClient.List(ctx, pods, client.InNamespace(deploymentKey.Namespace), client.MatchingLabels(deployment.Spec.Selector.MatchLabels))).To(gomega.Succeed())
for _, pod := range pods.Items {
for _, cs := range pod.Status.ContainerStatuses {
// Make sure the controller-manager has not restarted. A restart
// means that something went wrong, so there is no need to keep
// retrying the availability check.
if cs.RestartCount > 0 {
mbobrovskyi marked this conversation as resolved.
Show resolved Hide resolved
return gomega.StopTrying(fmt.Sprintf("%q in %q has restarted %d times", cs.Name, pod.Name, cs.RestartCount))
}
}
}
// To verify that the webhooks are ready, check that the deployment has the condition Available=True.
g.Expect(deployment.Status.Conditions).To(gomega.ContainElement(gomega.BeComparableTo(
appsv1.DeploymentCondition{Type: appsv1.DeploymentAvailable, Status: corev1.ConditionTrue},
cmpopts.IgnoreFields(appsv1.DeploymentCondition{}, "Reason", "Message", "LastUpdateTime", "LastTransitionTime")),
))
return nil
}, timeout, interval).Should(gomega.Succeed())

// Delete this jobset before beginning tests.
gomega.Expect(client.Delete(ctx, js))
gomega.Eventually(func() error {
return client.Get(ctx, types.NamespacedName{Name: js.Name, Namespace: js.Namespace}, &jobset.JobSet{})
}).ShouldNot(gomega.Succeed())
}