Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: timing issue with cert-manager webhook #165

Merged
merged 1 commit into from
Nov 29, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,12 @@

## v0.10.0

### Bug Fixes

- Fixed a readiness timing issue with cert-manager wherein the addon could
report as ready while the webhook was still unready.
([#159](https://github.com/Kong/kubernetes-testing-framework/issues/159))

### Improvements

- Added a [CertManager](https://cert-manager.io/) addon.
Expand Down
57 changes: 57 additions & 0 deletions pkg/clusters/addons/certmanager/addon.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,9 @@ import (
"os/exec"

"github.com/blang/semver/v4"
batchv1 "k8s.io/api/batch/v1"
corev1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime"

Expand Down Expand Up @@ -96,6 +99,10 @@ func (a *Addon) Deploy(ctx context.Context, cluster clusters.Cluster) error {
return fmt.Errorf("%s: %w", stderr.String(), err)
}

if err := a.deployWebhookWaitJob(ctx, cluster); err != nil {
return err
}

// we need to wait for deployment readiness before we try to deploy a
// default issuer for the cluster.
deploymentsReady := false
Expand All @@ -121,6 +128,13 @@ func (a *Addon) Delete(ctx context.Context, cluster clusters.Cluster) error {
}
defer os.Remove(kubeconfig.Name())

// delete any webhook wait job that may remain
if err := cluster.Client().BatchV1().Jobs(DefaultNamespace).Delete(ctx, webhookWaitJobName, metav1.DeleteOptions{}); err != nil {
if !errors.IsNotFound(err) { // tolerate the job having already been deleted
return err
}
}

deployArgs := []string{
"--kubeconfig", kubeconfig.Name(),
"delete", "-f", fmt.Sprintf(manifestFormatter, a.version),
Expand Down Expand Up @@ -151,6 +165,20 @@ func (a *Addon) Ready(ctx context.Context, cluster clusters.Cluster) ([]runtime.
}
}

// in addition to deployments we wait for our webhook wait job to complete
// to avoid any timing issues with the webhook webserver and to ensure it
// is responding to HTTP requests.
job, err := cluster.Client().BatchV1().Jobs(DefaultNamespace).Get(ctx, webhookWaitJobName, metav1.GetOptions{})
if err != nil {
if errors.IsNotFound(err) {
return []runtime.Object{job}, false, nil // wait for the job to exist
}
return []runtime.Object{job}, false, err
}
if job.Status.Succeeded < 1 {
return []runtime.Object{job}, false, nil // not quite ready yet
}

return nil, true, nil
}

Expand Down Expand Up @@ -181,3 +209,32 @@ func (a *Addon) deployDefaultIssuer(ctx context.Context, cluster clusters.Cluste
// cleanupDefaultIssuer deletes the defaultIssuer manifest from the given
// cluster by handing it to clusters.DeleteYAML, and returns whatever error
// that call reports.
func (a *Addon) cleanupDefaultIssuer(ctx context.Context, cluster clusters.Cluster) error {
	err := clusters.DeleteYAML(ctx, cluster, defaultIssuer)
	return err
}

// webhookWaitJobName is the name given to the Job that deployWebhookWaitJob
// creates to probe the cert-manager webhook.
const webhookWaitJobName = "cert-manager-webhook-wait"

// deployWebhookWaitJob creates a Job named webhookWaitJobName in
// DefaultNamespace. The job runs a single "curl" container which requests the
// cert-manager webhook service's /mutate endpoint over HTTPS, with certificate
// verification disabled (-k). Completion of this job can be used as a signal
// that the webhook's webserver is answering requests.
//
// A job that already exists (for instance one left behind by an earlier
// deployment) is tolerated, so calling this more than once does not fail.
// Any other creation error is returned wrapped with context.
func (a *Addon) deployWebhookWaitJob(ctx context.Context, cluster clusters.Cluster) error {
	job := &batchv1.Job{
		ObjectMeta: metav1.ObjectMeta{
			Name: webhookWaitJobName,
		},
		Spec: batchv1.JobSpec{
			Template: corev1.PodTemplateSpec{
				Spec: corev1.PodSpec{
					Containers: []corev1.Container{{
						Name:    "curl",
						Image:   "curlimages/curl",
						Command: []string{"curl", "-k", fmt.Sprintf("https://cert-manager-webhook.%s.svc/mutate", DefaultNamespace)},
					}},
					// a failed curl (webhook not reachable yet) should be
					// retried rather than ending the job's pod for good.
					RestartPolicy: corev1.RestartPolicyOnFailure,
				},
			},
		},
	}

	// errors is k8s.io/apimachinery/pkg/api/errors here; an AlreadyExists
	// response is tolerated in the same way Delete tolerates NotFound for
	// this job.
	_, err := cluster.Client().BatchV1().Jobs(DefaultNamespace).Create(ctx, job, metav1.CreateOptions{})
	if err != nil && !errors.IsAlreadyExists(err) {
		return fmt.Errorf("could not create job to wait for cert-manager webhook: %w", err)
	}

	return nil
}