Skip to content

Commit

Permalink
Surface events for all reconciliation failures. (#3462)
Browse files Browse the repository at this point in the history
* Surface events for all reconciliation failures.

Today errors during reconciliation are generally just logged which isn't particularly friendly for users who might not want to dig through our controller logs.

This changes our boilerplate to surface any errors we encounter during a call to `Reconcile()`.

Fixes: #2941

* Add event for status update failures in HPA.

All of the other reconcilers do this today.
  • Loading branch information
mattmoor authored and knative-prow-robot committed Mar 21, 2019
1 parent f1640a6 commit 718352e
Show file tree
Hide file tree
Showing 13 changed files with 110 additions and 16 deletions.
14 changes: 9 additions & 5 deletions pkg/reconciler/v1alpha1/autoscaling/hpa/hpa.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ import (
"github.com/knative/serving/pkg/reconciler"
"github.com/knative/serving/pkg/reconciler/v1alpha1/autoscaling/hpa/resources"
"go.uber.org/zap"
corev1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/equality"
"k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
Expand Down Expand Up @@ -119,11 +120,14 @@ func (c *Reconciler) Reconcile(ctx context.Context, key string) error {
// This is important because the copy we loaded from the informer's
// cache may be stale and we don't want to overwrite a prior update
// to status with this stale state.
} else {
if _, err := c.updateStatus(pa); err != nil {
logger.Warnw("Failed to update pa status", zap.Error(err))
return err
}
} else if _, err := c.updateStatus(pa); err != nil {
logger.Warnw("Failed to update pa status", zap.Error(err))
c.Recorder.Eventf(pa, corev1.EventTypeWarning, "UpdateFailed",
"Failed to update status for PA %q: %v", pa.Name, err)
return err
}
if err != nil {
c.Recorder.Eventf(pa, corev1.EventTypeWarning, "InternalError", err.Error())
}
return err
}
Expand Down
4 changes: 4 additions & 0 deletions pkg/reconciler/v1alpha1/autoscaling/hpa/hpa_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ import (
"github.com/knative/serving/pkg/reconciler/v1alpha1/autoscaling/hpa/resources"
. "github.com/knative/serving/pkg/reconciler/v1alpha1/testing"
autoscalingv1 "k8s.io/api/autoscaling/v1"
corev1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/apimachinery/pkg/runtime/schema"
Expand Down Expand Up @@ -189,6 +190,9 @@ func TestReconcile(t *testing.T) {
"FailedCreate", "Failed to create HorizontalPodAutoscaler \"test-revision\".")),
}},
WantErr: true,
WantEvents: []string{
Eventf(corev1.EventTypeWarning, "InternalError", "inducing failure for create horizontalpodautoscalers"),
},
}}

defer ClearAllLoggers()
Expand Down
3 changes: 3 additions & 0 deletions pkg/reconciler/v1alpha1/autoscaling/kpa/kpa.go
Original file line number Diff line number Diff line change
Expand Up @@ -167,6 +167,9 @@ func (c *Reconciler) Reconcile(ctx context.Context, key string) error {
"Failed to update status for PA %q: %v", pa.Name, err)
return err
}
if err != nil {
c.Recorder.Eventf(pa, corev1.EventTypeWarning, "InternalError", err.Error())
}
return err
}

Expand Down
3 changes: 3 additions & 0 deletions pkg/reconciler/v1alpha1/clusteringress/clusteringress.go
Original file line number Diff line number Diff line change
Expand Up @@ -158,6 +158,9 @@ func (c *Reconciler) Reconcile(ctx context.Context, key string) error {
"Failed to update status for ClusterIngress %q: %v", ci.Name, err)
return err
}
if err != nil {
c.Recorder.Eventf(ci, corev1.EventTypeWarning, "InternalError", err.Error())
}
return err
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -350,6 +350,7 @@ func TestReconcile_Gateway(t *testing.T) {
}},
WantEvents: []string{
Eventf(corev1.EventTypeNormal, "Created", "Created VirtualService %q", "new-created-clusteringress"),
Eventf(corev1.EventTypeWarning, "InternalError", `gateway.networking.istio.io "knative-ingress-gateway" not found`),
},
// Error should be returned when there is no preinstalled gateways.
WantErr: true,
Expand Down
3 changes: 3 additions & 0 deletions pkg/reconciler/v1alpha1/configuration/configuration.go
Original file line number Diff line number Diff line change
Expand Up @@ -143,6 +143,9 @@ func (c *Reconciler) Reconcile(ctx context.Context, key string) error {
"Failed to update status for Configuration %q: %v", config.Name, err)
return err
}
if err != nil {
c.Recorder.Eventf(config, corev1.EventTypeWarning, "InternalError", err.Error())
}
return err
}

Expand Down
7 changes: 6 additions & 1 deletion pkg/reconciler/v1alpha1/configuration/configuration_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -236,7 +236,10 @@ func TestReconcile(t *testing.T) {
})),
},
WantErr: true,
Key: "foo/bad-condition",
WantEvents: []string{
Eventf(corev1.EventTypeWarning, "InternalError", `unrecognized condition status: Bad on revision "bad-condition"`),
},
Key: "foo/bad-condition",
}, {
Name: "failure creating build",
// We induce a failure creating a build
Expand All @@ -260,6 +263,7 @@ func TestReconcile(t *testing.T) {
WantEvents: []string{
Eventf(corev1.EventTypeWarning, "CreationFailed", "Failed to create Revision for Configuration %q: %v",
"create-build-failure", fmt.Sprintf("Failed to create Build for Configuration %q: %v", "create-build-failure", "inducing failure for create builds")),
Eventf(corev1.EventTypeWarning, "InternalError", `Failed to create Build for Configuration "create-build-failure": inducing failure for create builds`),
},
Key: "foo/create-build-failure",
}, {
Expand All @@ -284,6 +288,7 @@ func TestReconcile(t *testing.T) {
WantEvents: []string{
Eventf(corev1.EventTypeWarning, "CreationFailed", "Failed to create Revision for Configuration %q: %v",
"create-revision-failure", "inducing failure for create revisions"),
Eventf(corev1.EventTypeWarning, "InternalError", "inducing failure for create revisions"),
},
Key: "foo/create-revision-failure",
}, {
Expand Down
3 changes: 3 additions & 0 deletions pkg/reconciler/v1alpha1/revision/revision.go
Original file line number Diff line number Diff line change
Expand Up @@ -276,6 +276,9 @@ func (c *Reconciler) Reconcile(ctx context.Context, key string) error {
"Failed to update status for Revision %q: %v", rev.Name, err)
return err
}
if err != nil {
c.Recorder.Eventf(rev, corev1.EventTypeWarning, "InternalError", err.Error())
}
return err
}

Expand Down
33 changes: 33 additions & 0 deletions pkg/reconciler/v1alpha1/revision/table_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -135,6 +135,9 @@ func TestReconcile(t *testing.T) {
WithK8sServiceName, WithLogURL, WithInitRevConditions,
WithNoBuild, MarkDeploying("Deploying")),
}},
WantEvents: []string{
Eventf(corev1.EventTypeWarning, "InternalError", "inducing failure for create podautoscalers"),
},
Key: "foo/create-kpa-failure",
}, {
Name: "failure creating user deployment",
Expand All @@ -158,6 +161,9 @@ func TestReconcile(t *testing.T) {
WithLogURL, WithInitRevConditions,
WithNoBuild, MarkDeploying("Deploying")),
}},
WantEvents: []string{
Eventf(corev1.EventTypeWarning, "InternalError", "inducing failure for create deployments"),
},
Key: "foo/create-user-deploy-failure",
}, {
Name: "failure creating user service",
Expand All @@ -183,6 +189,9 @@ func TestReconcile(t *testing.T) {
WithK8sServiceName, WithLogURL, WithInitRevConditions,
WithNoBuild, MarkDeploying("Deploying")),
}},
WantEvents: []string{
Eventf(corev1.EventTypeWarning, "InternalError", "inducing failure for create services"),
},
Key: "foo/create-user-service-failure",
}, {
Name: "stable revision reconciliation",
Expand Down Expand Up @@ -234,6 +243,9 @@ func TestReconcile(t *testing.T) {
WantUpdates: []clientgotesting.UpdateActionImpl{{
Object: deploy("foo", "failure-update-deploy"),
}},
WantEvents: []string{
Eventf(corev1.EventTypeWarning, "InternalError", "inducing failure for update deployments"),
},
Key: "foo/failure-update-deploy",
}, {
Name: "deactivated revision is stable",
Expand Down Expand Up @@ -415,6 +427,9 @@ func TestReconcile(t *testing.T) {
WantUpdates: []clientgotesting.UpdateActionImpl{{
Object: svc("foo", "update-user-svc-failure"),
}},
WantEvents: []string{
Eventf(corev1.EventTypeWarning, "InternalError", "inducing failure for update services"),
},
Key: "foo/update-user-svc-failure",
}, {
Name: "surface deployment timeout",
Expand Down Expand Up @@ -483,6 +498,9 @@ func TestReconcile(t *testing.T) {
// we should see the following status changes.
WithLogURL, WithInitRevConditions),
}},
WantEvents: []string{
Eventf(corev1.EventTypeWarning, "InternalError", `builds.testing.build.knative.dev "the-build" not found`),
},
Key: "foo/missing-build",
}, {
Name: "build running",
Expand Down Expand Up @@ -627,6 +645,9 @@ func TestReconcile(t *testing.T) {
// When we're missing the OwnerRef for Service we see this update.
MarkResourceNotOwned("Service", "missing-owners-service")),
}},
WantEvents: []string{
Eventf(corev1.EventTypeWarning, "InternalError", `Revision: "missing-owners" does not own Service: "missing-owners-service"`),
},
Key: "foo/missing-owners",
}, {
Name: "lost kpa owner ref",
Expand All @@ -646,6 +667,9 @@ func TestReconcile(t *testing.T) {
// When we're missing the OwnerRef for PodAutoscaler we see this update.
MarkResourceNotOwned("PodAutoscaler", "missing-owners")),
}},
WantEvents: []string{
Eventf(corev1.EventTypeWarning, "InternalError", `Revision: "missing-owners" does not own PodAutoscaler: "missing-owners"`),
},
Key: "foo/missing-owners",
}, {
Name: "lost deployment owner ref",
Expand All @@ -665,6 +689,9 @@ func TestReconcile(t *testing.T) {
// When we're missing the OwnerRef for Deployment we see this update.
MarkResourceNotOwned("Deployment", "missing-owners-deployment")),
}},
WantEvents: []string{
Eventf(corev1.EventTypeWarning, "InternalError", `Revision: "missing-owners" does not own Deployment: "missing-owners-deployment"`),
},
Key: "foo/missing-owners",
}, {
// Prior to Serving 0.4 revisions were labelled with
Expand Down Expand Up @@ -817,6 +844,9 @@ func TestReconcileWithVarLogEnabled(t *testing.T) {
WithK8sServiceName, WithLogURL, WithInitRevConditions,
WithNoBuild, MarkDeploying("Deploying")),
}},
WantEvents: []string{
Eventf(corev1.EventTypeWarning, "InternalError", "inducing failure for create configmaps"),
},
Key: "foo/create-configmap-failure",
}, {
Name: "steady state after initial creation",
Expand Down Expand Up @@ -881,6 +911,9 @@ func TestReconcileWithVarLogEnabled(t *testing.T) {
// We should see a single update to the configmap we expect.
Object: fluentdConfigMap("foo", "update-configmap-failure", EnableVarLog),
}},
WantEvents: []string{
Eventf(corev1.EventTypeWarning, "InternalError", "inducing failure for update configmaps"),
},
Key: "foo/update-configmap-failure",
}}

Expand Down
3 changes: 3 additions & 0 deletions pkg/reconciler/v1alpha1/route/route.go
Original file line number Diff line number Diff line change
Expand Up @@ -235,6 +235,9 @@ func (c *Reconciler) Reconcile(ctx context.Context, key string) error {
"Failed to update status for Route %q: %v", route.Name, err)
return err
}
if err != nil {
c.Recorder.Eventf(route, corev1.EventTypeWarning, "InternalError", err.Error())
}
return err
}

Expand Down
31 changes: 21 additions & 10 deletions pkg/reconciler/v1alpha1/route/table_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -336,6 +336,7 @@ func TestReconcile(t *testing.T) {
WantEvents: []string{
Eventf(corev1.EventTypeWarning, "CreationFailed", "Failed to create service %q: %v",
"create-svc-failure", "inducing failure for create services"),
Eventf(corev1.EventTypeWarning, "InternalError", "inducing failure for create services"),
},
Key: "default/create-svc-failure",
}, {
Expand Down Expand Up @@ -385,8 +386,9 @@ func TestReconcile(t *testing.T) {
WantEvents: []string{
Eventf(corev1.EventTypeWarning, "CreationFailed", "Failed to create ClusterIngress for route %s/%s: %v",
"default", "ingress-create-failure", "inducing failure for create clusteringresses"),
Eventf(corev1.EventTypeWarning, "InternalError", "inducing failure for create clusteringresses"),
},
Key: "default/ingress-create-failure",
Key: "default/ingress-create-failure",
SkipNamespaceValidation: true,
}, {
Name: "steady state",
Expand Down Expand Up @@ -469,6 +471,9 @@ func TestReconcile(t *testing.T) {
// The owner is not us, so we are unhappy.
MarkServiceNotOwned),
}},
WantEvents: []string{
Eventf(corev1.EventTypeWarning, "InternalError", `Route: "unhappy-owner" does not own Service: "unhappy-owner"`),
},
Key: "default/unhappy-owner",
}, {
// This tests that when the Route is labelled differently, it is configured with a
Expand Down Expand Up @@ -606,7 +611,7 @@ func TestReconcile(t *testing.T) {
Percent: 100,
})),
}},
Key: "default/new-latest-ready",
Key: "default/new-latest-ready",
SkipNamespaceValidation: true,
}, {
Name: "failure updating cluster ingress",
Expand Down Expand Up @@ -674,7 +679,10 @@ func TestReconcile(t *testing.T) {
Percent: 100,
})),
}},
Key: "default/update-ci-failure",
WantEvents: []string{
Eventf(corev1.EventTypeWarning, "InternalError", "inducing failure for update clusteringresses"),
},
Key: "default/update-ci-failure",
SkipNamespaceValidation: true,
}, {
Name: "reconcile service mutation",
Expand Down Expand Up @@ -756,6 +764,9 @@ func TestReconcile(t *testing.T) {
WantUpdates: []clientgotesting.UpdateActionImpl{{
Object: simpleK8sService(route("default", "svc-mutation", WithConfigTarget("config"))),
}},
WantEvents: []string{
Eventf(corev1.EventTypeWarning, "InternalError", "inducing failure for update services"),
},
Key: "default/svc-mutation",
}, {
// In #1789 we switched this to an ExternalName Service. Services created in
Expand Down Expand Up @@ -887,7 +898,7 @@ func TestReconcile(t *testing.T) {
},
),
}},
Key: "default/ingress-mutation",
Key: "default/ingress-mutation",
SkipNamespaceValidation: true,
}, {
Name: "switch to a different config",
Expand Down Expand Up @@ -1029,7 +1040,7 @@ func TestReconcile(t *testing.T) {
Percent: 100,
})),
}},
Key: "default/pinned-becomes-ready",
Key: "default/pinned-becomes-ready",
SkipNamespaceValidation: true,
}, {
Name: "traffic split becomes ready",
Expand Down Expand Up @@ -1103,7 +1114,7 @@ func TestReconcile(t *testing.T) {
WantEvents: []string{
Eventf(corev1.EventTypeNormal, "Created", "Created ClusterIngress %q", "route-34-78"),
},
Key: "default/named-traffic-split",
Key: "default/named-traffic-split",
SkipNamespaceValidation: true,
}, {
Name: "same revision targets",
Expand Down Expand Up @@ -1191,7 +1202,7 @@ func TestReconcile(t *testing.T) {
WantEvents: []string{
Eventf(corev1.EventTypeNormal, "Created", "Created ClusterIngress %q", "route-1-2"),
},
Key: "default/same-revision-targets",
Key: "default/same-revision-targets",
SkipNamespaceValidation: true,
}, {
Name: "change route configuration",
Expand Down Expand Up @@ -1257,7 +1268,7 @@ func TestReconcile(t *testing.T) {
Percent: 100,
}), WithRouteFinalizer),
}},
Key: "default/switch-configs",
Key: "default/switch-configs",
SkipNamespaceValidation: true,
}, {
Name: "update single target to traffic split with unready revision",
Expand Down Expand Up @@ -1321,7 +1332,7 @@ func TestReconcile(t *testing.T) {
Percent: 100,
})),
}},
Key: "default/split",
Key: "default/split",
SkipNamespaceValidation: true,
}, {
Name: "Update stale lastPinned",
Expand Down Expand Up @@ -1455,7 +1466,7 @@ func TestReconcile(t *testing.T) {
})),
}},
SkipNamespaceValidation: true,
Key: "default/delete-in-progress",
Key: "default/delete-in-progress",
}}

// TODO(mattmoor): Revision inactive (direct reference)
Expand Down
3 changes: 3 additions & 0 deletions pkg/reconciler/v1alpha1/service/service.go
Original file line number Diff line number Diff line change
Expand Up @@ -155,6 +155,9 @@ func (c *Reconciler) Reconcile(ctx context.Context, key string) error {
// If there was a difference and there was no error.
c.Recorder.Eventf(service, corev1.EventTypeNormal, "Updated", "Updated Service %q", service.GetName())
}
if err != nil {
c.Recorder.Eventf(service, corev1.EventTypeWarning, "InternalError", err.Error())
}
return err
}

Expand Down
Loading

0 comments on commit 718352e

Please sign in to comment.