Skip to content

Commit

Permalink
webhook: add options to disable resource_namespace tag in metrics (#2931
Browse files Browse the repository at this point in the history
)

* webhook: add options to disable resource_namespace tag in metrics

To add some context, historically, `resource_name` was removed from this
tag list due to its high potential of causing high metrics cardinality.
See [#1464][1] for more information.

While that's great, but it might not be sufficient for large scale use
cases where namespaces can be super dynamic (with generateName, too) or
grows fase enough. There is an issue report from
[tektoncd/pipeline#3171][2] which talks about this.

This proposal makes it possible to disable `resource_namespace` tag via
an option function. The default behavior is not changed, so no user
impact if any of existing users rely on this tag. There is no API
contract change either due to the beauty of variadic functions.

Now downstream projects can consume this by override `StatsReporter` in
webhook context options with their own preference. As a caveat here, if
downstream project does choose to override `StatsReporter`, the default
`ReportMetrics` function shouldn't be called by default as they may now
have a different set of tag keys to report. As such, this function is
only called if the default `StatsReporter` is used.

[1]: #1464
[2]: tektoncd/pipeline#3171

* webhook: add StatsReporterOptions in webhook.Options

There are two ways to customize StatsReporter:

1. Use a whole new StatsReporter implementation.
1. Or pass Option funcs to customize the default StatsReporter.

Option 1 is less practical at this time due to the metrics registration
conflict. `webhook.RegisterMetrics()` is called regardless which
StatsReporter implementation is used (which is a problem by itself). The
second option is more practical since it works well without dealing with
metrics conflicts.

The `webhook.Option` in particular allows people to discard certain
metrics tags.
  • Loading branch information
zhouhaibing089 authored Apr 1, 2024
1 parent 3f02478 commit 03bf3de
Show file tree
Hide file tree
Showing 4 changed files with 183 additions and 42 deletions.
7 changes: 6 additions & 1 deletion injection/sharedmain/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -290,7 +290,12 @@ func MainWithConfig(ctx context.Context, component string, cfg *rest.Config, cto
var wh *webhook.Webhook
if len(webhooks) > 0 {
// Register webhook metrics
webhook.RegisterMetrics()
opts := webhook.GetOptions(ctx)
if opts != nil {
webhook.RegisterMetrics(opts.StatsReporterOptions...)
} else {
webhook.RegisterMetrics()
}

wh, err = webhook.New(ctx, webhooks)
if err != nil {
Expand Down
164 changes: 128 additions & 36 deletions webhook/stats_reporter.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ import (
"go.opencensus.io/tag"
admissionv1 "k8s.io/api/admission/v1"
apixv1 "k8s.io/apiextensions-apiserver/pkg/apis/apiextensions/v1"
"k8s.io/apimachinery/pkg/util/sets"
"knative.dev/pkg/metrics"
)

Expand Down Expand Up @@ -65,43 +66,128 @@ var (
resultCodeKey = tag.MustNewKey("result_code")
)

type admissionToValue func(*admissionv1.AdmissionRequest, *admissionv1.AdmissionResponse) string
type conversionToValue func(*apixv1.ConversionRequest, *apixv1.ConversionResponse) string

var (
allAdmissionTags = map[tag.Key]admissionToValue{
requestOperationKey: func(req *admissionv1.AdmissionRequest, _ *admissionv1.AdmissionResponse) string {
return string(req.Operation)
},
kindGroupKey: func(req *admissionv1.AdmissionRequest, _ *admissionv1.AdmissionResponse) string {
return req.Kind.Group
},
kindVersionKey: func(req *admissionv1.AdmissionRequest, _ *admissionv1.AdmissionResponse) string {
return req.Kind.Version
},
kindKindKey: func(req *admissionv1.AdmissionRequest, _ *admissionv1.AdmissionResponse) string {
return req.Kind.Kind
},
resourceGroupKey: func(req *admissionv1.AdmissionRequest, _ *admissionv1.AdmissionResponse) string {
return req.Resource.Group
},
resourceVersionKey: func(req *admissionv1.AdmissionRequest, _ *admissionv1.AdmissionResponse) string {
return req.Resource.Version
},
resourceResourceKey: func(req *admissionv1.AdmissionRequest, _ *admissionv1.AdmissionResponse) string {
return req.Resource.Resource
},
resourceNamespaceKey: func(req *admissionv1.AdmissionRequest, _ *admissionv1.AdmissionResponse) string {
return req.Namespace
},
admissionAllowedKey: func(_ *admissionv1.AdmissionRequest, resp *admissionv1.AdmissionResponse) string {
return strconv.FormatBool(resp.Allowed)
},
}
allConversionTags = map[tag.Key]conversionToValue{
desiredAPIVersionKey: func(req *apixv1.ConversionRequest, _ *apixv1.ConversionResponse) string {
return req.DesiredAPIVersion
},
resultStatusKey: func(_ *apixv1.ConversionRequest, resp *apixv1.ConversionResponse) string {
return resp.Result.Status
},
resultReasonKey: func(_ *apixv1.ConversionRequest, resp *apixv1.ConversionResponse) string {
return string(resp.Result.Reason)
},
resultCodeKey: func(_ *apixv1.ConversionRequest, resp *apixv1.ConversionResponse) string {
return strconv.Itoa(int(resp.Result.Code))
},
}
)

// StatsReporter reports webhook metrics
type StatsReporter interface {
ReportAdmissionRequest(request *admissionv1.AdmissionRequest, response *admissionv1.AdmissionResponse, d time.Duration) error
ReportConversionRequest(request *apixv1.ConversionRequest, response *apixv1.ConversionResponse, d time.Duration) error
}

type statsReporterOptions struct {
tagsToExclude sets.Set[string]
}

type StatsReporterOption func(_ *statsReporterOptions)

func WithoutTags(tags ...string) StatsReporterOption {
return func(opts *statsReporterOptions) {
opts.tagsToExclude.Insert(tags...)
}
}

// reporter implements StatsReporter interface
type reporter struct {
ctx context.Context

admissionTags map[tag.Key]admissionToValue
conversionTags map[tag.Key]conversionToValue
}

// NewStatsReporter creates a reporter for webhook metrics
func NewStatsReporter() (StatsReporter, error) {
func NewStatsReporter(opts ...StatsReporterOption) (StatsReporter, error) {
ctx, err := tag.New(
context.Background(),
)
if err != nil {
return nil, err
}

return &reporter{ctx: ctx}, nil
options := statsReporterOptions{
tagsToExclude: sets.New[string](),
}
for _, opt := range opts {
opt(&options)
}

admissionTags := make(map[tag.Key]admissionToValue)
for key, f := range allAdmissionTags {
if options.tagsToExclude.Has(key.Name()) {
continue
}
admissionTags[key] = f
}
conversionTags := make(map[tag.Key]conversionToValue)
for key, f := range allConversionTags {
if options.tagsToExclude.Has(key.Name()) {
continue
}
conversionTags[key] = f
}

return &reporter{
ctx: ctx,
admissionTags: admissionTags,
conversionTags: conversionTags,
}, nil
}

// Captures req count metric, recording the count and the duration
func (r *reporter) ReportAdmissionRequest(req *admissionv1.AdmissionRequest, resp *admissionv1.AdmissionResponse, d time.Duration) error {
ctx, err := tag.New(
r.ctx,
tag.Insert(requestOperationKey, string(req.Operation)),
tag.Insert(kindGroupKey, req.Kind.Group),
tag.Insert(kindVersionKey, req.Kind.Version),
tag.Insert(kindKindKey, req.Kind.Kind),
tag.Insert(resourceGroupKey, req.Resource.Group),
tag.Insert(resourceVersionKey, req.Resource.Version),
tag.Insert(resourceResourceKey, req.Resource.Resource),
tag.Insert(resourceNamespaceKey, req.Namespace),
tag.Insert(admissionAllowedKey, strconv.FormatBool(resp.Allowed)),
)
mutators := make([]tag.Mutator, 0, len(r.admissionTags))

for key, f := range r.admissionTags {
mutators = append(mutators, tag.Insert(key, f(req, resp)))
}

ctx, err := tag.New(r.ctx, mutators...)
if err != nil {
return err
}
Expand All @@ -114,13 +200,13 @@ func (r *reporter) ReportAdmissionRequest(req *admissionv1.AdmissionRequest, res

// Captures req count metric, recording the count and the duration
func (r *reporter) ReportConversionRequest(req *apixv1.ConversionRequest, resp *apixv1.ConversionResponse, d time.Duration) error {
ctx, err := tag.New(
r.ctx,
tag.Insert(desiredAPIVersionKey, req.DesiredAPIVersion),
tag.Insert(resultStatusKey, resp.Result.Status),
tag.Insert(resultReasonKey, string(resp.Result.Reason)),
tag.Insert(resultCodeKey, strconv.Itoa(int(resp.Result.Code))),
)
mutators := make([]tag.Mutator, 0, len(r.conversionTags))

for key, f := range r.conversionTags {
mutators = append(mutators, tag.Insert(key, f(req, resp)))
}

ctx, err := tag.New(r.ctx, mutators...)
if err != nil {
return err
}
Expand All @@ -131,21 +217,27 @@ func (r *reporter) ReportConversionRequest(req *apixv1.ConversionRequest, resp *
return nil
}

func RegisterMetrics() {
tagKeys := []tag.Key{
requestOperationKey,
kindGroupKey,
kindVersionKey,
kindKindKey,
resourceGroupKey,
resourceVersionKey,
resourceResourceKey,
resourceNamespaceKey,
admissionAllowedKey,
desiredAPIVersionKey,
resultStatusKey,
resultReasonKey,
resultCodeKey}
func RegisterMetrics(opts ...StatsReporterOption) {
options := statsReporterOptions{
tagsToExclude: sets.New[string](),
}
for _, opt := range opts {
opt(&options)
}

tagKeys := []tag.Key{}
for tag := range allAdmissionTags {
if options.tagsToExclude.Has(tag.Name()) {
continue
}
tagKeys = append(tagKeys, tag)
}
for tag := range allConversionTags {
if options.tagsToExclude.Has(tag.Name()) {
continue
}
tagKeys = append(tagKeys, tag)
}

if err := view.Register(
&view.View{
Expand Down
49 changes: 45 additions & 4 deletions webhook/stats_reporter_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,47 @@ func TestWebhookStatsReporterAdmission(t *testing.T) {
metricstest.CheckDistributionData(t, requestLatenciesName, expectedTags, 2, shortTime, longTime)
}

func TestWebhookStatsReporterAdmissionWithoutNamespaceTag(t *testing.T) {
setup(WithoutTags(resourceNamespaceKey.Name()))
req := &admissionv1.AdmissionRequest{
UID: "705ab4f5-6393-11e8-b7cc-42010a800002",
Kind: metav1.GroupVersionKind{Group: "autoscaling", Version: "v1", Kind: "Scale"},
Resource: metav1.GroupVersionResource{Group: "apps", Version: "v1", Resource: "deployments"},
Name: "my-deployment",
Namespace: "my-namespace",
Operation: admissionv1.Update,
}

resp := &admissionv1.AdmissionResponse{
UID: req.UID,
Allowed: true,
}

r, _ := NewStatsReporter(WithoutTags(resourceNamespaceKey.Name()))

shortTime, longTime := 1100.0, 9100.0
expectedTags := map[string]string{
requestOperationKey.Name(): string(req.Operation),
kindGroupKey.Name(): req.Kind.Group,
kindVersionKey.Name(): req.Kind.Version,
kindKindKey.Name(): req.Kind.Kind,
resourceGroupKey.Name(): req.Resource.Group,
resourceVersionKey.Name(): req.Resource.Version,
resourceResourceKey.Name(): req.Resource.Resource,
admissionAllowedKey.Name(): strconv.FormatBool(resp.Allowed),
}

if err := r.ReportAdmissionRequest(req, resp, time.Duration(shortTime)*time.Millisecond); err != nil {
t.Fatalf("ReportAdmissionRequest() = %v", err)
}
if err := r.ReportAdmissionRequest(req, resp, time.Duration(longTime)*time.Millisecond); err != nil {
t.Fatalf("ReportAdmissionRequest() = %v", err)
}

metricstest.CheckCountData(t, requestCountName, expectedTags, 2)
metricstest.CheckDistributionData(t, requestLatenciesName, expectedTags, 2, shortTime, longTime)
}

func TestWebhookStatsReporterConversion(t *testing.T) {
setup()
req := &apixv1.ConversionRequest{
Expand Down Expand Up @@ -103,12 +144,12 @@ func TestWebhookStatsReporterConversion(t *testing.T) {
metricstest.CheckDistributionData(t, requestLatenciesName, expectedTags, 2, shortTime, longTime)
}

func setup() {
resetMetrics()
func setup(opts ...StatsReporterOption) {
resetMetrics(opts...)
}

// opencensus metrics carry global state that need to be reset between unit tests
func resetMetrics() {
func resetMetrics(opts ...StatsReporterOption) {
metricstest.Unregister(requestCountName, requestLatenciesName)
RegisterMetrics()
RegisterMetrics(opts...)
}
5 changes: 4 additions & 1 deletion webhook/webhook.go
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,9 @@ type Options struct {
// only a single port for the service.
Port int

// StatsReporterOptions are the options used to initialize the default StatsReporter
StatsReporterOptions []StatsReporterOption

// StatsReporter reports metrics about the webhook.
// This will be automatically initialized by the constructor if left uninitialized.
StatsReporter StatsReporter
Expand Down Expand Up @@ -144,7 +147,7 @@ func New(
logger := logging.FromContext(ctx)

if opts.StatsReporter == nil {
reporter, err := NewStatsReporter()
reporter, err := NewStatsReporter(opts.StatsReporterOptions...)
if err != nil {
return nil, err
}
Expand Down

0 comments on commit 03bf3de

Please sign in to comment.