Skip to content

Commit

Permalink
Watcher System Metrics (#8338)
Browse files Browse the repository at this point in the history
* add event watcher prometheus metrics and a new tctl top tab to visualize them
  • Loading branch information
rosstimothy authored Sep 28, 2021
1 parent b58ad48 commit fb0ab2b
Show file tree
Hide file tree
Showing 6 changed files with 484 additions and 77 deletions.
48 changes: 42 additions & 6 deletions lib/auth/grpcserver.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ package auth
import (
"context"
"crypto/tls"
"fmt"
"io"
"net"
"time"
Expand Down Expand Up @@ -51,11 +52,28 @@ import (
_ "google.golang.org/grpc/encoding/gzip" // gzip compressor for gRPC.
)

var heartbeatConnectionsReceived = prometheus.NewCounter(
prometheus.CounterOpts{
Name: teleport.MetricHeartbeatConnectionsReceived,
Help: "Number of times auth received a heartbeat connection",
},
var (
heartbeatConnectionsReceived = prometheus.NewCounter(
prometheus.CounterOpts{
Name: teleport.MetricHeartbeatConnectionsReceived,
Help: "Number of times auth received a heartbeat connection",
},
)
watcherEventsEmitted = prometheus.NewHistogramVec(
prometheus.HistogramOpts{
Name: teleport.MetricWatcherEventsEmitted,
Help: "Per resources size of events emitted",
Buckets: prometheus.LinearBuckets(0, 200, 5),
},
[]string{teleport.TagResource},
)
watcherEventSizes = prometheus.NewHistogram(
prometheus.HistogramOpts{
Name: teleport.MetricWatcherEventSizes,
Help: "Overall size of events emitted",
Buckets: prometheus.LinearBuckets(0, 100, 20),
},
)
)

// GRPCServer is GPRC Auth Server API
Expand Down Expand Up @@ -302,13 +320,31 @@ func (g *GRPCServer) WatchEvents(watch *proto.Watch, stream proto.AuthService_Wa
if err != nil {
return trace.Wrap(err)
}

watcherEventsEmitted.WithLabelValues(resourceLabel(event)).Observe(float64(out.Size()))
watcherEventSizes.Observe(float64(out.Size()))

if err := stream.Send(out); err != nil {
return trace.Wrap(err)
}
}
}
}

// resourceLabel returns the label for the provided types.Event
func resourceLabel(event types.Event) string {
if event.Resource == nil {
return event.Type.String()
}

sub := event.Resource.GetSubKind()
if sub == "" {
return fmt.Sprintf("/%s", event.Resource.GetKind())
}

return fmt.Sprintf("/%s/%s", event.Resource.GetKind(), sub)
}

// eventToGRPC converts a types.Event to an proto.Event
func eventToGRPC(ctx context.Context, in types.Event) (*proto.Event, error) {
eventType, err := eventTypeToGRPC(in.Type)
Expand Down Expand Up @@ -3416,7 +3452,7 @@ func (cfg *GRPCServerConfig) CheckAndSetDefaults() error {

// NewGRPCServer returns a new instance of GRPC server
func NewGRPCServer(cfg GRPCServerConfig) (*GRPCServer, error) {
err := utils.RegisterPrometheusCollectors(heartbeatConnectionsReceived)
err := utils.RegisterPrometheusCollectors(heartbeatConnectionsReceived, watcherEventsEmitted, watcherEventSizes)
if err != nil {
return nil, trace.Wrap(err)
}
Expand Down
4 changes: 2 additions & 2 deletions lib/srv/authhandlers.go
Original file line number Diff line number Diff line change
Expand Up @@ -52,15 +52,15 @@ var (

certificateMismatchCount = prometheus.NewCounter(
prometheus.CounterOpts{
Name: teleport.MetricCertificateMistmatch,
Name: teleport.MetricCertificateMismatch,
Help: "Number of times there was a certificate mismatch",
},
)

prometheusCollectors = []prometheus.Collector{failedLoginCount, certificateMismatchCount}
)

// HandlerConfig is the configuration for an application handler.
// AuthHandlerConfig is the configuration for an application handler.
type AuthHandlerConfig struct {
// Server is the services.Server in the backend.
Server Server
Expand Down
89 changes: 89 additions & 0 deletions lib/utils/circular_buffer.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
/*
Copyright 2021 Gravitational, Inc.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package utils

import (
"sync"

"github.com/gravitational/trace"
)

// CircularBuffer implements an in-memory circular buffer of predefined size
type CircularBuffer struct {
sync.Mutex
buf []float64
start int
end int
size int
}

// NewCircularBuffer returns a new instance of a circular buffer that will hold
// size elements before it rotates
func NewCircularBuffer(size int) (*CircularBuffer, error) {
if size <= 0 {
return nil, trace.BadParameter("circular buffer size should be > 0")
}
buf := &CircularBuffer{
buf: make([]float64, size),
start: -1,
end: -1,
size: 0,
}
return buf, nil
}

// Data returns the most recent n elements in the correct order
func (t *CircularBuffer) Data(n int) []float64 {
t.Lock()
defer t.Unlock()

if n <= 0 || t.size == 0 {
return nil
}

// skip first N items so that the most recent are always provided
start := t.start
if n < t.size {
start = (t.start + (t.size - n)) % len(t.buf)
}

if start <= t.end {
return t.buf[start : t.end+1]
}

return append(t.buf[start:], t.buf[:t.end+1]...)
}

// Add pushes a new item onto the buffer
func (t *CircularBuffer) Add(d float64) {
t.Lock()
defer t.Unlock()

if t.size == 0 {
t.start = 0
t.end = 0
t.size = 1
} else if t.size < len(t.buf) {
t.end++
t.size++
} else {
t.end = t.start
t.start = (t.start + 1) % len(t.buf)
}

t.buf[t.end] = d
}
66 changes: 66 additions & 0 deletions lib/utils/circular_buffer_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
/*
Copyright 2021 Gravitational, Inc.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package utils

import (
"testing"

"github.com/google/go-cmp/cmp"
"github.com/google/go-cmp/cmp/cmpopts"
"github.com/stretchr/testify/require"
)

func TestNewCircularBuffer(t *testing.T) {
buff, err := NewCircularBuffer(-1)
require.Error(t, err)
require.Nil(t, buff)

buff, err = NewCircularBuffer(5)
require.NoError(t, err)
require.NotNil(t, buff)
require.Len(t, buff.buf, 5)
}

func TestCircularBuffer_Data(t *testing.T) {
buff, err := NewCircularBuffer(5)
require.NoError(t, err)

expectData := func(expected []float64) {
for i := 0; i < 15; i++ {
e := expected
if i <= len(expected) {
e = expected[len(expected)-i:]
}
require.Empty(t, cmp.Diff(e, buff.Data(i), cmpopts.EquateEmpty()), "i = %v", i)
}
}

expectData(nil)

buff.Add(1)
expectData([]float64{1})

buff.Add(2)
buff.Add(3)
buff.Add(4)
expectData([]float64{1, 2, 3, 4})

buff.Add(5)
buff.Add(6)
buff.Add(7)
expectData([]float64{3, 4, 5, 6, 7})
}
13 changes: 11 additions & 2 deletions metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -64,12 +64,18 @@ const (
// MetricHeartbeatConnectionsReceived counts heartbeat connections received by auth
MetricHeartbeatConnectionsReceived = "heartbeat_connections_received_total"

// MetricCertificateMistmatch counts login failures due to certificate mismatch
MetricCertificateMistmatch = "certificate_mismatch_total"
// MetricCertificateMismatch counts login failures due to certificate mismatch
MetricCertificateMismatch = "certificate_mismatch_total"

// MetricHeartbeatsMissed counts the nodes that failed to heartbeat
MetricHeartbeatsMissed = "heartbeats_missed_total"

// MetricWatcherEventsEmitted counts watcher events that are emitted
MetricWatcherEventsEmitted = "watcher_events"

// MetricWatcherEventSizes measures the size of watcher events that are emitted
MetricWatcherEventSizes = "watcher_event_sizes"

// TagCluster is a metric tag for a cluster
TagCluster = "cluster"
)
Expand Down Expand Up @@ -179,4 +185,7 @@ const (

// TagFalse is a tag value to mark false values
TagFalse = "false"

// TagResource is a tag specifying the resource for an event
TagResource = "resource"
)
Loading

0 comments on commit fb0ab2b

Please sign in to comment.