Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add metric definitions for all metrics known at Consul start #9198

Merged
merged 15 commits into from
Nov 16, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .changelog/9198.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
```release-note:improvement
agent: All metrics should be present and available to prometheus scrapers when Consul starts. If any non-deprecated metrics are missing please submit an issue with its name.
```
2 changes: 1 addition & 1 deletion agent/agent_endpoint.go
Original file line number Diff line number Diff line change
Expand Up @@ -136,7 +136,7 @@ func (s *HTTPHandlers) AgentMetrics(resp http.ResponseWriter, req *http.Request)
return nil, acl.ErrPermissionDenied
}
if enablePrometheusOutput(req) {
if s.agent.config.Telemetry.PrometheusRetentionTime < 1 {
if s.agent.config.Telemetry.PrometheusOpts.Expiration < 1 {
resp.WriteHeader(http.StatusUnsupportedMediaType)
fmt.Fprint(resp, "Prometheus is not enabled since its retention time is not positive")
return nil, nil
Expand Down
31 changes: 31 additions & 0 deletions agent/cache/cache.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ import (
"time"

"github.com/armon/go-metrics"
"github.com/armon/go-metrics/prometheus"
"golang.org/x/time/rate"

"github.com/hashicorp/consul/lib"
Expand All @@ -32,6 +33,34 @@ import (

//go:generate mockery -all -inpkg

// TODO(kit): remove the namespace from these once the metrics themselves change
var Gauges = []prometheus.GaugeDefinition{
{
Name: []string{"consul", "cache", "entries_count"},
Help: "",
},
}

// TODO(kit): remove the namespace from these once the metrics themselves change
var Counters = []prometheus.CounterDefinition{
{
Name: []string{"consul", "cache", "bypass"},
Help: "",
},
{
Name: []string{"consul", "cache", "fetch_success"},
Help: "",
},
{
Name: []string{"consul", "cache", "fetch_error"},
Help: "",
},
{
Name: []string{"consul", "cache", "evict_expired"},
Help: "",
},
}

// Constants related to refresh backoff. We probably don't ever need to
// make these configurable knobs since they primarily exist to lower load.
const (
Expand Down Expand Up @@ -629,6 +658,7 @@ func (c *Cache) fetch(key string, r getOptions, allowNew bool, attempt uint, ign
// Error handling
if err == nil {
labels := []metrics.Label{{Name: "result_not_modified", Value: strconv.FormatBool(result.NotModified)}}
// TODO(kit): move tEntry.Name to a label on the first write here and deprecate the second write
metrics.IncrCounterWithLabels([]string{"consul", "cache", "fetch_success"}, 1, labels)
metrics.IncrCounterWithLabels([]string{"consul", "cache", tEntry.Name, "fetch_success"}, 1, labels)

Expand Down Expand Up @@ -658,6 +688,7 @@ func (c *Cache) fetch(key string, r getOptions, allowNew bool, attempt uint, ign
newEntry.RefreshLostContact = time.Time{}
}
} else {
// TODO(kit): Add tEntry.Name to label on fetch_error and deprecate second write
metrics.IncrCounter([]string{"consul", "cache", "fetch_error"}, 1)
metrics.IncrCounter([]string{"consul", "cache", tEntry.Name, "fetch_error"}, 1)

Expand Down
118 changes: 117 additions & 1 deletion agent/catalog_endpoint.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,127 @@ import (
"net/http"
"strings"

metrics "github.com/armon/go-metrics"
"github.com/armon/go-metrics"
"github.com/armon/go-metrics/prometheus"
cachetype "github.com/hashicorp/consul/agent/cache-types"
"github.com/hashicorp/consul/agent/structs"
)

var CatalogCounters = []prometheus.CounterDefinition{
{
Name: []string{"client", "api", "catalog_register"},
Help: "Increments whenever a Consul agent receives a catalog register request.",
},
{
Name: []string{"client", "rpc", "error", "catalog_register"},
Help: "Increments whenever a Consul agent receives an RPC error for a catalog register request.",
},
{
Name: []string{"client", "api", "success", "catalog_register"},
Help: "Increments whenever a Consul agent successfully responds to a catalog register request.",
},
{
Name: []string{"client", "api", "catalog_deregister"},
Help: "Increments whenever a Consul agent receives a catalog deregister request.",
},
{
Name: []string{"client", "api", "catalog_datacenters"},
Help: "Increments whenever a Consul agent receives a request to list datacenters in the catalog.",
},
{
Name: []string{"client", "rpc", "error", "catalog_deregister"},
Help: "Increments whenever a Consul agent receives an RPC error for a catalog deregister request.",
},
{
Name: []string{"client", "api", "success", "catalog_nodes"},
Help: "Increments whenever a Consul agent successfully responds to a request to list nodes.",
},
{
Name: []string{"client", "rpc", "error", "catalog_nodes"},
Help: "Increments whenever a Consul agent receives an RPC error for a request to list nodes.",
},
{
Name: []string{"client", "api", "success", "catalog_deregister"},
Help: "Increments whenever a Consul agent successfully responds to a catalog deregister request.",
},
{
Name: []string{"client", "rpc", "error", "catalog_datacenters"},
Help: "Increments whenever a Consul agent receives an RPC error for a request to list datacenters.",
},
{
Name: []string{"client", "api", "success", "catalog_datacenters"},
Help: "Increments whenever a Consul agent successfully responds to a request to list datacenters.",
},
{
Name: []string{"client", "api", "catalog_nodes"},
Help: "Increments whenever a Consul agent receives a request to list nodes from the catalog.",
},
{
Name: []string{"client", "api", "catalog_services"},
Help: "Increments whenever a Consul agent receives a request to list services from the catalog.",
},
{
Name: []string{"client", "rpc", "error", "catalog_services"},
Help: "Increments whenever a Consul agent receives an RPC error for a request to list services.",
},
{
Name: []string{"client", "api", "success", "catalog_services"},
Help: "Increments whenever a Consul agent successfully responds to a request to list services.",
},
{
Name: []string{"client", "api", "catalog_service_nodes"},
Help: "Increments whenever a Consul agent receives a request to list nodes offering a service.",
},
{
Name: []string{"client", "rpc", "error", "catalog_service_nodes"},
Help: "Increments whenever a Consul agent receives an RPC error for a request to list nodes offering a service.",
},
{
Name: []string{"client", "api", "success", "catalog_service_nodes"},
Help: "Increments whenever a Consul agent successfully responds to a request to list nodes offering a service.",
},
{
Name: []string{"client", "api", "error", "catalog_service_nodes"},
Help: "",
},
{
Name: []string{"client", "api", "catalog_node_services"},
Help: "Increments whenever a Consul agent successfully responds to a request to list nodes offering a service.",
},
{
Name: []string{"client", "api", "success", "catalog_node_services"},
Help: "Increments whenever a Consul agent successfully responds to a request to list services in a node.",
},
{
Name: []string{"client", "rpc", "error", "catalog_node_services"},
Help: "Increments whenever a Consul agent receives an RPC error for a request to list services in a node.",
},
{
Name: []string{"client", "api", "catalog_node_service_list"},
Help: "",
},
{
Name: []string{"client", "rpc", "error", "catalog_node_service_list"},
Help: "",
},
{
Name: []string{"client", "api", "success", "catalog_node_service_list"},
Help: "",
},
{
Name: []string{"client", "api", "catalog_gateway_services"},
Help: "Increments whenever a Consul agent receives a request to list services associated with a gateway.",
},
{
Name: []string{"client", "rpc", "error", "catalog_gateway_services"},
Help: "Increments whenever a Consul agent receives an RPC error for a request to list services associated with a gateway.",
},
{
Name: []string{"client", "api", "success", "catalog_gateway_services"},
Help: "Increments whenever a Consul agent successfully responds to a request to list services associated with a gateway.",
},
}

func (s *HTTPHandlers) CatalogRegister(resp http.ResponseWriter, req *http.Request) (interface{}, error) {
metrics.IncrCounterWithLabels([]string{"client", "api", "catalog_register"}, 1,
[]metrics.Label{{Name: "node", Value: s.nodeName()}})
Expand Down
5 changes: 4 additions & 1 deletion agent/config/builder.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ import (
"strings"
"time"

"github.com/armon/go-metrics/prometheus"
"github.com/hashicorp/go-bexpr"
"github.com/hashicorp/go-hclog"
"github.com/hashicorp/go-multierror"
Expand Down Expand Up @@ -942,13 +943,15 @@ func (b *Builder) Build() (rt RuntimeConfig, err error) {
DisableHostname: b.boolVal(c.Telemetry.DisableHostname),
DogstatsdAddr: b.stringVal(c.Telemetry.DogstatsdAddr),
DogstatsdTags: c.Telemetry.DogstatsdTags,
PrometheusRetentionTime: b.durationVal("prometheus_retention_time", c.Telemetry.PrometheusRetentionTime),
FilterDefault: b.boolVal(c.Telemetry.FilterDefault),
AllowedPrefixes: telemetryAllowedPrefixes,
BlockedPrefixes: telemetryBlockedPrefixes,
MetricsPrefix: b.stringVal(c.Telemetry.MetricsPrefix),
StatsdAddr: b.stringVal(c.Telemetry.StatsdAddr),
StatsiteAddr: b.stringVal(c.Telemetry.StatsiteAddr),
PrometheusOpts: prometheus.PrometheusOpts{
Expiration: b.durationVal("prometheus_retention_time", c.Telemetry.PrometheusRetentionTime),
},
},

// Agent
Expand Down
15 changes: 12 additions & 3 deletions agent/config/runtime_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ import (
"testing"
"time"

"github.com/armon/go-metrics/prometheus"
"github.com/stretchr/testify/require"

"github.com/hashicorp/consul/agent/cache"
Expand Down Expand Up @@ -7103,9 +7104,11 @@ func TestFullConfig(t *testing.T) {
AllowedPrefixes: []string{"oJotS8XJ"},
BlockedPrefixes: []string{"cazlEhGn"},
MetricsPrefix: "ftO6DySn",
PrometheusRetentionTime: 15 * time.Second,
StatsdAddr: "drce87cy",
StatsiteAddr: "HpFwKB8R",
PrometheusOpts: prometheus.PrometheusOpts{
Expiration: 15 * time.Second,
},
},
TLSCipherSuites: []uint16{tls.TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA, tls.TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA256},
TLSMinVersion: "pAOWafkR",
Expand Down Expand Up @@ -7814,9 +7817,15 @@ func TestSanitize(t *testing.T) {
"DogstatsdTags": [],
"FilterDefault": false,
"MetricsPrefix": "",
"PrometheusRetentionTime": "0s",
"StatsdAddr": "",
"StatsiteAddr": ""
"StatsiteAddr": "",
"PrometheusOpts": {
"Expiration": "0s",
"Registerer": null,
"GaugeDefinitions": [],
"CounterDefinitions": [],
"SummaryDefinitions": []
}
},
"TranslateWANAddrs": false,
"TxnMaxReqLen": 5678000000000000,
Expand Down
29 changes: 28 additions & 1 deletion agent/consul/acl.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,8 @@ import (
"sync"
"time"

metrics "github.com/armon/go-metrics"
"github.com/armon/go-metrics"
"github.com/armon/go-metrics/prometheus"
"github.com/hashicorp/consul/acl"
"github.com/hashicorp/consul/agent/structs"
"github.com/hashicorp/consul/logging"
Expand All @@ -15,6 +16,32 @@ import (
"golang.org/x/time/rate"
)

var ACLCounters = []prometheus.CounterDefinition{
{
Name: []string{"acl", "token", "cache_hit"},
Help: "",
},
{
Name: []string{"acl", "token", "cache_miss"},
Help: "",
},
}

var ACLSummaries = []prometheus.SummaryDefinition{
{
Name: []string{"acl", "resolveTokenLegacy"},
Help: "",
},
{
Name: []string{"acl", "ResolveToken"},
Help: "",
},
{
Name: []string{"acl", "ResolveTokenToIdentity"},
Help: "",
},
}

// These must be kept in sync with the constants in command/agent/acl.go.
const (
// anonymousToken is the token ID we re-write to if there is no token ID
Expand Down
70 changes: 69 additions & 1 deletion agent/consul/acl_endpoint.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,8 @@ import (
"regexp"
"time"

metrics "github.com/armon/go-metrics"
"github.com/armon/go-metrics"
"github.com/armon/go-metrics/prometheus"
"github.com/hashicorp/consul/acl"
"github.com/hashicorp/consul/agent/consul/authmethod"
"github.com/hashicorp/consul/agent/consul/state"
Expand All @@ -30,6 +31,73 @@ const (
aclBootstrapReset = "acl-bootstrap-reset"
)

var ACLEndpointSummaries = []prometheus.SummaryDefinition{
{
Name: []string{"acl", "token", "clone"},
Help: "",
},
{
Name: []string{"acl", "token", "upsert"},
Help: "",
},
{
Name: []string{"acl", "token", "delete"},
Help: "",
},
{
Name: []string{"acl", "policy", "upsert"},
Help: "",
},
{
Name: []string{"acl", "policy", "delete"},
Help: "",
},
{
Name: []string{"acl", "policy", "delete"},
Help: "",
},
{
Name: []string{"acl", "role", "upsert"},
Help: "",
},
{
Name: []string{"acl", "role", "delete"},
Help: "",
},
{
Name: []string{"acl", "bindingrule", "upsert"},
Help: "",
},
{
Name: []string{"acl", "bindingrule", "delete"},
Help: "",
},
{
Name: []string{"acl", "authmethod", "upsert"},
Help: "",
},
{
Name: []string{"acl", "authmethod", "delete"},
Help: "",
},
{
Name: []string{"acl", "login"},
Help: "",
},
{
Name: []string{"acl", "login"},
Help: "",
},
{
Name: []string{"acl", "logout"},
Help: "",
},
{
Name: []string{"acl", "logout"},
Help: "",
},
}

// Regex for matching
var (
validPolicyName = regexp.MustCompile(`^[A-Za-z0-9\-_]{1,128}$`)
Expand Down
Loading