From 24a247102937fc65a7fc9410473ad98fa85fdcb5 Mon Sep 17 00:00:00 2001 From: Kit Patella Date: Thu, 12 Nov 2020 18:12:12 -0800 Subject: [PATCH 01/13] first pass on agent-configured prometheusDefs and adding defs for every consul metric --- agent/catalog_endpoint.go | 119 +++++++++++++++++++++- agent/consul/acl.go | 29 +++++- agent/consul/acl_endpoint.go | 70 ++++++++++++- agent/consul/autopilot.go | 12 +++ agent/consul/catalog_endpoint.go | 47 +++++++++ agent/consul/client.go | 16 +++ agent/consul/config_endpoint.go | 29 ++++++ agent/consul/federation_state_endpoint.go | 22 +++- agent/consul/intention_endpoint.go | 9 ++ agent/consul/kvs_endpoint.go | 8 ++ agent/consul/prepared_query_endpoint.go | 20 ++++ agent/consul/rpc.go | 42 ++++++++ agent/consul/server.go | 4 +- agent/consul/session_ttl.go | 24 +++++ agent/consul/txn_endpoint.go | 12 +++ agent/consul/usagemetrics/usagemetrics.go | 17 ++++ agent/dns.go | 20 ++++ agent/grpc/stats.go | 37 +++++++ agent/http.go | 8 ++ agent/local/state.go | 27 ++++- agent/setup.go | 94 ++++++++++++++++- connect/proxy/proxy.go | 4 +- lib/telemetry.go | 95 +++++------------ 23 files changed, 687 insertions(+), 78 deletions(-) diff --git a/agent/catalog_endpoint.go b/agent/catalog_endpoint.go index 60c5fc344980..7e0ee9571ab6 100644 --- a/agent/catalog_endpoint.go +++ b/agent/catalog_endpoint.go @@ -5,11 +5,128 @@ import ( "net/http" "strings" - metrics "github.com/armon/go-metrics" + "github.com/armon/go-metrics" + "github.com/armon/go-metrics/prometheus" cachetype "github.com/hashicorp/consul/agent/cache-types" "github.com/hashicorp/consul/agent/structs" ) +// TODO(kit): Add help strings for each +var CatalogCounters = []prometheus.CounterDefinition{ + { + Name: []string{"consul", "client", "api", "catalog_register"}, + Help: "Increments whenever a Consul agent receives a catalog register request.", + }, + { + Name: []string{"consul", "client", "rpc", "error", "catalog_register"}, + Help: "", + }, + { + Name: []string{"consul", 
"client", "api", "success", "catalog_register"}, + Help: "", + }, + { + Name: []string{"consul", "client", "api", "catalog_deregister"}, + Help: "", + }, + { + Name: []string{"consul", "client", "api", "catalog_datacenters"}, + Help: "", + }, + { + Name: []string{"consul", "client", "rpc", "error", "catalog_deregister"}, + Help: "", + }, + { + Name: []string{"consul", "client", "api", "success", "catalog_nodes"}, + Help: "", + }, + { + Name: []string{"consul", "client", "rpc", "error", "catalog_nodes"}, + Help: "", + }, + { + Name: []string{"consul", "client", "api", "success", "catalog_deregister"}, + Help: "", + }, + { + Name: []string{"consul", "client", "rpc", "error", "catalog_datacenters"}, + Help: "", + }, + { + Name: []string{"consul", "client", "api", "success", "catalog_datacenters"}, + Help: "", + }, + { + Name: []string{"consul", "client", "api", "catalog_nodes"}, + Help: "", + }, + { + Name: []string{"consul", "client", "api", "catalog_services"}, + Help: "", + }, + { + Name: []string{"consul", "client", "rpc", "error", "catalog_services"}, + Help: "", + }, + { + Name: []string{"consul", "client", "api", "success", "catalog_services"}, + Help: "", + }, + { + Name: []string{"consul", "client", "api", "catalog_service_nodes"}, + Help: "", + }, + { + Name: []string{"consul", "client", "rpc", "error", "catalog_service_nodes"}, + Help: "", + }, + { + Name: []string{"consul", "client", "api", "success", "catalog_service_nodes"}, + Help: "", + }, + { + Name: []string{"consul", "client", "api", "error", "catalog_service_nodes"}, + Help: "", + }, + { + Name: []string{"consul", "client", "api", "catalog_node_services"}, + Help: "", + }, + { + Name: []string{"consul", "client", "api", "success", "catalog_node_services"}, + Help: "", + }, + { + Name: []string{"consul", "client", "rpc", "error", "catalog_node_services"}, + Help: "", + }, + { + Name: []string{"consul", "client", "api", "catalog_node_service_list"}, + Help: "", + }, + { + Name: []string{"consul", 
"client", "rpc", "error", "catalog_node_service_list"}, + Help: "", + }, + { + Name: []string{"consul", "client", "api", "success", "catalog_node_service_list"}, + Help: "", + }, + { + Name: []string{"consul", "client", "api", "catalog_gateway_services"}, + Help: "", + }, + { + Name: []string{"consul", "client", "rpc", "error", "catalog_gateway_services"}, + Help: "", + }, + { + Name: []string{"consul", "client", "api", "success", "catalog_gateway_services"}, + Help: "", + }, +} + func (s *HTTPHandlers) CatalogRegister(resp http.ResponseWriter, req *http.Request) (interface{}, error) { metrics.IncrCounterWithLabels([]string{"client", "api", "catalog_register"}, 1, []metrics.Label{{Name: "node", Value: s.nodeName()}}) diff --git a/agent/consul/acl.go b/agent/consul/acl.go index 7796c3756ca7..d99b588e4e5c 100644 --- a/agent/consul/acl.go +++ b/agent/consul/acl.go @@ -6,7 +6,8 @@ import ( "sync" "time" - metrics "github.com/armon/go-metrics" + "github.com/armon/go-metrics" + "github.com/armon/go-metrics/prometheus" "github.com/hashicorp/consul/acl" "github.com/hashicorp/consul/agent/structs" "github.com/hashicorp/consul/logging" @@ -15,6 +16,32 @@ import ( "golang.org/x/time/rate" ) +var ACLCounters = []prometheus.CounterDefinition{ + { + Name: []string{"consul", "acl", "token", "cache_hit"}, + Help: "", + }, + { + Name: []string{"consul", "acl", "token", "cache_miss"}, + Help: "", + }, +} + +var ACLSummaries = []prometheus.SummaryDefinition{ + { + Name: []string{"consul", "acl", "resolveTokenLegacy"}, + Help: "", + }, + { + Name: []string{"consul", "acl", "ResolveToken"}, + Help: "", + }, + { + Name: []string{"consul", "acl", "ResolveTokenToIdentity"}, + Help: "", + }, +} + // These must be kept in sync with the constants in command/agent/acl.go. 
const ( // anonymousToken is the token ID we re-write to if there is no token ID diff --git a/agent/consul/acl_endpoint.go b/agent/consul/acl_endpoint.go index ccc9e1b2af1e..10c879467076 100644 --- a/agent/consul/acl_endpoint.go +++ b/agent/consul/acl_endpoint.go @@ -11,7 +11,8 @@ import ( "regexp" "time" - metrics "github.com/armon/go-metrics" + "github.com/armon/go-metrics" + "github.com/armon/go-metrics/prometheus" "github.com/hashicorp/consul/acl" "github.com/hashicorp/consul/agent/consul/authmethod" "github.com/hashicorp/consul/agent/consul/state" @@ -30,6 +31,73 @@ const ( aclBootstrapReset = "acl-bootstrap-reset" ) +var ACLEndpointSummaries = []prometheus.SummaryDefinition{ + { + Name: []string{"consul", "acl", "token", "clone"}, + Help: "", + }, + { + Name: []string{"consul", "acl", "token", "upsert"}, + Help: "", + }, + { + Name: []string{"consul", "acl", "token", "delete"}, + Help: "", + }, + { + Name: []string{"consul", "acl", "policy", "upsert"}, + Help: "", + }, + { + Name: []string{"consul", "acl", "policy", "delete"}, + Help: "", + }, + { + Name: []string{"consul", "acl", "policy", "delete"}, + Help: "", + }, + { + Name: []string{"consul", "acl", "role", "upsert"}, + Help: "", + }, + { + Name: []string{"consul", "acl", "role", "delete"}, + Help: "", + }, + { + Name: []string{"consul", "acl", "bindingrule", "upsert"}, + Help: "", + }, + { + Name: []string{"consul", "acl", "bindingrule", "delete"}, + Help: "", + }, + { + Name: []string{"consul", "acl", "authmethod", "upsert"}, + Help: "", + }, + { + Name: []string{"consul", "acl", "authmethod", "delete"}, + Help: "", + }, + { + Name: []string{"consul", "acl", "login"}, + Help: "", + }, + { + Name: []string{"consul", "acl", "login"}, + Help: "", + }, + { + Name: []string{"consul", "acl", "logout"}, + Help: "", + }, + { + Name: []string{"consul", "acl", "logout"}, + Help: "", + }, +} + // Regex for matching var ( validPolicyName = regexp.MustCompile(`^[A-Za-z0-9\-_]{1,128}$`) diff --git 
a/agent/consul/autopilot.go b/agent/consul/autopilot.go index dc5aa5da700b..7bd8bc258a83 100644 --- a/agent/consul/autopilot.go +++ b/agent/consul/autopilot.go @@ -5,6 +5,7 @@ import ( "fmt" "github.com/armon/go-metrics" + "github.com/armon/go-metrics/prometheus" "github.com/hashicorp/consul/agent/metadata" "github.com/hashicorp/consul/types" "github.com/hashicorp/raft" @@ -12,6 +13,17 @@ import ( "github.com/hashicorp/serf/serf" ) +var AutopilotGauges = []prometheus.GaugeDefinition{ + { + Name: []string{"consul", "autopilot", "failure_tolerance"}, + Help: "", + }, + { + Name: []string{"consul", "autopilot", "healthy"}, + Help: "This tracks the overall health of the local server cluster. 1 if all servers are healthy, 0 if one or more are unhealthy.", + }, +} + // AutopilotDelegate is a Consul delegate for autopilot operations. type AutopilotDelegate struct { server *Server diff --git a/agent/consul/catalog_endpoint.go b/agent/consul/catalog_endpoint.go index 04be323cb5f4..f8b41c97db6a 100644 --- a/agent/consul/catalog_endpoint.go +++ b/agent/consul/catalog_endpoint.go @@ -6,6 +6,7 @@ import ( "time" "github.com/armon/go-metrics" + "github.com/armon/go-metrics/prometheus" "github.com/hashicorp/consul/acl" "github.com/hashicorp/consul/agent/consul/state" "github.com/hashicorp/consul/agent/structs" @@ -17,6 +18,52 @@ import ( "github.com/hashicorp/go-uuid" ) +var CatalogCounters = []prometheus.CounterDefinition{ + { + Name: []string{"consul", "catalog", "service", "query"}, + Help: "", + }, + { + Name: []string{"consul", "catalog", "connect", "query"}, + Help: "", + }, + { + Name: []string{"consul", "catalog", "service", "query-tag"}, + Help: "", + }, + { + Name: []string{"consul", "catalog", "connect", "query-tag"}, + Help: "", + }, + { + Name: []string{"consul", "catalog", "service", "query-tags"}, + Help: "", + }, + { + Name: []string{"consul", "catalog", "connect", "query-tags"}, + Help: "", + }, + { + Name: []string{"consul", "catalog", "service", "not-found"}, + 
Help: "", + }, + { + Name: []string{"consul", "catalog", "connect", "not-found"}, + Help: "", + }, +} + +var CatalogSummaries = []prometheus.SummaryDefinition{ + { + Name: []string{"consul", "catalog", "deregister"}, + Help: "", + }, + { + Name: []string{"consul", "catalog", "register"}, + Help: "", + }, +} + // Catalog endpoint is used to manipulate the service catalog type Catalog struct { srv *Server diff --git a/agent/consul/client.go b/agent/consul/client.go index b4cf90759bb0..6f20d6a02352 100644 --- a/agent/consul/client.go +++ b/agent/consul/client.go @@ -9,6 +9,7 @@ import ( "time" "github.com/armon/go-metrics" + "github.com/armon/go-metrics/prometheus" "github.com/hashicorp/consul/agent/pool" "github.com/hashicorp/consul/agent/router" "github.com/hashicorp/consul/agent/structs" @@ -21,6 +22,21 @@ import ( "golang.org/x/time/rate" ) +var ClientCounters = []prometheus.CounterDefinition{ + { + Name: []string{"consul", "client", "rpc"}, + Help: "Increments whenever a Consul agent in client mode makes an RPC request to a Consul server.", + }, + { + Name: []string{"consul", "client", "rpc", "exceeded"}, + Help: "Increments whenever a Consul agent in client mode makes an RPC request to a Consul server gets rate limited by that agent's limits configuration.", + }, + { + Name: []string{"consul", "client", "rpc", "failed"}, + Help: "Increments whenever a Consul agent in client mode makes an RPC request to a Consul server and fails.", + }, +} + const ( // serfEventBacklog is the maximum number of unprocessed Serf Events // that will be held in queue before new serf events block. 
A diff --git a/agent/consul/config_endpoint.go b/agent/consul/config_endpoint.go index dc56faf946f9..41cf16dc2a5b 100644 --- a/agent/consul/config_endpoint.go +++ b/agent/consul/config_endpoint.go @@ -4,6 +4,8 @@ import ( "fmt" "time" + "github.com/armon/go-metrics/prometheus" + metrics "github.com/armon/go-metrics" "github.com/hashicorp/consul/acl" "github.com/hashicorp/consul/agent/consul/state" @@ -12,6 +14,33 @@ import ( "github.com/mitchellh/copystructure" ) +var ConfigSummaries = []prometheus.SummaryDefinition{ + { + Name: []string{"consul", "config_entry", "apply"}, + Help: "", + }, + { + Name: []string{"consul", "config_entry", "get"}, + Help: "", + }, + { + Name: []string{"consul", "config_entry", "list"}, + Help: "", + }, + { + Name: []string{"consul", "config_entry", "listAll"}, + Help: "", + }, + { + Name: []string{"consul", "config_entry", "delete"}, + Help: "", + }, + { + Name: []string{"consul", "config_entry", "resolve_service_config"}, + Help: "", + }, +} + // The ConfigEntry endpoint is used to query centralized config information type ConfigEntry struct { srv *Server diff --git a/agent/consul/federation_state_endpoint.go b/agent/consul/federation_state_endpoint.go index a98ab83e8fdc..2a71ed3f8ae1 100644 --- a/agent/consul/federation_state_endpoint.go +++ b/agent/consul/federation_state_endpoint.go @@ -5,13 +5,33 @@ import ( "fmt" "time" - metrics "github.com/armon/go-metrics" + "github.com/armon/go-metrics" + "github.com/armon/go-metrics/prometheus" "github.com/hashicorp/consul/acl" "github.com/hashicorp/consul/agent/consul/state" "github.com/hashicorp/consul/agent/structs" memdb "github.com/hashicorp/go-memdb" ) +var FederationStateSummaries = []prometheus.SummaryDefinition{ + { + Name: []string{"consul", "federation_state", "apply"}, + Help: "", + }, + { + Name: []string{"consul", "federation_state", "get"}, + Help: "", + }, + { + Name: []string{"consul", "federation_state", "list"}, + Help: "", + }, + { + Name: []string{"consul", 
"federation_state", "list_mesh_gateways"}, + Help: "", + }, +} + var ( errFederationStatesNotEnabled = errors.New("Federation states are currently disabled until all servers in the datacenter support the feature") ) diff --git a/agent/consul/intention_endpoint.go b/agent/consul/intention_endpoint.go index 95cb5183d978..592ba34927d7 100644 --- a/agent/consul/intention_endpoint.go +++ b/agent/consul/intention_endpoint.go @@ -6,6 +6,7 @@ import ( "time" "github.com/armon/go-metrics" + "github.com/armon/go-metrics/prometheus" "github.com/hashicorp/consul/acl" "github.com/hashicorp/consul/agent/connect" "github.com/hashicorp/consul/agent/consul/state" @@ -16,6 +17,13 @@ import ( "github.com/hashicorp/go-memdb" ) +var IntentionSummaries = []prometheus.SummaryDefinition{ + { + Name: []string{"consul", "intention", "apply"}, + Help: "", + }, +} + var ( // ErrIntentionNotFound is returned if the intention lookup failed. ErrIntentionNotFound = errors.New("Intention not found") @@ -252,6 +260,7 @@ func (s *Intention) Apply( if done, err := s.srv.ForwardRPC("Intention.Apply", args, args, reply); done { return err } + // TODO(Kit): Why do we have summaries for intentions both with and without the consul namespace? 
defer metrics.MeasureSince([]string{"consul", "intention", "apply"}, time.Now()) defer metrics.MeasureSince([]string{"intention", "apply"}, time.Now()) diff --git a/agent/consul/kvs_endpoint.go b/agent/consul/kvs_endpoint.go index 04dee57b628e..8ded7366af6d 100644 --- a/agent/consul/kvs_endpoint.go +++ b/agent/consul/kvs_endpoint.go @@ -6,6 +6,7 @@ import ( "time" "github.com/armon/go-metrics" + "github.com/armon/go-metrics/prometheus" "github.com/hashicorp/consul/acl" "github.com/hashicorp/consul/agent/consul/state" "github.com/hashicorp/consul/agent/structs" @@ -14,6 +15,13 @@ import ( "github.com/hashicorp/go-memdb" ) +var KVSummaries = []prometheus.SummaryDefinition{ + { + Name: []string{"consul", "kvs", "apply"}, + Help: "This measures the time it takes to complete an update to the KV store.", + }, +} + // KVS endpoint is used to manipulate the Key-Value store type KVS struct { srv *Server diff --git a/agent/consul/prepared_query_endpoint.go b/agent/consul/prepared_query_endpoint.go index bb13ff3cb3d5..df484b2af4d4 100644 --- a/agent/consul/prepared_query_endpoint.go +++ b/agent/consul/prepared_query_endpoint.go @@ -6,6 +6,7 @@ import ( "time" "github.com/armon/go-metrics" + "github.com/armon/go-metrics/prometheus" "github.com/hashicorp/consul/acl" "github.com/hashicorp/consul/agent/consul/state" "github.com/hashicorp/consul/agent/structs" @@ -15,6 +16,25 @@ import ( "github.com/hashicorp/go-uuid" ) +var PreparedQuerySummaries = []prometheus.SummaryDefinition{ + { + Name: []string{"consul", "prepared-query", "apply"}, + Help: "", + }, + { + Name: []string{"consul", "prepared-query", "explain"}, + Help: "", + }, + { + Name: []string{"consul", "prepared-query", "execute"}, + Help: "", + }, + { + Name: []string{"consul", "prepared-query", "execute_remote"}, + Help: "", + }, +} + // PreparedQuery manages the prepared query endpoint. 
type PreparedQuery struct { srv *Server diff --git a/agent/consul/rpc.go b/agent/consul/rpc.go index ac1096292bb0..58e7e8210847 100644 --- a/agent/consul/rpc.go +++ b/agent/consul/rpc.go @@ -13,6 +13,7 @@ import ( "time" "github.com/armon/go-metrics" + "github.com/armon/go-metrics/prometheus" "github.com/hashicorp/consul/acl" "github.com/hashicorp/consul/agent/consul/state" "github.com/hashicorp/consul/agent/consul/wanfed" @@ -31,6 +32,47 @@ import ( "github.com/hashicorp/yamux" ) +var RPCCounters = []prometheus.CounterDefinition{ + { + Name: []string{"consul", "rpc", "accept_conn"}, + Help: "", + }, + { + Name: []string{"consul", "rpc", "raft_handoff"}, + Help: "", + }, + { + Name: []string{"consul", "rpc", "request_error"}, + Help: "", + }, + { + Name: []string{"consul", "rpc", "request"}, + Help: "", + }, + { + Name: []string{"consul", "rpc", "cross-dc"}, + Help: "", + }, + { + Name: []string{"consul", "rpc", "query"}, + Help: "", + }, +} + +var RPCGauges = []prometheus.GaugeDefinition{ + { + Name: []string{"consul", "rpc", "queries_blocking"}, + Help: "", + }, +} + +var RPCSummaries = []prometheus.SummaryDefinition{ + { + Name: []string{"consul", "rpc", "consistentRead"}, + Help: "", + }, +} + const ( // jitterFraction is a the limit to the amount of jitter we apply // to a user specified MaxQueryTime. We divide the specified time by diff --git a/agent/consul/server.go b/agent/consul/server.go index 13fece4060ed..5db589d3a1f4 100644 --- a/agent/consul/server.go +++ b/agent/consul/server.go @@ -17,7 +17,7 @@ import ( "sync/atomic" "time" - metrics "github.com/armon/go-metrics" + "github.com/armon/go-metrics" connlimit "github.com/hashicorp/go-connlimit" "github.com/hashicorp/go-hclog" "github.com/hashicorp/go-memdb" @@ -50,6 +50,8 @@ import ( "github.com/hashicorp/consul/types" ) +// NOTE The "consul.client.rpc" and "consul.client.rpc.exceeded" counters are defined in consul/client.go + // These are the protocol versions that Consul can _understand_. 
These are // Consul-level protocol versions, that are used to configure the Serf // protocol versions. diff --git a/agent/consul/session_ttl.go b/agent/consul/session_ttl.go index 4afdc0e382d9..db4447c507ab 100644 --- a/agent/consul/session_ttl.go +++ b/agent/consul/session_ttl.go @@ -4,10 +4,34 @@ import ( "fmt" "time" + "github.com/armon/go-metrics/prometheus" + "github.com/armon/go-metrics" "github.com/hashicorp/consul/agent/structs" ) +var SessionGauges = []prometheus.GaugeDefinition{ + { + Name: []string{"consul", "session_ttl", "active"}, + Help: "", + }, + { + Name: []string{"consul", "raft", "applied_index"}, + Help: "", + }, + { + Name: []string{"consul", "raft", "last_index"}, + Help: "", + }, +} + +var SessionSummaries = []prometheus.SummaryDefinition{ + { + Name: []string{"consul", "session_ttl", "invalidate"}, + Help: "", + }, +} + const ( // maxInvalidateAttempts limits how many invalidate attempts are made maxInvalidateAttempts = 6 diff --git a/agent/consul/txn_endpoint.go b/agent/consul/txn_endpoint.go index 9819d63704b1..42539a991fb5 100644 --- a/agent/consul/txn_endpoint.go +++ b/agent/consul/txn_endpoint.go @@ -5,12 +5,24 @@ import ( "time" "github.com/armon/go-metrics" + "github.com/armon/go-metrics/prometheus" "github.com/hashicorp/consul/acl" "github.com/hashicorp/consul/agent/structs" "github.com/hashicorp/consul/api" "github.com/hashicorp/go-hclog" ) +var TxnSummaries = []prometheus.SummaryDefinition{ + { + Name: []string{"consul", "txn", "apply"}, + Help: "This measures the time spent applying a transaction operation.", + }, + { + Name: []string{"consul", "txn", "read"}, + Help: "", + }, +} + // Txn endpoint is used to perform multi-object atomic transactions. 
type Txn struct { srv *Server diff --git a/agent/consul/usagemetrics/usagemetrics.go b/agent/consul/usagemetrics/usagemetrics.go index 259c6646e125..fc8d9ce90bde 100644 --- a/agent/consul/usagemetrics/usagemetrics.go +++ b/agent/consul/usagemetrics/usagemetrics.go @@ -5,12 +5,29 @@ import ( "errors" "time" + "github.com/armon/go-metrics/prometheus" + "github.com/armon/go-metrics" "github.com/hashicorp/consul/agent/consul/state" "github.com/hashicorp/consul/logging" "github.com/hashicorp/go-hclog" ) +var Gauges = []prometheus.GaugeDefinition{ + { + Name: []string{"consul", "state", "nodes"}, + Help: "", + }, + { + Name: []string{"consul", "state", "services"}, + Help: "", + }, + { + Name: []string{"consul", "state", "service_instances"}, + Help: "", + }, +} + // Config holds the settings for various parameters for the // UsageMetricsReporter type Config struct { diff --git a/agent/dns.go b/agent/dns.go index a9063e26f4ad..6d541aeaaa7e 100644 --- a/agent/dns.go +++ b/agent/dns.go @@ -10,6 +10,8 @@ import ( "sync/atomic" "time" + "github.com/armon/go-metrics/prometheus" + metrics "github.com/armon/go-metrics" radix "github.com/armon/go-radix" "github.com/coredns/coredns/plugin/pkg/dnsutil" @@ -26,6 +28,24 @@ import ( "github.com/hashicorp/consul/logging" ) +var DNSCounters = []prometheus.CounterDefinition{ + { + Name: []string{"dns", "stale_queries"}, + Help: "", + }, +} + +var DNSSummaries = []prometheus.SummaryDefinition{ + { + Name: []string{"dns", "ptr_query"}, + Help: "", + }, + { + Name: []string{"dns", "domain_query"}, + Help: "", + }, +} + const ( // UDP can fit ~25 A records in a 512B response, and ~14 AAAA // records. 
Limit further to prevent unintentional configuration diff --git a/agent/grpc/stats.go b/agent/grpc/stats.go index add3195f16b2..7f732316fe61 100644 --- a/agent/grpc/stats.go +++ b/agent/grpc/stats.go @@ -5,11 +5,48 @@ import ( "sync/atomic" "github.com/armon/go-metrics" + "github.com/armon/go-metrics/prometheus" "google.golang.org/grpc" "google.golang.org/grpc/stats" ) var defaultMetrics = metrics.Default() +var StatsGauges = []prometheus.GaugeDefinition{ + { + Name: []string{"consul", "grpc", "server", "connections"}, + Help: "", + }, + { + Name: []string{"consul", "grpc", "client", "connections"}, + Help: "", + }, + { + Name: []string{"consul", "grpc", "server", "streams"}, + Help: "", + }, +} +var StatsCounters = []prometheus.CounterDefinition{ + { + Name: []string{"consul", "grpc", "client", "request", "count"}, + Help: "", + }, + { + Name: []string{"consul", "grpc", "server", "request", "count"}, + Help: "", + }, + { + Name: []string{"consul", "grpc", "client", "connection", "count"}, + Help: "", + }, + { + Name: []string{"consul", "grpc", "server", "connection", "count"}, + Help: "", + }, + { + Name: []string{"consul", "grpc", "server", "stream", "count"}, + Help: "", + }, +} // statsHandler is a grpc/stats.StatsHandler which emits connection and // request metrics to go-metrics. diff --git a/agent/http.go b/agent/http.go index 10233fa6b0fa..244b9522b559 100644 --- a/agent/http.go +++ b/agent/http.go @@ -17,6 +17,7 @@ import ( "github.com/NYTimes/gziphandler" "github.com/armon/go-metrics" + "github.com/armon/go-metrics/prometheus" "github.com/hashicorp/consul/acl" "github.com/hashicorp/consul/agent/cache" "github.com/hashicorp/consul/agent/config" @@ -31,6 +32,13 @@ import ( "github.com/pkg/errors" ) +var HTTPSummaries = []prometheus.SummaryDefinition{ + { + Name: []string{"consul", "api", "http"}, + Help: "", + }, +} + // MethodNotAllowedError should be returned by a handler when the HTTP method is not allowed. 
type MethodNotAllowedError struct { Method string diff --git a/agent/local/state.go b/agent/local/state.go index be0c481f3d51..d145f97d0bd3 100644 --- a/agent/local/state.go +++ b/agent/local/state.go @@ -9,8 +9,8 @@ import ( "sync/atomic" "time" - metrics "github.com/armon/go-metrics" - + "github.com/armon/go-metrics" + "github.com/armon/go-metrics/prometheus" "github.com/hashicorp/consul/acl" "github.com/hashicorp/consul/agent/structs" "github.com/hashicorp/consul/agent/token" @@ -20,6 +20,29 @@ import ( "github.com/hashicorp/go-hclog" ) +var StateCounters = []prometheus.CounterDefinition{ + { + Name: []string{"consul", "acl", "blocked", "service", "deregistration"}, + Help: "", + }, + { + Name: []string{"consul", "acl", "blocked", "check", "deregistration"}, + Help: "", + }, + { + Name: []string{"consul", "acl", "blocked", "service", "registration"}, + Help: "", + }, + { + Name: []string{"consul", "acl", "blocked", "check", "registration"}, + Help: "", + }, + { + Name: []string{"consul", "acl", "blocked", "node", "registration"}, + Help: "", + }, +} + const fullSyncReadMaxStale = 2 * time.Second // Config is the configuration for the State. 
diff --git a/agent/setup.go b/agent/setup.go index 96265ef24ada..4297a3089a4f 100644 --- a/agent/setup.go +++ b/agent/setup.go @@ -8,6 +8,10 @@ import ( "sync" "time" + "github.com/armon/go-metrics/prometheus" + "github.com/hashicorp/consul/agent/consul/usagemetrics" + "github.com/hashicorp/consul/agent/local" + "github.com/hashicorp/go-hclog" "google.golang.org/grpc/grpclog" grpcresolver "google.golang.org/grpc/resolver" @@ -72,7 +76,7 @@ func NewBaseDeps(configLoader ConfigLoader, logOut io.Writer) (BaseDeps, error) return d, fmt.Errorf("failed to setup node ID: %w", err) } - d.MetricsHandler, err = lib.InitTelemetry(cfg.Telemetry) + d.MetricsHandler, err = lib.InitTelemetry(cfg.Telemetry, getPrometheusDefs()) if err != nil { return d, fmt.Errorf("failed to initialize telemetry: %w", err) } @@ -177,3 +181,91 @@ func registerWithGRPC(b grpcresolver.Builder) { defer registerLock.Unlock() grpcresolver.Register(b) } + +// getPrometheusDefs reaches into every slice of prometheus defs we've defined in each part of the agent, and appends +// all of our slices into one nice slice of definitions per metric type for the Consul agent to pass to go-metrics. +func getPrometheusDefs() lib.PrometheusDefs { + var gauges = [][]prometheus.GaugeDefinition{ + consul.AutopilotGauges, + consul.RPCGauges, + consul.SessionGauges, + grpc.StatsGauges, + usagemetrics.Gauges, + } + var gaugeDefs []prometheus.GaugeDefinition + for _, g := range gauges { + gaugeDefs = append(gaugeDefs, g...) + } + + raftCounters := []prometheus.CounterDefinition{ + // TODO(kit): "consul.raft..." metrics come from the raft lib and we should migrate these to a telemetry + // package within. In the mean time, we're going to define them here because it's important that they're always + // present for Consul users setting up dashboards. 
+ { + Name: []string{"consul", "raft", "apply"}, + Help: "This counts the number of Raft transactions occurring over the interval.", + }, + { + Name: []string{"consul", "raft", "state", "candidate"}, + Help: "This increments whenever a Consul server starts an election.", + }, + { + Name: []string{"consul", "raft", "state", "leader"}, + Help: "This increments whenever a Consul server becomes a leader.", + }, + } + + var counters = [][]prometheus.CounterDefinition{ + CatalogCounters, + consul.ACLCounters, + consul.CatalogCounters, + consul.ClientCounters, + consul.RPCCounters, + grpc.StatsCounters, + local.StateCounters, + raftCounters, + } + var counterDefs []prometheus.CounterDefinition + for _, c := range counters { + counterDefs = append(counterDefs, c...) + } + + raftSummaries := []prometheus.SummaryDefinition{ + // TODO(kit): "consul.raft..." metrics come from the raft lib and we should migrate these to a telemetry + // package within. In the mean time, we're going to define them here because it's important that they're always + // present for Consul users setting up dashboards. + { + Name: []string{"consul", "raft", "commitTime"}, + Help: "This measures the time it takes to commit a new entry to the Raft log on the leader.", + }, + { + Name: []string{"consul", "raft", "leader", "lastContact"}, + Help: "Measures the time since the leader was last able to contact the follower nodes when checking its leader lease.", + }, + } + + var summaries = [][]prometheus.SummaryDefinition{ + HTTPSummaries, + consul.ACLSummaries, + consul.ACLEndpointSummaries, + consul.CatalogSummaries, + consul.FederationStateSummaries, + consul.IntentionSummaries, + consul.KVSummaries, + consul.PreparedQuerySummaries, + consul.RPCSummaries, + consul.SessionSummaries, + consul.TxnSummaries, + raftSummaries, + } + var summaryDefs []prometheus.SummaryDefinition + for _, s := range summaries { + summaryDefs = append(summaryDefs, s...) 
+ } + + return lib.PrometheusDefs{ + Gauges: gaugeDefs, + Counters: counterDefs, + Summaries: summaryDefs, + } +} diff --git a/connect/proxy/proxy.go b/connect/proxy/proxy.go index 9dc27a06fe7a..54df4e309b5c 100644 --- a/connect/proxy/proxy.go +++ b/connect/proxy/proxy.go @@ -54,7 +54,9 @@ func (p *Proxy) Serve() error { // Initial setup // Setup telemetry if configured - _, err := lib.InitTelemetry(newCfg.Telemetry) + // NOTE(kit): As far as I can tell, all of the metrics in the proxy are generated at runtime, so we + // don't have any static metrics we initialize at start. + _, err := lib.InitTelemetry(newCfg.Telemetry, lib.EmptyPrometheusDefs()) if err != nil { p.logger.Error("proxy telemetry config error", "error", err) } diff --git a/lib/telemetry.go b/lib/telemetry.go index 33f7d2100848..fe360172a60d 100644 --- a/lib/telemetry.go +++ b/lib/telemetry.go @@ -276,79 +276,17 @@ func dogstatdSink(cfg TelemetryConfig, hostname string) (metrics.MetricSink, err return sink, nil } -func prometheusSink(cfg TelemetryConfig, hostname string) (metrics.MetricSink, error) { +func prometheusSink(cfg TelemetryConfig, hostname string, defs PrometheusDefs) (metrics.MetricSink, error) { + if cfg.PrometheusRetentionTime.Nanoseconds() < 1 { return nil, nil } - // TODO(kit) define these in vars in the package/file they're used - gaugeDefs := []prometheus.GaugeDefinition{ - { - Name: []string{"consul", "autopilot", "healthy"}, - Help: "This tracks the overall health of the local server cluster. 
1 if all servers are healthy, 0 if one or more are unhealthy.", - }, - } - - // TODO(kit) define these in vars in the package/file they're used - counterDefs := []prometheus.CounterDefinition{ - { - Name: []string{"consul", "raft", "apply"}, - Help: "This counts the number of Raft transactions occurring over the interval.", - }, - { - Name: []string{"consul", "raft", "state", "candidate"}, - Help: "This increments whenever a Consul server starts an election.", - }, - { - Name: []string{"consul", "raft", "state", "leader"}, - Help: "This increments whenever a Consul server becomes a leader.", - }, - { - Name: []string{"consul", "client", "api", "catalog_register"}, - Help: "Increments whenever a Consul agent receives a catalog register request.", - }, - { - Name: []string{"consul", "runtime", "total_gc_pause_ns"}, - Help: "Number of nanoseconds consumed by stop-the-world garbage collection (GC) pauses since Consul started.", - }, - { - Name: []string{"consul", "client", "rpc"}, - Help: "Increments whenever a Consul agent in client mode makes an RPC request to a Consul server.", - }, - { - Name: []string{"consul", "client", "rpc", "exceeded"}, - Help: "Increments whenever a Consul agent in client mode makes an RPC request to a Consul server gets rate limited by that agent's limits configuration.", - }, - { - Name: []string{"consul", "client", "rpc", "failed"}, - Help: "Increments whenever a Consul agent in client mode makes an RPC request to a Consul server and fails.", - }, - } - - // TODO(kit) define these in vars in the package/file they're used - summaryDefs := []prometheus.SummaryDefinition{ - { - Name: []string{"consul", "kvs", "apply"}, - Help: "This measures the time it takes to complete an update to the KV store.", - }, - { - Name: []string{"consul", "txn", "apply"}, - Help: "This measures the time spent applying a transaction operation.", - }, - { - Name: []string{"consul", "raft", "commitTime"}, - Help: "This measures the time it takes to commit a new 
entry to the Raft log on the leader.", - }, - { - Name: []string{"consul", "raft", "leader", "lastContact"}, - Help: "Measures the time since the leader was last able to contact the follower nodes when checking its leader lease.", - }, - } prometheusOpts := prometheus.PrometheusOpts{ Expiration: cfg.PrometheusRetentionTime, - GaugeDefinitions: gaugeDefs, - CounterDefinitions: counterDefs, - SummaryDefinitions: summaryDefs, + GaugeDefinitions: defs.Gauges, + CounterDefinitions: defs.Counters, + SummaryDefinitions: defs.Summaries, } sink, err := prometheus.NewPrometheusSinkFrom(prometheusOpts) if err != nil { @@ -399,9 +337,25 @@ func circonusSink(cfg TelemetryConfig, hostname string) (metrics.MetricSink, err return sink, nil } +// PrometheusDefs wraps collections of metric definitions to pass into the PrometheusSink +type PrometheusDefs struct { + Gauges []prometheus.GaugeDefinition + Counters []prometheus.CounterDefinition + Summaries []prometheus.SummaryDefinition +} + +// EmptyPrometheusDefs returns a PrometheusDefs struct where each of the slices have zero elements, but not nil. +func EmptyPrometheusDefs() PrometheusDefs { + return PrometheusDefs{ + Gauges: []prometheus.GaugeDefinition{}, + Counters: []prometheus.CounterDefinition{}, + Summaries: []prometheus.SummaryDefinition{}, + } +} + // InitTelemetry configures go-metrics based on map of telemetry config // values as returned by Runtimecfg.Config(). 
-func InitTelemetry(cfg TelemetryConfig) (*metrics.InmemSink, error) { +func InitTelemetry(cfg TelemetryConfig, defs PrometheusDefs) (*metrics.InmemSink, error) { if cfg.Disable { return nil, nil } @@ -440,9 +394,12 @@ func InitTelemetry(cfg TelemetryConfig) (*metrics.InmemSink, error) { if err := addSink(circonusSink); err != nil { return nil, err } - if err := addSink(prometheusSink); err != nil { + + promSink, err := prometheusSink(cfg, metricsConf.HostName, defs) + if err != nil { return nil, err } + sinks = append(sinks, promSink) if len(sinks) > 0 { sinks = append(sinks, memSink) From 06d59c03b93ef362104ea5cd3477fc1c780f9ce0 Mon Sep 17 00:00:00 2001 From: Kit Patella Date: Fri, 13 Nov 2020 13:18:04 -0800 Subject: [PATCH 02/13] add the service name in the agent rather than in the definitions themselves --- agent/catalog_endpoint.go | 56 +++++++++++------------ agent/consul/acl.go | 10 ++-- agent/consul/acl_endpoint.go | 32 ++++++------- agent/consul/autopilot.go | 4 +- agent/consul/catalog_endpoint.go | 20 ++++---- agent/consul/client.go | 6 +-- agent/consul/config_endpoint.go | 12 ++--- agent/consul/federation_state_endpoint.go | 8 ++-- agent/consul/intention_endpoint.go | 5 +- agent/consul/kvs_endpoint.go | 2 +- agent/consul/prepared_query_endpoint.go | 8 ++-- agent/consul/rpc.go | 16 +++---- agent/consul/session_ttl.go | 8 ++-- agent/consul/txn_endpoint.go | 4 +- agent/consul/usagemetrics/usagemetrics.go | 6 +-- agent/grpc/stats.go | 16 +++---- agent/http.go | 2 +- agent/local/state.go | 10 ++-- agent/setup.go | 35 ++++++++++---- 19 files changed, 141 insertions(+), 119 deletions(-) diff --git a/agent/catalog_endpoint.go b/agent/catalog_endpoint.go index 7e0ee9571ab6..df78384ea004 100644 --- a/agent/catalog_endpoint.go +++ b/agent/catalog_endpoint.go @@ -14,115 +14,115 @@ import ( // TODO(kit): Add help strings for each var CatalogCounters = []prometheus.CounterDefinition{ { - Name: []string{"consul", "client", "api", "catalog_register"}, + Name: 
[]string{"client", "api", "catalog_register"}, Help: "Increments whenever a Consul agent receives a catalog register request.", }, { - Name: []string{"consul", "client", "rpc", "error", "catalog_register"}, + Name: []string{"client", "rpc", "error", "catalog_register"}, Help: "", }, { - Name: []string{"consul", "client", "api", "success", "catalog_register"}, + Name: []string{"client", "api", "success", "catalog_register"}, Help: "", }, { - Name: []string{"consul", "client", "api", "catalog_deregister"}, + Name: []string{"client", "api", "catalog_deregister"}, Help: "", }, { - Name: []string{"consul", "client", "api", "catalog_datacenters"}, + Name: []string{"client", "api", "catalog_datacenters"}, Help: "", }, { - Name: []string{"consul", "client", "rpc", "error", "catalog_deregister"}, + Name: []string{"client", "rpc", "error", "catalog_deregister"}, Help: "", }, { - Name: []string{"consul", "client", "api", "success", "catalog_nodes"}, + Name: []string{"client", "api", "success", "catalog_nodes"}, Help: "", }, { - Name: []string{"consul", "client", "rpc", "error", "catalog_nodes"}, + Name: []string{"client", "rpc", "error", "catalog_nodes"}, Help: "", }, { - Name: []string{"consul", "client", "api", "success", "catalog_deregister"}, + Name: []string{"client", "api", "success", "catalog_deregister"}, Help: "", }, { - Name: []string{"consul", "client", "rpc", "error", "catalog_datacenters"}, + Name: []string{"client", "rpc", "error", "catalog_datacenters"}, Help: "", }, { - Name: []string{"consul", "client", "api", "success", "catalog_datacenters"}, + Name: []string{"client", "api", "success", "catalog_datacenters"}, Help: "", }, { - Name: []string{"consul", "client", "api", "catalog_nodes"}, + Name: []string{"client", "api", "catalog_nodes"}, Help: "", }, { - Name: []string{"consul", "client", "api", "catalog_services"}, + Name: []string{"client", "api", "catalog_services"}, Help: "", }, { - Name: []string{"consul", "client", "rpc", "error", "catalog_services"}, 
+ Name: []string{"client", "rpc", "error", "catalog_services"}, Help: "", }, { - Name: []string{"consul", "client", "api", "success", "catalog_services"}, + Name: []string{"client", "api", "success", "catalog_services"}, Help: "", }, { - Name: []string{"consul", "client", "api", "catalog_service_nodes"}, + Name: []string{"client", "api", "catalog_service_nodes"}, Help: "", }, { - Name: []string{"consul", "client", "rpc", "error", "catalog_service_nodes"}, + Name: []string{"client", "rpc", "error", "catalog_service_nodes"}, Help: "", }, { - Name: []string{"consul", "client", "api", "success", "catalog_service_nodes"}, + Name: []string{"client", "api", "success", "catalog_service_nodes"}, Help: "", }, { - Name: []string{"consul", "client", "api", "error", "catalog_service_nodes"}, + Name: []string{"client", "api", "error", "catalog_service_nodes"}, Help: "", }, { - Name: []string{"consul", "client", "api", "catalog_node_services"}, + Name: []string{"client", "api", "catalog_node_services"}, Help: "", }, { - Name: []string{"consul", "client", "api", "success", "catalog_node_services"}, + Name: []string{"client", "api", "success", "catalog_node_services"}, Help: "", }, { - Name: []string{"consul", "client", "rpc", "error", "catalog_node_services"}, + Name: []string{"client", "rpc", "error", "catalog_node_services"}, Help: "", }, { - Name: []string{"consul", "client", "api", "catalog_node_service_list"}, + Name: []string{"client", "api", "catalog_node_service_list"}, Help: "", }, { - Name: []string{"consul", "client", "rpc", "error", "catalog_node_service_list"}, + Name: []string{"client", "rpc", "error", "catalog_node_service_list"}, Help: "", }, { - Name: []string{"consul", "client", "api", "success", "catalog_node_service_list"}, + Name: []string{"client", "api", "success", "catalog_node_service_list"}, Help: "", }, { - Name: []string{"consul", "client", "api", "catalog_gateway_services"}, + Name: []string{"client", "api", "catalog_gateway_services"}, Help: "", }, { 
- Name: []string{"consul", "client", "rpc", "error", "catalog_gateway_services"}, + Name: []string{"client", "rpc", "error", "catalog_gateway_services"}, Help: "", }, { - Name: []string{"consul", "client", "api", "success", "catalog_gateway_services"}, + Name: []string{"client", "api", "success", "catalog_gateway_services"}, Help: "", }, } diff --git a/agent/consul/acl.go b/agent/consul/acl.go index d99b588e4e5c..1ec7bc4193f5 100644 --- a/agent/consul/acl.go +++ b/agent/consul/acl.go @@ -18,26 +18,26 @@ import ( var ACLCounters = []prometheus.CounterDefinition{ { - Name: []string{"consul", "acl", "token", "cache_hit"}, + Name: []string{"acl", "token", "cache_hit"}, Help: "", }, { - Name: []string{"consul", "acl", "token", "cache_miss"}, + Name: []string{"acl", "token", "cache_miss"}, Help: "", }, } var ACLSummaries = []prometheus.SummaryDefinition{ { - Name: []string{"consul", "acl", "resolveTokenLegacy"}, + Name: []string{"acl", "resolveTokenLegacy"}, Help: "", }, { - Name: []string{"consul", "acl", "ResolveToken"}, + Name: []string{"acl", "ResolveToken"}, Help: "", }, { - Name: []string{"consul", "acl", "ResolveTokenToIdentity"}, + Name: []string{"acl", "ResolveTokenToIdentity"}, Help: "", }, } diff --git a/agent/consul/acl_endpoint.go b/agent/consul/acl_endpoint.go index 10c879467076..b8ba08e0b2b8 100644 --- a/agent/consul/acl_endpoint.go +++ b/agent/consul/acl_endpoint.go @@ -33,67 +33,67 @@ const ( var ACLEndpointSummaries = []prometheus.SummaryDefinition{ { - Name: []string{"consul", "acl", "token", "clone"}, + Name: []string{"acl", "token", "clone"}, Help: "", }, { - Name: []string{"consul", "acl", "token", "upsert"}, + Name: []string{"acl", "token", "upsert"}, Help: "", }, { - Name: []string{"consul", "acl", "token", "delete"}, + Name: []string{"acl", "token", "delete"}, Help: "", }, { - Name: []string{"consul", "acl", "policy", "upsert"}, + Name: []string{"acl", "policy", "upsert"}, Help: "", }, { - Name: []string{"consul", "acl", "policy", "delete"}, + 
Name: []string{"acl", "policy", "delete"}, Help: "", }, { - Name: []string{"consul", "acl", "policy", "delete"}, + Name: []string{"acl", "policy", "delete"}, Help: "", }, { - Name: []string{"consul", "acl", "role", "upsert"}, + Name: []string{"acl", "role", "upsert"}, Help: "", }, { - Name: []string{"consul", "acl", "role", "delete"}, + Name: []string{"acl", "role", "delete"}, Help: "", }, { - Name: []string{"consul", "acl", "bindingrule", "upsert"}, + Name: []string{"acl", "bindingrule", "upsert"}, Help: "", }, { - Name: []string{"consul", "acl", "bindingrule", "delete"}, + Name: []string{"acl", "bindingrule", "delete"}, Help: "", }, { - Name: []string{"consul", "acl", "authmethod", "upsert"}, + Name: []string{"acl", "authmethod", "upsert"}, Help: "", }, { - Name: []string{"consul", "acl", "authmethod", "delete"}, + Name: []string{"acl", "authmethod", "delete"}, Help: "", }, { - Name: []string{"consul", "acl", "login"}, + Name: []string{"acl", "login"}, Help: "", }, { - Name: []string{"consul", "acl", "login"}, + Name: []string{"acl", "login"}, Help: "", }, { - Name: []string{"consul", "acl", "logout"}, + Name: []string{"acl", "logout"}, Help: "", }, { - Name: []string{"consul", "acl", "logout"}, + Name: []string{"acl", "logout"}, Help: "", }, } diff --git a/agent/consul/autopilot.go b/agent/consul/autopilot.go index 7bd8bc258a83..22f50259c5c6 100644 --- a/agent/consul/autopilot.go +++ b/agent/consul/autopilot.go @@ -15,11 +15,11 @@ import ( var AutopilotGauges = []prometheus.GaugeDefinition{ { - Name: []string{"consul", "autopilot", "failure_tolerance"}, + Name: []string{"autopilot", "failure_tolerance"}, Help: "", }, { - Name: []string{"consul", "autopilot", "healthy"}, + Name: []string{"autopilot", "healthy"}, Help: "This tracks the overall health of the local server cluster. 
1 if all servers are healthy, 0 if one or more are unhealthy.", }, } diff --git a/agent/consul/catalog_endpoint.go b/agent/consul/catalog_endpoint.go index f8b41c97db6a..151367cf4b59 100644 --- a/agent/consul/catalog_endpoint.go +++ b/agent/consul/catalog_endpoint.go @@ -20,46 +20,46 @@ import ( var CatalogCounters = []prometheus.CounterDefinition{ { - Name: []string{"consul", "catalog", "service", "query"}, + Name: []string{"catalog", "service", "query"}, Help: "", }, { - Name: []string{"consul", "catalog", "connect", "query"}, + Name: []string{"catalog", "connect", "query"}, Help: "", }, { - Name: []string{"consul", "catalog", "service", "query-tag"}, + Name: []string{"catalog", "service", "query-tag"}, Help: "", }, { - Name: []string{"consul", "catalog", "connect", "query-tag"}, + Name: []string{"catalog", "connect", "query-tag"}, Help: "", }, { - Name: []string{"consul", "catalog", "service", "query-tags"}, + Name: []string{"catalog", "service", "query-tags"}, Help: "", }, { - Name: []string{"consul", "catalog", "connect", "query-tags"}, + Name: []string{"catalog", "connect", "query-tags"}, Help: "", }, { - Name: []string{"consul", "catalog", "service", "not-found"}, + Name: []string{"catalog", "service", "not-found"}, Help: "", }, { - Name: []string{"consul", "catalog", "connect", "not-found"}, + Name: []string{"catalog", "connect", "not-found"}, Help: "", }, } var CatalogSummaries = []prometheus.SummaryDefinition{ { - Name: []string{"consul", "catalog", "deregister"}, + Name: []string{"catalog", "deregister"}, Help: "", }, { - Name: []string{"consul", "catalog", "register"}, + Name: []string{"catalog", "register"}, Help: "", }, } diff --git a/agent/consul/client.go b/agent/consul/client.go index 6f20d6a02352..d2ae9a1eddc4 100644 --- a/agent/consul/client.go +++ b/agent/consul/client.go @@ -24,15 +24,15 @@ import ( var ClientCounters = []prometheus.CounterDefinition{ { - Name: []string{"consul", "client", "rpc"}, + Name: []string{"client", "rpc"}, Help: 
"Increments whenever a Consul agent in client mode makes an RPC request to a Consul server.", }, { - Name: []string{"consul", "client", "rpc", "exceeded"}, + Name: []string{"client", "rpc", "exceeded"}, Help: "Increments whenever a Consul agent in client mode makes an RPC request to a Consul server gets rate limited by that agent's limits configuration.", }, { - Name: []string{"consul", "client", "rpc", "failed"}, + Name: []string{"client", "rpc", "failed"}, Help: "Increments whenever a Consul agent in client mode makes an RPC request to a Consul server and fails.", }, } diff --git a/agent/consul/config_endpoint.go b/agent/consul/config_endpoint.go index 41cf16dc2a5b..96e43e5c4b6c 100644 --- a/agent/consul/config_endpoint.go +++ b/agent/consul/config_endpoint.go @@ -16,27 +16,27 @@ import ( var ConfigSummaries = []prometheus.SummaryDefinition{ { - Name: []string{"consul", "config_entry", "apply"}, + Name: []string{"config_entry", "apply"}, Help: "", }, { - Name: []string{"consul", "config_entry", "get"}, + Name: []string{"config_entry", "get"}, Help: "", }, { - Name: []string{"consul", "config_entry", "list"}, + Name: []string{"config_entry", "list"}, Help: "", }, { - Name: []string{"consul", "config_entry", "listAll"}, + Name: []string{"config_entry", "listAll"}, Help: "", }, { - Name: []string{"consul", "config_entry", "delete"}, + Name: []string{"config_entry", "delete"}, Help: "", }, { - Name: []string{"consul", "config_entry", "resolve_service_config"}, + Name: []string{"config_entry", "resolve_service_config"}, Help: "", }, } diff --git a/agent/consul/federation_state_endpoint.go b/agent/consul/federation_state_endpoint.go index 2a71ed3f8ae1..88111364c15f 100644 --- a/agent/consul/federation_state_endpoint.go +++ b/agent/consul/federation_state_endpoint.go @@ -15,19 +15,19 @@ import ( var FederationStateSummaries = []prometheus.SummaryDefinition{ { - Name: []string{"consul", "federation_state", "apply"}, + Name: []string{"federation_state", "apply"}, Help: 
"", }, { - Name: []string{"consul", "federation_state", "get"}, + Name: []string{"federation_state", "get"}, Help: "", }, { - Name: []string{"consul", "federation_state", "list"}, + Name: []string{"federation_state", "list"}, Help: "", }, { - Name: []string{"consul", "federation_state", "list_mesh_gateways"}, + Name: []string{"federation_state", "list_mesh_gateways"}, Help: "", }, } diff --git a/agent/consul/intention_endpoint.go b/agent/consul/intention_endpoint.go index 592ba34927d7..ca93239dc046 100644 --- a/agent/consul/intention_endpoint.go +++ b/agent/consul/intention_endpoint.go @@ -22,6 +22,10 @@ var IntentionSummaries = []prometheus.SummaryDefinition{ Name: []string{"consul", "intention", "apply"}, Help: "", }, + { + Name: []string{"intention", "apply"}, + Help: "", + }, } var ( @@ -260,7 +264,6 @@ func (s *Intention) Apply( if done, err := s.srv.ForwardRPC("Intention.Apply", args, args, reply); done { return err } - // TODO(Kit): Why do we have summaries for intentions both with and without the consul namespace? 
defer metrics.MeasureSince([]string{"consul", "intention", "apply"}, time.Now()) defer metrics.MeasureSince([]string{"intention", "apply"}, time.Now()) diff --git a/agent/consul/kvs_endpoint.go b/agent/consul/kvs_endpoint.go index 8ded7366af6d..400397fca1a7 100644 --- a/agent/consul/kvs_endpoint.go +++ b/agent/consul/kvs_endpoint.go @@ -17,7 +17,7 @@ import ( var KVSummaries = []prometheus.SummaryDefinition{ { - Name: []string{"consul", "kvs", "apply"}, + Name: []string{"kvs", "apply"}, Help: "This measures the time it takes to complete an update to the KV store.", }, } diff --git a/agent/consul/prepared_query_endpoint.go b/agent/consul/prepared_query_endpoint.go index df484b2af4d4..d796c6f6cbd9 100644 --- a/agent/consul/prepared_query_endpoint.go +++ b/agent/consul/prepared_query_endpoint.go @@ -18,19 +18,19 @@ import ( var PreparedQuerySummaries = []prometheus.SummaryDefinition{ { - Name: []string{"consul", "prepared-query", "apply"}, + Name: []string{"prepared-query", "apply"}, Help: "", }, { - Name: []string{"consul", "prepared-query", "explain"}, + Name: []string{"prepared-query", "explain"}, Help: "", }, { - Name: []string{"consul", "prepared-query", "execute"}, + Name: []string{"prepared-query", "execute"}, Help: "", }, { - Name: []string{"consul", "prepared-query", "execute_remote"}, + Name: []string{"prepared-query", "execute_remote"}, Help: "", }, } diff --git a/agent/consul/rpc.go b/agent/consul/rpc.go index 58e7e8210847..775d311bdafa 100644 --- a/agent/consul/rpc.go +++ b/agent/consul/rpc.go @@ -34,41 +34,41 @@ import ( var RPCCounters = []prometheus.CounterDefinition{ { - Name: []string{"consul", "rpc", "accept_conn"}, + Name: []string{"rpc", "accept_conn"}, Help: "", }, { - Name: []string{"consul", "rpc", "raft_handoff"}, + Name: []string{"rpc", "raft_handoff"}, Help: "", }, { - Name: []string{"consul", "rpc", "request_error"}, + Name: []string{"rpc", "request_error"}, Help: "", }, { - Name: []string{"consul", "rpc", "request"}, + Name: 
[]string{"rpc", "request"}, Help: "", }, { - Name: []string{"consul", "rpc", "cross-dc"}, + Name: []string{"rpc", "cross-dc"}, Help: "", }, { - Name: []string{"consul", "rpc", "query"}, + Name: []string{"rpc", "query"}, Help: "", }, } var RPCGauges = []prometheus.GaugeDefinition{ { - Name: []string{"consul", "rpc", "queries_blocking"}, + Name: []string{"rpc", "queries_blocking"}, Help: "", }, } var RPCSummaries = []prometheus.SummaryDefinition{ { - Name: []string{"consul", "rpc", "consistentRead"}, + Name: []string{"rpc", "consistentRead"}, Help: "", }, } diff --git a/agent/consul/session_ttl.go b/agent/consul/session_ttl.go index db4447c507ab..7387e42a9f49 100644 --- a/agent/consul/session_ttl.go +++ b/agent/consul/session_ttl.go @@ -12,22 +12,22 @@ import ( var SessionGauges = []prometheus.GaugeDefinition{ { - Name: []string{"consul", "session_ttl", "active"}, + Name: []string{"session_ttl", "active"}, Help: "", }, { - Name: []string{"consul", "raft", "applied_index"}, + Name: []string{"raft", "applied_index"}, Help: "", }, { - Name: []string{"consul", "raft", "last_index"}, + Name: []string{"raft", "last_index"}, Help: "", }, } var SessionSummaries = []prometheus.SummaryDefinition{ { - Name: []string{"consul", "session_ttl", "invalidate"}, + Name: []string{"session_ttl", "invalidate"}, Help: "", }, } diff --git a/agent/consul/txn_endpoint.go b/agent/consul/txn_endpoint.go index 42539a991fb5..50a57f9e16a1 100644 --- a/agent/consul/txn_endpoint.go +++ b/agent/consul/txn_endpoint.go @@ -14,11 +14,11 @@ import ( var TxnSummaries = []prometheus.SummaryDefinition{ { - Name: []string{"consul", "txn", "apply"}, + Name: []string{"txn", "apply"}, Help: "This measures the time spent applying a transaction operation.", }, { - Name: []string{"consul", "txn", "read"}, + Name: []string{"txn", "read"}, Help: "", }, } diff --git a/agent/consul/usagemetrics/usagemetrics.go b/agent/consul/usagemetrics/usagemetrics.go index fc8d9ce90bde..7f2207fa60e5 100644 --- 
a/agent/consul/usagemetrics/usagemetrics.go +++ b/agent/consul/usagemetrics/usagemetrics.go @@ -15,15 +15,15 @@ import ( var Gauges = []prometheus.GaugeDefinition{ { - Name: []string{"consul", "state", "nodes"}, + Name: []string{"state", "nodes"}, Help: "", }, { - Name: []string{"consul", "state", "services"}, + Name: []string{"state", "services"}, Help: "", }, { - Name: []string{"consul", "state", "service_instances"}, + Name: []string{"state", "service_instances"}, Help: "", }, } diff --git a/agent/grpc/stats.go b/agent/grpc/stats.go index 7f732316fe61..b1a0c5a23209 100644 --- a/agent/grpc/stats.go +++ b/agent/grpc/stats.go @@ -13,37 +13,37 @@ import ( var defaultMetrics = metrics.Default() var StatsGauges = []prometheus.GaugeDefinition{ { - Name: []string{"consul", "grpc", "server", "connections"}, + Name: []string{"grpc", "server", "connections"}, Help: "", }, { - Name: []string{"consul", "grpc", "client", "connections"}, + Name: []string{"grpc", "client", "connections"}, Help: "", }, { - Name: []string{"consul", "grpc", "server", "streams"}, + Name: []string{"grpc", "server", "streams"}, Help: "", }, } var StatsCounters = []prometheus.CounterDefinition{ { - Name: []string{"consul", "grpc", "client", "request", "count"}, + Name: []string{"grpc", "client", "request", "count"}, Help: "", }, { - Name: []string{"consul", "grpc", "server", "request", "count"}, + Name: []string{"grpc", "server", "request", "count"}, Help: "", }, { - Name: []string{"consul", "grpc", "client", "connection", "count"}, + Name: []string{"grpc", "client", "connection", "count"}, Help: "", }, { - Name: []string{"consul", "grpc", "server", "connection", "count"}, + Name: []string{"grpc", "server", "connection", "count"}, Help: "", }, { - Name: []string{"consul", "grpc", "server", "stream", "count"}, + Name: []string{"grpc", "server", "stream", "count"}, Help: "", }, } diff --git a/agent/http.go b/agent/http.go index 244b9522b559..9b5fcf5e37cc 100644 --- a/agent/http.go +++ b/agent/http.go @@ 
-34,7 +34,7 @@ import ( var HTTPSummaries = []prometheus.SummaryDefinition{ { - Name: []string{"consul", "api", "http"}, + Name: []string{"api", "http"}, Help: "", }, } diff --git a/agent/local/state.go b/agent/local/state.go index d145f97d0bd3..5ca827607d33 100644 --- a/agent/local/state.go +++ b/agent/local/state.go @@ -22,23 +22,23 @@ import ( var StateCounters = []prometheus.CounterDefinition{ { - Name: []string{"consul", "acl", "blocked", "service", "deregistration"}, + Name: []string{"acl", "blocked", "service", "deregistration"}, Help: "", }, { - Name: []string{"consul", "acl", "blocked", "check", "deregistration"}, + Name: []string{"acl", "blocked", "check", "deregistration"}, Help: "", }, { - Name: []string{"consul", "acl", "blocked", "service", "registration"}, + Name: []string{"acl", "blocked", "service", "registration"}, Help: "", }, { - Name: []string{"consul", "acl", "blocked", "check", "registration"}, + Name: []string{"acl", "blocked", "check", "registration"}, Help: "", }, { - Name: []string{"consul", "acl", "blocked", "node", "registration"}, + Name: []string{"acl", "blocked", "node", "registration"}, Help: "", }, } diff --git a/agent/setup.go b/agent/setup.go index 4297a3089a4f..c3f4dbf08fa8 100644 --- a/agent/setup.go +++ b/agent/setup.go @@ -185,6 +185,7 @@ func registerWithGRPC(b grpcresolver.Builder) { // getPrometheusDefs reaches into every slice of prometheus defs we've defined in each part of the agent, and appends // all of our slices into one nice slice of definitions per metric type for the Consul agent to pass to go-metrics. func getPrometheusDefs() lib.PrometheusDefs { + serviceName := []string{"consul"} var gauges = [][]prometheus.GaugeDefinition{ consul.AutopilotGauges, consul.RPCGauges, @@ -194,7 +195,13 @@ func getPrometheusDefs() lib.PrometheusDefs { } var gaugeDefs []prometheus.GaugeDefinition for _, g := range gauges { - gaugeDefs = append(gaugeDefs, g...) 
+ // Set Consul to each definition's namespace + var withService []prometheus.GaugeDefinition + for _, gauge := range g { + gauge.Name = append(serviceName, gauge.Name...) + withService = append(withService, gauge) + } + gaugeDefs = append(gaugeDefs, withService...) } raftCounters := []prometheus.CounterDefinition{ @@ -202,15 +209,15 @@ func getPrometheusDefs() lib.PrometheusDefs { // package within. In the mean time, we're going to define them here because it's important that they're always // present for Consul users setting up dashboards. { - Name: []string{"consul", "raft", "apply"}, + Name: []string{"raft", "apply"}, Help: "This counts the number of Raft transactions occurring over the interval.", }, { - Name: []string{"consul", "raft", "state", "candidate"}, + Name: []string{"raft", "state", "candidate"}, Help: "This increments whenever a Consul server starts an election.", }, { - Name: []string{"consul", "raft", "state", "leader"}, + Name: []string{"raft", "state", "leader"}, Help: "This increments whenever a Consul server becomes a leader.", }, } @@ -227,7 +234,13 @@ func getPrometheusDefs() lib.PrometheusDefs { } var counterDefs []prometheus.CounterDefinition for _, c := range counters { - counterDefs = append(counterDefs, c...) + // Set Consul to each definition's namespace + var withService []prometheus.CounterDefinition + for _, counter := range c { + counter.Name = append(serviceName, counter.Name...) + withService = append(withService, counter) + } + counterDefs = append(counterDefs, withService...) } raftSummaries := []prometheus.SummaryDefinition{ @@ -235,11 +248,11 @@ func getPrometheusDefs() lib.PrometheusDefs { // package within. In the mean time, we're going to define them here because it's important that they're always // present for Consul users setting up dashboards. 
{ - Name: []string{"consul", "raft", "commitTime"}, + Name: []string{"raft", "commitTime"}, Help: "This measures the time it takes to commit a new entry to the Raft log on the leader.", }, { - Name: []string{"consul", "raft", "leader", "lastContact"}, + Name: []string{"raft", "leader", "lastContact"}, Help: "Measures the time since the leader was last able to contact the follower nodes when checking its leader lease.", }, } @@ -260,7 +273,13 @@ func getPrometheusDefs() lib.PrometheusDefs { } var summaryDefs []prometheus.SummaryDefinition for _, s := range summaries { - summaryDefs = append(summaryDefs, s...) + // Set Consul to each definition's namespace + var withService []prometheus.SummaryDefinition + for _, summary := range s { + summary.Name = append(serviceName, summary.Name...) + withService = append(withService, summary) + } + summaryDefs = append(summaryDefs, withService...) } return lib.PrometheusDefs{ From 5da2f1efa89852c3f835ac3d0a66ced5eada5ce9 Mon Sep 17 00:00:00 2001 From: Kit Patella Date: Fri, 13 Nov 2020 16:26:08 -0800 Subject: [PATCH 03/13] finish adding static server metrics --- agent/cache/cache.go | 31 ++++++++ agent/catalog_endpoint.go | 47 ++++++------ agent/consul/acl_endpoint_legacy.go | 8 ++ agent/consul/autopilot.go | 2 +- agent/consul/catalog_endpoint.go | 12 +-- agent/consul/fsm/commands_oss.go | 93 ++++++++++++++++++++++- agent/consul/fsm/snapshot.go | 8 ++ agent/consul/leader.go | 16 ++++ agent/consul/prepared_query_endpoint.go | 8 +- agent/consul/rpc.go | 16 ++-- agent/consul/segment_oss.go | 8 ++ agent/consul/session_endpoint.go | 12 +++ agent/consul/session_ttl.go | 7 +- agent/consul/txn_endpoint.go | 2 +- agent/consul/usagemetrics/usagemetrics.go | 12 +-- agent/dns.go | 6 +- agent/grpc/stats.go | 16 ++-- agent/http.go | 2 +- agent/local/state.go | 22 +++--- agent/setup.go | 20 +++-- 20 files changed, 266 insertions(+), 82 deletions(-) diff --git a/agent/cache/cache.go b/agent/cache/cache.go index 1a5193792b65..62dc8619ba23 100644 
--- a/agent/cache/cache.go +++ b/agent/cache/cache.go @@ -24,6 +24,7 @@ import ( "time" "github.com/armon/go-metrics" + "github.com/armon/go-metrics/prometheus" "golang.org/x/time/rate" "github.com/hashicorp/consul/lib" @@ -32,6 +33,34 @@ import ( //go:generate mockery -all -inpkg +// TODO(kit): remove the namespace from these once the metrics themselves change +var Gauges = []prometheus.GaugeDefinition{ + { + Name: []string{"consul", "cache", "entries_count"}, + Help: "", + }, +} + +// TODO(kit): remove the namespace from these once the metrics themselves change +var Counters = []prometheus.CounterDefinition{ + { + Name: []string{"consul", "cache", "bypass"}, + Help: "", + }, + { + Name: []string{"consul", "cache", "fetch_success"}, + Help: "", + }, + { + Name: []string{"consul", "cache", "fetch_error"}, + Help: "", + }, + { + Name: []string{"consul", "cache", "evict_expired"}, + Help: "", + }, +} + // Constants related to refresh backoff. We probably don't ever need to // make these configurable knobs since they primarily exist to lower load. 
const ( @@ -629,6 +658,7 @@ func (c *Cache) fetch(key string, r getOptions, allowNew bool, attempt uint, ign // Error handling if err == nil { labels := []metrics.Label{{Name: "result_not_modified", Value: strconv.FormatBool(result.NotModified)}} + // TODO(kit): move tEntry.Name to a label on the first write here and deprecate the second write metrics.IncrCounterWithLabels([]string{"consul", "cache", "fetch_success"}, 1, labels) metrics.IncrCounterWithLabels([]string{"consul", "cache", tEntry.Name, "fetch_success"}, 1, labels) @@ -658,6 +688,7 @@ func (c *Cache) fetch(key string, r getOptions, allowNew bool, attempt uint, ign newEntry.RefreshLostContact = time.Time{} } } else { + // TODO(kit): Add tEntry.Name to label on fetch_error and deprecate second write metrics.IncrCounter([]string{"consul", "cache", "fetch_error"}, 1) metrics.IncrCounter([]string{"consul", "cache", tEntry.Name, "fetch_error"}, 1) diff --git a/agent/catalog_endpoint.go b/agent/catalog_endpoint.go index df78384ea004..3cda7c7cd8b3 100644 --- a/agent/catalog_endpoint.go +++ b/agent/catalog_endpoint.go @@ -11,7 +11,6 @@ import ( "github.com/hashicorp/consul/agent/structs" ) -// TODO(kit): Add help strings for each var CatalogCounters = []prometheus.CounterDefinition{ { Name: []string{"client", "api", "catalog_register"}, @@ -19,71 +18,71 @@ var CatalogCounters = []prometheus.CounterDefinition{ }, { Name: []string{"client", "rpc", "error", "catalog_register"}, - Help: "", + Help: "This increments whenever a Consul agent receives an RPC error for a catalog register request.", }, { Name: []string{"client", "api", "success", "catalog_register"}, - Help: "", + Help: "This increments whenever a Consul agent successfully responds to a catalog register request.", }, { Name: []string{"client", "api", "catalog_deregister"}, - Help: "", + Help: "This increments whenever a Consul agent receives a catalog deregister request.", }, { Name: []string{"client", "api", "catalog_datacenters"}, - Help: "", + Help: 
"This increments whenever a Consul agent receives a request to list datacenters in the catalog.", }, { Name: []string{"client", "rpc", "error", "catalog_deregister"}, - Help: "", + Help: "This increments whenever a Consul agent receives an RPC error for a catalog deregister request.", }, { Name: []string{"client", "api", "success", "catalog_nodes"}, - Help: "", + Help: "This increments whenever a Consul agent successfully responds to a request to list nodes.", }, { Name: []string{"client", "rpc", "error", "catalog_nodes"}, - Help: "", + Help: "This increments whenever a Consul agent receives an RPC error for a request to list nodes.", }, { Name: []string{"client", "api", "success", "catalog_deregister"}, - Help: "", + Help: "This increments whenever a Consul agent successfully responds to a catalog deregister request.", }, { Name: []string{"client", "rpc", "error", "catalog_datacenters"}, - Help: "", + Help: "This increments whenever a Consul agent receives an RPC error for a request to list datacenters.", }, { Name: []string{"client", "api", "success", "catalog_datacenters"}, - Help: "", + Help: "This increments whenever a Consul agent successfully responds to a request to list datacenters.", }, { Name: []string{"client", "api", "catalog_nodes"}, - Help: "", + Help: "This increments whenever a Consul agent receives a request to list nodes from the catalog.", }, { Name: []string{"client", "api", "catalog_services"}, - Help: "", + Help: "This increments whenever a Consul agent receives a request to list services from the catalog.", }, { Name: []string{"client", "rpc", "error", "catalog_services"}, - Help: "", + Help: "This increments whenever a Consul agent receives an RPC error for a request to list services.", }, { Name: []string{"client", "api", "success", "catalog_services"}, - Help: "", + Help: "This increments whenever a Consul agent successfully responds to a request to list services.", }, { Name: []string{"client", "api", "catalog_service_nodes"}, - Help: 
"", + Help: "This increments whenever a Consul agent receives a request to list nodes offering a service.", }, { Name: []string{"client", "rpc", "error", "catalog_service_nodes"}, - Help: "", + Help: "This increments whenever a Consul agent receives an RPC error for a request to list nodes offering a service.", }, { Name: []string{"client", "api", "success", "catalog_service_nodes"}, - Help: "", + Help: "This increments whenever a Consul agent successfully responds to a request to list nodes offering a service.", }, { Name: []string{"client", "api", "error", "catalog_service_nodes"}, @@ -91,15 +90,15 @@ var CatalogCounters = []prometheus.CounterDefinition{ }, { Name: []string{"client", "api", "catalog_node_services"}, - Help: "", + Help: "This increments whenever a Consul agent successfully responds to a request to list nodes offering a service.", }, { Name: []string{"client", "api", "success", "catalog_node_services"}, - Help: "", + Help: "This increments whenever a Consul agent successfully responds to a request to list services in a node.", }, { Name: []string{"client", "rpc", "error", "catalog_node_services"}, - Help: "", + Help: "This increments whenever a Consul agent receives an RPC error for a request to list services in a node.", }, { Name: []string{"client", "api", "catalog_node_service_list"}, @@ -115,15 +114,15 @@ var CatalogCounters = []prometheus.CounterDefinition{ }, { Name: []string{"client", "api", "catalog_gateway_services"}, - Help: "", + Help: "This increments whenever a Consul agent receives a request to list services associated with a gateway.", }, { Name: []string{"client", "rpc", "error", "catalog_gateway_services"}, - Help: "", + Help: "This increments whenever a Consul agent receives an RPC error for a request to list services associated with a gateway.", }, { Name: []string{"client", "api", "success", "catalog_gateway_services"}, - Help: "", + Help: "This increments whenever a Consul agent successfully responds to a request to list 
services associated with a gateway.", }, } diff --git a/agent/consul/acl_endpoint_legacy.go b/agent/consul/acl_endpoint_legacy.go index 22838aca0daa..16fa917fc6f7 100644 --- a/agent/consul/acl_endpoint_legacy.go +++ b/agent/consul/acl_endpoint_legacy.go @@ -5,6 +5,7 @@ import ( "time" "github.com/armon/go-metrics" + "github.com/armon/go-metrics/prometheus" "github.com/hashicorp/consul/acl" "github.com/hashicorp/consul/agent/consul/state" "github.com/hashicorp/consul/agent/structs" @@ -12,6 +13,13 @@ import ( "github.com/hashicorp/go-memdb" ) +var ACLEndpointLegacySummaries = []prometheus.SummaryDefinition{ + { + Name: []string{"acl", "apply"}, + Help: "This measures the time it takes to complete an update to the ACL store.", + }, +} + // Bootstrap is used to perform a one-time ACL bootstrap operation on // a cluster to get the first management token. func (a *ACL) Bootstrap(args *structs.DCSpecificRequest, reply *structs.ACL) error { diff --git a/agent/consul/autopilot.go b/agent/consul/autopilot.go index 22f50259c5c6..e84451ebdf3d 100644 --- a/agent/consul/autopilot.go +++ b/agent/consul/autopilot.go @@ -16,7 +16,7 @@ import ( var AutopilotGauges = []prometheus.GaugeDefinition{ { Name: []string{"autopilot", "failure_tolerance"}, - Help: "", + Help: "This tracks the number of voting servers that the cluster can lose while continuing to function.", }, { Name: []string{"autopilot", "healthy"}, diff --git a/agent/consul/catalog_endpoint.go b/agent/consul/catalog_endpoint.go index 151367cf4b59..95e405dcd03f 100644 --- a/agent/consul/catalog_endpoint.go +++ b/agent/consul/catalog_endpoint.go @@ -21,7 +21,7 @@ import ( var CatalogCounters = []prometheus.CounterDefinition{ { Name: []string{"catalog", "service", "query"}, - Help: "", + Help: "This increments for each catalog query for the given service.", }, { Name: []string{"catalog", "connect", "query"}, @@ -29,7 +29,7 @@ var CatalogCounters = []prometheus.CounterDefinition{ }, { Name: []string{"catalog", "service", 
"query-tag"}, - Help: "", + Help: "This increments for each catalog query for the given service with the given tag.", }, { Name: []string{"catalog", "connect", "query-tag"}, @@ -37,7 +37,7 @@ var CatalogCounters = []prometheus.CounterDefinition{ }, { Name: []string{"catalog", "service", "query-tags"}, - Help: "", + Help: "This increments for each catalog query for the given service with the given tags.", }, { Name: []string{"catalog", "connect", "query-tags"}, @@ -45,7 +45,7 @@ var CatalogCounters = []prometheus.CounterDefinition{ }, { Name: []string{"catalog", "service", "not-found"}, - Help: "", + Help: "This increments for each catalog query where the given service could not be found.", }, { Name: []string{"catalog", "connect", "not-found"}, @@ -56,11 +56,11 @@ var CatalogCounters = []prometheus.CounterDefinition{ var CatalogSummaries = []prometheus.SummaryDefinition{ { Name: []string{"catalog", "deregister"}, - Help: "", + Help: "This measures the time it takes to complete a catalog deregister operation.", }, { Name: []string{"catalog", "register"}, - Help: "", + Help: "This measures the time it takes to complete a catalog register operation.", }, } diff --git a/agent/consul/fsm/commands_oss.go b/agent/consul/fsm/commands_oss.go index 5a5a530c8d27..4c3bf3c0d146 100644 --- a/agent/consul/fsm/commands_oss.go +++ b/agent/consul/fsm/commands_oss.go @@ -4,11 +4,102 @@ import ( "fmt" "time" - metrics "github.com/armon/go-metrics" + "github.com/armon/go-metrics" + "github.com/armon/go-metrics/prometheus" "github.com/hashicorp/consul/agent/structs" "github.com/hashicorp/consul/api" ) +var CommandsSummaries = []prometheus.SummaryDefinition{ + { + Name: []string{"fsm", "register"}, + Help: "This measures the time it takes to apply a catalog register operation to the FSM.", + }, + { + Name: []string{"fsm", "deregister"}, + Help: "This measures the time it takes to apply a catalog deregister operation to the FSM.", + }, + { + Name: []string{"fsm", "kvs"}, + Help: "This 
measures the time it takes to apply the given KV operation to the FSM.", + }, + { + Name: []string{"fsm", "session"}, + Help: "This measures the time it takes to apply the given session operation to the FSM.", + }, + { + Name: []string{"fsm", "acl"}, + Help: "This measures the time it takes to apply the given ACL operation to the FSM.", + }, + { + Name: []string{"fsm", "tombstone"}, + Help: "This measures the time it takes to apply the given tombstone operation to the FSM.", + }, + { + Name: []string{"fsm", "coordinate", "batch-update"}, + Help: "This measures the time it takes to apply the given batch coordinate update to the FSM.", + }, + { + Name: []string{"fsm", "prepared-query"}, + Help: "This measures the time it takes to apply the given prepared query update operation to the FSM.", + }, + { + Name: []string{"fsm", "txn"}, + Help: "This measures the time it takes to apply the given transaction update to the FSM.", + }, + { + Name: []string{"fsm", "autopilot"}, + Help: "This measures the time it takes to apply the given autopilot update to the FSM.", + }, + { + Name: []string{"consul", "fsm", "intention"}, + Help: "", + }, + { + Name: []string{"fsm", "intention"}, + Help: "", + }, + { + Name: []string{"consul", "fsm", "ca"}, + Help: "", + }, + { + Name: []string{"fsm", "ca", "leaf"}, + Help: "", + }, + { + Name: []string{"fsm", "acl", "token"}, + Help: "", + }, + { + Name: []string{"fsm", "ca", "leaf"}, + Help: "", + }, + { + Name: []string{"fsm", "acl", "policy"}, + Help: "", + }, + { + Name: []string{"fsm", "acl", "bindingrule"}, + Help: "", + }, + { + Name: []string{"fsm", "acl", "authmethod"}, + Help: "", + }, + { + Name: []string{"fsm", "system_metadata"}, + Help: "", + }, + // TODO(kit): We generate the config-entry fsm summaries by reading off of the request. It is + // possible to statically declare these when we know all of the names, but I didn't get to it + // in this patch. Config-entries are known though and we should add these in the future. 
+ // { + // Name: []string{"fsm", "config_entry", req.Entry.GetKind()}, + // Help: "", + // }, +} + func init() { registerCommand(structs.RegisterRequestType, (*FSM).applyRegister) registerCommand(structs.DeregisterRequestType, (*FSM).applyDeregister) diff --git a/agent/consul/fsm/snapshot.go b/agent/consul/fsm/snapshot.go index e4c9c0bb456d..55a13f4af7c5 100644 --- a/agent/consul/fsm/snapshot.go +++ b/agent/consul/fsm/snapshot.go @@ -5,6 +5,7 @@ import ( "time" "github.com/armon/go-metrics" + "github.com/armon/go-metrics/prometheus" "github.com/hashicorp/consul/agent/consul/state" "github.com/hashicorp/consul/agent/structs" "github.com/hashicorp/go-msgpack/codec" @@ -12,6 +13,13 @@ import ( "github.com/hashicorp/raft" ) +var SnapshotSummaries = []prometheus.SummaryDefinition{ + { + Name: []string{"fsm", "persist"}, + Help: "This measures the time it takes to persist the FSM to a raft snapshot.", + }, +} + // snapshot is used to provide a snapshot of the current // state in a way that can be accessed concurrently with operations // that may modify the live state. 
diff --git a/agent/consul/leader.go b/agent/consul/leader.go index 6fba3af672f9..e7e028e6e8e6 100644 --- a/agent/consul/leader.go +++ b/agent/consul/leader.go @@ -11,6 +11,7 @@ import ( "time" "github.com/armon/go-metrics" + "github.com/armon/go-metrics/prometheus" "github.com/hashicorp/consul/acl" "github.com/hashicorp/consul/agent/metadata" "github.com/hashicorp/consul/agent/structs" @@ -27,6 +28,21 @@ import ( "golang.org/x/time/rate" ) +var LeaderSummaries = []prometheus.SummaryDefinition{ + { + Name: []string{"leader", "barrier"}, + Help: "This measures the time spent waiting for the raft barrier upon gaining leadership.", + }, + { + Name: []string{"leader", "reconcileMember"}, + Help: "This measures the time spent updating the raft store for a single serf member's information.", + }, + { + Name: []string{"leader", "reapTombstones"}, + Help: "This measures the time spent clearing tombstones.", + }, +} + const ( newLeaderEvent = "consul:new-leader" barrierWriteTimeout = 2 * time.Minute diff --git a/agent/consul/prepared_query_endpoint.go b/agent/consul/prepared_query_endpoint.go index d796c6f6cbd9..4b515deb36b6 100644 --- a/agent/consul/prepared_query_endpoint.go +++ b/agent/consul/prepared_query_endpoint.go @@ -19,19 +19,19 @@ import ( var PreparedQuerySummaries = []prometheus.SummaryDefinition{ { Name: []string{"prepared-query", "apply"}, - Help: "", + Help: "This measures the time it takes to apply a prepared query update.", }, { Name: []string{"prepared-query", "explain"}, - Help: "", + Help: "This measures the time it takes to process a prepared query explain request.", }, { Name: []string{"prepared-query", "execute"}, - Help: "", + Help: "This measures the time it takes to process a prepared query execute request.", }, { Name: []string{"prepared-query", "execute_remote"}, - Help: "", + Help: "This measures the time it takes to process a prepared query execute request that was forwarded to another datacenter.", }, } diff --git a/agent/consul/rpc.go 
b/agent/consul/rpc.go index 775d311bdafa..aab09a32707e 100644 --- a/agent/consul/rpc.go +++ b/agent/consul/rpc.go @@ -35,41 +35,41 @@ import ( var RPCCounters = []prometheus.CounterDefinition{ { Name: []string{"rpc", "accept_conn"}, - Help: "", + Help: "This increments when a server accepts an RPC connection.", }, { Name: []string{"rpc", "raft_handoff"}, - Help: "", + Help: "This increments when a server accepts a Raft-related RPC connection.", }, { Name: []string{"rpc", "request_error"}, - Help: "", + Help: "This increments when a server returns an error from an RPC request.", }, { Name: []string{"rpc", "request"}, - Help: "", + Help: "This increments when a server receives a Consul-related RPC request.", }, { Name: []string{"rpc", "cross-dc"}, - Help: "", + Help: "This increments when a server sends a (potentially blocking) cross datacenter RPC query.", }, { Name: []string{"rpc", "query"}, - Help: "", + Help: "This increments when a server receives a new blocking RPC request, indicating the rate of new blocking query calls.", }, } var RPCGauges = []prometheus.GaugeDefinition{ { Name: []string{"rpc", "queries_blocking"}, - Help: "", + Help: "This shows the current number of in-flight blocking queries the server is handling.", }, } var RPCSummaries = []prometheus.SummaryDefinition{ { Name: []string{"rpc", "consistentRead"}, - Help: "", + Help: "This measures the time spent confirming that a consistent read can be performed.", }, } diff --git a/agent/consul/segment_oss.go b/agent/consul/segment_oss.go index 11b06b695962..db910e8c8d32 100644 --- a/agent/consul/segment_oss.go +++ b/agent/consul/segment_oss.go @@ -7,10 +7,18 @@ import ( "time" "github.com/armon/go-metrics" + "github.com/armon/go-metrics/prometheus" "github.com/hashicorp/consul/agent/structs" "github.com/hashicorp/serf/serf" ) +var SegmentOSSSummaries = []prometheus.SummaryDefinition{ + { + Name: []string{"leader", "reconcile"}, + Help: "This measures the time spent updating the raft store from the serf 
member information.", + }, +} + // LANMembersAllSegments returns members from all segments. func (s *Server) LANMembersAllSegments() ([]serf.Member, error) { return s.LANMembers(), nil diff --git a/agent/consul/session_endpoint.go b/agent/consul/session_endpoint.go index 3ac8b41dc00b..669e6388006a 100644 --- a/agent/consul/session_endpoint.go +++ b/agent/consul/session_endpoint.go @@ -5,6 +5,7 @@ import ( "time" "github.com/armon/go-metrics" + "github.com/armon/go-metrics/prometheus" "github.com/hashicorp/consul/acl" "github.com/hashicorp/consul/agent/consul/state" "github.com/hashicorp/consul/agent/structs" @@ -13,6 +14,17 @@ import ( "github.com/hashicorp/go-uuid" ) +var SessionEndpointSummaries = []prometheus.SummaryDefinition{ + { + Name: []string{"session", "apply"}, + Help: "This measures the time spent applying a session update.", + }, + { + Name: []string{"session", "renew"}, + Help: "This measures the time spent renewing a session.", + }, +} + // Session endpoint is used to manipulate sessions for KV type Session struct { srv *Server diff --git a/agent/consul/session_ttl.go b/agent/consul/session_ttl.go index 7387e42a9f49..193dc18e31af 100644 --- a/agent/consul/session_ttl.go +++ b/agent/consul/session_ttl.go @@ -4,16 +4,15 @@ import ( "fmt" "time" - "github.com/armon/go-metrics/prometheus" - "github.com/armon/go-metrics" + "github.com/armon/go-metrics/prometheus" "github.com/hashicorp/consul/agent/structs" ) var SessionGauges = []prometheus.GaugeDefinition{ { Name: []string{"session_ttl", "active"}, - Help: "", + Help: "This tracks the active number of sessions being tracked.", }, { Name: []string{"raft", "applied_index"}, @@ -28,7 +27,7 @@ var SessionGauges = []prometheus.GaugeDefinition{ var SessionSummaries = []prometheus.SummaryDefinition{ { Name: []string{"session_ttl", "invalidate"}, - Help: "", + Help: "This measures the time spent invalidating an expired session.", }, } diff --git a/agent/consul/txn_endpoint.go b/agent/consul/txn_endpoint.go index 
50a57f9e16a1..c4a9314314fa 100644 --- a/agent/consul/txn_endpoint.go +++ b/agent/consul/txn_endpoint.go @@ -19,7 +19,7 @@ var TxnSummaries = []prometheus.SummaryDefinition{ }, { Name: []string{"txn", "read"}, - Help: "", + Help: "This measures the time spent returning a read transaction.", }, } diff --git a/agent/consul/usagemetrics/usagemetrics.go b/agent/consul/usagemetrics/usagemetrics.go index 7f2207fa60e5..ac74eca2e13b 100644 --- a/agent/consul/usagemetrics/usagemetrics.go +++ b/agent/consul/usagemetrics/usagemetrics.go @@ -15,16 +15,16 @@ import ( var Gauges = []prometheus.GaugeDefinition{ { - Name: []string{"state", "nodes"}, - Help: "", + Name: []string{"consul", "state", "nodes"}, + Help: "This measures the current number of nodes registered with Consul. It is only emitted by Consul servers. Added in v1.9.0.", }, { - Name: []string{"state", "services"}, - Help: "", + Name: []string{"consul", "state", "services"}, + Help: "This measures the current number of unique services registered with Consul, based on service name. It is only emitted by Consul servers. Added in v1.9.0.", }, { - Name: []string{"state", "service_instances"}, - Help: "", + Name: []string{"consul", "state", "service_instances"}, + Help: "This measures the current number of unique services registered with Consul, based on service name. It is only emitted by Consul servers. 
Added in v1.9.0.", }, } diff --git a/agent/dns.go b/agent/dns.go index 6d541aeaaa7e..880184c398f1 100644 --- a/agent/dns.go +++ b/agent/dns.go @@ -31,18 +31,18 @@ import ( var DNSCounters = []prometheus.CounterDefinition{ { Name: []string{"dns", "stale_queries"}, - Help: "", + Help: "This increments when an agent serves a query within the allowed stale threshold.", }, } var DNSSummaries = []prometheus.SummaryDefinition{ { Name: []string{"dns", "ptr_query"}, - Help: "", + Help: "This measures the time spent handling a reverse DNS query for the given node.", }, { Name: []string{"dns", "domain_query"}, - Help: "", + Help: "This measures the time spent handling a domain query for the given node.", }, } diff --git a/agent/grpc/stats.go b/agent/grpc/stats.go index b1a0c5a23209..d8bd6298a289 100644 --- a/agent/grpc/stats.go +++ b/agent/grpc/stats.go @@ -14,37 +14,37 @@ var defaultMetrics = metrics.Default() var StatsGauges = []prometheus.GaugeDefinition{ { Name: []string{"grpc", "server", "connections"}, - Help: "", + Help: "This metric measures the number of active gRPC connections open on the server.", }, { Name: []string{"grpc", "client", "connections"}, - Help: "", + Help: "This metric measures the number of active gRPC connections open from the client agent to any Consul servers.", }, { Name: []string{"grpc", "server", "streams"}, - Help: "", + Help: "This metric measures the number of active gRPC streams handled by the server.", }, } var StatsCounters = []prometheus.CounterDefinition{ { Name: []string{"grpc", "client", "request", "count"}, - Help: "", + Help: "This metric counts the number of gRPC requests made by the client agent to a Consul server.", }, { Name: []string{"grpc", "server", "request", "count"}, - Help: "", + Help: "This metric counts the number of gRPC requests received by the server.", }, { Name: []string{"grpc", "client", "connection", "count"}, - Help: "", + Help: "This metric counts the number of new gRPC connections opened by the client agent to 
a Consul server.", }, { Name: []string{"grpc", "server", "connection", "count"}, - Help: "", + Help: "This metric counts the number of new gRPC connections received by the server.", }, { Name: []string{"grpc", "server", "stream", "count"}, - Help: "", + Help: "This metric counts the number of new gRPC streams received by the server.", }, } diff --git a/agent/http.go b/agent/http.go index 9b5fcf5e37cc..9e24fe1ab9bd 100644 --- a/agent/http.go +++ b/agent/http.go @@ -35,7 +35,7 @@ import ( var HTTPSummaries = []prometheus.SummaryDefinition{ { Name: []string{"api", "http"}, - Help: "", + Help: "Samples how long it takes to service the given HTTP request for the given verb and path.", }, } diff --git a/agent/local/state.go b/agent/local/state.go index 5ca827607d33..8a74189e3e55 100644 --- a/agent/local/state.go +++ b/agent/local/state.go @@ -22,24 +22,28 @@ import ( var StateCounters = []prometheus.CounterDefinition{ { - Name: []string{"acl", "blocked", "service", "deregistration"}, - Help: "", + Name: []string{"acl", "blocked", "service", "registration"}, + Help: "This increments whenever a registration fails for a service (blocked by an ACL)", }, { - Name: []string{"acl", "blocked", "check", "deregistration"}, - Help: "", + Name: []string{"acl", "blocked", "service", "deregistration"}, + Help: "This increments whenever a deregistration fails for a service (blocked by an ACL)", }, { - Name: []string{"acl", "blocked", "service", "registration"}, - Help: "", + Name: []string{"acl", "blocked", "check", "registration"}, + Help: "This increments whenever a registration fails for a check (blocked by an ACL)", }, { - Name: []string{"acl", "blocked", "check", "registration"}, - Help: "", + Name: []string{"acl", "blocked", "check", "deregistration"}, + Help: "This increments whenever a deregistration fails for a check (blocked by an ACL)", }, { Name: []string{"acl", "blocked", "node", "registration"}, - Help: "", + Help: "This increments whenever a registration fails for a node 
(blocked by an ACL)", + }, + { + Name: []string{"acl", "blocked", "node", "deregistration"}, + Help: "This increments whenever a deregistration fails for a node (blocked by an ACL)", }, } diff --git a/agent/setup.go b/agent/setup.go index c3f4dbf08fa8..49a586ebbce2 100644 --- a/agent/setup.go +++ b/agent/setup.go @@ -8,6 +8,8 @@ import ( "sync" "time" + "github.com/hashicorp/consul/agent/consul/fsm" + "github.com/armon/go-metrics/prometheus" "github.com/hashicorp/consul/agent/consul/usagemetrics" "github.com/hashicorp/consul/agent/local" @@ -187,6 +189,7 @@ func registerWithGRPC(b grpcresolver.Builder) { func getPrometheusDefs() lib.PrometheusDefs { serviceName := []string{"consul"} var gauges = [][]prometheus.GaugeDefinition{ + cache.Gauges, consul.AutopilotGauges, consul.RPCGauges, consul.SessionGauges, @@ -205,9 +208,8 @@ func getPrometheusDefs() lib.PrometheusDefs { } raftCounters := []prometheus.CounterDefinition{ - // TODO(kit): "consul.raft..." metrics come from the raft lib and we should migrate these to a telemetry - // package within. In the mean time, we're going to define them here because it's important that they're always - // present for Consul users setting up dashboards. + // TODO(kit): "raft..." metrics come from the raft lib and we should migrate these to a telemetry + // package within. In the mean time, we're going to define a few here because they're key to monitoring Consul. { Name: []string{"raft", "apply"}, Help: "This counts the number of Raft transactions occurring over the interval.", @@ -224,6 +226,7 @@ func getPrometheusDefs() lib.PrometheusDefs { var counters = [][]prometheus.CounterDefinition{ CatalogCounters, + cache.Counters, consul.ACLCounters, consul.CatalogCounters, consul.ClientCounters, @@ -244,9 +247,8 @@ func getPrometheusDefs() lib.PrometheusDefs { } raftSummaries := []prometheus.SummaryDefinition{ - // TODO(kit): "consul.raft..." metrics come from the raft lib and we should migrate these to a telemetry - // package within. 
In the mean time, we're going to define them here because it's important that they're always - // present for Consul users setting up dashboards. + // TODO(kit): "raft..." metrics come from the raft lib and we should migrate these to a telemetry + // package within. In the mean time, we're going to define a few here because they're key to monitoring Consul. { Name: []string{"raft", "commitTime"}, Help: "This measures the time it takes to commit a new entry to the Raft log on the leader.", @@ -261,14 +263,20 @@ func getPrometheusDefs() lib.PrometheusDefs { HTTPSummaries, consul.ACLSummaries, consul.ACLEndpointSummaries, + consul.ACLEndpointLegacySummaries, consul.CatalogSummaries, consul.FederationStateSummaries, consul.IntentionSummaries, consul.KVSummaries, + consul.LeaderSummaries, consul.PreparedQuerySummaries, consul.RPCSummaries, + consul.SegmentOSSSummaries, consul.SessionSummaries, + consul.SessionEndpointSummaries, consul.TxnSummaries, + fsm.CommandsSummaries, + fsm.SnapshotSummaries, raftSummaries, } var summaryDefs []prometheus.SummaryDefinition From 15af5ead0b2687985c96efd9fe0f3116bdd1c794 Mon Sep 17 00:00:00 2001 From: Kit Patella Date: Mon, 16 Nov 2020 11:02:11 -0800 Subject: [PATCH 04/13] trim help strings to save a few bytes --- agent/catalog_endpoint.go | 46 +++++++++++------------ agent/consul/acl_endpoint_legacy.go | 2 +- agent/consul/autopilot.go | 4 +- agent/consul/catalog_endpoint.go | 12 +++--- agent/consul/fsm/commands_oss.go | 20 +++++----- agent/consul/fsm/snapshot.go | 2 +- agent/consul/kvs_endpoint.go | 2 +- agent/consul/leader.go | 6 +-- agent/consul/prepared_query_endpoint.go | 8 ++-- agent/consul/rpc.go | 16 ++++---- agent/consul/segment_oss.go | 2 +- agent/consul/session_endpoint.go | 4 +- agent/consul/session_ttl.go | 4 +- agent/consul/txn_endpoint.go | 4 +- agent/consul/usagemetrics/usagemetrics.go | 6 +-- agent/dns.go | 6 +-- agent/grpc/stats.go | 16 ++++---- agent/local/state.go | 12 +++--- 18 files changed, 86 insertions(+), 86 
deletions(-) diff --git a/agent/catalog_endpoint.go b/agent/catalog_endpoint.go index 3cda7c7cd8b3..188c1bfb2007 100644 --- a/agent/catalog_endpoint.go +++ b/agent/catalog_endpoint.go @@ -18,71 +18,71 @@ var CatalogCounters = []prometheus.CounterDefinition{ }, { Name: []string{"client", "rpc", "error", "catalog_register"}, - Help: "This increments whenever a Consul agent receives an RPC error for a catalog register request.", + Help: "Increments whenever a Consul agent receives an RPC error for a catalog register request.", }, { Name: []string{"client", "api", "success", "catalog_register"}, - Help: "This increments whenever a Consul agent successfully responds to a catalog register request.", + Help: "Increments whenever a Consul agent successfully responds to a catalog register request.", }, { Name: []string{"client", "api", "catalog_deregister"}, - Help: "This increments whenever a Consul agent receives a catalog deregister request.", + Help: "Increments whenever a Consul agent receives a catalog deregister request.", }, { Name: []string{"client", "api", "catalog_datacenters"}, - Help: "This increments whenever a Consul agent receives a request to list datacenters in the catalog.", + Help: "Increments whenever a Consul agent receives a request to list datacenters in the catalog.", }, { Name: []string{"client", "rpc", "error", "catalog_deregister"}, - Help: "This increments whenever a Consul agent receives an RPC error for a catalog deregister request.", + Help: "Increments whenever a Consul agent receives an RPC error for a catalog deregister request.", }, { Name: []string{"client", "api", "success", "catalog_nodes"}, - Help: "This increments whenever a Consul agent successfully responds to a request to list nodes.", + Help: "Increments whenever a Consul agent successfully responds to a request to list nodes.", }, { Name: []string{"client", "rpc", "error", "catalog_nodes"}, - Help: "This increments whenever a Consul agent receives an RPC error for a request to 
list nodes.", + Help: "Increments whenever a Consul agent receives an RPC error for a request to list nodes.", }, { Name: []string{"client", "api", "success", "catalog_deregister"}, - Help: "This increments whenever a Consul agent successfully responds to a catalog deregister request.", + Help: "Increments whenever a Consul agent successfully responds to a catalog deregister request.", }, { Name: []string{"client", "rpc", "error", "catalog_datacenters"}, - Help: "This increments whenever a Consul agent receives an RPC error for a request to list datacenters.", + Help: "Increments whenever a Consul agent receives an RPC error for a request to list datacenters.", }, { Name: []string{"client", "api", "success", "catalog_datacenters"}, - Help: "This increments whenever a Consul agent successfully responds to a request to list datacenters.", + Help: "Increments whenever a Consul agent successfully responds to a request to list datacenters.", }, { Name: []string{"client", "api", "catalog_nodes"}, - Help: "This increments whenever a Consul agent receives a request to list nodes from the catalog.", + Help: "Increments whenever a Consul agent receives a request to list nodes from the catalog.", }, { Name: []string{"client", "api", "catalog_services"}, - Help: "This increments whenever a Consul agent receives a request to list services from the catalog.", + Help: "Increments whenever a Consul agent receives a request to list services from the catalog.", }, { Name: []string{"client", "rpc", "error", "catalog_services"}, - Help: "This increments whenever a Consul agent receives an RPC error for a request to list services.", + Help: "Increments whenever a Consul agent receives an RPC error for a request to list services.", }, { Name: []string{"client", "api", "success", "catalog_services"}, - Help: "This increments whenever a Consul agent successfully responds to a request to list services.", + Help: "Increments whenever a Consul agent successfully responds to a request to list 
services.", }, { Name: []string{"client", "api", "catalog_service_nodes"}, - Help: "This increments whenever a Consul agent receives a request to list nodes offering a service.", + Help: "Increments whenever a Consul agent receives a request to list nodes offering a service.", }, { Name: []string{"client", "rpc", "error", "catalog_service_nodes"}, - Help: "This increments whenever a Consul agent receives an RPC error for a request to list nodes offering a service.", + Help: "Increments whenever a Consul agent receives an RPC error for a request to list nodes offering a service.", }, { Name: []string{"client", "api", "success", "catalog_service_nodes"}, - Help: "This increments whenever a Consul agent successfully responds to a request to list nodes offering a service.", + Help: "Increments whenever a Consul agent successfully responds to a request to list nodes offering a service.", }, { Name: []string{"client", "api", "error", "catalog_service_nodes"}, @@ -90,15 +90,15 @@ var CatalogCounters = []prometheus.CounterDefinition{ }, { Name: []string{"client", "api", "catalog_node_services"}, - Help: "This increments whenever a Consul agent successfully responds to a request to list nodes offering a service.", + Help: "Increments whenever a Consul agent successfully responds to a request to list nodes offering a service.", }, { Name: []string{"client", "api", "success", "catalog_node_services"}, - Help: "This increments whenever a Consul agent successfully responds to a request to list services in a node.", + Help: "Increments whenever a Consul agent successfully responds to a request to list services in a node.", }, { Name: []string{"client", "rpc", "error", "catalog_node_services"}, - Help: "This increments whenever a Consul agent receives an RPC error for a request to list services in a node.", + Help: "Increments whenever a Consul agent receives an RPC error for a request to list services in a node.", }, { Name: []string{"client", "api", 
"catalog_node_service_list"}, @@ -114,15 +114,15 @@ var CatalogCounters = []prometheus.CounterDefinition{ }, { Name: []string{"client", "api", "catalog_gateway_services"}, - Help: "This increments whenever a Consul agent receives a request to list services associated with a gateway.", + Help: "Increments whenever a Consul agent receives a request to list services associated with a gateway.", }, { Name: []string{"client", "rpc", "error", "catalog_gateway_services"}, - Help: "This increments whenever a Consul agent receives an RPC error for a request to list services associated with a gateway.", + Help: "Increments whenever a Consul agent receives an RPC error for a request to list services associated with a gateway.", }, { Name: []string{"client", "api", "success", "catalog_gateway_services"}, - Help: "This increments whenever a Consul agent successfully responds to a request to list services associated with a gateway.", + Help: "Increments whenever a Consul agent successfully responds to a request to list services associated with a gateway.", }, } diff --git a/agent/consul/acl_endpoint_legacy.go b/agent/consul/acl_endpoint_legacy.go index 16fa917fc6f7..9cdfba668bdc 100644 --- a/agent/consul/acl_endpoint_legacy.go +++ b/agent/consul/acl_endpoint_legacy.go @@ -16,7 +16,7 @@ import ( var ACLEndpointLegacySummaries = []prometheus.SummaryDefinition{ { Name: []string{"acl", "apply"}, - Help: "This measures the time it takes to complete an update to the ACL store.", + Help: "Measures the time it takes to complete an update to the ACL store.", }, } diff --git a/agent/consul/autopilot.go b/agent/consul/autopilot.go index e84451ebdf3d..cc6cf62302ff 100644 --- a/agent/consul/autopilot.go +++ b/agent/consul/autopilot.go @@ -16,11 +16,11 @@ import ( var AutopilotGauges = []prometheus.GaugeDefinition{ { Name: []string{"autopilot", "failure_tolerance"}, - Help: "This tracks the number of voting servers that the cluster can lose while continuing to function.", + Help: "Tracks the 
number of voting servers that the cluster can lose while continuing to function.", }, { Name: []string{"autopilot", "healthy"}, - Help: "This tracks the overall health of the local server cluster. 1 if all servers are healthy, 0 if one or more are unhealthy.", + Help: "Tracks the overall health of the local server cluster. 1 if all servers are healthy, 0 if one or more are unhealthy.", }, } diff --git a/agent/consul/catalog_endpoint.go b/agent/consul/catalog_endpoint.go index 95e405dcd03f..f5d5b5633df4 100644 --- a/agent/consul/catalog_endpoint.go +++ b/agent/consul/catalog_endpoint.go @@ -21,7 +21,7 @@ import ( var CatalogCounters = []prometheus.CounterDefinition{ { Name: []string{"catalog", "service", "query"}, - Help: "This increments for each catalog query for the given service.", + Help: "Increments for each catalog query for the given service.", }, { Name: []string{"catalog", "connect", "query"}, @@ -29,7 +29,7 @@ var CatalogCounters = []prometheus.CounterDefinition{ }, { Name: []string{"catalog", "service", "query-tag"}, - Help: "This increments for each catalog query for the given service with the given tag.", + Help: "Increments for each catalog query for the given service with the given tag.", }, { Name: []string{"catalog", "connect", "query-tag"}, @@ -37,7 +37,7 @@ var CatalogCounters = []prometheus.CounterDefinition{ }, { Name: []string{"catalog", "service", "query-tags"}, - Help: "This increments for each catalog query for the given service with the given tags.", + Help: "Increments for each catalog query for the given service with the given tags.", }, { Name: []string{"catalog", "connect", "query-tags"}, @@ -45,7 +45,7 @@ var CatalogCounters = []prometheus.CounterDefinition{ }, { Name: []string{"catalog", "service", "not-found"}, - Help: "This increments for each catalog query where the given service could not be found.", + Help: "Increments for each catalog query where the given service could not be found.", }, { Name: []string{"catalog", "connect", 
"not-found"}, @@ -56,11 +56,11 @@ var CatalogCounters = []prometheus.CounterDefinition{ var CatalogSummaries = []prometheus.SummaryDefinition{ { Name: []string{"catalog", "deregister"}, - Help: "This measures the time it takes to complete a catalog deregister operation.", + Help: "Measures the time it takes to complete a catalog deregister operation.", }, { Name: []string{"catalog", "register"}, - Help: "This measures the time it takes to complete a catalog register operation.", + Help: "Measures the time it takes to complete a catalog register operation.", }, } diff --git a/agent/consul/fsm/commands_oss.go b/agent/consul/fsm/commands_oss.go index fa4372a5225a..fae5eb1a8112 100644 --- a/agent/consul/fsm/commands_oss.go +++ b/agent/consul/fsm/commands_oss.go @@ -13,43 +13,43 @@ import ( var CommandsSummaries = []prometheus.SummaryDefinition{ { Name: []string{"fsm", "register"}, - Help: "This measures the time it takes to apply a catalog register operation to the FSM.", + Help: "Measures the time it takes to apply a catalog register operation to the FSM.", }, { Name: []string{"fsm", "deregister"}, - Help: "This measures the time it takes to apply a catalog deregister operation to the FSM.", + Help: "Measures the time it takes to apply a catalog deregister operation to the FSM.", }, { Name: []string{"fsm", "kvs"}, - Help: "This measures the time it takes to apply the given KV operation to the FSM.", + Help: "Measures the time it takes to apply the given KV operation to the FSM.", }, { Name: []string{"fsm", "session"}, - Help: "This measures the time it takes to apply the given session operation to the FSM.", + Help: "Measures the time it takes to apply the given session operation to the FSM.", }, { Name: []string{"fsm", "acl"}, - Help: "This measures the time it takes to apply the given ACL operation to the FSM.", + Help: "Measures the time it takes to apply the given ACL operation to the FSM.", }, { Name: []string{"fsm", "tombstone"}, - Help: "This measures the time 
it takes to apply the given tombstone operation to the FSM.", + Help: "Measures the time it takes to apply the given tombstone operation to the FSM.", }, { Name: []string{"fsm", "coordinate", "batch-update"}, - Help: "This measures the time it takes to apply the given batch coordinate update to the FSM.", + Help: "Measures the time it takes to apply the given batch coordinate update to the FSM.", }, { Name: []string{"fsm", "prepared-query"}, - Help: "This measures the time it takes to apply the given prepared query update operation to the FSM.", + Help: "Measures the time it takes to apply the given prepared query update operation to the FSM.", }, { Name: []string{"fsm", "txn"}, - Help: "This measures the time it takes to apply the given transaction update to the FSM.", + Help: "Measures the time it takes to apply the given transaction update to the FSM.", }, { Name: []string{"fsm", "autopilot"}, - Help: "This measures the time it takes to apply the given autopilot update to the FSM.", + Help: "Measures the time it takes to apply the given autopilot update to the FSM.", }, { Name: []string{"consul", "fsm", "intention"}, diff --git a/agent/consul/fsm/snapshot.go b/agent/consul/fsm/snapshot.go index 55a13f4af7c5..696ca564531f 100644 --- a/agent/consul/fsm/snapshot.go +++ b/agent/consul/fsm/snapshot.go @@ -16,7 +16,7 @@ import ( var SnapshotSummaries = []prometheus.SummaryDefinition{ { Name: []string{"fsm", "persist"}, - Help: "This measures the time it takes to persist the FSM to a raft snapshot.", + Help: "Measures the time it takes to persist the FSM to a raft snapshot.", }, } diff --git a/agent/consul/kvs_endpoint.go b/agent/consul/kvs_endpoint.go index 400397fca1a7..c6aee93805c1 100644 --- a/agent/consul/kvs_endpoint.go +++ b/agent/consul/kvs_endpoint.go @@ -18,7 +18,7 @@ import ( var KVSummaries = []prometheus.SummaryDefinition{ { Name: []string{"kvs", "apply"}, - Help: "This measures the time it takes to complete an update to the KV store.", + Help: "Measures 
the time it takes to complete an update to the KV store.", }, } diff --git a/agent/consul/leader.go b/agent/consul/leader.go index 93e26efd688b..d050e297b4c7 100644 --- a/agent/consul/leader.go +++ b/agent/consul/leader.go @@ -31,15 +31,15 @@ import ( var LeaderSummaries = []prometheus.SummaryDefinition{ { Name: []string{"leader", "barrier"}, - Help: "This measures the time spent waiting for the raft barrier upon gaining leadership.", + Help: "Measures the time spent waiting for the raft barrier upon gaining leadership.", }, { Name: []string{"leader", "reconcileMember"}, - Help: "This measures the time spent updating the raft store for a single serf member's information.", + Help: "Measures the time spent updating the raft store for a single serf member's information.", }, { Name: []string{"leader", "reapTombstones"}, - Help: "This measures the time spent clearing tombstones.", + Help: "Measures the time spent clearing tombstones.", }, } diff --git a/agent/consul/prepared_query_endpoint.go b/agent/consul/prepared_query_endpoint.go index 4b515deb36b6..360c80b9b7d5 100644 --- a/agent/consul/prepared_query_endpoint.go +++ b/agent/consul/prepared_query_endpoint.go @@ -19,19 +19,19 @@ import ( var PreparedQuerySummaries = []prometheus.SummaryDefinition{ { Name: []string{"prepared-query", "apply"}, - Help: "This measures the time it takes to apply a prepared query update.", + Help: "Measures the time it takes to apply a prepared query update.", }, { Name: []string{"prepared-query", "explain"}, - Help: "This measures the time it takes to process a prepared query explain request.", + Help: "Measures the time it takes to process a prepared query explain request.", }, { Name: []string{"prepared-query", "execute"}, - Help: "This measures the time it takes to process a prepared query execute request.", + Help: "Measures the time it takes to process a prepared query execute request.", }, { Name: []string{"prepared-query", "execute_remote"}, - Help: "This measures the time it 
takes to process a prepared query execute request that was forwarded to another datacenter.", + Help: "Measures the time it takes to process a prepared query execute request that was forwarded to another datacenter.", }, } diff --git a/agent/consul/rpc.go b/agent/consul/rpc.go index aab09a32707e..82a656a3a4df 100644 --- a/agent/consul/rpc.go +++ b/agent/consul/rpc.go @@ -35,41 +35,41 @@ import ( var RPCCounters = []prometheus.CounterDefinition{ { Name: []string{"rpc", "accept_conn"}, - Help: "This increments when a server accepts an RPC connection.", + Help: "Increments when a server accepts an RPC connection.", }, { Name: []string{"rpc", "raft_handoff"}, - Help: "This increments when a server accepts a Raft-related RPC connection.", + Help: "Increments when a server accepts a Raft-related RPC connection.", }, { Name: []string{"rpc", "request_error"}, - Help: "This increments when a server returns an error from an RPC request.", + Help: "Increments when a server returns an error from an RPC request.", }, { Name: []string{"rpc", "request"}, - Help: "This increments when a server receives a Consul-related RPC request.", + Help: "Increments when a server receives a Consul-related RPC request.", }, { Name: []string{"rpc", "cross-dc"}, - Help: "This increments when a server sends a (potentially blocking) cross datacenter RPC query.", + Help: "Increments when a server sends a (potentially blocking) cross datacenter RPC query.", }, { Name: []string{"rpc", "query"}, - Help: "This increments when a server receives a new blocking RPC request, indicating the rate of new blocking query calls.", + Help: "Increments when a server receives a new blocking RPC request, indicating the rate of new blocking query calls.", }, } var RPCGauges = []prometheus.GaugeDefinition{ { Name: []string{"rpc", "queries_blocking"}, - Help: "This shows the current number of in-flight blocking queries the server is handling.", + Help: "Shows the current number of in-flight blocking queries the server 
is handling.", }, } var RPCSummaries = []prometheus.SummaryDefinition{ { Name: []string{"rpc", "consistentRead"}, - Help: "This measures the time spent confirming that a consistent read can be performed.", + Help: "Measures the time spent confirming that a consistent read can be performed.", }, } diff --git a/agent/consul/segment_oss.go b/agent/consul/segment_oss.go index db910e8c8d32..690132c34749 100644 --- a/agent/consul/segment_oss.go +++ b/agent/consul/segment_oss.go @@ -15,7 +15,7 @@ import ( var SegmentOSSSummaries = []prometheus.SummaryDefinition{ { Name: []string{"leader", "reconcile"}, - Help: "This measures the time spent updating the raft store from the serf member information.", + Help: "Measures the time spent updating the raft store from the serf member information.", }, } diff --git a/agent/consul/session_endpoint.go b/agent/consul/session_endpoint.go index 669e6388006a..d3d360488352 100644 --- a/agent/consul/session_endpoint.go +++ b/agent/consul/session_endpoint.go @@ -17,11 +17,11 @@ import ( var SessionEndpointSummaries = []prometheus.SummaryDefinition{ { Name: []string{"session", "apply"}, - Help: "This measures the time spent applying a session update.", + Help: "Measures the time spent applying a session update.", }, { Name: []string{"session", "renew"}, - Help: "This measures the time spent renewing a session.", + Help: "Measures the time spent renewing a session.", }, } diff --git a/agent/consul/session_ttl.go b/agent/consul/session_ttl.go index 193dc18e31af..15c77a24a259 100644 --- a/agent/consul/session_ttl.go +++ b/agent/consul/session_ttl.go @@ -12,7 +12,7 @@ import ( var SessionGauges = []prometheus.GaugeDefinition{ { Name: []string{"session_ttl", "active"}, - Help: "This tracks the active number of sessions being tracked.", + Help: "Tracks the active number of sessions being tracked.", }, { Name: []string{"raft", "applied_index"}, @@ -27,7 +27,7 @@ var SessionGauges = []prometheus.GaugeDefinition{ var SessionSummaries = 
[]prometheus.SummaryDefinition{ { Name: []string{"session_ttl", "invalidate"}, - Help: "This measures the time spent invalidating an expired session.", + Help: "Measures the time spent invalidating an expired session.", }, } diff --git a/agent/consul/txn_endpoint.go b/agent/consul/txn_endpoint.go index c4a9314314fa..9febc8b89fff 100644 --- a/agent/consul/txn_endpoint.go +++ b/agent/consul/txn_endpoint.go @@ -15,11 +15,11 @@ import ( var TxnSummaries = []prometheus.SummaryDefinition{ { Name: []string{"txn", "apply"}, - Help: "This measures the time spent applying a transaction operation.", + Help: "Measures the time spent applying a transaction operation.", }, { Name: []string{"txn", "read"}, - Help: "This measures the time spent returning a read transaction.", + Help: "Measures the time spent returning a read transaction.", }, } diff --git a/agent/consul/usagemetrics/usagemetrics.go b/agent/consul/usagemetrics/usagemetrics.go index ac74eca2e13b..da09890e5fb8 100644 --- a/agent/consul/usagemetrics/usagemetrics.go +++ b/agent/consul/usagemetrics/usagemetrics.go @@ -16,15 +16,15 @@ import ( var Gauges = []prometheus.GaugeDefinition{ { Name: []string{"consul", "state", "nodes"}, - Help: "This measures the current number of nodes registered with Consul. It is only emitted by Consul servers. Added in v1.9.0.", + Help: "Measures the current number of nodes registered with Consul. It is only emitted by Consul servers. Added in v1.9.0.", }, { Name: []string{"consul", "state", "services"}, - Help: "This measures the current number of unique services registered with Consul, based on service name. It is only emitted by Consul servers. Added in v1.9.0.", + Help: "Measures the current number of unique services registered with Consul, based on service name. It is only emitted by Consul servers. Added in v1.9.0.", }, { Name: []string{"consul", "state", "service_instances"}, - Help: "This measures the current number of unique services registered with Consul, based on service name. 
It is only emitted by Consul servers. Added in v1.9.0.", + Help: "Measures the current number of unique services registered with Consul, based on service name. It is only emitted by Consul servers. Added in v1.9.0.", }, } diff --git a/agent/dns.go b/agent/dns.go index 880184c398f1..d8e20003dd5c 100644 --- a/agent/dns.go +++ b/agent/dns.go @@ -31,18 +31,18 @@ import ( var DNSCounters = []prometheus.CounterDefinition{ { Name: []string{"dns", "stale_queries"}, - Help: "This increments when an agent serves a query within the allowed stale threshold.", + Help: "Increments when an agent serves a query within the allowed stale threshold.", }, } var DNSSummaries = []prometheus.SummaryDefinition{ { Name: []string{"dns", "ptr_query"}, - Help: "This measures the time spent handling a reverse DNS query for the given node.", + Help: "Measures the time spent handling a reverse DNS query for the given node.", }, { Name: []string{"dns", "domain_query"}, - Help: "This measures the time spent handling a domain query for the given node.", + Help: "Measures the time spent handling a domain query for the given node.", }, } diff --git a/agent/grpc/stats.go b/agent/grpc/stats.go index 309b21f18ad8..7ba96f91f4d8 100644 --- a/agent/grpc/stats.go +++ b/agent/grpc/stats.go @@ -13,37 +13,37 @@ import ( var StatsGauges = []prometheus.GaugeDefinition{ { Name: []string{"grpc", "server", "connections"}, - Help: "This metric measures the number of active gRPC connections open on the server.", + Help: "Measures the number of active gRPC connections open on the server.", }, { Name: []string{"grpc", "client", "connections"}, - Help: "This metric measures the number of active gRPC connections open from the client agent to any Consul servers.", + Help: "Measures the number of active gRPC connections open from the client agent to any Consul servers.", }, { Name: []string{"grpc", "server", "streams"}, - Help: "This metric measures the number of active gRPC streams handled by the server.", + Help: 
"Measures the number of active gRPC streams handled by the server.", }, } var StatsCounters = []prometheus.CounterDefinition{ { Name: []string{"grpc", "client", "request", "count"}, - Help: "This metric counts the number of gRPC requests made by the client agent to a Consul server.", + Help: "Counts the number of gRPC requests made by the client agent to a Consul server.", }, { Name: []string{"grpc", "server", "request", "count"}, - Help: "This metric counts the number of gRPC requests received by the server.", + Help: "Counts the number of gRPC requests received by the server.", }, { Name: []string{"grpc", "client", "connection", "count"}, - Help: "This metric counts the number of new gRPC connections opened by the client agent to a Consul server.", + Help: "Counts the number of new gRPC connections opened by the client agent to a Consul server.", }, { Name: []string{"grpc", "server", "connection", "count"}, - Help: "This metric counts the number of new gRPC connections received by the server.", + Help: "Counts the number of new gRPC connections received by the server.", }, { Name: []string{"grpc", "server", "stream", "count"}, - Help: "This metric counts the number of new gRPC streams received by the server.", + Help: "Counts the number of new gRPC streams received by the server.", }, } diff --git a/agent/local/state.go b/agent/local/state.go index 8a74189e3e55..b4414e91091a 100644 --- a/agent/local/state.go +++ b/agent/local/state.go @@ -23,27 +23,27 @@ import ( var StateCounters = []prometheus.CounterDefinition{ { Name: []string{"acl", "blocked", "service", "registration"}, - Help: "This increments whenever a registration fails for a service (blocked by an ACL)", + Help: "Increments whenever a registration fails for a service (blocked by an ACL)", }, { Name: []string{"acl", "blocked", "service", "deregistration"}, - Help: "This increments whenever a deregistration fails for a service (blocked by an ACL)", + Help: "Increments whenever a deregistration fails for 
a service (blocked by an ACL)", }, { Name: []string{"acl", "blocked", "check", "registration"}, - Help: "This increments whenever a registration fails for a check (blocked by an ACL)", + Help: "Increments whenever a registration fails for a check (blocked by an ACL)", }, { Name: []string{"acl", "blocked", "check", "deregistration"}, - Help: "This increments whenever a deregistration fails for a check (blocked by an ACL)", + Help: "Increments whenever a deregistration fails for a check (blocked by an ACL)", }, { Name: []string{"acl", "blocked", "node", "registration"}, - Help: "This increments whenever a registration fails for a node (blocked by an ACL)", + Help: "Increments whenever a registration fails for a node (blocked by an ACL)", }, { Name: []string{"acl", "blocked", "node", "deregistration"}, - Help: "This increments whenever a deregistration fails for a node (blocked by an ACL)", + Help: "Increments whenever a deregistration fails for a node (blocked by an ACL)", }, } From 5e0e4098c9fe1a714e11a87e20cda44404c1ff5c Mon Sep 17 00:00:00 2001 From: Kit Patella Date: Mon, 16 Nov 2020 12:44:47 -0800 Subject: [PATCH 05/13] push prometheus sink definitions into prometheus.PrometheusOpts --- agent/agent_endpoint.go | 2 +- agent/config/builder.go | 5 ++++- agent/setup.go | 14 ++++++------ connect/proxy/proxy.go | 2 +- lib/telemetry.go | 47 +++++++++++++---------------------------- 5 files changed, 28 insertions(+), 42 deletions(-) diff --git a/agent/agent_endpoint.go b/agent/agent_endpoint.go index 73e0f5364033..49721f91256b 100644 --- a/agent/agent_endpoint.go +++ b/agent/agent_endpoint.go @@ -136,7 +136,7 @@ func (s *HTTPHandlers) AgentMetrics(resp http.ResponseWriter, req *http.Request) return nil, acl.ErrPermissionDenied } if enablePrometheusOutput(req) { - if s.agent.config.Telemetry.PrometheusRetentionTime < 1 { + if s.agent.config.Telemetry.PrometheusOpts.Expiration < 1 { resp.WriteHeader(http.StatusUnsupportedMediaType) fmt.Fprint(resp, "Prometheus is not
enabled since its retention time is not positive") return nil, nil diff --git a/agent/config/builder.go b/agent/config/builder.go index 1c1798dc76d5..062fb440f743 100644 --- a/agent/config/builder.go +++ b/agent/config/builder.go @@ -17,6 +17,7 @@ import ( "strings" "time" + "github.com/armon/go-metrics/prometheus" "github.com/hashicorp/go-bexpr" "github.com/hashicorp/go-hclog" "github.com/hashicorp/go-multierror" @@ -942,13 +943,15 @@ func (b *Builder) Build() (rt RuntimeConfig, err error) { DisableHostname: b.boolVal(c.Telemetry.DisableHostname), DogstatsdAddr: b.stringVal(c.Telemetry.DogstatsdAddr), DogstatsdTags: c.Telemetry.DogstatsdTags, - PrometheusRetentionTime: b.durationVal("prometheus_retention_time", c.Telemetry.PrometheusRetentionTime), FilterDefault: b.boolVal(c.Telemetry.FilterDefault), AllowedPrefixes: telemetryAllowedPrefixes, BlockedPrefixes: telemetryBlockedPrefixes, MetricsPrefix: b.stringVal(c.Telemetry.MetricsPrefix), StatsdAddr: b.stringVal(c.Telemetry.StatsdAddr), StatsiteAddr: b.stringVal(c.Telemetry.StatsiteAddr), + PrometheusOpts: prometheus.PrometheusOpts{ + Expiration: b.durationVal("prometheus_retention_time", c.Telemetry.PrometheusRetentionTime), + }, }, // Agent diff --git a/agent/setup.go b/agent/setup.go index 49a586ebbce2..744840a15509 100644 --- a/agent/setup.go +++ b/agent/setup.go @@ -78,7 +78,11 @@ func NewBaseDeps(configLoader ConfigLoader, logOut io.Writer) (BaseDeps, error) return d, fmt.Errorf("failed to setup node ID: %w", err) } - d.MetricsHandler, err = lib.InitTelemetry(cfg.Telemetry, getPrometheusDefs()) + gauges, counters, summaries := getPrometheusDefs() + cfg.Telemetry.PrometheusOpts.GaugeDefinitions = gauges + cfg.Telemetry.PrometheusOpts.CounterDefinitions = counters + cfg.Telemetry.PrometheusOpts.SummaryDefinitions = summaries + d.MetricsHandler, err = lib.InitTelemetry(cfg.Telemetry) if err != nil { return d, fmt.Errorf("failed to initialize telemetry: %w", err) } @@ -186,7 +190,7 @@ func registerWithGRPC(b 
grpcresolver.Builder) { // getPrometheusDefs reaches into every slice of prometheus defs we've defined in each part of the agent, and appends // all of our slices into one nice slice of definitions per metric type for the Consul agent to pass to go-metrics. -func getPrometheusDefs() lib.PrometheusDefs { +func getPrometheusDefs() ([]prometheus.GaugeDefinition, []prometheus.CounterDefinition, []prometheus.SummaryDefinition) { serviceName := []string{"consul"} var gauges = [][]prometheus.GaugeDefinition{ cache.Gauges, @@ -290,9 +294,5 @@ func getPrometheusDefs() lib.PrometheusDefs { summaryDefs = append(summaryDefs, withService...) } - return lib.PrometheusDefs{ - Gauges: gaugeDefs, - Counters: counterDefs, - Summaries: summaryDefs, - } + return gaugeDefs, counterDefs, summaryDefs } diff --git a/connect/proxy/proxy.go b/connect/proxy/proxy.go index 54df4e309b5c..a29cf352e8a3 100644 --- a/connect/proxy/proxy.go +++ b/connect/proxy/proxy.go @@ -56,7 +56,7 @@ func (p *Proxy) Serve() error { // Setup telemetry if configured // NOTE(kit): As far as I can tell, all of the metrics in the proxy are generated at runtime, so we // don't have any static metrics we initialize at start. - _, err := lib.InitTelemetry(newCfg.Telemetry, lib.EmptyPrometheusDefs()) + _, err := lib.InitTelemetry(newCfg.Telemetry) if err != nil { p.logger.Error("proxy telemetry config error", "error", err) } diff --git a/lib/telemetry.go b/lib/telemetry.go index fe360172a60d..27ef0043df41 100644 --- a/lib/telemetry.go +++ b/lib/telemetry.go @@ -154,14 +154,6 @@ type TelemetryConfig struct { // hcl: telemetry { dogstatsd_tags = []string } DogstatsdTags []string `json:"dogstatsd_tags,omitempty" mapstructure:"dogstatsd_tags"` - // PrometheusRetentionTime is the retention time for prometheus metrics if greater than 0. - // A value of 0 disable Prometheus support. 
Regarding Prometheus, it is considered a good - // practice to put large values here (such as a few days), and at least the interval between - // prometheus requests. - // - // hcl: telemetry { prometheus_retention_time = "duration" } - PrometheusRetentionTime time.Duration `json:"prometheus_retention_time,omitempty" mapstructure:"prometheus_retention_time"` - // FilterDefault is the default for whether to allow a metric that's not // covered by the filter. // @@ -199,6 +191,13 @@ type TelemetryConfig struct { // // hcl: telemetry { statsite_address = string } StatsiteAddr string `json:"statsite_address,omitempty" mapstructure:"statsite_address"` + + // PrometheusOpts provides configuration for the PrometheusSink. Currently the only configuration + // we acquire from hcl is the retention time. We also use definition slices that are set in agent setup + // before being passed to InitTelemetry. + // + // hcl: telemetry { prometheus_retention_time = "duration" } + PrometheusOpts prometheus.PrometheusOpts } // MergeDefaults copies any non-zero field from defaults into the current @@ -276,17 +275,17 @@ func dogstatdSink(cfg TelemetryConfig, hostname string) (metrics.MetricSink, err return sink, nil } -func prometheusSink(cfg TelemetryConfig, hostname string, defs PrometheusDefs) (metrics.MetricSink, error) { +func prometheusSink(cfg TelemetryConfig, hostname string) (metrics.MetricSink, error) { - if cfg.PrometheusRetentionTime.Nanoseconds() < 1 { + if cfg.PrometheusOpts.Expiration.Nanoseconds() < 1 { return nil, nil } prometheusOpts := prometheus.PrometheusOpts{ - Expiration: cfg.PrometheusRetentionTime, - GaugeDefinitions: defs.Gauges, - CounterDefinitions: defs.Counters, - SummaryDefinitions: defs.Summaries, + Expiration: cfg.PrometheusOpts.Expiration, + GaugeDefinitions: cfg.PrometheusOpts.GaugeDefinitions, + CounterDefinitions: cfg.PrometheusOpts.CounterDefinitions, + SummaryDefinitions: cfg.PrometheusOpts.SummaryDefinitions, } sink, err :=
prometheus.NewPrometheusSinkFrom(prometheusOpts) if err != nil { @@ -337,25 +336,9 @@ func circonusSink(cfg TelemetryConfig, hostname string) (metrics.MetricSink, err return sink, nil } -// PrometheusDefs wraps collections of metric definitions to pass into the PrometheusSink -type PrometheusDefs struct { - Gauges []prometheus.GaugeDefinition - Counters []prometheus.CounterDefinition - Summaries []prometheus.SummaryDefinition -} - -// EmptyPrometheusDefs returns a PrometheusDefs struct where each of the slices have zero elements, but not nil. -func EmptyPrometheusDefs() PrometheusDefs { - return PrometheusDefs{ - Gauges: []prometheus.GaugeDefinition{}, - Counters: []prometheus.CounterDefinition{}, - Summaries: []prometheus.SummaryDefinition{}, - } -} - // InitTelemetry configures go-metrics based on map of telemetry config // values as returned by Runtimecfg.Config(). -func InitTelemetry(cfg TelemetryConfig, defs PrometheusDefs) (*metrics.InmemSink, error) { +func InitTelemetry(cfg TelemetryConfig) (*metrics.InmemSink, error) { if cfg.Disable { return nil, nil } @@ -395,7 +378,7 @@ func InitTelemetry(cfg TelemetryConfig, defs PrometheusDefs) (*metrics.InmemSink return nil, err } - promSink, err := prometheusSink(cfg, metricsConf.HostName, defs) + promSink, err := prometheusSink(cfg, metricsConf.HostName) if err != nil { return nil, err } From b81edac7bb7cc7765823173cfc2c0ddeb444cc6d Mon Sep 17 00:00:00 2001 From: Kit Patella Date: Mon, 16 Nov 2020 14:01:12 -0800 Subject: [PATCH 06/13] use the MetricsPrefix to set the service name and provide as slice literal to avoid bugs from append modifying its first arg --- agent/setup.go | 23 +++++++++++++++-------- lib/telemetry.go | 2 +- 2 files changed, 16 insertions(+), 9 deletions(-) diff --git a/agent/setup.go b/agent/setup.go index 744840a15509..9efc565c927d 100644 --- a/agent/setup.go +++ b/agent/setup.go @@ -78,7 +78,7 @@ func NewBaseDeps(configLoader ConfigLoader, logOut io.Writer) (BaseDeps, error) return d, 
fmt.Errorf("failed to setup node ID: %w", err) } - gauges, counters, summaries := getPrometheusDefs() + gauges, counters, summaries := getPrometheusDefs(cfg.Telemetry) cfg.Telemetry.PrometheusOpts.GaugeDefinitions = gauges cfg.Telemetry.PrometheusOpts.CounterDefinitions = counters cfg.Telemetry.PrometheusOpts.SummaryDefinitions = summaries @@ -190,8 +190,8 @@ func registerWithGRPC(b grpcresolver.Builder) { // getPrometheusDefs reaches into every slice of prometheus defs we've defined in each part of the agent, and appends // all of our slices into one nice slice of definitions per metric type for the Consul agent to pass to go-metrics. -func getPrometheusDefs() ([]prometheus.GaugeDefinition, []prometheus.CounterDefinition, []prometheus.SummaryDefinition) { - serviceName := []string{"consul"} +func getPrometheusDefs(cfg lib.TelemetryConfig) ([]prometheus.GaugeDefinition, []prometheus.CounterDefinition, []prometheus.SummaryDefinition) { + // Build slice of slices for all gauge definitions var gauges = [][]prometheus.GaugeDefinition{ cache.Gauges, consul.AutopilotGauges, @@ -200,12 +200,15 @@ func getPrometheusDefs() ([]prometheus.GaugeDefinition, []prometheus.CounterDefi grpc.StatsGauges, usagemetrics.Gauges, } + // Flatten definitions + // NOTE(kit): Do we actually want to create a set here so we can ensure definition names are unique? var gaugeDefs []prometheus.GaugeDefinition for _, g := range gauges { // Set Consul to each definition's namespace + // TODO(kit): Prepending the service to each definition should be handled by go-metrics var withService []prometheus.GaugeDefinition for _, gauge := range g { - gauge.Name = append(serviceName, gauge.Name...) + gauge.Name = append([]string{cfg.MetricsPrefix}, gauge.Name...) withService = append(withService, gauge) } gaugeDefs = append(gaugeDefs, withService...) 
@@ -239,12 +242,14 @@ func getPrometheusDefs() ([]prometheus.GaugeDefinition, []prometheus.CounterDefi local.StateCounters, raftCounters, } + // Flatten definitions + // NOTE(kit): Do we actually want to create a set here so we can ensure definition names are unique? var counterDefs []prometheus.CounterDefinition for _, c := range counters { - // Set Consul to each definition's namespace + // TODO(kit): Prepending the service to each definition should be handled by go-metrics var withService []prometheus.CounterDefinition for _, counter := range c { - counter.Name = append(serviceName, counter.Name...) + counter.Name = append([]string{cfg.MetricsPrefix}, counter.Name...) withService = append(withService, counter) } counterDefs = append(counterDefs, withService...) @@ -283,12 +288,14 @@ func getPrometheusDefs() ([]prometheus.GaugeDefinition, []prometheus.CounterDefi fsm.SnapshotSummaries, raftSummaries, } + // Flatten definitions + // NOTE(kit): Do we actually want to create a set here so we can ensure definition names are unique? var summaryDefs []prometheus.SummaryDefinition for _, s := range summaries { - // Set Consul to each definition's namespace + // TODO(kit): Prepending the service to each definition should be handled by go-metrics var withService []prometheus.SummaryDefinition for _, summary := range s { - summary.Name = append(serviceName, summary.Name...) + summary.Name = append([]string{cfg.MetricsPrefix}, summary.Name...) withService = append(withService, summary) } summaryDefs = append(summaryDefs, withService...) 
diff --git a/lib/telemetry.go b/lib/telemetry.go index 27ef0043df41..4b0a0435611a 100644 --- a/lib/telemetry.go +++ b/lib/telemetry.go @@ -4,7 +4,7 @@ import ( "reflect" "time" - metrics "github.com/armon/go-metrics" + "github.com/armon/go-metrics" "github.com/armon/go-metrics/circonus" "github.com/armon/go-metrics/datadog" "github.com/armon/go-metrics/prometheus" From 49f017bd429cd49073e16b0e93bcb38ef0335873 Mon Sep 17 00:00:00 2001 From: Kit Patella Date: Mon, 16 Nov 2020 14:01:40 -0800 Subject: [PATCH 07/13] prometheussink has the same number of params again --- lib/telemetry.go | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/lib/telemetry.go b/lib/telemetry.go index 4b0a0435611a..5c98bda7911f 100644 --- a/lib/telemetry.go +++ b/lib/telemetry.go @@ -377,12 +377,12 @@ func InitTelemetry(cfg TelemetryConfig) (*metrics.InmemSink, error) { if err := addSink(circonusSink); err != nil { return nil, err } - - promSink, err := prometheusSink(cfg, metricsConf.HostName) - if err != nil { + if err := addSink(circonusSink); err != nil { + return nil, err + } + if err := addSink(prometheusSink); err != nil { return nil, err } - sinks = append(sinks, promSink) if len(sinks) > 0 { sinks = append(sinks, memSink) From 2fe021f03cc05a79561c004ba3cea80991eb2c2f Mon Sep 17 00:00:00 2001 From: Kit Patella Date: Mon, 16 Nov 2020 14:16:12 -0800 Subject: [PATCH 08/13] update runtime_test to handle PrometheusOpts expiry field change --- agent/config/runtime_test.go | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/agent/config/runtime_test.go b/agent/config/runtime_test.go index aba609ec163a..e761ec95a1d8 100644 --- a/agent/config/runtime_test.go +++ b/agent/config/runtime_test.go @@ -8,6 +8,7 @@ import ( "errors" "flag" "fmt" + "github.com/armon/go-metrics/prometheus" "io/ioutil" "net" "os" @@ -7103,9 +7104,11 @@ func TestFullConfig(t *testing.T) { AllowedPrefixes: []string{"oJotS8XJ"}, BlockedPrefixes: []string{"cazlEhGn"}, MetricsPrefix: 
"ftO6DySn", - PrometheusRetentionTime: 15 * time.Second, StatsdAddr: "drce87cy", StatsiteAddr: "HpFwKB8R", + PrometheusOpts: prometheus.PrometheusOpts{ + Expiration: 15 * time.Second, + }, }, TLSCipherSuites: []uint16{tls.TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA, tls.TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA256}, TLSMinVersion: "pAOWafkR", From fc30f07cc7a0f598702bd82e5da7c13dbbc95131 Mon Sep 17 00:00:00 2001 From: Kit Patella Date: Mon, 16 Nov 2020 14:17:24 -0800 Subject: [PATCH 09/13] linting: sort and group import --- agent/config/runtime_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/agent/config/runtime_test.go b/agent/config/runtime_test.go index e761ec95a1d8..77a0a3d0a63c 100644 --- a/agent/config/runtime_test.go +++ b/agent/config/runtime_test.go @@ -8,7 +8,6 @@ import ( "errors" "flag" "fmt" - "github.com/armon/go-metrics/prometheus" "io/ioutil" "net" "os" @@ -19,6 +18,7 @@ import ( "testing" "time" + "github.com/armon/go-metrics/prometheus" "github.com/stretchr/testify/require" "github.com/hashicorp/consul/agent/cache" From ad4cebc1d8ad590f451924bd26dd0543f2602657 Mon Sep 17 00:00:00 2001 From: Kit Patella Date: Mon, 16 Nov 2020 15:22:36 -0800 Subject: [PATCH 10/13] fix some tests that were broken from the TelemetryConfig change --- agent/config/runtime_test.go | 13 ++++++++++--- lib/telemetry.go | 12 +++++------- lib/telemetry_test.go | 9 +++++++++ 3 files changed, 24 insertions(+), 10 deletions(-) diff --git a/agent/config/runtime_test.go b/agent/config/runtime_test.go index e761ec95a1d8..7c8a35a2e667 100644 --- a/agent/config/runtime_test.go +++ b/agent/config/runtime_test.go @@ -8,7 +8,6 @@ import ( "errors" "flag" "fmt" - "github.com/armon/go-metrics/prometheus" "io/ioutil" "net" "os" @@ -19,6 +18,8 @@ import ( "testing" "time" + "github.com/armon/go-metrics/prometheus" + "github.com/stretchr/testify/require" "github.com/hashicorp/consul/agent/cache" @@ -7817,9 +7818,15 @@ func TestSanitize(t *testing.T) { "DogstatsdTags": [], 
"FilterDefault": false, "MetricsPrefix": "", - "PrometheusRetentionTime": "0s", "StatsdAddr": "", - "StatsiteAddr": "" + "StatsiteAddr": "", + "PrometheusOpts": { + "Expiration": "0s", + "Registerer": null, + "GaugeDefinitions": [], + "CounterDefinitions": [], + "SummaryDefinitions": [] + } }, "TranslateWANAddrs": false, "TxnMaxReqLen": 5678000000000000, diff --git a/lib/telemetry.go b/lib/telemetry.go index 5c98bda7911f..ceef4f973682 100644 --- a/lib/telemetry.go +++ b/lib/telemetry.go @@ -220,6 +220,10 @@ func (c *TelemetryConfig) MergeDefaults(defaults *TelemetryConfig) { // implementing this for the types we actually have for now. Test failure // should catch the case where we add new types later. switch f.Kind() { + case reflect.Struct: + if f.Type() == reflect.TypeOf(prometheus.PrometheusOpts{}) { + continue + } case reflect.Slice: if !f.IsNil() { continue @@ -281,13 +285,7 @@ func prometheusSink(cfg TelemetryConfig, hostname string) (metrics.MetricSink, e return nil, nil } - prometheusOpts := prometheus.PrometheusOpts{ - Expiration: cfg.PrometheusOpts.Expiration, - GaugeDefinitions: cfg.PrometheusOpts.GaugeDefinitions, - CounterDefinitions: cfg.PrometheusOpts.CounterDefinitions, - SummaryDefinitions: cfg.PrometheusOpts.SummaryDefinitions, - } - sink, err := prometheus.NewPrometheusSinkFrom(prometheusOpts) + sink, err := prometheus.NewPrometheusSinkFrom(cfg.PrometheusOpts) if err != nil { return nil, err } diff --git a/lib/telemetry_test.go b/lib/telemetry_test.go index f81b7b5c1a4f..4ee012f1ec0c 100644 --- a/lib/telemetry_test.go +++ b/lib/telemetry_test.go @@ -5,11 +5,14 @@ import ( "testing" "time" + "github.com/armon/go-metrics/prometheus" + "github.com/stretchr/testify/require" ) func makeFullTelemetryConfig(t *testing.T) TelemetryConfig { var ( + promOpts = prometheus.PrometheusOpts{} strSliceVal = []string{"foo"} strVal = "foo" intVal = int64(1 * time.Second) @@ -27,6 +30,12 @@ func makeFullTelemetryConfig(t *testing.T) TelemetryConfig { // now for 
brevity but will fail the test if a new field type is added since // this is likely not implemented in MergeDefaults either. switch f.Kind() { + case reflect.Struct: + if f.Type() != reflect.TypeOf(promOpts) { + t.Fatalf("unknown struct type in TelemetryConfig: actual %v, expected: %v", f.Type(), reflect.TypeOf(promOpts)) + } + // TODO(kit): This should delve into the fields and set them individually rather than using an empty struct + f.Set(reflect.ValueOf(promOpts)) case reflect.Slice: if f.Type() != reflect.TypeOf(strSliceVal) { t.Fatalf("unknown slice type in TelemetryConfig." + From eda553ef1d04e5b8a6a2fe0202adb43a445ff05c Mon Sep 17 00:00:00 2001 From: Kit Patella Date: Mon, 16 Nov 2020 15:32:18 -0800 Subject: [PATCH 11/13] add changelog entry --- .changelog/9198.txt | 3 +++ 1 file changed, 3 insertions(+) create mode 100644 .changelog/9198.txt diff --git a/.changelog/9198.txt b/.changelog/9198.txt new file mode 100644 index 000000000000..6a4c87794b79 --- /dev/null +++ b/.changelog/9198.txt @@ -0,0 +1,3 @@ +```release-note:improvement +server: All metrics should be present and available to prometheus scrapers when Consul starts. If any non-deprecated metrics are missing please submit an issue with its name. +``` From bd0c7c2cd6e0de48096c790a0694c875ece81339 Mon Sep 17 00:00:00 2001 From: Kit Patella Date: Mon, 16 Nov 2020 15:53:52 -0800 Subject: [PATCH 12/13] add note about deleting TelemetryConfig.MergeDefaults in the future --- lib/telemetry.go | 1 + 1 file changed, 1 insertion(+) diff --git a/lib/telemetry.go b/lib/telemetry.go index ceef4f973682..d85e51d45df0 100644 --- a/lib/telemetry.go +++ b/lib/telemetry.go @@ -202,6 +202,7 @@ type TelemetryConfig struct { // MergeDefaults copies any non-zero field from defaults into the current // config. 
+// TODO(kit): We no longer use this function and can probably delete it func (c *TelemetryConfig) MergeDefaults(defaults *TelemetryConfig) { if defaults == nil { return From 52c53b2c20d0fd3986068dac03c7443c61ed56f8 Mon Sep 17 00:00:00 2001 From: Kit Patella Date: Mon, 16 Nov 2020 15:54:24 -0800 Subject: [PATCH 13/13] changelog component should mention agent not just server --- .changelog/9198.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.changelog/9198.txt b/.changelog/9198.txt index 6a4c87794b79..3f68c3b4b823 100644 --- a/.changelog/9198.txt +++ b/.changelog/9198.txt @@ -1,3 +1,3 @@ ```release-note:improvement -server: All metrics should be present and available to prometheus scrapers when Consul starts. If any non-deprecated metrics are missing please submit an issue with its name. +agent: All metrics should be present and available to prometheus scrapers when Consul starts. If any non-deprecated metrics are missing please submit an issue with its name. ```