Skip to content

Commit

Permalink
Add DC and NS support for Envoy metrics (#9207)
Browse files Browse the repository at this point in the history
This PR updates the tags that we generate for Envoy stats.

Several of these are breaking changes, since a filter can only have one stat prefix and we can't keep both the old and the new one.
  • Loading branch information
freddygv committed Nov 17, 2020
1 parent 88b013b commit ef7ee68
Show file tree
Hide file tree
Showing 220 changed files with 2,792 additions and 914 deletions.
3 changes: 3 additions & 0 deletions .changelog/9207.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
```release-note:breaking-change
connect: Update Envoy metric names and labels for proxy listeners so that attributes like datacenter and namespace can be extracted.
```
85 changes: 52 additions & 33 deletions agent/xds/listeners.go
Original file line number Diff line number Diff line change
Expand Up @@ -319,7 +319,7 @@ func (s *Server) makeIngressGatewayListeners(address string, cfgSnap *proxycfg.C
filterName: listenerKey.RouteName(),
routeName: listenerKey.RouteName(),
cluster: "",
statPrefix: "ingress_upstream_",
statPrefix: "ingress_upstream.",
routePath: "",
ingress: false,
httpAuthzFilter: nil,
Expand Down Expand Up @@ -771,7 +771,7 @@ func (s *Server) makeTerminatingGatewayListener(

// This fallback catch-all filter ensures a listener will be present so that health checks pass.
// Envoy will reset these connections, since known endpoints are caught by the filter chain matches above.
tcpProxy, err := makeTCPProxyFilter(name, "", "terminating_gateway_")
tcpProxy, err := makeTCPProxyFilter(name, "", "terminating_gateway.")
if err != nil {
return nil, err
}
Expand Down Expand Up @@ -821,7 +821,7 @@ func (s *Server) makeFilterChainTerminatingGateway(
// Lastly, we set up the actual proxying component. For L4 this is a straight
// tcp proxy. For L7 this is a very hands-off HTTP proxy just to inject an
// HTTP filter to do intention checks here instead.
statPrefix := fmt.Sprintf("terminating_gateway_%s_%s_", service.NamespaceOrDefault(), service.Name)
statPrefix := fmt.Sprintf("terminating_gateway.%s.%s.", service.NamespaceOrDefault(), service.Name)
opts := listenerFilterOpts{
protocol: protocol,
filterName: listener,
Expand Down Expand Up @@ -868,7 +868,7 @@ func (s *Server) makeMeshGatewayListener(name, addr string, port int, cfgSnap *p

// The cluster name here doesn't matter as the sni_cluster
// filter will fill it in for us.
tcpProxy, err := makeTCPProxyFilter(name, "", "mesh_gateway_local_")
tcpProxy, err := makeTCPProxyFilter(name, "", "mesh_gateway_local.")
if err != nil {
return nil, err
}
Expand All @@ -891,8 +891,8 @@ func (s *Server) makeMeshGatewayListener(name, addr string, port int, cfgSnap *p
continue // skip local
}
clusterName := connect.DatacenterSNI(dc, cfgSnap.Roots.TrustDomain)
filterName := fmt.Sprintf("%s_%s", name, dc)
dcTCPProxy, err := makeTCPProxyFilter(filterName, clusterName, "mesh_gateway_remote_")
filterName := fmt.Sprintf("%s.%s", name, dc)
dcTCPProxy, err := makeTCPProxyFilter(filterName, clusterName, "mesh_gateway_remote.")
if err != nil {
return nil, err
}
Expand All @@ -913,8 +913,8 @@ func (s *Server) makeMeshGatewayListener(name, addr string, port int, cfgSnap *p
continue // skip local
}
clusterName := cfgSnap.ServerSNIFn(dc, "")
filterName := fmt.Sprintf("%s_%s", name, dc)
dcTCPProxy, err := makeTCPProxyFilter(filterName, clusterName, "mesh_gateway_remote_")
filterName := fmt.Sprintf("%s.%s", name, dc)
dcTCPProxy, err := makeTCPProxyFilter(filterName, clusterName, "mesh_gateway_remote.")
if err != nil {
return nil, err
}
Expand All @@ -933,8 +933,8 @@ func (s *Server) makeMeshGatewayListener(name, addr string, port int, cfgSnap *p
for _, srv := range cfgSnap.MeshGateway.ConsulServers {
clusterName := cfgSnap.ServerSNIFn(cfgSnap.Datacenter, srv.Node.Node)

filterName := fmt.Sprintf("%s_%s", name, cfgSnap.Datacenter)
dcTCPProxy, err := makeTCPProxyFilter(filterName, clusterName, "mesh_gateway_local_server_")
filterName := fmt.Sprintf("%s.%s", name, cfgSnap.Datacenter)
dcTCPProxy, err := makeTCPProxyFilter(filterName, clusterName, "mesh_gateway_local_server.")
if err != nil {
return nil, err
}
Expand Down Expand Up @@ -976,38 +976,61 @@ func (s *Server) makeUpstreamListenerForDiscoveryChain(
}

useRDS := true
clusterName := ""
var (
clusterName string
destination, datacenter, namespace string
)
if chain == nil || chain.IsDefault() {
useRDS = false

dc := u.Datacenter
if dc == "" {
dc = cfgSnap.Datacenter
}
sni := connect.UpstreamSNI(u, "", dc, cfgSnap.Roots.TrustDomain)
destination, datacenter, namespace = u.DestinationName, dc, u.DestinationNamespace

useRDS = false
sni := connect.UpstreamSNI(u, "", dc, cfgSnap.Roots.TrustDomain)
clusterName = CustomizeClusterName(sni, chain)

} else if cfg.Protocol == "tcp" {
startNode := chain.Nodes[chain.StartNode]
if startNode == nil {
return nil, fmt.Errorf("missing first node in compiled discovery chain for: %s", chain.ServiceName)
} else if startNode.Type != structs.DiscoveryGraphNodeTypeResolver {
return nil, fmt.Errorf("unexpected first node in discovery chain using protocol=%q: %s", cfg.Protocol, startNode.Type)
} else {
destination, datacenter, namespace = chain.ServiceName, chain.Datacenter, chain.Namespace

if cfg.Protocol == "tcp" {
useRDS = false

startNode := chain.Nodes[chain.StartNode]
if startNode == nil {
return nil, fmt.Errorf("missing first node in compiled discovery chain for: %s", chain.ServiceName)
}
if startNode.Type != structs.DiscoveryGraphNodeTypeResolver {
return nil, fmt.Errorf("unexpected first node in discovery chain using protocol=%q: %s", cfg.Protocol, startNode.Type)
}
targetID := startNode.Resolver.Target
target := chain.Targets[targetID]

clusterName = CustomizeClusterName(target.Name, chain)
}
targetID := startNode.Resolver.Target
target := chain.Targets[targetID]
}

useRDS = false
clusterName = CustomizeClusterName(target.Name, chain)
// Default the namespace to match how SNIs are generated
if namespace == "" {
namespace = structs.IntentionDefaultNamespace
}
filterName := fmt.Sprintf("%s.%s.%s", destination, namespace, datacenter)

if u.DestinationType == structs.UpstreamDestTypePreparedQuery {
// Avoid encoding dc and namespace for prepared queries.
// Those are defined in the query itself and are not available here.
filterName = upstreamID
}

opts := listenerFilterOpts{
useRDS: useRDS,
protocol: cfg.Protocol,
filterName: upstreamID,
filterName: filterName,
routeName: upstreamID,
cluster: clusterName,
statPrefix: "upstream_",
statPrefix: "upstream.",
routePath: "",
ingress: false,
httpAuthzFilter: nil,
Expand Down Expand Up @@ -1120,31 +1143,27 @@ func makeSNIClusterFilter() (*envoylistener.Filter, error) {

func makeTCPProxyFilter(filterName, cluster, statPrefix string) (*envoylistener.Filter, error) {
cfg := &envoytcp.TcpProxy{
StatPrefix: makeStatPrefix("tcp", statPrefix, filterName),
StatPrefix: makeStatPrefix(statPrefix, filterName),
ClusterSpecifier: &envoytcp.TcpProxy_Cluster{Cluster: cluster},
}
return makeFilter("envoy.tcp_proxy", cfg, false)
}

func makeStatPrefix(protocol, prefix, filterName string) string {
func makeStatPrefix(prefix, filterName string) string {
// Replace colons with underscores here because Envoy does that in the
// metrics for the actual clusters but not in the stat prefix, while
// dashboards assume the two will match.
return fmt.Sprintf("%s%s_%s", prefix, strings.Replace(filterName, ":", "_", -1), protocol)
return fmt.Sprintf("%s%s", prefix, strings.Replace(filterName, ":", "_", -1))
}

func makeHTTPFilter(opts listenerFilterOpts) (*envoylistener.Filter, error) {
op := envoyhttp.HttpConnectionManager_Tracing_INGRESS
if !opts.ingress {
op = envoyhttp.HttpConnectionManager_Tracing_EGRESS
}
proto := "http"
if opts.protocol == "grpc" {
proto = opts.protocol
}

cfg := &envoyhttp.HttpConnectionManager{
StatPrefix: makeStatPrefix(proto, opts.statPrefix, opts.filterName),
StatPrefix: makeStatPrefix(opts.statPrefix, opts.filterName),
CodecType: envoyhttp.HttpConnectionManager_AUTO,
HttpFilters: []*envoyhttp.HttpFilter{
{
Expand Down
6 changes: 3 additions & 3 deletions agent/xds/listeners_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -577,7 +577,7 @@ func expectListenerJSONResources(t *testing.T, snap *proxycfg.ConfigSnapshot) ma
"name": "envoy.tcp_proxy",
"config": {
"cluster": "local_app",
"stat_prefix": "public_listener_tcp"
"stat_prefix": "public_listener"
}
}
]
Expand All @@ -600,7 +600,7 @@ func expectListenerJSONResources(t *testing.T, snap *proxycfg.ConfigSnapshot) ma
"name": "envoy.tcp_proxy",
"config": {
"cluster": "db.default.dc1.internal.11111111-2222-3333-4444-555555555555.consul",
"stat_prefix": "upstream_db_tcp"
"stat_prefix": "upstream.db.default.dc1"
}
}
]
Expand All @@ -623,7 +623,7 @@ func expectListenerJSONResources(t *testing.T, snap *proxycfg.ConfigSnapshot) ma
"name": "envoy.tcp_proxy",
"config": {
"cluster": "geo-cache.default.dc1.query.11111111-2222-3333-4444-555555555555.consul",
"stat_prefix": "upstream_prepared_query_geo-cache_tcp"
"stat_prefix": "upstream.prepared_query_geo-cache"
}
}
]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@
},
"route_config_name": "db"
},
"stat_prefix": "upstream_db_grpc",
"stat_prefix": "upstream.db.default.dc1",
"tracing": {
"operation_name": "EGRESS",
"random_sampling": {
Expand Down Expand Up @@ -63,7 +63,7 @@
"name": "envoy.tcp_proxy",
"config": {
"cluster": "geo-cache.default.dc1.query.11111111-2222-3333-4444-555555555555.consul",
"stat_prefix": "upstream_prepared_query_geo-cache_tcp"
"stat_prefix": "upstream.prepared_query_geo-cache"
}
}
]
Expand Down Expand Up @@ -117,7 +117,7 @@
"name": "envoy.tcp_proxy",
"config": {
"cluster": "local_app",
"stat_prefix": "public_listener_tcp"
"stat_prefix": "public_listener"
}
}
]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@
},
"route_config_name": "db"
},
"stat_prefix": "upstream_db_grpc",
"stat_prefix": "upstream.db.default.dc1",
"tracing": {
"operation_name": "EGRESS",
"random_sampling": {
Expand Down Expand Up @@ -63,7 +63,7 @@
"name": "envoy.tcp_proxy",
"config": {
"cluster": "geo-cache.default.dc1.query.11111111-2222-3333-4444-555555555555.consul",
"stat_prefix": "upstream_prepared_query_geo-cache_tcp"
"stat_prefix": "upstream.prepared_query_geo-cache"
}
}
]
Expand Down Expand Up @@ -117,7 +117,7 @@
"name": "envoy.tcp_proxy",
"config": {
"cluster": "local_app",
"stat_prefix": "public_listener_tcp"
"stat_prefix": "public_listener"
}
}
]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@
},
"route_config_name": "db"
},
"stat_prefix": "upstream_db_grpc",
"stat_prefix": "upstream.db.default.dc1",
"tracing": {
"operation_name": "EGRESS",
"random_sampling": {
Expand Down Expand Up @@ -63,7 +63,7 @@
"name": "envoy.tcp_proxy",
"config": {
"cluster": "geo-cache.default.dc1.query.11111111-2222-3333-4444-555555555555.consul",
"stat_prefix": "upstream_prepared_query_geo-cache_tcp"
"stat_prefix": "upstream.prepared_query_geo-cache"
}
}
]
Expand Down Expand Up @@ -117,7 +117,7 @@
"name": "envoy.tcp_proxy",
"config": {
"cluster": "local_app",
"stat_prefix": "public_listener_tcp"
"stat_prefix": "public_listener"
}
}
]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@
},
"route_config_name": "db"
},
"stat_prefix": "upstream_db_grpc",
"stat_prefix": "upstream.db.default.dc1",
"tracing": {
"operation_name": "EGRESS",
"random_sampling": {
Expand Down Expand Up @@ -63,7 +63,7 @@
"name": "envoy.tcp_proxy",
"config": {
"cluster": "geo-cache.default.dc1.query.11111111-2222-3333-4444-555555555555.consul",
"stat_prefix": "upstream_prepared_query_geo-cache_tcp"
"stat_prefix": "upstream.prepared_query_geo-cache"
}
}
]
Expand Down Expand Up @@ -117,7 +117,7 @@
"name": "envoy.tcp_proxy",
"config": {
"cluster": "local_app",
"stat_prefix": "public_listener_tcp"
"stat_prefix": "public_listener"
}
}
]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
"name": "envoy.tcp_proxy",
"config": {
"cluster": "db.default.dc1.internal.11111111-2222-3333-4444-555555555555.consul",
"stat_prefix": "upstream_db_tcp"
"stat_prefix": "upstream.db.default.dc1"
}
}
]
Expand All @@ -40,7 +40,7 @@
"name": "envoy.tcp_proxy",
"config": {
"cluster": "geo-cache.default.dc1.query.11111111-2222-3333-4444-555555555555.consul",
"stat_prefix": "upstream_prepared_query_geo-cache_tcp"
"stat_prefix": "upstream.prepared_query_geo-cache"
}
}
]
Expand Down Expand Up @@ -94,7 +94,7 @@
"name": "envoy.tcp_proxy",
"config": {
"cluster": "local_app",
"stat_prefix": "public_listener_tcp"
"stat_prefix": "public_listener"
}
}
]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
"name": "envoy.tcp_proxy",
"config": {
"cluster": "db.default.dc1.internal.11111111-2222-3333-4444-555555555555.consul",
"stat_prefix": "upstream_db_tcp"
"stat_prefix": "upstream.db.default.dc1"
}
}
]
Expand All @@ -40,7 +40,7 @@
"name": "envoy.tcp_proxy",
"config": {
"cluster": "geo-cache.default.dc1.query.11111111-2222-3333-4444-555555555555.consul",
"stat_prefix": "upstream_prepared_query_geo-cache_tcp"
"stat_prefix": "upstream.prepared_query_geo-cache"
}
}
]
Expand Down Expand Up @@ -94,7 +94,7 @@
"name": "envoy.tcp_proxy",
"config": {
"cluster": "local_app",
"stat_prefix": "public_listener_tcp"
"stat_prefix": "public_listener"
}
}
]
Expand Down
Loading

0 comments on commit ef7ee68

Please sign in to comment.