Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

xds: Add passive health check config for upstreams (aka envoy outlier detection) #7713

Merged
merged 2 commits into from
May 8, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 2 additions & 4 deletions agent/xds/clusters.go
Original file line number Diff line number Diff line change
Expand Up @@ -360,8 +360,7 @@ func (s *Server) makeUpstreamClusterForPreparedQuery(upstream structs.Upstream,
CircuitBreakers: &envoycluster.CircuitBreakers{
Thresholds: makeThresholdsIfNeeded(cfg.Limits),
},
// Having an empty config enables outlier detection with default config.
OutlierDetection: &envoycluster.OutlierDetection{},
OutlierDetection: cfg.PassiveHealthCheck.AsOutlierDetection(),
}
if cfg.Protocol == "http2" || cfg.Protocol == "grpc" {
c.Http2ProtocolOptions = &envoycore.Http2ProtocolOptions{}
Expand Down Expand Up @@ -462,8 +461,7 @@ func (s *Server) makeUpstreamClustersForDiscoveryChain(
CircuitBreakers: &envoycluster.CircuitBreakers{
Thresholds: makeThresholdsIfNeeded(cfg.Limits),
},
// Having an empty config enables outlier detection with default config.
OutlierDetection: &envoycluster.OutlierDetection{},
OutlierDetection: cfg.PassiveHealthCheck.AsOutlierDetection(),
}

proto := cfg.Protocol
Expand Down
44 changes: 42 additions & 2 deletions agent/xds/config.go
Original file line number Diff line number Diff line change
@@ -1,10 +1,13 @@
package xds

import (
"github.com/hashicorp/consul/lib"
"strings"
"time"

envoycluster "github.com/envoyproxy/go-control-plane/envoy/api/v2/cluster"
"github.com/gogo/protobuf/types"
"github.com/hashicorp/consul/agent/structs"
"github.com/hashicorp/consul/lib"
"github.com/mitchellh/mapstructure"
)

Expand Down Expand Up @@ -160,11 +163,48 @@ type UpstreamConfig struct {
// Limits are the set of limits that are applied to the proxy for a specific upstream of a
// service instance.
Limits UpstreamLimits `mapstructure:"limits"`

// PassiveHealthCheck configuration
PassiveHealthCheck PassiveHealthCheck `mapstructure:"passive_health_check"`
}

type PassiveHealthCheck struct {
// Interval between health check analysis sweeps. Each sweep may remove
// hosts or return hosts to the pool.
Interval time.Duration
// MaxFailures is the count of consecutive failures that results in a host
// being removed from the pool.
MaxFailures uint32 `mapstructure:"max_failures"`
}

// Return an envoy.OutlierDetection populated by the values from this struct.
// If all values are zero a default empty OutlierDetection will be returned to
// enable outlier detection with default values.
func (p PassiveHealthCheck) AsOutlierDetection() *envoycluster.OutlierDetection {
od := &envoycluster.OutlierDetection{}
if p.Interval != 0 {
od.Interval = types.DurationProto(p.Interval)
}
if p.MaxFailures != 0 {
od.Consecutive_5Xx = &types.UInt32Value{Value: p.MaxFailures}
}
return od
}

func ParseUpstreamConfigNoDefaults(m map[string]interface{}) (UpstreamConfig, error) {
var cfg UpstreamConfig
err := mapstructure.WeakDecode(m, &cfg)
config := &mapstructure.DecoderConfig{
DecodeHook: mapstructure.StringToTimeDurationHookFunc(),
Result: &cfg,
WeaklyTypedInput: true,
}

decoder, err := mapstructure.NewDecoder(config)
if err != nil {
return cfg, err
}

err = decoder.Decode(m)
return cfg, err
}

Expand Down
20 changes: 19 additions & 1 deletion agent/xds/config_test.go
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
package xds

import (
"github.com/hashicorp/consul/agent/structs"
"testing"
"time"

"github.com/hashicorp/consul/agent/structs"
"github.com/stretchr/testify/require"
)

Expand Down Expand Up @@ -244,6 +245,23 @@ func TestParseUpstreamConfig(t *testing.T) {
},
},
},
{
name: "passive health check map",
input: map[string]interface{}{
"passive_health_check": map[string]interface{}{
"interval": "22s",
"max_failures": 7,
},
},
want: UpstreamConfig{
ConnectTimeoutMs: 5000,
Protocol: "tcp",
PassiveHealthCheck: PassiveHealthCheck{
Interval: 22 * time.Second,
MaxFailures: 7,
},
},
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
Expand Down
2 changes: 1 addition & 1 deletion test/integration/connect/envoy/case-centralconf/s1.hcl
Original file line number Diff line number Diff line change
Expand Up @@ -13,4 +13,4 @@ services {
}
}
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,10 @@ services {
max_pending_requests = 4
max_concurrent_requests = 5
}
passive_health_check {
interval = "22s"
max_failures = 4
}
}
}
]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ load helpers
}

@test "s1 proxy should have been configured with max_connections on the cluster" {
CLUSTER_THRESHOLD=$(get_envoy_cluster_threshold localhost:19000 s2.default.primary)
CLUSTER_THRESHOLD=$(get_envoy_cluster_config localhost:19000 s2.default.primary | jq '.circuit_breakers.thresholds[0]')
echo $CLUSTER_THRESHOLD

MAX_CONNS=$(echo $CLUSTER_THRESHOLD | jq --raw-output '.max_connections')
Expand All @@ -42,3 +42,11 @@ load helpers
[ "$MAX_PENDING_REQS" = "4" ]
[ "$MAX_REQS" = "5" ]
}

@test "s1 proxy should have been configured with passive_health_check" {
CLUSTER_CONFIG=$(get_envoy_cluster_config localhost:19000 s2.default.primary)
echo $CLUSTER_CONFIG

[ "$(echo $CLUSTER_CONFIG | jq --raw-output '.outlier_detection.consecutive_5xx')" = "4" ]
[ "$(echo $CLUSTER_CONFIG | jq --raw-output '.outlier_detection.interval')" = "22s" ]
}
4 changes: 2 additions & 2 deletions test/integration/connect/envoy/helpers.bash
Original file line number Diff line number Diff line change
Expand Up @@ -156,15 +156,15 @@ function get_envoy_listener_filters {
echo "$output" | jq --raw-output "$QUERY"
}

function get_envoy_cluster_threshold {
function get_envoy_cluster_config {
local HOSTPORT=$1
local CLUSTER_NAME=$2
run retry_default curl -s -f $HOSTPORT/config_dump
[ "$status" -eq 0 ]
echo "$output" | jq --raw-output "
.configs[1].dynamic_active_clusters[]
| select(.cluster.name|startswith(\"${CLUSTER_NAME}\"))
| .cluster.circuit_breakers.thresholds[0]
| .cluster
"
}

Expand Down
11 changes: 11 additions & 0 deletions website/pages/docs/connect/proxies/envoy.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -277,6 +277,17 @@ definition](/docs/connect/registration/service-registration) or
since HTTP/2 has many requests per connection. For this configuration to be
respected, a L7 protocol must be defined in the `protocol` field.

- `passive_health_check` - Passive health checks are used to remove hosts from
the upstream cluster which are unreachable or are returning errors.

- `interval` - The time between checks. Each check will cause hosts which
have exceeded `max_failures` to be removed from the load balancer, and
any hosts which have passed their ejection time to be returned to the
load balancer.
- `max_failures` - The number of consecutive failures which cause a host to be
removed from the load balancer.


### Gateway Options

These fields may also be overridden explicitly in the [proxy service
Expand Down