Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Backport: troubleshoot: fixes and updated messages #16309

Merged
merged 1 commit into from
Feb 17, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 7 additions & 5 deletions agent/xds/validateupstream-test/validateupstream_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ func TestValidateUpstreams(t *testing.T) {
delete(ir.Index[xdscommon.ListenerType], listenerName)
return ir
},
err: "no listener for upstream \"db\"",
err: "No listener for upstream \"db\"",
},
{
name: "tcp-missing-cluster",
Expand All @@ -66,7 +66,7 @@ func TestValidateUpstreams(t *testing.T) {
delete(ir.Index[xdscommon.ClusterType], sni)
return ir
},
err: "no cluster \"db.default.dc1.internal.11111111-2222-3333-4444-555555555555.consul\" for upstream \"db\"",
err: "No cluster \"db.default.dc1.internal.11111111-2222-3333-4444-555555555555.consul\" for upstream \"db\"",
},
{
name: "http-success",
Expand Down Expand Up @@ -124,7 +124,7 @@ func TestValidateUpstreams(t *testing.T) {
delete(ir.Index[xdscommon.RouteType], "db")
return ir
},
err: "no route for upstream \"db\"",
err: "No route for upstream \"db\"",
},
{
name: "redirect",
Expand Down Expand Up @@ -170,7 +170,7 @@ func TestValidateUpstreams(t *testing.T) {
delete(ir.Index[xdscommon.ClusterType], sni)
return ir
},
err: "no cluster \"google.default.dc1.internal.11111111-2222-3333-4444-555555555555.consul\" for upstream \"240.0.0.1\"",
err: "No cluster \"google.default.dc1.internal.11111111-2222-3333-4444-555555555555.consul\" for upstream \"240.0.0.1\"",
},
{
name: "tproxy-http-redirect-success",
Expand Down Expand Up @@ -230,7 +230,9 @@ func TestValidateUpstreams(t *testing.T) {
var outputErrors string
for _, msgError := range messages.Errors() {
outputErrors += msgError.Message
outputErrors += msgError.PossibleActions
for _, action := range msgError.PossibleActions {
outputErrors += action
}
}
if len(tt.err) == 0 {
require.True(t, messages.Success())
Expand Down
20 changes: 13 additions & 7 deletions command/troubleshoot/proxy/troubleshoot_proxy.go
Original file line number Diff line number Diff line change
Expand Up @@ -77,12 +77,12 @@ func (c *cmd) Run(args []string) int {

t, err := troubleshoot.NewTroubleshoot(adminBindIP, adminPort)
if err != nil {
c.UI.Error("error generating troubleshoot client: " + err.Error())
c.UI.Error("Error generating troubleshoot client: " + err.Error())
return 1
}
messages, err := t.RunAllTests(c.upstreamEnvoyID, c.upstreamIP)
if err != nil {
c.UI.Error("error running the tests: " + err.Error())
c.UI.Error("Error running the tests: " + err.Error())
return 1
}

Expand All @@ -92,11 +92,16 @@ func (c *cmd) Run(args []string) int {
c.UI.SuccessOutput(o.Message)
} else {
c.UI.ErrorOutput(o.Message)
if o.PossibleActions != "" {
c.UI.UnchangedOutput(o.PossibleActions)
for _, action := range o.PossibleActions {
c.UI.UnchangedOutput("-> " + action)
}
}
}
if messages.Success() {
c.UI.UnchangedOutput("If you are still experiencing issues, you can:")
c.UI.UnchangedOutput("-> Check intentions to ensure the upstream allows traffic from this source")
c.UI.UnchangedOutput("-> If using transparent proxy, ensure DNS resolution is to the same IP you have verified here")
}
return 0
}

Expand All @@ -114,14 +119,15 @@ const (
Usage: consul troubleshoot proxy [options]

Connects to local envoy proxy and troubleshoots service mesh communication issues.
Requires an upstream service envoy identifier.
Requires an upstream service identifier. When debugging explicitly configured upstreams,
use -upstream-envoy-id, when debugging transparent proxy upstreams use -upstream-ip.
Examples:
(explicit upstreams only)
$ consul troubleshoot proxy -upstream-envoy-id foo
(transparent proxy only)
$ consul troubleshoot proxy -upstream-ip <IP>
$ consul troubleshoot proxy -upstream-ip 240.0.0.1

where 'foo' is the upstream envoy identifier which
where 'foo' is the upstream envoy identifier and '240.0.0.1' is an upstream ip which
can be obtained by running:
$ consul troubleshoot upstreams [options]
`
Expand Down
16 changes: 8 additions & 8 deletions command/troubleshoot/upstreams/troubleshoot_upstreams.go
Original file line number Diff line number Diff line change
Expand Up @@ -77,24 +77,24 @@ func (c *cmd) Run(args []string) int {
return 1
}

c.UI.Output(fmt.Sprintf("==> Upstreams (explicit upstreams only) (%v)", len(envoyIDs)))
c.UI.HeaderOutput(fmt.Sprintf("Upstreams (explicit upstreams only) (%v)\n", len(envoyIDs)))
for _, u := range envoyIDs {
c.UI.Output(u)
c.UI.UnchangedOutput(u)
}

c.UI.Output(fmt.Sprintf("\n==> Upstream IPs (transparent proxy only) (%v)", len(upstreamIPs)))
c.UI.HeaderOutput(fmt.Sprintf("Upstream IPs (transparent proxy only) (%v)", len(upstreamIPs)))
tbl := cli.NewTable("IPs ", "Virtual ", "Cluster Names")
for _, u := range upstreamIPs {
tbl.AddRow([]string{formatIPs(u.IPs), strconv.FormatBool(u.IsVirtual), formatClusterNames(u.ClusterNames)}, []string{})
}
c.UI.Table(tbl)

c.UI.Output("\nIf you don't see your upstream address or cluster for a transparent proxy upstream:")
c.UI.Output("- Check intentions: Tproxy upstreams are configured based on intentions, make sure you " +
c.UI.UnchangedOutput("\nIf you cannot find the upstream address or cluster for a transparent proxy upstream:")
c.UI.UnchangedOutput("-> Check intentions: Transparent proxy upstreams are configured based on intentions. Make sure you " +
"have configured intentions to allow traffic to your upstream.")
c.UI.Output("- You can also check that the right cluster is being dialed by running a DNS lookup " +
"for the upstream you are dialing (i.e dig backend.svc.consul). If the address you get from that is missing " +
"from the Upstream IPs your proxy may be misconfigured.")
c.UI.UnchangedOutput("-> To check that the right cluster is being dialed, run a DNS lookup " +
"for the upstream you are dialing. For example, run `dig backend.svc.consul` to return the IP address for the `backend` service. If the address you get from that is missing " +
"from the upstream IPs, it means that your proxy may be misconfigured.")
return 0
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,12 @@ package troubleshoot
import (
"context"
"fmt"
"github.com/stretchr/testify/assert"
"strings"
"testing"
"time"

"github.com/stretchr/testify/assert"

"github.com/stretchr/testify/require"

libcluster "github.com/hashicorp/consul/test/integration/consul-container/libs/cluster"
Expand Down Expand Up @@ -40,11 +41,12 @@ func TestTroubleshootProxy(t *testing.T) {
"-envoy-admin-endpoint", fmt.Sprintf("localhost:%v", clientAdminPort),
"-upstream-envoy-id", libservice.StaticServerServiceName})
require.NoError(t, err)
certsValid := strings.Contains(output, "certificates are valid")
listenersExist := strings.Contains(output, fmt.Sprintf("listener for upstream \"%s\" found", libservice.StaticServerServiceName))
routesExist := strings.Contains(output, fmt.Sprintf("route for upstream \"%s\" found", libservice.StaticServerServiceName))
healthyEndpoints := strings.Contains(output, "✓ healthy endpoints for cluster")
return upstreamExists && certsValid && listenersExist && routesExist && healthyEndpoints
certsValid := strings.Contains(output, "Certificates are valid")
noRejectedConfig := strings.Contains(output, "Envoy has 0 rejected configurations")
noConnFailure := strings.Contains(output, "Envoy has detected 0 connection failure(s)")
listenersExist := strings.Contains(output, fmt.Sprintf("Listener for upstream \"%s\" found", libservice.StaticServerServiceName))
healthyEndpoints := strings.Contains(output, "Healthy endpoints for cluster")
return upstreamExists && certsValid && listenersExist && noRejectedConfig && noConnFailure && healthyEndpoints
}, 60*time.Second, 10*time.Second)
})

Expand All @@ -58,11 +60,12 @@ func TestTroubleshootProxy(t *testing.T) {
"-upstream-envoy-id", libservice.StaticServerServiceName})
require.NoError(t, err)

certsValid := strings.Contains(output, "certificates are valid")
listenersExist := strings.Contains(output, fmt.Sprintf("listener for upstream \"%s\" found", libservice.StaticServerServiceName))
routesExist := strings.Contains(output, fmt.Sprintf("route for upstream \"%s\" found", libservice.StaticServerServiceName))
endpointUnhealthy := strings.Contains(output, "no healthy endpoints for cluster")
return certsValid && listenersExist && routesExist && endpointUnhealthy
certsValid := strings.Contains(output, "Certificates are valid")
noRejectedConfig := strings.Contains(output, "Envoy has 0 rejected configurations")
noConnFailure := strings.Contains(output, "Envoy has detected 0 connection failure(s)")
listenersExist := strings.Contains(output, fmt.Sprintf("Listener for upstream \"%s\" found", libservice.StaticServerServiceName))
endpointUnhealthy := strings.Contains(output, "No healthy endpoints for cluster")
return certsValid && listenersExist && noRejectedConfig && noConnFailure && endpointUnhealthy
}, 60*time.Second, 10*time.Second)
})
}
20 changes: 16 additions & 4 deletions troubleshoot/proxy/certs.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,15 +18,21 @@ func (t *Troubleshoot) validateCerts(certs *envoy_admin_v3.Certificates) validat
if certs == nil {
msg := validate.Message{
Success: false,
Message: "certificate object is nil in the proxy configuration",
Message: "Certificate object is nil in the proxy configuration",
PossibleActions: []string{
"Check the logs of the Consul agent configuring the local proxy and ensure XDS updates are being sent to the proxy",
},
}
return []validate.Message{msg}
}

if len(certs.GetCertificates()) == 0 {
msg := validate.Message{
Success: false,
Message: "no certificates found",
Message: "No certificates found",
PossibleActions: []string{
"Check the logs of the Consul agent configuring the local proxy and ensure XDS updates are being sent to the proxy",
},
}
return []validate.Message{msg}
}
Expand All @@ -36,7 +42,10 @@ func (t *Troubleshoot) validateCerts(certs *envoy_admin_v3.Certificates) validat
if now.After(cacert.GetExpirationTime().AsTime()) {
msg := validate.Message{
Success: false,
Message: "ca certificate is expired",
Message: "CA certificate is expired",
PossibleActions: []string{
"Check the logs of the Consul agent configuring the local proxy and ensure XDS updates are being sent to the proxy",
},
}
certMessages = append(certMessages, msg)
}
Expand All @@ -46,7 +55,10 @@ func (t *Troubleshoot) validateCerts(certs *envoy_admin_v3.Certificates) validat
if now.After(cc.GetExpirationTime().AsTime()) {
msg := validate.Message{
Success: false,
Message: "certificate chain is expired",
Message: "Certificate chain is expired",
PossibleActions: []string{
"Check the logs of the Consul agent configuring the local proxy and ensure XDS updates are being sent to the proxy",
},
}
certMessages = append(certMessages, msg)
}
Expand Down
12 changes: 7 additions & 5 deletions troubleshoot/proxy/certs_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,13 +21,13 @@ func TestValidateCerts(t *testing.T) {
}{
"cert is nil": {
certs: nil,
expectedError: "certificate object is nil in the proxy configuration",
expectedError: "Certificate object is nil in the proxy configuration",
},
"no certificates": {
certs: &envoy_admin_v3.Certificates{
Certificates: []*envoy_admin_v3.Certificate{},
},
expectedError: "no certificates found",
expectedError: "No certificates found",
},
"ca expired": {
certs: &envoy_admin_v3.Certificates{
Expand All @@ -41,7 +41,7 @@ func TestValidateCerts(t *testing.T) {
},
},
},
expectedError: "ca certificate is expired",
expectedError: "CA certificate is expired",
},
"cert expired": {
certs: &envoy_admin_v3.Certificates{
Expand All @@ -55,7 +55,7 @@ func TestValidateCerts(t *testing.T) {
},
},
},
expectedError: "certificate chain is expired",
expectedError: "Certificate chain is expired",
},
}

Expand All @@ -67,7 +67,9 @@ func TestValidateCerts(t *testing.T) {
var outputErrors string
for _, msgError := range messages.Errors() {
outputErrors += msgError.Message
outputErrors += msgError.PossibleActions
for _, action := range msgError.PossibleActions {
outputErrors += action
}
}
if tc.expectedError == "" {
require.True(t, messages.Success())
Expand Down
20 changes: 15 additions & 5 deletions troubleshoot/proxy/stats.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ package troubleshoot
import (
"encoding/json"
"fmt"

envoy_admin_v3 "github.com/envoyproxy/go-control-plane/envoy/admin/v3"
"github.com/hashicorp/consul/troubleshoot/validate"
)
Expand Down Expand Up @@ -32,10 +33,19 @@ func (t *Troubleshoot) troubleshootStats() (validate.Messages, error) {
}
}

statMessages = append(statMessages, validate.Message{
Success: true,
Message: fmt.Sprintf("Envoy has %v rejected configurations", totalConfigRejections),
})
if totalConfigRejections > 0 {
statMessages = append(statMessages, validate.Message{
Message: fmt.Sprintf("Envoy has %v rejected configurations", totalConfigRejections),
PossibleActions: []string{
"Check the logs of the Consul agent configuring the local proxy to see why Envoy rejected this configuration",
},
})
} else {
statMessages = append(statMessages, validate.Message{
Success: true,
Message: fmt.Sprintf("Envoy has %v rejected configurations", totalConfigRejections),
})
}

connectionFailureStats, err := t.getEnvoyStats("upstream_cx_connect_fail")
if err != nil {
Expand All @@ -50,7 +60,7 @@ func (t *Troubleshoot) troubleshootStats() (validate.Messages, error) {
}
statMessages = append(statMessages, validate.Message{
Success: true,
Message: fmt.Sprintf("Envoy has detected %v connection failure(s).", totalCxFailures),
Message: fmt.Sprintf("Envoy has detected %v connection failure(s)", totalCxFailures),
})
return statMessages, nil
}
Expand Down
13 changes: 2 additions & 11 deletions troubleshoot/proxy/troubleshoot_proxy.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,15 +10,6 @@ import (
"github.com/hashicorp/consul/troubleshoot/validate"
)

const (
listeners string = "type.googleapis.com/envoy.admin.v3.ListenersConfigDump"
clusters string = "type.googleapis.com/envoy.admin.v3.ClustersConfigDump"
routes string = "type.googleapis.com/envoy.admin.v3.RoutesConfigDump"
endpoints string = "type.googleapis.com/envoy.admin.v3.EndpointsConfigDump"
bootstrap string = "type.googleapis.com/envoy.admin.v3.BootstrapConfigDump"
httpConnManager string = "type.googleapis.com/envoy.extensions.filters.network.http_connection_manager.v3.HttpConnectionManager"
)

type Troubleshoot struct {
client *api.Client
envoyAddr net.IPAddr
Expand Down Expand Up @@ -79,7 +70,7 @@ func (t *Troubleshoot) RunAllTests(upstreamEnvoyID, upstreamIP string) (validate
if errors := messages.Errors(); len(errors) == 0 {
msg := validate.Message{
Success: true,
Message: "certificates are valid",
Message: "Certificates are valid",
}
allTestMessages = append(allTestMessages, msg)
}
Expand All @@ -97,7 +88,7 @@ func (t *Troubleshoot) RunAllTests(upstreamEnvoyID, upstreamIP string) (validate
if errors := messages.Errors(); len(errors) == 0 {
msg := validate.Message{
Success: true,
Message: "upstream resources are valid",
Message: "Upstream resources are valid",
}
allTestMessages = append(allTestMessages, msg)
}
Expand Down
5 changes: 3 additions & 2 deletions troubleshoot/proxy/upstreams.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ package troubleshoot

import (
"fmt"

envoy_admin_v3 "github.com/envoyproxy/go-control-plane/envoy/admin/v3"
envoy_listener_v3 "github.com/envoyproxy/go-control-plane/envoy/config/listener/v3"
envoy_route_v3 "github.com/envoyproxy/go-control-plane/envoy/config/route/v3"
Expand Down Expand Up @@ -29,7 +30,7 @@ func (t *Troubleshoot) GetUpstreams() ([]string, []UpstreamIP, error) {

for _, cfg := range t.envoyConfigDump.Configs {
switch cfg.TypeUrl {
case listeners:
case listenersType:
lcd := &envoy_admin_v3.ListenersConfigDump{}

err := proto.Unmarshal(cfg.GetValue(), lcd)
Expand Down Expand Up @@ -135,7 +136,7 @@ func getClustersFromRoutes(routeName string, cfgDump *envoy_admin_v3.ConfigDump)

for _, cfg := range cfgDump.Configs {
switch cfg.TypeUrl {
case routes:
case routesType:
rcd := &envoy_admin_v3.RoutesConfigDump{}

err := proto.Unmarshal(cfg.GetValue(), rcd)
Expand Down
Loading