Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add TCP+TLS Healthchecks #18381

Merged
merged 36 commits into from
Sep 5, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
36 commits
Select commit Hold shift + click to select a range
6ff0e7d
Begin adding TCPUseTLS
pgporada Aug 2, 2023
c6d8eaa
More TCP with TLS plumbing
pgporada Aug 3, 2023
729f671
Making forward progress
pgporada Aug 3, 2023
a382b2f
Keep on adding TCP+TLS support for healthchecks
pgporada Aug 4, 2023
14a0d49
Removed too many lines
pgporada Aug 4, 2023
8ee986c
Unit tests for TCP+TLS
pgporada Aug 11, 2023
7c7db53
Update tlsutil/config.go
pgporada Aug 11, 2023
5b0dfc6
Working on the tcp+tls unit test
pgporada Aug 15, 2023
9620347
Updated the runtime integration tests
pgporada Aug 15, 2023
c76008b
Progress
pgporada Aug 15, 2023
562c0bc
Revert this file back to HEAD
pgporada Aug 15, 2023
a11c310
Remove debugging lines
pgporada Aug 16, 2023
9e2012c
Implement TLS enabled TCP socket server and make a successful TCP+TLS…
pgporada Aug 16, 2023
fd9f37c
Merge branch 'main' into healthcheck-tls
pgporada Aug 16, 2023
dafbf48
Update docs
pgporada Aug 16, 2023
c76e1b9
Merge branch 'healthcheck-tls' of github.com:pgporada/consul into hea…
pgporada Aug 16, 2023
41e572d
Merge branch 'main' into healthcheck-tls
pgporada Aug 16, 2023
af6ba92
Update agent/agent_test.go
pgporada Aug 16, 2023
8a2f5ed
Update website/content/docs/ecs/configuration-reference.mdx
pgporada Aug 16, 2023
f786916
Update website/content/docs/ecs/configuration-reference.mdx
pgporada Aug 16, 2023
4395cd1
Update agent/checks/check.go
pgporada Aug 16, 2023
2864f24
Address comments
pgporada Aug 16, 2023
eb72235
Remove extraneous bracket
pgporada Aug 16, 2023
67c60ee
Update agent/agent_test.go
pgporada Aug 16, 2023
4ca7dcc
Update agent/agent_test.go
pgporada Aug 16, 2023
8e8b035
Update website/content/docs/ecs/configuration-reference.mdx
pgporada Aug 16, 2023
9c53567
Update the mockTLSServer
pgporada Aug 16, 2023
67d5add
Remove trailing newline
pgporada Aug 16, 2023
576bcc4
Merge branch 'main' into healthcheck-tls
pgporada Aug 21, 2023
0705235
Address comments
pgporada Aug 21, 2023
b73cec2
Merge branch 'healthcheck-tls' of github.com:pgporada/consul into hea…
pgporada Aug 21, 2023
7d4b533
Merge branch 'main' into healthcheck-tls
pgporada Aug 31, 2023
97ab75f
Fix merge problem
pgporada Aug 31, 2023
dad8aca
Add changelog entry
pgporada Sep 1, 2023
1e4fa20
Merge branch 'main' into healthcheck-tls
pgporada Sep 1, 2023
116be8b
Merge branch 'main' into healthcheck-tls
pgporada Sep 5, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions .changelog/18381.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
```release-note:improvement
checks: It is now possible to configure agent TCP checks to use TLS with
optional server SNI and mutual authentication. To use TLS with a TCP check, the
check must enable the `tcp_use_tls` boolean. By default the agent will use the
TLS configuration in the `tls.defaults` stanza.
```
20 changes: 13 additions & 7 deletions agent/agent.go
Original file line number Diff line number Diff line change
Expand Up @@ -3066,14 +3066,20 @@ func (a *Agent) addCheck(check *structs.HealthCheck, chkType *structs.CheckType,
chkType.Interval = checks.MinInterval
}

var tlsClientConfig *tls.Config
if chkType.TCPUseTLS {
tlsClientConfig = a.tlsConfigurator.OutgoingTLSConfigForCheck(chkType.TLSSkipVerify, chkType.TLSServerName)
}

tcp := &checks.CheckTCP{
CheckID: cid,
ServiceID: sid,
TCP: chkType.TCP,
Interval: chkType.Interval,
Timeout: chkType.Timeout,
Logger: a.logger,
StatusHandler: statusHandler,
CheckID: cid,
ServiceID: sid,
TCP: chkType.TCP,
Interval: chkType.Interval,
Timeout: chkType.Timeout,
Logger: a.logger,
TLSClientConfig: tlsClientConfig,
StatusHandler: statusHandler,
}
tcp.Start()
a.checkTCPs[cid] = tcp
Expand Down
90 changes: 83 additions & 7 deletions agent/agent_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,11 +14,7 @@ import (
"encoding/json"
"errors"
"fmt"
"github.com/hashicorp/consul/agent/grpc-external/limiter"
"github.com/hashicorp/consul/agent/proxycfg"
"github.com/hashicorp/consul/agent/proxycfg-sources/local"
"github.com/hashicorp/consul/agent/xds"
proxytracker "github.com/hashicorp/consul/internal/mesh/proxy-tracker"
"io"
mathrand "math/rand"
"net"
"net/http"
Expand All @@ -34,6 +30,12 @@ import (
"testing"
"time"

"github.com/hashicorp/consul/agent/grpc-external/limiter"
"github.com/hashicorp/consul/agent/proxycfg"
"github.com/hashicorp/consul/agent/proxycfg-sources/local"
"github.com/hashicorp/consul/agent/xds"
proxytracker "github.com/hashicorp/consul/internal/mesh/proxy-tracker"

"github.com/google/go-cmp/cmp"
"github.com/google/go-cmp/cmp/cmpopts"
"github.com/google/tcpproxy"
Expand Down Expand Up @@ -973,6 +975,80 @@ func TestAgent_AddServiceWithH2CPINGCheck(t *testing.T) {
requireCheckExists(t, a, "test-h2cping-check")
}

func startMockTLSServer(t *testing.T) (addr string, closeFunc func() error) {
// Load certificates
cert, err := tls.LoadX509KeyPair("../test/key/ourdomain_server.cer", "../test/key/ourdomain_server.key")
require.NoError(t, err)
// Create a certificate pool
rootCertPool := x509.NewCertPool()
caCert, err := os.ReadFile("../test/ca/root.cer")
require.NoError(t, err)
rootCertPool.AppendCertsFromPEM(caCert)
// Configure TLS
config := &tls.Config{
Certificates: []tls.Certificate{cert},
ClientAuth: tls.RequireAndVerifyClientCert,
Comment on lines +989 to +990
Copy link
Contributor Author

@pgporada pgporada Aug 21, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For lines 982 and 983, I chose not to generate new key material. The tls.RequireAndVerifyClientCert is working by virtue of the Consul Agent sending a TLS cert with a SAN that matches a SAN loaded by the mockTLSServer (because they're both using ourdomain_server.cer). If you want me to generate new key material, that's fine by me.

ClientCAs: rootCertPool,
}
// Start TLS server
ln, err := tls.Listen("tcp", "127.0.0.1:0", config)
require.NoError(t, err)
go func() {
for {
conn, err := ln.Accept()
if err != nil {
return
}
io.Copy(io.Discard, conn)
conn.Close()
}
}()
return ln.Addr().String(), ln.Close
}

// TestAgent_AddServiceWithTCPTLSCheck adds a TCP check with TCPUseTLS enabled
// that targets a local TLS listener, then waits until the agent reports the
// check as passing.
func TestAgent_AddServiceWithTCPTLSCheck(t *testing.T) {
	t.Parallel()

	const checkName = "arbitraryTCPServerTLSCheck"

	// The agent is configured with the same CA, certificate and key files
	// that startMockTLSServer loads for the listener.
	agentHCL := `
data_dir = "` + testutil.TempDir(t, "agent") + `"
enable_agent_tls_for_checks = true
datacenter = "dc1"
tls {
defaults {
ca_file = "../test/ca/root.cer"
cert_file = "../test/key/ourdomain_server.cer"
key_file = "../test/key/ourdomain_server.key"
}
}
`
	a := NewTestAgent(t, agentHCL)
	defer a.Shutdown()
	testrpc.WaitForTestAgent(t, a.RPC, "dc1")

	// Bring up the TLS listener the check will dial.
	serverAddr, stopServer := startMockTLSServer(t)
	defer stopServer()

	// The check starts out critical; only a successful run moves it to passing.
	healthCheck := &structs.HealthCheck{
		Node:    "foo",
		CheckID: checkName,
		Name:    checkName,
		Status:  api.HealthCritical,
	}
	checkType := &structs.CheckType{
		TCP:           serverAddr,
		TCPUseTLS:     true,
		TLSServerName: "server.dc1.consul",
		Interval:      5 * time.Second,
	}
	require.NoError(t, a.AddCheck(healthCheck, checkType, false, "", ConfigSourceLocal))

	// Poll until the check has flipped to passing.
	retry.Run(t, func(r *retry.R) {
		if got := getCheck(a, checkName).Status; got != api.HealthPassing {
			r.Fatalf("bad: %v", got)
		}
	})
}

func TestAgent_AddServiceNoExec(t *testing.T) {
if testing.Short() {
t.Skip("too slow for testing.Short")
Expand Down Expand Up @@ -4308,7 +4384,7 @@ func TestAgent_consulConfig_RequestLimits(t *testing.T) {

t.Parallel()
hcl := `
limits {
limits {
request_limits {
mode = "enforcing"
read_rate = 8888
Expand Down Expand Up @@ -6278,7 +6354,7 @@ func TestAgent_scadaProvider(t *testing.T) {
},
Overrides: `
cloud {
resource_id = "organization/0b9de9a3-8403-4ca6-aba8-fca752f42100/project/0b9de9a3-8403-4ca6-aba8-fca752f42100/consul.cluster/0b9de9a3-8403-4ca6-aba8-fca752f42100"
resource_id = "organization/0b9de9a3-8403-4ca6-aba8-fca752f42100/project/0b9de9a3-8403-4ca6-aba8-fca752f42100/consul.cluster/0b9de9a3-8403-4ca6-aba8-fca752f42100"
client_id = "test"
client_secret = "test"
}`,
Expand Down
38 changes: 26 additions & 12 deletions agent/checks/check.go
Original file line number Diff line number Diff line change
Expand Up @@ -625,19 +625,20 @@ func (c *CheckH2PING) Start() {
go c.run()
}

// CheckTCP is used to periodically make an TCP/UDP connection to
// determine the health of a given check.
// CheckTCP is used to periodically make a TCP connection to determine the
// health of a given check.
// The check is passing if the connection succeeds
// The check is critical if the connection returns an error
// Supports failures_before_critical and success_before_passing.
type CheckTCP struct {
CheckID structs.CheckID
ServiceID structs.ServiceID
TCP string
Interval time.Duration
Timeout time.Duration
Logger hclog.Logger
StatusHandler *StatusHandler
CheckID structs.CheckID
ServiceID structs.ServiceID
TCP string
Interval time.Duration
Timeout time.Duration
Logger hclog.Logger
TLSClientConfig *tls.Config
StatusHandler *StatusHandler

dialer *net.Dialer
stop bool
Expand Down Expand Up @@ -694,17 +695,30 @@ func (c *CheckTCP) run() {

// check is invoked periodically to perform the TCP check
func (c *CheckTCP) check() {
conn, err := c.dialer.Dial(`tcp`, c.TCP)
pgporada marked this conversation as resolved.
Show resolved Hide resolved
var conn io.Closer
var err error
var checkType string

if c.TLSClientConfig == nil {
conn, err = c.dialer.Dial(`tcp`, c.TCP)
checkType = "TCP"
} else {
conn, err = tls.DialWithDialer(c.dialer, `tcp`, c.TCP, c.TLSClientConfig)
checkType = "TCP+TLS"
}

if err != nil {
c.Logger.Warn("Check socket connection failed",
c.Logger.Warn(fmt.Sprintf("Check %s connection failed", checkType),
"check", c.CheckID.String(),
"error", err,
)
c.StatusHandler.updateCheck(c.CheckID, api.HealthCritical, err.Error())
return
}

conn.Close()
c.StatusHandler.updateCheck(c.CheckID, api.HealthPassing, fmt.Sprintf("TCP connect %s: Success", c.TCP))
c.StatusHandler.updateCheck(c.CheckID, api.HealthPassing, fmt.Sprintf("%s connect %s: Success", checkType, c.TCP))

}

// CheckUDP is used to periodically send a UDP datagram to determine the health of a given check.
Expand Down
1 change: 1 addition & 0 deletions agent/config/builder.go
Original file line number Diff line number Diff line change
Expand Up @@ -1618,6 +1618,7 @@ func (b *builder) checkVal(v *CheckDefinition) *structs.CheckDefinition {
Body: stringVal(v.Body),
DisableRedirects: boolVal(v.DisableRedirects),
TCP: stringVal(v.TCP),
TCPUseTLS: boolVal(v.TCPUseTLS),
UDP: stringVal(v.UDP),
Interval: b.durationVal(fmt.Sprintf("check[%s].interval", id), v.Interval),
DockerContainerID: stringVal(v.DockerContainerID),
Expand Down
1 change: 1 addition & 0 deletions agent/config/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -423,6 +423,7 @@ type CheckDefinition struct {
DisableRedirects *bool `mapstructure:"disable_redirects"`
OutputMaxSize *int `mapstructure:"output_max_size"`
TCP *string `mapstructure:"tcp"`
TCPUseTLS *bool `mapstructure:"tcp_use_tls"`
UDP *string `mapstructure:"udp"`
Interval *string `mapstructure:"interval"`
DockerContainerID *string `mapstructure:"docker_container_id" alias:"dockercontainerid"`
Expand Down
61 changes: 59 additions & 2 deletions agent/config/runtime_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -2357,12 +2357,12 @@ func TestLoad_IntegrationWithFlags(t *testing.T) {
},
json: []string{`{
"cloud": {
"resource_id": "file-id"
"resource_id": "file-id"
}
}`},
hcl: []string{`
cloud = {
resource_id = "file-id"
resource_id = "file-id"
}
`},
expected: func(rt *RuntimeConfig) {
Expand Down Expand Up @@ -2529,6 +2529,60 @@ func TestLoad_IntegrationWithFlags(t *testing.T) {
rt.DataDir = dataDir
},
})
run(t, testCase{
desc: "tcp check with tcp_use_tls set",
args: []string{
`-data-dir=` + dataDir,
},
json: []string{
`{ "check": { "name": "a", "tcp": "localhost:55555", "tcp_use_tls": true, "interval": "5s" } }`,
},
hcl: []string{
`check = { name = "a" tcp = "localhost:55555" tcp_use_tls = true interval = "5s" }`,
},
expected: func(rt *RuntimeConfig) {
rt.Checks = []*structs.CheckDefinition{
{Name: "a", TCP: "localhost:55555", TCPUseTLS: true, OutputMaxSize: checks.DefaultBufSize, Interval: 5 * time.Second},
}
rt.DataDir = dataDir
},
})
run(t, testCase{
desc: "tcp check with tcp_use_tls set to false",
args: []string{
`-data-dir=` + dataDir,
},
json: []string{
`{ "check": { "name": "a", "tcp": "localhost:55555", "tcp_use_tls": false, "interval": "5s" } }`,
},
hcl: []string{
`check = { name = "a" tcp = "localhost:55555" tcp_use_tls = false interval = "5s" }`,
},
expected: func(rt *RuntimeConfig) {
rt.Checks = []*structs.CheckDefinition{
{Name: "a", TCP: "localhost:55555", TCPUseTLS: false, OutputMaxSize: checks.DefaultBufSize, Interval: 5 * time.Second},
}
rt.DataDir = dataDir
},
})
run(t, testCase{
desc: "tcp check with tcp_use_tls not set",
args: []string{
`-data-dir=` + dataDir,
},
json: []string{
`{ "check": { "name": "a", "tcp": "localhost:55555", "interval": "5s" } }`,
},
hcl: []string{
`check = { name = "a" tcp = "localhost:55555" interval = "5s" }`,
},
expected: func(rt *RuntimeConfig) {
rt.Checks = []*structs.CheckDefinition{
{Name: "a", TCP: "localhost:55555", TCPUseTLS: false, OutputMaxSize: checks.DefaultBufSize, Interval: 5 * time.Second},
}
rt.DataDir = dataDir
},
})
run(t, testCase{
desc: "h2ping check without h2ping_use_tls set",
args: []string{
Expand Down Expand Up @@ -6287,6 +6341,7 @@ func TestLoad_FullConfig(t *testing.T) {
Body: "wSjTy7dg",
DisableRedirects: true,
TCP: "RJQND605",
TCPUseTLS: false,
H2PING: "9N1cSb5B",
H2PingUseTLS: false,
OSService: "aAjE6m9Z",
Expand Down Expand Up @@ -6317,6 +6372,7 @@ func TestLoad_FullConfig(t *testing.T) {
DisableRedirects: false,
OutputMaxSize: checks.DefaultBufSize,
TCP: "4jG5casb",
TCPUseTLS: false,
H2PING: "HCHU7gEb",
H2PingUseTLS: false,
OSService: "aqq95BhP",
Expand Down Expand Up @@ -6346,6 +6402,7 @@ func TestLoad_FullConfig(t *testing.T) {
DisableRedirects: true,
OutputMaxSize: checks.DefaultBufSize,
TCP: "JY6fTTcw",
TCPUseTLS: false,
H2PING: "rQ8eyCSF",
H2PingUseTLS: false,
OSService: "aZaCAXww",
Expand Down
2 changes: 2 additions & 0 deletions agent/config/testdata/TestRuntimeConfig_Sanitize.golden
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,7 @@
"Status": "",
"SuccessBeforePassing": 0,
"TCP": "",
"TCPUseTLS": false,
"TLSServerName": "",
"TLSSkipVerify": false,
"TTL": "0s",
Expand Down Expand Up @@ -368,6 +369,7 @@
"Status": "",
"SuccessBeforePassing": 0,
"TCP": "",
"TCPUseTLS": false,
"TLSServerName": "",
"TLSSkipVerify": false,
"TTL": "0s",
Expand Down
6 changes: 6 additions & 0 deletions agent/structs/check_definition.go
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ type CheckDefinition struct {
Body string
DisableRedirects bool
TCP string
TCPUseTLS bool
UDP string
Interval time.Duration
DockerContainerID string
Expand Down Expand Up @@ -76,6 +77,7 @@ func (t *CheckDefinition) UnmarshalJSON(data []byte) (err error) {
DockerContainerIDSnake string `json:"docker_container_id"`
TLSServerNameSnake string `json:"tls_server_name"`
TLSSkipVerifySnake bool `json:"tls_skip_verify"`
TCPUseTLSSnake bool `json:"tcp_use_tls"`
GRPCUseTLSSnake bool `json:"grpc_use_tls"`
ServiceIDSnake string `json:"service_id"`
H2PingUseTLSSnake bool `json:"h2ping_use_tls"`
Expand Down Expand Up @@ -119,6 +121,9 @@ func (t *CheckDefinition) UnmarshalJSON(data []byte) (err error) {
if aux.TLSSkipVerifySnake {
t.TLSSkipVerify = aux.TLSSkipVerifySnake
}
if aux.TCPUseTLSSnake {
t.TCPUseTLS = aux.TCPUseTLSSnake
}
if aux.GRPCUseTLSSnake {
t.GRPCUseTLS = aux.GRPCUseTLSSnake
}
Expand Down Expand Up @@ -220,6 +225,7 @@ func (c *CheckDefinition) CheckType() *CheckType {
DisableRedirects: c.DisableRedirects,
OutputMaxSize: c.OutputMaxSize,
TCP: c.TCP,
TCPUseTLS: c.TCPUseTLS,
UDP: c.UDP,
Interval: c.Interval,
DockerContainerID: c.DockerContainerID,
Expand Down
Loading