From 86283ad2dc0ee78b450387d87ae3c8a9ce01b1dd Mon Sep 17 00:00:00 2001 From: Seth Hoenig Date: Thu, 8 Jul 2021 09:18:59 -0500 Subject: [PATCH 1/2] consul/connect: Avoid assumption of parent service when filtering connect proxies This PR uses regex-based matching for sidecar proxy services and checks when syncing with Consul. Previously we would check if the parent of the sidecar was still being tracked in Nomad. This is a false invariant - one which we must not depend when we make #10845 work. Fixes #10843 --- .changelog/10872.txt | 3 ++ command/agent/consul/group_test.go | 9 +--- command/agent/consul/service_client.go | 54 ++++++++++++++++----- command/agent/consul/service_client_test.go | 14 ++++++ 4 files changed, 62 insertions(+), 18 deletions(-) create mode 100644 .changelog/10872.txt diff --git a/.changelog/10872.txt b/.changelog/10872.txt new file mode 100644 index 00000000000..cad56420499 --- /dev/null +++ b/.changelog/10872.txt @@ -0,0 +1,3 @@ +```release-note:bug +consul/connect: Avoid assumption of parent service when syncing connect proxies +``` diff --git a/command/agent/consul/group_test.go b/command/agent/consul/group_test.go index 550bd72b3d5..a76aac73e1f 100644 --- a/command/agent/consul/group_test.go +++ b/command/agent/consul/group_test.go @@ -77,11 +77,6 @@ func TestConsul_Connect(t *testing.T) { }, } - // required by isNomadSidecar assertion below - serviceRegMap := map[string]*consulapi.AgentServiceRegistration{ - MakeAllocServiceID(alloc.ID, "group-"+alloc.TaskGroup, tg.Services[0]): nil, - } - require.NoError(t, serviceClient.RegisterWorkload(BuildAllocServices(mock.Node(), alloc, NoopRestarter()))) require.Eventually(t, func() bool { @@ -102,7 +97,7 @@ func TestConsul_Connect(t *testing.T) { require.Contains(t, services, serviceID) require.True(t, isNomadService(serviceID)) - require.False(t, isNomadSidecar(serviceID, serviceRegMap)) + require.False(t, maybeConnectSidecar(serviceID)) agentService := services[serviceID] require.Equal(t, agentService.Service, "testconnect") require.Equal(t, agentService.Address, "10.0.0.1") @@ -112,7 +107,7 @@ func TestConsul_Connect(t *testing.T) { require.Contains(t, services, connectID) require.True(t, isNomadService(connectID)) - require.True(t, isNomadSidecar(connectID, serviceRegMap)) + require.True(t, maybeConnectSidecar(connectID)) connectService := services[connectID] require.Equal(t, connectService.Service, "testconnect-sidecar-proxy") require.Equal(t, connectService.Address, "10.0.0.1") diff --git a/command/agent/consul/service_client.go b/command/agent/consul/service_client.go index da44319660b..2934e24eeb0 100644 --- a/command/agent/consul/service_client.go +++ b/command/agent/consul/service_client.go @@ -6,6 +6,7 @@ import ( "net" "net/url" "reflect" + "regexp" "strconv" "strings" "sync" @@ -816,7 +817,7 @@ func (c *ServiceClient) sync(reason syncReason) error { } // Ignore if this is a service for a Nomad managed sidecar proxy. - if isNomadSidecar(id, c.services) { + if maybeConnectSidecar(id) { continue } @@ -886,7 +887,7 @@ func (c *ServiceClient) sync(reason syncReason) error { } // Ignore if this is a check for a Nomad managed sidecar proxy. - if isNomadSidecar(check.ServiceID, c.services) { + if maybeSidecarProxyCheck(id) { continue } @@ -1681,8 +1682,14 @@ const ( sidecarSuffix = "-sidecar-proxy" ) -// isNomadSidecar returns true if the ID matches a sidecar proxy for a Nomad -// managed service. +// maybeConnectSidecar returns true if the ID is likely of a Connect sidecar proxy. +// This function should only be used to determine if Nomad should skip managing +// service id; it could produce false negatives for non-Nomad managed services +// (i.e. someone set the ID manually), but Nomad does not manage those anyway. +// +// It is important not to reference the parent service, which may or may not still +// be tracked by Nomad internally. +// // // For example if you have a Connect enabled service with the ID: // @@ -1692,14 +1699,39 @@ const ( // // _nomad-task-5229c7f8-376b-3ccc-edd9-981e238f7033-cache-redis-cache-db-sidecar-proxy // -func isNomadSidecar(id string, services map[string]*api.AgentServiceRegistration) bool { - if !strings.HasSuffix(id, sidecarSuffix) { - return false - } +func maybeConnectSidecar(id string) bool { + return strings.HasSuffix(id, sidecarSuffix) +} - // Make sure the Nomad managed service for this proxy still exists. - _, ok := services[id[:len(id)-len(sidecarSuffix)]] - return ok +var ( + sidecarProxyCheckRe = regexp.MustCompile(`service:_nomad-.+-sidecar-proxy(:[\d]+)?`) +) + +// maybeSidecarProxyCheck returns true if the ID likely matches a Nomad generated +// check ID used in the context of a Nomad managed Connect sidecar proxy. This function +// should only be used to determine if Nomad should skip managing a check; it can +// produce false negatives for non-Nomad managed Connect sidecar proxy checks (i.e. +// someone set the ID manually), but Nomad does not manage those anyway. +// +// For example if you have a Connect enabled service with the ID: +// +// _nomad-task-5229c7f8-376b-3ccc-edd9-981e238f7033-cache-redis-cache-db +// +// Nomad will create a Connect sidecar proxy of ID: +// +// _nomad-task-5229c7f8-376b-3ccc-edd9-981e238f7033-cache-redis-cache-db-sidecar-proxy +// +// With default checks like: +// +// service:_nomad-task-2f5fb517-57d4-44ee-7780-dc1cb6e103cd-group-api-count-api-9001-sidecar-proxy:1 +// service:_nomad-task-2f5fb517-57d4-44ee-7780-dc1cb6e103cd-group-api-count-api-9001-sidecar-proxy:2 +// +// Unless sidecar_service.disable_default_tcp_check is set, in which case the +// default check is: +// +// service:_nomad-task-322616db-2680-35d8-0d10-b50a0a0aa4cd-group-api-count-api-9001-sidecar-proxy +func maybeSidecarProxyCheck(id string) bool { + return sidecarProxyCheckRe.MatchString(id) } // getNomadSidecar returns the service registration of the sidecar for the managed diff --git a/command/agent/consul/service_client_test.go b/command/agent/consul/service_client_test.go index 5a72c4a0e6c..30ef7559291 100644 --- a/command/agent/consul/service_client_test.go +++ b/command/agent/consul/service_client_test.go @@ -624,3 +624,17 @@ func TestSyncOps_empty(t *testing.T) { try(&operations{}, true) try(nil, true) } + +func TestSyncLogic_maybeSidecarProxyCheck(t *testing.T) { + try := func(input string, exp bool) { + result := maybeSidecarProxyCheck(input) + require.Equal(t, exp, result) + } + + try("service:_nomad-task-2f5fb517-57d4-44ee-7780-dc1cb6e103cd-group-api-count-api-9001-sidecar-proxy", true) + try("service:_nomad-task-2f5fb517-57d4-44ee-7780-dc1cb6e103cd-group-api-count-api-9001-sidecar-proxy:1", true) + try("service:_nomad-task-2f5fb517-57d4-44ee-7780-dc1cb6e103cd-group-api-count-api-9001-sidecar-proxy:2", true) + try("service:_nomad-task-2f5fb517-57d4-44ee-7780-dc1cb6e103cd-group-api-count-api-9001", false) + try("_nomad-task-2f5fb517-57d4-44ee-7780-dc1cb6e103cd-group-api-count-api-9001-sidecar-proxy:1", false) + try("service", false) +} From 82f424585ac5cd7b868fc00014ff00546ac03564 Mon Sep 17 00:00:00 2001 From: Seth Hoenig Date: Thu, 8 Jul 2021 13:05:05 -0500 Subject: [PATCH 2/2] consul/connect: improve regex from CR suggestions --- command/agent/consul/service_client.go | 2 +- command/agent/consul/service_client_test.go | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/command/agent/consul/service_client.go b/command/agent/consul/service_client.go index 2934e24eeb0..3a22848f098 100644 --- a/command/agent/consul/service_client.go +++ b/command/agent/consul/service_client.go @@ -1704,7 +1704,7 @@ func maybeConnectSidecar(id string) bool { } var ( - sidecarProxyCheckRe = regexp.MustCompile(`service:_nomad-.+-sidecar-proxy(:[\d]+)?`) + sidecarProxyCheckRe = regexp.MustCompile(`^service:_nomad-.+-sidecar-proxy(:[\d]+)?$`) ) // maybeSidecarProxyCheck returns true if the ID likely matches a Nomad generated diff --git a/command/agent/consul/service_client_test.go b/command/agent/consul/service_client_test.go index 30ef7559291..9cacaa38ddc 100644 --- a/command/agent/consul/service_client_test.go +++ b/command/agent/consul/service_client_test.go @@ -636,5 +636,7 @@ func TestSyncLogic_maybeSidecarProxyCheck(t *testing.T) { try("service:_nomad-task-2f5fb517-57d4-44ee-7780-dc1cb6e103cd-group-api-count-api-9001-sidecar-proxy:2", true) try("service:_nomad-task-2f5fb517-57d4-44ee-7780-dc1cb6e103cd-group-api-count-api-9001", false) try("_nomad-task-2f5fb517-57d4-44ee-7780-dc1cb6e103cd-group-api-count-api-9001-sidecar-proxy:1", false) + try("service:_nomad-task-2f5fb517-57d4-44ee-7780-dc1cb6e103cd-group-api-count-api-9001-sidecar-proxy:X", false) + try("service:_nomad-task-2f5fb517-57d4-44ee-7780-dc1cb6e103cd-group-api-count-api-9001-sidecar-proxy: ", false) try("service", false) }