Skip to content

Commit

Permalink
agentless: Multi-cluster tproxy (#1632)
Browse files Browse the repository at this point in the history
* Redirect traffic to the consul-dataplane's DNS proxy when DNS redirection is enabled. This means we no longer need to configure recursors for the clients and servers
* To fall back to kube dns when consul cannot resolve names, we define our own DNSConfig on the pod that uses Consul first, but falls back to the config from /etc/resolv.conf in the cluster.
  • Loading branch information
ishustava authored Oct 25, 2022
1 parent 9597c82 commit 933cc5e
Show file tree
Hide file tree
Showing 28 changed files with 301 additions and 267 deletions.
20 changes: 9 additions & 11 deletions .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,10 @@ on:

env:
TEST_RESULTS: /tmp/test-results # path to where test results are saved
CONSUL_VERSION: 1.13.1 # Consul's OSS version to use in tests
CONSUL_ENT_VERSION: 1.13.1+ent # Consul's enterprise version to use in tests
GOTESTSUM_VERSION: 1.8.2 # You cannot use environment variables with workflows. The gotestsum version is hardcoded in the reusable workflows too.
# We use docker images to copy the consul binary for unit tests.
CONSUL_OSS_DOCKER_IMAGE: hashicorppreview/consul:1.14-dev # Consul's OSS version to use in tests
CONSUL_ENT_DOCKER_IMAGE: hashicorppreview/consul-enterprise:1.14-dev # Consul's enterprise version to use in tests

jobs:
get-go-version:
Expand Down Expand Up @@ -158,11 +159,9 @@ jobs:
working-directory: control-plane
run: |
mkdir -p $HOME/bin
wget https://releases.hashicorp.com/consul/${{env.CONSUL_VERSION}}/consul_${{env.CONSUL_VERSION}}_linux_amd64.zip && \
unzip consul_${{env.CONSUL_VERSION}}_linux_amd64.zip -d $HOME/bin && \
rm consul_${{env.CONSUL_VERSION}}_linux_amd64.zip
chmod +x $HOME/bin/consul
container_id=$(docker create ${{env.CONSUL_OSS_DOCKER_IMAGE}})
docker cp "$container_id:/bin/consul" $HOME/bin/consul
docker rm "$container_id"
- name: Run go tests
working-directory: control-plane
run: |
Expand Down Expand Up @@ -207,10 +206,9 @@ jobs:
working-directory: control-plane
run: |
mkdir -p $HOME/bin
wget https://releases.hashicorp.com/consul/${{env.CONSUL_ENT_VERSION}}/consul_${{env.CONSUL_ENT_VERSION}}_linux_amd64.zip && \
unzip consul_${{env.CONSUL_ENT_VERSION}}_linux_amd64.zip -d $HOME/bin && \
rm consul_${{env.CONSUL_ENT_VERSION}}_linux_amd64.zip
chmod +x $HOME/bin/consul
container_id=$(docker create ${{env.CONSUL_ENT_DOCKER_IMAGE}})
docker cp "$container_id:/bin/consul" $HOME/bin/consul
docker rm "$container_id"
- name: Run go tests
working-directory: control-plane
Expand Down
16 changes: 9 additions & 7 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,14 @@
## UNRELEASED

BREAKING_CHANGES:
* Helm:
* Remove `global.consulSidecarContainer` from values file as there is no longer a consul sidecar. [[GH-1635](https://github.com/hashicorp/consul-k8s/pull/1635)]

FEATURES:
* Consul-dataplane:
* Support merged metrics with consul-dataplane. [[GH-1635](https://github.com/hashicorp/consul-k8s/pull/1635)]
* Support transparent proxying when using consul-dataplane. [[GH-1625](https://github.com/hashicorp/consul-k8s/pull/1478),[GH-1632](https://github.com/hashicorp/consul-k8s/pull/1632)]

IMPROVEMENTS:
* CLI
* Update minimum go version for project to 1.19 [[GH-1633](https://github.com/hashicorp/consul-k8s/pull/1633)]
Expand All @@ -9,13 +18,6 @@ IMPROVEMENTS:
* Remove deprecated annotation `service.alpha.kubernetes.io/tolerate-unready-endpoints: "true"` in the `server-service` template. [[GH-1619](https://github.com/hashicorp/consul-k8s/pull/1619)]
* Support `minAvailable` on connect injector `PodDisruptionBudget`. [[GH-1557](https://github.com/hashicorp/consul-k8s/pull/1557)]
* Add `tolerations` and `nodeSelector` to Server ACL init jobs and `nodeSelector` to Webhook cert manager. [[GH-1581](https://github.com/hashicorp/consul-k8s/pull/1581)]
* Control plane
* Support merged metrics with consul-dataplane. [[GH-1635](https://github.com/hashicorp/consul-k8s/pull/1635)]

BREAKING_CHANGES:

* Helm:
* Removal of `global.consulSidecarContainer` from values file as there is no longer a consul sidecar. [[GH-1635](https://github.com/hashicorp/consul-k8s/pull/1635)]

## 1.0.0-beta3 (October 12, 2022)

Expand Down
2 changes: 2 additions & 0 deletions acceptance/framework/connhelper/connect_helper.go
Original file line number Diff line number Diff line change
Expand Up @@ -208,6 +208,8 @@ func (c *ConnectHelper) helmValues() map[string]string {
"connectInject.enabled": "true",
"global.tls.enabled": strconv.FormatBool(c.Secure),
"global.acls.manageSystemACLs": strconv.FormatBool(c.Secure),
"dns.enabled": "true",
"dns.enableRedirection": "true",
}

helpers.MergeMaps(helmValues, c.HelmValues)
Expand Down
7 changes: 3 additions & 4 deletions acceptance/tests/consul-dns/consul_dns_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,6 @@ import (
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

const podName = "dns-pod"

func TestConsulDNS(t *testing.T) {
cfg := suite.Config()
if cfg.EnableCNI {
Expand Down Expand Up @@ -59,16 +57,17 @@ func TestConsulDNS(t *testing.T) {
serverIPs = append(serverIPs, serverPod.Status.PodIP)
}

dnsPodName := fmt.Sprintf("%s-dns-pod", releaseName)
dnsTestPodArgs := []string{
"run", "-i", podName, "--restart", "Never", "--image", "anubhavmishra/tiny-tools", "--", "dig", fmt.Sprintf("@%s-consul-dns", releaseName), "consul.service.consul",
"run", "-i", dnsPodName, "--restart", "Never", "--image", "anubhavmishra/tiny-tools", "--", "dig", fmt.Sprintf("@%s-consul-dns", releaseName), "consul.service.consul",
}

helpers.Cleanup(t, suite.Config().NoCleanupOnFailure, func() {
// Note: this delete command won't wait for pods to be fully terminated.
// This shouldn't cause any test pollution because the underlying
// objects are deployments, and so when other tests create these
// they should have different pod names.
k8s.RunKubectl(t, ctx.KubectlOptions(t), "delete", "pod", podName)
k8s.RunKubectl(t, ctx.KubectlOptions(t), "delete", "pod", dnsPodName)
})

retry.Run(t, func(r *retry.R) {
Expand Down
3 changes: 1 addition & 2 deletions acceptance/tests/partitions/main_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,7 @@ var suite testsuite.Suite
func TestMain(m *testing.M) {
suite = testsuite.NewSuite(m)

// todo(agentless): Re-enable tproxy tests once we support it for multi-cluster.
if suite.Config().EnableMultiCluster && !suite.Config().EnableTransparentProxy {
if suite.Config().EnableMultiCluster {
os.Exit(suite.Run())
} else {
fmt.Println("Skipping partitions tests because -enable-multi-cluster is not set")
Expand Down
3 changes: 1 addition & 2 deletions acceptance/tests/peering/main_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,7 @@ var suite testsuite.Suite
func TestMain(m *testing.M) {
suite = testsuite.NewSuite(m)

// todo(agentless): Re-enable tproxy tests once we support it for multi-cluster.
if suite.Config().EnableMultiCluster && !suite.Config().DisablePeering && !suite.Config().EnableTransparentProxy {
if suite.Config().EnableMultiCluster && !suite.Config().DisablePeering {
os.Exit(suite.Run())
} else {
fmt.Println("Skipping peering tests because either -enable-multi-cluster is not set or -disable-peering is set")
Expand Down
4 changes: 0 additions & 4 deletions acceptance/tests/peering/peering_connect_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,10 +31,6 @@ func TestPeering_Connect(t *testing.T) {
t.Skipf("skipping this test because peering is not supported in version %v", cfg.ConsulVersion.String())
}

if cfg.EnableTransparentProxy {
t.Skipf("skipping because no t-proxy support")
}

const staticServerPeer = "server"
const staticClientPeer = "client"
cases := []struct {
Expand Down
3 changes: 1 addition & 2 deletions acceptance/tests/wan-federation/main_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,7 @@ var suite testsuite.Suite
func TestMain(m *testing.M) {
suite = testsuite.NewSuite(m)

// todo(agentless): Re-enable tproxy tests once we support it for multi-cluster.
if suite.Config().EnableMultiCluster && !suite.Config().EnableTransparentProxy {
if suite.Config().EnableMultiCluster {
os.Exit(suite.Run())
} else {
fmt.Println("Skipping wan federation tests because -enable-multi-cluster is not set")
Expand Down
13 changes: 0 additions & 13 deletions charts/consul/templates/_helpers.tpl
Original file line number Diff line number Diff line change
Expand Up @@ -148,19 +148,6 @@ is passed to consul as a -config-file param on command line.
[ -n "${HOSTNAME}" ] && sed -Ei "s|HOSTNAME|${HOSTNAME?}|g" /consul/extra-config/extra-from-values.json
{{- end -}}

{{/*
Sets up a list of recusor flags for Consul agents by iterating over the IPs of every nameserver
in /etc/resolv.conf and concatenating them into a string of arguments that can be passed directly
to the consul agent command.
*/}}
{{- define "consul.recursors" -}}
recursor_flags=""
for ip in $(cat /etc/resolv.conf | grep nameserver | cut -d' ' -f2)
do
recursor_flags="$recursor_flags -recursor=$ip"
done
{{- end -}}

{{/*
Create chart name and version as used by the chart label.
*/}}
Expand Down
6 changes: 0 additions & 6 deletions charts/consul/templates/client-daemonset.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -271,9 +271,6 @@ spec:
{{- if and .Values.global.secretsBackend.vault.enabled .Values.global.gossipEncryption.secretName }}
GOSSIP_KEY=`cat /vault/secrets/gossip.txt`
{{- end }}
{{- if (and .Values.dns.enabled .Values.dns.enableRedirection) }}
{{ template "consul.recursors" }}
{{- end }}
{{ template "consul.extraconfig" }}
Expand Down Expand Up @@ -379,9 +376,6 @@ spec:
{{- range $value := .Values.global.recursors }}
-recursor={{ quote $value }} \
{{- end }}
{{- if (and .Values.dns.enabled .Values.dns.enableRedirection) }}
$recursor_flags \
{{- end }}
-config-file=/consul/extra-config/extra-from-values.json \
-domain={{ .Values.global.domain }}
volumeMounts:
Expand Down
7 changes: 0 additions & 7 deletions charts/consul/templates/server-statefulset.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -312,10 +312,6 @@ spec:
{{- if and .Values.global.secretsBackend.vault.enabled .Values.global.gossipEncryption.secretName }}
GOSSIP_KEY=`cat /vault/secrets/gossip.txt`
{{- end }}
{{- if (and .Values.dns.enabled .Values.dns.enableRedirection) }}
{{ template "consul.recursors" }}
{{- end }}
{{ template "consul.extraconfig" }}
Expand All @@ -332,9 +328,6 @@ spec:
-hcl="acl { tokens { agent = \"${ACL_REPLICATION_TOKEN}\", replication = \"${ACL_REPLICATION_TOKEN}\" } }" \
{{- end }}
{{- end }}
{{- if (and .Values.dns.enabled .Values.dns.enableRedirection) }}
$recursor_flags \
{{- end }}
{{- if and .Values.global.secretsBackend.vault.enabled .Values.global.acls.bootstrapToken.secretName }}
-config-file=/vault/secrets/bootstrap-token-config.hcl \
{{- else if (and (not .Values.global.secretsBackend.vault.enabled) .Values.global.acls.bootstrapToken.secretName) }}
Expand Down
24 changes: 0 additions & 24 deletions charts/consul/test/unit/client-daemonset.bats
Original file line number Diff line number Diff line change
Expand Up @@ -1706,30 +1706,6 @@ local actual=$(echo $object |
[ "${actual}" = "true" ]
}

#--------------------------------------------------------------------
# DNS

@test "client/DaemonSet: recursor flags is not set by default" {
cd `chart_dir`
local actual=$(helm template \
-s templates/client-daemonset.yaml \
--set 'client.enabled=true' \
. | tee /dev/stderr |
yq -c -r '.spec.template.spec.containers[0].command | join(" ") | contains("$recursor_flags")' | tee /dev/stderr)
[ "${actual}" = "false" ]
}

@test "client/DaemonSet: add recursor flags if dns.enableRedirection is true" {
cd `chart_dir`
local actual=$(helm template \
-s templates/client-daemonset.yaml \
--set 'client.enabled=true' \
--set 'dns.enableRedirection=true' \
. | tee /dev/stderr |
yq -c -r '.spec.template.spec.containers[0].command | join(" ") | contains("$recursor_flags")' | tee /dev/stderr)
[ "${actual}" = "true" ]
}

#--------------------------------------------------------------------
# hostNetwork

Expand Down
22 changes: 0 additions & 22 deletions charts/consul/test/unit/server-statefulset.bats
Original file line number Diff line number Diff line change
Expand Up @@ -590,28 +590,6 @@ load _helpers
[ "${actualBaz}" = "qux" ]
}

#--------------------------------------------------------------------
# DNS

@test "server/StatefulSet: recursor flags unset by default" {
cd `chart_dir`
local actual=$(helm template \
-s templates/server-statefulset.yaml \
. | tee /dev/stderr |
yq -c -r '.spec.template.spec.containers[0].command | join(" ") | contains("$recursor_flags")' | tee /dev/stderr)
[ "${actual}" = "false" ]
}

@test "server/StatefulSet: add recursor flags if dns.enableRedirection is true" {
cd `chart_dir`
local actual=$(helm template \
-s templates/server-statefulset.yaml \
--set 'dns.enableRedirection=true' \
. | tee /dev/stderr |
yq -c -r '.spec.template.spec.containers[0].command | join(" ") | contains("$recursor_flags")' | tee /dev/stderr)
[ "${actual}" = "true" ]
}

#--------------------------------------------------------------------
# annotations

Expand Down
2 changes: 1 addition & 1 deletion control-plane/cni/go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,6 @@ require (
sigs.k8s.io/yaml v1.2.0 // indirect
)

replace github.com/hashicorp/consul/sdk v0.9.0 => github.com/hashicorp/consul/sdk v0.4.1-0.20220531155537-364758ef2f50
replace github.com/hashicorp/consul/sdk => github.com/hashicorp/consul/sdk v0.4.1-0.20221021205723-cc843c4be892

go 1.19
4 changes: 2 additions & 2 deletions control-plane/cni/go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -131,8 +131,8 @@ github.com/googleapis/gnostic v0.5.5 h1:9fHAtK0uDfpveeqqo1hkEZJcFvYXAiCN3UutL8F9
github.com/googleapis/gnostic v0.5.5/go.mod h1:7+EbHbldMins07ALC74bsA81Ovc97DwqyJO1AENw9kA=
github.com/gorilla/websocket v1.4.2/go.mod h1:YR8l580nyteQvAITg2hZ9XVh4b55+EU/adAjf1fMHhE=
github.com/gregjones/httpcache v0.0.0-20180305231024-9cad4c3443a7/go.mod h1:FecbI9+v66THATjSRHfNgh1IVFe/9kFxbXtjV0ctIMA=
github.com/hashicorp/consul/sdk v0.4.1-0.20220531155537-364758ef2f50 h1:GwbRRT+QxMRbYI608FGwTfcZ0iOVLX69B2ePjpQoyXw=
github.com/hashicorp/consul/sdk v0.4.1-0.20220531155537-364758ef2f50/go.mod h1:yPkX5Q6CsxTFMjQQDJwzeNmUUF5NUGGbrDsv9wTb8cw=
github.com/hashicorp/consul/sdk v0.4.1-0.20221021205723-cc843c4be892 h1:jw0NwPmNPr5CxAU04hACdj61JSaJBKZ0FdBo+kwfNp4=
github.com/hashicorp/consul/sdk v0.4.1-0.20221021205723-cc843c4be892/go.mod h1:yPkX5Q6CsxTFMjQQDJwzeNmUUF5NUGGbrDsv9wTb8cw=
github.com/hashicorp/go-cleanhttp v0.5.1/go.mod h1:JpRdi6/HCYpAwUzNwuwqhbovhLtngrth3wmdIIUrZ80=
github.com/hashicorp/go-hclog v0.12.0/go.mod h1:whpDNt7SSdeAju8AWKIWsul05p54N/39EeqMAyrmvFQ=
github.com/hashicorp/go-hclog v0.16.1 h1:IVQwpTGNRRIHafnTs2dQLIk4ENtneRIEEJWOVDqz99o=
Expand Down
12 changes: 11 additions & 1 deletion control-plane/connect-inject/consul_dataplane_sidecar.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,11 @@ import (
"k8s.io/utils/pointer"
)

const ConsulCAFile = "/consul/connect-inject/consul-ca.pem"
const (
ConsulCAFile = "/consul/connect-inject/consul-ca.pem"
ConsulDataplaneDNSBindHost = "127.0.0.1"
ConsulDataplaneDNSBindPort = 8600
)

func (w *MeshWebhook) consulDataplaneSidecar(namespace corev1.Namespace, pod corev1.Pod, mpi multiPortInfo) (corev1.Container, error) {
resources, err := w.sidecarResources(pod)
Expand Down Expand Up @@ -253,6 +257,12 @@ func (w *MeshWebhook) getContainerSidecarCommand(namespace corev1.Namespace, mpi
}
}

// If Consul DNS is enabled, we want to configure consul-dataplane to be the DNS proxy
// for Consul DNS in the pod.
if w.EnableConsulDNS {
cmd = append(cmd, "-consul-dns-bind-port="+strconv.Itoa(ConsulDataplaneDNSBindPort))
}

var envoyExtraArgs []string
extraArgs, annotationSet := pod.Annotations[annotationEnvoyExtraArgs]
// --base-id is an envoy arg rather than consul-dataplane, and so we need to make sure we're passing it
Expand Down
20 changes: 20 additions & 0 deletions control-plane/connect-inject/consul_dataplane_sidecar_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -272,6 +272,26 @@ func TestHandlerConsulDataplaneSidecar_Concurrency(t *testing.T) {
}
}

func TestHandlerConsulDataplaneSidecar_DNSProxy(t *testing.T) {
h := MeshWebhook{
ConsulConfig: &consul.Config{HTTPPort: 8500, GRPCPort: 8502},
EnableConsulDNS: true,
}
pod := corev1.Pod{
ObjectMeta: metav1.ObjectMeta{},
Spec: corev1.PodSpec{
Containers: []corev1.Container{
{
Name: "web",
},
},
},
}
container, err := h.consulDataplaneSidecar(testNS, pod, multiPortInfo{})
require.NoError(t, err)
require.Contains(t, container.Command[2], "-consul-dns-bind-port=8600")
}

func TestHandlerConsulDataplaneSidecar_Multiport(t *testing.T) {
for _, aclsEnabled := range []bool{false, true} {
name := fmt.Sprintf("acls enabled: %t", aclsEnabled)
Expand Down
1 change: 0 additions & 1 deletion control-plane/connect-inject/container_init.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@ const (
sidecarUserAndGroupID = 5995
initContainersUserAndGroupID = 5996
netAdminCapability = "NET_ADMIN"
dnsServiceHostEnvSuffix = "DNS_SERVICE_HOST"
)

type initContainerCommandData struct {
Expand Down
Loading

0 comments on commit 933cc5e

Please sign in to comment.