From e56c74ef3831c9d8799ef18a8ba007b248ee6766 Mon Sep 17 00:00:00 2001 From: Dan Winship Date: Thu, 31 Mar 2022 12:20:50 -0400 Subject: [PATCH 1/3] Drop support for iptables 1.8.3, update distro version notes iptables 1.8.3 has a bug that makes it hang forever in certain cases, requiring us to use "timeout" in the script. However, 1.8.4 is over two years old at this point, and even Debian buster-backports has 1.8.5, so there's no reason to keep supporting 1.8.3. So remove that. Also update the README and test Dockerfiles to use newer versions of Fedora and Alpine to get a new-enough version. --- README.md | 43 ++++++++++++++++------------------- iptables-wrapper-installer.sh | 36 ++++------------------------- test/Dockerfile.test-alpine | 2 +- test/Dockerfile.test-debian | 2 +- test/Dockerfile.test-fedora | 6 ++--- 5 files changed, 29 insertions(+), 60 deletions(-) diff --git a/README.md b/README.md index 96fc6c2..57dc707 100644 --- a/README.md +++ b/README.md @@ -48,12 +48,12 @@ them not work with kubelet and some other programs. In particular: led to kubelet repeatedly creating more and more copies of the same rule, thinking it had not been created yet.) -iptables 1.8.3 fixes these compatibility problems, but has a slightly -different problem, which is that `iptables-nft` will get stuck in an -infinite loop if it can't load the kernel `nf_tables` module. The -wrapper script has code to deal with this. +iptables 1.8.3 fixed these compatibility problems, but had a slightly +different problem, which is that `iptables-nft` would get stuck in an +infinite loop if it couldn't load the kernel `nf_tables` module. -All currently-known problems are fixed in iptables 1.8.4. +iptables 1.8.4 and later have no known problems that affect +Kubernetes. 
## iptables-wrapper @@ -64,10 +64,8 @@ an `iptables-wrapper` script alongside `iptables-legacy` and (Because of the known bugs, `iptables-wrapper-installer.sh` will refuse to install the wrappers into a container with iptables earlier -than 1.8.2. If you really know what you're doing you can pass -`--no-sanity-check` to install anyway. Because it can work around the -bugs in 1.8.3, the installer will allow you to install with iptables -1.8.3.) +than 1.8.4. If you really know what you're doing you can pass +`--no-sanity-check` to install anyway.) The first time the wrapper is run, it will figure out which mode the system is using, update the `iptables`, `iptables-save`, etc, links to @@ -79,7 +77,7 @@ directly to the correct underlying binary. ## Building a container image that uses iptables When building a container image that needs to run iptables in the host -network namespace, install iptables 1.8.3 or later in the container +network namespace, install iptables 1.8.4 or later in the container using whatever tools you normally would. Then copy the [`iptables-wrapper-installer.sh`](./iptables-wrapper-installer.sh) script into your container, and run it to have it set up run-time @@ -89,7 +87,9 @@ Some distro-specific examples: - Alpine Linux - FROM alpine:3.10 + Alpine Linux 3.12 and later have iptables >= 1.8.4. + + FROM alpine:3.15 RUN apk add --no-cache iptables COPY iptables-wrapper-installer.sh / @@ -97,7 +97,7 @@ Some distro-specific examples: - Debian GNU/Linux - Debian stable (buster) ships iptables 1.8.2, but iptables 1.8.3 is + Debian stable (buster) ships iptables 1.8.2, but iptables 1.8.5 is available in buster-backports, so you should install it from there: FROM debian:buster @@ -111,21 +111,18 @@ Some distro-specific examples: - Fedora - Fedora 31 is the first release to include iptables 1.8.3. 
(Similarly - to the Debian example, you might be able to build an image based on - Fedora 30 or 29 if you use `dnf --releasever 31 ...` to install the - F31 iptables packages.) + Fedora 32 and later have iptables >= 1.8.4. - FROM fedora:31 + FROM fedora:35 - RUN dnf install -y iptables iptables-nft + RUN dnf install -y iptables iptables-legacy iptables-nft COPY iptables-wrapper-installer.sh / RUN /iptables-wrapper-installer.sh -- RHEL / CentOS +- RHEL / CentOS / UBI - RHEL/CentOS 7 ship iptables 1.4, which does not support nft mode. - RHEL/CentOS 8 ship a hacked version of iptables 1.8 that *only* - supports nft mode. Therefore, neither can be used as a basis for a - portable iptables-using container image. + RHEL 7 ships iptables 1.4, which does not support nft mode. RHEL 8 + ships a hacked version of iptables 1.8 that *only* supports nft + mode. Therefore, neither can be used as a basis for a portable + iptables-using container image. diff --git a/iptables-wrapper-installer.sh b/iptables-wrapper-installer.sh index 5e58c67..64d332c 100755 --- a/iptables-wrapper-installer.sh +++ b/iptables-wrapper-installer.sh @@ -64,16 +64,11 @@ if [ "${1:-}" != "--no-sanity-check" ]; then fi case "${version}" in - *v1.8.[012]\ *) - echo "ERROR: iptables 1.8.0 - 1.8.2 have compatibility bugs." 1>&2 - echo " Upgrade to 1.8.3 or newer." 1>&2 + *v1.8.[0123]\ *) + echo "ERROR: iptables 1.8.0 - 1.8.3 have compatibility bugs." 1>&2 + echo " Upgrade to 1.8.4 or newer." 1>&2 exit 1 ;; - *v1.8.3\ *) - # 1.8.3 mostly works but can get stuck in an infinite loop if the nft - # kernel modules are unavailable - need_timeout=1 - ;; *) # 1.8.4+ are OK ;; @@ -107,11 +102,7 @@ set -eu # Detect whether the base system is using iptables-legacy or # iptables-nft. This assumes that some non-containerized process (eg # kubelet) has already created some iptables rules. 
-EOF -if [ "${need_timeout:-0}" = 0 ]; then - # Write out the simpler version of legacy-vs-nft detection - cat >> "${sbin}/iptables-wrapper" </dev/null | grep '^-' | wc -l) num_nft_lines=\$( (iptables-nft-save || true; ip6tables-nft-save || true) 2>/dev/null | grep '^-' | wc -l) if [ "\${num_legacy_lines}" -ge "\${num_nft_lines}" ]; then @@ -119,27 +110,8 @@ if [ "\${num_legacy_lines}" -ge "\${num_nft_lines}" ]; then else mode=nft fi + EOF -else - # Write out the version of legacy-vs-nft detection with an nft timeout - cat >> "${sbin}/iptables-wrapper" </dev/null | grep '^-' | wc -l) -if [ "\${num_legacy_lines}" -ge 10 ]; then - mode=legacy -else - num_nft_lines=\$( (timeout 5 sh -c "iptables-nft-save; ip6tables-nft-save" || true) 2>/dev/null | grep '^-' | wc -l) - if [ "\${num_legacy_lines}" -ge "\${num_nft_lines}" ]; then - mode=legacy - else - mode=nft - fi -fi -EOF -fi # Write out the appropriate alternatives-selection commands case "${altstyle}" in diff --git a/test/Dockerfile.test-alpine b/test/Dockerfile.test-alpine index fe462f5..d589c71 100644 --- a/test/Dockerfile.test-alpine +++ b/test/Dockerfile.test-alpine @@ -14,7 +14,7 @@ ### Dockerfile for building an Alpine image for testing -FROM alpine:3.10 +FROM alpine:3.15 RUN apk add --no-cache iptables COPY iptables-wrapper-installer.sh / diff --git a/test/Dockerfile.test-debian b/test/Dockerfile.test-debian index 01ddde8..51d49ce 100644 --- a/test/Dockerfile.test-debian +++ b/test/Dockerfile.test-debian @@ -16,7 +16,7 @@ FROM debian:buster -ARG INSTALL_ARGS +ARG INSTALL_ARGS= ARG REPO=buster RUN echo deb http://deb.debian.org/debian buster-backports main >> /etc/apt/sources.list; \ diff --git a/test/Dockerfile.test-fedora b/test/Dockerfile.test-fedora index 34da5e1..817b254 100644 --- a/test/Dockerfile.test-fedora +++ b/test/Dockerfile.test-fedora @@ -12,11 +12,11 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-### Dockerfile for building a Fedora 31 image for testing +### Dockerfile for building a Fedora 35 image for testing -FROM fedora:31 +FROM fedora:35 -RUN dnf install -y iptables iptables-nft +RUN dnf install -y iptables iptables-legacy iptables-nft COPY iptables-wrapper-installer.sh / RUN /iptables-wrapper-installer.sh COPY test/test.sh / From 2a9e1a15f6fcff38f51ac4714223c7694664cf5d Mon Sep 17 00:00:00 2001 From: Dan Winship Date: Thu, 31 Mar 2022 12:32:42 -0400 Subject: [PATCH 2/3] Default to nft over legacy in ambiguous situations Systems these days are more likely to be using iptables-nft than iptables-legacy, so if the legacy and nft rule counts are tied (for example, when there are no rules in either table), guess "nft" rather than "legacy". --- iptables-wrapper-installer.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/iptables-wrapper-installer.sh b/iptables-wrapper-installer.sh index 64d332c..122207d 100755 --- a/iptables-wrapper-installer.sh +++ b/iptables-wrapper-installer.sh @@ -105,7 +105,7 @@ set -eu num_legacy_lines=\$( (iptables-legacy-save || true; ip6tables-legacy-save || true) 2>/dev/null | grep '^-' | wc -l) num_nft_lines=\$( (iptables-nft-save || true; ip6tables-nft-save || true) 2>/dev/null | grep '^-' | wc -l) -if [ "\${num_legacy_lines}" -ge "\${num_nft_lines}" ]; then +if [ "\${num_legacy_lines}" -gt "\${num_nft_lines}" ]; then mode=legacy else mode=nft From 70d7897da19aad7749b4f7ae46cb51e3c07c95b9 Mon Sep 17 00:00:00 2001 From: Dan Winship Date: Thu, 31 Mar 2022 14:54:54 -0400 Subject: [PATCH 3/3] Try to resolve nft/legacy based on KUBE-IPTABLES-HINT / KUBE-KUBELET-CANARY first Check for the (1.24+) KUBE-IPTABLES-HINT or (1.17+) KUBE-KUBELET-CANARY chain first, and only fall back to counting total number of legacy vs nft rules if we don't find that. 
--- iptables-wrapper-installer.sh | 36 +++++++++++++++------ test/run-test.sh | 14 +++++--- test/test.sh | 60 +++++++++++++++++++++++++++-------- 3 files changed, 84 insertions(+), 26 deletions(-) diff --git a/iptables-wrapper-installer.sh b/iptables-wrapper-installer.sh index 122207d..e6eb7d8 100755 --- a/iptables-wrapper-installer.sh +++ b/iptables-wrapper-installer.sh @@ -99,16 +99,34 @@ cat > "${sbin}/iptables-wrapper" </dev/null | grep '^-' | wc -l) -num_nft_lines=\$( (iptables-nft-save || true; ip6tables-nft-save || true) 2>/dev/null | grep '^-' | wc -l) -if [ "\${num_legacy_lines}" -gt "\${num_nft_lines}" ]; then - mode=legacy -else +# In kubernetes 1.17 and later, kubelet will have created at least +# one chain in the "mangle" table (either "KUBE-IPTABLES-HINT" or +# "KUBE-KUBELET-CANARY"), so check that first, against +# iptables-nft, because we can check that more efficiently and +# it's more common these days. +nft_kubelet_rules=\$( (iptables-nft-save -t mangle || true; ip6tables-nft-save -t mangle || true) 2>/dev/null | grep -E '^:(KUBE-IPTABLES-HINT|KUBE-KUBELET-CANARY)' | wc -l) +if [ "\${nft_kubelet_rules}" -ne 0 ]; then mode=nft +else + # Check for kubernetes 1.17-or-later with iptables-legacy. We + # can't pass "-t mangle" to iptables-legacy-save because it would + # cause the kernel to create that table if it didn't already + # exist, which we don't want. So we have to grab all the rules. + legacy_kubelet_rules=\$( (iptables-legacy-save || true; ip6tables-legacy-save || true) 2>/dev/null | grep -E '^:(KUBE-IPTABLES-HINT|KUBE-KUBELET-CANARY)' | wc -l) + if [ "\${legacy_kubelet_rules}" -ne 0 ]; then + mode=legacy + else + # With older kubernetes releases there may not be any _specific_ + # rules we can look for, but we assume that some non-containerized process + # (possibly kubelet) will have created _some_ iptables rules. 
+ num_legacy_lines=\$( (iptables-legacy-save || true; ip6tables-legacy-save || true) 2>/dev/null | grep '^-' | wc -l) + num_nft_lines=\$( (iptables-nft-save || true; ip6tables-nft-save || true) 2>/dev/null | grep '^-' | wc -l) + if [ "\${num_legacy_lines}" -gt "\${num_nft_lines}" ]; then + mode=legacy + else + mode=nft + fi + fi fi EOF diff --git a/test/run-test.sh b/test/run-test.sh index eb80b9c..30b11b7 100755 --- a/test/run-test.sh +++ b/test/run-test.sh @@ -105,14 +105,20 @@ if ! build "${tag}" ${build_arg}; then FAIL "build failed unexpectedly" fi -if ! docker run --privileged "iptables-wrapper-test-${tag}" /bin/sh ${dash_x:-} /test.sh legacy; then - FAIL "failed legacy test" +if ! docker run --privileged "iptables-wrapper-test-${tag}" /bin/sh ${dash_x:-} /test.sh legacy old; then + FAIL "failed legacy iptables / old rules test" fi -if ! docker run --privileged "iptables-wrapper-test-${tag}" /bin/sh ${dash_x:-} /test.sh nft; then +if ! docker run --privileged "iptables-wrapper-test-${tag}" /bin/sh ${dash_x:-} /test.sh legacy new; then + FAIL "failed legacy iptables / new rules test" +fi +if ! docker run --privileged "iptables-wrapper-test-${tag}" /bin/sh ${dash_x:-} /test.sh nft old; then if [[ "${nft_fail}" = 1 ]]; then PASS "nft mode failed as expected" fi - FAIL "failed nft test" + FAIL "failed nft iptables / old rules test" +fi +if ! 
docker run --privileged "iptables-wrapper-test-${tag}" /bin/sh ${dash_x:-} /test.sh nft new; then + FAIL "failed nft iptables / new rules test" fi PASS "success" diff --git a/test/test.sh b/test/test.sh index b2186f0..7ad2581 100755 --- a/test/test.sh +++ b/test/test.sh @@ -17,6 +17,20 @@ set -eu mode=$1 +rulestype=$2 + +case "${mode}" in + legacy) + wrongmode=nft + ;; + nft) + wrongmode=legacy + ;; + *) + echo "ERROR: bad mode '${mode}'" 1>&2 + exit 1 + ;; +esac if [ -d /usr/sbin -a -e /usr/sbin/iptables ]; then sbin="/usr/sbin" @@ -56,22 +70,42 @@ ensure_iptables_resolved() { ensure_iptables_undecided -# Initialize the chosen iptables mode with a subset of kubelet's rules -iptables-${mode} -t nat -N KUBE-MARK-DROP -iptables-${mode} -t nat -A KUBE-MARK-DROP -j MARK --set-xmark 0x8000/0x8000 -iptables-${mode} -t filter -N KUBE-FIREWALL -iptables-${mode} -t filter -A KUBE-FIREWALL -m comment --comment "kubernetes firewall for dropping marked packets" -m mark --mark 0x8000/0x8000 -j DROP -iptables-${mode} -t filter -I OUTPUT -j KUBE-FIREWALL -iptables-${mode} -t filter -I INPUT -j KUBE-FIREWALL +case "${rulestype}" in + old) + # Initialize the chosen iptables mode with some kubelet-like rules + iptables-${mode} -t nat -N KUBE-MARK-DROP + iptables-${mode} -t nat -A KUBE-MARK-DROP -j MARK --set-xmark 0x8000/0x8000 + iptables-${mode} -t filter -N KUBE-FIREWALL + iptables-${mode} -t filter -A KUBE-FIREWALL -m comment --comment "kubernetes firewall for dropping marked packets" -m mark --mark 0x8000/0x8000 -j DROP + iptables-${mode} -t filter -I OUTPUT -j KUBE-FIREWALL + iptables-${mode} -t filter -I INPUT -j KUBE-FIREWALL + + # Fail on iptables 1.8.2 in nft mode + if ! 
iptables-${mode} -C KUBE-FIREWALL -m comment --comment "kubernetes firewall for dropping marked packets" -m mark --mark 0x8000/0x8000 -j DROP; then + echo "failed to match previously-added rule; iptables is broken" 1>&2 + exit 1 + fi + ;; + + new) + # Initialize the chosen iptables mode with just a hint chain + iptables-${mode} -t mangle -N KUBE-IPTABLES-HINT + ;; + + *) + echo "ERROR: bad rulestype '${rulestype}'" 1>&2 + exit 1 + ;; +esac + +# Put some junk in the other iptables system +iptables-${wrongmode} -t filter -N BAD-1 +iptables-${wrongmode} -t filter -A BAD-1 -j ACCEPT +iptables-${wrongmode} -t filter -N BAD-2 +iptables-${wrongmode} -t filter -A BAD-2 -j DROP ensure_iptables_undecided iptables -L > /dev/null ensure_iptables_resolved ${mode} - -# Fail on iptables 1.8.2 in nft mode -if ! iptables -C KUBE-FIREWALL -m comment --comment "kubernetes firewall for dropping marked packets" -m mark --mark 0x8000/0x8000 -j DROP; then - echo "failed to match previously-added rule; iptables is broken" 1>&2 - exit 1 -fi