diff --git a/README.md b/README.md index 96fc6c2..57dc707 100644 --- a/README.md +++ b/README.md @@ -48,12 +48,12 @@ them not work with kubelet and some other programs. In particular: led to kubelet repeatedly creating more and more copies of the same rule, thinking it had not been created yet.) -iptables 1.8.3 fixes these compatibility problems, but has a slightly -different problem, which is that `iptables-nft` will get stuck in an -infinite loop if it can't load the kernel `nf_tables` module. The -wrapper script has code to deal with this. +iptables 1.8.3 fixed these compatibility problems, but had a slightly +different problem, which was that `iptables-nft` would get stuck in an +infinite loop if it couldn't load the kernel `nf_tables` module. -All currently-known problems are fixed in iptables 1.8.4. +iptables 1.8.4 and later have no known problems that affect +Kubernetes. ## iptables-wrapper @@ -64,10 +64,8 @@ an `iptables-wrapper` script alongside `iptables-legacy` and (Because of the known bugs, `iptables-wrapper-installer.sh` will refuse to install the wrappers into a container with iptables earlier -than 1.8.2. If you really know what you're doing you can pass -`--no-sanity-check` to install anyway. Because it can work around the -bugs in 1.8.3, the installer will allow you to install with iptables -1.8.3.) +than 1.8.4. If you really know what you're doing you can pass +`--no-sanity-check` to install anyway.) The first time the wrapper is run, it will figure out which mode the system is using, update the `iptables`, `iptables-save`, etc, links to @@ -79,7 +77,7 @@ directly to the correct underlying binary. ## Building a container image that uses iptables When building a container image that needs to run iptables in the host -network namespace, install iptables 1.8.3 or later in the container +network namespace, install iptables 1.8.4 or later in the container using whatever tools you normally would. 
Then copy the [`iptables-wrapper-installer.sh`](./iptables-wrapper-installer.sh) script into your container, and run it to have it set up run-time @@ -89,7 +87,9 @@ Some distro-specific examples: - Alpine Linux - FROM alpine:3.10 + Alpine Linux 3.12 and later have iptables >= 1.8.4. + + FROM alpine:3.15 RUN apk add --no-cache iptables COPY iptables-wrapper-installer.sh / @@ -97,7 +97,7 @@ Some distro-specific examples: - Debian GNU/Linux - Debian stable (buster) ships iptables 1.8.2, but iptables 1.8.3 is + Debian stable (buster) ships iptables 1.8.2, but iptables 1.8.5 is available in buster-backports, so you should install it from there: FROM debian:buster @@ -111,21 +111,18 @@ Some distro-specific examples: - Fedora - Fedora 31 is the first release to include iptables 1.8.3. (Similarly - to the Debian example, you might be able to build an image based on - Fedora 30 or 29 if you use `dnf --releasever 31 ...` to install the - F31 iptables packages.) + Fedora 32 and later have iptables >= 1.8.4. - FROM fedora:31 + FROM fedora:35 - RUN dnf install -y iptables iptables-nft + RUN dnf install -y iptables iptables-legacy iptables-nft COPY iptables-wrapper-installer.sh / RUN /iptables-wrapper-installer.sh -- RHEL / CentOS +- RHEL / CentOS / UBI - RHEL/CentOS 7 ship iptables 1.4, which does not support nft mode. - RHEL/CentOS 8 ship a hacked version of iptables 1.8 that *only* - supports nft mode. Therefore, neither can be used as a basis for a - portable iptables-using container image. + RHEL 7 ships iptables 1.4, which does not support nft mode. RHEL 8 + ships a hacked version of iptables 1.8 that *only* supports nft + mode. Therefore, neither can be used as a basis for a portable + iptables-using container image. 
diff --git a/iptables-wrapper-installer.sh b/iptables-wrapper-installer.sh index 5e58c67..e6eb7d8 100755 --- a/iptables-wrapper-installer.sh +++ b/iptables-wrapper-installer.sh @@ -64,16 +64,11 @@ if [ "${1:-}" != "--no-sanity-check" ]; then fi case "${version}" in - *v1.8.[012]\ *) - echo "ERROR: iptables 1.8.0 - 1.8.2 have compatibility bugs." 1>&2 - echo " Upgrade to 1.8.3 or newer." 1>&2 + *v1.8.[0123]\ *) + echo "ERROR: iptables 1.8.0 - 1.8.3 have compatibility bugs." 1>&2 + echo " Upgrade to 1.8.4 or newer." 1>&2 exit 1 ;; - *v1.8.3\ *) - # 1.8.3 mostly works but can get stuck in an infinite loop if the nft - # kernel modules are unavailable - need_timeout=1 - ;; *) # 1.8.4+ are OK ;; @@ -104,42 +99,37 @@ cat > "${sbin}/iptables-wrapper" <> "${sbin}/iptables-wrapper" </dev/null | grep '^-' | wc -l) -num_nft_lines=\$( (iptables-nft-save || true; ip6tables-nft-save || true) 2>/dev/null | grep '^-' | wc -l) -if [ "\${num_legacy_lines}" -ge "\${num_nft_lines}" ]; then - mode=legacy -else +# In kubernetes 1.17 and later, kubelet will have created at least +# one chain in the "mangle" table (either "KUBE-IPTABLES-HINT" or +# "KUBE-KUBELET-CANARY"), so check that first, against +# iptables-nft, because we can check that more efficiently and +# it's more common these days. +nft_kubelet_rules=\$( (iptables-nft-save -t mangle || true; ip6tables-nft-save -t mangle || true) 2>/dev/null | grep -E '^:(KUBE-IPTABLES-HINT|KUBE-KUBELET-CANARY)' | wc -l) +if [ "\${nft_kubelet_rules}" -ne 0 ]; then mode=nft -fi -EOF -else - # Write out the version of legacy-vs-nft detection with an nft timeout - cat >> "${sbin}/iptables-wrapper" </dev/null | grep '^-' | wc -l) -if [ "\${num_legacy_lines}" -ge 10 ]; then - mode=legacy else - num_nft_lines=\$( (timeout 5 sh -c "iptables-nft-save; ip6tables-nft-save" || true) 2>/dev/null | grep '^-' | wc -l) - if [ "\${num_legacy_lines}" -ge "\${num_nft_lines}" ]; then + # Check for kubernetes 1.17-or-later with iptables-legacy. 
We + # can't pass "-t mangle" to iptables-legacy-save because it would + # cause the kernel to create that table if it didn't already + # exist, which we don't want. So we have to grab all the rules. + legacy_kubelet_rules=\$( (iptables-legacy-save || true; ip6tables-legacy-save || true) 2>/dev/null | grep -E '^:(KUBE-IPTABLES-HINT|KUBE-KUBELET-CANARY)' | wc -l) + if [ "\${legacy_kubelet_rules}" -ne 0 ]; then mode=legacy else - mode=nft + # With older kubernetes releases there may not be any _specific_ + # rules we can look for, but we assume that some non-containerized process + # (possibly kubelet) will have created _some_ iptables rules. + num_legacy_lines=\$( (iptables-legacy-save || true; ip6tables-legacy-save || true) 2>/dev/null | grep '^-' | wc -l) + num_nft_lines=\$( (iptables-nft-save || true; ip6tables-nft-save || true) 2>/dev/null | grep '^-' | wc -l) + if [ "\${num_legacy_lines}" -gt "\${num_nft_lines}" ]; then + mode=legacy + else + mode=nft + fi fi fi + EOF -fi # Write out the appropriate alternatives-selection commands case "${altstyle}" in diff --git a/test/Dockerfile.test-alpine b/test/Dockerfile.test-alpine index fe462f5..d589c71 100644 --- a/test/Dockerfile.test-alpine +++ b/test/Dockerfile.test-alpine @@ -14,7 +14,7 @@ ### Dockerfile for building an Alpine image for testing -FROM alpine:3.10 +FROM alpine:3.15 RUN apk add --no-cache iptables COPY iptables-wrapper-installer.sh / diff --git a/test/Dockerfile.test-debian b/test/Dockerfile.test-debian index 01ddde8..51d49ce 100644 --- a/test/Dockerfile.test-debian +++ b/test/Dockerfile.test-debian @@ -16,7 +16,7 @@ FROM debian:buster -ARG INSTALL_ARGS +ARG INSTALL_ARGS= ARG REPO=buster RUN echo deb http://deb.debian.org/debian buster-backports main >> /etc/apt/sources.list; \ diff --git a/test/Dockerfile.test-fedora b/test/Dockerfile.test-fedora index 34da5e1..817b254 100644 --- a/test/Dockerfile.test-fedora +++ b/test/Dockerfile.test-fedora @@ -12,11 +12,11 @@ # See the License for the specific 
language governing permissions and # limitations under the License. -### Dockerfile for building a Fedora 31 image for testing +### Dockerfile for building a Fedora 35 image for testing -FROM fedora:31 +FROM fedora:35 -RUN dnf install -y iptables iptables-nft +RUN dnf install -y iptables iptables-legacy iptables-nft COPY iptables-wrapper-installer.sh / RUN /iptables-wrapper-installer.sh COPY test/test.sh / diff --git a/test/run-test.sh b/test/run-test.sh index eb80b9c..30b11b7 100755 --- a/test/run-test.sh +++ b/test/run-test.sh @@ -105,14 +105,20 @@ if ! build "${tag}" ${build_arg}; then FAIL "build failed unexpectedly" fi -if ! docker run --privileged "iptables-wrapper-test-${tag}" /bin/sh ${dash_x:-} /test.sh legacy; then - FAIL "failed legacy test" +if ! docker run --privileged "iptables-wrapper-test-${tag}" /bin/sh ${dash_x:-} /test.sh legacy old; then + FAIL "failed legacy iptables / old rules test" fi -if ! docker run --privileged "iptables-wrapper-test-${tag}" /bin/sh ${dash_x:-} /test.sh nft; then +if ! docker run --privileged "iptables-wrapper-test-${tag}" /bin/sh ${dash_x:-} /test.sh legacy new; then + FAIL "failed legacy iptables / new rules test" +fi +if ! docker run --privileged "iptables-wrapper-test-${tag}" /bin/sh ${dash_x:-} /test.sh nft old; then if [[ "${nft_fail}" = 1 ]]; then PASS "nft mode failed as expected" fi - FAIL "failed nft test" + FAIL "failed nft iptables / old rules test" +fi +if ! 
docker run --privileged "iptables-wrapper-test-${tag}" /bin/sh ${dash_x:-} /test.sh nft new; then + FAIL "failed nft iptables / new rules test" fi PASS "success" diff --git a/test/test.sh b/test/test.sh index b2186f0..7ad2581 100755 --- a/test/test.sh +++ b/test/test.sh @@ -17,6 +17,20 @@ set -eu mode=$1 +rulestype=$2 + +case "${mode}" in + legacy) + wrongmode=nft + ;; + nft) + wrongmode=legacy + ;; + *) + echo "ERROR: bad mode '${mode}'" 1>&2 + exit 1 + ;; +esac if [ -d /usr/sbin -a -e /usr/sbin/iptables ]; then sbin="/usr/sbin" @@ -56,22 +70,42 @@ ensure_iptables_resolved() { ensure_iptables_undecided -# Initialize the chosen iptables mode with a subset of kubelet's rules -iptables-${mode} -t nat -N KUBE-MARK-DROP -iptables-${mode} -t nat -A KUBE-MARK-DROP -j MARK --set-xmark 0x8000/0x8000 -iptables-${mode} -t filter -N KUBE-FIREWALL -iptables-${mode} -t filter -A KUBE-FIREWALL -m comment --comment "kubernetes firewall for dropping marked packets" -m mark --mark 0x8000/0x8000 -j DROP -iptables-${mode} -t filter -I OUTPUT -j KUBE-FIREWALL -iptables-${mode} -t filter -I INPUT -j KUBE-FIREWALL +case "${rulestype}" in + old) + # Initialize the chosen iptables mode with some kubelet-like rules + iptables-${mode} -t nat -N KUBE-MARK-DROP + iptables-${mode} -t nat -A KUBE-MARK-DROP -j MARK --set-xmark 0x8000/0x8000 + iptables-${mode} -t filter -N KUBE-FIREWALL + iptables-${mode} -t filter -A KUBE-FIREWALL -m comment --comment "kubernetes firewall for dropping marked packets" -m mark --mark 0x8000/0x8000 -j DROP + iptables-${mode} -t filter -I OUTPUT -j KUBE-FIREWALL + iptables-${mode} -t filter -I INPUT -j KUBE-FIREWALL + + # Fail on iptables 1.8.2 in nft mode + if ! 
iptables-${mode} -C KUBE-FIREWALL -m comment --comment "kubernetes firewall for dropping marked packets" -m mark --mark 0x8000/0x8000 -j DROP; then + echo "failed to match previously-added rule; iptables is broken" 1>&2 + exit 1 + fi + ;; + + new) + # Initialize the chosen iptables mode with just a hint chain + iptables-${mode} -t mangle -N KUBE-IPTABLES-HINT + ;; + + *) + echo "ERROR: bad rulestype '${rulestype}'" 1>&2 + exit 1 + ;; +esac + +# Put some junk in the other iptables system +iptables-${wrongmode} -t filter -N BAD-1 +iptables-${wrongmode} -t filter -A BAD-1 -j ACCEPT +iptables-${wrongmode} -t filter -N BAD-2 +iptables-${wrongmode} -t filter -A BAD-2 -j DROP ensure_iptables_undecided iptables -L > /dev/null ensure_iptables_resolved ${mode} - -# Fail on iptables 1.8.2 in nft mode -if ! iptables -C KUBE-FIREWALL -m comment --comment "kubernetes firewall for dropping marked packets" -m mark --mark 0x8000/0x8000 -j DROP; then - echo "failed to match previously-added rule; iptables is broken" 1>&2 - exit 1 -fi