From d4cd0194d8a9ba42e1da7a2b43d73e12060fbbe3 Mon Sep 17 00:00:00 2001 From: Tonis Tiigi Date: Thu, 19 Oct 2023 00:42:26 -0700 Subject: [PATCH] add CNI bridge network provider This adds a new network configuration based on CNI bridge. Unlike the existing CNI provider this does not require user to provide CNI configuration and plugins externally. The minimal set of plugins required to use the network mode is provided together with buildkit. Currently, bridge mode is opt-in but intention is to make it default after a testing period of one release cycle. Signed-off-by: Tonis Tiigi --- Dockerfile | 64 +- cmd/buildkitd/config/config.go | 2 + cmd/buildkitd/main.go | 6 + cmd/buildkitd/main_containerd_worker.go | 10 +- cmd/buildkitd/main_oci_worker.go | 10 +- go.mod | 2 + go.sum | 6 + hack/shell | 2 +- util/network/cniprovider/bridge.go | 168 + util/network/cniprovider/cni.go | 41 +- util/network/netproviders/network.go | 17 +- util/network/netproviders/network_linux.go | 13 + util/network/netproviders/network_nobridge.go | 16 + .../github.com/vishvananda/netlink/.gitignore | 1 + .../vishvananda/netlink/CHANGELOG.md | 5 + vendor/github.com/vishvananda/netlink/LICENSE | 192 + .../github.com/vishvananda/netlink/Makefile | 30 + .../github.com/vishvananda/netlink/README.md | 92 + vendor/github.com/vishvananda/netlink/addr.go | 57 + .../vishvananda/netlink/addr_linux.go | 421 ++ .../vishvananda/netlink/bpf_linux.go | 77 + .../vishvananda/netlink/bridge_linux.go | 112 + .../github.com/vishvananda/netlink/class.go | 239 ++ .../vishvananda/netlink/class_linux.go | 395 ++ .../vishvananda/netlink/conntrack_linux.go | 537 +++ .../netlink/conntrack_unspecified.go | 53 + .../vishvananda/netlink/devlink_linux.go | 728 ++++ .../github.com/vishvananda/netlink/filter.go | 392 ++ .../vishvananda/netlink/filter_linux.go | 969 +++++ vendor/github.com/vishvananda/netlink/fou.go | 21 + .../vishvananda/netlink/fou_linux.go | 211 + .../vishvananda/netlink/fou_unspecified.go | 15 + .../vishvananda/netlink/genetlink_linux.go | 171 + .../netlink/genetlink_unspecified.go | 25 + .../vishvananda/netlink/gtp_linux.go | 239 ++ .../vishvananda/netlink/handle_linux.go | 183 + .../vishvananda/netlink/handle_unspecified.go | 276 ++ .../vishvananda/netlink/inet_diag.go | 31 + .../vishvananda/netlink/ioctl_linux.go | 90 + .../vishvananda/netlink/ipset_linux.go | 504 +++ vendor/github.com/vishvananda/netlink/link.go | 1347 +++++++ .../vishvananda/netlink/link_linux.go | 3465 +++++++++++++++++ .../vishvananda/netlink/link_tuntap_linux.go | 14 + .../github.com/vishvananda/netlink/neigh.go | 33 + .../vishvananda/netlink/neigh_linux.go | 452 +++ .../github.com/vishvananda/netlink/netlink.go | 40 + .../vishvananda/netlink/netlink_linux.go | 11 + .../netlink/netlink_unspecified.go | 257 ++ .../vishvananda/netlink/netns_linux.go | 141 + .../vishvananda/netlink/netns_unspecified.go | 19 + .../vishvananda/netlink/nl/addr_linux.go | 71 + .../vishvananda/netlink/nl/bridge_linux.go | 74 + .../vishvananda/netlink/nl/conntrack_linux.go | 219 ++ .../vishvananda/netlink/nl/devlink_linux.go | 96 + .../vishvananda/netlink/nl/genetlink_linux.go | 89 + .../vishvananda/netlink/nl/ipset_linux.go | 222 ++ .../vishvananda/netlink/nl/link_linux.go | 720 ++++ .../vishvananda/netlink/nl/lwt_linux.go | 29 + .../vishvananda/netlink/nl/mpls_linux.go | 36 + .../vishvananda/netlink/nl/nl_linux.go | 900 +++++ .../vishvananda/netlink/nl/nl_unspecified.go | 11 + .../netlink/nl/parse_attr_linux.go | 79 + .../vishvananda/netlink/nl/rdma_link_linux.go | 39 + .../vishvananda/netlink/nl/route_linux.go | 107 + .../vishvananda/netlink/nl/seg6_linux.go | 154 + .../vishvananda/netlink/nl/seg6local_linux.go | 76 + .../vishvananda/netlink/nl/syscall.go | 76 + .../vishvananda/netlink/nl/tc_linux.go | 1119 ++++++ .../vishvananda/netlink/nl/xfrm_linux.go | 306 ++ .../netlink/nl/xfrm_monitor_linux.go | 32 + .../netlink/nl/xfrm_policy_linux.go | 119 + .../netlink/nl/xfrm_state_linux.go | 334 ++ .../github.com/vishvananda/netlink/order.go | 32 + .../vishvananda/netlink/proc_event_linux.go | 217 ++ .../vishvananda/netlink/protinfo.go | 62 + .../vishvananda/netlink/protinfo_linux.go | 74 + .../github.com/vishvananda/netlink/qdisc.go | 366 ++ .../vishvananda/netlink/qdisc_linux.go | 712 ++++ .../vishvananda/netlink/rdma_link_linux.go | 331 ++ .../github.com/vishvananda/netlink/route.go | 227 ++ .../vishvananda/netlink/route_linux.go | 1555 ++++++++ .../vishvananda/netlink/route_unspecified.go | 21 + vendor/github.com/vishvananda/netlink/rule.go | 68 + .../vishvananda/netlink/rule_linux.go | 301 ++ .../github.com/vishvananda/netlink/socket.go | 27 + .../vishvananda/netlink/socket_linux.go | 291 ++ vendor/github.com/vishvananda/netlink/tcp.go | 84 + .../vishvananda/netlink/tcp_linux.go | 353 ++ vendor/github.com/vishvananda/netlink/xfrm.go | 75 + .../vishvananda/netlink/xfrm_monitor_linux.go | 101 + .../vishvananda/netlink/xfrm_policy.go | 97 + .../vishvananda/netlink/xfrm_policy_linux.go | 269 ++ .../vishvananda/netlink/xfrm_state.go | 131 + .../vishvananda/netlink/xfrm_state_linux.go | 481 +++ vendor/github.com/vishvananda/netns/LICENSE | 192 + vendor/github.com/vishvananda/netns/README.md | 61 + vendor/github.com/vishvananda/netns/netns.go | 81 + .../vishvananda/netns/netns_linux.go | 266 ++ .../vishvananda/netns/netns_unspecified.go | 43 + vendor/modules.txt | 7 + 100 files changed, 23588 insertions(+), 47 deletions(-) create mode 100644 util/network/cniprovider/bridge.go create mode 100644 util/network/netproviders/network_linux.go create mode 100644 util/network/netproviders/network_nobridge.go create mode 100644 vendor/github.com/vishvananda/netlink/.gitignore create mode 100644 vendor/github.com/vishvananda/netlink/CHANGELOG.md create mode 100644 vendor/github.com/vishvananda/netlink/LICENSE create mode 100644 vendor/github.com/vishvananda/netlink/Makefile create mode 100644 vendor/github.com/vishvananda/netlink/README.md create mode 100644 vendor/github.com/vishvananda/netlink/addr.go create mode 100644 vendor/github.com/vishvananda/netlink/addr_linux.go create mode 100644 vendor/github.com/vishvananda/netlink/bpf_linux.go create mode 100644 vendor/github.com/vishvananda/netlink/bridge_linux.go create mode 100644 vendor/github.com/vishvananda/netlink/class.go create mode 100644 vendor/github.com/vishvananda/netlink/class_linux.go create mode 100644 vendor/github.com/vishvananda/netlink/conntrack_linux.go create mode 100644 vendor/github.com/vishvananda/netlink/conntrack_unspecified.go create mode 100644 vendor/github.com/vishvananda/netlink/devlink_linux.go create mode 100644 vendor/github.com/vishvananda/netlink/filter.go create mode 100644 vendor/github.com/vishvananda/netlink/filter_linux.go create mode 100644 vendor/github.com/vishvananda/netlink/fou.go create mode 100644 vendor/github.com/vishvananda/netlink/fou_linux.go create mode 100644 vendor/github.com/vishvananda/netlink/fou_unspecified.go create mode 100644 vendor/github.com/vishvananda/netlink/genetlink_linux.go create mode 100644 vendor/github.com/vishvananda/netlink/genetlink_unspecified.go create mode 100644 vendor/github.com/vishvananda/netlink/gtp_linux.go create mode 100644 vendor/github.com/vishvananda/netlink/handle_linux.go create mode 100644 vendor/github.com/vishvananda/netlink/handle_unspecified.go create mode 100644 vendor/github.com/vishvananda/netlink/inet_diag.go create mode 100644 vendor/github.com/vishvananda/netlink/ioctl_linux.go create mode 100644 vendor/github.com/vishvananda/netlink/ipset_linux.go create mode 100644 vendor/github.com/vishvananda/netlink/link.go create mode 100644 vendor/github.com/vishvananda/netlink/link_linux.go create mode 100644 vendor/github.com/vishvananda/netlink/link_tuntap_linux.go create mode 100644 vendor/github.com/vishvananda/netlink/neigh.go create mode 100644 vendor/github.com/vishvananda/netlink/neigh_linux.go create mode 100644 vendor/github.com/vishvananda/netlink/netlink.go create mode 100644 vendor/github.com/vishvananda/netlink/netlink_linux.go create mode 100644 vendor/github.com/vishvananda/netlink/netlink_unspecified.go create mode 100644 vendor/github.com/vishvananda/netlink/netns_linux.go create mode 100644 vendor/github.com/vishvananda/netlink/netns_unspecified.go create mode 100644 vendor/github.com/vishvananda/netlink/nl/addr_linux.go create mode 100644 vendor/github.com/vishvananda/netlink/nl/bridge_linux.go create mode 100644 vendor/github.com/vishvananda/netlink/nl/conntrack_linux.go create mode 100644 vendor/github.com/vishvananda/netlink/nl/devlink_linux.go create mode 100644 vendor/github.com/vishvananda/netlink/nl/genetlink_linux.go create mode 100644 vendor/github.com/vishvananda/netlink/nl/ipset_linux.go create mode 100644 vendor/github.com/vishvananda/netlink/nl/link_linux.go create mode 100644 vendor/github.com/vishvananda/netlink/nl/lwt_linux.go create mode 100644 vendor/github.com/vishvananda/netlink/nl/mpls_linux.go create mode 100644 vendor/github.com/vishvananda/netlink/nl/nl_linux.go create mode 100644 vendor/github.com/vishvananda/netlink/nl/nl_unspecified.go create mode 100644 vendor/github.com/vishvananda/netlink/nl/parse_attr_linux.go create mode 100644 vendor/github.com/vishvananda/netlink/nl/rdma_link_linux.go create mode 100644 vendor/github.com/vishvananda/netlink/nl/route_linux.go create mode 100644 vendor/github.com/vishvananda/netlink/nl/seg6_linux.go create mode 100644 vendor/github.com/vishvananda/netlink/nl/seg6local_linux.go create mode 100644 vendor/github.com/vishvananda/netlink/nl/syscall.go create mode 100644 vendor/github.com/vishvananda/netlink/nl/tc_linux.go create mode 100644 vendor/github.com/vishvananda/netlink/nl/xfrm_linux.go create mode 100644 vendor/github.com/vishvananda/netlink/nl/xfrm_monitor_linux.go create mode 100644 vendor/github.com/vishvananda/netlink/nl/xfrm_policy_linux.go create mode 100644 vendor/github.com/vishvananda/netlink/nl/xfrm_state_linux.go create mode 100644 vendor/github.com/vishvananda/netlink/order.go create mode 100644 vendor/github.com/vishvananda/netlink/proc_event_linux.go create mode 100644 vendor/github.com/vishvananda/netlink/protinfo.go create mode 100644 vendor/github.com/vishvananda/netlink/protinfo_linux.go create mode 100644 vendor/github.com/vishvananda/netlink/qdisc.go create mode 100644 vendor/github.com/vishvananda/netlink/qdisc_linux.go create mode 100644 vendor/github.com/vishvananda/netlink/rdma_link_linux.go create mode 100644 vendor/github.com/vishvananda/netlink/route.go create mode 100644 vendor/github.com/vishvananda/netlink/route_linux.go create mode 100644 vendor/github.com/vishvananda/netlink/route_unspecified.go create mode 100644 vendor/github.com/vishvananda/netlink/rule.go create mode 100644 vendor/github.com/vishvananda/netlink/rule_linux.go create mode 100644 vendor/github.com/vishvananda/netlink/socket.go create mode 100644 vendor/github.com/vishvananda/netlink/socket_linux.go create mode 100644 vendor/github.com/vishvananda/netlink/tcp.go create mode 100644 vendor/github.com/vishvananda/netlink/tcp_linux.go create mode 100644 vendor/github.com/vishvananda/netlink/xfrm.go create mode 100644 vendor/github.com/vishvananda/netlink/xfrm_monitor_linux.go create mode 100644 vendor/github.com/vishvananda/netlink/xfrm_policy.go create mode 100644 vendor/github.com/vishvananda/netlink/xfrm_policy_linux.go create mode 100644 vendor/github.com/vishvananda/netlink/xfrm_state.go create mode 100644 vendor/github.com/vishvananda/netlink/xfrm_state_linux.go create mode 100644 vendor/github.com/vishvananda/netns/LICENSE create mode 100644 vendor/github.com/vishvananda/netns/README.md create mode 100644 vendor/github.com/vishvananda/netns/netns.go create mode 100644 vendor/github.com/vishvananda/netns/netns_linux.go create mode 100644 vendor/github.com/vishvananda/netns/netns_unspecified.go diff --git a/Dockerfile b/Dockerfile index efc58b089f13..f5b021aa3e35 100644 --- a/Dockerfile +++ b/Dockerfile @@ -148,10 +148,49 @@ RUN --mount=target=. --mount=target=/root/.cache,type=cache \ fi EOT +# dnsname source +FROM git AS dnsname-src +ARG DNSNAME_VERSION +WORKDIR /usr/src +RUN git clone https://github.com/containers/dnsname.git dnsname \ + && cd dnsname && git checkout -q "$DNSNAME_VERSION" + +# build dnsname CNI plugin for testing +FROM gobuild-base AS dnsname +WORKDIR /go/src/github.com/containers/dnsname +ARG TARGETPLATFORM +RUN --mount=from=dnsname-src,src=/usr/src/dnsname,target=.,rw \ + --mount=target=/root/.cache,type=cache \ + CGO_ENABLED=0 xx-go build -o /usr/bin/dnsname ./plugins/meta/dnsname && \ + xx-verify --static /usr/bin/dnsname + +FROM --platform=$BUILDPLATFORM alpine:${ALPINE_VERSION} AS cni-plugins +RUN apk add --no-cache curl +COPY --from=xx / / +ARG CNI_VERSION +ARG TARGETOS +ARG TARGETARCH +ARG TARGETPLATFORM +WORKDIR /opt/cni/bin +RUN curl -Ls https://github.com/containernetworking/plugins/releases/download/${CNI_VERSION}/cni-plugins-${TARGETOS}-${TARGETARCH}-${CNI_VERSION}.tgz | tar xzv +RUN xx-verify --static bridge loopback host-local +COPY --link --from=dnsname /usr/bin/dnsname /opt/cni/bin/ + +FROM scratch AS cni-plugins-export +COPY --link --from=cni-plugins /opt/cni/bin/bridge /buildkit-cni-bridge +COPY --link --from=cni-plugins /opt/cni/bin/loopback /buildkit-cni-loopback +COPY --link --from=cni-plugins /opt/cni/bin/host-local /buildkit-cni-host-local +COPY --link --from=cni-plugins /opt/cni/bin/firewall /buildkit-cni-firewall + +FROM scratch AS cni-plugins-export-squashed +COPY --from=cni-plugins-export / / + + FROM scratch AS binaries-linux COPY --link --from=runc /usr/bin/runc /buildkit-runc # built from https://github.com/tonistiigi/binfmt/releases/tag/buildkit%2Fv7.1.0-30 COPY --link --from=tonistiigi/binfmt:buildkit-v7.1.0-30@sha256:45dd57b4ba2f24e2354f71f1e4e51f073cb7a28fd848ce6f5f2a7701142a6bf0 / / +COPY --link --from=cni-plugins-export-squashed / / COPY --link --from=buildctl /usr/bin/buildctl / COPY --link --from=buildkitd /usr/bin/buildkitd / @@ -286,31 +325,6 @@ FROM binaries AS buildkit-windows # this is not in binaries-windows because it is not intended for release yet, just CI COPY --link --from=buildkitd /usr/bin/buildkitd /buildkitd.exe -# dnsname source -FROM git AS dnsname-src -ARG DNSNAME_VERSION -WORKDIR /usr/src -RUN git clone https://github.com/containers/dnsname.git dnsname \ - && cd dnsname && git checkout -q "$DNSNAME_VERSION" - -# build dnsname CNI plugin for testing -FROM gobuild-base AS dnsname -WORKDIR /go/src/github.com/containers/dnsname -ARG TARGETPLATFORM -RUN --mount=from=dnsname-src,src=/usr/src/dnsname,target=.,rw \ - --mount=target=/root/.cache,type=cache \ - CGO_ENABLED=0 xx-go build -o /usr/bin/dnsname ./plugins/meta/dnsname && \ - xx-verify --static /usr/bin/dnsname - -FROM --platform=$BUILDPLATFORM alpine:${ALPINE_VERSION} AS cni-plugins -RUN apk add --no-cache curl -ARG CNI_VERSION -ARG TARGETOS -ARG TARGETARCH -WORKDIR /opt/cni/bin -RUN curl -Ls https://github.com/containernetworking/plugins/releases/download/$CNI_VERSION/cni-plugins-$TARGETOS-$TARGETARCH-$CNI_VERSION.tgz | tar xzv -COPY --link --from=dnsname /usr/bin/dnsname /opt/cni/bin/ - FROM buildkit-base AS integration-tests-base ENV BUILDKIT_INTEGRATION_ROOTLESS_IDPAIR="1000:1000" RUN apk add --no-cache shadow shadow-uidmap sudo vim iptables ip6tables dnsmasq fuse curl git-daemon openssh-client \ diff --git a/cmd/buildkitd/config/config.go b/cmd/buildkitd/config/config.go index b5ce86cc1e62..9af4156f68de 100644 --- a/cmd/buildkitd/config/config.go +++ b/cmd/buildkitd/config/config.go @@ -71,6 +71,8 @@ type NetworkConfig struct { CNIConfigPath string `toml:"cniConfigPath"` CNIBinaryPath string `toml:"cniBinaryPath"` CNIPoolSize int `toml:"cniPoolSize"` + BridgeName string `toml:"bridgeName"` + BridgeSubnet string `toml:"bridgeSubnet"` } type OCIConfig struct { diff --git a/cmd/buildkitd/main.go b/cmd/buildkitd/main.go index d7d637d5a16b..0318dd895f79 100644 --- a/cmd/buildkitd/main.go +++ b/cmd/buildkitd/main.go @@ -453,6 +453,12 @@ func setDefaultNetworkConfig(nc config.NetworkConfig) config.NetworkConfig { if nc.CNIBinaryPath == "" { nc.CNIBinaryPath = appdefaults.DefaultCNIBinDir } + if nc.BridgeName == "" { + nc.BridgeName = "buildkit0" + } + if nc.BridgeSubnet == "" { + nc.BridgeSubnet = "10.10.0.0/16" + } return nc } diff --git a/cmd/buildkitd/main_containerd_worker.go b/cmd/buildkitd/main_containerd_worker.go index f77ee95d226b..0bf2b7b9adb9 100644 --- a/cmd/buildkitd/main_containerd_worker.go +++ b/cmd/buildkitd/main_containerd_worker.go @@ -290,10 +290,12 @@ func containerdWorkerInitializer(c *cli.Context, common workerInitializerOpt) ([ nc := netproviders.Opt{ Mode: common.config.Workers.Containerd.NetworkConfig.Mode, CNI: cniprovider.Opt{ - Root: common.config.Root, - ConfigPath: common.config.Workers.Containerd.CNIConfigPath, - BinaryDir: common.config.Workers.Containerd.CNIBinaryPath, - PoolSize: common.config.Workers.Containerd.CNIPoolSize, + Root: common.config.Root, + ConfigPath: common.config.Workers.Containerd.CNIConfigPath, + BinaryDir: common.config.Workers.Containerd.CNIBinaryPath, + PoolSize: common.config.Workers.Containerd.CNIPoolSize, + BridgeName: common.config.Workers.Containerd.BridgeName, + BridgeSubnet: common.config.Workers.Containerd.BridgeSubnet, }, } diff --git a/cmd/buildkitd/main_oci_worker.go b/cmd/buildkitd/main_oci_worker.go index 72a8309ec7df..d2c56b2f0dce 100644 --- a/cmd/buildkitd/main_oci_worker.go +++ b/cmd/buildkitd/main_oci_worker.go @@ -297,10 +297,12 @@ func ociWorkerInitializer(c *cli.Context, common workerInitializerOpt) ([]worker nc := netproviders.Opt{ Mode: common.config.Workers.OCI.NetworkConfig.Mode, CNI: cniprovider.Opt{ - Root: common.config.Root, - ConfigPath: common.config.Workers.OCI.CNIConfigPath, - BinaryDir: common.config.Workers.OCI.CNIBinaryPath, - PoolSize: common.config.Workers.OCI.CNIPoolSize, + Root: common.config.Root, + ConfigPath: common.config.Workers.OCI.CNIConfigPath, + BinaryDir: common.config.Workers.OCI.CNIBinaryPath, + PoolSize: common.config.Workers.OCI.CNIPoolSize, + BridgeName: common.config.Workers.OCI.BridgeName, + BridgeSubnet: common.config.Workers.OCI.BridgeSubnet, }, } diff --git a/go.mod b/go.mod index c70ad58f0fe2..ef16e116d4c8 100644 --- a/go.mod +++ b/go.mod @@ -72,6 +72,7 @@ require ( github.com/tonistiigi/units v0.0.0-20180711220420-6950e57a87ea github.com/tonistiigi/vt100 v0.0.0-20230623042737-f9a4f7ef6531 github.com/urfave/cli v1.22.12 + github.com/vishvananda/netlink v1.2.1-beta.2 go.etcd.io/bbolt v1.3.7 go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.45.0 go.opentelemetry.io/contrib/instrumentation/net/http/httptrace/otelhttptrace v0.45.0 @@ -158,6 +159,7 @@ require ( github.com/secure-systems-lab/go-securesystemslib v0.4.0 // indirect github.com/shibumi/go-pathspec v1.3.0 // indirect github.com/vbatts/tar-split v0.11.3 // indirect + github.com/vishvananda/netns v0.0.0-20210104183010-2eb08e3e575f // indirect go.opencensus.io v0.24.0 // indirect go.opentelemetry.io/otel/exporters/otlp/otlpmetric v0.42.0 // indirect golang.org/x/mod v0.11.0 // indirect diff --git a/go.sum b/go.sum index d4c84fbe92e2..f8f128f791cb 100644 --- a/go.sum +++ b/go.sum @@ -1238,8 +1238,13 @@ github.com/vbatts/tar-split v0.11.3/go.mod h1:9QlHN18E+fEH7RdG+QAJJcuya3rqT7eXST github.com/vdemeester/k8s-pkg-credentialprovider v1.17.4/go.mod h1:inCTmtUdr5KJbreVojo06krnTgaeAz/Z7lynpPk/Q2c= github.com/vishvananda/netlink v0.0.0-20181108222139-023a6dafdcdf/go.mod h1:+SR5DhBJrl6ZM7CoCKvpw5BKroDKQ+PJqOg65H/2ktk= github.com/vishvananda/netlink v1.1.0/go.mod h1:cTgwzPIzzgDAYoQrMm0EdrjRUBkTqKYppBueQtXaqoE= +github.com/vishvananda/netlink v1.2.1-beta.2 h1:Llsql0lnQEbHj0I1OuKyp8otXp0r3q0mPkuhwHfStVs= +github.com/vishvananda/netlink v1.2.1-beta.2/go.mod h1:twkDnbuQxJYemMlGd4JFIcuhgX83tXhKS2B/PRMpOho= github.com/vishvananda/netns v0.0.0-20180720170159-13995c7128cc/go.mod h1:ZjcWmFBXmLKZu9Nxj3WKYEafiSqer2rnvPr0en9UNpI= github.com/vishvananda/netns v0.0.0-20191106174202-0a2b9b5464df/go.mod h1:JP3t17pCcGlemwknint6hfoeCVQrEMVwxRLRjXpq+BU= +github.com/vishvananda/netns v0.0.0-20200728191858-db3c7e526aae/go.mod h1:DD4vA1DwXk04H54A1oHXtwZmA0grkVMdPxx/VGLCah0= +github.com/vishvananda/netns v0.0.0-20210104183010-2eb08e3e575f h1:p4VB7kIXpOQvVn1ZaTIVp+3vuYAXFe3OJEvjbUYJLaA= +github.com/vishvananda/netns v0.0.0-20210104183010-2eb08e3e575f/go.mod h1:DD4vA1DwXk04H54A1oHXtwZmA0grkVMdPxx/VGLCah0= github.com/vmware/govmomi v0.20.3/go.mod h1:URlwyTFZX72RmxtxuaFL2Uj3fD1JTvZdx59bHWk6aFU= github.com/willf/bitset v1.1.11-0.20200630133818-d5bec3311243/go.mod h1:RjeCKbqT1RxIR/KWY6phxZiaY1IyutSBfGjNPySAYV4= github.com/willf/bitset v1.1.11/go.mod h1:83CECat5yLh5zVOf4P1ErAgKA5UDvKtgyUABdr3+MjI= @@ -1507,6 +1512,7 @@ golang.org/x/sys v0.0.0-20200122134326-e047566fdf82/go.mod h1:h1NjWce9XRLGQEsW7w golang.org/x/sys v0.0.0-20200124204421-9fbb57f87de9/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200202164722-d101bd2416d5/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200212091648-12a6c2dcc1e4/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20200217220822-9197077df867/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200223170610-d5e6a3e2c0ae/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200302150141-5c8b2ff67527/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200323222414-85ca7c5b95cd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= diff --git a/hack/shell b/hack/shell index 1fb9603be476..15ce006cbc21 100755 --- a/hack/shell +++ b/hack/shell @@ -21,4 +21,4 @@ if [ -n "$MOUNT_BUILDKIT_SOURCE" ]; then fi set -x -docker run $SSH $volumes -it --privileged -v /tmp --net=host -e BUILDKIT_REGISTRY_MIRROR_DIR=/root/.cache/registry --rm $(cat $iidfile) ash +docker run $SSH $volumes -it --privileged -v /tmp -e BUILDKIT_REGISTRY_MIRROR_DIR=/root/.cache/registry --rm $(cat $iidfile) ash diff --git a/util/network/cniprovider/bridge.go b/util/network/cniprovider/bridge.go new file mode 100644 index 000000000000..69c4d823b6a9 --- /dev/null +++ b/util/network/cniprovider/bridge.go @@ -0,0 +1,168 @@ +//go:build linux +// +build linux + +package cniprovider + +import ( + "context" + "fmt" + "os" + "os/exec" + "path/filepath" + + cni "github.com/containerd/go-cni" + "github.com/moby/buildkit/util/bklog" + "github.com/moby/buildkit/util/network" + "github.com/pkg/errors" + "github.com/vishvananda/netlink" +) + +func NewBridge(opt Opt) (network.Provider, error) { + cniOptions := []cni.Opt{cni.WithInterfacePrefix("eth")} + bridgeBinName := "bridge" + loopbackBinName := "loopback" + hostLocalBinName := "host-local" + firewallBinName := "firewall" + var setup bool + // binaries shipping with buildkit + for { + var dirs []string + + bridgePath, err := exec.LookPath("buildkit-cni-bridge") + if err != nil { + break + } + var bridgeDir string + bridgeDir, bridgeBinName = filepath.Split(bridgePath) + dirs = append(dirs, bridgeDir) + + loopbackPath, err := exec.LookPath("buildkit-cni-loopback") + if err != nil { + break + } + var loopbackDir string + loopbackDir, loopbackBinName = filepath.Split(loopbackPath) + if loopbackDir != bridgeDir { + dirs = append(dirs, loopbackDir) + } + + hostLocalPath, err := exec.LookPath("buildkit-cni-host-local") + if err != nil { + break + } + var hostLocalDir string + hostLocalDir, hostLocalBinName = filepath.Split(hostLocalPath) + if hostLocalDir != bridgeDir && hostLocalDir != loopbackDir { + dirs = append(dirs, hostLocalDir) + } + + firewallPath, err := exec.LookPath("buildkit-cni-firewall") + if err != nil { + break + } + var firewallDir string + firewallDir, firewallBinName = filepath.Split(firewallPath) + if firewallDir != bridgeDir && firewallDir != loopbackDir && firewallDir != hostLocalDir { + dirs = append(dirs, firewallDir) + } + + cniOptions = append(cniOptions, cni.WithPluginDir(dirs)) + setup = true + break //nolint: staticcheck + } + + if !setup { + fn := filepath.Join(opt.BinaryDir, "bridge") + if _, err := os.Stat(fn); err != nil { + return nil, errors.Wrapf(err, "failed to find CNI bridge %q or buildkit-cni-bridge", fn) + } + + cniOptions = append(cniOptions, cni.WithPluginDir([]string{opt.BinaryDir})) + } + + cniOptions = append(cniOptions, cni.WithConfListBytes([]byte(fmt.Sprintf(`{ + "cniVersion": "1.0.0", + "name": "buildkit", + "plugins": [ + { + "type": "%s" + }, + { + "type": "%s", + "bridge": "%s", + "isDefaultGateway": true, + "ipMasq": true, + "ipam": { + "type": "%s", + "ranges": [ + [ + { "subnet": "%s" } + ] + ] + } + }, + { + "type": "%s", + "ingressPolicy": "same-bridge" + } + ] + }`, loopbackBinName, bridgeBinName, opt.BridgeName, hostLocalBinName, opt.BridgeSubnet, firewallBinName)))) + + unlock, err := initLock() + if err != nil { + return nil, err + } + defer unlock() + + createBridge := true + if _, err := bridgeByName(opt.BridgeName); err == nil { + createBridge = false + } + + cniHandle, err := cni.New(cniOptions...) + if err != nil { + return nil, err + } + cp := &cniProvider{ + CNI: cniHandle, + root: opt.Root, + } + + if createBridge { + cp.release = func() error { + if err := removeBridge(opt.BridgeName); err != nil { + bklog.L.Errorf("failed to remove bridge %q: %v", opt.BridgeName, err) + } + return nil + } + } + + cleanOldNamespaces(cp) + + cp.nsPool = &cniPool{targetSize: opt.PoolSize, provider: cp} + if err := cp.initNetwork(false); err != nil { + return nil, err + } + go cp.nsPool.fillPool(context.TODO()) + return cp, nil +} + +func bridgeByName(name string) (*netlink.Bridge, error) { + l, err := netlink.LinkByName(name) + if err != nil { + return nil, errors.Wrapf(err, "could not lookup %q", name) + } + br, ok := l.(*netlink.Bridge) + if !ok { + return nil, errors.Errorf("%q already exists but is not a bridge", name) + } + return br, nil +} + +func removeBridge(name string) error { + br, err := bridgeByName(name) + if err != nil { + return err + } + return netlink.LinkDel(br) +} diff --git a/util/network/cniprovider/cni.go b/util/network/cniprovider/cni.go index 2d37aa94a18c..16ab73473a99 100644 --- a/util/network/cniprovider/cni.go +++ b/util/network/cniprovider/cni.go @@ -21,10 +21,12 @@ import ( const aboveTargetGracePeriod = 5 * time.Minute type Opt struct { - Root string - ConfigPath string - BinaryDir string - PoolSize int + Root string + ConfigPath string + BinaryDir string + PoolSize int + BridgeName string + BridgeSubnet string } func New(opt Opt) (network.Provider, error) { @@ -61,7 +63,7 @@ func New(opt Opt) (network.Provider, error) { cleanOldNamespaces(cp) cp.nsPool = &cniPool{targetSize: opt.PoolSize, provider: cp} - if err := cp.initNetwork(); err != nil { + if err := cp.initNetwork(true); err != nil { return nil, err } go cp.nsPool.fillPool(context.TODO()) @@ -70,17 +72,18 @@ func New(opt Opt) (network.Provider, error) { type cniProvider struct { cni.CNI - root string - nsPool *cniPool + root string + nsPool *cniPool + release func() error } -func (c *cniProvider) initNetwork() error { - if v := os.Getenv("BUILDKIT_CNI_INIT_LOCK_PATH"); v != "" { - l := flock.New(v) - if err := l.Lock(); err != nil { +func (c *cniProvider) initNetwork(lock bool) error { + if lock { + unlock, err := initLock() + if err != nil { return err } - defer l.Unlock() + defer unlock() } ns, err := c.New(context.TODO(), "") if err != nil { @@ -91,9 +94,23 @@ func (c *cniProvider) initNetwork() error { func (c *cniProvider) Close() error { c.nsPool.close() + if c.release != nil { + return c.release() + } return nil } +func initLock() (func() error, error) { + if v := os.Getenv("BUILDKIT_CNI_INIT_LOCK_PATH"); v != "" { + l := flock.New(v) + if err := l.Lock(); err != nil { + return nil, err + } + return l.Unlock, nil + } + return func() error { return nil }, nil +} + type cniPool struct { provider *cniProvider mu sync.Mutex diff --git a/util/network/netproviders/network.go b/util/network/netproviders/network.go index 4265b7b29b5e..4564782bd88e 100644 --- a/util/network/netproviders/network.go +++ b/util/network/netproviders/network.go @@ -2,6 +2,7 @@ package netproviders import ( "os" + "strconv" "github.com/moby/buildkit/solver/pb" "github.com/moby/buildkit/util/network" @@ -33,8 +34,22 @@ func Providers(opt Opt) (providers map[pb.NetMode]network.Provider, resolvedMode } defaultProvider = hostProvider resolvedMode = opt.Mode + case "bridge": + bridgeProvider, err := getBridgeProvider(opt.CNI) + if err != nil { + return nil, resolvedMode, err + } + defaultProvider = bridgeProvider + resolvedMode = "cni" case "auto", "": - if _, err := os.Stat(opt.CNI.ConfigPath); err == nil { + if v, err := strconv.ParseBool(os.Getenv("BUILDKIT_NETWORK_BRIDGE_AUTO")); v && err == nil { + bridgeProvider, err := getBridgeProvider(opt.CNI) + if err != nil { + return nil, resolvedMode, err + } + defaultProvider = bridgeProvider + resolvedMode = "cni" + } else if _, err := os.Stat(opt.CNI.ConfigPath); err == nil { cniProvider, err := cniprovider.New(opt.CNI) if err != nil { return nil, resolvedMode, err diff --git a/util/network/netproviders/network_linux.go b/util/network/netproviders/network_linux.go new file mode 100644 index 000000000000..8ce861e34bf3 --- /dev/null +++ b/util/network/netproviders/network_linux.go @@ -0,0 +1,13 @@ +//go:build linux +// +build linux + +package netproviders + +import ( + "github.com/moby/buildkit/util/network" + "github.com/moby/buildkit/util/network/cniprovider" +) + +func getBridgeProvider(opt cniprovider.Opt) (network.Provider, error) { + return cniprovider.NewBridge(opt) +} diff --git a/util/network/netproviders/network_nobridge.go b/util/network/netproviders/network_nobridge.go new file mode 100644 index 000000000000..8941e29dae9f --- /dev/null +++ b/util/network/netproviders/network_nobridge.go @@ -0,0 +1,16 @@ +//go:build freebsd || windows +// +build freebsd windows + +package netproviders + +import ( + "runtime" + + "github.com/moby/buildkit/util/network" + "github.com/moby/buildkit/util/network/cniprovider" + "github.com/pkg/errors" +) + +func getBridgeProvider(opt cniprovider.Opt) (network.Provider, error) { + return nil, errors.Errorf("bridge network is not supported on %s yet", runtime.GOOS) +} diff --git a/vendor/github.com/vishvananda/netlink/.gitignore b/vendor/github.com/vishvananda/netlink/.gitignore new file mode 100644 index 000000000000..9f11b755a17d --- /dev/null +++ b/vendor/github.com/vishvananda/netlink/.gitignore @@ -0,0 +1 @@ +.idea/ diff --git a/vendor/github.com/vishvananda/netlink/CHANGELOG.md b/vendor/github.com/vishvananda/netlink/CHANGELOG.md new file mode 100644 index 000000000000..b11e59ff6a80 --- /dev/null +++ b/vendor/github.com/vishvananda/netlink/CHANGELOG.md @@ -0,0 +1,5 @@ +# Changelog + +## 1.0.0 (2018-03-15) + +Initial release tagging \ No newline at end of file diff --git a/vendor/github.com/vishvananda/netlink/LICENSE b/vendor/github.com/vishvananda/netlink/LICENSE new file mode 100644 index 000000000000..9f64db8582cf --- /dev/null +++ b/vendor/github.com/vishvananda/netlink/LICENSE @@ -0,0 +1,192 @@ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + Copyright 2014 Vishvananda Ishaya. + Copyright 2014 Docker, Inc. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/vendor/github.com/vishvananda/netlink/Makefile b/vendor/github.com/vishvananda/netlink/Makefile new file mode 100644 index 000000000000..a0e68e7a9aaa --- /dev/null +++ b/vendor/github.com/vishvananda/netlink/Makefile @@ -0,0 +1,30 @@ +DIRS := \ + . \ + nl + +DEPS = \ + github.com/vishvananda/netns \ + golang.org/x/sys/unix + +uniq = $(if $1,$(firstword $1) $(call uniq,$(filter-out $(firstword $1),$1))) +testdirs = $(call uniq,$(foreach d,$(1),$(dir $(wildcard $(d)/*_test.go)))) +goroot = $(addprefix ../../../,$(1)) +unroot = $(subst ../../../,,$(1)) +fmt = $(addprefix fmt-,$(1)) + +all: test + +$(call goroot,$(DEPS)): + go get $(call unroot,$@) + +.PHONY: $(call testdirs,$(DIRS)) +$(call testdirs,$(DIRS)): + go test -test.exec sudo -test.parallel 4 -timeout 60s -test.v github.com/vishvananda/netlink/$@ + +$(call fmt,$(call testdirs,$(DIRS))): + ! gofmt -l $(subst fmt-,,$@)/*.go | grep -q . + +.PHONY: fmt +fmt: $(call fmt,$(call testdirs,$(DIRS))) + +test: fmt $(call goroot,$(DEPS)) $(call testdirs,$(DIRS)) diff --git a/vendor/github.com/vishvananda/netlink/README.md b/vendor/github.com/vishvananda/netlink/README.md new file mode 100644 index 000000000000..0128bc67d558 --- /dev/null +++ b/vendor/github.com/vishvananda/netlink/README.md @@ -0,0 +1,92 @@ +# netlink - netlink library for go # + +![Build Status](https://github.com/vishvananda/netlink/actions/workflows/main.yml/badge.svg) [![GoDoc](https://godoc.org/github.com/vishvananda/netlink?status.svg)](https://godoc.org/github.com/vishvananda/netlink) + +The netlink package provides a simple netlink library for go. Netlink +is the interface a user-space program in linux uses to communicate with +the kernel. It can be used to add and remove interfaces, set ip addresses +and routes, and configure ipsec. Netlink communication requires elevated +privileges, so in most cases this code needs to be run as root. Since +low-level netlink messages are inscrutable at best, the library attempts +to provide an api that is loosely modeled on the CLI provided by iproute2. +Actions like `ip link add` will be accomplished via a similarly named +function like AddLink(). This library began its life as a fork of the +netlink functionality in +[docker/libcontainer](https://github.com/docker/libcontainer) but was +heavily rewritten to improve testability, performance, and to add new +functionality like ipsec xfrm handling. + +## Local Build and Test ## + +You can use go get command: + + go get github.com/vishvananda/netlink + +Testing dependencies: + + go get github.com/vishvananda/netns + +Testing (requires root): + + sudo -E go test github.com/vishvananda/netlink + +## Examples ## + +Add a new bridge and add eth1 into it: + +```go +package main + +import ( + "fmt" + "github.com/vishvananda/netlink" +) + +func main() { + la := netlink.NewLinkAttrs() + la.Name = "foo" + mybridge := &netlink.Bridge{LinkAttrs: la} + err := netlink.LinkAdd(mybridge) + if err != nil { + fmt.Printf("could not add %s: %v\n", la.Name, err) + } + eth1, _ := netlink.LinkByName("eth1") + netlink.LinkSetMaster(eth1, mybridge) +} + +``` +Note `NewLinkAttrs` constructor, it sets default values in structure. For now +it sets only `TxQLen` to `-1`, so kernel will set default by itself. If you're +using simple initialization(`LinkAttrs{Name: "foo"}`) `TxQLen` will be set to +`0` unless you specify it like `LinkAttrs{Name: "foo", TxQLen: 1000}`. + +Add a new ip address to loopback: + +```go +package main + +import ( + "github.com/vishvananda/netlink" +) + +func main() { + lo, _ := netlink.LinkByName("lo") + addr, _ := netlink.ParseAddr("169.254.169.254/32") + netlink.AddrAdd(lo, addr) +} + +``` + +## Future Work ## + +Many pieces of netlink are not yet fully supported in the high-level +interface. Aspects of virtually all of the high-level objects don't exist. +Many of the underlying primitives are there, so its a matter of putting +the right fields into the high-level objects and making sure that they +are serialized and deserialized correctly in the Add and List methods. + +There are also a few pieces of low level netlink functionality that still +need to be implemented. Routing rules are not in place and some of the +more advanced link types. Hopefully there is decent structure and testing +in place to make these fairly straightforward to add. + diff --git a/vendor/github.com/vishvananda/netlink/addr.go b/vendor/github.com/vishvananda/netlink/addr.go new file mode 100644 index 000000000000..653f540dbc05 --- /dev/null +++ b/vendor/github.com/vishvananda/netlink/addr.go @@ -0,0 +1,57 @@ +package netlink + +import ( + "fmt" + "net" + "strings" +) + +// Addr represents an IP address from netlink. Netlink ip addresses +// include a mask, so it stores the address as a net.IPNet. +type Addr struct { + *net.IPNet + Label string + Flags int + Scope int + Peer *net.IPNet + Broadcast net.IP + PreferedLft int + ValidLft int + LinkIndex int +} + +// String returns $ip/$netmask $label +func (a Addr) String() string { + return strings.TrimSpace(fmt.Sprintf("%s %s", a.IPNet, a.Label)) +} + +// ParseAddr parses the string representation of an address in the +// form $ip/$netmask $label. The label portion is optional +func ParseAddr(s string) (*Addr, error) { + label := "" + parts := strings.Split(s, " ") + if len(parts) > 1 { + s = parts[0] + label = parts[1] + } + m, err := ParseIPNet(s) + if err != nil { + return nil, err + } + return &Addr{IPNet: m, Label: label}, nil +} + +// Equal returns true if both Addrs have the same net.IPNet value. +func (a Addr) Equal(x Addr) bool { + sizea, _ := a.Mask.Size() + sizeb, _ := x.Mask.Size() + // ignore label for comparison + return a.IP.Equal(x.IP) && sizea == sizeb +} + +func (a Addr) PeerEqual(x Addr) bool { + sizea, _ := a.Peer.Mask.Size() + sizeb, _ := x.Peer.Mask.Size() + // ignore label for comparison + return a.Peer.IP.Equal(x.Peer.IP) && sizea == sizeb +} diff --git a/vendor/github.com/vishvananda/netlink/addr_linux.go b/vendor/github.com/vishvananda/netlink/addr_linux.go new file mode 100644 index 000000000000..72862ce1f44a --- /dev/null +++ b/vendor/github.com/vishvananda/netlink/addr_linux.go @@ -0,0 +1,421 @@ +package netlink + +import ( + "fmt" + "net" + "strings" + "syscall" + + "github.com/vishvananda/netlink/nl" + "github.com/vishvananda/netns" + "golang.org/x/sys/unix" +) + +// AddrAdd will add an IP address to a link device. +// +// Equivalent to: `ip addr add $addr dev $link` +// +// If `addr` is an IPv4 address and the broadcast address is not given, it +// will be automatically computed based on the IP mask if /30 or larger. +func AddrAdd(link Link, addr *Addr) error { + return pkgHandle.AddrAdd(link, addr) +} + +// AddrAdd will add an IP address to a link device. +// +// Equivalent to: `ip addr add $addr dev $link` +// +// If `addr` is an IPv4 address and the broadcast address is not given, it +// will be automatically computed based on the IP mask if /30 or larger. +func (h *Handle) AddrAdd(link Link, addr *Addr) error { + req := h.newNetlinkRequest(unix.RTM_NEWADDR, unix.NLM_F_CREATE|unix.NLM_F_EXCL|unix.NLM_F_ACK) + return h.addrHandle(link, addr, req) +} + +// AddrReplace will replace (or, if not present, add) an IP address on a link device. +// +// Equivalent to: `ip addr replace $addr dev $link` +// +// If `addr` is an IPv4 address and the broadcast address is not given, it +// will be automatically computed based on the IP mask if /30 or larger. +func AddrReplace(link Link, addr *Addr) error { + return pkgHandle.AddrReplace(link, addr) +} + +// AddrReplace will replace (or, if not present, add) an IP address on a link device. +// +// Equivalent to: `ip addr replace $addr dev $link` +// +// If `addr` is an IPv4 address and the broadcast address is not given, it +// will be automatically computed based on the IP mask if /30 or larger. +func (h *Handle) AddrReplace(link Link, addr *Addr) error { + req := h.newNetlinkRequest(unix.RTM_NEWADDR, unix.NLM_F_CREATE|unix.NLM_F_REPLACE|unix.NLM_F_ACK) + return h.addrHandle(link, addr, req) +} + +// AddrDel will delete an IP address from a link device. +// +// Equivalent to: `ip addr del $addr dev $link` +// +// If `addr` is an IPv4 address and the broadcast address is not given, it +// will be automatically computed based on the IP mask if /30 or larger. +func AddrDel(link Link, addr *Addr) error { + return pkgHandle.AddrDel(link, addr) +} + +// AddrDel will delete an IP address from a link device. +// Equivalent to: `ip addr del $addr dev $link` +// +// If `addr` is an IPv4 address and the broadcast address is not given, it +// will be automatically computed based on the IP mask if /30 or larger. +func (h *Handle) AddrDel(link Link, addr *Addr) error { + req := h.newNetlinkRequest(unix.RTM_DELADDR, unix.NLM_F_ACK) + return h.addrHandle(link, addr, req) +} + +func (h *Handle) addrHandle(link Link, addr *Addr, req *nl.NetlinkRequest) error { + base := link.Attrs() + if addr.Label != "" && !strings.HasPrefix(addr.Label, base.Name) { + return fmt.Errorf("label must begin with interface name") + } + h.ensureIndex(base) + + family := nl.GetIPFamily(addr.IP) + + msg := nl.NewIfAddrmsg(family) + msg.Index = uint32(base.Index) + msg.Scope = uint8(addr.Scope) + mask := addr.Mask + if addr.Peer != nil { + mask = addr.Peer.Mask + } + prefixlen, masklen := mask.Size() + msg.Prefixlen = uint8(prefixlen) + req.AddData(msg) + + var localAddrData []byte + if family == FAMILY_V4 { + localAddrData = addr.IP.To4() + } else { + localAddrData = addr.IP.To16() + } + + localData := nl.NewRtAttr(unix.IFA_LOCAL, localAddrData) + req.AddData(localData) + var peerAddrData []byte + if addr.Peer != nil { + if family == FAMILY_V4 { + peerAddrData = addr.Peer.IP.To4() + } else { + peerAddrData = addr.Peer.IP.To16() + } + } else { + peerAddrData = localAddrData + } + + addressData := nl.NewRtAttr(unix.IFA_ADDRESS, peerAddrData) + req.AddData(addressData) + + if addr.Flags != 0 { + if addr.Flags <= 0xff { + msg.IfAddrmsg.Flags = uint8(addr.Flags) + } else { + b := make([]byte, 4) + native.PutUint32(b, uint32(addr.Flags)) + flagsData := nl.NewRtAttr(unix.IFA_FLAGS, b) + req.AddData(flagsData) + } + } + + if family == FAMILY_V4 { + // Automatically set the broadcast address if it is unset and the + // subnet is large enough to sensibly have one (/30 or larger). + // See: RFC 3021 + if addr.Broadcast == nil && prefixlen < 31 { + calcBroadcast := make(net.IP, masklen/8) + for i := range localAddrData { + calcBroadcast[i] = localAddrData[i] | ^mask[i] + } + addr.Broadcast = calcBroadcast + } + + if addr.Broadcast != nil { + req.AddData(nl.NewRtAttr(unix.IFA_BROADCAST, addr.Broadcast)) + } + + if addr.Label != "" { + labelData := nl.NewRtAttr(unix.IFA_LABEL, nl.ZeroTerminated(addr.Label)) + req.AddData(labelData) + } + } + + // 0 is the default value for these attributes. However, 0 means "expired", while the least-surprising default + // value should be "forever". To compensate for that, only add the attributes if at least one of the values is + // non-zero, which means the caller has explicitly set them + if addr.ValidLft > 0 || addr.PreferedLft > 0 { + cachedata := nl.IfaCacheInfo{unix.IfaCacheinfo{ + Valid: uint32(addr.ValidLft), + Prefered: uint32(addr.PreferedLft), + }} + req.AddData(nl.NewRtAttr(unix.IFA_CACHEINFO, cachedata.Serialize())) + } + + _, err := req.Execute(unix.NETLINK_ROUTE, 0) + return err +} + +// AddrList gets a list of IP addresses in the system. +// Equivalent to: `ip addr show`. +// The list can be filtered by link and ip family. +func AddrList(link Link, family int) ([]Addr, error) { + return pkgHandle.AddrList(link, family) +} + +// AddrList gets a list of IP addresses in the system. +// Equivalent to: `ip addr show`. +// The list can be filtered by link and ip family. +func (h *Handle) AddrList(link Link, family int) ([]Addr, error) { + req := h.newNetlinkRequest(unix.RTM_GETADDR, unix.NLM_F_DUMP) + msg := nl.NewIfAddrmsg(family) + req.AddData(msg) + + msgs, err := req.Execute(unix.NETLINK_ROUTE, unix.RTM_NEWADDR) + if err != nil { + return nil, err + } + + indexFilter := 0 + if link != nil { + base := link.Attrs() + h.ensureIndex(base) + indexFilter = base.Index + } + + var res []Addr + for _, m := range msgs { + addr, msgFamily, err := parseAddr(m) + if err != nil { + return res, err + } + + if link != nil && addr.LinkIndex != indexFilter { + // Ignore messages from other interfaces + continue + } + + if family != FAMILY_ALL && msgFamily != family { + continue + } + + res = append(res, addr) + } + + return res, nil +} + +func parseAddr(m []byte) (addr Addr, family int, err error) { + msg := nl.DeserializeIfAddrmsg(m) + + family = -1 + addr.LinkIndex = -1 + + attrs, err1 := nl.ParseRouteAttr(m[msg.Len():]) + if err1 != nil { + err = err1 + return + } + + family = int(msg.Family) + addr.LinkIndex = int(msg.Index) + + var local, dst *net.IPNet + for _, attr := range attrs { + switch attr.Attr.Type { + case unix.IFA_ADDRESS: + dst = &net.IPNet{ + IP: attr.Value, + Mask: net.CIDRMask(int(msg.Prefixlen), 8*len(attr.Value)), + } + case unix.IFA_LOCAL: + // iproute2 manual: + // If a peer address is specified, the local address + // cannot have a prefix length. The network prefix is + // associated with the peer rather than with the local + // address. + n := 8 * len(attr.Value) + local = &net.IPNet{ + IP: attr.Value, + Mask: net.CIDRMask(n, n), + } + case unix.IFA_BROADCAST: + addr.Broadcast = attr.Value + case unix.IFA_LABEL: + addr.Label = string(attr.Value[:len(attr.Value)-1]) + case unix.IFA_FLAGS: + addr.Flags = int(native.Uint32(attr.Value[0:4])) + case unix.IFA_CACHEINFO: + ci := nl.DeserializeIfaCacheInfo(attr.Value) + addr.PreferedLft = int(ci.Prefered) + addr.ValidLft = int(ci.Valid) + } + } + + // libnl addr.c comment: + // IPv6 sends the local address as IFA_ADDRESS with no + // IFA_LOCAL, IPv4 sends both IFA_LOCAL and IFA_ADDRESS + // with IFA_ADDRESS being the peer address if they differ + // + // But obviously, as there are IPv6 PtP addresses, too, + // IFA_LOCAL should also be handled for IPv6. + if local != nil { + if family == FAMILY_V4 && dst != nil && local.IP.Equal(dst.IP) { + addr.IPNet = dst + } else { + addr.IPNet = local + addr.Peer = dst + } + } else { + addr.IPNet = dst + } + + addr.Scope = int(msg.Scope) + + return +} + +type AddrUpdate struct { + LinkAddress net.IPNet + LinkIndex int + Flags int + Scope int + PreferedLft int + ValidLft int + NewAddr bool // true=added false=deleted +} + +// AddrSubscribe takes a chan down which notifications will be sent +// when addresses change. Close the 'done' chan to stop subscription. +func AddrSubscribe(ch chan<- AddrUpdate, done <-chan struct{}) error { + return addrSubscribeAt(netns.None(), netns.None(), ch, done, nil, false, 0, nil) +} + +// AddrSubscribeAt works like AddrSubscribe plus it allows the caller +// to choose the network namespace in which to subscribe (ns). +func AddrSubscribeAt(ns netns.NsHandle, ch chan<- AddrUpdate, done <-chan struct{}) error { + return addrSubscribeAt(ns, netns.None(), ch, done, nil, false, 0, nil) +} + +// AddrSubscribeOptions contains a set of options to use with +// AddrSubscribeWithOptions. +type AddrSubscribeOptions struct { + Namespace *netns.NsHandle + ErrorCallback func(error) + ListExisting bool + ReceiveBufferSize int + ReceiveTimeout *unix.Timeval +} + +// AddrSubscribeWithOptions work like AddrSubscribe but enable to +// provide additional options to modify the behavior. Currently, the +// namespace can be provided as well as an error callback. +func AddrSubscribeWithOptions(ch chan<- AddrUpdate, done <-chan struct{}, options AddrSubscribeOptions) error { + if options.Namespace == nil { + none := netns.None() + options.Namespace = &none + } + return addrSubscribeAt(*options.Namespace, netns.None(), ch, done, options.ErrorCallback, options.ListExisting, options.ReceiveBufferSize, options.ReceiveTimeout) +} + +func addrSubscribeAt(newNs, curNs netns.NsHandle, ch chan<- AddrUpdate, done <-chan struct{}, cberr func(error), listExisting bool, rcvbuf int, rcvTimeout *unix.Timeval) error { + s, err := nl.SubscribeAt(newNs, curNs, unix.NETLINK_ROUTE, unix.RTNLGRP_IPV4_IFADDR, unix.RTNLGRP_IPV6_IFADDR) + if err != nil { + return err + } + if rcvTimeout != nil { + if err := s.SetReceiveTimeout(rcvTimeout); err != nil { + return err + } + } + + if done != nil { + go func() { + <-done + s.Close() + }() + } + if rcvbuf != 0 { + err = pkgHandle.SetSocketReceiveBufferSize(rcvbuf, false) + if err != nil { + return err + } + } + if listExisting { + req := pkgHandle.newNetlinkRequest(unix.RTM_GETADDR, + unix.NLM_F_DUMP) + infmsg := nl.NewIfInfomsg(unix.AF_UNSPEC) + req.AddData(infmsg) + if err := s.Send(req); err != nil { + return err + } + } + go func() { + defer close(ch) + for { + msgs, from, err := s.Receive() + if err != nil { + if cberr != nil { + cberr(fmt.Errorf("Receive failed: %v", + err)) + } + return + } + if from.Pid != nl.PidKernel { + if cberr != nil { + cberr(fmt.Errorf("Wrong sender portid %d, expected %d", from.Pid, nl.PidKernel)) + } + continue + } + for _, m := range msgs { + if m.Header.Type == unix.NLMSG_DONE { + continue + } + if m.Header.Type == unix.NLMSG_ERROR { + error := int32(native.Uint32(m.Data[0:4])) + if error == 0 { + continue + } + if cberr != nil { + cberr(fmt.Errorf("error message: %v", + syscall.Errno(-error))) + } + continue + } + msgType := m.Header.Type + if msgType != unix.RTM_NEWADDR && msgType != unix.RTM_DELADDR { + if cberr != nil { + cberr(fmt.Errorf("bad message type: %d", msgType)) + } + continue + } + + addr, _, err := parseAddr(m.Data) + if err != nil { + if cberr != nil { + cberr(fmt.Errorf("could not parse address: %v", err)) + } + continue + } + + ch <- AddrUpdate{LinkAddress: *addr.IPNet, + LinkIndex: addr.LinkIndex, + NewAddr: msgType == unix.RTM_NEWADDR, + Flags: addr.Flags, + Scope: addr.Scope, + PreferedLft: addr.PreferedLft, + ValidLft: addr.ValidLft} + } + } + }() + + return nil +} diff --git a/vendor/github.com/vishvananda/netlink/bpf_linux.go b/vendor/github.com/vishvananda/netlink/bpf_linux.go new file mode 100644 index 000000000000..96befbfe0cad --- /dev/null +++ b/vendor/github.com/vishvananda/netlink/bpf_linux.go @@ -0,0 +1,77 @@ +package netlink + +import ( + "unsafe" + + "golang.org/x/sys/unix" +) + +type BpfProgType uint32 + +const ( + BPF_PROG_TYPE_UNSPEC BpfProgType = iota + BPF_PROG_TYPE_SOCKET_FILTER + BPF_PROG_TYPE_KPROBE + BPF_PROG_TYPE_SCHED_CLS + BPF_PROG_TYPE_SCHED_ACT + BPF_PROG_TYPE_TRACEPOINT + BPF_PROG_TYPE_XDP + BPF_PROG_TYPE_PERF_EVENT + BPF_PROG_TYPE_CGROUP_SKB + BPF_PROG_TYPE_CGROUP_SOCK + BPF_PROG_TYPE_LWT_IN + BPF_PROG_TYPE_LWT_OUT + BPF_PROG_TYPE_LWT_XMIT + BPF_PROG_TYPE_SOCK_OPS + BPF_PROG_TYPE_SK_SKB + BPF_PROG_TYPE_CGROUP_DEVICE + BPF_PROG_TYPE_SK_MSG + BPF_PROG_TYPE_RAW_TRACEPOINT + BPF_PROG_TYPE_CGROUP_SOCK_ADDR + BPF_PROG_TYPE_LWT_SEG6LOCAL + BPF_PROG_TYPE_LIRC_MODE2 + BPF_PROG_TYPE_SK_REUSEPORT + BPF_PROG_TYPE_FLOW_DISSECTOR + BPF_PROG_TYPE_CGROUP_SYSCTL + BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE + BPF_PROG_TYPE_CGROUP_SOCKOPT + BPF_PROG_TYPE_TRACING + BPF_PROG_TYPE_STRUCT_OPS + BPF_PROG_TYPE_EXT + BPF_PROG_TYPE_LSM + BPF_PROG_TYPE_SK_LOOKUP +) + +type BPFAttr struct { + ProgType uint32 + InsnCnt uint32 + Insns uintptr + License uintptr + LogLevel uint32 + LogSize uint32 + LogBuf uintptr + KernVersion uint32 +} + +// loadSimpleBpf loads a trivial bpf program for testing purposes. +func loadSimpleBpf(progType BpfProgType, ret uint32) (int, error) { + insns := []uint64{ + 0x00000000000000b7 | (uint64(ret) << 32), + 0x0000000000000095, + } + license := []byte{'A', 'S', 'L', '2', '\x00'} + attr := BPFAttr{ + ProgType: uint32(progType), + InsnCnt: uint32(len(insns)), + Insns: uintptr(unsafe.Pointer(&insns[0])), + License: uintptr(unsafe.Pointer(&license[0])), + } + fd, _, errno := unix.Syscall(unix.SYS_BPF, + 5, /* bpf cmd */ + uintptr(unsafe.Pointer(&attr)), + unsafe.Sizeof(attr)) + if errno != 0 { + return 0, errno + } + return int(fd), nil +} diff --git a/vendor/github.com/vishvananda/netlink/bridge_linux.go b/vendor/github.com/vishvananda/netlink/bridge_linux.go new file mode 100644 index 000000000000..6e1224c47b8f --- /dev/null +++ b/vendor/github.com/vishvananda/netlink/bridge_linux.go @@ -0,0 +1,112 @@ +package netlink + +import ( + "fmt" + + "github.com/vishvananda/netlink/nl" + "golang.org/x/sys/unix" +) + +// BridgeVlanList gets a map of device id to bridge vlan infos. +// Equivalent to: `bridge vlan show` +func BridgeVlanList() (map[int32][]*nl.BridgeVlanInfo, error) { + return pkgHandle.BridgeVlanList() +} + +// BridgeVlanList gets a map of device id to bridge vlan infos. +// Equivalent to: `bridge vlan show` +func (h *Handle) BridgeVlanList() (map[int32][]*nl.BridgeVlanInfo, error) { + req := h.newNetlinkRequest(unix.RTM_GETLINK, unix.NLM_F_DUMP) + msg := nl.NewIfInfomsg(unix.AF_BRIDGE) + req.AddData(msg) + req.AddData(nl.NewRtAttr(unix.IFLA_EXT_MASK, nl.Uint32Attr(uint32(nl.RTEXT_FILTER_BRVLAN)))) + + msgs, err := req.Execute(unix.NETLINK_ROUTE, unix.RTM_NEWLINK) + if err != nil { + return nil, err + } + ret := make(map[int32][]*nl.BridgeVlanInfo) + for _, m := range msgs { + msg := nl.DeserializeIfInfomsg(m) + + attrs, err := nl.ParseRouteAttr(m[msg.Len():]) + if err != nil { + return nil, err + } + for _, attr := range attrs { + switch attr.Attr.Type { + case unix.IFLA_AF_SPEC: + //nested attr + nestAttrs, err := nl.ParseRouteAttr(attr.Value) + if err != nil { + return nil, fmt.Errorf("failed to parse nested attr %v", err) + } + for _, nestAttr := range nestAttrs { + switch nestAttr.Attr.Type { + case nl.IFLA_BRIDGE_VLAN_INFO: + vlanInfo := nl.DeserializeBridgeVlanInfo(nestAttr.Value) + ret[msg.Index] = append(ret[msg.Index], vlanInfo) + } + } + } + } + } + return ret, nil +} + +// BridgeVlanAdd adds a new vlan filter entry +// Equivalent to: `bridge vlan add dev DEV vid VID [ pvid ] [ untagged ] [ self ] [ master ]` +func BridgeVlanAdd(link Link, vid uint16, pvid, untagged, self, master bool) error { + return pkgHandle.BridgeVlanAdd(link, vid, pvid, untagged, self, master) +} + +// BridgeVlanAdd adds a new vlan filter entry +// Equivalent to: `bridge vlan add dev DEV vid VID [ pvid ] [ untagged ] [ self ] [ master ]` +func (h *Handle) BridgeVlanAdd(link Link, vid uint16, pvid, untagged, self, master bool) error { + return h.bridgeVlanModify(unix.RTM_SETLINK, link, vid, pvid, untagged, self, master) +} + +// BridgeVlanDel adds a new vlan filter entry +// Equivalent to: `bridge vlan del dev DEV vid VID [ pvid ] [ untagged ] [ self ] [ master ]` +func BridgeVlanDel(link Link, vid uint16, pvid, untagged, self, master bool) error { + return pkgHandle.BridgeVlanDel(link, vid, pvid, untagged, self, master) +} + +// BridgeVlanDel adds a new vlan filter entry +// Equivalent to: `bridge vlan del dev DEV vid VID [ pvid ] [ untagged ] [ self ] [ master ]` +func (h *Handle) BridgeVlanDel(link Link, vid uint16, pvid, untagged, self, master bool) error { + return h.bridgeVlanModify(unix.RTM_DELLINK, link, vid, pvid, untagged, self, master) +} + +func (h *Handle) bridgeVlanModify(cmd int, link Link, vid uint16, pvid, untagged, self, master bool) error { + base := link.Attrs() + h.ensureIndex(base) + req := h.newNetlinkRequest(cmd, unix.NLM_F_ACK) + + msg := nl.NewIfInfomsg(unix.AF_BRIDGE) + msg.Index = int32(base.Index) + req.AddData(msg) + + br := nl.NewRtAttr(unix.IFLA_AF_SPEC, nil) + var flags uint16 + if self { + flags |= nl.BRIDGE_FLAGS_SELF + } + if master { + flags |= nl.BRIDGE_FLAGS_MASTER + } + if flags > 0 { + br.AddRtAttr(nl.IFLA_BRIDGE_FLAGS, nl.Uint16Attr(flags)) + } + vlanInfo := &nl.BridgeVlanInfo{Vid: vid} + if pvid { + vlanInfo.Flags |= nl.BRIDGE_VLAN_INFO_PVID + } + if untagged { + vlanInfo.Flags |= nl.BRIDGE_VLAN_INFO_UNTAGGED + } + br.AddRtAttr(nl.IFLA_BRIDGE_VLAN_INFO, vlanInfo.Serialize()) + req.AddData(br) + _, err := req.Execute(unix.NETLINK_ROUTE, 0) + return err +} diff --git a/vendor/github.com/vishvananda/netlink/class.go b/vendor/github.com/vishvananda/netlink/class.go new file mode 100644 index 000000000000..10ceffed8bb1 --- /dev/null +++ b/vendor/github.com/vishvananda/netlink/class.go @@ -0,0 +1,239 @@ +package netlink + +import ( + "fmt" +) + +// Class interfaces for all classes +type Class interface { + Attrs() *ClassAttrs + Type() string +} + +// Generic networking statistics for netlink users. +// This file contains "gnet_" prefixed structs and relevant functions. +// See Documentation/networking/getn_stats.txt in Linux source code for more details. + +// GnetStatsBasic Ref: struct gnet_stats_basic { ... } +type GnetStatsBasic struct { + Bytes uint64 // number of seen bytes + Packets uint32 // number of seen packets +} + +// GnetStatsRateEst Ref: struct gnet_stats_rate_est { ... } +type GnetStatsRateEst struct { + Bps uint32 // current byte rate + Pps uint32 // current packet rate +} + +// GnetStatsRateEst64 Ref: struct gnet_stats_rate_est64 { ... } +type GnetStatsRateEst64 struct { + Bps uint64 // current byte rate + Pps uint64 // current packet rate +} + +// GnetStatsQueue Ref: struct gnet_stats_queue { ... } +type GnetStatsQueue struct { + Qlen uint32 // queue length + Backlog uint32 // backlog size of queue + Drops uint32 // number of dropped packets + Requeues uint32 // number of requues + Overlimits uint32 // number of enqueues over the limit +} + +// ClassStatistics representation based on generic networking statistics for netlink. +// See Documentation/networking/gen_stats.txt in Linux source code for more details. +type ClassStatistics struct { + Basic *GnetStatsBasic + Queue *GnetStatsQueue + RateEst *GnetStatsRateEst +} + +// NewClassStatistics Construct a ClassStatistics struct which fields are all initialized by 0. +func NewClassStatistics() *ClassStatistics { + return &ClassStatistics{ + Basic: &GnetStatsBasic{}, + Queue: &GnetStatsQueue{}, + RateEst: &GnetStatsRateEst{}, + } +} + +// ClassAttrs represents a netlink class. A filter is associated with a link, +// has a handle and a parent. The root filter of a device should have a +// parent == HANDLE_ROOT. +type ClassAttrs struct { + LinkIndex int + Handle uint32 + Parent uint32 + Leaf uint32 + Statistics *ClassStatistics +} + +func (q ClassAttrs) String() string { + return fmt.Sprintf("{LinkIndex: %d, Handle: %s, Parent: %s, Leaf: %d}", q.LinkIndex, HandleStr(q.Handle), HandleStr(q.Parent), q.Leaf) +} + +// HtbClassAttrs stores the attributes of HTB class +type HtbClassAttrs struct { + // TODO handle all attributes + Rate uint64 + Ceil uint64 + Buffer uint32 + Cbuffer uint32 + Quantum uint32 + Level uint32 + Prio uint32 +} + +func (q HtbClassAttrs) String() string { + return fmt.Sprintf("{Rate: %d, Ceil: %d, Buffer: %d, Cbuffer: %d}", q.Rate, q.Ceil, q.Buffer, q.Cbuffer) +} + +// HtbClass represents an Htb class +type HtbClass struct { + ClassAttrs + Rate uint64 + Ceil uint64 + Buffer uint32 + Cbuffer uint32 + Quantum uint32 + Level uint32 + Prio uint32 +} + +func (q HtbClass) String() string { + return fmt.Sprintf("{Rate: %d, Ceil: %d, Buffer: %d, Cbuffer: %d}", q.Rate, q.Ceil, q.Buffer, q.Cbuffer) +} + +// Attrs returns the class attributes +func (q *HtbClass) Attrs() *ClassAttrs { + return &q.ClassAttrs +} + +// Type return the class type +func (q *HtbClass) Type() string { + return "htb" +} + +// GenericClass classes represent types that are not currently understood +// by this netlink library. +type GenericClass struct { + ClassAttrs + ClassType string +} + +// Attrs return the class attributes +func (class *GenericClass) Attrs() *ClassAttrs { + return &class.ClassAttrs +} + +// Type return the class type +func (class *GenericClass) Type() string { + return class.ClassType +} + +// ServiceCurve is a nondecreasing function of some time unit, returning the amount of service +// (an allowed or allocated amount of bandwidth) at some specific point in time. The purpose of it +// should be subconsciously obvious: if a class was allowed to transfer not less than the amount +// specified by its service curve, then the service curve is not violated. +type ServiceCurve struct { + m1 uint32 + d uint32 + m2 uint32 +} + +// Attrs return the parameters of the service curve +func (c *ServiceCurve) Attrs() (uint32, uint32, uint32) { + return c.m1, c.d, c.m2 +} + +// Burst returns the burst rate (m1) of the curve +func (c *ServiceCurve) Burst() uint32 { + return c.m1 +} + +// Delay return the delay (d) of the curve +func (c *ServiceCurve) Delay() uint32 { + return c.d +} + +// Rate returns the rate (m2) of the curve +func (c *ServiceCurve) Rate() uint32 { + return c.m2 +} + +// HfscClass is a representation of the HFSC class +type HfscClass struct { + ClassAttrs + Rsc ServiceCurve + Fsc ServiceCurve + Usc ServiceCurve +} + +// SetUsc sets the USC curve. The bandwidth (m1 and m2) is specified in bits and the delay in +// seconds. +func (hfsc *HfscClass) SetUsc(m1 uint32, d uint32, m2 uint32) { + hfsc.Usc = ServiceCurve{m1: m1, d: d, m2: m2} +} + +// SetFsc sets the Fsc curve. The bandwidth (m1 and m2) is specified in bits and the delay in +// seconds. +func (hfsc *HfscClass) SetFsc(m1 uint32, d uint32, m2 uint32) { + hfsc.Fsc = ServiceCurve{m1: m1, d: d, m2: m2} +} + +// SetRsc sets the Rsc curve. The bandwidth (m1 and m2) is specified in bits and the delay in +// seconds. +func (hfsc *HfscClass) SetRsc(m1 uint32, d uint32, m2 uint32) { + hfsc.Rsc = ServiceCurve{m1: m1, d: d, m2: m2} +} + +// SetSC implements the SC from the `tc` CLI. This function behaves the same as if one would set the +// USC through the `tc` command-line tool. This means bandwidth (m1 and m2) is specified in bits and +// the delay in ms. +func (hfsc *HfscClass) SetSC(m1 uint32, d uint32, m2 uint32) { + hfsc.SetRsc(m1, d, m2) + hfsc.SetFsc(m1, d, m2) +} + +// SetUL implements the UL from the `tc` CLI. This function behaves the same as if one would set the +// USC through the `tc` command-line tool. This means bandwidth (m1 and m2) is specified in bits and +// the delay in ms. +func (hfsc *HfscClass) SetUL(m1 uint32, d uint32, m2 uint32) { + hfsc.SetUsc(m1, d, m2) +} + +// SetLS implements the LS from the `tc` CLI. This function behaves the same as if one would set the +// USC through the `tc` command-line tool. This means bandwidth (m1 and m2) is specified in bits and +// the delay in ms. +func (hfsc *HfscClass) SetLS(m1 uint32, d uint32, m2 uint32) { + hfsc.SetFsc(m1, d, m2) +} + +// NewHfscClass returns a new HFSC struct with the set parameters +func NewHfscClass(attrs ClassAttrs) *HfscClass { + return &HfscClass{ + ClassAttrs: attrs, + Rsc: ServiceCurve{}, + Fsc: ServiceCurve{}, + Usc: ServiceCurve{}, + } +} + +// String() returns a string that contains the information and attributes of the HFSC class +func (hfsc *HfscClass) String() string { + return fmt.Sprintf( + "{%s -- {RSC: {m1=%d d=%d m2=%d}} {FSC: {m1=%d d=%d m2=%d}} {USC: {m1=%d d=%d m2=%d}}}", + hfsc.Attrs(), hfsc.Rsc.m1*8, hfsc.Rsc.d, hfsc.Rsc.m2*8, hfsc.Fsc.m1*8, hfsc.Fsc.d, hfsc.Fsc.m2*8, hfsc.Usc.m1*8, hfsc.Usc.d, hfsc.Usc.m2*8, + ) +} + +// Attrs return the Hfsc parameters +func (hfsc *HfscClass) Attrs() *ClassAttrs { + return &hfsc.ClassAttrs +} + +// Type return the type of the class +func (hfsc *HfscClass) Type() string { + return "hfsc" +} diff --git a/vendor/github.com/vishvananda/netlink/class_linux.go b/vendor/github.com/vishvananda/netlink/class_linux.go new file mode 100644 index 000000000000..6f542ba4e72c --- /dev/null +++ b/vendor/github.com/vishvananda/netlink/class_linux.go @@ -0,0 +1,395 @@ +package netlink + +import ( + "bytes" + "encoding/binary" + "encoding/hex" + "errors" + "fmt" + "syscall" + + "github.com/vishvananda/netlink/nl" + "golang.org/x/sys/unix" +) + +// Internal tc_stats representation in Go struct. +// This is for internal uses only to deserialize the payload of rtattr. +// After the deserialization, this should be converted into the canonical stats +// struct, ClassStatistics, in case of statistics of a class. +// Ref: struct tc_stats { ... } +type tcStats struct { + Bytes uint64 // Number of enqueued bytes + Packets uint32 // Number of enqueued packets + Drops uint32 // Packets dropped because of lack of resources + Overlimits uint32 // Number of throttle events when this flow goes out of allocated bandwidth + Bps uint32 // Current flow byte rate + Pps uint32 // Current flow packet rate + Qlen uint32 + Backlog uint32 +} + +// NewHtbClass NOTE: function is in here because it uses other linux functions +func NewHtbClass(attrs ClassAttrs, cattrs HtbClassAttrs) *HtbClass { + mtu := 1600 + rate := cattrs.Rate / 8 + ceil := cattrs.Ceil / 8 + buffer := cattrs.Buffer + cbuffer := cattrs.Cbuffer + + if ceil == 0 { + ceil = rate + } + + if buffer == 0 { + buffer = uint32(float64(rate)/Hz() + float64(mtu)) + } + buffer = Xmittime(rate, buffer) + + if cbuffer == 0 { + cbuffer = uint32(float64(ceil)/Hz() + float64(mtu)) + } + cbuffer = Xmittime(ceil, cbuffer) + + return &HtbClass{ + ClassAttrs: attrs, + Rate: rate, + Ceil: ceil, + Buffer: buffer, + Cbuffer: cbuffer, + Level: 0, + Prio: cattrs.Prio, + Quantum: cattrs.Quantum, + } +} + +// ClassDel will delete a class from the system. +// Equivalent to: `tc class del $class` +func ClassDel(class Class) error { + return pkgHandle.ClassDel(class) +} + +// ClassDel will delete a class from the system. +// Equivalent to: `tc class del $class` +func (h *Handle) ClassDel(class Class) error { + return h.classModify(unix.RTM_DELTCLASS, 0, class) +} + +// ClassChange will change a class in place +// Equivalent to: `tc class change $class` +// The parent and handle MUST NOT be changed. +func ClassChange(class Class) error { + return pkgHandle.ClassChange(class) +} + +// ClassChange will change a class in place +// Equivalent to: `tc class change $class` +// The parent and handle MUST NOT be changed. +func (h *Handle) ClassChange(class Class) error { + return h.classModify(unix.RTM_NEWTCLASS, 0, class) +} + +// ClassReplace will replace a class to the system. +// quivalent to: `tc class replace $class` +// The handle MAY be changed. +// If a class already exist with this parent/handle pair, the class is changed. +// If a class does not already exist with this parent/handle, a new class is created. +func ClassReplace(class Class) error { + return pkgHandle.ClassReplace(class) +} + +// ClassReplace will replace a class to the system. +// quivalent to: `tc class replace $class` +// The handle MAY be changed. +// If a class already exist with this parent/handle pair, the class is changed. +// If a class does not already exist with this parent/handle, a new class is created. +func (h *Handle) ClassReplace(class Class) error { + return h.classModify(unix.RTM_NEWTCLASS, unix.NLM_F_CREATE, class) +} + +// ClassAdd will add a class to the system. +// Equivalent to: `tc class add $class` +func ClassAdd(class Class) error { + return pkgHandle.ClassAdd(class) +} + +// ClassAdd will add a class to the system. +// Equivalent to: `tc class add $class` +func (h *Handle) ClassAdd(class Class) error { + return h.classModify( + unix.RTM_NEWTCLASS, + unix.NLM_F_CREATE|unix.NLM_F_EXCL, + class, + ) +} + +func (h *Handle) classModify(cmd, flags int, class Class) error { + req := h.newNetlinkRequest(cmd, flags|unix.NLM_F_ACK) + base := class.Attrs() + msg := &nl.TcMsg{ + Family: nl.FAMILY_ALL, + Ifindex: int32(base.LinkIndex), + Handle: base.Handle, + Parent: base.Parent, + } + req.AddData(msg) + + if cmd != unix.RTM_DELTCLASS { + if err := classPayload(req, class); err != nil { + return err + } + } + _, err := req.Execute(unix.NETLINK_ROUTE, 0) + return err +} + +func classPayload(req *nl.NetlinkRequest, class Class) error { + req.AddData(nl.NewRtAttr(nl.TCA_KIND, nl.ZeroTerminated(class.Type()))) + + options := nl.NewRtAttr(nl.TCA_OPTIONS, nil) + switch class.Type() { + case "htb": + htb := class.(*HtbClass) + opt := nl.TcHtbCopt{} + opt.Buffer = htb.Buffer + opt.Cbuffer = htb.Cbuffer + opt.Quantum = htb.Quantum + opt.Level = htb.Level + opt.Prio = htb.Prio + // TODO: Handle Debug properly. For now default to 0 + /* Calculate {R,C}Tab and set Rate and Ceil */ + cellLog := -1 + ccellLog := -1 + linklayer := nl.LINKLAYER_ETHERNET + mtu := 1600 + var rtab [256]uint32 + var ctab [256]uint32 + tcrate := nl.TcRateSpec{Rate: uint32(htb.Rate)} + if CalcRtable(&tcrate, rtab[:], cellLog, uint32(mtu), linklayer) < 0 { + return errors.New("HTB: failed to calculate rate table") + } + opt.Rate = tcrate + tcceil := nl.TcRateSpec{Rate: uint32(htb.Ceil)} + if CalcRtable(&tcceil, ctab[:], ccellLog, uint32(mtu), linklayer) < 0 { + return errors.New("HTB: failed to calculate ceil rate table") + } + opt.Ceil = tcceil + options.AddRtAttr(nl.TCA_HTB_PARMS, opt.Serialize()) + options.AddRtAttr(nl.TCA_HTB_RTAB, SerializeRtab(rtab)) + options.AddRtAttr(nl.TCA_HTB_CTAB, SerializeRtab(ctab)) + if htb.Rate >= uint64(1<<32) { + options.AddRtAttr(nl.TCA_HTB_RATE64, nl.Uint64Attr(htb.Rate)) + } + if htb.Ceil >= uint64(1<<32) { + options.AddRtAttr(nl.TCA_HTB_CEIL64, nl.Uint64Attr(htb.Ceil)) + } + case "hfsc": + hfsc := class.(*HfscClass) + opt := nl.HfscCopt{} + rm1, rd, rm2 := hfsc.Rsc.Attrs() + opt.Rsc.Set(rm1/8, rd, rm2/8) + fm1, fd, fm2 := hfsc.Fsc.Attrs() + opt.Fsc.Set(fm1/8, fd, fm2/8) + um1, ud, um2 := hfsc.Usc.Attrs() + opt.Usc.Set(um1/8, ud, um2/8) + options.AddRtAttr(nl.TCA_HFSC_RSC, nl.SerializeHfscCurve(&opt.Rsc)) + options.AddRtAttr(nl.TCA_HFSC_FSC, nl.SerializeHfscCurve(&opt.Fsc)) + options.AddRtAttr(nl.TCA_HFSC_USC, nl.SerializeHfscCurve(&opt.Usc)) + } + req.AddData(options) + return nil +} + +// ClassList gets a list of classes in the system. +// Equivalent to: `tc class show`. +// Generally returns nothing if link and parent are not specified. +func ClassList(link Link, parent uint32) ([]Class, error) { + return pkgHandle.ClassList(link, parent) +} + +// ClassList gets a list of classes in the system. +// Equivalent to: `tc class show`. +// Generally returns nothing if link and parent are not specified. +func (h *Handle) ClassList(link Link, parent uint32) ([]Class, error) { + req := h.newNetlinkRequest(unix.RTM_GETTCLASS, unix.NLM_F_DUMP) + msg := &nl.TcMsg{ + Family: nl.FAMILY_ALL, + Parent: parent, + } + if link != nil { + base := link.Attrs() + h.ensureIndex(base) + msg.Ifindex = int32(base.Index) + } + req.AddData(msg) + + msgs, err := req.Execute(unix.NETLINK_ROUTE, unix.RTM_NEWTCLASS) + if err != nil { + return nil, err + } + + var res []Class + for _, m := range msgs { + msg := nl.DeserializeTcMsg(m) + + attrs, err := nl.ParseRouteAttr(m[msg.Len():]) + if err != nil { + return nil, err + } + + base := ClassAttrs{ + LinkIndex: int(msg.Ifindex), + Handle: msg.Handle, + Parent: msg.Parent, + Statistics: nil, + } + + var class Class + classType := "" + for _, attr := range attrs { + switch attr.Attr.Type { + case nl.TCA_KIND: + classType = string(attr.Value[:len(attr.Value)-1]) + switch classType { + case "htb": + class = &HtbClass{} + case "hfsc": + class = &HfscClass{} + default: + class = &GenericClass{ClassType: classType} + } + case nl.TCA_OPTIONS: + switch classType { + case "htb": + data, err := nl.ParseRouteAttr(attr.Value) + if err != nil { + return nil, err + } + _, err = parseHtbClassData(class, data) + if err != nil { + return nil, err + } + case "hfsc": + data, err := nl.ParseRouteAttr(attr.Value) + if err != nil { + return nil, err + } + _, err = parseHfscClassData(class, data) + if err != nil { + return nil, err + } + } + // For backward compatibility. + case nl.TCA_STATS: + base.Statistics, err = parseTcStats(attr.Value) + if err != nil { + return nil, err + } + case nl.TCA_STATS2: + base.Statistics, err = parseTcStats2(attr.Value) + if err != nil { + return nil, err + } + } + } + *class.Attrs() = base + res = append(res, class) + } + + return res, nil +} + +func parseHtbClassData(class Class, data []syscall.NetlinkRouteAttr) (bool, error) { + htb := class.(*HtbClass) + detailed := false + for _, datum := range data { + switch datum.Attr.Type { + case nl.TCA_HTB_PARMS: + opt := nl.DeserializeTcHtbCopt(datum.Value) + htb.Rate = uint64(opt.Rate.Rate) + htb.Ceil = uint64(opt.Ceil.Rate) + htb.Buffer = opt.Buffer + htb.Cbuffer = opt.Cbuffer + htb.Quantum = opt.Quantum + htb.Level = opt.Level + htb.Prio = opt.Prio + case nl.TCA_HTB_RATE64: + htb.Rate = native.Uint64(datum.Value[0:8]) + case nl.TCA_HTB_CEIL64: + htb.Ceil = native.Uint64(datum.Value[0:8]) + } + } + return detailed, nil +} + +func parseHfscClassData(class Class, data []syscall.NetlinkRouteAttr) (bool, error) { + hfsc := class.(*HfscClass) + detailed := false + for _, datum := range data { + m1, d, m2 := nl.DeserializeHfscCurve(datum.Value).Attrs() + switch datum.Attr.Type { + case nl.TCA_HFSC_RSC: + hfsc.Rsc = ServiceCurve{m1: m1 * 8, d: d, m2: m2 * 8} + case nl.TCA_HFSC_FSC: + hfsc.Fsc = ServiceCurve{m1: m1 * 8, d: d, m2: m2 * 8} + case nl.TCA_HFSC_USC: + hfsc.Usc = ServiceCurve{m1: m1 * 8, d: d, m2: m2 * 8} + } + } + return detailed, nil +} + +func parseTcStats(data []byte) (*ClassStatistics, error) { + buf := &bytes.Buffer{} + buf.Write(data) + tcStats := &tcStats{} + if err := binary.Read(buf, native, tcStats); err != nil { + return nil, err + } + + stats := NewClassStatistics() + stats.Basic.Bytes = tcStats.Bytes + stats.Basic.Packets = tcStats.Packets + stats.Queue.Qlen = tcStats.Qlen + stats.Queue.Backlog = tcStats.Backlog + stats.Queue.Drops = tcStats.Drops + stats.Queue.Overlimits = tcStats.Overlimits + stats.RateEst.Bps = tcStats.Bps + stats.RateEst.Pps = tcStats.Pps + + return stats, nil +} + +func parseGnetStats(data []byte, gnetStats interface{}) error { + buf := &bytes.Buffer{} + buf.Write(data) + return binary.Read(buf, native, gnetStats) +} + +func parseTcStats2(data []byte) (*ClassStatistics, error) { + rtAttrs, err := nl.ParseRouteAttr(data) + if err != nil { + return nil, err + } + stats := NewClassStatistics() + for _, datum := range rtAttrs { + switch datum.Attr.Type { + case nl.TCA_STATS_BASIC: + if err := parseGnetStats(datum.Value, stats.Basic); err != nil { + return nil, fmt.Errorf("Failed to parse ClassStatistics.Basic with: %v\n%s", + err, hex.Dump(datum.Value)) + } + case nl.TCA_STATS_QUEUE: + if err := parseGnetStats(datum.Value, stats.Queue); err != nil { + return nil, fmt.Errorf("Failed to parse ClassStatistics.Queue with: %v\n%s", + err, hex.Dump(datum.Value)) + } + case nl.TCA_STATS_RATE_EST: + if err := parseGnetStats(datum.Value, stats.RateEst); err != nil { + return nil, fmt.Errorf("Failed to parse ClassStatistics.RateEst with: %v\n%s", + err, hex.Dump(datum.Value)) + } + } + } + + return stats, nil +} diff --git a/vendor/github.com/vishvananda/netlink/conntrack_linux.go b/vendor/github.com/vishvananda/netlink/conntrack_linux.go new file mode 100644 index 000000000000..03ea1b98fc28 --- /dev/null +++ b/vendor/github.com/vishvananda/netlink/conntrack_linux.go @@ -0,0 +1,537 @@ +package netlink + +import ( + "bytes" + "encoding/binary" + "errors" + "fmt" + "net" + "time" + + "github.com/vishvananda/netlink/nl" + "golang.org/x/sys/unix" +) + +// ConntrackTableType Conntrack table for the netlink operation +type ConntrackTableType uint8 + +const ( + // ConntrackTable Conntrack table + // https://github.com/torvalds/linux/blob/master/include/uapi/linux/netfilter/nfnetlink.h -> #define NFNL_SUBSYS_CTNETLINK 1 + ConntrackTable = 1 + // ConntrackExpectTable Conntrack expect table + // https://github.com/torvalds/linux/blob/master/include/uapi/linux/netfilter/nfnetlink.h -> #define NFNL_SUBSYS_CTNETLINK_EXP 2 + ConntrackExpectTable = 2 +) + +const ( + // backward compatibility with golang 1.6 which does not have io.SeekCurrent + seekCurrent = 1 +) + +// InetFamily Family type +type InetFamily uint8 + +// -L [table] [options] List conntrack or expectation table +// -G [table] parameters Get conntrack or expectation + +// -I [table] parameters Create a conntrack or expectation +// -U [table] parameters Update a conntrack +// -E [table] [options] Show events + +// -C [table] Show counter +// -S Show statistics + +// ConntrackTableList returns the flow list of a table of a specific family +// conntrack -L [table] [options] List conntrack or expectation table +func ConntrackTableList(table ConntrackTableType, family InetFamily) ([]*ConntrackFlow, error) { + return pkgHandle.ConntrackTableList(table, family) +} + +// ConntrackTableFlush flushes all the flows of a specified table +// conntrack -F [table] Flush table +// The flush operation applies to all the family types +func ConntrackTableFlush(table ConntrackTableType) error { + return pkgHandle.ConntrackTableFlush(table) +} + +// ConntrackDeleteFilter deletes entries on the specified table on the base of the filter +// conntrack -D [table] parameters Delete conntrack or expectation +func ConntrackDeleteFilter(table ConntrackTableType, family InetFamily, filter CustomConntrackFilter) (uint, error) { + return pkgHandle.ConntrackDeleteFilter(table, family, filter) +} + +// ConntrackTableList returns the flow list of a table of a specific family using the netlink handle passed +// conntrack -L [table] [options] List conntrack or expectation table +func (h *Handle) ConntrackTableList(table ConntrackTableType, family InetFamily) ([]*ConntrackFlow, error) { + res, err := h.dumpConntrackTable(table, family) + if err != nil { + return nil, err + } + + // Deserialize all the flows + var result []*ConntrackFlow + for _, dataRaw := range res { + result = append(result, parseRawData(dataRaw)) + } + + return result, nil +} + +// ConntrackTableFlush flushes all the flows of a specified table using the netlink handle passed +// conntrack -F [table] Flush table +// The flush operation applies to all the family types +func (h *Handle) ConntrackTableFlush(table ConntrackTableType) error { + req := h.newConntrackRequest(table, unix.AF_INET, nl.IPCTNL_MSG_CT_DELETE, unix.NLM_F_ACK) + _, err := req.Execute(unix.NETLINK_NETFILTER, 0) + return err +} + +// ConntrackDeleteFilter deletes entries on the specified table on the base of the filter using the netlink handle passed +// conntrack -D [table] parameters Delete conntrack or expectation +func (h *Handle) ConntrackDeleteFilter(table ConntrackTableType, family InetFamily, filter CustomConntrackFilter) (uint, error) { + res, err := h.dumpConntrackTable(table, family) + if err != nil { + return 0, err + } + + var matched uint + for _, dataRaw := range res { + flow := parseRawData(dataRaw) + if match := filter.MatchConntrackFlow(flow); match { + req2 := h.newConntrackRequest(table, family, nl.IPCTNL_MSG_CT_DELETE, unix.NLM_F_ACK) + // skip the first 4 byte that are the netfilter header, the newConntrackRequest is adding it already + req2.AddRawData(dataRaw[4:]) + req2.Execute(unix.NETLINK_NETFILTER, 0) + matched++ + } + } + + return matched, nil +} + +func (h *Handle) newConntrackRequest(table ConntrackTableType, family InetFamily, operation, flags int) *nl.NetlinkRequest { + // Create the Netlink request object + req := h.newNetlinkRequest((int(table)<<8)|operation, flags) + // Add the netfilter header + msg := &nl.Nfgenmsg{ + NfgenFamily: uint8(family), + Version: nl.NFNETLINK_V0, + ResId: 0, + } + req.AddData(msg) + return req +} + +func (h *Handle) dumpConntrackTable(table ConntrackTableType, family InetFamily) ([][]byte, error) { + req := h.newConntrackRequest(table, family, nl.IPCTNL_MSG_CT_GET, unix.NLM_F_DUMP) + return req.Execute(unix.NETLINK_NETFILTER, 0) +} + +// The full conntrack flow structure is very complicated and can be found in the file: +// http://git.netfilter.org/libnetfilter_conntrack/tree/include/internal/object.h +// For the time being, the structure below allows to parse and extract the base information of a flow +type ipTuple struct { + Bytes uint64 + DstIP net.IP + DstPort uint16 + Packets uint64 + Protocol uint8 + SrcIP net.IP + SrcPort uint16 +} + +type ConntrackFlow struct { + FamilyType uint8 + Forward ipTuple + Reverse ipTuple + Mark uint32 + TimeStart uint64 + TimeStop uint64 + TimeOut uint32 +} + +func (s *ConntrackFlow) String() string { + // conntrack cmd output: + // udp 17 src=127.0.0.1 dst=127.0.0.1 sport=4001 dport=1234 packets=5 bytes=532 [UNREPLIED] src=127.0.0.1 dst=127.0.0.1 sport=1234 dport=4001 packets=10 bytes=1078 mark=0 + // start=2019-07-26 01:26:21.557800506 +0000 UTC stop=1970-01-01 00:00:00 +0000 UTC timeout=30(sec) + start := time.Unix(0, int64(s.TimeStart)) + stop := time.Unix(0, int64(s.TimeStop)) + timeout := int32(s.TimeOut) + return fmt.Sprintf("%s\t%d src=%s dst=%s sport=%d dport=%d packets=%d bytes=%d\tsrc=%s dst=%s sport=%d dport=%d packets=%d bytes=%d mark=0x%x start=%v stop=%v timeout=%d(sec)", + nl.L4ProtoMap[s.Forward.Protocol], s.Forward.Protocol, + s.Forward.SrcIP.String(), s.Forward.DstIP.String(), s.Forward.SrcPort, s.Forward.DstPort, s.Forward.Packets, s.Forward.Bytes, + s.Reverse.SrcIP.String(), s.Reverse.DstIP.String(), s.Reverse.SrcPort, s.Reverse.DstPort, s.Reverse.Packets, s.Reverse.Bytes, + s.Mark, start, stop, timeout) +} + +// This method parse the ip tuple structure +// The message structure is the following: +// +// +// +// +// +func parseIpTuple(reader *bytes.Reader, tpl *ipTuple) uint8 { + for i := 0; i < 2; i++ { + _, t, _, v := parseNfAttrTLV(reader) + switch t { + case nl.CTA_IP_V4_SRC, nl.CTA_IP_V6_SRC: + tpl.SrcIP = v + case nl.CTA_IP_V4_DST, nl.CTA_IP_V6_DST: + tpl.DstIP = v + } + } + // Get total length of nested protocol-specific info. + _, _, protoInfoTotalLen := parseNfAttrTL(reader) + _, t, l, v := parseNfAttrTLV(reader) + // Track the number of bytes read. + protoInfoBytesRead := uint16(nl.SizeofNfattr) + l + if t == nl.CTA_PROTO_NUM { + tpl.Protocol = uint8(v[0]) + } + // We only parse TCP & UDP headers. Skip the others. + if tpl.Protocol != 6 && tpl.Protocol != 17 { + // skip the rest + bytesRemaining := protoInfoTotalLen - protoInfoBytesRead + reader.Seek(int64(bytesRemaining), seekCurrent) + return tpl.Protocol + } + // Skip 3 bytes of padding + reader.Seek(3, seekCurrent) + protoInfoBytesRead += 3 + for i := 0; i < 2; i++ { + _, t, _ := parseNfAttrTL(reader) + protoInfoBytesRead += uint16(nl.SizeofNfattr) + switch t { + case nl.CTA_PROTO_SRC_PORT: + parseBERaw16(reader, &tpl.SrcPort) + protoInfoBytesRead += 2 + case nl.CTA_PROTO_DST_PORT: + parseBERaw16(reader, &tpl.DstPort) + protoInfoBytesRead += 2 + } + // Skip 2 bytes of padding + reader.Seek(2, seekCurrent) + protoInfoBytesRead += 2 + } + // Skip any remaining/unknown parts of the message + bytesRemaining := protoInfoTotalLen - protoInfoBytesRead + reader.Seek(int64(bytesRemaining), seekCurrent) + + return tpl.Protocol +} + +func parseNfAttrTLV(r *bytes.Reader) (isNested bool, attrType, len uint16, value []byte) { + isNested, attrType, len = parseNfAttrTL(r) + + value = make([]byte, len) + binary.Read(r, binary.BigEndian, &value) + return isNested, attrType, len, value +} + +func parseNfAttrTL(r *bytes.Reader) (isNested bool, attrType, len uint16) { + binary.Read(r, nl.NativeEndian(), &len) + len -= nl.SizeofNfattr + + binary.Read(r, nl.NativeEndian(), &attrType) + isNested = (attrType & nl.NLA_F_NESTED) == nl.NLA_F_NESTED + attrType = attrType & (nl.NLA_F_NESTED - 1) + return isNested, attrType, len +} + +func skipNfAttrValue(r *bytes.Reader, len uint16) { + len = (len + nl.NLA_ALIGNTO - 1) & ^(nl.NLA_ALIGNTO - 1) + r.Seek(int64(len), seekCurrent) +} + +func parseBERaw16(r *bytes.Reader, v *uint16) { + binary.Read(r, binary.BigEndian, v) +} + +func parseBERaw32(r *bytes.Reader, v *uint32) { + binary.Read(r, binary.BigEndian, v) +} + +func parseBERaw64(r *bytes.Reader, v *uint64) { + binary.Read(r, binary.BigEndian, v) +} + +func parseByteAndPacketCounters(r *bytes.Reader) (bytes, packets uint64) { + for i := 0; i < 2; i++ { + switch _, t, _ := parseNfAttrTL(r); t { + case nl.CTA_COUNTERS_BYTES: + parseBERaw64(r, &bytes) + case nl.CTA_COUNTERS_PACKETS: + parseBERaw64(r, &packets) + default: + return + } + } + return +} + +// when the flow is alive, only the timestamp_start is returned in structure +func parseTimeStamp(r *bytes.Reader, readSize uint16) (tstart, tstop uint64) { + var numTimeStamps int + oneItem := nl.SizeofNfattr + 8 // 4 bytes attr header + 8 bytes timestamp + if readSize == uint16(oneItem) { + numTimeStamps = 1 + } else if readSize == 2*uint16(oneItem) { + numTimeStamps = 2 + } else { + return + } + for i := 0; i < numTimeStamps; i++ { + switch _, t, _ := parseNfAttrTL(r); t { + case nl.CTA_TIMESTAMP_START: + parseBERaw64(r, &tstart) + case nl.CTA_TIMESTAMP_STOP: + parseBERaw64(r, &tstop) + default: + return + } + } + return + +} + +func parseTimeOut(r *bytes.Reader) (ttimeout uint32) { + parseBERaw32(r, &ttimeout) + return +} + +func parseConnectionMark(r *bytes.Reader) (mark uint32) { + parseBERaw32(r, &mark) + return +} + +func parseRawData(data []byte) *ConntrackFlow { + s := &ConntrackFlow{} + // First there is the Nfgenmsg header + // consume only the family field + reader := bytes.NewReader(data) + binary.Read(reader, nl.NativeEndian(), &s.FamilyType) + + // skip rest of the Netfilter header + reader.Seek(3, seekCurrent) + // The message structure is the following: + // 4 bytes + // 4 bytes + // flow information of the forward flow + // 4 bytes + // 4 bytes + // flow information of the reverse flow + for reader.Len() > 0 { + if nested, t, l := parseNfAttrTL(reader); nested { + switch t { + case nl.CTA_TUPLE_ORIG: + if nested, t, l = parseNfAttrTL(reader); nested && t == nl.CTA_TUPLE_IP { + parseIpTuple(reader, &s.Forward) + } + case nl.CTA_TUPLE_REPLY: + if nested, t, l = parseNfAttrTL(reader); nested && t == nl.CTA_TUPLE_IP { + parseIpTuple(reader, &s.Reverse) + } else { + // Header not recognized skip it + skipNfAttrValue(reader, l) + } + case nl.CTA_COUNTERS_ORIG: + s.Forward.Bytes, s.Forward.Packets = parseByteAndPacketCounters(reader) + case nl.CTA_COUNTERS_REPLY: + s.Reverse.Bytes, s.Reverse.Packets = parseByteAndPacketCounters(reader) + case nl.CTA_TIMESTAMP: + s.TimeStart, s.TimeStop = parseTimeStamp(reader, l) + case nl.CTA_PROTOINFO: + skipNfAttrValue(reader, l) + default: + skipNfAttrValue(reader, l) + } + } else { + switch t { + case nl.CTA_MARK: + s.Mark = parseConnectionMark(reader) + case nl.CTA_TIMEOUT: + s.TimeOut = parseTimeOut(reader) + case nl.CTA_STATUS, nl.CTA_USE, nl.CTA_ID: + skipNfAttrValue(reader, l) + default: + skipNfAttrValue(reader, l) + } + } + } + return s +} + +// Conntrack parameters and options: +// -n, --src-nat ip source NAT ip +// -g, --dst-nat ip destination NAT ip +// -j, --any-nat ip source or destination NAT ip +// -m, --mark mark Set mark +// -c, --secmark secmark Set selinux secmark +// -e, --event-mask eventmask Event mask, eg. NEW,DESTROY +// -z, --zero Zero counters while listing +// -o, --output type[,...] Output format, eg. xml +// -l, --label label[,...] conntrack labels + +// Common parameters and options: +// -s, --src, --orig-src ip Source address from original direction +// -d, --dst, --orig-dst ip Destination address from original direction +// -r, --reply-src ip Source address from reply direction +// -q, --reply-dst ip Destination address from reply direction +// -p, --protonum proto Layer 4 Protocol, eg. 'tcp' +// -f, --family proto Layer 3 Protocol, eg. 'ipv6' +// -t, --timeout timeout Set timeout +// -u, --status status Set status, eg. ASSURED +// -w, --zone value Set conntrack zone +// --orig-zone value Set zone for original direction +// --reply-zone value Set zone for reply direction +// -b, --buffer-size Netlink socket buffer size +// --mask-src ip Source mask address +// --mask-dst ip Destination mask address + +// Layer 4 Protocol common parameters and options: +// TCP, UDP, SCTP, UDPLite and DCCP +// --sport, --orig-port-src port Source port in original direction +// --dport, --orig-port-dst port Destination port in original direction + +// Filter types +type ConntrackFilterType uint8 + +const ( + ConntrackOrigSrcIP = iota // -orig-src ip Source address from original direction + ConntrackOrigDstIP // -orig-dst ip Destination address from original direction + ConntrackReplySrcIP // --reply-src ip Reply Source IP + ConntrackReplyDstIP // --reply-dst ip Reply Destination IP + ConntrackReplyAnyIP // Match source or destination reply IP + ConntrackOrigSrcPort // --orig-port-src port Source port in original direction + ConntrackOrigDstPort // --orig-port-dst port Destination port in original direction + ConntrackNatSrcIP = ConntrackReplySrcIP // deprecated use instead ConntrackReplySrcIP + ConntrackNatDstIP = ConntrackReplyDstIP // deprecated use instead ConntrackReplyDstIP + ConntrackNatAnyIP = ConntrackReplyAnyIP // deprecated use instead ConntrackReplyAnyIP +) + +type CustomConntrackFilter interface { + // MatchConntrackFlow applies the filter to the flow and returns true if the flow matches + // the filter or false otherwise + MatchConntrackFlow(flow *ConntrackFlow) bool +} + +type ConntrackFilter struct { + ipNetFilter map[ConntrackFilterType]*net.IPNet + portFilter map[ConntrackFilterType]uint16 + protoFilter uint8 +} + +// AddIPNet adds a IP subnet to the conntrack filter +func (f *ConntrackFilter) AddIPNet(tp ConntrackFilterType, ipNet *net.IPNet) error { + if ipNet == nil { + return fmt.Errorf("Filter attribute empty") + } + if f.ipNetFilter == nil { + f.ipNetFilter = make(map[ConntrackFilterType]*net.IPNet) + } + if _, ok := f.ipNetFilter[tp]; ok { + return errors.New("Filter attribute already present") + } + f.ipNetFilter[tp] = ipNet + return nil +} + +// AddIP adds an IP to the conntrack filter +func (f *ConntrackFilter) AddIP(tp ConntrackFilterType, ip net.IP) error { + if ip == nil { + return fmt.Errorf("Filter attribute empty") + } + return f.AddIPNet(tp, NewIPNet(ip)) +} + +// AddPort adds a Port to the conntrack filter if the Layer 4 protocol allows it +func (f *ConntrackFilter) AddPort(tp ConntrackFilterType, port uint16) error { + switch f.protoFilter { + // TCP, UDP, DCCP, SCTP, UDPLite + case 6, 17, 33, 132, 136: + default: + return fmt.Errorf("Filter attribute not available without a valid Layer 4 protocol: %d", f.protoFilter) + } + + if f.portFilter == nil { + f.portFilter = make(map[ConntrackFilterType]uint16) + } + if _, ok := f.portFilter[tp]; ok { + return errors.New("Filter attribute already present") + } + f.portFilter[tp] = port + return nil +} + +// AddProtocol adds the Layer 4 protocol to the conntrack filter +func (f *ConntrackFilter) AddProtocol(proto uint8) error { + if f.protoFilter != 0 { + return errors.New("Filter attribute already present") + } + f.protoFilter = proto + return nil +} + +// MatchConntrackFlow applies the filter to the flow and returns true if the flow matches the filter +// false otherwise +func (f *ConntrackFilter) MatchConntrackFlow(flow *ConntrackFlow) bool { + if len(f.ipNetFilter) == 0 && len(f.portFilter) == 0 && f.protoFilter == 0 { + // empty filter always not match + return false + } + + // -p, --protonum proto Layer 4 Protocol, eg. 'tcp' + if f.protoFilter != 0 && flow.Forward.Protocol != f.protoFilter { + // different Layer 4 protocol always not match + return false + } + + match := true + + // IP conntrack filter + if len(f.ipNetFilter) > 0 { + // -orig-src ip Source address from original direction + if elem, found := f.ipNetFilter[ConntrackOrigSrcIP]; found { + match = match && elem.Contains(flow.Forward.SrcIP) + } + + // -orig-dst ip Destination address from original direction + if elem, found := f.ipNetFilter[ConntrackOrigDstIP]; match && found { + match = match && elem.Contains(flow.Forward.DstIP) + } + + // -src-nat ip Source NAT ip + if elem, found := f.ipNetFilter[ConntrackReplySrcIP]; match && found { + match = match && elem.Contains(flow.Reverse.SrcIP) + } + + // -dst-nat ip Destination NAT ip + if elem, found := f.ipNetFilter[ConntrackReplyDstIP]; match && found { + match = match && elem.Contains(flow.Reverse.DstIP) + } + + // Match source or destination reply IP + if elem, found := f.ipNetFilter[ConntrackReplyAnyIP]; match && found { + match = match && (elem.Contains(flow.Reverse.SrcIP) || elem.Contains(flow.Reverse.DstIP)) + } + } + + // Layer 4 Port filter + if len(f.portFilter) > 0 { + // -orig-port-src port Source port from original direction + if elem, found := f.portFilter[ConntrackOrigSrcPort]; match && found { + match = match && elem == flow.Forward.SrcPort + } + + // -orig-port-dst port Destination port from original direction + if elem, found := f.portFilter[ConntrackOrigDstPort]; match && found { + match = match && elem == flow.Forward.DstPort + } + } + + return match +} + +var _ CustomConntrackFilter = (*ConntrackFilter)(nil) diff --git a/vendor/github.com/vishvananda/netlink/conntrack_unspecified.go b/vendor/github.com/vishvananda/netlink/conntrack_unspecified.go new file mode 100644 index 000000000000..af7af799e776 --- /dev/null +++ b/vendor/github.com/vishvananda/netlink/conntrack_unspecified.go @@ -0,0 +1,53 @@ +// +build !linux + +package netlink + +// ConntrackTableType Conntrack table for the netlink operation +type ConntrackTableType uint8 + +// InetFamily Family type +type InetFamily uint8 + +// ConntrackFlow placeholder +type ConntrackFlow struct{} + +// ConntrackFilter placeholder +type ConntrackFilter struct{} + +// ConntrackTableList returns the flow list of a table of a specific family +// conntrack -L [table] [options] List conntrack or expectation table +func ConntrackTableList(table ConntrackTableType, family InetFamily) ([]*ConntrackFlow, error) { + return nil, ErrNotImplemented +} + +// ConntrackTableFlush flushes all the flows of a specified table +// conntrack -F [table] Flush table +// The flush operation applies to all the family types +func ConntrackTableFlush(table ConntrackTableType) error { + return ErrNotImplemented +} + +// ConntrackDeleteFilter deletes entries on the specified table on the base of the filter +// conntrack -D [table] parameters Delete conntrack or expectation +func ConntrackDeleteFilter(table ConntrackTableType, family InetFamily, filter *ConntrackFilter) (uint, error) { + return 0, ErrNotImplemented +} + +// ConntrackTableList returns the flow list of a table of a specific family using the netlink handle passed +// conntrack -L [table] [options] List conntrack or expectation table +func (h *Handle) ConntrackTableList(table ConntrackTableType, family InetFamily) ([]*ConntrackFlow, error) { + return nil, ErrNotImplemented +} + +// ConntrackTableFlush flushes all the flows of a specified table using the netlink handle passed +// conntrack -F [table] Flush table +// The flush operation applies to all the family types +func (h *Handle) ConntrackTableFlush(table ConntrackTableType) error { + return ErrNotImplemented +} + +// ConntrackDeleteFilter deletes entries on the specified table on the base of the filter using the netlink handle passed +// conntrack -D [table] parameters Delete conntrack or expectation +func (h *Handle) ConntrackDeleteFilter(table ConntrackTableType, family InetFamily, filter *ConntrackFilter) (uint, error) { + return 0, ErrNotImplemented +} diff --git a/vendor/github.com/vishvananda/netlink/devlink_linux.go b/vendor/github.com/vishvananda/netlink/devlink_linux.go new file mode 100644 index 000000000000..358b232c6c52 --- /dev/null +++ b/vendor/github.com/vishvananda/netlink/devlink_linux.go @@ -0,0 +1,728 @@ +package netlink + +import ( + "fmt" + "net" + "strings" + "syscall" + + "github.com/vishvananda/netlink/nl" + "golang.org/x/sys/unix" +) + +// DevlinkDevEswitchAttr represents device's eswitch attributes +type DevlinkDevEswitchAttr struct { + Mode string + InlineMode string + EncapMode string +} + +// DevlinkDevAttrs represents device attributes +type DevlinkDevAttrs struct { + Eswitch DevlinkDevEswitchAttr +} + +// DevlinkDevice represents device and its attributes +type DevlinkDevice struct { + BusName string + DeviceName string + Attrs DevlinkDevAttrs +} + +// DevlinkPortFn represents port function and its attributes +type DevlinkPortFn struct { + HwAddr net.HardwareAddr + State uint8 + OpState uint8 +} + +// DevlinkPortFnSetAttrs represents attributes to set +type DevlinkPortFnSetAttrs struct { + FnAttrs DevlinkPortFn + HwAddrValid bool + StateValid bool +} + +// DevlinkPort represents port and its attributes +type DevlinkPort struct { + BusName string + DeviceName string + PortIndex uint32 + PortType uint16 + NetdeviceName string + NetdevIfIndex uint32 + RdmaDeviceName string + PortFlavour uint16 + Fn *DevlinkPortFn +} + +type DevLinkPortAddAttrs struct { + Controller uint32 + SfNumber uint32 + PortIndex uint32 + PfNumber uint16 + SfNumberValid bool + PortIndexValid bool + ControllerValid bool +} + +// DevlinkDeviceInfo represents devlink info +type DevlinkDeviceInfo struct { + Driver string + SerialNumber string + BoardID string + FwApp string + FwAppBoundleID string + FwAppName string + FwBoundleID string + FwMgmt string + FwMgmtAPI string + FwMgmtBuild string + FwNetlist string + FwNetlistBuild string + FwPsidAPI string + FwUndi string +} + +func parseDevLinkDeviceList(msgs [][]byte) ([]*DevlinkDevice, error) { + devices := make([]*DevlinkDevice, 0, len(msgs)) + for _, m := range msgs { + attrs, err := nl.ParseRouteAttr(m[nl.SizeofGenlmsg:]) + if err != nil { + return nil, err + } + dev := &DevlinkDevice{} + if err = dev.parseAttributes(attrs); err != nil { + return nil, err + } + devices = append(devices, dev) + } + return devices, nil +} + +func eswitchStringToMode(modeName string) (uint16, error) { + if modeName == "legacy" { + return nl.DEVLINK_ESWITCH_MODE_LEGACY, nil + } else if modeName == "switchdev" { + return nl.DEVLINK_ESWITCH_MODE_SWITCHDEV, nil + } else { + return 0xffff, fmt.Errorf("invalid switchdev mode") + } +} + +func parseEswitchMode(mode uint16) string { + var eswitchMode = map[uint16]string{ + nl.DEVLINK_ESWITCH_MODE_LEGACY: "legacy", + nl.DEVLINK_ESWITCH_MODE_SWITCHDEV: "switchdev", + } + if eswitchMode[mode] == "" { + return "unknown" + } else { + return eswitchMode[mode] + } +} + +func parseEswitchInlineMode(inlinemode uint8) string { + var eswitchInlineMode = map[uint8]string{ + nl.DEVLINK_ESWITCH_INLINE_MODE_NONE: "none", + nl.DEVLINK_ESWITCH_INLINE_MODE_LINK: "link", + nl.DEVLINK_ESWITCH_INLINE_MODE_NETWORK: "network", + nl.DEVLINK_ESWITCH_INLINE_MODE_TRANSPORT: "transport", + } + if eswitchInlineMode[inlinemode] == "" { + return "unknown" + } else { + return eswitchInlineMode[inlinemode] + } +} + +func parseEswitchEncapMode(encapmode uint8) string { + var eswitchEncapMode = map[uint8]string{ + nl.DEVLINK_ESWITCH_ENCAP_MODE_NONE: "disable", + nl.DEVLINK_ESWITCH_ENCAP_MODE_BASIC: "enable", + } + if eswitchEncapMode[encapmode] == "" { + return "unknown" + } else { + return eswitchEncapMode[encapmode] + } +} + +func (d *DevlinkDevice) parseAttributes(attrs []syscall.NetlinkRouteAttr) error { + for _, a := range attrs { + switch a.Attr.Type { + case nl.DEVLINK_ATTR_BUS_NAME: + d.BusName = string(a.Value[:len(a.Value)-1]) + case nl.DEVLINK_ATTR_DEV_NAME: + d.DeviceName = string(a.Value[:len(a.Value)-1]) + case nl.DEVLINK_ATTR_ESWITCH_MODE: + d.Attrs.Eswitch.Mode = parseEswitchMode(native.Uint16(a.Value)) + case nl.DEVLINK_ATTR_ESWITCH_INLINE_MODE: + d.Attrs.Eswitch.InlineMode = parseEswitchInlineMode(uint8(a.Value[0])) + case nl.DEVLINK_ATTR_ESWITCH_ENCAP_MODE: + d.Attrs.Eswitch.EncapMode = parseEswitchEncapMode(uint8(a.Value[0])) + } + } + return nil +} + +func (dev *DevlinkDevice) parseEswitchAttrs(msgs [][]byte) { + m := msgs[0] + attrs, err := nl.ParseRouteAttr(m[nl.SizeofGenlmsg:]) + if err != nil { + return + } + dev.parseAttributes(attrs) +} + +func (h *Handle) getEswitchAttrs(family *GenlFamily, dev *DevlinkDevice) { + msg := &nl.Genlmsg{ + Command: nl.DEVLINK_CMD_ESWITCH_GET, + Version: nl.GENL_DEVLINK_VERSION, + } + req := h.newNetlinkRequest(int(family.ID), unix.NLM_F_REQUEST|unix.NLM_F_ACK) + req.AddData(msg) + + b := make([]byte, len(dev.BusName)+1) + copy(b, dev.BusName) + data := nl.NewRtAttr(nl.DEVLINK_ATTR_BUS_NAME, b) + req.AddData(data) + + b = make([]byte, len(dev.DeviceName)+1) + copy(b, dev.DeviceName) + data = nl.NewRtAttr(nl.DEVLINK_ATTR_DEV_NAME, b) + req.AddData(data) + + msgs, err := req.Execute(unix.NETLINK_GENERIC, 0) + if err != nil { + return + } + dev.parseEswitchAttrs(msgs) +} + +// DevLinkGetDeviceList provides a pointer to devlink devices and nil error, +// otherwise returns an error code. +func (h *Handle) DevLinkGetDeviceList() ([]*DevlinkDevice, error) { + f, err := h.GenlFamilyGet(nl.GENL_DEVLINK_NAME) + if err != nil { + return nil, err + } + msg := &nl.Genlmsg{ + Command: nl.DEVLINK_CMD_GET, + Version: nl.GENL_DEVLINK_VERSION, + } + req := h.newNetlinkRequest(int(f.ID), + unix.NLM_F_REQUEST|unix.NLM_F_ACK|unix.NLM_F_DUMP) + req.AddData(msg) + msgs, err := req.Execute(unix.NETLINK_GENERIC, 0) + if err != nil { + return nil, err + } + devices, err := parseDevLinkDeviceList(msgs) + if err != nil { + return nil, err + } + for _, d := range devices { + h.getEswitchAttrs(f, d) + } + return devices, nil +} + +// DevLinkGetDeviceList provides a pointer to devlink devices and nil error, +// otherwise returns an error code. +func DevLinkGetDeviceList() ([]*DevlinkDevice, error) { + return pkgHandle.DevLinkGetDeviceList() +} + +func parseDevlinkDevice(msgs [][]byte) (*DevlinkDevice, error) { + m := msgs[0] + attrs, err := nl.ParseRouteAttr(m[nl.SizeofGenlmsg:]) + if err != nil { + return nil, err + } + dev := &DevlinkDevice{} + if err = dev.parseAttributes(attrs); err != nil { + return nil, err + } + return dev, nil +} + +func (h *Handle) createCmdReq(cmd uint8, bus string, device string) (*GenlFamily, *nl.NetlinkRequest, error) { + f, err := h.GenlFamilyGet(nl.GENL_DEVLINK_NAME) + if err != nil { + return nil, nil, err + } + + msg := &nl.Genlmsg{ + Command: cmd, + Version: nl.GENL_DEVLINK_VERSION, + } + req := h.newNetlinkRequest(int(f.ID), + unix.NLM_F_REQUEST|unix.NLM_F_ACK) + req.AddData(msg) + + b := make([]byte, len(bus)+1) + copy(b, bus) + data := nl.NewRtAttr(nl.DEVLINK_ATTR_BUS_NAME, b) + req.AddData(data) + + b = make([]byte, len(device)+1) + copy(b, device) + data = nl.NewRtAttr(nl.DEVLINK_ATTR_DEV_NAME, b) + req.AddData(data) + + return f, req, nil +} + +// DevlinkGetDeviceByName provides a pointer to devlink device and nil error, +// otherwise returns an error code. +func (h *Handle) DevLinkGetDeviceByName(Bus string, Device string) (*DevlinkDevice, error) { + f, req, err := h.createCmdReq(nl.DEVLINK_CMD_GET, Bus, Device) + if err != nil { + return nil, err + } + + respmsg, err := req.Execute(unix.NETLINK_GENERIC, 0) + if err != nil { + return nil, err + } + dev, err := parseDevlinkDevice(respmsg) + if err == nil { + h.getEswitchAttrs(f, dev) + } + return dev, err +} + +// DevlinkGetDeviceByName provides a pointer to devlink device and nil error, +// otherwise returns an error code. +func DevLinkGetDeviceByName(Bus string, Device string) (*DevlinkDevice, error) { + return pkgHandle.DevLinkGetDeviceByName(Bus, Device) +} + +// DevLinkSetEswitchMode sets eswitch mode if able to set successfully or +// returns an error code. +// Equivalent to: `devlink dev eswitch set $dev mode switchdev` +// Equivalent to: `devlink dev eswitch set $dev mode legacy` +func (h *Handle) DevLinkSetEswitchMode(Dev *DevlinkDevice, NewMode string) error { + mode, err := eswitchStringToMode(NewMode) + if err != nil { + return err + } + + _, req, err := h.createCmdReq(nl.DEVLINK_CMD_ESWITCH_SET, Dev.BusName, Dev.DeviceName) + if err != nil { + return err + } + + req.AddData(nl.NewRtAttr(nl.DEVLINK_ATTR_ESWITCH_MODE, nl.Uint16Attr(mode))) + + _, err = req.Execute(unix.NETLINK_GENERIC, 0) + return err +} + +// DevLinkSetEswitchMode sets eswitch mode if able to set successfully or +// returns an error code. +// Equivalent to: `devlink dev eswitch set $dev mode switchdev` +// Equivalent to: `devlink dev eswitch set $dev mode legacy` +func DevLinkSetEswitchMode(Dev *DevlinkDevice, NewMode string) error { + return pkgHandle.DevLinkSetEswitchMode(Dev, NewMode) +} + +func (port *DevlinkPort) parseAttributes(attrs []syscall.NetlinkRouteAttr) error { + for _, a := range attrs { + switch a.Attr.Type { + case nl.DEVLINK_ATTR_BUS_NAME: + port.BusName = string(a.Value[:len(a.Value)-1]) + case nl.DEVLINK_ATTR_DEV_NAME: + port.DeviceName = string(a.Value[:len(a.Value)-1]) + case nl.DEVLINK_ATTR_PORT_INDEX: + port.PortIndex = native.Uint32(a.Value) + case nl.DEVLINK_ATTR_PORT_TYPE: + port.PortType = native.Uint16(a.Value) + case nl.DEVLINK_ATTR_PORT_NETDEV_NAME: + port.NetdeviceName = string(a.Value[:len(a.Value)-1]) + case nl.DEVLINK_ATTR_PORT_NETDEV_IFINDEX: + port.NetdevIfIndex = native.Uint32(a.Value) + case nl.DEVLINK_ATTR_PORT_IBDEV_NAME: + port.RdmaDeviceName = string(a.Value[:len(a.Value)-1]) + case nl.DEVLINK_ATTR_PORT_FLAVOUR: + port.PortFlavour = native.Uint16(a.Value) + case nl.DEVLINK_ATTR_PORT_FUNCTION: + port.Fn = &DevlinkPortFn{} + for nested := range nl.ParseAttributes(a.Value) { + switch nested.Type { + case nl.DEVLINK_PORT_FUNCTION_ATTR_HW_ADDR: + port.Fn.HwAddr = nested.Value[:] + case nl.DEVLINK_PORT_FN_ATTR_STATE: + port.Fn.State = uint8(nested.Value[0]) + case nl.DEVLINK_PORT_FN_ATTR_OPSTATE: + port.Fn.OpState = uint8(nested.Value[0]) + } + } + } + } + return nil +} + +func parseDevLinkAllPortList(msgs [][]byte) ([]*DevlinkPort, error) { + ports := make([]*DevlinkPort, 0, len(msgs)) + for _, m := range msgs { + attrs, err := nl.ParseRouteAttr(m[nl.SizeofGenlmsg:]) + if err != nil { + return nil, err + } + port := &DevlinkPort{} + if err = port.parseAttributes(attrs); err != nil { + return nil, err + } + ports = append(ports, port) + } + return ports, nil +} + +// DevLinkGetPortList provides a pointer to devlink ports and nil error, +// otherwise returns an error code. +func (h *Handle) DevLinkGetAllPortList() ([]*DevlinkPort, error) { + f, err := h.GenlFamilyGet(nl.GENL_DEVLINK_NAME) + if err != nil { + return nil, err + } + msg := &nl.Genlmsg{ + Command: nl.DEVLINK_CMD_PORT_GET, + Version: nl.GENL_DEVLINK_VERSION, + } + req := h.newNetlinkRequest(int(f.ID), + unix.NLM_F_REQUEST|unix.NLM_F_ACK|unix.NLM_F_DUMP) + req.AddData(msg) + msgs, err := req.Execute(unix.NETLINK_GENERIC, 0) + if err != nil { + return nil, err + } + ports, err := parseDevLinkAllPortList(msgs) + if err != nil { + return nil, err + } + return ports, nil +} + +// DevLinkGetPortList provides a pointer to devlink ports and nil error, +// otherwise returns an error code. +func DevLinkGetAllPortList() ([]*DevlinkPort, error) { + return pkgHandle.DevLinkGetAllPortList() +} + +func parseDevlinkPortMsg(msgs [][]byte) (*DevlinkPort, error) { + m := msgs[0] + attrs, err := nl.ParseRouteAttr(m[nl.SizeofGenlmsg:]) + if err != nil { + return nil, err + } + port := &DevlinkPort{} + if err = port.parseAttributes(attrs); err != nil { + return nil, err + } + return port, nil +} + +// DevLinkGetPortByIndexprovides a pointer to devlink device and nil error, +// otherwise returns an error code. +func (h *Handle) DevLinkGetPortByIndex(Bus string, Device string, PortIndex uint32) (*DevlinkPort, error) { + + _, req, err := h.createCmdReq(nl.DEVLINK_CMD_PORT_GET, Bus, Device) + if err != nil { + return nil, err + } + + req.AddData(nl.NewRtAttr(nl.DEVLINK_ATTR_PORT_INDEX, nl.Uint32Attr(PortIndex))) + + respmsg, err := req.Execute(unix.NETLINK_GENERIC, 0) + if err != nil { + return nil, err + } + port, err := parseDevlinkPortMsg(respmsg) + return port, err +} + +// DevLinkGetPortByIndex provides a pointer to devlink portand nil error, +// otherwise returns an error code. +func DevLinkGetPortByIndex(Bus string, Device string, PortIndex uint32) (*DevlinkPort, error) { + return pkgHandle.DevLinkGetPortByIndex(Bus, Device, PortIndex) +} + +// DevLinkPortAdd adds a devlink port and returns a port on success +// otherwise returns nil port and an error code. +func (h *Handle) DevLinkPortAdd(Bus string, Device string, Flavour uint16, Attrs DevLinkPortAddAttrs) (*DevlinkPort, error) { + _, req, err := h.createCmdReq(nl.DEVLINK_CMD_PORT_NEW, Bus, Device) + if err != nil { + return nil, err + } + + req.AddData(nl.NewRtAttr(nl.DEVLINK_ATTR_PORT_FLAVOUR, nl.Uint16Attr(Flavour))) + + req.AddData(nl.NewRtAttr(nl.DEVLINK_ATTR_PORT_PCI_PF_NUMBER, nl.Uint16Attr(Attrs.PfNumber))) + if Flavour == nl.DEVLINK_PORT_FLAVOUR_PCI_SF && Attrs.SfNumberValid { + req.AddData(nl.NewRtAttr(nl.DEVLINK_ATTR_PORT_PCI_SF_NUMBER, nl.Uint32Attr(Attrs.SfNumber))) + } + if Attrs.PortIndexValid { + req.AddData(nl.NewRtAttr(nl.DEVLINK_ATTR_PORT_INDEX, nl.Uint32Attr(Attrs.PortIndex))) + } + if Attrs.ControllerValid { + req.AddData(nl.NewRtAttr(nl.DEVLINK_ATTR_PORT_CONTROLLER_NUMBER, nl.Uint32Attr(Attrs.Controller))) + } + respmsg, err := req.Execute(unix.NETLINK_GENERIC, 0) + if err != nil { + return nil, err + } + port, err := parseDevlinkPortMsg(respmsg) + return port, err +} + +// DevLinkPortAdd adds a devlink port and returns a port on success +// otherwise returns nil port and an error code. +func DevLinkPortAdd(Bus string, Device string, Flavour uint16, Attrs DevLinkPortAddAttrs) (*DevlinkPort, error) { + return pkgHandle.DevLinkPortAdd(Bus, Device, Flavour, Attrs) +} + +// DevLinkPortDel deletes a devlink port and returns success or error code. +func (h *Handle) DevLinkPortDel(Bus string, Device string, PortIndex uint32) error { + _, req, err := h.createCmdReq(nl.DEVLINK_CMD_PORT_DEL, Bus, Device) + if err != nil { + return err + } + + req.AddData(nl.NewRtAttr(nl.DEVLINK_ATTR_PORT_INDEX, nl.Uint32Attr(PortIndex))) + _, err = req.Execute(unix.NETLINK_GENERIC, 0) + return err +} + +// DevLinkPortDel deletes a devlink port and returns success or error code. +func DevLinkPortDel(Bus string, Device string, PortIndex uint32) error { + return pkgHandle.DevLinkPortDel(Bus, Device, PortIndex) +} + +// DevlinkPortFnSet sets one or more port function attributes specified by the attribute mask. +// It returns 0 on success or error code. +func (h *Handle) DevlinkPortFnSet(Bus string, Device string, PortIndex uint32, FnAttrs DevlinkPortFnSetAttrs) error { + _, req, err := h.createCmdReq(nl.DEVLINK_CMD_PORT_SET, Bus, Device) + if err != nil { + return err + } + + req.AddData(nl.NewRtAttr(nl.DEVLINK_ATTR_PORT_INDEX, nl.Uint32Attr(PortIndex))) + + fnAttr := nl.NewRtAttr(nl.DEVLINK_ATTR_PORT_FUNCTION|unix.NLA_F_NESTED, nil) + + if FnAttrs.HwAddrValid { + fnAttr.AddRtAttr(nl.DEVLINK_PORT_FUNCTION_ATTR_HW_ADDR, []byte(FnAttrs.FnAttrs.HwAddr)) + } + + if FnAttrs.StateValid { + fnAttr.AddRtAttr(nl.DEVLINK_PORT_FN_ATTR_STATE, nl.Uint8Attr(FnAttrs.FnAttrs.State)) + } + req.AddData(fnAttr) + + _, err = req.Execute(unix.NETLINK_GENERIC, 0) + return err +} + +// DevlinkPortFnSet sets one or more port function attributes specified by the attribute mask. +// It returns 0 on success or error code. +func DevlinkPortFnSet(Bus string, Device string, PortIndex uint32, FnAttrs DevlinkPortFnSetAttrs) error { + return pkgHandle.DevlinkPortFnSet(Bus, Device, PortIndex, FnAttrs) +} + +// devlinkInfoGetter is function that is responsible for getting devlink info message +// this is introduced for test purpose +type devlinkInfoGetter func(bus, device string) ([]byte, error) + +// DevlinkGetDeviceInfoByName returns devlink info for selected device, +// otherwise returns an error code. +// Equivalent to: `devlink dev info $dev` +func (h *Handle) DevlinkGetDeviceInfoByName(Bus string, Device string, getInfoMsg devlinkInfoGetter) (*DevlinkDeviceInfo, error) { + info, err := h.DevlinkGetDeviceInfoByNameAsMap(Bus, Device, getInfoMsg) + if err != nil { + return nil, err + } + + return parseInfoData(info), nil +} + +// DevlinkGetDeviceInfoByName returns devlink info for selected device, +// otherwise returns an error code. +// Equivalent to: `devlink dev info $dev` +func DevlinkGetDeviceInfoByName(Bus string, Device string) (*DevlinkDeviceInfo, error) { + return pkgHandle.DevlinkGetDeviceInfoByName(Bus, Device, pkgHandle.getDevlinkInfoMsg) +} + +// DevlinkGetDeviceInfoByNameAsMap returns devlink info for selected device as a map, +// otherwise returns an error code. +// Equivalent to: `devlink dev info $dev` +func (h *Handle) DevlinkGetDeviceInfoByNameAsMap(Bus string, Device string, getInfoMsg devlinkInfoGetter) (map[string]string, error) { + response, err := getInfoMsg(Bus, Device) + if err != nil { + return nil, err + } + + info, err := parseInfoMsg(response) + if err != nil { + return nil, err + } + + return info, nil +} + +// DevlinkGetDeviceInfoByNameAsMap returns devlink info for selected device as a map, +// otherwise returns an error code. +// Equivalent to: `devlink dev info $dev` +func DevlinkGetDeviceInfoByNameAsMap(Bus string, Device string) (map[string]string, error) { + return pkgHandle.DevlinkGetDeviceInfoByNameAsMap(Bus, Device, pkgHandle.getDevlinkInfoMsg) +} + +// GetDevlinkInfo returns devlink info for target device, +// otherwise returns an error code. +func (d *DevlinkDevice) GetDevlinkInfo() (*DevlinkDeviceInfo, error) { + return pkgHandle.DevlinkGetDeviceInfoByName(d.BusName, d.DeviceName, pkgHandle.getDevlinkInfoMsg) +} + +// GetDevlinkInfoAsMap returns devlink info for target device as a map, +// otherwise returns an error code. +func (d *DevlinkDevice) GetDevlinkInfoAsMap() (map[string]string, error) { + return pkgHandle.DevlinkGetDeviceInfoByNameAsMap(d.BusName, d.DeviceName, pkgHandle.getDevlinkInfoMsg) +} + +func (h *Handle) getDevlinkInfoMsg(bus, device string) ([]byte, error) { + _, req, err := h.createCmdReq(nl.DEVLINK_CMD_INFO_GET, bus, device) + if err != nil { + return nil, err + } + + response, err := req.Execute(unix.NETLINK_GENERIC, 0) + if err != nil { + return nil, err + } + + if len(response) < 1 { + return nil, fmt.Errorf("getDevlinkInfoMsg: message too short") + } + + return response[0], nil +} + +func parseInfoMsg(msg []byte) (map[string]string, error) { + if len(msg) < nl.SizeofGenlmsg { + return nil, fmt.Errorf("parseInfoMsg: message too short") + } + + info := make(map[string]string) + err := collectInfoData(msg[nl.SizeofGenlmsg:], info) + + if err != nil { + return nil, err + } + + return info, nil +} + +func collectInfoData(msg []byte, data map[string]string) error { + attrs, err := nl.ParseRouteAttr(msg) + if err != nil { + return err + } + + for _, attr := range attrs { + switch attr.Attr.Type { + case nl.DEVLINK_ATTR_INFO_DRIVER_NAME: + data["driver"] = parseInfoValue(attr.Value) + case nl.DEVLINK_ATTR_INFO_SERIAL_NUMBER: + data["serialNumber"] = parseInfoValue(attr.Value) + case nl.DEVLINK_ATTR_INFO_VERSION_RUNNING, nl.DEVLINK_ATTR_INFO_VERSION_FIXED, + nl.DEVLINK_ATTR_INFO_VERSION_STORED: + key, value, err := getNestedInfoData(attr.Value) + if err != nil { + return err + } + data[key] = value + } + } + + if len(data) == 0 { + return fmt.Errorf("collectInfoData: could not read attributes") + } + + return nil +} + +func getNestedInfoData(msg []byte) (string, string, error) { + nestedAttrs, err := nl.ParseRouteAttr(msg) + + var key, value string + + if err != nil { + return "", "", err + } + + if len(nestedAttrs) != 2 { + return "", "", fmt.Errorf("getNestedInfoData: too few attributes in nested structure") + } + + for _, nestedAttr := range nestedAttrs { + switch nestedAttr.Attr.Type { + case nl.DEVLINK_ATTR_INFO_VERSION_NAME: + key = parseInfoValue(nestedAttr.Value) + case nl.DEVLINK_ATTR_INFO_VERSION_VALUE: + value = parseInfoValue(nestedAttr.Value) + } + } + + if key == "" { + return "", "", fmt.Errorf("getNestedInfoData: key not found") + } + + if value == "" { + return "", "", fmt.Errorf("getNestedInfoData: value not found") + } + + return key, value, nil +} + +func parseInfoData(data map[string]string) *DevlinkDeviceInfo { + info := new(DevlinkDeviceInfo) + for key, value := range data { + switch key { + case "driver": + info.Driver = value + case "serialNumber": + info.SerialNumber = value + case "board.id": + info.BoardID = value + case "fw.app": + info.FwApp = value + case "fw.app.bundle_id": + info.FwAppBoundleID = value + case "fw.app.name": + info.FwAppName = value + case "fw.bundle_id": + info.FwBoundleID = value + case "fw.mgmt": + info.FwMgmt = value + case "fw.mgmt.api": + info.FwMgmtAPI = value + case "fw.mgmt.build": + info.FwMgmtBuild = value + case "fw.netlist": + info.FwNetlist = value + case "fw.netlist.build": + info.FwNetlistBuild = value + case "fw.psid.api": + info.FwPsidAPI = value + case "fw.undi": + info.FwUndi = value + } + } + return info +} + +func parseInfoValue(value []byte) string { + v := strings.ReplaceAll(string(value), "\x00", "") + return strings.TrimSpace(v) +} diff --git a/vendor/github.com/vishvananda/netlink/filter.go b/vendor/github.com/vishvananda/netlink/filter.go new file mode 100644 index 000000000000..2d798b0fbbc7 --- /dev/null +++ b/vendor/github.com/vishvananda/netlink/filter.go @@ -0,0 +1,392 @@ +package netlink + +import ( + "fmt" + "net" +) + +type Filter interface { + Attrs() *FilterAttrs + Type() string +} + +// FilterAttrs represents a netlink filter. A filter is associated with a link, +// has a handle and a parent. The root filter of a device should have a +// parent == HANDLE_ROOT. +type FilterAttrs struct { + LinkIndex int + Handle uint32 + Parent uint32 + Priority uint16 // lower is higher priority + Protocol uint16 // unix.ETH_P_* +} + +func (q FilterAttrs) String() string { + return fmt.Sprintf("{LinkIndex: %d, Handle: %s, Parent: %s, Priority: %d, Protocol: %d}", q.LinkIndex, HandleStr(q.Handle), HandleStr(q.Parent), q.Priority, q.Protocol) +} + +type TcAct int32 + +const ( + TC_ACT_UNSPEC TcAct = -1 + TC_ACT_OK TcAct = 0 + TC_ACT_RECLASSIFY TcAct = 1 + TC_ACT_SHOT TcAct = 2 + TC_ACT_PIPE TcAct = 3 + TC_ACT_STOLEN TcAct = 4 + TC_ACT_QUEUED TcAct = 5 + TC_ACT_REPEAT TcAct = 6 + TC_ACT_REDIRECT TcAct = 7 + TC_ACT_JUMP TcAct = 0x10000000 +) + +func (a TcAct) String() string { + switch a { + case TC_ACT_UNSPEC: + return "unspec" + case TC_ACT_OK: + return "ok" + case TC_ACT_RECLASSIFY: + return "reclassify" + case TC_ACT_SHOT: + return "shot" + case TC_ACT_PIPE: + return "pipe" + case TC_ACT_STOLEN: + return "stolen" + case TC_ACT_QUEUED: + return "queued" + case TC_ACT_REPEAT: + return "repeat" + case TC_ACT_REDIRECT: + return "redirect" + case TC_ACT_JUMP: + return "jump" + } + return fmt.Sprintf("0x%x", int32(a)) +} + +type TcPolAct int32 + +const ( + TC_POLICE_UNSPEC TcPolAct = TcPolAct(TC_ACT_UNSPEC) + TC_POLICE_OK TcPolAct = TcPolAct(TC_ACT_OK) + TC_POLICE_RECLASSIFY TcPolAct = TcPolAct(TC_ACT_RECLASSIFY) + TC_POLICE_SHOT TcPolAct = TcPolAct(TC_ACT_SHOT) + TC_POLICE_PIPE TcPolAct = TcPolAct(TC_ACT_PIPE) +) + +func (a TcPolAct) String() string { + switch a { + case TC_POLICE_UNSPEC: + return "unspec" + case TC_POLICE_OK: + return "ok" + case TC_POLICE_RECLASSIFY: + return "reclassify" + case TC_POLICE_SHOT: + return "shot" + case TC_POLICE_PIPE: + return "pipe" + } + return fmt.Sprintf("0x%x", int32(a)) +} + +type ActionAttrs struct { + Index int + Capab int + Action TcAct + Refcnt int + Bindcnt int +} + +func (q ActionAttrs) String() string { + return fmt.Sprintf("{Index: %d, Capab: %x, Action: %s, Refcnt: %d, Bindcnt: %d}", q.Index, q.Capab, q.Action.String(), q.Refcnt, q.Bindcnt) +} + +// Action represents an action in any supported filter. +type Action interface { + Attrs() *ActionAttrs + Type() string +} + +type GenericAction struct { + ActionAttrs +} + +func (action *GenericAction) Type() string { + return "generic" +} + +func (action *GenericAction) Attrs() *ActionAttrs { + return &action.ActionAttrs +} + +type BpfAction struct { + ActionAttrs + Fd int + Name string +} + +func (action *BpfAction) Type() string { + return "bpf" +} + +func (action *BpfAction) Attrs() *ActionAttrs { + return &action.ActionAttrs +} + +type ConnmarkAction struct { + ActionAttrs + Zone uint16 +} + +func (action *ConnmarkAction) Type() string { + return "connmark" +} + +func (action *ConnmarkAction) Attrs() *ActionAttrs { + return &action.ActionAttrs +} + +func NewConnmarkAction() *ConnmarkAction { + return &ConnmarkAction{ + ActionAttrs: ActionAttrs{ + Action: TC_ACT_PIPE, + }, + } +} + +type CsumUpdateFlags uint32 + +const ( + TCA_CSUM_UPDATE_FLAG_IPV4HDR CsumUpdateFlags = 1 + TCA_CSUM_UPDATE_FLAG_ICMP CsumUpdateFlags = 2 + TCA_CSUM_UPDATE_FLAG_IGMP CsumUpdateFlags = 4 + TCA_CSUM_UPDATE_FLAG_TCP CsumUpdateFlags = 8 + TCA_CSUM_UPDATE_FLAG_UDP CsumUpdateFlags = 16 + TCA_CSUM_UPDATE_FLAG_UDPLITE CsumUpdateFlags = 32 + TCA_CSUM_UPDATE_FLAG_SCTP CsumUpdateFlags = 64 +) + +type CsumAction struct { + ActionAttrs + UpdateFlags CsumUpdateFlags +} + +func (action *CsumAction) Type() string { + return "csum" +} + +func (action *CsumAction) Attrs() *ActionAttrs { + return &action.ActionAttrs +} + +func NewCsumAction() *CsumAction { + return &CsumAction{ + ActionAttrs: ActionAttrs{ + Action: TC_ACT_PIPE, + }, + } +} + +type MirredAct uint8 + +func (a MirredAct) String() string { + switch a { + case TCA_EGRESS_REDIR: + return "egress redir" + case TCA_EGRESS_MIRROR: + return "egress mirror" + case TCA_INGRESS_REDIR: + return "ingress redir" + case TCA_INGRESS_MIRROR: + return "ingress mirror" + } + return "unknown" +} + +const ( + TCA_EGRESS_REDIR MirredAct = 1 /* packet redirect to EGRESS*/ + TCA_EGRESS_MIRROR MirredAct = 2 /* mirror packet to EGRESS */ + TCA_INGRESS_REDIR MirredAct = 3 /* packet redirect to INGRESS*/ + TCA_INGRESS_MIRROR MirredAct = 4 /* mirror packet to INGRESS */ +) + +type MirredAction struct { + ActionAttrs + MirredAction MirredAct + Ifindex int +} + +func (action *MirredAction) Type() string { + return "mirred" +} + +func (action *MirredAction) Attrs() *ActionAttrs { + return &action.ActionAttrs +} + +func NewMirredAction(redirIndex int) *MirredAction { + return &MirredAction{ + ActionAttrs: ActionAttrs{ + Action: TC_ACT_STOLEN, + }, + MirredAction: TCA_EGRESS_REDIR, + Ifindex: redirIndex, + } +} + +type TunnelKeyAct int8 + +const ( + TCA_TUNNEL_KEY_SET TunnelKeyAct = 1 // set tunnel key + TCA_TUNNEL_KEY_UNSET TunnelKeyAct = 2 // unset tunnel key +) + +type TunnelKeyAction struct { + ActionAttrs + Action TunnelKeyAct + SrcAddr net.IP + DstAddr net.IP + KeyID uint32 + DestPort uint16 +} + +func (action *TunnelKeyAction) Type() string { + return "tunnel_key" +} + +func (action *TunnelKeyAction) Attrs() *ActionAttrs { + return &action.ActionAttrs +} + +func NewTunnelKeyAction() *TunnelKeyAction { + return &TunnelKeyAction{ + ActionAttrs: ActionAttrs{ + Action: TC_ACT_PIPE, + }, + } +} + +type SkbEditAction struct { + ActionAttrs + QueueMapping *uint16 + PType *uint16 + Priority *uint32 + Mark *uint32 +} + +func (action *SkbEditAction) Type() string { + return "skbedit" +} + +func (action *SkbEditAction) Attrs() *ActionAttrs { + return &action.ActionAttrs +} + +func NewSkbEditAction() *SkbEditAction { + return &SkbEditAction{ + ActionAttrs: ActionAttrs{ + Action: TC_ACT_PIPE, + }, + } +} + +type PoliceAction struct { + ActionAttrs + Rate uint32 // in byte per second + Burst uint32 // in byte + RCellLog int + Mtu uint32 + Mpu uint16 // in byte + PeakRate uint32 // in byte per second + PCellLog int + AvRate uint32 // in byte per second + Overhead uint16 + LinkLayer int + ExceedAction TcPolAct + NotExceedAction TcPolAct +} + +func (action *PoliceAction) Type() string { + return "police" +} + +func (action *PoliceAction) Attrs() *ActionAttrs { + return &action.ActionAttrs +} + +func NewPoliceAction() *PoliceAction { + return &PoliceAction{ + RCellLog: -1, + PCellLog: -1, + LinkLayer: 1, // ETHERNET + ExceedAction: TC_POLICE_RECLASSIFY, + NotExceedAction: TC_POLICE_OK, + } +} + +// MatchAll filters match all packets +type MatchAll struct { + FilterAttrs + ClassId uint32 + Actions []Action +} + +func (filter *MatchAll) Attrs() *FilterAttrs { + return &filter.FilterAttrs +} + +func (filter *MatchAll) Type() string { + return "matchall" +} + +type FwFilter struct { + FilterAttrs + ClassId uint32 + InDev string + Mask uint32 + Police *PoliceAction +} + +func (filter *FwFilter) Attrs() *FilterAttrs { + return &filter.FilterAttrs +} + +func (filter *FwFilter) Type() string { + return "fw" +} + +type BpfFilter struct { + FilterAttrs + ClassId uint32 + Fd int + Name string + DirectAction bool + Id int + Tag string +} + +func (filter *BpfFilter) Type() string { + return "bpf" +} + +func (filter *BpfFilter) Attrs() *FilterAttrs { + return &filter.FilterAttrs +} + +// GenericFilter filters represent types that are not currently understood +// by this netlink library. +type GenericFilter struct { + FilterAttrs + FilterType string +} + +func (filter *GenericFilter) Attrs() *FilterAttrs { + return &filter.FilterAttrs +} + +func (filter *GenericFilter) Type() string { + return filter.FilterType +} diff --git a/vendor/github.com/vishvananda/netlink/filter_linux.go b/vendor/github.com/vishvananda/netlink/filter_linux.go new file mode 100644 index 000000000000..4c6d1cf7d7f7 --- /dev/null +++ b/vendor/github.com/vishvananda/netlink/filter_linux.go @@ -0,0 +1,969 @@ +package netlink + +import ( + "bytes" + "encoding/binary" + "encoding/hex" + "errors" + "fmt" + "net" + "syscall" + + "github.com/vishvananda/netlink/nl" + "golang.org/x/sys/unix" +) + +// Constants used in TcU32Sel.Flags. +const ( + TC_U32_TERMINAL = nl.TC_U32_TERMINAL + TC_U32_OFFSET = nl.TC_U32_OFFSET + TC_U32_VAROFFSET = nl.TC_U32_VAROFFSET + TC_U32_EAT = nl.TC_U32_EAT +) + +// Sel of the U32 filters that contains multiple TcU32Key. This is the type +// alias and the frontend representation of nl.TcU32Sel. It is serialized into +// canonical nl.TcU32Sel with the appropriate endianness. +type TcU32Sel = nl.TcU32Sel + +// TcU32Key contained of Sel in the U32 filters. This is the type alias and the +// frontend representation of nl.TcU32Key. It is serialized into chanonical +// nl.TcU32Sel with the appropriate endianness. +type TcU32Key = nl.TcU32Key + +// U32 filters on many packet related properties +type U32 struct { + FilterAttrs + ClassId uint32 + Divisor uint32 // Divisor MUST be power of 2. + Hash uint32 + Link uint32 + RedirIndex int + Sel *TcU32Sel + Actions []Action +} + +func (filter *U32) Attrs() *FilterAttrs { + return &filter.FilterAttrs +} + +func (filter *U32) Type() string { + return "u32" +} + +type Flower struct { + FilterAttrs + DestIP net.IP + DestIPMask net.IPMask + SrcIP net.IP + SrcIPMask net.IPMask + EthType uint16 + EncDestIP net.IP + EncDestIPMask net.IPMask + EncSrcIP net.IP + EncSrcIPMask net.IPMask + EncDestPort uint16 + EncKeyId uint32 + + Actions []Action +} + +func (filter *Flower) Attrs() *FilterAttrs { + return &filter.FilterAttrs +} + +func (filter *Flower) Type() string { + return "flower" +} + +func (filter *Flower) encodeIP(parent *nl.RtAttr, ip net.IP, mask net.IPMask, v4Type, v6Type int, v4MaskType, v6MaskType int) { + ipType := v4Type + maskType := v4MaskType + + encodeMask := mask + if mask == nil { + encodeMask = net.CIDRMask(32, 32) + } + v4IP := ip.To4() + if v4IP == nil { + ipType = v6Type + maskType = v6MaskType + if mask == nil { + encodeMask = net.CIDRMask(128, 128) + } + } else { + ip = v4IP + } + + parent.AddRtAttr(ipType, ip) + parent.AddRtAttr(maskType, encodeMask) +} + +func (filter *Flower) encode(parent *nl.RtAttr) error { + if filter.EthType != 0 { + parent.AddRtAttr(nl.TCA_FLOWER_KEY_ETH_TYPE, htons(filter.EthType)) + } + if filter.SrcIP != nil { + filter.encodeIP(parent, filter.SrcIP, filter.SrcIPMask, + nl.TCA_FLOWER_KEY_IPV4_SRC, nl.TCA_FLOWER_KEY_IPV6_SRC, + nl.TCA_FLOWER_KEY_IPV4_SRC_MASK, nl.TCA_FLOWER_KEY_IPV6_SRC_MASK) + } + if filter.DestIP != nil { + filter.encodeIP(parent, filter.DestIP, filter.DestIPMask, + nl.TCA_FLOWER_KEY_IPV4_DST, nl.TCA_FLOWER_KEY_IPV6_DST, + nl.TCA_FLOWER_KEY_IPV4_DST_MASK, nl.TCA_FLOWER_KEY_IPV6_DST_MASK) + } + if filter.EncSrcIP != nil { + filter.encodeIP(parent, filter.EncSrcIP, filter.EncSrcIPMask, + nl.TCA_FLOWER_KEY_ENC_IPV4_SRC, nl.TCA_FLOWER_KEY_ENC_IPV6_SRC, + nl.TCA_FLOWER_KEY_ENC_IPV4_SRC_MASK, nl.TCA_FLOWER_KEY_ENC_IPV6_SRC_MASK) + } + if filter.EncDestIP != nil { + filter.encodeIP(parent, filter.EncDestIP, filter.EncSrcIPMask, + nl.TCA_FLOWER_KEY_ENC_IPV4_DST, nl.TCA_FLOWER_KEY_ENC_IPV6_DST, + nl.TCA_FLOWER_KEY_ENC_IPV4_DST_MASK, nl.TCA_FLOWER_KEY_ENC_IPV6_DST_MASK) + } + if filter.EncDestPort != 0 { + parent.AddRtAttr(nl.TCA_FLOWER_KEY_ENC_UDP_DST_PORT, htons(filter.EncDestPort)) + } + if filter.EncKeyId != 0 { + parent.AddRtAttr(nl.TCA_FLOWER_KEY_ENC_KEY_ID, htonl(filter.EncKeyId)) + } + + actionsAttr := parent.AddRtAttr(nl.TCA_FLOWER_ACT, nil) + if err := EncodeActions(actionsAttr, filter.Actions); err != nil { + return err + } + return nil +} + +func (filter *Flower) decode(data []syscall.NetlinkRouteAttr) error { + for _, datum := range data { + switch datum.Attr.Type { + case nl.TCA_FLOWER_KEY_ETH_TYPE: + filter.EthType = ntohs(datum.Value) + case nl.TCA_FLOWER_KEY_IPV4_SRC, nl.TCA_FLOWER_KEY_IPV6_SRC: + filter.SrcIP = datum.Value + case nl.TCA_FLOWER_KEY_IPV4_SRC_MASK, nl.TCA_FLOWER_KEY_IPV6_SRC_MASK: + filter.SrcIPMask = datum.Value + case nl.TCA_FLOWER_KEY_IPV4_DST, nl.TCA_FLOWER_KEY_IPV6_DST: + filter.DestIP = datum.Value + case nl.TCA_FLOWER_KEY_IPV4_DST_MASK, nl.TCA_FLOWER_KEY_IPV6_DST_MASK: + filter.DestIPMask = datum.Value + case nl.TCA_FLOWER_KEY_ENC_IPV4_SRC, nl.TCA_FLOWER_KEY_ENC_IPV6_SRC: + filter.EncSrcIP = datum.Value + case nl.TCA_FLOWER_KEY_ENC_IPV4_SRC_MASK, nl.TCA_FLOWER_KEY_ENC_IPV6_SRC_MASK: + filter.EncSrcIPMask = datum.Value + case nl.TCA_FLOWER_KEY_ENC_IPV4_DST, nl.TCA_FLOWER_KEY_ENC_IPV6_DST: + filter.EncDestIP = datum.Value + case nl.TCA_FLOWER_KEY_ENC_IPV4_DST_MASK, nl.TCA_FLOWER_KEY_ENC_IPV6_DST_MASK: + filter.EncDestIPMask = datum.Value + case nl.TCA_FLOWER_KEY_ENC_UDP_DST_PORT: + filter.EncDestPort = ntohs(datum.Value) + case nl.TCA_FLOWER_KEY_ENC_KEY_ID: + filter.EncKeyId = ntohl(datum.Value) + case nl.TCA_FLOWER_ACT: + tables, err := nl.ParseRouteAttr(datum.Value) + if err != nil { + return err + } + filter.Actions, err = parseActions(tables) + if err != nil { + return err + } + } + } + return nil +} + +// FilterDel will delete a filter from the system. +// Equivalent to: `tc filter del $filter` +func FilterDel(filter Filter) error { + return pkgHandle.FilterDel(filter) +} + +// FilterDel will delete a filter from the system. +// Equivalent to: `tc filter del $filter` +func (h *Handle) FilterDel(filter Filter) error { + req := h.newNetlinkRequest(unix.RTM_DELTFILTER, unix.NLM_F_ACK) + base := filter.Attrs() + msg := &nl.TcMsg{ + Family: nl.FAMILY_ALL, + Ifindex: int32(base.LinkIndex), + Handle: base.Handle, + Parent: base.Parent, + Info: MakeHandle(base.Priority, nl.Swap16(base.Protocol)), + } + req.AddData(msg) + + _, err := req.Execute(unix.NETLINK_ROUTE, 0) + return err +} + +// FilterAdd will add a filter to the system. +// Equivalent to: `tc filter add $filter` +func FilterAdd(filter Filter) error { + return pkgHandle.FilterAdd(filter) +} + +// FilterAdd will add a filter to the system. +// Equivalent to: `tc filter add $filter` +func (h *Handle) FilterAdd(filter Filter) error { + return h.filterModify(filter, unix.NLM_F_CREATE|unix.NLM_F_EXCL) +} + +// FilterReplace will replace a filter. +// Equivalent to: `tc filter replace $filter` +func FilterReplace(filter Filter) error { + return pkgHandle.FilterReplace(filter) +} + +// FilterReplace will replace a filter. +// Equivalent to: `tc filter replace $filter` +func (h *Handle) FilterReplace(filter Filter) error { + return h.filterModify(filter, unix.NLM_F_CREATE) +} + +func (h *Handle) filterModify(filter Filter, flags int) error { + req := h.newNetlinkRequest(unix.RTM_NEWTFILTER, flags|unix.NLM_F_ACK) + base := filter.Attrs() + msg := &nl.TcMsg{ + Family: nl.FAMILY_ALL, + Ifindex: int32(base.LinkIndex), + Handle: base.Handle, + Parent: base.Parent, + Info: MakeHandle(base.Priority, nl.Swap16(base.Protocol)), + } + req.AddData(msg) + req.AddData(nl.NewRtAttr(nl.TCA_KIND, nl.ZeroTerminated(filter.Type()))) + + options := nl.NewRtAttr(nl.TCA_OPTIONS, nil) + + switch filter := filter.(type) { + case *U32: + sel := filter.Sel + if sel == nil { + // match all + sel = &nl.TcU32Sel{ + Nkeys: 1, + Flags: nl.TC_U32_TERMINAL, + } + sel.Keys = append(sel.Keys, nl.TcU32Key{}) + } + + if native != networkOrder { + // Copy TcU32Sel. + cSel := *sel + keys := make([]nl.TcU32Key, cap(sel.Keys)) + copy(keys, sel.Keys) + cSel.Keys = keys + sel = &cSel + + // Handle the endianness of attributes + sel.Offmask = native.Uint16(htons(sel.Offmask)) + sel.Hmask = native.Uint32(htonl(sel.Hmask)) + for i, key := range sel.Keys { + sel.Keys[i].Mask = native.Uint32(htonl(key.Mask)) + sel.Keys[i].Val = native.Uint32(htonl(key.Val)) + } + } + sel.Nkeys = uint8(len(sel.Keys)) + options.AddRtAttr(nl.TCA_U32_SEL, sel.Serialize()) + if filter.ClassId != 0 { + options.AddRtAttr(nl.TCA_U32_CLASSID, nl.Uint32Attr(filter.ClassId)) + } + if filter.Divisor != 0 { + if (filter.Divisor-1)&filter.Divisor != 0 { + return fmt.Errorf("illegal divisor %d. Must be a power of 2", filter.Divisor) + } + options.AddRtAttr(nl.TCA_U32_DIVISOR, nl.Uint32Attr(filter.Divisor)) + } + if filter.Hash != 0 { + options.AddRtAttr(nl.TCA_U32_HASH, nl.Uint32Attr(filter.Hash)) + } + if filter.Link != 0 { + options.AddRtAttr(nl.TCA_U32_LINK, nl.Uint32Attr(filter.Link)) + } + actionsAttr := options.AddRtAttr(nl.TCA_U32_ACT, nil) + // backwards compatibility + if filter.RedirIndex != 0 { + filter.Actions = append([]Action{NewMirredAction(filter.RedirIndex)}, filter.Actions...) + } + if err := EncodeActions(actionsAttr, filter.Actions); err != nil { + return err + } + case *FwFilter: + if filter.Mask != 0 { + b := make([]byte, 4) + native.PutUint32(b, filter.Mask) + options.AddRtAttr(nl.TCA_FW_MASK, b) + } + if filter.InDev != "" { + options.AddRtAttr(nl.TCA_FW_INDEV, nl.ZeroTerminated(filter.InDev)) + } + if filter.Police != nil { + police := options.AddRtAttr(nl.TCA_FW_POLICE, nil) + if err := encodePolice(police, filter.Police); err != nil { + return err + } + } + if filter.ClassId != 0 { + b := make([]byte, 4) + native.PutUint32(b, filter.ClassId) + options.AddRtAttr(nl.TCA_FW_CLASSID, b) + } + case *BpfFilter: + var bpfFlags uint32 + if filter.ClassId != 0 { + options.AddRtAttr(nl.TCA_BPF_CLASSID, nl.Uint32Attr(filter.ClassId)) + } + if filter.Fd >= 0 { + options.AddRtAttr(nl.TCA_BPF_FD, nl.Uint32Attr((uint32(filter.Fd)))) + } + if filter.Name != "" { + options.AddRtAttr(nl.TCA_BPF_NAME, nl.ZeroTerminated(filter.Name)) + } + if filter.DirectAction { + bpfFlags |= nl.TCA_BPF_FLAG_ACT_DIRECT + } + options.AddRtAttr(nl.TCA_BPF_FLAGS, nl.Uint32Attr(bpfFlags)) + case *MatchAll: + actionsAttr := options.AddRtAttr(nl.TCA_MATCHALL_ACT, nil) + if err := EncodeActions(actionsAttr, filter.Actions); err != nil { + return err + } + if filter.ClassId != 0 { + options.AddRtAttr(nl.TCA_MATCHALL_CLASSID, nl.Uint32Attr(filter.ClassId)) + } + case *Flower: + if err := filter.encode(options); err != nil { + return err + } + } + + req.AddData(options) + _, err := req.Execute(unix.NETLINK_ROUTE, 0) + return err +} + +// FilterList gets a list of filters in the system. +// Equivalent to: `tc filter show`. +// Generally returns nothing if link and parent are not specified. +func FilterList(link Link, parent uint32) ([]Filter, error) { + return pkgHandle.FilterList(link, parent) +} + +// FilterList gets a list of filters in the system. +// Equivalent to: `tc filter show`. +// Generally returns nothing if link and parent are not specified. +func (h *Handle) FilterList(link Link, parent uint32) ([]Filter, error) { + req := h.newNetlinkRequest(unix.RTM_GETTFILTER, unix.NLM_F_DUMP) + msg := &nl.TcMsg{ + Family: nl.FAMILY_ALL, + Parent: parent, + } + if link != nil { + base := link.Attrs() + h.ensureIndex(base) + msg.Ifindex = int32(base.Index) + } + req.AddData(msg) + + msgs, err := req.Execute(unix.NETLINK_ROUTE, unix.RTM_NEWTFILTER) + if err != nil { + return nil, err + } + + var res []Filter + for _, m := range msgs { + msg := nl.DeserializeTcMsg(m) + + attrs, err := nl.ParseRouteAttr(m[msg.Len():]) + if err != nil { + return nil, err + } + + base := FilterAttrs{ + LinkIndex: int(msg.Ifindex), + Handle: msg.Handle, + Parent: msg.Parent, + } + base.Priority, base.Protocol = MajorMinor(msg.Info) + base.Protocol = nl.Swap16(base.Protocol) + + var filter Filter + filterType := "" + detailed := false + for _, attr := range attrs { + switch attr.Attr.Type { + case nl.TCA_KIND: + filterType = string(attr.Value[:len(attr.Value)-1]) + switch filterType { + case "u32": + filter = &U32{} + case "fw": + filter = &FwFilter{} + case "bpf": + filter = &BpfFilter{} + case "matchall": + filter = &MatchAll{} + case "flower": + filter = &Flower{} + default: + filter = &GenericFilter{FilterType: filterType} + } + case nl.TCA_OPTIONS: + data, err := nl.ParseRouteAttr(attr.Value) + if err != nil { + return nil, err + } + switch filterType { + case "u32": + detailed, err = parseU32Data(filter, data) + if err != nil { + return nil, err + } + case "fw": + detailed, err = parseFwData(filter, data) + if err != nil { + return nil, err + } + case "bpf": + detailed, err = parseBpfData(filter, data) + if err != nil { + return nil, err + } + case "matchall": + detailed, err = parseMatchAllData(filter, data) + if err != nil { + return nil, err + } + case "flower": + detailed, err = parseFlowerData(filter, data) + if err != nil { + return nil, err + } + default: + detailed = true + } + } + } + // only return the detailed version of the filter + if detailed { + *filter.Attrs() = base + res = append(res, filter) + } + } + + return res, nil +} + +func toTcGen(attrs *ActionAttrs, tcgen *nl.TcGen) { + tcgen.Index = uint32(attrs.Index) + tcgen.Capab = uint32(attrs.Capab) + tcgen.Action = int32(attrs.Action) + tcgen.Refcnt = int32(attrs.Refcnt) + tcgen.Bindcnt = int32(attrs.Bindcnt) +} + +func toAttrs(tcgen *nl.TcGen, attrs *ActionAttrs) { + attrs.Index = int(tcgen.Index) + attrs.Capab = int(tcgen.Capab) + attrs.Action = TcAct(tcgen.Action) + attrs.Refcnt = int(tcgen.Refcnt) + attrs.Bindcnt = int(tcgen.Bindcnt) +} + +func encodePolice(attr *nl.RtAttr, action *PoliceAction) error { + var rtab [256]uint32 + var ptab [256]uint32 + police := nl.TcPolice{} + police.Index = uint32(action.Attrs().Index) + police.Bindcnt = int32(action.Attrs().Bindcnt) + police.Capab = uint32(action.Attrs().Capab) + police.Refcnt = int32(action.Attrs().Refcnt) + police.Rate.Rate = action.Rate + police.PeakRate.Rate = action.PeakRate + police.Action = int32(action.ExceedAction) + + if police.Rate.Rate != 0 { + police.Rate.Mpu = action.Mpu + police.Rate.Overhead = action.Overhead + if CalcRtable(&police.Rate, rtab[:], action.RCellLog, action.Mtu, action.LinkLayer) < 0 { + return errors.New("TBF: failed to calculate rate table") + } + police.Burst = Xmittime(uint64(police.Rate.Rate), action.Burst) + } + + police.Mtu = action.Mtu + if police.PeakRate.Rate != 0 { + police.PeakRate.Mpu = action.Mpu + police.PeakRate.Overhead = action.Overhead + if CalcRtable(&police.PeakRate, ptab[:], action.PCellLog, action.Mtu, action.LinkLayer) < 0 { + return errors.New("POLICE: failed to calculate peak rate table") + } + } + + attr.AddRtAttr(nl.TCA_POLICE_TBF, police.Serialize()) + if police.Rate.Rate != 0 { + attr.AddRtAttr(nl.TCA_POLICE_RATE, SerializeRtab(rtab)) + } + if police.PeakRate.Rate != 0 { + attr.AddRtAttr(nl.TCA_POLICE_PEAKRATE, SerializeRtab(ptab)) + } + if action.AvRate != 0 { + attr.AddRtAttr(nl.TCA_POLICE_AVRATE, nl.Uint32Attr(action.AvRate)) + } + if action.NotExceedAction != 0 { + attr.AddRtAttr(nl.TCA_POLICE_RESULT, nl.Uint32Attr(uint32(action.NotExceedAction))) + } + + return nil +} + +func EncodeActions(attr *nl.RtAttr, actions []Action) error { + tabIndex := int(nl.TCA_ACT_TAB) + + for _, action := range actions { + switch action := action.(type) { + default: + return fmt.Errorf("unknown action type %s", action.Type()) + case *PoliceAction: + table := attr.AddRtAttr(tabIndex, nil) + tabIndex++ + table.AddRtAttr(nl.TCA_ACT_KIND, nl.ZeroTerminated("police")) + aopts := table.AddRtAttr(nl.TCA_ACT_OPTIONS, nil) + if err := encodePolice(aopts, action); err != nil { + return err + } + case *MirredAction: + table := attr.AddRtAttr(tabIndex, nil) + tabIndex++ + table.AddRtAttr(nl.TCA_ACT_KIND, nl.ZeroTerminated("mirred")) + aopts := table.AddRtAttr(nl.TCA_ACT_OPTIONS, nil) + mirred := nl.TcMirred{ + Eaction: int32(action.MirredAction), + Ifindex: uint32(action.Ifindex), + } + toTcGen(action.Attrs(), &mirred.TcGen) + aopts.AddRtAttr(nl.TCA_MIRRED_PARMS, mirred.Serialize()) + case *TunnelKeyAction: + table := attr.AddRtAttr(tabIndex, nil) + tabIndex++ + table.AddRtAttr(nl.TCA_ACT_KIND, nl.ZeroTerminated("tunnel_key")) + aopts := table.AddRtAttr(nl.TCA_ACT_OPTIONS, nil) + tun := nl.TcTunnelKey{ + Action: int32(action.Action), + } + toTcGen(action.Attrs(), &tun.TcGen) + aopts.AddRtAttr(nl.TCA_TUNNEL_KEY_PARMS, tun.Serialize()) + if action.Action == TCA_TUNNEL_KEY_SET { + aopts.AddRtAttr(nl.TCA_TUNNEL_KEY_ENC_KEY_ID, htonl(action.KeyID)) + if v4 := action.SrcAddr.To4(); v4 != nil { + aopts.AddRtAttr(nl.TCA_TUNNEL_KEY_ENC_IPV4_SRC, v4[:]) + } else if v6 := action.SrcAddr.To16(); v6 != nil { + aopts.AddRtAttr(nl.TCA_TUNNEL_KEY_ENC_IPV6_SRC, v6[:]) + } else { + return fmt.Errorf("invalid src addr %s for tunnel_key action", action.SrcAddr) + } + if v4 := action.DstAddr.To4(); v4 != nil { + aopts.AddRtAttr(nl.TCA_TUNNEL_KEY_ENC_IPV4_DST, v4[:]) + } else if v6 := action.DstAddr.To16(); v6 != nil { + aopts.AddRtAttr(nl.TCA_TUNNEL_KEY_ENC_IPV6_DST, v6[:]) + } else { + return fmt.Errorf("invalid dst addr %s for tunnel_key action", action.DstAddr) + } + if action.DestPort != 0 { + aopts.AddRtAttr(nl.TCA_TUNNEL_KEY_ENC_DST_PORT, htons(action.DestPort)) + } + } + case *SkbEditAction: + table := attr.AddRtAttr(tabIndex, nil) + tabIndex++ + table.AddRtAttr(nl.TCA_ACT_KIND, nl.ZeroTerminated("skbedit")) + aopts := table.AddRtAttr(nl.TCA_ACT_OPTIONS, nil) + skbedit := nl.TcSkbEdit{} + toTcGen(action.Attrs(), &skbedit.TcGen) + aopts.AddRtAttr(nl.TCA_SKBEDIT_PARMS, skbedit.Serialize()) + if action.QueueMapping != nil { + aopts.AddRtAttr(nl.TCA_SKBEDIT_QUEUE_MAPPING, nl.Uint16Attr(*action.QueueMapping)) + } + if action.Priority != nil { + aopts.AddRtAttr(nl.TCA_SKBEDIT_PRIORITY, nl.Uint32Attr(*action.Priority)) + } + if action.PType != nil { + aopts.AddRtAttr(nl.TCA_SKBEDIT_PTYPE, nl.Uint16Attr(*action.PType)) + } + if action.Mark != nil { + aopts.AddRtAttr(nl.TCA_SKBEDIT_MARK, nl.Uint32Attr(*action.Mark)) + } + case *ConnmarkAction: + table := attr.AddRtAttr(tabIndex, nil) + tabIndex++ + table.AddRtAttr(nl.TCA_ACT_KIND, nl.ZeroTerminated("connmark")) + aopts := table.AddRtAttr(nl.TCA_ACT_OPTIONS, nil) + connmark := nl.TcConnmark{ + Zone: action.Zone, + } + toTcGen(action.Attrs(), &connmark.TcGen) + aopts.AddRtAttr(nl.TCA_CONNMARK_PARMS, connmark.Serialize()) + case *CsumAction: + table := attr.AddRtAttr(tabIndex, nil) + tabIndex++ + table.AddRtAttr(nl.TCA_ACT_KIND, nl.ZeroTerminated("csum")) + aopts := table.AddRtAttr(nl.TCA_ACT_OPTIONS, nil) + csum := nl.TcCsum{ + UpdateFlags: uint32(action.UpdateFlags), + } + toTcGen(action.Attrs(), &csum.TcGen) + aopts.AddRtAttr(nl.TCA_CSUM_PARMS, csum.Serialize()) + case *BpfAction: + table := attr.AddRtAttr(tabIndex, nil) + tabIndex++ + table.AddRtAttr(nl.TCA_ACT_KIND, nl.ZeroTerminated("bpf")) + aopts := table.AddRtAttr(nl.TCA_ACT_OPTIONS, nil) + gen := nl.TcGen{} + toTcGen(action.Attrs(), &gen) + aopts.AddRtAttr(nl.TCA_ACT_BPF_PARMS, gen.Serialize()) + aopts.AddRtAttr(nl.TCA_ACT_BPF_FD, nl.Uint32Attr(uint32(action.Fd))) + aopts.AddRtAttr(nl.TCA_ACT_BPF_NAME, nl.ZeroTerminated(action.Name)) + case *GenericAction: + table := attr.AddRtAttr(tabIndex, nil) + tabIndex++ + table.AddRtAttr(nl.TCA_ACT_KIND, nl.ZeroTerminated("gact")) + aopts := table.AddRtAttr(nl.TCA_ACT_OPTIONS, nil) + gen := nl.TcGen{} + toTcGen(action.Attrs(), &gen) + aopts.AddRtAttr(nl.TCA_GACT_PARMS, gen.Serialize()) + } + } + return nil +} + +func parsePolice(data syscall.NetlinkRouteAttr, police *PoliceAction) { + switch data.Attr.Type { + case nl.TCA_POLICE_RESULT: + police.NotExceedAction = TcPolAct(native.Uint32(data.Value[0:4])) + case nl.TCA_POLICE_AVRATE: + police.AvRate = native.Uint32(data.Value[0:4]) + case nl.TCA_POLICE_TBF: + p := *nl.DeserializeTcPolice(data.Value) + police.ActionAttrs = ActionAttrs{} + police.Attrs().Index = int(p.Index) + police.Attrs().Bindcnt = int(p.Bindcnt) + police.Attrs().Capab = int(p.Capab) + police.Attrs().Refcnt = int(p.Refcnt) + police.ExceedAction = TcPolAct(p.Action) + police.Rate = p.Rate.Rate + police.PeakRate = p.PeakRate.Rate + police.Burst = Xmitsize(uint64(p.Rate.Rate), p.Burst) + police.Mtu = p.Mtu + police.LinkLayer = int(p.Rate.Linklayer) & nl.TC_LINKLAYER_MASK + police.Overhead = p.Rate.Overhead + } +} + +func parseActions(tables []syscall.NetlinkRouteAttr) ([]Action, error) { + var actions []Action + for _, table := range tables { + var action Action + var actionType string + aattrs, err := nl.ParseRouteAttr(table.Value) + if err != nil { + return nil, err + } + nextattr: + for _, aattr := range aattrs { + switch aattr.Attr.Type { + case nl.TCA_KIND: + actionType = string(aattr.Value[:len(aattr.Value)-1]) + // only parse if the action is mirred or bpf + switch actionType { + case "mirred": + action = &MirredAction{} + case "bpf": + action = &BpfAction{} + case "connmark": + action = &ConnmarkAction{} + case "csum": + action = &CsumAction{} + case "gact": + action = &GenericAction{} + case "tunnel_key": + action = &TunnelKeyAction{} + case "skbedit": + action = &SkbEditAction{} + case "police": + action = &PoliceAction{} + default: + break nextattr + } + case nl.TCA_OPTIONS: + adata, err := nl.ParseRouteAttr(aattr.Value) + if err != nil { + return nil, err + } + for _, adatum := range adata { + switch actionType { + case "mirred": + switch adatum.Attr.Type { + case nl.TCA_MIRRED_PARMS: + mirred := *nl.DeserializeTcMirred(adatum.Value) + action.(*MirredAction).ActionAttrs = ActionAttrs{} + toAttrs(&mirred.TcGen, action.Attrs()) + action.(*MirredAction).Ifindex = int(mirred.Ifindex) + action.(*MirredAction).MirredAction = MirredAct(mirred.Eaction) + } + case "tunnel_key": + switch adatum.Attr.Type { + case nl.TCA_TUNNEL_KEY_PARMS: + tun := *nl.DeserializeTunnelKey(adatum.Value) + action.(*TunnelKeyAction).ActionAttrs = ActionAttrs{} + toAttrs(&tun.TcGen, action.Attrs()) + action.(*TunnelKeyAction).Action = TunnelKeyAct(tun.Action) + case nl.TCA_TUNNEL_KEY_ENC_KEY_ID: + action.(*TunnelKeyAction).KeyID = networkOrder.Uint32(adatum.Value[0:4]) + case nl.TCA_TUNNEL_KEY_ENC_IPV6_SRC, nl.TCA_TUNNEL_KEY_ENC_IPV4_SRC: + action.(*TunnelKeyAction).SrcAddr = adatum.Value[:] + case nl.TCA_TUNNEL_KEY_ENC_IPV6_DST, nl.TCA_TUNNEL_KEY_ENC_IPV4_DST: + action.(*TunnelKeyAction).DstAddr = adatum.Value[:] + case nl.TCA_TUNNEL_KEY_ENC_DST_PORT: + action.(*TunnelKeyAction).DestPort = ntohs(adatum.Value) + } + case "skbedit": + switch adatum.Attr.Type { + case nl.TCA_SKBEDIT_PARMS: + skbedit := *nl.DeserializeSkbEdit(adatum.Value) + action.(*SkbEditAction).ActionAttrs = ActionAttrs{} + toAttrs(&skbedit.TcGen, action.Attrs()) + case nl.TCA_SKBEDIT_MARK: + mark := native.Uint32(adatum.Value[0:4]) + action.(*SkbEditAction).Mark = &mark + case nl.TCA_SKBEDIT_PRIORITY: + priority := native.Uint32(adatum.Value[0:4]) + action.(*SkbEditAction).Priority = &priority + case nl.TCA_SKBEDIT_PTYPE: + ptype := native.Uint16(adatum.Value[0:2]) + action.(*SkbEditAction).PType = &ptype + case nl.TCA_SKBEDIT_QUEUE_MAPPING: + mapping := native.Uint16(adatum.Value[0:2]) + action.(*SkbEditAction).QueueMapping = &mapping + } + case "bpf": + switch adatum.Attr.Type { + case nl.TCA_ACT_BPF_PARMS: + gen := *nl.DeserializeTcGen(adatum.Value) + toAttrs(&gen, action.Attrs()) + case nl.TCA_ACT_BPF_FD: + action.(*BpfAction).Fd = int(native.Uint32(adatum.Value[0:4])) + case nl.TCA_ACT_BPF_NAME: + action.(*BpfAction).Name = string(adatum.Value[:len(adatum.Value)-1]) + } + case "connmark": + switch adatum.Attr.Type { + case nl.TCA_CONNMARK_PARMS: + connmark := *nl.DeserializeTcConnmark(adatum.Value) + action.(*ConnmarkAction).ActionAttrs = ActionAttrs{} + toAttrs(&connmark.TcGen, action.Attrs()) + action.(*ConnmarkAction).Zone = connmark.Zone + } + case "csum": + switch adatum.Attr.Type { + case nl.TCA_CSUM_PARMS: + csum := *nl.DeserializeTcCsum(adatum.Value) + action.(*CsumAction).ActionAttrs = ActionAttrs{} + toAttrs(&csum.TcGen, action.Attrs()) + action.(*CsumAction).UpdateFlags = CsumUpdateFlags(csum.UpdateFlags) + } + case "gact": + switch adatum.Attr.Type { + case nl.TCA_GACT_PARMS: + gen := *nl.DeserializeTcGen(adatum.Value) + toAttrs(&gen, action.Attrs()) + } + case "police": + parsePolice(adatum, action.(*PoliceAction)) + } + } + } + } + actions = append(actions, action) + } + return actions, nil +} + +func parseU32Data(filter Filter, data []syscall.NetlinkRouteAttr) (bool, error) { + u32 := filter.(*U32) + detailed := false + for _, datum := range data { + switch datum.Attr.Type { + case nl.TCA_U32_SEL: + detailed = true + sel := nl.DeserializeTcU32Sel(datum.Value) + u32.Sel = sel + if native != networkOrder { + // Handle the endianness of attributes + u32.Sel.Offmask = native.Uint16(htons(sel.Offmask)) + u32.Sel.Hmask = native.Uint32(htonl(sel.Hmask)) + for i, key := range u32.Sel.Keys { + u32.Sel.Keys[i].Mask = native.Uint32(htonl(key.Mask)) + u32.Sel.Keys[i].Val = native.Uint32(htonl(key.Val)) + } + } + case nl.TCA_U32_ACT: + tables, err := nl.ParseRouteAttr(datum.Value) + if err != nil { + return detailed, err + } + u32.Actions, err = parseActions(tables) + if err != nil { + return detailed, err + } + for _, action := range u32.Actions { + if action, ok := action.(*MirredAction); ok { + u32.RedirIndex = int(action.Ifindex) + } + } + case nl.TCA_U32_CLASSID: + u32.ClassId = native.Uint32(datum.Value) + case nl.TCA_U32_DIVISOR: + u32.Divisor = native.Uint32(datum.Value) + case nl.TCA_U32_HASH: + u32.Hash = native.Uint32(datum.Value) + case nl.TCA_U32_LINK: + u32.Link = native.Uint32(datum.Value) + } + } + return detailed, nil +} + +func parseFwData(filter Filter, data []syscall.NetlinkRouteAttr) (bool, error) { + fw := filter.(*FwFilter) + detailed := true + for _, datum := range data { + switch datum.Attr.Type { + case nl.TCA_FW_MASK: + fw.Mask = native.Uint32(datum.Value[0:4]) + case nl.TCA_FW_CLASSID: + fw.ClassId = native.Uint32(datum.Value[0:4]) + case nl.TCA_FW_INDEV: + fw.InDev = string(datum.Value[:len(datum.Value)-1]) + case nl.TCA_FW_POLICE: + var police PoliceAction + adata, _ := nl.ParseRouteAttr(datum.Value) + for _, aattr := range adata { + parsePolice(aattr, &police) + } + fw.Police = &police + } + } + return detailed, nil +} + +func parseBpfData(filter Filter, data []syscall.NetlinkRouteAttr) (bool, error) { + bpf := filter.(*BpfFilter) + detailed := true + for _, datum := range data { + switch datum.Attr.Type { + case nl.TCA_BPF_FD: + bpf.Fd = int(native.Uint32(datum.Value[0:4])) + case nl.TCA_BPF_NAME: + bpf.Name = string(datum.Value[:len(datum.Value)-1]) + case nl.TCA_BPF_CLASSID: + bpf.ClassId = native.Uint32(datum.Value[0:4]) + case nl.TCA_BPF_FLAGS: + flags := native.Uint32(datum.Value[0:4]) + if (flags & nl.TCA_BPF_FLAG_ACT_DIRECT) != 0 { + bpf.DirectAction = true + } + case nl.TCA_BPF_ID: + bpf.Id = int(native.Uint32(datum.Value[0:4])) + case nl.TCA_BPF_TAG: + bpf.Tag = hex.EncodeToString(datum.Value) + } + } + return detailed, nil +} + +func parseMatchAllData(filter Filter, data []syscall.NetlinkRouteAttr) (bool, error) { + matchall := filter.(*MatchAll) + detailed := true + for _, datum := range data { + switch datum.Attr.Type { + case nl.TCA_MATCHALL_CLASSID: + matchall.ClassId = native.Uint32(datum.Value[0:4]) + case nl.TCA_MATCHALL_ACT: + tables, err := nl.ParseRouteAttr(datum.Value) + if err != nil { + return detailed, err + } + matchall.Actions, err = parseActions(tables) + if err != nil { + return detailed, err + } + } + } + return detailed, nil +} + +func parseFlowerData(filter Filter, data []syscall.NetlinkRouteAttr) (bool, error) { + return true, filter.(*Flower).decode(data) +} + +func AlignToAtm(size uint) uint { + var linksize, cells int + cells = int(size / nl.ATM_CELL_PAYLOAD) + if (size % nl.ATM_CELL_PAYLOAD) > 0 { + cells++ + } + linksize = cells * nl.ATM_CELL_SIZE + return uint(linksize) +} + +func AdjustSize(sz uint, mpu uint, linklayer int) uint { + if sz < mpu { + sz = mpu + } + switch linklayer { + case nl.LINKLAYER_ATM: + return AlignToAtm(sz) + default: + return sz + } +} + +func CalcRtable(rate *nl.TcRateSpec, rtab []uint32, cellLog int, mtu uint32, linklayer int) int { + bps := rate.Rate + mpu := rate.Mpu + var sz uint + if mtu == 0 { + mtu = 2047 + } + if cellLog < 0 { + cellLog = 0 + for (mtu >> uint(cellLog)) > 255 { + cellLog++ + } + } + for i := 0; i < 256; i++ { + sz = AdjustSize(uint((i+1)<= nl.IPSET_ERR_PRIVATE { + err = nl.IPSetError(uintptr(errno)) + } + } + return +} + +func ipsetUnserialize(msgs [][]byte) (result IPSetResult) { + for _, msg := range msgs { + result.unserialize(msg) + } + return result +} + +func (result *IPSetResult) unserialize(msg []byte) { + result.Nfgenmsg = nl.DeserializeNfgenmsg(msg) + + for attr := range nl.ParseAttributes(msg[4:]) { + switch attr.Type { + case nl.IPSET_ATTR_PROTOCOL: + result.Protocol = attr.Value[0] + case nl.IPSET_ATTR_SETNAME: + result.SetName = nl.BytesToString(attr.Value) + case nl.IPSET_ATTR_COMMENT: + result.Comment = nl.BytesToString(attr.Value) + case nl.IPSET_ATTR_TYPENAME: + result.TypeName = nl.BytesToString(attr.Value) + case nl.IPSET_ATTR_REVISION: + result.Revision = attr.Value[0] + case nl.IPSET_ATTR_FAMILY: + result.Family = attr.Value[0] + case nl.IPSET_ATTR_FLAGS: + result.Flags = attr.Value[0] + case nl.IPSET_ATTR_DATA | nl.NLA_F_NESTED: + result.parseAttrData(attr.Value) + case nl.IPSET_ATTR_ADT | nl.NLA_F_NESTED: + result.parseAttrADT(attr.Value) + case nl.IPSET_ATTR_PROTOCOL_MIN: + result.ProtocolMinVersion = attr.Value[0] + case nl.IPSET_ATTR_MARKMASK: + result.MarkMask = attr.Uint32() + default: + log.Printf("unknown ipset attribute from kernel: %+v %v", attr, attr.Type&nl.NLA_TYPE_MASK) + } + } +} + +func (result *IPSetResult) parseAttrData(data []byte) { + for attr := range nl.ParseAttributes(data) { + switch attr.Type { + case nl.IPSET_ATTR_HASHSIZE | nl.NLA_F_NET_BYTEORDER: + result.HashSize = attr.Uint32() + case nl.IPSET_ATTR_MAXELEM | nl.NLA_F_NET_BYTEORDER: + result.MaxElements = attr.Uint32() + case nl.IPSET_ATTR_TIMEOUT | nl.NLA_F_NET_BYTEORDER: + val := attr.Uint32() + result.Timeout = &val + case nl.IPSET_ATTR_ELEMENTS | nl.NLA_F_NET_BYTEORDER: + result.NumEntries = attr.Uint32() + case nl.IPSET_ATTR_REFERENCES | nl.NLA_F_NET_BYTEORDER: + result.References = attr.Uint32() + case nl.IPSET_ATTR_MEMSIZE | nl.NLA_F_NET_BYTEORDER: + result.SizeInMemory = attr.Uint32() + case nl.IPSET_ATTR_CADT_FLAGS | nl.NLA_F_NET_BYTEORDER: + result.CadtFlags = attr.Uint32() + case nl.IPSET_ATTR_IP | nl.NLA_F_NESTED: + for nested := range nl.ParseAttributes(attr.Value) { + switch nested.Type { + case nl.IPSET_ATTR_IP | nl.NLA_F_NET_BYTEORDER: + result.Entries = append(result.Entries, IPSetEntry{IP: nested.Value}) + case nl.IPSET_ATTR_IP: + result.IPFrom = nested.Value + default: + log.Printf("unknown nested ipset data attribute from kernel: %+v %v", nested, nested.Type&nl.NLA_TYPE_MASK) + } + } + case nl.IPSET_ATTR_IP_TO | nl.NLA_F_NESTED: + for nested := range nl.ParseAttributes(attr.Value) { + switch nested.Type { + case nl.IPSET_ATTR_IP: + result.IPTo = nested.Value + default: + log.Printf("unknown nested ipset data attribute from kernel: %+v %v", nested, nested.Type&nl.NLA_TYPE_MASK) + } + } + case nl.IPSET_ATTR_PORT_FROM | nl.NLA_F_NET_BYTEORDER: + result.PortFrom = networkOrder.Uint16(attr.Value) + case nl.IPSET_ATTR_PORT_TO | nl.NLA_F_NET_BYTEORDER: + result.PortTo = networkOrder.Uint16(attr.Value) + case nl.IPSET_ATTR_CADT_LINENO | nl.NLA_F_NET_BYTEORDER: + result.LineNo = attr.Uint32() + case nl.IPSET_ATTR_COMMENT: + result.Comment = nl.BytesToString(attr.Value) + case nl.IPSET_ATTR_MARKMASK: + result.MarkMask = attr.Uint32() + default: + log.Printf("unknown ipset data attribute from kernel: %+v %v", attr, attr.Type&nl.NLA_TYPE_MASK) + } + } +} + +func (result *IPSetResult) parseAttrADT(data []byte) { + for attr := range nl.ParseAttributes(data) { + switch attr.Type { + case nl.IPSET_ATTR_DATA | nl.NLA_F_NESTED: + result.Entries = append(result.Entries, parseIPSetEntry(attr.Value)) + default: + log.Printf("unknown ADT attribute from kernel: %+v %v", attr, attr.Type&nl.NLA_TYPE_MASK) + } + } +} + +func parseIPSetEntry(data []byte) (entry IPSetEntry) { + for attr := range nl.ParseAttributes(data) { + switch attr.Type { + case nl.IPSET_ATTR_TIMEOUT | nl.NLA_F_NET_BYTEORDER: + val := attr.Uint32() + entry.Timeout = &val + case nl.IPSET_ATTR_BYTES | nl.NLA_F_NET_BYTEORDER: + val := attr.Uint64() + entry.Bytes = &val + case nl.IPSET_ATTR_PACKETS | nl.NLA_F_NET_BYTEORDER: + val := attr.Uint64() + entry.Packets = &val + case nl.IPSET_ATTR_ETHER: + entry.MAC = net.HardwareAddr(attr.Value) + case nl.IPSET_ATTR_IP: + entry.IP = net.IP(attr.Value) + case nl.IPSET_ATTR_COMMENT: + entry.Comment = nl.BytesToString(attr.Value) + case nl.IPSET_ATTR_IP | nl.NLA_F_NESTED: + for attr := range nl.ParseAttributes(attr.Value) { + switch attr.Type { + case nl.IPSET_ATTR_IP: + entry.IP = net.IP(attr.Value) + default: + log.Printf("unknown nested ADT attribute from kernel: %+v", attr) + } + } + case nl.IPSET_ATTR_IP2 | nl.NLA_F_NESTED: + for attr := range nl.ParseAttributes(attr.Value) { + switch attr.Type { + case nl.IPSET_ATTR_IP: + entry.IP2 = net.IP(attr.Value) + default: + log.Printf("unknown nested ADT attribute from kernel: %+v", attr) + } + } + case nl.IPSET_ATTR_CIDR: + entry.CIDR = attr.Value[0] + case nl.IPSET_ATTR_CIDR2: + entry.CIDR2 = attr.Value[0] + case nl.IPSET_ATTR_PORT | nl.NLA_F_NET_BYTEORDER: + val := networkOrder.Uint16(attr.Value) + entry.Port = &val + case nl.IPSET_ATTR_PROTO: + val := attr.Value[0] + entry.Protocol = &val + case nl.IPSET_ATTR_IFACE: + entry.IFace = nl.BytesToString(attr.Value) + case nl.IPSET_ATTR_MARK | nl.NLA_F_NET_BYTEORDER: + val := attr.Uint32() + entry.Mark = &val + default: + log.Printf("unknown ADT attribute from kernel: %+v", attr) + } + } + return +} diff --git a/vendor/github.com/vishvananda/netlink/link.go b/vendor/github.com/vishvananda/netlink/link.go new file mode 100644 index 000000000000..33c872336d67 --- /dev/null +++ b/vendor/github.com/vishvananda/netlink/link.go @@ -0,0 +1,1347 @@ +package netlink + +import ( + "fmt" + "net" + "os" + "strconv" +) + +// Link represents a link device from netlink. Shared link attributes +// like name may be retrieved using the Attrs() method. Unique data +// can be retrieved by casting the object to the proper type. +type Link interface { + Attrs() *LinkAttrs + Type() string +} + +type ( + NsPid int + NsFd int +) + +// LinkAttrs represents data shared by most link types +type LinkAttrs struct { + Index int + MTU int + TxQLen int // Transmit Queue Length + Name string + HardwareAddr net.HardwareAddr + Flags net.Flags + RawFlags uint32 + ParentIndex int // index of the parent link device + MasterIndex int // must be the index of a bridge + Namespace interface{} // nil | NsPid | NsFd + Alias string + Statistics *LinkStatistics + Promisc int + Allmulti int + Multi int + Xdp *LinkXdp + EncapType string + Protinfo *Protinfo + OperState LinkOperState + PhysSwitchID int + NetNsID int + NumTxQueues int + NumRxQueues int + GSOMaxSize uint32 + GSOMaxSegs uint32 + Vfs []VfInfo // virtual functions available on link + Group uint32 + Slave LinkSlave +} + +// LinkSlave represents a slave device. +type LinkSlave interface { + SlaveType() string +} + +// VfInfo represents configuration of virtual function +type VfInfo struct { + ID int + Mac net.HardwareAddr + Vlan int + Qos int + TxRate int // IFLA_VF_TX_RATE Max TxRate + Spoofchk bool + LinkState uint32 + MaxTxRate uint32 // IFLA_VF_RATE Max TxRate + MinTxRate uint32 // IFLA_VF_RATE Min TxRate + RxPackets uint64 + TxPackets uint64 + RxBytes uint64 + TxBytes uint64 + Multicast uint64 + Broadcast uint64 + RxDropped uint64 + TxDropped uint64 + + RssQuery uint32 + Trust uint32 +} + +// LinkOperState represents the values of the IFLA_OPERSTATE link +// attribute, which contains the RFC2863 state of the interface. +type LinkOperState uint8 + +const ( + OperUnknown = iota // Status can't be determined. + OperNotPresent // Some component is missing. + OperDown // Down. + OperLowerLayerDown // Down due to state of lower layer. + OperTesting // In some test mode. + OperDormant // Not up but pending an external event. + OperUp // Up, ready to send packets. +) + +func (s LinkOperState) String() string { + switch s { + case OperNotPresent: + return "not-present" + case OperDown: + return "down" + case OperLowerLayerDown: + return "lower-layer-down" + case OperTesting: + return "testing" + case OperDormant: + return "dormant" + case OperUp: + return "up" + default: + return "unknown" + } +} + +// NewLinkAttrs returns LinkAttrs structure filled with default values +func NewLinkAttrs() LinkAttrs { + return LinkAttrs{ + NetNsID: -1, + TxQLen: -1, + } +} + +type LinkStatistics LinkStatistics64 + +/* +Ref: struct rtnl_link_stats {...} +*/ +type LinkStatistics32 struct { + RxPackets uint32 + TxPackets uint32 + RxBytes uint32 + TxBytes uint32 + RxErrors uint32 + TxErrors uint32 + RxDropped uint32 + TxDropped uint32 + Multicast uint32 + Collisions uint32 + RxLengthErrors uint32 + RxOverErrors uint32 + RxCrcErrors uint32 + RxFrameErrors uint32 + RxFifoErrors uint32 + RxMissedErrors uint32 + TxAbortedErrors uint32 + TxCarrierErrors uint32 + TxFifoErrors uint32 + TxHeartbeatErrors uint32 + TxWindowErrors uint32 + RxCompressed uint32 + TxCompressed uint32 +} + +func (s32 LinkStatistics32) to64() *LinkStatistics64 { + return &LinkStatistics64{ + RxPackets: uint64(s32.RxPackets), + TxPackets: uint64(s32.TxPackets), + RxBytes: uint64(s32.RxBytes), + TxBytes: uint64(s32.TxBytes), + RxErrors: uint64(s32.RxErrors), + TxErrors: uint64(s32.TxErrors), + RxDropped: uint64(s32.RxDropped), + TxDropped: uint64(s32.TxDropped), + Multicast: uint64(s32.Multicast), + Collisions: uint64(s32.Collisions), + RxLengthErrors: uint64(s32.RxLengthErrors), + RxOverErrors: uint64(s32.RxOverErrors), + RxCrcErrors: uint64(s32.RxCrcErrors), + RxFrameErrors: uint64(s32.RxFrameErrors), + RxFifoErrors: uint64(s32.RxFifoErrors), + RxMissedErrors: uint64(s32.RxMissedErrors), + TxAbortedErrors: uint64(s32.TxAbortedErrors), + TxCarrierErrors: uint64(s32.TxCarrierErrors), + TxFifoErrors: uint64(s32.TxFifoErrors), + TxHeartbeatErrors: uint64(s32.TxHeartbeatErrors), + TxWindowErrors: uint64(s32.TxWindowErrors), + RxCompressed: uint64(s32.RxCompressed), + TxCompressed: uint64(s32.TxCompressed), + } +} + +/* +Ref: struct rtnl_link_stats64 {...} +*/ +type LinkStatistics64 struct { + RxPackets uint64 + TxPackets uint64 + RxBytes uint64 + TxBytes uint64 + RxErrors uint64 + TxErrors uint64 + RxDropped uint64 + TxDropped uint64 + Multicast uint64 + Collisions uint64 + RxLengthErrors uint64 + RxOverErrors uint64 + RxCrcErrors uint64 + RxFrameErrors uint64 + RxFifoErrors uint64 + RxMissedErrors uint64 + TxAbortedErrors uint64 + TxCarrierErrors uint64 + TxFifoErrors uint64 + TxHeartbeatErrors uint64 + TxWindowErrors uint64 + RxCompressed uint64 + TxCompressed uint64 +} + +type LinkXdp struct { + Fd int + Attached bool + AttachMode uint32 + Flags uint32 + ProgId uint32 +} + +// Device links cannot be created via netlink. These links +// are links created by udev like 'lo' and 'etho0' +type Device struct { + LinkAttrs +} + +func (device *Device) Attrs() *LinkAttrs { + return &device.LinkAttrs +} + +func (device *Device) Type() string { + return "device" +} + +// Dummy links are dummy ethernet devices +type Dummy struct { + LinkAttrs +} + +func (dummy *Dummy) Attrs() *LinkAttrs { + return &dummy.LinkAttrs +} + +func (dummy *Dummy) Type() string { + return "dummy" +} + +// Ifb links are advanced dummy devices for packet filtering +type Ifb struct { + LinkAttrs +} + +func (ifb *Ifb) Attrs() *LinkAttrs { + return &ifb.LinkAttrs +} + +func (ifb *Ifb) Type() string { + return "ifb" +} + +// Bridge links are simple linux bridges +type Bridge struct { + LinkAttrs + MulticastSnooping *bool + AgeingTime *uint32 + HelloTime *uint32 + VlanFiltering *bool +} + +func (bridge *Bridge) Attrs() *LinkAttrs { + return &bridge.LinkAttrs +} + +func (bridge *Bridge) Type() string { + return "bridge" +} + +// Vlan links have ParentIndex set in their Attrs() +type Vlan struct { + LinkAttrs + VlanId int + VlanProtocol VlanProtocol +} + +func (vlan *Vlan) Attrs() *LinkAttrs { + return &vlan.LinkAttrs +} + +func (vlan *Vlan) Type() string { + return "vlan" +} + +type MacvlanMode uint16 + +const ( + MACVLAN_MODE_DEFAULT MacvlanMode = iota + MACVLAN_MODE_PRIVATE + MACVLAN_MODE_VEPA + MACVLAN_MODE_BRIDGE + MACVLAN_MODE_PASSTHRU + MACVLAN_MODE_SOURCE +) + +// Macvlan links have ParentIndex set in their Attrs() +type Macvlan struct { + LinkAttrs + Mode MacvlanMode + + // MACAddrs is only populated for Macvlan SOURCE links + MACAddrs []net.HardwareAddr +} + +func (macvlan *Macvlan) Attrs() *LinkAttrs { + return &macvlan.LinkAttrs +} + +func (macvlan *Macvlan) Type() string { + return "macvlan" +} + +// Macvtap - macvtap is a virtual interfaces based on macvlan +type Macvtap struct { + Macvlan +} + +func (macvtap Macvtap) Type() string { + return "macvtap" +} + +type TuntapMode uint16 +type TuntapFlag uint16 + +// Tuntap links created via /dev/tun/tap, but can be destroyed via netlink +type Tuntap struct { + LinkAttrs + Mode TuntapMode + Flags TuntapFlag + NonPersist bool + Queues int + Fds []*os.File + Owner uint32 + Group uint32 +} + +func (tuntap *Tuntap) Attrs() *LinkAttrs { + return &tuntap.LinkAttrs +} + +func (tuntap *Tuntap) Type() string { + return "tuntap" +} + +// Veth devices must specify PeerName on create +type Veth struct { + LinkAttrs + PeerName string // veth on create only + PeerHardwareAddr net.HardwareAddr + PeerNamespace interface{} +} + +func (veth *Veth) Attrs() *LinkAttrs { + return &veth.LinkAttrs +} + +func (veth *Veth) Type() string { + return "veth" +} + +// Wireguard represent links of type "wireguard", see https://www.wireguard.com/ +type Wireguard struct { + LinkAttrs +} + +func (wg *Wireguard) Attrs() *LinkAttrs { + return &wg.LinkAttrs +} + +func (wg *Wireguard) Type() string { + return "wireguard" +} + +// GenericLink links represent types that are not currently understood +// by this netlink library. +type GenericLink struct { + LinkAttrs + LinkType string +} + +func (generic *GenericLink) Attrs() *LinkAttrs { + return &generic.LinkAttrs +} + +func (generic *GenericLink) Type() string { + return generic.LinkType +} + +type Vxlan struct { + LinkAttrs + VxlanId int + VtepDevIndex int + SrcAddr net.IP + Group net.IP + TTL int + TOS int + Learning bool + Proxy bool + RSC bool + L2miss bool + L3miss bool + UDPCSum bool + UDP6ZeroCSumTx bool + UDP6ZeroCSumRx bool + NoAge bool + GBP bool + FlowBased bool + Age int + Limit int + Port int + PortLow int + PortHigh int +} + +func (vxlan *Vxlan) Attrs() *LinkAttrs { + return &vxlan.LinkAttrs +} + +func (vxlan *Vxlan) Type() string { + return "vxlan" +} + +type IPVlanMode uint16 + +const ( + IPVLAN_MODE_L2 IPVlanMode = iota + IPVLAN_MODE_L3 + IPVLAN_MODE_L3S + IPVLAN_MODE_MAX +) + +type IPVlanFlag uint16 + +const ( + IPVLAN_FLAG_BRIDGE IPVlanFlag = iota + IPVLAN_FLAG_PRIVATE + IPVLAN_FLAG_VEPA +) + +type IPVlan struct { + LinkAttrs + Mode IPVlanMode + Flag IPVlanFlag +} + +func (ipvlan *IPVlan) Attrs() *LinkAttrs { + return &ipvlan.LinkAttrs +} + +func (ipvlan *IPVlan) Type() string { + return "ipvlan" +} + +// IPVtap - IPVtap is a virtual interfaces based on ipvlan +type IPVtap struct { + IPVlan +} + +func (ipvtap *IPVtap) Attrs() *LinkAttrs { + return &ipvtap.LinkAttrs +} + +func (ipvtap IPVtap) Type() string { + return "ipvtap" +} + +// VlanProtocol type +type VlanProtocol int + +func (p VlanProtocol) String() string { + s, ok := VlanProtocolToString[p] + if !ok { + return fmt.Sprintf("VlanProtocol(%d)", p) + } + return s +} + +// StringToVlanProtocol returns vlan protocol, or unknown is the s is invalid. +func StringToVlanProtocol(s string) VlanProtocol { + mode, ok := StringToVlanProtocolMap[s] + if !ok { + return VLAN_PROTOCOL_UNKNOWN + } + return mode +} + +// VlanProtocol possible values +const ( + VLAN_PROTOCOL_UNKNOWN VlanProtocol = 0 + VLAN_PROTOCOL_8021Q VlanProtocol = 0x8100 + VLAN_PROTOCOL_8021AD VlanProtocol = 0x88A8 +) + +var VlanProtocolToString = map[VlanProtocol]string{ + VLAN_PROTOCOL_8021Q: "802.1q", + VLAN_PROTOCOL_8021AD: "802.1ad", +} + +var StringToVlanProtocolMap = map[string]VlanProtocol{ + "802.1q": VLAN_PROTOCOL_8021Q, + "802.1ad": VLAN_PROTOCOL_8021AD, +} + +// BondMode type +type BondMode int + +func (b BondMode) String() string { + s, ok := bondModeToString[b] + if !ok { + return fmt.Sprintf("BondMode(%d)", b) + } + return s +} + +// StringToBondMode returns bond mode, or unknown is the s is invalid. +func StringToBondMode(s string) BondMode { + mode, ok := StringToBondModeMap[s] + if !ok { + return BOND_MODE_UNKNOWN + } + return mode +} + +// Possible BondMode +const ( + BOND_MODE_BALANCE_RR BondMode = iota + BOND_MODE_ACTIVE_BACKUP + BOND_MODE_BALANCE_XOR + BOND_MODE_BROADCAST + BOND_MODE_802_3AD + BOND_MODE_BALANCE_TLB + BOND_MODE_BALANCE_ALB + BOND_MODE_UNKNOWN +) + +var bondModeToString = map[BondMode]string{ + BOND_MODE_BALANCE_RR: "balance-rr", + BOND_MODE_ACTIVE_BACKUP: "active-backup", + BOND_MODE_BALANCE_XOR: "balance-xor", + BOND_MODE_BROADCAST: "broadcast", + BOND_MODE_802_3AD: "802.3ad", + BOND_MODE_BALANCE_TLB: "balance-tlb", + BOND_MODE_BALANCE_ALB: "balance-alb", +} +var StringToBondModeMap = map[string]BondMode{ + "balance-rr": BOND_MODE_BALANCE_RR, + "active-backup": BOND_MODE_ACTIVE_BACKUP, + "balance-xor": BOND_MODE_BALANCE_XOR, + "broadcast": BOND_MODE_BROADCAST, + "802.3ad": BOND_MODE_802_3AD, + "balance-tlb": BOND_MODE_BALANCE_TLB, + "balance-alb": BOND_MODE_BALANCE_ALB, +} + +// BondArpValidate type +type BondArpValidate int + +// Possible BondArpValidate value +const ( + BOND_ARP_VALIDATE_NONE BondArpValidate = iota + BOND_ARP_VALIDATE_ACTIVE + BOND_ARP_VALIDATE_BACKUP + BOND_ARP_VALIDATE_ALL +) + +var bondArpValidateToString = map[BondArpValidate]string{ + BOND_ARP_VALIDATE_NONE: "none", + BOND_ARP_VALIDATE_ACTIVE: "active", + BOND_ARP_VALIDATE_BACKUP: "backup", + BOND_ARP_VALIDATE_ALL: "none", +} +var StringToBondArpValidateMap = map[string]BondArpValidate{ + "none": BOND_ARP_VALIDATE_NONE, + "active": BOND_ARP_VALIDATE_ACTIVE, + "backup": BOND_ARP_VALIDATE_BACKUP, + "all": BOND_ARP_VALIDATE_ALL, +} + +func (b BondArpValidate) String() string { + s, ok := bondArpValidateToString[b] + if !ok { + return fmt.Sprintf("BondArpValidate(%d)", b) + } + return s +} + +// BondPrimaryReselect type +type BondPrimaryReselect int + +// Possible BondPrimaryReselect value +const ( + BOND_PRIMARY_RESELECT_ALWAYS BondPrimaryReselect = iota + BOND_PRIMARY_RESELECT_BETTER + BOND_PRIMARY_RESELECT_FAILURE +) + +var bondPrimaryReselectToString = map[BondPrimaryReselect]string{ + BOND_PRIMARY_RESELECT_ALWAYS: "always", + BOND_PRIMARY_RESELECT_BETTER: "better", + BOND_PRIMARY_RESELECT_FAILURE: "failure", +} +var StringToBondPrimaryReselectMap = map[string]BondPrimaryReselect{ + "always": BOND_PRIMARY_RESELECT_ALWAYS, + "better": BOND_PRIMARY_RESELECT_BETTER, + "failure": BOND_PRIMARY_RESELECT_FAILURE, +} + +func (b BondPrimaryReselect) String() string { + s, ok := bondPrimaryReselectToString[b] + if !ok { + return fmt.Sprintf("BondPrimaryReselect(%d)", b) + } + return s +} + +// BondArpAllTargets type +type BondArpAllTargets int + +// Possible BondArpAllTargets value +const ( + BOND_ARP_ALL_TARGETS_ANY BondArpAllTargets = iota + BOND_ARP_ALL_TARGETS_ALL +) + +var bondArpAllTargetsToString = map[BondArpAllTargets]string{ + BOND_ARP_ALL_TARGETS_ANY: "any", + BOND_ARP_ALL_TARGETS_ALL: "all", +} +var StringToBondArpAllTargetsMap = map[string]BondArpAllTargets{ + "any": BOND_ARP_ALL_TARGETS_ANY, + "all": BOND_ARP_ALL_TARGETS_ALL, +} + +func (b BondArpAllTargets) String() string { + s, ok := bondArpAllTargetsToString[b] + if !ok { + return fmt.Sprintf("BondArpAllTargets(%d)", b) + } + return s +} + +// BondFailOverMac type +type BondFailOverMac int + +// Possible BondFailOverMac value +const ( + BOND_FAIL_OVER_MAC_NONE BondFailOverMac = iota + BOND_FAIL_OVER_MAC_ACTIVE + BOND_FAIL_OVER_MAC_FOLLOW +) + +var bondFailOverMacToString = map[BondFailOverMac]string{ + BOND_FAIL_OVER_MAC_NONE: "none", + BOND_FAIL_OVER_MAC_ACTIVE: "active", + BOND_FAIL_OVER_MAC_FOLLOW: "follow", +} +var StringToBondFailOverMacMap = map[string]BondFailOverMac{ + "none": BOND_FAIL_OVER_MAC_NONE, + "active": BOND_FAIL_OVER_MAC_ACTIVE, + "follow": BOND_FAIL_OVER_MAC_FOLLOW, +} + +func (b BondFailOverMac) String() string { + s, ok := bondFailOverMacToString[b] + if !ok { + return fmt.Sprintf("BondFailOverMac(%d)", b) + } + return s +} + +// BondXmitHashPolicy type +type BondXmitHashPolicy int + +func (b BondXmitHashPolicy) String() string { + s, ok := bondXmitHashPolicyToString[b] + if !ok { + return fmt.Sprintf("XmitHashPolicy(%d)", b) + } + return s +} + +// StringToBondXmitHashPolicy returns bond lacp arte, or unknown is the s is invalid. +func StringToBondXmitHashPolicy(s string) BondXmitHashPolicy { + lacp, ok := StringToBondXmitHashPolicyMap[s] + if !ok { + return BOND_XMIT_HASH_POLICY_UNKNOWN + } + return lacp +} + +// Possible BondXmitHashPolicy value +const ( + BOND_XMIT_HASH_POLICY_LAYER2 BondXmitHashPolicy = iota + BOND_XMIT_HASH_POLICY_LAYER3_4 + BOND_XMIT_HASH_POLICY_LAYER2_3 + BOND_XMIT_HASH_POLICY_ENCAP2_3 + BOND_XMIT_HASH_POLICY_ENCAP3_4 + BOND_XMIT_HASH_POLICY_UNKNOWN +) + +var bondXmitHashPolicyToString = map[BondXmitHashPolicy]string{ + BOND_XMIT_HASH_POLICY_LAYER2: "layer2", + BOND_XMIT_HASH_POLICY_LAYER3_4: "layer3+4", + BOND_XMIT_HASH_POLICY_LAYER2_3: "layer2+3", + BOND_XMIT_HASH_POLICY_ENCAP2_3: "encap2+3", + BOND_XMIT_HASH_POLICY_ENCAP3_4: "encap3+4", +} +var StringToBondXmitHashPolicyMap = map[string]BondXmitHashPolicy{ + "layer2": BOND_XMIT_HASH_POLICY_LAYER2, + "layer3+4": BOND_XMIT_HASH_POLICY_LAYER3_4, + "layer2+3": BOND_XMIT_HASH_POLICY_LAYER2_3, + "encap2+3": BOND_XMIT_HASH_POLICY_ENCAP2_3, + "encap3+4": BOND_XMIT_HASH_POLICY_ENCAP3_4, +} + +// BondLacpRate type +type BondLacpRate int + +func (b BondLacpRate) String() string { + s, ok := bondLacpRateToString[b] + if !ok { + return fmt.Sprintf("LacpRate(%d)", b) + } + return s +} + +// StringToBondLacpRate returns bond lacp arte, or unknown is the s is invalid. +func StringToBondLacpRate(s string) BondLacpRate { + lacp, ok := StringToBondLacpRateMap[s] + if !ok { + return BOND_LACP_RATE_UNKNOWN + } + return lacp +} + +// Possible BondLacpRate value +const ( + BOND_LACP_RATE_SLOW BondLacpRate = iota + BOND_LACP_RATE_FAST + BOND_LACP_RATE_UNKNOWN +) + +var bondLacpRateToString = map[BondLacpRate]string{ + BOND_LACP_RATE_SLOW: "slow", + BOND_LACP_RATE_FAST: "fast", +} +var StringToBondLacpRateMap = map[string]BondLacpRate{ + "slow": BOND_LACP_RATE_SLOW, + "fast": BOND_LACP_RATE_FAST, +} + +// BondAdSelect type +type BondAdSelect int + +// Possible BondAdSelect value +const ( + BOND_AD_SELECT_STABLE BondAdSelect = iota + BOND_AD_SELECT_BANDWIDTH + BOND_AD_SELECT_COUNT +) + +var bondAdSelectToString = map[BondAdSelect]string{ + BOND_AD_SELECT_STABLE: "stable", + BOND_AD_SELECT_BANDWIDTH: "bandwidth", + BOND_AD_SELECT_COUNT: "count", +} +var StringToBondAdSelectMap = map[string]BondAdSelect{ + "stable": BOND_AD_SELECT_STABLE, + "bandwidth": BOND_AD_SELECT_BANDWIDTH, + "count": BOND_AD_SELECT_COUNT, +} + +func (b BondAdSelect) String() string { + s, ok := bondAdSelectToString[b] + if !ok { + return fmt.Sprintf("BondAdSelect(%d)", b) + } + return s +} + +// BondAdInfo represents ad info for bond +type BondAdInfo struct { + AggregatorId int + NumPorts int + ActorKey int + PartnerKey int + PartnerMac net.HardwareAddr +} + +// Bond representation +type Bond struct { + LinkAttrs + Mode BondMode + ActiveSlave int + Miimon int + UpDelay int + DownDelay int + UseCarrier int + ArpInterval int + ArpIpTargets []net.IP + ArpValidate BondArpValidate + ArpAllTargets BondArpAllTargets + Primary int + PrimaryReselect BondPrimaryReselect + FailOverMac BondFailOverMac + XmitHashPolicy BondXmitHashPolicy + ResendIgmp int + NumPeerNotif int + AllSlavesActive int + MinLinks int + LpInterval int + PacketsPerSlave int + LacpRate BondLacpRate + AdSelect BondAdSelect + // looking at iproute tool AdInfo can only be retrived. It can't be set. + AdInfo *BondAdInfo + AdActorSysPrio int + AdUserPortKey int + AdActorSystem net.HardwareAddr + TlbDynamicLb int +} + +func NewLinkBond(atr LinkAttrs) *Bond { + return &Bond{ + LinkAttrs: atr, + Mode: -1, + ActiveSlave: -1, + Miimon: -1, + UpDelay: -1, + DownDelay: -1, + UseCarrier: -1, + ArpInterval: -1, + ArpIpTargets: nil, + ArpValidate: -1, + ArpAllTargets: -1, + Primary: -1, + PrimaryReselect: -1, + FailOverMac: -1, + XmitHashPolicy: -1, + ResendIgmp: -1, + NumPeerNotif: -1, + AllSlavesActive: -1, + MinLinks: -1, + LpInterval: -1, + PacketsPerSlave: -1, + LacpRate: -1, + AdSelect: -1, + AdActorSysPrio: -1, + AdUserPortKey: -1, + AdActorSystem: nil, + TlbDynamicLb: -1, + } +} + +// Flag mask for bond options. Bond.Flagmask must be set to on for option to work. +const ( + BOND_MODE_MASK uint64 = 1 << (1 + iota) + BOND_ACTIVE_SLAVE_MASK + BOND_MIIMON_MASK + BOND_UPDELAY_MASK + BOND_DOWNDELAY_MASK + BOND_USE_CARRIER_MASK + BOND_ARP_INTERVAL_MASK + BOND_ARP_VALIDATE_MASK + BOND_ARP_ALL_TARGETS_MASK + BOND_PRIMARY_MASK + BOND_PRIMARY_RESELECT_MASK + BOND_FAIL_OVER_MAC_MASK + BOND_XMIT_HASH_POLICY_MASK + BOND_RESEND_IGMP_MASK + BOND_NUM_PEER_NOTIF_MASK + BOND_ALL_SLAVES_ACTIVE_MASK + BOND_MIN_LINKS_MASK + BOND_LP_INTERVAL_MASK + BOND_PACKETS_PER_SLAVE_MASK + BOND_LACP_RATE_MASK + BOND_AD_SELECT_MASK +) + +// Attrs implementation. +func (bond *Bond) Attrs() *LinkAttrs { + return &bond.LinkAttrs +} + +// Type implementation fro Vxlan. +func (bond *Bond) Type() string { + return "bond" +} + +// BondSlaveState represents the values of the IFLA_BOND_SLAVE_STATE bond slave +// attribute, which contains the state of the bond slave. +type BondSlaveState uint8 + +const ( + //BondStateActive Link is active. + BondStateActive BondSlaveState = iota + //BondStateBackup Link is backup. + BondStateBackup +) + +func (s BondSlaveState) String() string { + switch s { + case BondStateActive: + return "ACTIVE" + case BondStateBackup: + return "BACKUP" + default: + return strconv.Itoa(int(s)) + } +} + +// BondSlaveMiiStatus represents the values of the IFLA_BOND_SLAVE_MII_STATUS bond slave +// attribute, which contains the status of MII link monitoring +type BondSlaveMiiStatus uint8 + +const ( + //BondLinkUp link is up and running. + BondLinkUp BondSlaveMiiStatus = iota + //BondLinkFail link has just gone down. + BondLinkFail + //BondLinkDown link has been down for too long time. + BondLinkDown + //BondLinkBack link is going back. + BondLinkBack +) + +func (s BondSlaveMiiStatus) String() string { + switch s { + case BondLinkUp: + return "UP" + case BondLinkFail: + return "GOING_DOWN" + case BondLinkDown: + return "DOWN" + case BondLinkBack: + return "GOING_BACK" + default: + return strconv.Itoa(int(s)) + } +} + +type BondSlave struct { + State BondSlaveState + MiiStatus BondSlaveMiiStatus + LinkFailureCount uint32 + PermHardwareAddr net.HardwareAddr + QueueId uint16 + AggregatorId uint16 + AdActorOperPortState uint8 + AdPartnerOperPortState uint16 +} + +func (b *BondSlave) SlaveType() string { + return "bond" +} + +type VrfSlave struct { + Table uint32 +} + +func (v *VrfSlave) SlaveType() string { + return "vrf" +} + +// Geneve devices must specify RemoteIP and ID (VNI) on create +// https://github.com/torvalds/linux/blob/47ec5303d73ea344e84f46660fff693c57641386/drivers/net/geneve.c#L1209-L1223 +type Geneve struct { + LinkAttrs + ID uint32 // vni + Remote net.IP + Ttl uint8 + Tos uint8 + Dport uint16 + UdpCsum uint8 + UdpZeroCsum6Tx uint8 + UdpZeroCsum6Rx uint8 + Link uint32 + FlowBased bool +} + +func (geneve *Geneve) Attrs() *LinkAttrs { + return &geneve.LinkAttrs +} + +func (geneve *Geneve) Type() string { + return "geneve" +} + +// Gretap devices must specify LocalIP and RemoteIP on create +type Gretap struct { + LinkAttrs + IKey uint32 + OKey uint32 + EncapSport uint16 + EncapDport uint16 + Local net.IP + Remote net.IP + IFlags uint16 + OFlags uint16 + PMtuDisc uint8 + Ttl uint8 + Tos uint8 + EncapType uint16 + EncapFlags uint16 + Link uint32 + FlowBased bool +} + +func (gretap *Gretap) Attrs() *LinkAttrs { + return &gretap.LinkAttrs +} + +func (gretap *Gretap) Type() string { + if gretap.Local.To4() == nil { + return "ip6gretap" + } + return "gretap" +} + +type Iptun struct { + LinkAttrs + Ttl uint8 + Tos uint8 + PMtuDisc uint8 + Link uint32 + Local net.IP + Remote net.IP + EncapSport uint16 + EncapDport uint16 + EncapType uint16 + EncapFlags uint16 + FlowBased bool + Proto uint8 +} + +func (iptun *Iptun) Attrs() *LinkAttrs { + return &iptun.LinkAttrs +} + +func (iptun *Iptun) Type() string { + return "ipip" +} + +type Ip6tnl struct { + LinkAttrs + Link uint32 + Local net.IP + Remote net.IP + Ttl uint8 + Tos uint8 + Flags uint32 + Proto uint8 + FlowInfo uint32 + EncapLimit uint8 + EncapType uint16 + EncapFlags uint16 + EncapSport uint16 + EncapDport uint16 +} + +func (ip6tnl *Ip6tnl) Attrs() *LinkAttrs { + return &ip6tnl.LinkAttrs +} + +func (ip6tnl *Ip6tnl) Type() string { + return "ip6tnl" +} + +// from https://elixir.bootlin.com/linux/v5.15.4/source/include/uapi/linux/if_tunnel.h#L84 +type TunnelEncapType uint16 + +const ( + None TunnelEncapType = iota + FOU + GUE +) + +// from https://elixir.bootlin.com/linux/v5.15.4/source/include/uapi/linux/if_tunnel.h#L91 +type TunnelEncapFlag uint16 + +const ( + CSum TunnelEncapFlag = 1 << 0 + CSum6 = 1 << 1 + RemCSum = 1 << 2 +) + +// from https://elixir.bootlin.com/linux/latest/source/include/uapi/linux/ip6_tunnel.h#L12 +type IP6TunnelFlag uint16 + +const ( + IP6_TNL_F_IGN_ENCAP_LIMIT IP6TunnelFlag = 1 // don't add encapsulation limit if one isn't present in inner packet + IP6_TNL_F_USE_ORIG_TCLASS = 2 // copy the traffic class field from the inner packet + IP6_TNL_F_USE_ORIG_FLOWLABEL = 4 // copy the flowlabel from the inner packet + IP6_TNL_F_MIP6_DEV = 8 // being used for Mobile IPv6 + IP6_TNL_F_RCV_DSCP_COPY = 10 // copy DSCP from the outer packet + IP6_TNL_F_USE_ORIG_FWMARK = 20 // copy fwmark from inner packet + IP6_TNL_F_ALLOW_LOCAL_REMOTE = 40 // allow remote endpoint on the local node +) + +type Sittun struct { + LinkAttrs + Link uint32 + Ttl uint8 + Tos uint8 + PMtuDisc uint8 + Proto uint8 + Local net.IP + Remote net.IP + EncapLimit uint8 + EncapType uint16 + EncapFlags uint16 + EncapSport uint16 + EncapDport uint16 +} + +func (sittun *Sittun) Attrs() *LinkAttrs { + return &sittun.LinkAttrs +} + +func (sittun *Sittun) Type() string { + return "sit" +} + +type Vti struct { + LinkAttrs + IKey uint32 + OKey uint32 + Link uint32 + Local net.IP + Remote net.IP +} + +func (vti *Vti) Attrs() *LinkAttrs { + return &vti.LinkAttrs +} + +func (vti *Vti) Type() string { + if vti.Local.To4() == nil { + return "vti6" + } + return "vti" +} + +type Gretun struct { + LinkAttrs + Link uint32 + IFlags uint16 + OFlags uint16 + IKey uint32 + OKey uint32 + Local net.IP + Remote net.IP + Ttl uint8 + Tos uint8 + PMtuDisc uint8 + EncapType uint16 + EncapFlags uint16 + EncapSport uint16 + EncapDport uint16 +} + +func (gretun *Gretun) Attrs() *LinkAttrs { + return &gretun.LinkAttrs +} + +func (gretun *Gretun) Type() string { + if gretun.Local.To4() == nil { + return "ip6gre" + } + return "gre" +} + +type Vrf struct { + LinkAttrs + Table uint32 +} + +func (vrf *Vrf) Attrs() *LinkAttrs { + return &vrf.LinkAttrs +} + +func (vrf *Vrf) Type() string { + return "vrf" +} + +type GTP struct { + LinkAttrs + FD0 int + FD1 int + Role int + PDPHashsize int +} + +func (gtp *GTP) Attrs() *LinkAttrs { + return >p.LinkAttrs +} + +func (gtp *GTP) Type() string { + return "gtp" +} + +// Virtual XFRM Interfaces +// Named "xfrmi" to prevent confusion with XFRM objects +type Xfrmi struct { + LinkAttrs + Ifid uint32 +} + +func (xfrm *Xfrmi) Attrs() *LinkAttrs { + return &xfrm.LinkAttrs +} + +func (xfrm *Xfrmi) Type() string { + return "xfrm" +} + +// IPoIB interface + +type IPoIBMode uint16 + +func (m *IPoIBMode) String() string { + str, ok := iPoIBModeToString[*m] + if !ok { + return fmt.Sprintf("mode(%d)", *m) + } + return str +} + +const ( + IPOIB_MODE_DATAGRAM = iota + IPOIB_MODE_CONNECTED +) + +var iPoIBModeToString = map[IPoIBMode]string{ + IPOIB_MODE_DATAGRAM: "datagram", + IPOIB_MODE_CONNECTED: "connected", +} + +var StringToIPoIBMode = map[string]IPoIBMode{ + "datagram": IPOIB_MODE_DATAGRAM, + "connected": IPOIB_MODE_CONNECTED, +} + +const ( + CAN_STATE_ERROR_ACTIVE = iota + CAN_STATE_ERROR_WARNING + CAN_STATE_ERROR_PASSIVE + CAN_STATE_BUS_OFF + CAN_STATE_STOPPED + CAN_STATE_SLEEPING +) + +type Can struct { + LinkAttrs + + BitRate uint32 + SamplePoint uint32 + TimeQuanta uint32 + PropagationSegment uint32 + PhaseSegment1 uint32 + PhaseSegment2 uint32 + SyncJumpWidth uint32 + BitRatePreScaler uint32 + + Name string + TimeSegment1Min uint32 + TimeSegment1Max uint32 + TimeSegment2Min uint32 + TimeSegment2Max uint32 + SyncJumpWidthMax uint32 + BitRatePreScalerMin uint32 + BitRatePreScalerMax uint32 + BitRatePreScalerInc uint32 + + ClockFrequency uint32 + + State uint32 + + Mask uint32 + Flags uint32 + + TxError uint16 + RxError uint16 + + RestartMs uint32 +} + +func (can *Can) Attrs() *LinkAttrs { + return &can.LinkAttrs +} + +func (can *Can) Type() string { + return "can" +} + +type IPoIB struct { + LinkAttrs + Pkey uint16 + Mode IPoIBMode + Umcast uint16 +} + +func (ipoib *IPoIB) Attrs() *LinkAttrs { + return &ipoib.LinkAttrs +} + +func (ipoib *IPoIB) Type() string { + return "ipoib" +} + +type BareUDP struct { + LinkAttrs + Port uint16 + EtherType uint16 + SrcPortMin uint16 + MultiProto bool +} + +func (bareudp *BareUDP) Attrs() *LinkAttrs { + return &bareudp.LinkAttrs +} + +func (bareudp *BareUDP) Type() string { + return "bareudp" +} + +// iproute2 supported devices; +// vlan | veth | vcan | dummy | ifb | macvlan | macvtap | +// bridge | bond | ipoib | ip6tnl | ipip | sit | vxlan | +// gre | gretap | ip6gre | ip6gretap | vti | vti6 | nlmon | +// bond_slave | ipvlan | xfrm | bareudp + +// LinkNotFoundError wraps the various not found errors when +// getting/reading links. This is intended for better error +// handling by dependent code so that "not found error" can +// be distinguished from other errors +type LinkNotFoundError struct { + error +} diff --git a/vendor/github.com/vishvananda/netlink/link_linux.go b/vendor/github.com/vishvananda/netlink/link_linux.go new file mode 100644 index 000000000000..276947a0069c --- /dev/null +++ b/vendor/github.com/vishvananda/netlink/link_linux.go @@ -0,0 +1,3465 @@ +package netlink + +import ( + "bytes" + "encoding/binary" + "fmt" + "io/ioutil" + "net" + "os" + "strconv" + "strings" + "syscall" + "unsafe" + + "github.com/vishvananda/netlink/nl" + "github.com/vishvananda/netns" + "golang.org/x/sys/unix" +) + +const ( + SizeofLinkStats32 = 0x5c + SizeofLinkStats64 = 0xb8 +) + +const ( + TUNTAP_MODE_TUN TuntapMode = unix.IFF_TUN + TUNTAP_MODE_TAP TuntapMode = unix.IFF_TAP + TUNTAP_DEFAULTS TuntapFlag = unix.IFF_TUN_EXCL | unix.IFF_ONE_QUEUE + TUNTAP_VNET_HDR TuntapFlag = unix.IFF_VNET_HDR + TUNTAP_TUN_EXCL TuntapFlag = unix.IFF_TUN_EXCL + TUNTAP_NO_PI TuntapFlag = unix.IFF_NO_PI + TUNTAP_ONE_QUEUE TuntapFlag = unix.IFF_ONE_QUEUE + TUNTAP_MULTI_QUEUE TuntapFlag = unix.IFF_MULTI_QUEUE + TUNTAP_MULTI_QUEUE_DEFAULTS TuntapFlag = TUNTAP_MULTI_QUEUE | TUNTAP_NO_PI +) + +var StringToTuntapModeMap = map[string]TuntapMode{ + "tun": TUNTAP_MODE_TUN, + "tap": TUNTAP_MODE_TAP, +} + +func (ttm TuntapMode) String() string { + switch ttm { + case TUNTAP_MODE_TUN: + return "tun" + case TUNTAP_MODE_TAP: + return "tap" + } + return "unknown" +} + +const ( + VF_LINK_STATE_AUTO uint32 = 0 + VF_LINK_STATE_ENABLE uint32 = 1 + VF_LINK_STATE_DISABLE uint32 = 2 +) + +var macvlanModes = [...]uint32{ + 0, + nl.MACVLAN_MODE_PRIVATE, + nl.MACVLAN_MODE_VEPA, + nl.MACVLAN_MODE_BRIDGE, + nl.MACVLAN_MODE_PASSTHRU, + nl.MACVLAN_MODE_SOURCE, +} + +func ensureIndex(link *LinkAttrs) { + if link != nil && link.Index == 0 { + newlink, _ := LinkByName(link.Name) + if newlink != nil { + link.Index = newlink.Attrs().Index + } + } +} + +func (h *Handle) ensureIndex(link *LinkAttrs) { + if link != nil && link.Index == 0 { + newlink, _ := h.LinkByName(link.Name) + if newlink != nil { + link.Index = newlink.Attrs().Index + } + } +} + +func (h *Handle) LinkSetARPOff(link Link) error { + base := link.Attrs() + h.ensureIndex(base) + req := h.newNetlinkRequest(unix.RTM_SETLINK, unix.NLM_F_ACK) + + msg := nl.NewIfInfomsg(unix.AF_UNSPEC) + msg.Change |= unix.IFF_NOARP + msg.Flags |= unix.IFF_NOARP + msg.Index = int32(base.Index) + req.AddData(msg) + + _, err := req.Execute(unix.NETLINK_ROUTE, 0) + return err +} + +func LinkSetARPOff(link Link) error { + return pkgHandle.LinkSetARPOff(link) +} + +func (h *Handle) LinkSetARPOn(link Link) error { + base := link.Attrs() + h.ensureIndex(base) + req := h.newNetlinkRequest(unix.RTM_SETLINK, unix.NLM_F_ACK) + + msg := nl.NewIfInfomsg(unix.AF_UNSPEC) + msg.Change |= unix.IFF_NOARP + msg.Flags &= ^uint32(unix.IFF_NOARP) + msg.Index = int32(base.Index) + req.AddData(msg) + + _, err := req.Execute(unix.NETLINK_ROUTE, 0) + return err +} + +func LinkSetARPOn(link Link) error { + return pkgHandle.LinkSetARPOn(link) +} + +func (h *Handle) SetPromiscOn(link Link) error { + base := link.Attrs() + h.ensureIndex(base) + req := h.newNetlinkRequest(unix.RTM_SETLINK, unix.NLM_F_ACK) + + msg := nl.NewIfInfomsg(unix.AF_UNSPEC) + msg.Change = unix.IFF_PROMISC + msg.Flags = unix.IFF_PROMISC + msg.Index = int32(base.Index) + req.AddData(msg) + + _, err := req.Execute(unix.NETLINK_ROUTE, 0) + return err +} + +// LinkSetAllmulticastOn enables the reception of all hardware multicast packets for the link device. +// Equivalent to: `ip link set $link allmulticast on` +func LinkSetAllmulticastOn(link Link) error { + return pkgHandle.LinkSetAllmulticastOn(link) +} + +// LinkSetAllmulticastOn enables the reception of all hardware multicast packets for the link device. +// Equivalent to: `ip link set $link allmulticast on` +func (h *Handle) LinkSetAllmulticastOn(link Link) error { + base := link.Attrs() + h.ensureIndex(base) + req := h.newNetlinkRequest(unix.RTM_NEWLINK, unix.NLM_F_ACK) + + msg := nl.NewIfInfomsg(unix.AF_UNSPEC) + msg.Change = unix.IFF_ALLMULTI + msg.Flags = unix.IFF_ALLMULTI + msg.Index = int32(base.Index) + req.AddData(msg) + + _, err := req.Execute(unix.NETLINK_ROUTE, 0) + return err +} + +// LinkSetAllmulticastOff disables the reception of all hardware multicast packets for the link device. +// Equivalent to: `ip link set $link allmulticast off` +func LinkSetAllmulticastOff(link Link) error { + return pkgHandle.LinkSetAllmulticastOff(link) +} + +// LinkSetAllmulticastOff disables the reception of all hardware multicast packets for the link device. +// Equivalent to: `ip link set $link allmulticast off` +func (h *Handle) LinkSetAllmulticastOff(link Link) error { + base := link.Attrs() + h.ensureIndex(base) + req := h.newNetlinkRequest(unix.RTM_NEWLINK, unix.NLM_F_ACK) + + msg := nl.NewIfInfomsg(unix.AF_UNSPEC) + msg.Change = unix.IFF_ALLMULTI + msg.Index = int32(base.Index) + req.AddData(msg) + + _, err := req.Execute(unix.NETLINK_ROUTE, 0) + return err +} + +// LinkSetMulticastOn enables the reception of multicast packets for the link device. +// Equivalent to: `ip link set $link multicast on` +func LinkSetMulticastOn(link Link) error { + return pkgHandle.LinkSetMulticastOn(link) +} + +// LinkSetMulticastOn enables the reception of multicast packets for the link device. +// Equivalent to: `ip link set $link multicast on` +func (h *Handle) LinkSetMulticastOn(link Link) error { + base := link.Attrs() + h.ensureIndex(base) + req := h.newNetlinkRequest(unix.RTM_NEWLINK, unix.NLM_F_ACK) + + msg := nl.NewIfInfomsg(unix.AF_UNSPEC) + msg.Change = unix.IFF_MULTICAST + msg.Flags = unix.IFF_MULTICAST + msg.Index = int32(base.Index) + req.AddData(msg) + + _, err := req.Execute(unix.NETLINK_ROUTE, 0) + return err +} + +// LinkSetAllmulticastOff disables the reception of multicast packets for the link device. +// Equivalent to: `ip link set $link multicast off` +func LinkSetMulticastOff(link Link) error { + return pkgHandle.LinkSetMulticastOff(link) +} + +// LinkSetAllmulticastOff disables the reception of multicast packets for the link device. +// Equivalent to: `ip link set $link multicast off` +func (h *Handle) LinkSetMulticastOff(link Link) error { + base := link.Attrs() + h.ensureIndex(base) + req := h.newNetlinkRequest(unix.RTM_NEWLINK, unix.NLM_F_ACK) + + msg := nl.NewIfInfomsg(unix.AF_UNSPEC) + msg.Change = unix.IFF_MULTICAST + msg.Index = int32(base.Index) + req.AddData(msg) + + _, err := req.Execute(unix.NETLINK_ROUTE, 0) + return err +} + +func MacvlanMACAddrAdd(link Link, addr net.HardwareAddr) error { + return pkgHandle.MacvlanMACAddrAdd(link, addr) +} + +func (h *Handle) MacvlanMACAddrAdd(link Link, addr net.HardwareAddr) error { + return h.macvlanMACAddrChange(link, []net.HardwareAddr{addr}, nl.MACVLAN_MACADDR_ADD) +} + +func MacvlanMACAddrDel(link Link, addr net.HardwareAddr) error { + return pkgHandle.MacvlanMACAddrDel(link, addr) +} + +func (h *Handle) MacvlanMACAddrDel(link Link, addr net.HardwareAddr) error { + return h.macvlanMACAddrChange(link, []net.HardwareAddr{addr}, nl.MACVLAN_MACADDR_DEL) +} + +func MacvlanMACAddrFlush(link Link) error { + return pkgHandle.MacvlanMACAddrFlush(link) +} + +func (h *Handle) MacvlanMACAddrFlush(link Link) error { + return h.macvlanMACAddrChange(link, nil, nl.MACVLAN_MACADDR_FLUSH) +} + +func MacvlanMACAddrSet(link Link, addrs []net.HardwareAddr) error { + return pkgHandle.MacvlanMACAddrSet(link, addrs) +} + +func (h *Handle) MacvlanMACAddrSet(link Link, addrs []net.HardwareAddr) error { + return h.macvlanMACAddrChange(link, addrs, nl.MACVLAN_MACADDR_SET) +} + +func (h *Handle) macvlanMACAddrChange(link Link, addrs []net.HardwareAddr, mode uint32) error { + base := link.Attrs() + h.ensureIndex(base) + req := h.newNetlinkRequest(unix.RTM_NEWLINK, unix.NLM_F_ACK) + + msg := nl.NewIfInfomsg(unix.AF_UNSPEC) + msg.Index = int32(base.Index) + req.AddData(msg) + + linkInfo := nl.NewRtAttr(unix.IFLA_LINKINFO, nil) + linkInfo.AddRtAttr(nl.IFLA_INFO_KIND, nl.NonZeroTerminated(link.Type())) + inner := linkInfo.AddRtAttr(nl.IFLA_INFO_DATA, nil) + + // IFLA_MACVLAN_MACADDR_MODE = mode + b := make([]byte, 4) + native.PutUint32(b, mode) + inner.AddRtAttr(nl.IFLA_MACVLAN_MACADDR_MODE, b) + + // populate message with MAC addrs, if necessary + switch mode { + case nl.MACVLAN_MACADDR_ADD, nl.MACVLAN_MACADDR_DEL: + if len(addrs) == 1 { + inner.AddRtAttr(nl.IFLA_MACVLAN_MACADDR, []byte(addrs[0])) + } + case nl.MACVLAN_MACADDR_SET: + mad := inner.AddRtAttr(nl.IFLA_MACVLAN_MACADDR_DATA, nil) + for _, addr := range addrs { + mad.AddRtAttr(nl.IFLA_MACVLAN_MACADDR, []byte(addr)) + } + } + + req.AddData(linkInfo) + + _, err := req.Execute(unix.NETLINK_ROUTE, 0) + return err +} + +// LinkSetMacvlanMode sets the mode of a macvlan or macvtap link device. +// Note that passthrough mode cannot be set to and from and will fail. +// Equivalent to: `ip link set $link type (macvlan|macvtap) mode $mode +func LinkSetMacvlanMode(link Link, mode MacvlanMode) error { + return pkgHandle.LinkSetMacvlanMode(link, mode) +} + +// LinkSetMacvlanMode sets the mode of the macvlan or macvtap link device. +// Note that passthrough mode cannot be set to and from and will fail. +// Equivalent to: `ip link set $link type (macvlan|macvtap) mode $mode +func (h *Handle) LinkSetMacvlanMode(link Link, mode MacvlanMode) error { + base := link.Attrs() + h.ensureIndex(base) + req := h.newNetlinkRequest(unix.RTM_NEWLINK, unix.NLM_F_ACK) + + msg := nl.NewIfInfomsg(unix.AF_UNSPEC) + msg.Index = int32(base.Index) + req.AddData(msg) + + linkInfo := nl.NewRtAttr(unix.IFLA_LINKINFO, nil) + linkInfo.AddRtAttr(nl.IFLA_INFO_KIND, nl.NonZeroTerminated(link.Type())) + + data := linkInfo.AddRtAttr(nl.IFLA_INFO_DATA, nil) + data.AddRtAttr(nl.IFLA_MACVLAN_MODE, nl.Uint32Attr(macvlanModes[mode])) + + req.AddData(linkInfo) + + _, err := req.Execute(unix.NETLINK_ROUTE, 0) + return err +} + +func BridgeSetMcastSnoop(link Link, on bool) error { + return pkgHandle.BridgeSetMcastSnoop(link, on) +} + +func (h *Handle) BridgeSetMcastSnoop(link Link, on bool) error { + bridge := link.(*Bridge) + bridge.MulticastSnooping = &on + return h.linkModify(bridge, unix.NLM_F_ACK) +} + +func BridgeSetVlanFiltering(link Link, on bool) error { + return pkgHandle.BridgeSetVlanFiltering(link, on) +} + +func (h *Handle) BridgeSetVlanFiltering(link Link, on bool) error { + bridge := link.(*Bridge) + bridge.VlanFiltering = &on + return h.linkModify(bridge, unix.NLM_F_ACK) +} + +func SetPromiscOn(link Link) error { + return pkgHandle.SetPromiscOn(link) +} + +func (h *Handle) SetPromiscOff(link Link) error { + base := link.Attrs() + h.ensureIndex(base) + req := h.newNetlinkRequest(unix.RTM_SETLINK, unix.NLM_F_ACK) + + msg := nl.NewIfInfomsg(unix.AF_UNSPEC) + msg.Change = unix.IFF_PROMISC + msg.Index = int32(base.Index) + req.AddData(msg) + + _, err := req.Execute(unix.NETLINK_ROUTE, 0) + return err +} + +func SetPromiscOff(link Link) error { + return pkgHandle.SetPromiscOff(link) +} + +// LinkSetUp enables the link device. +// Equivalent to: `ip link set $link up` +func LinkSetUp(link Link) error { + return pkgHandle.LinkSetUp(link) +} + +// LinkSetUp enables the link device. +// Equivalent to: `ip link set $link up` +func (h *Handle) LinkSetUp(link Link) error { + base := link.Attrs() + h.ensureIndex(base) + req := h.newNetlinkRequest(unix.RTM_NEWLINK, unix.NLM_F_ACK) + + msg := nl.NewIfInfomsg(unix.AF_UNSPEC) + msg.Change = unix.IFF_UP + msg.Flags = unix.IFF_UP + msg.Index = int32(base.Index) + req.AddData(msg) + + _, err := req.Execute(unix.NETLINK_ROUTE, 0) + return err +} + +// LinkSetDown disables link device. +// Equivalent to: `ip link set $link down` +func LinkSetDown(link Link) error { + return pkgHandle.LinkSetDown(link) +} + +// LinkSetDown disables link device. +// Equivalent to: `ip link set $link down` +func (h *Handle) LinkSetDown(link Link) error { + base := link.Attrs() + h.ensureIndex(base) + req := h.newNetlinkRequest(unix.RTM_NEWLINK, unix.NLM_F_ACK) + + msg := nl.NewIfInfomsg(unix.AF_UNSPEC) + msg.Change = unix.IFF_UP + msg.Index = int32(base.Index) + req.AddData(msg) + + _, err := req.Execute(unix.NETLINK_ROUTE, 0) + return err +} + +// LinkSetMTU sets the mtu of the link device. +// Equivalent to: `ip link set $link mtu $mtu` +func LinkSetMTU(link Link, mtu int) error { + return pkgHandle.LinkSetMTU(link, mtu) +} + +// LinkSetMTU sets the mtu of the link device. +// Equivalent to: `ip link set $link mtu $mtu` +func (h *Handle) LinkSetMTU(link Link, mtu int) error { + base := link.Attrs() + h.ensureIndex(base) + req := h.newNetlinkRequest(unix.RTM_SETLINK, unix.NLM_F_ACK) + + msg := nl.NewIfInfomsg(unix.AF_UNSPEC) + msg.Index = int32(base.Index) + req.AddData(msg) + + b := make([]byte, 4) + native.PutUint32(b, uint32(mtu)) + + data := nl.NewRtAttr(unix.IFLA_MTU, b) + req.AddData(data) + + _, err := req.Execute(unix.NETLINK_ROUTE, 0) + return err +} + +// LinkSetName sets the name of the link device. +// Equivalent to: `ip link set $link name $name` +func LinkSetName(link Link, name string) error { + return pkgHandle.LinkSetName(link, name) +} + +// LinkSetName sets the name of the link device. +// Equivalent to: `ip link set $link name $name` +func (h *Handle) LinkSetName(link Link, name string) error { + base := link.Attrs() + h.ensureIndex(base) + req := h.newNetlinkRequest(unix.RTM_SETLINK, unix.NLM_F_ACK) + + msg := nl.NewIfInfomsg(unix.AF_UNSPEC) + msg.Index = int32(base.Index) + req.AddData(msg) + + data := nl.NewRtAttr(unix.IFLA_IFNAME, []byte(name)) + req.AddData(data) + + _, err := req.Execute(unix.NETLINK_ROUTE, 0) + return err +} + +// LinkSetAlias sets the alias of the link device. +// Equivalent to: `ip link set dev $link alias $name` +func LinkSetAlias(link Link, name string) error { + return pkgHandle.LinkSetAlias(link, name) +} + +// LinkSetAlias sets the alias of the link device. +// Equivalent to: `ip link set dev $link alias $name` +func (h *Handle) LinkSetAlias(link Link, name string) error { + base := link.Attrs() + h.ensureIndex(base) + req := h.newNetlinkRequest(unix.RTM_SETLINK, unix.NLM_F_ACK) + + msg := nl.NewIfInfomsg(unix.AF_UNSPEC) + msg.Index = int32(base.Index) + req.AddData(msg) + + data := nl.NewRtAttr(unix.IFLA_IFALIAS, []byte(name)) + req.AddData(data) + + _, err := req.Execute(unix.NETLINK_ROUTE, 0) + return err +} + +// LinkSetHardwareAddr sets the hardware address of the link device. +// Equivalent to: `ip link set $link address $hwaddr` +func LinkSetHardwareAddr(link Link, hwaddr net.HardwareAddr) error { + return pkgHandle.LinkSetHardwareAddr(link, hwaddr) +} + +// LinkSetHardwareAddr sets the hardware address of the link device. +// Equivalent to: `ip link set $link address $hwaddr` +func (h *Handle) LinkSetHardwareAddr(link Link, hwaddr net.HardwareAddr) error { + base := link.Attrs() + h.ensureIndex(base) + req := h.newNetlinkRequest(unix.RTM_SETLINK, unix.NLM_F_ACK) + + msg := nl.NewIfInfomsg(unix.AF_UNSPEC) + msg.Index = int32(base.Index) + req.AddData(msg) + + data := nl.NewRtAttr(unix.IFLA_ADDRESS, []byte(hwaddr)) + req.AddData(data) + + _, err := req.Execute(unix.NETLINK_ROUTE, 0) + return err +} + +// LinkSetVfHardwareAddr sets the hardware address of a vf for the link. +// Equivalent to: `ip link set $link vf $vf mac $hwaddr` +func LinkSetVfHardwareAddr(link Link, vf int, hwaddr net.HardwareAddr) error { + return pkgHandle.LinkSetVfHardwareAddr(link, vf, hwaddr) +} + +// LinkSetVfHardwareAddr sets the hardware address of a vf for the link. +// Equivalent to: `ip link set $link vf $vf mac $hwaddr` +func (h *Handle) LinkSetVfHardwareAddr(link Link, vf int, hwaddr net.HardwareAddr) error { + base := link.Attrs() + h.ensureIndex(base) + req := h.newNetlinkRequest(unix.RTM_SETLINK, unix.NLM_F_ACK) + + msg := nl.NewIfInfomsg(unix.AF_UNSPEC) + msg.Index = int32(base.Index) + req.AddData(msg) + + data := nl.NewRtAttr(unix.IFLA_VFINFO_LIST, nil) + info := data.AddRtAttr(nl.IFLA_VF_INFO, nil) + vfmsg := nl.VfMac{ + Vf: uint32(vf), + } + copy(vfmsg.Mac[:], []byte(hwaddr)) + info.AddRtAttr(nl.IFLA_VF_MAC, vfmsg.Serialize()) + req.AddData(data) + + _, err := req.Execute(unix.NETLINK_ROUTE, 0) + return err +} + +// LinkSetVfVlan sets the vlan of a vf for the link. +// Equivalent to: `ip link set $link vf $vf vlan $vlan` +func LinkSetVfVlan(link Link, vf, vlan int) error { + return pkgHandle.LinkSetVfVlan(link, vf, vlan) +} + +// LinkSetVfVlan sets the vlan of a vf for the link. +// Equivalent to: `ip link set $link vf $vf vlan $vlan` +func (h *Handle) LinkSetVfVlan(link Link, vf, vlan int) error { + base := link.Attrs() + h.ensureIndex(base) + req := h.newNetlinkRequest(unix.RTM_SETLINK, unix.NLM_F_ACK) + + msg := nl.NewIfInfomsg(unix.AF_UNSPEC) + msg.Index = int32(base.Index) + req.AddData(msg) + + data := nl.NewRtAttr(unix.IFLA_VFINFO_LIST, nil) + info := data.AddRtAttr(nl.IFLA_VF_INFO, nil) + vfmsg := nl.VfVlan{ + Vf: uint32(vf), + Vlan: uint32(vlan), + } + info.AddRtAttr(nl.IFLA_VF_VLAN, vfmsg.Serialize()) + req.AddData(data) + + _, err := req.Execute(unix.NETLINK_ROUTE, 0) + return err +} + +// LinkSetVfVlanQos sets the vlan and qos priority of a vf for the link. +// Equivalent to: `ip link set $link vf $vf vlan $vlan qos $qos` +func LinkSetVfVlanQos(link Link, vf, vlan, qos int) error { + return pkgHandle.LinkSetVfVlanQos(link, vf, vlan, qos) +} + +// LinkSetVfVlanQos sets the vlan and qos priority of a vf for the link. +// Equivalent to: `ip link set $link vf $vf vlan $vlan qos $qos` +func (h *Handle) LinkSetVfVlanQos(link Link, vf, vlan, qos int) error { + base := link.Attrs() + h.ensureIndex(base) + req := h.newNetlinkRequest(unix.RTM_SETLINK, unix.NLM_F_ACK) + + msg := nl.NewIfInfomsg(unix.AF_UNSPEC) + msg.Index = int32(base.Index) + req.AddData(msg) + + data := nl.NewRtAttr(unix.IFLA_VFINFO_LIST, nil) + info := data.AddRtAttr(nl.IFLA_VF_INFO, nil) + vfmsg := nl.VfVlan{ + Vf: uint32(vf), + Vlan: uint32(vlan), + Qos: uint32(qos), + } + info.AddRtAttr(nl.IFLA_VF_VLAN, vfmsg.Serialize()) + req.AddData(data) + + _, err := req.Execute(unix.NETLINK_ROUTE, 0) + return err +} + +// LinkSetVfTxRate sets the tx rate of a vf for the link. +// Equivalent to: `ip link set $link vf $vf rate $rate` +func LinkSetVfTxRate(link Link, vf, rate int) error { + return pkgHandle.LinkSetVfTxRate(link, vf, rate) +} + +// LinkSetVfTxRate sets the tx rate of a vf for the link. +// Equivalent to: `ip link set $link vf $vf rate $rate` +func (h *Handle) LinkSetVfTxRate(link Link, vf, rate int) error { + base := link.Attrs() + h.ensureIndex(base) + req := h.newNetlinkRequest(unix.RTM_SETLINK, unix.NLM_F_ACK) + + msg := nl.NewIfInfomsg(unix.AF_UNSPEC) + msg.Index = int32(base.Index) + req.AddData(msg) + + data := nl.NewRtAttr(unix.IFLA_VFINFO_LIST, nil) + info := data.AddRtAttr(nl.IFLA_VF_INFO, nil) + vfmsg := nl.VfTxRate{ + Vf: uint32(vf), + Rate: uint32(rate), + } + info.AddRtAttr(nl.IFLA_VF_TX_RATE, vfmsg.Serialize()) + req.AddData(data) + + _, err := req.Execute(unix.NETLINK_ROUTE, 0) + return err +} + +// LinkSetVfRate sets the min and max tx rate of a vf for the link. +// Equivalent to: `ip link set $link vf $vf min_tx_rate $min_rate max_tx_rate $max_rate` +func LinkSetVfRate(link Link, vf, minRate, maxRate int) error { + return pkgHandle.LinkSetVfRate(link, vf, minRate, maxRate) +} + +// LinkSetVfRate sets the min and max tx rate of a vf for the link. +// Equivalent to: `ip link set $link vf $vf min_tx_rate $min_rate max_tx_rate $max_rate` +func (h *Handle) LinkSetVfRate(link Link, vf, minRate, maxRate int) error { + base := link.Attrs() + h.ensureIndex(base) + req := h.newNetlinkRequest(unix.RTM_SETLINK, unix.NLM_F_ACK) + + msg := nl.NewIfInfomsg(unix.AF_UNSPEC) + msg.Index = int32(base.Index) + req.AddData(msg) + + data := nl.NewRtAttr(unix.IFLA_VFINFO_LIST, nil) + info := data.AddRtAttr(nl.IFLA_VF_INFO, nil) + vfmsg := nl.VfRate{ + Vf: uint32(vf), + MinTxRate: uint32(minRate), + MaxTxRate: uint32(maxRate), + } + info.AddRtAttr(nl.IFLA_VF_RATE, vfmsg.Serialize()) + req.AddData(data) + + _, err := req.Execute(unix.NETLINK_ROUTE, 0) + return err +} + +// LinkSetVfState enables/disables virtual link state on a vf. +// Equivalent to: `ip link set $link vf $vf state $state` +func LinkSetVfState(link Link, vf int, state uint32) error { + return pkgHandle.LinkSetVfState(link, vf, state) +} + +// LinkSetVfState enables/disables virtual link state on a vf. +// Equivalent to: `ip link set $link vf $vf state $state` +func (h *Handle) LinkSetVfState(link Link, vf int, state uint32) error { + base := link.Attrs() + h.ensureIndex(base) + req := h.newNetlinkRequest(unix.RTM_SETLINK, unix.NLM_F_ACK) + + msg := nl.NewIfInfomsg(unix.AF_UNSPEC) + msg.Index = int32(base.Index) + req.AddData(msg) + + data := nl.NewRtAttr(unix.IFLA_VFINFO_LIST, nil) + info := data.AddRtAttr(nl.IFLA_VF_INFO, nil) + vfmsg := nl.VfLinkState{ + Vf: uint32(vf), + LinkState: state, + } + info.AddRtAttr(nl.IFLA_VF_LINK_STATE, vfmsg.Serialize()) + req.AddData(data) + + _, err := req.Execute(unix.NETLINK_ROUTE, 0) + return err +} + +// LinkSetVfSpoofchk enables/disables spoof check on a vf for the link. +// Equivalent to: `ip link set $link vf $vf spoofchk $check` +func LinkSetVfSpoofchk(link Link, vf int, check bool) error { + return pkgHandle.LinkSetVfSpoofchk(link, vf, check) +} + +// LinkSetVfSpoofchk enables/disables spoof check on a vf for the link. +// Equivalent to: `ip link set $link vf $vf spoofchk $check` +func (h *Handle) LinkSetVfSpoofchk(link Link, vf int, check bool) error { + var setting uint32 + base := link.Attrs() + h.ensureIndex(base) + req := h.newNetlinkRequest(unix.RTM_SETLINK, unix.NLM_F_ACK) + + msg := nl.NewIfInfomsg(unix.AF_UNSPEC) + msg.Index = int32(base.Index) + req.AddData(msg) + + data := nl.NewRtAttr(unix.IFLA_VFINFO_LIST, nil) + info := data.AddRtAttr(nl.IFLA_VF_INFO, nil) + if check { + setting = 1 + } + vfmsg := nl.VfSpoofchk{ + Vf: uint32(vf), + Setting: setting, + } + info.AddRtAttr(nl.IFLA_VF_SPOOFCHK, vfmsg.Serialize()) + req.AddData(data) + + _, err := req.Execute(unix.NETLINK_ROUTE, 0) + return err +} + +// LinkSetVfTrust enables/disables trust state on a vf for the link. +// Equivalent to: `ip link set $link vf $vf trust $state` +func LinkSetVfTrust(link Link, vf int, state bool) error { + return pkgHandle.LinkSetVfTrust(link, vf, state) +} + +// LinkSetVfTrust enables/disables trust state on a vf for the link. +// Equivalent to: `ip link set $link vf $vf trust $state` +func (h *Handle) LinkSetVfTrust(link Link, vf int, state bool) error { + var setting uint32 + base := link.Attrs() + h.ensureIndex(base) + req := h.newNetlinkRequest(unix.RTM_SETLINK, unix.NLM_F_ACK) + + msg := nl.NewIfInfomsg(unix.AF_UNSPEC) + msg.Index = int32(base.Index) + req.AddData(msg) + + data := nl.NewRtAttr(unix.IFLA_VFINFO_LIST, nil) + info := data.AddRtAttr(nl.IFLA_VF_INFO, nil) + if state { + setting = 1 + } + vfmsg := nl.VfTrust{ + Vf: uint32(vf), + Setting: setting, + } + info.AddRtAttr(nl.IFLA_VF_TRUST, vfmsg.Serialize()) + req.AddData(data) + + _, err := req.Execute(unix.NETLINK_ROUTE, 0) + return err +} + +// LinkSetVfNodeGUID sets the node GUID of a vf for the link. +// Equivalent to: `ip link set dev $link vf $vf node_guid $nodeguid` +func LinkSetVfNodeGUID(link Link, vf int, nodeguid net.HardwareAddr) error { + return pkgHandle.LinkSetVfGUID(link, vf, nodeguid, nl.IFLA_VF_IB_NODE_GUID) +} + +// LinkSetVfPortGUID sets the port GUID of a vf for the link. +// Equivalent to: `ip link set dev $link vf $vf port_guid $portguid` +func LinkSetVfPortGUID(link Link, vf int, portguid net.HardwareAddr) error { + return pkgHandle.LinkSetVfGUID(link, vf, portguid, nl.IFLA_VF_IB_PORT_GUID) +} + +// LinkSetVfGUID sets the node or port GUID of a vf for the link. +func (h *Handle) LinkSetVfGUID(link Link, vf int, vfGuid net.HardwareAddr, guidType int) error { + var err error + var guid uint64 + + buf := bytes.NewBuffer(vfGuid) + err = binary.Read(buf, binary.BigEndian, &guid) + if err != nil { + return err + } + + base := link.Attrs() + h.ensureIndex(base) + req := h.newNetlinkRequest(unix.RTM_SETLINK, unix.NLM_F_ACK) + + msg := nl.NewIfInfomsg(unix.AF_UNSPEC) + msg.Index = int32(base.Index) + req.AddData(msg) + + data := nl.NewRtAttr(unix.IFLA_VFINFO_LIST, nil) + info := data.AddRtAttr(nl.IFLA_VF_INFO, nil) + vfmsg := nl.VfGUID{ + Vf: uint32(vf), + GUID: guid, + } + info.AddRtAttr(guidType, vfmsg.Serialize()) + req.AddData(data) + + _, err = req.Execute(unix.NETLINK_ROUTE, 0) + return err +} + +// LinkSetMaster sets the master of the link device. +// Equivalent to: `ip link set $link master $master` +func LinkSetMaster(link Link, master Link) error { + return pkgHandle.LinkSetMaster(link, master) +} + +// LinkSetMaster sets the master of the link device. +// Equivalent to: `ip link set $link master $master` +func (h *Handle) LinkSetMaster(link Link, master Link) error { + index := 0 + if master != nil { + masterBase := master.Attrs() + h.ensureIndex(masterBase) + index = masterBase.Index + } + if index <= 0 { + return fmt.Errorf("Device does not exist") + } + return h.LinkSetMasterByIndex(link, index) +} + +// LinkSetNoMaster removes the master of the link device. +// Equivalent to: `ip link set $link nomaster` +func LinkSetNoMaster(link Link) error { + return pkgHandle.LinkSetNoMaster(link) +} + +// LinkSetNoMaster removes the master of the link device. +// Equivalent to: `ip link set $link nomaster` +func (h *Handle) LinkSetNoMaster(link Link) error { + return h.LinkSetMasterByIndex(link, 0) +} + +// LinkSetMasterByIndex sets the master of the link device. +// Equivalent to: `ip link set $link master $master` +func LinkSetMasterByIndex(link Link, masterIndex int) error { + return pkgHandle.LinkSetMasterByIndex(link, masterIndex) +} + +// LinkSetMasterByIndex sets the master of the link device. +// Equivalent to: `ip link set $link master $master` +func (h *Handle) LinkSetMasterByIndex(link Link, masterIndex int) error { + base := link.Attrs() + h.ensureIndex(base) + req := h.newNetlinkRequest(unix.RTM_SETLINK, unix.NLM_F_ACK) + + msg := nl.NewIfInfomsg(unix.AF_UNSPEC) + msg.Index = int32(base.Index) + req.AddData(msg) + + b := make([]byte, 4) + native.PutUint32(b, uint32(masterIndex)) + + data := nl.NewRtAttr(unix.IFLA_MASTER, b) + req.AddData(data) + + _, err := req.Execute(unix.NETLINK_ROUTE, 0) + return err +} + +// LinkSetNsPid puts the device into a new network namespace. The +// pid must be a pid of a running process. +// Equivalent to: `ip link set $link netns $pid` +func LinkSetNsPid(link Link, nspid int) error { + return pkgHandle.LinkSetNsPid(link, nspid) +} + +// LinkSetNsPid puts the device into a new network namespace. The +// pid must be a pid of a running process. +// Equivalent to: `ip link set $link netns $pid` +func (h *Handle) LinkSetNsPid(link Link, nspid int) error { + base := link.Attrs() + h.ensureIndex(base) + req := h.newNetlinkRequest(unix.RTM_SETLINK, unix.NLM_F_ACK) + + msg := nl.NewIfInfomsg(unix.AF_UNSPEC) + msg.Index = int32(base.Index) + req.AddData(msg) + + b := make([]byte, 4) + native.PutUint32(b, uint32(nspid)) + + data := nl.NewRtAttr(unix.IFLA_NET_NS_PID, b) + req.AddData(data) + + _, err := req.Execute(unix.NETLINK_ROUTE, 0) + return err +} + +// LinkSetNsFd puts the device into a new network namespace. The +// fd must be an open file descriptor to a network namespace. +// Similar to: `ip link set $link netns $ns` +func LinkSetNsFd(link Link, fd int) error { + return pkgHandle.LinkSetNsFd(link, fd) +} + +// LinkSetNsFd puts the device into a new network namespace. The +// fd must be an open file descriptor to a network namespace. +// Similar to: `ip link set $link netns $ns` +func (h *Handle) LinkSetNsFd(link Link, fd int) error { + base := link.Attrs() + h.ensureIndex(base) + req := h.newNetlinkRequest(unix.RTM_SETLINK, unix.NLM_F_ACK) + + msg := nl.NewIfInfomsg(unix.AF_UNSPEC) + msg.Index = int32(base.Index) + req.AddData(msg) + + b := make([]byte, 4) + native.PutUint32(b, uint32(fd)) + + data := nl.NewRtAttr(unix.IFLA_NET_NS_FD, b) + req.AddData(data) + + _, err := req.Execute(unix.NETLINK_ROUTE, 0) + return err +} + +// LinkSetXdpFd adds a bpf function to the driver. The fd must be a bpf +// program loaded with bpf(type=BPF_PROG_TYPE_XDP) +func LinkSetXdpFd(link Link, fd int) error { + return LinkSetXdpFdWithFlags(link, fd, 0) +} + +// LinkSetXdpFdWithFlags adds a bpf function to the driver with the given +// options. The fd must be a bpf program loaded with bpf(type=BPF_PROG_TYPE_XDP) +func LinkSetXdpFdWithFlags(link Link, fd, flags int) error { + base := link.Attrs() + ensureIndex(base) + req := nl.NewNetlinkRequest(unix.RTM_SETLINK, unix.NLM_F_ACK) + + msg := nl.NewIfInfomsg(unix.AF_UNSPEC) + msg.Index = int32(base.Index) + req.AddData(msg) + + addXdpAttrs(&LinkXdp{Fd: fd, Flags: uint32(flags)}, req) + + _, err := req.Execute(unix.NETLINK_ROUTE, 0) + return err +} + +func boolAttr(val bool) []byte { + var v uint8 + if val { + v = 1 + } + return nl.Uint8Attr(v) +} + +type vxlanPortRange struct { + Lo, Hi uint16 +} + +func addVxlanAttrs(vxlan *Vxlan, linkInfo *nl.RtAttr) { + data := linkInfo.AddRtAttr(nl.IFLA_INFO_DATA, nil) + + if vxlan.FlowBased { + vxlan.VxlanId = 0 + } + + data.AddRtAttr(nl.IFLA_VXLAN_ID, nl.Uint32Attr(uint32(vxlan.VxlanId))) + + if vxlan.VtepDevIndex != 0 { + data.AddRtAttr(nl.IFLA_VXLAN_LINK, nl.Uint32Attr(uint32(vxlan.VtepDevIndex))) + } + if vxlan.SrcAddr != nil { + ip := vxlan.SrcAddr.To4() + if ip != nil { + data.AddRtAttr(nl.IFLA_VXLAN_LOCAL, []byte(ip)) + } else { + ip = vxlan.SrcAddr.To16() + if ip != nil { + data.AddRtAttr(nl.IFLA_VXLAN_LOCAL6, []byte(ip)) + } + } + } + if vxlan.Group != nil { + group := vxlan.Group.To4() + if group != nil { + data.AddRtAttr(nl.IFLA_VXLAN_GROUP, []byte(group)) + } else { + group = vxlan.Group.To16() + if group != nil { + data.AddRtAttr(nl.IFLA_VXLAN_GROUP6, []byte(group)) + } + } + } + + data.AddRtAttr(nl.IFLA_VXLAN_TTL, nl.Uint8Attr(uint8(vxlan.TTL))) + data.AddRtAttr(nl.IFLA_VXLAN_TOS, nl.Uint8Attr(uint8(vxlan.TOS))) + data.AddRtAttr(nl.IFLA_VXLAN_LEARNING, boolAttr(vxlan.Learning)) + data.AddRtAttr(nl.IFLA_VXLAN_PROXY, boolAttr(vxlan.Proxy)) + data.AddRtAttr(nl.IFLA_VXLAN_RSC, boolAttr(vxlan.RSC)) + data.AddRtAttr(nl.IFLA_VXLAN_L2MISS, boolAttr(vxlan.L2miss)) + data.AddRtAttr(nl.IFLA_VXLAN_L3MISS, boolAttr(vxlan.L3miss)) + data.AddRtAttr(nl.IFLA_VXLAN_UDP_ZERO_CSUM6_TX, boolAttr(vxlan.UDP6ZeroCSumTx)) + data.AddRtAttr(nl.IFLA_VXLAN_UDP_ZERO_CSUM6_RX, boolAttr(vxlan.UDP6ZeroCSumRx)) + + if vxlan.UDPCSum { + data.AddRtAttr(nl.IFLA_VXLAN_UDP_CSUM, boolAttr(vxlan.UDPCSum)) + } + if vxlan.GBP { + data.AddRtAttr(nl.IFLA_VXLAN_GBP, []byte{}) + } + if vxlan.FlowBased { + data.AddRtAttr(nl.IFLA_VXLAN_FLOWBASED, boolAttr(vxlan.FlowBased)) + } + if vxlan.NoAge { + data.AddRtAttr(nl.IFLA_VXLAN_AGEING, nl.Uint32Attr(0)) + } else if vxlan.Age > 0 { + data.AddRtAttr(nl.IFLA_VXLAN_AGEING, nl.Uint32Attr(uint32(vxlan.Age))) + } + if vxlan.Limit > 0 { + data.AddRtAttr(nl.IFLA_VXLAN_LIMIT, nl.Uint32Attr(uint32(vxlan.Limit))) + } + if vxlan.Port > 0 { + data.AddRtAttr(nl.IFLA_VXLAN_PORT, htons(uint16(vxlan.Port))) + } + if vxlan.PortLow > 0 || vxlan.PortHigh > 0 { + pr := vxlanPortRange{uint16(vxlan.PortLow), uint16(vxlan.PortHigh)} + + buf := new(bytes.Buffer) + binary.Write(buf, binary.BigEndian, &pr) + + data.AddRtAttr(nl.IFLA_VXLAN_PORT_RANGE, buf.Bytes()) + } +} + +func addBondAttrs(bond *Bond, linkInfo *nl.RtAttr) { + data := linkInfo.AddRtAttr(nl.IFLA_INFO_DATA, nil) + if bond.Mode >= 0 { + data.AddRtAttr(nl.IFLA_BOND_MODE, nl.Uint8Attr(uint8(bond.Mode))) + } + if bond.ActiveSlave >= 0 { + data.AddRtAttr(nl.IFLA_BOND_ACTIVE_SLAVE, nl.Uint32Attr(uint32(bond.ActiveSlave))) + } + if bond.Miimon >= 0 { + data.AddRtAttr(nl.IFLA_BOND_MIIMON, nl.Uint32Attr(uint32(bond.Miimon))) + } + if bond.UpDelay >= 0 { + data.AddRtAttr(nl.IFLA_BOND_UPDELAY, nl.Uint32Attr(uint32(bond.UpDelay))) + } + if bond.DownDelay >= 0 { + data.AddRtAttr(nl.IFLA_BOND_DOWNDELAY, nl.Uint32Attr(uint32(bond.DownDelay))) + } + if bond.UseCarrier >= 0 { + data.AddRtAttr(nl.IFLA_BOND_USE_CARRIER, nl.Uint8Attr(uint8(bond.UseCarrier))) + } + if bond.ArpInterval >= 0 { + data.AddRtAttr(nl.IFLA_BOND_ARP_INTERVAL, nl.Uint32Attr(uint32(bond.ArpInterval))) + } + if bond.ArpIpTargets != nil { + msg := data.AddRtAttr(nl.IFLA_BOND_ARP_IP_TARGET, nil) + for i := range bond.ArpIpTargets { + ip := bond.ArpIpTargets[i].To4() + if ip != nil { + msg.AddRtAttr(i, []byte(ip)) + continue + } + ip = bond.ArpIpTargets[i].To16() + if ip != nil { + msg.AddRtAttr(i, []byte(ip)) + } + } + } + if bond.ArpValidate >= 0 { + data.AddRtAttr(nl.IFLA_BOND_ARP_VALIDATE, nl.Uint32Attr(uint32(bond.ArpValidate))) + } + if bond.ArpAllTargets >= 0 { + data.AddRtAttr(nl.IFLA_BOND_ARP_ALL_TARGETS, nl.Uint32Attr(uint32(bond.ArpAllTargets))) + } + if bond.Primary >= 0 { + data.AddRtAttr(nl.IFLA_BOND_PRIMARY, nl.Uint32Attr(uint32(bond.Primary))) + } + if bond.PrimaryReselect >= 0 { + data.AddRtAttr(nl.IFLA_BOND_PRIMARY_RESELECT, nl.Uint8Attr(uint8(bond.PrimaryReselect))) + } + if bond.FailOverMac >= 0 { + data.AddRtAttr(nl.IFLA_BOND_FAIL_OVER_MAC, nl.Uint8Attr(uint8(bond.FailOverMac))) + } + if bond.XmitHashPolicy >= 0 { + data.AddRtAttr(nl.IFLA_BOND_XMIT_HASH_POLICY, nl.Uint8Attr(uint8(bond.XmitHashPolicy))) + } + if bond.ResendIgmp >= 0 { + data.AddRtAttr(nl.IFLA_BOND_RESEND_IGMP, nl.Uint32Attr(uint32(bond.ResendIgmp))) + } + if bond.NumPeerNotif >= 0 { + data.AddRtAttr(nl.IFLA_BOND_NUM_PEER_NOTIF, nl.Uint8Attr(uint8(bond.NumPeerNotif))) + } + if bond.AllSlavesActive >= 0 { + data.AddRtAttr(nl.IFLA_BOND_ALL_SLAVES_ACTIVE, nl.Uint8Attr(uint8(bond.AllSlavesActive))) + } + if bond.MinLinks >= 0 { + data.AddRtAttr(nl.IFLA_BOND_MIN_LINKS, nl.Uint32Attr(uint32(bond.MinLinks))) + } + if bond.LpInterval >= 0 { + data.AddRtAttr(nl.IFLA_BOND_LP_INTERVAL, nl.Uint32Attr(uint32(bond.LpInterval))) + } + if bond.PacketsPerSlave >= 0 { + data.AddRtAttr(nl.IFLA_BOND_PACKETS_PER_SLAVE, nl.Uint32Attr(uint32(bond.PacketsPerSlave))) + } + if bond.LacpRate >= 0 { + data.AddRtAttr(nl.IFLA_BOND_AD_LACP_RATE, nl.Uint8Attr(uint8(bond.LacpRate))) + } + if bond.AdSelect >= 0 { + data.AddRtAttr(nl.IFLA_BOND_AD_SELECT, nl.Uint8Attr(uint8(bond.AdSelect))) + } + if bond.AdActorSysPrio >= 0 { + data.AddRtAttr(nl.IFLA_BOND_AD_ACTOR_SYS_PRIO, nl.Uint16Attr(uint16(bond.AdActorSysPrio))) + } + if bond.AdUserPortKey >= 0 { + data.AddRtAttr(nl.IFLA_BOND_AD_USER_PORT_KEY, nl.Uint16Attr(uint16(bond.AdUserPortKey))) + } + if bond.AdActorSystem != nil { + data.AddRtAttr(nl.IFLA_BOND_AD_ACTOR_SYSTEM, []byte(bond.AdActorSystem)) + } + if bond.TlbDynamicLb >= 0 { + data.AddRtAttr(nl.IFLA_BOND_TLB_DYNAMIC_LB, nl.Uint8Attr(uint8(bond.TlbDynamicLb))) + } +} + +func cleanupFds(fds []*os.File) { + for _, f := range fds { + f.Close() + } +} + +// LinkAdd adds a new link device. The type and features of the device +// are taken from the parameters in the link object. +// Equivalent to: `ip link add $link` +func LinkAdd(link Link) error { + return pkgHandle.LinkAdd(link) +} + +// LinkAdd adds a new link device. The type and features of the device +// are taken from the parameters in the link object. +// Equivalent to: `ip link add $link` +func (h *Handle) LinkAdd(link Link) error { + return h.linkModify(link, unix.NLM_F_CREATE|unix.NLM_F_EXCL|unix.NLM_F_ACK) +} + +func LinkModify(link Link) error { + return pkgHandle.LinkModify(link) +} + +func (h *Handle) LinkModify(link Link) error { + return h.linkModify(link, unix.NLM_F_REQUEST|unix.NLM_F_ACK) +} + +func (h *Handle) linkModify(link Link, flags int) error { + // TODO: support extra data for macvlan + base := link.Attrs() + + // if tuntap, then the name can be empty, OS will provide a name + tuntap, isTuntap := link.(*Tuntap) + + if base.Name == "" && !isTuntap { + return fmt.Errorf("LinkAttrs.Name cannot be empty") + } + + if isTuntap { + if tuntap.Mode < unix.IFF_TUN || tuntap.Mode > unix.IFF_TAP { + return fmt.Errorf("Tuntap.Mode %v unknown", tuntap.Mode) + } + + queues := tuntap.Queues + + var fds []*os.File + var req ifReq + copy(req.Name[:15], base.Name) + + req.Flags = uint16(tuntap.Flags) + + if queues == 0 { //Legacy compatibility + queues = 1 + if tuntap.Flags == 0 { + req.Flags = uint16(TUNTAP_DEFAULTS) + } + } else { + // For best peformance set Flags to TUNTAP_MULTI_QUEUE_DEFAULTS | TUNTAP_VNET_HDR + // when a) KVM has support for this ABI and + // b) the value of the flag is queryable using the TUNGETIFF ioctl + if tuntap.Flags == 0 { + req.Flags = uint16(TUNTAP_MULTI_QUEUE_DEFAULTS) + } + } + + req.Flags |= uint16(tuntap.Mode) + const TUN = "/dev/net/tun" + for i := 0; i < queues; i++ { + localReq := req + fd, err := unix.Open(TUN, os.O_RDWR|syscall.O_CLOEXEC, 0) + if err != nil { + cleanupFds(fds) + return err + } + + _, _, errno := unix.Syscall(unix.SYS_IOCTL, uintptr(fd), uintptr(unix.TUNSETIFF), uintptr(unsafe.Pointer(&localReq))) + if errno != 0 { + // close the new fd + unix.Close(fd) + // and the already opened ones + cleanupFds(fds) + return fmt.Errorf("Tuntap IOCTL TUNSETIFF failed [%d], errno %v", i, errno) + } + + _, _, errno = syscall.Syscall(syscall.SYS_IOCTL, uintptr(fd), syscall.TUNSETOWNER, uintptr(tuntap.Owner)) + if errno != 0 { + cleanupFds(fds) + return fmt.Errorf("Tuntap IOCTL TUNSETOWNER failed [%d], errno %v", i, errno) + } + + _, _, errno = syscall.Syscall(syscall.SYS_IOCTL, uintptr(fd), syscall.TUNSETGROUP, uintptr(tuntap.Group)) + if errno != 0 { + cleanupFds(fds) + return fmt.Errorf("Tuntap IOCTL TUNSETGROUP failed [%d], errno %v", i, errno) + } + + // Set the tun device to non-blocking before use. The below comment + // taken from: + // + // https://github.com/mistsys/tuntap/commit/161418c25003bbee77d085a34af64d189df62bea + // + // Note there is a complication because in go, if a device node is + // opened, go sets it to use nonblocking I/O. However a /dev/net/tun + // doesn't work with epoll until after the TUNSETIFF ioctl has been + // done. So we open the unix fd directly, do the ioctl, then put the + // fd in nonblocking mode, an then finally wrap it in a os.File, + // which will see the nonblocking mode and add the fd to the + // pollable set, so later on when we Read() from it blocked the + // calling thread in the kernel. + // + // See + // https://github.com/golang/go/issues/30426 + // which got exposed in go 1.13 by the fix to + // https://github.com/golang/go/issues/30624 + err = unix.SetNonblock(fd, true) + if err != nil { + cleanupFds(fds) + return fmt.Errorf("Tuntap set to non-blocking failed [%d], err %v", i, err) + } + + // create the file from the file descriptor and store it + file := os.NewFile(uintptr(fd), TUN) + fds = append(fds, file) + + // 1) we only care for the name of the first tap in the multi queue set + // 2) if the original name was empty, the localReq has now the actual name + // + // In addition: + // This ensures that the link name is always identical to what the kernel returns. + // Not only in case of an empty name, but also when using name templates. + // e.g. when the provided name is "tap%d", the kernel replaces %d with the next available number. + if i == 0 { + link.Attrs().Name = strings.Trim(string(localReq.Name[:]), "\x00") + } + + } + + control := func(file *os.File, f func(fd uintptr)) error { + name := file.Name() + conn, err := file.SyscallConn() + if err != nil { + return fmt.Errorf("SyscallConn() failed on %s: %v", name, err) + } + if err := conn.Control(f); err != nil { + return fmt.Errorf("Failed to get file descriptor for %s: %v", name, err) + } + return nil + } + + // only persist interface if NonPersist is NOT set + if !tuntap.NonPersist { + var errno syscall.Errno + if err := control(fds[0], func(fd uintptr) { + _, _, errno = unix.Syscall(unix.SYS_IOCTL, fd, uintptr(unix.TUNSETPERSIST), 1) + }); err != nil { + return err + } + if errno != 0 { + cleanupFds(fds) + return fmt.Errorf("Tuntap IOCTL TUNSETPERSIST failed, errno %v", errno) + } + } + + h.ensureIndex(base) + + // can't set master during create, so set it afterwards + if base.MasterIndex != 0 { + // TODO: verify MasterIndex is actually a bridge? + err := h.LinkSetMasterByIndex(link, base.MasterIndex) + if err != nil { + // un-persist (e.g. allow the interface to be removed) the tuntap + // should not hurt if not set prior, condition might be not needed + if !tuntap.NonPersist { + // ignore error + _ = control(fds[0], func(fd uintptr) { + _, _, _ = unix.Syscall(unix.SYS_IOCTL, fd, uintptr(unix.TUNSETPERSIST), 0) + }) + } + cleanupFds(fds) + return err + } + } + + if tuntap.Queues == 0 { + cleanupFds(fds) + } else { + tuntap.Fds = fds + } + + return nil + } + + req := h.newNetlinkRequest(unix.RTM_NEWLINK, flags) + + msg := nl.NewIfInfomsg(unix.AF_UNSPEC) + // TODO: make it shorter + if base.Flags&net.FlagUp != 0 { + msg.Change = unix.IFF_UP + msg.Flags = unix.IFF_UP + } + if base.Flags&net.FlagBroadcast != 0 { + msg.Change |= unix.IFF_BROADCAST + msg.Flags |= unix.IFF_BROADCAST + } + if base.Flags&net.FlagLoopback != 0 { + msg.Change |= unix.IFF_LOOPBACK + msg.Flags |= unix.IFF_LOOPBACK + } + if base.Flags&net.FlagPointToPoint != 0 { + msg.Change |= unix.IFF_POINTOPOINT + msg.Flags |= unix.IFF_POINTOPOINT + } + if base.Flags&net.FlagMulticast != 0 { + msg.Change |= unix.IFF_MULTICAST + msg.Flags |= unix.IFF_MULTICAST + } + if base.Index != 0 { + msg.Index = int32(base.Index) + } + + req.AddData(msg) + + if base.ParentIndex != 0 { + b := make([]byte, 4) + native.PutUint32(b, uint32(base.ParentIndex)) + data := nl.NewRtAttr(unix.IFLA_LINK, b) + req.AddData(data) + } else if link.Type() == "ipvlan" || link.Type() == "ipoib" { + return fmt.Errorf("Can't create %s link without ParentIndex", link.Type()) + } + + nameData := nl.NewRtAttr(unix.IFLA_IFNAME, nl.ZeroTerminated(base.Name)) + req.AddData(nameData) + + if base.Alias != "" { + alias := nl.NewRtAttr(unix.IFLA_IFALIAS, []byte(base.Alias)) + req.AddData(alias) + } + + if base.MTU > 0 { + mtu := nl.NewRtAttr(unix.IFLA_MTU, nl.Uint32Attr(uint32(base.MTU))) + req.AddData(mtu) + } + + if base.TxQLen >= 0 { + qlen := nl.NewRtAttr(unix.IFLA_TXQLEN, nl.Uint32Attr(uint32(base.TxQLen))) + req.AddData(qlen) + } + + if base.HardwareAddr != nil { + hwaddr := nl.NewRtAttr(unix.IFLA_ADDRESS, []byte(base.HardwareAddr)) + req.AddData(hwaddr) + } + + if base.NumTxQueues > 0 { + txqueues := nl.NewRtAttr(unix.IFLA_NUM_TX_QUEUES, nl.Uint32Attr(uint32(base.NumTxQueues))) + req.AddData(txqueues) + } + + if base.NumRxQueues > 0 { + rxqueues := nl.NewRtAttr(unix.IFLA_NUM_RX_QUEUES, nl.Uint32Attr(uint32(base.NumRxQueues))) + req.AddData(rxqueues) + } + + if base.GSOMaxSegs > 0 { + gsoAttr := nl.NewRtAttr(unix.IFLA_GSO_MAX_SEGS, nl.Uint32Attr(base.GSOMaxSegs)) + req.AddData(gsoAttr) + } + + if base.GSOMaxSize > 0 { + gsoAttr := nl.NewRtAttr(unix.IFLA_GSO_MAX_SIZE, nl.Uint32Attr(base.GSOMaxSize)) + req.AddData(gsoAttr) + } + + if base.Group > 0 { + groupAttr := nl.NewRtAttr(unix.IFLA_GROUP, nl.Uint32Attr(base.Group)) + req.AddData(groupAttr) + } + + if base.Namespace != nil { + var attr *nl.RtAttr + switch ns := base.Namespace.(type) { + case NsPid: + val := nl.Uint32Attr(uint32(ns)) + attr = nl.NewRtAttr(unix.IFLA_NET_NS_PID, val) + case NsFd: + val := nl.Uint32Attr(uint32(ns)) + attr = nl.NewRtAttr(unix.IFLA_NET_NS_FD, val) + } + + req.AddData(attr) + } + + if base.Xdp != nil { + addXdpAttrs(base.Xdp, req) + } + + linkInfo := nl.NewRtAttr(unix.IFLA_LINKINFO, nil) + linkInfo.AddRtAttr(nl.IFLA_INFO_KIND, nl.NonZeroTerminated(link.Type())) + + switch link := link.(type) { + case *Vlan: + b := make([]byte, 2) + native.PutUint16(b, uint16(link.VlanId)) + data := linkInfo.AddRtAttr(nl.IFLA_INFO_DATA, nil) + data.AddRtAttr(nl.IFLA_VLAN_ID, b) + + if link.VlanProtocol != VLAN_PROTOCOL_UNKNOWN { + data.AddRtAttr(nl.IFLA_VLAN_PROTOCOL, htons(uint16(link.VlanProtocol))) + } + case *Veth: + data := linkInfo.AddRtAttr(nl.IFLA_INFO_DATA, nil) + peer := data.AddRtAttr(nl.VETH_INFO_PEER, nil) + nl.NewIfInfomsgChild(peer, unix.AF_UNSPEC) + peer.AddRtAttr(unix.IFLA_IFNAME, nl.ZeroTerminated(link.PeerName)) + if base.TxQLen >= 0 { + peer.AddRtAttr(unix.IFLA_TXQLEN, nl.Uint32Attr(uint32(base.TxQLen))) + } + if base.NumTxQueues > 0 { + peer.AddRtAttr(unix.IFLA_NUM_TX_QUEUES, nl.Uint32Attr(uint32(base.NumTxQueues))) + } + if base.NumRxQueues > 0 { + peer.AddRtAttr(unix.IFLA_NUM_RX_QUEUES, nl.Uint32Attr(uint32(base.NumRxQueues))) + } + if base.MTU > 0 { + peer.AddRtAttr(unix.IFLA_MTU, nl.Uint32Attr(uint32(base.MTU))) + } + if link.PeerHardwareAddr != nil { + peer.AddRtAttr(unix.IFLA_ADDRESS, []byte(link.PeerHardwareAddr)) + } + if link.PeerNamespace != nil { + switch ns := link.PeerNamespace.(type) { + case NsPid: + val := nl.Uint32Attr(uint32(ns)) + peer.AddRtAttr(unix.IFLA_NET_NS_PID, val) + case NsFd: + val := nl.Uint32Attr(uint32(ns)) + peer.AddRtAttr(unix.IFLA_NET_NS_FD, val) + } + } + case *Vxlan: + addVxlanAttrs(link, linkInfo) + case *Bond: + addBondAttrs(link, linkInfo) + case *IPVlan: + data := linkInfo.AddRtAttr(nl.IFLA_INFO_DATA, nil) + data.AddRtAttr(nl.IFLA_IPVLAN_MODE, nl.Uint16Attr(uint16(link.Mode))) + data.AddRtAttr(nl.IFLA_IPVLAN_FLAG, nl.Uint16Attr(uint16(link.Flag))) + case *IPVtap: + data := linkInfo.AddRtAttr(nl.IFLA_INFO_DATA, nil) + data.AddRtAttr(nl.IFLA_IPVLAN_MODE, nl.Uint16Attr(uint16(link.Mode))) + data.AddRtAttr(nl.IFLA_IPVLAN_FLAG, nl.Uint16Attr(uint16(link.Flag))) + case *Macvlan: + if link.Mode != MACVLAN_MODE_DEFAULT { + data := linkInfo.AddRtAttr(nl.IFLA_INFO_DATA, nil) + data.AddRtAttr(nl.IFLA_MACVLAN_MODE, nl.Uint32Attr(macvlanModes[link.Mode])) + } + case *Macvtap: + if link.Mode != MACVLAN_MODE_DEFAULT { + data := linkInfo.AddRtAttr(nl.IFLA_INFO_DATA, nil) + data.AddRtAttr(nl.IFLA_MACVLAN_MODE, nl.Uint32Attr(macvlanModes[link.Mode])) + } + case *Geneve: + addGeneveAttrs(link, linkInfo) + case *Gretap: + addGretapAttrs(link, linkInfo) + case *Iptun: + addIptunAttrs(link, linkInfo) + case *Ip6tnl: + addIp6tnlAttrs(link, linkInfo) + case *Sittun: + addSittunAttrs(link, linkInfo) + case *Gretun: + addGretunAttrs(link, linkInfo) + case *Vti: + addVtiAttrs(link, linkInfo) + case *Vrf: + addVrfAttrs(link, linkInfo) + case *Bridge: + addBridgeAttrs(link, linkInfo) + case *GTP: + addGTPAttrs(link, linkInfo) + case *Xfrmi: + addXfrmiAttrs(link, linkInfo) + case *IPoIB: + addIPoIBAttrs(link, linkInfo) + case *BareUDP: + addBareUDPAttrs(link, linkInfo) + } + + req.AddData(linkInfo) + + _, err := req.Execute(unix.NETLINK_ROUTE, 0) + if err != nil { + return err + } + + h.ensureIndex(base) + + // can't set master during create, so set it afterwards + if base.MasterIndex != 0 { + // TODO: verify MasterIndex is actually a bridge? + return h.LinkSetMasterByIndex(link, base.MasterIndex) + } + return nil +} + +// LinkDel deletes link device. Either Index or Name must be set in +// the link object for it to be deleted. The other values are ignored. +// Equivalent to: `ip link del $link` +func LinkDel(link Link) error { + return pkgHandle.LinkDel(link) +} + +// LinkDel deletes link device. Either Index or Name must be set in +// the link object for it to be deleted. The other values are ignored. +// Equivalent to: `ip link del $link` +func (h *Handle) LinkDel(link Link) error { + base := link.Attrs() + + h.ensureIndex(base) + + req := h.newNetlinkRequest(unix.RTM_DELLINK, unix.NLM_F_ACK) + + msg := nl.NewIfInfomsg(unix.AF_UNSPEC) + msg.Index = int32(base.Index) + req.AddData(msg) + + _, err := req.Execute(unix.NETLINK_ROUTE, 0) + return err +} + +func (h *Handle) linkByNameDump(name string) (Link, error) { + links, err := h.LinkList() + if err != nil { + return nil, err + } + + for _, link := range links { + if link.Attrs().Name == name { + return link, nil + } + } + return nil, LinkNotFoundError{fmt.Errorf("Link %s not found", name)} +} + +func (h *Handle) linkByAliasDump(alias string) (Link, error) { + links, err := h.LinkList() + if err != nil { + return nil, err + } + + for _, link := range links { + if link.Attrs().Alias == alias { + return link, nil + } + } + return nil, LinkNotFoundError{fmt.Errorf("Link alias %s not found", alias)} +} + +// LinkByName finds a link by name and returns a pointer to the object. +func LinkByName(name string) (Link, error) { + return pkgHandle.LinkByName(name) +} + +// LinkByName finds a link by name and returns a pointer to the object. +func (h *Handle) LinkByName(name string) (Link, error) { + if h.lookupByDump { + return h.linkByNameDump(name) + } + + req := h.newNetlinkRequest(unix.RTM_GETLINK, unix.NLM_F_ACK) + + msg := nl.NewIfInfomsg(unix.AF_UNSPEC) + req.AddData(msg) + + attr := nl.NewRtAttr(unix.IFLA_EXT_MASK, nl.Uint32Attr(nl.RTEXT_FILTER_VF)) + req.AddData(attr) + + nameData := nl.NewRtAttr(unix.IFLA_IFNAME, nl.ZeroTerminated(name)) + req.AddData(nameData) + + link, err := execGetLink(req) + if err == unix.EINVAL { + // older kernels don't support looking up via IFLA_IFNAME + // so fall back to dumping all links + h.lookupByDump = true + return h.linkByNameDump(name) + } + + return link, err +} + +// LinkByAlias finds a link by its alias and returns a pointer to the object. +// If there are multiple links with the alias it returns the first one +func LinkByAlias(alias string) (Link, error) { + return pkgHandle.LinkByAlias(alias) +} + +// LinkByAlias finds a link by its alias and returns a pointer to the object. +// If there are multiple links with the alias it returns the first one +func (h *Handle) LinkByAlias(alias string) (Link, error) { + if h.lookupByDump { + return h.linkByAliasDump(alias) + } + + req := h.newNetlinkRequest(unix.RTM_GETLINK, unix.NLM_F_ACK) + + msg := nl.NewIfInfomsg(unix.AF_UNSPEC) + req.AddData(msg) + + attr := nl.NewRtAttr(unix.IFLA_EXT_MASK, nl.Uint32Attr(nl.RTEXT_FILTER_VF)) + req.AddData(attr) + + nameData := nl.NewRtAttr(unix.IFLA_IFALIAS, nl.ZeroTerminated(alias)) + req.AddData(nameData) + + link, err := execGetLink(req) + if err == unix.EINVAL { + // older kernels don't support looking up via IFLA_IFALIAS + // so fall back to dumping all links + h.lookupByDump = true + return h.linkByAliasDump(alias) + } + + return link, err +} + +// LinkByIndex finds a link by index and returns a pointer to the object. +func LinkByIndex(index int) (Link, error) { + return pkgHandle.LinkByIndex(index) +} + +// LinkByIndex finds a link by index and returns a pointer to the object. +func (h *Handle) LinkByIndex(index int) (Link, error) { + req := h.newNetlinkRequest(unix.RTM_GETLINK, unix.NLM_F_ACK) + + msg := nl.NewIfInfomsg(unix.AF_UNSPEC) + msg.Index = int32(index) + req.AddData(msg) + attr := nl.NewRtAttr(unix.IFLA_EXT_MASK, nl.Uint32Attr(nl.RTEXT_FILTER_VF)) + req.AddData(attr) + + return execGetLink(req) +} + +func execGetLink(req *nl.NetlinkRequest) (Link, error) { + msgs, err := req.Execute(unix.NETLINK_ROUTE, 0) + if err != nil { + if errno, ok := err.(syscall.Errno); ok { + if errno == unix.ENODEV { + return nil, LinkNotFoundError{fmt.Errorf("Link not found")} + } + } + return nil, err + } + + switch { + case len(msgs) == 0: + return nil, LinkNotFoundError{fmt.Errorf("Link not found")} + + case len(msgs) == 1: + return LinkDeserialize(nil, msgs[0]) + + default: + return nil, fmt.Errorf("More than one link found") + } +} + +// LinkDeserialize deserializes a raw message received from netlink into +// a link object. +func LinkDeserialize(hdr *unix.NlMsghdr, m []byte) (Link, error) { + msg := nl.DeserializeIfInfomsg(m) + + attrs, err := nl.ParseRouteAttr(m[msg.Len():]) + if err != nil { + return nil, err + } + + base := NewLinkAttrs() + base.Index = int(msg.Index) + base.RawFlags = msg.Flags + base.Flags = linkFlags(msg.Flags) + base.EncapType = msg.EncapType() + base.NetNsID = -1 + if msg.Flags&unix.IFF_PROMISC != 0 { + base.Promisc = 1 + } + if msg.Flags&unix.IFF_ALLMULTI != 0 { + base.Allmulti = 1 + } + if msg.Flags&unix.IFF_MULTICAST != 0 { + base.Multi = 1 + } + + var ( + link Link + stats32 *LinkStatistics32 + stats64 *LinkStatistics64 + linkType string + linkSlave LinkSlave + slaveType string + ) + for _, attr := range attrs { + switch attr.Attr.Type { + case unix.IFLA_LINKINFO: + infos, err := nl.ParseRouteAttr(attr.Value) + if err != nil { + return nil, err + } + for _, info := range infos { + switch info.Attr.Type { + case nl.IFLA_INFO_KIND: + linkType = string(info.Value[:len(info.Value)-1]) + switch linkType { + case "dummy": + link = &Dummy{} + case "ifb": + link = &Ifb{} + case "bridge": + link = &Bridge{} + case "vlan": + link = &Vlan{} + case "veth": + link = &Veth{} + case "wireguard": + link = &Wireguard{} + case "vxlan": + link = &Vxlan{} + case "bond": + link = &Bond{} + case "ipvlan": + link = &IPVlan{} + case "ipvtap": + link = &IPVtap{} + case "macvlan": + link = &Macvlan{} + case "macvtap": + link = &Macvtap{} + case "geneve": + link = &Geneve{} + case "gretap": + link = &Gretap{} + case "ip6gretap": + link = &Gretap{} + case "ipip": + link = &Iptun{} + case "ip6tnl": + link = &Ip6tnl{} + case "sit": + link = &Sittun{} + case "gre": + link = &Gretun{} + case "ip6gre": + link = &Gretun{} + case "vti", "vti6": + link = &Vti{} + case "vrf": + link = &Vrf{} + case "gtp": + link = >P{} + case "xfrm": + link = &Xfrmi{} + case "tun": + link = &Tuntap{} + case "ipoib": + link = &IPoIB{} + case "can": + link = &Can{} + case "bareudp": + link = &BareUDP{} + default: + link = &GenericLink{LinkType: linkType} + } + case nl.IFLA_INFO_DATA: + data, err := nl.ParseRouteAttr(info.Value) + if err != nil { + return nil, err + } + switch linkType { + case "vlan": + parseVlanData(link, data) + case "vxlan": + parseVxlanData(link, data) + case "bond": + parseBondData(link, data) + case "ipvlan": + parseIPVlanData(link, data) + case "ipvtap": + parseIPVtapData(link, data) + case "macvlan": + parseMacvlanData(link, data) + case "macvtap": + parseMacvtapData(link, data) + case "geneve": + parseGeneveData(link, data) + case "gretap": + parseGretapData(link, data) + case "ip6gretap": + parseGretapData(link, data) + case "ipip": + parseIptunData(link, data) + case "ip6tnl": + parseIp6tnlData(link, data) + case "sit": + parseSittunData(link, data) + case "gre": + parseGretunData(link, data) + case "ip6gre": + parseGretunData(link, data) + case "vti", "vti6": + parseVtiData(link, data) + case "vrf": + parseVrfData(link, data) + case "bridge": + parseBridgeData(link, data) + case "gtp": + parseGTPData(link, data) + case "xfrm": + parseXfrmiData(link, data) + case "tun": + parseTuntapData(link, data) + case "ipoib": + parseIPoIBData(link, data) + case "can": + parseCanData(link, data) + case "bareudp": + parseBareUDPData(link, data) + } + + case nl.IFLA_INFO_SLAVE_KIND: + slaveType = string(info.Value[:len(info.Value)-1]) + switch slaveType { + case "bond": + linkSlave = &BondSlave{} + case "vrf": + linkSlave = &VrfSlave{} + } + + case nl.IFLA_INFO_SLAVE_DATA: + switch slaveType { + case "bond": + data, err := nl.ParseRouteAttr(info.Value) + if err != nil { + return nil, err + } + parseBondSlaveData(linkSlave, data) + case "vrf": + data, err := nl.ParseRouteAttr(info.Value) + if err != nil { + return nil, err + } + parseVrfSlaveData(linkSlave, data) + } + } + } + case unix.IFLA_ADDRESS: + var nonzero bool + for _, b := range attr.Value { + if b != 0 { + nonzero = true + } + } + if nonzero { + base.HardwareAddr = attr.Value[:] + } + case unix.IFLA_IFNAME: + base.Name = string(attr.Value[:len(attr.Value)-1]) + case unix.IFLA_MTU: + base.MTU = int(native.Uint32(attr.Value[0:4])) + case unix.IFLA_LINK: + base.ParentIndex = int(native.Uint32(attr.Value[0:4])) + case unix.IFLA_MASTER: + base.MasterIndex = int(native.Uint32(attr.Value[0:4])) + case unix.IFLA_TXQLEN: + base.TxQLen = int(native.Uint32(attr.Value[0:4])) + case unix.IFLA_IFALIAS: + base.Alias = string(attr.Value[:len(attr.Value)-1]) + case unix.IFLA_STATS: + stats32 = new(LinkStatistics32) + if err := binary.Read(bytes.NewBuffer(attr.Value[:]), nl.NativeEndian(), stats32); err != nil { + return nil, err + } + case unix.IFLA_STATS64: + stats64 = new(LinkStatistics64) + if err := binary.Read(bytes.NewBuffer(attr.Value[:]), nl.NativeEndian(), stats64); err != nil { + return nil, err + } + case unix.IFLA_XDP: + xdp, err := parseLinkXdp(attr.Value[:]) + if err != nil { + return nil, err + } + base.Xdp = xdp + case unix.IFLA_PROTINFO | unix.NLA_F_NESTED: + if hdr != nil && hdr.Type == unix.RTM_NEWLINK && + msg.Family == unix.AF_BRIDGE { + attrs, err := nl.ParseRouteAttr(attr.Value[:]) + if err != nil { + return nil, err + } + protinfo := parseProtinfo(attrs) + base.Protinfo = &protinfo + } + case unix.IFLA_OPERSTATE: + base.OperState = LinkOperState(uint8(attr.Value[0])) + case unix.IFLA_PHYS_SWITCH_ID: + base.PhysSwitchID = int(native.Uint32(attr.Value[0:4])) + case unix.IFLA_LINK_NETNSID: + base.NetNsID = int(native.Uint32(attr.Value[0:4])) + case unix.IFLA_GSO_MAX_SIZE: + base.GSOMaxSize = native.Uint32(attr.Value[0:4]) + case unix.IFLA_GSO_MAX_SEGS: + base.GSOMaxSegs = native.Uint32(attr.Value[0:4]) + case unix.IFLA_VFINFO_LIST: + data, err := nl.ParseRouteAttr(attr.Value) + if err != nil { + return nil, err + } + vfs, err := parseVfInfoList(data) + if err != nil { + return nil, err + } + base.Vfs = vfs + case unix.IFLA_NUM_TX_QUEUES: + base.NumTxQueues = int(native.Uint32(attr.Value[0:4])) + case unix.IFLA_NUM_RX_QUEUES: + base.NumRxQueues = int(native.Uint32(attr.Value[0:4])) + case unix.IFLA_GROUP: + base.Group = native.Uint32(attr.Value[0:4]) + } + } + + if stats64 != nil { + base.Statistics = (*LinkStatistics)(stats64) + } else if stats32 != nil { + base.Statistics = (*LinkStatistics)(stats32.to64()) + } + + // Links that don't have IFLA_INFO_KIND are hardware devices + if link == nil { + link = &Device{} + } + *link.Attrs() = base + link.Attrs().Slave = linkSlave + + // If the tuntap attributes are not updated by netlink due to + // an older driver, use sysfs + if link != nil && linkType == "tun" { + tuntap := link.(*Tuntap) + + if tuntap.Mode == 0 { + ifname := tuntap.Attrs().Name + if flags, err := readSysPropAsInt64(ifname, "tun_flags"); err == nil { + + if flags&unix.IFF_TUN != 0 { + tuntap.Mode = unix.IFF_TUN + } else if flags&unix.IFF_TAP != 0 { + tuntap.Mode = unix.IFF_TAP + } + + tuntap.NonPersist = false + if flags&unix.IFF_PERSIST == 0 { + tuntap.NonPersist = true + } + } + + // The sysfs interface for owner/group returns -1 for root user, instead of returning 0. + // So explicitly check for negative value, before assigning the owner uid/gid. + if owner, err := readSysPropAsInt64(ifname, "owner"); err == nil && owner > 0 { + tuntap.Owner = uint32(owner) + } + + if group, err := readSysPropAsInt64(ifname, "group"); err == nil && group > 0 { + tuntap.Group = uint32(group) + } + } + } + + return link, nil +} + +func readSysPropAsInt64(ifname, prop string) (int64, error) { + fname := fmt.Sprintf("/sys/class/net/%s/%s", ifname, prop) + contents, err := ioutil.ReadFile(fname) + if err != nil { + return 0, err + } + + num, err := strconv.ParseInt(strings.TrimSpace(string(contents)), 0, 64) + if err == nil { + return num, nil + } + + return 0, err +} + +// LinkList gets a list of link devices. +// Equivalent to: `ip link show` +func LinkList() ([]Link, error) { + return pkgHandle.LinkList() +} + +// LinkList gets a list of link devices. +// Equivalent to: `ip link show` +func (h *Handle) LinkList() ([]Link, error) { + // NOTE(vish): This duplicates functionality in net/iface_linux.go, but we need + // to get the message ourselves to parse link type. + req := h.newNetlinkRequest(unix.RTM_GETLINK, unix.NLM_F_DUMP) + + msg := nl.NewIfInfomsg(unix.AF_UNSPEC) + req.AddData(msg) + attr := nl.NewRtAttr(unix.IFLA_EXT_MASK, nl.Uint32Attr(nl.RTEXT_FILTER_VF)) + req.AddData(attr) + + msgs, err := req.Execute(unix.NETLINK_ROUTE, unix.RTM_NEWLINK) + if err != nil { + return nil, err + } + + var res []Link + for _, m := range msgs { + link, err := LinkDeserialize(nil, m) + if err != nil { + return nil, err + } + res = append(res, link) + } + + return res, nil +} + +// LinkUpdate is used to pass information back from LinkSubscribe() +type LinkUpdate struct { + nl.IfInfomsg + Header unix.NlMsghdr + Link +} + +// LinkSubscribe takes a chan down which notifications will be sent +// when links change. Close the 'done' chan to stop subscription. +func LinkSubscribe(ch chan<- LinkUpdate, done <-chan struct{}) error { + return linkSubscribeAt(netns.None(), netns.None(), ch, done, nil, false) +} + +// LinkSubscribeAt works like LinkSubscribe plus it allows the caller +// to choose the network namespace in which to subscribe (ns). +func LinkSubscribeAt(ns netns.NsHandle, ch chan<- LinkUpdate, done <-chan struct{}) error { + return linkSubscribeAt(ns, netns.None(), ch, done, nil, false) +} + +// LinkSubscribeOptions contains a set of options to use with +// LinkSubscribeWithOptions. +type LinkSubscribeOptions struct { + Namespace *netns.NsHandle + ErrorCallback func(error) + ListExisting bool +} + +// LinkSubscribeWithOptions work like LinkSubscribe but enable to +// provide additional options to modify the behavior. Currently, the +// namespace can be provided as well as an error callback. +func LinkSubscribeWithOptions(ch chan<- LinkUpdate, done <-chan struct{}, options LinkSubscribeOptions) error { + if options.Namespace == nil { + none := netns.None() + options.Namespace = &none + } + return linkSubscribeAt(*options.Namespace, netns.None(), ch, done, options.ErrorCallback, options.ListExisting) +} + +func linkSubscribeAt(newNs, curNs netns.NsHandle, ch chan<- LinkUpdate, done <-chan struct{}, cberr func(error), listExisting bool) error { + s, err := nl.SubscribeAt(newNs, curNs, unix.NETLINK_ROUTE, unix.RTNLGRP_LINK) + if err != nil { + return err + } + if done != nil { + go func() { + <-done + s.Close() + }() + } + if listExisting { + req := pkgHandle.newNetlinkRequest(unix.RTM_GETLINK, + unix.NLM_F_DUMP) + msg := nl.NewIfInfomsg(unix.AF_UNSPEC) + req.AddData(msg) + if err := s.Send(req); err != nil { + return err + } + } + go func() { + defer close(ch) + for { + msgs, from, err := s.Receive() + if err != nil { + if cberr != nil { + cberr(fmt.Errorf("Receive failed: %v", + err)) + } + return + } + if from.Pid != nl.PidKernel { + if cberr != nil { + cberr(fmt.Errorf("Wrong sender portid %d, expected %d", from.Pid, nl.PidKernel)) + } + continue + } + for _, m := range msgs { + if m.Header.Type == unix.NLMSG_DONE { + continue + } + if m.Header.Type == unix.NLMSG_ERROR { + error := int32(native.Uint32(m.Data[0:4])) + if error == 0 { + continue + } + if cberr != nil { + cberr(fmt.Errorf("error message: %v", + syscall.Errno(-error))) + } + continue + } + ifmsg := nl.DeserializeIfInfomsg(m.Data) + header := unix.NlMsghdr(m.Header) + link, err := LinkDeserialize(&header, m.Data) + if err != nil { + if cberr != nil { + cberr(err) + } + continue + } + ch <- LinkUpdate{IfInfomsg: *ifmsg, Header: header, Link: link} + } + } + }() + + return nil +} + +func LinkSetHairpin(link Link, mode bool) error { + return pkgHandle.LinkSetHairpin(link, mode) +} + +func (h *Handle) LinkSetHairpin(link Link, mode bool) error { + return h.setProtinfoAttr(link, mode, nl.IFLA_BRPORT_MODE) +} + +func LinkSetGuard(link Link, mode bool) error { + return pkgHandle.LinkSetGuard(link, mode) +} + +func (h *Handle) LinkSetGuard(link Link, mode bool) error { + return h.setProtinfoAttr(link, mode, nl.IFLA_BRPORT_GUARD) +} + +func LinkSetFastLeave(link Link, mode bool) error { + return pkgHandle.LinkSetFastLeave(link, mode) +} + +func (h *Handle) LinkSetFastLeave(link Link, mode bool) error { + return h.setProtinfoAttr(link, mode, nl.IFLA_BRPORT_FAST_LEAVE) +} + +func LinkSetLearning(link Link, mode bool) error { + return pkgHandle.LinkSetLearning(link, mode) +} + +func (h *Handle) LinkSetLearning(link Link, mode bool) error { + return h.setProtinfoAttr(link, mode, nl.IFLA_BRPORT_LEARNING) +} + +func LinkSetRootBlock(link Link, mode bool) error { + return pkgHandle.LinkSetRootBlock(link, mode) +} + +func (h *Handle) LinkSetRootBlock(link Link, mode bool) error { + return h.setProtinfoAttr(link, mode, nl.IFLA_BRPORT_PROTECT) +} + +func LinkSetFlood(link Link, mode bool) error { + return pkgHandle.LinkSetFlood(link, mode) +} + +func (h *Handle) LinkSetFlood(link Link, mode bool) error { + return h.setProtinfoAttr(link, mode, nl.IFLA_BRPORT_UNICAST_FLOOD) +} + +func LinkSetBrProxyArp(link Link, mode bool) error { + return pkgHandle.LinkSetBrProxyArp(link, mode) +} + +func (h *Handle) LinkSetBrProxyArp(link Link, mode bool) error { + return h.setProtinfoAttr(link, mode, nl.IFLA_BRPORT_PROXYARP) +} + +func LinkSetBrProxyArpWiFi(link Link, mode bool) error { + return pkgHandle.LinkSetBrProxyArpWiFi(link, mode) +} + +func (h *Handle) LinkSetBrProxyArpWiFi(link Link, mode bool) error { + return h.setProtinfoAttr(link, mode, nl.IFLA_BRPORT_PROXYARP_WIFI) +} + +func (h *Handle) setProtinfoAttr(link Link, mode bool, attr int) error { + base := link.Attrs() + h.ensureIndex(base) + req := h.newNetlinkRequest(unix.RTM_SETLINK, unix.NLM_F_ACK) + + msg := nl.NewIfInfomsg(unix.AF_BRIDGE) + msg.Index = int32(base.Index) + req.AddData(msg) + + br := nl.NewRtAttr(unix.IFLA_PROTINFO|unix.NLA_F_NESTED, nil) + br.AddRtAttr(attr, boolToByte(mode)) + req.AddData(br) + _, err := req.Execute(unix.NETLINK_ROUTE, 0) + if err != nil { + return err + } + return nil +} + +// LinkSetTxQLen sets the transaction queue length for the link. +// Equivalent to: `ip link set $link txqlen $qlen` +func LinkSetTxQLen(link Link, qlen int) error { + return pkgHandle.LinkSetTxQLen(link, qlen) +} + +// LinkSetTxQLen sets the transaction queue length for the link. +// Equivalent to: `ip link set $link txqlen $qlen` +func (h *Handle) LinkSetTxQLen(link Link, qlen int) error { + base := link.Attrs() + h.ensureIndex(base) + req := h.newNetlinkRequest(unix.RTM_SETLINK, unix.NLM_F_ACK) + + msg := nl.NewIfInfomsg(unix.AF_UNSPEC) + msg.Index = int32(base.Index) + req.AddData(msg) + + b := make([]byte, 4) + native.PutUint32(b, uint32(qlen)) + + data := nl.NewRtAttr(unix.IFLA_TXQLEN, b) + req.AddData(data) + + _, err := req.Execute(unix.NETLINK_ROUTE, 0) + return err +} + +// LinkSetGroup sets the link group id which can be used to perform mass actions +// with iproute2 as well use it as a reference in nft filters. +// Equivalent to: `ip link set $link group $id` +func LinkSetGroup(link Link, group int) error { + return pkgHandle.LinkSetGroup(link, group) +} + +// LinkSetGroup sets the link group id which can be used to perform mass actions +// with iproute2 as well use it as a reference in nft filters. +// Equivalent to: `ip link set $link group $id` +func (h *Handle) LinkSetGroup(link Link, group int) error { + base := link.Attrs() + h.ensureIndex(base) + req := h.newNetlinkRequest(unix.RTM_SETLINK, unix.NLM_F_ACK) + + msg := nl.NewIfInfomsg(unix.AF_UNSPEC) + msg.Index = int32(base.Index) + req.AddData(msg) + + b := make([]byte, 4) + native.PutUint32(b, uint32(group)) + + data := nl.NewRtAttr(unix.IFLA_GROUP, b) + req.AddData(data) + + _, err := req.Execute(unix.NETLINK_ROUTE, 0) + return err +} + +func parseVlanData(link Link, data []syscall.NetlinkRouteAttr) { + vlan := link.(*Vlan) + for _, datum := range data { + switch datum.Attr.Type { + case nl.IFLA_VLAN_ID: + vlan.VlanId = int(native.Uint16(datum.Value[0:2])) + case nl.IFLA_VLAN_PROTOCOL: + vlan.VlanProtocol = VlanProtocol(int(ntohs(datum.Value[0:2]))) + } + } +} + +func parseVxlanData(link Link, data []syscall.NetlinkRouteAttr) { + vxlan := link.(*Vxlan) + for _, datum := range data { + // NOTE(vish): Apparently some messages can be sent with no value. + // We special case GBP here to not change existing + // functionality. It appears that GBP sends a datum.Value + // of null. + if len(datum.Value) == 0 && datum.Attr.Type != nl.IFLA_VXLAN_GBP { + continue + } + switch datum.Attr.Type { + case nl.IFLA_VXLAN_ID: + vxlan.VxlanId = int(native.Uint32(datum.Value[0:4])) + case nl.IFLA_VXLAN_LINK: + vxlan.VtepDevIndex = int(native.Uint32(datum.Value[0:4])) + case nl.IFLA_VXLAN_LOCAL: + vxlan.SrcAddr = net.IP(datum.Value[0:4]) + case nl.IFLA_VXLAN_LOCAL6: + vxlan.SrcAddr = net.IP(datum.Value[0:16]) + case nl.IFLA_VXLAN_GROUP: + vxlan.Group = net.IP(datum.Value[0:4]) + case nl.IFLA_VXLAN_GROUP6: + vxlan.Group = net.IP(datum.Value[0:16]) + case nl.IFLA_VXLAN_TTL: + vxlan.TTL = int(datum.Value[0]) + case nl.IFLA_VXLAN_TOS: + vxlan.TOS = int(datum.Value[0]) + case nl.IFLA_VXLAN_LEARNING: + vxlan.Learning = int8(datum.Value[0]) != 0 + case nl.IFLA_VXLAN_PROXY: + vxlan.Proxy = int8(datum.Value[0]) != 0 + case nl.IFLA_VXLAN_RSC: + vxlan.RSC = int8(datum.Value[0]) != 0 + case nl.IFLA_VXLAN_L2MISS: + vxlan.L2miss = int8(datum.Value[0]) != 0 + case nl.IFLA_VXLAN_L3MISS: + vxlan.L3miss = int8(datum.Value[0]) != 0 + case nl.IFLA_VXLAN_UDP_CSUM: + vxlan.UDPCSum = int8(datum.Value[0]) != 0 + case nl.IFLA_VXLAN_UDP_ZERO_CSUM6_TX: + vxlan.UDP6ZeroCSumTx = int8(datum.Value[0]) != 0 + case nl.IFLA_VXLAN_UDP_ZERO_CSUM6_RX: + vxlan.UDP6ZeroCSumRx = int8(datum.Value[0]) != 0 + case nl.IFLA_VXLAN_GBP: + vxlan.GBP = true + case nl.IFLA_VXLAN_FLOWBASED: + vxlan.FlowBased = int8(datum.Value[0]) != 0 + case nl.IFLA_VXLAN_AGEING: + vxlan.Age = int(native.Uint32(datum.Value[0:4])) + vxlan.NoAge = vxlan.Age == 0 + case nl.IFLA_VXLAN_LIMIT: + vxlan.Limit = int(native.Uint32(datum.Value[0:4])) + case nl.IFLA_VXLAN_PORT: + vxlan.Port = int(ntohs(datum.Value[0:2])) + case nl.IFLA_VXLAN_PORT_RANGE: + buf := bytes.NewBuffer(datum.Value[0:4]) + var pr vxlanPortRange + if binary.Read(buf, binary.BigEndian, &pr) != nil { + vxlan.PortLow = int(pr.Lo) + vxlan.PortHigh = int(pr.Hi) + } + } + } +} + +func parseBondData(link Link, data []syscall.NetlinkRouteAttr) { + bond := link.(*Bond) + for i := range data { + switch data[i].Attr.Type { + case nl.IFLA_BOND_MODE: + bond.Mode = BondMode(data[i].Value[0]) + case nl.IFLA_BOND_ACTIVE_SLAVE: + bond.ActiveSlave = int(native.Uint32(data[i].Value[0:4])) + case nl.IFLA_BOND_MIIMON: + bond.Miimon = int(native.Uint32(data[i].Value[0:4])) + case nl.IFLA_BOND_UPDELAY: + bond.UpDelay = int(native.Uint32(data[i].Value[0:4])) + case nl.IFLA_BOND_DOWNDELAY: + bond.DownDelay = int(native.Uint32(data[i].Value[0:4])) + case nl.IFLA_BOND_USE_CARRIER: + bond.UseCarrier = int(data[i].Value[0]) + case nl.IFLA_BOND_ARP_INTERVAL: + bond.ArpInterval = int(native.Uint32(data[i].Value[0:4])) + case nl.IFLA_BOND_ARP_IP_TARGET: + bond.ArpIpTargets = parseBondArpIpTargets(data[i].Value) + case nl.IFLA_BOND_ARP_VALIDATE: + bond.ArpValidate = BondArpValidate(native.Uint32(data[i].Value[0:4])) + case nl.IFLA_BOND_ARP_ALL_TARGETS: + bond.ArpAllTargets = BondArpAllTargets(native.Uint32(data[i].Value[0:4])) + case nl.IFLA_BOND_PRIMARY: + bond.Primary = int(native.Uint32(data[i].Value[0:4])) + case nl.IFLA_BOND_PRIMARY_RESELECT: + bond.PrimaryReselect = BondPrimaryReselect(data[i].Value[0]) + case nl.IFLA_BOND_FAIL_OVER_MAC: + bond.FailOverMac = BondFailOverMac(data[i].Value[0]) + case nl.IFLA_BOND_XMIT_HASH_POLICY: + bond.XmitHashPolicy = BondXmitHashPolicy(data[i].Value[0]) + case nl.IFLA_BOND_RESEND_IGMP: + bond.ResendIgmp = int(native.Uint32(data[i].Value[0:4])) + case nl.IFLA_BOND_NUM_PEER_NOTIF: + bond.NumPeerNotif = int(data[i].Value[0]) + case nl.IFLA_BOND_ALL_SLAVES_ACTIVE: + bond.AllSlavesActive = int(data[i].Value[0]) + case nl.IFLA_BOND_MIN_LINKS: + bond.MinLinks = int(native.Uint32(data[i].Value[0:4])) + case nl.IFLA_BOND_LP_INTERVAL: + bond.LpInterval = int(native.Uint32(data[i].Value[0:4])) + case nl.IFLA_BOND_PACKETS_PER_SLAVE: + bond.PacketsPerSlave = int(native.Uint32(data[i].Value[0:4])) + case nl.IFLA_BOND_AD_LACP_RATE: + bond.LacpRate = BondLacpRate(data[i].Value[0]) + case nl.IFLA_BOND_AD_SELECT: + bond.AdSelect = BondAdSelect(data[i].Value[0]) + case nl.IFLA_BOND_AD_INFO: + // TODO: implement + case nl.IFLA_BOND_AD_ACTOR_SYS_PRIO: + bond.AdActorSysPrio = int(native.Uint16(data[i].Value[0:2])) + case nl.IFLA_BOND_AD_USER_PORT_KEY: + bond.AdUserPortKey = int(native.Uint16(data[i].Value[0:2])) + case nl.IFLA_BOND_AD_ACTOR_SYSTEM: + bond.AdActorSystem = net.HardwareAddr(data[i].Value[0:6]) + case nl.IFLA_BOND_TLB_DYNAMIC_LB: + bond.TlbDynamicLb = int(data[i].Value[0]) + } + } +} + +func parseBondArpIpTargets(value []byte) []net.IP { + data, err := nl.ParseRouteAttr(value) + if err != nil { + return nil + } + + targets := []net.IP{} + for i := range data { + target := net.IP(data[i].Value) + if ip := target.To4(); ip != nil { + targets = append(targets, ip) + continue + } + if ip := target.To16(); ip != nil { + targets = append(targets, ip) + } + } + + return targets +} + +func addBondSlaveAttrs(bondSlave *BondSlave, linkInfo *nl.RtAttr) { + data := linkInfo.AddRtAttr(nl.IFLA_INFO_SLAVE_DATA, nil) + + data.AddRtAttr(nl.IFLA_BOND_SLAVE_STATE, nl.Uint8Attr(uint8(bondSlave.State))) + data.AddRtAttr(nl.IFLA_BOND_SLAVE_MII_STATUS, nl.Uint8Attr(uint8(bondSlave.MiiStatus))) + data.AddRtAttr(nl.IFLA_BOND_SLAVE_LINK_FAILURE_COUNT, nl.Uint32Attr(bondSlave.LinkFailureCount)) + data.AddRtAttr(nl.IFLA_BOND_SLAVE_QUEUE_ID, nl.Uint16Attr(bondSlave.QueueId)) + data.AddRtAttr(nl.IFLA_BOND_SLAVE_AD_AGGREGATOR_ID, nl.Uint16Attr(bondSlave.AggregatorId)) + data.AddRtAttr(nl.IFLA_BOND_SLAVE_AD_ACTOR_OPER_PORT_STATE, nl.Uint8Attr(bondSlave.AdActorOperPortState)) + data.AddRtAttr(nl.IFLA_BOND_SLAVE_AD_PARTNER_OPER_PORT_STATE, nl.Uint16Attr(bondSlave.AdPartnerOperPortState)) + + if mac := bondSlave.PermHardwareAddr; mac != nil { + data.AddRtAttr(nl.IFLA_BOND_SLAVE_PERM_HWADDR, []byte(mac)) + } +} + +func parseBondSlaveData(slave LinkSlave, data []syscall.NetlinkRouteAttr) { + bondSlave := slave.(*BondSlave) + for i := range data { + switch data[i].Attr.Type { + case nl.IFLA_BOND_SLAVE_STATE: + bondSlave.State = BondSlaveState(data[i].Value[0]) + case nl.IFLA_BOND_SLAVE_MII_STATUS: + bondSlave.MiiStatus = BondSlaveMiiStatus(data[i].Value[0]) + case nl.IFLA_BOND_SLAVE_LINK_FAILURE_COUNT: + bondSlave.LinkFailureCount = native.Uint32(data[i].Value[0:4]) + case nl.IFLA_BOND_SLAVE_PERM_HWADDR: + bondSlave.PermHardwareAddr = net.HardwareAddr(data[i].Value[0:6]) + case nl.IFLA_BOND_SLAVE_QUEUE_ID: + bondSlave.QueueId = native.Uint16(data[i].Value[0:2]) + case nl.IFLA_BOND_SLAVE_AD_AGGREGATOR_ID: + bondSlave.AggregatorId = native.Uint16(data[i].Value[0:2]) + case nl.IFLA_BOND_SLAVE_AD_ACTOR_OPER_PORT_STATE: + bondSlave.AdActorOperPortState = uint8(data[i].Value[0]) + case nl.IFLA_BOND_SLAVE_AD_PARTNER_OPER_PORT_STATE: + bondSlave.AdPartnerOperPortState = native.Uint16(data[i].Value[0:2]) + } + } +} + +func parseVrfSlaveData(slave LinkSlave, data []syscall.NetlinkRouteAttr) { + vrfSlave := slave.(*VrfSlave) + for i := range data { + switch data[i].Attr.Type { + case nl.IFLA_BOND_SLAVE_STATE: + vrfSlave.Table = native.Uint32(data[i].Value[0:4]) + } + } +} + +func parseIPVlanData(link Link, data []syscall.NetlinkRouteAttr) { + ipv := link.(*IPVlan) + for _, datum := range data { + switch datum.Attr.Type { + case nl.IFLA_IPVLAN_MODE: + ipv.Mode = IPVlanMode(native.Uint32(datum.Value[0:4])) + case nl.IFLA_IPVLAN_FLAG: + ipv.Flag = IPVlanFlag(native.Uint32(datum.Value[0:4])) + } + } +} + +func parseIPVtapData(link Link, data []syscall.NetlinkRouteAttr) { + ipv := link.(*IPVtap) + for _, datum := range data { + switch datum.Attr.Type { + case nl.IFLA_IPVLAN_MODE: + ipv.Mode = IPVlanMode(native.Uint32(datum.Value[0:4])) + case nl.IFLA_IPVLAN_FLAG: + ipv.Flag = IPVlanFlag(native.Uint32(datum.Value[0:4])) + } + } +} + +func parseMacvtapData(link Link, data []syscall.NetlinkRouteAttr) { + macv := link.(*Macvtap) + parseMacvlanData(&macv.Macvlan, data) +} + +func parseMacvlanData(link Link, data []syscall.NetlinkRouteAttr) { + macv := link.(*Macvlan) + for _, datum := range data { + switch datum.Attr.Type { + case nl.IFLA_MACVLAN_MODE: + switch native.Uint32(datum.Value[0:4]) { + case nl.MACVLAN_MODE_PRIVATE: + macv.Mode = MACVLAN_MODE_PRIVATE + case nl.MACVLAN_MODE_VEPA: + macv.Mode = MACVLAN_MODE_VEPA + case nl.MACVLAN_MODE_BRIDGE: + macv.Mode = MACVLAN_MODE_BRIDGE + case nl.MACVLAN_MODE_PASSTHRU: + macv.Mode = MACVLAN_MODE_PASSTHRU + case nl.MACVLAN_MODE_SOURCE: + macv.Mode = MACVLAN_MODE_SOURCE + } + case nl.IFLA_MACVLAN_MACADDR_COUNT: + macv.MACAddrs = make([]net.HardwareAddr, 0, int(native.Uint32(datum.Value[0:4]))) + case nl.IFLA_MACVLAN_MACADDR_DATA: + macs, err := nl.ParseRouteAttr(datum.Value[:]) + if err != nil { + panic(fmt.Sprintf("failed to ParseRouteAttr for IFLA_MACVLAN_MACADDR_DATA: %v", err)) + } + for _, macDatum := range macs { + macv.MACAddrs = append(macv.MACAddrs, net.HardwareAddr(macDatum.Value[0:6])) + } + } + } +} + +// copied from pkg/net_linux.go +func linkFlags(rawFlags uint32) net.Flags { + var f net.Flags + if rawFlags&unix.IFF_UP != 0 { + f |= net.FlagUp + } + if rawFlags&unix.IFF_BROADCAST != 0 { + f |= net.FlagBroadcast + } + if rawFlags&unix.IFF_LOOPBACK != 0 { + f |= net.FlagLoopback + } + if rawFlags&unix.IFF_POINTOPOINT != 0 { + f |= net.FlagPointToPoint + } + if rawFlags&unix.IFF_MULTICAST != 0 { + f |= net.FlagMulticast + } + return f +} + +func addGeneveAttrs(geneve *Geneve, linkInfo *nl.RtAttr) { + data := linkInfo.AddRtAttr(nl.IFLA_INFO_DATA, nil) + + if geneve.FlowBased { + // In flow based mode, no other attributes need to be configured + linkInfo.AddRtAttr(nl.IFLA_GENEVE_COLLECT_METADATA, boolAttr(geneve.FlowBased)) + return + } + + if ip := geneve.Remote; ip != nil { + if ip4 := ip.To4(); ip4 != nil { + data.AddRtAttr(nl.IFLA_GENEVE_REMOTE, ip.To4()) + } else { + data.AddRtAttr(nl.IFLA_GENEVE_REMOTE6, []byte(ip)) + } + } + + if geneve.ID != 0 { + data.AddRtAttr(nl.IFLA_GENEVE_ID, nl.Uint32Attr(geneve.ID)) + } + + if geneve.Dport != 0 { + data.AddRtAttr(nl.IFLA_GENEVE_PORT, htons(geneve.Dport)) + } + + if geneve.Ttl != 0 { + data.AddRtAttr(nl.IFLA_GENEVE_TTL, nl.Uint8Attr(geneve.Ttl)) + } + + if geneve.Tos != 0 { + data.AddRtAttr(nl.IFLA_GENEVE_TOS, nl.Uint8Attr(geneve.Tos)) + } +} + +func parseGeneveData(link Link, data []syscall.NetlinkRouteAttr) { + geneve := link.(*Geneve) + for _, datum := range data { + switch datum.Attr.Type { + case nl.IFLA_GENEVE_ID: + geneve.ID = native.Uint32(datum.Value[0:4]) + case nl.IFLA_GENEVE_REMOTE, nl.IFLA_GENEVE_REMOTE6: + geneve.Remote = datum.Value + case nl.IFLA_GENEVE_PORT: + geneve.Dport = ntohs(datum.Value[0:2]) + case nl.IFLA_GENEVE_TTL: + geneve.Ttl = uint8(datum.Value[0]) + case nl.IFLA_GENEVE_TOS: + geneve.Tos = uint8(datum.Value[0]) + } + } +} + +func addGretapAttrs(gretap *Gretap, linkInfo *nl.RtAttr) { + data := linkInfo.AddRtAttr(nl.IFLA_INFO_DATA, nil) + + if gretap.FlowBased { + // In flow based mode, no other attributes need to be configured + data.AddRtAttr(nl.IFLA_GRE_COLLECT_METADATA, boolAttr(gretap.FlowBased)) + return + } + + if ip := gretap.Local; ip != nil { + if ip.To4() != nil { + ip = ip.To4() + } + data.AddRtAttr(nl.IFLA_GRE_LOCAL, []byte(ip)) + } + + if ip := gretap.Remote; ip != nil { + if ip.To4() != nil { + ip = ip.To4() + } + data.AddRtAttr(nl.IFLA_GRE_REMOTE, []byte(ip)) + } + + if gretap.IKey != 0 { + data.AddRtAttr(nl.IFLA_GRE_IKEY, htonl(gretap.IKey)) + gretap.IFlags |= uint16(nl.GRE_KEY) + } + + if gretap.OKey != 0 { + data.AddRtAttr(nl.IFLA_GRE_OKEY, htonl(gretap.OKey)) + gretap.OFlags |= uint16(nl.GRE_KEY) + } + + data.AddRtAttr(nl.IFLA_GRE_IFLAGS, htons(gretap.IFlags)) + data.AddRtAttr(nl.IFLA_GRE_OFLAGS, htons(gretap.OFlags)) + + if gretap.Link != 0 { + data.AddRtAttr(nl.IFLA_GRE_LINK, nl.Uint32Attr(gretap.Link)) + } + + data.AddRtAttr(nl.IFLA_GRE_PMTUDISC, nl.Uint8Attr(gretap.PMtuDisc)) + data.AddRtAttr(nl.IFLA_GRE_TTL, nl.Uint8Attr(gretap.Ttl)) + data.AddRtAttr(nl.IFLA_GRE_TOS, nl.Uint8Attr(gretap.Tos)) + data.AddRtAttr(nl.IFLA_GRE_ENCAP_TYPE, nl.Uint16Attr(gretap.EncapType)) + data.AddRtAttr(nl.IFLA_GRE_ENCAP_FLAGS, nl.Uint16Attr(gretap.EncapFlags)) + data.AddRtAttr(nl.IFLA_GRE_ENCAP_SPORT, htons(gretap.EncapSport)) + data.AddRtAttr(nl.IFLA_GRE_ENCAP_DPORT, htons(gretap.EncapDport)) +} + +func parseGretapData(link Link, data []syscall.NetlinkRouteAttr) { + gre := link.(*Gretap) + for _, datum := range data { + switch datum.Attr.Type { + case nl.IFLA_GRE_OKEY: + gre.IKey = ntohl(datum.Value[0:4]) + case nl.IFLA_GRE_IKEY: + gre.OKey = ntohl(datum.Value[0:4]) + case nl.IFLA_GRE_LOCAL: + gre.Local = net.IP(datum.Value) + case nl.IFLA_GRE_REMOTE: + gre.Remote = net.IP(datum.Value) + case nl.IFLA_GRE_ENCAP_SPORT: + gre.EncapSport = ntohs(datum.Value[0:2]) + case nl.IFLA_GRE_ENCAP_DPORT: + gre.EncapDport = ntohs(datum.Value[0:2]) + case nl.IFLA_GRE_IFLAGS: + gre.IFlags = ntohs(datum.Value[0:2]) + case nl.IFLA_GRE_OFLAGS: + gre.OFlags = ntohs(datum.Value[0:2]) + case nl.IFLA_GRE_TTL: + gre.Ttl = uint8(datum.Value[0]) + case nl.IFLA_GRE_TOS: + gre.Tos = uint8(datum.Value[0]) + case nl.IFLA_GRE_PMTUDISC: + gre.PMtuDisc = uint8(datum.Value[0]) + case nl.IFLA_GRE_ENCAP_TYPE: + gre.EncapType = native.Uint16(datum.Value[0:2]) + case nl.IFLA_GRE_ENCAP_FLAGS: + gre.EncapFlags = native.Uint16(datum.Value[0:2]) + case nl.IFLA_GRE_COLLECT_METADATA: + gre.FlowBased = true + } + } +} + +func addGretunAttrs(gre *Gretun, linkInfo *nl.RtAttr) { + data := linkInfo.AddRtAttr(nl.IFLA_INFO_DATA, nil) + + if ip := gre.Local; ip != nil { + if ip.To4() != nil { + ip = ip.To4() + } + data.AddRtAttr(nl.IFLA_GRE_LOCAL, []byte(ip)) + } + + if ip := gre.Remote; ip != nil { + if ip.To4() != nil { + ip = ip.To4() + } + data.AddRtAttr(nl.IFLA_GRE_REMOTE, []byte(ip)) + } + + if gre.IKey != 0 { + data.AddRtAttr(nl.IFLA_GRE_IKEY, htonl(gre.IKey)) + gre.IFlags |= uint16(nl.GRE_KEY) + } + + if gre.OKey != 0 { + data.AddRtAttr(nl.IFLA_GRE_OKEY, htonl(gre.OKey)) + gre.OFlags |= uint16(nl.GRE_KEY) + } + + data.AddRtAttr(nl.IFLA_GRE_IFLAGS, htons(gre.IFlags)) + data.AddRtAttr(nl.IFLA_GRE_OFLAGS, htons(gre.OFlags)) + + if gre.Link != 0 { + data.AddRtAttr(nl.IFLA_GRE_LINK, nl.Uint32Attr(gre.Link)) + } + + data.AddRtAttr(nl.IFLA_GRE_PMTUDISC, nl.Uint8Attr(gre.PMtuDisc)) + data.AddRtAttr(nl.IFLA_GRE_TTL, nl.Uint8Attr(gre.Ttl)) + data.AddRtAttr(nl.IFLA_GRE_TOS, nl.Uint8Attr(gre.Tos)) + data.AddRtAttr(nl.IFLA_GRE_ENCAP_TYPE, nl.Uint16Attr(gre.EncapType)) + data.AddRtAttr(nl.IFLA_GRE_ENCAP_FLAGS, nl.Uint16Attr(gre.EncapFlags)) + data.AddRtAttr(nl.IFLA_GRE_ENCAP_SPORT, htons(gre.EncapSport)) + data.AddRtAttr(nl.IFLA_GRE_ENCAP_DPORT, htons(gre.EncapDport)) +} + +func parseGretunData(link Link, data []syscall.NetlinkRouteAttr) { + gre := link.(*Gretun) + for _, datum := range data { + switch datum.Attr.Type { + case nl.IFLA_GRE_IKEY: + gre.IKey = ntohl(datum.Value[0:4]) + case nl.IFLA_GRE_OKEY: + gre.OKey = ntohl(datum.Value[0:4]) + case nl.IFLA_GRE_LOCAL: + gre.Local = net.IP(datum.Value) + case nl.IFLA_GRE_REMOTE: + gre.Remote = net.IP(datum.Value) + case nl.IFLA_GRE_IFLAGS: + gre.IFlags = ntohs(datum.Value[0:2]) + case nl.IFLA_GRE_OFLAGS: + gre.OFlags = ntohs(datum.Value[0:2]) + case nl.IFLA_GRE_TTL: + gre.Ttl = uint8(datum.Value[0]) + case nl.IFLA_GRE_TOS: + gre.Tos = uint8(datum.Value[0]) + case nl.IFLA_GRE_PMTUDISC: + gre.PMtuDisc = uint8(datum.Value[0]) + case nl.IFLA_GRE_ENCAP_TYPE: + gre.EncapType = native.Uint16(datum.Value[0:2]) + case nl.IFLA_GRE_ENCAP_FLAGS: + gre.EncapFlags = native.Uint16(datum.Value[0:2]) + case nl.IFLA_GRE_ENCAP_SPORT: + gre.EncapSport = ntohs(datum.Value[0:2]) + case nl.IFLA_GRE_ENCAP_DPORT: + gre.EncapDport = ntohs(datum.Value[0:2]) + } + } +} + +func addXdpAttrs(xdp *LinkXdp, req *nl.NetlinkRequest) { + attrs := nl.NewRtAttr(unix.IFLA_XDP|unix.NLA_F_NESTED, nil) + b := make([]byte, 4) + native.PutUint32(b, uint32(xdp.Fd)) + attrs.AddRtAttr(nl.IFLA_XDP_FD, b) + if xdp.Flags != 0 { + b := make([]byte, 4) + native.PutUint32(b, xdp.Flags) + attrs.AddRtAttr(nl.IFLA_XDP_FLAGS, b) + } + req.AddData(attrs) +} + +func parseLinkXdp(data []byte) (*LinkXdp, error) { + attrs, err := nl.ParseRouteAttr(data) + if err != nil { + return nil, err + } + xdp := &LinkXdp{} + for _, attr := range attrs { + switch attr.Attr.Type { + case nl.IFLA_XDP_FD: + xdp.Fd = int(native.Uint32(attr.Value[0:4])) + case nl.IFLA_XDP_ATTACHED: + xdp.AttachMode = uint32(attr.Value[0]) + xdp.Attached = xdp.AttachMode != 0 + case nl.IFLA_XDP_FLAGS: + xdp.Flags = native.Uint32(attr.Value[0:4]) + case nl.IFLA_XDP_PROG_ID: + xdp.ProgId = native.Uint32(attr.Value[0:4]) + } + } + return xdp, nil +} + +func addIptunAttrs(iptun *Iptun, linkInfo *nl.RtAttr) { + if iptun.FlowBased { + // In flow based mode, no other attributes need to be configured + linkInfo.AddRtAttr(nl.IFLA_IPTUN_COLLECT_METADATA, boolAttr(iptun.FlowBased)) + return + } + + data := linkInfo.AddRtAttr(nl.IFLA_INFO_DATA, nil) + + ip := iptun.Local.To4() + if ip != nil { + data.AddRtAttr(nl.IFLA_IPTUN_LOCAL, []byte(ip)) + } + + ip = iptun.Remote.To4() + if ip != nil { + data.AddRtAttr(nl.IFLA_IPTUN_REMOTE, []byte(ip)) + } + + if iptun.Link != 0 { + data.AddRtAttr(nl.IFLA_IPTUN_LINK, nl.Uint32Attr(iptun.Link)) + } + data.AddRtAttr(nl.IFLA_IPTUN_PMTUDISC, nl.Uint8Attr(iptun.PMtuDisc)) + data.AddRtAttr(nl.IFLA_IPTUN_TTL, nl.Uint8Attr(iptun.Ttl)) + data.AddRtAttr(nl.IFLA_IPTUN_TOS, nl.Uint8Attr(iptun.Tos)) + data.AddRtAttr(nl.IFLA_IPTUN_ENCAP_TYPE, nl.Uint16Attr(iptun.EncapType)) + data.AddRtAttr(nl.IFLA_IPTUN_ENCAP_FLAGS, nl.Uint16Attr(iptun.EncapFlags)) + data.AddRtAttr(nl.IFLA_IPTUN_ENCAP_SPORT, htons(iptun.EncapSport)) + data.AddRtAttr(nl.IFLA_IPTUN_ENCAP_DPORT, htons(iptun.EncapDport)) + data.AddRtAttr(nl.IFLA_IPTUN_PROTO, nl.Uint8Attr(iptun.Proto)) +} + +func parseIptunData(link Link, data []syscall.NetlinkRouteAttr) { + iptun := link.(*Iptun) + for _, datum := range data { + // NOTE: same with vxlan, ip tunnel may also has null datum.Value + if len(datum.Value) == 0 { + continue + } + switch datum.Attr.Type { + case nl.IFLA_IPTUN_LOCAL: + iptun.Local = net.IP(datum.Value[0:4]) + case nl.IFLA_IPTUN_REMOTE: + iptun.Remote = net.IP(datum.Value[0:4]) + case nl.IFLA_IPTUN_TTL: + iptun.Ttl = uint8(datum.Value[0]) + case nl.IFLA_IPTUN_TOS: + iptun.Tos = uint8(datum.Value[0]) + case nl.IFLA_IPTUN_PMTUDISC: + iptun.PMtuDisc = uint8(datum.Value[0]) + case nl.IFLA_IPTUN_ENCAP_SPORT: + iptun.EncapSport = ntohs(datum.Value[0:2]) + case nl.IFLA_IPTUN_ENCAP_DPORT: + iptun.EncapDport = ntohs(datum.Value[0:2]) + case nl.IFLA_IPTUN_ENCAP_TYPE: + iptun.EncapType = native.Uint16(datum.Value[0:2]) + case nl.IFLA_IPTUN_ENCAP_FLAGS: + iptun.EncapFlags = native.Uint16(datum.Value[0:2]) + case nl.IFLA_IPTUN_COLLECT_METADATA: + iptun.FlowBased = true + case nl.IFLA_IPTUN_PROTO: + iptun.Proto = datum.Value[0] + } + } +} + +func addIp6tnlAttrs(ip6tnl *Ip6tnl, linkInfo *nl.RtAttr) { + data := linkInfo.AddRtAttr(nl.IFLA_INFO_DATA, nil) + + if ip6tnl.Link != 0 { + data.AddRtAttr(nl.IFLA_IPTUN_LINK, nl.Uint32Attr(ip6tnl.Link)) + } + + ip := ip6tnl.Local.To16() + if ip != nil { + data.AddRtAttr(nl.IFLA_IPTUN_LOCAL, []byte(ip)) + } + + ip = ip6tnl.Remote.To16() + if ip != nil { + data.AddRtAttr(nl.IFLA_IPTUN_REMOTE, []byte(ip)) + } + + data.AddRtAttr(nl.IFLA_IPTUN_TTL, nl.Uint8Attr(ip6tnl.Ttl)) + data.AddRtAttr(nl.IFLA_IPTUN_TOS, nl.Uint8Attr(ip6tnl.Tos)) + data.AddRtAttr(nl.IFLA_IPTUN_FLAGS, nl.Uint32Attr(ip6tnl.Flags)) + data.AddRtAttr(nl.IFLA_IPTUN_PROTO, nl.Uint8Attr(ip6tnl.Proto)) + data.AddRtAttr(nl.IFLA_IPTUN_FLOWINFO, nl.Uint32Attr(ip6tnl.FlowInfo)) + data.AddRtAttr(nl.IFLA_IPTUN_ENCAP_LIMIT, nl.Uint8Attr(ip6tnl.EncapLimit)) + data.AddRtAttr(nl.IFLA_IPTUN_ENCAP_TYPE, nl.Uint16Attr(ip6tnl.EncapType)) + data.AddRtAttr(nl.IFLA_IPTUN_ENCAP_FLAGS, nl.Uint16Attr(ip6tnl.EncapFlags)) + data.AddRtAttr(nl.IFLA_IPTUN_ENCAP_SPORT, htons(ip6tnl.EncapSport)) + data.AddRtAttr(nl.IFLA_IPTUN_ENCAP_DPORT, htons(ip6tnl.EncapDport)) +} + +func parseIp6tnlData(link Link, data []syscall.NetlinkRouteAttr) { + ip6tnl := link.(*Ip6tnl) + for _, datum := range data { + switch datum.Attr.Type { + case nl.IFLA_IPTUN_LOCAL: + ip6tnl.Local = net.IP(datum.Value[:16]) + case nl.IFLA_IPTUN_REMOTE: + ip6tnl.Remote = net.IP(datum.Value[:16]) + case nl.IFLA_IPTUN_TTL: + ip6tnl.Ttl = datum.Value[0] + case nl.IFLA_IPTUN_TOS: + ip6tnl.Tos = datum.Value[0] + case nl.IFLA_IPTUN_FLAGS: + ip6tnl.Flags = native.Uint32(datum.Value[:4]) + case nl.IFLA_IPTUN_PROTO: + ip6tnl.Proto = datum.Value[0] + case nl.IFLA_IPTUN_FLOWINFO: + ip6tnl.FlowInfo = native.Uint32(datum.Value[:4]) + case nl.IFLA_IPTUN_ENCAP_LIMIT: + ip6tnl.EncapLimit = datum.Value[0] + case nl.IFLA_IPTUN_ENCAP_TYPE: + ip6tnl.EncapType = native.Uint16(datum.Value[0:2]) + case nl.IFLA_IPTUN_ENCAP_FLAGS: + ip6tnl.EncapFlags = native.Uint16(datum.Value[0:2]) + case nl.IFLA_IPTUN_ENCAP_SPORT: + ip6tnl.EncapSport = ntohs(datum.Value[0:2]) + case nl.IFLA_IPTUN_ENCAP_DPORT: + ip6tnl.EncapDport = ntohs(datum.Value[0:2]) + } + } +} + +func addSittunAttrs(sittun *Sittun, linkInfo *nl.RtAttr) { + data := linkInfo.AddRtAttr(nl.IFLA_INFO_DATA, nil) + + if sittun.Link != 0 { + data.AddRtAttr(nl.IFLA_IPTUN_LINK, nl.Uint32Attr(sittun.Link)) + } + + ip := sittun.Local.To4() + if ip != nil { + data.AddRtAttr(nl.IFLA_IPTUN_LOCAL, []byte(ip)) + } + + ip = sittun.Remote.To4() + if ip != nil { + data.AddRtAttr(nl.IFLA_IPTUN_REMOTE, []byte(ip)) + } + + if sittun.Ttl > 0 { + // Would otherwise fail on 3.10 kernel + data.AddRtAttr(nl.IFLA_IPTUN_TTL, nl.Uint8Attr(sittun.Ttl)) + } + + data.AddRtAttr(nl.IFLA_IPTUN_PROTO, nl.Uint8Attr(sittun.Proto)) + data.AddRtAttr(nl.IFLA_IPTUN_TOS, nl.Uint8Attr(sittun.Tos)) + data.AddRtAttr(nl.IFLA_IPTUN_PMTUDISC, nl.Uint8Attr(sittun.PMtuDisc)) + data.AddRtAttr(nl.IFLA_IPTUN_ENCAP_LIMIT, nl.Uint8Attr(sittun.EncapLimit)) + data.AddRtAttr(nl.IFLA_IPTUN_ENCAP_TYPE, nl.Uint16Attr(sittun.EncapType)) + data.AddRtAttr(nl.IFLA_IPTUN_ENCAP_FLAGS, nl.Uint16Attr(sittun.EncapFlags)) + data.AddRtAttr(nl.IFLA_IPTUN_ENCAP_SPORT, htons(sittun.EncapSport)) + data.AddRtAttr(nl.IFLA_IPTUN_ENCAP_DPORT, htons(sittun.EncapDport)) +} + +func parseSittunData(link Link, data []syscall.NetlinkRouteAttr) { + sittun := link.(*Sittun) + for _, datum := range data { + switch datum.Attr.Type { + case nl.IFLA_IPTUN_LOCAL: + sittun.Local = net.IP(datum.Value[0:4]) + case nl.IFLA_IPTUN_REMOTE: + sittun.Remote = net.IP(datum.Value[0:4]) + case nl.IFLA_IPTUN_TTL: + sittun.Ttl = datum.Value[0] + case nl.IFLA_IPTUN_TOS: + sittun.Tos = datum.Value[0] + case nl.IFLA_IPTUN_PMTUDISC: + sittun.PMtuDisc = datum.Value[0] + case nl.IFLA_IPTUN_PROTO: + sittun.Proto = datum.Value[0] + case nl.IFLA_IPTUN_ENCAP_TYPE: + sittun.EncapType = native.Uint16(datum.Value[0:2]) + case nl.IFLA_IPTUN_ENCAP_FLAGS: + sittun.EncapFlags = native.Uint16(datum.Value[0:2]) + case nl.IFLA_IPTUN_ENCAP_SPORT: + sittun.EncapSport = ntohs(datum.Value[0:2]) + case nl.IFLA_IPTUN_ENCAP_DPORT: + sittun.EncapDport = ntohs(datum.Value[0:2]) + } + } +} + +func addVtiAttrs(vti *Vti, linkInfo *nl.RtAttr) { + data := linkInfo.AddRtAttr(nl.IFLA_INFO_DATA, nil) + + family := FAMILY_V4 + if vti.Local.To4() == nil { + family = FAMILY_V6 + } + + var ip net.IP + + if family == FAMILY_V4 { + ip = vti.Local.To4() + } else { + ip = vti.Local + } + if ip != nil { + data.AddRtAttr(nl.IFLA_VTI_LOCAL, []byte(ip)) + } + + if family == FAMILY_V4 { + ip = vti.Remote.To4() + } else { + ip = vti.Remote + } + if ip != nil { + data.AddRtAttr(nl.IFLA_VTI_REMOTE, []byte(ip)) + } + + if vti.Link != 0 { + data.AddRtAttr(nl.IFLA_VTI_LINK, nl.Uint32Attr(vti.Link)) + } + + data.AddRtAttr(nl.IFLA_VTI_IKEY, htonl(vti.IKey)) + data.AddRtAttr(nl.IFLA_VTI_OKEY, htonl(vti.OKey)) +} + +func parseVtiData(link Link, data []syscall.NetlinkRouteAttr) { + vti := link.(*Vti) + for _, datum := range data { + switch datum.Attr.Type { + case nl.IFLA_VTI_LOCAL: + vti.Local = net.IP(datum.Value) + case nl.IFLA_VTI_REMOTE: + vti.Remote = net.IP(datum.Value) + case nl.IFLA_VTI_IKEY: + vti.IKey = ntohl(datum.Value[0:4]) + case nl.IFLA_VTI_OKEY: + vti.OKey = ntohl(datum.Value[0:4]) + } + } +} + +func addVrfAttrs(vrf *Vrf, linkInfo *nl.RtAttr) { + data := linkInfo.AddRtAttr(nl.IFLA_INFO_DATA, nil) + b := make([]byte, 4) + native.PutUint32(b, uint32(vrf.Table)) + data.AddRtAttr(nl.IFLA_VRF_TABLE, b) +} + +func parseVrfData(link Link, data []syscall.NetlinkRouteAttr) { + vrf := link.(*Vrf) + for _, datum := range data { + switch datum.Attr.Type { + case nl.IFLA_VRF_TABLE: + vrf.Table = native.Uint32(datum.Value[0:4]) + } + } +} + +func addBridgeAttrs(bridge *Bridge, linkInfo *nl.RtAttr) { + data := linkInfo.AddRtAttr(nl.IFLA_INFO_DATA, nil) + if bridge.MulticastSnooping != nil { + data.AddRtAttr(nl.IFLA_BR_MCAST_SNOOPING, boolToByte(*bridge.MulticastSnooping)) + } + if bridge.AgeingTime != nil { + data.AddRtAttr(nl.IFLA_BR_AGEING_TIME, nl.Uint32Attr(*bridge.AgeingTime)) + } + if bridge.HelloTime != nil { + data.AddRtAttr(nl.IFLA_BR_HELLO_TIME, nl.Uint32Attr(*bridge.HelloTime)) + } + if bridge.VlanFiltering != nil { + data.AddRtAttr(nl.IFLA_BR_VLAN_FILTERING, boolToByte(*bridge.VlanFiltering)) + } +} + +func parseBridgeData(bridge Link, data []syscall.NetlinkRouteAttr) { + br := bridge.(*Bridge) + for _, datum := range data { + switch datum.Attr.Type { + case nl.IFLA_BR_AGEING_TIME: + ageingTime := native.Uint32(datum.Value[0:4]) + br.AgeingTime = &ageingTime + case nl.IFLA_BR_HELLO_TIME: + helloTime := native.Uint32(datum.Value[0:4]) + br.HelloTime = &helloTime + case nl.IFLA_BR_MCAST_SNOOPING: + mcastSnooping := datum.Value[0] == 1 + br.MulticastSnooping = &mcastSnooping + case nl.IFLA_BR_VLAN_FILTERING: + vlanFiltering := datum.Value[0] == 1 + br.VlanFiltering = &vlanFiltering + } + } +} + +func addGTPAttrs(gtp *GTP, linkInfo *nl.RtAttr) { + data := linkInfo.AddRtAttr(nl.IFLA_INFO_DATA, nil) + data.AddRtAttr(nl.IFLA_GTP_FD0, nl.Uint32Attr(uint32(gtp.FD0))) + data.AddRtAttr(nl.IFLA_GTP_FD1, nl.Uint32Attr(uint32(gtp.FD1))) + data.AddRtAttr(nl.IFLA_GTP_PDP_HASHSIZE, nl.Uint32Attr(131072)) + if gtp.Role != nl.GTP_ROLE_GGSN { + data.AddRtAttr(nl.IFLA_GTP_ROLE, nl.Uint32Attr(uint32(gtp.Role))) + } +} + +func parseGTPData(link Link, data []syscall.NetlinkRouteAttr) { + gtp := link.(*GTP) + for _, datum := range data { + switch datum.Attr.Type { + case nl.IFLA_GTP_FD0: + gtp.FD0 = int(native.Uint32(datum.Value)) + case nl.IFLA_GTP_FD1: + gtp.FD1 = int(native.Uint32(datum.Value)) + case nl.IFLA_GTP_PDP_HASHSIZE: + gtp.PDPHashsize = int(native.Uint32(datum.Value)) + case nl.IFLA_GTP_ROLE: + gtp.Role = int(native.Uint32(datum.Value)) + } + } +} + +func parseVfInfoList(data []syscall.NetlinkRouteAttr) ([]VfInfo, error) { + var vfs []VfInfo + + for i, element := range data { + if element.Attr.Type != nl.IFLA_VF_INFO { + return nil, fmt.Errorf("Incorrect element type in vf info list: %d", element.Attr.Type) + } + vfAttrs, err := nl.ParseRouteAttr(element.Value) + if err != nil { + return nil, err + } + vfs = append(vfs, parseVfInfo(vfAttrs, i)) + } + return vfs, nil +} + +func parseVfInfo(data []syscall.NetlinkRouteAttr, id int) VfInfo { + vf := VfInfo{ID: id} + for _, element := range data { + switch element.Attr.Type { + case nl.IFLA_VF_MAC: + mac := nl.DeserializeVfMac(element.Value[:]) + vf.Mac = mac.Mac[:6] + case nl.IFLA_VF_VLAN: + vl := nl.DeserializeVfVlan(element.Value[:]) + vf.Vlan = int(vl.Vlan) + vf.Qos = int(vl.Qos) + case nl.IFLA_VF_TX_RATE: + txr := nl.DeserializeVfTxRate(element.Value[:]) + vf.TxRate = int(txr.Rate) + case nl.IFLA_VF_SPOOFCHK: + sp := nl.DeserializeVfSpoofchk(element.Value[:]) + vf.Spoofchk = sp.Setting != 0 + case nl.IFLA_VF_LINK_STATE: + ls := nl.DeserializeVfLinkState(element.Value[:]) + vf.LinkState = ls.LinkState + case nl.IFLA_VF_RATE: + vfr := nl.DeserializeVfRate(element.Value[:]) + vf.MaxTxRate = vfr.MaxTxRate + vf.MinTxRate = vfr.MinTxRate + case nl.IFLA_VF_STATS: + vfstats := nl.DeserializeVfStats(element.Value[:]) + vf.RxPackets = vfstats.RxPackets + vf.TxPackets = vfstats.TxPackets + vf.RxBytes = vfstats.RxBytes + vf.TxBytes = vfstats.TxBytes + vf.Multicast = vfstats.Multicast + vf.Broadcast = vfstats.Broadcast + vf.RxDropped = vfstats.RxDropped + vf.TxDropped = vfstats.TxDropped + + case nl.IFLA_VF_RSS_QUERY_EN: + result := nl.DeserializeVfRssQueryEn(element.Value) + vf.RssQuery = result.Setting + + case nl.IFLA_VF_TRUST: + result := nl.DeserializeVfTrust(element.Value) + vf.Trust = result.Setting + } + } + return vf +} + +func addXfrmiAttrs(xfrmi *Xfrmi, linkInfo *nl.RtAttr) { + data := linkInfo.AddRtAttr(nl.IFLA_INFO_DATA, nil) + data.AddRtAttr(nl.IFLA_XFRM_LINK, nl.Uint32Attr(uint32(xfrmi.ParentIndex))) + if xfrmi.Ifid != 0 { + data.AddRtAttr(nl.IFLA_XFRM_IF_ID, nl.Uint32Attr(xfrmi.Ifid)) + } +} + +func parseXfrmiData(link Link, data []syscall.NetlinkRouteAttr) { + xfrmi := link.(*Xfrmi) + for _, datum := range data { + switch datum.Attr.Type { + case nl.IFLA_XFRM_LINK: + xfrmi.ParentIndex = int(native.Uint32(datum.Value)) + case nl.IFLA_XFRM_IF_ID: + xfrmi.Ifid = native.Uint32(datum.Value) + } + } +} + +// LinkSetBondSlave add slave to bond link via ioctl interface. +func LinkSetBondSlave(link Link, master *Bond) error { + fd, err := getSocketUDP() + if err != nil { + return err + } + defer syscall.Close(fd) + + ifreq := newIocltSlaveReq(link.Attrs().Name, master.Attrs().Name) + + _, _, errno := syscall.Syscall(syscall.SYS_IOCTL, uintptr(fd), unix.SIOCBONDENSLAVE, uintptr(unsafe.Pointer(ifreq))) + if errno != 0 { + return fmt.Errorf("Failed to enslave %q to %q, errno=%v", link.Attrs().Name, master.Attrs().Name, errno) + } + return nil +} + +// LinkSetBondSlaveQueueId modify bond slave queue-id. +func (h *Handle) LinkSetBondSlaveQueueId(link Link, queueId uint16) error { + base := link.Attrs() + h.ensureIndex(base) + req := h.newNetlinkRequest(unix.RTM_SETLINK, unix.NLM_F_ACK) + + msg := nl.NewIfInfomsg(unix.AF_UNSPEC) + msg.Index = int32(base.Index) + req.AddData(msg) + + linkInfo := nl.NewRtAttr(unix.IFLA_LINKINFO, nil) + data := linkInfo.AddRtAttr(nl.IFLA_INFO_SLAVE_DATA, nil) + data.AddRtAttr(nl.IFLA_BOND_SLAVE_QUEUE_ID, nl.Uint16Attr(queueId)) + + req.AddData(linkInfo) + _, err := req.Execute(unix.NETLINK_ROUTE, 0) + return err +} + +// LinkSetBondSlaveQueueId modify bond slave queue-id. +func LinkSetBondSlaveQueueId(link Link, queueId uint16) error { + return pkgHandle.LinkSetBondSlaveQueueId(link, queueId) +} + +func vethStatsSerialize(stats ethtoolStats) ([]byte, error) { + statsSize := int(unsafe.Sizeof(stats)) + int(stats.nStats)*int(unsafe.Sizeof(uint64(0))) + b := make([]byte, 0, statsSize) + buf := bytes.NewBuffer(b) + err := binary.Write(buf, nl.NativeEndian(), stats) + return buf.Bytes()[:statsSize], err +} + +type vethEthtoolStats struct { + Cmd uint32 + NStats uint32 + Peer uint64 + // Newer kernels have XDP stats in here, but we only care + // to extract the peer ifindex here. +} + +func vethStatsDeserialize(b []byte) (vethEthtoolStats, error) { + var stats = vethEthtoolStats{} + err := binary.Read(bytes.NewReader(b), nl.NativeEndian(), &stats) + return stats, err +} + +// VethPeerIndex get veth peer index. +func VethPeerIndex(link *Veth) (int, error) { + fd, err := getSocketUDP() + if err != nil { + return -1, err + } + defer syscall.Close(fd) + + ifreq, sSet := newIocltStringSetReq(link.Name) + _, _, errno := syscall.Syscall(syscall.SYS_IOCTL, uintptr(fd), SIOCETHTOOL, uintptr(unsafe.Pointer(ifreq))) + if errno != 0 { + return -1, fmt.Errorf("SIOCETHTOOL request for %q failed, errno=%v", link.Attrs().Name, errno) + } + + stats := ethtoolStats{ + cmd: ETHTOOL_GSTATS, + nStats: sSet.data[0], + } + + buffer, err := vethStatsSerialize(stats) + if err != nil { + return -1, err + } + + ifreq.Data = uintptr(unsafe.Pointer(&buffer[0])) + _, _, errno = syscall.Syscall(syscall.SYS_IOCTL, uintptr(fd), SIOCETHTOOL, uintptr(unsafe.Pointer(ifreq))) + if errno != 0 { + return -1, fmt.Errorf("SIOCETHTOOL request for %q failed, errno=%v", link.Attrs().Name, errno) + } + + vstats, err := vethStatsDeserialize(buffer) + if err != nil { + return -1, err + } + + return int(vstats.Peer), nil +} + +func parseTuntapData(link Link, data []syscall.NetlinkRouteAttr) { + tuntap := link.(*Tuntap) + for _, datum := range data { + switch datum.Attr.Type { + case nl.IFLA_TUN_OWNER: + tuntap.Owner = native.Uint32(datum.Value) + case nl.IFLA_TUN_GROUP: + tuntap.Group = native.Uint32(datum.Value) + case nl.IFLA_TUN_TYPE: + tuntap.Mode = TuntapMode(uint8(datum.Value[0])) + case nl.IFLA_TUN_PERSIST: + tuntap.NonPersist = false + if uint8(datum.Value[0]) == 0 { + tuntap.NonPersist = true + } + } + } +} + +func parseIPoIBData(link Link, data []syscall.NetlinkRouteAttr) { + ipoib := link.(*IPoIB) + for _, datum := range data { + switch datum.Attr.Type { + case nl.IFLA_IPOIB_PKEY: + ipoib.Pkey = uint16(native.Uint16(datum.Value)) + case nl.IFLA_IPOIB_MODE: + ipoib.Mode = IPoIBMode(native.Uint16(datum.Value)) + case nl.IFLA_IPOIB_UMCAST: + ipoib.Umcast = uint16(native.Uint16(datum.Value)) + } + } +} + +func parseCanData(link Link, data []syscall.NetlinkRouteAttr) { + can := link.(*Can) + for _, datum := range data { + + switch datum.Attr.Type { + case nl.IFLA_CAN_BITTIMING: + can.BitRate = native.Uint32(datum.Value) + can.SamplePoint = native.Uint32(datum.Value[4:]) + can.TimeQuanta = native.Uint32(datum.Value[8:]) + can.PropagationSegment = native.Uint32(datum.Value[12:]) + can.PhaseSegment1 = native.Uint32(datum.Value[16:]) + can.PhaseSegment2 = native.Uint32(datum.Value[20:]) + can.SyncJumpWidth = native.Uint32(datum.Value[24:]) + can.BitRatePreScaler = native.Uint32(datum.Value[28:]) + case nl.IFLA_CAN_BITTIMING_CONST: + can.Name = string(datum.Value[:16]) + can.TimeSegment1Min = native.Uint32(datum.Value[16:]) + can.TimeSegment1Max = native.Uint32(datum.Value[20:]) + can.TimeSegment2Min = native.Uint32(datum.Value[24:]) + can.TimeSegment2Max = native.Uint32(datum.Value[28:]) + can.SyncJumpWidthMax = native.Uint32(datum.Value[32:]) + can.BitRatePreScalerMin = native.Uint32(datum.Value[36:]) + can.BitRatePreScalerMax = native.Uint32(datum.Value[40:]) + can.BitRatePreScalerInc = native.Uint32(datum.Value[44:]) + case nl.IFLA_CAN_CLOCK: + can.ClockFrequency = native.Uint32(datum.Value) + case nl.IFLA_CAN_STATE: + can.State = native.Uint32(datum.Value) + case nl.IFLA_CAN_CTRLMODE: + can.Mask = native.Uint32(datum.Value) + can.Flags = native.Uint32(datum.Value[4:]) + case nl.IFLA_CAN_BERR_COUNTER: + can.TxError = native.Uint16(datum.Value) + can.RxError = native.Uint16(datum.Value[2:]) + case nl.IFLA_CAN_RESTART_MS: + can.RestartMs = native.Uint32(datum.Value) + case nl.IFLA_CAN_DATA_BITTIMING_CONST: + case nl.IFLA_CAN_RESTART: + case nl.IFLA_CAN_DATA_BITTIMING: + case nl.IFLA_CAN_TERMINATION: + case nl.IFLA_CAN_TERMINATION_CONST: + case nl.IFLA_CAN_BITRATE_CONST: + case nl.IFLA_CAN_DATA_BITRATE_CONST: + case nl.IFLA_CAN_BITRATE_MAX: + } + } +} + +func addIPoIBAttrs(ipoib *IPoIB, linkInfo *nl.RtAttr) { + data := linkInfo.AddRtAttr(nl.IFLA_INFO_DATA, nil) + data.AddRtAttr(nl.IFLA_IPOIB_PKEY, nl.Uint16Attr(uint16(ipoib.Pkey))) + data.AddRtAttr(nl.IFLA_IPOIB_MODE, nl.Uint16Attr(uint16(ipoib.Mode))) + data.AddRtAttr(nl.IFLA_IPOIB_UMCAST, nl.Uint16Attr(uint16(ipoib.Umcast))) +} + +func addBareUDPAttrs(bareudp *BareUDP, linkInfo *nl.RtAttr) { + data := linkInfo.AddRtAttr(nl.IFLA_INFO_DATA, nil) + + data.AddRtAttr(nl.IFLA_BAREUDP_PORT, nl.Uint16Attr(nl.Swap16(bareudp.Port))) + data.AddRtAttr(nl.IFLA_BAREUDP_ETHERTYPE, nl.Uint16Attr(nl.Swap16(bareudp.EtherType))) + if bareudp.SrcPortMin != 0 { + data.AddRtAttr(nl.IFLA_BAREUDP_SRCPORT_MIN, nl.Uint16Attr(bareudp.SrcPortMin)) + } + if bareudp.MultiProto { + data.AddRtAttr(nl.IFLA_BAREUDP_MULTIPROTO_MODE, []byte{}) + } +} + +func parseBareUDPData(link Link, data []syscall.NetlinkRouteAttr) { + bareudp := link.(*BareUDP) + for _, attr := range data { + switch attr.Attr.Type { + case nl.IFLA_BAREUDP_PORT: + bareudp.Port = binary.BigEndian.Uint16(attr.Value) + case nl.IFLA_BAREUDP_ETHERTYPE: + bareudp.EtherType = binary.BigEndian.Uint16(attr.Value) + case nl.IFLA_BAREUDP_SRCPORT_MIN: + bareudp.SrcPortMin = native.Uint16(attr.Value) + case nl.IFLA_BAREUDP_MULTIPROTO_MODE: + bareudp.MultiProto = true + } + } +} diff --git a/vendor/github.com/vishvananda/netlink/link_tuntap_linux.go b/vendor/github.com/vishvananda/netlink/link_tuntap_linux.go new file mode 100644 index 000000000000..310bd33d8d42 --- /dev/null +++ b/vendor/github.com/vishvananda/netlink/link_tuntap_linux.go @@ -0,0 +1,14 @@ +package netlink + +// ideally golang.org/x/sys/unix would define IfReq but it only has +// IFNAMSIZ, hence this minimalistic implementation +const ( + SizeOfIfReq = 40 + IFNAMSIZ = 16 +) + +type ifReq struct { + Name [IFNAMSIZ]byte + Flags uint16 + pad [SizeOfIfReq - IFNAMSIZ - 2]byte +} diff --git a/vendor/github.com/vishvananda/netlink/neigh.go b/vendor/github.com/vishvananda/netlink/neigh.go new file mode 100644 index 000000000000..32d722e88587 --- /dev/null +++ b/vendor/github.com/vishvananda/netlink/neigh.go @@ -0,0 +1,33 @@ +package netlink + +import ( + "fmt" + "net" +) + +// Neigh represents a link layer neighbor from netlink. +type Neigh struct { + LinkIndex int + Family int + State int + Type int + Flags int + FlagsExt int + IP net.IP + HardwareAddr net.HardwareAddr + LLIPAddr net.IP //Used in the case of NHRP + Vlan int + VNI int + MasterIndex int +} + +// String returns $ip/$hwaddr $label +func (neigh *Neigh) String() string { + return fmt.Sprintf("%s %s", neigh.IP, neigh.HardwareAddr) +} + +// NeighUpdate is sent when a neighbor changes - type is RTM_NEWNEIGH or RTM_DELNEIGH. +type NeighUpdate struct { + Type uint16 + Neigh +} diff --git a/vendor/github.com/vishvananda/netlink/neigh_linux.go b/vendor/github.com/vishvananda/netlink/neigh_linux.go new file mode 100644 index 000000000000..4c1e766351c3 --- /dev/null +++ b/vendor/github.com/vishvananda/netlink/neigh_linux.go @@ -0,0 +1,452 @@ +package netlink + +import ( + "fmt" + "net" + "syscall" + "unsafe" + + "github.com/vishvananda/netlink/nl" + "github.com/vishvananda/netns" + "golang.org/x/sys/unix" +) + +const ( + NDA_UNSPEC = iota + NDA_DST + NDA_LLADDR + NDA_CACHEINFO + NDA_PROBES + NDA_VLAN + NDA_PORT + NDA_VNI + NDA_IFINDEX + NDA_MASTER + NDA_LINK_NETNSID + NDA_SRC_VNI + NDA_PROTOCOL + NDA_NH_ID + NDA_FDB_EXT_ATTRS + NDA_FLAGS_EXT + NDA_MAX = NDA_FLAGS_EXT +) + +// Neighbor Cache Entry States. +const ( + NUD_NONE = 0x00 + NUD_INCOMPLETE = 0x01 + NUD_REACHABLE = 0x02 + NUD_STALE = 0x04 + NUD_DELAY = 0x08 + NUD_PROBE = 0x10 + NUD_FAILED = 0x20 + NUD_NOARP = 0x40 + NUD_PERMANENT = 0x80 +) + +// Neighbor Flags +const ( + NTF_USE = 0x01 + NTF_SELF = 0x02 + NTF_MASTER = 0x04 + NTF_PROXY = 0x08 + NTF_EXT_LEARNED = 0x10 + NTF_OFFLOADED = 0x20 + NTF_STICKY = 0x40 + NTF_ROUTER = 0x80 +) + +// Extended Neighbor Flags +const ( + NTF_EXT_MANAGED = 0x00000001 +) + +// Ndmsg is for adding, removing or receiving information about a neighbor table entry +type Ndmsg struct { + Family uint8 + Index uint32 + State uint16 + Flags uint8 + Type uint8 +} + +func deserializeNdmsg(b []byte) *Ndmsg { + var dummy Ndmsg + return (*Ndmsg)(unsafe.Pointer(&b[0:unsafe.Sizeof(dummy)][0])) +} + +func (msg *Ndmsg) Serialize() []byte { + return (*(*[unsafe.Sizeof(*msg)]byte)(unsafe.Pointer(msg)))[:] +} + +func (msg *Ndmsg) Len() int { + return int(unsafe.Sizeof(*msg)) +} + +// NeighAdd will add an IP to MAC mapping to the ARP table +// Equivalent to: `ip neigh add ....` +func NeighAdd(neigh *Neigh) error { + return pkgHandle.NeighAdd(neigh) +} + +// NeighAdd will add an IP to MAC mapping to the ARP table +// Equivalent to: `ip neigh add ....` +func (h *Handle) NeighAdd(neigh *Neigh) error { + return h.neighAdd(neigh, unix.NLM_F_CREATE|unix.NLM_F_EXCL) +} + +// NeighSet will add or replace an IP to MAC mapping to the ARP table +// Equivalent to: `ip neigh replace....` +func NeighSet(neigh *Neigh) error { + return pkgHandle.NeighSet(neigh) +} + +// NeighSet will add or replace an IP to MAC mapping to the ARP table +// Equivalent to: `ip neigh replace....` +func (h *Handle) NeighSet(neigh *Neigh) error { + return h.neighAdd(neigh, unix.NLM_F_CREATE|unix.NLM_F_REPLACE) +} + +// NeighAppend will append an entry to FDB +// Equivalent to: `bridge fdb append...` +func NeighAppend(neigh *Neigh) error { + return pkgHandle.NeighAppend(neigh) +} + +// NeighAppend will append an entry to FDB +// Equivalent to: `bridge fdb append...` +func (h *Handle) NeighAppend(neigh *Neigh) error { + return h.neighAdd(neigh, unix.NLM_F_CREATE|unix.NLM_F_APPEND) +} + +// NeighAppend will append an entry to FDB +// Equivalent to: `bridge fdb append...` +func neighAdd(neigh *Neigh, mode int) error { + return pkgHandle.neighAdd(neigh, mode) +} + +// NeighAppend will append an entry to FDB +// Equivalent to: `bridge fdb append...` +func (h *Handle) neighAdd(neigh *Neigh, mode int) error { + req := h.newNetlinkRequest(unix.RTM_NEWNEIGH, mode|unix.NLM_F_ACK) + return neighHandle(neigh, req) +} + +// NeighDel will delete an IP address from a link device. +// Equivalent to: `ip addr del $addr dev $link` +func NeighDel(neigh *Neigh) error { + return pkgHandle.NeighDel(neigh) +} + +// NeighDel will delete an IP address from a link device. +// Equivalent to: `ip addr del $addr dev $link` +func (h *Handle) NeighDel(neigh *Neigh) error { + req := h.newNetlinkRequest(unix.RTM_DELNEIGH, unix.NLM_F_ACK) + return neighHandle(neigh, req) +} + +func neighHandle(neigh *Neigh, req *nl.NetlinkRequest) error { + var family int + + if neigh.Family > 0 { + family = neigh.Family + } else { + family = nl.GetIPFamily(neigh.IP) + } + + msg := Ndmsg{ + Family: uint8(family), + Index: uint32(neigh.LinkIndex), + State: uint16(neigh.State), + Type: uint8(neigh.Type), + Flags: uint8(neigh.Flags), + } + req.AddData(&msg) + + ipData := neigh.IP.To4() + if ipData == nil { + ipData = neigh.IP.To16() + } + + dstData := nl.NewRtAttr(NDA_DST, ipData) + req.AddData(dstData) + + if neigh.LLIPAddr != nil { + llIPData := nl.NewRtAttr(NDA_LLADDR, neigh.LLIPAddr.To4()) + req.AddData(llIPData) + } else if neigh.HardwareAddr != nil { + hwData := nl.NewRtAttr(NDA_LLADDR, []byte(neigh.HardwareAddr)) + req.AddData(hwData) + } + + if neigh.FlagsExt != 0 { + flagsExtData := nl.NewRtAttr(NDA_FLAGS_EXT, nl.Uint32Attr(uint32(neigh.FlagsExt))) + req.AddData(flagsExtData) + } + + if neigh.Vlan != 0 { + vlanData := nl.NewRtAttr(NDA_VLAN, nl.Uint16Attr(uint16(neigh.Vlan))) + req.AddData(vlanData) + } + + if neigh.VNI != 0 { + vniData := nl.NewRtAttr(NDA_VNI, nl.Uint32Attr(uint32(neigh.VNI))) + req.AddData(vniData) + } + + if neigh.MasterIndex != 0 { + masterData := nl.NewRtAttr(NDA_MASTER, nl.Uint32Attr(uint32(neigh.MasterIndex))) + req.AddData(masterData) + } + + _, err := req.Execute(unix.NETLINK_ROUTE, 0) + return err +} + +// NeighList returns a list of IP-MAC mappings in the system (ARP table). +// Equivalent to: `ip neighbor show`. +// The list can be filtered by link and ip family. +func NeighList(linkIndex, family int) ([]Neigh, error) { + return pkgHandle.NeighList(linkIndex, family) +} + +// NeighProxyList returns a list of neighbor proxies in the system. +// Equivalent to: `ip neighbor show proxy`. +// The list can be filtered by link and ip family. +func NeighProxyList(linkIndex, family int) ([]Neigh, error) { + return pkgHandle.NeighProxyList(linkIndex, family) +} + +// NeighList returns a list of IP-MAC mappings in the system (ARP table). +// Equivalent to: `ip neighbor show`. +// The list can be filtered by link and ip family. +func (h *Handle) NeighList(linkIndex, family int) ([]Neigh, error) { + return h.NeighListExecute(Ndmsg{ + Family: uint8(family), + Index: uint32(linkIndex), + }) +} + +// NeighProxyList returns a list of neighbor proxies in the system. +// Equivalent to: `ip neighbor show proxy`. +// The list can be filtered by link, ip family. +func (h *Handle) NeighProxyList(linkIndex, family int) ([]Neigh, error) { + return h.NeighListExecute(Ndmsg{ + Family: uint8(family), + Index: uint32(linkIndex), + Flags: NTF_PROXY, + }) +} + +// NeighListExecute returns a list of neighbour entries filtered by link, ip family, flag and state. +func NeighListExecute(msg Ndmsg) ([]Neigh, error) { + return pkgHandle.NeighListExecute(msg) +} + +// NeighListExecute returns a list of neighbour entries filtered by link, ip family, flag and state. +func (h *Handle) NeighListExecute(msg Ndmsg) ([]Neigh, error) { + req := h.newNetlinkRequest(unix.RTM_GETNEIGH, unix.NLM_F_DUMP) + req.AddData(&msg) + + msgs, err := req.Execute(unix.NETLINK_ROUTE, unix.RTM_NEWNEIGH) + if err != nil { + return nil, err + } + + var res []Neigh + for _, m := range msgs { + ndm := deserializeNdmsg(m) + if msg.Index != 0 && ndm.Index != msg.Index { + // Ignore messages from other interfaces + continue + } + if msg.Family != 0 && ndm.Family != msg.Family { + continue + } + if msg.State != 0 && ndm.State != msg.State { + continue + } + if msg.Type != 0 && ndm.Type != msg.Type { + continue + } + if msg.Flags != 0 && ndm.Flags != msg.Flags { + continue + } + + neigh, err := NeighDeserialize(m) + if err != nil { + continue + } + + res = append(res, *neigh) + } + + return res, nil +} + +func NeighDeserialize(m []byte) (*Neigh, error) { + msg := deserializeNdmsg(m) + + neigh := Neigh{ + LinkIndex: int(msg.Index), + Family: int(msg.Family), + State: int(msg.State), + Type: int(msg.Type), + Flags: int(msg.Flags), + } + + attrs, err := nl.ParseRouteAttr(m[msg.Len():]) + if err != nil { + return nil, err + } + + for _, attr := range attrs { + switch attr.Attr.Type { + case NDA_DST: + neigh.IP = net.IP(attr.Value) + case NDA_LLADDR: + // BUG: Is this a bug in the netlink library? + // #define RTA_LENGTH(len) (RTA_ALIGN(sizeof(struct rtattr)) + (len)) + // #define RTA_PAYLOAD(rta) ((int)((rta)->rta_len) - RTA_LENGTH(0)) + attrLen := attr.Attr.Len - unix.SizeofRtAttr + if attrLen == 4 { + neigh.LLIPAddr = net.IP(attr.Value) + } else if attrLen == 16 { + // Can be IPv6 or FireWire HWAddr + link, err := LinkByIndex(neigh.LinkIndex) + if err == nil && link.Attrs().EncapType == "tunnel6" { + neigh.IP = net.IP(attr.Value) + } else { + neigh.HardwareAddr = net.HardwareAddr(attr.Value) + } + } else { + neigh.HardwareAddr = net.HardwareAddr(attr.Value) + } + case NDA_FLAGS_EXT: + neigh.FlagsExt = int(native.Uint32(attr.Value[0:4])) + case NDA_VLAN: + neigh.Vlan = int(native.Uint16(attr.Value[0:2])) + case NDA_VNI: + neigh.VNI = int(native.Uint32(attr.Value[0:4])) + case NDA_MASTER: + neigh.MasterIndex = int(native.Uint32(attr.Value[0:4])) + } + } + + return &neigh, nil +} + +// NeighSubscribe takes a chan down which notifications will be sent +// when neighbors are added or deleted. Close the 'done' chan to stop subscription. +func NeighSubscribe(ch chan<- NeighUpdate, done <-chan struct{}) error { + return neighSubscribeAt(netns.None(), netns.None(), ch, done, nil, false) +} + +// NeighSubscribeAt works like NeighSubscribe plus it allows the caller +// to choose the network namespace in which to subscribe (ns). +func NeighSubscribeAt(ns netns.NsHandle, ch chan<- NeighUpdate, done <-chan struct{}) error { + return neighSubscribeAt(ns, netns.None(), ch, done, nil, false) +} + +// NeighSubscribeOptions contains a set of options to use with +// NeighSubscribeWithOptions. +type NeighSubscribeOptions struct { + Namespace *netns.NsHandle + ErrorCallback func(error) + ListExisting bool +} + +// NeighSubscribeWithOptions work like NeighSubscribe but enable to +// provide additional options to modify the behavior. Currently, the +// namespace can be provided as well as an error callback. +func NeighSubscribeWithOptions(ch chan<- NeighUpdate, done <-chan struct{}, options NeighSubscribeOptions) error { + if options.Namespace == nil { + none := netns.None() + options.Namespace = &none + } + return neighSubscribeAt(*options.Namespace, netns.None(), ch, done, options.ErrorCallback, options.ListExisting) +} + +func neighSubscribeAt(newNs, curNs netns.NsHandle, ch chan<- NeighUpdate, done <-chan struct{}, cberr func(error), listExisting bool) error { + s, err := nl.SubscribeAt(newNs, curNs, unix.NETLINK_ROUTE, unix.RTNLGRP_NEIGH) + makeRequest := func(family int) error { + req := pkgHandle.newNetlinkRequest(unix.RTM_GETNEIGH, + unix.NLM_F_DUMP) + infmsg := nl.NewIfInfomsg(family) + req.AddData(infmsg) + if err := s.Send(req); err != nil { + return err + } + return nil + } + if err != nil { + return err + } + if done != nil { + go func() { + <-done + s.Close() + }() + } + if listExisting { + if err := makeRequest(unix.AF_UNSPEC); err != nil { + return err + } + // We have to wait for NLMSG_DONE before making AF_BRIDGE request + } + go func() { + defer close(ch) + for { + msgs, from, err := s.Receive() + if err != nil { + if cberr != nil { + cberr(err) + } + return + } + if from.Pid != nl.PidKernel { + if cberr != nil { + cberr(fmt.Errorf("Wrong sender portid %d, expected %d", from.Pid, nl.PidKernel)) + } + continue + } + for _, m := range msgs { + if m.Header.Type == unix.NLMSG_DONE { + if listExisting { + // This will be called after handling AF_UNSPEC + // list request, we have to wait for NLMSG_DONE + // before making another request + if err := makeRequest(unix.AF_BRIDGE); err != nil { + if cberr != nil { + cberr(err) + } + return + } + listExisting = false + } + continue + } + if m.Header.Type == unix.NLMSG_ERROR { + error := int32(native.Uint32(m.Data[0:4])) + if error == 0 { + continue + } + if cberr != nil { + cberr(syscall.Errno(-error)) + } + return + } + neigh, err := NeighDeserialize(m.Data) + if err != nil { + if cberr != nil { + cberr(err) + } + return + } + ch <- NeighUpdate{Type: m.Header.Type, Neigh: *neigh} + } + } + }() + + return nil +} diff --git a/vendor/github.com/vishvananda/netlink/netlink.go b/vendor/github.com/vishvananda/netlink/netlink.go new file mode 100644 index 000000000000..9cb685dc818f --- /dev/null +++ b/vendor/github.com/vishvananda/netlink/netlink.go @@ -0,0 +1,40 @@ +// Package netlink provides a simple library for netlink. Netlink is +// the interface a user-space program in linux uses to communicate with +// the kernel. It can be used to add and remove interfaces, set up ip +// addresses and routes, and confiugre ipsec. Netlink communication +// requires elevated privileges, so in most cases this code needs to +// be run as root. The low level primitives for netlink are contained +// in the nl subpackage. This package attempts to provide a high-level +// interface that is loosly modeled on the iproute2 cli. +package netlink + +import ( + "errors" + "net" +) + +var ( + // ErrNotImplemented is returned when a requested feature is not implemented. + ErrNotImplemented = errors.New("not implemented") +) + +// ParseIPNet parses a string in ip/net format and returns a net.IPNet. +// This is valuable because addresses in netlink are often IPNets and +// ParseCIDR returns an IPNet with the IP part set to the base IP of the +// range. +func ParseIPNet(s string) (*net.IPNet, error) { + ip, ipNet, err := net.ParseCIDR(s) + if err != nil { + return nil, err + } + ipNet.IP = ip + return ipNet, nil +} + +// NewIPNet generates an IPNet from an ip address using a netmask of 32 or 128. +func NewIPNet(ip net.IP) *net.IPNet { + if ip.To4() != nil { + return &net.IPNet{IP: ip, Mask: net.CIDRMask(32, 32)} + } + return &net.IPNet{IP: ip, Mask: net.CIDRMask(128, 128)} +} diff --git a/vendor/github.com/vishvananda/netlink/netlink_linux.go b/vendor/github.com/vishvananda/netlink/netlink_linux.go new file mode 100644 index 000000000000..a20d293d870f --- /dev/null +++ b/vendor/github.com/vishvananda/netlink/netlink_linux.go @@ -0,0 +1,11 @@ +package netlink + +import "github.com/vishvananda/netlink/nl" + +// Family type definitions +const ( + FAMILY_ALL = nl.FAMILY_ALL + FAMILY_V4 = nl.FAMILY_V4 + FAMILY_V6 = nl.FAMILY_V6 + FAMILY_MPLS = nl.FAMILY_MPLS +) diff --git a/vendor/github.com/vishvananda/netlink/netlink_unspecified.go b/vendor/github.com/vishvananda/netlink/netlink_unspecified.go new file mode 100644 index 000000000000..98d2c0dbf374 --- /dev/null +++ b/vendor/github.com/vishvananda/netlink/netlink_unspecified.go @@ -0,0 +1,257 @@ +// +build !linux + +package netlink + +import "net" + +func LinkSetUp(link Link) error { + return ErrNotImplemented +} + +func LinkSetDown(link Link) error { + return ErrNotImplemented +} + +func LinkSetMTU(link Link, mtu int) error { + return ErrNotImplemented +} + +func LinkSetMaster(link Link, master Link) error { + return ErrNotImplemented +} + +func LinkSetNsPid(link Link, nspid int) error { + return ErrNotImplemented +} + +func LinkSetNsFd(link Link, fd int) error { + return ErrNotImplemented +} + +func LinkSetName(link Link, name string) error { + return ErrNotImplemented +} + +func LinkSetAlias(link Link, name string) error { + return ErrNotImplemented +} + +func LinkSetHardwareAddr(link Link, hwaddr net.HardwareAddr) error { + return ErrNotImplemented +} + +func LinkSetVfHardwareAddr(link Link, vf int, hwaddr net.HardwareAddr) error { + return ErrNotImplemented +} + +func LinkSetVfVlan(link Link, vf, vlan int) error { + return ErrNotImplemented +} + +func LinkSetVfVlanQos(link Link, vf, vlan, qos int) error { + return ErrNotImplemented +} + +func LinkSetVfTxRate(link Link, vf, rate int) error { + return ErrNotImplemented +} + +func LinkSetVfRate(link Link, vf, minRate, maxRate int) error { + return ErrNotImplemented +} + +func LinkSetNoMaster(link Link) error { + return ErrNotImplemented +} + +func LinkSetMasterByIndex(link Link, masterIndex int) error { + return ErrNotImplemented +} + +func LinkSetXdpFd(link Link, fd int) error { + return ErrNotImplemented +} + +func LinkSetXdpFdWithFlags(link Link, fd, flags int) error { + return ErrNotImplemented +} + +func LinkSetARPOff(link Link) error { + return ErrNotImplemented +} + +func LinkSetARPOn(link Link) error { + return ErrNotImplemented +} + +func LinkByName(name string) (Link, error) { + return nil, ErrNotImplemented +} + +func LinkByAlias(alias string) (Link, error) { + return nil, ErrNotImplemented +} + +func LinkByIndex(index int) (Link, error) { + return nil, ErrNotImplemented +} + +func LinkSetHairpin(link Link, mode bool) error { + return ErrNotImplemented +} + +func LinkSetGuard(link Link, mode bool) error { + return ErrNotImplemented +} + +func LinkSetFastLeave(link Link, mode bool) error { + return ErrNotImplemented +} + +func LinkSetLearning(link Link, mode bool) error { + return ErrNotImplemented +} + +func LinkSetRootBlock(link Link, mode bool) error { + return ErrNotImplemented +} + +func LinkSetFlood(link Link, mode bool) error { + return ErrNotImplemented +} + +func LinkSetTxQLen(link Link, qlen int) error { + return ErrNotImplemented +} + +func LinkAdd(link Link) error { + return ErrNotImplemented +} + +func LinkDel(link Link) error { + return ErrNotImplemented +} + +func SetHairpin(link Link, mode bool) error { + return ErrNotImplemented +} + +func SetGuard(link Link, mode bool) error { + return ErrNotImplemented +} + +func SetFastLeave(link Link, mode bool) error { + return ErrNotImplemented +} + +func SetLearning(link Link, mode bool) error { + return ErrNotImplemented +} + +func SetRootBlock(link Link, mode bool) error { + return ErrNotImplemented +} + +func SetFlood(link Link, mode bool) error { + return ErrNotImplemented +} + +func LinkList() ([]Link, error) { + return nil, ErrNotImplemented +} + +func AddrAdd(link Link, addr *Addr) error { + return ErrNotImplemented +} + +func AddrReplace(link Link, addr *Addr) error { + return ErrNotImplemented +} + +func AddrDel(link Link, addr *Addr) error { + return ErrNotImplemented +} + +func AddrList(link Link, family int) ([]Addr, error) { + return nil, ErrNotImplemented +} + +func RouteAdd(route *Route) error { + return ErrNotImplemented +} + +func RouteAppend(route *Route) error { + return ErrNotImplemented +} + +func RouteDel(route *Route) error { + return ErrNotImplemented +} + +func RouteGet(destination net.IP) ([]Route, error) { + return nil, ErrNotImplemented +} + +func RouteList(link Link, family int) ([]Route, error) { + return nil, ErrNotImplemented +} + +func RouteListFiltered(family int, filter *Route, filterMask uint64) ([]Route, error) { + return nil, ErrNotImplemented +} + +func RouteReplace(route *Route) error { + return ErrNotImplemented +} + +func XfrmPolicyAdd(policy *XfrmPolicy) error { + return ErrNotImplemented +} + +func XfrmPolicyDel(policy *XfrmPolicy) error { + return ErrNotImplemented +} + +func XfrmPolicyList(family int) ([]XfrmPolicy, error) { + return nil, ErrNotImplemented +} + +func XfrmStateAdd(policy *XfrmState) error { + return ErrNotImplemented +} + +func XfrmStateDel(policy *XfrmState) error { + return ErrNotImplemented +} + +func XfrmStateList(family int) ([]XfrmState, error) { + return nil, ErrNotImplemented +} + +func NeighAdd(neigh *Neigh) error { + return ErrNotImplemented +} + +func NeighSet(neigh *Neigh) error { + return ErrNotImplemented +} + +func NeighAppend(neigh *Neigh) error { + return ErrNotImplemented +} + +func NeighDel(neigh *Neigh) error { + return ErrNotImplemented +} + +func NeighList(linkIndex, family int) ([]Neigh, error) { + return nil, ErrNotImplemented +} + +func NeighDeserialize(m []byte) (*Neigh, error) { + return nil, ErrNotImplemented +} + +func SocketGet(local, remote net.Addr) (*Socket, error) { + return nil, ErrNotImplemented +} diff --git a/vendor/github.com/vishvananda/netlink/netns_linux.go b/vendor/github.com/vishvananda/netlink/netns_linux.go new file mode 100644 index 000000000000..2eb29c7ce019 --- /dev/null +++ b/vendor/github.com/vishvananda/netlink/netns_linux.go @@ -0,0 +1,141 @@ +package netlink + +// Network namespace ID functions +// +// The kernel has a weird concept called the network namespace ID. +// This is different from the file reference in proc (and any bind-mounted +// namespaces, etc.) +// +// Instead, namespaces can be assigned a numeric ID at any time. Once set, +// the ID is fixed. The ID can either be set manually by the user, or +// automatically, triggered by certain kernel actions. The most common kernel +// action that triggers namespace ID creation is moving one end of a veth pair +// in to that namespace. + +import ( + "fmt" + + "github.com/vishvananda/netlink/nl" + "golang.org/x/sys/unix" +) + +// These can be replaced by the values from sys/unix when it is next released. +const ( + _ = iota + NETNSA_NSID + NETNSA_PID + NETNSA_FD +) + +// GetNetNsIdByPid looks up the network namespace ID for a given pid (really thread id). +// Returns -1 if the namespace does not have an ID set. +func (h *Handle) GetNetNsIdByPid(pid int) (int, error) { + return h.getNetNsId(NETNSA_PID, uint32(pid)) +} + +// GetNetNsIdByPid looks up the network namespace ID for a given pid (really thread id). +// Returns -1 if the namespace does not have an ID set. +func GetNetNsIdByPid(pid int) (int, error) { + return pkgHandle.GetNetNsIdByPid(pid) +} + +// SetNetNSIdByPid sets the ID of the network namespace for a given pid (really thread id). +// The ID can only be set for namespaces without an ID already set. +func (h *Handle) SetNetNsIdByPid(pid, nsid int) error { + return h.setNetNsId(NETNSA_PID, uint32(pid), uint32(nsid)) +} + +// SetNetNSIdByPid sets the ID of the network namespace for a given pid (really thread id). +// The ID can only be set for namespaces without an ID already set. +func SetNetNsIdByPid(pid, nsid int) error { + return pkgHandle.SetNetNsIdByPid(pid, nsid) +} + +// GetNetNsIdByFd looks up the network namespace ID for a given fd. +// fd must be an open file descriptor to a namespace file. +// Returns -1 if the namespace does not have an ID set. +func (h *Handle) GetNetNsIdByFd(fd int) (int, error) { + return h.getNetNsId(NETNSA_FD, uint32(fd)) +} + +// GetNetNsIdByFd looks up the network namespace ID for a given fd. +// fd must be an open file descriptor to a namespace file. +// Returns -1 if the namespace does not have an ID set. +func GetNetNsIdByFd(fd int) (int, error) { + return pkgHandle.GetNetNsIdByFd(fd) +} + +// SetNetNSIdByFd sets the ID of the network namespace for a given fd. +// fd must be an open file descriptor to a namespace file. +// The ID can only be set for namespaces without an ID already set. +func (h *Handle) SetNetNsIdByFd(fd, nsid int) error { + return h.setNetNsId(NETNSA_FD, uint32(fd), uint32(nsid)) +} + +// SetNetNSIdByFd sets the ID of the network namespace for a given fd. +// fd must be an open file descriptor to a namespace file. +// The ID can only be set for namespaces without an ID already set. +func SetNetNsIdByFd(fd, nsid int) error { + return pkgHandle.SetNetNsIdByFd(fd, nsid) +} + +// getNetNsId requests the netnsid for a given type-val pair +// type should be either NETNSA_PID or NETNSA_FD +func (h *Handle) getNetNsId(attrType int, val uint32) (int, error) { + req := h.newNetlinkRequest(unix.RTM_GETNSID, unix.NLM_F_REQUEST) + + rtgen := nl.NewRtGenMsg() + req.AddData(rtgen) + + b := make([]byte, 4) + native.PutUint32(b, val) + attr := nl.NewRtAttr(attrType, b) + req.AddData(attr) + + msgs, err := req.Execute(unix.NETLINK_ROUTE, unix.RTM_NEWNSID) + + if err != nil { + return 0, err + } + + for _, m := range msgs { + msg := nl.DeserializeRtGenMsg(m) + + attrs, err := nl.ParseRouteAttr(m[msg.Len():]) + if err != nil { + return 0, err + } + + for _, attr := range attrs { + switch attr.Attr.Type { + case NETNSA_NSID: + return int(int32(native.Uint32(attr.Value))), nil + } + } + } + + return 0, fmt.Errorf("unexpected empty result") +} + +// setNetNsId sets the netnsid for a given type-val pair +// type should be either NETNSA_PID or NETNSA_FD +// The ID can only be set for namespaces without an ID already set +func (h *Handle) setNetNsId(attrType int, val uint32, newnsid uint32) error { + req := h.newNetlinkRequest(unix.RTM_NEWNSID, unix.NLM_F_REQUEST|unix.NLM_F_ACK) + + rtgen := nl.NewRtGenMsg() + req.AddData(rtgen) + + b := make([]byte, 4) + native.PutUint32(b, val) + attr := nl.NewRtAttr(attrType, b) + req.AddData(attr) + + b1 := make([]byte, 4) + native.PutUint32(b1, newnsid) + attr1 := nl.NewRtAttr(NETNSA_NSID, b1) + req.AddData(attr1) + + _, err := req.Execute(unix.NETLINK_ROUTE, unix.RTM_NEWNSID) + return err +} diff --git a/vendor/github.com/vishvananda/netlink/netns_unspecified.go b/vendor/github.com/vishvananda/netlink/netns_unspecified.go new file mode 100644 index 000000000000..5c5899e3628f --- /dev/null +++ b/vendor/github.com/vishvananda/netlink/netns_unspecified.go @@ -0,0 +1,19 @@ +// +build !linux + +package netlink + +func GetNetNsIdByPid(pid int) (int, error) { + return 0, ErrNotImplemented +} + +func SetNetNsIdByPid(pid, nsid int) error { + return ErrNotImplemented +} + +func GetNetNsIdByFd(fd int) (int, error) { + return 0, ErrNotImplemented +} + +func SetNetNsIdByFd(fd, nsid int) error { + return ErrNotImplemented +} diff --git a/vendor/github.com/vishvananda/netlink/nl/addr_linux.go b/vendor/github.com/vishvananda/netlink/nl/addr_linux.go new file mode 100644 index 000000000000..6bea4ed02f08 --- /dev/null +++ b/vendor/github.com/vishvananda/netlink/nl/addr_linux.go @@ -0,0 +1,71 @@ +package nl + +import ( + "unsafe" + + "golang.org/x/sys/unix" +) + +type IfAddrmsg struct { + unix.IfAddrmsg +} + +func NewIfAddrmsg(family int) *IfAddrmsg { + return &IfAddrmsg{ + IfAddrmsg: unix.IfAddrmsg{ + Family: uint8(family), + }, + } +} + +// struct ifaddrmsg { +// __u8 ifa_family; +// __u8 ifa_prefixlen; /* The prefix length */ +// __u8 ifa_flags; /* Flags */ +// __u8 ifa_scope; /* Address scope */ +// __u32 ifa_index; /* Link index */ +// }; + +// type IfAddrmsg struct { +// Family uint8 +// Prefixlen uint8 +// Flags uint8 +// Scope uint8 +// Index uint32 +// } +// SizeofIfAddrmsg = 0x8 + +func DeserializeIfAddrmsg(b []byte) *IfAddrmsg { + return (*IfAddrmsg)(unsafe.Pointer(&b[0:unix.SizeofIfAddrmsg][0])) +} + +func (msg *IfAddrmsg) Serialize() []byte { + return (*(*[unix.SizeofIfAddrmsg]byte)(unsafe.Pointer(msg)))[:] +} + +func (msg *IfAddrmsg) Len() int { + return unix.SizeofIfAddrmsg +} + +// struct ifa_cacheinfo { +// __u32 ifa_prefered; +// __u32 ifa_valid; +// __u32 cstamp; /* created timestamp, hundredths of seconds */ +// __u32 tstamp; /* updated timestamp, hundredths of seconds */ +// }; + +type IfaCacheInfo struct { + unix.IfaCacheinfo +} + +func (msg *IfaCacheInfo) Len() int { + return unix.SizeofIfaCacheinfo +} + +func DeserializeIfaCacheInfo(b []byte) *IfaCacheInfo { + return (*IfaCacheInfo)(unsafe.Pointer(&b[0:unix.SizeofIfaCacheinfo][0])) +} + +func (msg *IfaCacheInfo) Serialize() []byte { + return (*(*[unix.SizeofIfaCacheinfo]byte)(unsafe.Pointer(msg)))[:] +} diff --git a/vendor/github.com/vishvananda/netlink/nl/bridge_linux.go b/vendor/github.com/vishvananda/netlink/nl/bridge_linux.go new file mode 100644 index 000000000000..34e78ba8dafc --- /dev/null +++ b/vendor/github.com/vishvananda/netlink/nl/bridge_linux.go @@ -0,0 +1,74 @@ +package nl + +import ( + "fmt" + "unsafe" +) + +const ( + SizeofBridgeVlanInfo = 0x04 +) + +/* Bridge Flags */ +const ( + BRIDGE_FLAGS_MASTER = iota + 1 /* Bridge command to/from master */ + BRIDGE_FLAGS_SELF /* Bridge command to/from lowerdev */ +) + +/* Bridge management nested attributes + * [IFLA_AF_SPEC] = { + * [IFLA_BRIDGE_FLAGS] + * [IFLA_BRIDGE_MODE] + * [IFLA_BRIDGE_VLAN_INFO] + * } + */ +const ( + IFLA_BRIDGE_FLAGS = iota + IFLA_BRIDGE_MODE + IFLA_BRIDGE_VLAN_INFO +) + +const ( + BRIDGE_VLAN_INFO_MASTER = 1 << iota + BRIDGE_VLAN_INFO_PVID + BRIDGE_VLAN_INFO_UNTAGGED + BRIDGE_VLAN_INFO_RANGE_BEGIN + BRIDGE_VLAN_INFO_RANGE_END +) + +// struct bridge_vlan_info { +// __u16 flags; +// __u16 vid; +// }; + +type BridgeVlanInfo struct { + Flags uint16 + Vid uint16 +} + +func (b *BridgeVlanInfo) Serialize() []byte { + return (*(*[SizeofBridgeVlanInfo]byte)(unsafe.Pointer(b)))[:] +} + +func DeserializeBridgeVlanInfo(b []byte) *BridgeVlanInfo { + return (*BridgeVlanInfo)(unsafe.Pointer(&b[0:SizeofBridgeVlanInfo][0])) +} + +func (b *BridgeVlanInfo) PortVID() bool { + return b.Flags&BRIDGE_VLAN_INFO_PVID > 0 +} + +func (b *BridgeVlanInfo) EngressUntag() bool { + return b.Flags&BRIDGE_VLAN_INFO_UNTAGGED > 0 +} + +func (b *BridgeVlanInfo) String() string { + return fmt.Sprintf("%+v", *b) +} + +/* New extended info filters for IFLA_EXT_MASK */ +const ( + RTEXT_FILTER_VF = 1 << iota + RTEXT_FILTER_BRVLAN + RTEXT_FILTER_BRVLAN_COMPRESSED +) diff --git a/vendor/github.com/vishvananda/netlink/nl/conntrack_linux.go b/vendor/github.com/vishvananda/netlink/nl/conntrack_linux.go new file mode 100644 index 000000000000..183601803b8b --- /dev/null +++ b/vendor/github.com/vishvananda/netlink/nl/conntrack_linux.go @@ -0,0 +1,219 @@ +package nl + +import "unsafe" + +// Track the message sizes for the correct serialization/deserialization +const ( + SizeofNfgenmsg = 4 + SizeofNfattr = 4 + SizeofNfConntrack = 376 + SizeofNfctTupleHead = 52 +) + +var L4ProtoMap = map[uint8]string{ + 6: "tcp", + 17: "udp", +} + +// All the following constants are coming from: +// https://github.com/torvalds/linux/blob/master/include/uapi/linux/netfilter/nfnetlink_conntrack.h + +// enum cntl_msg_types { +// IPCTNL_MSG_CT_NEW, +// IPCTNL_MSG_CT_GET, +// IPCTNL_MSG_CT_DELETE, +// IPCTNL_MSG_CT_GET_CTRZERO, +// IPCTNL_MSG_CT_GET_STATS_CPU, +// IPCTNL_MSG_CT_GET_STATS, +// IPCTNL_MSG_CT_GET_DYING, +// IPCTNL_MSG_CT_GET_UNCONFIRMED, +// +// IPCTNL_MSG_MAX +// }; +const ( + IPCTNL_MSG_CT_GET = 1 + IPCTNL_MSG_CT_DELETE = 2 +) + +// #define NFNETLINK_V0 0 +const ( + NFNETLINK_V0 = 0 +) + +const ( + NLA_F_NESTED uint16 = (1 << 15) // #define NLA_F_NESTED (1 << 15) + NLA_F_NET_BYTEORDER uint16 = (1 << 14) // #define NLA_F_NESTED (1 << 14) + NLA_TYPE_MASK = ^(NLA_F_NESTED | NLA_F_NET_BYTEORDER) + NLA_ALIGNTO uint16 = 4 // #define NLA_ALIGNTO 4 +) + +// enum ctattr_type { +// CTA_UNSPEC, +// CTA_TUPLE_ORIG, +// CTA_TUPLE_REPLY, +// CTA_STATUS, +// CTA_PROTOINFO, +// CTA_HELP, +// CTA_NAT_SRC, +// #define CTA_NAT CTA_NAT_SRC /* backwards compatibility */ +// CTA_TIMEOUT, +// CTA_MARK, +// CTA_COUNTERS_ORIG, +// CTA_COUNTERS_REPLY, +// CTA_USE, +// CTA_ID, +// CTA_NAT_DST, +// CTA_TUPLE_MASTER, +// CTA_SEQ_ADJ_ORIG, +// CTA_NAT_SEQ_ADJ_ORIG = CTA_SEQ_ADJ_ORIG, +// CTA_SEQ_ADJ_REPLY, +// CTA_NAT_SEQ_ADJ_REPLY = CTA_SEQ_ADJ_REPLY, +// CTA_SECMARK, /* obsolete */ +// CTA_ZONE, +// CTA_SECCTX, +// CTA_TIMESTAMP, +// CTA_MARK_MASK, +// CTA_LABELS, +// CTA_LABELS_MASK, +// __CTA_MAX +// }; +const ( + CTA_TUPLE_ORIG = 1 + CTA_TUPLE_REPLY = 2 + CTA_STATUS = 3 + CTA_PROTOINFO = 4 + CTA_TIMEOUT = 7 + CTA_MARK = 8 + CTA_COUNTERS_ORIG = 9 + CTA_COUNTERS_REPLY = 10 + CTA_USE = 11 + CTA_ID = 12 + CTA_TIMESTAMP = 20 +) + +// enum ctattr_tuple { +// CTA_TUPLE_UNSPEC, +// CTA_TUPLE_IP, +// CTA_TUPLE_PROTO, +// CTA_TUPLE_ZONE, +// __CTA_TUPLE_MAX +// }; +// #define CTA_TUPLE_MAX (__CTA_TUPLE_MAX - 1) +const ( + CTA_TUPLE_IP = 1 + CTA_TUPLE_PROTO = 2 +) + +// enum ctattr_ip { +// CTA_IP_UNSPEC, +// CTA_IP_V4_SRC, +// CTA_IP_V4_DST, +// CTA_IP_V6_SRC, +// CTA_IP_V6_DST, +// __CTA_IP_MAX +// }; +// #define CTA_IP_MAX (__CTA_IP_MAX - 1) +const ( + CTA_IP_V4_SRC = 1 + CTA_IP_V4_DST = 2 + CTA_IP_V6_SRC = 3 + CTA_IP_V6_DST = 4 +) + +// enum ctattr_l4proto { +// CTA_PROTO_UNSPEC, +// CTA_PROTO_NUM, +// CTA_PROTO_SRC_PORT, +// CTA_PROTO_DST_PORT, +// CTA_PROTO_ICMP_ID, +// CTA_PROTO_ICMP_TYPE, +// CTA_PROTO_ICMP_CODE, +// CTA_PROTO_ICMPV6_ID, +// CTA_PROTO_ICMPV6_TYPE, +// CTA_PROTO_ICMPV6_CODE, +// __CTA_PROTO_MAX +// }; +// #define CTA_PROTO_MAX (__CTA_PROTO_MAX - 1) +const ( + CTA_PROTO_NUM = 1 + CTA_PROTO_SRC_PORT = 2 + CTA_PROTO_DST_PORT = 3 +) + +// enum ctattr_protoinfo { +// CTA_PROTOINFO_UNSPEC, +// CTA_PROTOINFO_TCP, +// CTA_PROTOINFO_DCCP, +// CTA_PROTOINFO_SCTP, +// __CTA_PROTOINFO_MAX +// }; +// #define CTA_PROTOINFO_MAX (__CTA_PROTOINFO_MAX - 1) +const ( + CTA_PROTOINFO_TCP = 1 +) + +// enum ctattr_protoinfo_tcp { +// CTA_PROTOINFO_TCP_UNSPEC, +// CTA_PROTOINFO_TCP_STATE, +// CTA_PROTOINFO_TCP_WSCALE_ORIGINAL, +// CTA_PROTOINFO_TCP_WSCALE_REPLY, +// CTA_PROTOINFO_TCP_FLAGS_ORIGINAL, +// CTA_PROTOINFO_TCP_FLAGS_REPLY, +// __CTA_PROTOINFO_TCP_MAX +// }; +// #define CTA_PROTOINFO_TCP_MAX (__CTA_PROTOINFO_TCP_MAX - 1) +const ( + CTA_PROTOINFO_TCP_STATE = 1 + CTA_PROTOINFO_TCP_WSCALE_ORIGINAL = 2 + CTA_PROTOINFO_TCP_WSCALE_REPLY = 3 + CTA_PROTOINFO_TCP_FLAGS_ORIGINAL = 4 + CTA_PROTOINFO_TCP_FLAGS_REPLY = 5 +) + +// enum ctattr_counters { +// CTA_COUNTERS_UNSPEC, +// CTA_COUNTERS_PACKETS, /* 64bit counters */ +// CTA_COUNTERS_BYTES, /* 64bit counters */ +// CTA_COUNTERS32_PACKETS, /* old 32bit counters, unused */ +// CTA_COUNTERS32_BYTES, /* old 32bit counters, unused */ +// CTA_COUNTERS_PAD, +// __CTA_COUNTERS_M +// }; +// #define CTA_COUNTERS_MAX (__CTA_COUNTERS_MAX - 1) +const ( + CTA_COUNTERS_PACKETS = 1 + CTA_COUNTERS_BYTES = 2 +) + +// enum CTA TIMESTAMP TLVs +// CTA_TIMESTAMP_START /* 64bit value */ +// CTA_TIMESTAMP_STOP /* 64bit value */ +const ( + CTA_TIMESTAMP_START = 1 + CTA_TIMESTAMP_STOP = 2 +) + +// /* General form of address family dependent message. +// */ +// struct nfgenmsg { +// __u8 nfgen_family; /* AF_xxx */ +// __u8 version; /* nfnetlink version */ +// __be16 res_id; /* resource id */ +// }; +type Nfgenmsg struct { + NfgenFamily uint8 + Version uint8 + ResId uint16 // big endian +} + +func (msg *Nfgenmsg) Len() int { + return SizeofNfgenmsg +} + +func DeserializeNfgenmsg(b []byte) *Nfgenmsg { + return (*Nfgenmsg)(unsafe.Pointer(&b[0:SizeofNfgenmsg][0])) +} + +func (msg *Nfgenmsg) Serialize() []byte { + return (*(*[SizeofNfgenmsg]byte)(unsafe.Pointer(msg)))[:] +} diff --git a/vendor/github.com/vishvananda/netlink/nl/devlink_linux.go b/vendor/github.com/vishvananda/netlink/nl/devlink_linux.go new file mode 100644 index 000000000000..2995da492f6f --- /dev/null +++ b/vendor/github.com/vishvananda/netlink/nl/devlink_linux.go @@ -0,0 +1,96 @@ +package nl + +// All the following constants are coming from: +// https://github.com/torvalds/linux/blob/master/include/uapi/linux/devlink.h + +const ( + GENL_DEVLINK_VERSION = 1 + GENL_DEVLINK_NAME = "devlink" +) + +const ( + DEVLINK_CMD_GET = 1 + DEVLINK_CMD_PORT_GET = 5 + DEVLINK_CMD_PORT_SET = 6 + DEVLINK_CMD_PORT_NEW = 7 + DEVLINK_CMD_PORT_DEL = 8 + DEVLINK_CMD_ESWITCH_GET = 29 + DEVLINK_CMD_ESWITCH_SET = 30 + DEVLINK_CMD_INFO_GET = 51 +) + +const ( + DEVLINK_ATTR_BUS_NAME = 1 + DEVLINK_ATTR_DEV_NAME = 2 + DEVLINK_ATTR_PORT_INDEX = 3 + DEVLINK_ATTR_PORT_TYPE = 4 + DEVLINK_ATTR_PORT_NETDEV_IFINDEX = 6 + DEVLINK_ATTR_PORT_NETDEV_NAME = 7 + DEVLINK_ATTR_PORT_IBDEV_NAME = 8 + DEVLINK_ATTR_ESWITCH_MODE = 25 + DEVLINK_ATTR_ESWITCH_INLINE_MODE = 26 + DEVLINK_ATTR_ESWITCH_ENCAP_MODE = 62 + DEVLINK_ATTR_PORT_FLAVOUR = 77 + DEVLINK_ATTR_INFO_DRIVER_NAME = 98 + DEVLINK_ATTR_INFO_SERIAL_NUMBER = 99 + DEVLINK_ATTR_INFO_VERSION_FIXED = 100 + DEVLINK_ATTR_INFO_VERSION_RUNNING = 101 + DEVLINK_ATTR_INFO_VERSION_STORED = 102 + DEVLINK_ATTR_INFO_VERSION_NAME = 103 + DEVLINK_ATTR_INFO_VERSION_VALUE = 104 + DEVLINK_ATTR_PORT_PCI_PF_NUMBER = 127 + DEVLINK_ATTR_PORT_FUNCTION = 145 + DEVLINK_ATTR_PORT_CONTROLLER_NUMBER = 150 + DEVLINK_ATTR_PORT_PCI_SF_NUMBER = 164 +) + +const ( + DEVLINK_ESWITCH_MODE_LEGACY = 0 + DEVLINK_ESWITCH_MODE_SWITCHDEV = 1 +) + +const ( + DEVLINK_ESWITCH_INLINE_MODE_NONE = 0 + DEVLINK_ESWITCH_INLINE_MODE_LINK = 1 + DEVLINK_ESWITCH_INLINE_MODE_NETWORK = 2 + DEVLINK_ESWITCH_INLINE_MODE_TRANSPORT = 3 +) + +const ( + DEVLINK_ESWITCH_ENCAP_MODE_NONE = 0 + DEVLINK_ESWITCH_ENCAP_MODE_BASIC = 1 +) + +const ( + DEVLINK_PORT_FLAVOUR_PHYSICAL = 0 + DEVLINK_PORT_FLAVOUR_CPU = 1 + DEVLINK_PORT_FLAVOUR_DSA = 2 + DEVLINK_PORT_FLAVOUR_PCI_PF = 3 + DEVLINK_PORT_FLAVOUR_PCI_VF = 4 + DEVLINK_PORT_FLAVOUR_VIRTUAL = 5 + DEVLINK_PORT_FLAVOUR_UNUSED = 6 + DEVLINK_PORT_FLAVOUR_PCI_SF = 7 +) + +const ( + DEVLINK_PORT_TYPE_NOTSET = 0 + DEVLINK_PORT_TYPE_AUTO = 1 + DEVLINK_PORT_TYPE_ETH = 2 + DEVLINK_PORT_TYPE_IB = 3 +) + +const ( + DEVLINK_PORT_FUNCTION_ATTR_HW_ADDR = 1 + DEVLINK_PORT_FN_ATTR_STATE = 2 + DEVLINK_PORT_FN_ATTR_OPSTATE = 3 +) + +const ( + DEVLINK_PORT_FN_STATE_INACTIVE = 0 + DEVLINK_PORT_FN_STATE_ACTIVE = 1 +) + +const ( + DEVLINK_PORT_FN_OPSTATE_DETACHED = 0 + DEVLINK_PORT_FN_OPSTATE_ATTACHED = 1 +) diff --git a/vendor/github.com/vishvananda/netlink/nl/genetlink_linux.go b/vendor/github.com/vishvananda/netlink/nl/genetlink_linux.go new file mode 100644 index 000000000000..81b46f2c7920 --- /dev/null +++ b/vendor/github.com/vishvananda/netlink/nl/genetlink_linux.go @@ -0,0 +1,89 @@ +package nl + +import ( + "unsafe" +) + +const SizeofGenlmsg = 4 + +const ( + GENL_ID_CTRL = 0x10 + GENL_CTRL_VERSION = 2 + GENL_CTRL_NAME = "nlctrl" +) + +const ( + GENL_CTRL_CMD_GETFAMILY = 3 +) + +const ( + GENL_CTRL_ATTR_UNSPEC = iota + GENL_CTRL_ATTR_FAMILY_ID + GENL_CTRL_ATTR_FAMILY_NAME + GENL_CTRL_ATTR_VERSION + GENL_CTRL_ATTR_HDRSIZE + GENL_CTRL_ATTR_MAXATTR + GENL_CTRL_ATTR_OPS + GENL_CTRL_ATTR_MCAST_GROUPS +) + +const ( + GENL_CTRL_ATTR_OP_UNSPEC = iota + GENL_CTRL_ATTR_OP_ID + GENL_CTRL_ATTR_OP_FLAGS +) + +const ( + GENL_ADMIN_PERM = 1 << iota + GENL_CMD_CAP_DO + GENL_CMD_CAP_DUMP + GENL_CMD_CAP_HASPOL +) + +const ( + GENL_CTRL_ATTR_MCAST_GRP_UNSPEC = iota + GENL_CTRL_ATTR_MCAST_GRP_NAME + GENL_CTRL_ATTR_MCAST_GRP_ID +) + +const ( + GENL_GTP_VERSION = 0 + GENL_GTP_NAME = "gtp" +) + +const ( + GENL_GTP_CMD_NEWPDP = iota + GENL_GTP_CMD_DELPDP + GENL_GTP_CMD_GETPDP +) + +const ( + GENL_GTP_ATTR_UNSPEC = iota + GENL_GTP_ATTR_LINK + GENL_GTP_ATTR_VERSION + GENL_GTP_ATTR_TID + GENL_GTP_ATTR_PEER_ADDRESS + GENL_GTP_ATTR_MS_ADDRESS + GENL_GTP_ATTR_FLOW + GENL_GTP_ATTR_NET_NS_FD + GENL_GTP_ATTR_I_TEI + GENL_GTP_ATTR_O_TEI + GENL_GTP_ATTR_PAD +) + +type Genlmsg struct { + Command uint8 + Version uint8 +} + +func (msg *Genlmsg) Len() int { + return SizeofGenlmsg +} + +func DeserializeGenlmsg(b []byte) *Genlmsg { + return (*Genlmsg)(unsafe.Pointer(&b[0:SizeofGenlmsg][0])) +} + +func (msg *Genlmsg) Serialize() []byte { + return (*(*[SizeofGenlmsg]byte)(unsafe.Pointer(msg)))[:] +} diff --git a/vendor/github.com/vishvananda/netlink/nl/ipset_linux.go b/vendor/github.com/vishvananda/netlink/nl/ipset_linux.go new file mode 100644 index 000000000000..a60b4b09d9b9 --- /dev/null +++ b/vendor/github.com/vishvananda/netlink/nl/ipset_linux.go @@ -0,0 +1,222 @@ +package nl + +import ( + "strconv" + + "golang.org/x/sys/unix" +) + +const ( + /* The protocol version */ + IPSET_PROTOCOL = 6 + + /* The max length of strings including NUL: set and type identifiers */ + IPSET_MAXNAMELEN = 32 + + /* The maximum permissible comment length we will accept over netlink */ + IPSET_MAX_COMMENT_SIZE = 255 +) + +const ( + _ = iota + IPSET_CMD_PROTOCOL /* 1: Return protocol version */ + IPSET_CMD_CREATE /* 2: Create a new (empty) set */ + IPSET_CMD_DESTROY /* 3: Destroy a (empty) set */ + IPSET_CMD_FLUSH /* 4: Remove all elements from a set */ + IPSET_CMD_RENAME /* 5: Rename a set */ + IPSET_CMD_SWAP /* 6: Swap two sets */ + IPSET_CMD_LIST /* 7: List sets */ + IPSET_CMD_SAVE /* 8: Save sets */ + IPSET_CMD_ADD /* 9: Add an element to a set */ + IPSET_CMD_DEL /* 10: Delete an element from a set */ + IPSET_CMD_TEST /* 11: Test an element in a set */ + IPSET_CMD_HEADER /* 12: Get set header data only */ + IPSET_CMD_TYPE /* 13: Get set type */ +) + +/* Attributes at command level */ +const ( + _ = iota + IPSET_ATTR_PROTOCOL /* 1: Protocol version */ + IPSET_ATTR_SETNAME /* 2: Name of the set */ + IPSET_ATTR_TYPENAME /* 3: Typename */ + IPSET_ATTR_REVISION /* 4: Settype revision */ + IPSET_ATTR_FAMILY /* 5: Settype family */ + IPSET_ATTR_FLAGS /* 6: Flags at command level */ + IPSET_ATTR_DATA /* 7: Nested attributes */ + IPSET_ATTR_ADT /* 8: Multiple data containers */ + IPSET_ATTR_LINENO /* 9: Restore lineno */ + IPSET_ATTR_PROTOCOL_MIN /* 10: Minimal supported version number */ + + IPSET_ATTR_SETNAME2 = IPSET_ATTR_TYPENAME /* Setname at rename/swap */ + IPSET_ATTR_REVISION_MIN = IPSET_ATTR_PROTOCOL_MIN /* type rev min */ +) + +/* CADT specific attributes */ +const ( + IPSET_ATTR_IP = 1 + IPSET_ATTR_IP_FROM = 1 + IPSET_ATTR_IP_TO = 2 + IPSET_ATTR_CIDR = 3 + IPSET_ATTR_PORT = 4 + IPSET_ATTR_PORT_FROM = 4 + IPSET_ATTR_PORT_TO = 5 + IPSET_ATTR_TIMEOUT = 6 + IPSET_ATTR_PROTO = 7 + IPSET_ATTR_CADT_FLAGS = 8 + IPSET_ATTR_CADT_LINENO = IPSET_ATTR_LINENO /* 9 */ + IPSET_ATTR_MARK = 10 + IPSET_ATTR_MARKMASK = 11 + + /* Reserve empty slots */ + IPSET_ATTR_CADT_MAX = 16 + + /* Create-only specific attributes */ + IPSET_ATTR_GC = 3 + iota + IPSET_ATTR_HASHSIZE + IPSET_ATTR_MAXELEM + IPSET_ATTR_NETMASK + IPSET_ATTR_PROBES + IPSET_ATTR_RESIZE + IPSET_ATTR_SIZE + + /* Kernel-only */ + IPSET_ATTR_ELEMENTS + IPSET_ATTR_REFERENCES + IPSET_ATTR_MEMSIZE + + SET_ATTR_CREATE_MAX +) + +/* ADT specific attributes */ +const ( + IPSET_ATTR_ETHER = IPSET_ATTR_CADT_MAX + iota + 1 + IPSET_ATTR_NAME + IPSET_ATTR_NAMEREF + IPSET_ATTR_IP2 + IPSET_ATTR_CIDR2 + IPSET_ATTR_IP2_TO + IPSET_ATTR_IFACE + IPSET_ATTR_BYTES + IPSET_ATTR_PACKETS + IPSET_ATTR_COMMENT + IPSET_ATTR_SKBMARK + IPSET_ATTR_SKBPRIO + IPSET_ATTR_SKBQUEUE +) + +/* Flags at CADT attribute level, upper half of cmdattrs */ +const ( + IPSET_FLAG_BIT_BEFORE = 0 + IPSET_FLAG_BEFORE = (1 << IPSET_FLAG_BIT_BEFORE) + IPSET_FLAG_BIT_PHYSDEV = 1 + IPSET_FLAG_PHYSDEV = (1 << IPSET_FLAG_BIT_PHYSDEV) + IPSET_FLAG_BIT_NOMATCH = 2 + IPSET_FLAG_NOMATCH = (1 << IPSET_FLAG_BIT_NOMATCH) + IPSET_FLAG_BIT_WITH_COUNTERS = 3 + IPSET_FLAG_WITH_COUNTERS = (1 << IPSET_FLAG_BIT_WITH_COUNTERS) + IPSET_FLAG_BIT_WITH_COMMENT = 4 + IPSET_FLAG_WITH_COMMENT = (1 << IPSET_FLAG_BIT_WITH_COMMENT) + IPSET_FLAG_BIT_WITH_FORCEADD = 5 + IPSET_FLAG_WITH_FORCEADD = (1 << IPSET_FLAG_BIT_WITH_FORCEADD) + IPSET_FLAG_BIT_WITH_SKBINFO = 6 + IPSET_FLAG_WITH_SKBINFO = (1 << IPSET_FLAG_BIT_WITH_SKBINFO) + IPSET_FLAG_CADT_MAX = 15 +) + +const ( + IPSET_ERR_PRIVATE = 4096 + iota + IPSET_ERR_PROTOCOL + IPSET_ERR_FIND_TYPE + IPSET_ERR_MAX_SETS + IPSET_ERR_BUSY + IPSET_ERR_EXIST_SETNAME2 + IPSET_ERR_TYPE_MISMATCH + IPSET_ERR_EXIST + IPSET_ERR_INVALID_CIDR + IPSET_ERR_INVALID_NETMASK + IPSET_ERR_INVALID_FAMILY + IPSET_ERR_TIMEOUT + IPSET_ERR_REFERENCED + IPSET_ERR_IPADDR_IPV4 + IPSET_ERR_IPADDR_IPV6 + IPSET_ERR_COUNTER + IPSET_ERR_COMMENT + IPSET_ERR_INVALID_MARKMASK + IPSET_ERR_SKBINFO + + /* Type specific error codes */ + IPSET_ERR_TYPE_SPECIFIC = 4352 +) + +type IPSetError uintptr + +func (e IPSetError) Error() string { + switch int(e) { + case IPSET_ERR_PRIVATE: + return "private" + case IPSET_ERR_PROTOCOL: + return "invalid protocol" + case IPSET_ERR_FIND_TYPE: + return "invalid type" + case IPSET_ERR_MAX_SETS: + return "max sets reached" + case IPSET_ERR_BUSY: + return "busy" + case IPSET_ERR_EXIST_SETNAME2: + return "exist_setname2" + case IPSET_ERR_TYPE_MISMATCH: + return "type mismatch" + case IPSET_ERR_EXIST: + return "exist" + case IPSET_ERR_INVALID_CIDR: + return "invalid cidr" + case IPSET_ERR_INVALID_NETMASK: + return "invalid netmask" + case IPSET_ERR_INVALID_FAMILY: + return "invalid family" + case IPSET_ERR_TIMEOUT: + return "timeout" + case IPSET_ERR_REFERENCED: + return "referenced" + case IPSET_ERR_IPADDR_IPV4: + return "invalid ipv4 address" + case IPSET_ERR_IPADDR_IPV6: + return "invalid ipv6 address" + case IPSET_ERR_COUNTER: + return "invalid counter" + case IPSET_ERR_COMMENT: + return "invalid comment" + case IPSET_ERR_INVALID_MARKMASK: + return "invalid markmask" + case IPSET_ERR_SKBINFO: + return "skbinfo" + default: + return "errno " + strconv.Itoa(int(e)) + } +} + +func GetIpsetFlags(cmd int) int { + switch cmd { + case IPSET_CMD_CREATE: + return unix.NLM_F_REQUEST | unix.NLM_F_ACK | unix.NLM_F_CREATE + case IPSET_CMD_DESTROY, + IPSET_CMD_FLUSH, + IPSET_CMD_RENAME, + IPSET_CMD_SWAP, + IPSET_CMD_TEST: + return unix.NLM_F_REQUEST | unix.NLM_F_ACK + case IPSET_CMD_LIST, + IPSET_CMD_SAVE: + return unix.NLM_F_REQUEST | unix.NLM_F_ACK | unix.NLM_F_ROOT | unix.NLM_F_MATCH | unix.NLM_F_DUMP + case IPSET_CMD_ADD, + IPSET_CMD_DEL: + return unix.NLM_F_REQUEST | unix.NLM_F_ACK + case IPSET_CMD_HEADER, + IPSET_CMD_TYPE, + IPSET_CMD_PROTOCOL: + return unix.NLM_F_REQUEST + default: + return 0 + } +} diff --git a/vendor/github.com/vishvananda/netlink/nl/link_linux.go b/vendor/github.com/vishvananda/netlink/nl/link_linux.go new file mode 100644 index 000000000000..e10edbc09d81 --- /dev/null +++ b/vendor/github.com/vishvananda/netlink/nl/link_linux.go @@ -0,0 +1,720 @@ +package nl + +import ( + "bytes" + "encoding/binary" + "unsafe" +) + +const ( + DEFAULT_CHANGE = 0xFFFFFFFF +) + +const ( + IFLA_INFO_UNSPEC = iota + IFLA_INFO_KIND + IFLA_INFO_DATA + IFLA_INFO_XSTATS + IFLA_INFO_SLAVE_KIND + IFLA_INFO_SLAVE_DATA + IFLA_INFO_MAX = IFLA_INFO_SLAVE_DATA +) + +const ( + IFLA_VLAN_UNSPEC = iota + IFLA_VLAN_ID + IFLA_VLAN_FLAGS + IFLA_VLAN_EGRESS_QOS + IFLA_VLAN_INGRESS_QOS + IFLA_VLAN_PROTOCOL + IFLA_VLAN_MAX = IFLA_VLAN_PROTOCOL +) + +const ( + VETH_INFO_UNSPEC = iota + VETH_INFO_PEER + VETH_INFO_MAX = VETH_INFO_PEER +) + +const ( + IFLA_VXLAN_UNSPEC = iota + IFLA_VXLAN_ID + IFLA_VXLAN_GROUP + IFLA_VXLAN_LINK + IFLA_VXLAN_LOCAL + IFLA_VXLAN_TTL + IFLA_VXLAN_TOS + IFLA_VXLAN_LEARNING + IFLA_VXLAN_AGEING + IFLA_VXLAN_LIMIT + IFLA_VXLAN_PORT_RANGE + IFLA_VXLAN_PROXY + IFLA_VXLAN_RSC + IFLA_VXLAN_L2MISS + IFLA_VXLAN_L3MISS + IFLA_VXLAN_PORT + IFLA_VXLAN_GROUP6 + IFLA_VXLAN_LOCAL6 + IFLA_VXLAN_UDP_CSUM + IFLA_VXLAN_UDP_ZERO_CSUM6_TX + IFLA_VXLAN_UDP_ZERO_CSUM6_RX + IFLA_VXLAN_REMCSUM_TX + IFLA_VXLAN_REMCSUM_RX + IFLA_VXLAN_GBP + IFLA_VXLAN_REMCSUM_NOPARTIAL + IFLA_VXLAN_FLOWBASED + IFLA_VXLAN_MAX = IFLA_VXLAN_FLOWBASED +) + +const ( + BRIDGE_MODE_UNSPEC = iota + BRIDGE_MODE_HAIRPIN +) + +const ( + IFLA_BRPORT_UNSPEC = iota + IFLA_BRPORT_STATE + IFLA_BRPORT_PRIORITY + IFLA_BRPORT_COST + IFLA_BRPORT_MODE + IFLA_BRPORT_GUARD + IFLA_BRPORT_PROTECT + IFLA_BRPORT_FAST_LEAVE + IFLA_BRPORT_LEARNING + IFLA_BRPORT_UNICAST_FLOOD + IFLA_BRPORT_PROXYARP + IFLA_BRPORT_LEARNING_SYNC + IFLA_BRPORT_PROXYARP_WIFI + IFLA_BRPORT_MAX = IFLA_BRPORT_PROXYARP_WIFI +) + +const ( + IFLA_IPVLAN_UNSPEC = iota + IFLA_IPVLAN_MODE + IFLA_IPVLAN_FLAG + IFLA_IPVLAN_MAX = IFLA_IPVLAN_FLAG +) + +const ( + IFLA_MACVLAN_UNSPEC = iota + IFLA_MACVLAN_MODE + IFLA_MACVLAN_FLAGS + IFLA_MACVLAN_MACADDR_MODE + IFLA_MACVLAN_MACADDR + IFLA_MACVLAN_MACADDR_DATA + IFLA_MACVLAN_MACADDR_COUNT + IFLA_MACVLAN_MAX = IFLA_MACVLAN_FLAGS +) + +const ( + MACVLAN_MODE_PRIVATE = 1 + MACVLAN_MODE_VEPA = 2 + MACVLAN_MODE_BRIDGE = 4 + MACVLAN_MODE_PASSTHRU = 8 + MACVLAN_MODE_SOURCE = 16 +) + +const ( + MACVLAN_MACADDR_ADD = iota + MACVLAN_MACADDR_DEL + MACVLAN_MACADDR_FLUSH + MACVLAN_MACADDR_SET +) + +const ( + IFLA_BOND_UNSPEC = iota + IFLA_BOND_MODE + IFLA_BOND_ACTIVE_SLAVE + IFLA_BOND_MIIMON + IFLA_BOND_UPDELAY + IFLA_BOND_DOWNDELAY + IFLA_BOND_USE_CARRIER + IFLA_BOND_ARP_INTERVAL + IFLA_BOND_ARP_IP_TARGET + IFLA_BOND_ARP_VALIDATE + IFLA_BOND_ARP_ALL_TARGETS + IFLA_BOND_PRIMARY + IFLA_BOND_PRIMARY_RESELECT + IFLA_BOND_FAIL_OVER_MAC + IFLA_BOND_XMIT_HASH_POLICY + IFLA_BOND_RESEND_IGMP + IFLA_BOND_NUM_PEER_NOTIF + IFLA_BOND_ALL_SLAVES_ACTIVE + IFLA_BOND_MIN_LINKS + IFLA_BOND_LP_INTERVAL + IFLA_BOND_PACKETS_PER_SLAVE + IFLA_BOND_AD_LACP_RATE + IFLA_BOND_AD_SELECT + IFLA_BOND_AD_INFO + IFLA_BOND_AD_ACTOR_SYS_PRIO + IFLA_BOND_AD_USER_PORT_KEY + IFLA_BOND_AD_ACTOR_SYSTEM + IFLA_BOND_TLB_DYNAMIC_LB +) + +const ( + IFLA_BOND_AD_INFO_UNSPEC = iota + IFLA_BOND_AD_INFO_AGGREGATOR + IFLA_BOND_AD_INFO_NUM_PORTS + IFLA_BOND_AD_INFO_ACTOR_KEY + IFLA_BOND_AD_INFO_PARTNER_KEY + IFLA_BOND_AD_INFO_PARTNER_MAC +) + +const ( + IFLA_BOND_SLAVE_UNSPEC = iota + IFLA_BOND_SLAVE_STATE + IFLA_BOND_SLAVE_MII_STATUS + IFLA_BOND_SLAVE_LINK_FAILURE_COUNT + IFLA_BOND_SLAVE_PERM_HWADDR + IFLA_BOND_SLAVE_QUEUE_ID + IFLA_BOND_SLAVE_AD_AGGREGATOR_ID + IFLA_BOND_SLAVE_AD_ACTOR_OPER_PORT_STATE + IFLA_BOND_SLAVE_AD_PARTNER_OPER_PORT_STATE +) + +const ( + IFLA_GENEVE_UNSPEC = iota + IFLA_GENEVE_ID // vni + IFLA_GENEVE_REMOTE + IFLA_GENEVE_TTL + IFLA_GENEVE_TOS + IFLA_GENEVE_PORT // destination port + IFLA_GENEVE_COLLECT_METADATA + IFLA_GENEVE_REMOTE6 + IFLA_GENEVE_UDP_CSUM + IFLA_GENEVE_UDP_ZERO_CSUM6_TX + IFLA_GENEVE_UDP_ZERO_CSUM6_RX + IFLA_GENEVE_LABEL + IFLA_GENEVE_MAX = IFLA_GENEVE_LABEL +) + +const ( + IFLA_GRE_UNSPEC = iota + IFLA_GRE_LINK + IFLA_GRE_IFLAGS + IFLA_GRE_OFLAGS + IFLA_GRE_IKEY + IFLA_GRE_OKEY + IFLA_GRE_LOCAL + IFLA_GRE_REMOTE + IFLA_GRE_TTL + IFLA_GRE_TOS + IFLA_GRE_PMTUDISC + IFLA_GRE_ENCAP_LIMIT + IFLA_GRE_FLOWINFO + IFLA_GRE_FLAGS + IFLA_GRE_ENCAP_TYPE + IFLA_GRE_ENCAP_FLAGS + IFLA_GRE_ENCAP_SPORT + IFLA_GRE_ENCAP_DPORT + IFLA_GRE_COLLECT_METADATA + IFLA_GRE_MAX = IFLA_GRE_COLLECT_METADATA +) + +const ( + GRE_CSUM = 0x8000 + GRE_ROUTING = 0x4000 + GRE_KEY = 0x2000 + GRE_SEQ = 0x1000 + GRE_STRICT = 0x0800 + GRE_REC = 0x0700 + GRE_FLAGS = 0x00F8 + GRE_VERSION = 0x0007 +) + +const ( + IFLA_VF_INFO_UNSPEC = iota + IFLA_VF_INFO + IFLA_VF_INFO_MAX = IFLA_VF_INFO +) + +const ( + IFLA_VF_UNSPEC = iota + IFLA_VF_MAC /* Hardware queue specific attributes */ + IFLA_VF_VLAN + IFLA_VF_TX_RATE /* Max TX Bandwidth Allocation */ + IFLA_VF_SPOOFCHK /* Spoof Checking on/off switch */ + IFLA_VF_LINK_STATE /* link state enable/disable/auto switch */ + IFLA_VF_RATE /* Min and Max TX Bandwidth Allocation */ + IFLA_VF_RSS_QUERY_EN /* RSS Redirection Table and Hash Key query + * on/off switch + */ + IFLA_VF_STATS /* network device statistics */ + IFLA_VF_TRUST /* Trust state of VF */ + IFLA_VF_IB_NODE_GUID /* VF Infiniband node GUID */ + IFLA_VF_IB_PORT_GUID /* VF Infiniband port GUID */ + IFLA_VF_MAX = IFLA_VF_IB_PORT_GUID +) + +const ( + IFLA_VF_LINK_STATE_AUTO = iota /* link state of the uplink */ + IFLA_VF_LINK_STATE_ENABLE /* link always up */ + IFLA_VF_LINK_STATE_DISABLE /* link always down */ + IFLA_VF_LINK_STATE_MAX = IFLA_VF_LINK_STATE_DISABLE +) + +const ( + IFLA_VF_STATS_RX_PACKETS = iota + IFLA_VF_STATS_TX_PACKETS + IFLA_VF_STATS_RX_BYTES + IFLA_VF_STATS_TX_BYTES + IFLA_VF_STATS_BROADCAST + IFLA_VF_STATS_MULTICAST + IFLA_VF_STATS_RX_DROPPED + IFLA_VF_STATS_TX_DROPPED + IFLA_VF_STATS_MAX = IFLA_VF_STATS_TX_DROPPED +) + +const ( + SizeofVfMac = 0x24 + SizeofVfVlan = 0x0c + SizeofVfTxRate = 0x08 + SizeofVfRate = 0x0c + SizeofVfSpoofchk = 0x08 + SizeofVfLinkState = 0x08 + SizeofVfRssQueryEn = 0x08 + SizeofVfTrust = 0x08 + SizeofVfGUID = 0x10 +) + +// struct ifla_vf_mac { +// __u32 vf; +// __u8 mac[32]; /* MAX_ADDR_LEN */ +// }; + +type VfMac struct { + Vf uint32 + Mac [32]byte +} + +func (msg *VfMac) Len() int { + return SizeofVfMac +} + +func DeserializeVfMac(b []byte) *VfMac { + return (*VfMac)(unsafe.Pointer(&b[0:SizeofVfMac][0])) +} + +func (msg *VfMac) Serialize() []byte { + return (*(*[SizeofVfMac]byte)(unsafe.Pointer(msg)))[:] +} + +// struct ifla_vf_vlan { +// __u32 vf; +// __u32 vlan; /* 0 - 4095, 0 disables VLAN filter */ +// __u32 qos; +// }; + +type VfVlan struct { + Vf uint32 + Vlan uint32 + Qos uint32 +} + +func (msg *VfVlan) Len() int { + return SizeofVfVlan +} + +func DeserializeVfVlan(b []byte) *VfVlan { + return (*VfVlan)(unsafe.Pointer(&b[0:SizeofVfVlan][0])) +} + +func (msg *VfVlan) Serialize() []byte { + return (*(*[SizeofVfVlan]byte)(unsafe.Pointer(msg)))[:] +} + +// struct ifla_vf_tx_rate { +// __u32 vf; +// __u32 rate; /* Max TX bandwidth in Mbps, 0 disables throttling */ +// }; + +type VfTxRate struct { + Vf uint32 + Rate uint32 +} + +func (msg *VfTxRate) Len() int { + return SizeofVfTxRate +} + +func DeserializeVfTxRate(b []byte) *VfTxRate { + return (*VfTxRate)(unsafe.Pointer(&b[0:SizeofVfTxRate][0])) +} + +func (msg *VfTxRate) Serialize() []byte { + return (*(*[SizeofVfTxRate]byte)(unsafe.Pointer(msg)))[:] +} + +//struct ifla_vf_stats { +// __u64 rx_packets; +// __u64 tx_packets; +// __u64 rx_bytes; +// __u64 tx_bytes; +// __u64 broadcast; +// __u64 multicast; +//}; + +type VfStats struct { + RxPackets uint64 + TxPackets uint64 + RxBytes uint64 + TxBytes uint64 + Multicast uint64 + Broadcast uint64 + RxDropped uint64 + TxDropped uint64 +} + +func DeserializeVfStats(b []byte) VfStats { + var vfstat VfStats + stats, err := ParseRouteAttr(b) + if err != nil { + return vfstat + } + var valueVar uint64 + for _, stat := range stats { + if err := binary.Read(bytes.NewBuffer(stat.Value), NativeEndian(), &valueVar); err != nil { + break + } + switch stat.Attr.Type { + case IFLA_VF_STATS_RX_PACKETS: + vfstat.RxPackets = valueVar + case IFLA_VF_STATS_TX_PACKETS: + vfstat.TxPackets = valueVar + case IFLA_VF_STATS_RX_BYTES: + vfstat.RxBytes = valueVar + case IFLA_VF_STATS_TX_BYTES: + vfstat.TxBytes = valueVar + case IFLA_VF_STATS_MULTICAST: + vfstat.Multicast = valueVar + case IFLA_VF_STATS_BROADCAST: + vfstat.Broadcast = valueVar + case IFLA_VF_STATS_RX_DROPPED: + vfstat.RxDropped = valueVar + case IFLA_VF_STATS_TX_DROPPED: + vfstat.TxDropped = valueVar + } + } + return vfstat +} + +// struct ifla_vf_rate { +// __u32 vf; +// __u32 min_tx_rate; /* Min Bandwidth in Mbps */ +// __u32 max_tx_rate; /* Max Bandwidth in Mbps */ +// }; + +type VfRate struct { + Vf uint32 + MinTxRate uint32 + MaxTxRate uint32 +} + +func (msg *VfRate) Len() int { + return SizeofVfRate +} + +func DeserializeVfRate(b []byte) *VfRate { + return (*VfRate)(unsafe.Pointer(&b[0:SizeofVfRate][0])) +} + +func (msg *VfRate) Serialize() []byte { + return (*(*[SizeofVfRate]byte)(unsafe.Pointer(msg)))[:] +} + +// struct ifla_vf_spoofchk { +// __u32 vf; +// __u32 setting; +// }; + +type VfSpoofchk struct { + Vf uint32 + Setting uint32 +} + +func (msg *VfSpoofchk) Len() int { + return SizeofVfSpoofchk +} + +func DeserializeVfSpoofchk(b []byte) *VfSpoofchk { + return (*VfSpoofchk)(unsafe.Pointer(&b[0:SizeofVfSpoofchk][0])) +} + +func (msg *VfSpoofchk) Serialize() []byte { + return (*(*[SizeofVfSpoofchk]byte)(unsafe.Pointer(msg)))[:] +} + +// struct ifla_vf_link_state { +// __u32 vf; +// __u32 link_state; +// }; + +type VfLinkState struct { + Vf uint32 + LinkState uint32 +} + +func (msg *VfLinkState) Len() int { + return SizeofVfLinkState +} + +func DeserializeVfLinkState(b []byte) *VfLinkState { + return (*VfLinkState)(unsafe.Pointer(&b[0:SizeofVfLinkState][0])) +} + +func (msg *VfLinkState) Serialize() []byte { + return (*(*[SizeofVfLinkState]byte)(unsafe.Pointer(msg)))[:] +} + +// struct ifla_vf_rss_query_en { +// __u32 vf; +// __u32 setting; +// }; + +type VfRssQueryEn struct { + Vf uint32 + Setting uint32 +} + +func (msg *VfRssQueryEn) Len() int { + return SizeofVfRssQueryEn +} + +func DeserializeVfRssQueryEn(b []byte) *VfRssQueryEn { + return (*VfRssQueryEn)(unsafe.Pointer(&b[0:SizeofVfRssQueryEn][0])) +} + +func (msg *VfRssQueryEn) Serialize() []byte { + return (*(*[SizeofVfRssQueryEn]byte)(unsafe.Pointer(msg)))[:] +} + +// struct ifla_vf_trust { +// __u32 vf; +// __u32 setting; +// }; + +type VfTrust struct { + Vf uint32 + Setting uint32 +} + +func (msg *VfTrust) Len() int { + return SizeofVfTrust +} + +func DeserializeVfTrust(b []byte) *VfTrust { + return (*VfTrust)(unsafe.Pointer(&b[0:SizeofVfTrust][0])) +} + +func (msg *VfTrust) Serialize() []byte { + return (*(*[SizeofVfTrust]byte)(unsafe.Pointer(msg)))[:] +} + +// struct ifla_vf_guid { +// __u32 vf; +// __u32 rsvd; +// __u64 guid; +// }; + +type VfGUID struct { + Vf uint32 + Rsvd uint32 + GUID uint64 +} + +func (msg *VfGUID) Len() int { + return SizeofVfGUID +} + +func DeserializeVfGUID(b []byte) *VfGUID { + return (*VfGUID)(unsafe.Pointer(&b[0:SizeofVfGUID][0])) +} + +func (msg *VfGUID) Serialize() []byte { + return (*(*[SizeofVfGUID]byte)(unsafe.Pointer(msg)))[:] +} + +const ( + XDP_FLAGS_UPDATE_IF_NOEXIST = 1 << iota + XDP_FLAGS_SKB_MODE + XDP_FLAGS_DRV_MODE + XDP_FLAGS_MASK = XDP_FLAGS_UPDATE_IF_NOEXIST | XDP_FLAGS_SKB_MODE | XDP_FLAGS_DRV_MODE +) + +const ( + IFLA_XDP_UNSPEC = iota + IFLA_XDP_FD /* fd of xdp program to attach, or -1 to remove */ + IFLA_XDP_ATTACHED /* read-only bool indicating if prog is attached */ + IFLA_XDP_FLAGS /* xdp prog related flags */ + IFLA_XDP_PROG_ID /* xdp prog id */ + IFLA_XDP_MAX = IFLA_XDP_PROG_ID +) + +// XDP program attach mode (used as dump value for IFLA_XDP_ATTACHED) +const ( + XDP_ATTACHED_NONE = iota + XDP_ATTACHED_DRV + XDP_ATTACHED_SKB + XDP_ATTACHED_HW +) + +const ( + IFLA_IPTUN_UNSPEC = iota + IFLA_IPTUN_LINK + IFLA_IPTUN_LOCAL + IFLA_IPTUN_REMOTE + IFLA_IPTUN_TTL + IFLA_IPTUN_TOS + IFLA_IPTUN_ENCAP_LIMIT + IFLA_IPTUN_FLOWINFO + IFLA_IPTUN_FLAGS + IFLA_IPTUN_PROTO + IFLA_IPTUN_PMTUDISC + IFLA_IPTUN_6RD_PREFIX + IFLA_IPTUN_6RD_RELAY_PREFIX + IFLA_IPTUN_6RD_PREFIXLEN + IFLA_IPTUN_6RD_RELAY_PREFIXLEN + IFLA_IPTUN_ENCAP_TYPE + IFLA_IPTUN_ENCAP_FLAGS + IFLA_IPTUN_ENCAP_SPORT + IFLA_IPTUN_ENCAP_DPORT + IFLA_IPTUN_COLLECT_METADATA + IFLA_IPTUN_MAX = IFLA_IPTUN_COLLECT_METADATA +) + +const ( + IFLA_VTI_UNSPEC = iota + IFLA_VTI_LINK + IFLA_VTI_IKEY + IFLA_VTI_OKEY + IFLA_VTI_LOCAL + IFLA_VTI_REMOTE + IFLA_VTI_MAX = IFLA_VTI_REMOTE +) + +const ( + IFLA_VRF_UNSPEC = iota + IFLA_VRF_TABLE +) + +const ( + IFLA_BR_UNSPEC = iota + IFLA_BR_FORWARD_DELAY + IFLA_BR_HELLO_TIME + IFLA_BR_MAX_AGE + IFLA_BR_AGEING_TIME + IFLA_BR_STP_STATE + IFLA_BR_PRIORITY + IFLA_BR_VLAN_FILTERING + IFLA_BR_VLAN_PROTOCOL + IFLA_BR_GROUP_FWD_MASK + IFLA_BR_ROOT_ID + IFLA_BR_BRIDGE_ID + IFLA_BR_ROOT_PORT + IFLA_BR_ROOT_PATH_COST + IFLA_BR_TOPOLOGY_CHANGE + IFLA_BR_TOPOLOGY_CHANGE_DETECTED + IFLA_BR_HELLO_TIMER + IFLA_BR_TCN_TIMER + IFLA_BR_TOPOLOGY_CHANGE_TIMER + IFLA_BR_GC_TIMER + IFLA_BR_GROUP_ADDR + IFLA_BR_FDB_FLUSH + IFLA_BR_MCAST_ROUTER + IFLA_BR_MCAST_SNOOPING + IFLA_BR_MCAST_QUERY_USE_IFADDR + IFLA_BR_MCAST_QUERIER + IFLA_BR_MCAST_HASH_ELASTICITY + IFLA_BR_MCAST_HASH_MAX + IFLA_BR_MCAST_LAST_MEMBER_CNT + IFLA_BR_MCAST_STARTUP_QUERY_CNT + IFLA_BR_MCAST_LAST_MEMBER_INTVL + IFLA_BR_MCAST_MEMBERSHIP_INTVL + IFLA_BR_MCAST_QUERIER_INTVL + IFLA_BR_MCAST_QUERY_INTVL + IFLA_BR_MCAST_QUERY_RESPONSE_INTVL + IFLA_BR_MCAST_STARTUP_QUERY_INTVL + IFLA_BR_NF_CALL_IPTABLES + IFLA_BR_NF_CALL_IP6TABLES + IFLA_BR_NF_CALL_ARPTABLES + IFLA_BR_VLAN_DEFAULT_PVID + IFLA_BR_PAD + IFLA_BR_VLAN_STATS_ENABLED + IFLA_BR_MCAST_STATS_ENABLED + IFLA_BR_MCAST_IGMP_VERSION + IFLA_BR_MCAST_MLD_VERSION + IFLA_BR_MAX = IFLA_BR_MCAST_MLD_VERSION +) + +const ( + IFLA_GTP_UNSPEC = iota + IFLA_GTP_FD0 + IFLA_GTP_FD1 + IFLA_GTP_PDP_HASHSIZE + IFLA_GTP_ROLE +) + +const ( + GTP_ROLE_GGSN = iota + GTP_ROLE_SGSN +) + +const ( + IFLA_XFRM_UNSPEC = iota + IFLA_XFRM_LINK + IFLA_XFRM_IF_ID + + IFLA_XFRM_MAX = iota - 1 +) + +const ( + IFLA_TUN_UNSPEC = iota + IFLA_TUN_OWNER + IFLA_TUN_GROUP + IFLA_TUN_TYPE + IFLA_TUN_PI + IFLA_TUN_VNET_HDR + IFLA_TUN_PERSIST + IFLA_TUN_MULTI_QUEUE + IFLA_TUN_NUM_QUEUES + IFLA_TUN_NUM_DISABLED_QUEUES + IFLA_TUN_MAX = IFLA_TUN_NUM_DISABLED_QUEUES +) + +const ( + IFLA_IPOIB_UNSPEC = iota + IFLA_IPOIB_PKEY + IFLA_IPOIB_MODE + IFLA_IPOIB_UMCAST + IFLA_IPOIB_MAX = IFLA_IPOIB_UMCAST +) + +const ( + IFLA_CAN_UNSPEC = iota + IFLA_CAN_BITTIMING + IFLA_CAN_BITTIMING_CONST + IFLA_CAN_CLOCK + IFLA_CAN_STATE + IFLA_CAN_CTRLMODE + IFLA_CAN_RESTART_MS + IFLA_CAN_RESTART + IFLA_CAN_BERR_COUNTER + IFLA_CAN_DATA_BITTIMING + IFLA_CAN_DATA_BITTIMING_CONST + IFLA_CAN_TERMINATION + IFLA_CAN_TERMINATION_CONST + IFLA_CAN_BITRATE_CONST + IFLA_CAN_DATA_BITRATE_CONST + IFLA_CAN_BITRATE_MAX + IFLA_CAN_MAX = IFLA_CAN_BITRATE_MAX +) + +const ( + IFLA_BAREUDP_UNSPEC = iota + IFLA_BAREUDP_PORT + IFLA_BAREUDP_ETHERTYPE + IFLA_BAREUDP_SRCPORT_MIN + IFLA_BAREUDP_MULTIPROTO_MODE + IFLA_BAREUDP_MAX = IFLA_BAREUDP_MULTIPROTO_MODE +) diff --git a/vendor/github.com/vishvananda/netlink/nl/lwt_linux.go b/vendor/github.com/vishvananda/netlink/nl/lwt_linux.go new file mode 100644 index 000000000000..bafd593c4fe9 --- /dev/null +++ b/vendor/github.com/vishvananda/netlink/nl/lwt_linux.go @@ -0,0 +1,29 @@ +package nl + +const ( + LWT_BPF_PROG_UNSPEC = iota + LWT_BPF_PROG_FD + LWT_BPF_PROG_NAME + __LWT_BPF_PROG_MAX +) + +const ( + LWT_BPF_PROG_MAX = __LWT_BPF_PROG_MAX - 1 +) + +const ( + LWT_BPF_UNSPEC = iota + LWT_BPF_IN + LWT_BPF_OUT + LWT_BPF_XMIT + LWT_BPF_XMIT_HEADROOM + __LWT_BPF_MAX +) + +const ( + LWT_BPF_MAX = __LWT_BPF_MAX - 1 +) + +const ( + LWT_BPF_MAX_HEADROOM = 256 +) diff --git a/vendor/github.com/vishvananda/netlink/nl/mpls_linux.go b/vendor/github.com/vishvananda/netlink/nl/mpls_linux.go new file mode 100644 index 000000000000..3915b7eec44d --- /dev/null +++ b/vendor/github.com/vishvananda/netlink/nl/mpls_linux.go @@ -0,0 +1,36 @@ +package nl + +import "encoding/binary" + +const ( + MPLS_LS_LABEL_SHIFT = 12 + MPLS_LS_S_SHIFT = 8 +) + +func EncodeMPLSStack(labels ...int) []byte { + b := make([]byte, 4*len(labels)) + for idx, label := range labels { + l := label << MPLS_LS_LABEL_SHIFT + if idx == len(labels)-1 { + l |= 1 << MPLS_LS_S_SHIFT + } + binary.BigEndian.PutUint32(b[idx*4:], uint32(l)) + } + return b +} + +func DecodeMPLSStack(buf []byte) []int { + if len(buf)%4 != 0 { + return nil + } + stack := make([]int, 0, len(buf)/4) + for len(buf) > 0 { + l := binary.BigEndian.Uint32(buf[:4]) + buf = buf[4:] + stack = append(stack, int(l)>>MPLS_LS_LABEL_SHIFT) + if (l>>MPLS_LS_S_SHIFT)&1 > 0 { + break + } + } + return stack +} diff --git a/vendor/github.com/vishvananda/netlink/nl/nl_linux.go b/vendor/github.com/vishvananda/netlink/nl/nl_linux.go new file mode 100644 index 000000000000..600b942b1785 --- /dev/null +++ b/vendor/github.com/vishvananda/netlink/nl/nl_linux.go @@ -0,0 +1,900 @@ +// Package nl has low level primitives for making Netlink calls. +package nl + +import ( + "bytes" + "encoding/binary" + "fmt" + "net" + "runtime" + "sync" + "sync/atomic" + "syscall" + "unsafe" + + "github.com/vishvananda/netns" + "golang.org/x/sys/unix" +) + +const ( + // Family type definitions + FAMILY_ALL = unix.AF_UNSPEC + FAMILY_V4 = unix.AF_INET + FAMILY_V6 = unix.AF_INET6 + FAMILY_MPLS = unix.AF_MPLS + // Arbitrary set value (greater than default 4k) to allow receiving + // from kernel more verbose messages e.g. for statistics, + // tc rules or filters, or other more memory requiring data. + RECEIVE_BUFFER_SIZE = 65536 + // Kernel netlink pid + PidKernel uint32 = 0 + SizeofCnMsgOp = 0x18 +) + +// SupportedNlFamilies contains the list of netlink families this netlink package supports +var SupportedNlFamilies = []int{unix.NETLINK_ROUTE, unix.NETLINK_XFRM, unix.NETLINK_NETFILTER} + +var nextSeqNr uint32 + +// Default netlink socket timeout, 60s +var SocketTimeoutTv = unix.Timeval{Sec: 60, Usec: 0} + +// ErrorMessageReporting is the default error message reporting configuration for the new netlink sockets +var EnableErrorMessageReporting bool = false + +// GetIPFamily returns the family type of a net.IP. +func GetIPFamily(ip net.IP) int { + if len(ip) <= net.IPv4len { + return FAMILY_V4 + } + if ip.To4() != nil { + return FAMILY_V4 + } + return FAMILY_V6 +} + +var nativeEndian binary.ByteOrder + +// NativeEndian gets native endianness for the system +func NativeEndian() binary.ByteOrder { + if nativeEndian == nil { + var x uint32 = 0x01020304 + if *(*byte)(unsafe.Pointer(&x)) == 0x01 { + nativeEndian = binary.BigEndian + } else { + nativeEndian = binary.LittleEndian + } + } + return nativeEndian +} + +// Byte swap a 16 bit value if we aren't big endian +func Swap16(i uint16) uint16 { + if NativeEndian() == binary.BigEndian { + return i + } + return (i&0xff00)>>8 | (i&0xff)<<8 +} + +// Byte swap a 32 bit value if aren't big endian +func Swap32(i uint32) uint32 { + if NativeEndian() == binary.BigEndian { + return i + } + return (i&0xff000000)>>24 | (i&0xff0000)>>8 | (i&0xff00)<<8 | (i&0xff)<<24 +} + +const ( + NLMSGERR_ATTR_UNUSED = 0 + NLMSGERR_ATTR_MSG = 1 + NLMSGERR_ATTR_OFFS = 2 + NLMSGERR_ATTR_COOKIE = 3 + NLMSGERR_ATTR_POLICY = 4 +) + +type NetlinkRequestData interface { + Len() int + Serialize() []byte +} + +const ( + PROC_CN_MCAST_LISTEN = 1 + PROC_CN_MCAST_IGNORE +) + +type CbID struct { + Idx uint32 + Val uint32 +} + +type CnMsg struct { + ID CbID + Seq uint32 + Ack uint32 + Length uint16 + Flags uint16 +} + +type CnMsgOp struct { + CnMsg + // here we differ from the C header + Op uint32 +} + +func NewCnMsg(idx, val, op uint32) *CnMsgOp { + var cm CnMsgOp + + cm.ID.Idx = idx + cm.ID.Val = val + + cm.Ack = 0 + cm.Seq = 1 + cm.Length = uint16(binary.Size(op)) + cm.Op = op + + return &cm +} + +func (msg *CnMsgOp) Serialize() []byte { + return (*(*[SizeofCnMsgOp]byte)(unsafe.Pointer(msg)))[:] +} + +func DeserializeCnMsgOp(b []byte) *CnMsgOp { + return (*CnMsgOp)(unsafe.Pointer(&b[0:SizeofCnMsgOp][0])) +} + +func (msg *CnMsgOp) Len() int { + return SizeofCnMsgOp +} + +// IfInfomsg is related to links, but it is used for list requests as well +type IfInfomsg struct { + unix.IfInfomsg +} + +// Create an IfInfomsg with family specified +func NewIfInfomsg(family int) *IfInfomsg { + return &IfInfomsg{ + IfInfomsg: unix.IfInfomsg{ + Family: uint8(family), + }, + } +} + +func DeserializeIfInfomsg(b []byte) *IfInfomsg { + return (*IfInfomsg)(unsafe.Pointer(&b[0:unix.SizeofIfInfomsg][0])) +} + +func (msg *IfInfomsg) Serialize() []byte { + return (*(*[unix.SizeofIfInfomsg]byte)(unsafe.Pointer(msg)))[:] +} + +func (msg *IfInfomsg) Len() int { + return unix.SizeofIfInfomsg +} + +func (msg *IfInfomsg) EncapType() string { + switch msg.Type { + case 0: + return "generic" + case unix.ARPHRD_ETHER: + return "ether" + case unix.ARPHRD_EETHER: + return "eether" + case unix.ARPHRD_AX25: + return "ax25" + case unix.ARPHRD_PRONET: + return "pronet" + case unix.ARPHRD_CHAOS: + return "chaos" + case unix.ARPHRD_IEEE802: + return "ieee802" + case unix.ARPHRD_ARCNET: + return "arcnet" + case unix.ARPHRD_APPLETLK: + return "atalk" + case unix.ARPHRD_DLCI: + return "dlci" + case unix.ARPHRD_ATM: + return "atm" + case unix.ARPHRD_METRICOM: + return "metricom" + case unix.ARPHRD_IEEE1394: + return "ieee1394" + case unix.ARPHRD_INFINIBAND: + return "infiniband" + case unix.ARPHRD_SLIP: + return "slip" + case unix.ARPHRD_CSLIP: + return "cslip" + case unix.ARPHRD_SLIP6: + return "slip6" + case unix.ARPHRD_CSLIP6: + return "cslip6" + case unix.ARPHRD_RSRVD: + return "rsrvd" + case unix.ARPHRD_ADAPT: + return "adapt" + case unix.ARPHRD_ROSE: + return "rose" + case unix.ARPHRD_X25: + return "x25" + case unix.ARPHRD_HWX25: + return "hwx25" + case unix.ARPHRD_PPP: + return "ppp" + case unix.ARPHRD_HDLC: + return "hdlc" + case unix.ARPHRD_LAPB: + return "lapb" + case unix.ARPHRD_DDCMP: + return "ddcmp" + case unix.ARPHRD_RAWHDLC: + return "rawhdlc" + case unix.ARPHRD_TUNNEL: + return "ipip" + case unix.ARPHRD_TUNNEL6: + return "tunnel6" + case unix.ARPHRD_FRAD: + return "frad" + case unix.ARPHRD_SKIP: + return "skip" + case unix.ARPHRD_LOOPBACK: + return "loopback" + case unix.ARPHRD_LOCALTLK: + return "ltalk" + case unix.ARPHRD_FDDI: + return "fddi" + case unix.ARPHRD_BIF: + return "bif" + case unix.ARPHRD_SIT: + return "sit" + case unix.ARPHRD_IPDDP: + return "ip/ddp" + case unix.ARPHRD_IPGRE: + return "gre" + case unix.ARPHRD_PIMREG: + return "pimreg" + case unix.ARPHRD_HIPPI: + return "hippi" + case unix.ARPHRD_ASH: + return "ash" + case unix.ARPHRD_ECONET: + return "econet" + case unix.ARPHRD_IRDA: + return "irda" + case unix.ARPHRD_FCPP: + return "fcpp" + case unix.ARPHRD_FCAL: + return "fcal" + case unix.ARPHRD_FCPL: + return "fcpl" + case unix.ARPHRD_FCFABRIC: + return "fcfb0" + case unix.ARPHRD_FCFABRIC + 1: + return "fcfb1" + case unix.ARPHRD_FCFABRIC + 2: + return "fcfb2" + case unix.ARPHRD_FCFABRIC + 3: + return "fcfb3" + case unix.ARPHRD_FCFABRIC + 4: + return "fcfb4" + case unix.ARPHRD_FCFABRIC + 5: + return "fcfb5" + case unix.ARPHRD_FCFABRIC + 6: + return "fcfb6" + case unix.ARPHRD_FCFABRIC + 7: + return "fcfb7" + case unix.ARPHRD_FCFABRIC + 8: + return "fcfb8" + case unix.ARPHRD_FCFABRIC + 9: + return "fcfb9" + case unix.ARPHRD_FCFABRIC + 10: + return "fcfb10" + case unix.ARPHRD_FCFABRIC + 11: + return "fcfb11" + case unix.ARPHRD_FCFABRIC + 12: + return "fcfb12" + case unix.ARPHRD_IEEE802_TR: + return "tr" + case unix.ARPHRD_IEEE80211: + return "ieee802.11" + case unix.ARPHRD_IEEE80211_PRISM: + return "ieee802.11/prism" + case unix.ARPHRD_IEEE80211_RADIOTAP: + return "ieee802.11/radiotap" + case unix.ARPHRD_IEEE802154: + return "ieee802.15.4" + + case 65534: + return "none" + case 65535: + return "void" + } + return fmt.Sprintf("unknown%d", msg.Type) +} + +// Round the length of a netlink message up to align it properly. +// Taken from syscall/netlink_linux.go by The Go Authors under BSD-style license. +func nlmAlignOf(msglen int) int { + return (msglen + syscall.NLMSG_ALIGNTO - 1) & ^(syscall.NLMSG_ALIGNTO - 1) +} + +func rtaAlignOf(attrlen int) int { + return (attrlen + unix.RTA_ALIGNTO - 1) & ^(unix.RTA_ALIGNTO - 1) +} + +func NewIfInfomsgChild(parent *RtAttr, family int) *IfInfomsg { + msg := NewIfInfomsg(family) + parent.children = append(parent.children, msg) + return msg +} + +type Uint32Attribute struct { + Type uint16 + Value uint32 +} + +func (a *Uint32Attribute) Serialize() []byte { + native := NativeEndian() + buf := make([]byte, rtaAlignOf(8)) + native.PutUint16(buf[0:2], 8) + native.PutUint16(buf[2:4], a.Type) + + if a.Type&NLA_F_NET_BYTEORDER != 0 { + binary.BigEndian.PutUint32(buf[4:], a.Value) + } else { + native.PutUint32(buf[4:], a.Value) + } + return buf +} + +func (a *Uint32Attribute) Len() int { + return 8 +} + +// Extend RtAttr to handle data and children +type RtAttr struct { + unix.RtAttr + Data []byte + children []NetlinkRequestData +} + +// Create a new Extended RtAttr object +func NewRtAttr(attrType int, data []byte) *RtAttr { + return &RtAttr{ + RtAttr: unix.RtAttr{ + Type: uint16(attrType), + }, + children: []NetlinkRequestData{}, + Data: data, + } +} + +// NewRtAttrChild adds an RtAttr as a child to the parent and returns the new attribute +// +// Deprecated: Use AddRtAttr() on the parent object +func NewRtAttrChild(parent *RtAttr, attrType int, data []byte) *RtAttr { + return parent.AddRtAttr(attrType, data) +} + +// AddRtAttr adds an RtAttr as a child and returns the new attribute +func (a *RtAttr) AddRtAttr(attrType int, data []byte) *RtAttr { + attr := NewRtAttr(attrType, data) + a.children = append(a.children, attr) + return attr +} + +// AddChild adds an existing NetlinkRequestData as a child. +func (a *RtAttr) AddChild(attr NetlinkRequestData) { + a.children = append(a.children, attr) +} + +func (a *RtAttr) Len() int { + if len(a.children) == 0 { + return (unix.SizeofRtAttr + len(a.Data)) + } + + l := 0 + for _, child := range a.children { + l += rtaAlignOf(child.Len()) + } + l += unix.SizeofRtAttr + return rtaAlignOf(l + len(a.Data)) +} + +// Serialize the RtAttr into a byte array +// This can't just unsafe.cast because it must iterate through children. +func (a *RtAttr) Serialize() []byte { + native := NativeEndian() + + length := a.Len() + buf := make([]byte, rtaAlignOf(length)) + + next := 4 + if a.Data != nil { + copy(buf[next:], a.Data) + next += rtaAlignOf(len(a.Data)) + } + if len(a.children) > 0 { + for _, child := range a.children { + childBuf := child.Serialize() + copy(buf[next:], childBuf) + next += rtaAlignOf(len(childBuf)) + } + } + + if l := uint16(length); l != 0 { + native.PutUint16(buf[0:2], l) + } + native.PutUint16(buf[2:4], a.Type) + return buf +} + +type NetlinkRequest struct { + unix.NlMsghdr + Data []NetlinkRequestData + RawData []byte + Sockets map[int]*SocketHandle +} + +// Serialize the Netlink Request into a byte array +func (req *NetlinkRequest) Serialize() []byte { + length := unix.SizeofNlMsghdr + dataBytes := make([][]byte, len(req.Data)) + for i, data := range req.Data { + dataBytes[i] = data.Serialize() + length = length + len(dataBytes[i]) + } + length += len(req.RawData) + + req.Len = uint32(length) + b := make([]byte, length) + hdr := (*(*[unix.SizeofNlMsghdr]byte)(unsafe.Pointer(req)))[:] + next := unix.SizeofNlMsghdr + copy(b[0:next], hdr) + for _, data := range dataBytes { + for _, dataByte := range data { + b[next] = dataByte + next = next + 1 + } + } + // Add the raw data if any + if len(req.RawData) > 0 { + copy(b[next:length], req.RawData) + } + return b +} + +func (req *NetlinkRequest) AddData(data NetlinkRequestData) { + req.Data = append(req.Data, data) +} + +// AddRawData adds raw bytes to the end of the NetlinkRequest object during serialization +func (req *NetlinkRequest) AddRawData(data []byte) { + req.RawData = append(req.RawData, data...) +} + +// Execute the request against a the given sockType. +// Returns a list of netlink messages in serialized format, optionally filtered +// by resType. +func (req *NetlinkRequest) Execute(sockType int, resType uint16) ([][]byte, error) { + var ( + s *NetlinkSocket + err error + ) + + if req.Sockets != nil { + if sh, ok := req.Sockets[sockType]; ok { + s = sh.Socket + req.Seq = atomic.AddUint32(&sh.Seq, 1) + } + } + sharedSocket := s != nil + + if s == nil { + s, err = getNetlinkSocket(sockType) + if err != nil { + return nil, err + } + + if err := s.SetSendTimeout(&SocketTimeoutTv); err != nil { + return nil, err + } + if err := s.SetReceiveTimeout(&SocketTimeoutTv); err != nil { + return nil, err + } + if EnableErrorMessageReporting { + if err := s.SetExtAck(true); err != nil { + return nil, err + } + } + + defer s.Close() + } else { + s.Lock() + defer s.Unlock() + } + + if err := s.Send(req); err != nil { + return nil, err + } + + pid, err := s.GetPid() + if err != nil { + return nil, err + } + + var res [][]byte + +done: + for { + msgs, from, err := s.Receive() + if err != nil { + return nil, err + } + if from.Pid != PidKernel { + return nil, fmt.Errorf("Wrong sender portid %d, expected %d", from.Pid, PidKernel) + } + for _, m := range msgs { + if m.Header.Seq != req.Seq { + if sharedSocket { + continue + } + return nil, fmt.Errorf("Wrong Seq nr %d, expected %d", m.Header.Seq, req.Seq) + } + if m.Header.Pid != pid { + continue + } + if m.Header.Type == unix.NLMSG_DONE || m.Header.Type == unix.NLMSG_ERROR { + native := NativeEndian() + errno := int32(native.Uint32(m.Data[0:4])) + if errno == 0 { + break done + } + var err error + err = syscall.Errno(-errno) + + unreadData := m.Data[4:] + if m.Header.Flags|unix.NLM_F_ACK_TLVS != 0 && len(unreadData) > syscall.SizeofNlMsghdr { + // Skip the echoed request message. + echoReqH := (*syscall.NlMsghdr)(unsafe.Pointer(&unreadData[0])) + unreadData = unreadData[nlmAlignOf(int(echoReqH.Len)):] + + // Annotate `err` using nlmsgerr attributes. + for len(unreadData) >= syscall.SizeofRtAttr { + attr := (*syscall.RtAttr)(unsafe.Pointer(&unreadData[0])) + attrData := unreadData[syscall.SizeofRtAttr:attr.Len] + + switch attr.Type { + case NLMSGERR_ATTR_MSG: + err = fmt.Errorf("%w: %s", err, string(attrData)) + + default: + // TODO: handle other NLMSGERR_ATTR types + } + + unreadData = unreadData[rtaAlignOf(int(attr.Len)):] + } + } + + return nil, err + } + if resType != 0 && m.Header.Type != resType { + continue + } + res = append(res, m.Data) + if m.Header.Flags&unix.NLM_F_MULTI == 0 { + break done + } + } + } + return res, nil +} + +// Create a new netlink request from proto and flags +// Note the Len value will be inaccurate once data is added until +// the message is serialized +func NewNetlinkRequest(proto, flags int) *NetlinkRequest { + return &NetlinkRequest{ + NlMsghdr: unix.NlMsghdr{ + Len: uint32(unix.SizeofNlMsghdr), + Type: uint16(proto), + Flags: unix.NLM_F_REQUEST | uint16(flags), + Seq: atomic.AddUint32(&nextSeqNr, 1), + }, + } +} + +type NetlinkSocket struct { + fd int32 + lsa unix.SockaddrNetlink + sync.Mutex +} + +func getNetlinkSocket(protocol int) (*NetlinkSocket, error) { + fd, err := unix.Socket(unix.AF_NETLINK, unix.SOCK_RAW|unix.SOCK_CLOEXEC, protocol) + if err != nil { + return nil, err + } + s := &NetlinkSocket{ + fd: int32(fd), + } + s.lsa.Family = unix.AF_NETLINK + if err := unix.Bind(fd, &s.lsa); err != nil { + unix.Close(fd) + return nil, err + } + + return s, nil +} + +// GetNetlinkSocketAt opens a netlink socket in the network namespace newNs +// and positions the thread back into the network namespace specified by curNs, +// when done. If curNs is close, the function derives the current namespace and +// moves back into it when done. If newNs is close, the socket will be opened +// in the current network namespace. +func GetNetlinkSocketAt(newNs, curNs netns.NsHandle, protocol int) (*NetlinkSocket, error) { + c, err := executeInNetns(newNs, curNs) + if err != nil { + return nil, err + } + defer c() + return getNetlinkSocket(protocol) +} + +// executeInNetns sets execution of the code following this call to the +// network namespace newNs, then moves the thread back to curNs if open, +// otherwise to the current netns at the time the function was invoked +// In case of success, the caller is expected to execute the returned function +// at the end of the code that needs to be executed in the network namespace. +// Example: +// func jobAt(...) error { +// d, err := executeInNetns(...) +// if err != nil { return err} +// defer d() +// < code which needs to be executed in specific netns> +// } +// TODO: his function probably belongs to netns pkg. +func executeInNetns(newNs, curNs netns.NsHandle) (func(), error) { + var ( + err error + moveBack func(netns.NsHandle) error + closeNs func() error + unlockThd func() + ) + restore := func() { + // order matters + if moveBack != nil { + moveBack(curNs) + } + if closeNs != nil { + closeNs() + } + if unlockThd != nil { + unlockThd() + } + } + if newNs.IsOpen() { + runtime.LockOSThread() + unlockThd = runtime.UnlockOSThread + if !curNs.IsOpen() { + if curNs, err = netns.Get(); err != nil { + restore() + return nil, fmt.Errorf("could not get current namespace while creating netlink socket: %v", err) + } + closeNs = curNs.Close + } + if err := netns.Set(newNs); err != nil { + restore() + return nil, fmt.Errorf("failed to set into network namespace %d while creating netlink socket: %v", newNs, err) + } + moveBack = netns.Set + } + return restore, nil +} + +// Create a netlink socket with a given protocol (e.g. NETLINK_ROUTE) +// and subscribe it to multicast groups passed in variable argument list. +// Returns the netlink socket on which Receive() method can be called +// to retrieve the messages from the kernel. +func Subscribe(protocol int, groups ...uint) (*NetlinkSocket, error) { + fd, err := unix.Socket(unix.AF_NETLINK, unix.SOCK_RAW, protocol) + if err != nil { + return nil, err + } + s := &NetlinkSocket{ + fd: int32(fd), + } + s.lsa.Family = unix.AF_NETLINK + + for _, g := range groups { + s.lsa.Groups |= (1 << (g - 1)) + } + + if err := unix.Bind(fd, &s.lsa); err != nil { + unix.Close(fd) + return nil, err + } + + return s, nil +} + +// SubscribeAt works like Subscribe plus let's the caller choose the network +// namespace in which the socket would be opened (newNs). Then control goes back +// to curNs if open, otherwise to the netns at the time this function was called. +func SubscribeAt(newNs, curNs netns.NsHandle, protocol int, groups ...uint) (*NetlinkSocket, error) { + c, err := executeInNetns(newNs, curNs) + if err != nil { + return nil, err + } + defer c() + return Subscribe(protocol, groups...) +} + +func (s *NetlinkSocket) Close() { + fd := int(atomic.SwapInt32(&s.fd, -1)) + unix.Close(fd) +} + +func (s *NetlinkSocket) GetFd() int { + return int(atomic.LoadInt32(&s.fd)) +} + +func (s *NetlinkSocket) Send(request *NetlinkRequest) error { + fd := int(atomic.LoadInt32(&s.fd)) + if fd < 0 { + return fmt.Errorf("Send called on a closed socket") + } + if err := unix.Sendto(fd, request.Serialize(), 0, &s.lsa); err != nil { + return err + } + return nil +} + +func (s *NetlinkSocket) Receive() ([]syscall.NetlinkMessage, *unix.SockaddrNetlink, error) { + fd := int(atomic.LoadInt32(&s.fd)) + if fd < 0 { + return nil, nil, fmt.Errorf("Receive called on a closed socket") + } + var fromAddr *unix.SockaddrNetlink + var rb [RECEIVE_BUFFER_SIZE]byte + nr, from, err := unix.Recvfrom(fd, rb[:], 0) + if err != nil { + return nil, nil, err + } + fromAddr, ok := from.(*unix.SockaddrNetlink) + if !ok { + return nil, nil, fmt.Errorf("Error converting to netlink sockaddr") + } + if nr < unix.NLMSG_HDRLEN { + return nil, nil, fmt.Errorf("Got short response from netlink") + } + rb2 := make([]byte, nr) + copy(rb2, rb[:nr]) + nl, err := syscall.ParseNetlinkMessage(rb2) + if err != nil { + return nil, nil, err + } + return nl, fromAddr, nil +} + +// SetSendTimeout allows to set a send timeout on the socket +func (s *NetlinkSocket) SetSendTimeout(timeout *unix.Timeval) error { + // Set a send timeout of SOCKET_SEND_TIMEOUT, this will allow the Send to periodically unblock and avoid that a routine + // remains stuck on a send on a closed fd + return unix.SetsockoptTimeval(int(s.fd), unix.SOL_SOCKET, unix.SO_SNDTIMEO, timeout) +} + +// SetReceiveTimeout allows to set a receive timeout on the socket +func (s *NetlinkSocket) SetReceiveTimeout(timeout *unix.Timeval) error { + // Set a read timeout of SOCKET_READ_TIMEOUT, this will allow the Read to periodically unblock and avoid that a routine + // remains stuck on a recvmsg on a closed fd + return unix.SetsockoptTimeval(int(s.fd), unix.SOL_SOCKET, unix.SO_RCVTIMEO, timeout) +} + +// SetExtAck requests error messages to be reported on the socket +func (s *NetlinkSocket) SetExtAck(enable bool) error { + var enableN int + if enable { + enableN = 1 + } + + return unix.SetsockoptInt(int(s.fd), unix.SOL_NETLINK, unix.NETLINK_EXT_ACK, enableN) +} + +func (s *NetlinkSocket) GetPid() (uint32, error) { + fd := int(atomic.LoadInt32(&s.fd)) + lsa, err := unix.Getsockname(fd) + if err != nil { + return 0, err + } + switch v := lsa.(type) { + case *unix.SockaddrNetlink: + return v.Pid, nil + } + return 0, fmt.Errorf("Wrong socket type") +} + +func ZeroTerminated(s string) []byte { + bytes := make([]byte, len(s)+1) + for i := 0; i < len(s); i++ { + bytes[i] = s[i] + } + bytes[len(s)] = 0 + return bytes +} + +func NonZeroTerminated(s string) []byte { + bytes := make([]byte, len(s)) + for i := 0; i < len(s); i++ { + bytes[i] = s[i] + } + return bytes +} + +func BytesToString(b []byte) string { + n := bytes.Index(b, []byte{0}) + return string(b[:n]) +} + +func Uint8Attr(v uint8) []byte { + return []byte{byte(v)} +} + +func Uint16Attr(v uint16) []byte { + native := NativeEndian() + bytes := make([]byte, 2) + native.PutUint16(bytes, v) + return bytes +} + +func Uint32Attr(v uint32) []byte { + native := NativeEndian() + bytes := make([]byte, 4) + native.PutUint32(bytes, v) + return bytes +} + +func Uint64Attr(v uint64) []byte { + native := NativeEndian() + bytes := make([]byte, 8) + native.PutUint64(bytes, v) + return bytes +} + +func ParseRouteAttr(b []byte) ([]syscall.NetlinkRouteAttr, error) { + var attrs []syscall.NetlinkRouteAttr + for len(b) >= unix.SizeofRtAttr { + a, vbuf, alen, err := netlinkRouteAttrAndValue(b) + if err != nil { + return nil, err + } + ra := syscall.NetlinkRouteAttr{Attr: syscall.RtAttr(*a), Value: vbuf[:int(a.Len)-unix.SizeofRtAttr]} + attrs = append(attrs, ra) + b = b[alen:] + } + return attrs, nil +} + +func netlinkRouteAttrAndValue(b []byte) (*unix.RtAttr, []byte, int, error) { + a := (*unix.RtAttr)(unsafe.Pointer(&b[0])) + if int(a.Len) < unix.SizeofRtAttr || int(a.Len) > len(b) { + return nil, nil, 0, unix.EINVAL + } + return a, b[unix.SizeofRtAttr:], rtaAlignOf(int(a.Len)), nil +} + +// SocketHandle contains the netlink socket and the associated +// sequence counter for a specific netlink family +type SocketHandle struct { + Seq uint32 + Socket *NetlinkSocket +} + +// Close closes the netlink socket +func (sh *SocketHandle) Close() { + if sh.Socket != nil { + sh.Socket.Close() + } +} diff --git a/vendor/github.com/vishvananda/netlink/nl/nl_unspecified.go b/vendor/github.com/vishvananda/netlink/nl/nl_unspecified.go new file mode 100644 index 000000000000..dfc0be6606c2 --- /dev/null +++ b/vendor/github.com/vishvananda/netlink/nl/nl_unspecified.go @@ -0,0 +1,11 @@ +// +build !linux + +package nl + +import "encoding/binary" + +var SupportedNlFamilies = []int{} + +func NativeEndian() binary.ByteOrder { + return nil +} diff --git a/vendor/github.com/vishvananda/netlink/nl/parse_attr_linux.go b/vendor/github.com/vishvananda/netlink/nl/parse_attr_linux.go new file mode 100644 index 000000000000..7f49125cffa2 --- /dev/null +++ b/vendor/github.com/vishvananda/netlink/nl/parse_attr_linux.go @@ -0,0 +1,79 @@ +package nl + +import ( + "encoding/binary" + "fmt" + "log" +) + +type Attribute struct { + Type uint16 + Value []byte +} + +func ParseAttributes(data []byte) <-chan Attribute { + native := NativeEndian() + result := make(chan Attribute) + + go func() { + i := 0 + for i+4 < len(data) { + length := int(native.Uint16(data[i : i+2])) + attrType := native.Uint16(data[i+2 : i+4]) + + if length < 4 { + log.Printf("attribute 0x%02x has invalid length of %d bytes", attrType, length) + break + } + + if len(data) < i+length { + log.Printf("attribute 0x%02x of length %d is truncated, only %d bytes remaining", attrType, length, len(data)-i) + break + } + + result <- Attribute{ + Type: attrType, + Value: data[i+4 : i+length], + } + i += rtaAlignOf(length) + } + close(result) + }() + + return result +} + +func PrintAttributes(data []byte) { + printAttributes(data, 0) +} + +func printAttributes(data []byte, level int) { + for attr := range ParseAttributes(data) { + for i := 0; i < level; i++ { + print("> ") + } + nested := attr.Type&NLA_F_NESTED != 0 + fmt.Printf("type=%d nested=%v len=%v %v\n", attr.Type&NLA_TYPE_MASK, nested, len(attr.Value), attr.Value) + if nested { + printAttributes(attr.Value, level+1) + } + } +} + +// Uint32 returns the uint32 value respecting the NET_BYTEORDER flag +func (attr *Attribute) Uint32() uint32 { + if attr.Type&NLA_F_NET_BYTEORDER != 0 { + return binary.BigEndian.Uint32(attr.Value) + } else { + return NativeEndian().Uint32(attr.Value) + } +} + +// Uint64 returns the uint64 value respecting the NET_BYTEORDER flag +func (attr *Attribute) Uint64() uint64 { + if attr.Type&NLA_F_NET_BYTEORDER != 0 { + return binary.BigEndian.Uint64(attr.Value) + } else { + return NativeEndian().Uint64(attr.Value) + } +} diff --git a/vendor/github.com/vishvananda/netlink/nl/rdma_link_linux.go b/vendor/github.com/vishvananda/netlink/nl/rdma_link_linux.go new file mode 100644 index 000000000000..ce43ee1550c5 --- /dev/null +++ b/vendor/github.com/vishvananda/netlink/nl/rdma_link_linux.go @@ -0,0 +1,39 @@ +package nl + +const ( + RDMA_NL_GET_CLIENT_SHIFT = 10 +) + +const ( + RDMA_NL_NLDEV = 5 +) + +const ( + RDMA_NLDEV_CMD_GET = 1 + RDMA_NLDEV_CMD_SET = 2 + RDMA_NLDEV_CMD_NEWLINK = 3 + RDMA_NLDEV_CMD_DELLINK = 4 + RDMA_NLDEV_CMD_SYS_GET = 6 + RDMA_NLDEV_CMD_SYS_SET = 7 +) + +const ( + RDMA_NLDEV_ATTR_DEV_INDEX = 1 + RDMA_NLDEV_ATTR_DEV_NAME = 2 + RDMA_NLDEV_ATTR_PORT_INDEX = 3 + RDMA_NLDEV_ATTR_CAP_FLAGS = 4 + RDMA_NLDEV_ATTR_FW_VERSION = 5 + RDMA_NLDEV_ATTR_NODE_GUID = 6 + RDMA_NLDEV_ATTR_SYS_IMAGE_GUID = 7 + RDMA_NLDEV_ATTR_SUBNET_PREFIX = 8 + RDMA_NLDEV_ATTR_LID = 9 + RDMA_NLDEV_ATTR_SM_LID = 10 + RDMA_NLDEV_ATTR_LMC = 11 + RDMA_NLDEV_ATTR_PORT_STATE = 12 + RDMA_NLDEV_ATTR_PORT_PHYS_STATE = 13 + RDMA_NLDEV_ATTR_DEV_NODE_TYPE = 14 + RDMA_NLDEV_ATTR_NDEV_NAME = 51 + RDMA_NLDEV_ATTR_LINK_TYPE = 65 + RDMA_NLDEV_SYS_ATTR_NETNS_MODE = 66 + RDMA_NLDEV_NET_NS_FD = 68 +) diff --git a/vendor/github.com/vishvananda/netlink/nl/route_linux.go b/vendor/github.com/vishvananda/netlink/nl/route_linux.go new file mode 100644 index 000000000000..03c1900ffa89 --- /dev/null +++ b/vendor/github.com/vishvananda/netlink/nl/route_linux.go @@ -0,0 +1,107 @@ +package nl + +import ( + "unsafe" + + "golang.org/x/sys/unix" +) + +type RtMsg struct { + unix.RtMsg +} + +func NewRtMsg() *RtMsg { + return &RtMsg{ + RtMsg: unix.RtMsg{ + Table: unix.RT_TABLE_MAIN, + Scope: unix.RT_SCOPE_UNIVERSE, + Protocol: unix.RTPROT_BOOT, + Type: unix.RTN_UNICAST, + }, + } +} + +func NewRtDelMsg() *RtMsg { + return &RtMsg{ + RtMsg: unix.RtMsg{ + Table: unix.RT_TABLE_MAIN, + Scope: unix.RT_SCOPE_NOWHERE, + }, + } +} + +func (msg *RtMsg) Len() int { + return unix.SizeofRtMsg +} + +func DeserializeRtMsg(b []byte) *RtMsg { + return (*RtMsg)(unsafe.Pointer(&b[0:unix.SizeofRtMsg][0])) +} + +func (msg *RtMsg) Serialize() []byte { + return (*(*[unix.SizeofRtMsg]byte)(unsafe.Pointer(msg)))[:] +} + +type RtNexthop struct { + unix.RtNexthop + Children []NetlinkRequestData +} + +func DeserializeRtNexthop(b []byte) *RtNexthop { + return (*RtNexthop)(unsafe.Pointer(&b[0:unix.SizeofRtNexthop][0])) +} + +func (msg *RtNexthop) Len() int { + if len(msg.Children) == 0 { + return unix.SizeofRtNexthop + } + + l := 0 + for _, child := range msg.Children { + l += rtaAlignOf(child.Len()) + } + l += unix.SizeofRtNexthop + return rtaAlignOf(l) +} + +func (msg *RtNexthop) Serialize() []byte { + length := msg.Len() + msg.RtNexthop.Len = uint16(length) + buf := make([]byte, length) + copy(buf, (*(*[unix.SizeofRtNexthop]byte)(unsafe.Pointer(msg)))[:]) + next := rtaAlignOf(unix.SizeofRtNexthop) + if len(msg.Children) > 0 { + for _, child := range msg.Children { + childBuf := child.Serialize() + copy(buf[next:], childBuf) + next += rtaAlignOf(len(childBuf)) + } + } + return buf +} + +type RtGenMsg struct { + unix.RtGenmsg +} + +func NewRtGenMsg() *RtGenMsg { + return &RtGenMsg{ + RtGenmsg: unix.RtGenmsg{ + Family: unix.AF_UNSPEC, + }, + } +} + +func (msg *RtGenMsg) Len() int { + return rtaAlignOf(unix.SizeofRtGenmsg) +} + +func DeserializeRtGenMsg(b []byte) *RtGenMsg { + return &RtGenMsg{RtGenmsg: unix.RtGenmsg{Family: b[0]}} +} + +func (msg *RtGenMsg) Serialize() []byte { + out := make([]byte, msg.Len()) + out[0] = msg.Family + return out +} diff --git a/vendor/github.com/vishvananda/netlink/nl/seg6_linux.go b/vendor/github.com/vishvananda/netlink/nl/seg6_linux.go new file mode 100644 index 000000000000..fe88285f20ec --- /dev/null +++ b/vendor/github.com/vishvananda/netlink/nl/seg6_linux.go @@ -0,0 +1,154 @@ +package nl + +import ( + "errors" + "fmt" + "net" +) + +type IPv6SrHdr struct { + nextHdr uint8 + hdrLen uint8 + routingType uint8 + segmentsLeft uint8 + firstSegment uint8 + flags uint8 + reserved uint16 + + Segments []net.IP +} + +func (s1 *IPv6SrHdr) Equal(s2 IPv6SrHdr) bool { + if len(s1.Segments) != len(s2.Segments) { + return false + } + for i := range s1.Segments { + if !s1.Segments[i].Equal(s2.Segments[i]) { + return false + } + } + return s1.nextHdr == s2.nextHdr && + s1.hdrLen == s2.hdrLen && + s1.routingType == s2.routingType && + s1.segmentsLeft == s2.segmentsLeft && + s1.firstSegment == s2.firstSegment && + s1.flags == s2.flags + // reserved doesn't need to be identical. +} + +// seg6 encap mode +const ( + SEG6_IPTUN_MODE_INLINE = iota + SEG6_IPTUN_MODE_ENCAP +) + +// number of nested RTATTR +// from include/uapi/linux/seg6_iptunnel.h +const ( + SEG6_IPTUNNEL_UNSPEC = iota + SEG6_IPTUNNEL_SRH + __SEG6_IPTUNNEL_MAX +) +const ( + SEG6_IPTUNNEL_MAX = __SEG6_IPTUNNEL_MAX - 1 +) + +func EncodeSEG6Encap(mode int, segments []net.IP) ([]byte, error) { + nsegs := len(segments) // nsegs: number of segments + if nsegs == 0 { + return nil, errors.New("EncodeSEG6Encap: No Segment in srh") + } + b := make([]byte, 12, 12+len(segments)*16) + native := NativeEndian() + native.PutUint32(b, uint32(mode)) + b[4] = 0 // srh.nextHdr (0 when calling netlink) + b[5] = uint8(16 * nsegs >> 3) // srh.hdrLen (in 8-octets unit) + b[6] = IPV6_SRCRT_TYPE_4 // srh.routingType (assigned by IANA) + b[7] = uint8(nsegs - 1) // srh.segmentsLeft + b[8] = uint8(nsegs - 1) // srh.firstSegment + b[9] = 0 // srh.flags (SR6_FLAG1_HMAC for srh_hmac) + // srh.reserved: Defined as "Tag" in draft-ietf-6man-segment-routing-header-07 + native.PutUint16(b[10:], 0) // srh.reserved + for _, netIP := range segments { + b = append(b, netIP...) // srh.Segments + } + return b, nil +} + +func DecodeSEG6Encap(buf []byte) (int, []net.IP, error) { + native := NativeEndian() + mode := int(native.Uint32(buf)) + srh := IPv6SrHdr{ + nextHdr: buf[4], + hdrLen: buf[5], + routingType: buf[6], + segmentsLeft: buf[7], + firstSegment: buf[8], + flags: buf[9], + reserved: native.Uint16(buf[10:12]), + } + buf = buf[12:] + if len(buf)%16 != 0 { + err := fmt.Errorf("DecodeSEG6Encap: error parsing Segment List (buf len: %d)", len(buf)) + return mode, nil, err + } + for len(buf) > 0 { + srh.Segments = append(srh.Segments, net.IP(buf[:16])) + buf = buf[16:] + } + return mode, srh.Segments, nil +} + +func DecodeSEG6Srh(buf []byte) ([]net.IP, error) { + native := NativeEndian() + srh := IPv6SrHdr{ + nextHdr: buf[0], + hdrLen: buf[1], + routingType: buf[2], + segmentsLeft: buf[3], + firstSegment: buf[4], + flags: buf[5], + reserved: native.Uint16(buf[6:8]), + } + buf = buf[8:] + if len(buf)%16 != 0 { + err := fmt.Errorf("DecodeSEG6Srh: error parsing Segment List (buf len: %d)", len(buf)) + return nil, err + } + for len(buf) > 0 { + srh.Segments = append(srh.Segments, net.IP(buf[:16])) + buf = buf[16:] + } + return srh.Segments, nil +} +func EncodeSEG6Srh(segments []net.IP) ([]byte, error) { + nsegs := len(segments) // nsegs: number of segments + if nsegs == 0 { + return nil, errors.New("EncodeSEG6Srh: No Segments") + } + b := make([]byte, 8, 8+len(segments)*16) + native := NativeEndian() + b[0] = 0 // srh.nextHdr (0 when calling netlink) + b[1] = uint8(16 * nsegs >> 3) // srh.hdrLen (in 8-octets unit) + b[2] = IPV6_SRCRT_TYPE_4 // srh.routingType (assigned by IANA) + b[3] = uint8(nsegs - 1) // srh.segmentsLeft + b[4] = uint8(nsegs - 1) // srh.firstSegment + b[5] = 0 // srh.flags (SR6_FLAG1_HMAC for srh_hmac) + // srh.reserved: Defined as "Tag" in draft-ietf-6man-segment-routing-header-07 + native.PutUint16(b[6:], 0) // srh.reserved + for _, netIP := range segments { + b = append(b, netIP...) // srh.Segments + } + return b, nil +} + +// Helper functions +func SEG6EncapModeString(mode int) string { + switch mode { + case SEG6_IPTUN_MODE_INLINE: + return "inline" + case SEG6_IPTUN_MODE_ENCAP: + return "encap" + } + return "unknown" +} diff --git a/vendor/github.com/vishvananda/netlink/nl/seg6local_linux.go b/vendor/github.com/vishvananda/netlink/nl/seg6local_linux.go new file mode 100644 index 000000000000..1500177267af --- /dev/null +++ b/vendor/github.com/vishvananda/netlink/nl/seg6local_linux.go @@ -0,0 +1,76 @@ +package nl + +import () + +// seg6local parameters +const ( + SEG6_LOCAL_UNSPEC = iota + SEG6_LOCAL_ACTION + SEG6_LOCAL_SRH + SEG6_LOCAL_TABLE + SEG6_LOCAL_NH4 + SEG6_LOCAL_NH6 + SEG6_LOCAL_IIF + SEG6_LOCAL_OIF + __SEG6_LOCAL_MAX +) +const ( + SEG6_LOCAL_MAX = __SEG6_LOCAL_MAX +) + +// seg6local actions +const ( + SEG6_LOCAL_ACTION_END = iota + 1 // 1 + SEG6_LOCAL_ACTION_END_X // 2 + SEG6_LOCAL_ACTION_END_T // 3 + SEG6_LOCAL_ACTION_END_DX2 // 4 + SEG6_LOCAL_ACTION_END_DX6 // 5 + SEG6_LOCAL_ACTION_END_DX4 // 6 + SEG6_LOCAL_ACTION_END_DT6 // 7 + SEG6_LOCAL_ACTION_END_DT4 // 8 + SEG6_LOCAL_ACTION_END_B6 // 9 + SEG6_LOCAL_ACTION_END_B6_ENCAPS // 10 + SEG6_LOCAL_ACTION_END_BM // 11 + SEG6_LOCAL_ACTION_END_S // 12 + SEG6_LOCAL_ACTION_END_AS // 13 + SEG6_LOCAL_ACTION_END_AM // 14 + __SEG6_LOCAL_ACTION_MAX +) +const ( + SEG6_LOCAL_ACTION_MAX = __SEG6_LOCAL_ACTION_MAX - 1 +) + +// Helper functions +func SEG6LocalActionString(action int) string { + switch action { + case SEG6_LOCAL_ACTION_END: + return "End" + case SEG6_LOCAL_ACTION_END_X: + return "End.X" + case SEG6_LOCAL_ACTION_END_T: + return "End.T" + case SEG6_LOCAL_ACTION_END_DX2: + return "End.DX2" + case SEG6_LOCAL_ACTION_END_DX6: + return "End.DX6" + case SEG6_LOCAL_ACTION_END_DX4: + return "End.DX4" + case SEG6_LOCAL_ACTION_END_DT6: + return "End.DT6" + case SEG6_LOCAL_ACTION_END_DT4: + return "End.DT4" + case SEG6_LOCAL_ACTION_END_B6: + return "End.B6" + case SEG6_LOCAL_ACTION_END_B6_ENCAPS: + return "End.B6.Encaps" + case SEG6_LOCAL_ACTION_END_BM: + return "End.BM" + case SEG6_LOCAL_ACTION_END_S: + return "End.S" + case SEG6_LOCAL_ACTION_END_AS: + return "End.AS" + case SEG6_LOCAL_ACTION_END_AM: + return "End.AM" + } + return "unknown" +} diff --git a/vendor/github.com/vishvananda/netlink/nl/syscall.go b/vendor/github.com/vishvananda/netlink/nl/syscall.go new file mode 100644 index 000000000000..bdf6ba639572 --- /dev/null +++ b/vendor/github.com/vishvananda/netlink/nl/syscall.go @@ -0,0 +1,76 @@ +package nl + +// syscall package lack of rule attributes type. +// Thus there are defined below +const ( + FRA_UNSPEC = iota + FRA_DST /* destination address */ + FRA_SRC /* source address */ + FRA_IIFNAME /* interface name */ + FRA_GOTO /* target to jump to (FR_ACT_GOTO) */ + FRA_UNUSED2 + FRA_PRIORITY /* priority/preference */ + FRA_UNUSED3 + FRA_UNUSED4 + FRA_UNUSED5 + FRA_FWMARK /* mark */ + FRA_FLOW /* flow/class id */ + FRA_TUN_ID + FRA_SUPPRESS_IFGROUP + FRA_SUPPRESS_PREFIXLEN + FRA_TABLE /* Extended table id */ + FRA_FWMASK /* mask for netfilter mark */ + FRA_OIFNAME + FRA_PAD + FRA_L3MDEV /* iif or oif is l3mdev goto its table */ + FRA_UID_RANGE /* UID range */ + FRA_PROTOCOL /* Originator of the rule */ + FRA_IP_PROTO /* ip proto */ + FRA_SPORT_RANGE /* sport */ + FRA_DPORT_RANGE /* dport */ +) + +// ip rule netlink request types +const ( + FR_ACT_UNSPEC = iota + FR_ACT_TO_TBL /* Pass to fixed table */ + FR_ACT_GOTO /* Jump to another rule */ + FR_ACT_NOP /* No operation */ + FR_ACT_RES3 + FR_ACT_RES4 + FR_ACT_BLACKHOLE /* Drop without notification */ + FR_ACT_UNREACHABLE /* Drop with ENETUNREACH */ + FR_ACT_PROHIBIT /* Drop with EACCES */ +) + +// socket diags related +const ( + SOCK_DIAG_BY_FAMILY = 20 /* linux.sock_diag.h */ + TCPDIAG_NOCOOKIE = 0xFFFFFFFF /* TCPDIAG_NOCOOKIE in net/ipv4/tcp_diag.h*/ +) + +// RTA_ENCAP subtype +const ( + MPLS_IPTUNNEL_UNSPEC = iota + MPLS_IPTUNNEL_DST +) + +// light weight tunnel encap types +const ( + LWTUNNEL_ENCAP_NONE = iota + LWTUNNEL_ENCAP_MPLS + LWTUNNEL_ENCAP_IP + LWTUNNEL_ENCAP_ILA + LWTUNNEL_ENCAP_IP6 + LWTUNNEL_ENCAP_SEG6 + LWTUNNEL_ENCAP_BPF + LWTUNNEL_ENCAP_SEG6_LOCAL +) + +// routing header types +const ( + IPV6_SRCRT_STRICT = 0x01 // Deprecated; will be removed + IPV6_SRCRT_TYPE_0 = 0 // Deprecated; will be removed + IPV6_SRCRT_TYPE_2 = 2 // IPv6 type 2 Routing Header + IPV6_SRCRT_TYPE_4 = 4 // Segment Routing with IPv6 +) diff --git a/vendor/github.com/vishvananda/netlink/nl/tc_linux.go b/vendor/github.com/vishvananda/netlink/nl/tc_linux.go new file mode 100644 index 000000000000..eb05ff1cd1c0 --- /dev/null +++ b/vendor/github.com/vishvananda/netlink/nl/tc_linux.go @@ -0,0 +1,1119 @@ +package nl + +import ( + "encoding/binary" + "unsafe" +) + +// LinkLayer +const ( + LINKLAYER_UNSPEC = iota + LINKLAYER_ETHERNET + LINKLAYER_ATM +) + +// ATM +const ( + ATM_CELL_PAYLOAD = 48 + ATM_CELL_SIZE = 53 +) + +const TC_LINKLAYER_MASK = 0x0F + +// Police +const ( + TCA_POLICE_UNSPEC = iota + TCA_POLICE_TBF + TCA_POLICE_RATE + TCA_POLICE_PEAKRATE + TCA_POLICE_AVRATE + TCA_POLICE_RESULT + TCA_POLICE_MAX = TCA_POLICE_RESULT +) + +// Message types +const ( + TCA_UNSPEC = iota + TCA_KIND + TCA_OPTIONS + TCA_STATS + TCA_XSTATS + TCA_RATE + TCA_FCNT + TCA_STATS2 + TCA_STAB + TCA_MAX = TCA_STAB +) + +const ( + TCA_ACT_TAB = 1 + TCAA_MAX = 1 +) + +const ( + TCA_ACT_UNSPEC = iota + TCA_ACT_KIND + TCA_ACT_OPTIONS + TCA_ACT_INDEX + TCA_ACT_STATS + TCA_ACT_MAX +) + +const ( + TCA_PRIO_UNSPEC = iota + TCA_PRIO_MQ + TCA_PRIO_MAX = TCA_PRIO_MQ +) + +const ( + TCA_STATS_UNSPEC = iota + TCA_STATS_BASIC + TCA_STATS_RATE_EST + TCA_STATS_QUEUE + TCA_STATS_APP + TCA_STATS_MAX = TCA_STATS_APP +) + +const ( + SizeofTcMsg = 0x14 + SizeofTcActionMsg = 0x04 + SizeofTcPrioMap = 0x14 + SizeofTcRateSpec = 0x0c + SizeofTcNetemQopt = 0x18 + SizeofTcNetemCorr = 0x0c + SizeofTcNetemReorder = 0x08 + SizeofTcNetemCorrupt = 0x08 + SizeofTcTbfQopt = 2*SizeofTcRateSpec + 0x0c + SizeofTcHtbCopt = 2*SizeofTcRateSpec + 0x14 + SizeofTcHtbGlob = 0x14 + SizeofTcU32Key = 0x10 + SizeofTcU32Sel = 0x10 // without keys + SizeofTcGen = 0x14 + SizeofTcConnmark = SizeofTcGen + 0x04 + SizeofTcCsum = SizeofTcGen + 0x04 + SizeofTcMirred = SizeofTcGen + 0x08 + SizeofTcTunnelKey = SizeofTcGen + 0x04 + SizeofTcSkbEdit = SizeofTcGen + SizeofTcPolice = 2*SizeofTcRateSpec + 0x20 + SizeofTcSfqQopt = 0x0b + SizeofTcSfqRedStats = 0x18 + SizeofTcSfqQoptV1 = SizeofTcSfqQopt + SizeofTcSfqRedStats + 0x1c +) + +// struct tcmsg { +// unsigned char tcm_family; +// unsigned char tcm__pad1; +// unsigned short tcm__pad2; +// int tcm_ifindex; +// __u32 tcm_handle; +// __u32 tcm_parent; +// __u32 tcm_info; +// }; + +type TcMsg struct { + Family uint8 + Pad [3]byte + Ifindex int32 + Handle uint32 + Parent uint32 + Info uint32 +} + +func (msg *TcMsg) Len() int { + return SizeofTcMsg +} + +func DeserializeTcMsg(b []byte) *TcMsg { + return (*TcMsg)(unsafe.Pointer(&b[0:SizeofTcMsg][0])) +} + +func (x *TcMsg) Serialize() []byte { + return (*(*[SizeofTcMsg]byte)(unsafe.Pointer(x)))[:] +} + +// struct tcamsg { +// unsigned char tca_family; +// unsigned char tca__pad1; +// unsigned short tca__pad2; +// }; + +type TcActionMsg struct { + Family uint8 + Pad [3]byte +} + +func (msg *TcActionMsg) Len() int { + return SizeofTcActionMsg +} + +func DeserializeTcActionMsg(b []byte) *TcActionMsg { + return (*TcActionMsg)(unsafe.Pointer(&b[0:SizeofTcActionMsg][0])) +} + +func (x *TcActionMsg) Serialize() []byte { + return (*(*[SizeofTcActionMsg]byte)(unsafe.Pointer(x)))[:] +} + +const ( + TC_PRIO_MAX = 15 +) + +// struct tc_prio_qopt { +// int bands; /* Number of bands */ +// __u8 priomap[TC_PRIO_MAX+1]; /* Map: logical priority -> PRIO band */ +// }; + +type TcPrioMap struct { + Bands int32 + Priomap [TC_PRIO_MAX + 1]uint8 +} + +func (msg *TcPrioMap) Len() int { + return SizeofTcPrioMap +} + +func DeserializeTcPrioMap(b []byte) *TcPrioMap { + return (*TcPrioMap)(unsafe.Pointer(&b[0:SizeofTcPrioMap][0])) +} + +func (x *TcPrioMap) Serialize() []byte { + return (*(*[SizeofTcPrioMap]byte)(unsafe.Pointer(x)))[:] +} + +const ( + TCA_TBF_UNSPEC = iota + TCA_TBF_PARMS + TCA_TBF_RTAB + TCA_TBF_PTAB + TCA_TBF_RATE64 + TCA_TBF_PRATE64 + TCA_TBF_BURST + TCA_TBF_PBURST + TCA_TBF_MAX = TCA_TBF_PBURST +) + +// struct tc_ratespec { +// unsigned char cell_log; +// __u8 linklayer; /* lower 4 bits */ +// unsigned short overhead; +// short cell_align; +// unsigned short mpu; +// __u32 rate; +// }; + +type TcRateSpec struct { + CellLog uint8 + Linklayer uint8 + Overhead uint16 + CellAlign int16 + Mpu uint16 + Rate uint32 +} + +func (msg *TcRateSpec) Len() int { + return SizeofTcRateSpec +} + +func DeserializeTcRateSpec(b []byte) *TcRateSpec { + return (*TcRateSpec)(unsafe.Pointer(&b[0:SizeofTcRateSpec][0])) +} + +func (x *TcRateSpec) Serialize() []byte { + return (*(*[SizeofTcRateSpec]byte)(unsafe.Pointer(x)))[:] +} + +/** +* NETEM + */ + +const ( + TCA_NETEM_UNSPEC = iota + TCA_NETEM_CORR + TCA_NETEM_DELAY_DIST + TCA_NETEM_REORDER + TCA_NETEM_CORRUPT + TCA_NETEM_LOSS + TCA_NETEM_RATE + TCA_NETEM_ECN + TCA_NETEM_RATE64 + TCA_NETEM_MAX = TCA_NETEM_RATE64 +) + +// struct tc_netem_qopt { +// __u32 latency; /* added delay (us) */ +// __u32 limit; /* fifo limit (packets) */ +// __u32 loss; /* random packet loss (0=none ~0=100%) */ +// __u32 gap; /* re-ordering gap (0 for none) */ +// __u32 duplicate; /* random packet dup (0=none ~0=100%) */ +// __u32 jitter; /* random jitter in latency (us) */ +// }; + +type TcNetemQopt struct { + Latency uint32 + Limit uint32 + Loss uint32 + Gap uint32 + Duplicate uint32 + Jitter uint32 +} + +func (msg *TcNetemQopt) Len() int { + return SizeofTcNetemQopt +} + +func DeserializeTcNetemQopt(b []byte) *TcNetemQopt { + return (*TcNetemQopt)(unsafe.Pointer(&b[0:SizeofTcNetemQopt][0])) +} + +func (x *TcNetemQopt) Serialize() []byte { + return (*(*[SizeofTcNetemQopt]byte)(unsafe.Pointer(x)))[:] +} + +// struct tc_netem_corr { +// __u32 delay_corr; /* delay correlation */ +// __u32 loss_corr; /* packet loss correlation */ +// __u32 dup_corr; /* duplicate correlation */ +// }; + +type TcNetemCorr struct { + DelayCorr uint32 + LossCorr uint32 + DupCorr uint32 +} + +func (msg *TcNetemCorr) Len() int { + return SizeofTcNetemCorr +} + +func DeserializeTcNetemCorr(b []byte) *TcNetemCorr { + return (*TcNetemCorr)(unsafe.Pointer(&b[0:SizeofTcNetemCorr][0])) +} + +func (x *TcNetemCorr) Serialize() []byte { + return (*(*[SizeofTcNetemCorr]byte)(unsafe.Pointer(x)))[:] +} + +// struct tc_netem_reorder { +// __u32 probability; +// __u32 correlation; +// }; + +type TcNetemReorder struct { + Probability uint32 + Correlation uint32 +} + +func (msg *TcNetemReorder) Len() int { + return SizeofTcNetemReorder +} + +func DeserializeTcNetemReorder(b []byte) *TcNetemReorder { + return (*TcNetemReorder)(unsafe.Pointer(&b[0:SizeofTcNetemReorder][0])) +} + +func (x *TcNetemReorder) Serialize() []byte { + return (*(*[SizeofTcNetemReorder]byte)(unsafe.Pointer(x)))[:] +} + +// struct tc_netem_corrupt { +// __u32 probability; +// __u32 correlation; +// }; + +type TcNetemCorrupt struct { + Probability uint32 + Correlation uint32 +} + +func (msg *TcNetemCorrupt) Len() int { + return SizeofTcNetemCorrupt +} + +func DeserializeTcNetemCorrupt(b []byte) *TcNetemCorrupt { + return (*TcNetemCorrupt)(unsafe.Pointer(&b[0:SizeofTcNetemCorrupt][0])) +} + +func (x *TcNetemCorrupt) Serialize() []byte { + return (*(*[SizeofTcNetemCorrupt]byte)(unsafe.Pointer(x)))[:] +} + +// struct tc_tbf_qopt { +// struct tc_ratespec rate; +// struct tc_ratespec peakrate; +// __u32 limit; +// __u32 buffer; +// __u32 mtu; +// }; + +type TcTbfQopt struct { + Rate TcRateSpec + Peakrate TcRateSpec + Limit uint32 + Buffer uint32 + Mtu uint32 +} + +func (msg *TcTbfQopt) Len() int { + return SizeofTcTbfQopt +} + +func DeserializeTcTbfQopt(b []byte) *TcTbfQopt { + return (*TcTbfQopt)(unsafe.Pointer(&b[0:SizeofTcTbfQopt][0])) +} + +func (x *TcTbfQopt) Serialize() []byte { + return (*(*[SizeofTcTbfQopt]byte)(unsafe.Pointer(x)))[:] +} + +const ( + TCA_HTB_UNSPEC = iota + TCA_HTB_PARMS + TCA_HTB_INIT + TCA_HTB_CTAB + TCA_HTB_RTAB + TCA_HTB_DIRECT_QLEN + TCA_HTB_RATE64 + TCA_HTB_CEIL64 + TCA_HTB_MAX = TCA_HTB_CEIL64 +) + +//struct tc_htb_opt { +// struct tc_ratespec rate; +// struct tc_ratespec ceil; +// __u32 buffer; +// __u32 cbuffer; +// __u32 quantum; +// __u32 level; /* out only */ +// __u32 prio; +//}; + +type TcHtbCopt struct { + Rate TcRateSpec + Ceil TcRateSpec + Buffer uint32 + Cbuffer uint32 + Quantum uint32 + Level uint32 + Prio uint32 +} + +func (msg *TcHtbCopt) Len() int { + return SizeofTcHtbCopt +} + +func DeserializeTcHtbCopt(b []byte) *TcHtbCopt { + return (*TcHtbCopt)(unsafe.Pointer(&b[0:SizeofTcHtbCopt][0])) +} + +func (x *TcHtbCopt) Serialize() []byte { + return (*(*[SizeofTcHtbCopt]byte)(unsafe.Pointer(x)))[:] +} + +type TcHtbGlob struct { + Version uint32 + Rate2Quantum uint32 + Defcls uint32 + Debug uint32 + DirectPkts uint32 +} + +func (msg *TcHtbGlob) Len() int { + return SizeofTcHtbGlob +} + +func DeserializeTcHtbGlob(b []byte) *TcHtbGlob { + return (*TcHtbGlob)(unsafe.Pointer(&b[0:SizeofTcHtbGlob][0])) +} + +func (x *TcHtbGlob) Serialize() []byte { + return (*(*[SizeofTcHtbGlob]byte)(unsafe.Pointer(x)))[:] +} + +// HFSC + +type Curve struct { + m1 uint32 + d uint32 + m2 uint32 +} + +type HfscCopt struct { + Rsc Curve + Fsc Curve + Usc Curve +} + +func (c *Curve) Attrs() (uint32, uint32, uint32) { + return c.m1, c.d, c.m2 +} + +func (c *Curve) Set(m1 uint32, d uint32, m2 uint32) { + c.m1 = m1 + c.d = d + c.m2 = m2 +} + +func DeserializeHfscCurve(b []byte) *Curve { + return &Curve{ + m1: binary.LittleEndian.Uint32(b[0:4]), + d: binary.LittleEndian.Uint32(b[4:8]), + m2: binary.LittleEndian.Uint32(b[8:12]), + } +} + +func SerializeHfscCurve(c *Curve) (b []byte) { + t := make([]byte, binary.MaxVarintLen32) + binary.LittleEndian.PutUint32(t, c.m1) + b = append(b, t[:4]...) + binary.LittleEndian.PutUint32(t, c.d) + b = append(b, t[:4]...) + binary.LittleEndian.PutUint32(t, c.m2) + b = append(b, t[:4]...) + return b +} + +type TcHfscOpt struct { + Defcls uint16 +} + +func (x *TcHfscOpt) Serialize() []byte { + return (*(*[2]byte)(unsafe.Pointer(x)))[:] +} + +const ( + TCA_U32_UNSPEC = iota + TCA_U32_CLASSID + TCA_U32_HASH + TCA_U32_LINK + TCA_U32_DIVISOR + TCA_U32_SEL + TCA_U32_POLICE + TCA_U32_ACT + TCA_U32_INDEV + TCA_U32_PCNT + TCA_U32_MARK + TCA_U32_MAX = TCA_U32_MARK +) + +// struct tc_u32_key { +// __be32 mask; +// __be32 val; +// int off; +// int offmask; +// }; + +type TcU32Key struct { + Mask uint32 // big endian + Val uint32 // big endian + Off int32 + OffMask int32 +} + +func (msg *TcU32Key) Len() int { + return SizeofTcU32Key +} + +func DeserializeTcU32Key(b []byte) *TcU32Key { + return (*TcU32Key)(unsafe.Pointer(&b[0:SizeofTcU32Key][0])) +} + +func (x *TcU32Key) Serialize() []byte { + return (*(*[SizeofTcU32Key]byte)(unsafe.Pointer(x)))[:] +} + +// struct tc_u32_sel { +// unsigned char flags; +// unsigned char offshift; +// unsigned char nkeys; +// +// __be16 offmask; +// __u16 off; +// short offoff; +// +// short hoff; +// __be32 hmask; +// struct tc_u32_key keys[0]; +// }; + +const ( + TC_U32_TERMINAL = 1 << iota + TC_U32_OFFSET = 1 << iota + TC_U32_VAROFFSET = 1 << iota + TC_U32_EAT = 1 << iota +) + +type TcU32Sel struct { + Flags uint8 + Offshift uint8 + Nkeys uint8 + Pad uint8 + Offmask uint16 // big endian + Off uint16 + Offoff int16 + Hoff int16 + Hmask uint32 // big endian + Keys []TcU32Key +} + +func (msg *TcU32Sel) Len() int { + return SizeofTcU32Sel + int(msg.Nkeys)*SizeofTcU32Key +} + +func DeserializeTcU32Sel(b []byte) *TcU32Sel { + x := &TcU32Sel{} + copy((*(*[SizeofTcU32Sel]byte)(unsafe.Pointer(x)))[:], b) + next := SizeofTcU32Sel + var i uint8 + for i = 0; i < x.Nkeys; i++ { + x.Keys = append(x.Keys, *DeserializeTcU32Key(b[next:])) + next += SizeofTcU32Key + } + return x +} + +func (x *TcU32Sel) Serialize() []byte { + // This can't just unsafe.cast because it must iterate through keys. + buf := make([]byte, x.Len()) + copy(buf, (*(*[SizeofTcU32Sel]byte)(unsafe.Pointer(x)))[:]) + next := SizeofTcU32Sel + for _, key := range x.Keys { + keyBuf := key.Serialize() + copy(buf[next:], keyBuf) + next += SizeofTcU32Key + } + return buf +} + +type TcGen struct { + Index uint32 + Capab uint32 + Action int32 + Refcnt int32 + Bindcnt int32 +} + +func (msg *TcGen) Len() int { + return SizeofTcGen +} + +func DeserializeTcGen(b []byte) *TcGen { + return (*TcGen)(unsafe.Pointer(&b[0:SizeofTcGen][0])) +} + +func (x *TcGen) Serialize() []byte { + return (*(*[SizeofTcGen]byte)(unsafe.Pointer(x)))[:] +} + +// #define tc_gen \ +// __u32 index; \ +// __u32 capab; \ +// int action; \ +// int refcnt; \ +// int bindcnt + +const ( + TCA_ACT_GACT = 5 +) + +const ( + TCA_GACT_UNSPEC = iota + TCA_GACT_TM + TCA_GACT_PARMS + TCA_GACT_PROB + TCA_GACT_MAX = TCA_GACT_PROB +) + +type TcGact TcGen + +const ( + TCA_ACT_BPF = 13 +) + +const ( + TCA_ACT_BPF_UNSPEC = iota + TCA_ACT_BPF_TM + TCA_ACT_BPF_PARMS + TCA_ACT_BPF_OPS_LEN + TCA_ACT_BPF_OPS + TCA_ACT_BPF_FD + TCA_ACT_BPF_NAME + TCA_ACT_BPF_MAX = TCA_ACT_BPF_NAME +) + +const ( + TCA_BPF_FLAG_ACT_DIRECT uint32 = 1 << iota +) + +const ( + TCA_BPF_UNSPEC = iota + TCA_BPF_ACT + TCA_BPF_POLICE + TCA_BPF_CLASSID + TCA_BPF_OPS_LEN + TCA_BPF_OPS + TCA_BPF_FD + TCA_BPF_NAME + TCA_BPF_FLAGS + TCA_BPF_FLAGS_GEN + TCA_BPF_TAG + TCA_BPF_ID + TCA_BPF_MAX = TCA_BPF_ID +) + +type TcBpf TcGen + +const ( + TCA_ACT_CONNMARK = 14 +) + +const ( + TCA_CONNMARK_UNSPEC = iota + TCA_CONNMARK_PARMS + TCA_CONNMARK_TM + TCA_CONNMARK_MAX = TCA_CONNMARK_TM +) + +// struct tc_connmark { +// tc_gen; +// __u16 zone; +// }; + +type TcConnmark struct { + TcGen + Zone uint16 +} + +func (msg *TcConnmark) Len() int { + return SizeofTcConnmark +} + +func DeserializeTcConnmark(b []byte) *TcConnmark { + return (*TcConnmark)(unsafe.Pointer(&b[0:SizeofTcConnmark][0])) +} + +func (x *TcConnmark) Serialize() []byte { + return (*(*[SizeofTcConnmark]byte)(unsafe.Pointer(x)))[:] +} + +const ( + TCA_CSUM_UNSPEC = iota + TCA_CSUM_PARMS + TCA_CSUM_TM + TCA_CSUM_PAD + TCA_CSUM_MAX = TCA_CSUM_PAD +) + +// struct tc_csum { +// tc_gen; +// __u32 update_flags; +// } + +type TcCsum struct { + TcGen + UpdateFlags uint32 +} + +func (msg *TcCsum) Len() int { + return SizeofTcCsum +} + +func DeserializeTcCsum(b []byte) *TcCsum { + return (*TcCsum)(unsafe.Pointer(&b[0:SizeofTcCsum][0])) +} + +func (x *TcCsum) Serialize() []byte { + return (*(*[SizeofTcCsum]byte)(unsafe.Pointer(x)))[:] +} + +const ( + TCA_ACT_MIRRED = 8 +) + +const ( + TCA_MIRRED_UNSPEC = iota + TCA_MIRRED_TM + TCA_MIRRED_PARMS + TCA_MIRRED_MAX = TCA_MIRRED_PARMS +) + +// struct tc_mirred { +// tc_gen; +// int eaction; /* one of IN/EGRESS_MIRROR/REDIR */ +// __u32 ifindex; /* ifindex of egress port */ +// }; + +type TcMirred struct { + TcGen + Eaction int32 + Ifindex uint32 +} + +func (msg *TcMirred) Len() int { + return SizeofTcMirred +} + +func DeserializeTcMirred(b []byte) *TcMirred { + return (*TcMirred)(unsafe.Pointer(&b[0:SizeofTcMirred][0])) +} + +func (x *TcMirred) Serialize() []byte { + return (*(*[SizeofTcMirred]byte)(unsafe.Pointer(x)))[:] +} + +const ( + TCA_TUNNEL_KEY_UNSPEC = iota + TCA_TUNNEL_KEY_TM + TCA_TUNNEL_KEY_PARMS + TCA_TUNNEL_KEY_ENC_IPV4_SRC + TCA_TUNNEL_KEY_ENC_IPV4_DST + TCA_TUNNEL_KEY_ENC_IPV6_SRC + TCA_TUNNEL_KEY_ENC_IPV6_DST + TCA_TUNNEL_KEY_ENC_KEY_ID + TCA_TUNNEL_KEY_PAD + TCA_TUNNEL_KEY_ENC_DST_PORT + TCA_TUNNEL_KEY_NO_CSUM + TCA_TUNNEL_KEY_ENC_OPTS + TCA_TUNNEL_KEY_ENC_TOS + TCA_TUNNEL_KEY_ENC_TTL + TCA_TUNNEL_KEY_MAX +) + +type TcTunnelKey struct { + TcGen + Action int32 +} + +func (x *TcTunnelKey) Len() int { + return SizeofTcTunnelKey +} + +func DeserializeTunnelKey(b []byte) *TcTunnelKey { + return (*TcTunnelKey)(unsafe.Pointer(&b[0:SizeofTcTunnelKey][0])) +} + +func (x *TcTunnelKey) Serialize() []byte { + return (*(*[SizeofTcTunnelKey]byte)(unsafe.Pointer(x)))[:] +} + +const ( + TCA_SKBEDIT_UNSPEC = iota + TCA_SKBEDIT_TM + TCA_SKBEDIT_PARMS + TCA_SKBEDIT_PRIORITY + TCA_SKBEDIT_QUEUE_MAPPING + TCA_SKBEDIT_MARK + TCA_SKBEDIT_PAD + TCA_SKBEDIT_PTYPE + TCA_SKBEDIT_MAX = TCA_SKBEDIT_MARK +) + +type TcSkbEdit struct { + TcGen +} + +func (x *TcSkbEdit) Len() int { + return SizeofTcSkbEdit +} + +func DeserializeSkbEdit(b []byte) *TcSkbEdit { + return (*TcSkbEdit)(unsafe.Pointer(&b[0:SizeofTcSkbEdit][0])) +} + +func (x *TcSkbEdit) Serialize() []byte { + return (*(*[SizeofTcSkbEdit]byte)(unsafe.Pointer(x)))[:] +} + +// struct tc_police { +// __u32 index; +// int action; +// __u32 limit; +// __u32 burst; +// __u32 mtu; +// struct tc_ratespec rate; +// struct tc_ratespec peakrate; +// int refcnt; +// int bindcnt; +// __u32 capab; +// }; + +type TcPolice struct { + Index uint32 + Action int32 + Limit uint32 + Burst uint32 + Mtu uint32 + Rate TcRateSpec + PeakRate TcRateSpec + Refcnt int32 + Bindcnt int32 + Capab uint32 +} + +func (msg *TcPolice) Len() int { + return SizeofTcPolice +} + +func DeserializeTcPolice(b []byte) *TcPolice { + return (*TcPolice)(unsafe.Pointer(&b[0:SizeofTcPolice][0])) +} + +func (x *TcPolice) Serialize() []byte { + return (*(*[SizeofTcPolice]byte)(unsafe.Pointer(x)))[:] +} + +const ( + TCA_FW_UNSPEC = iota + TCA_FW_CLASSID + TCA_FW_POLICE + TCA_FW_INDEV + TCA_FW_ACT + TCA_FW_MASK + TCA_FW_MAX = TCA_FW_MASK +) + +const ( + TCA_MATCHALL_UNSPEC = iota + TCA_MATCHALL_CLASSID + TCA_MATCHALL_ACT + TCA_MATCHALL_FLAGS +) + +const ( + TCA_FQ_UNSPEC = iota + TCA_FQ_PLIMIT // limit of total number of packets in queue + TCA_FQ_FLOW_PLIMIT // limit of packets per flow + TCA_FQ_QUANTUM // RR quantum + TCA_FQ_INITIAL_QUANTUM // RR quantum for new flow + TCA_FQ_RATE_ENABLE // enable/disable rate limiting + TCA_FQ_FLOW_DEFAULT_RATE // obsolete do not use + TCA_FQ_FLOW_MAX_RATE // per flow max rate + TCA_FQ_BUCKETS_LOG // log2(number of buckets) + TCA_FQ_FLOW_REFILL_DELAY // flow credit refill delay in usec + TCA_FQ_ORPHAN_MASK // mask applied to orphaned skb hashes + TCA_FQ_LOW_RATE_THRESHOLD // per packet delay under this rate +) + +const ( + TCA_FQ_CODEL_UNSPEC = iota + TCA_FQ_CODEL_TARGET + TCA_FQ_CODEL_LIMIT + TCA_FQ_CODEL_INTERVAL + TCA_FQ_CODEL_ECN + TCA_FQ_CODEL_FLOWS + TCA_FQ_CODEL_QUANTUM + TCA_FQ_CODEL_CE_THRESHOLD + TCA_FQ_CODEL_DROP_BATCH_SIZE + TCA_FQ_CODEL_MEMORY_LIMIT +) + +const ( + TCA_HFSC_UNSPEC = iota + TCA_HFSC_RSC + TCA_HFSC_FSC + TCA_HFSC_USC +) + +const ( + TCA_FLOWER_UNSPEC = iota + TCA_FLOWER_CLASSID + TCA_FLOWER_INDEV + TCA_FLOWER_ACT + TCA_FLOWER_KEY_ETH_DST /* ETH_ALEN */ + TCA_FLOWER_KEY_ETH_DST_MASK /* ETH_ALEN */ + TCA_FLOWER_KEY_ETH_SRC /* ETH_ALEN */ + TCA_FLOWER_KEY_ETH_SRC_MASK /* ETH_ALEN */ + TCA_FLOWER_KEY_ETH_TYPE /* be16 */ + TCA_FLOWER_KEY_IP_PROTO /* u8 */ + TCA_FLOWER_KEY_IPV4_SRC /* be32 */ + TCA_FLOWER_KEY_IPV4_SRC_MASK /* be32 */ + TCA_FLOWER_KEY_IPV4_DST /* be32 */ + TCA_FLOWER_KEY_IPV4_DST_MASK /* be32 */ + TCA_FLOWER_KEY_IPV6_SRC /* struct in6_addr */ + TCA_FLOWER_KEY_IPV6_SRC_MASK /* struct in6_addr */ + TCA_FLOWER_KEY_IPV6_DST /* struct in6_addr */ + TCA_FLOWER_KEY_IPV6_DST_MASK /* struct in6_addr */ + TCA_FLOWER_KEY_TCP_SRC /* be16 */ + TCA_FLOWER_KEY_TCP_DST /* be16 */ + TCA_FLOWER_KEY_UDP_SRC /* be16 */ + TCA_FLOWER_KEY_UDP_DST /* be16 */ + + TCA_FLOWER_FLAGS + TCA_FLOWER_KEY_VLAN_ID /* be16 */ + TCA_FLOWER_KEY_VLAN_PRIO /* u8 */ + TCA_FLOWER_KEY_VLAN_ETH_TYPE /* be16 */ + + TCA_FLOWER_KEY_ENC_KEY_ID /* be32 */ + TCA_FLOWER_KEY_ENC_IPV4_SRC /* be32 */ + TCA_FLOWER_KEY_ENC_IPV4_SRC_MASK /* be32 */ + TCA_FLOWER_KEY_ENC_IPV4_DST /* be32 */ + TCA_FLOWER_KEY_ENC_IPV4_DST_MASK /* be32 */ + TCA_FLOWER_KEY_ENC_IPV6_SRC /* struct in6_addr */ + TCA_FLOWER_KEY_ENC_IPV6_SRC_MASK /* struct in6_addr */ + TCA_FLOWER_KEY_ENC_IPV6_DST /* struct in6_addr */ + TCA_FLOWER_KEY_ENC_IPV6_DST_MASK /* struct in6_addr */ + + TCA_FLOWER_KEY_TCP_SRC_MASK /* be16 */ + TCA_FLOWER_KEY_TCP_DST_MASK /* be16 */ + TCA_FLOWER_KEY_UDP_SRC_MASK /* be16 */ + TCA_FLOWER_KEY_UDP_DST_MASK /* be16 */ + TCA_FLOWER_KEY_SCTP_SRC_MASK /* be16 */ + TCA_FLOWER_KEY_SCTP_DST_MASK /* be16 */ + + TCA_FLOWER_KEY_SCTP_SRC /* be16 */ + TCA_FLOWER_KEY_SCTP_DST /* be16 */ + + TCA_FLOWER_KEY_ENC_UDP_SRC_PORT /* be16 */ + TCA_FLOWER_KEY_ENC_UDP_SRC_PORT_MASK /* be16 */ + TCA_FLOWER_KEY_ENC_UDP_DST_PORT /* be16 */ + TCA_FLOWER_KEY_ENC_UDP_DST_PORT_MASK /* be16 */ + + TCA_FLOWER_KEY_FLAGS /* be32 */ + TCA_FLOWER_KEY_FLAGS_MASK /* be32 */ + + TCA_FLOWER_KEY_ICMPV4_CODE /* u8 */ + TCA_FLOWER_KEY_ICMPV4_CODE_MASK /* u8 */ + TCA_FLOWER_KEY_ICMPV4_TYPE /* u8 */ + TCA_FLOWER_KEY_ICMPV4_TYPE_MASK /* u8 */ + TCA_FLOWER_KEY_ICMPV6_CODE /* u8 */ + TCA_FLOWER_KEY_ICMPV6_CODE_MASK /* u8 */ + TCA_FLOWER_KEY_ICMPV6_TYPE /* u8 */ + TCA_FLOWER_KEY_ICMPV6_TYPE_MASK /* u8 */ + + TCA_FLOWER_KEY_ARP_SIP /* be32 */ + TCA_FLOWER_KEY_ARP_SIP_MASK /* be32 */ + TCA_FLOWER_KEY_ARP_TIP /* be32 */ + TCA_FLOWER_KEY_ARP_TIP_MASK /* be32 */ + TCA_FLOWER_KEY_ARP_OP /* u8 */ + TCA_FLOWER_KEY_ARP_OP_MASK /* u8 */ + TCA_FLOWER_KEY_ARP_SHA /* ETH_ALEN */ + TCA_FLOWER_KEY_ARP_SHA_MASK /* ETH_ALEN */ + TCA_FLOWER_KEY_ARP_THA /* ETH_ALEN */ + TCA_FLOWER_KEY_ARP_THA_MASK /* ETH_ALEN */ + + TCA_FLOWER_KEY_MPLS_TTL /* u8 - 8 bits */ + TCA_FLOWER_KEY_MPLS_BOS /* u8 - 1 bit */ + TCA_FLOWER_KEY_MPLS_TC /* u8 - 3 bits */ + TCA_FLOWER_KEY_MPLS_LABEL /* be32 - 20 bits */ + + TCA_FLOWER_KEY_TCP_FLAGS /* be16 */ + TCA_FLOWER_KEY_TCP_FLAGS_MASK /* be16 */ + + TCA_FLOWER_KEY_IP_TOS /* u8 */ + TCA_FLOWER_KEY_IP_TOS_MASK /* u8 */ + TCA_FLOWER_KEY_IP_TTL /* u8 */ + TCA_FLOWER_KEY_IP_TTL_MASK /* u8 */ + + TCA_FLOWER_KEY_CVLAN_ID /* be16 */ + TCA_FLOWER_KEY_CVLAN_PRIO /* u8 */ + TCA_FLOWER_KEY_CVLAN_ETH_TYPE /* be16 */ + + TCA_FLOWER_KEY_ENC_IP_TOS /* u8 */ + TCA_FLOWER_KEY_ENC_IP_TOS_MASK /* u8 */ + TCA_FLOWER_KEY_ENC_IP_TTL /* u8 */ + TCA_FLOWER_KEY_ENC_IP_TTL_MASK /* u8 */ + + TCA_FLOWER_KEY_ENC_OPTS + TCA_FLOWER_KEY_ENC_OPTS_MASK + + __TCA_FLOWER_MAX +) + +// struct tc_sfq_qopt { +// unsigned quantum; /* Bytes per round allocated to flow */ +// int perturb_period; /* Period of hash perturbation */ +// __u32 limit; /* Maximal packets in queue */ +// unsigned divisor; /* Hash divisor */ +// unsigned flows; /* Maximal number of flows */ +// }; + +type TcSfqQopt struct { + Quantum uint8 + Perturb int32 + Limit uint32 + Divisor uint8 + Flows uint8 +} + +func (x *TcSfqQopt) Len() int { + return SizeofTcSfqQopt +} + +func DeserializeTcSfqQopt(b []byte) *TcSfqQopt { + return (*TcSfqQopt)(unsafe.Pointer(&b[0:SizeofTcSfqQopt][0])) +} + +func (x *TcSfqQopt) Serialize() []byte { + return (*(*[SizeofTcSfqQopt]byte)(unsafe.Pointer(x)))[:] +} + +// struct tc_sfqred_stats { +// __u32 prob_drop; /* Early drops, below max threshold */ +// __u32 forced_drop; /* Early drops, after max threshold */ +// __u32 prob_mark; /* Marked packets, below max threshold */ +// __u32 forced_mark; /* Marked packets, after max threshold */ +// __u32 prob_mark_head; /* Marked packets, below max threshold */ +// __u32 forced_mark_head;/* Marked packets, after max threshold */ +// }; +type TcSfqRedStats struct { + ProbDrop uint32 + ForcedDrop uint32 + ProbMark uint32 + ForcedMark uint32 + ProbMarkHead uint32 + ForcedMarkHead uint32 +} + +func (x *TcSfqRedStats) Len() int { + return SizeofTcSfqRedStats +} + +func DeserializeTcSfqRedStats(b []byte) *TcSfqRedStats { + return (*TcSfqRedStats)(unsafe.Pointer(&b[0:SizeofTcSfqRedStats][0])) +} + +func (x *TcSfqRedStats) Serialize() []byte { + return (*(*[SizeofTcSfqRedStats]byte)(unsafe.Pointer(x)))[:] +} + +// struct tc_sfq_qopt_v1 { +// struct tc_sfq_qopt v0; +// unsigned int depth; /* max number of packets per flow */ +// unsigned int headdrop; +// /* SFQRED parameters */ +// __u32 limit; /* HARD maximal flow queue length (bytes) */ +// __u32 qth_min; /* Min average length threshold (bytes) */ +// __u32 qth_max; /* Max average length threshold (bytes) */ +// unsigned char Wlog; /* log(W) */ +// unsigned char Plog; /* log(P_max/(qth_max-qth_min)) */ +// unsigned char Scell_log; /* cell size for idle damping */ +// unsigned char flags; +// __u32 max_P; /* probability, high resolution */ +// /* SFQRED stats */ +// struct tc_sfqred_stats stats; +// }; +type TcSfqQoptV1 struct { + TcSfqQopt + Depth uint32 + HeadDrop uint32 + Limit uint32 + QthMin uint32 + QthMax uint32 + Wlog byte + Plog byte + ScellLog byte + Flags byte + MaxP uint32 + TcSfqRedStats +} + +func (x *TcSfqQoptV1) Len() int { + return SizeofTcSfqQoptV1 +} + +func DeserializeTcSfqQoptV1(b []byte) *TcSfqQoptV1 { + return (*TcSfqQoptV1)(unsafe.Pointer(&b[0:SizeofTcSfqQoptV1][0])) +} + +func (x *TcSfqQoptV1) Serialize() []byte { + return (*(*[SizeofTcSfqQoptV1]byte)(unsafe.Pointer(x)))[:] +} diff --git a/vendor/github.com/vishvananda/netlink/nl/xfrm_linux.go b/vendor/github.com/vishvananda/netlink/nl/xfrm_linux.go new file mode 100644 index 000000000000..dce9073f7b58 --- /dev/null +++ b/vendor/github.com/vishvananda/netlink/nl/xfrm_linux.go @@ -0,0 +1,306 @@ +package nl + +import ( + "bytes" + "net" + "unsafe" +) + +// Infinity for packet and byte counts +const ( + XFRM_INF = ^uint64(0) +) + +type XfrmMsgType uint8 + +type XfrmMsg interface { + Type() XfrmMsgType +} + +// Message Types +const ( + XFRM_MSG_BASE XfrmMsgType = 0x10 + XFRM_MSG_NEWSA = 0x10 + XFRM_MSG_DELSA = 0x11 + XFRM_MSG_GETSA = 0x12 + XFRM_MSG_NEWPOLICY = 0x13 + XFRM_MSG_DELPOLICY = 0x14 + XFRM_MSG_GETPOLICY = 0x15 + XFRM_MSG_ALLOCSPI = 0x16 + XFRM_MSG_ACQUIRE = 0x17 + XFRM_MSG_EXPIRE = 0x18 + XFRM_MSG_UPDPOLICY = 0x19 + XFRM_MSG_UPDSA = 0x1a + XFRM_MSG_POLEXPIRE = 0x1b + XFRM_MSG_FLUSHSA = 0x1c + XFRM_MSG_FLUSHPOLICY = 0x1d + XFRM_MSG_NEWAE = 0x1e + XFRM_MSG_GETAE = 0x1f + XFRM_MSG_REPORT = 0x20 + XFRM_MSG_MIGRATE = 0x21 + XFRM_MSG_NEWSADINFO = 0x22 + XFRM_MSG_GETSADINFO = 0x23 + XFRM_MSG_NEWSPDINFO = 0x24 + XFRM_MSG_GETSPDINFO = 0x25 + XFRM_MSG_MAPPING = 0x26 + XFRM_MSG_MAX = 0x26 + XFRM_NR_MSGTYPES = 0x17 +) + +// Attribute types +const ( + /* Netlink message attributes. */ + XFRMA_UNSPEC = iota + XFRMA_ALG_AUTH /* struct xfrm_algo */ + XFRMA_ALG_CRYPT /* struct xfrm_algo */ + XFRMA_ALG_COMP /* struct xfrm_algo */ + XFRMA_ENCAP /* struct xfrm_algo + struct xfrm_encap_tmpl */ + XFRMA_TMPL /* 1 or more struct xfrm_user_tmpl */ + XFRMA_SA /* struct xfrm_usersa_info */ + XFRMA_POLICY /* struct xfrm_userpolicy_info */ + XFRMA_SEC_CTX /* struct xfrm_sec_ctx */ + XFRMA_LTIME_VAL + XFRMA_REPLAY_VAL + XFRMA_REPLAY_THRESH + XFRMA_ETIMER_THRESH + XFRMA_SRCADDR /* xfrm_address_t */ + XFRMA_COADDR /* xfrm_address_t */ + XFRMA_LASTUSED /* unsigned long */ + XFRMA_POLICY_TYPE /* struct xfrm_userpolicy_type */ + XFRMA_MIGRATE + XFRMA_ALG_AEAD /* struct xfrm_algo_aead */ + XFRMA_KMADDRESS /* struct xfrm_user_kmaddress */ + XFRMA_ALG_AUTH_TRUNC /* struct xfrm_algo_auth */ + XFRMA_MARK /* struct xfrm_mark */ + XFRMA_TFCPAD /* __u32 */ + XFRMA_REPLAY_ESN_VAL /* struct xfrm_replay_esn */ + XFRMA_SA_EXTRA_FLAGS /* __u32 */ + XFRMA_PROTO /* __u8 */ + XFRMA_ADDRESS_FILTER /* struct xfrm_address_filter */ + XFRMA_PAD + XFRMA_OFFLOAD_DEV /* struct xfrm_state_offload */ + XFRMA_SET_MARK /* __u32 */ + XFRMA_SET_MARK_MASK /* __u32 */ + XFRMA_IF_ID /* __u32 */ + + XFRMA_MAX = iota - 1 +) + +const XFRMA_OUTPUT_MARK = XFRMA_SET_MARK + +const ( + SizeofXfrmAddress = 0x10 + SizeofXfrmSelector = 0x38 + SizeofXfrmLifetimeCfg = 0x40 + SizeofXfrmLifetimeCur = 0x20 + SizeofXfrmId = 0x18 + SizeofXfrmMark = 0x08 +) + +// Netlink groups +const ( + XFRMNLGRP_NONE = 0x0 + XFRMNLGRP_ACQUIRE = 0x1 + XFRMNLGRP_EXPIRE = 0x2 + XFRMNLGRP_SA = 0x3 + XFRMNLGRP_POLICY = 0x4 + XFRMNLGRP_AEVENTS = 0x5 + XFRMNLGRP_REPORT = 0x6 + XFRMNLGRP_MIGRATE = 0x7 + XFRMNLGRP_MAPPING = 0x8 + __XFRMNLGRP_MAX = 0x9 +) + +// typedef union { +// __be32 a4; +// __be32 a6[4]; +// } xfrm_address_t; + +type XfrmAddress [SizeofXfrmAddress]byte + +func (x *XfrmAddress) ToIP() net.IP { + var empty = [12]byte{} + ip := make(net.IP, net.IPv6len) + if bytes.Equal(x[4:16], empty[:]) { + ip[10] = 0xff + ip[11] = 0xff + copy(ip[12:16], x[0:4]) + } else { + copy(ip[:], x[:]) + } + return ip +} + +func (x *XfrmAddress) ToIPNet(prefixlen uint8) *net.IPNet { + ip := x.ToIP() + if GetIPFamily(ip) == FAMILY_V4 { + return &net.IPNet{IP: ip, Mask: net.CIDRMask(int(prefixlen), 32)} + } + return &net.IPNet{IP: ip, Mask: net.CIDRMask(int(prefixlen), 128)} +} + +func (x *XfrmAddress) FromIP(ip net.IP) { + var empty = [16]byte{} + if len(ip) < net.IPv4len { + copy(x[4:16], empty[:]) + } else if GetIPFamily(ip) == FAMILY_V4 { + copy(x[0:4], ip.To4()[0:4]) + copy(x[4:16], empty[:12]) + } else { + copy(x[0:16], ip.To16()[0:16]) + } +} + +func DeserializeXfrmAddress(b []byte) *XfrmAddress { + return (*XfrmAddress)(unsafe.Pointer(&b[0:SizeofXfrmAddress][0])) +} + +func (x *XfrmAddress) Serialize() []byte { + return (*(*[SizeofXfrmAddress]byte)(unsafe.Pointer(x)))[:] +} + +// struct xfrm_selector { +// xfrm_address_t daddr; +// xfrm_address_t saddr; +// __be16 dport; +// __be16 dport_mask; +// __be16 sport; +// __be16 sport_mask; +// __u16 family; +// __u8 prefixlen_d; +// __u8 prefixlen_s; +// __u8 proto; +// int ifindex; +// __kernel_uid32_t user; +// }; + +type XfrmSelector struct { + Daddr XfrmAddress + Saddr XfrmAddress + Dport uint16 // big endian + DportMask uint16 // big endian + Sport uint16 // big endian + SportMask uint16 // big endian + Family uint16 + PrefixlenD uint8 + PrefixlenS uint8 + Proto uint8 + Pad [3]byte + Ifindex int32 + User uint32 +} + +func (msg *XfrmSelector) Len() int { + return SizeofXfrmSelector +} + +func DeserializeXfrmSelector(b []byte) *XfrmSelector { + return (*XfrmSelector)(unsafe.Pointer(&b[0:SizeofXfrmSelector][0])) +} + +func (msg *XfrmSelector) Serialize() []byte { + return (*(*[SizeofXfrmSelector]byte)(unsafe.Pointer(msg)))[:] +} + +// struct xfrm_lifetime_cfg { +// __u64 soft_byte_limit; +// __u64 hard_byte_limit; +// __u64 soft_packet_limit; +// __u64 hard_packet_limit; +// __u64 soft_add_expires_seconds; +// __u64 hard_add_expires_seconds; +// __u64 soft_use_expires_seconds; +// __u64 hard_use_expires_seconds; +// }; +// + +type XfrmLifetimeCfg struct { + SoftByteLimit uint64 + HardByteLimit uint64 + SoftPacketLimit uint64 + HardPacketLimit uint64 + SoftAddExpiresSeconds uint64 + HardAddExpiresSeconds uint64 + SoftUseExpiresSeconds uint64 + HardUseExpiresSeconds uint64 +} + +func (msg *XfrmLifetimeCfg) Len() int { + return SizeofXfrmLifetimeCfg +} + +func DeserializeXfrmLifetimeCfg(b []byte) *XfrmLifetimeCfg { + return (*XfrmLifetimeCfg)(unsafe.Pointer(&b[0:SizeofXfrmLifetimeCfg][0])) +} + +func (msg *XfrmLifetimeCfg) Serialize() []byte { + return (*(*[SizeofXfrmLifetimeCfg]byte)(unsafe.Pointer(msg)))[:] +} + +// struct xfrm_lifetime_cur { +// __u64 bytes; +// __u64 packets; +// __u64 add_time; +// __u64 use_time; +// }; + +type XfrmLifetimeCur struct { + Bytes uint64 + Packets uint64 + AddTime uint64 + UseTime uint64 +} + +func (msg *XfrmLifetimeCur) Len() int { + return SizeofXfrmLifetimeCur +} + +func DeserializeXfrmLifetimeCur(b []byte) *XfrmLifetimeCur { + return (*XfrmLifetimeCur)(unsafe.Pointer(&b[0:SizeofXfrmLifetimeCur][0])) +} + +func (msg *XfrmLifetimeCur) Serialize() []byte { + return (*(*[SizeofXfrmLifetimeCur]byte)(unsafe.Pointer(msg)))[:] +} + +// struct xfrm_id { +// xfrm_address_t daddr; +// __be32 spi; +// __u8 proto; +// }; + +type XfrmId struct { + Daddr XfrmAddress + Spi uint32 // big endian + Proto uint8 + Pad [3]byte +} + +func (msg *XfrmId) Len() int { + return SizeofXfrmId +} + +func DeserializeXfrmId(b []byte) *XfrmId { + return (*XfrmId)(unsafe.Pointer(&b[0:SizeofXfrmId][0])) +} + +func (msg *XfrmId) Serialize() []byte { + return (*(*[SizeofXfrmId]byte)(unsafe.Pointer(msg)))[:] +} + +type XfrmMark struct { + Value uint32 + Mask uint32 +} + +func (msg *XfrmMark) Len() int { + return SizeofXfrmMark +} + +func DeserializeXfrmMark(b []byte) *XfrmMark { + return (*XfrmMark)(unsafe.Pointer(&b[0:SizeofXfrmMark][0])) +} + +func (msg *XfrmMark) Serialize() []byte { + return (*(*[SizeofXfrmMark]byte)(unsafe.Pointer(msg)))[:] +} diff --git a/vendor/github.com/vishvananda/netlink/nl/xfrm_monitor_linux.go b/vendor/github.com/vishvananda/netlink/nl/xfrm_monitor_linux.go new file mode 100644 index 000000000000..715df4cc5f02 --- /dev/null +++ b/vendor/github.com/vishvananda/netlink/nl/xfrm_monitor_linux.go @@ -0,0 +1,32 @@ +package nl + +import ( + "unsafe" +) + +const ( + SizeofXfrmUserExpire = 0xe8 +) + +// struct xfrm_user_expire { +// struct xfrm_usersa_info state; +// __u8 hard; +// }; + +type XfrmUserExpire struct { + XfrmUsersaInfo XfrmUsersaInfo + Hard uint8 + Pad [7]byte +} + +func (msg *XfrmUserExpire) Len() int { + return SizeofXfrmUserExpire +} + +func DeserializeXfrmUserExpire(b []byte) *XfrmUserExpire { + return (*XfrmUserExpire)(unsafe.Pointer(&b[0:SizeofXfrmUserExpire][0])) +} + +func (msg *XfrmUserExpire) Serialize() []byte { + return (*(*[SizeofXfrmUserExpire]byte)(unsafe.Pointer(msg)))[:] +} diff --git a/vendor/github.com/vishvananda/netlink/nl/xfrm_policy_linux.go b/vendor/github.com/vishvananda/netlink/nl/xfrm_policy_linux.go new file mode 100644 index 000000000000..66f7e03d2d7f --- /dev/null +++ b/vendor/github.com/vishvananda/netlink/nl/xfrm_policy_linux.go @@ -0,0 +1,119 @@ +package nl + +import ( + "unsafe" +) + +const ( + SizeofXfrmUserpolicyId = 0x40 + SizeofXfrmUserpolicyInfo = 0xa8 + SizeofXfrmUserTmpl = 0x40 +) + +// struct xfrm_userpolicy_id { +// struct xfrm_selector sel; +// __u32 index; +// __u8 dir; +// }; +// + +type XfrmUserpolicyId struct { + Sel XfrmSelector + Index uint32 + Dir uint8 + Pad [3]byte +} + +func (msg *XfrmUserpolicyId) Len() int { + return SizeofXfrmUserpolicyId +} + +func DeserializeXfrmUserpolicyId(b []byte) *XfrmUserpolicyId { + return (*XfrmUserpolicyId)(unsafe.Pointer(&b[0:SizeofXfrmUserpolicyId][0])) +} + +func (msg *XfrmUserpolicyId) Serialize() []byte { + return (*(*[SizeofXfrmUserpolicyId]byte)(unsafe.Pointer(msg)))[:] +} + +// struct xfrm_userpolicy_info { +// struct xfrm_selector sel; +// struct xfrm_lifetime_cfg lft; +// struct xfrm_lifetime_cur curlft; +// __u32 priority; +// __u32 index; +// __u8 dir; +// __u8 action; +// #define XFRM_POLICY_ALLOW 0 +// #define XFRM_POLICY_BLOCK 1 +// __u8 flags; +// #define XFRM_POLICY_LOCALOK 1 /* Allow user to override global policy */ +// /* Automatically expand selector to include matching ICMP payloads. */ +// #define XFRM_POLICY_ICMP 2 +// __u8 share; +// }; + +type XfrmUserpolicyInfo struct { + Sel XfrmSelector + Lft XfrmLifetimeCfg + Curlft XfrmLifetimeCur + Priority uint32 + Index uint32 + Dir uint8 + Action uint8 + Flags uint8 + Share uint8 + Pad [4]byte +} + +func (msg *XfrmUserpolicyInfo) Len() int { + return SizeofXfrmUserpolicyInfo +} + +func DeserializeXfrmUserpolicyInfo(b []byte) *XfrmUserpolicyInfo { + return (*XfrmUserpolicyInfo)(unsafe.Pointer(&b[0:SizeofXfrmUserpolicyInfo][0])) +} + +func (msg *XfrmUserpolicyInfo) Serialize() []byte { + return (*(*[SizeofXfrmUserpolicyInfo]byte)(unsafe.Pointer(msg)))[:] +} + +// struct xfrm_user_tmpl { +// struct xfrm_id id; +// __u16 family; +// xfrm_address_t saddr; +// __u32 reqid; +// __u8 mode; +// __u8 share; +// __u8 optional; +// __u32 aalgos; +// __u32 ealgos; +// __u32 calgos; +// } + +type XfrmUserTmpl struct { + XfrmId XfrmId + Family uint16 + Pad1 [2]byte + Saddr XfrmAddress + Reqid uint32 + Mode uint8 + Share uint8 + Optional uint8 + Pad2 byte + Aalgos uint32 + Ealgos uint32 + Calgos uint32 +} + +func (msg *XfrmUserTmpl) Len() int { + return SizeofXfrmUserTmpl +} + +func DeserializeXfrmUserTmpl(b []byte) *XfrmUserTmpl { + return (*XfrmUserTmpl)(unsafe.Pointer(&b[0:SizeofXfrmUserTmpl][0])) +} + +func (msg *XfrmUserTmpl) Serialize() []byte { + return (*(*[SizeofXfrmUserTmpl]byte)(unsafe.Pointer(msg)))[:] +} diff --git a/vendor/github.com/vishvananda/netlink/nl/xfrm_state_linux.go b/vendor/github.com/vishvananda/netlink/nl/xfrm_state_linux.go new file mode 100644 index 000000000000..43a947f2294c --- /dev/null +++ b/vendor/github.com/vishvananda/netlink/nl/xfrm_state_linux.go @@ -0,0 +1,334 @@ +package nl + +import ( + "unsafe" +) + +const ( + SizeofXfrmUsersaId = 0x18 + SizeofXfrmStats = 0x0c + SizeofXfrmUsersaInfo = 0xe0 + SizeofXfrmUserSpiInfo = 0xe8 + SizeofXfrmAlgo = 0x44 + SizeofXfrmAlgoAuth = 0x48 + SizeofXfrmAlgoAEAD = 0x48 + SizeofXfrmEncapTmpl = 0x18 + SizeofXfrmUsersaFlush = 0x1 + SizeofXfrmReplayStateEsn = 0x18 +) + +const ( + XFRM_STATE_NOECN = 1 + XFRM_STATE_DECAP_DSCP = 2 + XFRM_STATE_NOPMTUDISC = 4 + XFRM_STATE_WILDRECV = 8 + XFRM_STATE_ICMP = 16 + XFRM_STATE_AF_UNSPEC = 32 + XFRM_STATE_ALIGN4 = 64 + XFRM_STATE_ESN = 128 +) + +// struct xfrm_usersa_id { +// xfrm_address_t daddr; +// __be32 spi; +// __u16 family; +// __u8 proto; +// }; + +type XfrmUsersaId struct { + Daddr XfrmAddress + Spi uint32 // big endian + Family uint16 + Proto uint8 + Pad byte +} + +func (msg *XfrmUsersaId) Len() int { + return SizeofXfrmUsersaId +} + +func DeserializeXfrmUsersaId(b []byte) *XfrmUsersaId { + return (*XfrmUsersaId)(unsafe.Pointer(&b[0:SizeofXfrmUsersaId][0])) +} + +func (msg *XfrmUsersaId) Serialize() []byte { + return (*(*[SizeofXfrmUsersaId]byte)(unsafe.Pointer(msg)))[:] +} + +// struct xfrm_stats { +// __u32 replay_window; +// __u32 replay; +// __u32 integrity_failed; +// }; + +type XfrmStats struct { + ReplayWindow uint32 + Replay uint32 + IntegrityFailed uint32 +} + +func (msg *XfrmStats) Len() int { + return SizeofXfrmStats +} + +func DeserializeXfrmStats(b []byte) *XfrmStats { + return (*XfrmStats)(unsafe.Pointer(&b[0:SizeofXfrmStats][0])) +} + +func (msg *XfrmStats) Serialize() []byte { + return (*(*[SizeofXfrmStats]byte)(unsafe.Pointer(msg)))[:] +} + +// struct xfrm_usersa_info { +// struct xfrm_selector sel; +// struct xfrm_id id; +// xfrm_address_t saddr; +// struct xfrm_lifetime_cfg lft; +// struct xfrm_lifetime_cur curlft; +// struct xfrm_stats stats; +// __u32 seq; +// __u32 reqid; +// __u16 family; +// __u8 mode; /* XFRM_MODE_xxx */ +// __u8 replay_window; +// __u8 flags; +// #define XFRM_STATE_NOECN 1 +// #define XFRM_STATE_DECAP_DSCP 2 +// #define XFRM_STATE_NOPMTUDISC 4 +// #define XFRM_STATE_WILDRECV 8 +// #define XFRM_STATE_ICMP 16 +// #define XFRM_STATE_AF_UNSPEC 32 +// #define XFRM_STATE_ALIGN4 64 +// #define XFRM_STATE_ESN 128 +// }; +// +// #define XFRM_SA_XFLAG_DONT_ENCAP_DSCP 1 +// + +type XfrmUsersaInfo struct { + Sel XfrmSelector + Id XfrmId + Saddr XfrmAddress + Lft XfrmLifetimeCfg + Curlft XfrmLifetimeCur + Stats XfrmStats + Seq uint32 + Reqid uint32 + Family uint16 + Mode uint8 + ReplayWindow uint8 + Flags uint8 + Pad [7]byte +} + +func (msg *XfrmUsersaInfo) Len() int { + return SizeofXfrmUsersaInfo +} + +func DeserializeXfrmUsersaInfo(b []byte) *XfrmUsersaInfo { + return (*XfrmUsersaInfo)(unsafe.Pointer(&b[0:SizeofXfrmUsersaInfo][0])) +} + +func (msg *XfrmUsersaInfo) Serialize() []byte { + return (*(*[SizeofXfrmUsersaInfo]byte)(unsafe.Pointer(msg)))[:] +} + +// struct xfrm_userspi_info { +// struct xfrm_usersa_info info; +// __u32 min; +// __u32 max; +// }; + +type XfrmUserSpiInfo struct { + XfrmUsersaInfo XfrmUsersaInfo + Min uint32 + Max uint32 +} + +func (msg *XfrmUserSpiInfo) Len() int { + return SizeofXfrmUserSpiInfo +} + +func DeserializeXfrmUserSpiInfo(b []byte) *XfrmUserSpiInfo { + return (*XfrmUserSpiInfo)(unsafe.Pointer(&b[0:SizeofXfrmUserSpiInfo][0])) +} + +func (msg *XfrmUserSpiInfo) Serialize() []byte { + return (*(*[SizeofXfrmUserSpiInfo]byte)(unsafe.Pointer(msg)))[:] +} + +// struct xfrm_algo { +// char alg_name[64]; +// unsigned int alg_key_len; /* in bits */ +// char alg_key[0]; +// }; + +type XfrmAlgo struct { + AlgName [64]byte + AlgKeyLen uint32 + AlgKey []byte +} + +func (msg *XfrmAlgo) Len() int { + return SizeofXfrmAlgo + int(msg.AlgKeyLen/8) +} + +func DeserializeXfrmAlgo(b []byte) *XfrmAlgo { + ret := XfrmAlgo{} + copy(ret.AlgName[:], b[0:64]) + ret.AlgKeyLen = *(*uint32)(unsafe.Pointer(&b[64])) + ret.AlgKey = b[68:ret.Len()] + return &ret +} + +func (msg *XfrmAlgo) Serialize() []byte { + b := make([]byte, msg.Len()) + copy(b[0:64], msg.AlgName[:]) + copy(b[64:68], (*(*[4]byte)(unsafe.Pointer(&msg.AlgKeyLen)))[:]) + copy(b[68:msg.Len()], msg.AlgKey[:]) + return b +} + +// struct xfrm_algo_auth { +// char alg_name[64]; +// unsigned int alg_key_len; /* in bits */ +// unsigned int alg_trunc_len; /* in bits */ +// char alg_key[0]; +// }; + +type XfrmAlgoAuth struct { + AlgName [64]byte + AlgKeyLen uint32 + AlgTruncLen uint32 + AlgKey []byte +} + +func (msg *XfrmAlgoAuth) Len() int { + return SizeofXfrmAlgoAuth + int(msg.AlgKeyLen/8) +} + +func DeserializeXfrmAlgoAuth(b []byte) *XfrmAlgoAuth { + ret := XfrmAlgoAuth{} + copy(ret.AlgName[:], b[0:64]) + ret.AlgKeyLen = *(*uint32)(unsafe.Pointer(&b[64])) + ret.AlgTruncLen = *(*uint32)(unsafe.Pointer(&b[68])) + ret.AlgKey = b[72:ret.Len()] + return &ret +} + +func (msg *XfrmAlgoAuth) Serialize() []byte { + b := make([]byte, msg.Len()) + copy(b[0:64], msg.AlgName[:]) + copy(b[64:68], (*(*[4]byte)(unsafe.Pointer(&msg.AlgKeyLen)))[:]) + copy(b[68:72], (*(*[4]byte)(unsafe.Pointer(&msg.AlgTruncLen)))[:]) + copy(b[72:msg.Len()], msg.AlgKey[:]) + return b +} + +// struct xfrm_algo_aead { +// char alg_name[64]; +// unsigned int alg_key_len; /* in bits */ +// unsigned int alg_icv_len; /* in bits */ +// char alg_key[0]; +// } + +type XfrmAlgoAEAD struct { + AlgName [64]byte + AlgKeyLen uint32 + AlgICVLen uint32 + AlgKey []byte +} + +func (msg *XfrmAlgoAEAD) Len() int { + return SizeofXfrmAlgoAEAD + int(msg.AlgKeyLen/8) +} + +func DeserializeXfrmAlgoAEAD(b []byte) *XfrmAlgoAEAD { + ret := XfrmAlgoAEAD{} + copy(ret.AlgName[:], b[0:64]) + ret.AlgKeyLen = *(*uint32)(unsafe.Pointer(&b[64])) + ret.AlgICVLen = *(*uint32)(unsafe.Pointer(&b[68])) + ret.AlgKey = b[72:ret.Len()] + return &ret +} + +func (msg *XfrmAlgoAEAD) Serialize() []byte { + b := make([]byte, msg.Len()) + copy(b[0:64], msg.AlgName[:]) + copy(b[64:68], (*(*[4]byte)(unsafe.Pointer(&msg.AlgKeyLen)))[:]) + copy(b[68:72], (*(*[4]byte)(unsafe.Pointer(&msg.AlgICVLen)))[:]) + copy(b[72:msg.Len()], msg.AlgKey[:]) + return b +} + +// struct xfrm_encap_tmpl { +// __u16 encap_type; +// __be16 encap_sport; +// __be16 encap_dport; +// xfrm_address_t encap_oa; +// }; + +type XfrmEncapTmpl struct { + EncapType uint16 + EncapSport uint16 // big endian + EncapDport uint16 // big endian + Pad [2]byte + EncapOa XfrmAddress +} + +func (msg *XfrmEncapTmpl) Len() int { + return SizeofXfrmEncapTmpl +} + +func DeserializeXfrmEncapTmpl(b []byte) *XfrmEncapTmpl { + return (*XfrmEncapTmpl)(unsafe.Pointer(&b[0:SizeofXfrmEncapTmpl][0])) +} + +func (msg *XfrmEncapTmpl) Serialize() []byte { + return (*(*[SizeofXfrmEncapTmpl]byte)(unsafe.Pointer(msg)))[:] +} + +// struct xfrm_usersa_flush { +// __u8 proto; +// }; + +type XfrmUsersaFlush struct { + Proto uint8 +} + +func (msg *XfrmUsersaFlush) Len() int { + return SizeofXfrmUsersaFlush +} + +func DeserializeXfrmUsersaFlush(b []byte) *XfrmUsersaFlush { + return (*XfrmUsersaFlush)(unsafe.Pointer(&b[0:SizeofXfrmUsersaFlush][0])) +} + +func (msg *XfrmUsersaFlush) Serialize() []byte { + return (*(*[SizeofXfrmUsersaFlush]byte)(unsafe.Pointer(msg)))[:] +} + +// struct xfrm_replay_state_esn { +// unsigned int bmp_len; +// __u32 oseq; +// __u32 seq; +// __u32 oseq_hi; +// __u32 seq_hi; +// __u32 replay_window; +// __u32 bmp[0]; +// }; + +type XfrmReplayStateEsn struct { + BmpLen uint32 + OSeq uint32 + Seq uint32 + OSeqHi uint32 + SeqHi uint32 + ReplayWindow uint32 + Bmp []uint32 +} + +func (msg *XfrmReplayStateEsn) Serialize() []byte { + // We deliberately do not pass Bmp, as it gets set by the kernel. + return (*(*[SizeofXfrmReplayStateEsn]byte)(unsafe.Pointer(msg)))[:] +} diff --git a/vendor/github.com/vishvananda/netlink/order.go b/vendor/github.com/vishvananda/netlink/order.go new file mode 100644 index 000000000000..e28e153a1bf0 --- /dev/null +++ b/vendor/github.com/vishvananda/netlink/order.go @@ -0,0 +1,32 @@ +package netlink + +import ( + "encoding/binary" + + "github.com/vishvananda/netlink/nl" +) + +var ( + native = nl.NativeEndian() + networkOrder = binary.BigEndian +) + +func htonl(val uint32) []byte { + bytes := make([]byte, 4) + binary.BigEndian.PutUint32(bytes, val) + return bytes +} + +func htons(val uint16) []byte { + bytes := make([]byte, 2) + binary.BigEndian.PutUint16(bytes, val) + return bytes +} + +func ntohl(buf []byte) uint32 { + return binary.BigEndian.Uint32(buf) +} + +func ntohs(buf []byte) uint16 { + return binary.BigEndian.Uint16(buf) +} diff --git a/vendor/github.com/vishvananda/netlink/proc_event_linux.go b/vendor/github.com/vishvananda/netlink/proc_event_linux.go new file mode 100644 index 000000000000..53bc59a6ecf4 --- /dev/null +++ b/vendor/github.com/vishvananda/netlink/proc_event_linux.go @@ -0,0 +1,217 @@ +package netlink + +import ( + "bytes" + "encoding/binary" + "fmt" + "os" + "syscall" + + "github.com/vishvananda/netlink/nl" + "github.com/vishvananda/netns" + "golang.org/x/sys/unix" +) + +const CN_IDX_PROC = 0x1 + +const ( + PROC_EVENT_NONE = 0x00000000 + PROC_EVENT_FORK = 0x00000001 + PROC_EVENT_EXEC = 0x00000002 + PROC_EVENT_UID = 0x00000004 + PROC_EVENT_GID = 0x00000040 + PROC_EVENT_SID = 0x00000080 + PROC_EVENT_PTRACE = 0x00000100 + PROC_EVENT_COMM = 0x00000200 + PROC_EVENT_COREDUMP = 0x40000000 + PROC_EVENT_EXIT = 0x80000000 +) + +const ( + CN_VAL_PROC = 0x1 + PROC_CN_MCAST_LISTEN = 0x1 +) + +type ProcEventMsg interface { + Pid() uint32 + Tgid() uint32 +} + +type ProcEventHeader struct { + What uint32 + CPU uint32 + Timestamp uint64 +} + +type ProcEvent struct { + ProcEventHeader + Msg ProcEventMsg +} + +func (pe *ProcEvent) setHeader(h ProcEventHeader) { + pe.What = h.What + pe.CPU = h.CPU + pe.Timestamp = h.Timestamp +} + +type ExitProcEvent struct { + ProcessPid uint32 + ProcessTgid uint32 + ExitCode uint32 + ExitSignal uint32 + ParentPid uint32 + ParentTgid uint32 +} + +type ExitProcEvent2 struct { + ProcessPid uint32 + ProcessTgid uint32 + ExitCode uint32 + ExitSignal uint32 + ParentPid uint32 + ParentTgid uint32 +} + +func (e *ExitProcEvent) Pid() uint32 { + return e.ProcessPid +} + +func (e *ExitProcEvent) Tgid() uint32 { + return e.ProcessTgid +} + +type ExecProcEvent struct { + ProcessPid uint32 + ProcessTgid uint32 +} + +func (e *ExecProcEvent) Pid() uint32 { + return e.ProcessPid +} + +func (e *ExecProcEvent) Tgid() uint32 { + return e.ProcessTgid +} + +type ForkProcEvent struct { + ParentPid uint32 + ParentTgid uint32 + ChildPid uint32 + ChildTgid uint32 +} + +func (e *ForkProcEvent) Pid() uint32 { + return e.ParentPid +} + +func (e *ForkProcEvent) Tgid() uint32 { + return e.ParentTgid +} + +type CommProcEvent struct { + ProcessPid uint32 + ProcessTgid uint32 + Comm [16]byte +} + +func (e *CommProcEvent) Pid() uint32 { + return e.ProcessPid +} + +func (e *CommProcEvent) Tgid() uint32 { + return e.ProcessTgid +} + +func ProcEventMonitor(ch chan<- ProcEvent, done <-chan struct{}, errorChan chan<- error) error { + h, err := NewHandle() + if err != nil { + return err + } + defer h.Delete() + + s, err := nl.SubscribeAt(netns.None(), netns.None(), unix.NETLINK_CONNECTOR, CN_IDX_PROC) + if err != nil { + return err + } + + var nlmsg nl.NetlinkRequest + + nlmsg.Pid = uint32(os.Getpid()) + nlmsg.Type = unix.NLMSG_DONE + nlmsg.Len = uint32(unix.SizeofNlMsghdr) + + cm := nl.NewCnMsg(CN_IDX_PROC, CN_VAL_PROC, PROC_CN_MCAST_LISTEN) + nlmsg.AddData(cm) + + s.Send(&nlmsg) + + if done != nil { + go func() { + <-done + s.Close() + }() + } + + go func() { + defer close(ch) + for { + msgs, from, err := s.Receive() + if err != nil { + errorChan <- err + return + } + if from.Pid != nl.PidKernel { + errorChan <- fmt.Errorf("Wrong sender portid %d, expected %d", from.Pid, nl.PidKernel) + return + } + + for _, m := range msgs { + e := parseNetlinkMessage(m) + if e != nil { + ch <- *e + } + } + + } + }() + + return nil +} + +func parseNetlinkMessage(m syscall.NetlinkMessage) *ProcEvent { + if m.Header.Type == unix.NLMSG_DONE { + buf := bytes.NewBuffer(m.Data) + msg := &nl.CnMsg{} + hdr := &ProcEventHeader{} + binary.Read(buf, nl.NativeEndian(), msg) + binary.Read(buf, nl.NativeEndian(), hdr) + + pe := &ProcEvent{} + pe.setHeader(*hdr) + switch hdr.What { + case PROC_EVENT_EXIT: + event := &ExitProcEvent{} + binary.Read(buf, nl.NativeEndian(), event) + pe.Msg = event + return pe + case PROC_EVENT_FORK: + event := &ForkProcEvent{} + binary.Read(buf, nl.NativeEndian(), event) + pe.Msg = event + return pe + case PROC_EVENT_EXEC: + event := &ExecProcEvent{} + binary.Read(buf, nl.NativeEndian(), event) + pe.Msg = event + return pe + case PROC_EVENT_COMM: + event := &CommProcEvent{} + binary.Read(buf, nl.NativeEndian(), event) + pe.Msg = event + return pe + } + return nil + } + + return nil +} diff --git a/vendor/github.com/vishvananda/netlink/protinfo.go b/vendor/github.com/vishvananda/netlink/protinfo.go new file mode 100644 index 000000000000..60b23b3742c9 --- /dev/null +++ b/vendor/github.com/vishvananda/netlink/protinfo.go @@ -0,0 +1,62 @@ +package netlink + +import ( + "strings" +) + +// Protinfo represents bridge flags from netlink. +type Protinfo struct { + Hairpin bool + Guard bool + FastLeave bool + RootBlock bool + Learning bool + Flood bool + ProxyArp bool + ProxyArpWiFi bool +} + +// String returns a list of enabled flags +func (prot *Protinfo) String() string { + if prot == nil { + return "" + } + + var boolStrings []string + if prot.Hairpin { + boolStrings = append(boolStrings, "Hairpin") + } + if prot.Guard { + boolStrings = append(boolStrings, "Guard") + } + if prot.FastLeave { + boolStrings = append(boolStrings, "FastLeave") + } + if prot.RootBlock { + boolStrings = append(boolStrings, "RootBlock") + } + if prot.Learning { + boolStrings = append(boolStrings, "Learning") + } + if prot.Flood { + boolStrings = append(boolStrings, "Flood") + } + if prot.ProxyArp { + boolStrings = append(boolStrings, "ProxyArp") + } + if prot.ProxyArpWiFi { + boolStrings = append(boolStrings, "ProxyArpWiFi") + } + return strings.Join(boolStrings, " ") +} + +func boolToByte(x bool) []byte { + if x { + return []byte{1} + } + return []byte{0} +} + +func byteToBool(x byte) bool { + return uint8(x) != 0 +} diff --git a/vendor/github.com/vishvananda/netlink/protinfo_linux.go b/vendor/github.com/vishvananda/netlink/protinfo_linux.go new file mode 100644 index 000000000000..15b65123cef7 --- /dev/null +++ b/vendor/github.com/vishvananda/netlink/protinfo_linux.go @@ -0,0 +1,74 @@ +package netlink + +import ( + "fmt" + "syscall" + + "github.com/vishvananda/netlink/nl" + "golang.org/x/sys/unix" +) + +func LinkGetProtinfo(link Link) (Protinfo, error) { + return pkgHandle.LinkGetProtinfo(link) +} + +func (h *Handle) LinkGetProtinfo(link Link) (Protinfo, error) { + base := link.Attrs() + h.ensureIndex(base) + var pi Protinfo + req := h.newNetlinkRequest(unix.RTM_GETLINK, unix.NLM_F_DUMP) + msg := nl.NewIfInfomsg(unix.AF_BRIDGE) + req.AddData(msg) + msgs, err := req.Execute(unix.NETLINK_ROUTE, 0) + if err != nil { + return pi, err + } + + for _, m := range msgs { + ans := nl.DeserializeIfInfomsg(m) + if int(ans.Index) != base.Index { + continue + } + attrs, err := nl.ParseRouteAttr(m[ans.Len():]) + if err != nil { + return pi, err + } + for _, attr := range attrs { + if attr.Attr.Type != unix.IFLA_PROTINFO|unix.NLA_F_NESTED { + continue + } + infos, err := nl.ParseRouteAttr(attr.Value) + if err != nil { + return pi, err + } + pi = parseProtinfo(infos) + + return pi, nil + } + } + return pi, fmt.Errorf("Device with index %d not found", base.Index) +} + +func parseProtinfo(infos []syscall.NetlinkRouteAttr) (pi Protinfo) { + for _, info := range infos { + switch info.Attr.Type { + case nl.IFLA_BRPORT_MODE: + pi.Hairpin = byteToBool(info.Value[0]) + case nl.IFLA_BRPORT_GUARD: + pi.Guard = byteToBool(info.Value[0]) + case nl.IFLA_BRPORT_FAST_LEAVE: + pi.FastLeave = byteToBool(info.Value[0]) + case nl.IFLA_BRPORT_PROTECT: + pi.RootBlock = byteToBool(info.Value[0]) + case nl.IFLA_BRPORT_LEARNING: + pi.Learning = byteToBool(info.Value[0]) + case nl.IFLA_BRPORT_UNICAST_FLOOD: + pi.Flood = byteToBool(info.Value[0]) + case nl.IFLA_BRPORT_PROXYARP: + pi.ProxyArp = byteToBool(info.Value[0]) + case nl.IFLA_BRPORT_PROXYARP_WIFI: + pi.ProxyArpWiFi = byteToBool(info.Value[0]) + } + } + return +} diff --git a/vendor/github.com/vishvananda/netlink/qdisc.go b/vendor/github.com/vishvananda/netlink/qdisc.go new file mode 100644 index 000000000000..f594c9c212df --- /dev/null +++ b/vendor/github.com/vishvananda/netlink/qdisc.go @@ -0,0 +1,366 @@ +package netlink + +import ( + "fmt" + "math" +) + +const ( + HANDLE_NONE = 0 + HANDLE_INGRESS = 0xFFFFFFF1 + HANDLE_CLSACT = HANDLE_INGRESS + HANDLE_ROOT = 0xFFFFFFFF + PRIORITY_MAP_LEN = 16 +) +const ( + HANDLE_MIN_INGRESS = 0xFFFFFFF2 + HANDLE_MIN_EGRESS = 0xFFFFFFF3 +) + +type Qdisc interface { + Attrs() *QdiscAttrs + Type() string +} + +// QdiscAttrs represents a netlink qdisc. A qdisc is associated with a link, +// has a handle, a parent and a refcnt. The root qdisc of a device should +// have parent == HANDLE_ROOT. +type QdiscAttrs struct { + LinkIndex int + Handle uint32 + Parent uint32 + Refcnt uint32 // read only +} + +func (q QdiscAttrs) String() string { + return fmt.Sprintf("{LinkIndex: %d, Handle: %s, Parent: %s, Refcnt: %d}", q.LinkIndex, HandleStr(q.Handle), HandleStr(q.Parent), q.Refcnt) +} + +func MakeHandle(major, minor uint16) uint32 { + return (uint32(major) << 16) | uint32(minor) +} + +func MajorMinor(handle uint32) (uint16, uint16) { + return uint16((handle & 0xFFFF0000) >> 16), uint16(handle & 0x0000FFFFF) +} + +func HandleStr(handle uint32) string { + switch handle { + case HANDLE_NONE: + return "none" + case HANDLE_INGRESS: + return "ingress" + case HANDLE_ROOT: + return "root" + default: + major, minor := MajorMinor(handle) + return fmt.Sprintf("%x:%x", major, minor) + } +} + +func Percentage2u32(percentage float32) uint32 { + // FIXME this is most likely not the best way to convert from % to uint32 + if percentage == 100 { + return math.MaxUint32 + } + return uint32(math.MaxUint32 * (percentage / 100)) +} + +// PfifoFast is the default qdisc created by the kernel if one has not +// been defined for the interface +type PfifoFast struct { + QdiscAttrs + Bands uint8 + PriorityMap [PRIORITY_MAP_LEN]uint8 +} + +func (qdisc *PfifoFast) Attrs() *QdiscAttrs { + return &qdisc.QdiscAttrs +} + +func (qdisc *PfifoFast) Type() string { + return "pfifo_fast" +} + +// Prio is a basic qdisc that works just like PfifoFast +type Prio struct { + QdiscAttrs + Bands uint8 + PriorityMap [PRIORITY_MAP_LEN]uint8 +} + +func NewPrio(attrs QdiscAttrs) *Prio { + return &Prio{ + QdiscAttrs: attrs, + Bands: 3, + PriorityMap: [PRIORITY_MAP_LEN]uint8{1, 2, 2, 2, 1, 2, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1}, + } +} + +func (qdisc *Prio) Attrs() *QdiscAttrs { + return &qdisc.QdiscAttrs +} + +func (qdisc *Prio) Type() string { + return "prio" +} + +// Htb is a classful qdisc that rate limits based on tokens +type Htb struct { + QdiscAttrs + Version uint32 + Rate2Quantum uint32 + Defcls uint32 + Debug uint32 + DirectPkts uint32 +} + +func NewHtb(attrs QdiscAttrs) *Htb { + return &Htb{ + QdiscAttrs: attrs, + Version: 3, + Defcls: 0, + Rate2Quantum: 10, + Debug: 0, + DirectPkts: 0, + } +} + +func (qdisc *Htb) Attrs() *QdiscAttrs { + return &qdisc.QdiscAttrs +} + +func (qdisc *Htb) Type() string { + return "htb" +} + +// Netem is a classless qdisc that rate limits based on tokens + +type NetemQdiscAttrs struct { + Latency uint32 // in us + DelayCorr float32 // in % + Limit uint32 + Loss float32 // in % + LossCorr float32 // in % + Gap uint32 + Duplicate float32 // in % + DuplicateCorr float32 // in % + Jitter uint32 // in us + ReorderProb float32 // in % + ReorderCorr float32 // in % + CorruptProb float32 // in % + CorruptCorr float32 // in % +} + +func (q NetemQdiscAttrs) String() string { + return fmt.Sprintf( + "{Latency: %d, Limit: %d, Loss: %f, Gap: %d, Duplicate: %f, Jitter: %d}", + q.Latency, q.Limit, q.Loss, q.Gap, q.Duplicate, q.Jitter, + ) +} + +type Netem struct { + QdiscAttrs + Latency uint32 + DelayCorr uint32 + Limit uint32 + Loss uint32 + LossCorr uint32 + Gap uint32 + Duplicate uint32 + DuplicateCorr uint32 + Jitter uint32 + ReorderProb uint32 + ReorderCorr uint32 + CorruptProb uint32 + CorruptCorr uint32 +} + +func (netem *Netem) String() string { + return fmt.Sprintf( + "{Latency: %v, Limit: %v, Loss: %v, Gap: %v, Duplicate: %v, Jitter: %v}", + netem.Latency, netem.Limit, netem.Loss, netem.Gap, netem.Duplicate, netem.Jitter, + ) +} + +func (qdisc *Netem) Attrs() *QdiscAttrs { + return &qdisc.QdiscAttrs +} + +func (qdisc *Netem) Type() string { + return "netem" +} + +// Tbf is a classless qdisc that rate limits based on tokens +type Tbf struct { + QdiscAttrs + Rate uint64 + Limit uint32 + Buffer uint32 + Peakrate uint64 + Minburst uint32 + // TODO: handle other settings +} + +func (qdisc *Tbf) Attrs() *QdiscAttrs { + return &qdisc.QdiscAttrs +} + +func (qdisc *Tbf) Type() string { + return "tbf" +} + +// Ingress is a qdisc for adding ingress filters +type Ingress struct { + QdiscAttrs +} + +func (qdisc *Ingress) Attrs() *QdiscAttrs { + return &qdisc.QdiscAttrs +} + +func (qdisc *Ingress) Type() string { + return "ingress" +} + +// GenericQdisc qdiscs represent types that are not currently understood +// by this netlink library. +type GenericQdisc struct { + QdiscAttrs + QdiscType string +} + +func (qdisc *GenericQdisc) Attrs() *QdiscAttrs { + return &qdisc.QdiscAttrs +} + +func (qdisc *GenericQdisc) Type() string { + return qdisc.QdiscType +} + +type Hfsc struct { + QdiscAttrs + Defcls uint16 +} + +func NewHfsc(attrs QdiscAttrs) *Hfsc { + return &Hfsc{ + QdiscAttrs: attrs, + Defcls: 1, + } +} + +func (hfsc *Hfsc) Attrs() *QdiscAttrs { + return &hfsc.QdiscAttrs +} + +func (hfsc *Hfsc) Type() string { + return "hfsc" +} + +func (hfsc *Hfsc) String() string { + return fmt.Sprintf( + "{%v -- default: %d}", + hfsc.Attrs(), hfsc.Defcls, + ) +} + +// Fq is a classless packet scheduler meant to be mostly used for locally generated traffic. +type Fq struct { + QdiscAttrs + PacketLimit uint32 + FlowPacketLimit uint32 + // In bytes + Quantum uint32 + InitialQuantum uint32 + // called RateEnable under the hood + Pacing uint32 + FlowDefaultRate uint32 + FlowMaxRate uint32 + // called BucketsLog under the hood + Buckets uint32 + FlowRefillDelay uint32 + LowRateThreshold uint32 +} + +func (fq *Fq) String() string { + return fmt.Sprintf( + "{PacketLimit: %v, FlowPacketLimit: %v, Quantum: %v, InitialQuantum: %v, Pacing: %v, FlowDefaultRate: %v, FlowMaxRate: %v, Buckets: %v, FlowRefillDelay: %v, LowRateThreshold: %v}", + fq.PacketLimit, fq.FlowPacketLimit, fq.Quantum, fq.InitialQuantum, fq.Pacing, fq.FlowDefaultRate, fq.FlowMaxRate, fq.Buckets, fq.FlowRefillDelay, fq.LowRateThreshold, + ) +} + +func NewFq(attrs QdiscAttrs) *Fq { + return &Fq{ + QdiscAttrs: attrs, + Pacing: 1, + } +} + +func (qdisc *Fq) Attrs() *QdiscAttrs { + return &qdisc.QdiscAttrs +} + +func (qdisc *Fq) Type() string { + return "fq" +} + +// FQ_Codel (Fair Queuing Controlled Delay) is queuing discipline that combines Fair Queuing with the CoDel AQM scheme. +type FqCodel struct { + QdiscAttrs + Target uint32 + Limit uint32 + Interval uint32 + ECN uint32 + Flows uint32 + Quantum uint32 + CEThreshold uint32 + DropBatchSize uint32 + MemoryLimit uint32 +} + +func (fqcodel *FqCodel) String() string { + return fmt.Sprintf( + "{%v -- Target: %v, Limit: %v, Interval: %v, ECM: %v, Flows: %v, Quantum: %v}", + fqcodel.Attrs(), fqcodel.Target, fqcodel.Limit, fqcodel.Interval, fqcodel.ECN, fqcodel.Flows, fqcodel.Quantum, + ) +} + +func NewFqCodel(attrs QdiscAttrs) *FqCodel { + return &FqCodel{ + QdiscAttrs: attrs, + ECN: 1, + } +} + +func (qdisc *FqCodel) Attrs() *QdiscAttrs { + return &qdisc.QdiscAttrs +} + +func (qdisc *FqCodel) Type() string { + return "fq_codel" +} + +type Sfq struct { + QdiscAttrs + // TODO: Only the simplified options for SFQ are handled here. Support for the extended one can be added later. + Quantum uint8 + Perturb uint8 + Limit uint32 + Divisor uint8 +} + +func (sfq *Sfq) String() string { + return fmt.Sprintf( + "{%v -- Quantum: %v, Perturb: %v, Limit: %v, Divisor: %v}", + sfq.Attrs(), sfq.Quantum, sfq.Perturb, sfq.Limit, sfq.Divisor, + ) +} + +func (qdisc *Sfq) Attrs() *QdiscAttrs { + return &qdisc.QdiscAttrs +} + +func (qdisc *Sfq) Type() string { + return "sfq" +} diff --git a/vendor/github.com/vishvananda/netlink/qdisc_linux.go b/vendor/github.com/vishvananda/netlink/qdisc_linux.go new file mode 100644 index 000000000000..e182e1cfe678 --- /dev/null +++ b/vendor/github.com/vishvananda/netlink/qdisc_linux.go @@ -0,0 +1,712 @@ +package netlink + +import ( + "fmt" + "io/ioutil" + "strconv" + "strings" + "syscall" + + "github.com/vishvananda/netlink/nl" + "golang.org/x/sys/unix" +) + +// NOTE function is here because it uses other linux functions +func NewNetem(attrs QdiscAttrs, nattrs NetemQdiscAttrs) *Netem { + var limit uint32 = 1000 + var lossCorr, delayCorr, duplicateCorr uint32 + var reorderProb, reorderCorr uint32 + var corruptProb, corruptCorr uint32 + + latency := nattrs.Latency + loss := Percentage2u32(nattrs.Loss) + gap := nattrs.Gap + duplicate := Percentage2u32(nattrs.Duplicate) + jitter := nattrs.Jitter + + // Correlation + if latency > 0 && jitter > 0 { + delayCorr = Percentage2u32(nattrs.DelayCorr) + } + if loss > 0 { + lossCorr = Percentage2u32(nattrs.LossCorr) + } + if duplicate > 0 { + duplicateCorr = Percentage2u32(nattrs.DuplicateCorr) + } + // FIXME should validate values(like loss/duplicate are percentages...) + latency = time2Tick(latency) + + if nattrs.Limit != 0 { + limit = nattrs.Limit + } + // Jitter is only value if latency is > 0 + if latency > 0 { + jitter = time2Tick(jitter) + } + + reorderProb = Percentage2u32(nattrs.ReorderProb) + reorderCorr = Percentage2u32(nattrs.ReorderCorr) + + if reorderProb > 0 { + // ERROR if lantency == 0 + if gap == 0 { + gap = 1 + } + } + + corruptProb = Percentage2u32(nattrs.CorruptProb) + corruptCorr = Percentage2u32(nattrs.CorruptCorr) + + return &Netem{ + QdiscAttrs: attrs, + Latency: latency, + DelayCorr: delayCorr, + Limit: limit, + Loss: loss, + LossCorr: lossCorr, + Gap: gap, + Duplicate: duplicate, + DuplicateCorr: duplicateCorr, + Jitter: jitter, + ReorderProb: reorderProb, + ReorderCorr: reorderCorr, + CorruptProb: corruptProb, + CorruptCorr: corruptCorr, + } +} + +// QdiscDel will delete a qdisc from the system. +// Equivalent to: `tc qdisc del $qdisc` +func QdiscDel(qdisc Qdisc) error { + return pkgHandle.QdiscDel(qdisc) +} + +// QdiscDel will delete a qdisc from the system. +// Equivalent to: `tc qdisc del $qdisc` +func (h *Handle) QdiscDel(qdisc Qdisc) error { + return h.qdiscModify(unix.RTM_DELQDISC, 0, qdisc) +} + +// QdiscChange will change a qdisc in place +// Equivalent to: `tc qdisc change $qdisc` +// The parent and handle MUST NOT be changed. +func QdiscChange(qdisc Qdisc) error { + return pkgHandle.QdiscChange(qdisc) +} + +// QdiscChange will change a qdisc in place +// Equivalent to: `tc qdisc change $qdisc` +// The parent and handle MUST NOT be changed. +func (h *Handle) QdiscChange(qdisc Qdisc) error { + return h.qdiscModify(unix.RTM_NEWQDISC, 0, qdisc) +} + +// QdiscReplace will replace a qdisc to the system. +// Equivalent to: `tc qdisc replace $qdisc` +// The handle MUST change. +func QdiscReplace(qdisc Qdisc) error { + return pkgHandle.QdiscReplace(qdisc) +} + +// QdiscReplace will replace a qdisc to the system. +// Equivalent to: `tc qdisc replace $qdisc` +// The handle MUST change. +func (h *Handle) QdiscReplace(qdisc Qdisc) error { + return h.qdiscModify( + unix.RTM_NEWQDISC, + unix.NLM_F_CREATE|unix.NLM_F_REPLACE, + qdisc) +} + +// QdiscAdd will add a qdisc to the system. +// Equivalent to: `tc qdisc add $qdisc` +func QdiscAdd(qdisc Qdisc) error { + return pkgHandle.QdiscAdd(qdisc) +} + +// QdiscAdd will add a qdisc to the system. +// Equivalent to: `tc qdisc add $qdisc` +func (h *Handle) QdiscAdd(qdisc Qdisc) error { + return h.qdiscModify( + unix.RTM_NEWQDISC, + unix.NLM_F_CREATE|unix.NLM_F_EXCL, + qdisc) +} + +func (h *Handle) qdiscModify(cmd, flags int, qdisc Qdisc) error { + req := h.newNetlinkRequest(cmd, flags|unix.NLM_F_ACK) + base := qdisc.Attrs() + msg := &nl.TcMsg{ + Family: nl.FAMILY_ALL, + Ifindex: int32(base.LinkIndex), + Handle: base.Handle, + Parent: base.Parent, + } + req.AddData(msg) + + // When deleting don't bother building the rest of the netlink payload + if cmd != unix.RTM_DELQDISC { + if err := qdiscPayload(req, qdisc); err != nil { + return err + } + } + + _, err := req.Execute(unix.NETLINK_ROUTE, 0) + return err +} + +func qdiscPayload(req *nl.NetlinkRequest, qdisc Qdisc) error { + + req.AddData(nl.NewRtAttr(nl.TCA_KIND, nl.ZeroTerminated(qdisc.Type()))) + + options := nl.NewRtAttr(nl.TCA_OPTIONS, nil) + + switch qdisc := qdisc.(type) { + case *Prio: + tcmap := nl.TcPrioMap{ + Bands: int32(qdisc.Bands), + Priomap: qdisc.PriorityMap, + } + options = nl.NewRtAttr(nl.TCA_OPTIONS, tcmap.Serialize()) + case *Tbf: + opt := nl.TcTbfQopt{} + opt.Rate.Rate = uint32(qdisc.Rate) + opt.Peakrate.Rate = uint32(qdisc.Peakrate) + opt.Limit = qdisc.Limit + opt.Buffer = qdisc.Buffer + options.AddRtAttr(nl.TCA_TBF_PARMS, opt.Serialize()) + if qdisc.Rate >= uint64(1<<32) { + options.AddRtAttr(nl.TCA_TBF_RATE64, nl.Uint64Attr(qdisc.Rate)) + } + if qdisc.Peakrate >= uint64(1<<32) { + options.AddRtAttr(nl.TCA_TBF_PRATE64, nl.Uint64Attr(qdisc.Peakrate)) + } + if qdisc.Peakrate > 0 { + options.AddRtAttr(nl.TCA_TBF_PBURST, nl.Uint32Attr(qdisc.Minburst)) + } + case *Htb: + opt := nl.TcHtbGlob{} + opt.Version = qdisc.Version + opt.Rate2Quantum = qdisc.Rate2Quantum + opt.Defcls = qdisc.Defcls + // TODO: Handle Debug properly. For now default to 0 + opt.Debug = qdisc.Debug + opt.DirectPkts = qdisc.DirectPkts + options.AddRtAttr(nl.TCA_HTB_INIT, opt.Serialize()) + // options.AddRtAttr(nl.TCA_HTB_DIRECT_QLEN, opt.Serialize()) + case *Hfsc: + opt := nl.TcHfscOpt{} + opt.Defcls = qdisc.Defcls + options = nl.NewRtAttr(nl.TCA_OPTIONS, opt.Serialize()) + case *Netem: + opt := nl.TcNetemQopt{} + opt.Latency = qdisc.Latency + opt.Limit = qdisc.Limit + opt.Loss = qdisc.Loss + opt.Gap = qdisc.Gap + opt.Duplicate = qdisc.Duplicate + opt.Jitter = qdisc.Jitter + options = nl.NewRtAttr(nl.TCA_OPTIONS, opt.Serialize()) + // Correlation + corr := nl.TcNetemCorr{} + corr.DelayCorr = qdisc.DelayCorr + corr.LossCorr = qdisc.LossCorr + corr.DupCorr = qdisc.DuplicateCorr + + if corr.DelayCorr > 0 || corr.LossCorr > 0 || corr.DupCorr > 0 { + options.AddRtAttr(nl.TCA_NETEM_CORR, corr.Serialize()) + } + // Corruption + corruption := nl.TcNetemCorrupt{} + corruption.Probability = qdisc.CorruptProb + corruption.Correlation = qdisc.CorruptCorr + if corruption.Probability > 0 { + options.AddRtAttr(nl.TCA_NETEM_CORRUPT, corruption.Serialize()) + } + // Reorder + reorder := nl.TcNetemReorder{} + reorder.Probability = qdisc.ReorderProb + reorder.Correlation = qdisc.ReorderCorr + if reorder.Probability > 0 { + options.AddRtAttr(nl.TCA_NETEM_REORDER, reorder.Serialize()) + } + case *Ingress: + // ingress filters must use the proper handle + if qdisc.Attrs().Parent != HANDLE_INGRESS { + return fmt.Errorf("Ingress filters must set Parent to HANDLE_INGRESS") + } + case *FqCodel: + options.AddRtAttr(nl.TCA_FQ_CODEL_ECN, nl.Uint32Attr((uint32(qdisc.ECN)))) + if qdisc.Limit > 0 { + options.AddRtAttr(nl.TCA_FQ_CODEL_LIMIT, nl.Uint32Attr((uint32(qdisc.Limit)))) + } + if qdisc.Interval > 0 { + options.AddRtAttr(nl.TCA_FQ_CODEL_INTERVAL, nl.Uint32Attr((uint32(qdisc.Interval)))) + } + if qdisc.Flows > 0 { + options.AddRtAttr(nl.TCA_FQ_CODEL_FLOWS, nl.Uint32Attr((uint32(qdisc.Flows)))) + } + if qdisc.Quantum > 0 { + options.AddRtAttr(nl.TCA_FQ_CODEL_QUANTUM, nl.Uint32Attr((uint32(qdisc.Quantum)))) + } + if qdisc.CEThreshold > 0 { + options.AddRtAttr(nl.TCA_FQ_CODEL_CE_THRESHOLD, nl.Uint32Attr(qdisc.CEThreshold)) + } + if qdisc.DropBatchSize > 0 { + options.AddRtAttr(nl.TCA_FQ_CODEL_DROP_BATCH_SIZE, nl.Uint32Attr(qdisc.DropBatchSize)) + } + if qdisc.MemoryLimit > 0 { + options.AddRtAttr(nl.TCA_FQ_CODEL_MEMORY_LIMIT, nl.Uint32Attr(qdisc.MemoryLimit)) + } + case *Fq: + options.AddRtAttr(nl.TCA_FQ_RATE_ENABLE, nl.Uint32Attr((uint32(qdisc.Pacing)))) + + if qdisc.Buckets > 0 { + options.AddRtAttr(nl.TCA_FQ_BUCKETS_LOG, nl.Uint32Attr((uint32(qdisc.Buckets)))) + } + if qdisc.LowRateThreshold > 0 { + options.AddRtAttr(nl.TCA_FQ_LOW_RATE_THRESHOLD, nl.Uint32Attr((uint32(qdisc.LowRateThreshold)))) + } + if qdisc.Quantum > 0 { + options.AddRtAttr(nl.TCA_FQ_QUANTUM, nl.Uint32Attr((uint32(qdisc.Quantum)))) + } + if qdisc.InitialQuantum > 0 { + options.AddRtAttr(nl.TCA_FQ_INITIAL_QUANTUM, nl.Uint32Attr((uint32(qdisc.InitialQuantum)))) + } + if qdisc.FlowRefillDelay > 0 { + options.AddRtAttr(nl.TCA_FQ_FLOW_REFILL_DELAY, nl.Uint32Attr((uint32(qdisc.FlowRefillDelay)))) + } + if qdisc.FlowPacketLimit > 0 { + options.AddRtAttr(nl.TCA_FQ_FLOW_PLIMIT, nl.Uint32Attr((uint32(qdisc.FlowPacketLimit)))) + } + if qdisc.FlowMaxRate > 0 { + options.AddRtAttr(nl.TCA_FQ_FLOW_MAX_RATE, nl.Uint32Attr((uint32(qdisc.FlowMaxRate)))) + } + if qdisc.FlowDefaultRate > 0 { + options.AddRtAttr(nl.TCA_FQ_FLOW_DEFAULT_RATE, nl.Uint32Attr((uint32(qdisc.FlowDefaultRate)))) + } + case *Sfq: + opt := nl.TcSfqQoptV1{} + opt.TcSfqQopt.Quantum = qdisc.Quantum + opt.TcSfqQopt.Perturb = int32(qdisc.Perturb) + opt.TcSfqQopt.Limit = qdisc.Limit + opt.TcSfqQopt.Divisor = qdisc.Divisor + + options = nl.NewRtAttr(nl.TCA_OPTIONS, opt.Serialize()) + default: + options = nil + } + + if options != nil { + req.AddData(options) + } + return nil +} + +// QdiscList gets a list of qdiscs in the system. +// Equivalent to: `tc qdisc show`. +// The list can be filtered by link. +func QdiscList(link Link) ([]Qdisc, error) { + return pkgHandle.QdiscList(link) +} + +// QdiscList gets a list of qdiscs in the system. +// Equivalent to: `tc qdisc show`. +// The list can be filtered by link. +func (h *Handle) QdiscList(link Link) ([]Qdisc, error) { + req := h.newNetlinkRequest(unix.RTM_GETQDISC, unix.NLM_F_DUMP) + index := int32(0) + if link != nil { + base := link.Attrs() + h.ensureIndex(base) + index = int32(base.Index) + } + msg := &nl.TcMsg{ + Family: nl.FAMILY_ALL, + Ifindex: index, + } + req.AddData(msg) + + msgs, err := req.Execute(unix.NETLINK_ROUTE, unix.RTM_NEWQDISC) + if err != nil { + return nil, err + } + + var res []Qdisc + for _, m := range msgs { + msg := nl.DeserializeTcMsg(m) + + attrs, err := nl.ParseRouteAttr(m[msg.Len():]) + if err != nil { + return nil, err + } + + // skip qdiscs from other interfaces + if link != nil && msg.Ifindex != index { + continue + } + + base := QdiscAttrs{ + LinkIndex: int(msg.Ifindex), + Handle: msg.Handle, + Parent: msg.Parent, + Refcnt: msg.Info, + } + var qdisc Qdisc + qdiscType := "" + for _, attr := range attrs { + switch attr.Attr.Type { + case nl.TCA_KIND: + qdiscType = string(attr.Value[:len(attr.Value)-1]) + switch qdiscType { + case "pfifo_fast": + qdisc = &PfifoFast{} + case "prio": + qdisc = &Prio{} + case "tbf": + qdisc = &Tbf{} + case "ingress": + qdisc = &Ingress{} + case "htb": + qdisc = &Htb{} + case "fq": + qdisc = &Fq{} + case "hfsc": + qdisc = &Hfsc{} + case "fq_codel": + qdisc = &FqCodel{} + case "netem": + qdisc = &Netem{} + case "sfq": + qdisc = &Sfq{} + default: + qdisc = &GenericQdisc{QdiscType: qdiscType} + } + case nl.TCA_OPTIONS: + switch qdiscType { + case "pfifo_fast": + // pfifo returns TcPrioMap directly without wrapping it in rtattr + if err := parsePfifoFastData(qdisc, attr.Value); err != nil { + return nil, err + } + case "prio": + // prio returns TcPrioMap directly without wrapping it in rtattr + if err := parsePrioData(qdisc, attr.Value); err != nil { + return nil, err + } + case "tbf": + data, err := nl.ParseRouteAttr(attr.Value) + if err != nil { + return nil, err + } + if err := parseTbfData(qdisc, data); err != nil { + return nil, err + } + case "hfsc": + if err := parseHfscData(qdisc, attr.Value); err != nil { + return nil, err + } + case "htb": + data, err := nl.ParseRouteAttr(attr.Value) + if err != nil { + return nil, err + } + if err := parseHtbData(qdisc, data); err != nil { + return nil, err + } + case "fq": + data, err := nl.ParseRouteAttr(attr.Value) + if err != nil { + return nil, err + } + if err := parseFqData(qdisc, data); err != nil { + return nil, err + } + case "fq_codel": + data, err := nl.ParseRouteAttr(attr.Value) + if err != nil { + return nil, err + } + if err := parseFqCodelData(qdisc, data); err != nil { + return nil, err + } + case "netem": + if err := parseNetemData(qdisc, attr.Value); err != nil { + return nil, err + } + case "sfq": + if err := parseSfqData(qdisc, attr.Value); err != nil { + return nil, err + } + + // no options for ingress + } + } + } + *qdisc.Attrs() = base + res = append(res, qdisc) + } + + return res, nil +} + +func parsePfifoFastData(qdisc Qdisc, value []byte) error { + pfifo := qdisc.(*PfifoFast) + tcmap := nl.DeserializeTcPrioMap(value) + pfifo.PriorityMap = tcmap.Priomap + pfifo.Bands = uint8(tcmap.Bands) + return nil +} + +func parsePrioData(qdisc Qdisc, value []byte) error { + prio := qdisc.(*Prio) + tcmap := nl.DeserializeTcPrioMap(value) + prio.PriorityMap = tcmap.Priomap + prio.Bands = uint8(tcmap.Bands) + return nil +} + +func parseHtbData(qdisc Qdisc, data []syscall.NetlinkRouteAttr) error { + htb := qdisc.(*Htb) + for _, datum := range data { + switch datum.Attr.Type { + case nl.TCA_HTB_INIT: + opt := nl.DeserializeTcHtbGlob(datum.Value) + htb.Version = opt.Version + htb.Rate2Quantum = opt.Rate2Quantum + htb.Defcls = opt.Defcls + htb.Debug = opt.Debug + htb.DirectPkts = opt.DirectPkts + case nl.TCA_HTB_DIRECT_QLEN: + // TODO + //htb.DirectQlen = native.uint32(datum.Value) + } + } + return nil +} + +func parseFqCodelData(qdisc Qdisc, data []syscall.NetlinkRouteAttr) error { + fqCodel := qdisc.(*FqCodel) + for _, datum := range data { + + switch datum.Attr.Type { + case nl.TCA_FQ_CODEL_TARGET: + fqCodel.Target = native.Uint32(datum.Value) + case nl.TCA_FQ_CODEL_LIMIT: + fqCodel.Limit = native.Uint32(datum.Value) + case nl.TCA_FQ_CODEL_INTERVAL: + fqCodel.Interval = native.Uint32(datum.Value) + case nl.TCA_FQ_CODEL_ECN: + fqCodel.ECN = native.Uint32(datum.Value) + case nl.TCA_FQ_CODEL_FLOWS: + fqCodel.Flows = native.Uint32(datum.Value) + case nl.TCA_FQ_CODEL_QUANTUM: + fqCodel.Quantum = native.Uint32(datum.Value) + case nl.TCA_FQ_CODEL_CE_THRESHOLD: + fqCodel.CEThreshold = native.Uint32(datum.Value) + case nl.TCA_FQ_CODEL_DROP_BATCH_SIZE: + fqCodel.DropBatchSize = native.Uint32(datum.Value) + case nl.TCA_FQ_CODEL_MEMORY_LIMIT: + fqCodel.MemoryLimit = native.Uint32(datum.Value) + } + } + return nil +} + +func parseHfscData(qdisc Qdisc, data []byte) error { + Hfsc := qdisc.(*Hfsc) + Hfsc.Defcls = native.Uint16(data) + return nil +} + +func parseFqData(qdisc Qdisc, data []syscall.NetlinkRouteAttr) error { + fq := qdisc.(*Fq) + for _, datum := range data { + switch datum.Attr.Type { + case nl.TCA_FQ_BUCKETS_LOG: + fq.Buckets = native.Uint32(datum.Value) + case nl.TCA_FQ_LOW_RATE_THRESHOLD: + fq.LowRateThreshold = native.Uint32(datum.Value) + case nl.TCA_FQ_QUANTUM: + fq.Quantum = native.Uint32(datum.Value) + case nl.TCA_FQ_RATE_ENABLE: + fq.Pacing = native.Uint32(datum.Value) + case nl.TCA_FQ_INITIAL_QUANTUM: + fq.InitialQuantum = native.Uint32(datum.Value) + case nl.TCA_FQ_ORPHAN_MASK: + // TODO + case nl.TCA_FQ_FLOW_REFILL_DELAY: + fq.FlowRefillDelay = native.Uint32(datum.Value) + case nl.TCA_FQ_FLOW_PLIMIT: + fq.FlowPacketLimit = native.Uint32(datum.Value) + case nl.TCA_FQ_PLIMIT: + fq.PacketLimit = native.Uint32(datum.Value) + case nl.TCA_FQ_FLOW_MAX_RATE: + fq.FlowMaxRate = native.Uint32(datum.Value) + case nl.TCA_FQ_FLOW_DEFAULT_RATE: + fq.FlowDefaultRate = native.Uint32(datum.Value) + } + } + return nil +} + +func parseNetemData(qdisc Qdisc, value []byte) error { + netem := qdisc.(*Netem) + opt := nl.DeserializeTcNetemQopt(value) + netem.Latency = opt.Latency + netem.Limit = opt.Limit + netem.Loss = opt.Loss + netem.Gap = opt.Gap + netem.Duplicate = opt.Duplicate + netem.Jitter = opt.Jitter + data, err := nl.ParseRouteAttr(value[nl.SizeofTcNetemQopt:]) + if err != nil { + return err + } + for _, datum := range data { + switch datum.Attr.Type { + case nl.TCA_NETEM_CORR: + opt := nl.DeserializeTcNetemCorr(datum.Value) + netem.DelayCorr = opt.DelayCorr + netem.LossCorr = opt.LossCorr + netem.DuplicateCorr = opt.DupCorr + case nl.TCA_NETEM_CORRUPT: + opt := nl.DeserializeTcNetemCorrupt(datum.Value) + netem.CorruptProb = opt.Probability + netem.CorruptCorr = opt.Correlation + case nl.TCA_NETEM_REORDER: + opt := nl.DeserializeTcNetemReorder(datum.Value) + netem.ReorderProb = opt.Probability + netem.ReorderCorr = opt.Correlation + } + } + return nil +} + +func parseTbfData(qdisc Qdisc, data []syscall.NetlinkRouteAttr) error { + tbf := qdisc.(*Tbf) + for _, datum := range data { + switch datum.Attr.Type { + case nl.TCA_TBF_PARMS: + opt := nl.DeserializeTcTbfQopt(datum.Value) + tbf.Rate = uint64(opt.Rate.Rate) + tbf.Peakrate = uint64(opt.Peakrate.Rate) + tbf.Limit = opt.Limit + tbf.Buffer = opt.Buffer + case nl.TCA_TBF_RATE64: + tbf.Rate = native.Uint64(datum.Value[0:8]) + case nl.TCA_TBF_PRATE64: + tbf.Peakrate = native.Uint64(datum.Value[0:8]) + case nl.TCA_TBF_PBURST: + tbf.Minburst = native.Uint32(datum.Value[0:4]) + } + } + return nil +} + +func parseSfqData(qdisc Qdisc, value []byte) error { + sfq := qdisc.(*Sfq) + opt := nl.DeserializeTcSfqQoptV1(value) + sfq.Quantum = opt.TcSfqQopt.Quantum + sfq.Perturb = uint8(opt.TcSfqQopt.Perturb) + sfq.Limit = opt.TcSfqQopt.Limit + sfq.Divisor = opt.TcSfqQopt.Divisor + + return nil +} + +const ( + TIME_UNITS_PER_SEC = 1000000 +) + +var ( + tickInUsec float64 + clockFactor float64 + hz float64 +) + +func initClock() { + data, err := ioutil.ReadFile("/proc/net/psched") + if err != nil { + return + } + parts := strings.Split(strings.TrimSpace(string(data)), " ") + if len(parts) < 4 { + return + } + var vals [4]uint64 + for i := range vals { + val, err := strconv.ParseUint(parts[i], 16, 32) + if err != nil { + return + } + vals[i] = val + } + // compatibility + if vals[2] == 1000000000 { + vals[0] = vals[1] + } + clockFactor = float64(vals[2]) / TIME_UNITS_PER_SEC + tickInUsec = float64(vals[0]) / float64(vals[1]) * clockFactor + if vals[2] == 1000000 { + // ref https://git.kernel.org/pub/scm/network/iproute2/iproute2.git/tree/lib/utils.c#n963 + hz = float64(vals[3]) + } else { + hz = 100 + } +} + +func TickInUsec() float64 { + if tickInUsec == 0.0 { + initClock() + } + return tickInUsec +} + +func ClockFactor() float64 { + if clockFactor == 0.0 { + initClock() + } + return clockFactor +} + +func Hz() float64 { + if hz == 0.0 { + initClock() + } + return hz +} + +func time2Tick(time uint32) uint32 { + return uint32(float64(time) * TickInUsec()) +} + +func tick2Time(tick uint32) uint32 { + return uint32(float64(tick) / TickInUsec()) +} + +func time2Ktime(time uint32) uint32 { + return uint32(float64(time) * ClockFactor()) +} + +func ktime2Time(ktime uint32) uint32 { + return uint32(float64(ktime) / ClockFactor()) +} + +func burst(rate uint64, buffer uint32) uint32 { + return uint32(float64(rate) * float64(tick2Time(buffer)) / TIME_UNITS_PER_SEC) +} + +func latency(rate uint64, limit, buffer uint32) float64 { + return TIME_UNITS_PER_SEC*(float64(limit)/float64(rate)) - float64(tick2Time(buffer)) +} + +func Xmittime(rate uint64, size uint32) uint32 { + // https://git.kernel.org/pub/scm/network/iproute2/iproute2.git/tree/tc/tc_core.c#n62 + return time2Tick(uint32(TIME_UNITS_PER_SEC * (float64(size) / float64(rate)))) +} + +func Xmitsize(rate uint64, ticks uint32) uint32 { + return uint32((float64(rate) * float64(tick2Time(ticks))) / TIME_UNITS_PER_SEC) +} diff --git a/vendor/github.com/vishvananda/netlink/rdma_link_linux.go b/vendor/github.com/vishvananda/netlink/rdma_link_linux.go new file mode 100644 index 000000000000..036399db6b0c --- /dev/null +++ b/vendor/github.com/vishvananda/netlink/rdma_link_linux.go @@ -0,0 +1,331 @@ +package netlink + +import ( + "bytes" + "encoding/binary" + "fmt" + "net" + + "github.com/vishvananda/netlink/nl" + "golang.org/x/sys/unix" +) + +// LinkAttrs represents data shared by most link types +type RdmaLinkAttrs struct { + Index uint32 + Name string + FirmwareVersion string + NodeGuid string + SysImageGuid string +} + +// Link represents a rdma device from netlink. +type RdmaLink struct { + Attrs RdmaLinkAttrs +} + +func getProtoField(clientType int, op int) int { + return ((clientType << nl.RDMA_NL_GET_CLIENT_SHIFT) | op) +} + +func uint64ToGuidString(guid uint64) string { + //Convert to byte array + sysGuidBytes := new(bytes.Buffer) + binary.Write(sysGuidBytes, binary.LittleEndian, guid) + + //Convert to HardwareAddr + sysGuidNet := net.HardwareAddr(sysGuidBytes.Bytes()) + + //Get the String + return sysGuidNet.String() +} + +func executeOneGetRdmaLink(data []byte) (*RdmaLink, error) { + + link := RdmaLink{} + + reader := bytes.NewReader(data) + for reader.Len() >= 4 { + _, attrType, len, value := parseNfAttrTLV(reader) + + switch attrType { + case nl.RDMA_NLDEV_ATTR_DEV_INDEX: + var Index uint32 + r := bytes.NewReader(value) + binary.Read(r, nl.NativeEndian(), &Index) + link.Attrs.Index = Index + case nl.RDMA_NLDEV_ATTR_DEV_NAME: + link.Attrs.Name = string(value[0 : len-1]) + case nl.RDMA_NLDEV_ATTR_FW_VERSION: + link.Attrs.FirmwareVersion = string(value[0 : len-1]) + case nl.RDMA_NLDEV_ATTR_NODE_GUID: + var guid uint64 + r := bytes.NewReader(value) + binary.Read(r, nl.NativeEndian(), &guid) + link.Attrs.NodeGuid = uint64ToGuidString(guid) + case nl.RDMA_NLDEV_ATTR_SYS_IMAGE_GUID: + var sysGuid uint64 + r := bytes.NewReader(value) + binary.Read(r, nl.NativeEndian(), &sysGuid) + link.Attrs.SysImageGuid = uint64ToGuidString(sysGuid) + } + if (len % 4) != 0 { + // Skip pad bytes + reader.Seek(int64(4-(len%4)), seekCurrent) + } + } + return &link, nil +} + +func execRdmaSetLink(req *nl.NetlinkRequest) error { + + _, err := req.Execute(unix.NETLINK_RDMA, 0) + return err +} + +// RdmaLinkList gets a list of RDMA link devices. +// Equivalent to: `rdma dev show` +func RdmaLinkList() ([]*RdmaLink, error) { + return pkgHandle.RdmaLinkList() +} + +// RdmaLinkList gets a list of RDMA link devices. +// Equivalent to: `rdma dev show` +func (h *Handle) RdmaLinkList() ([]*RdmaLink, error) { + proto := getProtoField(nl.RDMA_NL_NLDEV, nl.RDMA_NLDEV_CMD_GET) + req := h.newNetlinkRequest(proto, unix.NLM_F_ACK|unix.NLM_F_DUMP) + + msgs, err := req.Execute(unix.NETLINK_RDMA, 0) + if err != nil { + return nil, err + } + + var res []*RdmaLink + for _, m := range msgs { + link, err := executeOneGetRdmaLink(m) + if err != nil { + return nil, err + } + res = append(res, link) + } + + return res, nil +} + +// RdmaLinkByName finds a link by name and returns a pointer to the object if +// found and nil error, otherwise returns error code. +func RdmaLinkByName(name string) (*RdmaLink, error) { + return pkgHandle.RdmaLinkByName(name) +} + +// RdmaLinkByName finds a link by name and returns a pointer to the object if +// found and nil error, otherwise returns error code. +func (h *Handle) RdmaLinkByName(name string) (*RdmaLink, error) { + links, err := h.RdmaLinkList() + if err != nil { + return nil, err + } + for _, link := range links { + if link.Attrs.Name == name { + return link, nil + } + } + return nil, fmt.Errorf("Rdma device %v not found", name) +} + +// RdmaLinkSetName sets the name of the rdma link device. Return nil on success +// or error otherwise. +// Equivalent to: `rdma dev set $old_devname name $name` +func RdmaLinkSetName(link *RdmaLink, name string) error { + return pkgHandle.RdmaLinkSetName(link, name) +} + +// RdmaLinkSetName sets the name of the rdma link device. Return nil on success +// or error otherwise. +// Equivalent to: `rdma dev set $old_devname name $name` +func (h *Handle) RdmaLinkSetName(link *RdmaLink, name string) error { + proto := getProtoField(nl.RDMA_NL_NLDEV, nl.RDMA_NLDEV_CMD_SET) + req := h.newNetlinkRequest(proto, unix.NLM_F_ACK) + + b := make([]byte, 4) + native.PutUint32(b, uint32(link.Attrs.Index)) + data := nl.NewRtAttr(nl.RDMA_NLDEV_ATTR_DEV_INDEX, b) + req.AddData(data) + + b = make([]byte, len(name)+1) + copy(b, name) + data = nl.NewRtAttr(nl.RDMA_NLDEV_ATTR_DEV_NAME, b) + req.AddData(data) + + return execRdmaSetLink(req) +} + +func netnsModeToString(mode uint8) string { + switch mode { + case 0: + return "exclusive" + case 1: + return "shared" + default: + return "unknown" + } +} + +func executeOneGetRdmaNetnsMode(data []byte) (string, error) { + reader := bytes.NewReader(data) + for reader.Len() >= 4 { + _, attrType, len, value := parseNfAttrTLV(reader) + + switch attrType { + case nl.RDMA_NLDEV_SYS_ATTR_NETNS_MODE: + var mode uint8 + r := bytes.NewReader(value) + binary.Read(r, nl.NativeEndian(), &mode) + return netnsModeToString(mode), nil + } + if (len % 4) != 0 { + // Skip pad bytes + reader.Seek(int64(4-(len%4)), seekCurrent) + } + } + return "", fmt.Errorf("Invalid netns mode") +} + +// RdmaSystemGetNetnsMode gets the net namespace mode for RDMA subsystem +// Returns mode string and error status as nil on success or returns error +// otherwise. +// Equivalent to: `rdma system show netns' +func RdmaSystemGetNetnsMode() (string, error) { + return pkgHandle.RdmaSystemGetNetnsMode() +} + +// RdmaSystemGetNetnsMode gets the net namespace mode for RDMA subsystem +// Returns mode string and error status as nil on success or returns error +// otherwise. +// Equivalent to: `rdma system show netns' +func (h *Handle) RdmaSystemGetNetnsMode() (string, error) { + + proto := getProtoField(nl.RDMA_NL_NLDEV, nl.RDMA_NLDEV_CMD_SYS_GET) + req := h.newNetlinkRequest(proto, unix.NLM_F_ACK) + + msgs, err := req.Execute(unix.NETLINK_RDMA, 0) + if err != nil { + return "", err + } + if len(msgs) == 0 { + return "", fmt.Errorf("No valid response from kernel") + } + return executeOneGetRdmaNetnsMode(msgs[0]) +} + +func netnsModeStringToUint8(mode string) (uint8, error) { + switch mode { + case "exclusive": + return 0, nil + case "shared": + return 1, nil + default: + return 0, fmt.Errorf("Invalid mode; %q", mode) + } +} + +// RdmaSystemSetNetnsMode sets the net namespace mode for RDMA subsystem +// Returns nil on success or appropriate error code. +// Equivalent to: `rdma system set netns { shared | exclusive }' +func RdmaSystemSetNetnsMode(NewMode string) error { + return pkgHandle.RdmaSystemSetNetnsMode(NewMode) +} + +// RdmaSystemSetNetnsMode sets the net namespace mode for RDMA subsystem +// Returns nil on success or appropriate error code. +// Equivalent to: `rdma system set netns { shared | exclusive }' +func (h *Handle) RdmaSystemSetNetnsMode(NewMode string) error { + value, err := netnsModeStringToUint8(NewMode) + if err != nil { + return err + } + + proto := getProtoField(nl.RDMA_NL_NLDEV, nl.RDMA_NLDEV_CMD_SYS_SET) + req := h.newNetlinkRequest(proto, unix.NLM_F_ACK) + + data := nl.NewRtAttr(nl.RDMA_NLDEV_SYS_ATTR_NETNS_MODE, []byte{value}) + req.AddData(data) + + _, err = req.Execute(unix.NETLINK_RDMA, 0) + return err +} + +// RdmaLinkSetNsFd puts the RDMA device into a new network namespace. The +// fd must be an open file descriptor to a network namespace. +// Similar to: `rdma dev set $dev netns $ns` +func RdmaLinkSetNsFd(link *RdmaLink, fd uint32) error { + return pkgHandle.RdmaLinkSetNsFd(link, fd) +} + +// RdmaLinkSetNsFd puts the RDMA device into a new network namespace. The +// fd must be an open file descriptor to a network namespace. +// Similar to: `rdma dev set $dev netns $ns` +func (h *Handle) RdmaLinkSetNsFd(link *RdmaLink, fd uint32) error { + proto := getProtoField(nl.RDMA_NL_NLDEV, nl.RDMA_NLDEV_CMD_SET) + req := h.newNetlinkRequest(proto, unix.NLM_F_ACK) + + data := nl.NewRtAttr(nl.RDMA_NLDEV_ATTR_DEV_INDEX, + nl.Uint32Attr(link.Attrs.Index)) + req.AddData(data) + + data = nl.NewRtAttr(nl.RDMA_NLDEV_NET_NS_FD, nl.Uint32Attr(fd)) + req.AddData(data) + + return execRdmaSetLink(req) +} + +// RdmaLinkDel deletes an rdma link +// +// Similar to: rdma link delete NAME +// REF: https://man7.org/linux/man-pages/man8/rdma-link.8.html +func RdmaLinkDel(name string) error { + return pkgHandle.RdmaLinkDel(name) +} + +// RdmaLinkDel deletes an rdma link. +func (h *Handle) RdmaLinkDel(name string) error { + link, err := h.RdmaLinkByName(name) + if err != nil { + return err + } + + proto := getProtoField(nl.RDMA_NL_NLDEV, nl.RDMA_NLDEV_CMD_DELLINK) + req := h.newNetlinkRequest(proto, unix.NLM_F_ACK) + + b := make([]byte, 4) + native.PutUint32(b, link.Attrs.Index) + req.AddData(nl.NewRtAttr(nl.RDMA_NLDEV_ATTR_DEV_INDEX, b)) + + _, err = req.Execute(unix.NETLINK_RDMA, 0) + return err +} + +// RdmaLinkAdd adds an rdma link for the specified type to the network device. +// Similar to: rdma link add NAME type TYPE netdev NETDEV +// NAME - specifies the new name of the rdma link to add +// TYPE - specifies which rdma type to use. Link types: +// rxe - Soft RoCE driver +// siw - Soft iWARP driver +// NETDEV - specifies the network device to which the link is bound +// +// REF: https://man7.org/linux/man-pages/man8/rdma-link.8.html +func RdmaLinkAdd(linkName, linkType, netdev string) error { + return pkgHandle.RdmaLinkAdd(linkName, linkType, netdev) +} + +// RdmaLinkAdd adds an rdma link for the specified type to the network device. +func (h *Handle) RdmaLinkAdd(linkName string, linkType string, netdev string) error { + proto := getProtoField(nl.RDMA_NL_NLDEV, nl.RDMA_NLDEV_CMD_NEWLINK) + req := h.newNetlinkRequest(proto, unix.NLM_F_ACK) + + req.AddData(nl.NewRtAttr(nl.RDMA_NLDEV_ATTR_DEV_NAME, nl.ZeroTerminated(linkName))) + req.AddData(nl.NewRtAttr(nl.RDMA_NLDEV_ATTR_LINK_TYPE, nl.ZeroTerminated(linkType))) + req.AddData(nl.NewRtAttr(nl.RDMA_NLDEV_ATTR_NDEV_NAME, nl.ZeroTerminated(netdev))) + _, err := req.Execute(unix.NETLINK_RDMA, 0) + return err +} diff --git a/vendor/github.com/vishvananda/netlink/route.go b/vendor/github.com/vishvananda/netlink/route.go new file mode 100644 index 000000000000..79cc218ec812 --- /dev/null +++ b/vendor/github.com/vishvananda/netlink/route.go @@ -0,0 +1,227 @@ +package netlink + +import ( + "fmt" + "net" + "strings" +) + +// Scope is an enum representing a route scope. +type Scope uint8 + +type NextHopFlag int + +const ( + RT_FILTER_PROTOCOL uint64 = 1 << (1 + iota) + RT_FILTER_SCOPE + RT_FILTER_TYPE + RT_FILTER_TOS + RT_FILTER_IIF + RT_FILTER_OIF + RT_FILTER_DST + RT_FILTER_SRC + RT_FILTER_GW + RT_FILTER_TABLE + RT_FILTER_HOPLIMIT + RT_FILTER_PRIORITY + RT_FILTER_MARK + RT_FILTER_MASK + RT_FILTER_REALM +) + +type Destination interface { + Family() int + Decode([]byte) error + Encode() ([]byte, error) + String() string + Equal(Destination) bool +} + +type Encap interface { + Type() int + Decode([]byte) error + Encode() ([]byte, error) + String() string + Equal(Encap) bool +} + +//Protocol describe what was the originator of the route +type RouteProtocol int + +// Route represents a netlink route. +type Route struct { + LinkIndex int + ILinkIndex int + Scope Scope + Dst *net.IPNet + Src net.IP + Gw net.IP + MultiPath []*NexthopInfo + Protocol RouteProtocol + Priority int + Family int + Table int + Type int + Tos int + Flags int + MPLSDst *int + NewDst Destination + Encap Encap + Via Destination + Realm int + MTU int + Window int + Rtt int + RttVar int + Ssthresh int + Cwnd int + AdvMSS int + Reordering int + Hoplimit int + InitCwnd int + Features int + RtoMin int + InitRwnd int + QuickACK int + Congctl string + FastOpenNoCookie int +} + +func (r Route) String() string { + elems := []string{} + if len(r.MultiPath) == 0 { + elems = append(elems, fmt.Sprintf("Ifindex: %d", r.LinkIndex)) + } + if r.MPLSDst != nil { + elems = append(elems, fmt.Sprintf("Dst: %d", r.MPLSDst)) + } else { + elems = append(elems, fmt.Sprintf("Dst: %s", r.Dst)) + } + if r.NewDst != nil { + elems = append(elems, fmt.Sprintf("NewDst: %s", r.NewDst)) + } + if r.Encap != nil { + elems = append(elems, fmt.Sprintf("Encap: %s", r.Encap)) + } + if r.Via != nil { + elems = append(elems, fmt.Sprintf("Via: %s", r.Via)) + } + elems = append(elems, fmt.Sprintf("Src: %s", r.Src)) + if len(r.MultiPath) > 0 { + elems = append(elems, fmt.Sprintf("Gw: %s", r.MultiPath)) + } else { + elems = append(elems, fmt.Sprintf("Gw: %s", r.Gw)) + } + elems = append(elems, fmt.Sprintf("Flags: %s", r.ListFlags())) + elems = append(elems, fmt.Sprintf("Table: %d", r.Table)) + elems = append(elems, fmt.Sprintf("Realm: %d", r.Realm)) + return fmt.Sprintf("{%s}", strings.Join(elems, " ")) +} + +func (r Route) Equal(x Route) bool { + return r.LinkIndex == x.LinkIndex && + r.ILinkIndex == x.ILinkIndex && + r.Scope == x.Scope && + ipNetEqual(r.Dst, x.Dst) && + r.Src.Equal(x.Src) && + r.Gw.Equal(x.Gw) && + nexthopInfoSlice(r.MultiPath).Equal(x.MultiPath) && + r.Protocol == x.Protocol && + r.Priority == x.Priority && + r.Realm == x.Realm && + r.Table == x.Table && + r.Type == x.Type && + r.Tos == x.Tos && + r.Hoplimit == x.Hoplimit && + r.Flags == x.Flags && + (r.MPLSDst == x.MPLSDst || (r.MPLSDst != nil && x.MPLSDst != nil && *r.MPLSDst == *x.MPLSDst)) && + (r.NewDst == x.NewDst || (r.NewDst != nil && r.NewDst.Equal(x.NewDst))) && + (r.Via == x.Via || (r.Via != nil && r.Via.Equal(x.Via))) && + (r.Encap == x.Encap || (r.Encap != nil && r.Encap.Equal(x.Encap))) +} + +func (r *Route) SetFlag(flag NextHopFlag) { + r.Flags |= int(flag) +} + +func (r *Route) ClearFlag(flag NextHopFlag) { + r.Flags &^= int(flag) +} + +type flagString struct { + f NextHopFlag + s string +} + +// RouteUpdate is sent when a route changes - type is RTM_NEWROUTE or RTM_DELROUTE +type RouteUpdate struct { + Type uint16 + Route +} + +type NexthopInfo struct { + LinkIndex int + Hops int + Gw net.IP + Flags int + NewDst Destination + Encap Encap + Via Destination +} + +func (n *NexthopInfo) String() string { + elems := []string{} + elems = append(elems, fmt.Sprintf("Ifindex: %d", n.LinkIndex)) + if n.NewDst != nil { + elems = append(elems, fmt.Sprintf("NewDst: %s", n.NewDst)) + } + if n.Encap != nil { + elems = append(elems, fmt.Sprintf("Encap: %s", n.Encap)) + } + if n.Via != nil { + elems = append(elems, fmt.Sprintf("Via: %s", n.Via)) + } + elems = append(elems, fmt.Sprintf("Weight: %d", n.Hops+1)) + elems = append(elems, fmt.Sprintf("Gw: %s", n.Gw)) + elems = append(elems, fmt.Sprintf("Flags: %s", n.ListFlags())) + return fmt.Sprintf("{%s}", strings.Join(elems, " ")) +} + +func (n NexthopInfo) Equal(x NexthopInfo) bool { + return n.LinkIndex == x.LinkIndex && + n.Hops == x.Hops && + n.Gw.Equal(x.Gw) && + n.Flags == x.Flags && + (n.NewDst == x.NewDst || (n.NewDst != nil && n.NewDst.Equal(x.NewDst))) && + (n.Encap == x.Encap || (n.Encap != nil && n.Encap.Equal(x.Encap))) +} + +type nexthopInfoSlice []*NexthopInfo + +func (n nexthopInfoSlice) Equal(x []*NexthopInfo) bool { + if len(n) != len(x) { + return false + } + for i := range n { + if n[i] == nil || x[i] == nil { + return false + } + if !n[i].Equal(*x[i]) { + return false + } + } + return true +} + +// ipNetEqual returns true iff both IPNet are equal +func ipNetEqual(ipn1 *net.IPNet, ipn2 *net.IPNet) bool { + if ipn1 == ipn2 { + return true + } + if ipn1 == nil || ipn2 == nil { + return false + } + m1, _ := ipn1.Mask.Size() + m2, _ := ipn2.Mask.Size() + return m1 == m2 && ipn1.IP.Equal(ipn2.IP) +} diff --git a/vendor/github.com/vishvananda/netlink/route_linux.go b/vendor/github.com/vishvananda/netlink/route_linux.go new file mode 100644 index 000000000000..8da8866573c8 --- /dev/null +++ b/vendor/github.com/vishvananda/netlink/route_linux.go @@ -0,0 +1,1555 @@ +package netlink + +import ( + "bytes" + "encoding/binary" + "fmt" + "net" + "strconv" + "strings" + "syscall" + + "github.com/vishvananda/netlink/nl" + "github.com/vishvananda/netns" + "golang.org/x/sys/unix" +) + +// RtAttr is shared so it is in netlink_linux.go + +const ( + SCOPE_UNIVERSE Scope = unix.RT_SCOPE_UNIVERSE + SCOPE_SITE Scope = unix.RT_SCOPE_SITE + SCOPE_LINK Scope = unix.RT_SCOPE_LINK + SCOPE_HOST Scope = unix.RT_SCOPE_HOST + SCOPE_NOWHERE Scope = unix.RT_SCOPE_NOWHERE +) + +func (s Scope) String() string { + switch s { + case SCOPE_UNIVERSE: + return "universe" + case SCOPE_SITE: + return "site" + case SCOPE_LINK: + return "link" + case SCOPE_HOST: + return "host" + case SCOPE_NOWHERE: + return "nowhere" + default: + return "unknown" + } +} + + +const ( + FLAG_ONLINK NextHopFlag = unix.RTNH_F_ONLINK + FLAG_PERVASIVE NextHopFlag = unix.RTNH_F_PERVASIVE +) + +var testFlags = []flagString{ + {f: FLAG_ONLINK, s: "onlink"}, + {f: FLAG_PERVASIVE, s: "pervasive"}, +} + +func listFlags(flag int) []string { + var flags []string + for _, tf := range testFlags { + if flag&int(tf.f) != 0 { + flags = append(flags, tf.s) + } + } + return flags +} + +func (r *Route) ListFlags() []string { + return listFlags(r.Flags) +} + +func (n *NexthopInfo) ListFlags() []string { + return listFlags(n.Flags) +} + +type MPLSDestination struct { + Labels []int +} + +func (d *MPLSDestination) Family() int { + return nl.FAMILY_MPLS +} + +func (d *MPLSDestination) Decode(buf []byte) error { + d.Labels = nl.DecodeMPLSStack(buf) + return nil +} + +func (d *MPLSDestination) Encode() ([]byte, error) { + return nl.EncodeMPLSStack(d.Labels...), nil +} + +func (d *MPLSDestination) String() string { + s := make([]string, 0, len(d.Labels)) + for _, l := range d.Labels { + s = append(s, fmt.Sprintf("%d", l)) + } + return strings.Join(s, "/") +} + +func (d *MPLSDestination) Equal(x Destination) bool { + o, ok := x.(*MPLSDestination) + if !ok { + return false + } + if d == nil && o == nil { + return true + } + if d == nil || o == nil { + return false + } + if d.Labels == nil && o.Labels == nil { + return true + } + if d.Labels == nil || o.Labels == nil { + return false + } + if len(d.Labels) != len(o.Labels) { + return false + } + for i := range d.Labels { + if d.Labels[i] != o.Labels[i] { + return false + } + } + return true +} + +type MPLSEncap struct { + Labels []int +} + +func (e *MPLSEncap) Type() int { + return nl.LWTUNNEL_ENCAP_MPLS +} + +func (e *MPLSEncap) Decode(buf []byte) error { + if len(buf) < 4 { + return fmt.Errorf("lack of bytes") + } + l := native.Uint16(buf) + if len(buf) < int(l) { + return fmt.Errorf("lack of bytes") + } + buf = buf[:l] + typ := native.Uint16(buf[2:]) + if typ != nl.MPLS_IPTUNNEL_DST { + return fmt.Errorf("unknown MPLS Encap Type: %d", typ) + } + e.Labels = nl.DecodeMPLSStack(buf[4:]) + return nil +} + +func (e *MPLSEncap) Encode() ([]byte, error) { + s := nl.EncodeMPLSStack(e.Labels...) + hdr := make([]byte, 4) + native.PutUint16(hdr, uint16(len(s)+4)) + native.PutUint16(hdr[2:], nl.MPLS_IPTUNNEL_DST) + return append(hdr, s...), nil +} + +func (e *MPLSEncap) String() string { + s := make([]string, 0, len(e.Labels)) + for _, l := range e.Labels { + s = append(s, fmt.Sprintf("%d", l)) + } + return strings.Join(s, "/") +} + +func (e *MPLSEncap) Equal(x Encap) bool { + o, ok := x.(*MPLSEncap) + if !ok { + return false + } + if e == nil && o == nil { + return true + } + if e == nil || o == nil { + return false + } + if e.Labels == nil && o.Labels == nil { + return true + } + if e.Labels == nil || o.Labels == nil { + return false + } + if len(e.Labels) != len(o.Labels) { + return false + } + for i := range e.Labels { + if e.Labels[i] != o.Labels[i] { + return false + } + } + return true +} + +// SEG6 definitions +type SEG6Encap struct { + Mode int + Segments []net.IP +} + +func (e *SEG6Encap) Type() int { + return nl.LWTUNNEL_ENCAP_SEG6 +} +func (e *SEG6Encap) Decode(buf []byte) error { + if len(buf) < 4 { + return fmt.Errorf("lack of bytes") + } + // Get Length(l) & Type(typ) : 2 + 2 bytes + l := native.Uint16(buf) + if len(buf) < int(l) { + return fmt.Errorf("lack of bytes") + } + buf = buf[:l] // make sure buf size upper limit is Length + typ := native.Uint16(buf[2:]) + // LWTUNNEL_ENCAP_SEG6 has only one attr type SEG6_IPTUNNEL_SRH + if typ != nl.SEG6_IPTUNNEL_SRH { + return fmt.Errorf("unknown SEG6 Type: %d", typ) + } + + var err error + e.Mode, e.Segments, err = nl.DecodeSEG6Encap(buf[4:]) + + return err +} +func (e *SEG6Encap) Encode() ([]byte, error) { + s, err := nl.EncodeSEG6Encap(e.Mode, e.Segments) + hdr := make([]byte, 4) + native.PutUint16(hdr, uint16(len(s)+4)) + native.PutUint16(hdr[2:], nl.SEG6_IPTUNNEL_SRH) + return append(hdr, s...), err +} +func (e *SEG6Encap) String() string { + segs := make([]string, 0, len(e.Segments)) + // append segment backwards (from n to 0) since seg#0 is the last segment. + for i := len(e.Segments); i > 0; i-- { + segs = append(segs, e.Segments[i-1].String()) + } + str := fmt.Sprintf("mode %s segs %d [ %s ]", nl.SEG6EncapModeString(e.Mode), + len(e.Segments), strings.Join(segs, " ")) + return str +} +func (e *SEG6Encap) Equal(x Encap) bool { + o, ok := x.(*SEG6Encap) + if !ok { + return false + } + if e == o { + return true + } + if e == nil || o == nil { + return false + } + if e.Mode != o.Mode { + return false + } + if len(e.Segments) != len(o.Segments) { + return false + } + for i := range e.Segments { + if !e.Segments[i].Equal(o.Segments[i]) { + return false + } + } + return true +} + +// SEG6LocalEncap definitions +type SEG6LocalEncap struct { + Flags [nl.SEG6_LOCAL_MAX]bool + Action int + Segments []net.IP // from SRH in seg6_local_lwt + Table int // table id for End.T and End.DT6 + InAddr net.IP + In6Addr net.IP + Iif int + Oif int +} + +func (e *SEG6LocalEncap) Type() int { + return nl.LWTUNNEL_ENCAP_SEG6_LOCAL +} +func (e *SEG6LocalEncap) Decode(buf []byte) error { + attrs, err := nl.ParseRouteAttr(buf) + if err != nil { + return err + } + for _, attr := range attrs { + switch attr.Attr.Type { + case nl.SEG6_LOCAL_ACTION: + e.Action = int(native.Uint32(attr.Value[0:4])) + e.Flags[nl.SEG6_LOCAL_ACTION] = true + case nl.SEG6_LOCAL_SRH: + e.Segments, err = nl.DecodeSEG6Srh(attr.Value[:]) + e.Flags[nl.SEG6_LOCAL_SRH] = true + case nl.SEG6_LOCAL_TABLE: + e.Table = int(native.Uint32(attr.Value[0:4])) + e.Flags[nl.SEG6_LOCAL_TABLE] = true + case nl.SEG6_LOCAL_NH4: + e.InAddr = net.IP(attr.Value[0:4]) + e.Flags[nl.SEG6_LOCAL_NH4] = true + case nl.SEG6_LOCAL_NH6: + e.In6Addr = net.IP(attr.Value[0:16]) + e.Flags[nl.SEG6_LOCAL_NH6] = true + case nl.SEG6_LOCAL_IIF: + e.Iif = int(native.Uint32(attr.Value[0:4])) + e.Flags[nl.SEG6_LOCAL_IIF] = true + case nl.SEG6_LOCAL_OIF: + e.Oif = int(native.Uint32(attr.Value[0:4])) + e.Flags[nl.SEG6_LOCAL_OIF] = true + } + } + return err +} +func (e *SEG6LocalEncap) Encode() ([]byte, error) { + var err error + res := make([]byte, 8) + native.PutUint16(res, 8) // length + native.PutUint16(res[2:], nl.SEG6_LOCAL_ACTION) + native.PutUint32(res[4:], uint32(e.Action)) + if e.Flags[nl.SEG6_LOCAL_SRH] { + srh, err := nl.EncodeSEG6Srh(e.Segments) + if err != nil { + return nil, err + } + attr := make([]byte, 4) + native.PutUint16(attr, uint16(len(srh)+4)) + native.PutUint16(attr[2:], nl.SEG6_LOCAL_SRH) + attr = append(attr, srh...) + res = append(res, attr...) + } + if e.Flags[nl.SEG6_LOCAL_TABLE] { + attr := make([]byte, 8) + native.PutUint16(attr, 8) + native.PutUint16(attr[2:], nl.SEG6_LOCAL_TABLE) + native.PutUint32(attr[4:], uint32(e.Table)) + res = append(res, attr...) + } + if e.Flags[nl.SEG6_LOCAL_NH4] { + attr := make([]byte, 4) + native.PutUint16(attr, 8) + native.PutUint16(attr[2:], nl.SEG6_LOCAL_NH4) + ipv4 := e.InAddr.To4() + if ipv4 == nil { + err = fmt.Errorf("SEG6_LOCAL_NH4 has invalid IPv4 address") + return nil, err + } + attr = append(attr, ipv4...) + res = append(res, attr...) + } + if e.Flags[nl.SEG6_LOCAL_NH6] { + attr := make([]byte, 4) + native.PutUint16(attr, 20) + native.PutUint16(attr[2:], nl.SEG6_LOCAL_NH6) + attr = append(attr, e.In6Addr...) + res = append(res, attr...) + } + if e.Flags[nl.SEG6_LOCAL_IIF] { + attr := make([]byte, 8) + native.PutUint16(attr, 8) + native.PutUint16(attr[2:], nl.SEG6_LOCAL_IIF) + native.PutUint32(attr[4:], uint32(e.Iif)) + res = append(res, attr...) + } + if e.Flags[nl.SEG6_LOCAL_OIF] { + attr := make([]byte, 8) + native.PutUint16(attr, 8) + native.PutUint16(attr[2:], nl.SEG6_LOCAL_OIF) + native.PutUint32(attr[4:], uint32(e.Oif)) + res = append(res, attr...) + } + return res, err +} +func (e *SEG6LocalEncap) String() string { + strs := make([]string, 0, nl.SEG6_LOCAL_MAX) + strs = append(strs, fmt.Sprintf("action %s", nl.SEG6LocalActionString(e.Action))) + + if e.Flags[nl.SEG6_LOCAL_TABLE] { + strs = append(strs, fmt.Sprintf("table %d", e.Table)) + } + if e.Flags[nl.SEG6_LOCAL_NH4] { + strs = append(strs, fmt.Sprintf("nh4 %s", e.InAddr)) + } + if e.Flags[nl.SEG6_LOCAL_NH6] { + strs = append(strs, fmt.Sprintf("nh6 %s", e.In6Addr)) + } + if e.Flags[nl.SEG6_LOCAL_IIF] { + link, err := LinkByIndex(e.Iif) + if err != nil { + strs = append(strs, fmt.Sprintf("iif %d", e.Iif)) + } else { + strs = append(strs, fmt.Sprintf("iif %s", link.Attrs().Name)) + } + } + if e.Flags[nl.SEG6_LOCAL_OIF] { + link, err := LinkByIndex(e.Oif) + if err != nil { + strs = append(strs, fmt.Sprintf("oif %d", e.Oif)) + } else { + strs = append(strs, fmt.Sprintf("oif %s", link.Attrs().Name)) + } + } + if e.Flags[nl.SEG6_LOCAL_SRH] { + segs := make([]string, 0, len(e.Segments)) + //append segment backwards (from n to 0) since seg#0 is the last segment. + for i := len(e.Segments); i > 0; i-- { + segs = append(segs, e.Segments[i-1].String()) + } + strs = append(strs, fmt.Sprintf("segs %d [ %s ]", len(e.Segments), strings.Join(segs, " "))) + } + return strings.Join(strs, " ") +} +func (e *SEG6LocalEncap) Equal(x Encap) bool { + o, ok := x.(*SEG6LocalEncap) + if !ok { + return false + } + if e == o { + return true + } + if e == nil || o == nil { + return false + } + // compare all arrays first + for i := range e.Flags { + if e.Flags[i] != o.Flags[i] { + return false + } + } + if len(e.Segments) != len(o.Segments) { + return false + } + for i := range e.Segments { + if !e.Segments[i].Equal(o.Segments[i]) { + return false + } + } + // compare values + if !e.InAddr.Equal(o.InAddr) || !e.In6Addr.Equal(o.In6Addr) { + return false + } + if e.Action != o.Action || e.Table != o.Table || e.Iif != o.Iif || e.Oif != o.Oif { + return false + } + return true +} + +// Encap BPF definitions +type bpfObj struct { + progFd int + progName string +} +type BpfEncap struct { + progs [nl.LWT_BPF_MAX]bpfObj + headroom int +} + +// SetProg adds a bpf function to the route via netlink RTA_ENCAP. The fd must be a bpf +// program loaded with bpf(type=BPF_PROG_TYPE_LWT_*) matching the direction the program should +// be applied to (LWT_BPF_IN, LWT_BPF_OUT, LWT_BPF_XMIT). +func (e *BpfEncap) SetProg(mode, progFd int, progName string) error { + if progFd <= 0 { + return fmt.Errorf("lwt bpf SetProg: invalid fd") + } + if mode <= nl.LWT_BPF_UNSPEC || mode >= nl.LWT_BPF_XMIT_HEADROOM { + return fmt.Errorf("lwt bpf SetProg:invalid mode") + } + e.progs[mode].progFd = progFd + e.progs[mode].progName = fmt.Sprintf("%s[fd:%d]", progName, progFd) + return nil +} + +// SetXmitHeadroom sets the xmit headroom (LWT_BPF_MAX_HEADROOM) via netlink RTA_ENCAP. +// maximum headroom is LWT_BPF_MAX_HEADROOM +func (e *BpfEncap) SetXmitHeadroom(headroom int) error { + if headroom > nl.LWT_BPF_MAX_HEADROOM || headroom < 0 { + return fmt.Errorf("invalid headroom size. range is 0 - %d", nl.LWT_BPF_MAX_HEADROOM) + } + e.headroom = headroom + return nil +} + +func (e *BpfEncap) Type() int { + return nl.LWTUNNEL_ENCAP_BPF +} +func (e *BpfEncap) Decode(buf []byte) error { + if len(buf) < 4 { + return fmt.Errorf("lwt bpf decode: lack of bytes") + } + native := nl.NativeEndian() + attrs, err := nl.ParseRouteAttr(buf) + if err != nil { + return fmt.Errorf("lwt bpf decode: failed parsing attribute. err: %v", err) + } + for _, attr := range attrs { + if int(attr.Attr.Type) < 1 { + // nl.LWT_BPF_UNSPEC + continue + } + if int(attr.Attr.Type) > nl.LWT_BPF_MAX { + return fmt.Errorf("lwt bpf decode: received unknown attribute type: %d", attr.Attr.Type) + } + switch int(attr.Attr.Type) { + case nl.LWT_BPF_MAX_HEADROOM: + e.headroom = int(native.Uint32(attr.Value)) + default: + bpfO := bpfObj{} + parsedAttrs, err := nl.ParseRouteAttr(attr.Value) + if err != nil { + return fmt.Errorf("lwt bpf decode: failed parsing route attribute") + } + for _, parsedAttr := range parsedAttrs { + switch int(parsedAttr.Attr.Type) { + case nl.LWT_BPF_PROG_FD: + bpfO.progFd = int(native.Uint32(parsedAttr.Value)) + case nl.LWT_BPF_PROG_NAME: + bpfO.progName = string(parsedAttr.Value) + default: + return fmt.Errorf("lwt bpf decode: received unknown attribute: type: %d, len: %d", parsedAttr.Attr.Type, parsedAttr.Attr.Len) + } + } + e.progs[attr.Attr.Type] = bpfO + } + } + return nil +} + +func (e *BpfEncap) Encode() ([]byte, error) { + buf := make([]byte, 0) + native = nl.NativeEndian() + for index, attr := range e.progs { + nlMsg := nl.NewRtAttr(index, []byte{}) + if attr.progFd != 0 { + nlMsg.AddRtAttr(nl.LWT_BPF_PROG_FD, nl.Uint32Attr(uint32(attr.progFd))) + } + if attr.progName != "" { + nlMsg.AddRtAttr(nl.LWT_BPF_PROG_NAME, nl.ZeroTerminated(attr.progName)) + } + if nlMsg.Len() > 4 { + buf = append(buf, nlMsg.Serialize()...) + } + } + if len(buf) <= 4 { + return nil, fmt.Errorf("lwt bpf encode: bpf obj definitions returned empty buffer") + } + if e.headroom > 0 { + hRoom := nl.NewRtAttr(nl.LWT_BPF_XMIT_HEADROOM, nl.Uint32Attr(uint32(e.headroom))) + buf = append(buf, hRoom.Serialize()...) + } + return buf, nil +} + +func (e *BpfEncap) String() string { + progs := make([]string, 0) + for index, obj := range e.progs { + empty := bpfObj{} + switch index { + case nl.LWT_BPF_IN: + if obj != empty { + progs = append(progs, fmt.Sprintf("in: %s", obj.progName)) + } + case nl.LWT_BPF_OUT: + if obj != empty { + progs = append(progs, fmt.Sprintf("out: %s", obj.progName)) + } + case nl.LWT_BPF_XMIT: + if obj != empty { + progs = append(progs, fmt.Sprintf("xmit: %s", obj.progName)) + } + } + } + if e.headroom > 0 { + progs = append(progs, fmt.Sprintf("xmit headroom: %d", e.headroom)) + } + return strings.Join(progs, " ") +} + +func (e *BpfEncap) Equal(x Encap) bool { + o, ok := x.(*BpfEncap) + if !ok { + return false + } + if e.headroom != o.headroom { + return false + } + for i := range o.progs { + if o.progs[i] != e.progs[i] { + return false + } + } + return true +} + +type Via struct { + AddrFamily int + Addr net.IP +} + +func (v *Via) Equal(x Destination) bool { + o, ok := x.(*Via) + if !ok { + return false + } + if v.AddrFamily == x.Family() && v.Addr.Equal(o.Addr) { + return true + } + return false +} + +func (v *Via) String() string { + return fmt.Sprintf("Family: %d, Address: %s", v.AddrFamily, v.Addr.String()) +} + +func (v *Via) Family() int { + return v.AddrFamily +} + +func (v *Via) Encode() ([]byte, error) { + buf := &bytes.Buffer{} + err := binary.Write(buf, native, uint16(v.AddrFamily)) + if err != nil { + return nil, err + } + err = binary.Write(buf, native, v.Addr) + if err != nil { + return nil, err + } + return buf.Bytes(), nil +} + +func (v *Via) Decode(b []byte) error { + if len(b) < 6 { + return fmt.Errorf("decoding failed: buffer too small (%d bytes)", len(b)) + } + v.AddrFamily = int(native.Uint16(b[0:2])) + if v.AddrFamily == nl.FAMILY_V4 { + v.Addr = net.IP(b[2:6]) + return nil + } else if v.AddrFamily == nl.FAMILY_V6 { + if len(b) < 18 { + return fmt.Errorf("decoding failed: buffer too small (%d bytes)", len(b)) + } + v.Addr = net.IP(b[2:]) + return nil + } + return fmt.Errorf("decoding failed: address family %d unknown", v.AddrFamily) +} + +// RouteAdd will add a route to the system. +// Equivalent to: `ip route add $route` +func RouteAdd(route *Route) error { + return pkgHandle.RouteAdd(route) +} + +// RouteAdd will add a route to the system. +// Equivalent to: `ip route add $route` +func (h *Handle) RouteAdd(route *Route) error { + flags := unix.NLM_F_CREATE | unix.NLM_F_EXCL | unix.NLM_F_ACK + req := h.newNetlinkRequest(unix.RTM_NEWROUTE, flags) + return h.routeHandle(route, req, nl.NewRtMsg()) +} + +// RouteAppend will append a route to the system. +// Equivalent to: `ip route append $route` +func RouteAppend(route *Route) error { + return pkgHandle.RouteAppend(route) +} + +// RouteAppend will append a route to the system. +// Equivalent to: `ip route append $route` +func (h *Handle) RouteAppend(route *Route) error { + flags := unix.NLM_F_CREATE | unix.NLM_F_APPEND | unix.NLM_F_ACK + req := h.newNetlinkRequest(unix.RTM_NEWROUTE, flags) + return h.routeHandle(route, req, nl.NewRtMsg()) +} + +// RouteAddEcmp will add a route to the system. +func RouteAddEcmp(route *Route) error { + return pkgHandle.RouteAddEcmp(route) +} + +// RouteAddEcmp will add a route to the system. +func (h *Handle) RouteAddEcmp(route *Route) error { + flags := unix.NLM_F_CREATE | unix.NLM_F_ACK + req := h.newNetlinkRequest(unix.RTM_NEWROUTE, flags) + return h.routeHandle(route, req, nl.NewRtMsg()) +} + +// RouteReplace will add a route to the system. +// Equivalent to: `ip route replace $route` +func RouteReplace(route *Route) error { + return pkgHandle.RouteReplace(route) +} + +// RouteReplace will add a route to the system. +// Equivalent to: `ip route replace $route` +func (h *Handle) RouteReplace(route *Route) error { + flags := unix.NLM_F_CREATE | unix.NLM_F_REPLACE | unix.NLM_F_ACK + req := h.newNetlinkRequest(unix.RTM_NEWROUTE, flags) + return h.routeHandle(route, req, nl.NewRtMsg()) +} + +// RouteDel will delete a route from the system. +// Equivalent to: `ip route del $route` +func RouteDel(route *Route) error { + return pkgHandle.RouteDel(route) +} + +// RouteDel will delete a route from the system. +// Equivalent to: `ip route del $route` +func (h *Handle) RouteDel(route *Route) error { + req := h.newNetlinkRequest(unix.RTM_DELROUTE, unix.NLM_F_ACK) + return h.routeHandle(route, req, nl.NewRtDelMsg()) +} + +func (h *Handle) routeHandle(route *Route, req *nl.NetlinkRequest, msg *nl.RtMsg) error { + if (route.Dst == nil || route.Dst.IP == nil) && route.Src == nil && route.Gw == nil && route.MPLSDst == nil { + return fmt.Errorf("one of Dst.IP, Src, or Gw must not be nil") + } + + family := -1 + var rtAttrs []*nl.RtAttr + + if route.Dst != nil && route.Dst.IP != nil { + dstLen, _ := route.Dst.Mask.Size() + msg.Dst_len = uint8(dstLen) + dstFamily := nl.GetIPFamily(route.Dst.IP) + family = dstFamily + var dstData []byte + if dstFamily == FAMILY_V4 { + dstData = route.Dst.IP.To4() + } else { + dstData = route.Dst.IP.To16() + } + rtAttrs = append(rtAttrs, nl.NewRtAttr(unix.RTA_DST, dstData)) + } else if route.MPLSDst != nil { + family = nl.FAMILY_MPLS + msg.Dst_len = uint8(20) + msg.Type = unix.RTN_UNICAST + rtAttrs = append(rtAttrs, nl.NewRtAttr(unix.RTA_DST, nl.EncodeMPLSStack(*route.MPLSDst))) + } + + if route.NewDst != nil { + if family != -1 && family != route.NewDst.Family() { + return fmt.Errorf("new destination and destination are not the same address family") + } + buf, err := route.NewDst.Encode() + if err != nil { + return err + } + rtAttrs = append(rtAttrs, nl.NewRtAttr(unix.RTA_NEWDST, buf)) + } + + if route.Encap != nil { + buf := make([]byte, 2) + native.PutUint16(buf, uint16(route.Encap.Type())) + rtAttrs = append(rtAttrs, nl.NewRtAttr(unix.RTA_ENCAP_TYPE, buf)) + buf, err := route.Encap.Encode() + if err != nil { + return err + } + switch route.Encap.Type() { + case nl.LWTUNNEL_ENCAP_BPF: + rtAttrs = append(rtAttrs, nl.NewRtAttr(unix.RTA_ENCAP|unix.NLA_F_NESTED, buf)) + default: + rtAttrs = append(rtAttrs, nl.NewRtAttr(unix.RTA_ENCAP, buf)) + } + + } + + if route.Src != nil { + srcFamily := nl.GetIPFamily(route.Src) + if family != -1 && family != srcFamily { + return fmt.Errorf("source and destination ip are not the same IP family") + } + family = srcFamily + var srcData []byte + if srcFamily == FAMILY_V4 { + srcData = route.Src.To4() + } else { + srcData = route.Src.To16() + } + // The commonly used src ip for routes is actually PREFSRC + rtAttrs = append(rtAttrs, nl.NewRtAttr(unix.RTA_PREFSRC, srcData)) + } + + if route.Gw != nil { + gwFamily := nl.GetIPFamily(route.Gw) + if family != -1 && family != gwFamily { + return fmt.Errorf("gateway, source, and destination ip are not the same IP family") + } + family = gwFamily + var gwData []byte + if gwFamily == FAMILY_V4 { + gwData = route.Gw.To4() + } else { + gwData = route.Gw.To16() + } + rtAttrs = append(rtAttrs, nl.NewRtAttr(unix.RTA_GATEWAY, gwData)) + } + + if route.Via != nil { + buf, err := route.Via.Encode() + if err != nil { + return fmt.Errorf("failed to encode RTA_VIA: %v", err) + } + rtAttrs = append(rtAttrs, nl.NewRtAttr(unix.RTA_VIA, buf)) + } + + if len(route.MultiPath) > 0 { + buf := []byte{} + for _, nh := range route.MultiPath { + rtnh := &nl.RtNexthop{ + RtNexthop: unix.RtNexthop{ + Hops: uint8(nh.Hops), + Ifindex: int32(nh.LinkIndex), + Flags: uint8(nh.Flags), + }, + } + children := []nl.NetlinkRequestData{} + if nh.Gw != nil { + gwFamily := nl.GetIPFamily(nh.Gw) + if family != -1 && family != gwFamily { + return fmt.Errorf("gateway, source, and destination ip are not the same IP family") + } + if gwFamily == FAMILY_V4 { + children = append(children, nl.NewRtAttr(unix.RTA_GATEWAY, []byte(nh.Gw.To4()))) + } else { + children = append(children, nl.NewRtAttr(unix.RTA_GATEWAY, []byte(nh.Gw.To16()))) + } + } + if nh.NewDst != nil { + if family != -1 && family != nh.NewDst.Family() { + return fmt.Errorf("new destination and destination are not the same address family") + } + buf, err := nh.NewDst.Encode() + if err != nil { + return err + } + children = append(children, nl.NewRtAttr(unix.RTA_NEWDST, buf)) + } + if nh.Encap != nil { + buf := make([]byte, 2) + native.PutUint16(buf, uint16(nh.Encap.Type())) + children = append(children, nl.NewRtAttr(unix.RTA_ENCAP_TYPE, buf)) + buf, err := nh.Encap.Encode() + if err != nil { + return err + } + children = append(children, nl.NewRtAttr(unix.RTA_ENCAP, buf)) + } + if nh.Via != nil { + buf, err := nh.Via.Encode() + if err != nil { + return err + } + children = append(children, nl.NewRtAttr(unix.RTA_VIA, buf)) + } + rtnh.Children = children + buf = append(buf, rtnh.Serialize()...) + } + rtAttrs = append(rtAttrs, nl.NewRtAttr(unix.RTA_MULTIPATH, buf)) + } + + if route.Table > 0 { + if route.Table >= 256 { + msg.Table = unix.RT_TABLE_UNSPEC + b := make([]byte, 4) + native.PutUint32(b, uint32(route.Table)) + rtAttrs = append(rtAttrs, nl.NewRtAttr(unix.RTA_TABLE, b)) + } else { + msg.Table = uint8(route.Table) + } + } + + if route.Priority > 0 { + b := make([]byte, 4) + native.PutUint32(b, uint32(route.Priority)) + rtAttrs = append(rtAttrs, nl.NewRtAttr(unix.RTA_PRIORITY, b)) + } + if route.Realm > 0 { + b := make([]byte, 4) + native.PutUint32(b, uint32(route.Realm)) + rtAttrs = append(rtAttrs, nl.NewRtAttr(unix.RTA_FLOW, b)) + } + if route.Tos > 0 { + msg.Tos = uint8(route.Tos) + } + if route.Protocol > 0 { + msg.Protocol = uint8(route.Protocol) + } + if route.Type > 0 { + msg.Type = uint8(route.Type) + } + + var metrics []*nl.RtAttr + if route.MTU > 0 { + b := nl.Uint32Attr(uint32(route.MTU)) + metrics = append(metrics, nl.NewRtAttr(unix.RTAX_MTU, b)) + } + if route.Window > 0 { + b := nl.Uint32Attr(uint32(route.Window)) + metrics = append(metrics, nl.NewRtAttr(unix.RTAX_WINDOW, b)) + } + if route.Rtt > 0 { + b := nl.Uint32Attr(uint32(route.Rtt)) + metrics = append(metrics, nl.NewRtAttr(unix.RTAX_RTT, b)) + } + if route.RttVar > 0 { + b := nl.Uint32Attr(uint32(route.RttVar)) + metrics = append(metrics, nl.NewRtAttr(unix.RTAX_RTTVAR, b)) + } + if route.Ssthresh > 0 { + b := nl.Uint32Attr(uint32(route.Ssthresh)) + metrics = append(metrics, nl.NewRtAttr(unix.RTAX_SSTHRESH, b)) + } + if route.Cwnd > 0 { + b := nl.Uint32Attr(uint32(route.Cwnd)) + metrics = append(metrics, nl.NewRtAttr(unix.RTAX_CWND, b)) + } + if route.AdvMSS > 0 { + b := nl.Uint32Attr(uint32(route.AdvMSS)) + metrics = append(metrics, nl.NewRtAttr(unix.RTAX_ADVMSS, b)) + } + if route.Reordering > 0 { + b := nl.Uint32Attr(uint32(route.Reordering)) + metrics = append(metrics, nl.NewRtAttr(unix.RTAX_REORDERING, b)) + } + if route.Hoplimit > 0 { + b := nl.Uint32Attr(uint32(route.Hoplimit)) + metrics = append(metrics, nl.NewRtAttr(unix.RTAX_HOPLIMIT, b)) + } + if route.InitCwnd > 0 { + b := nl.Uint32Attr(uint32(route.InitCwnd)) + metrics = append(metrics, nl.NewRtAttr(unix.RTAX_INITCWND, b)) + } + if route.Features > 0 { + b := nl.Uint32Attr(uint32(route.Features)) + metrics = append(metrics, nl.NewRtAttr(unix.RTAX_FEATURES, b)) + } + if route.RtoMin > 0 { + b := nl.Uint32Attr(uint32(route.RtoMin)) + metrics = append(metrics, nl.NewRtAttr(unix.RTAX_RTO_MIN, b)) + } + if route.InitRwnd > 0 { + b := nl.Uint32Attr(uint32(route.InitRwnd)) + metrics = append(metrics, nl.NewRtAttr(unix.RTAX_INITRWND, b)) + } + if route.QuickACK > 0 { + b := nl.Uint32Attr(uint32(route.QuickACK)) + metrics = append(metrics, nl.NewRtAttr(unix.RTAX_QUICKACK, b)) + } + if route.Congctl != "" { + b := nl.ZeroTerminated(route.Congctl) + metrics = append(metrics, nl.NewRtAttr(unix.RTAX_CC_ALGO, b)) + } + if route.FastOpenNoCookie > 0 { + b := nl.Uint32Attr(uint32(route.FastOpenNoCookie)) + metrics = append(metrics, nl.NewRtAttr(unix.RTAX_FASTOPEN_NO_COOKIE, b)) + } + + if metrics != nil { + attr := nl.NewRtAttr(unix.RTA_METRICS, nil) + for _, metric := range metrics { + attr.AddChild(metric) + } + rtAttrs = append(rtAttrs, attr) + } + + msg.Flags = uint32(route.Flags) + msg.Scope = uint8(route.Scope) + msg.Family = uint8(family) + req.AddData(msg) + for _, attr := range rtAttrs { + req.AddData(attr) + } + + b := make([]byte, 4) + native.PutUint32(b, uint32(route.LinkIndex)) + + req.AddData(nl.NewRtAttr(unix.RTA_OIF, b)) + + _, err := req.Execute(unix.NETLINK_ROUTE, 0) + return err +} + +// RouteList gets a list of routes in the system. +// Equivalent to: `ip route show`. +// The list can be filtered by link and ip family. +func RouteList(link Link, family int) ([]Route, error) { + return pkgHandle.RouteList(link, family) +} + +// RouteList gets a list of routes in the system. +// Equivalent to: `ip route show`. +// The list can be filtered by link and ip family. +func (h *Handle) RouteList(link Link, family int) ([]Route, error) { + var routeFilter *Route + if link != nil { + routeFilter = &Route{ + LinkIndex: link.Attrs().Index, + } + } + return h.RouteListFiltered(family, routeFilter, RT_FILTER_OIF) +} + +// RouteListFiltered gets a list of routes in the system filtered with specified rules. +// All rules must be defined in RouteFilter struct +func RouteListFiltered(family int, filter *Route, filterMask uint64) ([]Route, error) { + return pkgHandle.RouteListFiltered(family, filter, filterMask) +} + +// RouteListFiltered gets a list of routes in the system filtered with specified rules. +// All rules must be defined in RouteFilter struct +func (h *Handle) RouteListFiltered(family int, filter *Route, filterMask uint64) ([]Route, error) { + req := h.newNetlinkRequest(unix.RTM_GETROUTE, unix.NLM_F_DUMP) + rtmsg := nl.NewRtMsg() + rtmsg.Family = uint8(family) + req.AddData(rtmsg) + + msgs, err := req.Execute(unix.NETLINK_ROUTE, unix.RTM_NEWROUTE) + if err != nil { + return nil, err + } + + var res []Route + for _, m := range msgs { + msg := nl.DeserializeRtMsg(m) + if msg.Flags&unix.RTM_F_CLONED != 0 { + // Ignore cloned routes + continue + } + if msg.Table != unix.RT_TABLE_MAIN { + if filter == nil || filter != nil && filterMask&RT_FILTER_TABLE == 0 { + // Ignore non-main tables + continue + } + } + route, err := deserializeRoute(m) + if err != nil { + return nil, err + } + if filter != nil { + switch { + case filterMask&RT_FILTER_TABLE != 0 && filter.Table != unix.RT_TABLE_UNSPEC && route.Table != filter.Table: + continue + case filterMask&RT_FILTER_PROTOCOL != 0 && route.Protocol != filter.Protocol: + continue + case filterMask&RT_FILTER_SCOPE != 0 && route.Scope != filter.Scope: + continue + case filterMask&RT_FILTER_TYPE != 0 && route.Type != filter.Type: + continue + case filterMask&RT_FILTER_TOS != 0 && route.Tos != filter.Tos: + continue + case filterMask&RT_FILTER_REALM != 0 && route.Realm != filter.Realm: + continue + case filterMask&RT_FILTER_OIF != 0 && route.LinkIndex != filter.LinkIndex: + continue + case filterMask&RT_FILTER_IIF != 0 && route.ILinkIndex != filter.ILinkIndex: + continue + case filterMask&RT_FILTER_GW != 0 && !route.Gw.Equal(filter.Gw): + continue + case filterMask&RT_FILTER_SRC != 0 && !route.Src.Equal(filter.Src): + continue + case filterMask&RT_FILTER_DST != 0: + if filter.MPLSDst == nil || route.MPLSDst == nil || (*filter.MPLSDst) != (*route.MPLSDst) { + if !ipNetEqual(route.Dst, filter.Dst) { + continue + } + } + case filterMask&RT_FILTER_HOPLIMIT != 0 && route.Hoplimit != filter.Hoplimit: + continue + } + } + res = append(res, route) + } + return res, nil +} + +// deserializeRoute decodes a binary netlink message into a Route struct +func deserializeRoute(m []byte) (Route, error) { + msg := nl.DeserializeRtMsg(m) + attrs, err := nl.ParseRouteAttr(m[msg.Len():]) + if err != nil { + return Route{}, err + } + route := Route{ + Scope: Scope(msg.Scope), + Protocol: RouteProtocol(int(msg.Protocol)), + Table: int(msg.Table), + Type: int(msg.Type), + Tos: int(msg.Tos), + Flags: int(msg.Flags), + Family: int(msg.Family), + } + + var encap, encapType syscall.NetlinkRouteAttr + for _, attr := range attrs { + switch attr.Attr.Type { + case unix.RTA_GATEWAY: + route.Gw = net.IP(attr.Value) + case unix.RTA_PREFSRC: + route.Src = net.IP(attr.Value) + case unix.RTA_DST: + if msg.Family == nl.FAMILY_MPLS { + stack := nl.DecodeMPLSStack(attr.Value) + if len(stack) == 0 || len(stack) > 1 { + return route, fmt.Errorf("invalid MPLS RTA_DST") + } + route.MPLSDst = &stack[0] + } else { + route.Dst = &net.IPNet{ + IP: attr.Value, + Mask: net.CIDRMask(int(msg.Dst_len), 8*len(attr.Value)), + } + } + case unix.RTA_OIF: + route.LinkIndex = int(native.Uint32(attr.Value[0:4])) + case unix.RTA_IIF: + route.ILinkIndex = int(native.Uint32(attr.Value[0:4])) + case unix.RTA_PRIORITY: + route.Priority = int(native.Uint32(attr.Value[0:4])) + case unix.RTA_FLOW: + route.Realm = int(native.Uint32(attr.Value[0:4])) + case unix.RTA_TABLE: + route.Table = int(native.Uint32(attr.Value[0:4])) + case unix.RTA_MULTIPATH: + parseRtNexthop := func(value []byte) (*NexthopInfo, []byte, error) { + if len(value) < unix.SizeofRtNexthop { + return nil, nil, fmt.Errorf("lack of bytes") + } + nh := nl.DeserializeRtNexthop(value) + if len(value) < int(nh.RtNexthop.Len) { + return nil, nil, fmt.Errorf("lack of bytes") + } + info := &NexthopInfo{ + LinkIndex: int(nh.RtNexthop.Ifindex), + Hops: int(nh.RtNexthop.Hops), + Flags: int(nh.RtNexthop.Flags), + } + attrs, err := nl.ParseRouteAttr(value[unix.SizeofRtNexthop:int(nh.RtNexthop.Len)]) + if err != nil { + return nil, nil, err + } + var encap, encapType syscall.NetlinkRouteAttr + for _, attr := range attrs { + switch attr.Attr.Type { + case unix.RTA_GATEWAY: + info.Gw = net.IP(attr.Value) + case unix.RTA_NEWDST: + var d Destination + switch msg.Family { + case nl.FAMILY_MPLS: + d = &MPLSDestination{} + } + if err := d.Decode(attr.Value); err != nil { + return nil, nil, err + } + info.NewDst = d + case unix.RTA_ENCAP_TYPE: + encapType = attr + case unix.RTA_ENCAP: + encap = attr + case unix.RTA_VIA: + d := &Via{} + if err := d.Decode(attr.Value); err != nil { + return nil, nil, err + } + info.Via = d + } + } + + if len(encap.Value) != 0 && len(encapType.Value) != 0 { + typ := int(native.Uint16(encapType.Value[0:2])) + var e Encap + switch typ { + case nl.LWTUNNEL_ENCAP_MPLS: + e = &MPLSEncap{} + if err := e.Decode(encap.Value); err != nil { + return nil, nil, err + } + } + info.Encap = e + } + + return info, value[int(nh.RtNexthop.Len):], nil + } + rest := attr.Value + for len(rest) > 0 { + info, buf, err := parseRtNexthop(rest) + if err != nil { + return route, err + } + route.MultiPath = append(route.MultiPath, info) + rest = buf + } + case unix.RTA_NEWDST: + var d Destination + switch msg.Family { + case nl.FAMILY_MPLS: + d = &MPLSDestination{} + } + if err := d.Decode(attr.Value); err != nil { + return route, err + } + route.NewDst = d + case unix.RTA_VIA: + v := &Via{} + if err := v.Decode(attr.Value); err != nil { + return route, err + } + route.Via = v + case unix.RTA_ENCAP_TYPE: + encapType = attr + case unix.RTA_ENCAP: + encap = attr + case unix.RTA_METRICS: + metrics, err := nl.ParseRouteAttr(attr.Value) + if err != nil { + return route, err + } + for _, metric := range metrics { + switch metric.Attr.Type { + case unix.RTAX_MTU: + route.MTU = int(native.Uint32(metric.Value[0:4])) + case unix.RTAX_WINDOW: + route.Window = int(native.Uint32(metric.Value[0:4])) + case unix.RTAX_RTT: + route.Rtt = int(native.Uint32(metric.Value[0:4])) + case unix.RTAX_RTTVAR: + route.RttVar = int(native.Uint32(metric.Value[0:4])) + case unix.RTAX_SSTHRESH: + route.Ssthresh = int(native.Uint32(metric.Value[0:4])) + case unix.RTAX_CWND: + route.Cwnd = int(native.Uint32(metric.Value[0:4])) + case unix.RTAX_ADVMSS: + route.AdvMSS = int(native.Uint32(metric.Value[0:4])) + case unix.RTAX_REORDERING: + route.Reordering = int(native.Uint32(metric.Value[0:4])) + case unix.RTAX_HOPLIMIT: + route.Hoplimit = int(native.Uint32(metric.Value[0:4])) + case unix.RTAX_INITCWND: + route.InitCwnd = int(native.Uint32(metric.Value[0:4])) + case unix.RTAX_FEATURES: + route.Features = int(native.Uint32(metric.Value[0:4])) + case unix.RTAX_RTO_MIN: + route.RtoMin = int(native.Uint32(metric.Value[0:4])) + case unix.RTAX_INITRWND: + route.InitRwnd = int(native.Uint32(metric.Value[0:4])) + case unix.RTAX_QUICKACK: + route.QuickACK = int(native.Uint32(metric.Value[0:4])) + case unix.RTAX_CC_ALGO: + route.Congctl = nl.BytesToString(metric.Value) + case unix.RTAX_FASTOPEN_NO_COOKIE: + route.FastOpenNoCookie = int(native.Uint32(metric.Value[0:4])) + } + } + } + } + + if len(encap.Value) != 0 && len(encapType.Value) != 0 { + typ := int(native.Uint16(encapType.Value[0:2])) + var e Encap + switch typ { + case nl.LWTUNNEL_ENCAP_MPLS: + e = &MPLSEncap{} + if err := e.Decode(encap.Value); err != nil { + return route, err + } + case nl.LWTUNNEL_ENCAP_SEG6: + e = &SEG6Encap{} + if err := e.Decode(encap.Value); err != nil { + return route, err + } + case nl.LWTUNNEL_ENCAP_SEG6_LOCAL: + e = &SEG6LocalEncap{} + if err := e.Decode(encap.Value); err != nil { + return route, err + } + case nl.LWTUNNEL_ENCAP_BPF: + e = &BpfEncap{} + if err := e.Decode(encap.Value); err != nil { + return route, err + } + } + route.Encap = e + } + + return route, nil +} + +// RouteGetOptions contains a set of options to use with +// RouteGetWithOptions +type RouteGetOptions struct { + Iif string + Oif string + VrfName string + SrcAddr net.IP +} + +// RouteGetWithOptions gets a route to a specific destination from the host system. +// Equivalent to: 'ip route get <> vrf '. +func RouteGetWithOptions(destination net.IP, options *RouteGetOptions) ([]Route, error) { + return pkgHandle.RouteGetWithOptions(destination, options) +} + +// RouteGet gets a route to a specific destination from the host system. +// Equivalent to: 'ip route get'. +func RouteGet(destination net.IP) ([]Route, error) { + return pkgHandle.RouteGet(destination) +} + +// RouteGetWithOptions gets a route to a specific destination from the host system. +// Equivalent to: 'ip route get <> vrf '. +func (h *Handle) RouteGetWithOptions(destination net.IP, options *RouteGetOptions) ([]Route, error) { + req := h.newNetlinkRequest(unix.RTM_GETROUTE, unix.NLM_F_REQUEST) + family := nl.GetIPFamily(destination) + var destinationData []byte + var bitlen uint8 + if family == FAMILY_V4 { + destinationData = destination.To4() + bitlen = 32 + } else { + destinationData = destination.To16() + bitlen = 128 + } + msg := &nl.RtMsg{} + msg.Family = uint8(family) + msg.Dst_len = bitlen + if options != nil && options.SrcAddr != nil { + msg.Src_len = bitlen + } + msg.Flags = unix.RTM_F_LOOKUP_TABLE + req.AddData(msg) + + rtaDst := nl.NewRtAttr(unix.RTA_DST, destinationData) + req.AddData(rtaDst) + + if options != nil { + if options.VrfName != "" { + link, err := LinkByName(options.VrfName) + if err != nil { + return nil, err + } + b := make([]byte, 4) + native.PutUint32(b, uint32(link.Attrs().Index)) + + req.AddData(nl.NewRtAttr(unix.RTA_OIF, b)) + } + + if len(options.Iif) > 0 { + link, err := LinkByName(options.Iif) + if err != nil { + return nil, err + } + + b := make([]byte, 4) + native.PutUint32(b, uint32(link.Attrs().Index)) + + req.AddData(nl.NewRtAttr(unix.RTA_IIF, b)) + } + + if len(options.Oif) > 0 { + link, err := LinkByName(options.Oif) + if err != nil { + return nil, err + } + + b := make([]byte, 4) + native.PutUint32(b, uint32(link.Attrs().Index)) + + req.AddData(nl.NewRtAttr(unix.RTA_OIF, b)) + } + + if options.SrcAddr != nil { + var srcAddr []byte + if family == FAMILY_V4 { + srcAddr = options.SrcAddr.To4() + } else { + srcAddr = options.SrcAddr.To16() + } + + req.AddData(nl.NewRtAttr(unix.RTA_SRC, srcAddr)) + } + } + + msgs, err := req.Execute(unix.NETLINK_ROUTE, unix.RTM_NEWROUTE) + if err != nil { + return nil, err + } + + var res []Route + for _, m := range msgs { + route, err := deserializeRoute(m) + if err != nil { + return nil, err + } + res = append(res, route) + } + return res, nil +} + +// RouteGet gets a route to a specific destination from the host system. +// Equivalent to: 'ip route get'. +func (h *Handle) RouteGet(destination net.IP) ([]Route, error) { + return h.RouteGetWithOptions(destination, nil) +} + +// RouteSubscribe takes a chan down which notifications will be sent +// when routes are added or deleted. Close the 'done' chan to stop subscription. +func RouteSubscribe(ch chan<- RouteUpdate, done <-chan struct{}) error { + return routeSubscribeAt(netns.None(), netns.None(), ch, done, nil, false) +} + +// RouteSubscribeAt works like RouteSubscribe plus it allows the caller +// to choose the network namespace in which to subscribe (ns). +func RouteSubscribeAt(ns netns.NsHandle, ch chan<- RouteUpdate, done <-chan struct{}) error { + return routeSubscribeAt(ns, netns.None(), ch, done, nil, false) +} + +// RouteSubscribeOptions contains a set of options to use with +// RouteSubscribeWithOptions. +type RouteSubscribeOptions struct { + Namespace *netns.NsHandle + ErrorCallback func(error) + ListExisting bool +} + +// RouteSubscribeWithOptions work like RouteSubscribe but enable to +// provide additional options to modify the behavior. Currently, the +// namespace can be provided as well as an error callback. +func RouteSubscribeWithOptions(ch chan<- RouteUpdate, done <-chan struct{}, options RouteSubscribeOptions) error { + if options.Namespace == nil { + none := netns.None() + options.Namespace = &none + } + return routeSubscribeAt(*options.Namespace, netns.None(), ch, done, options.ErrorCallback, options.ListExisting) +} + +func routeSubscribeAt(newNs, curNs netns.NsHandle, ch chan<- RouteUpdate, done <-chan struct{}, cberr func(error), listExisting bool) error { + s, err := nl.SubscribeAt(newNs, curNs, unix.NETLINK_ROUTE, unix.RTNLGRP_IPV4_ROUTE, unix.RTNLGRP_IPV6_ROUTE) + if err != nil { + return err + } + if done != nil { + go func() { + <-done + s.Close() + }() + } + if listExisting { + req := pkgHandle.newNetlinkRequest(unix.RTM_GETROUTE, + unix.NLM_F_DUMP) + infmsg := nl.NewIfInfomsg(unix.AF_UNSPEC) + req.AddData(infmsg) + if err := s.Send(req); err != nil { + return err + } + } + go func() { + defer close(ch) + for { + msgs, from, err := s.Receive() + if err != nil { + if cberr != nil { + cberr(fmt.Errorf("Receive failed: %v", + err)) + } + return + } + if from.Pid != nl.PidKernel { + if cberr != nil { + cberr(fmt.Errorf("Wrong sender portid %d, expected %d", from.Pid, nl.PidKernel)) + } + continue + } + for _, m := range msgs { + if m.Header.Type == unix.NLMSG_DONE { + continue + } + if m.Header.Type == unix.NLMSG_ERROR { + error := int32(native.Uint32(m.Data[0:4])) + if error == 0 { + continue + } + if cberr != nil { + cberr(fmt.Errorf("error message: %v", + syscall.Errno(-error))) + } + continue + } + route, err := deserializeRoute(m.Data) + if err != nil { + if cberr != nil { + cberr(err) + } + continue + } + ch <- RouteUpdate{Type: m.Header.Type, Route: route} + } + } + }() + + return nil +} + +func (p RouteProtocol) String() string { + switch int(p) { + case unix.RTPROT_BABEL: + return "babel" + case unix.RTPROT_BGP: + return "bgp" + case unix.RTPROT_BIRD: + return "bird" + case unix.RTPROT_BOOT: + return "boot" + case unix.RTPROT_DHCP: + return "dhcp" + case unix.RTPROT_DNROUTED: + return "dnrouted" + case unix.RTPROT_EIGRP: + return "eigrp" + case unix.RTPROT_GATED: + return "gated" + case unix.RTPROT_ISIS: + return "isis" + //case unix.RTPROT_KEEPALIVED: + // return "keepalived" + case unix.RTPROT_KERNEL: + return "kernel" + case unix.RTPROT_MROUTED: + return "mrouted" + case unix.RTPROT_MRT: + return "mrt" + case unix.RTPROT_NTK: + return "ntk" + case unix.RTPROT_OSPF: + return "ospf" + case unix.RTPROT_RA: + return "ra" + case unix.RTPROT_REDIRECT: + return "redirect" + case unix.RTPROT_RIP: + return "rip" + case unix.RTPROT_STATIC: + return "static" + case unix.RTPROT_UNSPEC: + return "unspec" + case unix.RTPROT_XORP: + return "xorp" + case unix.RTPROT_ZEBRA: + return "zebra" + default: + return strconv.Itoa(int(p)) + } +} diff --git a/vendor/github.com/vishvananda/netlink/route_unspecified.go b/vendor/github.com/vishvananda/netlink/route_unspecified.go new file mode 100644 index 000000000000..db7372689589 --- /dev/null +++ b/vendor/github.com/vishvananda/netlink/route_unspecified.go @@ -0,0 +1,21 @@ +// +build !linux + +package netlink + +import "strconv" + +func (r *Route) ListFlags() []string { + return []string{} +} + +func (n *NexthopInfo) ListFlags() []string { + return []string{} +} + +func (s Scope) String() string { + return "unknown" +} + +func (p RouteProtocol) String() string { + return strconv.Itoa(int(p)) +} diff --git a/vendor/github.com/vishvananda/netlink/rule.go b/vendor/github.com/vishvananda/netlink/rule.go new file mode 100644 index 000000000000..53cd3d4f6ac2 --- /dev/null +++ b/vendor/github.com/vishvananda/netlink/rule.go @@ -0,0 +1,68 @@ +package netlink + +import ( + "fmt" + "net" +) + +// Rule represents a netlink rule. +type Rule struct { + Priority int + Family int + Table int + Mark int + Mask int + Tos uint + TunID uint + Goto int + Src *net.IPNet + Dst *net.IPNet + Flow int + IifName string + OifName string + SuppressIfgroup int + SuppressPrefixlen int + Invert bool + Dport *RulePortRange + Sport *RulePortRange + IPProto int +} + +func (r Rule) String() string { + from := "all" + if r.Src != nil && r.Src.String() != "" { + from = r.Src.String() + } + + to := "all" + if r.Dst != nil && r.Dst.String() != "" { + to = r.Dst.String() + } + + return fmt.Sprintf("ip rule %d: from %s to %s table %d", + r.Priority, from, to, r.Table) +} + +// NewRule return empty rules. +func NewRule() *Rule { + return &Rule{ + SuppressIfgroup: -1, + SuppressPrefixlen: -1, + Priority: -1, + Mark: -1, + Mask: -1, + Goto: -1, + Flow: -1, + } +} + +// NewRulePortRange creates rule sport/dport range. +func NewRulePortRange(start, end uint16) *RulePortRange { + return &RulePortRange{Start: start, End: end} +} + +// RulePortRange represents rule sport/dport range. +type RulePortRange struct { + Start uint16 + End uint16 +} diff --git a/vendor/github.com/vishvananda/netlink/rule_linux.go b/vendor/github.com/vishvananda/netlink/rule_linux.go new file mode 100644 index 000000000000..3ae2138808ee --- /dev/null +++ b/vendor/github.com/vishvananda/netlink/rule_linux.go @@ -0,0 +1,301 @@ +package netlink + +import ( + "bytes" + "fmt" + "net" + + "github.com/vishvananda/netlink/nl" + "golang.org/x/sys/unix" +) + +const FibRuleInvert = 0x2 + +// RuleAdd adds a rule to the system. +// Equivalent to: ip rule add +func RuleAdd(rule *Rule) error { + return pkgHandle.RuleAdd(rule) +} + +// RuleAdd adds a rule to the system. +// Equivalent to: ip rule add +func (h *Handle) RuleAdd(rule *Rule) error { + req := h.newNetlinkRequest(unix.RTM_NEWRULE, unix.NLM_F_CREATE|unix.NLM_F_EXCL|unix.NLM_F_ACK) + return ruleHandle(rule, req) +} + +// RuleDel deletes a rule from the system. +// Equivalent to: ip rule del +func RuleDel(rule *Rule) error { + return pkgHandle.RuleDel(rule) +} + +// RuleDel deletes a rule from the system. +// Equivalent to: ip rule del +func (h *Handle) RuleDel(rule *Rule) error { + req := h.newNetlinkRequest(unix.RTM_DELRULE, unix.NLM_F_ACK) + return ruleHandle(rule, req) +} + +func ruleHandle(rule *Rule, req *nl.NetlinkRequest) error { + msg := nl.NewRtMsg() + msg.Family = unix.AF_INET + msg.Protocol = unix.RTPROT_BOOT + msg.Scope = unix.RT_SCOPE_UNIVERSE + msg.Table = unix.RT_TABLE_UNSPEC + msg.Type = unix.RTN_UNSPEC + if req.NlMsghdr.Flags&unix.NLM_F_CREATE > 0 { + msg.Type = unix.RTN_UNICAST + } + if rule.Invert { + msg.Flags |= FibRuleInvert + } + if rule.Family != 0 { + msg.Family = uint8(rule.Family) + } + if rule.Table >= 0 && rule.Table < 256 { + msg.Table = uint8(rule.Table) + } + if rule.Tos != 0 { + msg.Tos = uint8(rule.Tos) + } + + var dstFamily uint8 + var rtAttrs []*nl.RtAttr + if rule.Dst != nil && rule.Dst.IP != nil { + dstLen, _ := rule.Dst.Mask.Size() + msg.Dst_len = uint8(dstLen) + msg.Family = uint8(nl.GetIPFamily(rule.Dst.IP)) + dstFamily = msg.Family + var dstData []byte + if msg.Family == unix.AF_INET { + dstData = rule.Dst.IP.To4() + } else { + dstData = rule.Dst.IP.To16() + } + rtAttrs = append(rtAttrs, nl.NewRtAttr(unix.RTA_DST, dstData)) + } + + if rule.Src != nil && rule.Src.IP != nil { + msg.Family = uint8(nl.GetIPFamily(rule.Src.IP)) + if dstFamily != 0 && dstFamily != msg.Family { + return fmt.Errorf("source and destination ip are not the same IP family") + } + srcLen, _ := rule.Src.Mask.Size() + msg.Src_len = uint8(srcLen) + var srcData []byte + if msg.Family == unix.AF_INET { + srcData = rule.Src.IP.To4() + } else { + srcData = rule.Src.IP.To16() + } + rtAttrs = append(rtAttrs, nl.NewRtAttr(unix.RTA_SRC, srcData)) + } + + req.AddData(msg) + for i := range rtAttrs { + req.AddData(rtAttrs[i]) + } + + if rule.Priority >= 0 { + b := make([]byte, 4) + native.PutUint32(b, uint32(rule.Priority)) + req.AddData(nl.NewRtAttr(nl.FRA_PRIORITY, b)) + } + if rule.Mark >= 0 { + b := make([]byte, 4) + native.PutUint32(b, uint32(rule.Mark)) + req.AddData(nl.NewRtAttr(nl.FRA_FWMARK, b)) + } + if rule.Mask >= 0 { + b := make([]byte, 4) + native.PutUint32(b, uint32(rule.Mask)) + req.AddData(nl.NewRtAttr(nl.FRA_FWMASK, b)) + } + if rule.Flow >= 0 { + b := make([]byte, 4) + native.PutUint32(b, uint32(rule.Flow)) + req.AddData(nl.NewRtAttr(nl.FRA_FLOW, b)) + } + if rule.TunID > 0 { + b := make([]byte, 4) + native.PutUint32(b, uint32(rule.TunID)) + req.AddData(nl.NewRtAttr(nl.FRA_TUN_ID, b)) + } + if rule.Table >= 256 { + b := make([]byte, 4) + native.PutUint32(b, uint32(rule.Table)) + req.AddData(nl.NewRtAttr(nl.FRA_TABLE, b)) + } + if msg.Table > 0 { + if rule.SuppressPrefixlen >= 0 { + b := make([]byte, 4) + native.PutUint32(b, uint32(rule.SuppressPrefixlen)) + req.AddData(nl.NewRtAttr(nl.FRA_SUPPRESS_PREFIXLEN, b)) + } + if rule.SuppressIfgroup >= 0 { + b := make([]byte, 4) + native.PutUint32(b, uint32(rule.SuppressIfgroup)) + req.AddData(nl.NewRtAttr(nl.FRA_SUPPRESS_IFGROUP, b)) + } + } + if rule.IifName != "" { + req.AddData(nl.NewRtAttr(nl.FRA_IIFNAME, []byte(rule.IifName+"\x00"))) + } + if rule.OifName != "" { + req.AddData(nl.NewRtAttr(nl.FRA_OIFNAME, []byte(rule.OifName+"\x00"))) + } + if rule.Goto >= 0 { + msg.Type = nl.FR_ACT_GOTO + b := make([]byte, 4) + native.PutUint32(b, uint32(rule.Goto)) + req.AddData(nl.NewRtAttr(nl.FRA_GOTO, b)) + } + + if rule.IPProto > 0 { + b := make([]byte, 4) + native.PutUint32(b, uint32(rule.IPProto)) + req.AddData(nl.NewRtAttr(nl.FRA_IP_PROTO, b)) + } + + if rule.Dport != nil { + b := rule.Dport.toRtAttrData() + req.AddData(nl.NewRtAttr(nl.FRA_DPORT_RANGE, b)) + } + + if rule.Sport != nil { + b := rule.Sport.toRtAttrData() + req.AddData(nl.NewRtAttr(nl.FRA_SPORT_RANGE, b)) + } + + _, err := req.Execute(unix.NETLINK_ROUTE, 0) + return err +} + +// RuleList lists rules in the system. +// Equivalent to: ip rule list +func RuleList(family int) ([]Rule, error) { + return pkgHandle.RuleList(family) +} + +// RuleList lists rules in the system. +// Equivalent to: ip rule list +func (h *Handle) RuleList(family int) ([]Rule, error) { + return h.RuleListFiltered(family, nil, 0) +} + +// RuleListFiltered gets a list of rules in the system filtered by the +// specified rule template `filter`. +// Equivalent to: ip rule list +func RuleListFiltered(family int, filter *Rule, filterMask uint64) ([]Rule, error) { + return pkgHandle.RuleListFiltered(family, filter, filterMask) +} + +// RuleListFiltered lists rules in the system. +// Equivalent to: ip rule list +func (h *Handle) RuleListFiltered(family int, filter *Rule, filterMask uint64) ([]Rule, error) { + req := h.newNetlinkRequest(unix.RTM_GETRULE, unix.NLM_F_DUMP|unix.NLM_F_REQUEST) + msg := nl.NewIfInfomsg(family) + req.AddData(msg) + + msgs, err := req.Execute(unix.NETLINK_ROUTE, unix.RTM_NEWRULE) + if err != nil { + return nil, err + } + + var res = make([]Rule, 0) + for i := range msgs { + msg := nl.DeserializeRtMsg(msgs[i]) + attrs, err := nl.ParseRouteAttr(msgs[i][msg.Len():]) + if err != nil { + return nil, err + } + + rule := NewRule() + + rule.Invert = msg.Flags&FibRuleInvert > 0 + rule.Tos = uint(msg.Tos) + + for j := range attrs { + switch attrs[j].Attr.Type { + case unix.RTA_TABLE: + rule.Table = int(native.Uint32(attrs[j].Value[0:4])) + case nl.FRA_SRC: + rule.Src = &net.IPNet{ + IP: attrs[j].Value, + Mask: net.CIDRMask(int(msg.Src_len), 8*len(attrs[j].Value)), + } + case nl.FRA_DST: + rule.Dst = &net.IPNet{ + IP: attrs[j].Value, + Mask: net.CIDRMask(int(msg.Dst_len), 8*len(attrs[j].Value)), + } + case nl.FRA_FWMARK: + rule.Mark = int(native.Uint32(attrs[j].Value[0:4])) + case nl.FRA_FWMASK: + rule.Mask = int(native.Uint32(attrs[j].Value[0:4])) + case nl.FRA_TUN_ID: + rule.TunID = uint(native.Uint64(attrs[j].Value[0:8])) + case nl.FRA_IIFNAME: + rule.IifName = string(attrs[j].Value[:len(attrs[j].Value)-1]) + case nl.FRA_OIFNAME: + rule.OifName = string(attrs[j].Value[:len(attrs[j].Value)-1]) + case nl.FRA_SUPPRESS_PREFIXLEN: + i := native.Uint32(attrs[j].Value[0:4]) + if i != 0xffffffff { + rule.SuppressPrefixlen = int(i) + } + case nl.FRA_SUPPRESS_IFGROUP: + i := native.Uint32(attrs[j].Value[0:4]) + if i != 0xffffffff { + rule.SuppressIfgroup = int(i) + } + case nl.FRA_FLOW: + rule.Flow = int(native.Uint32(attrs[j].Value[0:4])) + case nl.FRA_GOTO: + rule.Goto = int(native.Uint32(attrs[j].Value[0:4])) + case nl.FRA_PRIORITY: + rule.Priority = int(native.Uint32(attrs[j].Value[0:4])) + case nl.FRA_IP_PROTO: + rule.IPProto = int(native.Uint32(attrs[j].Value[0:4])) + case nl.FRA_DPORT_RANGE: + rule.Dport = NewRulePortRange(native.Uint16(attrs[j].Value[0:2]), native.Uint16(attrs[j].Value[2:4])) + case nl.FRA_SPORT_RANGE: + rule.Sport = NewRulePortRange(native.Uint16(attrs[j].Value[0:2]), native.Uint16(attrs[j].Value[2:4])) + } + } + + if filter != nil { + switch { + case filterMask&RT_FILTER_SRC != 0 && + (rule.Src == nil || rule.Src.String() != filter.Src.String()): + continue + case filterMask&RT_FILTER_DST != 0 && + (rule.Dst == nil || rule.Dst.String() != filter.Dst.String()): + continue + case filterMask&RT_FILTER_TABLE != 0 && + filter.Table != unix.RT_TABLE_UNSPEC && rule.Table != filter.Table: + continue + case filterMask&RT_FILTER_TOS != 0 && rule.Tos != filter.Tos: + continue + case filterMask&RT_FILTER_PRIORITY != 0 && rule.Priority != filter.Priority: + continue + case filterMask&RT_FILTER_MARK != 0 && rule.Mark != filter.Mark: + continue + case filterMask&RT_FILTER_MASK != 0 && rule.Mask != filter.Mask: + continue + } + } + + res = append(res, *rule) + } + + return res, nil +} + +func (pr *RulePortRange) toRtAttrData() []byte { + b := [][]byte{make([]byte, 2), make([]byte, 2)} + native.PutUint16(b[0], pr.Start) + native.PutUint16(b[1], pr.End) + return bytes.Join(b, []byte{}) +} diff --git a/vendor/github.com/vishvananda/netlink/socket.go b/vendor/github.com/vishvananda/netlink/socket.go new file mode 100644 index 000000000000..41aa726245bc --- /dev/null +++ b/vendor/github.com/vishvananda/netlink/socket.go @@ -0,0 +1,27 @@ +package netlink + +import "net" + +// SocketID identifies a single socket. +type SocketID struct { + SourcePort uint16 + DestinationPort uint16 + Source net.IP + Destination net.IP + Interface uint32 + Cookie [2]uint32 +} + +// Socket represents a netlink socket. +type Socket struct { + Family uint8 + State uint8 + Timer uint8 + Retrans uint8 + ID SocketID + Expires uint32 + RQueue uint32 + WQueue uint32 + UID uint32 + INode uint32 +} diff --git a/vendor/github.com/vishvananda/netlink/socket_linux.go b/vendor/github.com/vishvananda/netlink/socket_linux.go new file mode 100644 index 000000000000..b881fe496dd4 --- /dev/null +++ b/vendor/github.com/vishvananda/netlink/socket_linux.go @@ -0,0 +1,291 @@ +package netlink + +import ( + "errors" + "fmt" + "net" + "syscall" + + "github.com/vishvananda/netlink/nl" + "golang.org/x/sys/unix" +) + +const ( + sizeofSocketID = 0x30 + sizeofSocketRequest = sizeofSocketID + 0x8 + sizeofSocket = sizeofSocketID + 0x18 +) + +type socketRequest struct { + Family uint8 + Protocol uint8 + Ext uint8 + pad uint8 + States uint32 + ID SocketID +} + +type writeBuffer struct { + Bytes []byte + pos int +} + +func (b *writeBuffer) Write(c byte) { + b.Bytes[b.pos] = c + b.pos++ +} + +func (b *writeBuffer) Next(n int) []byte { + s := b.Bytes[b.pos : b.pos+n] + b.pos += n + return s +} + +func (r *socketRequest) Serialize() []byte { + b := writeBuffer{Bytes: make([]byte, sizeofSocketRequest)} + b.Write(r.Family) + b.Write(r.Protocol) + b.Write(r.Ext) + b.Write(r.pad) + native.PutUint32(b.Next(4), r.States) + networkOrder.PutUint16(b.Next(2), r.ID.SourcePort) + networkOrder.PutUint16(b.Next(2), r.ID.DestinationPort) + if r.Family == unix.AF_INET6 { + copy(b.Next(16), r.ID.Source) + copy(b.Next(16), r.ID.Destination) + } else { + copy(b.Next(4), r.ID.Source.To4()) + b.Next(12) + copy(b.Next(4), r.ID.Destination.To4()) + b.Next(12) + } + native.PutUint32(b.Next(4), r.ID.Interface) + native.PutUint32(b.Next(4), r.ID.Cookie[0]) + native.PutUint32(b.Next(4), r.ID.Cookie[1]) + return b.Bytes +} + +func (r *socketRequest) Len() int { return sizeofSocketRequest } + +type readBuffer struct { + Bytes []byte + pos int +} + +func (b *readBuffer) Read() byte { + c := b.Bytes[b.pos] + b.pos++ + return c +} + +func (b *readBuffer) Next(n int) []byte { + s := b.Bytes[b.pos : b.pos+n] + b.pos += n + return s +} + +func (s *Socket) deserialize(b []byte) error { + if len(b) < sizeofSocket { + return fmt.Errorf("socket data short read (%d); want %d", len(b), sizeofSocket) + } + rb := readBuffer{Bytes: b} + s.Family = rb.Read() + s.State = rb.Read() + s.Timer = rb.Read() + s.Retrans = rb.Read() + s.ID.SourcePort = networkOrder.Uint16(rb.Next(2)) + s.ID.DestinationPort = networkOrder.Uint16(rb.Next(2)) + if s.Family == unix.AF_INET6 { + s.ID.Source = net.IP(rb.Next(16)) + s.ID.Destination = net.IP(rb.Next(16)) + } else { + s.ID.Source = net.IPv4(rb.Read(), rb.Read(), rb.Read(), rb.Read()) + rb.Next(12) + s.ID.Destination = net.IPv4(rb.Read(), rb.Read(), rb.Read(), rb.Read()) + rb.Next(12) + } + s.ID.Interface = native.Uint32(rb.Next(4)) + s.ID.Cookie[0] = native.Uint32(rb.Next(4)) + s.ID.Cookie[1] = native.Uint32(rb.Next(4)) + s.Expires = native.Uint32(rb.Next(4)) + s.RQueue = native.Uint32(rb.Next(4)) + s.WQueue = native.Uint32(rb.Next(4)) + s.UID = native.Uint32(rb.Next(4)) + s.INode = native.Uint32(rb.Next(4)) + return nil +} + +// SocketGet returns the Socket identified by its local and remote addresses. +func SocketGet(local, remote net.Addr) (*Socket, error) { + localTCP, ok := local.(*net.TCPAddr) + if !ok { + return nil, ErrNotImplemented + } + remoteTCP, ok := remote.(*net.TCPAddr) + if !ok { + return nil, ErrNotImplemented + } + localIP := localTCP.IP.To4() + if localIP == nil { + return nil, ErrNotImplemented + } + remoteIP := remoteTCP.IP.To4() + if remoteIP == nil { + return nil, ErrNotImplemented + } + + s, err := nl.Subscribe(unix.NETLINK_INET_DIAG) + if err != nil { + return nil, err + } + defer s.Close() + req := nl.NewNetlinkRequest(nl.SOCK_DIAG_BY_FAMILY, 0) + req.AddData(&socketRequest{ + Family: unix.AF_INET, + Protocol: unix.IPPROTO_TCP, + ID: SocketID{ + SourcePort: uint16(localTCP.Port), + DestinationPort: uint16(remoteTCP.Port), + Source: localIP, + Destination: remoteIP, + Cookie: [2]uint32{nl.TCPDIAG_NOCOOKIE, nl.TCPDIAG_NOCOOKIE}, + }, + }) + s.Send(req) + msgs, from, err := s.Receive() + if err != nil { + return nil, err + } + if from.Pid != nl.PidKernel { + return nil, fmt.Errorf("Wrong sender portid %d, expected %d", from.Pid, nl.PidKernel) + } + if len(msgs) == 0 { + return nil, errors.New("no message nor error from netlink") + } + if len(msgs) > 2 { + return nil, fmt.Errorf("multiple (%d) matching sockets", len(msgs)) + } + sock := &Socket{} + if err := sock.deserialize(msgs[0].Data); err != nil { + return nil, err + } + return sock, nil +} + +// SocketDiagTCPInfo requests INET_DIAG_INFO for TCP protocol for specified family type and return with extension TCP info. +func SocketDiagTCPInfo(family uint8) ([]*InetDiagTCPInfoResp, error) { + var result []*InetDiagTCPInfoResp + err := socketDiagTCPExecutor(family, func(m syscall.NetlinkMessage) error { + sockInfo := &Socket{} + if err := sockInfo.deserialize(m.Data); err != nil { + return err + } + attrs, err := nl.ParseRouteAttr(m.Data[sizeofSocket:]) + if err != nil { + return err + } + + res, err := attrsToInetDiagTCPInfoResp(attrs, sockInfo) + if err != nil { + return err + } + + result = append(result, res) + return nil + }) + if err != nil { + return nil, err + } + return result, nil +} + +// SocketDiagTCP requests INET_DIAG_INFO for TCP protocol for specified family type and return related socket. +func SocketDiagTCP(family uint8) ([]*Socket, error) { + var result []*Socket + err := socketDiagTCPExecutor(family, func(m syscall.NetlinkMessage) error { + sockInfo := &Socket{} + if err := sockInfo.deserialize(m.Data); err != nil { + return err + } + result = append(result, sockInfo) + return nil + }) + if err != nil { + return nil, err + } + return result, nil +} + +// socketDiagTCPExecutor requests INET_DIAG_INFO for TCP protocol for specified family type. +func socketDiagTCPExecutor(family uint8, receiver func(syscall.NetlinkMessage) error) error { + s, err := nl.Subscribe(unix.NETLINK_INET_DIAG) + if err != nil { + return err + } + defer s.Close() + + req := nl.NewNetlinkRequest(nl.SOCK_DIAG_BY_FAMILY, unix.NLM_F_DUMP) + req.AddData(&socketRequest{ + Family: family, + Protocol: unix.IPPROTO_TCP, + Ext: (1 << (INET_DIAG_VEGASINFO - 1)) | (1 << (INET_DIAG_INFO - 1)), + States: uint32(0xfff), // All TCP states + }) + s.Send(req) + +loop: + for { + msgs, from, err := s.Receive() + if err != nil { + return err + } + if from.Pid != nl.PidKernel { + return fmt.Errorf("Wrong sender portid %d, expected %d", from.Pid, nl.PidKernel) + } + if len(msgs) == 0 { + return errors.New("no message nor error from netlink") + } + + for _, m := range msgs { + switch m.Header.Type { + case unix.NLMSG_DONE: + break loop + case unix.NLMSG_ERROR: + error := int32(native.Uint32(m.Data[0:4])) + return syscall.Errno(-error) + } + if err := receiver(m); err != nil { + return err + } + } + } + return nil +} + +func attrsToInetDiagTCPInfoResp(attrs []syscall.NetlinkRouteAttr, sockInfo *Socket) (*InetDiagTCPInfoResp, error) { + var tcpInfo *TCPInfo + var tcpBBRInfo *TCPBBRInfo + for _, a := range attrs { + if a.Attr.Type == INET_DIAG_INFO { + tcpInfo = &TCPInfo{} + if err := tcpInfo.deserialize(a.Value); err != nil { + return nil, err + } + continue + } + + if a.Attr.Type == INET_DIAG_BBRINFO { + tcpBBRInfo = &TCPBBRInfo{} + if err := tcpBBRInfo.deserialize(a.Value); err != nil { + return nil, err + } + continue + } + } + + return &InetDiagTCPInfoResp{ + InetDiagMsg: sockInfo, + TCPInfo: tcpInfo, + TCPBBRInfo: tcpBBRInfo, + }, nil +} diff --git a/vendor/github.com/vishvananda/netlink/tcp.go b/vendor/github.com/vishvananda/netlink/tcp.go new file mode 100644 index 000000000000..23ca014d43b8 --- /dev/null +++ b/vendor/github.com/vishvananda/netlink/tcp.go @@ -0,0 +1,84 @@ +package netlink + +// TCP States +const ( + TCP_ESTABLISHED = iota + 0x01 + TCP_SYN_SENT + TCP_SYN_RECV + TCP_FIN_WAIT1 + TCP_FIN_WAIT2 + TCP_TIME_WAIT + TCP_CLOSE + TCP_CLOSE_WAIT + TCP_LAST_ACK + TCP_LISTEN + TCP_CLOSING + TCP_NEW_SYN_REC + TCP_MAX_STATES +) + +type TCPInfo struct { + State uint8 + Ca_state uint8 + Retransmits uint8 + Probes uint8 + Backoff uint8 + Options uint8 + Snd_wscale uint8 // no uint4 + Rcv_wscale uint8 + Delivery_rate_app_limited uint8 + Fastopen_client_fail uint8 + Rto uint32 + Ato uint32 + Snd_mss uint32 + Rcv_mss uint32 + Unacked uint32 + Sacked uint32 + Lost uint32 + Retrans uint32 + Fackets uint32 + Last_data_sent uint32 + Last_ack_sent uint32 + Last_data_recv uint32 + Last_ack_recv uint32 + Pmtu uint32 + Rcv_ssthresh uint32 + Rtt uint32 + Rttvar uint32 + Snd_ssthresh uint32 + Snd_cwnd uint32 + Advmss uint32 + Reordering uint32 + Rcv_rtt uint32 + Rcv_space uint32 + Total_retrans uint32 + Pacing_rate uint64 + Max_pacing_rate uint64 + Bytes_acked uint64 /* RFC4898 tcpEStatsAppHCThruOctetsAcked */ + Bytes_received uint64 /* RFC4898 tcpEStatsAppHCThruOctetsReceived */ + Segs_out uint32 /* RFC4898 tcpEStatsPerfSegsOut */ + Segs_in uint32 /* RFC4898 tcpEStatsPerfSegsIn */ + Notsent_bytes uint32 + Min_rtt uint32 + Data_segs_in uint32 /* RFC4898 tcpEStatsDataSegsIn */ + Data_segs_out uint32 /* RFC4898 tcpEStatsDataSegsOut */ + Delivery_rate uint64 + Busy_time uint64 /* Time (usec) busy sending data */ + Rwnd_limited uint64 /* Time (usec) limited by receive window */ + Sndbuf_limited uint64 /* Time (usec) limited by send buffer */ + Delivered uint32 + Delivered_ce uint32 + Bytes_sent uint64 /* RFC4898 tcpEStatsPerfHCDataOctetsOut */ + Bytes_retrans uint64 /* RFC4898 tcpEStatsPerfOctetsRetrans */ + Dsack_dups uint32 /* RFC4898 tcpEStatsStackDSACKDups */ + Reord_seen uint32 /* reordering events seen */ + Rcv_ooopack uint32 /* Out-of-order packets received */ + Snd_wnd uint32 /* peer's advertised receive window after * scaling (bytes) */ +} + +type TCPBBRInfo struct { + BBRBW uint64 + BBRMinRTT uint32 + BBRPacingGain uint32 + BBRCwndGain uint32 +} diff --git a/vendor/github.com/vishvananda/netlink/tcp_linux.go b/vendor/github.com/vishvananda/netlink/tcp_linux.go new file mode 100644 index 000000000000..293858738d8d --- /dev/null +++ b/vendor/github.com/vishvananda/netlink/tcp_linux.go @@ -0,0 +1,353 @@ +package netlink + +import ( + "bytes" + "errors" + "io" +) + +const ( + tcpBBRInfoLen = 20 +) + +func checkDeserErr(err error) error { + if err == io.EOF { + return nil + } + return err +} + +func (t *TCPInfo) deserialize(b []byte) error { + var err error + rb := bytes.NewBuffer(b) + + t.State, err = rb.ReadByte() + if err != nil { + return checkDeserErr(err) + } + + t.Ca_state, err = rb.ReadByte() + if err != nil { + return checkDeserErr(err) + } + + t.Retransmits, err = rb.ReadByte() + if err != nil { + return checkDeserErr(err) + } + + t.Probes, err = rb.ReadByte() + if err != nil { + return checkDeserErr(err) + } + + t.Backoff, err = rb.ReadByte() + if err != nil { + return checkDeserErr(err) + } + t.Options, err = rb.ReadByte() + if err != nil { + return checkDeserErr(err) + } + + scales, err := rb.ReadByte() + if err != nil { + return checkDeserErr(err) + } + t.Snd_wscale = scales >> 4 // first 4 bits + t.Rcv_wscale = scales & 0xf // last 4 bits + + rateLimAndFastOpen, err := rb.ReadByte() + if err != nil { + return checkDeserErr(err) + } + t.Delivery_rate_app_limited = rateLimAndFastOpen >> 7 // get first bit + t.Fastopen_client_fail = rateLimAndFastOpen >> 5 & 3 // get next two bits + + next := rb.Next(4) + if len(next) == 0 { + return nil + } + t.Rto = native.Uint32(next) + + next = rb.Next(4) + if len(next) == 0 { + return nil + } + t.Ato = native.Uint32(next) + + next = rb.Next(4) + if len(next) == 0 { + return nil + } + t.Snd_mss = native.Uint32(next) + + next = rb.Next(4) + if len(next) == 0 { + return nil + } + t.Rcv_mss = native.Uint32(next) + + next = rb.Next(4) + if len(next) == 0 { + return nil + } + t.Unacked = native.Uint32(next) + + next = rb.Next(4) + if len(next) == 0 { + return nil + } + t.Sacked = native.Uint32(next) + + next = rb.Next(4) + if len(next) == 0 { + return nil + } + t.Lost = native.Uint32(next) + + next = rb.Next(4) + if len(next) == 0 { + return nil + } + t.Retrans = native.Uint32(next) + + next = rb.Next(4) + if len(next) == 0 { + return nil + } + t.Fackets = native.Uint32(next) + + next = rb.Next(4) + if len(next) == 0 { + return nil + } + t.Last_data_sent = native.Uint32(next) + + next = rb.Next(4) + if len(next) == 0 { + return nil + } + t.Last_ack_sent = native.Uint32(next) + + next = rb.Next(4) + if len(next) == 0 { + return nil + } + t.Last_data_recv = native.Uint32(next) + + next = rb.Next(4) + if len(next) == 0 { + return nil + } + t.Last_ack_recv = native.Uint32(next) + + next = rb.Next(4) + if len(next) == 0 { + return nil + } + t.Pmtu = native.Uint32(next) + + next = rb.Next(4) + if len(next) == 0 { + return nil + } + t.Rcv_ssthresh = native.Uint32(next) + + next = rb.Next(4) + if len(next) == 0 { + return nil + } + t.Rtt = native.Uint32(next) + + next = rb.Next(4) + if len(next) == 0 { + return nil + } + t.Rttvar = native.Uint32(next) + + next = rb.Next(4) + if len(next) == 0 { + return nil + } + t.Snd_ssthresh = native.Uint32(next) + + next = rb.Next(4) + if len(next) == 0 { + return nil + } + t.Snd_cwnd = native.Uint32(next) + + next = rb.Next(4) + if len(next) == 0 { + return nil + } + t.Advmss = native.Uint32(next) + + next = rb.Next(4) + if len(next) == 0 { + return nil + } + t.Reordering = native.Uint32(next) + + next = rb.Next(4) + if len(next) == 0 { + return nil + } + t.Rcv_rtt = native.Uint32(next) + + next = rb.Next(4) + if len(next) == 0 { + return nil + } + t.Rcv_space = native.Uint32(next) + + next = rb.Next(4) + if len(next) == 0 { + return nil + } + t.Total_retrans = native.Uint32(next) + + next = rb.Next(8) + if len(next) == 0 { + return nil + } + t.Pacing_rate = native.Uint64(next) + + next = rb.Next(8) + if len(next) == 0 { + return nil + } + t.Max_pacing_rate = native.Uint64(next) + + next = rb.Next(8) + if len(next) == 0 { + return nil + } + t.Bytes_acked = native.Uint64(next) + + next = rb.Next(8) + if len(next) == 0 { + return nil + } + t.Bytes_received = native.Uint64(next) + + next = rb.Next(4) + if len(next) == 0 { + return nil + } + t.Segs_out = native.Uint32(next) + + next = rb.Next(4) + if len(next) == 0 { + return nil + } + t.Segs_in = native.Uint32(next) + next = rb.Next(4) + if len(next) == 0 { + return nil + } + t.Notsent_bytes = native.Uint32(next) + next = rb.Next(4) + if len(next) == 0 { + return nil + } + t.Min_rtt = native.Uint32(next) + next = rb.Next(4) + if len(next) == 0 { + return nil + } + t.Data_segs_in = native.Uint32(next) + next = rb.Next(4) + if len(next) == 0 { + return nil + } + t.Data_segs_out = native.Uint32(next) + + next = rb.Next(8) + if len(next) == 0 { + return nil + } + t.Delivery_rate = native.Uint64(next) + + next = rb.Next(8) + if len(next) == 0 { + return nil + } + t.Busy_time = native.Uint64(next) + + next = rb.Next(8) + if len(next) == 0 { + return nil + } + t.Rwnd_limited = native.Uint64(next) + + next = rb.Next(8) + if len(next) == 0 { + return nil + } + t.Sndbuf_limited = native.Uint64(next) + + next = rb.Next(4) + if len(next) == 0 { + return nil + } + t.Delivered = native.Uint32(next) + + next = rb.Next(4) + if len(next) == 0 { + return nil + } + t.Delivered_ce = native.Uint32(next) + + next = rb.Next(8) + if len(next) == 0 { + return nil + } + t.Bytes_sent = native.Uint64(next) + + next = rb.Next(8) + if len(next) == 0 { + return nil + } + t.Bytes_retrans = native.Uint64(next) + + next = rb.Next(4) + if len(next) == 0 { + return nil + } + t.Dsack_dups = native.Uint32(next) + + next = rb.Next(4) + if len(next) == 0 { + return nil + } + t.Reord_seen = native.Uint32(next) + + next = rb.Next(4) + if len(next) == 0 { + return nil + } + t.Rcv_ooopack = native.Uint32(next) + + next = rb.Next(4) + if len(next) == 0 { + return nil + } + t.Snd_wnd = native.Uint32(next) + return nil +} + +func (t *TCPBBRInfo) deserialize(b []byte) error { + if len(b) != tcpBBRInfoLen { + return errors.New("Invalid length") + } + + rb := bytes.NewBuffer(b) + t.BBRBW = native.Uint64(rb.Next(8)) + t.BBRMinRTT = native.Uint32(rb.Next(4)) + t.BBRPacingGain = native.Uint32(rb.Next(4)) + t.BBRCwndGain = native.Uint32(rb.Next(4)) + + return nil +} diff --git a/vendor/github.com/vishvananda/netlink/xfrm.go b/vendor/github.com/vishvananda/netlink/xfrm.go new file mode 100644 index 000000000000..02b41842e102 --- /dev/null +++ b/vendor/github.com/vishvananda/netlink/xfrm.go @@ -0,0 +1,75 @@ +package netlink + +import ( + "fmt" + + "golang.org/x/sys/unix" +) + +// Proto is an enum representing an ipsec protocol. +type Proto uint8 + +const ( + XFRM_PROTO_ROUTE2 Proto = unix.IPPROTO_ROUTING + XFRM_PROTO_ESP Proto = unix.IPPROTO_ESP + XFRM_PROTO_AH Proto = unix.IPPROTO_AH + XFRM_PROTO_HAO Proto = unix.IPPROTO_DSTOPTS + XFRM_PROTO_COMP Proto = 0x6c // NOTE not defined on darwin + XFRM_PROTO_IPSEC_ANY Proto = unix.IPPROTO_RAW +) + +func (p Proto) String() string { + switch p { + case XFRM_PROTO_ROUTE2: + return "route2" + case XFRM_PROTO_ESP: + return "esp" + case XFRM_PROTO_AH: + return "ah" + case XFRM_PROTO_HAO: + return "hao" + case XFRM_PROTO_COMP: + return "comp" + case XFRM_PROTO_IPSEC_ANY: + return "ipsec-any" + } + return fmt.Sprintf("%d", p) +} + +// Mode is an enum representing an ipsec transport. +type Mode uint8 + +const ( + XFRM_MODE_TRANSPORT Mode = iota + XFRM_MODE_TUNNEL + XFRM_MODE_ROUTEOPTIMIZATION + XFRM_MODE_IN_TRIGGER + XFRM_MODE_BEET + XFRM_MODE_MAX +) + +func (m Mode) String() string { + switch m { + case XFRM_MODE_TRANSPORT: + return "transport" + case XFRM_MODE_TUNNEL: + return "tunnel" + case XFRM_MODE_ROUTEOPTIMIZATION: + return "ro" + case XFRM_MODE_IN_TRIGGER: + return "in_trigger" + case XFRM_MODE_BEET: + return "beet" + } + return fmt.Sprintf("%d", m) +} + +// XfrmMark represents the mark associated to the state or policy +type XfrmMark struct { + Value uint32 + Mask uint32 +} + +func (m *XfrmMark) String() string { + return fmt.Sprintf("(0x%x,0x%x)", m.Value, m.Mask) +} diff --git a/vendor/github.com/vishvananda/netlink/xfrm_monitor_linux.go b/vendor/github.com/vishvananda/netlink/xfrm_monitor_linux.go new file mode 100644 index 000000000000..985d3a915f27 --- /dev/null +++ b/vendor/github.com/vishvananda/netlink/xfrm_monitor_linux.go @@ -0,0 +1,101 @@ +package netlink + +import ( + "fmt" + + "github.com/vishvananda/netlink/nl" + "github.com/vishvananda/netns" + "golang.org/x/sys/unix" +) + +type XfrmMsg interface { + Type() nl.XfrmMsgType +} + +type XfrmMsgExpire struct { + XfrmState *XfrmState + Hard bool +} + +func (ue *XfrmMsgExpire) Type() nl.XfrmMsgType { + return nl.XFRM_MSG_EXPIRE +} + +func parseXfrmMsgExpire(b []byte) *XfrmMsgExpire { + var e XfrmMsgExpire + + msg := nl.DeserializeXfrmUserExpire(b) + e.XfrmState = xfrmStateFromXfrmUsersaInfo(&msg.XfrmUsersaInfo) + e.Hard = msg.Hard == 1 + + return &e +} + +func XfrmMonitor(ch chan<- XfrmMsg, done <-chan struct{}, errorChan chan<- error, + types ...nl.XfrmMsgType) error { + + groups, err := xfrmMcastGroups(types) + if err != nil { + return nil + } + s, err := nl.SubscribeAt(netns.None(), netns.None(), unix.NETLINK_XFRM, groups...) + if err != nil { + return err + } + + if done != nil { + go func() { + <-done + s.Close() + }() + + } + + go func() { + defer close(ch) + for { + msgs, from, err := s.Receive() + if err != nil { + errorChan <- err + return + } + if from.Pid != nl.PidKernel { + errorChan <- fmt.Errorf("Wrong sender portid %d, expected %d", from.Pid, nl.PidKernel) + return + } + for _, m := range msgs { + switch m.Header.Type { + case nl.XFRM_MSG_EXPIRE: + ch <- parseXfrmMsgExpire(m.Data) + default: + errorChan <- fmt.Errorf("unsupported msg type: %x", m.Header.Type) + } + } + } + }() + + return nil +} + +func xfrmMcastGroups(types []nl.XfrmMsgType) ([]uint, error) { + groups := make([]uint, 0) + + if len(types) == 0 { + return nil, fmt.Errorf("no xfrm msg type specified") + } + + for _, t := range types { + var group uint + + switch t { + case nl.XFRM_MSG_EXPIRE: + group = nl.XFRMNLGRP_EXPIRE + default: + return nil, fmt.Errorf("unsupported group: %x", t) + } + + groups = append(groups, group) + } + + return groups, nil +} diff --git a/vendor/github.com/vishvananda/netlink/xfrm_policy.go b/vendor/github.com/vishvananda/netlink/xfrm_policy.go new file mode 100644 index 000000000000..b7532b092cb7 --- /dev/null +++ b/vendor/github.com/vishvananda/netlink/xfrm_policy.go @@ -0,0 +1,97 @@ +package netlink + +import ( + "fmt" + "net" +) + +// Dir is an enum representing an ipsec template direction. +type Dir uint8 + +const ( + XFRM_DIR_IN Dir = iota + XFRM_DIR_OUT + XFRM_DIR_FWD + XFRM_SOCKET_IN + XFRM_SOCKET_OUT + XFRM_SOCKET_FWD +) + +func (d Dir) String() string { + switch d { + case XFRM_DIR_IN: + return "dir in" + case XFRM_DIR_OUT: + return "dir out" + case XFRM_DIR_FWD: + return "dir fwd" + case XFRM_SOCKET_IN: + return "socket in" + case XFRM_SOCKET_OUT: + return "socket out" + case XFRM_SOCKET_FWD: + return "socket fwd" + } + return fmt.Sprintf("socket %d", d-XFRM_SOCKET_IN) +} + +// PolicyAction is an enum representing an ipsec policy action. +type PolicyAction uint8 + +const ( + XFRM_POLICY_ALLOW PolicyAction = 0 + XFRM_POLICY_BLOCK PolicyAction = 1 +) + +func (a PolicyAction) String() string { + switch a { + case XFRM_POLICY_ALLOW: + return "allow" + case XFRM_POLICY_BLOCK: + return "block" + default: + return fmt.Sprintf("action %d", a) + } +} + +// XfrmPolicyTmpl encapsulates a rule for the base addresses of an ipsec +// policy. These rules are matched with XfrmState to determine encryption +// and authentication algorithms. +type XfrmPolicyTmpl struct { + Dst net.IP + Src net.IP + Proto Proto + Mode Mode + Spi int + Reqid int + Optional int +} + +func (t XfrmPolicyTmpl) String() string { + return fmt.Sprintf("{Dst: %v, Src: %v, Proto: %s, Mode: %s, Spi: 0x%x, Reqid: 0x%x}", + t.Dst, t.Src, t.Proto, t.Mode, t.Spi, t.Reqid) +} + +// XfrmPolicy represents an ipsec policy. It represents the overlay network +// and has a list of XfrmPolicyTmpls representing the base addresses of +// the policy. +type XfrmPolicy struct { + Dst *net.IPNet + Src *net.IPNet + Proto Proto + DstPort int + SrcPort int + Dir Dir + Priority int + Index int + Action PolicyAction + Ifindex int + Ifid int + Mark *XfrmMark + Tmpls []XfrmPolicyTmpl +} + +func (p XfrmPolicy) String() string { + return fmt.Sprintf("{Dst: %v, Src: %v, Proto: %s, DstPort: %d, SrcPort: %d, Dir: %s, Priority: %d, Index: %d, Action: %s, Ifindex: %d, Ifid: %d, Mark: %s, Tmpls: %s}", + p.Dst, p.Src, p.Proto, p.DstPort, p.SrcPort, p.Dir, p.Priority, p.Index, p.Action, p.Ifindex, p.Ifid, p.Mark, p.Tmpls) +} diff --git a/vendor/github.com/vishvananda/netlink/xfrm_policy_linux.go b/vendor/github.com/vishvananda/netlink/xfrm_policy_linux.go new file mode 100644 index 000000000000..35849680413c --- /dev/null +++ b/vendor/github.com/vishvananda/netlink/xfrm_policy_linux.go @@ -0,0 +1,269 @@ +package netlink + +import ( + "github.com/vishvananda/netlink/nl" + "golang.org/x/sys/unix" +) + +func selFromPolicy(sel *nl.XfrmSelector, policy *XfrmPolicy) { + sel.Family = uint16(nl.FAMILY_V4) + if policy.Dst != nil { + sel.Family = uint16(nl.GetIPFamily(policy.Dst.IP)) + sel.Daddr.FromIP(policy.Dst.IP) + prefixlenD, _ := policy.Dst.Mask.Size() + sel.PrefixlenD = uint8(prefixlenD) + } + if policy.Src != nil { + sel.Saddr.FromIP(policy.Src.IP) + prefixlenS, _ := policy.Src.Mask.Size() + sel.PrefixlenS = uint8(prefixlenS) + } + sel.Proto = uint8(policy.Proto) + sel.Dport = nl.Swap16(uint16(policy.DstPort)) + sel.Sport = nl.Swap16(uint16(policy.SrcPort)) + if sel.Dport != 0 { + sel.DportMask = ^uint16(0) + } + if sel.Sport != 0 { + sel.SportMask = ^uint16(0) + } + sel.Ifindex = int32(policy.Ifindex) +} + +// XfrmPolicyAdd will add an xfrm policy to the system. +// Equivalent to: `ip xfrm policy add $policy` +func XfrmPolicyAdd(policy *XfrmPolicy) error { + return pkgHandle.XfrmPolicyAdd(policy) +} + +// XfrmPolicyAdd will add an xfrm policy to the system. +// Equivalent to: `ip xfrm policy add $policy` +func (h *Handle) XfrmPolicyAdd(policy *XfrmPolicy) error { + return h.xfrmPolicyAddOrUpdate(policy, nl.XFRM_MSG_NEWPOLICY) +} + +// XfrmPolicyUpdate will update an xfrm policy to the system. +// Equivalent to: `ip xfrm policy update $policy` +func XfrmPolicyUpdate(policy *XfrmPolicy) error { + return pkgHandle.XfrmPolicyUpdate(policy) +} + +// XfrmPolicyUpdate will update an xfrm policy to the system. +// Equivalent to: `ip xfrm policy update $policy` +func (h *Handle) XfrmPolicyUpdate(policy *XfrmPolicy) error { + return h.xfrmPolicyAddOrUpdate(policy, nl.XFRM_MSG_UPDPOLICY) +} + +func (h *Handle) xfrmPolicyAddOrUpdate(policy *XfrmPolicy, nlProto int) error { + req := h.newNetlinkRequest(nlProto, unix.NLM_F_CREATE|unix.NLM_F_EXCL|unix.NLM_F_ACK) + + msg := &nl.XfrmUserpolicyInfo{} + selFromPolicy(&msg.Sel, policy) + msg.Priority = uint32(policy.Priority) + msg.Index = uint32(policy.Index) + msg.Dir = uint8(policy.Dir) + msg.Action = uint8(policy.Action) + msg.Lft.SoftByteLimit = nl.XFRM_INF + msg.Lft.HardByteLimit = nl.XFRM_INF + msg.Lft.SoftPacketLimit = nl.XFRM_INF + msg.Lft.HardPacketLimit = nl.XFRM_INF + req.AddData(msg) + + tmplData := make([]byte, nl.SizeofXfrmUserTmpl*len(policy.Tmpls)) + for i, tmpl := range policy.Tmpls { + start := i * nl.SizeofXfrmUserTmpl + userTmpl := nl.DeserializeXfrmUserTmpl(tmplData[start : start+nl.SizeofXfrmUserTmpl]) + userTmpl.XfrmId.Daddr.FromIP(tmpl.Dst) + userTmpl.Saddr.FromIP(tmpl.Src) + userTmpl.XfrmId.Proto = uint8(tmpl.Proto) + userTmpl.XfrmId.Spi = nl.Swap32(uint32(tmpl.Spi)) + userTmpl.Mode = uint8(tmpl.Mode) + userTmpl.Reqid = uint32(tmpl.Reqid) + userTmpl.Optional = uint8(tmpl.Optional) + userTmpl.Aalgos = ^uint32(0) + userTmpl.Ealgos = ^uint32(0) + userTmpl.Calgos = ^uint32(0) + } + if len(tmplData) > 0 { + tmpls := nl.NewRtAttr(nl.XFRMA_TMPL, tmplData) + req.AddData(tmpls) + } + if policy.Mark != nil { + out := nl.NewRtAttr(nl.XFRMA_MARK, writeMark(policy.Mark)) + req.AddData(out) + } + + if policy.Ifid != 0 { + ifId := nl.NewRtAttr(nl.XFRMA_IF_ID, nl.Uint32Attr(uint32(policy.Ifid))) + req.AddData(ifId) + } + + _, err := req.Execute(unix.NETLINK_XFRM, 0) + return err +} + +// XfrmPolicyDel will delete an xfrm policy from the system. Note that +// the Tmpls are ignored when matching the policy to delete. +// Equivalent to: `ip xfrm policy del $policy` +func XfrmPolicyDel(policy *XfrmPolicy) error { + return pkgHandle.XfrmPolicyDel(policy) +} + +// XfrmPolicyDel will delete an xfrm policy from the system. Note that +// the Tmpls are ignored when matching the policy to delete. +// Equivalent to: `ip xfrm policy del $policy` +func (h *Handle) XfrmPolicyDel(policy *XfrmPolicy) error { + _, err := h.xfrmPolicyGetOrDelete(policy, nl.XFRM_MSG_DELPOLICY) + return err +} + +// XfrmPolicyList gets a list of xfrm policies in the system. +// Equivalent to: `ip xfrm policy show`. +// The list can be filtered by ip family. +func XfrmPolicyList(family int) ([]XfrmPolicy, error) { + return pkgHandle.XfrmPolicyList(family) +} + +// XfrmPolicyList gets a list of xfrm policies in the system. +// Equivalent to: `ip xfrm policy show`. +// The list can be filtered by ip family. +func (h *Handle) XfrmPolicyList(family int) ([]XfrmPolicy, error) { + req := h.newNetlinkRequest(nl.XFRM_MSG_GETPOLICY, unix.NLM_F_DUMP) + + msg := nl.NewIfInfomsg(family) + req.AddData(msg) + + msgs, err := req.Execute(unix.NETLINK_XFRM, nl.XFRM_MSG_NEWPOLICY) + if err != nil { + return nil, err + } + + var res []XfrmPolicy + for _, m := range msgs { + if policy, err := parseXfrmPolicy(m, family); err == nil { + res = append(res, *policy) + } else if err == familyError { + continue + } else { + return nil, err + } + } + return res, nil +} + +// XfrmPolicyGet gets a the policy described by the index or selector, if found. +// Equivalent to: `ip xfrm policy get { SELECTOR | index INDEX } dir DIR [ctx CTX ] [ mark MARK [ mask MASK ] ] [ ptype PTYPE ]`. +func XfrmPolicyGet(policy *XfrmPolicy) (*XfrmPolicy, error) { + return pkgHandle.XfrmPolicyGet(policy) +} + +// XfrmPolicyGet gets a the policy described by the index or selector, if found. +// Equivalent to: `ip xfrm policy get { SELECTOR | index INDEX } dir DIR [ctx CTX ] [ mark MARK [ mask MASK ] ] [ ptype PTYPE ]`. +func (h *Handle) XfrmPolicyGet(policy *XfrmPolicy) (*XfrmPolicy, error) { + return h.xfrmPolicyGetOrDelete(policy, nl.XFRM_MSG_GETPOLICY) +} + +// XfrmPolicyFlush will flush the policies on the system. +// Equivalent to: `ip xfrm policy flush` +func XfrmPolicyFlush() error { + return pkgHandle.XfrmPolicyFlush() +} + +// XfrmPolicyFlush will flush the policies on the system. +// Equivalent to: `ip xfrm policy flush` +func (h *Handle) XfrmPolicyFlush() error { + req := h.newNetlinkRequest(nl.XFRM_MSG_FLUSHPOLICY, unix.NLM_F_ACK) + _, err := req.Execute(unix.NETLINK_XFRM, 0) + return err +} + +func (h *Handle) xfrmPolicyGetOrDelete(policy *XfrmPolicy, nlProto int) (*XfrmPolicy, error) { + req := h.newNetlinkRequest(nlProto, unix.NLM_F_ACK) + + msg := &nl.XfrmUserpolicyId{} + selFromPolicy(&msg.Sel, policy) + msg.Index = uint32(policy.Index) + msg.Dir = uint8(policy.Dir) + req.AddData(msg) + + if policy.Mark != nil { + out := nl.NewRtAttr(nl.XFRMA_MARK, writeMark(policy.Mark)) + req.AddData(out) + } + + if policy.Ifid != 0 { + ifId := nl.NewRtAttr(nl.XFRMA_IF_ID, nl.Uint32Attr(uint32(policy.Ifid))) + req.AddData(ifId) + } + + resType := nl.XFRM_MSG_NEWPOLICY + if nlProto == nl.XFRM_MSG_DELPOLICY { + resType = 0 + } + + msgs, err := req.Execute(unix.NETLINK_XFRM, uint16(resType)) + if err != nil { + return nil, err + } + + if nlProto == nl.XFRM_MSG_DELPOLICY { + return nil, err + } + + return parseXfrmPolicy(msgs[0], FAMILY_ALL) +} + +func parseXfrmPolicy(m []byte, family int) (*XfrmPolicy, error) { + msg := nl.DeserializeXfrmUserpolicyInfo(m) + + // This is mainly for the policy dump + if family != FAMILY_ALL && family != int(msg.Sel.Family) { + return nil, familyError + } + + var policy XfrmPolicy + + policy.Dst = msg.Sel.Daddr.ToIPNet(msg.Sel.PrefixlenD) + policy.Src = msg.Sel.Saddr.ToIPNet(msg.Sel.PrefixlenS) + policy.Proto = Proto(msg.Sel.Proto) + policy.DstPort = int(nl.Swap16(msg.Sel.Dport)) + policy.SrcPort = int(nl.Swap16(msg.Sel.Sport)) + policy.Ifindex = int(msg.Sel.Ifindex) + policy.Priority = int(msg.Priority) + policy.Index = int(msg.Index) + policy.Dir = Dir(msg.Dir) + policy.Action = PolicyAction(msg.Action) + + attrs, err := nl.ParseRouteAttr(m[msg.Len():]) + if err != nil { + return nil, err + } + + for _, attr := range attrs { + switch attr.Attr.Type { + case nl.XFRMA_TMPL: + max := len(attr.Value) + for i := 0; i < max; i += nl.SizeofXfrmUserTmpl { + var resTmpl XfrmPolicyTmpl + tmpl := nl.DeserializeXfrmUserTmpl(attr.Value[i : i+nl.SizeofXfrmUserTmpl]) + resTmpl.Dst = tmpl.XfrmId.Daddr.ToIP() + resTmpl.Src = tmpl.Saddr.ToIP() + resTmpl.Proto = Proto(tmpl.XfrmId.Proto) + resTmpl.Mode = Mode(tmpl.Mode) + resTmpl.Spi = int(nl.Swap32(tmpl.XfrmId.Spi)) + resTmpl.Reqid = int(tmpl.Reqid) + resTmpl.Optional = int(tmpl.Optional) + policy.Tmpls = append(policy.Tmpls, resTmpl) + } + case nl.XFRMA_MARK: + mark := nl.DeserializeXfrmMark(attr.Value[:]) + policy.Mark = new(XfrmMark) + policy.Mark.Value = mark.Value + policy.Mark.Mask = mark.Mask + case nl.XFRMA_IF_ID: + policy.Ifid = int(native.Uint32(attr.Value)) + } + } + + return &policy, nil +} diff --git a/vendor/github.com/vishvananda/netlink/xfrm_state.go b/vendor/github.com/vishvananda/netlink/xfrm_state.go new file mode 100644 index 000000000000..19df82c76321 --- /dev/null +++ b/vendor/github.com/vishvananda/netlink/xfrm_state.go @@ -0,0 +1,131 @@ +package netlink + +import ( + "fmt" + "net" + "time" +) + +// XfrmStateAlgo represents the algorithm to use for the ipsec encryption. +type XfrmStateAlgo struct { + Name string + Key []byte + TruncateLen int // Auth only + ICVLen int // AEAD only +} + +func (a XfrmStateAlgo) String() string { + base := fmt.Sprintf("{Name: %s, Key: 0x%x", a.Name, a.Key) + if a.TruncateLen != 0 { + base = fmt.Sprintf("%s, Truncate length: %d", base, a.TruncateLen) + } + if a.ICVLen != 0 { + base = fmt.Sprintf("%s, ICV length: %d", base, a.ICVLen) + } + return fmt.Sprintf("%s}", base) +} + +// EncapType is an enum representing the optional packet encapsulation. +type EncapType uint8 + +const ( + XFRM_ENCAP_ESPINUDP_NONIKE EncapType = iota + 1 + XFRM_ENCAP_ESPINUDP +) + +func (e EncapType) String() string { + switch e { + case XFRM_ENCAP_ESPINUDP_NONIKE: + return "espinudp-non-ike" + case XFRM_ENCAP_ESPINUDP: + return "espinudp" + } + return "unknown" +} + +// XfrmStateEncap represents the encapsulation to use for the ipsec encryption. +type XfrmStateEncap struct { + Type EncapType + SrcPort int + DstPort int + OriginalAddress net.IP +} + +func (e XfrmStateEncap) String() string { + return fmt.Sprintf("{Type: %s, Srcport: %d, DstPort: %d, OriginalAddress: %v}", + e.Type, e.SrcPort, e.DstPort, e.OriginalAddress) +} + +// XfrmStateLimits represents the configured limits for the state. +type XfrmStateLimits struct { + ByteSoft uint64 + ByteHard uint64 + PacketSoft uint64 + PacketHard uint64 + TimeSoft uint64 + TimeHard uint64 + TimeUseSoft uint64 + TimeUseHard uint64 +} + +// XfrmStateStats represents the current number of bytes/packets +// processed by this State, the State's installation and first use +// time and the replay window counters. +type XfrmStateStats struct { + ReplayWindow uint32 + Replay uint32 + Failed uint32 + Bytes uint64 + Packets uint64 + AddTime uint64 + UseTime uint64 +} + +// XfrmState represents the state of an ipsec policy. It optionally +// contains an XfrmStateAlgo for encryption and one for authentication. +type XfrmState struct { + Dst net.IP + Src net.IP + Proto Proto + Mode Mode + Spi int + Reqid int + ReplayWindow int + Limits XfrmStateLimits + Statistics XfrmStateStats + Mark *XfrmMark + OutputMark *XfrmMark + Ifid int + Auth *XfrmStateAlgo + Crypt *XfrmStateAlgo + Aead *XfrmStateAlgo + Encap *XfrmStateEncap + ESN bool +} + +func (sa XfrmState) String() string { + return fmt.Sprintf("Dst: %v, Src: %v, Proto: %s, Mode: %s, SPI: 0x%x, ReqID: 0x%x, ReplayWindow: %d, Mark: %v, OutputMark: %v, Ifid: %d, Auth: %v, Crypt: %v, Aead: %v, Encap: %v, ESN: %t", + sa.Dst, sa.Src, sa.Proto, sa.Mode, sa.Spi, sa.Reqid, sa.ReplayWindow, sa.Mark, sa.OutputMark, sa.Ifid, sa.Auth, sa.Crypt, sa.Aead, sa.Encap, sa.ESN) +} +func (sa XfrmState) Print(stats bool) string { + if !stats { + return sa.String() + } + at := time.Unix(int64(sa.Statistics.AddTime), 0).Format(time.UnixDate) + ut := "-" + if sa.Statistics.UseTime > 0 { + ut = time.Unix(int64(sa.Statistics.UseTime), 0).Format(time.UnixDate) + } + return fmt.Sprintf("%s, ByteSoft: %s, ByteHard: %s, PacketSoft: %s, PacketHard: %s, TimeSoft: %d, TimeHard: %d, TimeUseSoft: %d, TimeUseHard: %d, Bytes: %d, Packets: %d, "+ + "AddTime: %s, UseTime: %s, ReplayWindow: %d, Replay: %d, Failed: %d", + sa.String(), printLimit(sa.Limits.ByteSoft), printLimit(sa.Limits.ByteHard), printLimit(sa.Limits.PacketSoft), printLimit(sa.Limits.PacketHard), + sa.Limits.TimeSoft, sa.Limits.TimeHard, sa.Limits.TimeUseSoft, sa.Limits.TimeUseHard, sa.Statistics.Bytes, sa.Statistics.Packets, at, ut, + sa.Statistics.ReplayWindow, sa.Statistics.Replay, sa.Statistics.Failed) +} + +func printLimit(lmt uint64) string { + if lmt == ^uint64(0) { + return "(INF)" + } + return fmt.Sprintf("%d", lmt) +} diff --git a/vendor/github.com/vishvananda/netlink/xfrm_state_linux.go b/vendor/github.com/vishvananda/netlink/xfrm_state_linux.go new file mode 100644 index 000000000000..61a2d2dea28f --- /dev/null +++ b/vendor/github.com/vishvananda/netlink/xfrm_state_linux.go @@ -0,0 +1,481 @@ +package netlink + +import ( + "fmt" + "unsafe" + + "github.com/vishvananda/netlink/nl" + "golang.org/x/sys/unix" +) + +func writeStateAlgo(a *XfrmStateAlgo) []byte { + algo := nl.XfrmAlgo{ + AlgKeyLen: uint32(len(a.Key) * 8), + AlgKey: a.Key, + } + end := len(a.Name) + if end > 64 { + end = 64 + } + copy(algo.AlgName[:end], a.Name) + return algo.Serialize() +} + +func writeStateAlgoAuth(a *XfrmStateAlgo) []byte { + algo := nl.XfrmAlgoAuth{ + AlgKeyLen: uint32(len(a.Key) * 8), + AlgTruncLen: uint32(a.TruncateLen), + AlgKey: a.Key, + } + end := len(a.Name) + if end > 64 { + end = 64 + } + copy(algo.AlgName[:end], a.Name) + return algo.Serialize() +} + +func writeStateAlgoAead(a *XfrmStateAlgo) []byte { + algo := nl.XfrmAlgoAEAD{ + AlgKeyLen: uint32(len(a.Key) * 8), + AlgICVLen: uint32(a.ICVLen), + AlgKey: a.Key, + } + end := len(a.Name) + if end > 64 { + end = 64 + } + copy(algo.AlgName[:end], a.Name) + return algo.Serialize() +} + +func writeMark(m *XfrmMark) []byte { + mark := &nl.XfrmMark{ + Value: m.Value, + Mask: m.Mask, + } + if mark.Mask == 0 { + mark.Mask = ^uint32(0) + } + return mark.Serialize() +} + +func writeReplayEsn(replayWindow int) []byte { + replayEsn := &nl.XfrmReplayStateEsn{ + OSeq: 0, + Seq: 0, + OSeqHi: 0, + SeqHi: 0, + ReplayWindow: uint32(replayWindow), + } + + // Linux stores the bitmap to identify the already received sequence packets in blocks of uint32 elements. + // Therefore bitmap length is the minimum number of uint32 elements needed. The following is a ceiling operation. + bytesPerElem := int(unsafe.Sizeof(replayEsn.BmpLen)) // Any uint32 variable is good for this + replayEsn.BmpLen = uint32((replayWindow + (bytesPerElem * 8) - 1) / (bytesPerElem * 8)) + + return replayEsn.Serialize() +} + +// XfrmStateAdd will add an xfrm state to the system. +// Equivalent to: `ip xfrm state add $state` +func XfrmStateAdd(state *XfrmState) error { + return pkgHandle.XfrmStateAdd(state) +} + +// XfrmStateAdd will add an xfrm state to the system. +// Equivalent to: `ip xfrm state add $state` +func (h *Handle) XfrmStateAdd(state *XfrmState) error { + return h.xfrmStateAddOrUpdate(state, nl.XFRM_MSG_NEWSA) +} + +// XfrmStateAllocSpi will allocate an xfrm state in the system. +// Equivalent to: `ip xfrm state allocspi` +func XfrmStateAllocSpi(state *XfrmState) (*XfrmState, error) { + return pkgHandle.xfrmStateAllocSpi(state) +} + +// XfrmStateUpdate will update an xfrm state to the system. +// Equivalent to: `ip xfrm state update $state` +func XfrmStateUpdate(state *XfrmState) error { + return pkgHandle.XfrmStateUpdate(state) +} + +// XfrmStateUpdate will update an xfrm state to the system. +// Equivalent to: `ip xfrm state update $state` +func (h *Handle) XfrmStateUpdate(state *XfrmState) error { + return h.xfrmStateAddOrUpdate(state, nl.XFRM_MSG_UPDSA) +} + +func (h *Handle) xfrmStateAddOrUpdate(state *XfrmState, nlProto int) error { + + // A state with spi 0 can't be deleted so don't allow it to be set + if state.Spi == 0 { + return fmt.Errorf("Spi must be set when adding xfrm state") + } + req := h.newNetlinkRequest(nlProto, unix.NLM_F_CREATE|unix.NLM_F_EXCL|unix.NLM_F_ACK) + + msg := xfrmUsersaInfoFromXfrmState(state) + + if state.ESN { + if state.ReplayWindow == 0 { + return fmt.Errorf("ESN flag set without ReplayWindow") + } + msg.Flags |= nl.XFRM_STATE_ESN + msg.ReplayWindow = 0 + } + + limitsToLft(state.Limits, &msg.Lft) + req.AddData(msg) + + if state.Auth != nil { + out := nl.NewRtAttr(nl.XFRMA_ALG_AUTH_TRUNC, writeStateAlgoAuth(state.Auth)) + req.AddData(out) + } + if state.Crypt != nil { + out := nl.NewRtAttr(nl.XFRMA_ALG_CRYPT, writeStateAlgo(state.Crypt)) + req.AddData(out) + } + if state.Aead != nil { + out := nl.NewRtAttr(nl.XFRMA_ALG_AEAD, writeStateAlgoAead(state.Aead)) + req.AddData(out) + } + if state.Encap != nil { + encapData := make([]byte, nl.SizeofXfrmEncapTmpl) + encap := nl.DeserializeXfrmEncapTmpl(encapData) + encap.EncapType = uint16(state.Encap.Type) + encap.EncapSport = nl.Swap16(uint16(state.Encap.SrcPort)) + encap.EncapDport = nl.Swap16(uint16(state.Encap.DstPort)) + encap.EncapOa.FromIP(state.Encap.OriginalAddress) + out := nl.NewRtAttr(nl.XFRMA_ENCAP, encapData) + req.AddData(out) + } + if state.Mark != nil { + out := nl.NewRtAttr(nl.XFRMA_MARK, writeMark(state.Mark)) + req.AddData(out) + } + if state.ESN { + out := nl.NewRtAttr(nl.XFRMA_REPLAY_ESN_VAL, writeReplayEsn(state.ReplayWindow)) + req.AddData(out) + } + if state.OutputMark != nil { + out := nl.NewRtAttr(nl.XFRMA_SET_MARK, nl.Uint32Attr(state.OutputMark.Value)) + req.AddData(out) + if state.OutputMark.Mask != 0 { + out = nl.NewRtAttr(nl.XFRMA_SET_MARK_MASK, nl.Uint32Attr(state.OutputMark.Mask)) + req.AddData(out) + } + } + + if state.Ifid != 0 { + ifId := nl.NewRtAttr(nl.XFRMA_IF_ID, nl.Uint32Attr(uint32(state.Ifid))) + req.AddData(ifId) + } + + _, err := req.Execute(unix.NETLINK_XFRM, 0) + return err +} + +func (h *Handle) xfrmStateAllocSpi(state *XfrmState) (*XfrmState, error) { + req := h.newNetlinkRequest(nl.XFRM_MSG_ALLOCSPI, + unix.NLM_F_CREATE|unix.NLM_F_EXCL|unix.NLM_F_ACK) + + msg := &nl.XfrmUserSpiInfo{} + msg.XfrmUsersaInfo = *(xfrmUsersaInfoFromXfrmState(state)) + // 1-255 is reserved by IANA for future use + msg.Min = 0x100 + msg.Max = 0xffffffff + req.AddData(msg) + + if state.Mark != nil { + out := nl.NewRtAttr(nl.XFRMA_MARK, writeMark(state.Mark)) + req.AddData(out) + } + + msgs, err := req.Execute(unix.NETLINK_XFRM, 0) + if err != nil { + return nil, err + } + + return parseXfrmState(msgs[0], FAMILY_ALL) +} + +// XfrmStateDel will delete an xfrm state from the system. Note that +// the Algos are ignored when matching the state to delete. +// Equivalent to: `ip xfrm state del $state` +func XfrmStateDel(state *XfrmState) error { + return pkgHandle.XfrmStateDel(state) +} + +// XfrmStateDel will delete an xfrm state from the system. Note that +// the Algos are ignored when matching the state to delete. +// Equivalent to: `ip xfrm state del $state` +func (h *Handle) XfrmStateDel(state *XfrmState) error { + _, err := h.xfrmStateGetOrDelete(state, nl.XFRM_MSG_DELSA) + return err +} + +// XfrmStateList gets a list of xfrm states in the system. +// Equivalent to: `ip [-4|-6] xfrm state show`. +// The list can be filtered by ip family. +func XfrmStateList(family int) ([]XfrmState, error) { + return pkgHandle.XfrmStateList(family) +} + +// XfrmStateList gets a list of xfrm states in the system. +// Equivalent to: `ip xfrm state show`. +// The list can be filtered by ip family. +func (h *Handle) XfrmStateList(family int) ([]XfrmState, error) { + req := h.newNetlinkRequest(nl.XFRM_MSG_GETSA, unix.NLM_F_DUMP) + + msgs, err := req.Execute(unix.NETLINK_XFRM, nl.XFRM_MSG_NEWSA) + if err != nil { + return nil, err + } + + var res []XfrmState + for _, m := range msgs { + if state, err := parseXfrmState(m, family); err == nil { + res = append(res, *state) + } else if err == familyError { + continue + } else { + return nil, err + } + } + return res, nil +} + +// XfrmStateGet gets the xfrm state described by the ID, if found. +// Equivalent to: `ip xfrm state get ID [ mark MARK [ mask MASK ] ]`. +// Only the fields which constitue the SA ID must be filled in: +// ID := [ src ADDR ] [ dst ADDR ] [ proto XFRM-PROTO ] [ spi SPI ] +// mark is optional +func XfrmStateGet(state *XfrmState) (*XfrmState, error) { + return pkgHandle.XfrmStateGet(state) +} + +// XfrmStateGet gets the xfrm state described by the ID, if found. +// Equivalent to: `ip xfrm state get ID [ mark MARK [ mask MASK ] ]`. +// Only the fields which constitue the SA ID must be filled in: +// ID := [ src ADDR ] [ dst ADDR ] [ proto XFRM-PROTO ] [ spi SPI ] +// mark is optional +func (h *Handle) XfrmStateGet(state *XfrmState) (*XfrmState, error) { + return h.xfrmStateGetOrDelete(state, nl.XFRM_MSG_GETSA) +} + +func (h *Handle) xfrmStateGetOrDelete(state *XfrmState, nlProto int) (*XfrmState, error) { + req := h.newNetlinkRequest(nlProto, unix.NLM_F_ACK) + + msg := &nl.XfrmUsersaId{} + msg.Family = uint16(nl.GetIPFamily(state.Dst)) + msg.Daddr.FromIP(state.Dst) + msg.Proto = uint8(state.Proto) + msg.Spi = nl.Swap32(uint32(state.Spi)) + req.AddData(msg) + + if state.Mark != nil { + out := nl.NewRtAttr(nl.XFRMA_MARK, writeMark(state.Mark)) + req.AddData(out) + } + if state.Src != nil { + out := nl.NewRtAttr(nl.XFRMA_SRCADDR, state.Src.To16()) + req.AddData(out) + } + + if state.Ifid != 0 { + ifId := nl.NewRtAttr(nl.XFRMA_IF_ID, nl.Uint32Attr(uint32(state.Ifid))) + req.AddData(ifId) + } + + resType := nl.XFRM_MSG_NEWSA + if nlProto == nl.XFRM_MSG_DELSA { + resType = 0 + } + + msgs, err := req.Execute(unix.NETLINK_XFRM, uint16(resType)) + if err != nil { + return nil, err + } + + if nlProto == nl.XFRM_MSG_DELSA { + return nil, nil + } + + s, err := parseXfrmState(msgs[0], FAMILY_ALL) + if err != nil { + return nil, err + } + + return s, nil +} + +var familyError = fmt.Errorf("family error") + +func xfrmStateFromXfrmUsersaInfo(msg *nl.XfrmUsersaInfo) *XfrmState { + var state XfrmState + + state.Dst = msg.Id.Daddr.ToIP() + state.Src = msg.Saddr.ToIP() + state.Proto = Proto(msg.Id.Proto) + state.Mode = Mode(msg.Mode) + state.Spi = int(nl.Swap32(msg.Id.Spi)) + state.Reqid = int(msg.Reqid) + state.ReplayWindow = int(msg.ReplayWindow) + lftToLimits(&msg.Lft, &state.Limits) + curToStats(&msg.Curlft, &msg.Stats, &state.Statistics) + + return &state +} + +func parseXfrmState(m []byte, family int) (*XfrmState, error) { + msg := nl.DeserializeXfrmUsersaInfo(m) + + // This is mainly for the state dump + if family != FAMILY_ALL && family != int(msg.Family) { + return nil, familyError + } + + state := xfrmStateFromXfrmUsersaInfo(msg) + + attrs, err := nl.ParseRouteAttr(m[nl.SizeofXfrmUsersaInfo:]) + if err != nil { + return nil, err + } + + for _, attr := range attrs { + switch attr.Attr.Type { + case nl.XFRMA_ALG_AUTH, nl.XFRMA_ALG_CRYPT: + var resAlgo *XfrmStateAlgo + if attr.Attr.Type == nl.XFRMA_ALG_AUTH { + if state.Auth == nil { + state.Auth = new(XfrmStateAlgo) + } + resAlgo = state.Auth + } else { + state.Crypt = new(XfrmStateAlgo) + resAlgo = state.Crypt + } + algo := nl.DeserializeXfrmAlgo(attr.Value[:]) + (*resAlgo).Name = nl.BytesToString(algo.AlgName[:]) + (*resAlgo).Key = algo.AlgKey + case nl.XFRMA_ALG_AUTH_TRUNC: + if state.Auth == nil { + state.Auth = new(XfrmStateAlgo) + } + algo := nl.DeserializeXfrmAlgoAuth(attr.Value[:]) + state.Auth.Name = nl.BytesToString(algo.AlgName[:]) + state.Auth.Key = algo.AlgKey + state.Auth.TruncateLen = int(algo.AlgTruncLen) + case nl.XFRMA_ALG_AEAD: + state.Aead = new(XfrmStateAlgo) + algo := nl.DeserializeXfrmAlgoAEAD(attr.Value[:]) + state.Aead.Name = nl.BytesToString(algo.AlgName[:]) + state.Aead.Key = algo.AlgKey + state.Aead.ICVLen = int(algo.AlgICVLen) + case nl.XFRMA_ENCAP: + encap := nl.DeserializeXfrmEncapTmpl(attr.Value[:]) + state.Encap = new(XfrmStateEncap) + state.Encap.Type = EncapType(encap.EncapType) + state.Encap.SrcPort = int(nl.Swap16(encap.EncapSport)) + state.Encap.DstPort = int(nl.Swap16(encap.EncapDport)) + state.Encap.OriginalAddress = encap.EncapOa.ToIP() + case nl.XFRMA_MARK: + mark := nl.DeserializeXfrmMark(attr.Value[:]) + state.Mark = new(XfrmMark) + state.Mark.Value = mark.Value + state.Mark.Mask = mark.Mask + case nl.XFRMA_SET_MARK: + if state.OutputMark == nil { + state.OutputMark = new(XfrmMark) + } + state.OutputMark.Value = native.Uint32(attr.Value) + case nl.XFRMA_SET_MARK_MASK: + if state.OutputMark == nil { + state.OutputMark = new(XfrmMark) + } + state.OutputMark.Mask = native.Uint32(attr.Value) + if state.OutputMark.Mask == 0xffffffff { + state.OutputMark.Mask = 0 + } + case nl.XFRMA_IF_ID: + state.Ifid = int(native.Uint32(attr.Value)) + } + } + + return state, nil +} + +// XfrmStateFlush will flush the xfrm state on the system. +// proto = 0 means any transformation protocols +// Equivalent to: `ip xfrm state flush [ proto XFRM-PROTO ]` +func XfrmStateFlush(proto Proto) error { + return pkgHandle.XfrmStateFlush(proto) +} + +// XfrmStateFlush will flush the xfrm state on the system. +// proto = 0 means any transformation protocols +// Equivalent to: `ip xfrm state flush [ proto XFRM-PROTO ]` +func (h *Handle) XfrmStateFlush(proto Proto) error { + req := h.newNetlinkRequest(nl.XFRM_MSG_FLUSHSA, unix.NLM_F_ACK) + + req.AddData(&nl.XfrmUsersaFlush{Proto: uint8(proto)}) + + _, err := req.Execute(unix.NETLINK_XFRM, 0) + return err +} + +func limitsToLft(lmts XfrmStateLimits, lft *nl.XfrmLifetimeCfg) { + if lmts.ByteSoft != 0 { + lft.SoftByteLimit = lmts.ByteSoft + } else { + lft.SoftByteLimit = nl.XFRM_INF + } + if lmts.ByteHard != 0 { + lft.HardByteLimit = lmts.ByteHard + } else { + lft.HardByteLimit = nl.XFRM_INF + } + if lmts.PacketSoft != 0 { + lft.SoftPacketLimit = lmts.PacketSoft + } else { + lft.SoftPacketLimit = nl.XFRM_INF + } + if lmts.PacketHard != 0 { + lft.HardPacketLimit = lmts.PacketHard + } else { + lft.HardPacketLimit = nl.XFRM_INF + } + lft.SoftAddExpiresSeconds = lmts.TimeSoft + lft.HardAddExpiresSeconds = lmts.TimeHard + lft.SoftUseExpiresSeconds = lmts.TimeUseSoft + lft.HardUseExpiresSeconds = lmts.TimeUseHard +} + +func lftToLimits(lft *nl.XfrmLifetimeCfg, lmts *XfrmStateLimits) { + *lmts = *(*XfrmStateLimits)(unsafe.Pointer(lft)) +} + +func curToStats(cur *nl.XfrmLifetimeCur, wstats *nl.XfrmStats, stats *XfrmStateStats) { + stats.Bytes = cur.Bytes + stats.Packets = cur.Packets + stats.AddTime = cur.AddTime + stats.UseTime = cur.UseTime + stats.ReplayWindow = wstats.ReplayWindow + stats.Replay = wstats.Replay + stats.Failed = wstats.IntegrityFailed +} + +func xfrmUsersaInfoFromXfrmState(state *XfrmState) *nl.XfrmUsersaInfo { + msg := &nl.XfrmUsersaInfo{} + msg.Family = uint16(nl.GetIPFamily(state.Dst)) + msg.Id.Daddr.FromIP(state.Dst) + msg.Saddr.FromIP(state.Src) + msg.Id.Proto = uint8(state.Proto) + msg.Mode = uint8(state.Mode) + msg.Id.Spi = nl.Swap32(uint32(state.Spi)) + msg.Reqid = uint32(state.Reqid) + msg.ReplayWindow = uint8(state.ReplayWindow) + + return msg +} diff --git a/vendor/github.com/vishvananda/netns/LICENSE b/vendor/github.com/vishvananda/netns/LICENSE new file mode 100644 index 000000000000..9f64db8582cf --- /dev/null +++ b/vendor/github.com/vishvananda/netns/LICENSE @@ -0,0 +1,192 @@ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + Copyright 2014 Vishvananda Ishaya. + Copyright 2014 Docker, Inc. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/vendor/github.com/vishvananda/netns/README.md b/vendor/github.com/vishvananda/netns/README.md new file mode 100644 index 000000000000..1fdb2d3e4ae1 --- /dev/null +++ b/vendor/github.com/vishvananda/netns/README.md @@ -0,0 +1,61 @@ +# netns - network namespaces in go # + +The netns package provides an ultra-simple interface for handling +network namespaces in go. Changing namespaces requires elevated +privileges, so in most cases this code needs to be run as root. + +## Local Build and Test ## + +You can use go get command: + + go get github.com/vishvananda/netns + +Testing (requires root): + + sudo -E go test github.com/vishvananda/netns + +## Example ## + +```go +package main + +import ( + "fmt" + "net" + "runtime" + "github.com/vishvananda/netns" +) + +func main() { + // Lock the OS Thread so we don't accidentally switch namespaces + runtime.LockOSThread() + defer runtime.UnlockOSThread() + + // Save the current network namespace + origns, _ := netns.Get() + defer origns.Close() + + // Create a new network namespace + newns, _ := netns.New() + defer newns.Close() + + // Do something with the network namespace + ifaces, _ := net.Interfaces() + fmt.Printf("Interfaces: %v\n", ifaces) + + // Switch back to the original namespace + netns.Set(origns) +} + +``` + +## NOTE + +The library can be safely used only with Go >= 1.10 due to [golang/go#20676](https://github.com/golang/go/issues/20676). + +After locking a goroutine to its current OS thread with `runtime.LockOSThread()` +and changing its network namespace, any new subsequent goroutine won't be +scheduled on that thread while it's locked. Therefore, the new goroutine +will run in a different namespace leading to unexpected results. + +See [here](https://www.weave.works/blog/linux-namespaces-golang-followup) for more details. diff --git a/vendor/github.com/vishvananda/netns/netns.go b/vendor/github.com/vishvananda/netns/netns.go new file mode 100644 index 000000000000..116befd548c4 --- /dev/null +++ b/vendor/github.com/vishvananda/netns/netns.go @@ -0,0 +1,81 @@ +// Package netns allows ultra-simple network namespace handling. NsHandles +// can be retrieved and set. Note that the current namespace is thread +// local so actions that set and reset namespaces should use LockOSThread +// to make sure the namespace doesn't change due to a goroutine switch. +// It is best to close NsHandles when you are done with them. This can be +// accomplished via a `defer ns.Close()` on the handle. Changing namespaces +// requires elevated privileges, so in most cases this code needs to be run +// as root. +package netns + +import ( + "fmt" + + "golang.org/x/sys/unix" +) + +// NsHandle is a handle to a network namespace. It can be cast directly +// to an int and used as a file descriptor. +type NsHandle int + +// Equal determines if two network handles refer to the same network +// namespace. This is done by comparing the device and inode that the +// file descriptors point to. +func (ns NsHandle) Equal(other NsHandle) bool { + if ns == other { + return true + } + var s1, s2 unix.Stat_t + if err := unix.Fstat(int(ns), &s1); err != nil { + return false + } + if err := unix.Fstat(int(other), &s2); err != nil { + return false + } + return (s1.Dev == s2.Dev) && (s1.Ino == s2.Ino) +} + +// String shows the file descriptor number and its dev and inode. +func (ns NsHandle) String() string { + if ns == -1 { + return "NS(None)" + } + var s unix.Stat_t + if err := unix.Fstat(int(ns), &s); err != nil { + return fmt.Sprintf("NS(%d: unknown)", ns) + } + return fmt.Sprintf("NS(%d: %d, %d)", ns, s.Dev, s.Ino) +} + +// UniqueId returns a string which uniquely identifies the namespace +// associated with the network handle. +func (ns NsHandle) UniqueId() string { + if ns == -1 { + return "NS(none)" + } + var s unix.Stat_t + if err := unix.Fstat(int(ns), &s); err != nil { + return "NS(unknown)" + } + return fmt.Sprintf("NS(%d:%d)", s.Dev, s.Ino) +} + +// IsOpen returns true if Close() has not been called. +func (ns NsHandle) IsOpen() bool { + return ns != -1 +} + +// Close closes the NsHandle and resets its file descriptor to -1. +// It is not safe to use an NsHandle after Close() is called. +func (ns *NsHandle) Close() error { + if err := unix.Close(int(*ns)); err != nil { + return err + } + (*ns) = -1 + return nil +} + +// None gets an empty (closed) NsHandle. +func None() NsHandle { + return NsHandle(-1) +} diff --git a/vendor/github.com/vishvananda/netns/netns_linux.go b/vendor/github.com/vishvananda/netns/netns_linux.go new file mode 100644 index 000000000000..36e64906b6ce --- /dev/null +++ b/vendor/github.com/vishvananda/netns/netns_linux.go @@ -0,0 +1,266 @@ +// +build linux,go1.10 + +package netns + +import ( + "fmt" + "io/ioutil" + "os" + "path" + "path/filepath" + "strconv" + "strings" + "syscall" + + "golang.org/x/sys/unix" +) + +// Deprecated: use syscall pkg instead (go >= 1.5 needed). +const ( + CLONE_NEWUTS = 0x04000000 /* New utsname group? */ + CLONE_NEWIPC = 0x08000000 /* New ipcs */ + CLONE_NEWUSER = 0x10000000 /* New user namespace */ + CLONE_NEWPID = 0x20000000 /* New pid namespace */ + CLONE_NEWNET = 0x40000000 /* New network namespace */ + CLONE_IO = 0x80000000 /* Get io context */ + bindMountPath = "/run/netns" /* Bind mount path for named netns */ +) + +// Setns sets namespace using syscall. Note that this should be a method +// in syscall but it has not been added. +func Setns(ns NsHandle, nstype int) (err error) { + return unix.Setns(int(ns), nstype) +} + +// Set sets the current network namespace to the namespace represented +// by NsHandle. +func Set(ns NsHandle) (err error) { + return Setns(ns, CLONE_NEWNET) +} + +// New creates a new network namespace, sets it as current and returns +// a handle to it. +func New() (ns NsHandle, err error) { + if err := unix.Unshare(CLONE_NEWNET); err != nil { + return -1, err + } + return Get() +} + +// NewNamed creates a new named network namespace and returns a handle to it +func NewNamed(name string) (NsHandle, error) { + if _, err := os.Stat(bindMountPath); os.IsNotExist(err) { + err = os.MkdirAll(bindMountPath, 0755) + if err != nil { + return None(), err + } + } + + newNs, err := New() + if err != nil { + return None(), err + } + + namedPath := path.Join(bindMountPath, name) + + f, err := os.OpenFile(namedPath, os.O_CREATE|os.O_EXCL, 0444) + if err != nil { + return None(), err + } + f.Close() + + nsPath := fmt.Sprintf("/proc/%d/task/%d/ns/net", os.Getpid(), syscall.Gettid()) + err = syscall.Mount(nsPath, namedPath, "bind", syscall.MS_BIND, "") + if err != nil { + return None(), err + } + + return newNs, nil +} + +// DeleteNamed deletes a named network namespace +func DeleteNamed(name string) error { + namedPath := path.Join(bindMountPath, name) + + err := syscall.Unmount(namedPath, syscall.MNT_DETACH) + if err != nil { + return err + } + + return os.Remove(namedPath) +} + +// Get gets a handle to the current threads network namespace. +func Get() (NsHandle, error) { + return GetFromThread(os.Getpid(), unix.Gettid()) +} + +// GetFromPath gets a handle to a network namespace +// identified by the path +func GetFromPath(path string) (NsHandle, error) { + fd, err := unix.Open(path, unix.O_RDONLY|unix.O_CLOEXEC, 0) + if err != nil { + return -1, err + } + return NsHandle(fd), nil +} + +// GetFromName gets a handle to a named network namespace such as one +// created by `ip netns add`. +func GetFromName(name string) (NsHandle, error) { + return GetFromPath(fmt.Sprintf("/var/run/netns/%s", name)) +} + +// GetFromPid gets a handle to the network namespace of a given pid. +func GetFromPid(pid int) (NsHandle, error) { + return GetFromPath(fmt.Sprintf("/proc/%d/ns/net", pid)) +} + +// GetFromThread gets a handle to the network namespace of a given pid and tid. +func GetFromThread(pid, tid int) (NsHandle, error) { + return GetFromPath(fmt.Sprintf("/proc/%d/task/%d/ns/net", pid, tid)) +} + +// GetFromDocker gets a handle to the network namespace of a docker container. +// Id is prefixed matched against the running docker containers, so a short +// identifier can be used as long as it isn't ambiguous. +func GetFromDocker(id string) (NsHandle, error) { + pid, err := getPidForContainer(id) + if err != nil { + return -1, err + } + return GetFromPid(pid) +} + +// borrowed from docker/utils/utils.go +func findCgroupMountpoint(cgroupType string) (string, error) { + output, err := ioutil.ReadFile("/proc/mounts") + if err != nil { + return "", err + } + + // /proc/mounts has 6 fields per line, one mount per line, e.g. + // cgroup /sys/fs/cgroup/devices cgroup rw,relatime,devices 0 0 + for _, line := range strings.Split(string(output), "\n") { + parts := strings.Split(line, " ") + if len(parts) == 6 && parts[2] == "cgroup" { + for _, opt := range strings.Split(parts[3], ",") { + if opt == cgroupType { + return parts[1], nil + } + } + } + } + + return "", fmt.Errorf("cgroup mountpoint not found for %s", cgroupType) +} + +// Returns the relative path to the cgroup docker is running in. +// borrowed from docker/utils/utils.go +// modified to get the docker pid instead of using /proc/self +func getThisCgroup(cgroupType string) (string, error) { + dockerpid, err := ioutil.ReadFile("/var/run/docker.pid") + if err != nil { + return "", err + } + result := strings.Split(string(dockerpid), "\n") + if len(result) == 0 || len(result[0]) == 0 { + return "", fmt.Errorf("docker pid not found in /var/run/docker.pid") + } + pid, err := strconv.Atoi(result[0]) + if err != nil { + return "", err + } + output, err := ioutil.ReadFile(fmt.Sprintf("/proc/%d/cgroup", pid)) + if err != nil { + return "", err + } + for _, line := range strings.Split(string(output), "\n") { + parts := strings.Split(line, ":") + // any type used by docker should work + if parts[1] == cgroupType { + return parts[2], nil + } + } + return "", fmt.Errorf("cgroup '%s' not found in /proc/%d/cgroup", cgroupType, pid) +} + +// Returns the first pid in a container. +// borrowed from docker/utils/utils.go +// modified to only return the first pid +// modified to glob with id +// modified to search for newer docker containers +func getPidForContainer(id string) (int, error) { + pid := 0 + + // memory is chosen randomly, any cgroup used by docker works + cgroupType := "memory" + + cgroupRoot, err := findCgroupMountpoint(cgroupType) + if err != nil { + return pid, err + } + + cgroupThis, err := getThisCgroup(cgroupType) + if err != nil { + return pid, err + } + + id += "*" + + attempts := []string{ + filepath.Join(cgroupRoot, cgroupThis, id, "tasks"), + // With more recent lxc versions use, cgroup will be in lxc/ + filepath.Join(cgroupRoot, cgroupThis, "lxc", id, "tasks"), + // With more recent docker, cgroup will be in docker/ + filepath.Join(cgroupRoot, cgroupThis, "docker", id, "tasks"), + // Even more recent docker versions under systemd use docker-.scope/ + filepath.Join(cgroupRoot, "system.slice", "docker-"+id+".scope", "tasks"), + // Even more recent docker versions under cgroup/systemd/docker// + filepath.Join(cgroupRoot, "..", "systemd", "docker", id, "tasks"), + // Kubernetes with docker and CNI is even more different. Works for BestEffort and Burstable QoS + filepath.Join(cgroupRoot, "..", "systemd", "kubepods", "*", "pod*", id, "tasks"), + // Same as above but for Guaranteed QoS + filepath.Join(cgroupRoot, "..", "systemd", "kubepods", "pod*", id, "tasks"), + // Another flavor of containers location in recent kubernetes 1.11+. Works for BestEffort and Burstable QoS + filepath.Join(cgroupRoot, cgroupThis, "kubepods.slice", "*.slice", "*", "docker-"+id+".scope", "tasks"), + // Same as above but for Guaranteed QoS + filepath.Join(cgroupRoot, cgroupThis, "kubepods.slice", "*", "docker-"+id+".scope", "tasks"), + // When runs inside of a container with recent kubernetes 1.11+. Works for BestEffort and Burstable QoS + filepath.Join(cgroupRoot, "kubepods.slice", "*.slice", "*", "docker-"+id+".scope", "tasks"), + // Same as above but for Guaranteed QoS + filepath.Join(cgroupRoot, "kubepods.slice", "*", "docker-"+id+".scope", "tasks"), + } + + var filename string + for _, attempt := range attempts { + filenames, _ := filepath.Glob(attempt) + if len(filenames) > 1 { + return pid, fmt.Errorf("Ambiguous id supplied: %v", filenames) + } else if len(filenames) == 1 { + filename = filenames[0] + break + } + } + + if filename == "" { + return pid, fmt.Errorf("Unable to find container: %v", id[:len(id)-1]) + } + + output, err := ioutil.ReadFile(filename) + if err != nil { + return pid, err + } + + result := strings.Split(string(output), "\n") + if len(result) == 0 || len(result[0]) == 0 { + return pid, fmt.Errorf("No pid found for container") + } + + pid, err = strconv.Atoi(result[0]) + if err != nil { + return pid, fmt.Errorf("Invalid pid '%s': %s", result[0], err) + } + + return pid, nil +} diff --git a/vendor/github.com/vishvananda/netns/netns_unspecified.go b/vendor/github.com/vishvananda/netns/netns_unspecified.go new file mode 100644 index 000000000000..d06af62b68ad --- /dev/null +++ b/vendor/github.com/vishvananda/netns/netns_unspecified.go @@ -0,0 +1,43 @@ +// +build !linux + +package netns + +import ( + "errors" +) + +var ( + ErrNotImplemented = errors.New("not implemented") +) + +func Set(ns NsHandle) (err error) { + return ErrNotImplemented +} + +func New() (ns NsHandle, err error) { + return -1, ErrNotImplemented +} + +func Get() (NsHandle, error) { + return -1, ErrNotImplemented +} + +func GetFromPath(path string) (NsHandle, error) { + return -1, ErrNotImplemented +} + +func GetFromName(name string) (NsHandle, error) { + return -1, ErrNotImplemented +} + +func GetFromPid(pid int) (NsHandle, error) { + return -1, ErrNotImplemented +} + +func GetFromThread(pid, tid int) (NsHandle, error) { + return -1, ErrNotImplemented +} + +func GetFromDocker(id string) (NsHandle, error) { + return -1, ErrNotImplemented +} diff --git a/vendor/modules.txt b/vendor/modules.txt index 0649b50cf15c..3d9b9212bc5e 100644 --- a/vendor/modules.txt +++ b/vendor/modules.txt @@ -769,6 +769,13 @@ github.com/urfave/cli # github.com/vbatts/tar-split v0.11.3 ## explicit; go 1.15 github.com/vbatts/tar-split/archive/tar +# github.com/vishvananda/netlink v1.2.1-beta.2 +## explicit; go 1.12 +github.com/vishvananda/netlink +github.com/vishvananda/netlink/nl +# github.com/vishvananda/netns v0.0.0-20210104183010-2eb08e3e575f +## explicit; go 1.12 +github.com/vishvananda/netns # go.etcd.io/bbolt v1.3.7 ## explicit; go 1.17 go.etcd.io/bbolt