From dac7dc7f582db3ec16862d9cc2158f1f1e5bc2b9 Mon Sep 17 00:00:00 2001 From: terassyi Date: Tue, 12 Mar 2024 22:21:17 +0900 Subject: [PATCH] add a cni feature Signed-off-by: terassyi --- .github/workflows/ci.yaml | 84 -- .github/workflows/e2e-test.yaml | 153 +++ .github/workflows/unit-test.yaml | 32 + .gitignore | 6 +- Dockerfile | 2 +- Makefile | 4 +- README.md | 78 +- docs/cni.md | 66 + docs/design.md | 19 +- docs/kubernetes.md | 8 +- e2e/Makefile | 69 +- e2e/Makefile.versions | 12 +- e2e/README.md | 32 +- e2e/img/kubernetes-cni-compact.drawio.svg | 651 ++++++++++ e2e/img/kubernetes-cni.drawio.svg | 811 ++++++++++++ e2e/kind-config.yaml | 1 - e2e/kubernetes_cni_test.go | 756 +++++++++++ e2e/kubernetes_test.go | 119 +- e2e/suite_test.go | 20 +- e2e/topology/generator/go.mod | 48 + e2e/topology/generator/go.sum | 156 +++ e2e/topology/generator/main.go | 125 ++ e2e/topology/kubernetes-cni-compact.yaml.tmpl | 210 +++ e2e/topology/kubernetes-cni.yaml.tmpl | 235 ++++ manifests/base/certs/tls.cert | 11 - manifests/base/certs/tls.key | 5 - manifests/base/webhook/admission_webhook.yaml | 2 +- .../base/webhook/admission_webhook_patch.yaml | 12 +- manifests/base/workloads/agent.yaml | 11 +- manifests/base/workloads/controller.yaml | 19 +- manifests/base/workloads/speaker.yaml | 2 +- manifests/cni/agent-patch.yaml | 9 +- manifests/cni/configmap.yaml | 18 + manifests/cni/controller-patch.yaml | 5 + manifests/cni/kustomization.yaml | 2 + manifests/cni/sample/bgp_peer.yaml | 31 + manifests/cni/sample/client.yaml | 9 + manifests/cni/sample/cluster_bgp_spine0.yaml | 18 + manifests/cni/sample/cluster_bgp_spine1.yaml | 18 + manifests/cni/sample/deployment.yaml | 33 + manifests/cni/sample/kustomization.yaml | 7 + manifests/cni/sample/namespace.yaml | 6 + manifests/cni/sample/peer_template.yaml | 19 + manifests/cni/sample/test_pod.yaml | 59 + manifests/cni/sample/test_pod2.yaml | 11 + .../cni/sample/test_pod_another_pool.yaml | 13 + .../cni/sample/test_pod_in_namespace.yaml | 32 + manifests/cni/speaker-patch.yaml | 37 + manifests/dual/agent-patch.yaml | 14 + manifests/dual/controller-patch.yaml | 13 + manifests/lb/agent-patch.yaml | 5 +- manifests/lb/controller-patch.yaml | 13 + manifests/lb/kustomization.yaml | 1 + manifests/{base => lb}/sample/bgp_peer.yaml | 0 .../{base => lb}/sample/cluster_bgp_a.yaml | 1 + .../{base => lb}/sample/cluster_bgp_b.yaml | 1 + .../{base => lb}/sample/cluster_bgp_c.yaml | 1 + .../{base => lb}/sample/kustomization.yaml | 0 manifests/{base => lb}/sample/lb.yaml | 0 .../{base => lb}/sample/lb_address_pool.yaml | 0 manifests/{base => lb}/sample/lb_another.yaml | 0 .../{base => lb}/sample/peer_template.yaml | 0 sartd/Cargo.lock | 56 +- sartd/src/bgp/src/error.rs | 6 + sartd/src/bgp/src/event.rs | 59 +- sartd/src/bgp/src/packet/codec.rs | 15 +- sartd/src/bgp/src/peer/fsm.rs | 2 +- sartd/src/bgp/src/peer/peer.rs | 220 +++- sartd/src/bgp/src/server.rs | 4 +- sartd/src/bin/cni-installer.rs | 77 ++ sartd/src/cmd/Cargo.lock | 56 +- sartd/src/cmd/src/cmd.rs | 2 + sartd/src/cmd/src/controller.rs | 12 +- sartd/src/fib/src/kernel.rs | 71 +- sartd/src/kubernetes/Cargo.lock | 19 +- sartd/src/kubernetes/Cargo.toml | 5 +- sartd/src/kubernetes/config/.cargo | 2 + sartd/src/kubernetes/src/agent/cni.rs | 4 + sartd/src/kubernetes/src/agent/cni/error.rs | 48 + sartd/src/kubernetes/src/agent/cni/gc.rs | 163 +++ sartd/src/kubernetes/src/agent/cni/netlink.rs | 1030 +++++++++++++++ sartd/src/kubernetes/src/agent/cni/netns.rs | 182 +++ sartd/src/kubernetes/src/agent/cni/pod.rs | 363 +++++- 
sartd/src/kubernetes/src/agent/cni/server.rs | 1153 +++++++++++++---- .../src/agent/reconciler/address_block.rs | 101 +- .../src/agent/reconciler/bgp_advertisement.rs | 5 + .../src/agent/reconciler/bgp_peer.rs | 10 +- .../src/agent/reconciler/node_bgp.rs | 21 +- sartd/src/kubernetes/src/agent/server.rs | 49 +- sartd/src/kubernetes/src/config.rs | 8 +- .../controller/reconciler/address_block.rs | 11 +- .../src/controller/reconciler/address_pool.rs | 1 - .../controller/reconciler/block_request.rs | 186 ++- sartd/src/kubernetes/src/controller/server.rs | 48 +- .../src/controller/webhook/address_pool.rs | 12 +- .../src/controller/webhook/bgp_peer.rs | 2 +- sartd/src/kubernetes/src/crd/cluster_bgp.rs | 1 + sartd/src/kubernetes/src/fixture.rs | 41 +- .../kubernetes/tests/agent_cni_server_test.rs | 272 ++++ sartd/src/kubernetes/tests/common/mod.rs | 84 +- sartd/src/kubernetes/tests/config/.cargo | 2 + sartd/src/kubernetes/tests/config/config.yaml | 6 + .../kubernetes/tests/config/dummy_kubeconfig | 19 + 103 files changed, 7841 insertions(+), 722 deletions(-) delete mode 100644 .github/workflows/ci.yaml create mode 100644 .github/workflows/e2e-test.yaml create mode 100644 .github/workflows/unit-test.yaml create mode 100644 e2e/img/kubernetes-cni-compact.drawio.svg create mode 100644 e2e/img/kubernetes-cni.drawio.svg create mode 100644 e2e/kubernetes_cni_test.go create mode 100644 e2e/topology/generator/go.mod create mode 100644 e2e/topology/generator/go.sum create mode 100644 e2e/topology/generator/main.go create mode 100644 e2e/topology/kubernetes-cni-compact.yaml.tmpl create mode 100644 e2e/topology/kubernetes-cni.yaml.tmpl delete mode 100644 manifests/base/certs/tls.cert delete mode 100644 manifests/base/certs/tls.key create mode 100644 manifests/cni/configmap.yaml create mode 100644 manifests/cni/sample/bgp_peer.yaml create mode 100644 manifests/cni/sample/client.yaml create mode 100644 manifests/cni/sample/cluster_bgp_spine0.yaml create mode 100644 manifests/cni/sample/cluster_bgp_spine1.yaml create mode 100644 manifests/cni/sample/deployment.yaml create mode 100644 manifests/cni/sample/namespace.yaml create mode 100644 manifests/cni/sample/peer_template.yaml create mode 100644 manifests/cni/sample/test_pod.yaml create mode 100644 manifests/cni/sample/test_pod2.yaml create mode 100644 manifests/cni/sample/test_pod_another_pool.yaml create mode 100644 manifests/cni/sample/test_pod_in_namespace.yaml create mode 100644 manifests/cni/speaker-patch.yaml create mode 100644 manifests/dual/agent-patch.yaml create mode 100644 manifests/dual/controller-patch.yaml create mode 100644 manifests/lb/controller-patch.yaml rename manifests/{base => lb}/sample/bgp_peer.yaml (100%) rename manifests/{base => lb}/sample/cluster_bgp_a.yaml (94%) rename manifests/{base => lb}/sample/cluster_bgp_b.yaml (92%) rename manifests/{base => lb}/sample/cluster_bgp_c.yaml (95%) rename manifests/{base => lb}/sample/kustomization.yaml (100%) rename manifests/{base => lb}/sample/lb.yaml (100%) rename manifests/{base => lb}/sample/lb_address_pool.yaml (100%) rename manifests/{base => lb}/sample/lb_another.yaml (100%) rename manifests/{base => lb}/sample/peer_template.yaml (100%) create mode 100644 sartd/src/bin/cni-installer.rs create mode 100644 sartd/src/kubernetes/config/.cargo create mode 100644 sartd/src/kubernetes/src/agent/cni/error.rs create mode 100644 sartd/src/kubernetes/src/agent/cni/gc.rs create mode 100644 sartd/src/kubernetes/src/agent/cni/netlink.rs create mode 100644 sartd/src/kubernetes/src/agent/cni/netns.rs 
create mode 100644 sartd/src/kubernetes/tests/agent_cni_server_test.rs create mode 100644 sartd/src/kubernetes/tests/config/.cargo create mode 100644 sartd/src/kubernetes/tests/config/config.yaml create mode 100644 sartd/src/kubernetes/tests/config/dummy_kubeconfig diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml deleted file mode 100644 index 130fd53..0000000 --- a/.github/workflows/ci.yaml +++ /dev/null @@ -1,84 +0,0 @@ -name: ci -on: [push, pull_request] - -env: - CARGO_TERM_COLOR: always - TAGS: dev - go-version: "1.21" - -jobs: - unit-test: - name: Unit Test - runs-on: ubuntu-22.04 - steps: - - uses: actions/checkout@v4 - - name: Setup Build Dependencies - run: make setup-grpc - - name: Fmt - run: make fmt - - name: Run Unit Test - run: make unit-test - integration-test: - name: Integration Test - runs-on: ubuntu-22.04 - steps: - - uses: actions/checkout@v4 - - name: Setup Build Dependencies - run: make setup-grpc - - name: Install Kubernetes Dependencies - run: |- - make -C e2e setup - - name: Run Integration Test - run: make integration-test - bgp-e2e-test: - name: BGP End-to-End Test - runs-on: ubuntu-22.04 - steps: - - uses: actions/checkout@v4 - - uses: actions/setup-go@v5 - with: - go-version: ${{ env.go-version }} - - name: Build Image - run: make build-dev-image - - name: Setup Tools - working-directory: e2e - run: make setup - - name: Run E2E Test - working-directory: e2e - run: make bgp-e2e - kubernetes-e2e-test: - name: Kubernetes End-to-End Test - strategy: - matrix: - # kindest node version - kubernetes-version: ["1.26.4", "1.27.1", "1.28.0"] - runs-on: ubuntu-22.04 - steps: - - uses: actions/checkout@v4 - - uses: actions/setup-go@v5 - with: - go-version: ${{ env.go-version }} - - name: Build Image - run: make build-image - - name: Setup Tools - working-directory: e2e - run: |- - make setup - make -C ../ setup-grpc - - name: Start Kind Cluster - working-directory: e2e - run: make kubernetes KUBERNETES_VERSION=${{ matrix.kubernetes-version }} - - name: Generate Certificates - run: make certs - - name: Generate CRD - run: make crd - - name: Install sart - working-directory: e2e - run: make install-sart - - name: Run E2E Test - working-directory: e2e - run: make kubernetes-e2e - - name: Clean up Kind Cluster - working-directory: e2e - run: make kubernetes-down - if: always() diff --git a/.github/workflows/e2e-test.yaml b/.github/workflows/e2e-test.yaml new file mode 100644 index 0000000..0695e8c --- /dev/null +++ b/.github/workflows/e2e-test.yaml @@ -0,0 +1,153 @@ +name: e2e test +on: + pull_request: + paths-ignore: + - '.gitignore' + - '.dockerignore' + - 'LICENSE' + - '**.md' + push: + branches: + - main + +env: + CARGO_TERM_COLOR: always + TAGS: dev + go-version: "1.22" + +jobs: + build-image: + name: Build container image + runs-on: ubuntu-22.04 + steps: + - uses: actions/checkout@v4 + - name: Set up Docker buildx + uses: docker/setup-buildx-action@v3 + - name: Build and Export + uses: docker/build-push-action@v5 + with: + context: . 
+ tags: sart:dev + outputs: type=docker,dest=/tmp/sart.tar + - name: Upload container image + uses: actions/upload-artifact@v4 + with: + name: sart + path: /tmp/sart.tar + bgp-e2e-test: + name: BGP End-to-End Test + needs: build-image + runs-on: ubuntu-22.04 + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-go@v5 + with: + go-version: ${{ env.go-version }} + - name: Download sart container image + uses: actions/download-artifact@v4 + with: + name: sart + path: /tmp + - name: Load sart container image + run: | + docker load --input /tmp/sart.tar + docker image ls -a + - name: Setup Tools + working-directory: e2e + run: make setup + - name: Run E2E Test + working-directory: e2e + run: make bgp-e2e + kubernetes-e2e-test: + name: Kubernetes End-to-End Test + needs: build-image + strategy: + matrix: + # kindest node version + kubernetes-version: ["1.28.6", "1.29.2"] + runs-on: ubuntu-22.04 + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-go@v5 + with: + go-version: ${{ env.go-version }} + - name: Download sart container image + uses: actions/download-artifact@v4 + with: + name: sart + path: /tmp + - name: Load sartd container image + run: | + docker load --input /tmp/sart.tar + docker image ls -a + - name: Setup Tools + working-directory: e2e + run: |- + make setup + make -C ../ setup-grpc + - name: Start Kind Cluster + working-directory: e2e + run: make kubernetes KUBERNETES_VERSION=${{ matrix.kubernetes-version }} + - name: Generate Certificates + run: make certs + - name: Generate CRD + run: make crd + - name: Install sart + working-directory: e2e + run: make install-sart + - name: Run E2E Test + working-directory: e2e + run: make kubernetes-e2e + - name: Clean up Kind Cluster + working-directory: e2e + run: make kubernetes-down + if: always() + cni-e2e-test: + name: CNI End-to-End Test + needs: build-image + strategy: + matrix: + # kindest node version + kubernetes-version: ["1.28.6", "1.29.2"] + runs-on: ubuntu-22.04 + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-go@v5 + with: + go-version: ${{ env.go-version }} + - name: Download sart container image + uses: actions/download-artifact@v4 + with: + name: sart + path: /tmp + - name: Load sart container image + run: | + docker load --input /tmp/sart.tar + docker image ls -a + - name: Setup Tools + working-directory: e2e + run: |- + make setup + make -C ../ setup-grpc + - name: Start Kind Cluster + working-directory: e2e + run: make kubernetes MODE=cni COMPACT=true KUBERNETES_VERSION=${{ matrix.kubernetes-version }} + env: + MODE: cni + KUBERNETES_VERSION: ${{ matrix.kubernetes-version }} + - name: Generate Certificates + run: make certs + - name: Generate CRD + run: make crd + - name: Install sart + working-directory: e2e + run: make install-sart MODE=cni + env: + MODE: cni + - name: Run E2E Test + working-directory: e2e + run: make cni-e2e + - name: Clean up Kind Cluster + working-directory: e2e + run: make kubernetes-down MODE=cni COMPACT=true + if: always() diff --git a/.github/workflows/unit-test.yaml b/.github/workflows/unit-test.yaml new file mode 100644 index 0000000..234e5dc --- /dev/null +++ b/.github/workflows/unit-test.yaml @@ -0,0 +1,32 @@ +name: unit test +on: [push] + +env: + CARGO_TERM_COLOR: always + +jobs: + unit-test: + name: Unit Test + runs-on: ubuntu-22.04 + steps: + - uses: actions/checkout@v4 + - name: Setup Build Dependencies + run: make setup-grpc + - name: Fmt + run: make fmt + - name: Run Unit Test + run: make unit-test + integration-test: + name: Integration Test + 
runs-on: ubuntu-22.04 + steps: + - uses: actions/checkout@v4 + - name: Setup Build Dependencies + run: make setup-grpc + - name: Generate CRD manifests + run: make crd + - name: Install Kubernetes Dependencies + run: |- + make -C e2e setup + - name: Run Integration Test + run: make integration-test
diff --git a/.gitignore b/.gitignore index 34888d2..c4b3c30 100644 --- a/.gitignore +++ b/.gitignore @@ -4,7 +4,8 @@ *.pcaping .vscode *.bin -**/bin +/bin +/e2e/bin */*.pem */*.cert */*.key @@ -14,7 +15,8 @@ sart.yaml **/e2e.test e2e/topology/.*.yaml.bak +e2e/topology/kubernetes-cni*.yaml e2e/clab-sart -manifests/certs/* +manifests/base/certs/* manifests/webhook/admission_webhook_patch.yaml
diff --git a/Dockerfile b/Dockerfile index f06b737..f7ad2d2 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,4 +1,4 @@ -ARG RUST_VERSION=1.74.1 +ARG RUST_VERSION=1.76.0 # BUILDPLATFORM = linux/amd64
diff --git a/Makefile b/Makefile index 73149b4..1419bd0 100644 --- a/Makefile +++ b/Makefile @@ -3,8 +3,8 @@ CARGO := cargo IMAGE_VERSION := dev PROJECT := github.com/terassyi/sart -KIND_VERSION := 0.20.0 -KUBERNETES_VERSION := 1.28.0 +KIND_VERSION := 0.22.0 +KUBERNETES_VERSION := 1.29.2 KUSTOMIZE_VERSION := 5.2.1 BINDIR := $(abspath $(PWD)/bin)
diff --git a/README.md b/README.md index 488bff7..ac742c4 100644 --- a/README.md +++ b/README.md @@ -4,16 +4,82 @@ # Sart -Sart is the Kubernetes network load-balancer and CNI plugin for Kubernetes using BGP in Rust. +Sart is a Kubernetes network load-balancer and CNI plugin using BGP, written in pure Rust. This project is inspired by [Metallb](https://github.com/metallb/metallb) and [Coil](https://github.com/cybozu-go/coil). > [!WARNING] > This project is experimental. -> CNI feature is not implemented yet. ## Features +### CNI + +Sart has a CNI plugin feature to configure pod network settings. + +For now, sart supports only the IPAM feature. + +### Assigning an address to Pods + +To assign an IP address to a pod and configure network settings such as routing information, we have to create `AddressPool` resources. +Sart can handle multiple AddressPools. +So we can select the pool to use per pod or per namespace. +To specify the pool, we can add the annotation `sart.terassyi.net/addresspool` to a pod or a namespace. + +For more details, please see [docs/cni.md](./docs/cni.md). + +### Quick Start + +Sart can run in a container-based environment using [kind](https://kind.sigs.k8s.io/) and [containerlab](https://containerlab.dev/). + +We also need to [install Rust and Cargo](https://doc.rust-lang.org/cargo/getting-started/installation.html). + +First, we have to create the test environment and run e2e tests. + +For more information about e2e tests, please see [e2e/README.md](./e2e/README.md). + +```console +$ make build-image +$ make certs +$ make crd +$ cd e2e +$ make setup +$ make kubernetes MODE=cni +$ make install-sart MODE=cni +$ make kubernetes-e2e MODE=cni +``` + +After running the e2e tests, we can see that the pods are running.
+ +```console +$ kubectl get pod -n test -owide +NAME READY STATUS RESTARTS AGE IP NODE NOMINATED NODE READINESS GATES +test-cp 1/1 Running 0 11h 10.1.0.0 sart-control-plane +test-worker 1/1 Running 0 11h 10.1.0.8 sart-worker +test-worker-non-default 1/1 Running 0 11h 10.10.0.0 sart-worker +test-worker2 1/1 Running 0 11h 10.1.0.24 sart-worker2 +test-worker3 1/1 Running 0 11h 10.1.0.16 sart-worker3 +test-worker3-2 1/1 Running 0 11h 10.1.0.17 sart-worker3 +test-worker3-3 1/1 Running 0 11h 10.1.0.18 sart-worker3 +``` + +And we can confirm that each pod has reachability to the other pods. + +```console +$ kubectl -n test exec -it test-cp -- ping -c 1 10.1.0.17 +PING 10.1.0.17 (10.1.0.17) 56(84) bytes of data. +64 bytes from 10.1.0.17: icmp_seq=1 ttl=61 time=0.155 ms + +--- 10.1.0.17 ping statistics --- +1 packets transmitted, 1 received, 0% packet loss, time 0ms +rtt min/avg/max/mdev = 0.155/0.155/0.155/0.000 ms +``` + +To clean up, run `make kubernetes-down MODE=cni`. + + +## Load Balancer + ### Allocating LoadBalancer addresses Sart can create multiple AddressPools to define the range of IP addresses usable for Kubernetes service type LoadBalancer. @@ -31,11 +97,7 @@ Sart implements the BGP speaker feature and provides its abstraction layer as Ku Please see the detailed manifests in [manifests/sample](manifests/sample/). -## Quick Start - -Sart can run on the container based environment using [kind](https://kind.sigs.k8s.io/) and [containerlab](https://containerlab.dev/). - -And we also need to [install Rust and Cargo](https://doc.rust-lang.org/cargo/getting-started/installation.html). +### Quick Start First, we have to create the test environment and run e2e tests. @@ -49,7 +111,7 @@ $ cd e2e $ make setup $ make kubernetes $ make install-sart -$ make kubernetes-e2e +$ make kubernetes-e2e ``` After that, we can confirm `EXTERNAL-IP`s are assigned and check the connectivity.
diff --git a/docs/cni.md index e69de29..07aa1e4 100644 --- a/docs/cni.md +++ b/docs/cni.md @@ -0,0 +1,66 @@ +# How to use as Kubernetes CNI plugin + +To run sart as a Kubernetes CNI plugin, we need to run four components: `sartd controller`, `sartd agent`, `sartd bgp` and `sartd fib`. + +`controller` runs as a `Deployment`. +And `agent`, `bgp` and `fib` run as `DaemonSet`s. + +To work as a CNI plugin, all components must run in the host network namespace. + +To know sart's CRDs, please see [design #Kubernetes](./design.md#kubernetes) and [design #CNI](./design.md#cni-for-kubernetes). + +For CNI to work properly, we need to configure BGP speakers on the nodes. +Please see [Kubernetes BGP configurations](./kubernetes.md#bgp-configurations). + +## Address pools + +To assign IP addresses to pods, we have to create address pools. +We can create multiple pools in a cluster. + +The `type` field specifies what the address pool is used for. +For pods, we have to set `type: pod`. + +```yaml +apiVersion: sart.terassyi.net/v1alpha2 +kind: AddressPool +metadata: + name: default-pod-pool +spec: + cidr: 10.1.0.0/24 + type: pod + allocType: bit + blockSize: 29 + autoAssign: true +``` + +`cidr` is the subnet of assignable addresses for pods. + +`allocType` specifies the method used to pick an address from a pool. +Currently, only `bit` is available. + +The bit allocator chooses the assignable address with the lowest number. +For example, if `10.0.0.1` and `10.0.0.10` are both assignable, the bit allocator always chooses `10.0.0.1`. + +`blockSize` is used to divide the address pool into `AddressBlock`s.
+An `AddressBlock` for pods is associated with a node, and its creation is requested via a `BlockRequest` resource. +Each block derives its cidr from this value. + +In this example, the cidr of one address block is `10.1.0.0/29` and another is `10.1.0.8/29`. + +`autoAssign` specifies whether the pool is used as the default pool. +If `autoAssign` is true, we can omit specifying the name of the pool in the annotation (described below). +We cannot create multiple auto-assignable pools for each `type`. + +Please note that we cannot change an `AddressPool`'s spec fields except for `autoAssign` once it is created. + +### Basic usage + +Once `AddressPool`s are created, we can create pods the same way as with other CNI plugins. +When allocating from the auto-assignable pool, no special procedure is needed. + +### Choosing AddressPool + +Sart CNI can create multiple pools for pods. +To use a pool that is not auto assignable, we should add an annotation named `sart.terassyi.net/addresspool` to the namespace in which the pod exists, or to the pod directly. +
diff --git a/docs/design.md index d4870bf..58b36b3 100644 --- a/docs/design.md +++ b/docs/design.md @@ -20,8 +20,9 @@ Sart has the following programs. - fib: FIB daemon - agent: Kubernetes agent running as DaemonSet - controller: Kubernetes custom controller running as Deployment -- `sartcni` - - TBD +- `sart-cni` + - CLI tool for CNI + - gRPC client to communicate with `sartd agent` ## BGP @@ -42,7 +43,7 @@ Now sartd-bgp supports minimal features to work as a BGP speaker. - [ ] BGP unnumbered - [ ] Route Reflector -### Archietecture +### Architecture This figure shows a basic model of sartd-bgp. @@ -434,3 +435,15 @@ The program that satisfies this specification is `sartcni`. It is called by `kubelet` when creating a pod, and it delegates the given request to `sartd-agent` via a gRPC API, as shown below. ![cni-internal.drawio.svg](./img/cni-internal.drawio.svg) + +The flow is as follows. + +1. The admin creates the AddressPool. +2. The user creates a pod. +3. Kubelet on a node creates the container processes. +4. Kubelet (or the container runtime) calls the `sart-cni` binary to set up the interface in the container. +5. `sart-cni` calls the gRPC API of the `sartd-agent` on the same node to configure the interface. +6. `sartd-agent` gets the desired pool for the address assignment and refers to the block on the node. +7. If a block doesn't exist on the node, `sartd-agent` requests an allocation and `sart-controller` creates one. +8. `sartd-agent` assigns an IP address to the pod from the block and configures the interface and routing information. +9. `sartd-agent` returns the result to `sart-cni`, which propagates it to the caller.
diff --git a/docs/kubernetes.md index c33825a..80d700e 100644 --- a/docs/kubernetes.md +++ b/docs/kubernetes.md @@ -1,12 +1,10 @@ # How to Use as Kubernetes network load-balancer -To use kubernetes network load-balancer, we need to run two components: `sartd controller` and `sartd agent`. +To use the Kubernetes network load-balancer, we need to run three components: `sartd controller`, `sartd agent` and `sartd bgp`. The `controller` runs as a `Deployment`. -And the `agent` runs as `DaemonSet`. -`agent` must run in host network namespace. - -For installation, please read [setup for kubernetes](./setup-kubernetes.md). +And the `agent` and `bgp` run as `DaemonSet`s. +`agent` and `bgp` must run in the host network namespace.
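+
+The following is a minimal sketch of what such a host-network `DaemonSet` looks like. This is an illustration only, not the actual manifest shipped in this repository (see `manifests/base/workloads/` for the real specs); the image tag and args here are assumptions.
+
+```yaml
+apiVersion: apps/v1
+kind: DaemonSet
+metadata:
+  name: sartd-bgp        # hypothetical name, for illustration
+  namespace: kube-system
+spec:
+  selector:
+    matchLabels:
+      app: sartd-bgp
+  template:
+    metadata:
+      labels:
+        app: sartd-bgp
+    spec:
+      hostNetwork: true   # BGP must speak from the node's own network namespace
+      containers:
+        - name: bgp
+          image: sart:dev  # assumed dev image tag used by the e2e setup
+          args: ["bgp"]    # assumed subcommand for `sartd bgp`
+```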
To know sart's CRDs, please see [design #Kubernetes](./design.md#kubernetes) diff --git a/e2e/Makefile b/e2e/Makefile index 2668128..c84a480 100644 --- a/e2e/Makefile +++ b/e2e/Makefile @@ -20,13 +20,22 @@ CLUSTER_NAME = sart KIND_CONFIG = kind-config.yaml CNI_KIND_CONFIG = kind-config-disable-cni.yaml CONTAINERLAB_KUBERNETES = $(TOPOLOGYDIR)/kubernetes.yaml +CONTAINERLAB_KUBERNETES_CNI = $(TOPOLOGYDIR)/kubernetes-cni.yaml +CONTAINERLAB_KUBERNETES_CNI_COMPACT = $(TOPOLOGYDIR)/kubernetes-cni-compact.yaml MODE := lb +COMPACT := false -WORKER_ASN ?= 65000 -WORKER2_ASN ?= 65000 -WORKER3_ASN ?= 65000 -CP_ASN ?= 65000 +CONTROL_PLANE_ADDR ?= +WORKER0_ADDR ?= +WORKER1_ADDR ?= +WORKER2_ADDR ?= + +WORKER_ASN ?= 65020 +WORKER2_ASN ?= 65030 +WORKER3_ASN ?= 65040 +CP_ASN ?= 65010 +IBGP_ASN ?= 65000 FEATURE = .PHONY: bgp-e2e @@ -41,6 +50,12 @@ kubernetes-e2e: TARGET=kubernetes ./e2e.test --ginkgo.v -test.v rm -f ./e2e.test +.PHONY: cni-e2e +cni-e2e: + go test -c ./... + TARGET=cni ./e2e.test --ginkgo.v -test.v + rm -f ./e2e.test + .PHONY: topology topology: @@ -50,25 +65,61 @@ kubernetes: $(SUDO) sysctl -w fs.inotify.max_user_watches=65536 ifeq ($(MODE),cni) $(KIND) create cluster --image kindest/node:v$(KUBERNETES_VERSION) --config=$(CNI_KIND_CONFIG) --name $(CLUSTER_NAME) +ifeq ($(COMPACT),true) + go run $(TOPOLOGYDIR)/generator/main.go -template-path $(CONTAINERLAB_KUBERNETES_CNI_COMPACT).tmpl -output-path $(CONTAINERLAB_KUBERNETES_CNI_COMPACT) + $(SUDO) $(CONTAINERLAB) -t $(CONTAINERLAB_KUBERNETES_CNI_COMPACT) deploy else - $(KIND) create cluster --image kindest/node:v$(KUBERNETES_VERSION) --config=$(KIND_CONFIG) --name $(CLUSTER_NAME) + go run $(TOPOLOGYDIR)/generator/main.go -template-path $(CONTAINERLAB_KUBERNETES_CNI).tmpl -output-path $(CONTAINERLAB_KUBERNETES_CNI) + $(SUDO) $(CONTAINERLAB) -t $(CONTAINERLAB_KUBERNETES_CNI) deploy endif + $(KUBECTL) label nodes --overwrite $(CLUSTER_NAME)-worker sart.terassyi.net/asn=$(WORKER_ASN) + $(KUBECTL) label nodes --overwrite $(CLUSTER_NAME)-worker2 sart.terassyi.net/asn=$(WORKER2_ASN) + $(KUBECTL) label nodes --overwrite $(CLUSTER_NAME)-worker3 sart.terassyi.net/asn=$(WORKER3_ASN) + $(KUBECTL) label nodes --overwrite $(CLUSTER_NAME)-control-plane sart.terassyi.net/asn=$(CP_ASN) + + $(KUBECTL) label nodes --overwrite $(CLUSTER_NAME)-worker bgp=a + $(KUBECTL) label nodes --overwrite $(CLUSTER_NAME)-worker2 bgp=a + $(KUBECTL) label nodes --overwrite $(CLUSTER_NAME)-worker3 bgp=a + $(KUBECTL) label nodes --overwrite $(CLUSTER_NAME)-control-plane bgp=a + +else + $(KIND) create cluster --image kindest/node:v$(KUBERNETES_VERSION) --config=$(KIND_CONFIG) --name $(CLUSTER_NAME) $(SUDO) $(CONTAINERLAB) -t $(CONTAINERLAB_KUBERNETES) deploy + $(KUBECTL) label nodes --overwrite $(CLUSTER_NAME)-worker sart.terassyi.net/asn=$(IBGP_ASN) + $(KUBECTL) label nodes --overwrite $(CLUSTER_NAME)-worker2 sart.terassyi.net/asn=$(IBGP_ASN) + $(KUBECTL) label nodes --overwrite $(CLUSTER_NAME)-worker3 sart.terassyi.net/asn=$(IBGP_ASN) + $(KUBECTL) label nodes --overwrite $(CLUSTER_NAME)-control-plane sart.terassyi.net/asn=$(IBGP_ASN) $(KUBECTL) label nodes --overwrite $(CLUSTER_NAME)-worker bgp=a $(KUBECTL) label nodes --overwrite $(CLUSTER_NAME)-worker2 bgp=a $(KUBECTL) label nodes --overwrite $(CLUSTER_NAME)-worker3 bgp=a $(KUBECTL) label nodes --overwrite $(CLUSTER_NAME)-control-plane bgp=b - $(KUBECTL) label nodes --overwrite $(CLUSTER_NAME)-worker sart.terassyi.net/asn=$(WORKER_ASN) - $(KUBECTL) label nodes --overwrite $(CLUSTER_NAME)-worker2 
sart.terassyi.net/asn=$(WORKER2_ASN) - $(KUBECTL) label nodes --overwrite $(CLUSTER_NAME)-worker3 sart.terassyi.net/asn=$(WORKER3_ASN) - $(KUBECTL) label nodes --overwrite $(CLUSTER_NAME)-control-plane sart.terassyi.net/asn=$(CP_ASN) +endif + + + + $(KUBECTL) taint node sart-control-plane node-role.kubernetes.io/control-plane:NoSchedule- || true + + kubectl patch -n local-path-storage deployment local-path-provisioner --patch '{"spec": {"template": {"spec": {"nodeSelector": {"kubernetes.io/hostname": "sart-control-plane"}}}}}' $(KIND) load docker-image $(IMAGE_NAME):$(IMAGE_VERSION) -n $(CLUSTER_NAME) + docker pull ghcr.io/terassyi/test-server:0.1.2 + $(KIND) load docker-image ghcr.io/terassyi/test-server:0.1.2 -n $(CLUSTER_NAME) + .PHONY: kubernetes-down kubernetes-down: +ifeq ($(MODE),cni) +ifeq ($(COMPACT),true) + $(SUDO) $(CONTAINERLAB) -t $(CONTAINERLAB_KUBERNETES_CNI_COMPACT) destroy + rm $(CONTAINERLAB_KUBERNETES_CNI_COMPACT) +else + $(SUDO) $(CONTAINERLAB) -t $(CONTAINERLAB_KUBERNETES_CNI) destroy + rm $(CONTAINERLAB_KUBERNETES_CNI) +endif +else $(SUDO) $(CONTAINERLAB) -t $(CONTAINERLAB_KUBERNETES) destroy +endif $(KIND) delete cluster --name $(CLUSTER_NAME) .PHONY: install-sart
diff --git a/e2e/Makefile.versions b/e2e/Makefile.versions index 7e69c61..8f0ae30 100644 --- a/e2e/Makefile.versions +++ b/e2e/Makefile.versions @@ -1,6 +1,6 @@ -KIND_VERSION := 0.20.0 -KUBERNETES_VERSION := 1.28.0 -KUSTOMIZE_VERSION := 5.2.1 -CONTAINERLAB_VERSION := 0.48.6 -HELM_VERSION := 3.13.2 -CERT_MANAGER_VERSION := 1.13.2 +KIND_VERSION := 0.22.0 +KUBERNETES_VERSION := 1.29.2 +KUSTOMIZE_VERSION := 5.3.0 +CONTAINERLAB_VERSION := 0.51.3 +HELM_VERSION := 3.14.2 +CERT_MANAGER_VERSION := 1.14.4
diff --git a/e2e/README.md b/e2e/README.md index 762a17c..8e36b4e 100644 --- a/e2e/README.md +++ b/e2e/README.md @@ -46,7 +46,7 @@ This is the topology for the kubernetes e2e test. ![kubernetes.drawio.svg](./img/kubernetes.drawio.svg) -We confirm that following points in this test. +These tests confirm the following points. - Establish BGP peers via Kubernetes custom resources. - Assign external ip addresses to LoadBalancer services. @@ -62,3 +62,33 @@ We confirm that following points in this test. - sartd-agent - sartd-bgp - sart-controller + +## CNI + +To set up a CNI e2e test environment, run the following commands. + +```console +$ make kubernetes MODE=cni +$ make install-sart MODE=cni +``` + +After that, we can run the test. + +```console +$ make cni-e2e +``` + +The following figure shows the topology for the CNI e2e test. + +![kubernetes-cni.drawio.svg](./img/kubernetes-cni.drawio.svg) + +This test confirms the following points. + +- Establish BGP peers via Kubernetes custom resources. +- Assign an IP address to pods from the auto-assignable pool +- Assign an IP address to pods from the non-auto-assignable pool +- Connectivity of pods in the cluster +- Restart + - sartd-agent + - sartd-bgp + - sart-controller
diff --git a/e2e/img/kubernetes-cni-compact.drawio.svg b/e2e/img/kubernetes-cni-compact.drawio.svg new file mode 100644 index 0000000..228f0d3 --- /dev/null +++ b/e2e/img/kubernetes-cni-compact.drawio.svg @@ -0,0 +1,651 @@
+[SVG body omitted: drawio diagram of the compact CNI e2e topology. Its labels show the kind nodes sart-control-plane (ASN 65010), sart-worker (ASN 65020), sart-worker2 (ASN 65030) and sart-worker3 (ASN 65040) on the docker network (kind); router0 (ASN 65000, ID 9.9.9.9) connects to the nodes over point-to-point links numbered 169.254.{1..4}.{1,2}, and router1 (ASN 65000, ID 7.7.7.7) over links numbered 169.253.{1..4}.{1,2}; a client0 host is also attached.]
diff --git a/e2e/img/kubernetes-cni.drawio.svg b/e2e/img/kubernetes-cni.drawio.svg new file mode 100644 index 0000000..2dad2ac --- /dev/null +++ b/e2e/img/kubernetes-cni.drawio.svg @@ -0,0 +1,811 @@
+[SVG body omitted: drawio diagram of the full CNI e2e topology. Its labels show the kind nodes sart-control-plane (ASN 65010), sart-worker (ASN 65020), sart-worker2 (ASN 65030) and sart-worker3 (ASN 65040) on the docker network (kind); spine0 (ASN 65001, ID 9.9.9.9) connects to the nodes over links numbered 169.254.{1..4}.{1,2}, and spine1 (ASN 65002, ID 7.7.7.7) over links numbered 169.253.{1..4}.{1,2}; both spines uplink to core0 (ASN 65000, ID 5.5.5.5) via 169.252.1.{0,1} and 169.252.2.{0,1}; a client0 host hangs off core0 on the 192.168.0.1/192.168.0.2 link.]
diff --git a/e2e/kind-config.yaml b/e2e/kind-config.yaml index 7cc6773..6d99eaa 100644 --- a/e2e/kind-config.yaml +++ b/e2e/kind-config.yaml @@ -1,7 +1,6 @@ apiVersion: kind.x-k8s.io/v1alpha4 kind: Cluster networking: - ipFamily: dual podSubnet: "10.100.0.0/16" serviceSubnet: "10.101.0.0/16" nodes: diff --git a/e2e/kubernetes_cni_test.go b/e2e/kubernetes_cni_test.go new file mode 100644 index 0000000..4e52748 --- /dev/null +++ b/e2e/kubernetes_cni_test.go @@ -0,0 +1,756 @@ +package e2e + +import ( + "context" + "encoding/json" + "fmt" + "net" + "net/netip" + "os" + "path" + "strings" + "time" + + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" + appsv1 "k8s.io/api/apps/v1" + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" +) + +func isCompact() bool { + compact := os.Getenv("COMPACT") + return compact == "true" +} + +func testClusterBGPForCNI() { + // compact := isCompact() + + It("should install BGP related resource", func() { + By("applying BGP for spine0") + Eventually(func(g Gomega) error { + _, err := kubectl(nil, "apply", "-f", path.Join(MANIFEST_DIR_CNI, "peer_template.yaml")) + g.Expect(err).NotTo(HaveOccurred()) + + _, err = kubectl(nil, "apply", "-f", path.Join(MANIFEST_DIR_CNI, "cluster_bgp_spine0.yaml")) + g.Expect(err).NotTo(HaveOccurred()) + return nil + }).Should(Succeed()) + + By("preparing dynamic client") + dynamicClient, err := getDynamicClient() + Expect(err).NotTo(HaveOccurred()) + + ctx := context.Background() + + By("checking ClusterBGP bgp=a") + Eventually(func(g Gomega) error { + list, err := dynamicClient.Resource(clusterBGP).List(ctx, metav1.ListOptions{}) + g.Expect(err).NotTo(HaveOccurred()) + g.Expect(len(list.Items)).To(Equal(1)) + return nil + }).Should(Succeed()) + + By("checking NodeBGP with label(bgp=a)") + Eventually(func(g Gomega) error { + list, err := dynamicClient.Resource(nodeBGP).List(ctx, metav1.ListOptions{LabelSelector: "bgp=a"}) + g.Expect(err).NotTo(HaveOccurred()) + g.Expect(len(list.Items)).To(Equal(4)) + return nil + }).Should(Succeed()) + + By("checking BGPPeer for spine0") + Eventually(func(g Gomega) error { + list, err := dynamicClient.Resource(bgpPeer).List(ctx, metav1.ListOptions{}) + g.Expect(err).NotTo(HaveOccurred()) + g.Expect(len(list.Items)).To(Equal(4)) + return nil + }).Should(Succeed()) + + By("checking BGPPeer status is Established state for spine0") + Eventually(func(g Gomega) error { + list, err := dynamicClient.Resource(bgpPeer).List(ctx, metav1.ListOptions{}) + g.Expect(err).NotTo(HaveOccurred()) + for _, p := range list.Items { + res, found, err := unstructured.NestedSlice(p.Object, "status", "conditions") + g.Expect(err).NotTo(HaveOccurred()) + g.Expect(found).To(BeTrue()) + nowCond := res[len(res)-1].(map[string]any) + nowStatus, ok := nowCond["status"] + g.Expect(ok).To(BeTrue()) + g.Expect(nowStatus.(string)).To(Equal("Established")) + } + return nil + }).WithTimeout(5 * time.Minute).Should(Succeed()) + + By("checking sartd-bgp's peer state for spine0") + Eventually(func(g Gomega) error { + list, err := dynamicClient.Resource(bgpPeer).List(ctx, metav1.ListOptions{}) + g.Expect(err).NotTo(HaveOccurred()) + for _, p := range list.Items { + node, found, err := unstructured.NestedString(p.Object, "spec", "nodeBGPRef") + g.Expect(err).NotTo(HaveOccurred()) + g.Expect(found).To(BeTrue()) + peerAddr, found, err := unstructured.NestedString(p.Object, "spec", "addr") + g.Expect(err).NotTo(HaveOccurred()) + g.Expect(found).To(BeTrue()) 
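+ // Find the sartd pod scheduled on this peer's node, then query the sartd-bgp daemon directly for the session state.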
+ out, err := kubectl(nil, "-n", "kube-system", "get", "pod", "-l", "app=sartd", "--field-selector", fmt.Sprintf("spec.nodeName=%s", node), "-ojson") + g.Expect(err).NotTo(HaveOccurred()) + + var podList corev1.PodList + err = json.Unmarshal(out, &podList) + g.Expect(err).NotTo(HaveOccurred()) + g.Expect(len(podList.Items)).To(Equal(1)) + + pod := podList.Items[0] + peerBytes, err := kubectlExec(pod.Name, pod.Namespace, nil, "sart", "bgp", "neighbor", "get", peerAddr, "-ojson") + g.Expect(err).NotTo(HaveOccurred()) + var peer sartPeerInfo + err = json.Unmarshal(peerBytes, &peer) + g.Expect(err).NotTo(HaveOccurred()) + g.Expect(peer.State).To(Equal("Established")) + } + return nil + }).Should(Succeed()) + + By("applying BGP for spine1") + Eventually(func(g Gomega) error { + _, err := kubectl(nil, "apply", "-f", path.Join(MANIFEST_DIR_CNI, "cluster_bgp_spine1.yaml")) + g.Expect(err).NotTo(HaveOccurred()) + return nil + }).Should(Succeed()) + + By("checking BGPPeer for spine1") + Eventually(func(g Gomega) error { + list, err := dynamicClient.Resource(bgpPeer).List(ctx, metav1.ListOptions{}) + g.Expect(err).NotTo(HaveOccurred()) + g.Expect(len(list.Items)).To(Equal(8)) // spine0 + spine1 + return nil + }).Should(Succeed()) + + By("checking BGPPeer status is Established state for spine1") + Eventually(func(g Gomega) error { + list, err := dynamicClient.Resource(bgpPeer).List(ctx, metav1.ListOptions{}) + g.Expect(err).NotTo(HaveOccurred()) + for _, p := range list.Items { + res, found, err := unstructured.NestedSlice(p.Object, "status", "conditions") + g.Expect(err).NotTo(HaveOccurred()) + g.Expect(found).To(BeTrue()) + nowCond := res[len(res)-1].(map[string]any) + nowStatus, ok := nowCond["status"] + g.Expect(ok).To(BeTrue()) + g.Expect(nowStatus.(string)).To(Equal("Established")) + } + return nil + }).WithTimeout(5 * time.Minute).Should(Succeed()) + + By("checking sartd-bgp's peer state for spine1") + Eventually(func(g Gomega) error { + list, err := dynamicClient.Resource(bgpPeer).List(ctx, metav1.ListOptions{}) + g.Expect(err).NotTo(HaveOccurred()) + for _, p := range list.Items { + node, found, err := unstructured.NestedString(p.Object, "spec", "nodeBGPRef") + g.Expect(err).NotTo(HaveOccurred()) + g.Expect(found).To(BeTrue()) + peerAddr, found, err := unstructured.NestedString(p.Object, "spec", "addr") + g.Expect(err).NotTo(HaveOccurred()) + g.Expect(found).To(BeTrue()) + out, err := kubectl(nil, "-n", "kube-system", "get", "pod", "-l", "app=sartd", "--field-selector", fmt.Sprintf("spec.nodeName=%s", node), "-ojson") + g.Expect(err).NotTo(HaveOccurred()) + + var podList corev1.PodList + err = json.Unmarshal(out, &podList) + g.Expect(err).NotTo(HaveOccurred()) + g.Expect(len(podList.Items)).To(Equal(1)) + + pod := podList.Items[0] + peerBytes, err := kubectlExec(pod.Name, pod.Namespace, nil, "sart", "bgp", "neighbor", "get", peerAddr, "-ojson") + g.Expect(err).NotTo(HaveOccurred()) + var peer sartPeerInfo + err = json.Unmarshal(peerBytes, &peer) + g.Expect(err).NotTo(HaveOccurred()) + g.Expect(peer.State).To(Equal("Established")) + } + return nil + }).Should(Succeed()) + + }) +} + +func testPodAddressPool() { + It("should create AddressPools for pods", func() { + By("applying AddressPools") + _, err := kubectl(nil, "apply", "-f", path.Join(MANIFEST_DIR_CNI, "pool.yaml")) + Expect(err).NotTo(HaveOccurred()) + + By("preparing dynamic client") + dynamicClient, err := getDynamicClient() + Expect(err).NotTo(HaveOccurred()) + + ctx := context.Background() + + Eventually(func(g Gomega) error { + 
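+ // The applied pool.yaml is expected to define two pools (the default and the non-default pod pool), so two AddressPool resources should exist.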
list, err := dynamicClient.Resource(addressPool).List(ctx, metav1.ListOptions{}) + g.Expect(err).NotTo(HaveOccurred()) + g.Expect(len(list.Items)).To(Equal(2)) + return nil + }).Should(Succeed()) + }) +} + +func testCreatePods() { + It("should create namespace", func() { + By("applying Namespace") + _, err := kubectl(nil, "apply", "-f", path.Join(MANIFEST_DIR_CNI, "namespace.yaml")) + Expect(err).NotTo(HaveOccurred()) + }) + + It("should create address block and related resource for pods", func() { + By("applying Pods") + _, err := kubectl(nil, "apply", "-f", path.Join(MANIFEST_DIR_CNI, "test_pod.yaml")) + Expect(err).NotTo(HaveOccurred()) + + By("preparing dynamic client") + dynamicClient, err := getDynamicClient() + Expect(err).NotTo(HaveOccurred()) + + ctx := context.Background() + + By("checking address blocks are created for each node") + nodeCIDRMap := make(map[string]*net.IPNet) + Eventually(func(g Gomega) error { + list, err := dynamicClient.Resource(addressBlock).List(ctx, metav1.ListOptions{}) + g.Expect(err).NotTo(HaveOccurred()) + g.Expect(len(list.Items)).To(Equal(4)) + for _, block := range list.Items { + poolName, found, err := unstructured.NestedString(block.Object, "spec", "poolRef") + g.Expect(err).NotTo(HaveOccurred()) + g.Expect(found).To(BeTrue()) + g.Expect(poolName).To(Equal("default-pod-pool")) + node, found, err := unstructured.NestedString(block.Object, "spec", "nodeRef") + g.Expect(err).NotTo(HaveOccurred()) + g.Expect(found).To(BeTrue()) + cidrStr, found, err := unstructured.NestedString(block.Object, "spec", "cidr") + g.Expect(err).NotTo(HaveOccurred()) + g.Expect(found).To(BeTrue()) + _, cidr, err := net.ParseCIDR(cidrStr) + g.Expect(err).NotTo(HaveOccurred()) + nodeCIDRMap[node] = cidr + } + + g.Expect(len(nodeCIDRMap)).To(Equal(4)) + return nil + }).Should(Succeed()) + + By("checking BGPAdvertisement is created for each block") + Eventually(func(g Gomega) error { + list, err := dynamicClient.Resource(bgpAdvertisement).List(ctx, metav1.ListOptions{}) + g.Expect(err).NotTo(HaveOccurred()) + g.Expect(len(list.Items)).To(Equal(4)) + for _, adv := range list.Items { + nodeName := "" + for node, cidr := range nodeCIDRMap { + if strings.Contains(adv.GetName(), fmt.Sprintf("%s-", node)) { + prefixStr, found, err := unstructured.NestedString(adv.Object, "spec", "cidr") + g.Expect(err).NotTo(HaveOccurred()) + g.Expect(found).To(BeTrue()) + _, prefix, err := net.ParseCIDR(prefixStr) + g.Expect(err).NotTo(HaveOccurred()) + g.Expect(cidr).To(Equal(prefix)) + + nodeName = node + break + } + } + + peers, found, err := unstructured.NestedMap(adv.Object, "status", "peers") + g.Expect(err).NotTo(HaveOccurred()) + g.Expect(found).To(BeTrue()) + + peerList, err := dynamicClient.Resource(bgpPeer).List(ctx, metav1.ListOptions{LabelSelector: fmt.Sprintf("bgppeer.sart.terassyi.net/node=%s", nodeName)}) + g.Expect(err).NotTo(HaveOccurred()) + + for _, peer := range peerList.Items { + peerStatus, ok := peers[peer.GetName()] + if !ok { + return fmt.Errorf("advertisement for peer: %s is not found: peers: %v", peer.GetName(), peers) + } + g.Expect(peerStatus).To(Equal("Advertised")) + if peerStatus.(string) != "Advertised" { + return fmt.Errorf("advertisement status: %s is not found: peers: %v", peerStatus.(string), peers) + } + } + } + return nil + }).Should(Succeed()) + + By("checking all pods are running") + var podList corev1.PodList + podAddrMap := make(map[string]net.IP) + + Eventually(func(g Gomega) error { + list, err := kubectl(nil, "-n", "test", "get", "pod", "-ojson") + 
g.Expect(err).NotTo(HaveOccurred()) + err = json.Unmarshal(list, &podList) + g.Expect(err).NotTo(HaveOccurred()) + g.Expect(len(podList.Items)).To(Equal(5)) + + for _, pod := range podList.Items { + g.Expect(pod.Status.Phase).To(Equal(corev1.PodRunning)) + podAddr := net.ParseIP(pod.Status.PodIP) + podAddrMap[pod.Name] = podAddr + + cidr, ok := nodeCIDRMap[pod.Spec.NodeName] + g.Expect(ok).To(BeTrue()) + g.Expect(cidr.Contains(podAddr)).To(BeTrue()) + } + return nil + }).Should(Succeed()) + + }) + + It("should exist an address block on each node", func() { + By("checking all pods are running") + var podList corev1.PodList + podAddrMap := make(map[string]net.IP) + + Eventually(func(g Gomega) error { + list, err := kubectl(nil, "-n", "test", "get", "pod", "-ojson") + g.Expect(err).NotTo(HaveOccurred()) + err = json.Unmarshal(list, &podList) + g.Expect(err).NotTo(HaveOccurred()) + g.Expect(len(podList.Items)).To(Equal(5)) + + for _, pod := range podList.Items { + g.Expect(pod.Status.Phase).To(Equal(corev1.PodRunning)) + podAddr := net.ParseIP(pod.Status.PodIP) + podAddrMap[pod.Name] = podAddr + } + return nil + }).Should(Succeed()) + + By("checking the connectivity") + Eventually(func(g Gomega) error { + for src, srcAddr := range podAddrMap { + for dst, dstAddr := range podAddrMap { + for i := 0; i < 10; i++ { + _, err := kubectlExec(src, "test", nil, "ping", "-c", "1", dstAddr.String()) + if err != nil { + return fmt.Errorf("src: %s(%s) dst: %s(%s): %v", src, srcAddr, dst, dstAddr, err) + } + } + } + } + return nil + }).Should(Succeed()) + }) +} + +func testDeletePod() { + It("should delete a pod on sart-worker3", func() { + By("getting pods on sart-worker3") + var podList corev1.PodList + out, err := kubectl(nil, "-n", "test", "get", "pod", "--field-selector=spec.nodeName=sart-worker3", "-ojson") + Expect(err).NotTo(HaveOccurred()) + err = json.Unmarshal(out, &podList) + Expect(err).NotTo(HaveOccurred()) + Expect(len(podList.Items)).To(Equal(2)) + + By("finding the target pod to delete") + var deleteTarget corev1.Pod + + pod0Addr, err := netip.ParseAddr(podList.Items[0].Status.PodIP) + Expect(err).NotTo(HaveOccurred()) + pod1Addr, err := netip.ParseAddr(podList.Items[1].Status.PodIP) + Expect(err).NotTo(HaveOccurred()) + + if pod0Addr.Compare(pod1Addr) > 0 { + // pod0Addr > pod1Addr + deleteTarget = podList.Items[1] + } else { + deleteTarget = podList.Items[0] + } + + By("deleting the pod that has the lower address") + _, err = kubectl(nil, "-n", deleteTarget.Namespace, "delete", "pod", deleteTarget.Name) + Expect(err).NotTo(HaveOccurred()) + + By("checking the target pod is deleted") + Eventually(func(g Gomega) error { + _, err := kubectl(nil, "-n", deleteTarget.Namespace, "get", "pod", deleteTarget.Name, "-ojson") + if err != nil { + // should be not found + return nil + } + return fmt.Errorf("deleted pod(%s) should not exist", deleteTarget.Name) + + }).Should(Succeed()) + + By("creating the pod on sart-worker3 again") + _, err = kubectl(nil, "apply", "-f", path.Join(MANIFEST_DIR_CNI, "test_pod.yaml")) + Expect(err).NotTo(HaveOccurred()) + + By("checking the recreated pod has the same address") + // When using a bit allocator pool, the allocator always picks the lowest available address in the pool. + // So in this case, the recreated pod should be given the same address.
+ Eventually(func(g Gomega) error { + var recreatedPod corev1.Pod + out, err := kubectl(nil, "-n", deleteTarget.Namespace, "get", "pod", deleteTarget.Name, "-ojson") + g.Expect(err).NotTo(HaveOccurred()) + err = json.Unmarshal(out, &recreatedPod) + g.Expect(err).NotTo(HaveOccurred()) + g.Expect(deleteTarget.Status.PodIP).To(Equal(recreatedPod.Status.PodIP)) + return nil + }).Should(Succeed()) + + }) +} + +func testNonDefaultPool() { + It("should create pod with non-default-pod-pool", func() { + By("preparing dynamic client") + dynamicClient, err := getDynamicClient() + Expect(err).NotTo(HaveOccurred()) + + ctx := context.Background() + + By("getting the non-default AddressPool") + Eventually(func(g Gomega) error { + _, err := dynamicClient.Resource(addressPool).Get(ctx, "non-default-pod-pool", metav1.GetOptions{}) + g.Expect(err).NotTo(HaveOccurred()) + return nil + }).Should(Succeed()) + + By("applying a pod with non-default-pod-pool") + _, err = kubectl(nil, "apply", "-f", path.Join(MANIFEST_DIR_CNI, "test_pod_another_pool.yaml")) + Expect(err).NotTo(HaveOccurred()) + + By("checking the AddressBlock for non-default-pod-pool") + var cidr net.IPNet + Eventually(func(g Gomega) error { + list, err := dynamicClient.Resource(addressBlock).List(ctx, metav1.ListOptions{LabelSelector: "sart.terassyi.net/addresspool=non-default-pod-pool"}) + g.Expect(err).NotTo(HaveOccurred()) + g.Expect(len(list.Items)).To(Equal(1)) + + poolRef, found, err := unstructured.NestedString(list.Items[0].Object, "spec", "poolRef") + g.Expect(err).NotTo(HaveOccurred()) + g.Expect(found).To(BeTrue()) + g.Expect(poolRef).To(Equal("non-default-pod-pool")) + + cidrStr, found, err := unstructured.NestedString(list.Items[0].Object, "spec", "cidr") + g.Expect(err).NotTo(HaveOccurred()) + g.Expect(found).To(BeTrue()) + + _, actualCIDR, err := net.ParseCIDR(cidrStr) + g.Expect(err).NotTo(HaveOccurred()) + cidr = *actualCIDR + return nil + }).Should(Succeed()) + + By("checking the pod is running") + var pod corev1.Pod + Eventually(func(g Gomega) error { + out, err := kubectl(nil, "-n", "test", "get", "pod", "test-worker-non-default", "-ojson") + g.Expect(err).NotTo(HaveOccurred()) + err = json.Unmarshal(out, &pod) + g.Expect(err).NotTo(HaveOccurred()) + + g.Expect(pod.Status.Phase).To(Equal(corev1.PodRunning)) + return nil + }).Should(Succeed()) + + By("checking the allocated address") + addr := net.ParseIP(pod.Status.PodIP) + Expect(cidr.Contains(addr)).To(BeTrue()) + + By("checking the connectivity") + var dstPod corev1.Pod + out, err := kubectl(nil, "-n", "test", "get", "pod", "test-cp", "-ojson") + Expect(err).NotTo(HaveOccurred()) + err = json.Unmarshal(out, &dstPod) + Expect(err).NotTo(HaveOccurred()) + + Eventually(func(g Gomega) error { + for i := 0; i < 10; i++ { + _, err := kubectlExec("test-worker-non-default", "test", nil, "ping", "-c", "1", dstPod.Status.PodIP) + g.Expect(err).NotTo(HaveOccurred()) + } + return nil + }).Should(Succeed()) + }) +} + +func testNonDefaultPoolInNamespace() { + It("should create pods in test-non-default namespace", func() { + By("applying a namespace and pods") + _, err := kubectl(nil, "apply", "-f", path.Join(MANIFEST_DIR_CNI, "test_pod_in_namespace.yaml")) + Expect(err).NotTo(HaveOccurred()) + + By("preparing dynamic client") + dynamicClient, err := getDynamicClient() + Expect(err).NotTo(HaveOccurred()) + + ctx := context.Background() + + By("checking the AddressBlock for non-default-pod-pool") + Eventually(func(g Gomega) error { + list, err := 
dynamicClient.Resource(addressBlock).List(ctx, metav1.ListOptions{LabelSelector: "sart.terassyi.net/addresspool=non-default-pod-pool"}) + g.Expect(err).NotTo(HaveOccurred()) + g.Expect(len(list.Items)).To(Equal(3)) + return nil + }).Should(Succeed()) + + By("checking all pods are running") + var podList corev1.PodList + Eventually(func(g Gomega) error { + out, err := kubectl(nil, "-n", "test-non-default", "get", "pod", "-ojson") + g.Expect(err).NotTo(HaveOccurred()) + err = json.Unmarshal(out, &podList) + g.Expect(err).NotTo(HaveOccurred()) + + for _, pod := range podList.Items { + g.Expect(pod.Status.Phase).To(Equal(corev1.PodRunning)) + } + return nil + }).Should(Succeed()) + + By("checking the connectivity") + Eventually(func(g Gomega) error { + for _, src := range podList.Items { + for _, dst := range podList.Items { + for i := 0; i < 10; i++ { + _, err := kubectlExec(src.Name, src.Namespace, nil, "ping", "-c", "1", dst.Status.PodIP) + g.Expect(err).NotTo(HaveOccurred()) + } + + } + } + return nil + }).Should(Succeed()) + + }) +} + +func testReleaseAddressBlock() { + It("should release and re-create AddressBlocks", func() { + By("getting a pod in test-non-default on sart-worker2") + var pod corev1.Pod + out, err := kubectl(nil, "-n", "test-non-default", "get", "pod", "test-worker2-non-default", "-ojson") + Expect(err).NotTo(HaveOccurred()) + err = json.Unmarshal(out, &pod) + Expect(err).NotTo(HaveOccurred()) + + By("preparing dynamic client") + dynamicClient, err := getDynamicClient() + Expect(err).NotTo(HaveOccurred()) + + ctx := context.Background() + + By("getting an AddressBlock associated with sart-worker2") + // var workerAddressBlock unstructured.Unstructured + Eventually(func(g Gomega) error { + list, err := dynamicClient.Resource(addressBlock).List(ctx, metav1.ListOptions{LabelSelector: "addressblock.sart.terassyi.net/node=sart-worker2,sart.terassyi.net/addresspool=non-default-pod-pool"}) + g.Expect(err).NotTo(HaveOccurred()) + g.Expect(len(list.Items)).To(Equal(1)) + return nil + }).Should(Succeed()) + + By("deleting the pod in test-non-default namespace on sart-worker2") + _, err = kubectl(nil, "-n", pod.Namespace, "delete", "pod", pod.Name) + Expect(err).NotTo(HaveOccurred()) + + By("checking the deleted pod doesn't exist") + Eventually(func(g Gomega) error { + _, err := kubectl(nil, "-n", pod.Namespace, "get", "pod", pod.Name, "-ojson") + if err != nil { + // should be not found + return nil + } + return fmt.Errorf("deleted pod(%s) should not exist", pod.Name) + }).Should(Succeed()) + + By("checking the AddressBlock that is associated with sart-worker2 is released") + // Once the AddressBlock becomes unused, it must be released and removed + Eventually(func(g Gomega) error { + list, err := dynamicClient.Resource(addressBlock).List(ctx, metav1.ListOptions{LabelSelector: "addressblock.sart.terassyi.net/node=sart-worker2,sart.terassyi.net/addresspool=non-default-pod-pool"}) + g.Expect(err).NotTo(HaveOccurred()) + g.Expect(len(list.Items)).NotTo(Equal(1)) + return nil + }).Should(Succeed()) + }) +} + +func testRecoverAllocationsAfterRestart() { + It("should recover existing allocations on the node after restarting sartd-agent", func() { + By("getting existing pods on sart-worker3") + var podList corev1.PodList + out, err := kubectl(nil, "-n", "test", "get", "pod", "-ojson", "--field-selector=spec.nodeName=sart-worker3") + Expect(err).NotTo(HaveOccurred()) + err = json.Unmarshal(out, &podList) + Expect(err).NotTo(HaveOccurred()) + podAddrMap := make(map[string]corev1.Pod) +
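+ // Record the addresses already allocated on sart-worker3; after the agent restarts, a new pod must not reuse any of them.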
for _, pod := range podList.Items { + podAddrMap[pod.Status.PodIP] = pod + } + + By("getting existing pods on sart-worker") + var podListOnWorker corev1.PodList + out, err = kubectl(nil, "-n", "test", "get", "pod", "-ojson", "--field-selector=spec.nodeName=sart-worker") + Expect(err).NotTo(HaveOccurred()) + err = json.Unmarshal(out, &podListOnWorker) + Expect(err).NotTo(HaveOccurred()) + + By("restarting sartd-agent pods") + _, err = kubectl(nil, "-n", "kube-system", "rollout", "restart", "ds/sartd-agent") + Expect(err).NotTo(HaveOccurred()) + + By("waiting for rollout restart") + _, err = kubectl(nil, "-n", "kube-system", "rollout", "status", "ds/sartd-agent") + Expect(err).NotTo(HaveOccurred()) + + By("applying new pod on sart-worker3") + _, err = kubectl(nil, "apply", "-f", path.Join(MANIFEST_DIR_CNI, "test_pod2.yaml")) + Expect(err).NotTo(HaveOccurred()) + + By("checking all pods are running") + var newPod corev1.Pod + Eventually(func(g Gomega) error { + out, err := kubectl(nil, "-n", "test", "get", "pod", "test-worker3-3", "-ojson") + g.Expect(err).NotTo(HaveOccurred()) + err = json.Unmarshal(out, &newPod) + g.Expect(err).NotTo(HaveOccurred()) + + g.Expect(newPod.Status.Phase).To(Equal(corev1.PodRunning)) + return nil + }).Should(Succeed()) + + By("checking new pod is allocated new address") + _, found := podAddrMap[newPod.Status.PodIP] + Expect(found).To(BeFalse()) + + By("checking the connectivity") + Eventually(func(g Gomega) error { + for _, dst := range podListOnWorker.Items { + for i := 0; i < 10; i++ { + _, err := kubectlExec(newPod.Name, newPod.Namespace, nil, "ping", "-c", "1", dst.Status.PodIP) + g.Expect(err).NotTo(HaveOccurred()) + } + } + return nil + }).Should(Succeed()) + + By("restarting sartd-bgp pods") + _, err = kubectl(nil, "-n", "kube-system", "rollout", "restart", "ds/sartd-bgp") + Expect(err).NotTo(HaveOccurred()) + + By("waiting for rollout restart") + _, err = kubectl(nil, "-n", "kube-system", "rollout", "status", "ds/sartd-bgp") + Expect(err).NotTo(HaveOccurred()) + + By("checking the connectivity") + Eventually(func(g Gomega) error { + for _, dst := range podListOnWorker.Items { + for i := 0; i < 10; i++ { + _, err := kubectlExec(newPod.Name, newPod.Namespace, nil, "ping", "-c", "1", dst.Status.PodIP) + g.Expect(err).NotTo(HaveOccurred()) + } + } + return nil + }).Should(Succeed()) + + By("restarting sart-controller pods") + _, err = kubectl(nil, "-n", "kube-system", "rollout", "restart", "deploy/sart-controller") + Expect(err).NotTo(HaveOccurred()) + + By("waiting for rollout restart") + _, err = kubectl(nil, "-n", "kube-system", "rollout", "status", "deploy/sart-controller") + Expect(err).NotTo(HaveOccurred()) + + By("checking the connectivity") + Eventually(func(g Gomega) error { + for _, dst := range podListOnWorker.Items { + for i := 0; i < 10; i++ { + _, err := kubectlExec(newPod.Name, newPod.Namespace, nil, "ping", "-c", "1", dst.Status.PodIP) + g.Expect(err).NotTo(HaveOccurred()) + } + } + return nil + }).Should(Succeed()) + }) + +} + +func testSwitchModeToDual() { + It("should change the mode from cni to dual", func() { + By("patching sartd-agent") + _, err := kubectl(nil, "patch", "-n", "kube-system", "daemonset", "sartd-agent", "--type=strategic", "--patch-file", path.Join(MANIFEST_DIR_DUAL, "agent-patch.yaml")) + Expect(err).NotTo(HaveOccurred()) + By("waiting for rollout restart") + _, err = kubectl(nil, "-n", "kube-system", "rollout", "status", "ds/sartd-agent") + Expect(err).NotTo(HaveOccurred()) + + By("patching sart-controller") + _, err
= kubectl(nil, "patch", "-n", "kube-system", "deployment", "sart-controller", "--type=strategic", "--patch-file", path.Join(MANIFEST_DIR_DUAL, "controller-patch.yaml")) + Expect(err).NotTo(HaveOccurred()) + By("waiting for rollout restart") + _, err = kubectl(nil, "-n", "kube-system", "rollout", "status", "deploy/sart-controller") + Expect(err).NotTo(HaveOccurred()) + + By("checking sart-controller working") + var dp appsv1.Deployment + Eventually(func(g Gomega) error { + out, err := kubectl(nil, "-n", "kube-system", "get", "deploy", "sart-controller", "-ojson") + g.Expect(err).NotTo(HaveOccurred()) + err = json.Unmarshal(out, &dp) + g.Expect(err).NotTo(HaveOccurred()) + if dp.Status.Replicas != dp.Status.AvailableReplicas { + return fmt.Errorf("sart-controller is not ready") + } + return nil + }).Should(Succeed()) + }) +} + +func testLBConnectivityWithDualMode() { + It("should communicate via app-svc-cluster", func() { + By("checking app-svc-cluster gets addresses") + var svc corev1.Service + Eventually(func(g Gomega) error { + out, err := kubectl(nil, "-n", "test", "get", "svc", "app-svc-cluster", "-ojson") + g.Expect(err).NotTo(HaveOccurred()) + err = json.Unmarshal(out, &svc) + g.Expect(err).NotTo(HaveOccurred()) + g.Expect(len(svc.Status.LoadBalancer.Ingress)).To(Equal(1)) + return nil + }).Should(Succeed()) + + addr := svc.Status.LoadBalancer.Ingress[0].IP + + By("checking BGPAdvertisement") + ctx := context.Background() + dynamicClient, err := getDynamicClient() + Expect(err).NotTo(HaveOccurred()) + Eventually(func(g Gomega) error { + advList, err := dynamicClient.Resource(bgpAdvertisement).Namespace(svc.Namespace).List(ctx, metav1.ListOptions{LabelSelector: fmt.Sprintf("kubernetes.io/service-name=%s", svc.Name)}) + g.Expect(err).NotTo(HaveOccurred()) + peers, found, err := unstructured.NestedMap(advList.Items[0].Object, "status", "peers") + g.Expect(err).NotTo(HaveOccurred()) + g.Expect(found).To(BeTrue()) + g.Expect(len(peers)).To(Equal(8)) + for _, status := range peers { + g.Expect(status.(string)).To(Equal("Advertised")) + } + return nil + }).Should(Succeed()) + + By("checking the connectivity to app-svc-cluster") + Eventually(func(g Gomega) error { + _, err := execInContainer("clab-sart-client0", nil, "curl", "-m", "1", addr) + g.Expect(err).NotTo(HaveOccurred()) + return nil + }).Should(Succeed()) + + }) + +} diff --git a/e2e/kubernetes_test.go b/e2e/kubernetes_test.go index bdd4620..8a072b3 100644 --- a/e2e/kubernetes_test.go +++ b/e2e/kubernetes_test.go @@ -5,7 +5,9 @@ import ( "encoding/json" "fmt" "net" + "os" "path" + "path/filepath" "strings" "time" @@ -20,9 +22,11 @@ import ( ) const ( - MANIFEST_DIR string = "../manifests/base/sample" - GROUP string = "sart.terassyi.net" - VERSION string = "v1alpha2" + MANIFEST_DIR string = "../manifests/lb/sample" + MANIFEST_DIR_CNI string = "../manifests/cni/sample" + MANIFEST_DIR_DUAL string = "../manifests/dual" + GROUP string = "sart.terassyi.net" + VERSION string = "v1alpha2" ) var ( @@ -63,6 +67,19 @@ var ( } ) +func getManifestDir() string { + target := os.Getenv("TARGET") + manifestDirBase := "../manifests" + switch target { + case "kubernetes": + return filepath.Join(manifestDirBase, "base", "sample") + case "cni": + return filepath.Join(manifestDirBase, "cni", "sample") + default: + return filepath.Join(manifestDirBase, "base", "sample") + } +} + func testControllerWorkloads() { It("should confirm workloads is working well", func() { By("checking sartd-agent working") @@ -95,7 +112,7 @@ func testControllerWorkloads() { 
Eventually(func(g Gomega) error { out, err := kubectl(nil, "-n", "kube-system", "get", "deploy", "sart-controller", "-ojson") g.Expect(err).NotTo(HaveOccurred()) - err = json.Unmarshal(out, &ds) + err = json.Unmarshal(out, &dp) g.Expect(err).NotTo(HaveOccurred()) if dp.Status.Replicas != dp.Status.AvailableReplicas { return fmt.Errorf("sart-controller is not ready") @@ -108,47 +125,14 @@ func testControllerWorkloads() { func testClusterBGPA() { It("should install BGP related resource", func() { By("applying sample resource") - _, err := kubectl(nil, "apply", "-f", path.Join(MANIFEST_DIR, "peer_template.yaml")) - Expect(err).NotTo(HaveOccurred()) - - _, err = kubectl(nil, "apply", "-f", path.Join(MANIFEST_DIR, "cluster_bgp_a.yaml")) - Expect(err).NotTo(HaveOccurred()) - - _, err = kubectl(nil, "apply", "-f", path.Join(MANIFEST_DIR, "lb_address_pool.yaml")) - Expect(err).NotTo(HaveOccurred()) - - }) - - It("should create AddressBlocks", func() { - By("preparing dynamic client") - dynamicClient, err := getDynamicClient() - Expect(err).NotTo(HaveOccurred()) - - ctx := context.Background() - - By("getting AddressPool") - - addressPoolList, err := dynamicClient.Resource(addressPool).List(ctx, metav1.ListOptions{}) - Expect(err).NotTo(HaveOccurred()) - Expect(len(addressPoolList.Items)).To(Equal(2)) - - By("getting AddressBlocks") Eventually(func(g Gomega) error { - for _, pool := range addressPoolList.Items { - cidr, found, err := unstructured.NestedString(pool.Object, "spec", "cidr") - g.Expect(err).NotTo(HaveOccurred()) - g.Expect(found).To(BeTrue()) - - addressBlock, err := dynamicClient.Resource(addressBlock).Get(ctx, pool.GetName(), metav1.GetOptions{}) - g.Expect(err).NotTo(HaveOccurred()) - blockCidr, found, err := unstructured.NestedString(addressBlock.Object, "spec", "cidr") - g.Expect(err).NotTo(HaveOccurred()) - g.Expect(found).To(BeTrue()) - - g.Expect(blockCidr).To(Equal(cidr)) - } + _, err := kubectl(nil, "apply", "-f", path.Join(MANIFEST_DIR, "peer_template.yaml")) + g.Expect(err).NotTo(HaveOccurred()) + _, err = kubectl(nil, "apply", "-f", path.Join(MANIFEST_DIR, "cluster_bgp_a.yaml")) + g.Expect(err).NotTo(HaveOccurred()) return nil }).Should(Succeed()) + }) It("should create ClusterBGP and BGPPeers", func() { @@ -222,6 +206,57 @@ func testClusterBGPA() { }) } +func testCreateLBAddressPool() { + It("should create AddressPool", func() { + By("applying LB address pool") + Eventually(func(g Gomega) error { + _, err := kubectl(nil, "apply", "-f", path.Join(MANIFEST_DIR, "lb_address_pool.yaml")) + g.Expect(err).NotTo(HaveOccurred()) + return nil + }).Should(Succeed()) + + By("preparing dynamic client") + dynamicClient, err := getDynamicClient() + Expect(err).NotTo(HaveOccurred()) + + ctx := context.Background() + + By("getting AddressPool") + + addressPoolList, err := dynamicClient.Resource(addressPool).List(ctx, metav1.ListOptions{}) + Expect(err).NotTo(HaveOccurred()) + lbPoolList := []unstructured.Unstructured{} + for _, obj := range addressPoolList.Items { + t, found, err := unstructured.NestedString(obj.Object, "spec", "type") + Expect(err).NotTo(HaveOccurred()) + Expect(found).To(BeTrue()) + if t == "service" { + lbPoolList = append(lbPoolList, obj) + } + } + Expect(len(lbPoolList)).To(Equal(2)) + + By("getting AddressBlocks") + Eventually(func(g Gomega) error { + for _, pool := range lbPoolList { + cidr, found, err := unstructured.NestedString(pool.Object, "spec", "cidr") + g.Expect(err).NotTo(HaveOccurred()) + g.Expect(found).To(BeTrue()) + + addressBlock, err := 
dynamicClient.Resource(addressBlock).Get(ctx, pool.GetName(), metav1.GetOptions{}) + g.Expect(err).NotTo(HaveOccurred()) + blockCidr, found, err := unstructured.NestedString(addressBlock.Object, "spec", "cidr") + g.Expect(err).NotTo(HaveOccurred()) + g.Expect(found).To(BeTrue()) + + g.Expect(blockCidr).To(Equal(cidr)) + } + return nil + }).Should(Succeed()) + }) + +} + func testClusterBGPB() { It("should install BGP related resources", func() { diff --git a/e2e/suite_test.go b/e2e/suite_test.go index b889479..f7ddc30 100644 --- a/e2e/suite_test.go +++ b/e2e/suite_test.go @@ -37,6 +37,8 @@ var _ = Describe("End to End test for Sart", func() { testBgp() case "kubernetes": testKubernetes() + case "cni": + testKubernetesCNI() default: fmt.Println("target not set") os.Exit(1) @@ -56,6 +58,7 @@ func testKubernetes() { Context("workloads", testControllerWorkloads) Context("bgp=a", testClusterBGPA) Context("bgp=b", testClusterBGPB) + Context("create address pool", testCreateLBAddressPool) Context("create load-balancer", testCreatingLoadBalancer) Context("load-balancer connectivity", testLoadBalancerConnectivity) Context("address pool", testAddressPool) @@ -67,5 +70,20 @@ func testKubernetes() { Context("restart agent", testRestartAgent) Context("restart controller", testRestartController) Context("restart bgp", testRestartBGP) - // Context("controller", testController) +} + +func testKubernetesCNI() { + Context("workloads", testControllerWorkloads) + Context("prepare BGP", testClusterBGPForCNI) + Context("create address pools for pod", testPodAddressPool) + Context("create pods", testCreatePods) + Context("delete pod", testDeletePod) + Context("create pod with non default pool", testNonDefaultPool) + Context("create pods in test-non-default namespace", testNonDefaultPoolInNamespace) + Context("release unused address block", testReleaseAddressBlock) + Context("recover from restart", testRecoverAllocationsAfterRestart) + Context("switch the mode", testSwitchModeToDual) + Context("create LB address pool", testCreateLBAddressPool) + Context("create load-balancer", testCreatingLoadBalancer) + Context("lb connectivity", testLBConnectivityWithDualMode) } diff --git a/e2e/topology/generator/go.mod b/e2e/topology/generator/go.mod new file mode 100644 index 0000000..de4956f --- /dev/null +++ b/e2e/topology/generator/go.mod @@ -0,0 +1,48 @@ +module github.com/terassyi/sart/e2e/topology/generator + +go 1.22.0 + +require ( + k8s.io/apimachinery v0.29.2 + k8s.io/client-go v0.29.2 +) + +require ( + github.com/davecgh/go-spew v1.1.1 // indirect + github.com/emicklei/go-restful/v3 v3.11.0 // indirect + github.com/go-logr/logr v1.3.0 // indirect + github.com/go-openapi/jsonpointer v0.19.6 // indirect + github.com/go-openapi/jsonreference v0.20.2 // indirect + github.com/go-openapi/swag v0.22.3 // indirect + github.com/gogo/protobuf v1.3.2 // indirect + github.com/golang/protobuf v1.5.3 // indirect + github.com/google/gnostic-models v0.6.8 // indirect + github.com/google/gofuzz v1.2.0 // indirect + github.com/google/uuid v1.3.0 // indirect + github.com/imdario/mergo v0.3.6 // indirect + github.com/josharian/intern v1.0.0 // indirect + github.com/json-iterator/go v1.1.12 // indirect + github.com/mailru/easyjson v0.7.7 // indirect + github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect + github.com/modern-go/reflect2 v1.0.2 // indirect + github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect + github.com/spf13/pflag v1.0.5 // indirect + golang.org/x/net v0.19.0 // indirect 
+ golang.org/x/oauth2 v0.10.0 // indirect + golang.org/x/sys v0.15.0 // indirect + golang.org/x/term v0.15.0 // indirect + golang.org/x/text v0.14.0 // indirect + golang.org/x/time v0.3.0 // indirect + google.golang.org/appengine v1.6.7 // indirect + google.golang.org/protobuf v1.31.0 // indirect + gopkg.in/inf.v0 v0.9.1 // indirect + gopkg.in/yaml.v2 v2.4.0 // indirect + gopkg.in/yaml.v3 v3.0.1 // indirect + k8s.io/api v0.29.2 // indirect + k8s.io/klog/v2 v2.110.1 // indirect + k8s.io/kube-openapi v0.0.0-20231010175941-2dd684a91f00 // indirect + k8s.io/utils v0.0.0-20230726121419-3b25d923346b // indirect + sigs.k8s.io/json v0.0.0-20221116044647-bc3834ca7abd // indirect + sigs.k8s.io/structured-merge-diff/v4 v4.4.1 // indirect + sigs.k8s.io/yaml v1.3.0 // indirect +) diff --git a/e2e/topology/generator/go.sum b/e2e/topology/generator/go.sum new file mode 100644 index 0000000..52918b9 --- /dev/null +++ b/e2e/topology/generator/go.sum @@ -0,0 +1,156 @@ +github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E= +github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= +github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/emicklei/go-restful/v3 v3.11.0 h1:rAQeMHw1c7zTmncogyy8VvRZwtkmkZ4FxERmMY4rD+g= +github.com/emicklei/go-restful/v3 v3.11.0/go.mod h1:6n3XBCmQQb25CM2LCACGz8ukIrRry+4bhvbpWn3mrbc= +github.com/go-logr/logr v1.3.0 h1:2y3SDp0ZXuc6/cjLSZ+Q3ir+QB9T/iG5yYRXqsagWSY= +github.com/go-logr/logr v1.3.0/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY= +github.com/go-openapi/jsonpointer v0.19.6 h1:eCs3fxoIi3Wh6vtgmLTOjdhSpiqphQ+DaPn38N2ZdrE= +github.com/go-openapi/jsonpointer v0.19.6/go.mod h1:osyAmYz/mB/C3I+WsTTSgw1ONzaLJoLCyoi6/zppojs= +github.com/go-openapi/jsonreference v0.20.2 h1:3sVjiK66+uXK/6oQ8xgcRKcFgQ5KXa2KvnJRumpMGbE= +github.com/go-openapi/jsonreference v0.20.2/go.mod h1:Bl1zwGIM8/wsvqjsOQLJ/SH+En5Ap4rVB5KVcIDZG2k= +github.com/go-openapi/swag v0.22.3 h1:yMBqmnQ0gyZvEb/+KzuWZOXgllrXT4SADYbvDaXHv/g= +github.com/go-openapi/swag v0.22.3/go.mod h1:UzaqsxGiab7freDnrUUra0MwWfN/q7tE4j+VcZ0yl14= +github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572 h1:tfuBGBXKqDEevZMzYi5KSi8KkcZtzBcTgAUUtapy0OI= +github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572/go.mod h1:9Pwr4B2jHnOSGXyyzV8ROjYa2ojvAY6HCGYYfMoC3Ls= +github.com/gogo/protobuf v1.3.2 h1:Ov1cvc58UF3b5XjBnZv7+opcTcQFZebYjWzi34vdm4Q= +github.com/gogo/protobuf v1.3.2/go.mod h1:P1XiOD3dCwIKUDQYPy72D8LYyHL2YPYrpS2s69NZV8Q= +github.com/golang/protobuf v1.3.1/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= +github.com/golang/protobuf v1.5.0/go.mod h1:FsONVRAS9T7sI+LIUmWTfcYkHO4aIWwzhcaSAoJOfIk= +github.com/golang/protobuf v1.5.3 h1:KhyjKVUg7Usr/dYsdSqoFveMYd5ko72D+zANwlG1mmg= +github.com/golang/protobuf v1.5.3/go.mod h1:XVQd3VNwM+JqD3oG2Ue2ip4fOMUkwXdXDdiuN0vRsmY= +github.com/google/gnostic-models v0.6.8 h1:yo/ABAfM5IMRsS1VnXjTBvUb61tFIHozhlYvRgGre9I= +github.com/google/gnostic-models v0.6.8/go.mod h1:5n7qKqH0f5wFt+aWF8CW6pZLLNOfYuF5OpfBSENuI8U= +github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= +github.com/google/go-cmp v0.5.9/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= +github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI= +github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= +github.com/google/gofuzz v1.0.0/go.mod 
h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= +github.com/google/gofuzz v1.2.0 h1:xRy4A+RhZaiKjJ1bPfwQ8sedCA+YS2YcCHW6ec7JMi0= +github.com/google/gofuzz v1.2.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= +github.com/google/pprof v0.0.0-20210720184732-4bb14d4b1be1 h1:K6RDEckDVWvDI9JAJYCmNdQXq6neHJOYx3V6jnqNEec= +github.com/google/pprof v0.0.0-20210720184732-4bb14d4b1be1/go.mod h1:kpwsk12EmLew5upagYY7GY0pfYCcupk39gWOCRROcvE= +github.com/google/uuid v1.3.0 h1:t6JiXgmwXMjEs8VusXIJk2BXHsn+wx8BZdTaoZ5fu7I= +github.com/google/uuid v1.3.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= +github.com/imdario/mergo v0.3.6 h1:xTNEAn+kxVO7dTZGu0CegyqKZmoWFI0rF8UxjlB2d28= +github.com/imdario/mergo v0.3.6/go.mod h1:2EnlNZ0deacrJVfApfmtdGgDfMuh/nq6Ok1EcJh5FfA= +github.com/josharian/intern v1.0.0 h1:vlS4z54oSdjm0bgjRigI+G1HpF+tI+9rE5LLzOg8HmY= +github.com/josharian/intern v1.0.0/go.mod h1:5DoeVV0s6jJacbCEi61lwdGj/aVlrQvzHFFd8Hwg//Y= +github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM= +github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo= +github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI2bnpBCr8= +github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck= +github.com/kr/pretty v0.2.1/go.mod h1:ipq/a2n7PKx3OHsz4KJII5eveXtPO4qwEXGdVfWzfnI= +github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE= +github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk= +github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ= +github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI= +github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= +github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= +github.com/mailru/easyjson v0.7.7 h1:UGYAvKxe3sBsEDzO8ZeWOSlIQfWFlxbzLZe7hwFURr0= +github.com/mailru/easyjson v0.7.7/go.mod h1:xzfreul335JAWq5oZzymOObrkdz5UnU4kGfJJLY9Nlc= +github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= +github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd h1:TRLaZ9cD/w8PVh93nsPXa1VrQ6jlwL5oN8l14QlcNfg= +github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= +github.com/modern-go/reflect2 v1.0.2 h1:xBagoLtFs94CBntxluKeaWgTMpvLxC4ur3nMaC9Gz0M= +github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk= +github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 h1:C3w9PqII01/Oq1c1nUAm88MOHcQC9l5mIlSMApZMrHA= +github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822/go.mod h1:+n7T8mK8HuQTcFwEeznm/DIxMOiR9yIdICNftLE1DvQ= +github.com/onsi/ginkgo/v2 v2.13.0 h1:0jY9lJquiL8fcf3M4LAXN5aMlS/b2BV86HFFPCPMgE4= +github.com/onsi/ginkgo/v2 v2.13.0/go.mod h1:TE309ZR8s5FsKKpuB1YAQYBzCaAfUgatB/xlT/ETL/o= +github.com/onsi/gomega v1.29.0 h1:KIA/t2t5UBzoirT4H9tsML45GEbo3ouUnBHsCfD2tVg= +github.com/onsi/gomega v1.29.0/go.mod h1:9sxs+SwGrKI0+PWe4Fxa9tFQQBG5xSsSbMXOI8PPpoQ= +github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= +github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/rogpeppe/go-internal v1.10.0 h1:TMyTOH3F/DB16zRVcYyreMH6GnZZrwQVAoYjRBZyWFQ= +github.com/rogpeppe/go-internal v1.10.0/go.mod h1:UQnix2H7Ngw/k4C5ijL5+65zddjncjaFoBhdsK/akog= +github.com/spf13/pflag v1.0.5 
h1:iy+VFUOCP1a+8yFto/drg2CJ5u0yRoB7fZw3DKv/JXA= +github.com/spf13/pflag v1.0.5/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= +github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= +github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw= +github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo= +github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= +github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= +github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU= +github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4= +github.com/stretchr/testify v1.8.4 h1:CcVxjf3Q8PM0mHUKJCdn+eZZtm5yQwehR5yeSVQQcUk= +github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo= +github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= +github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= +golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= +golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= +golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= +golang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= +golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= +golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= +golang.org/x/net v0.0.0-20190603091049-60506f45cf65/go.mod h1:HSz+uSET+XFnRR8LxR5pz3Of3rY3CfYBVs4xY44aLks= +golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= +golang.org/x/net v0.0.0-20200226121028-0de0cce0169b/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= +golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= +golang.org/x/net v0.19.0 h1:zTwKpTd2XuCqf8huc7Fo2iSy+4RHPd10s4KzeTnVr1c= +golang.org/x/net v0.19.0/go.mod h1:CfAk/cbD4CthTvqiEl8NpboMuiuOYsAr/7NOjZJtv1U= +golang.org/x/oauth2 v0.10.0 h1:zHCpF2Khkwy4mMB4bv0U37YtJdTGW8jI0glAApi0Kh8= +golang.org/x/oauth2 v0.10.0/go.mod h1:kTpgurOux7LqtuxjuyZa4Gj2gdezIt/jQtGnNFfypQI= +golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= +golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.15.0 h1:h48lPFYpsTvQJZF4EKyI4aLHaev3CxivZmv7yZig9pc= +golang.org/x/sys v0.15.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= +golang.org/x/term v0.15.0 h1:y/Oo/a/q3IXu26lQgl04j/gjuBDOBlx7X6Om1j2CPW4= +golang.org/x/term v0.15.0/go.mod h1:BDl952bC7+uMoWR75FIrCDx79TPU9oHkTZ9yRbYOrX0= +golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= +golang.org/x/text v0.3.2/go.mod 
h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk= +golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= +golang.org/x/text v0.14.0 h1:ScX5w1eTa3QqT8oi6+ziP7dTV1S2+ALU0bI+0zXKWiQ= +golang.org/x/text v0.14.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU= +golang.org/x/time v0.3.0 h1:rg5rLMjNzMS1RkNLzCG38eapWhnYLFYXDXj2gOlr8j4= +golang.org/x/time v0.3.0/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= +golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= +golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= +golang.org/x/tools v0.0.0-20200619180055-7c47624df98f/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE= +golang.org/x/tools v0.0.0-20210106214847-113979e3529a/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA= +golang.org/x/tools v0.16.1 h1:TLyB3WofjdOEepBHAU20JdNC1Zbg87elYofWYAY5oZA= +golang.org/x/tools v0.16.1/go.mod h1:kYVVN6I1mBNoB1OX+noeBjbRk4IUEPa7JJ+TJMEooJ0= +golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +google.golang.org/appengine v1.6.7 h1:FZR1q0exgwxzPzp/aF+VccGrSfxfPpkBqjIIEq3ru6c= +google.golang.org/appengine v1.6.7/go.mod h1:8WjMMxjGQR8xUklV/ARdw2HLXBOI7O7uCIDZVag1xfc= +google.golang.org/protobuf v1.26.0-rc.1/go.mod h1:jlhhOSvTdKEhbULTjvd4ARK9grFBp09yW+WbY/TyQbw= +google.golang.org/protobuf v1.26.0/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc= +google.golang.org/protobuf v1.31.0 h1:g0LDEJHgrBl9N9r17Ru3sqWhkIx2NB67okBHPwC7hs8= +google.golang.org/protobuf v1.31.0/go.mod h1:HV8QOd/L58Z+nl8r43ehVNZIU/HEI6OcFqwMG9pJV4I= +gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk= +gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q= +gopkg.in/inf.v0 v0.9.1 h1:73M5CoZyi3ZLMOyDlQh031Cx6N9NDJ2Vvfl76EDAgDc= +gopkg.in/inf.v0 v0.9.1/go.mod h1:cWUDdTG/fYaXco+Dcufb5Vnc6Gp2YChqWtbxRZE0mXw= +gopkg.in/yaml.v2 v2.2.8/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= +gopkg.in/yaml.v2 v2.4.0 h1:D8xgwECY7CYvx+Y2n4sBz93Jn9JRvxdiyyo8CTfuKaY= +gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ= +gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= +gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= +gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= +k8s.io/api v0.29.2 h1:hBC7B9+MU+ptchxEqTNW2DkUosJpp1P+Wn6YncZ474A= +k8s.io/api v0.29.2/go.mod h1:sdIaaKuU7P44aoyyLlikSLayT6Vb7bvJNCX105xZXY0= +k8s.io/apimachinery v0.29.2 h1:EWGpfJ856oj11C52NRCHuU7rFDwxev48z+6DSlGNsV8= +k8s.io/apimachinery v0.29.2/go.mod h1:6HVkd1FwxIagpYrHSwJlQqZI3G9LfYWRPAkUvLnXTKU= +k8s.io/client-go v0.29.2 h1:FEg85el1TeZp+/vYJM7hkDlSTFZ+c5nnK44DJ4FyoRg= +k8s.io/client-go v0.29.2/go.mod h1:knlvFZE58VpqbQpJNbCbctTVXcd35mMyAAwBdpt4jrA= +k8s.io/klog/v2 v2.110.1 h1:U/Af64HJf7FcwMcXyKm2RPM22WZzyR7OSpYj5tg3cL0= +k8s.io/klog/v2 v2.110.1/go.mod 
h1:YGtd1984u+GgbuZ7e08/yBuAfKLSO0+uR1Fhi6ExXjo= +k8s.io/kube-openapi v0.0.0-20231010175941-2dd684a91f00 h1:aVUu9fTY98ivBPKR9Y5w/AuzbMm96cd3YHRTU83I780= +k8s.io/kube-openapi v0.0.0-20231010175941-2dd684a91f00/go.mod h1:AsvuZPBlUDVuCdzJ87iajxtXuR9oktsTctW/R9wwouA= +k8s.io/utils v0.0.0-20230726121419-3b25d923346b h1:sgn3ZU783SCgtaSJjpcVVlRqd6GSnlTLKgpAAttJvpI= +k8s.io/utils v0.0.0-20230726121419-3b25d923346b/go.mod h1:OLgZIPagt7ERELqWJFomSt595RzquPNLL48iOWgYOg0= +sigs.k8s.io/json v0.0.0-20221116044647-bc3834ca7abd h1:EDPBXCAspyGV4jQlpZSudPeMmr1bNJefnuqLsRAsHZo= +sigs.k8s.io/json v0.0.0-20221116044647-bc3834ca7abd/go.mod h1:B8JuhiUyNFVKdsE8h686QcCxMaH6HrOAZj4vswFpcB0= +sigs.k8s.io/structured-merge-diff/v4 v4.4.1 h1:150L+0vs/8DA78h1u02ooW1/fFq/Lwr+sGiqlzvrtq4= +sigs.k8s.io/structured-merge-diff/v4 v4.4.1/go.mod h1:N8hJocpFajUSSeSJ9bOZ77VzejKZaXsTtZo4/u7Io08= +sigs.k8s.io/yaml v1.3.0 h1:a2VclLzOGrwOHDiV8EfBGhvjHvP46CtW5j6POvhYGGo= +sigs.k8s.io/yaml v1.3.0/go.mod h1:GeOyir5tyXNByN85N/dRIT9es5UQNerPYEKK56eTBm8= diff --git a/e2e/topology/generator/main.go b/e2e/topology/generator/main.go new file mode 100644 index 0000000..62d3a88 --- /dev/null +++ b/e2e/topology/generator/main.go @@ -0,0 +1,125 @@ +package main + +import ( + "context" + "flag" + "io" + "os" + "path/filepath" + "text/template" + + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/client-go/kubernetes" + "k8s.io/client-go/tools/clientcmd" +) + +const ( + TOPOLOGY_CONFIG_TMPL string = "kubernetes.yaml.tmpl" + TOPOLOGY_CONFIG string = "kubernetes.yaml" +) + +type NodeAddrs struct { + ControlPlane string + Worker string + Worker2 string + Worker3 string + ControlPlanePodCIDR string + WorkerPodCIDR string + Worker2PodCIDR string + Worker3PodCIDR string +} + +var ( + tmplPath = flag.String("template-path", TOPOLOGY_CONFIG_TMPL, "path to template") + outputPath = flag.String("output-path", TOPOLOGY_CONFIG, "path to output") +) + +func main() { + + flag.Parse() + + homeDir := os.Getenv("HOME") + kubeConfigPath := filepath.Join(homeDir, ".kube/config") + config, err := clientcmd.BuildConfigFromFlags("", kubeConfigPath) + if err != nil { + panic(err) + } + clientset, err := kubernetes.NewForConfig(config) + if err != nil { + panic(err) + } + ctx := context.Background() + nodes, err := clientset.CoreV1().Nodes().List(ctx, metav1.ListOptions{}) + if err != nil { + panic(err) + } + var nodeAddrs NodeAddrs + for _, node := range nodes.Items { + switch node.GetName() { + case "sart-control-plane": + for _, statusAddrs := range node.Status.Addresses { + if statusAddrs.Type == "InternalIP" { + nodeAddrs.ControlPlane = statusAddrs.Address + } + } + nodeAddrs.ControlPlanePodCIDR = node.Spec.PodCIDR + case "sart-worker": + for _, statusAddrs := range node.Status.Addresses { + if statusAddrs.Type == "InternalIP" { + nodeAddrs.Worker = statusAddrs.Address + } + } + nodeAddrs.WorkerPodCIDR = node.Spec.PodCIDR + case "sart-worker2": + for _, statusAddrs := range node.Status.Addresses { + if statusAddrs.Type == "InternalIP" { + nodeAddrs.Worker2 = statusAddrs.Address + } + } + nodeAddrs.Worker2PodCIDR = node.Spec.PodCIDR + case "sart-worker3": + for _, statusAddrs := range node.Status.Addresses { + if statusAddrs.Type == "InternalIP" { + nodeAddrs.Worker3 = statusAddrs.Address + } + } + nodeAddrs.Worker3PodCIDR = node.Spec.PodCIDR + default: + } + } + + if !nodeAddrs.check() { + panic("Failed to get Node Address") + } + + file, err := os.Open(*tmplPath) + if err != nil { + panic(err) + } + defer file.Close() + + tmplData, err := io.ReadAll(file) + if 
err != nil { + panic(err) + } + + tmpl, err := template.New(TOPOLOGY_CONFIG_TMPL).Parse(string(tmplData)) + if err != nil { + panic(err) + } + + outFile, err := os.Create(*outputPath) + if err != nil { + panic(err) + } + defer outFile.Close() + + if err := tmpl.Execute(outFile, nodeAddrs); err != nil { + panic(err) + } + +} + +func (n *NodeAddrs) check() bool { + return !(n.ControlPlane == "" || n.Worker == "" || n.Worker2 == "" || n.Worker3 == "") +} diff --git a/e2e/topology/kubernetes-cni-compact.yaml.tmpl b/e2e/topology/kubernetes-cni-compact.yaml.tmpl new file mode 100644 index 0000000..1d5e690 --- /dev/null +++ b/e2e/topology/kubernetes-cni-compact.yaml.tmpl @@ -0,0 +1,210 @@ +name: sart +topology: + kinds: + linux: + cmd: bash + nodes: + router0: + kind: linux + image: frrouting/frr:v8.4.0 + exec: + - ip addr add 169.254.1.1/24 dev net0 scope link + - ip addr add 169.254.2.1/24 dev net1 scope link + - ip addr add 169.254.3.1/24 dev net2 scope link + - ip addr add 169.254.4.1/24 dev net3 scope link + - ip addr add 192.168.0.1/24 dev net4 scope link + # route for node primary address + - ip route add {{ .ControlPlane }}/32 dev net0 + - ip route add {{ .Worker }}/32 dev net1 + - ip route add {{ .Worker2 }}/32 dev net2 + - ip route add {{ .Worker3 }}/32 dev net3 + - ip route add 6.6.6.6/32 via 192.168.0.2 dev net4 + - ip link add dummy0 type dummy + - ip addr add 9.9.9.9/32 dev dummy0 + - ip link set up dev dummy0 + - sysctl -w net.ipv4.fib_multipath_hash_policy=1 + - sysctl -p + # Boiler plate to make FRR work + - touch /etc/frr/vtysh.conf + - sed -i -e 's/bgpd=no/bgpd=yes/g' /etc/frr/daemons + - /usr/lib/frr/frrinit.sh start + # FRR configuration + - >- + vtysh -c 'conf t' + -c 'frr defaults datacenter' + -c '!' + -c 'ip prefix-list LAB-SUBNET permit 10.0.0.0/8 ge 8' + -c '!' + -c 'router bgp 65000' + -c ' bgp router-id 9.9.9.9' + -c ' bgp bestpath as-path multipath-relax' + -c ' neighbor 169.254.1.2 remote-as 65010' + -c ' neighbor 169.254.2.2 remote-as 65020' + -c ' neighbor 169.254.3.2 remote-as 65030' + -c ' neighbor 169.254.4.2 remote-as 65040' + -c ' neighbor 169.254.1.2 update-source dummy0' + -c ' neighbor 169.254.2.2 update-source dummy0' + -c ' neighbor 169.254.3.2 update-source dummy0' + -c ' neighbor 169.254.4.2 update-source dummy0' + -c ' neighbor 169.254.1.2 next-hop-self' + -c ' neighbor 169.254.2.2 next-hop-self' + -c ' neighbor 169.254.3.2 next-hop-self' + -c ' neighbor 169.254.4.2 next-hop-self' + -c '!' + router0-debug: + kind: linux + image: nicolaka/netshoot:latest + network-mode: container:clab-sart-router0 + router1: + kind: linux + image: frrouting/frr:v8.4.0 + exec: + - ip addr add 169.253.1.1/24 dev net0 scope link + - ip addr add 169.253.2.1/24 dev net1 scope link + - ip addr add 169.253.3.1/24 dev net2 scope link + - ip addr add 169.253.4.1/24 dev net3 scope link + - ip addr add 192.168.1.1/24 dev net4 scope link + - ip link add dummy0 type dummy + - ip addr add 7.7.7.7/32 dev dummy0 + - ip link set up dev dummy0 + - ip route add {{ .ControlPlane }}/32 dev net0 + - ip route add {{ .Worker }}/32 dev net1 + - ip route add {{ .Worker2 }}/32 dev net2 + - ip route add {{ .Worker3 }}/32 dev net3 + - ip route add 6.6.6.6/32 via 192.168.1.2 dev net4 + - sysctl -w net.ipv4.fib_multipath_hash_policy=1 + - sysctl -p + # Boiler plate to make FRR work + - touch /etc/frr/vtysh.conf + - sed -i -e 's/bgpd=no/bgpd=yes/g' /etc/frr/daemons + - /usr/lib/frr/frrinit.sh start + # FRR configuration + - >- + vtysh -c 'conf t' + -c 'frr defaults datacenter' + -c '!' 
+ -c 'ip prefix-list LAB-SUBNET permit 10.0.0.0/8 ge 8' + -c '!' + -c 'router bgp 65000' + -c ' bgp router-id 7.7.7.7' + -c ' bgp bestpath as-path multipath-relax' + -c ' neighbor 169.253.1.2 remote-as 65010' + -c ' neighbor 169.253.2.2 remote-as 65020' + -c ' neighbor 169.253.3.2 remote-as 65030' + -c ' neighbor 169.253.4.2 remote-as 65040' + -c ' neighbor 169.253.1.2 update-source dummy0' + -c ' neighbor 169.253.2.2 update-source dummy0' + -c ' neighbor 169.253.3.2 update-source dummy0' + -c ' neighbor 169.253.4.2 update-source dummy0' + -c ' neighbor 169.253.1.2 next-hop-self' + -c ' neighbor 169.253.2.2 next-hop-self' + -c ' neighbor 169.253.3.2 next-hop-self' + -c ' neighbor 169.253.4.2 next-hop-self' + -c '!' + router1-debug: + kind: linux + image: nicolaka/netshoot:latest + network-mode: container:clab-sart-router1 + control-plane0: + kind: linux + image: nicolaka/netshoot:latest + network-mode: container:sart-control-plane + exec: + # Enable ECMP + - sysctl -w net.ipv4.fib_multipath_hash_policy=1 + - sysctl -p + # Address for peering + - ip addr add 169.254.1.2/24 dev net0 scope link + - ip addr add 169.253.1.2/24 dev net1 scope link + # Route traffic to the lab through router + - ip route add {{ .Worker }}/32 src {{ .ControlPlane }} nexthop via 169.254.1.1 weight 1 nexthop via 169.253.1.1 weight 1 + - ip route add {{ .Worker2 }}/32 src {{ .ControlPlane }} nexthop via 169.254.1.1 weight 1 nexthop via 169.253.1.1 weight 1 + - ip route add {{ .Worker3 }}/32 src {{ .ControlPlane }} nexthop via 169.254.1.1 weight 1 nexthop via 169.253.1.1 weight 1 + - ip route add 192.168.0.0/24 via 169.254.1.1 dev net0 + - ip route add 192.168.1.0/24 via 169.253.1.1 dev net1 + - ip route add 6.6.6.6/32 via 169.254.1.1 dev net0 + - ip route add 9.9.9.9/32 via 169.254.1.1 dev net0 + - ip route add 7.7.7.7/32 via 169.253.1.1 dev net1 + worker0: + kind: linux + image: nicolaka/netshoot:latest + network-mode: container:sart-worker + exec: + # Enable ECMP + - sysctl -w net.ipv4.fib_multipath_hash_policy=1 + - sysctl -p + # Address for peering + - ip addr add 169.254.2.2/24 dev net0 scope link + - ip addr add 169.253.2.2/24 dev net1 scope link + # Route traffic to the lab through router + - ip route add {{ .ControlPlane }}/32 src {{ .Worker }} nexthop via 169.254.2.1 weight 1 nexthop via 169.253.2.1 weight 1 + - ip route add {{ .Worker2 }}/32 src {{ .Worker }} nexthop via 169.254.2.1 weight 1 nexthop via 169.253.2.1 weight 1 + - ip route add {{ .Worker3 }}/32 src {{ .Worker }} nexthop via 169.254.2.1 weight 1 nexthop via 169.253.2.1 weight 1 + - ip route add 192.168.0.0/24 via 169.254.2.1 dev net0 + - ip route add 192.168.1.0/24 via 169.253.2.1 dev net1 + - ip route add 6.6.6.6/32 via 169.254.2.1 dev net0 + - ip route add 9.9.9.9/32 via 169.254.2.1 dev net0 + - ip route add 7.7.7.7/32 via 169.253.2.1 dev net1 + worker1: + kind: linux + image: nicolaka/netshoot:latest + network-mode: container:sart-worker2 + exec: + # Enable ECMP + - sysctl -w net.ipv4.fib_multipath_hash_policy=1 + - sysctl -p + # Address for peering + - ip addr add 169.254.3.2/24 dev net0 scope link + - ip addr add 169.253.3.2/24 dev net1 scope link + # Route traffic to the lab through router + - ip route add {{ .ControlPlane }}/32 src {{ .Worker2 }} nexthop via 169.254.3.1 weight 1 nexthop via 169.253.3.1 weight 1 + - ip route add {{ .Worker }}/32 src {{ .Worker2 }} nexthop via 169.254.3.1 weight 1 nexthop via 169.253.3.1 weight 1 + - ip route add {{ .Worker3 }}/32 src {{ .Worker2 }} nexthop via 169.254.3.1 weight 1 nexthop via 169.253.3.1 
weight 1
+        - ip route add 192.168.0.0/24 via 169.254.3.1 dev net0
+        - ip route add 192.168.1.0/24 via 169.253.3.1 dev net1
+        - ip route add 6.6.6.6/32 via 169.254.3.1 dev net0
+        - ip route add 9.9.9.9/32 via 169.254.3.1 dev net0
+        - ip route add 7.7.7.7/32 via 169.253.3.1 dev net1
+    worker2:
+      kind: linux
+      image: nicolaka/netshoot:latest
+      network-mode: container:sart-worker3
+      exec:
+        # Enable ECMP
+        - sysctl -w net.ipv4.fib_multipath_hash_policy=1
+        - sysctl -p
+        # Address for peering
+        - ip addr add 169.254.4.2/24 dev net0 scope link
+        - ip addr add 169.253.4.2/24 dev net1 scope link
+        # Route traffic to the lab through router
+        - ip route add {{ .ControlPlane }}/32 src {{ .Worker3 }} nexthop via 169.254.4.1 weight 1 nexthop via 169.253.4.1 weight 1
+        - ip route add {{ .Worker }}/32 src {{ .Worker3 }} nexthop via 169.254.4.1 weight 1 nexthop via 169.253.4.1 weight 1
+        - ip route add {{ .Worker2 }}/32 src {{ .Worker3 }} nexthop via 169.254.4.1 weight 1 nexthop via 169.253.4.1 weight 1
+        - ip route add 192.168.0.0/24 via 169.254.4.1 dev net0
+        - ip route add 192.168.1.0/24 via 169.253.4.1 dev net1
+        - ip route add 6.6.6.6/32 via 169.254.4.1 dev net0
+        - ip route add 9.9.9.9/32 via 169.254.4.1 dev net0
+        - ip route add 7.7.7.7/32 via 169.253.4.1 dev net1
+    client0:
+      kind: linux
+      image: nicolaka/netshoot:latest
+      exec:
+        - sysctl -w net.ipv4.fib_multipath_hash_policy=1
+        - sysctl -p
+        - ip addr add 192.168.0.2/24 dev net0
+        - ip addr add 192.168.1.2/24 dev net1
+        - ip link add dummy0 type dummy
+        - ip addr add 6.6.6.6/32 dev dummy0
+        - ip link set up dev dummy0
+        # ECMP default route via both routers (each path needs its own "nexthop" keyword)
+        - ip route change default src 6.6.6.6 nexthop via 192.168.0.1 weight 1 nexthop via 192.168.1.1 weight 1
+  links:
+    - endpoints: ["router0:net0", "control-plane0:net0"]
+    - endpoints: ["router0:net1", "worker0:net0"]
+    - endpoints: ["router0:net2", "worker1:net0"]
+    - endpoints: ["router0:net3", "worker2:net0"]
+    - endpoints: ["router1:net0", "control-plane0:net1"]
+    - endpoints: ["router1:net1", "worker0:net1"]
+    - endpoints: ["router1:net2", "worker1:net1"]
+    - endpoints: ["router1:net3", "worker2:net1"]
+    - endpoints: ["router0:net4", "client0:net0"]
+    - endpoints: ["router1:net4", "client0:net1"]
diff --git a/e2e/topology/kubernetes-cni.yaml.tmpl b/e2e/topology/kubernetes-cni.yaml.tmpl
new file mode 100644
index 0000000..e10ae5c
--- /dev/null
+++ b/e2e/topology/kubernetes-cni.yaml.tmpl
@@ -0,0 +1,235 @@
+name: sart
+topology:
+  kinds:
+    linux:
+      cmd: bash
+  nodes:
+    core0:
+      kind: linux
+      image: frrouting/frr:v8.4.0
+      exec:
+        - ip addr add 169.252.1.0/24 dev net0 scope link
+        - ip addr add 169.252.2.0/24 dev net1 scope link
+        - ip addr add 192.168.0.1/24 dev net2 scope link
+        - ip link add dummy0 type dummy
+        - ip addr add 5.5.5.5/32 dev dummy0 scope global
+        - ip link set up dev dummy0
+        - ip route add 6.6.6.6/32 via 192.168.0.2 dev net2
+        - sysctl -w net.ipv4.fib_multipath_hash_policy=1
+        - sysctl -p
+        # Boiler plate to make FRR work
+        - touch /etc/frr/vtysh.conf
+        - sed -i -e 's/bgpd=no/bgpd=yes/g' /etc/frr/daemons
+        - /usr/lib/frr/frrinit.sh start
+        # FRR configuration
+        - >-
+          vtysh -c 'conf t'
+          -c 'frr defaults datacenter'
+          -c '!'
+          -c 'router bgp 65000'
+          -c ' bgp router-id 5.5.5.5'
+          -c ' bgp bestpath as-path multipath-relax'
+          -c ' neighbor 169.252.1.1 remote-as 65001'
+          -c ' neighbor 169.252.2.1 remote-as 65002'
+          -c '!'
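+    # Topology note: core0 (AS 65000) is the superspine; it peers with spine0
+    # (AS 65001) and spine1 (AS 65002), which in turn peer with the four kind
+    # nodes (AS 65010-65040, configured below). Pod routes advertised by
+    # sartd-bgp on the nodes therefore propagate up to core0 and on to the
+    # client0 container attached to it.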
+ core0-debug: + kind: linux + image: nicolaka/netshoot:latest + network-mode: container:clab-sart-core0 + spine0: + kind: linux + image: frrouting/frr:v8.4.0 + exec: + - ip addr add 169.254.1.1/24 dev net0 scope link + - ip addr add 169.254.2.1/24 dev net1 scope link + - ip addr add 169.254.3.1/24 dev net2 scope link + - ip addr add 169.254.4.1/24 dev net3 scope link + - ip addr add 169.252.1.1/24 dev net4 scope link + # route for node primary address + - ip route add {{ .ControlPlane }}/32 dev net0 + - ip route add {{ .Worker }}/32 dev net1 + - ip route add {{ .Worker2 }}/32 dev net2 + - ip route add {{ .Worker3 }}/32 dev net3 + - ip link add dummy0 type dummy + - ip addr add 9.9.9.9/32 dev dummy0 + - ip link set up dev dummy0 + - sysctl -w net.ipv4.fib_multipath_hash_policy=1 + - sysctl -p + # Boiler plate to make FRR work + - touch /etc/frr/vtysh.conf + - sed -i -e 's/bgpd=no/bgpd=yes/g' /etc/frr/daemons + - /usr/lib/frr/frrinit.sh start + # FRR configuration + - >- + vtysh -c 'conf t' + -c 'frr defaults datacenter' + -c '!' + -c 'router bgp 65001' + -c ' bgp router-id 9.9.9.9' + -c ' bgp bestpath as-path multipath-relax' + -c ' neighbor 169.252.1.0 remote-as 65000' + -c ' neighbor 169.254.1.2 remote-as 65010' + -c ' neighbor 169.254.2.2 remote-as 65020' + -c ' neighbor 169.254.3.2 remote-as 65030' + -c ' neighbor 169.254.4.2 remote-as 65040' + -c ' neighbor 169.254.1.2 update-source dummy0' + -c ' neighbor 169.254.2.2 update-source dummy0' + -c ' neighbor 169.254.3.2 update-source dummy0' + -c ' neighbor 169.254.4.2 update-source dummy0' + -c ' neighbor 169.254.1.2 next-hop-self' + -c ' neighbor 169.254.2.2 next-hop-self' + -c ' neighbor 169.254.3.2 next-hop-self' + -c ' neighbor 169.254.4.2 next-hop-self' + -c '!' + spine0-debug: + kind: linux + image: nicolaka/netshoot:latest + network-mode: container:clab-sart-spine0 + spine1: + kind: linux + image: frrouting/frr:v8.4.0 + exec: + - ip addr add 169.253.1.1/24 dev net0 scope link + - ip addr add 169.253.2.1/24 dev net1 scope link + - ip addr add 169.253.3.1/24 dev net2 scope link + - ip addr add 169.253.4.1/24 dev net3 scope link + - ip addr add 169.252.2.1/24 dev net4 scope link + - ip link add dummy0 type dummy + - ip addr add 7.7.7.7/32 dev dummy0 + - ip link set up dev dummy0 + - ip route add {{ .ControlPlane }}/32 dev net0 + - ip route add {{ .Worker }}/32 dev net1 + - ip route add {{ .Worker2 }}/32 dev net2 + - ip route add {{ .Worker3 }}/32 dev net3 + - sysctl -w net.ipv4.fib_multipath_hash_policy=1 + - sysctl -p + # Boiler plate to make FRR work + - touch /etc/frr/vtysh.conf + - sed -i -e 's/bgpd=no/bgpd=yes/g' /etc/frr/daemons + - /usr/lib/frr/frrinit.sh start + # FRR configuration + - >- + vtysh -c 'conf t' + -c 'frr defaults datacenter' + -c '!' + -c 'router bgp 65002' + -c ' bgp router-id 7.7.7.7' + -c ' bgp bestpath as-path multipath-relax' + -c ' neighbor 169.252.2.0 remote-as 65000' + -c ' neighbor 169.253.1.2 remote-as 65010' + -c ' neighbor 169.253.2.2 remote-as 65020' + -c ' neighbor 169.253.3.2 remote-as 65030' + -c ' neighbor 169.253.4.2 remote-as 65040' + -c ' neighbor 169.253.1.2 update-source dummy0' + -c ' neighbor 169.253.2.2 update-source dummy0' + -c ' neighbor 169.253.3.2 update-source dummy0' + -c ' neighbor 169.253.4.2 update-source dummy0' + -c ' neighbor 169.253.1.2 next-hop-self' + -c ' neighbor 169.253.2.2 next-hop-self' + -c ' neighbor 169.253.3.2 next-hop-self' + -c ' neighbor 169.253.4.2 next-hop-self' + -c '!' 
+ spine1-debug: + kind: linux + image: nicolaka/netshoot:latest + network-mode: container:clab-sart-spine1 + control-plane0: + kind: linux + image: nicolaka/netshoot:latest + network-mode: container:sart-control-plane + exec: + # Enable ECMP + - sysctl -w net.ipv4.fib_multipath_hash_policy=1 + - sysctl -p + # Address for peering + - ip addr add 169.254.1.2/24 dev net0 scope link + - ip addr add 169.253.1.2/24 dev net1 scope link + # Route traffic to the lab through router + - ip route add {{ .Worker }}/32 src {{ .ControlPlane }} nexthop via 169.254.1.1 weight 1 nexthop via 169.253.1.1 weight 1 + - ip route add {{ .Worker2 }}/32 src {{ .ControlPlane }} nexthop via 169.254.1.1 weight 1 nexthop via 169.253.1.1 weight 1 + - ip route add {{ .Worker3 }}/32 src {{ .ControlPlane }} nexthop via 169.254.1.1 weight 1 nexthop via 169.253.1.1 weight 1 + - ip route add 192.168.0.0/24 via 169.254.1.1 dev net0 + - ip route add 192.168.1.0/24 via 169.253.1.1 dev net1 + - ip route add 6.6.6.6/32 via 169.254.1.1 dev net0 + - ip route add 9.9.9.9/32 via 169.254.1.1 dev net0 + - ip route add 7.7.7.7/32 via 169.253.1.1 dev net1 + worker0: + kind: linux + image: nicolaka/netshoot:latest + network-mode: container:sart-worker + exec: + # Enable ECMP + - sysctl -w net.ipv4.fib_multipath_hash_policy=1 + - sysctl -p + # Address for peering + - ip addr add 169.254.2.2/24 dev net0 scope link + - ip addr add 169.253.2.2/24 dev net1 scope link + # Route traffic to the lab through router + - ip route add {{ .ControlPlane }}/32 src {{ .Worker }} nexthop via 169.254.2.1 weight 1 nexthop via 169.253.2.1 weight 1 + - ip route add {{ .Worker2 }}/32 src {{ .Worker }} nexthop via 169.254.2.1 weight 1 nexthop via 169.253.2.1 weight 1 + - ip route add {{ .Worker3 }}/32 src {{ .Worker }} nexthop via 169.254.2.1 weight 1 nexthop via 169.253.2.1 weight 1 + - ip route add 192.168.0.0/24 via 169.254.2.1 dev net0 + - ip route add 192.168.1.0/24 via 169.253.2.1 dev net1 + - ip route add 6.6.6.6/32 via 169.254.2.1 dev net0 + - ip route add 9.9.9.9/32 via 169.254.2.1 dev net0 + - ip route add 7.7.7.7/32 via 169.253.2.1 dev net1 + worker1: + kind: linux + image: nicolaka/netshoot:latest + network-mode: container:sart-worker2 + exec: + # Enable ECMP + - sysctl -w net.ipv4.fib_multipath_hash_policy=1 + - sysctl -p + # Address for peering + - ip addr add 169.254.3.2/24 dev net0 scope link + - ip addr add 169.253.3.2/24 dev net1 scope link + # Route traffic to the lab through router + - ip route add {{ .ControlPlane }}/32 src {{ .Worker2 }} nexthop via 169.254.3.1 weight 1 nexthop via 169.253.3.1 weight 1 + - ip route add {{ .Worker }}/32 src {{ .Worker2 }} nexthop via 169.254.3.1 weight 1 nexthop via 169.253.3.1 weight 1 + - ip route add {{ .Worker3 }}/32 src {{ .Worker2 }} nexthop via 169.254.3.1 weight 1 nexthop via 169.253.3.1 weight 1 + - ip route add 192.168.0.0/24 via 169.254.3.1 dev net0 + - ip route add 192.168.1.0/24 via 169.253.3.1 dev net1 + - ip route add 6.6.6.6/32 via 169.254.3.1 dev net0 + - ip route add 9.9.9.9/32 via 169.254.3.1 dev net0 + - ip route add 7.7.7.7/32 via 169.253.3.1 dev net1 + worker2: + kind: linux + image: nicolaka/netshoot:latest + network-mode: container:sart-worker3 + exec: + # Enable ECMP + - sysctl -w net.ipv4.fib_multipath_hash_policy=1 + - sysctl -p + # Address for peering + - ip addr add 169.254.4.2/24 dev net0 scope link + - ip addr add 169.253.4.2/24 dev net1 scope link + # Route traffic to the lab through router + - ip route add {{ .ControlPlane }}/32 src {{ .Worker3 }} nexthop via 169.254.4.1 
weight 1 nexthop via 169.253.4.1 weight 1 + - ip route add {{ .Worker }}/32 src {{ .Worker3 }} nexthop via 169.254.4.1 weight 1 nexthop via 169.253.4.1 weight 1 + - ip route add {{ .Worker2 }}/32 src {{ .Worker3 }} nexthop via 169.254.4.1 weight 1 nexthop via 169.253.4.1 weight 1 + - ip route add 192.168.0.0/24 via 169.254.4.1 dev net0 + - ip route add 192.168.1.0/24 via 169.253.4.1 dev net1 + - ip route add 6.6.6.6/32 via 169.254.4.1 dev net0 + - ip route add 9.9.9.9/32 via 169.254.4.1 dev net0 + - ip route add 7.7.7.7/32 via 169.253.4.1 dev net1 + client0: + kind: linux + image: nicolaka/netshoot:latest + exec: + - ip addr add 192.168.0.2/24 dev net0 scope global + - ip route change default dev net0 + - sysctl -w net.ipv4.fib_multipath_hash_policy=1 + - sysctl -p + links: + - endpoints: ["core0:net0", "spine0:net4"] + - endpoints: ["core0:net1", "spine1:net4"] + - endpoints: ["spine0:net0", "control-plane0:net0"] + - endpoints: ["spine0:net1", "worker0:net0"] + - endpoints: ["spine0:net2", "worker1:net0"] + - endpoints: ["spine0:net3", "worker2:net0"] + - endpoints: ["spine1:net0", "control-plane0:net1"] + - endpoints: ["spine1:net1", "worker0:net1"] + - endpoints: ["spine1:net2", "worker1:net1"] + - endpoints: ["spine1:net3", "worker2:net1"] + - endpoints: ["core0:net2", "client0:net0"] diff --git a/manifests/base/certs/tls.cert b/manifests/base/certs/tls.cert deleted file mode 100644 index a9df8f6..0000000 --- a/manifests/base/certs/tls.cert +++ /dev/null @@ -1,11 +0,0 @@ ------BEGIN CERTIFICATE----- -MIIBnTCCAUSgAwIBAgIUfj7gkzzS0q1jxWhFL/cSPPnrp7IwCgYIKoZIzj0EAwIw -ITEfMB0GA1UEAwwWcmNnZW4gc2VsZiBzaWduZWQgY2VydDAgFw0yNDAyMDQxNTUy -MTlaGA8yMTI0MDExMTE1NTIxOVowITEfMB0GA1UEAwwWcmNnZW4gc2VsZiBzaWdu -ZWQgY2VydDBZMBMGByqGSM49AgEGCCqGSM49AwEHA0IABDJCNhpBRg8XKxa8ybWf -yHuwxx2XZJgjstZ/zZXu4Piym1MAXhEQSWXGv+Z0gnJ+EhOMG19PMkprAyWgOlK6 -/36jWDBWMC8GA1UdEQQoMCaCJHNhcnQtd2ViaG9vay1zZXJ2aWNlLmt1YmUtc3lz -dGVtLnN2YzAOBgNVHQ8BAf8EBAMCBaAwEwYDVR0lBAwwCgYIKwYBBQUHAwEwCgYI -KoZIzj0EAwIDRwAwRAIgUNDkqQSc219TlOE/6N4g29Jj5MNge70kj3f8CaNtJRsC -IGC4ZYFnzzsdFhz0Tn1a1pkY2Uefdwppac8ptPgk3p/K ------END CERTIFICATE----- diff --git a/manifests/base/certs/tls.key b/manifests/base/certs/tls.key deleted file mode 100644 index c1ed052..0000000 --- a/manifests/base/certs/tls.key +++ /dev/null @@ -1,5 +0,0 @@ ------BEGIN PRIVATE KEY----- -MIGHAgEAMBMGByqGSM49AgEGCCqGSM49AwEHBG0wawIBAQQgcOocjh9jn1PN0A+V -4m1LuZwpRU3fOjDpn5OCZ2JphamhRANCAAQyQjYaQUYPFysWvMm1n8h7sMcdl2SY -I7LWf82V7uD4sptTAF4REEllxr/mdIJyfhITjBtfTzJKawMloDpSuv9+ ------END PRIVATE KEY----- diff --git a/manifests/base/webhook/admission_webhook.yaml b/manifests/base/webhook/admission_webhook.yaml index 671da8b..89e1b88 100644 --- a/manifests/base/webhook/admission_webhook.yaml +++ b/manifests/base/webhook/admission_webhook.yaml @@ -63,7 +63,7 @@ webhooks: - CREATE - UPDATE resources: - - addresspool + - addresspools sideEffects: None --- apiVersion: admissionregistration.k8s.io/v1 diff --git a/manifests/base/webhook/admission_webhook_patch.yaml b/manifests/base/webhook/admission_webhook_patch.yaml index cb572f0..9875998 100644 --- a/manifests/base/webhook/admission_webhook_patch.yaml +++ b/manifests/base/webhook/admission_webhook_patch.yaml @@ -5,13 +5,13 @@ metadata: webhooks: - name: vbgppeer.kb.io clientConfig: - caBundle: 
"LS0tLS1CRUdJTiBDRVJUSUZJQ0FURS0tLS0tCk1JSUJuVENDQVVTZ0F3SUJBZ0lVZmo3Z2t6elMwcTFqeFdoRkwvY1NQUG5ycDdJd0NnWUlLb1pJemowRUF3SXcKSVRFZk1CMEdBMVVFQXd3V2NtTm5aVzRnYzJWc1ppQnphV2R1WldRZ1kyVnlkREFnRncweU5EQXlNRFF4TlRVeQpNVGxhR0E4eU1USTBNREV4TVRFMU5USXhPVm93SVRFZk1CMEdBMVVFQXd3V2NtTm5aVzRnYzJWc1ppQnphV2R1ClpXUWdZMlZ5ZERCWk1CTUdCeXFHU000OUFnRUdDQ3FHU000OUF3RUhBMElBQkRKQ05ocEJSZzhYS3hhOHliV2YKeUh1d3h4MlhaSmdqc3RaL3paWHU0UGl5bTFNQVhoRVFTV1hHditaMGduSitFaE9NRzE5UE1rcHJBeVdnT2xLNgovMzZqV0RCV01DOEdBMVVkRVFRb01DYUNKSE5oY25RdGQyVmlhRzl2YXkxelpYSjJhV05sTG10MVltVXRjM2x6CmRHVnRMbk4yWXpBT0JnTlZIUThCQWY4RUJBTUNCYUF3RXdZRFZSMGxCQXd3Q2dZSUt3WUJCUVVIQXdFd0NnWUkKS29aSXpqMEVBd0lEUndBd1JBSWdVTkRrcVFTYzIxOVRsT0UvNk40ZzI5Smo1TU5nZTcwa2ozZjhDYU50SlJzQwpJR0M0WllGbnp6c2RGaHowVG4xYTFwa1kyVWVmZHdwcGFjOHB0UGdrM3AvSwotLS0tLUVORCBDRVJUSUZJQ0FURS0tLS0tCg==" + caBundle: "LS0tLS1CRUdJTiBDRVJUSUZJQ0FURS0tLS0tCk1JSUJuekNDQVVXZ0F3SUJBZ0lWQUtTMmNUQmJMWjAySkIySkF5cm1GZElsbHlkUU1Bb0dDQ3FHU000OUJBTUMKTUNFeEh6QWRCZ05WQkFNTUZuSmpaMlZ1SUhObGJHWWdjMmxuYm1Wa0lHTmxjblF3SUJjTk1qUXdNekV6TURJdwpNalEwV2hnUE1qRXlOREF5TVRnd01qQXlORFJhTUNFeEh6QWRCZ05WQkFNTUZuSmpaMlZ1SUhObGJHWWdjMmxuCmJtVmtJR05sY25Rd1dUQVRCZ2NxaGtqT1BRSUJCZ2dxaGtqT1BRTUJCd05DQUFRZFVIaGcyelRNNXM3RVhneWQKOVBFQUNKQjRzd2JybGRBR1N1WS91SHRNOGpQRDFUY3pyZURDbm5mTDJBRXZYbk1NcTRQM0RhSm1mR28raDZucgpBU0RvbzFnd1ZqQXZCZ05WSFJFRUtEQW1naVJ6WVhKMExYZGxZbWh2YjJzdGMyVnlkbWxqWlM1cmRXSmxMWE41CmMzUmxiUzV6ZG1Nd0RnWURWUjBQQVFIL0JBUURBZ1dnTUJNR0ExVWRKUVFNTUFvR0NDc0dBUVVGQndNQk1Bb0cKQ0NxR1NNNDlCQU1DQTBnQU1FVUNJQVIwdzNLRjlvelNUU2FQY04vQ3dtQVliMDBHazdseUREMkJtdFR1RU5ncQpBaUVBZ29CZmdTKy9TelF2ckJOLy90VDBBbm5XcHphZEd2c3gvaG5OSksyWDZjST0KLS0tLS1FTkQgQ0VSVElGSUNBVEUtLS0tLQo=" - name: vbgpadvertisement.kb.io clientConfig: - caBundle: "LS0tLS1CRUdJTiBDRVJUSUZJQ0FURS0tLS0tCk1JSUJuVENDQVVTZ0F3SUJBZ0lVZmo3Z2t6elMwcTFqeFdoRkwvY1NQUG5ycDdJd0NnWUlLb1pJemowRUF3SXcKSVRFZk1CMEdBMVVFQXd3V2NtTm5aVzRnYzJWc1ppQnphV2R1WldRZ1kyVnlkREFnRncweU5EQXlNRFF4TlRVeQpNVGxhR0E4eU1USTBNREV4TVRFMU5USXhPVm93SVRFZk1CMEdBMVVFQXd3V2NtTm5aVzRnYzJWc1ppQnphV2R1ClpXUWdZMlZ5ZERCWk1CTUdCeXFHU000OUFnRUdDQ3FHU000OUF3RUhBMElBQkRKQ05ocEJSZzhYS3hhOHliV2YKeUh1d3h4MlhaSmdqc3RaL3paWHU0UGl5bTFNQVhoRVFTV1hHditaMGduSitFaE9NRzE5UE1rcHJBeVdnT2xLNgovMzZqV0RCV01DOEdBMVVkRVFRb01DYUNKSE5oY25RdGQyVmlhRzl2YXkxelpYSjJhV05sTG10MVltVXRjM2x6CmRHVnRMbk4yWXpBT0JnTlZIUThCQWY4RUJBTUNCYUF3RXdZRFZSMGxCQXd3Q2dZSUt3WUJCUVVIQXdFd0NnWUkKS29aSXpqMEVBd0lEUndBd1JBSWdVTkRrcVFTYzIxOVRsT0UvNk40ZzI5Smo1TU5nZTcwa2ozZjhDYU50SlJzQwpJR0M0WllGbnp6c2RGaHowVG4xYTFwa1kyVWVmZHdwcGFjOHB0UGdrM3AvSwotLS0tLUVORCBDRVJUSUZJQ0FURS0tLS0tCg==" + caBundle: "LS0tLS1CRUdJTiBDRVJUSUZJQ0FURS0tLS0tCk1JSUJuekNDQVVXZ0F3SUJBZ0lWQUtTMmNUQmJMWjAySkIySkF5cm1GZElsbHlkUU1Bb0dDQ3FHU000OUJBTUMKTUNFeEh6QWRCZ05WQkFNTUZuSmpaMlZ1SUhObGJHWWdjMmxuYm1Wa0lHTmxjblF3SUJjTk1qUXdNekV6TURJdwpNalEwV2hnUE1qRXlOREF5TVRnd01qQXlORFJhTUNFeEh6QWRCZ05WQkFNTUZuSmpaMlZ1SUhObGJHWWdjMmxuCmJtVmtJR05sY25Rd1dUQVRCZ2NxaGtqT1BRSUJCZ2dxaGtqT1BRTUJCd05DQUFRZFVIaGcyelRNNXM3RVhneWQKOVBFQUNKQjRzd2JybGRBR1N1WS91SHRNOGpQRDFUY3pyZURDbm5mTDJBRXZYbk1NcTRQM0RhSm1mR28raDZucgpBU0RvbzFnd1ZqQXZCZ05WSFJFRUtEQW1naVJ6WVhKMExYZGxZbWh2YjJzdGMyVnlkbWxqWlM1cmRXSmxMWE41CmMzUmxiUzV6ZG1Nd0RnWURWUjBQQVFIL0JBUURBZ1dnTUJNR0ExVWRKUVFNTUFvR0NDc0dBUVVGQndNQk1Bb0cKQ0NxR1NNNDlCQU1DQTBnQU1FVUNJQVIwdzNLRjlvelNUU2FQY04vQ3dtQVliMDBHazdseUREMkJtdFR1RU5ncQpBaUVBZ29CZmdTKy9TelF2ckJOLy90VDBBbm5XcHphZEd2c3gvaG5OSksyWDZjST0KLS0tLS1FTkQgQ0VSVElGSUNBVEUtLS0tLQo=" - name: vaddresspool.kb.io clientConfig: - caBundle: 
"LS0tLS1CRUdJTiBDRVJUSUZJQ0FURS0tLS0tCk1JSUJuVENDQVVTZ0F3SUJBZ0lVZmo3Z2t6elMwcTFqeFdoRkwvY1NQUG5ycDdJd0NnWUlLb1pJemowRUF3SXcKSVRFZk1CMEdBMVVFQXd3V2NtTm5aVzRnYzJWc1ppQnphV2R1WldRZ1kyVnlkREFnRncweU5EQXlNRFF4TlRVeQpNVGxhR0E4eU1USTBNREV4TVRFMU5USXhPVm93SVRFZk1CMEdBMVVFQXd3V2NtTm5aVzRnYzJWc1ppQnphV2R1ClpXUWdZMlZ5ZERCWk1CTUdCeXFHU000OUFnRUdDQ3FHU000OUF3RUhBMElBQkRKQ05ocEJSZzhYS3hhOHliV2YKeUh1d3h4MlhaSmdqc3RaL3paWHU0UGl5bTFNQVhoRVFTV1hHditaMGduSitFaE9NRzE5UE1rcHJBeVdnT2xLNgovMzZqV0RCV01DOEdBMVVkRVFRb01DYUNKSE5oY25RdGQyVmlhRzl2YXkxelpYSjJhV05sTG10MVltVXRjM2x6CmRHVnRMbk4yWXpBT0JnTlZIUThCQWY4RUJBTUNCYUF3RXdZRFZSMGxCQXd3Q2dZSUt3WUJCUVVIQXdFd0NnWUkKS29aSXpqMEVBd0lEUndBd1JBSWdVTkRrcVFTYzIxOVRsT0UvNk40ZzI5Smo1TU5nZTcwa2ozZjhDYU50SlJzQwpJR0M0WllGbnp6c2RGaHowVG4xYTFwa1kyVWVmZHdwcGFjOHB0UGdrM3AvSwotLS0tLUVORCBDRVJUSUZJQ0FURS0tLS0tCg==" + caBundle: "LS0tLS1CRUdJTiBDRVJUSUZJQ0FURS0tLS0tCk1JSUJuekNDQVVXZ0F3SUJBZ0lWQUtTMmNUQmJMWjAySkIySkF5cm1GZElsbHlkUU1Bb0dDQ3FHU000OUJBTUMKTUNFeEh6QWRCZ05WQkFNTUZuSmpaMlZ1SUhObGJHWWdjMmxuYm1Wa0lHTmxjblF3SUJjTk1qUXdNekV6TURJdwpNalEwV2hnUE1qRXlOREF5TVRnd01qQXlORFJhTUNFeEh6QWRCZ05WQkFNTUZuSmpaMlZ1SUhObGJHWWdjMmxuCmJtVmtJR05sY25Rd1dUQVRCZ2NxaGtqT1BRSUJCZ2dxaGtqT1BRTUJCd05DQUFRZFVIaGcyelRNNXM3RVhneWQKOVBFQUNKQjRzd2JybGRBR1N1WS91SHRNOGpQRDFUY3pyZURDbm5mTDJBRXZYbk1NcTRQM0RhSm1mR28raDZucgpBU0RvbzFnd1ZqQXZCZ05WSFJFRUtEQW1naVJ6WVhKMExYZGxZbWh2YjJzdGMyVnlkbWxqWlM1cmRXSmxMWE41CmMzUmxiUzV6ZG1Nd0RnWURWUjBQQVFIL0JBUURBZ1dnTUJNR0ExVWRKUVFNTUFvR0NDc0dBUVVGQndNQk1Bb0cKQ0NxR1NNNDlCQU1DQTBnQU1FVUNJQVIwdzNLRjlvelNUU2FQY04vQ3dtQVliMDBHazdseUREMkJtdFR1RU5ncQpBaUVBZ29CZmdTKy9TelF2ckJOLy90VDBBbm5XcHphZEd2c3gvaG5OSksyWDZjST0KLS0tLS1FTkQgQ0VSVElGSUNBVEUtLS0tLQo=" --- apiVersion: admissionregistration.k8s.io/v1 kind: MutatingWebhookConfiguration @@ -20,10 +20,10 @@ metadata: webhooks: - name: mbgppeer.kb.io clientConfig: - caBundle: "LS0tLS1CRUdJTiBDRVJUSUZJQ0FURS0tLS0tCk1JSUJuVENDQVVTZ0F3SUJBZ0lVZmo3Z2t6elMwcTFqeFdoRkwvY1NQUG5ycDdJd0NnWUlLb1pJemowRUF3SXcKSVRFZk1CMEdBMVVFQXd3V2NtTm5aVzRnYzJWc1ppQnphV2R1WldRZ1kyVnlkREFnRncweU5EQXlNRFF4TlRVeQpNVGxhR0E4eU1USTBNREV4TVRFMU5USXhPVm93SVRFZk1CMEdBMVVFQXd3V2NtTm5aVzRnYzJWc1ppQnphV2R1ClpXUWdZMlZ5ZERCWk1CTUdCeXFHU000OUFnRUdDQ3FHU000OUF3RUhBMElBQkRKQ05ocEJSZzhYS3hhOHliV2YKeUh1d3h4MlhaSmdqc3RaL3paWHU0UGl5bTFNQVhoRVFTV1hHditaMGduSitFaE9NRzE5UE1rcHJBeVdnT2xLNgovMzZqV0RCV01DOEdBMVVkRVFRb01DYUNKSE5oY25RdGQyVmlhRzl2YXkxelpYSjJhV05sTG10MVltVXRjM2x6CmRHVnRMbk4yWXpBT0JnTlZIUThCQWY4RUJBTUNCYUF3RXdZRFZSMGxCQXd3Q2dZSUt3WUJCUVVIQXdFd0NnWUkKS29aSXpqMEVBd0lEUndBd1JBSWdVTkRrcVFTYzIxOVRsT0UvNk40ZzI5Smo1TU5nZTcwa2ozZjhDYU50SlJzQwpJR0M0WllGbnp6c2RGaHowVG4xYTFwa1kyVWVmZHdwcGFjOHB0UGdrM3AvSwotLS0tLUVORCBDRVJUSUZJQ0FURS0tLS0tCg==" + caBundle: "LS0tLS1CRUdJTiBDRVJUSUZJQ0FURS0tLS0tCk1JSUJuekNDQVVXZ0F3SUJBZ0lWQUtTMmNUQmJMWjAySkIySkF5cm1GZElsbHlkUU1Bb0dDQ3FHU000OUJBTUMKTUNFeEh6QWRCZ05WQkFNTUZuSmpaMlZ1SUhObGJHWWdjMmxuYm1Wa0lHTmxjblF3SUJjTk1qUXdNekV6TURJdwpNalEwV2hnUE1qRXlOREF5TVRnd01qQXlORFJhTUNFeEh6QWRCZ05WQkFNTUZuSmpaMlZ1SUhObGJHWWdjMmxuCmJtVmtJR05sY25Rd1dUQVRCZ2NxaGtqT1BRSUJCZ2dxaGtqT1BRTUJCd05DQUFRZFVIaGcyelRNNXM3RVhneWQKOVBFQUNKQjRzd2JybGRBR1N1WS91SHRNOGpQRDFUY3pyZURDbm5mTDJBRXZYbk1NcTRQM0RhSm1mR28raDZucgpBU0RvbzFnd1ZqQXZCZ05WSFJFRUtEQW1naVJ6WVhKMExYZGxZbWh2YjJzdGMyVnlkbWxqWlM1cmRXSmxMWE41CmMzUmxiUzV6ZG1Nd0RnWURWUjBQQVFIL0JBUURBZ1dnTUJNR0ExVWRKUVFNTUFvR0NDc0dBUVVGQndNQk1Bb0cKQ0NxR1NNNDlCQU1DQTBnQU1FVUNJQVIwdzNLRjlvelNUU2FQY04vQ3dtQVliMDBHazdseUREMkJtdFR1RU5ncQpBaUVBZ29CZmdTKy9TelF2ckJOLy90VDBBbm5XcHphZEd2c3gvaG5OSksyWDZjST0KLS0tLS1FTkQgQ0VSVElGSUNBVEUtLS0tLQo=" - name: 
maddressblock.kb.io clientConfig: - caBundle: "LS0tLS1CRUdJTiBDRVJUSUZJQ0FURS0tLS0tCk1JSUJuVENDQVVTZ0F3SUJBZ0lVZmo3Z2t6elMwcTFqeFdoRkwvY1NQUG5ycDdJd0NnWUlLb1pJemowRUF3SXcKSVRFZk1CMEdBMVVFQXd3V2NtTm5aVzRnYzJWc1ppQnphV2R1WldRZ1kyVnlkREFnRncweU5EQXlNRFF4TlRVeQpNVGxhR0E4eU1USTBNREV4TVRFMU5USXhPVm93SVRFZk1CMEdBMVVFQXd3V2NtTm5aVzRnYzJWc1ppQnphV2R1ClpXUWdZMlZ5ZERCWk1CTUdCeXFHU000OUFnRUdDQ3FHU000OUF3RUhBMElBQkRKQ05ocEJSZzhYS3hhOHliV2YKeUh1d3h4MlhaSmdqc3RaL3paWHU0UGl5bTFNQVhoRVFTV1hHditaMGduSitFaE9NRzE5UE1rcHJBeVdnT2xLNgovMzZqV0RCV01DOEdBMVVkRVFRb01DYUNKSE5oY25RdGQyVmlhRzl2YXkxelpYSjJhV05sTG10MVltVXRjM2x6CmRHVnRMbk4yWXpBT0JnTlZIUThCQWY4RUJBTUNCYUF3RXdZRFZSMGxCQXd3Q2dZSUt3WUJCUVVIQXdFd0NnWUkKS29aSXpqMEVBd0lEUndBd1JBSWdVTkRrcVFTYzIxOVRsT0UvNk40ZzI5Smo1TU5nZTcwa2ozZjhDYU50SlJzQwpJR0M0WllGbnp6c2RGaHowVG4xYTFwa1kyVWVmZHdwcGFjOHB0UGdrM3AvSwotLS0tLUVORCBDRVJUSUZJQ0FURS0tLS0tCg==" + caBundle: "LS0tLS1CRUdJTiBDRVJUSUZJQ0FURS0tLS0tCk1JSUJuekNDQVVXZ0F3SUJBZ0lWQUtTMmNUQmJMWjAySkIySkF5cm1GZElsbHlkUU1Bb0dDQ3FHU000OUJBTUMKTUNFeEh6QWRCZ05WQkFNTUZuSmpaMlZ1SUhObGJHWWdjMmxuYm1Wa0lHTmxjblF3SUJjTk1qUXdNekV6TURJdwpNalEwV2hnUE1qRXlOREF5TVRnd01qQXlORFJhTUNFeEh6QWRCZ05WQkFNTUZuSmpaMlZ1SUhObGJHWWdjMmxuCmJtVmtJR05sY25Rd1dUQVRCZ2NxaGtqT1BRSUJCZ2dxaGtqT1BRTUJCd05DQUFRZFVIaGcyelRNNXM3RVhneWQKOVBFQUNKQjRzd2JybGRBR1N1WS91SHRNOGpQRDFUY3pyZURDbm5mTDJBRXZYbk1NcTRQM0RhSm1mR28raDZucgpBU0RvbzFnd1ZqQXZCZ05WSFJFRUtEQW1naVJ6WVhKMExYZGxZbWh2YjJzdGMyVnlkbWxqWlM1cmRXSmxMWE41CmMzUmxiUzV6ZG1Nd0RnWURWUjBQQVFIL0JBUURBZ1dnTUJNR0ExVWRKUVFNTUFvR0NDc0dBUVVGQndNQk1Bb0cKQ0NxR1NNNDlCQU1DQTBnQU1FVUNJQVIwdzNLRjlvelNUU2FQY04vQ3dtQVliMDBHazdseUREMkJtdFR1RU5ncQpBaUVBZ29CZmdTKy9TelF2ckJOLy90VDBBbm5XcHphZEd2c3gvaG5OSksyWDZjST0KLS0tLS1FTkQgQ0VSVElGSUNBVEUtLS0tLQo=" - name: mservice.kb.io clientConfig: - caBundle: "LS0tLS1CRUdJTiBDRVJUSUZJQ0FURS0tLS0tCk1JSUJuVENDQVVTZ0F3SUJBZ0lVZmo3Z2t6elMwcTFqeFdoRkwvY1NQUG5ycDdJd0NnWUlLb1pJemowRUF3SXcKSVRFZk1CMEdBMVVFQXd3V2NtTm5aVzRnYzJWc1ppQnphV2R1WldRZ1kyVnlkREFnRncweU5EQXlNRFF4TlRVeQpNVGxhR0E4eU1USTBNREV4TVRFMU5USXhPVm93SVRFZk1CMEdBMVVFQXd3V2NtTm5aVzRnYzJWc1ppQnphV2R1ClpXUWdZMlZ5ZERCWk1CTUdCeXFHU000OUFnRUdDQ3FHU000OUF3RUhBMElBQkRKQ05ocEJSZzhYS3hhOHliV2YKeUh1d3h4MlhaSmdqc3RaL3paWHU0UGl5bTFNQVhoRVFTV1hHditaMGduSitFaE9NRzE5UE1rcHJBeVdnT2xLNgovMzZqV0RCV01DOEdBMVVkRVFRb01DYUNKSE5oY25RdGQyVmlhRzl2YXkxelpYSjJhV05sTG10MVltVXRjM2x6CmRHVnRMbk4yWXpBT0JnTlZIUThCQWY4RUJBTUNCYUF3RXdZRFZSMGxCQXd3Q2dZSUt3WUJCUVVIQXdFd0NnWUkKS29aSXpqMEVBd0lEUndBd1JBSWdVTkRrcVFTYzIxOVRsT0UvNk40ZzI5Smo1TU5nZTcwa2ozZjhDYU50SlJzQwpJR0M0WllGbnp6c2RGaHowVG4xYTFwa1kyVWVmZHdwcGFjOHB0UGdrM3AvSwotLS0tLUVORCBDRVJUSUZJQ0FURS0tLS0tCg==" + caBundle: "LS0tLS1CRUdJTiBDRVJUSUZJQ0FURS0tLS0tCk1JSUJuekNDQVVXZ0F3SUJBZ0lWQUtTMmNUQmJMWjAySkIySkF5cm1GZElsbHlkUU1Bb0dDQ3FHU000OUJBTUMKTUNFeEh6QWRCZ05WQkFNTUZuSmpaMlZ1SUhObGJHWWdjMmxuYm1Wa0lHTmxjblF3SUJjTk1qUXdNekV6TURJdwpNalEwV2hnUE1qRXlOREF5TVRnd01qQXlORFJhTUNFeEh6QWRCZ05WQkFNTUZuSmpaMlZ1SUhObGJHWWdjMmxuCmJtVmtJR05sY25Rd1dUQVRCZ2NxaGtqT1BRSUJCZ2dxaGtqT1BRTUJCd05DQUFRZFVIaGcyelRNNXM3RVhneWQKOVBFQUNKQjRzd2JybGRBR1N1WS91SHRNOGpQRDFUY3pyZURDbm5mTDJBRXZYbk1NcTRQM0RhSm1mR28raDZucgpBU0RvbzFnd1ZqQXZCZ05WSFJFRUtEQW1naVJ6WVhKMExYZGxZbWh2YjJzdGMyVnlkbWxqWlM1cmRXSmxMWE41CmMzUmxiUzV6ZG1Nd0RnWURWUjBQQVFIL0JBUURBZ1dnTUJNR0ExVWRKUVFNTUFvR0NDc0dBUVVGQndNQk1Bb0cKQ0NxR1NNNDlCQU1DQTBnQU1FVUNJQVIwdzNLRjlvelNUU2FQY04vQ3dtQVliMDBHazdseUREMkJtdFR1RU5ncQpBaUVBZ29CZmdTKy9TelF2ckJOLy90VDBBbm5XcHphZEd2c3gvaG5OSksyWDZjST0KLS0tLS1FTkQgQ0VSVElGSUNBVEUtLS0tLQo=" diff --git a/manifests/base/workloads/agent.yaml b/manifests/base/workloads/agent.yaml index 
7710a1f..4d8d34d 100644 --- a/manifests/base/workloads/agent.yaml +++ b/manifests/base/workloads/agent.yaml @@ -31,15 +31,9 @@ spec: fieldPath: status.hostIP image: sart:dev imagePullPolicy: IfNotPresent - name: sartd + name: agent securityContext: - allowPrivilegeEscalation: false - capabilities: - add: - - NET_RAW - drop: - - ALL - readOnlyRootFilesystem: true + privileged: true livenessProbe: httpGet: path: /healthz @@ -64,6 +58,7 @@ spec: name: cert readOnly: true hostNetwork: true + hostPID: true nodeSelector: kubernetes.io/os: linux terminationGracePeriodSeconds: 2 diff --git a/manifests/base/workloads/controller.yaml b/manifests/base/workloads/controller.yaml index d50e460..4a96bc0 100644 --- a/manifests/base/workloads/controller.yaml +++ b/manifests/base/workloads/controller.yaml @@ -18,7 +18,7 @@ spec: securityContext: runAsNonRoot: false containers: - - name: sartd + - name: controller command: - sartd args: @@ -54,6 +54,23 @@ spec: - mountPath: /etc/sartd/cert name: cert readOnly: true + affinity: + podAntiAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + - labelSelector: + matchExpressions: + - key: control-plane + operator: In + values: + - sart-controller + topologyKey: "kubernetes.io/hostname" + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: DoNotSchedule + labelSelector: + matchLabels: + control-plane: sart-controller volumes: - name: cert secret: diff --git a/manifests/base/workloads/speaker.yaml b/manifests/base/workloads/speaker.yaml index 4895658..f3ebf99 100644 --- a/manifests/base/workloads/speaker.yaml +++ b/manifests/base/workloads/speaker.yaml @@ -32,7 +32,7 @@ spec: fieldPath: status.hostIP image: sart:dev imagePullPolicy: IfNotPresent - name: sartd + name: bgp securityContext: allowPrivilegeEscalation: false capabilities: diff --git a/manifests/cni/agent-patch.yaml b/manifests/cni/agent-patch.yaml index 23fb680..d5c762e 100644 --- a/manifests/cni/agent-patch.yaml +++ b/manifests/cni/agent-patch.yaml @@ -7,7 +7,7 @@ spec: template: spec: containers: - - name: sartd + - name: agent args: - agent - --mode=cni @@ -15,6 +15,10 @@ spec: volumeMounts: - mountPath: /var/run name: run + mountPropagation: HostToContainer # to see bind-mounted netns files under /run/netns + - mountPath: /run + name: run2 + mountPropagation: HostToContainer # to see bind-mounted netns files under /run/netns initContainers: - name: installer image: sart:dev @@ -30,6 +34,9 @@ spec: - name: run hostPath: path: /var/run + - name: run2 + hostPath: + path: /run - name: cni-bin hostPath: path: /opt/cni/bin diff --git a/manifests/cni/configmap.yaml b/manifests/cni/configmap.yaml new file mode 100644 index 0000000..7c83234 --- /dev/null +++ b/manifests/cni/configmap.yaml @@ -0,0 +1,18 @@ +apiVersion: v1 +kind: ConfigMap +metadata: + name: sartd-fib-config + namespace: kube-system +data: + fib-config.yaml: | + endpoint: 127.0.0.1:5001 + channels: + - name: bgp-to-kernel + ip_version: ipv4 + subscribers: + - protocol: bgp + endpoint: 127.0.0.1:5010 + publishers: + - protocol: kernel + tables: + - 254 diff --git a/manifests/cni/controller-patch.yaml b/manifests/cni/controller-patch.yaml index 137c215..72d66f9 100644 --- a/manifests/cni/controller-patch.yaml +++ b/manifests/cni/controller-patch.yaml @@ -6,6 +6,11 @@ metadata: spec: template: spec: + containers: + - name: controller + args: + - controller + - --mode=cni hostNetwork: true priorityClassName: system-cluster-critical tolerations: diff --git a/manifests/cni/kustomization.yaml
b/manifests/cni/kustomization.yaml index c4c0cff..01b2db0 100644 --- a/manifests/cni/kustomization.yaml +++ b/manifests/cni/kustomization.yaml @@ -1,6 +1,8 @@ resources: - ../base + - configmap.yaml patchesStrategicMerge: - agent-patch.yaml - controller-patch.yaml + - speaker-patch.yaml diff --git a/manifests/cni/sample/bgp_peer.yaml b/manifests/cni/sample/bgp_peer.yaml new file mode 100644 index 0000000..31a1b88 --- /dev/null +++ b/manifests/cni/sample/bgp_peer.yaml @@ -0,0 +1,31 @@ +apiVersion: sart.terassyi.net/v1alpha2 +kind: BGPPeer +metadata: + labels: + bgp: b + bgppeer.sart.terassyi.net/node: sart-control-plane + name: bgppeer-sart-cp-spine0 +spec: + addr: 9.9.9.9 + asn: 65001 + groups: + - to-spine0 + nodeBGPRef: sart-control-plane + speaker: + path: 127.0.0.1:5000 +--- +apiVersion: sart.terassyi.net/v1alpha2 +kind: BGPPeer +metadata: + labels: + bgp: b + bgppeer.sart.terassyi.net/node: sart-control-plane + name: bgppeer-sart-cp-spine1 +spec: + addr: 7.7.7.7 + asn: 65002 + groups: + - to-spine1 + nodeBGPRef: sart-control-plane + speaker: + path: 127.0.0.1:5000 diff --git a/manifests/cni/sample/client.yaml b/manifests/cni/sample/client.yaml new file mode 100644 index 0000000..2cd4df3 --- /dev/null +++ b/manifests/cni/sample/client.yaml @@ -0,0 +1,9 @@ +apiVersion: v1 +kind: Pod +metadata: + name: client + namespace: test +spec: + containers: + - name: client + image: ghcr.io/terassyi/test-server:0.1.2 diff --git a/manifests/cni/sample/cluster_bgp_spine0.yaml b/manifests/cni/sample/cluster_bgp_spine0.yaml new file mode 100644 index 0000000..3f14328 --- /dev/null +++ b/manifests/cni/sample/cluster_bgp_spine0.yaml @@ -0,0 +1,18 @@ +apiVersion: sart.terassyi.net/v1alpha2 +kind: ClusterBGP +metadata: + name: clusterbgp-a +spec: + nodeSelector: + bgp: a + asnSelector: + from: label + routerIdSelector: + from: internalAddress + speaker: + path: 127.0.0.1:5000 + multipath: true + peers: + - peerTemplateRef: bgppeertemplate-spine0 + nodeBGPSelector: + bgp: a diff --git a/manifests/cni/sample/cluster_bgp_spine1.yaml b/manifests/cni/sample/cluster_bgp_spine1.yaml new file mode 100644 index 0000000..848ba7f --- /dev/null +++ b/manifests/cni/sample/cluster_bgp_spine1.yaml @@ -0,0 +1,18 @@ +apiVersion: sart.terassyi.net/v1alpha2 +kind: ClusterBGP +metadata: + name: clusterbgp-b +spec: + nodeSelector: + bgp: a + asnSelector: + from: label + routerIdSelector: + from: internalAddress + speaker: + path: 127.0.0.1:5000 + multipath: true + peers: + - peerTemplateRef: bgppeertemplate-spine1 + nodeBGPSelector: + bgp: a diff --git a/manifests/cni/sample/deployment.yaml b/manifests/cni/sample/deployment.yaml new file mode 100644 index 0000000..5d329d3 --- /dev/null +++ b/manifests/cni/sample/deployment.yaml @@ -0,0 +1,33 @@ +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: app + namespace: test +spec: + replicas: 3 + selector: + matchLabels: + app: app + template: + metadata: + labels: + app: app + spec: + containers: + - name: app + image: ghcr.io/terassyi/test-server:0.1.2 +--- +# LoadBalancer Service +apiVersion: v1 +kind: Service +metadata: + name: app-svc + namespace: test +spec: + selector: + app: app + ports: + - name: http + port: 80 + targetPort: 8080 diff --git a/manifests/cni/sample/kustomization.yaml b/manifests/cni/sample/kustomization.yaml index d2ed1ad..beaeaef 100644 --- a/manifests/cni/sample/kustomization.yaml +++ b/manifests/cni/sample/kustomization.yaml @@ -1,2 +1,9 @@ resources: + - cluster_bgp_a.yaml + - cluster_bgp_b.yaml + - peer_template.yaml + - bgp_peer.yaml + - 
namespace.yaml - pool.yaml + - deployment.yaml + - client.yaml diff --git a/manifests/cni/sample/namespace.yaml b/manifests/cni/sample/namespace.yaml new file mode 100644 index 0000000..aa2f98c --- /dev/null +++ b/manifests/cni/sample/namespace.yaml @@ -0,0 +1,6 @@ +apiVersion: v1 +kind: Namespace +metadata: + name: test + labels: + name: test diff --git a/manifests/cni/sample/peer_template.yaml b/manifests/cni/sample/peer_template.yaml new file mode 100644 index 0000000..70df9e9 --- /dev/null +++ b/manifests/cni/sample/peer_template.yaml @@ -0,0 +1,19 @@ +apiVersion: sart.terassyi.net/v1alpha2 +kind: BGPPeerTemplate +metadata: + name: bgppeertemplate-spine0 +spec: + asn: 65001 + addr: 9.9.9.9 + groups: + - to-spine0 +--- +apiVersion: sart.terassyi.net/v1alpha2 +kind: BGPPeerTemplate +metadata: + name: bgppeertemplate-spine1 +spec: + asn: 65002 + addr: 7.7.7.7 + groups: + - to-spine1 diff --git a/manifests/cni/sample/test_pod.yaml b/manifests/cni/sample/test_pod.yaml new file mode 100644 index 0000000..d3177ee --- /dev/null +++ b/manifests/cni/sample/test_pod.yaml @@ -0,0 +1,59 @@ +apiVersion: v1 +kind: Pod +metadata: + name: test-cp + namespace: test +spec: + containers: + - name: test + image: ghcr.io/terassyi/test-server:0.1.2 + nodeSelector: + kubernetes.io/hostname: sart-control-plane +--- +apiVersion: v1 +kind: Pod +metadata: + name: test-worker + namespace: test +spec: + containers: + - name: test + image: ghcr.io/terassyi/test-server:0.1.2 + nodeSelector: + kubernetes.io/hostname: sart-worker +--- +apiVersion: v1 +kind: Pod +metadata: + name: test-worker2 + namespace: test +spec: + containers: + - name: test + image: ghcr.io/terassyi/test-server:0.1.2 + nodeSelector: + kubernetes.io/hostname: sart-worker2 +--- +apiVersion: v1 +kind: Pod +metadata: + name: test-worker3 + namespace: test +spec: + containers: + - name: test + image: ghcr.io/terassyi/test-server:0.1.2 + nodeSelector: + kubernetes.io/hostname: sart-worker3 +--- +apiVersion: v1 +kind: Pod +metadata: + name: test-worker3-2 + namespace: test +spec: + containers: + - name: test + image: ghcr.io/terassyi/test-server:0.1.2 + nodeSelector: + kubernetes.io/hostname: sart-worker3 diff --git a/manifests/cni/sample/test_pod2.yaml b/manifests/cni/sample/test_pod2.yaml new file mode 100644 index 0000000..bd943e9 --- /dev/null +++ b/manifests/cni/sample/test_pod2.yaml @@ -0,0 +1,11 @@ +apiVersion: v1 +kind: Pod +metadata: + name: test-worker3-3 + namespace: test +spec: + containers: + - name: test + image: ghcr.io/terassyi/test-server:0.1.2 + nodeSelector: + kubernetes.io/hostname: sart-worker3 diff --git a/manifests/cni/sample/test_pod_another_pool.yaml b/manifests/cni/sample/test_pod_another_pool.yaml new file mode 100644 index 0000000..05f08cd --- /dev/null +++ b/manifests/cni/sample/test_pod_another_pool.yaml @@ -0,0 +1,13 @@ +apiVersion: v1 +kind: Pod +metadata: + name: test-worker-non-default + namespace: test + annotations: + sart.terassyi.net/addresspool: non-default-pod-pool +spec: + containers: + - name: test + image: ghcr.io/terassyi/test-server:0.1.2 + nodeSelector: + kubernetes.io/hostname: sart-worker diff --git a/manifests/cni/sample/test_pod_in_namespace.yaml b/manifests/cni/sample/test_pod_in_namespace.yaml new file mode 100644 index 0000000..4653b62 --- /dev/null +++ b/manifests/cni/sample/test_pod_in_namespace.yaml @@ -0,0 +1,32 @@ +apiVersion: v1 +kind: Namespace +metadata: + name: test-non-default + labels: + name: test + annotations: + sart.terassyi.net/addresspool: non-default-pod-pool +--- +apiVersion: v1 
+kind: Pod +metadata: + name: test-worker2-non-default + namespace: test-non-default +spec: + containers: + - name: test-non-default + image: ghcr.io/terassyi/test-server:0.1.2 + nodeSelector: + kubernetes.io/hostname: sart-worker2 +--- +apiVersion: v1 +kind: Pod +metadata: + name: test-worker3-non-default + namespace: test-non-default +spec: + containers: + - name: test + image: ghcr.io/terassyi/test-server:0.1.2 + nodeSelector: + kubernetes.io/hostname: sart-worker3 diff --git a/manifests/cni/speaker-patch.yaml b/manifests/cni/speaker-patch.yaml new file mode 100644 index 0000000..bca652b --- /dev/null +++ b/manifests/cni/speaker-patch.yaml @@ -0,0 +1,37 @@ +apiVersion: apps/v1 +kind: DaemonSet +metadata: + name: sartd-bgp + namespace: system +spec: + template: + spec: + containers: + - name: bgp + command: ["sartd"] + args: + - bgp + - --fib + - localhost:5010 + - --exporter=127.0.0.1:5003 + - name: fib + image: sart:dev + imagePullPolicy: IfNotPresent + command: ["sartd"] + args: + - fib + - -f + - /etc/sart/fib-config.yaml + securityContext: + privileged: true + volumeMounts: + - name: sartd-fib-config + mountPath: /etc/sart/fib-config.yaml + subPath: fib-config.yaml + volumes: + - name: sartd-fib-config + configMap: + name: sartd-fib-config + items: + - key: fib-config.yaml + path: fib-config.yaml diff --git a/manifests/dual/agent-patch.yaml b/manifests/dual/agent-patch.yaml new file mode 100644 index 0000000..81bd832 --- /dev/null +++ b/manifests/dual/agent-patch.yaml @@ -0,0 +1,14 @@ +apiVersion: apps/v1 +kind: DaemonSet +metadata: + name: sartd-agent + namespace: kube-system +spec: + template: + spec: + containers: + - name: agent + args: + - agent + - --mode=dual + - --cni-endpoint=0.0.0.0:6000 diff --git a/manifests/dual/controller-patch.yaml b/manifests/dual/controller-patch.yaml new file mode 100644 index 0000000..1a50c0c --- /dev/null +++ b/manifests/dual/controller-patch.yaml @@ -0,0 +1,13 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: sart-controller + namespace: kube-system +spec: + template: + spec: + containers: + - name: controller + args: + - controller + - --mode=dual diff --git a/manifests/lb/agent-patch.yaml b/manifests/lb/agent-patch.yaml index 464cb6b..dfa5fca 100644 --- a/manifests/lb/agent-patch.yaml +++ b/manifests/lb/agent-patch.yaml @@ -6,7 +6,8 @@ metadata: spec: template: spec: - name: sartd - args: + containers: + - name: agent + args: - agent - --mode=lb diff --git a/manifests/lb/controller-patch.yaml b/manifests/lb/controller-patch.yaml new file mode 100644 index 0000000..6977c9f --- /dev/null +++ b/manifests/lb/controller-patch.yaml @@ -0,0 +1,13 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: sart-controller + namespace: kube-system +spec: + template: + spec: + containers: + - name: controller + args: + - controller + - --mode=lb diff --git a/manifests/lb/kustomization.yaml b/manifests/lb/kustomization.yaml index 446f217..c4c0cff 100644 --- a/manifests/lb/kustomization.yaml +++ b/manifests/lb/kustomization.yaml @@ -3,3 +3,4 @@ resources: patchesStrategicMerge: - agent-patch.yaml + - controller-patch.yaml diff --git a/manifests/base/sample/bgp_peer.yaml b/manifests/lb/sample/bgp_peer.yaml similarity index 100% rename from manifests/base/sample/bgp_peer.yaml rename to manifests/lb/sample/bgp_peer.yaml diff --git a/manifests/base/sample/cluster_bgp_a.yaml b/manifests/lb/sample/cluster_bgp_a.yaml similarity index 94% rename from manifests/base/sample/cluster_bgp_a.yaml rename to manifests/lb/sample/cluster_bgp_a.yaml index 
0bc3874..a3a8825 100644 --- a/manifests/base/sample/cluster_bgp_a.yaml +++ b/manifests/lb/sample/cluster_bgp_a.yaml @@ -11,6 +11,7 @@ spec: from: internalAddress speaker: path: 127.0.0.1:5000 + multipath: true peers: - peerTemplateRef: bgppeertemplate-sample nodeBGPSelector: diff --git a/manifests/base/sample/cluster_bgp_b.yaml b/manifests/lb/sample/cluster_bgp_b.yaml similarity index 92% rename from manifests/base/sample/cluster_bgp_b.yaml rename to manifests/lb/sample/cluster_bgp_b.yaml index 89ae9db..ba1de81 100644 --- a/manifests/base/sample/cluster_bgp_b.yaml +++ b/manifests/lb/sample/cluster_bgp_b.yaml @@ -11,3 +11,4 @@ spec: from: internalAddress speaker: path: 127.0.0.1:5000 + multipath: true diff --git a/manifests/base/sample/cluster_bgp_c.yaml b/manifests/lb/sample/cluster_bgp_c.yaml similarity index 95% rename from manifests/base/sample/cluster_bgp_c.yaml rename to manifests/lb/sample/cluster_bgp_c.yaml index 8219666..401dfca 100644 --- a/manifests/base/sample/cluster_bgp_c.yaml +++ b/manifests/lb/sample/cluster_bgp_c.yaml @@ -11,6 +11,7 @@ spec: from: internalAddress speaker: path: 127.0.0.1:5000 + multipath: true peers: - peerConfig: asn: 65000 diff --git a/manifests/base/sample/kustomization.yaml b/manifests/lb/sample/kustomization.yaml similarity index 100% rename from manifests/base/sample/kustomization.yaml rename to manifests/lb/sample/kustomization.yaml diff --git a/manifests/base/sample/lb.yaml b/manifests/lb/sample/lb.yaml similarity index 100% rename from manifests/base/sample/lb.yaml rename to manifests/lb/sample/lb.yaml diff --git a/manifests/base/sample/lb_address_pool.yaml b/manifests/lb/sample/lb_address_pool.yaml similarity index 100% rename from manifests/base/sample/lb_address_pool.yaml rename to manifests/lb/sample/lb_address_pool.yaml diff --git a/manifests/base/sample/lb_another.yaml b/manifests/lb/sample/lb_another.yaml similarity index 100% rename from manifests/base/sample/lb_another.yaml rename to manifests/lb/sample/lb_another.yaml diff --git a/manifests/base/sample/peer_template.yaml b/manifests/lb/sample/peer_template.yaml similarity index 100% rename from manifests/base/sample/peer_template.yaml rename to manifests/lb/sample/peer_template.yaml diff --git a/sartd/Cargo.lock b/sartd/Cargo.lock index 6f3a913..9596663 100644 --- a/sartd/Cargo.lock +++ b/sartd/Cargo.lock @@ -1529,6 +1529,20 @@ dependencies = [ "netlink-packet-utils", ] +[[package]] +name = "netlink-packet-route" +version = "0.19.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "74c171cd77b4ee8c7708da746ce392440cb7bcf618d122ec9ecc607b12938bf4" +dependencies = [ + "anyhow", + "byteorder", + "libc", + "log", + "netlink-packet-core", + "netlink-packet-utils", +] + [[package]] name = "netlink-packet-utils" version = "0.5.2" @@ -1580,6 +1594,17 @@ dependencies = [ "libc", ] +[[package]] +name = "nix" +version = "0.27.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2eb04e9c688eff1c89d72b407f168cf79bb9e867a9d3323ed6c01519eb9cc053" +dependencies = [ + "bitflags 2.4.1", + "cfg-if", + "libc", +] + [[package]] name = "nu-ansi-term" version = "0.46.0" @@ -2116,11 +2141,29 @@ dependencies = [ "futures", "log", "netlink-packet-core", - "netlink-packet-route", + "netlink-packet-route 0.17.1", + "netlink-packet-utils", + "netlink-proto", + "netlink-sys", + "nix 0.26.4", + "thiserror", + "tokio", +] + +[[package]] +name = "rtnetlink" +version = "0.14.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"b684475344d8df1859ddb2d395dd3dac4f8f3422a1aa0725993cb375fc5caba5" +dependencies = [ + "futures", + "log", + "netlink-packet-core", + "netlink-packet-route 0.19.0", "netlink-packet-utils", "netlink-proto", "netlink-sys", - "nix", + "nix 0.27.1", "thiserror", "tokio", ] @@ -2280,9 +2323,9 @@ dependencies = [ "futures", "ipnet", "netlink-packet-core", - "netlink-packet-route", + "netlink-packet-route 0.17.1", "netlink-sys", - "rtnetlink", + "rtnetlink 0.13.1", "sartd-proto", "sartd-trace", "serde", @@ -2320,10 +2363,13 @@ dependencies = [ "json-patch", "k8s-openapi", "kube", + "netlink-packet-route 0.19.0", + "nix 0.27.1", "prometheus", + "rand", "rscni 0.0.3", "rstest", - "rtnetlink", + "rtnetlink 0.14.1", "rustls", "sartd-cert", "sartd-ipam", diff --git a/sartd/src/bgp/src/error.rs b/sartd/src/bgp/src/error.rs index e400ea1..7f7a382 100644 --- a/sartd/src/bgp/src/error.rs +++ b/sartd/src/bgp/src/error.rs @@ -4,6 +4,8 @@ use thiserror::Error; pub enum Error { #[error("system error")] System, + #[error("Got empty message")] + GotEmptyMessage, #[error("message header error")] MessageHeader(#[from] MessageHeaderError), #[error("OPEN message error")] @@ -174,12 +176,16 @@ pub enum ControlError { pub enum PeerError { #[error("connection is not established")] ConnectionNotEstablished, + #[error("Connection is not found")] + ConnectionNotFound, #[error("failed to send message")] FailedToSendMessage, #[error("peer is down")] Down, #[error("duplicate connection")] DuplicateConnection, + #[error("Connections must be one or two")] + TooManyConnections, } #[derive(Debug, Error)] diff --git a/sartd/src/bgp/src/event.rs b/sartd/src/bgp/src/event.rs index c73599b..6b98498 100644 --- a/sartd/src/bgp/src/event.rs +++ b/sartd/src/bgp/src/event.rs @@ -56,6 +56,42 @@ impl Event { pub const MESSAGE_KEEPALIVE_MSG: u8 = 26; pub const MESSAGE_UPDATE_MSG: u8 = 27; pub const MESSAGE_UPDATE_MSG_ERROR: u8 = 28; + + pub fn to_str(event: u8) -> String { + match event { + 1 => "ManualStart".to_string(), + 2 => "ManualStop".to_string(), + 3 => "AutomaticStart".to_string(), + 4 => "ManualStartWithPassiveTcpEstablishment".to_string(), + 5 => "AutomaticStartWithPassiveTcpEstablishment".to_string(), + 6 => "AutomaticStartWithDampPeerOscillations".to_string(), + 7 => "AutomaticStartWithDampPeerOscillationsAndPassiveTcpEstablishment".to_string(), + 8 => "AutomaticStop".to_string(), + 9 => "ConnectRetryTimerExpire".to_string(), + 10 => "HoldTimerExpire".to_string(), + 11 => "KeepaliveTimerExpire".to_string(), + 12 => "DelayOpenTimerExpire".to_string(), + 13 => "IdleHoldTimerExpire".to_string(), + 14 => "TcpConnectionValid".to_string(), + 15 => "TcpCRInvalid".to_string(), + 16 => "TcpCRAcked".to_string(), + 17 => "TcpConnectionConfirmed".to_string(), + 18 => "TcpConnectionFail".to_string(), + 19 => "BgpOpen".to_string(), + 20 => "BgpOpenWithDelayOpenTimerRunning".to_string(), + 21 => "BgpHeaderError".to_string(), + 22 => "BgpOpenMsgErr".to_string(), + 23 => "OpenCollisionDump".to_string(), + 24 => "NotifMsgVerErr".to_string(), + 25 => "NotifMsg".to_string(), + 26 => "KeepAliveMsg".to_string(), + 27 => "UpdateMsg".to_string(), + 28 => "UpdateMsgErr".to_string(), + 100 => "RouteRefreshMsg".to_string(), + 101 => "RouteRefreshMsgErr".to_string(), + _ => "Unknown".to_string(), + } + } } impl From<&Event> for u8 { @@ -86,7 +122,7 @@ impl From<&Event> for u8 { Event::Message(BgpMessageEvent::OpenCollisionDump) => 23, Event::Message(BgpMessageEvent::NotifMsgVerErr) => 24, Event::Message(BgpMessageEvent::NotifMsg(_)) => 25, - 
Event::Message(BgpMessageEvent::KeepAliveMsg) => 26, + Event::Message(BgpMessageEvent::KeepAliveMsg{ local_port: _, peer_port: _}) => 26, Event::Message(BgpMessageEvent::UpdateMsg(_)) => 27, Event::Message(BgpMessageEvent::UpdateMsgErr(_)) => 28, Event::Message(BgpMessageEvent::RouteRefreshMsg(_)) => 100, @@ -109,12 +145,6 @@ impl std::fmt::Display for Event { } } -impl From<u8> for Event { - fn from(_: u8) -> Self { - Self::Admin(AdministrativeEvent::ManualStart) - } -} - // https://www.rfc-editor.org/rfc/rfc4271#section-8.1.2 #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)] pub enum AdministrativeEvent { @@ -251,7 +281,10 @@ pub enum BgpMessageEvent { OpenCollisionDump, NotifMsgVerErr, NotifMsg(Message), - KeepAliveMsg, + KeepAliveMsg { + local_port: u16, + peer_port: u16, + }, UpdateMsg(Message), UpdateMsgErr(UpdateMessageError), RouteRefreshMsg(Message), @@ -274,7 +307,10 @@ impl std::fmt::Display for BgpMessageEvent { BgpMessageEvent::OpenCollisionDump => write!(f, "Message::OpenCollisionDump"), BgpMessageEvent::NotifMsgVerErr => write!(f, "Message::NotifMsgVerErr"), BgpMessageEvent::NotifMsg(_) => write!(f, "Message::NotifMsg"), - BgpMessageEvent::KeepAliveMsg => write!(f, "Message::KeepAliveMsg"), + BgpMessageEvent::KeepAliveMsg { + local_port: _, + peer_port: _, + } => write!(f, "Message::KeepAliveMsg"), BgpMessageEvent::UpdateMsg(_) => write!(f, "Message::UpdateMsg"), BgpMessageEvent::UpdateMsgErr(_) => write!(f, "Message::UpdateMsgErr"), BgpMessageEvent::RouteRefreshMsg(_) => write!(f, "Message::RouteRefreshMsg"), @@ -297,7 +333,10 @@ impl From<BgpMessageEvent> for u8 { BgpMessageEvent::OpenCollisionDump => 23, BgpMessageEvent::NotifMsgVerErr => 24, BgpMessageEvent::NotifMsg(_) => 25, - BgpMessageEvent::KeepAliveMsg => 26, + BgpMessageEvent::KeepAliveMsg { + local_port: _, + peer_port: _, + } => 26, BgpMessageEvent::UpdateMsg(_) => 27, BgpMessageEvent::UpdateMsgErr(_) => 28, BgpMessageEvent::RouteRefreshMsg(_) => 100, diff --git a/sartd/src/bgp/src/packet/codec.rs b/sartd/src/bgp/src/packet/codec.rs index 8fab019..18d5c65 100644 --- a/sartd/src/bgp/src/packet/codec.rs +++ b/sartd/src/bgp/src/packet/codec.rs @@ -57,12 +57,17 @@ impl Decoder for Codec { #[tracing::instrument(skip(self, src))] fn decode(&mut self, src: &mut BytesMut) -> Result<Option<Vec<Message>>, Self::Error> { if src.is_empty() { - tracing::debug!("got empty packet"); return Ok(None); } let mut messages = Vec::new(); while src.remaining() > 0 { - let msg = decode_msg(src, &self.family, self.as4_enabled, self.path_id_enabled)?; + let msg = match decode_msg(src, &self.family, self.as4_enabled, self.path_id_enabled) { + Ok(msg) => msg, + Err(e) => { + tracing::error!(error=?e, "Failed to decode message"); + return Err(e); + } + }; messages.push(msg); } Ok(Some(messages)) @@ -337,7 +342,11 @@ mod tests { const BASE: &str = "../../testdata/messages"; fn input_file(name: &str) -> String { - std::path::Path::new(BASE).join(name).to_str().unwrap().to_string() + std::path::Path::new(BASE) + .join(name) + .to_str() + .unwrap() + .to_string() } #[tokio::test] diff --git a/sartd/src/bgp/src/peer/fsm.rs b/sartd/src/bgp/src/peer/fsm.rs index a824d2f..1bb1ff5 100644 --- a/sartd/src/bgp/src/peer/fsm.rs +++ b/sartd/src/bgp/src/peer/fsm.rs @@ -50,7 +50,7 @@ impl FiniteStateMachine { match event { Event::CONNECTION_TCP_CONNECTION_FAIL => State::Active, Event::MESSAGE_BGP_OPEN => State::OpenConfirm, - 1 | 3 | 4 | 5 | 6 | 7 | 14 | 15 => current, + 1 | 3 | 4 | 5 | 6 | 7 | 14 | 15 | 16 | 17 => current, _ => State::Idle, } }, diff --git
a/sartd/src/bgp/src/peer/peer.rs b/sartd/src/bgp/src/peer/peer.rs index 8220750..08d7453 100644 --- a/sartd/src/bgp/src/peer/peer.rs +++ b/sartd/src/bgp/src/peer/peer.rs @@ -220,6 +220,12 @@ impl Peer { info.name.clone(), ) }; + tracing::info!( + from =? old_state, + to =? state, + event = Event::to_str(event), + "Move state" + ); if let Some(exporter) = &mut self.exporter { if state.ne(&old_state) { if let Err(e) = exporter @@ -244,7 +250,7 @@ impl Peer { let (msg_event_tx, mut msg_event_rx) = unbounded_channel::<BgpMessageEvent>(); - tracing::debug!("handlng the peer event"); + tracing::debug!("handling the peer event"); let (conn_close_tx, mut conn_close_rx) = channel::(2); @@ -333,7 +339,7 @@ impl Peer { BgpMessageEvent::BgpOpenMsgErr(e) => self.bgp_open_msg_error(e).await, BgpMessageEvent::NotifMsgVerErr => self.notification_msg_ver_error().await, BgpMessageEvent::NotifMsg(msg) => self.notification_msg(msg).await, - BgpMessageEvent::KeepAliveMsg => self.keepalive_msg().await, + BgpMessageEvent::KeepAliveMsg{local_port, peer_port} => self.keepalive_msg(local_port, peer_port).await, BgpMessageEvent::UpdateMsg(msg) => self.update_msg(msg).await, BgpMessageEvent::UpdateMsgErr(e) => self.update_msg_error(e).await, BgpMessageEvent::RouteRefreshMsg(_msg) => self.route_refresh_msg(), @@ -395,17 +401,122 @@ impl Peer { connections[0].send(msg) } - #[tracing::instrument(skip(self, msg))] - fn send_to_dup_conn(&self, msg: Message, passive: bool) -> Result<(), Error> { + #[tracing::instrument(skip(self))] + fn send_all_conn(&self, msg: Message) -> Result<(), Error> { let connections = self.connections.lock().unwrap(); for conn in connections.iter() { - if conn.is_passive() == passive { + if conn.state.eq(&State::OpenConfirm) { + conn.send(msg.clone())?; + } + } + Ok(()) + } + + #[tracing::instrument(skip(self, msg))] + fn send_to_dup_conn(&self, msg: Message, passive: bool) -> Result<(), Error> { + let mut connections = self.connections.lock().unwrap(); + for conn in connections.iter_mut() { + if conn.is_passive() == passive + && (msg.msg_type().eq(&MessageType::Open) + || msg.msg_type().eq(&MessageType::Keepalive)) + { + if !(conn.state.eq(&State::Established) || conn.state.eq(&State::OpenConfirm)) { + tracing::info!( + local_addr = conn.local_addr.to_string(), + remote_addr = conn.peer_addr.to_string(), + local_port = conn.local_port, + remote_port = conn.peer_port, + target_passive = passive, + "Per connection status moves to OpenSent" + ); + conn.state = State::OpenSent; + } return conn.send(msg); } } Err(Error::Peer(PeerError::ConnectionNotEstablished)) } + #[tracing::instrument(skip_all)] + fn decide_connection(&self, passive: bool) -> Result<Option<usize>, Error> { + // ref: https://datatracker.ietf.org/doc/html/rfc4271#section-6.8 + let (local_id, remote_id) = { + let info = self.info.lock().unwrap(); + (info.router_id, info.neighbor.get_router_id()) + }; + + let prefer_local_conn = local_id > remote_id; + + let mut connections = self.connections.lock().unwrap(); + + let (incoming_idx, incoming_conn) = connections + .iter() + .enumerate() + .find(|(_, c)| c.is_passive() == passive) + .ok_or(Error::Peer(PeerError::ConnectionNotFound))?; + // incoming.state = State::OpenConfirm; + let drop_idx = if let Some((colliding_idx, colliding_conn)) = connections + .iter() + .enumerate() + .find(|(_, c)| c.ne(&incoming_conn)) + { + match colliding_conn.state { + State::OpenConfirm => { + // detect the connection collision + if prefer_local_conn { + // close the connection initiated by remote + if incoming_conn.is_passive() { + // close
+ Some(incoming_idx) + } else if colliding_conn.is_passive() { + // close + Some(colliding_idx) + } else { + None + } + } else { + // close the connection initiated by local + if !incoming_conn.is_passive() { + Some(incoming_idx) + // close + } else if !colliding_conn.is_passive() { + // close + Some(colliding_idx) + } else { + None + } + } + } + State::Established => { + // an established connection already exists + Some(incoming_idx) + } + _ => { + // the incoming connection should be established + Some(colliding_idx) + } + } + } else { + None + }; + + if let Some(drop_idx) = drop_idx { + let dropped_conn = connections.remove(drop_idx); + tracing::warn!( + local_addr = dropped_conn.local_addr.to_string(), + remote_addr = dropped_conn.peer_addr.to_string(), + local_port = dropped_conn.local_port, + remote_port = dropped_conn.peer_port, + "Drop the collided connection" + ); + drop(dropped_conn); + + return Ok(Some(drop_idx)); + } + + Ok(None) + } + #[tracing::instrument(skip(self, stream, msg_event_tx, close_signal))] fn handle_connection( &mut self, @@ -413,10 +524,11 @@ impl Peer { msg_event_tx: UnboundedSender<BgpMessageEvent>, close_signal: Sender, ) -> Result { + let peer_addr = stream.peer_addr().unwrap().ip(); let peer_port = stream.peer_addr().unwrap().port(); + let local_addr = stream.local_addr().unwrap().ip(); let local_port = stream.local_addr().unwrap().port(); - let local_addr = stream.local_addr().unwrap().ip(); self.info.lock().unwrap().addr = local_addr; let passive = local_port == Bgp::BGP_PORT; @@ -427,8 +539,13 @@ impl Peer { let conn_down_signal = Arc::new(Notify::new()); { let mut connections = self.connections.lock().unwrap(); + if connections.len() >= 2 { + return Err(Error::Peer(PeerError::TooManyConnections)); + } connections.push(Connection::new( + local_addr, local_port, + peer_addr, peer_port, msg_tx, conn_down_signal.clone(), @@ -442,8 +559,11 @@ impl Peer { tracing::info!( local_addr = local_addr.to_string(), + local_port = local_port, + peer_addr = peer_addr.to_string(), + peer_port = peer_port, passive = passive, - "initialize the connection" + "Initialize the connection" ); self.initialized.store(false, Ordering::Relaxed); @@ -469,7 +589,7 @@ impl Peer { }, MessageType::Keepalive => { recv_counter.lock().unwrap().keepalive += 1; - msg_event_tx.send(BgpMessageEvent::KeepAliveMsg) + msg_event_tx.send(BgpMessageEvent::KeepAliveMsg{local_port, peer_port}) }, MessageType::Notification => { recv_counter.lock().unwrap().notification += 1; @@ -774,7 +894,7 @@ impl Peer { // restarts the KeepaliveTimer, and // remains in the OpenConfirmed state let msg = Self::build_keepalive_msg()?; - self.send(msg)?; + self.send_all_conn(msg)?; } _ => { // if the ConnectRetryTimer is running, stops and resets the ConnectRetryTimer (sets to zero), @@ -945,7 +1065,7 @@ impl Peer { self.send(msg)?; let msg = Self::build_keepalive_msg()?; self.send(msg)?; - tracing::info!(state=?self.state(),"establish bgp session"); + tracing::info!(local_port=local_port, remote_port=peer_port,state=?self.state(),"Establish bgp session"); let _negotiated_hold_time = self.negotiate_hold_time(hold_time as u64)?; // when new session is established, it needs to advertise all paths.
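Note: decide_connection() above implements the connection-collision resolution of RFC 4271 section 6.8. The following is a minimal, self-contained sketch of that tie-breaking rule only; it is illustrative, not part of the patch, and the helper names are hypothetical:

// Illustrative sketch (hypothetical names, not part of this patch).
// RFC 4271 section 6.8: on collision, the peer with the higher BGP
// Identifier keeps the connection it initiated itself; the other peer
// keeps the connection it passively accepted.
fn prefer_local_connection(local_id: u32, remote_id: u32) -> bool {
    local_id > remote_id
}

// Whether a given colliding connection should be dropped; `passive`
// means the connection was accepted from the remote side.
fn should_drop(passive: bool, local_id: u32, remote_id: u32) -> bool {
    if prefer_local_connection(local_id, remote_id) {
        passive // keep the locally initiated connection
    } else {
        !passive // keep the remotely initiated connection
    }
}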
@@ -976,9 +1096,9 @@ impl Peer { // sets the HoldTimer according to the negotiated value (see Section 4.2), // changes its state to OpenConfirm let msg = Self::build_keepalive_msg()?; - tracing::info!(state=?self.state(),"establish bgp session"); + tracing::info!(local_port=local_port, remote_port=peer_port,state=?self.state(),"Establish bgp session"); let _negotiated_hold_time = self.negotiate_hold_time(hold_time as u64)?; - self.send(msg)?; + self.send_to_dup_conn(msg, local_port == Bgp::BGP_PORT)?; // when new session is established, it needs to advertise all paths. let (peer_asn, peer_addr, families) = { @@ -1010,22 +1130,29 @@ impl Peer { // - releases all BGP resources, // - drops the TCP connection (send TCP FIN), // - increments the ConnectRetryCounter by 1, + + // If local_port is 179, its connection should be the passively opened connection. let passive = local_port == Bgp::BGP_PORT; - let mut builder = MessageBuilder::builder(MessageType::Notification); - let msg = builder.code(NotificationCode::Cease)?.build()?; - self.send_to_dup_conn(msg, passive)?; + + // move per connection state to OpenConfirm { let mut connections = self.connections.lock().unwrap(); - if let Some(idx) = connections - .iter() - .position(|conn| conn.is_passive() == passive) - { - tracing::warn!(passive = passive, "drop duplicate connection"); - let dup_conn = connections.remove(idx); - drop(dup_conn); + for conn in connections.iter_mut() { + if conn.is_passive() == passive { + tracing::info!( + local_addr = conn.local_addr.to_string(), + remote_addr = conn.peer_addr.to_string(), + local_port = conn.local_port, + remote_port = conn.peer_port, + target_passive = passive, + "Per connection status moves to OpenConfirm" + ); + conn.state = State::OpenConfirm; + } } } - self.connect_retry_counter += 1; + + self.decide_connection(passive)?; return Ok(()); } State::Established => { @@ -1261,7 +1388,7 @@ impl Peer { // Event 26 #[tracing::instrument(skip(self))] - async fn keepalive_msg(&mut self) -> Result<(), Error> { + async fn keepalive_msg(&mut self, local_port: u16, peer_port: u16) -> Result<(), Error> { tracing::debug!(state=?self.state(),"received keepalive message"); match self.state() { State::OpenSent => { @@ -1278,13 +1405,39 @@ impl Peer { self.release(true).await?; self.connect_retry_counter += 1; } - State::OpenConfirm | State::Established => { + State::OpenConfirm => { // restarts the HoldTimer and if the negotiated HoldTime value is non-zero, and // changes(remains) its state to Established. 
// self.hold_timer.push(self.negotiated_hold_time); self.hold_timer.last = Instant::now(); // workaround for https://github.com/terassyi/sart/issues/44 self.invalid_msg_count = 0; + + let passive = local_port == Bgp::BGP_PORT; + // move per connection state to OpenConfirm + { + let mut connections = self.connections.lock().unwrap(); + for conn in connections.iter_mut() { + if conn.is_passive() == passive { + tracing::info!( + local_addr = conn.local_addr.to_string(), + remote_addr = conn.peer_addr.to_string(), + local_port = conn.local_port, + remote_port = conn.peer_port, + target_passive = passive, + "Per connection status moves to OpenConfirm" + ); + conn.state = State::OpenConfirm; + } + } + } + + self.decide_connection(passive)?; + } + State::Established => { + self.hold_timer.last = Instant::now(); + // workaround for https://github.com/terassyi/sart/issues/44 + self.invalid_msg_count = 0; } _ => { // sets the ConnectRetryTimer to zero, @@ -1740,22 +1893,39 @@ impl Peer { #[derive(Debug)] struct Connection { + local_addr: IpAddr, local_port: u16, + peer_addr: IpAddr, peer_port: u16, + state: State, msg_tx: UnboundedSender<Message>, active_close_signal: Arc<Notify>, } +impl PartialEq for Connection { + fn eq(&self, other: &Self) -> bool { + self.local_addr.eq(&other.local_addr) + && self.peer_addr.eq(&other.peer_addr) + && self.local_port == other.local_port + && self.peer_port == other.peer_port + } +} + impl Connection { fn new( + local_addr: IpAddr, local_port: u16, + peer_addr: IpAddr, peer_port: u16, msg_tx: UnboundedSender<Message>, active_close_signal: Arc<Notify>, ) -> Connection { Connection { + local_addr, local_port, + peer_addr, peer_port, + state: State::Connect, // temporary state msg_tx, active_close_signal, } diff --git a/sartd/src/bgp/src/server.rs b/sartd/src/bgp/src/server.rs index 44e8df5..9a1ed05 100644 --- a/sartd/src/bgp/src/server.rs +++ b/sartd/src/bgp/src/server.rs @@ -228,7 +228,7 @@ impl Bgp { #[tracing::instrument(skip(self, event))] async fn handle_event(&mut self, event: ControlEvent) -> Result<(), Error> { - tracing::info!(event=%event); + tracing::info!(event=%event, "Handle control event"); match event { ControlEvent::Health => {} ControlEvent::GetBgpInfo => self.get_bgp_info().await?, @@ -251,7 +251,7 @@ impl Bgp { } ControlEvent::DeletePath(family, prefixes) => { self.delete_path(family, prefixes).await?; - }, + } ControlEvent::ConfigureMultiPath(enable) => { self.set_multipath(enable).await?; } diff --git a/sartd/src/bin/cni-installer.rs b/sartd/src/bin/cni-installer.rs new file mode 100644 index 0000000..3fa5be7 --- /dev/null +++ b/sartd/src/bin/cni-installer.rs @@ -0,0 +1,77 @@ +use anyhow::Result; +use clap::Parser; +use std::path::Path; + +#[derive(Debug, Parser)] +struct Args { + #[arg(long = "bin-dir", default_value = DEFAULT_BIN_DIR)] + bin_dir: String, + + #[arg(long = "conf-dir", default_value = DEFAULT_CONF_DIR)] + conf_dir: String, + + #[arg(long = "src-bin-dir", default_value = DEFAULT_SRC_BIN_DIR)] + src_bin_dir: String, + + #[arg(long = "src-conf-dir", default_value = DEFAULT_SRC_CONF_DIR)] + src_conf_dir: String, + + #[arg(long)] + binaries: Option<Vec<String>>, +} + +const BIN_NAME: &str = "sart-cni"; + +const DEFAULT_SRC_BIN_DIR: &str = "/host/opt/cni/bin"; +const DEFAULT_SRC_CONF_DIR: &str = "/host/etc/cni/net.d"; + +const DEFAULT_BIN_DIR: &str = "/opt/cni/bin"; +const DEFAULT_CONF_DIR: &str = "/etc/cni/net.d"; + +fn main() -> Result<()> { + println!("Install CNI binary and configuration file"); + let arg = Args::parse(); + + let binaries = match arg.binaries { + Some(b) =>
b.clone(), + None => vec![BIN_NAME.to_string()], + }; + + install_cni_binaries(&binaries, &arg.src_bin_dir, &arg.bin_dir)?; + install_cni_conf(&arg.src_conf_dir, &arg.conf_dir)?; + + Ok(()) +} + +fn install_cni_binaries(binaries: &[String], src_dir: &str, dst_dir: &str) -> Result<()> { + std::fs::create_dir_all(dst_dir)?; + + let src = Path::new(src_dir); + let dst = Path::new(dst_dir); + for binary in binaries.iter() { + let src_path = src.join(binary); + let dst_path = dst.join(binary); + std::fs::copy(src_path, dst_path)?; + } + Ok(()) +} + +fn install_cni_conf(src: &str, dst: &str) -> Result<()> { + std::fs::create_dir_all(dst)?; + + // clean up existing conf + let files = std::fs::read_dir(dst)?; + for file in files.into_iter() { + let file = file?; + std::fs::remove_file(file.path())?; + } + + let dst_dir = Path::new(dst); + let new_files = std::fs::read_dir(src)?; + for file in new_files.into_iter() { + let file = file?; + std::fs::copy(file.path(), dst_dir.join(file.file_name()))?; + } + + Ok(()) +} diff --git a/sartd/src/cmd/Cargo.lock b/sartd/src/cmd/Cargo.lock index f7d8ddd..0ecaaa2 100644 --- a/sartd/src/cmd/Cargo.lock +++ b/sartd/src/cmd/Cargo.lock @@ -1540,6 +1540,20 @@ dependencies = [ "netlink-packet-utils", ] +[[package]] +name = "netlink-packet-route" +version = "0.19.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "74c171cd77b4ee8c7708da746ce392440cb7bcf618d122ec9ecc607b12938bf4" +dependencies = [ + "anyhow", + "byteorder", + "libc", + "log", + "netlink-packet-core", + "netlink-packet-utils", +] + [[package]] name = "netlink-packet-utils" version = "0.5.2" @@ -1591,6 +1605,17 @@ dependencies = [ "libc", ] +[[package]] +name = "nix" +version = "0.27.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2eb04e9c688eff1c89d72b407f168cf79bb9e867a9d3323ed6c01519eb9cc053" +dependencies = [ + "bitflags 2.4.1", + "cfg-if", + "libc", +] + [[package]] name = "nu-ansi-term" version = "0.46.0" @@ -2117,11 +2142,29 @@ dependencies = [ "futures", "log", "netlink-packet-core", - "netlink-packet-route", + "netlink-packet-route 0.17.1", + "netlink-packet-utils", + "netlink-proto", + "netlink-sys", + "nix 0.26.4", + "thiserror", + "tokio", +] + +[[package]] +name = "rtnetlink" +version = "0.14.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b684475344d8df1859ddb2d395dd3dac4f8f3422a1aa0725993cb375fc5caba5" +dependencies = [ + "futures", + "log", + "netlink-packet-core", + "netlink-packet-route 0.19.0", "netlink-packet-utils", "netlink-proto", "netlink-sys", - "nix", + "nix 0.27.1", "thiserror", "tokio", ] @@ -2265,9 +2308,9 @@ dependencies = [ "futures", "ipnet", "netlink-packet-core", - "netlink-packet-route", + "netlink-packet-route 0.17.1", "netlink-sys", - "rtnetlink", + "rtnetlink 0.13.1", "sartd-proto", "sartd-trace", "serde", @@ -2305,10 +2348,13 @@ dependencies = [ "json-patch", "k8s-openapi", "kube", + "netlink-packet-route 0.19.0", + "nix 0.27.1", "prometheus", + "rand", "rscni", "rstest", - "rtnetlink", + "rtnetlink 0.14.1", "rustls", "sartd-cert", "sartd-ipam", diff --git a/sartd/src/cmd/src/cmd.rs b/sartd/src/cmd/src/cmd.rs index 84a283b..eb94f6f 100644 --- a/sartd/src/cmd/src/cmd.rs +++ b/sartd/src/cmd/src/cmd.rs @@ -177,6 +177,8 @@ pub fn run() { config.tls.key = key; } + config.mode = c.mode; + sartd_kubernetes::controller::server::start(config, trace_conf); } } diff --git a/sartd/src/cmd/src/controller.rs b/sartd/src/cmd/src/controller.rs index b5021c9..e7e8b78 100644 --- 
a/sartd/src/cmd/src/controller.rs +++ b/sartd/src/cmd/src/controller.rs @@ -1,5 +1,8 @@ use clap::Parser; -use sartd_kubernetes::controller::config::{DEFAULT_HTTPS_PORT, DEFAULT_HTTP_PORT}; +use sartd_kubernetes::{ + config::Mode, + controller::config::{DEFAULT_HTTPS_PORT, DEFAULT_HTTP_PORT}, +}; #[derive(Debug, Clone, Parser)] pub struct ControllerCmd { @@ -25,4 +28,11 @@ pub struct ControllerCmd { #[arg(long = "tls-key", help = "path to TLS Key for controller")] pub tls_key: Option, + + #[arg( + long = "mode", + help = "Running mode (default: Dual)", + default_value_t = Mode::Dual, + )] + pub mode: Mode, } diff --git a/sartd/src/fib/src/kernel.rs b/sartd/src/fib/src/kernel.rs index 5cbfe99..851449f 100644 --- a/sartd/src/fib/src/kernel.rs +++ b/sartd/src/fib/src/kernel.rs @@ -184,9 +184,15 @@ impl KernelRtPoller { request = self.rx.recv().fuse() => { if let Some((req, route)) = request { let res = match req { - RequestType::Add | RequestType::Replace => add_route(&handle, req, route).await, + RequestType::Add | RequestType::Replace => { + let new_route = extract_nexthop(route).await?; + add_route(req, new_route).await + }, RequestType::Delete => delete_route(&handle, route).await, - RequestType::AddMultiPath => add_multi_path_route(&handle, route).await, + RequestType::AddMultiPath => { + let new_route = extract_nexthop(route).await?; + add_multi_path_route(&handle, new_route).await + }, RequestType::DeleteMultiPath => delete_multi_path_route(&handle, route).await, }; match res { @@ -202,13 +208,56 @@ impl KernelRtPoller { } } -#[tracing::instrument(skip(handle))] -async fn add_route( - handle: &rtnetlink::Handle, - req: RequestType, - route: Route, -) -> Result<(), Error> { - tracing::info!("add or replace the rouet to kernel"); +#[tracing::instrument()] +async fn extract_nexthop(route: Route) -> Result<Route, Error> { + let (conn, handle, _) = rtnetlink::new_connection()?; + tokio::spawn(conn); + + let targets: Vec<IpAddr> = route.next_hops.iter().map(|nxt| nxt.gateway).collect(); + let mut new_targets: HashMap<IpAddr, (IpAddr, u32)> = HashMap::new(); + + let mut get_route_res = handle.route().get(route.version.clone()).execute(); + while let Some(r) = get_route_res.try_next().await?
{ + if let Some(dst) = r.destination_prefix() { + let dst_net = IpNet::new(dst.0, dst.1).unwrap(); // we can unwrap + for target in targets.iter() { + if dst_net.contains(target) { + if let Some(gateway) = r.gateway() { + if let Some(iface) = r.output_interface() { + new_targets.insert(*target, (gateway, iface)); + } else { + new_targets.insert(*target, (gateway, 0)); + } + } + } + } + } + } + let new_nexthops = route + .clone() + .next_hops + .iter() + .map(|nxt| { + let mut new_nxt = nxt.clone(); + if let Some(new_gw) = new_targets.get(&nxt.gateway) { + new_nxt.gateway = new_gw.0; + new_nxt.interface = new_gw.1; + } + new_nxt + }) + .collect::>(); + let mut new_route = route.clone(); + new_route.next_hops = new_nexthops; + Ok(new_route) +} + +#[tracing::instrument()] +async fn add_route(req: RequestType, route: Route) -> Result<(), Error> { + tracing::info!("add or replace the route to kernel"); + + let (conn, handle, _) = rtnetlink::new_connection()?; + tokio::spawn(conn); + let rt = handle.route(); let mut existing = false; @@ -334,7 +383,7 @@ async fn add_multi_path_route(handle: &rtnetlink::Handle, route: Route) -> Resul r.next_hops.push(next_hop); } - add_route(handle, RequestType::Replace, r).await + add_route(RequestType::Replace, r).await } #[tracing::instrument(skip(handle))] @@ -344,7 +393,7 @@ async fn delete_multi_path_route(handle: &rtnetlink::Handle, route: Route) -> Re r.next_hops .retain(|n| route.next_hops.iter().any(|nn| nn.gateway.ne(&n.gateway))); - add_route(handle, RequestType::Replace, r).await + add_route(RequestType::Replace, r).await } #[tracing::instrument(skip(handle))] diff --git a/sartd/src/kubernetes/Cargo.lock b/sartd/src/kubernetes/Cargo.lock index d376325..c2e2aa7 100644 --- a/sartd/src/kubernetes/Cargo.lock +++ b/sartd/src/kubernetes/Cargo.lock @@ -1528,14 +1528,14 @@ dependencies = [ [[package]] name = "netlink-packet-route" -version = "0.17.1" +version = "0.19.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "053998cea5a306971f88580d0829e90f270f940befd7cf928da179d4187a5a66" +checksum = "74c171cd77b4ee8c7708da746ce392440cb7bcf618d122ec9ecc607b12938bf4" dependencies = [ "anyhow", - "bitflags 1.3.2", "byteorder", "libc", + "log", "netlink-packet-core", "netlink-packet-utils", ] @@ -1582,11 +1582,11 @@ dependencies = [ [[package]] name = "nix" -version = "0.26.4" +version = "0.27.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "598beaf3cc6fdd9a5dfb1630c2800c7acd31df7aaf0f565796fba2b53ca1af1b" +checksum = "2eb04e9c688eff1c89d72b407f168cf79bb9e867a9d3323ed6c01519eb9cc053" dependencies = [ - "bitflags 1.3.2", + "bitflags 2.4.1", "cfg-if", "libc", ] @@ -2110,9 +2110,9 @@ dependencies = [ [[package]] name = "rtnetlink" -version = "0.13.1" +version = "0.14.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7a552eb82d19f38c3beed3f786bd23aa434ceb9ac43ab44419ca6d67a7e186c0" +checksum = "b684475344d8df1859ddb2d395dd3dac4f8f3422a1aa0725993cb375fc5caba5" dependencies = [ "futures", "log", @@ -2247,7 +2247,10 @@ dependencies = [ "json-patch", "k8s-openapi", "kube", + "netlink-packet-route", + "nix", "prometheus", + "rand", "rscni", "rstest", "rtnetlink", diff --git a/sartd/src/kubernetes/Cargo.toml b/sartd/src/kubernetes/Cargo.toml index 55b7a88..3239bc9 100644 --- a/sartd/src/kubernetes/Cargo.toml +++ b/sartd/src/kubernetes/Cargo.toml @@ -30,7 +30,7 @@ sartd-proto = { path = "../proto" } sartd-ipam = { path = "../ipam" } sartd-mock = { path = "../mock" } futures = "0.3.30" 
-rtnetlink = "0.13.1" # Ignore v0.14.x +rtnetlink = "0.14.1" actix-web = { version = "4.4.1", features = ["rustls-0_21"] } rustls = "0.21.9" json-patch = "1.2.0" @@ -47,3 +47,6 @@ assert-json-diff = "2.0.2" tokio-stream = { version = "0.1.14", features = ["net"] } # rscni = "0.0.3" rscni = { git = "https://github.com/terassyi/rscni", branch = "fix-error-result" } +nix = { version = "0.27.1", features = ["sched", "process"] } +netlink-packet-route = "0.19.0" +rand = "0.8.5" diff --git a/sartd/src/kubernetes/config/.cargo b/sartd/src/kubernetes/config/.cargo new file mode 100644 index 0000000..ca6c72f --- /dev/null +++ b/sartd/src/kubernetes/config/.cargo @@ -0,0 +1,2 @@ +[target.x86_64-unknown-linux-gnu] +runner = 'sudo -E' diff --git a/sartd/src/kubernetes/src/agent/cni.rs b/sartd/src/kubernetes/src/agent/cni.rs index 90b584e..d9b589b 100644 --- a/sartd/src/kubernetes/src/agent/cni.rs +++ b/sartd/src/kubernetes/src/agent/cni.rs @@ -1,2 +1,6 @@ +pub mod error; +pub mod gc; +mod netlink; +pub mod netns; pub mod pod; pub mod server; diff --git a/sartd/src/kubernetes/src/agent/cni/error.rs b/sartd/src/kubernetes/src/agent/cni/error.rs new file mode 100644 index 0000000..cd9f274 --- /dev/null +++ b/sartd/src/kubernetes/src/agent/cni/error.rs @@ -0,0 +1,48 @@ +use thiserror::Error; + +use super::{netlink, netns}; + +#[derive(Debug, Error)] +pub enum Error { + #[error("NetNS: {0}")] + NetNS(#[source] netns::Error), + + #[error("Netlink: {0}")] + Netlink(#[source] netlink::Error), + + #[error("Kubernetes: {0}")] + Kube(#[source] kube::Error), + + #[error("Missing field: {0}")] + MissingField(String), + + #[error("Invalid address: {0}")] + InvalidAddress(String), + + #[error("Failed to get lock")] + Lock, + + #[error("Pod already configured")] + AlreadyConfigured(String), + + #[error("Default pool not found")] + DefaultPoolNotFound, + + #[error("Block not found")] + BlockNotFound(String), + + #[error("Failed to receive notification")] + ReceiveNotify, + + #[error("Ipam: {0}")] + Ipam(#[source] sartd_ipam::error::Error), + + #[error("Pod address is not found")] + PodAddressIsNotFound, + + #[error("Allocation not found")] + AllocationNotFound, + + #[error("Addresses don't match")] + AddressNotMatched, +} diff --git a/sartd/src/kubernetes/src/agent/cni/gc.rs b/sartd/src/kubernetes/src/agent/cni/gc.rs new file mode 100644 index 0000000..f56e910 --- /dev/null +++ b/sartd/src/kubernetes/src/agent/cni/gc.rs @@ -0,0 +1,163 @@ +use std::{collections::HashMap, sync::Arc, time::Duration}; + +use kube::{api::{DeleteParams, ListParams}, Api, Client, ResourceExt}; + +use sartd_ipam::manager::AllocatorSet; + +use crate::{agent::reconciler::node_bgp::ENV_HOSTNAME, crd::address_block::{AddressBlock, ADDRESS_BLOCK_NODE_LABEL}}; + +pub struct GarbageCollector { + interval: Duration, + client: Client, + allocator: Arc<AllocatorSet>, + node: String, + blocks: HashMap<String, GarbageCollectorMarker>, + pods: HashMap, +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +enum GarbageCollectorMarker { + Unused, + Deleted, +} + +impl GarbageCollector { + pub fn new( + interval: Duration, + client: Client, + allocator: Arc<AllocatorSet>, + ) -> GarbageCollector { + let node_name = std::env::var(ENV_HOSTNAME).expect("HOSTNAME environment variable is not set"); + GarbageCollector { + interval, + client, + allocator, + node: node_name, + blocks: HashMap::new(), + pods: HashMap::new(), + } + } + + #[tracing::instrument(skip_all)] + pub async fn run(&mut self) { + let mut ticker = tokio::time::interval(self.interval); + + loop { + ticker.tick().await; + + let address_block_api =
Api::<AddressBlock>::all(self.client.clone()); + + // let label_selector = form + let list_params = ListParams::default().labels(&format!("{}={}", ADDRESS_BLOCK_NODE_LABEL, self.node)); + let block_list = match address_block_api.list(&list_params).await { + Ok(list) => list, + Err(e) => { + tracing::warn!(error=?e,"Failed to list AddressBlock"); + continue; + } + }; + + { + let alloc_set = self.allocator.clone(); + let allocator = alloc_set.inner.lock().unwrap(); + + for ab in block_list.iter() { + let block_opt = allocator.blocks.get(&ab.name_any()); + match block_opt { + Some(block) => { + if block.allocator.is_empty() { + match self.blocks.get_mut(&block.name) { + Some(status) => { + if GarbageCollectorMarker::Unused.eq(status) { + *status = GarbageCollectorMarker::Deleted; + tracing::info!( + block = block.name, + gc_mark =? GarbageCollectorMarker::Deleted, + "Update GC marker", + ); + } + }, + None => { + self.blocks.insert(block.name.clone(), GarbageCollectorMarker::Unused); + tracing::info!( + block = block.name, + gc_mark =? GarbageCollectorMarker::Unused, + "Add GC marker", + ); + } + } + } + }, + None => { + match self.blocks.get_mut(&ab.name_any()) { + Some(status) => { + if GarbageCollectorMarker::Unused.eq(status) { + *status = GarbageCollectorMarker::Deleted; + tracing::info!( + block = ab.name_any(), + gc_mark =? GarbageCollectorMarker::Deleted, + "Update GC marker", + ); + } + }, + None => { + self.blocks.insert(ab.name_any(), GarbageCollectorMarker::Unused); + tracing::info!( + block = ab.name_any(), + gc_mark =? GarbageCollectorMarker::Unused, + "Add GC marker", + ); + } + } + } + } + } + + for (block_name, block) in allocator.blocks.iter() { + if block.allocator.is_empty() { + match self.blocks.get_mut(block_name) { + Some(status) => { + if GarbageCollectorMarker::Unused.eq(status) { + *status = GarbageCollectorMarker::Deleted; + } + tracing::info!( + block = block_name, + gc_mark =? GarbageCollectorMarker::Deleted, + "Update GC marker", + ); + } + None => { + self.blocks + .insert(block_name.clone(), GarbageCollectorMarker::Unused); + tracing::info!( + block = block_name, + gc_mark =?
GarbageCollectorMarker::Unused, + "Add GC marker", + ); + } + } + } + } + } + + let mut deleted_keys = Vec::new(); + for (block, status) in self.blocks.iter() { + if GarbageCollectorMarker::Deleted.eq(status) { + if let Ok(Some(_ab)) = address_block_api.get_opt(block).await { + if let Err(err) = address_block_api + .delete(block, &DeleteParams::default()) + .await + { + tracing::warn!(err=?err, "Failed to delete AddressBlock by GC"); + continue; + } + deleted_keys.push(block.clone()); + } + } + } + for k in deleted_keys.iter() { + self.blocks.remove(k); + } + } + } +} diff --git a/sartd/src/kubernetes/src/agent/cni/netlink.rs b/sartd/src/kubernetes/src/agent/cni/netlink.rs new file mode 100644 index 0000000..3020108 --- /dev/null +++ b/sartd/src/kubernetes/src/agent/cni/netlink.rs @@ -0,0 +1,1030 @@ +use std::{collections::HashMap, net::IpAddr, str::FromStr}; + +use futures::TryStreamExt; + +use ipnet::{IpAdd, IpNet}; +use netlink_packet_route::{ + link::{self, LinkAttribute}, + route::{RouteAddress, RouteAttribute, RouteProtocol, RouteScope}, + rule::RuleAction, +}; + +use rtnetlink::{Handle, IpVersion}; +use thiserror::Error; + +use super::netns::NetNS; + +const ROUTE_SCOPE_SART: u8 = 50; +pub const DEFAULT_ROUTE_IPV4: &str = "0.0.0.0/0"; +pub const DEFAULT_ROUTE_IPV6: &str = "::/0"; + +#[derive(Debug, Error)] +pub enum Error { + #[error("Open netlink socket: {0}")] + Open(#[source] std::io::Error), + + #[error("Netlink: {0}")] + Netlink(#[source] rtnetlink::Error), + + #[error("Create veth: {0}")] + Veth(#[source] rtnetlink::Error), + + #[error("Set NetNS: {0}")] + NetNS(#[source] rtnetlink::Error), + + #[error("Link up: {0}")] + LinkUp(#[source] rtnetlink::Error), + + #[error("Link: {0}")] + Link(#[source] rtnetlink::Error), + + #[error("Address: {0}")] + Address(#[source] rtnetlink::Error), + + #[error("Route: {0}")] + Route(#[source] rtnetlink::Error), + + #[error("Rule: {0}")] + Rule(#[source] rtnetlink::Error), + + #[error("Link not found: {0}")] + LinkNotFound(String), + + #[error("Invalid container-id")] + InvalidContainerId, + + #[error("Invalid address: {0}")] + InvalidAddress(String), + + #[error("Route not found: {0}")] + RouteNotFound(String), + + #[error("Invalid MAC address")] + InvalidMacAddress, + + #[error("Invalid link alias: {0}")] + InvalidLinkAlias(String), +} + +pub async fn add_veth_pair(container_id: &str, ifname: &str) -> Result<String, Error> { + let (conn, handle, _) = rtnetlink::new_connection().map_err(Error::Open)?; + tokio::spawn(conn); + + if container_id.len() < 8 { + return Err(Error::InvalidContainerId); + } + + let short_id = &container_id[..8]; + let host_side = format!("veth{short_id}"); + + handle + .link() + .add() + .veth(host_side.clone(), ifname.to_string()) + .execute() + .await + .map_err(Error::Veth)?; + Ok(host_side) +} + +pub async fn move_netns(name: &str, netns: &NetNS) -> Result<(), Error> { + let (conn, handle, _) = rtnetlink::new_connection().map_err(Error::Open)?; + tokio::spawn(conn); + + let index = get_link_index_by_name(&handle, name).await?; + handle + .link() + .set(index) + .setns_by_fd(netns.fd()) + .execute() + .await + .map_err(Error::NetNS)?; + Ok(()) +} + +async fn del_veth(name: &str) -> Result<(), Error> { + let (conn, handle, _) = rtnetlink::new_connection().map_err(Error::Open)?; + tokio::spawn(conn); + + let index = get_link_index_by_name(&handle, name).await?; + handle + .link() + .del(index) + .execute() + .await + .map_err(Error::Veth)?; + Ok(()) +} + +pub async fn link_up(name: &str) -> Result<(), Error> { + let (conn, handle,
_) = rtnetlink::new_connection().map_err(Error::Open)?; + tokio::spawn(conn); + + let index = get_link_index_by_name(&handle, name).await?; + handle + .link() + .set(index) + .up() + .execute() + .await + .map_err(Error::LinkUp)?; + Ok(()) +} + +pub async fn get_link_mac(name: &str) -> Result<String, Error> { + let (conn, handle, _) = rtnetlink::new_connection().map_err(Error::Open)?; + tokio::spawn(conn); + + let mut links = handle.link().get().match_name(name.to_string()).execute(); + if let Some(msg) = links.try_next().await.map_err(Error::Link)? { + for attr in msg.attributes.into_iter() { + if let LinkAttribute::Address(v) = attr { + if v.len() != 6 { + return Err(Error::InvalidMacAddress); + } + return Ok(format!( + "{:02x}:{:02x}:{:02x}:{:02x}:{:02x}:{:02x}", + v[0], v[1], v[2], v[3], v[4], v[5] + )); + } + } + } + Err(Error::LinkNotFound(name.to_string())) +} + +pub async fn set_alias(name: &str, alias: &str) -> Result<(), Error> { + let (conn, handle, _) = rtnetlink::new_connection().map_err(Error::Open)?; + tokio::spawn(conn); + + let index = get_link_index_by_name(&handle, name).await?; + let mut req = handle.link().set(index); + let msg = req.message_mut(); + msg.attributes + .push(LinkAttribute::IfAlias(alias.to_string())); + req.execute().await.map_err(Error::Link) +} + +pub async fn add_addr(name: &str, addr: &IpNet) -> Result<(), Error> { + let (conn, handle, _) = rtnetlink::new_connection().map_err(Error::Open)?; + tokio::spawn(conn); + + let index = get_link_index_by_name(&handle, name).await?; + + handle + .address() + .add(index, addr.addr(), addr.prefix_len()) + .execute() + .await + .map_err(Error::Address) +} + +pub async fn add_route( + dst: &IpNet, + gateway: Option<IpAddr>, + device: &str, + scope: RouteScope, +) -> Result<(), Error> { + let (conn, handle, _) = rtnetlink::new_connection().map_err(Error::Open)?; + tokio::spawn(conn); + + let index = get_link_index_by_name(&handle, device).await?; + + let req = handle + .route() + .add() + .scope(scope) + .output_interface(index) + .protocol(RouteProtocol::Other(ROUTE_SCOPE_SART)); + + match dst { + IpNet::V4(dst) => { + let req = req.v4().destination_prefix(dst.addr(), dst.prefix_len()); + if let Some(IpAddr::V4(gw)) = gateway { + req.gateway(gw).execute().await.map_err(Error::Route)?; + } else { + req.execute().await.map_err(Error::Route)?; + } + } + IpNet::V6(dst) => { + let req = req.v6().destination_prefix(dst.addr(), dst.prefix_len()); + if let Some(IpAddr::V6(gw)) = gateway { + req.gateway(gw).execute().await.map_err(Error::Route)?; + } else { + req.execute().await.map_err(Error::Route)?; + } + } + }; + + Ok(()) +} + +pub async fn add_rule(table: u32, protocol: IpAddr) -> Result<(), Error> { + let (conn, handle, _) = rtnetlink::new_connection().map_err(Error::Open)?; + tokio::spawn(conn); + + let req = handle + .rule() + .add() + .table_id(table) + .action(RuleAction::ToTable) + .priority(2000); + + match protocol { + IpAddr::V4(_) => req.v4().execute().await.map_err(Error::Rule), + IpAddr::V6(_) => req.v6().execute().await.map_err(Error::Rule), + } +} + +pub async fn add_route_in_table( + dst: &IpNet, + gateway: Option<IpAddr>, + device: &str, + scope: RouteScope, + table: u32, +) -> Result<(), Error> { + let (conn, handle, _) = rtnetlink::new_connection().map_err(Error::Open)?; + tokio::spawn(conn); + + let index = get_link_index_by_name(&handle, device).await?; + + let req = handle + .route() + .add() + .scope(scope) + .output_interface(index) + .table_id(table) + .protocol(RouteProtocol::Other(ROUTE_SCOPE_SART)); + match dst { +
IpNet::V4(dst) => { + if let Some(IpAddr::V4(gw)) = gateway { + req.v4() + .gateway(gw) + .destination_prefix(dst.addr(), dst.prefix_len()) + .execute() + .await + .map_err(Error::Route)?; + } else { + req.v4() + .destination_prefix(dst.addr(), dst.prefix_len()) + .execute() + .await + .map_err(Error::Route)?; + } + } + IpNet::V6(dst) => { + if let Some(IpAddr::V6(gw)) = gateway { + req.v6() + .gateway(gw) + .destination_prefix(dst.addr(), dst.prefix_len()) + .execute() + .await + .map_err(Error::Route)?; + } else { + req.v6() + .destination_prefix(dst.addr(), dst.prefix_len()) + .execute() + .await + .map_err(Error::Route)?; + } + } + } + Ok(()) +} + +pub async fn del_link(name: &str) -> Result<(), Error> { + let (conn, handle, _) = rtnetlink::new_connection().map_err(Error::Open)?; + tokio::spawn(conn); + + let index = get_link_index_by_name(&handle, name).await?; + + handle + .link() + .del(index) + .execute() + .await + .map_err(Error::Link)?; + Ok(()) +} + +pub async fn del_route(dst: &IpNet) -> Result<(), Error> { + let (conn, handle, _) = rtnetlink::new_connection().map_err(Error::Open)?; + tokio::spawn(conn); + + let ip_version = match dst { + IpNet::V4(_) => IpVersion::V4, + IpNet::V6(_) => IpVersion::V6, + }; + let mut res = handle.route().get(ip_version.clone()).execute(); + + let default_route = match dst { + IpNet::V4(_) => IpNet::from_str(DEFAULT_ROUTE_IPV4).unwrap(), + IpNet::V6(_) => IpNet::from_str(DEFAULT_ROUTE_IPV6).unwrap(), + }; + + while let Some(r) = res.try_next().await.map_err(Error::Route)? { + let dst_prefix_len = r.header.destination_prefix_length; + let mut is_default = true; + for attr in r.attributes.iter() { + match ip_version { + IpVersion::V4 => { + if let RouteAttribute::Destination(RouteAddress::Inet(addr)) = attr { + is_default = false; + if addr.eq(&dst.addr()) && dst_prefix_len == dst.prefix_len() { + // delete it + return handle.route().del(r).execute().await.map_err(Error::Route); + } + } + } + IpVersion::V6 => { + if let RouteAttribute::Destination(RouteAddress::Inet6(addr)) = attr { + is_default = false; + if addr.eq(&dst.addr()) && dst_prefix_len == dst.prefix_len() { + // delete it + return handle.route().del(r).execute().await.map_err(Error::Route); + } + } + } + } + } + if is_default && default_route.eq(dst) { + return handle.route().del(r).execute().await.map_err(Error::Route); + } + } + Err(Error::RouteNotFound(dst.to_string())) +} + +pub async fn del_route_in_table(dst: &IpNet, table: u32) -> Result<(), Error> { + let (conn, handle, _) = rtnetlink::new_connection().map_err(Error::Open)?; + tokio::spawn(conn); + + let ip_version = match dst { + IpNet::V4(_) => IpVersion::V4, + IpNet::V6(_) => IpVersion::V6, + }; + let mut res = handle.route().get(ip_version.clone()).execute(); + + let default_route = match dst { + IpNet::V4(_) => IpNet::from_str(DEFAULT_ROUTE_IPV4).unwrap(), + IpNet::V6(_) => IpNet::from_str(DEFAULT_ROUTE_IPV6).unwrap(), + }; + + while let Some(r) = res.try_next().await.map_err(Error::Route)? 
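// rtnetlink deletes a route by handing the full RouteMessage back to the kernel, so list the routes first and match on destination (or on the default route) before calling del(). +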
{ + let dst_prefix_len = r.header.destination_prefix_length; + // A route belongs to the target table if either the message header or a Table attribute says so. + let is_target_table = r.header.table == table as u8 + || r.attributes.iter().any(|attr| matches!(attr, RouteAttribute::Table(id) if table.eq(id))); + if !is_target_table { + continue; + } + let mut is_default = true; + for attr in r.attributes.iter() { + match ip_version { + IpVersion::V4 => { + if let RouteAttribute::Destination(RouteAddress::Inet(addr)) = attr { + is_default = false; + if addr.eq(&dst.addr()) && dst_prefix_len == dst.prefix_len() { + // delete it + return handle.route().del(r).execute().await.map_err(Error::Route); + } + } + } + IpVersion::V6 => { + if let RouteAttribute::Destination(RouteAddress::Inet6(addr)) = attr { + is_default = false; + if addr.eq(&dst.addr()) && dst_prefix_len == dst.prefix_len() { + // delete it + return handle.route().del(r).execute().await.map_err(Error::Route); + } + } + } + } + } + if is_default && default_route.eq(dst) { + return handle.route().del(r).execute().await.map_err(Error::Route); + } + } + Err(Error::RouteNotFound(dst.to_string())) +} + +pub async fn del_rule(table: u32, protocol: &IpAddr) -> Result<(), Error> { + let (conn, handle, _) = rtnetlink::new_connection().map_err(Error::Open)?; + tokio::spawn(conn); + let ip_version = match protocol { + IpAddr::V4(_) => IpVersion::V4, + IpAddr::V6(_) => IpVersion::V6, + }; + + let mut rules = handle.rule().get(ip_version).execute(); + while let Some(r) = rules.try_next().await.map_err(Error::Rule)? { + if r.header.table == table as u8 { + return handle.rule().del(r).execute().await.map_err(Error::Rule); + } + } + + Ok(()) +} + +pub async fn get_rule(table: u32, protocol: &IpAddr) -> Result<Option<()>, Error> { + let (conn, handle, _) = rtnetlink::new_connection().map_err(Error::Open)?; + tokio::spawn(conn); + let ip_version = match protocol { + IpAddr::V4(_) => IpVersion::V4, + IpAddr::V6(_) => IpVersion::V6, + }; + let mut rules = handle.rule().get(ip_version).execute(); + while let Some(r) = rules.try_next().await.map_err(Error::Rule)? { + if r.header.table == table as u8 { + return Ok(Some(())); + } + } + Ok(None) +} + +pub async fn get_link_index_by_name(handle: &Handle, name: &str) -> Result<u32, Error> { + let mut links = handle.link().get().match_name(name.to_string()).execute(); + if let Some(msg) = links.try_next().await.map_err(Error::Link)? { + return Ok(msg.header.index); + } + Err(Error::LinkNotFound(name.to_string())) +} + +async fn get_link_name_by_index(handle: &Handle, index: u32) -> Result<String, Error> { + let mut links = handle.link().get().match_index(index).execute(); + while let Some(l) = links.try_next().await.map_err(Error::Link)? { + for attr in l.attributes.into_iter() { + if let LinkAttribute::IfName(name) = attr { + return Ok(name); + } + } + } + Err(Error::LinkNotFound(format!("{index}"))) +} + +/// This function looks up an ifindex by container_id. +/// The container_id must have been stored in the link's alias beforehand (see ContainerLinkInfo::to_alias). +pub async fn get_link_by_container_id(handle: &Handle, container_id: &str) -> Result<u32, Error> { + let mut links = handle.link().get().execute(); + while let Some(l) = links.try_next().await.map_err(Error::Link)?
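// Scan all links and match on the alias attribute, which encodes "pool/container-id/ifname" (see ContainerLinkInfo::to_alias), e.g. "default/0a6a4b09.../eth0". +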
{ + for attr in l.attributes.into_iter() { + if let LinkAttribute::IfAlias(alias) = attr { + if let Ok(link_info) = ContainerLinkInfo::from_str(&alias) { + if container_id.eq(&link_info.id) { + return Ok(l.header.index); + } + } + } + } + } + Err(Error::LinkNotFound(container_id.to_string())) +} + +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct ContainerLinkInfo { + pub id: String, + pub ifname: String, + pub pool: String, +} + +impl ContainerLinkInfo { + pub fn new(id: &str, ifname: &str, pool: &str) -> ContainerLinkInfo { + ContainerLinkInfo { + id: id.to_string(), + ifname: ifname.to_string(), + pool: pool.to_string(), + } + } + + pub fn to_alias(&self) -> String { + format!("{}/{}/{}", self.pool, self.id, self.ifname) + } +} + +impl FromStr for ContainerLinkInfo { + type Err = Error; + fn from_str(s: &str) -> Result<Self, Self::Err> { + let alias_list = s.split('/').collect::<Vec<&str>>(); + if alias_list.len() != 3 { + return Err(Error::InvalidLinkAlias(s.to_string())); + } + Ok(ContainerLinkInfo { + pool: alias_list[0].to_string(), + id: alias_list[1].to_string(), + ifname: alias_list[2].to_string(), + }) + } +} + +// link_list returns a map from the host-side interface name to the link info parsed from its alias. +pub async fn link_list() -> Result<HashMap<String, ContainerLinkInfo>, Error> { + let (conn, handle, _) = rtnetlink::new_connection().map_err(Error::Open)?; + tokio::spawn(conn); + + let mut containers = HashMap::new(); + + let mut links = handle.link().get().execute(); + while let Some(l) = links.try_next().await.map_err(Error::Link)? { + let mut ifname: Option<String> = None; + let mut link_info: Option<ContainerLinkInfo> = None; + for attr in l.attributes.into_iter() { + if let LinkAttribute::IfAlias(ref alias) = attr { + if let Ok(info) = ContainerLinkInfo::from_str(alias) { + link_info = Some(info); + } + } + if let LinkAttribute::IfName(name) = attr { + ifname = Some(name); + } + } + if ifname.is_some() && link_info.is_some() { + containers.insert(ifname.unwrap(), link_info.unwrap()); + } + } + Ok(containers) +} + +pub async fn route_list(protocol: IpAddr, table_id: u32) -> Result<HashMap<String, IpAddr>, Error> { + let (conn, handle, _) = rtnetlink::new_connection().map_err(Error::Open)?; + tokio::spawn(conn); + + let mut route_map = HashMap::new(); + + let ip_version = match protocol { + IpAddr::V4(_) => IpVersion::V4, + IpAddr::V6(_) => IpVersion::V6, + }; + let mut res = handle.route().get(ip_version.clone()).execute(); + + while let Some(r) = res.try_next().await.map_err(Error::Route)?
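// Keep only routes in the requested table, pairing each route's output interface name with its destination address. +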
{ + if r.header.table != table_id as u8 { + continue; + } + let mut ifname: Option<String> = None; + let mut addr: Option<IpAddr> = None; + for attr in r.attributes.iter() { + if let RouteAttribute::Oif(n) = attr { + ifname = Some(get_link_name_by_index(&handle, *n).await?); + } + match ip_version { + IpVersion::V4 => { + if let RouteAttribute::Destination(RouteAddress::Inet(a)) = attr { + addr = Some(IpAddr::V4(*a)); + } + } + IpVersion::V6 => { + if let RouteAttribute::Destination(RouteAddress::Inet6(a)) = attr { + addr = Some(IpAddr::V6(*a)); + } + } + } + } + if ifname.is_some() && addr.is_some() { + route_map.insert(ifname.unwrap(), addr.unwrap()); + } + } + + Ok(route_map) +} + +#[cfg(test)] +mod tests { + + use std::str::FromStr; + + use netlink_packet_route::{address::AddressAttribute, link::LinkFlag}; + + use crate::agent::cni::{netns, server::CNI_ROUTE_TABLE_ID}; + + use super::*; + + struct TestNetNS { + name: String, + ns: NetNS, + host_ns: NetNS, + } + + impl TestNetNS { + fn new() -> Self { + use rand::Rng; + + let host_ns = netns::get_current_netns().unwrap(); + const CHARSET: &[u8] = b"abcdefghijklmnopqrstuvwxyz0123456789"; + let mut rng = rand::thread_rng(); + + let ns_prefix: String = (0..8) + .map(|_| { + let idx = rng.gen_range(0..CHARSET.len()); + CHARSET[idx] as char + }) + .collect(); + + let ns_name = format!("test-{ns_prefix}"); + let mut cmd = std::process::Command::new("ip"); + + cmd.args(["netns", "add", &ns_name]); + cmd.output().unwrap(); + let ns = NetNS::try_from(format!("/var/run/netns/{}", ns_name).as_str()).unwrap(); + TestNetNS { + name: ns_name, + ns, + host_ns, + } + } + + fn enter(&self) { + self.ns.enter().unwrap(); + } + + fn return_host_ns(&self) { + self.host_ns.enter().unwrap(); + } + + fn cleanup(&self) { + let mut cmd = std::process::Command::new("ip"); + cmd.args(["netns", "del", &self.name]); + cmd.output().unwrap(); + } + } + + impl Drop for TestNetNS { + fn drop(&mut self) { + self.cleanup() + } + } + + #[tokio::test] + async fn test_add_veth_pair() { + let test_ns = TestNetNS::new(); + + test_ns.enter(); + + let host_side = add_veth_pair("000011112222", "c-test").await.unwrap(); + assert_eq!("veth00001111", host_side.as_str()); + + let (conn, handle, _) = rtnetlink::new_connection().unwrap(); + tokio::spawn(conn); + + let _host_ifindex = get_link_index_by_name(&handle, &host_side).await.unwrap(); + let _container_ifindex = get_link_index_by_name(&handle, "c-test").await.unwrap(); + } + + #[tokio::test] + async fn test_del_link() { + let test_ns = TestNetNS::new(); + + test_ns.enter(); + + let host_side = add_veth_pair("000011112222", "c-test").await.unwrap(); + assert_eq!("veth00001111", host_side.as_str()); + + let (conn, handle, _) = rtnetlink::new_connection().unwrap(); + tokio::spawn(conn); + + let _host_ifindex = get_link_index_by_name(&handle, &host_side).await.unwrap(); + let _container_ifindex = get_link_index_by_name(&handle, "c-test").await.unwrap(); + + del_link(&host_side).await.unwrap(); + let err = get_link_index_by_name(&handle, &host_side).await; + assert!(err.is_err()); + } + + #[tokio::test] + async fn test_move_netns() { + let test_ns = TestNetNS::new(); + + let (conn, handle, _) = rtnetlink::new_connection().unwrap(); + tokio::spawn(conn); + let host_side = add_veth_pair("000011112222", "c-test").await.unwrap(); + + move_netns("c-test", &test_ns.ns).await.unwrap(); + + let _exist = get_link_index_by_name(&handle, &host_side).await.unwrap(); + let err = get_link_index_by_name(&handle, "c-test").await; + assert!(err.is_err()); // This
error should be LinkNotFound + + test_ns.enter(); + let _exist = get_link_index_by_name(&handle, "c-test").await; + } + + #[tokio::test] + async fn test_link_up() { + let test_ns = TestNetNS::new(); + + test_ns.enter(); + + let (conn, handle, _) = rtnetlink::new_connection().unwrap(); + tokio::spawn(conn); + let host_side = add_veth_pair("000011112222", "c-test").await.unwrap(); + + link_up(&host_side).await.unwrap(); + + let mut links = handle.link().get().match_name(host_side).execute(); + let mut link_up = false; + match links.try_next().await.unwrap() { + Some(msg) => { + for flag in msg.header.flags.into_iter() { + if flag.eq(&LinkFlag::Up) { + link_up = true; + } + } + } + None => panic!("Link not found"), + } + assert!(link_up); + } + + #[tokio::test] + async fn test_link_set_alias() { + let test_ns = TestNetNS::new(); + + test_ns.enter(); + let (conn, handle, _) = rtnetlink::new_connection().unwrap(); + tokio::spawn(conn); + let host_side = add_veth_pair("000011112222", "c-test").await.unwrap(); + + set_alias(&host_side, "test-alias").await.unwrap(); + + let mut links = handle.link().get().match_name(host_side).execute(); + let mut alias_set = false; + match links.try_next().await.unwrap() { + Some(msg) => { + for attr in msg.attributes.into_iter() { + if let LinkAttribute::IfAlias(alias) = attr { + alias_set = true; + assert_eq!("test-alias", alias); + } + } + } + None => panic!("Link not found"), + } + assert!(alias_set); + } + + #[tokio::test] + async fn test_add_addr() { + let test_ns = TestNetNS::new(); + + test_ns.enter(); + let (conn, handle, _) = rtnetlink::new_connection().unwrap(); + tokio::spawn(conn); + let host_side = add_veth_pair("000011112222", "c-test").await.unwrap(); + + let test_addr = IpNet::from_str("10.0.0.1/24").unwrap(); + add_addr(&host_side, &test_addr).await.unwrap(); + + let mut addrs = handle.address().get().execute(); + let mut addr_added = false; + match addrs.try_next().await.unwrap() { + Some(msg) => { + for attr in msg.attributes.into_iter() { + if let AddressAttribute::Address(addr) = attr { + addr_added = true; + assert_eq!(addr, test_addr.addr()); + } + } + } + None => panic!("Address not found"), + } + assert!(addr_added); + } + + #[tokio::test] + async fn test_add_route() { + let test_ns = TestNetNS::new(); + + test_ns.enter(); + let (conn, handle, _) = rtnetlink::new_connection().unwrap(); + tokio::spawn(conn); + let host_side = add_veth_pair("000011112222", "c-test").await.unwrap(); + + let test_addr = IpNet::from_str("10.0.0.1/24").unwrap(); + add_addr(&host_side, &test_addr).await.unwrap(); + link_up(&host_side).await.unwrap(); + + let dst = IpNet::from_str("0.0.0.0/0").unwrap(); + let gateway = Some(test_addr.addr()); + + add_route(&dst, gateway, &host_side, RouteScope::Universe) + .await + .unwrap(); + + let mut routes = handle.route().get(IpVersion::V4).execute(); + let mut route_added = false; + match routes.try_next().await.unwrap() { + Some(msg) => { + for attr in msg.attributes.into_iter() { + if let RouteAttribute::Gateway(RouteAddress::Inet(gw)) = attr { + route_added = true; + assert_eq!(test_addr.addr(), IpAddr::V4(gw)); + } + } + } + None => panic!("Route not found"), + } + assert!(route_added); + } + + #[tokio::test] + async fn test_add_route_in_table() { + let test_ns = TestNetNS::new(); + + test_ns.enter(); + let (conn, handle, _) = rtnetlink::new_connection().unwrap(); + tokio::spawn(conn); + let host_side = add_veth_pair("000011112222", "c-test").await.unwrap(); + + let test_addr = IpNet::from_str("10.0.0.1/24").unwrap(); 
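+ // Table 160 below is an arbitrary routing table id for this test (assumed to be otherwise unused on the host).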
+ add_addr(&host_side, &test_addr).await.unwrap(); + link_up(&host_side).await.unwrap(); + + let dst = IpNet::from_str("0.0.0.0/0").unwrap(); + let gateway = Some(test_addr.addr()); + + add_route_in_table(&dst, gateway, &host_side, RouteScope::Universe, 160) + .await + .unwrap(); + + let mut routes = handle.route().get(IpVersion::V4).execute(); + let mut route_added = false; + match routes.try_next().await.unwrap() { + Some(msg) => { + for attr in msg.attributes.into_iter() { + if let RouteAttribute::Gateway(RouteAddress::Inet(gw)) = attr { + route_added = true; + assert_eq!(test_addr.addr(), IpAddr::V4(gw)); + } + if let RouteAttribute::Table(table_id) = attr { + assert_eq!(table_id, 160); + } + } + } + None => panic!("Route not found"), + } + assert!(route_added); + } + + #[tokio::test] + async fn test_del_route() { + let test_ns = TestNetNS::new(); + + test_ns.enter(); + let (conn, handle, _) = rtnetlink::new_connection().unwrap(); + tokio::spawn(conn); + let host_side = add_veth_pair("000011112222", "c-test").await.unwrap(); + + let test_addr = IpNet::from_str("10.0.0.1/24").unwrap(); + add_addr(&host_side, &test_addr).await.unwrap(); + link_up(&host_side).await.unwrap(); + + let dst = IpNet::from_str("0.0.0.0/0").unwrap(); + let gateway = Some(test_addr.addr()); + + add_route(&dst, gateway, &host_side, RouteScope::Universe) + .await + .unwrap(); + + let mut routes = handle.route().get(IpVersion::V4).execute(); + let mut route_added = false; + match routes.try_next().await.unwrap() { + Some(msg) => { + for attr in msg.attributes.into_iter() { + if let RouteAttribute::Gateway(RouteAddress::Inet(gw)) = attr { + route_added = true; + assert_eq!(test_addr.addr(), IpAddr::V4(gw)); + } + } + } + None => panic!("Route not found"), + } + assert!(route_added); + + del_route(&dst).await.unwrap(); + let mut routes = handle.route().get(IpVersion::V4).execute(); + let mut route_added = false; + match routes.try_next().await.unwrap() { + Some(msg) => { + for attr in msg.attributes.into_iter() { + if let RouteAttribute::Gateway(RouteAddress::Inet(gw)) = attr { + route_added = true; + assert_eq!(test_addr.addr(), IpAddr::V4(gw)); + } + } + } + None => panic!("Route not found"), + } + assert!(!route_added); + } + + #[tokio::test] + async fn test_del_route_in_table() { + let test_ns = TestNetNS::new(); + + test_ns.enter(); + let (conn, handle, _) = rtnetlink::new_connection().unwrap(); + tokio::spawn(conn); + let host_side = add_veth_pair("000011112222", "c-test").await.unwrap(); + + let test_addr = IpNet::from_str("10.0.0.1/24").unwrap(); + add_addr(&host_side, &test_addr).await.unwrap(); + link_up(&host_side).await.unwrap(); + + let dst = IpNet::from_str("0.0.0.0/0").unwrap(); + let gateway = Some(test_addr.addr()); + + add_route_in_table(&dst, gateway, &host_side, RouteScope::Universe, 160) + .await + .unwrap(); + + let mut routes = handle.route().get(IpVersion::V4).execute(); + let mut route_added = false; + match routes.try_next().await.unwrap() { + Some(msg) => { + for attr in msg.attributes.into_iter() { + if let RouteAttribute::Gateway(RouteAddress::Inet(gw)) = attr { + route_added = true; + assert_eq!(test_addr.addr(), IpAddr::V4(gw)); + } + if let RouteAttribute::Table(table_id) = attr { + assert_eq!(table_id, 160); + } + } + } + None => panic!("Route not found"), + } + assert!(route_added); + + del_route_in_table(&dst, 160).await.unwrap(); + let mut routes = handle.route().get(IpVersion::V4).execute(); + let mut route_added = false; + match routes.try_next().await.unwrap() { + Some(msg) => { 
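+ // After deletion, the first route returned (if any) must no longer carry the test gateway.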
+ for attr in msg.attributes.into_iter() { + if let RouteAttribute::Gateway(RouteAddress::Inet(gw)) = attr { + route_added = true; + assert_eq!(test_addr.addr(), IpAddr::V4(gw)); + } + } + } + None => panic!("Route not found"), + } + assert!(!route_added); + } + + #[tokio::test] + async fn test_link_list() { + let test_ns = TestNetNS::new(); + + test_ns.enter(); + let cont1 = ContainerLinkInfo::new("000011112222", "c0", "default"); + let cont2 = ContainerLinkInfo::new("333344445555", "c1", "another"); + let host_side1 = add_veth_pair(&cont1.id, &cont1.ifname).await.unwrap(); + let host_side2 = add_veth_pair(&cont2.id, &cont2.ifname).await.unwrap(); + + set_alias(&host_side1, &cont1.to_alias()).await.unwrap(); + set_alias(&host_side2, &cont2.to_alias()).await.unwrap(); + + let link_list = link_list().await.unwrap(); + + assert_eq!(link_list.len(), 2); + let res_cont1 = link_list.get(&host_side1).unwrap(); + let res_cont2 = link_list.get(&host_side2).unwrap(); + + assert_eq!(&cont1, res_cont1); + assert_eq!(&cont2, res_cont2); + } + + #[tokio::test] + async fn test_route_list() { + let test_ns = TestNetNS::new(); + + test_ns.enter(); + let host_side1 = add_veth_pair("000011112222", "c0").await.unwrap(); + + let test_addr1 = IpNet::from_str("10.0.0.1/24").unwrap(); + let test_cont_addr1 = IpNet::from_str("10.10.0.1/32").unwrap(); + add_addr(&host_side1, &test_addr1).await.unwrap(); + add_addr("c0", &test_cont_addr1).await.unwrap(); + link_up(&host_side1).await.unwrap(); + + let dst = IpNet::from_str("10.10.0.1/32").unwrap(); + + add_route_in_table( + &dst, + None, + &host_side1, + RouteScope::Universe, + CNI_ROUTE_TABLE_ID, + ) + .await + .unwrap(); + + let r_l = route_list(IpAddr::from_str("127.0.0.1").unwrap(), CNI_ROUTE_TABLE_ID) + .await + .unwrap(); + + assert_eq!(r_l.len(), 1); + + let res_route = r_l.get(&host_side1).unwrap(); + + assert_eq!(res_route, &test_cont_addr1.addr()); + } +} diff --git a/sartd/src/kubernetes/src/agent/cni/netns.rs b/sartd/src/kubernetes/src/agent/cni/netns.rs new file mode 100644 index 0000000..3368a70 --- /dev/null +++ b/sartd/src/kubernetes/src/agent/cni/netns.rs @@ -0,0 +1,182 @@ +use std::{ + fs::File, + os::{ + fd::{AsFd, AsRawFd}, + unix::fs::MetadataExt, + }, + path::{Path, PathBuf}, +}; + +use thiserror::Error; + +use super::netlink; + +const NETNS_PATH_BASE: &str = "/var/run/netns"; + +#[derive(Debug)] +pub struct NetNS { + path: PathBuf, + file: File, +} + +impl NetNS { + pub fn new(name: &str) -> NetNS { + // create the backing file at the named path under the netns base directory, not at the directory itself + let path = Path::new(NETNS_PATH_BASE).join(name); + let file = std::fs::File::create(&path).unwrap(); + NetNS { path, file } + } + + pub fn enter(&self) -> Result<(), Error> { + nix::sched::setns(self.file.as_fd(), nix::sched::CloneFlags::CLONE_NEWNET) + .map_err(Error::SetNS) + } + + pub fn run<F, T>(&self, f: F) -> Result<T, Error> + where + F: FnOnce(&Self) -> T, + { + let src = get_current_netns()?; + if src.path.eq(&self.path) { + return Ok(f(self)); + } + + self.enter()?; + + let res = f(self); + + src.enter()?; + + Ok(res) + } + + pub fn fd(&self) -> i32 { + self.file.as_raw_fd() + } + + pub fn path(&self) -> PathBuf { + self.path.clone() + } +} + +pub fn get_current_netns() -> Result<NetNS, Error> { + let path = get_current_netns_path(); + let file = std::fs::File::open(&path).map_err(|e| Error::OpenNetNS(path.clone(), e))?; + Ok(NetNS { path, file }) +} + +fn get_current_netns_path() -> PathBuf { + let id = nix::unistd::gettid(); + PathBuf::from(format!("/proc/self/task/{}/ns/net", id)) +} + +impl PartialEq for NetNS { + fn eq(&self, other: &Self) -> bool
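// Two handles are equal if they share a raw fd or if their backing files have the same (dev, inode) pair; the path comparison is only a fallback when metadata is unavailable. +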
{ + if self.file.as_raw_fd() == other.file.as_raw_fd() { + return true; + } + let cmp_meta = |f1: &File, f2: &File| -> Option<bool> { + let m1 = match f1.metadata() { + Ok(m) => m, + Err(_) => return None, + }; + let m2 = match f2.metadata() { + Ok(m) => m, + Err(_) => return None, + }; + Some(m1.dev() == m2.dev() && m1.ino() == m2.ino()) + }; + cmp_meta(&self.file, &other.file).unwrap_or_else(|| self.path == other.path) + } +} + +impl TryFrom<&str> for NetNS { + type Error = Error; + fn try_from(path: &str) -> Result<Self, Self::Error> { + let p = PathBuf::from(path); + let file = match std::fs::File::open(path) { + Ok(file) => file, + Err(e) => { + if e.kind().eq(&std::io::ErrorKind::NotFound) { + return Err(Error::NotExist(p)); + } + return Err(Error::OpenNetNS(p, e)); + } + }; + Ok(NetNS { path: p, file }) + } +} + +impl TryFrom<PathBuf> for NetNS { + type Error = Error; + fn try_from(path: PathBuf) -> Result<Self, Self::Error> { + let file = + std::fs::File::open(path.clone()).map_err(|e| Error::OpenNetNS(path.clone(), e))?; + Ok(NetNS { path, file }) + } +} + +impl TryFrom<&PathBuf> for NetNS { + type Error = Error; + fn try_from(path: &PathBuf) -> Result<Self, Self::Error> { + let file = + std::fs::File::open(path.clone()).map_err(|e| Error::OpenNetNS(path.clone(), e))?; + Ok(NetNS { + path: path.clone(), + file, + }) + } +} + +impl std::fmt::Display for NetNS { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self.path().as_os_str().to_str() { + Some(p) => write!(f, "{}", p), + None => write!(f, "invalid netns path"), + } + } +} + +#[derive(Debug, Error)] +pub enum Error { + #[error("Open NetNS {0} : {1}")] + OpenNetNS(PathBuf, std::io::Error), + + #[error("NetNS doesn't exist: {0}")] + NotExist(PathBuf), + + #[error("Close NetNS")] + CloseNetNS, + + #[error("SetNS: {0}")] + SetNS(#[source] nix::Error), + + #[error("Netlink: {0}")] + Netlink(#[source] netlink::Error), +} + +#[cfg(test)] +mod tests { + + use super::{get_current_netns, NetNS}; + + #[tokio::test] + async fn test_netns_enter() { + let base_ns = get_current_netns().unwrap(); + let mut cmd = std::process::Command::new("ip"); + cmd.args(["netns", "add", "test"]); + cmd.output().unwrap(); + + let target = NetNS::try_from("/var/run/netns/test").unwrap(); + + target.enter().unwrap(); + + base_ns.enter().unwrap(); + + let mut cmd = std::process::Command::new("ip"); + cmd.args(["netns", "del", "test"]); + cmd.output().unwrap(); + } +} diff --git a/sartd/src/kubernetes/src/agent/cni/pod.rs b/sartd/src/kubernetes/src/agent/cni/pod.rs index 3242929..665071d 100644 --- a/sartd/src/kubernetes/src/agent/cni/pod.rs +++ b/sartd/src/kubernetes/src/agent/cni/pod.rs @@ -1,6 +1,14 @@ -use std::str::FromStr; +use std::{net::IpAddr, str::FromStr}; -use ipnet::IpNet; +use ipnet::{IpNet, Ipv4Net, Ipv6Net}; +use netlink_packet_route::route::RouteScope; +use sartd_proto::sart::{CniResult, Interface, IpConf, RouteConf}; + +use super::{ + error::Error, + netlink::{self, ContainerLinkInfo, DEFAULT_ROUTE_IPV4, DEFAULT_ROUTE_IPV6}, + netns::NetNS, +}; // K8S_POD_INFRA_CONTAINER_ID=0a6a4b09df59d64e3be5cf662808076fee664447a1c90dd05a5d5588e2cd6b5a;K8S_POD_UID=b0e1fc4a-f842-4ec2-8e23-8c0c8da7b5e5;IgnoreUnknown=1;K8S_POD_NAMESPACE=kube-system;K8S_POD_NAME=coredns-787d4945fb-7xrrd const K8S_POD_INFRA_CONTAINER_ID: &str = "K8S_POD_INFRA_CONTAINER_ID"; @@ -17,7 +25,7 @@ pub struct PodInfo { } impl FromStr for PodInfo { - type Err = rscni::error::Error; + type Err = Error; fn from_str(s: &str) -> Result<Self, Self::Err> { let mut info = PodInfo { container_id: String::new(), @@ -42,28 +50,16 @@ impl FromStr for
PodInfo { } } + if info.container_id.is_empty() { - return Err(rscni::error::Error::FailedToDecode(format!( - "{} is not set", - K8S_POD_INFRA_CONTAINER_ID - ))); + return Err(Error::MissingField(K8S_POD_INFRA_CONTAINER_ID.to_string())); + } + if info.uid.is_empty() { - return Err(rscni::error::Error::FailedToDecode(format!( - "{} is not set", - K8S_POD_UID - ))); + return Err(Error::MissingField(K8S_POD_UID.to_string())); + } + if info.namespace.is_empty() { - return Err(rscni::error::Error::FailedToDecode(format!( - "{} is not set", - K8S_POD_NAMESPACE - ))); + return Err(Error::MissingField(K8S_POD_NAMESPACE.to_string())); + } + if info.name.is_empty() { - return Err(rscni::error::Error::FailedToDecode(format!( - "{} is not set", - K8S_POD_NAME - ))); + return Err(Error::MissingField(K8S_POD_NAME.to_string())); + } + Ok(info) + } @@ -71,14 +67,279 @@ impl FromStr for PodInfo { #[derive(Debug, Clone, PartialEq, Eq)] pub struct PodAllocation { + pub container_id: String, pub block: String, pub addr: IpNet, } +#[tracing::instrument()] +pub async fn setup_links( + link_info: &ContainerLinkInfo, + host_ns: &NetNS, + container_ns: &NetNS, + container_addr: &IpNet, + host_addr: &IpAddr, + table: u32, +) -> Result<CniResult, Error> { + // enter the container netns + container_ns.enter().map_err(Error::NetNS)?; + + // create the veth pair in the container netns + let host_ifname = netlink::add_veth_pair(&link_info.id, &link_info.ifname) + .await + .map_err(Error::Netlink)?; + + // move the host-side interface to the host netns + netlink::move_netns(&host_ifname, host_ns) + .await + .map_err(Error::Netlink)?; + + // bring up the container-side interface + netlink::link_up(&link_info.ifname) + .await + .map_err(Error::Netlink)?; + + // get the container interface's mac address + let container_mac = netlink::get_link_mac(&link_info.ifname) + .await + .map_err(Error::Netlink)?; + + // leave the container netns + // and return to the host netns + host_ns.enter().map_err(Error::NetNS)?; + + // set the pool/container-id/ifname alias on the host-side interface + let alias = link_info.to_alias(); + netlink::set_alias(&host_ifname, &alias) + .await + .map_err(Error::Netlink)?; + + // add the host node address to the host-side interface + let host_net = match host_addr { + IpAddr::V4(addr) => IpNet::V4( + Ipv4Net::new(*addr, 32).map_err(|_| Error::InvalidAddress(host_addr.to_string()))?, + ), + IpAddr::V6(addr) => IpNet::V6( + Ipv6Net::new(*addr, 128).map_err(|_| Error::InvalidAddress(host_addr.to_string()))?, + ), + }; + netlink::add_addr(&host_ifname, &host_net) + .await + .map_err(Error::Netlink)?; + + // bring up the host-side interface + netlink::link_up(&host_ifname) + .await + .map_err(Error::Netlink)?; + + // add the route to the container in the specified routing table + netlink::add_route_in_table( + container_addr, + None, + &host_ifname, + RouteScope::Universe, + table, + ) + .await + .map_err(Error::Netlink)?; + + // enter the container netns again + container_ns.enter().map_err(Error::NetNS)?; + + // add an address to the container interface + netlink::add_addr(&link_info.ifname, container_addr) + .await + .map_err(Error::Netlink)?; + + // add a link-scoped route to the host address in the container netns + netlink::add_route(&host_net, None, &link_info.ifname, RouteScope::Link) + .await + .map_err(Error::Netlink)?; + + // add a default route to the container netns + let default_route = match host_net { + IpNet::V4(_) => DEFAULT_ROUTE_IPV4 + .parse::<IpNet>() + .map_err(|_| Error::InvalidAddress(DEFAULT_ROUTE_IPV4.to_string()))?, + IpNet::V6(_) => DEFAULT_ROUTE_IPV6 + .parse::<IpNet>() + .map_err(|_| Error::InvalidAddress(DEFAULT_ROUTE_IPV6.to_string()))?, + }; + netlink::add_route( + &default_route, + Some(*host_addr), + &link_info.ifname, + RouteScope::Universe, + ) + .await + .map_err(Error::Netlink)?; + + tracing::info!("Added routes in the container netns"); + + // return to the host netns + host_ns.enter().map_err(Error::NetNS)?; + + Ok(CniResult { + interfaces: vec![Interface { + name: link_info.ifname.clone(), + mac: container_mac, + sandbox: container_ns.path().as_path().display().to_string(), + }], + ips: vec![IpConf { + interface: 0, + address: container_addr.to_string(), + gateway: host_addr.to_string(), + }], + routes: vec![RouteConf { + dst: default_route.to_string(), + gw: host_addr.to_string(), + mtu: 0, + advmss: 0, + }], + dns: None, + }) +} + +pub async fn cleanup_links( + container_id: &str, + host_ns: &NetNS, + container_ns: Option<NetNS>, + container_addr: &IpNet, + host_addr: &IpAddr, + container_ifname: &str, + table: u32, +) -> Result<CniResult, Error> { + // enter the container netns + let container_mac = if let Some(ref container_ns) = container_ns { + container_ns.enter().map_err(Error::NetNS)?; + // get the container interface's mac address + netlink::get_link_mac(container_ifname) + .await + .map_err(Error::Netlink)? + } else { + String::new() + }; + + // enter the host netns + host_ns.enter().map_err(Error::NetNS)?; + + // delete routes in the host netns + netlink::del_route_in_table(container_addr, table) + .await + .map_err(Error::Netlink)?; + + // delete the host-side link; its name is derived from the first 8 chars of the container id + if container_id.len() < 8 { + return Err(Error::Netlink(netlink::Error::InvalidContainerId)); + } + + let short_id = &container_id[..8]; + let host_ifname = format!("veth{short_id}"); + netlink::del_link(&host_ifname) + .await + .map_err(Error::Netlink)?; + + let default_route = match container_addr { + IpNet::V4(_) => DEFAULT_ROUTE_IPV4 + .parse::<IpNet>() + .map_err(|_| Error::InvalidAddress(DEFAULT_ROUTE_IPV4.to_string()))?, + IpNet::V6(_) => DEFAULT_ROUTE_IPV6 + .parse::<IpNet>() + .map_err(|_| Error::InvalidAddress(DEFAULT_ROUTE_IPV6.to_string()))?, + }; + + Ok(CniResult { + interfaces: vec![Interface { + name: container_ifname.to_string(), + mac: container_mac, + sandbox: match container_ns { + Some(container_ns) => container_ns.path().as_path().display().to_string(), + None => String::new(), + }, + }], + ips: vec![IpConf { + interface: 0, + address: container_addr.to_string(), + gateway: host_addr.to_string(), + }], + routes: vec![RouteConf { + dst: default_route.to_string(), + gw: host_addr.to_string(), + mtu: 0, + advmss: 0, + }], + dns: None, + }) +} + #[cfg(test)] mod tests { + + use tests::netlink::{ + add_rule, del_rule, get_link_by_container_id, get_link_index_by_name, route_list, + }; + + use crate::agent::cni::{netns::get_current_netns, server::CNI_ROUTE_TABLE_ID}; + use super::*; + struct TestContainer { + id: String, + netns: NetNS, + ns_name: String, + container_addr: IpNet, + container_ifname: String, + host_addr: IpAddr, + host_route_table_id: u32, + } + + impl TestContainer { + fn new(container_addr: IpNet, host_addr: IpAddr) -> Self { + use rand::Rng; + const CHARSET: &[u8] = b"abcdefghijklmnopqrstuvwxyz0123456789"; + let mut rng = rand::thread_rng(); + + let container_id: String = (0..32) + .map(|_| { + let idx = rng.gen_range(0..CHARSET.len()); + CHARSET[idx] as char + }) + .collect(); + + let ns_id: String = (0..8) + .map(|_| { + let idx = rng.gen_range(0..CHARSET.len()); + CHARSET[idx] as char + }) + .collect(); + + let ns_name = format!("test-{ns_id}"); + let mut cmd = std::process::Command::new("ip"); + + 
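// These tests drive ip(8) to create and delete namespaces; they assume iproute2 is installed and that the test process has CAP_NET_ADMIN (e.g. runs as root). +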
cmd.args(["netns", "add", &ns_name]); + cmd.output().unwrap(); + let ns = NetNS::try_from(format!("/var/run/netns/{}", ns_name).as_str()).unwrap(); + TestContainer { + id: container_id, + netns: ns, + ns_name, + container_addr, + container_ifname: "eth0".to_string(), + host_addr, + host_route_table_id: 160, + } + } + } + + impl Drop for TestContainer { + fn drop(&mut self) { + let mut cmd = std::process::Command::new("ip"); + + cmd.args(["netns", "del", &self.ns_name]); + cmd.output().unwrap(); + } + } + #[test] fn test_pod_info_from_str() { let s = "K8S_POD_INFRA_CONTAINER_ID=0a6a4b09df59d64e3be5cf662808076fee664447a1c90dd05a5d5588e2cd6b5a;K8S_POD_UID=b0e1fc4a-f842-4ec2-8e23-8c0c8da7b5e5;IgnoreUnknown=1;K8S_POD_NAMESPACE=kube-system;K8S_POD_NAME=coredns-787d4945fb-7xrrd"; @@ -92,4 +353,68 @@ mod tests { let info = PodInfo::from_str(s).unwrap(); assert_eq!(expected, info); } + + #[tokio::test] + async fn test_setup_and_cleanup_links() { + let host_ns = get_current_netns().unwrap(); + let host_addr = IpAddr::from_str("10.10.0.2").unwrap(); + let container_addr = IpNet::from_str("10.0.0.1/32").unwrap(); + + add_rule(160, host_addr).await.unwrap(); + + let container = TestContainer::new(container_addr, host_addr); + let link_info = ContainerLinkInfo { + id: container.id.clone(), + ifname: container.container_ifname.clone(), + pool: String::new(), + }; + + let cni_result = setup_links( + &link_info, + &host_ns, + &container.netns, + &container_addr, + &host_addr, + container.host_route_table_id, + ) + .await + .unwrap(); + + assert_eq!(cni_result.interfaces.len(), 1); + assert_eq!(cni_result.ips.len(), 1); + assert_eq!(cni_result.routes.len(), 1); + + let (conn, handle, _) = rtnetlink::new_connection().unwrap(); + tokio::spawn(conn); + + let _exist = get_link_by_container_id(&handle, &container.id) + .await + .unwrap(); + + let mut cmd = std::process::Command::new("ping"); + cmd.args(["-c", "1", container_addr.addr().to_string().as_str()]); + let out = cmd.output().unwrap(); + assert!(out.status.success()); + + let r_l = route_list(IpAddr::from_str("127.0.0.1").unwrap(), 160) + .await + .unwrap(); + + cleanup_links( + &container.id, + &host_ns, + None, + &container_addr, + &host_addr, + &container.container_ifname, + container.host_route_table_id, + ) + .await + .unwrap(); + + let should_err = get_link_by_container_id(&handle, &container.id).await; + assert!(should_err.is_err()); + + del_rule(160, &host_addr).await.unwrap(); + } } diff --git a/sartd/src/kubernetes/src/agent/cni/server.rs b/sartd/src/kubernetes/src/agent/cni/server.rs index 73e3450..4d6bc5a 100644 --- a/sartd/src/kubernetes/src/agent/cni/server.rs +++ b/sartd/src/kubernetes/src/agent/cni/server.rs @@ -1,18 +1,24 @@ -use std::{collections::HashMap, net::IpAddr, path::Path, str::FromStr, sync::Arc}; +use std::{ + collections::HashMap, + net::{IpAddr, Ipv6Addr}, + path::Path, + str::FromStr, + sync::Arc, +}; use bytes::Bytes; -use ipnet::IpNet; +use ipnet::{IpNet, Ipv4Net, Ipv6Net}; use k8s_openapi::api::core::v1::{Namespace, Pod}; use kube::{ api::{DeleteParams, ListParams, PostParams}, core::ObjectMeta, Api, Client, ResourceExt, }; -use rscni::error::Error; -use sartd_ipam::manager::AllocatorSet; + +use sartd_ipam::manager::{AllocatorSet, Block}; use sartd_proto::sart::{ cni_api_server::{CniApi, CniApiServer}, - Args, CniResult, Interface, IpConf, RouteConf, + Args, CniResult, }; use serde::{Deserialize, Serialize}; use tokio::{ @@ -22,32 +28,40 @@ use tokio::{ use tokio_stream::wrappers::UnixListenerStream; use 
tonic::{async_trait, transport::Server, Request, Response, Status}; -use crate::crd::{ - address_block::AddressBlock, - address_pool::{AddressPool, AddressType, ADDRESS_POOL_ANNOTATION}, - block_request::{BlockRequest, BlockRequestSpec}, +use crate::{ + agent::cni::{ + netlink::{self, ContainerLinkInfo}, + netns::{self, get_current_netns, NetNS}, + pod::{cleanup_links, setup_links}, + }, + crd::{ + address_block::{AddressBlock, ADDRESS_BLOCK_NODE_LABEL}, + address_pool::{AddressPool, AddressType, ADDRESS_POOL_ANNOTATION}, + block_request::{BlockRequest, BlockRequestSpec}, + }, }; -use super::pod::{PodAllocation, PodInfo}; +use super::{ + error::Error, + pod::{PodAllocation, PodInfo}, +}; pub const CNI_SERVER_ENDPOINT: &str = "/var/run/sart.sock"; +pub const CNI_ROUTE_TABLE_ID: u32 = 120; -const CNI_ERROR_CODE_KUBE: u32 = 200; -const CNI_ERROR_MSG_KUBE: &str = "Kubernetes error"; -const CNI_ERROR_CODE_INTERNAL: u32 = 210; -const CNI_ERROR_MSG_INTERNAL: &str = "Internal error"; -const CNI_ERROR_CODE_ALLOCATOR: u32 = 220; -const CNI_ERROR_MSG_ALLOCATOR: &str = "Allocation error"; +const CONTAINER_INTERFACE_NAME: &str = "eth0"; struct CNIServerInner { client: Client, allocator: Arc, allocation: HashMap, node: String, + node_addr: IpAddr, + table: u32, receiver: UnboundedReceiver, } -pub(crate) struct CNIServer { +pub struct CNIServer { // consider some better type. inner: Arc>, } @@ -57,14 +71,21 @@ impl CNIServer { client: Client, allocator: Arc, node: String, + node_addr: IpAddr, + table: u32, receiver: UnboundedReceiver, ) -> CNIServer { CNIServer { inner: Arc::new(Mutex::new(CNIServerInner::new( - client, allocator, node, receiver, + client, allocator, node, node_addr, table, receiver, ))), } } + + async fn recover(&mut self) -> Result<(), Error> { + let inner = self.inner.lock().await; + inner.recover().await + } } impl CNIServerInner { @@ -72,6 +93,8 @@ impl CNIServerInner { client: Client, allocator: Arc, node: String, + node_addr: IpAddr, + table: u32, receiver: UnboundedReceiver, ) -> CNIServerInner { CNIServerInner { @@ -79,6 +102,8 @@ impl CNIServerInner { allocator, allocation: HashMap::new(), node, + node_addr, + table, receiver, } } @@ -87,26 +112,27 @@ impl CNIServerInner { async fn add(&mut self, args: &Args) -> Result { let pod_info = PodInfo::from_str(&args.args)?; + let pod_key = format!("{}/{}", pod_info.namespace, pod_info.name); + + let ns_path = args.netns.clone(); + let ns = NetNS::try_from(ns_path.as_str()).map_err(Error::NetNS)?; + tracing::info!( name = pod_info.name, namespace = pod_info.namespace, container_id = pod_info.container_id, - uid = pod_info.uid, cmd = "ADD", - "CNI Add is called" + "Get pod info" ); - let pod_api = Api::::namespaced(self.client.clone(), &pod_info.namespace); - let pod = pod_api.get(&pod_info.name).await.map_err(|e| { - Error::Custom( - CNI_ERROR_CODE_KUBE, - CNI_ERROR_MSG_KUBE.to_string(), - e.to_string(), - ) - })?; + let pod = pod_api.get(&pod_info.name).await.map_err(Error::Kube)?; let pool = self.get_pool(&pod_info, &pod).await?; - let allocated_addr = self.allocate(&pool)?; + if self.allocation.get(&pod_key).is_some() { + return Err(Error::AlreadyConfigured(pod_key)); + } + + let allocated_addr = self.allocate(&pool, &pod_info)?; let alloc = match allocated_addr { Some(alloc) => { @@ -114,9 +140,7 @@ impl CNIServerInner { name = pod_info.name, namespace = pod_info.namespace, container_id = pod_info.container_id, - uid = pod_info.uid, address = alloc.addr.to_string(), - block = alloc.block, cmd = "ADD", "Allocate addresses" ); @@ 
-124,11 +148,10 @@ impl CNIServerInner { } None => { // request - tracing::warn!( + tracing::info!( name = pod_info.name, namespace = pod_info.namespace, container_id = pod_info.container_id, - uid = pod_info.uid, cmd = "ADD", "No allocatable block. Request new block." ); @@ -139,7 +162,7 @@ impl CNIServerInner { ..Default::default() }, spec: BlockRequestSpec { - pool, + pool: pool.clone(), node: self.node.clone(), }, status: None, @@ -147,54 +170,27 @@ impl CNIServerInner { block_request_api .create(&PostParams::default(), &br) .await - .map_err(|e| { - Error::Custom( - CNI_ERROR_CODE_KUBE, - CNI_ERROR_MSG_KUBE.to_string(), - e.to_string(), - ) - })?; + .map_err(Error::Kube)?; tracing::info!( name = pod_info.name, namespace = pod_info.namespace, container_id = pod_info.container_id, - uid = pod_info.uid, cmd = "ADD", "Waiting to create new block" ); // let created_br = self.receiver.blocking_recv(); - let created_br = self.receiver.recv().await.ok_or(Error::Custom( - CNI_ERROR_CODE_INTERNAL, - CNI_ERROR_MSG_INTERNAL.to_string(), - "Failed to notify block creation".to_string(), - ))?; - tracing::info!( - name = pod_info.name, - namespace = pod_info.namespace, - container_id = pod_info.container_id, - uid = pod_info.uid, - cmd = "ADD", - "Delete the satisfied block request" - ); + let created_br = self.receiver.recv().await.ok_or(Error::ReceiveNotify)?; block_request_api .delete(&br.name_any(), &DeleteParams::default()) .await - .map_err(|e| { - Error::Custom( - CNI_ERROR_CODE_KUBE, - CNI_ERROR_MSG_KUBE.to_string(), - e.to_string(), - ) - })?; - let alloc = self.allocate_with_block(&created_br.name_any())?; + .map_err(Error::Kube)?; + let alloc = self.allocate_with_block(&created_br.name_any(), &pod_info)?; tracing::info!( name = pod_info.name, namespace = pod_info.namespace, container_id = pod_info.container_id, - uid = pod_info.uid, address = alloc.addr.to_string(), - block = alloc.block, cmd = "ADD", "Allocate addresses" ); @@ -206,97 +202,169 @@ impl CNIServerInner { format!("{}/{}", pod_info.namespace, pod_info.name), alloc.clone(), ); - - let result = CniResult { - interfaces: vec![Interface { - name: "dummy".to_string(), - mac: "dummy".to_string(), - sandbox: "/var/run/dummy".to_string(), - }], - ips: vec![IpConf { - interface: 0, - address: alloc.addr.to_string(), - gateway: "10.1.0.100".to_string(), - }], - routes: vec![RouteConf { - dst: "0.0.0.0/0".to_string(), - gw: "10.1.0.100".to_string(), - mtu: 0, - advmss: 0, - }], - dns: None, + let host_ns = get_current_netns().map_err(Error::NetNS)?; + let allocated_addr = match alloc.addr { + IpNet::V4(n) => IpNet::V4(Ipv4Net::new(n.addr(), 32).unwrap()), + IpNet::V6(n) => IpNet::V6(Ipv6Net::new(n.addr(), 128).unwrap()), }; + let link_info = + ContainerLinkInfo::new(&pod_info.container_id, CONTAINER_INTERFACE_NAME, &pool); + + let result = setup_links( + &link_info, + &host_ns, + &ns, + &allocated_addr, + &self.node_addr, + CNI_ROUTE_TABLE_ID, + ) + .await?; + Ok(result) } async fn del(&mut self, args: &Args) -> Result { let pod_info = PodInfo::from_str(&args.args)?; - - tracing::info!( - name = pod_info.name, - namespace = pod_info.namespace, - container_id = pod_info.container_id, - uid = pod_info.uid, - cmd = "DEL", - "CNI Del is called" - ); - let pod_api = Api::::namespaced(self.client.clone(), &pod_info.namespace); - let pod = pod_api.get(&pod_info.name).await.map_err(|e| { - Error::Custom( - CNI_ERROR_CODE_KUBE, - CNI_ERROR_MSG_KUBE.to_string(), - e.to_string(), - ) - })?; + // Should we return error here? 
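+ // DEL below is deliberately tolerant: a missing pool or allocation is logged and teardown continues so that kubelet can finish deleting the sandbox.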
+ let pod = pod_api.get(&pod_info.name).await.map_err(Error::Kube)?; let addr_opt = get_pod_addr(&pod); - let _pool = self.get_pool(&pod_info, &pod).await?; + if let Err(e) = self.get_pool(&pod_info, &pod).await { + match e { + Error::DefaultPoolNotFound => { + tracing::warn!( + name = pod_info.name, + namespace = pod_info.namespace, + container_id = pod_info.container_id, + cmd = "DEL", + "Default pool is not found" + ); + return Ok(CniResult { + interfaces: vec![], + ips: vec![], + routes: vec![], + dns: None, + }); + } + _ => return Err(e), + } + } if addr_opt.is_none() { tracing::warn!( name = pod_info.name, namespace = pod_info.namespace, container_id = pod_info.container_id, - uid = pod_info.uid, cmd = "DEL", "Pod's address doesn't exist" ); } - let alloc_opt = self.get_allocation(&pod_info); + let alloc_opt = match self.get_allocation(&pod_info) { + Ok(alloc_opt) => alloc_opt, + Err(_e) => { + tracing::warn!( + name = pod_info.name, + namespace = pod_info.namespace, + container_id = pod_info.container_id, + cmd = "DEL", + "container-id is not matched with actual stored allocation" + ); + None + } + }; if alloc_opt.is_none() { tracing::warn!( name = pod_info.name, namespace = pod_info.namespace, container_id = pod_info.container_id, - uid = pod_info.uid, cmd = "DEL", "Allocation information doesn't exist" ); } - // get actual assigned address in pod netns + let mut deletable_block = None; + // get actual assigned address in pod netns let result = match alloc_opt { Some(alloc) => match self.release(&alloc.block, &alloc.addr.addr()) { - Ok(_) => { + Ok(is_empty) => { tracing::info!( name = pod_info.name, namespace = pod_info.namespace, container_id = pod_info.container_id, - uid = pod_info.uid, address = alloc.addr.to_string(), - block = alloc.block, + empty = is_empty, cmd = "DEL", "Release allocated address" ); self.allocation .remove(&format!("{}/{}", pod_info.namespace, pod_info.name)); - CniResult { - interfaces: Vec::new(), - ips: Vec::new(), - routes: Vec::new(), - dns: None, + if is_empty { + deletable_block = Some(alloc.block.clone()); + } + + let host_ns = get_current_netns().map_err(Error::NetNS)?; + let container_ns = + match NetNS::try_from(args.netns.as_str()).map_err(Error::NetNS) { + Ok(ns) => Ok(Some(ns)), + Err(Error::NetNS(e)) => match e { + netns::Error::NotExist(_) => Ok(None), + _ => Err(Error::NetNS(e)), + }, + Err(e) => Err(e), + }?; + + match cleanup_links( + &pod_info.container_id, + &host_ns, + container_ns, + &alloc.addr, + &self.node_addr, + CONTAINER_INTERFACE_NAME, + CNI_ROUTE_TABLE_ID, + ) + .await + { + Ok(result) => result, + Err(e) => match e { + Error::Netlink(e) => match e { + netlink::Error::LinkNotFound(_) => { + tracing::warn!( + name = pod_info.name, + namespace = pod_info.namespace, + container_id = pod_info.container_id, + address = alloc.addr.to_string(), + cmd = "DEL", + "{e}", + ); + CniResult { + interfaces: vec![], + ips: vec![], + routes: vec![], + dns: None, + } + } + netlink::Error::RouteNotFound(_) => { + tracing::warn!( + name = pod_info.name, + namespace = pod_info.namespace, + container_id = pod_info.container_id, + address = alloc.addr.to_string(), + cmd = "DEL", + "{e}", + ); + CniResult { + interfaces: vec![], + ips: vec![], + routes: vec![], + dns: None, + } + } + _ => return Err(Error::Netlink(e)), + }, + _ => return Err(e), + }, } } Err(e) => { @@ -304,7 +372,6 @@ impl CNIServerInner { name = pod_info.name, namespace = pod_info.namespace, container_id = pod_info.container_id, - uid = pod_info.uid, cmd = "DEL", error=?e, 
"Failed to release address allocation", @@ -325,6 +392,36 @@ impl CNIServerInner { }, }; + // If the block is not be used, remove it. + // This operation must not affect the result of CNI Del command. + if let Some(block) = deletable_block { + tracing::info!( + name = pod_info.name, + namespace = pod_info.namespace, + container_id = pod_info.container_id, + block = block, + cmd = "DEL", + "Block is unused. Delete this block." + ); + let address_block_api = Api::::all(self.client.clone()); + if let Err(err) = address_block_api + .delete(&block, &DeleteParams::default()) + .await + .map_err(Error::Kube) + { + tracing::error!( + name = pod_info.name, + namespace = pod_info.namespace, + container_id = pod_info.container_id, + block = block, + error=?err, + cmd = "DEL", + "Failed to delete unused address block", + + ) + } + } + Ok(result) } @@ -342,38 +439,18 @@ impl CNIServerInner { ); let pod_api = Api::::namespaced(self.client.clone(), &pod_info.namespace); - let pod = pod_api.get(&pod_info.name).await.map_err(|e| { - Error::Custom( - CNI_ERROR_CODE_KUBE, - CNI_ERROR_MSG_KUBE.to_string(), - e.to_string(), - ) - })?; + let pod = pod_api.get(&pod_info.name).await.map_err(Error::Kube)?; let pool = self.get_pool(&pod_info, &pod).await?; - let pod_addr = get_pod_addr(&pod).ok_or(Error::Custom( - CNI_ERROR_CODE_ALLOCATOR, - CNI_ERROR_MSG_ALLOCATOR.to_string(), - "Pod doesn't have an address".to_string(), - ))?; - let alloc = self.get_allocation(&pod_info).ok_or(Error::Custom( - CNI_ERROR_CODE_ALLOCATOR, - CNI_ERROR_MSG_ALLOCATOR.to_string(), - "Allocation information not found".to_string(), - ))?; + let pod_addr = get_pod_addr(&pod).ok_or(Error::PodAddressIsNotFound)?; + let alloc = self + .get_allocation(&pod_info)? + .ok_or(Error::AllocationNotFound)?; if alloc.addr.addr().ne(&pod_addr) { - return Err(Error::Custom( - CNI_ERROR_CODE_ALLOCATOR, - CNI_ERROR_MSG_ALLOCATOR.to_string(), - "Pod address and actual allocation is mismatched".to_string(), - )); + return Err(Error::AddressNotMatched); } if !self.is_allocated(&pool, &alloc.addr.addr()) { - return Err(Error::Custom( - CNI_ERROR_CODE_ALLOCATOR, - CNI_ERROR_MSG_ALLOCATOR.to_string(), - "Allocator result and actual allocation is mismatched".to_string(), - )); + return Err(Error::AllocationNotFound); } Ok(CniResult { @@ -384,6 +461,100 @@ impl CNIServerInner { }) } + #[tracing::instrument(skip_all)] + async fn recover(&self) -> Result<(), Error> { + tracing::info!("Recover existing allocation"); + + let link_list = netlink::link_list().await.map_err(Error::Netlink)?; + let route4_list = netlink::route_list(IpAddr::from_str("127.0.0.1").unwrap(), self.table) + .await + .map_err(Error::Netlink)?; + let route6_list = netlink::route_list(IpAddr::from_str("fe80::1").unwrap(), self.table) + .await + .map_err(Error::Netlink)?; + + let address_block_api = Api::::all(self.client.clone()); + let list_params = + ListParams::default().labels(&format!("{}={}", ADDRESS_BLOCK_NODE_LABEL, self.node)); + let address_block_list = address_block_api + .list(&list_params) + .await + .map_err(Error::Kube)?; + + let mut pool_block_map: HashMap> = HashMap::new(); + + for block in address_block_list.iter() { + if block.spec.r#type.ne(&AddressType::Pod) { + continue; + } + match pool_block_map.get_mut(&block.spec.pool_ref) { + Some(block_list) => block_list.push(block.name_any()), + None => { + pool_block_map.insert(block.spec.pool_ref.clone(), vec![block.name_any()]); + } + } + } + + { + let alloc_set = self.allocator.clone(); + let mut allocator = 
alloc_set.inner.lock().map_err(|_| Error::Lock)?; + + for ab in address_block_list.iter() { + match allocator.get_mut(&ab.name_any()) { + Some(_block) => { + tracing::info!( + pool_name = ab.spec.pool_ref, + block_name = ab.name_any(), + "Block already exists" + ); + } + None => { + let cidr = IpNet::from_str(ab.spec.cidr.as_str()) + .map_err(|_| Error::InvalidAddress(ab.spec.cidr.clone()))?; + let block = Block::new(ab.name_any(), ab.spec.pool_ref.clone(), cidr) + .map_err(Error::Ipam)?; + tracing::info!( + pool_name = ab.spec.pool_ref, + block_name = ab.name_any(), + "Register existing address block" + ); + allocator.insert(block, false).map_err(Error::Ipam)?; + } + } + } + + // Register existing addresses to the allocator + for (ifname, link_info) in link_list.iter() { + let addr_opt = match route4_list.get(ifname) { + Some(addr) => Some(*addr), + None => route6_list.get(ifname).copied(), + }; + if let Some(addr) = addr_opt { + // Register + if let Some(block_names) = pool_block_map.get(&link_info.pool) { + for block_name in block_names.iter() { + if let Some(block) = allocator.get_mut(block_name) { + if block.allocator.cidr().contains(&addr) { + // Recover existing address + tracing::info!( + pool_name = link_info.pool, + block_name = block_name, + address =? addr, + container_id = link_info.id, + "Recover the existing allocation" + ); + block.allocator.allocate(&addr, true).map_err(Error::Ipam)?; + break; + } + } + } + } + } + } + } + Ok(()) + } + async fn get_pool(&self, pod_info: &PodInfo, pod: &Pod) -> Result { let namespace_api = Api::::all(self.client.clone()); let address_pool_api = Api::::all(self.client.clone()); @@ -391,13 +562,10 @@ impl CNIServerInner { let pool_opt = match pod.annotations().get(ADDRESS_POOL_ANNOTATION) { Some(pool) => Some(pool.to_string()), None => { - let ns = namespace_api.get(&pod_info.namespace).await.map_err(|e| { - Error::Custom( - CNI_ERROR_CODE_KUBE, - CNI_ERROR_MSG_KUBE.to_string(), - e.to_string(), - ) - })?; + let ns = namespace_api + .get(&pod_info.namespace) + .await + .map_err(Error::Kube)?; ns.annotations().get(ADDRESS_POOL_ANNOTATION).cloned() } }; @@ -405,13 +573,7 @@ impl CNIServerInner { Some(pool) => pool, None => { let default = { - let allocator = self.allocator.inner.lock().map_err(|e| { - Error::Custom( - CNI_ERROR_CODE_INTERNAL, - CNI_ERROR_MSG_INTERNAL.to_string(), - e.to_string(), - ) - })?; + let allocator = self.allocator.inner.lock().map_err(|_| Error::Lock)?; allocator.auto_assign.clone() }; match default { @@ -421,25 +583,13 @@ impl CNIServerInner { let ap_list = address_pool_api .list(&ListParams::default()) .await - .map_err(|e| { - Error::Custom( - CNI_ERROR_CODE_KUBE, - CNI_ERROR_MSG_KUBE.to_string(), - e.to_string(), - ) - })?; + .map_err(Error::Kube)?; match ap_list.into_iter().find(|p| { p.spec.auto_assign.unwrap_or(false) && p.spec.r#type.eq(&AddressType::Pod) }) { Some(default_pool) => default_pool.name_any(), - None => { - return Err(Error::Custom( - CNI_ERROR_CODE_INTERNAL, - CNI_ERROR_MSG_INTERNAL.to_string(), - "Default pool is not found".to_string(), - )); - } + None => return Err(Error::DefaultPoolNotFound), } } } @@ -449,20 +599,24 @@ impl CNIServerInner { Ok(pool) } - fn get_allocation(&self, pod_info: &PodInfo) -> Option { - self.allocation + fn get_allocation(&self, pod_info: &PodInfo) -> Result, Error> { + match self + .allocation .get(&format!("{}/{}", pod_info.namespace, pod_info.name)) .cloned() + { + Some(alloc) => { + if alloc.container_id.ne(&pod_info.container_id) { + return 
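// a mismatched container id means this stored allocation belongs to an older sandbox of the pod +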
Err(Error::AllocationNotFound); + } + Ok(Some(alloc)) + } + None => Ok(None), + } } - fn allocate(&mut self, pool: &str) -> Result, Error> { - let mut allocator = self.allocator.inner.lock().map_err(|e| { - Error::Custom( - CNI_ERROR_CODE_INTERNAL, - CNI_ERROR_MSG_INTERNAL.to_string(), - e.to_string(), - ) - })?; + fn allocate(&mut self, pool: &str, pod_info: &PodInfo) -> Result, Error> { + let mut allocator = self.allocator.inner.lock().map_err(|_| Error::Lock)?; let mut block_names: Vec = allocator .blocks .values() @@ -477,21 +631,11 @@ impl CNIServerInner { } // allocate! let prefix = block.allocator.prefix_len(); - let addr = block.allocator.allocate_next().map_err(|e| { - Error::Custom( - CNI_ERROR_CODE_ALLOCATOR, - CNI_ERROR_MSG_ALLOCATOR.to_string(), - e.to_string(), - ) - })?; - let cidr = IpNet::new(addr, prefix).map_err(|e| { - Error::Custom( - CNI_ERROR_CODE_ALLOCATOR, - CNI_ERROR_MSG_ALLOCATOR.to_string(), - e.to_string(), - ) - })?; + let addr = block.allocator.allocate_next().map_err(Error::Ipam)?; + let cidr = IpNet::new(addr, prefix) + .map_err(|_| Error::InvalidAddress(format!("{}/{}", addr, prefix)))?; return Ok(Some(PodAllocation { + container_id: pod_info.container_id.clone(), block: block.name.clone(), addr: cidr, })); @@ -500,62 +644,38 @@ impl CNIServerInner { Ok(None) } - fn allocate_with_block(&mut self, name: &str) -> Result { - let mut allocator = self.allocator.inner.lock().map_err(|e| { - Error::Custom( - CNI_ERROR_CODE_INTERNAL, - CNI_ERROR_MSG_INTERNAL.to_string(), - e.to_string(), - ) - })?; - let block = allocator.blocks.get_mut(name).ok_or(Error::Custom( - CNI_ERROR_CODE_ALLOCATOR, - CNI_ERROR_MSG_ALLOCATOR.to_string(), - "Failed to get the internal allocator block".to_string(), - ))?; + fn allocate_with_block( + &mut self, + name: &str, + pod_info: &PodInfo, + ) -> Result { + let mut allocator = self.allocator.inner.lock().map_err(|_| Error::Lock)?; + let block = allocator + .blocks + .get_mut(name) + .ok_or(Error::BlockNotFound(name.to_string()))?; let prefix = block.allocator.prefix_len(); - let addr = block.allocator.allocate_next().map_err(|e| { - Error::Custom( - CNI_ERROR_CODE_ALLOCATOR, - CNI_ERROR_MSG_ALLOCATOR.to_string(), - e.to_string(), - ) - })?; - let cidr = IpNet::new(addr, prefix).map_err(|e| { - Error::Custom( - CNI_ERROR_CODE_ALLOCATOR, - CNI_ERROR_MSG_ALLOCATOR.to_string(), - e.to_string(), - ) - })?; + let addr = block.allocator.allocate_next().map_err(Error::Ipam)?; + let cidr = IpNet::new(addr, prefix) + .map_err(|_| Error::InvalidAddress(format!("{}/{}", addr, prefix)))?; Ok(PodAllocation { + container_id: pod_info.container_id.clone(), block: name.to_string(), addr: cidr, }) } - fn release(&mut self, pool: &str, addr: &IpAddr) -> Result<(), Error> { - let mut allocator = self.allocator.inner.lock().map_err(|e| { - Error::Custom( - CNI_ERROR_CODE_INTERNAL, - CNI_ERROR_MSG_INTERNAL.to_string(), - e.to_string(), - ) - })?; - let block = allocator.blocks.get_mut(pool).ok_or(Error::Custom( - CNI_ERROR_CODE_ALLOCATOR, - CNI_ERROR_MSG_ALLOCATOR.to_string(), - "Failed to get the internal allocator block".to_string(), - ))?; - block.allocator.release(addr).map_err(|e| { - Error::Custom( - CNI_ERROR_CODE_ALLOCATOR, - CNI_ERROR_MSG_ALLOCATOR.to_string(), - e.to_string(), - ) - })?; + fn release(&mut self, name: &str, addr: &IpAddr) -> Result { + let mut allocator = self.allocator.inner.lock().map_err(|_| Error::Lock)?; + let block = allocator + .blocks + .get_mut(name) + .ok_or(Error::BlockNotFound(name.to_string()))?; + 
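// release() reports whether the block became empty so that DEL can garbage-collect the backing AddressBlock +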
block.allocator.release(addr).map_err(Error::Ipam)?; - Ok(()) + let empty = block.allocator.is_empty(); + + Ok(empty) } fn is_allocated(&self, pool: &str, addr: &IpAddr) -> bool { @@ -580,9 +700,41 @@ fn get_pod_addr(pod: &Pod) -> Option { } #[tracing::instrument(skip_all)] -pub async fn run(endpoint: &str, server: CNIServer) { +pub async fn run(endpoint: &str, mut server: CNIServer) { + tracing::info!( + table = CNI_ROUTE_TABLE_ID, + "Initialize new routing rule in kernel" + ); + if netlink::get_rule(CNI_ROUTE_TABLE_ID, &IpAddr::V4([127, 0, 0, 1].into())) + .await + .unwrap() + .is_none() + { + netlink::add_rule(CNI_ROUTE_TABLE_ID, IpAddr::V4([127, 0, 0, 1].into())) + .await + .unwrap(); + } + if netlink::get_rule( + CNI_ROUTE_TABLE_ID, + &IpAddr::V6(Ipv6Addr::from_str("fe80::1").unwrap()), + ) + .await + .unwrap() + .is_none() + { + netlink::add_rule( + CNI_ROUTE_TABLE_ID, + IpAddr::V6(Ipv6Addr::from_str("fe80::1").unwrap()), + ) + .await + .unwrap(); + } + + server.recover().await.unwrap(); + if endpoint.contains(".sock") { // use UNIX Domain Socket + // FIXME: gRPC server via UNIX Domain Socket doesn't work tracing::info!( "CNI server is started at {} with Unix Domain Socket", endpoint @@ -611,24 +763,21 @@ impl CniApi for CNIServer { #[tracing::instrument(skip_all)] async fn add(&self, req: Request) -> Result, Status> { let args = req.get_ref(); - tracing::info!(arg=?args, "CNI Add"); let mut inner = self.inner.lock().await; match inner.add(args).await { - Ok(res) => { - tracing::info!(result=?res, "Success to add"); - Ok(Response::new(res)) - } + Ok(res) => Ok(Response::new(res)), Err(e) => { tracing::error!(error=?e, "Failed to add"); + let cni_err = rscni::error::Error::from(e); let error_result = CNIErrorDetail { - code: u32::from(&e), - msg: e.to_string(), - details: e.details(), + code: u32::from(&cni_err), + msg: cni_err.to_string(), + details: cni_err.details(), }; let v = match serde_json::to_vec(&error_result) { Ok(v) => v, Err(e) => { - let err = Error::FailedToDecode(e.to_string()); + let err = rscni::error::Error::FailedToDecode(e.to_string()); let error_result = CNIErrorDetail { code: u32::from(&err), msg: err.to_string(), @@ -657,24 +806,21 @@ impl CniApi for CNIServer { #[tracing::instrument(skip_all)] async fn del(&self, req: Request) -> Result, Status> { let args = req.get_ref(); - tracing::info!(arg=?args, "CNI Del"); let mut inner = self.inner.lock().await; match inner.del(args).await { - Ok(res) => { - tracing::info!(result=?res, "Success to delete"); - Ok(Response::new(res)) - } + Ok(res) => Ok(Response::new(res)), Err(e) => { tracing::error!(error=?e, "Failed to delete"); + let cni_err = rscni::error::Error::from(e); let error_result = CNIErrorDetail { - code: u32::from(&e), - msg: e.to_string(), - details: e.details(), + code: u32::from(&cni_err), + msg: cni_err.to_string(), + details: cni_err.details(), }; let v = match serde_json::to_vec(&error_result) { Ok(v) => v, Err(e) => { - let err = Error::FailedToDecode(e.to_string()); + let err = rscni::error::Error::FailedToDecode(e.to_string()); let error_result = CNIErrorDetail { code: u32::from(&err), msg: err.to_string(), @@ -703,24 +849,21 @@ impl CniApi for CNIServer { #[tracing::instrument(skip_all)] async fn check(&self, req: Request) -> Result, Status> { let args = req.get_ref(); - tracing::info!(arg=?args, "CNI Check"); let inner = self.inner.lock().await; match inner.check(args).await { - Ok(res) => { - tracing::info!(result=?res, "Success to check"); - Ok(Response::new(res)) - } + Ok(res) => 
Ok(Response::new(res)), Err(e) => { tracing::error!(error=?e, "Failed to check"); + let cni_err = rscni::error::Error::from(e); let error_result = CNIErrorDetail { - code: u32::from(&e), - msg: e.to_string(), - details: e.details(), + code: u32::from(&cni_err), + msg: cni_err.to_string(), + details: cni_err.details(), }; let v = match serde_json::to_vec(&error_result) { Ok(v) => v, Err(e) => { - let err = Error::FailedToDecode(e.to_string()); + let err = rscni::error::Error::FailedToDecode(e.to_string()); let error_result = CNIErrorDetail { code: u32::from(&err), msg: err.to_string(), @@ -753,21 +896,469 @@ pub(super) struct CNIErrorDetail { pub(super) details: String, } -impl From<&CNIErrorDetail> for Error { +impl From<&CNIErrorDetail> for rscni::error::Error { fn from(res: &CNIErrorDetail) -> Self { if res.code > 100 { - return Error::Custom(res.code, res.msg.clone(), res.details.clone()); + return Self::Custom(res.code, res.msg.clone(), res.details.clone()); } match res.code { - 1 => Error::IncompatibleVersion(res.details.clone()), - 2 => Error::UnsupportedNetworkConfiguration(res.details.clone()), - 3 => Error::NotExist(res.details.clone()), - 4 => Error::InvalidEnvValue(res.details.clone()), - 5 => Error::IOFailure(res.details.clone()), - 6 => Error::FailedToDecode(res.details.clone()), - 7 => Error::InvalidNetworkConfig(res.details.clone()), - 11 => Error::TryAgainLater(res.details.clone()), - _ => Error::FailedToDecode(format!("unknown error code: {}", res.code)), + 1 => Self::IncompatibleVersion(res.details.clone()), + 2 => Self::UnsupportedNetworkConfiguration(res.details.clone()), + 3 => Self::NotExist(res.details.clone()), + 4 => Self::InvalidEnvValue(res.details.clone()), + 5 => Self::IOFailure(res.details.clone()), + 6 => Self::FailedToDecode(res.details.clone()), + 7 => Self::InvalidNetworkConfig(res.details.clone()), + 11 => Self::TryAgainLater(res.details.clone()), + _ => Self::FailedToDecode(format!("unknown error code: {}", res.code)), + } + } +} + +const CNI_ERROR_CODE_KUBE: u32 = 200; +const CNI_ERROR_MSG_KUBE: &str = "Kubernetes error"; +const CNI_ERROR_CODE_INTERNAL: u32 = 210; +const CNI_ERROR_MSG_INTERNAL: &str = "Internal error"; +const CNI_ERROR_CODE_ALLOCATOR: u32 = 220; +const CNI_ERROR_MSG_ALLOCATOR: &str = "Allocation error"; +const CNI_ERROR_CODE_NETWORK_CONFIG: u32 = 230; +const CNI_ERROR_MSG_NETWORK_CONFIG: &str = "Network configuration error"; +const CNI_ERROR_CODE_IPAM: u32 = 240; +const CNI_ERROR_MSG_IPAM: &str = "Ipam error"; +const CNI_ERROR_CODE_NETNS: u32 = 250; +const CNI_ERROR_MSG_NETNS: &str = "NetNS error"; +const CNI_ERROR_CODE_NETLINK: u32 = 260; +const CNI_ERROR_MSG_NETLINK: &str = "Netlink error"; + +impl From for rscni::error::Error { + fn from(value: Error) -> Self { + match value { + Error::AddressNotMatched => Self::Custom( + CNI_ERROR_CODE_NETWORK_CONFIG, + CNI_ERROR_MSG_NETWORK_CONFIG.to_string(), + value.to_string(), + ), + Error::AllocationNotFound => Self::Custom( + CNI_ERROR_CODE_ALLOCATOR, + CNI_ERROR_MSG_ALLOCATOR.to_string(), + value.to_string(), + ), + Error::BlockNotFound(detail) => Self::Custom( + CNI_ERROR_CODE_ALLOCATOR, + CNI_ERROR_MSG_ALLOCATOR.to_string(), + detail, + ), + Error::DefaultPoolNotFound => { + Self::TryAgainLater("default pool is not found".to_string()) + } + Error::AlreadyConfigured(key) => Self::Custom( + CNI_ERROR_CODE_INTERNAL, + CNI_ERROR_MSG_INTERNAL.to_string(), + key, + ), + Error::InvalidAddress(detail) => Self::Custom( + CNI_ERROR_CODE_NETWORK_CONFIG, + CNI_ERROR_MSG_NETWORK_CONFIG.to_string(), + 
detail,
+            ),
+            Error::Ipam(e) => Self::Custom(
+                CNI_ERROR_CODE_IPAM,
+                CNI_ERROR_MSG_IPAM.to_string(),
+                e.to_string(),
+            ),
+            Error::Kube(e) => Self::Custom(
+                CNI_ERROR_CODE_KUBE,
+                CNI_ERROR_MSG_KUBE.to_string(),
+                e.to_string(),
+            ),
+            Error::Lock => Self::Custom(
+                CNI_ERROR_CODE_INTERNAL,
+                CNI_ERROR_MSG_INTERNAL.to_string(),
+                value.to_string(),
+            ),
+            Error::MissingField(detail) => Self::Custom(
+                CNI_ERROR_CODE_INTERNAL,
+                CNI_ERROR_MSG_INTERNAL.to_string(),
+                detail,
+            ),
+            Error::NetNS(e) => Self::Custom(
+                CNI_ERROR_CODE_NETNS,
+                CNI_ERROR_MSG_NETNS.to_string(),
+                e.to_string(),
+            ),
+            Error::Netlink(e) => Self::Custom(
+                CNI_ERROR_CODE_NETLINK,
+                CNI_ERROR_MSG_NETLINK.to_string(),
+                e.to_string(),
+            ),
+            Error::PodAddressIsNotFound => Self::Custom(
+                CNI_ERROR_CODE_INTERNAL,
+                CNI_ERROR_MSG_INTERNAL.to_string(),
+                value.to_string(),
+            ),
+            Error::ReceiveNotify => Self::Custom(
+                CNI_ERROR_CODE_INTERNAL,
+                CNI_ERROR_MSG_INTERNAL.to_string(),
+                value.to_string(),
+            ),
+        }
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use std::{net::IpAddr, str::FromStr, sync::Arc};
+
+    use ipnet::IpNet;
+    use kube::Client;
+    use sartd_ipam::manager::{AllocatorSet, Block};
+    use tokio::sync::mpsc::unbounded_channel;
+
+    use crate::agent::cni::{
+        pod::{PodAllocation, PodInfo},
+        server::{CNIServerInner, CNI_ROUTE_TABLE_ID},
+    };
+
+    #[tokio::test]
+    async fn test_cni_server_allocate() {
+        // Set a dummy kubeconfig path.
+        std::env::set_var("KUBECONFIG", "tests/config/dummy_kubeconfig");
+        let dummy_client = Client::try_default().await.unwrap();
+        let allocator = Arc::new(AllocatorSet::new());
+        let (_sender, receiver) = unbounded_channel();
+        let mut inner_cni_server = CNIServerInner::new(
+            dummy_client,
+            allocator.clone(),
+            "test-node".to_string(),
+            IpAddr::from_str("127.0.0.1").unwrap(),
+            CNI_ROUTE_TABLE_ID,
+            receiver,
+        );
+
+        let pod1 = PodInfo {
+            container_id: "pod1-container-id".to_string(),
+            uid: "pod1-uid".to_string(),
+            namespace: "default".to_string(),
+            name: "pod1".to_string(),
+        };
+
+        // If the pool doesn't exist, allocate() should return None without any error.
+        let should_none = inner_cni_server.allocate("pool1", &pod1).unwrap();
+        assert!(should_none.is_none());
+
+        // Register the pool.
+        let pool1_block1 = Block::new(
+            "pool1_block1".to_string(),
+            "pool1".to_string(),
+            IpNet::from_str("10.0.0.0/31").unwrap(),
+        )
+        .unwrap();
+        {
+            let alloc = allocator.clone();
+            let mut tmp_allocator = alloc.inner.lock().unwrap();
+            tmp_allocator.insert(pool1_block1, false).unwrap();
+        }
+
+        let should_some = inner_cni_server.allocate("pool1", &pod1).unwrap();
+        assert!(should_some.is_some());
+        let res = should_some.unwrap();
+        assert_eq!(
+            res,
+            PodAllocation {
+                container_id: pod1.container_id.clone(),
+                block: "pool1_block1".to_string(),
+                addr: IpNet::from_str("10.0.0.0/31").unwrap()
+            }
+        );
+
+        // Create the second pod.
+        let should_some = inner_cni_server.allocate("pool1", &pod1).unwrap();
+        assert!(should_some.is_some());
+        let res = should_some.unwrap();
+        assert_eq!(
+            res,
+            PodAllocation {
+                container_id: pod1.container_id.clone(),
+                block: "pool1_block1".to_string(),
+                addr: IpNet::from_str("10.0.0.1/31").unwrap()
+            }
+        );
+        // When creating the third pod, the specified block is full.
+        // In this case, allocate() returns None without any error.
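+        // A /31 block only holds two addresses, so pool1_block1 is exhausted at this point.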
+        let should_none = inner_cni_server.allocate("pool1", &pod1).unwrap();
+        assert!(should_none.is_none());
+
+        // Add a new block to pool1.
+        let pool1_block2 = Block::new(
+            "pool1_block2".to_string(),
+            "pool1".to_string(),
+            IpNet::from_str("10.0.0.2/31").unwrap(),
+        )
+        .unwrap();
+        {
+            let alloc = allocator.clone();
+            let mut tmp_allocator = alloc.inner.lock().unwrap();
+            tmp_allocator.insert(pool1_block2, false).unwrap();
+        }
+        let should_some = inner_cni_server.allocate("pool1", &pod1).unwrap();
+        assert!(should_some.is_some());
+        let res = should_some.unwrap();
+
+        // When creating the fourth pod, the address should be allocated from pool1_block2.
+        assert_eq!(
+            res,
+            PodAllocation {
+                container_id: pod1.container_id.clone(),
+                block: "pool1_block2".to_string(),
+                addr: IpNet::from_str("10.0.0.2/31").unwrap()
+            }
+        );
+        // Add a new block for another pool (pool2).
+        let pool2_block1 = Block::new(
+            "pool2_block1".to_string(),
+            "pool2".to_string(),
+            IpNet::from_str("10.0.1.0/32").unwrap(),
+        )
+        .unwrap();
+        {
+            let alloc = allocator.clone();
+            let mut tmp_allocator = alloc.inner.lock().unwrap();
+            tmp_allocator.insert(pool2_block1, false).unwrap();
+        }
+        let should_some = inner_cni_server.allocate("pool2", &pod1).unwrap();
+        assert!(should_some.is_some());
+        let res = should_some.unwrap();
+
+        // When allocating from pool2, the address should come from pool2_block1.
+        assert_eq!(
+            res,
+            PodAllocation {
+                container_id: pod1.container_id.clone(),
+                block: "pool2_block1".to_string(),
+                addr: IpNet::from_str("10.0.1.0/32").unwrap()
+            }
+        );
+    }
+
+    #[tokio::test]
+    async fn test_cni_server_allocate_with_block() {
+        // Set a dummy kubeconfig path.
+        std::env::set_var("KUBECONFIG", "tests/config/dummy_kubeconfig");
+        let dummy_client = Client::try_default().await.unwrap();
+        let allocator = Arc::new(AllocatorSet::new());
+        let (_sender, receiver) = unbounded_channel();
+        let mut inner_cni_server = CNIServerInner::new(
+            dummy_client,
+            allocator.clone(),
+            "test-node".to_string(),
+            IpAddr::from_str("127.0.0.1").unwrap(),
+            CNI_ROUTE_TABLE_ID,
+            receiver,
+        );
+
+        let pod1 = PodInfo {
+            container_id: "pod1-container-id".to_string(),
+            uid: "pod1-uid".to_string(),
+            namespace: "default".to_string(),
+            name: "pod1".to_string(),
+        };
+
+        // If the named block doesn't exist, allocate_with_block() should return an error.
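+        // Note: unlike allocate(), allocate_with_block() is handed the block name directly instead of searching the pool's blocks.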
+        let should_err = inner_cni_server.allocate_with_block("pool1_block1", &pod1);
+        // Expect a BlockNotFound error.
+        assert!(should_err.is_err());
+
+        // Register the pool.
+        let pool1_block1 = Block::new(
+            "pool1_block1".to_string(),
+            "pool1".to_string(),
+            IpNet::from_str("10.0.0.0/31").unwrap(),
+        )
+        .unwrap();
+        {
+            let alloc = allocator.clone();
+            let mut tmp_allocator = alloc.inner.lock().unwrap();
+            tmp_allocator.insert(pool1_block1, false).unwrap();
+        }
+
+        let res = inner_cni_server
+            .allocate_with_block("pool1_block1", &pod1)
+            .unwrap();
+        assert_eq!(
+            res,
+            PodAllocation {
+                container_id: pod1.container_id.clone(),
+                block: "pool1_block1".to_string(),
+                addr: IpNet::from_str("10.0.0.0/31").unwrap()
+            }
+        );
+
+        // Allocate for the second pod.
+        let res = inner_cni_server
+            .allocate_with_block("pool1_block1", &pod1)
+            .unwrap();
+        assert_eq!(
+            res,
+            PodAllocation {
+                container_id: pod1.container_id.clone(),
+                block: "pool1_block1".to_string(),
+                addr: IpNet::from_str("10.0.0.1/31").unwrap()
+            }
+        );
+
+        let should_err_full = inner_cni_server.allocate_with_block("pool1_block1", &pod1);
+        // Expect an Ipam::Full error.
+        assert!(should_err_full.is_err());
+    }
+
+    #[tokio::test]
+    async fn test_cni_server_release() {
+        std::env::set_var("KUBECONFIG", "tests/config/dummy_kubeconfig");
+        let dummy_client = Client::try_default().await.unwrap();
+        let allocator = Arc::new(AllocatorSet::new());
+        let (_sender, receiver) = unbounded_channel();
+        let mut inner_cni_server = CNIServerInner::new(
+            dummy_client,
+            allocator.clone(),
+            "test-node".to_string(),
+            IpAddr::from_str("127.0.0.1").unwrap(),
+            CNI_ROUTE_TABLE_ID,
+            receiver,
+        );
+
+        let pod1 = PodInfo {
+            container_id: "pod1-container-id".to_string(),
+            uid: "pod1-uid".to_string(),
+            namespace: "default".to_string(),
+            name: "pod1".to_string(),
+        };
+
+        // If the pool doesn't exist, allocate() should return None without any error.
+        let should_none = inner_cni_server.allocate("pool1", &pod1).unwrap();
+        assert!(should_none.is_none());
+
+        // Register the pool.
+        let pool1_block1 = Block::new(
+            "pool1_block1".to_string(),
+            "pool1".to_string(),
+            IpNet::from_str("10.0.0.0/31").unwrap(),
+        )
+        .unwrap();
+        {
+            let alloc = allocator.clone();
+            let mut tmp_allocator = alloc.inner.lock().unwrap();
+            tmp_allocator.insert(pool1_block1, false).unwrap();
+        }
+
+        let should_some = inner_cni_server.allocate("pool1", &pod1).unwrap();
+        assert!(should_some.is_some());
+        let res = should_some.unwrap();
+        assert_eq!(
+            res,
+            PodAllocation {
+                container_id: pod1.container_id.clone(),
+                block: "pool1_block1".to_string(),
+                addr: IpNet::from_str("10.0.0.0/31").unwrap()
+            }
+        );
+
+        // Create the second pod.
+        let should_some = inner_cni_server.allocate("pool1", &pod1).unwrap();
+        assert!(should_some.is_some());
+        let res = should_some.unwrap();
+        assert_eq!(
+            res,
+            PodAllocation {
+                container_id: pod1.container_id.clone(),
+                block: "pool1_block1".to_string(),
+                addr: IpNet::from_str("10.0.0.1/31").unwrap()
+            }
+        );
+        // When creating the third pod, the specified block is full.
+        // In this case, allocate() returns None without any error.
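+        // The allocation sequence below mirrors test_cni_server_allocate; the release-specific assertions follow it.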
+        let should_none = inner_cni_server.allocate("pool1", &pod1).unwrap();
+        assert!(should_none.is_none());
+
+        // Add a new block to pool1.
+        let pool1_block2 = Block::new(
+            "pool1_block2".to_string(),
+            "pool1".to_string(),
+            IpNet::from_str("10.0.0.2/31").unwrap(),
+        )
+        .unwrap();
+        {
+            let alloc = allocator.clone();
+            let mut tmp_allocator = alloc.inner.lock().unwrap();
+            tmp_allocator.insert(pool1_block2, false).unwrap();
+        }
+        let should_some = inner_cni_server.allocate("pool1", &pod1).unwrap();
+        assert!(should_some.is_some());
+        let res = should_some.unwrap();
+
+        // When creating the fourth pod, the address should be allocated from pool1_block2.
+        assert_eq!(
+            res,
+            PodAllocation {
+                container_id: pod1.container_id.clone(),
+                block: "pool1_block2".to_string(),
+                addr: IpNet::from_str("10.0.0.2/31").unwrap()
+            }
+        );
+        // Add a new block for another pool (pool2).
+        let pool2_block1 = Block::new(
+            "pool2_block1".to_string(),
+            "pool2".to_string(),
+            IpNet::from_str("10.0.1.0/32").unwrap(),
+        )
+        .unwrap();
+        {
+            let alloc = allocator.clone();
+            let mut tmp_allocator = alloc.inner.lock().unwrap();
+            tmp_allocator.insert(pool2_block1, false).unwrap();
+        }
+        let should_some = inner_cni_server.allocate("pool2", &pod1).unwrap();
+        assert!(should_some.is_some());
+        let res = should_some.unwrap();
+
+        // When allocating from pool2, the address should come from pool2_block1.
+        assert_eq!(
+            res,
+            PodAllocation {
+                container_id: pod1.container_id.clone(),
+                block: "pool2_block1".to_string(),
+                addr: IpNet::from_str("10.0.1.0/32").unwrap()
+            }
+        );
+
+        // Check that the address is allocated.
+        {
+            let tmp_allocator_set = allocator.clone();
+            let tmp_allocator = tmp_allocator_set.inner.lock().unwrap();
+            assert!(tmp_allocator
+                .blocks
+                .get("pool2_block1")
+                .unwrap()
+                .allocator
+                .is_allocated(&IpAddr::from_str("10.0.1.0").unwrap()));
+        }
+        inner_cni_server
+            .release("pool2_block1", &IpAddr::from_str("10.0.1.0").unwrap())
+            .unwrap();
+        // Check that the address is released.
+        {
+            let tmp_allocator_set = allocator.clone();
+            let tmp_allocator = tmp_allocator_set.inner.lock().unwrap();
+            assert!(!tmp_allocator
+                .blocks
+                .get("pool2_block1")
+                .unwrap()
+                .allocator
+                .is_allocated(&IpAddr::from_str("10.0.1.0").unwrap()));
+        }
+        // Try to release an address that has already been released.
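+        // A second release of the same address must fail rather than silently succeed.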
+ let should_err = + inner_cni_server.release("pool2_block1", &IpAddr::from_str("10.0.1.0").unwrap()); + // expect Ipam::NoReleasableAddress error + assert!(should_err.is_err()); } } diff --git a/sartd/src/kubernetes/src/agent/reconciler/address_block.rs b/sartd/src/kubernetes/src/agent/reconciler/address_block.rs index 95a9bd6..cda72ab 100644 --- a/sartd/src/kubernetes/src/agent/reconciler/address_block.rs +++ b/sartd/src/kubernetes/src/agent/reconciler/address_block.rs @@ -1,4 +1,4 @@ -use std::{collections::BTreeMap, str::FromStr, sync::Arc}; +use std::{collections::BTreeMap, str::FromStr, sync::Arc, time::Duration}; use futures::StreamExt; use ipnet::IpNet; @@ -26,6 +26,7 @@ use crate::{ AdvertiseStatus, BGPAdvertisement, BGPAdvertisementSpec, BGPAdvertisementStatus, Protocol, }, + node_bgp::NodeBGP, }, util::create_owner_reference, }; @@ -62,7 +63,7 @@ pub async fn reconciler( #[tracing::instrument(skip_all)] async fn reconcile( - _api: &Api, + api: &Api, ab: &AddressBlock, ctx: Arc>>, ) -> Result { @@ -79,7 +80,26 @@ async fn reconcile( .clone() .ok_or(Error::MissingFields("spec.node_ref".to_string()))?; + let adv_api = Api::::namespaced(ctx.client().clone(), &namespace); let mut create_adv = false; + let mut need_update_adv = false; + let mut need_gc = false; + + if let Some(adv) = adv_api.get_opt(&ab.name_any()).await.map_err(Error::Kube)? { + match adv.status.as_ref() { + Some(status) => match status.peers.as_ref() { + Some(peers) => { + if peers.is_empty() { + need_update_adv = true; + } + } + None => need_update_adv = true, + }, + None => need_update_adv = true, + } + } else { + create_adv = true; + } { let mut alloc_set = component @@ -87,10 +107,16 @@ async fn reconcile( .inner .lock() .map_err(|_| Error::FailedToGetLock)?; - match alloc_set.blocks.get(&ab.name_any()) { - Some(_a) => { + Some(block) => { tracing::info!(name = ab.name_any(), "Address block already exists"); + + // GC empty block + if block.allocator.is_empty() { + tracing::info!(name = ab.name_any(), "Block is empty"); + need_gc = true; + } + match ab.spec.auto_assign { true => { // Check if already set @@ -153,8 +179,15 @@ async fn reconcile( } } + if need_gc { + tracing::info!(name = ab.name_any(), "Delete empty AddressBlock"); + api.delete(&ab.name_any(), &DeleteParams::default()) + .await + .map_err(Error::Kube)?; + return Ok(Action::await_change()); + } + if create_adv { - tracing::info!(name = ab.name_any(), "Create new BGPAdvertisement"); let adv = BGPAdvertisement { metadata: ObjectMeta { name: Some(ab.name_any()), @@ -172,19 +205,49 @@ async fn reconcile( protocol: Protocol::from(&cidr), attrs: None, }, - status: Some(BGPAdvertisementStatus { - peers: Some(BTreeMap::from([( - node.clone(), - AdvertiseStatus::NotAdvertised, - )])), - }), + status: None, }; - let adv_api = Api::::namespaced(ctx.client().clone(), &namespace); adv_api .create(&PostParams::default(), &adv) .await .map_err(Error::Kube)?; + + return Ok(Action::requeue(Duration::from_secs(1))); + } + + if need_update_adv { + let mut adv = adv_api.get(&ab.name_any()).await.map_err(Error::Kube)?; + let peers = get_target_peer(ctx.client().clone(), &node) + .await? 
+ .into_iter() + .map(|p| (p, AdvertiseStatus::NotAdvertised)) + .collect::>(); + match adv.status.as_mut() { + Some(status) => match status.peers.as_mut() { + Some(status_peers) => { + for (peer, _) in peers.iter() { + if status_peers.get(peer).is_none() { + status_peers.insert(peer.to_string(), AdvertiseStatus::NotAdvertised); + } + } + } + None => status.peers = Some(peers), + }, + None => { + adv.status = Some(BGPAdvertisementStatus { + peers: Some(peers.clone()), + }); + } + } + adv_api + .replace_status( + &adv.name_any(), + &PostParams::default(), + serde_json::to_vec(&adv).map_err(Error::Serialization)?, + ) + .await + .map_err(Error::Kube)?; } Ok(Action::await_change()) @@ -224,7 +287,6 @@ async fn cleanup( } if deletable { - tracing::warn!(name = ab.name_any(), "Delete BGPAdvertisement"); let adv_api = Api::::namespaced(ctx.client().clone(), &namespace); adv_api .delete(&ab.name_any(), &DeleteParams::default()) @@ -262,3 +324,16 @@ pub async fn run(state: State, interval: u64, pod_allocator: Arc) .for_each(|_| futures::future::ready(())) .await; } + +async fn get_target_peer(client: Client, node: &str) -> Result, Error> { + let node_bgp_api = Api::::all(client); + + let nb = node_bgp_api.get(node).await.map_err(Error::Kube)?; + match nb.spec.peers.as_ref() { + Some(peers) => Ok(peers + .iter() + .map(|p| p.name.clone()) + .collect::>()), + None => Ok(vec![]), + } +} diff --git a/sartd/src/kubernetes/src/agent/reconciler/bgp_advertisement.rs b/sartd/src/kubernetes/src/agent/reconciler/bgp_advertisement.rs index f7ce1a6..96c6520 100644 --- a/sartd/src/kubernetes/src/agent/reconciler/bgp_advertisement.rs +++ b/sartd/src/kubernetes/src/agent/reconciler/bgp_advertisement.rs @@ -93,6 +93,7 @@ async fn reconcile( let mut new_ba = ba.clone(); let mut need_update = false; + let mut need_requeue = false; if let Some(peers) = nb.spec.peers { let bgp_peers = Api::::all(ctx.client.clone()); @@ -115,6 +116,7 @@ async fn reconcile( .is_none() { tracing::warn!(peer = bp.name_any(), "BGPPeer is not established"); + need_requeue = true; continue; } // peer is established @@ -185,5 +187,8 @@ async fn reconcile( return Ok(Action::requeue(Duration::from_secs(60))); } } + if need_requeue { + return Ok(Action::requeue(Duration::from_secs(10))); + } Ok(Action::await_change()) } diff --git a/sartd/src/kubernetes/src/agent/reconciler/bgp_peer.rs b/sartd/src/kubernetes/src/agent/reconciler/bgp_peer.rs index f3f3a39..e979b73 100644 --- a/sartd/src/kubernetes/src/agent/reconciler/bgp_peer.rs +++ b/sartd/src/kubernetes/src/agent/reconciler/bgp_peer.rs @@ -1,9 +1,9 @@ -use std::{sync::Arc, time::Duration}; +use std::{net::IpAddr, sync::Arc, time::Duration}; use futures::StreamExt; use k8s_openapi::api::discovery::v1::EndpointSlice; use kube::{ - api::{ListParams, Patch, PatchParams, PostParams}, + api::{ListParams, PostParams}, runtime::{ controller::Action, finalizer::{finalizer, Event}, @@ -86,11 +86,6 @@ async fn reconcile(api: &Api, bp: &BGPPeer, ctx: Arc) -> Resul let mut need_status_update = false; match &peer.get_ref().peer { Some(peer) => { - tracing::info!( - asn = bp.spec.asn, - addr = bp.spec.addr, - "Peer already exists" - ); // update status match new_bp.status.as_mut() { Some(status) => match status.conditions.as_mut() { @@ -393,7 +388,6 @@ async fn cleanup(_api: &Api, bp: &BGPPeer, ctx: Arc) -> Result .list(&ListParams::default()) .await .map_err(Error::Kube)?; - tracing::warn!(name = bp.name_any(), "Reach here"); for ba in ba_list.iter_mut() { if let Some(status) = ba.status.as_mut() { if let 
Some(peers) = status.peers.as_mut() { diff --git a/sartd/src/kubernetes/src/agent/reconciler/node_bgp.rs b/sartd/src/kubernetes/src/agent/reconciler/node_bgp.rs index 6f79c78..1b0444c 100644 --- a/sartd/src/kubernetes/src/agent/reconciler/node_bgp.rs +++ b/sartd/src/kubernetes/src/agent/reconciler/node_bgp.rs @@ -129,6 +129,7 @@ async fn reconcile(api: &Api, nb: &NodeBGP, ctx: Arc) -> Resul name = nb.name_any(), asn = nb.spec.asn, router_id = nb.spec.router_id, + multipath =? nb.spec.speaker.multipath, "Configure local BGP settings" ); speaker_client @@ -140,6 +141,14 @@ async fn reconcile(api: &Api, nb: &NodeBGP, ctx: Arc) -> Resul }) .await?; + if let Some(multi_path) = nb.spec.speaker.multipath { + speaker_client + .configure_multi_path(sartd_proto::sart::ConfigureMultiPathRequest { + enable: multi_path, + }) + .await?; + } + let cond = NodeBGPCondition { status: NodeBGPConditionStatus::Available, reason: NodeBGPConditionReason::Configured, @@ -171,12 +180,6 @@ async fn reconcile(api: &Api, nb: &NodeBGP, ctx: Arc) -> Resul backoff_advertisements(nb, &ctx.client.clone()).await?; new_status.backoff += 1; } - tracing::info!( - name = nb.name_any(), - asn = nb.spec.asn, - router_id = nb.spec.router_id, - "Update NodeBGP status" - ); // update status let mut new_nb = nb.clone(); new_nb.status = Some(new_status); @@ -308,12 +311,6 @@ async fn reconcile(api: &Api, nb: &NodeBGP, ctx: Arc) -> Resul // create peers based on NodeBGP.spec.peers if available { - let cluster_bgps = nb - .status - .as_ref() - .and_then(|status| status.cluster_bgp_refs.as_ref()) - .unwrap_or(&Vec::new()); - let bgp_peers = Api::::all(ctx.client.clone()); if let Some(peers) = new_nb.spec.peers.as_mut() { diff --git a/sartd/src/kubernetes/src/agent/server.rs b/sartd/src/kubernetes/src/agent/server.rs index 4d4b929..fc66a01 100644 --- a/sartd/src/kubernetes/src/agent/server.rs +++ b/sartd/src/kubernetes/src/agent/server.rs @@ -1,9 +1,12 @@ +use std::net::IpAddr; use std::sync::Arc; +use std::time::Duration; use actix_web::{ get, middleware, web::Data, App, HttpRequest, HttpResponse, HttpServer, Responder, }; -use kube::Client; +use k8s_openapi::api::core::v1::Node; +use kube::{Api, Client}; use prometheus::{Encoder, TextEncoder}; use rustls::ServerConfig; use sartd_cert::util::{load_certificates_from_pem, load_private_key_from_file}; @@ -11,8 +14,8 @@ use sartd_ipam::manager::AllocatorSet; use sartd_trace::init::{prepare_tracing, TraceConfig}; use tokio::sync::mpsc::unbounded_channel; -use crate::agent::cni; -use crate::agent::cni::server::{CNIServer, CNI_SERVER_ENDPOINT}; +use crate::agent::cni::server::{CNIServer, CNI_ROUTE_TABLE_ID}; +use crate::agent::cni::{self, gc}; use crate::agent::reconciler::address_block::PodAllocator; use crate::config::Mode; use crate::context::State; @@ -72,6 +75,12 @@ async fn run(a: Agent, trace_config: TraceConfig) { .unwrap() .shutdown_timeout(5); + tracing::info!( + http_port = a.server_http_port, + https_port = a.server_https_port, + "Agent server is running." 
+ ); + tracing::info!("Start Agent Reconcilers"); let node_bgp_state = state.clone(); @@ -114,8 +123,17 @@ async fn run(a: Agent, trace_config: TraceConfig) { }); let node_name = std::env::var("HOSTNAME").unwrap(); + // get node internal ip + let node_addr = get_node_addr(&node_name).await; let kube_client = Client::try_default().await.unwrap(); - let cni_server = CNIServer::new(kube_client, allocator_set.clone(), node_name, receiver); + let cni_server = CNIServer::new( + kube_client.clone(), + allocator_set.clone(), + node_name, + node_addr, + CNI_ROUTE_TABLE_ID, + receiver, + ); let cni_endpoint = a.cni_endpoint.expect("cni endpoint must be given"); @@ -123,6 +141,16 @@ async fn run(a: Agent, trace_config: TraceConfig) { tokio::spawn(async move { cni::server::run(&cni_endpoint, cni_server).await; }); + + tracing::info!("Start Garbage collector"); + let mut garbage_collector = gc::GarbageCollector::new( + Duration::from_secs(60), + kube_client.clone(), + allocator_set.clone(), + ); + tokio::spawn(async move { + garbage_collector.run().await; + }); } server.run().await.unwrap() @@ -180,3 +208,16 @@ async fn index(c: Data, _req: HttpRequest) -> impl Responder { let d = c.diagnostics().await; HttpResponse::Ok().json(&d) } + +// This function can panic +async fn get_node_addr(name: &str) -> IpAddr { + let client = Client::try_default().await.unwrap(); + let node_api = Api::::all(client); + let node = node_api.get(name).await.unwrap(); + for addr in node.status.unwrap().addresses.unwrap().iter() { + if addr.type_.eq("InternalIP") { + return addr.address.parse::().unwrap(); + } + } + panic!("Failed to get Node internal IP"); +} diff --git a/sartd/src/kubernetes/src/config.rs b/sartd/src/kubernetes/src/config.rs index dbec4d0..1c400b6 100644 --- a/sartd/src/kubernetes/src/config.rs +++ b/sartd/src/kubernetes/src/config.rs @@ -1,4 +1,4 @@ -use std::{fmt::{self, write}, str::FromStr}; +use std::str::FromStr; use serde::{Deserialize, Serialize}; use thiserror::Error; @@ -18,7 +18,7 @@ pub enum Mode { } impl std::fmt::Display for Mode { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { Self::CNI => write!(f, "cni"), Self::LB => write!(f, "lb"), @@ -30,8 +30,8 @@ impl std::fmt::Display for Mode { #[derive(Debug, Clone, Copy, Error)] pub struct ParseModeError; -impl fmt::Display for ParseModeError { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { +impl std::fmt::Display for ParseModeError { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { "provided string was not `true` or `false`".fmt(f) } } diff --git a/sartd/src/kubernetes/src/controller/reconciler/address_block.rs b/sartd/src/kubernetes/src/controller/reconciler/address_block.rs index 9983b53..a7d3167 100644 --- a/sartd/src/kubernetes/src/controller/reconciler/address_block.rs +++ b/sartd/src/kubernetes/src/controller/reconciler/address_block.rs @@ -17,7 +17,7 @@ use sartd_ipam::manager::{AllocatorSet, Block}; use crate::{ context::{error_policy, ContextWith, Ctx, State}, controller::error::Error, - crd::address_block::{AddressBlock, ADDRESS_BLOCK_FINALIZER}, + crd::{address_block::{AddressBlock, ADDRESS_BLOCK_FINALIZER}, address_pool::AddressType}, }; #[tracing::instrument(skip_all, fields(trace_id))] @@ -48,16 +48,19 @@ async fn reconcile( ab: &AddressBlock, ctx: Arc>>, ) -> Result { - tracing::info!(name = ab.name_any(), "reconcile AddressBlock"); + // only handling lb address block here + if 
ab.spec.r#type.ne(&AddressType::Service) { + return Ok(Action::await_change()); + } + tracing::info!(name = ab.name_any(), "Reconcile AddressBlock"); let component = ctx.component.clone(); let mut alloc_set = component.inner.lock().map_err(|_| Error::FailedToGetLock)?; - let cidr = IpNet::from_str(&ab.spec.cidr).map_err(|_| Error::InvalidCIDR)?; + let cidr = IpNet::from_str(&ab.spec.cidr).map_err(|_| Error::InvalidCIDR)?; match alloc_set.blocks.get(&ab.name_any()) { Some(_a) => { - tracing::info!(name = ab.name_any(), "Address block already exists"); match ab.spec.auto_assign { true => { // Check if already set diff --git a/sartd/src/kubernetes/src/controller/reconciler/address_pool.rs b/sartd/src/kubernetes/src/controller/reconciler/address_pool.rs index b90cd66..495c174 100644 --- a/sartd/src/kubernetes/src/controller/reconciler/address_pool.rs +++ b/sartd/src/kubernetes/src/controller/reconciler/address_pool.rs @@ -284,7 +284,6 @@ async fn reconcile_pod_pool( } } - tracing::info!(name = ap.name_any(), "Calculate allocatable blocks"); let all_blocks: Vec = (0..pool_size).step_by(block_size as usize).collect(); let allocated_blocks = if let Some(allocated) = ap .status diff --git a/sartd/src/kubernetes/src/controller/reconciler/block_request.rs b/sartd/src/kubernetes/src/controller/reconciler/block_request.rs index 316682e..c887f43 100644 --- a/sartd/src/kubernetes/src/controller/reconciler/block_request.rs +++ b/sartd/src/kubernetes/src/controller/reconciler/block_request.rs @@ -1,85 +1,124 @@ use std::sync::Arc; use futures::StreamExt; -use kube::{api::{ListParams, PostParams}, runtime::{controller::Action, finalizer::{finalizer, Event}, watcher::Config, Controller}, Api, Client, ResourceExt}; - - -use crate::{context::{error_policy, Context, Ctx, State}, controller::error::Error, crd::{address_pool::{AddressPool, AddressPoolStatus, AddressType}, block_request::{BlockRequest, BLOCK_REQUEST_FINALIZER}}}; - +use kube::{ + api::{ListParams, Patch, PostParams}, + runtime::{ + controller::Action, + finalizer::{finalizer, Event}, + watcher::Config, + Controller, + }, + Api, Client, ResourceExt, +}; + +use crate::{ + context::{error_policy, Context, Ctx, State}, + controller::error::Error, + crd::{ + address_pool::{AddressPool, AddressPoolStatus, AddressType}, + block_request::{BlockRequest, BLOCK_REQUEST_FINALIZER}, + }, +}; #[tracing::instrument(skip_all, fields(trace_id))] pub async fn reconciler(br: Arc, ctx: Arc) -> Result { - - let block_request_api = Api::::all(ctx.client().clone()); - - finalizer(&block_request_api, BLOCK_REQUEST_FINALIZER, br, |event| async { - match event { - Event::Apply(br) => reconcile(&block_request_api, &br, ctx.clone()).await, - Event::Cleanup(br) => cleanup(&block_request_api, &br, ctx.clone()).await, - } - }).await.map_err(|e| Error::Finalizer(Box::new(e))) + let block_request_api = Api::::all(ctx.client().clone()); + + finalizer( + &block_request_api, + BLOCK_REQUEST_FINALIZER, + br, + |event| async { + match event { + Event::Apply(br) => reconcile(&block_request_api, &br, ctx.clone()).await, + Event::Cleanup(br) => cleanup(&block_request_api, &br, ctx.clone()).await, + } + }, + ) + .await + .map_err(|e| Error::Finalizer(Box::new(e))) } #[tracing::instrument(skip_all)] -async fn reconcile(_api: &Api, br: &BlockRequest, ctx: Arc) -> Result { - tracing::info!(name = br.name_any(), "reconcile BlockRequest"); - - let address_pool_api = Api::::all(ctx.client.clone()); - - let mut pool = address_pool_api.get(&br.spec.pool).await.map_err(Error::Kube)?; - - if 
pool.spec.r#type.ne(&AddressType::Pod) {
-        tracing::error!(name=br.name_any(), "The requesting pool is not for Pods.");
-        return Err(Error::InvalidAddressType);
-    }
-
-    match pool.status.as_mut() {
-        Some(status) => {
-            match status.requested.as_mut() {
-                Some(requested) => {
-                    if requested.iter().any(|r| r.eq(&br.name_any())) {
-                        tracing::warn!(name=br.name_any(), "Same BlockRequest already exists");
-                        return Err(Error::BlockRequestAlreadyExists);
-                    }
-                    requested.push(br.name_any());
-                },
-                None => {
-                    status.requested = Some(vec![br.name_any()]);
-                }
-            }
-        },
-        None => {
-            pool.status = Some(AddressPoolStatus{
-                requested: Some(vec![br.name_any()]),
-                allocated: None,
-                released: None,
-            });
-        }
-    }
-
-    address_pool_api.replace_status(&pool.name_any(), &PostParams::default(), serde_json::to_vec(&pool).map_err(Error::Serialization)?).await.map_err(Error::Kube)?;
-
-    Ok(Action::await_change())
-}
+async fn reconcile(
+    _api: &Api<BlockRequest>,
+    br: &BlockRequest,
+    ctx: Arc<Context>,
+) -> Result<Action, Error> {
+    tracing::info!(name = br.name_any(), "Reconcile BlockRequest");
-#[tracing::instrument(skip_all)]
-async fn cleanup(_api: &Api<BlockRequest>, br: &BlockRequest, ctx: Arc<Context>) -> Result<Action, Error> {
-    tracing::info!(name = br.name_any(), "clean up BlockRequest");
+    let address_pool_api = Api::<AddressPool>::all(ctx.client.clone());
-    let address_pool_api = Api::<AddressPool>::all(ctx.client.clone());
+    let mut pool = address_pool_api
+        .get(&br.spec.pool)
+        .await
+        .map_err(Error::Kube)?;
+
+    if pool.spec.r#type.ne(&AddressType::Pod) {
+        return Err(Error::InvalidAddressType);
+    }
+
+    match pool.status.as_mut() {
+        Some(status) => match status.requested.as_mut() {
+            Some(requested) => {
+                if requested.iter().any(|r| r.eq(&br.name_any())) {
+                    tracing::warn!(name = br.name_any(), "Same BlockRequest already exists");
+                    return Err(Error::BlockRequestAlreadyExists);
+                }
+                requested.push(br.name_any());
+            }
+            None => {
+                status.requested = Some(vec![br.name_any()]);
+            }
+        },
+        None => {
+            pool.status = Some(AddressPoolStatus {
+                requested: Some(vec![br.name_any()]),
+                allocated: None,
+                released: None,
+            });
+        }
+    }
+
+    // If we fail to update the status here, the controller may sometimes create a useless block.
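+    // (replace_status() carries the fetched resourceVersion, so a concurrent update should fail with a conflict rather than silently overwrite the pool status.)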
+ address_pool_api + .replace_status( + &pool.name_any(), + &PostParams::default(), + serde_json::to_vec(&pool).map_err(Error::Serialization)?, + ) + .await + .map_err(Error::Kube)?; - let pool = address_pool_api.get(&br.spec.pool).await.map_err(Error::Kube)?; + Ok(Action::await_change()) +} - if let Some(status) = pool.status.as_ref() { - if let Some(requested) = status.requested.as_ref() { - if requested.iter().any(|r| r.eq(&br.name_any())) { - tracing::warn!(name=br.name_any(), "BlockRequest isn't performed yet."); - return Err(Error::BlockRequestNotPerformed); - } - } - } +#[tracing::instrument(skip_all)] +async fn cleanup( + _api: &Api, + br: &BlockRequest, + ctx: Arc, +) -> Result { + tracing::info!(name = br.name_any(), "clean up BlockRequest"); - Ok(Action::await_change()) + let address_pool_api = Api::::all(ctx.client.clone()); + + let pool = address_pool_api + .get(&br.spec.pool) + .await + .map_err(Error::Kube)?; + + if let Some(status) = pool.status.as_ref() { + if let Some(requested) = status.requested.as_ref() { + if requested.iter().any(|r| r.eq(&br.name_any())) { + tracing::warn!(name = br.name_any(), "BlockRequest isn't performed yet."); + return Err(Error::BlockRequestNotPerformed); + } + } + } + + Ok(Action::await_change()) } pub async fn run(state: State, interval: u64) { @@ -87,14 +126,17 @@ pub async fn run(state: State, interval: u64) { .await .expect("Failed to create kube client"); - let block_request_api = Api::::all(client.clone()); - if let Err(e) = block_request_api.list(&ListParams::default().limit(1)).await { + let block_request_api = Api::::all(client.clone()); + if let Err(e) = block_request_api + .list(&ListParams::default().limit(1)) + .await + { tracing::error!("CRD is not queryable; {e:?}. Is the CRD installed?"); tracing::info!("Installation: cargo run --bin crdgen | kubectl apply -f -"); std::process::exit(1); - } + } - tracing::info!("Start BlockRequest reconciler"); + tracing::info!("Start BlockRequest reconciler"); Controller::new(block_request_api, Config::default().any_semantic()) .shutdown_on_signal() diff --git a/sartd/src/kubernetes/src/controller/server.rs b/sartd/src/kubernetes/src/controller/server.rs index c3f86c4..a56754d 100644 --- a/sartd/src/kubernetes/src/controller/server.rs +++ b/sartd/src/kubernetes/src/controller/server.rs @@ -44,7 +44,7 @@ async fn run(c: Controller, trace_config: TraceConfig) { let cert_chain = load_certificates_from_pem(&c.server_cert).unwrap(); let private_key = load_private_key_from_file(&c.server_key).unwrap(); - // Initiatilize Kubernetes controller state + // Initialize Kubernetes controller state let state = State::new("controller"); // Start web server @@ -83,6 +83,12 @@ async fn run(c: Controller, trace_config: TraceConfig) { .unwrap() .shutdown_timeout(5); + tracing::info!( + http_port = c.server_port, + https_port = c.server_https_port, + "Controller server is running." 
+ ); + let allocator_set = Arc::new(AllocatorSet::new()); // Start reconcilers @@ -106,11 +112,13 @@ async fn run(c: Controller, trace_config: TraceConfig) { .await; }); - tracing::info!("Start BlockRequest reconciler"); - let block_request_state = state.clone(); - tokio::spawn(async move { - reconciler::block_request::run(block_request_state, c.requeue_interval).await; - }); + if c.mode.eq(&Mode::CNI) || c.mode.eq(&Mode::Dual) { + tracing::info!("Start BlockRequest reconciler"); + let block_request_state = state.clone(); + tokio::spawn(async move { + reconciler::block_request::run(block_request_state, c.requeue_interval).await; + }); + } tracing::info!("Start Node watcher"); let node_state = state.clone(); @@ -118,19 +126,21 @@ async fn run(c: Controller, trace_config: TraceConfig) { reconciler::node_watcher::run(node_state, c.requeue_interval).await; }); - tracing::info!("Start Service watcher"); - let service_state = state.clone(); - let svc_allocator_set = allocator_set.clone(); - tokio::spawn(async move { - reconciler::service_watcher::run(service_state, c.requeue_interval, svc_allocator_set) - .await; - }); - - tracing::info!("Start Endpointslice watcher"); - let endpointslice_state = state.clone(); - tokio::spawn(async move { - reconciler::endpointslice_watcher::run(endpointslice_state, c.requeue_interval).await; - }); + if c.mode.eq(&Mode::LB) || c.mode.eq(&Mode::Dual) { + tracing::info!("Start Service watcher"); + let service_state = state.clone(); + let svc_allocator_set = allocator_set.clone(); + tokio::spawn(async move { + reconciler::service_watcher::run(service_state, c.requeue_interval, svc_allocator_set) + .await; + }); + + tracing::info!("Start Endpointslice watcher"); + let endpointslice_state = state.clone(); + tokio::spawn(async move { + reconciler::endpointslice_watcher::run(endpointslice_state, c.requeue_interval).await; + }); + } tracing::info!("Start BGPAdvertisement reconciler"); let bgp_advertisement_state = state.clone(); diff --git a/sartd/src/kubernetes/src/controller/webhook/address_pool.rs b/sartd/src/kubernetes/src/controller/webhook/address_pool.rs index f772f85..4a8a3ce 100644 --- a/sartd/src/kubernetes/src/controller/webhook/address_pool.rs +++ b/sartd/src/kubernetes/src/controller/webhook/address_pool.rs @@ -6,7 +6,7 @@ use kube::{ response::StatusSummary, Status, }, - Api, Client, + Api, Client, ResourceExt, }; use crate::crd::address_pool::{AddressPool, MAX_BLOCK_SIZE}; @@ -58,11 +58,11 @@ pub async fn handle_validation( let address_pool_api = Api::::all(client); match address_pool_api.list(&ListParams::default()).await { Ok(ap_list) => { - if ap_list - .items - .iter() - .any(|p| p.spec.auto_assign.unwrap_or(false) && p.spec.r#type.eq(&ap.spec.r#type)) - { + if ap_list.items.iter().any(|p| { + p.spec.auto_assign.unwrap_or(false) + && p.spec.r#type.eq(&ap.spec.r#type) + && p.name_any().ne(&ap.name_any()) + }) { tracing::warn!("Auto assignable AddressPool already exists."); resp.allowed = false; resp.result = Status { diff --git a/sartd/src/kubernetes/src/controller/webhook/bgp_peer.rs b/sartd/src/kubernetes/src/controller/webhook/bgp_peer.rs index 74e86b4..6474cb0 100644 --- a/sartd/src/kubernetes/src/controller/webhook/bgp_peer.rs +++ b/sartd/src/kubernetes/src/controller/webhook/bgp_peer.rs @@ -72,7 +72,7 @@ pub async fn handle_validation( tracing::info!( name = admission_req.name, - "incoming request try to updates existing object" + "incoming request tries to update existing object" ); let old = admission_req.old_object.unwrap(); diff --git 
a/sartd/src/kubernetes/src/crd/cluster_bgp.rs b/sartd/src/kubernetes/src/crd/cluster_bgp.rs index e466902..93f864c 100644 --- a/sartd/src/kubernetes/src/crd/cluster_bgp.rs +++ b/sartd/src/kubernetes/src/crd/cluster_bgp.rs @@ -80,4 +80,5 @@ pub enum RouterIdSelectionType { pub struct SpeakerConfig { pub path: String, pub timeout: Option, + pub multipath: Option, } diff --git a/sartd/src/kubernetes/src/fixture.rs b/sartd/src/kubernetes/src/fixture.rs index 5dfeb5a..2781749 100644 --- a/sartd/src/kubernetes/src/fixture.rs +++ b/sartd/src/kubernetes/src/fixture.rs @@ -29,14 +29,21 @@ pub mod reconciler { endpointslice_watcher::ENDPOINTSLICE_FINALIZER, service_watcher::SERVICE_FINALIZER, }, crd::{ - address_block::{AddressBlock, AddressBlockSpec, ADDRESS_BLOCK_FINALIZER}, address_pool::{ + address_block::{AddressBlock, AddressBlockSpec, ADDRESS_BLOCK_FINALIZER}, + address_pool::{ AddressPool, AddressPoolSpec, AddressType, AllocationType, ADDRESS_POOL_FINALIZER, - }, bgp_advertisement::{ + }, + bgp_advertisement::{ BGPAdvertisement, BGPAdvertisementSpec, Protocol, BGP_ADVERTISEMENT_FINALIZER, - }, bgp_peer::{BGPPeer, BGPPeerSlim, BGPPeerSpec, PeerConfig}, bgp_peer_template::{BGPPeerTemplate, BGPPeerTemplateSpec}, block_request::{BlockRequest, BlockRequestSpec, BLOCK_REQUEST_FINALIZER}, cluster_bgp::{ + }, + bgp_peer::{BGPPeer, BGPPeerSlim, BGPPeerSpec, PeerConfig}, + bgp_peer_template::{BGPPeerTemplate, BGPPeerTemplateSpec}, + block_request::{BlockRequest, BlockRequestSpec, BLOCK_REQUEST_FINALIZER}, + cluster_bgp::{ AsnSelectionType, AsnSelector, ClusterBGP, ClusterBGPSpec, RouterIdSelectionType, RouterIdSelector, SpeakerConfig, CLUSTER_BGP_FINALIZER, - }, node_bgp::{NodeBGP, NodeBGPSpec} + }, + node_bgp::{NodeBGP, NodeBGPSpec}, }, }; @@ -331,6 +338,24 @@ pub mod reconciler { } } + pub fn test_address_pool_pod_another() -> AddressPool { + AddressPool { + metadata: ObjectMeta { + name: Some("test-pool".to_string()), + finalizers: Some(vec![ADDRESS_POOL_FINALIZER.to_string()]), + ..Default::default() + }, + spec: AddressPoolSpec { + cidr: "10.0.0.0/30".to_string(), + r#type: AddressType::Pod, + alloc_type: Some(AllocationType::Bit), + block_size: 31, + auto_assign: Some(true), + }, + status: None, + } + } + pub fn test_address_block_pod() -> AddressBlock { AddressBlock { metadata: ObjectMeta { @@ -420,6 +445,7 @@ pub mod reconciler { speaker: SpeakerConfig { path: "localhost:5000".to_string(), timeout: None, + multipath: Some(false), }, peers: Some(vec![PeerConfig { peer_template_ref: Some("test-bgp-peer-templ".to_string()), @@ -522,6 +548,7 @@ pub mod reconciler { speaker: SpeakerConfig { path: "localhost".to_string(), timeout: None, + multipath: Some(false), }, peers: Some(vec![ BGPPeerSlim { @@ -533,6 +560,7 @@ pub mod reconciler { speaker: SpeakerConfig { path: "localhost".to_string(), timeout: None, + multipath: Some(false), }, groups: None, ..Default::default() @@ -547,6 +575,7 @@ pub mod reconciler { speaker: SpeakerConfig { path: "localhost".to_string(), timeout: None, + multipath: Some(false), }, groups: None, ..Default::default() @@ -568,6 +597,7 @@ pub mod reconciler { speaker: SpeakerConfig { path: "localhost".to_string(), timeout: None, + multipath: Some(false), }, peers: Some(vec![BGPPeerSlim { name: "test2-peer1".to_string(), @@ -578,6 +608,7 @@ pub mod reconciler { speaker: SpeakerConfig { path: "localhost".to_string(), timeout: None, + multipath: Some(false), }, groups: None, ..Default::default() @@ -598,6 +629,7 @@ pub mod reconciler { speaker: SpeakerConfig { path: 
"localhost".to_string(), timeout: None, + multipath: Some(false), }, peers: Some(vec![BGPPeerSlim { name: "test3-peer1".to_string(), @@ -608,6 +640,7 @@ pub mod reconciler { speaker: SpeakerConfig { path: "localhost".to_string(), timeout: None, + multipath: Some(false), }, groups: None, ..Default::default() diff --git a/sartd/src/kubernetes/tests/agent_cni_server_test.rs b/sartd/src/kubernetes/tests/agent_cni_server_test.rs new file mode 100644 index 0000000..b530411 --- /dev/null +++ b/sartd/src/kubernetes/tests/agent_cni_server_test.rs @@ -0,0 +1,272 @@ +use std::{net::IpAddr, str::FromStr, sync::Arc, time::Duration}; + +use crate::common::{setup_kind, TestContainer, TestRoutingRule}; + +use ipnet::IpNet; +use k8s_openapi::api::core::v1::{Container, Node, Pod, PodSpec, ServiceAccount}; +use kube::{ + api::{ListParams, ObjectMeta, Patch, PatchParams}, + Api, Client, ResourceExt, +}; +use sartd_ipam::manager::AllocatorSet; +use sartd_kubernetes::{ + agent::{ + cni::server::{CNIServer, CNI_ROUTE_TABLE_ID}, + reconciler::address_block::{self, PodAllocator}, + }, + context::State, + controller, + crd::{ + address_block::{AddressBlock, ADDRESS_BLOCK_FINALIZER}, + address_pool::AddressPool, + block_request::{BlockRequest, BLOCK_REQUEST_FINALIZER}, + }, + fixture::{reconciler::test_address_pool_pod_another, test_trace}, +}; + +use tokio::sync::mpsc::unbounded_channel; + +mod common; + +#[tokio::test] +#[ignore = "use kind cluster"] +async fn integration_test_agent_cni_server() { + dbg!("Creating a kind cluster"); + setup_kind(); + + test_trace().await; + + dbg!("Getting kube client"); + let client = Client::try_default().await.unwrap(); + let ctx = State::default().to_context(client.clone(), 30); + let allocator_set = Arc::new(AllocatorSet::new()); + + let node_api = Api::::all(client.clone()); + let node_list = node_api.list(&ListParams::default()).await.unwrap(); + assert_eq!(node_list.items.len(), 1); + let node = node_list.items.first().unwrap(); + let node_addr = IpAddr::from_str( + &node + .status + .clone() + .unwrap() + .addresses + .unwrap() + .first() + .unwrap() + .address, + ) + .unwrap(); + + let (sender, receiver) = unbounded_channel(); + + let cni_server = CNIServer::new( + client.clone(), + allocator_set.clone(), + node.name_any(), + node_addr, + CNI_ROUTE_TABLE_ID, + receiver, + ); + + let endpoint = "127.0.0.1:6789"; + + dbg!("Spawning CNI server"); + tokio::spawn(async move { + sartd_kubernetes::agent::cni::server::run(endpoint, cni_server).await; + }); + + dbg!("Waiting to run CNI server"); + let mut cni_client = tokio::time::timeout(Duration::from_secs(60), async move { + loop { + if let Ok(client) = sartd_proto::sart::cni_api_client::CniApiClient::connect(format!( + "http://{endpoint}" + )) + .await + { + break client; + } + } + }) + .await + .unwrap(); + + // TestRoutingRule implements Drop trait to clean up routing rule in the kernel, when this test is finished. 
+    let _rule4 = TestRoutingRule::new(CNI_ROUTE_TABLE_ID, false);
+    let _rule6 = TestRoutingRule::new(CNI_ROUTE_TABLE_ID, true);
+
+    let ap = test_address_pool_pod_another();
+
+    dbg!("Creating an AddressPool");
+    let address_pool_api = Api::<AddressPool>::all(client.clone());
+    let ssapply = PatchParams::apply("ctrltest");
+    let ap_patch = Patch::Apply(ap.clone());
+    address_pool_api
+        .patch(&ap.name_any(), &ssapply, &ap_patch)
+        .await
+        .unwrap();
+
+    let applied_ap = address_pool_api.get(&ap.name_any()).await.unwrap();
+
+    dbg!("Reconciling AddressPool");
+    controller::reconciler::address_pool::reconciler(Arc::new(applied_ap.clone()), ctx.clone())
+        .await
+        .unwrap();
+
+    dbg!("Getting the applied pool");
+    let applied_ap = address_pool_api.get(&ap.name_any()).await.unwrap();
+    assert!(applied_ap.status.is_none());
+
+    dbg!("Waiting for the service account to be created in the default namespace");
+    let service_account_api = Api::<ServiceAccount>::namespaced(client.clone(), "default");
+    tokio::time::timeout(Duration::from_secs(30), async move {
+        let mut ticker = tokio::time::interval(Duration::from_secs(1));
+        loop {
+            ticker.tick().await;
+            if let Ok(_sa) = service_account_api.get("default").await {
+                break;
+            }
+        }
+    })
+    .await
+    .unwrap();
+
+    dbg!("Creating a dummy pod");
+    let pod1 = Pod {
+        metadata: ObjectMeta {
+            name: Some("pod1".to_string()),
+            namespace: Some("default".to_string()),
+            ..Default::default()
+        },
+        spec: Some(PodSpec {
+            containers: vec![Container {
+                image: Some("ghcr.io/terassyi/test-server:0.1".to_string()),
+                name: "pod1".to_string(),
+                ..Default::default()
+            }],
+            service_account_name: Some("default".to_string()),
+            ..Default::default()
+        }),
+        status: None,
+    };
+    let pod_api = Api::<Pod>::namespaced(client.clone(), "default");
+    let pod1_patch = Patch::Apply(pod1.clone());
+    pod_api
+        .patch(&pod1.name_any(), &ssapply, &pod1_patch)
+        .await
+        .unwrap();
+    let container1 = TestContainer::new(
+        "1111111111111111",
+        "/var/run/netns/pod1",
+        "eth0",
+        "opt/cni/bin/sart-cni",
+        "pod1-uid",
+        "pod1",
+        "default",
+    );
+
+    dbg!("Preparing AddressBlock reconciler");
+    let pod_allocator = Arc::new(PodAllocator {
+        allocator: allocator_set.clone(),
+        notifier: sender.clone(),
+    });
+    let ab_ctx = State::default().to_context_with(client.clone(), 30, pod_allocator.clone());
+    let address_block_api = Api::<AddressBlock>::all(client.clone());
+
+    dbg!("Spawning BlockRequest reconciler");
+    let block_request_api = Api::<BlockRequest>::all(client.clone());
+    let block_request_api_cloned = block_request_api.clone();
+    let address_pool_api_cloned = address_pool_api.clone();
+    let ssapply_cloned = ssapply.clone();
+    tokio::spawn(async move {
+        let mut br = tokio::time::timeout(Duration::from_secs(60), async move {
+            loop {
+                if let Ok(br) = block_request_api_cloned
+                    .get("test-pool-sart-integration-control-plane")
+                    .await
+                {
+                    break br;
+                }
+            }
+        })
+        .await
+        .unwrap();
+        br.finalizers_mut()
+            .insert(0, BLOCK_REQUEST_FINALIZER.to_string());
+        br.metadata.managed_fields = None;
+        let br_patch = Patch::Apply(br.clone());
+        block_request_api
+            .patch(&br.name_any(), &ssapply_cloned, &br_patch)
+            .await
+            .unwrap();
+        let applied_br = block_request_api.get(&br.name_any()).await.unwrap();
+
+        dbg!("Reconciling a BlockRequest");
+        sartd_kubernetes::controller::reconciler::block_request::reconciler(
+            Arc::new(applied_br),
+            ctx.clone(),
+        )
+        .await
+        .unwrap();
+
+        dbg!("Reconciling an AddressPool to create a new AddressBlock");
+        let ap = address_pool_api_cloned.get("test-pool").await.unwrap();
+
controller::reconciler::address_pool::reconciler(Arc::new(ap.clone()), ctx.clone()) + .await + .unwrap(); + }); + + dbg!("Spawning AddressBlock reconciler"); + let address_block_api_cloned = address_block_api.clone(); + let ssapply_cloned = ssapply.clone(); + tokio::spawn(async move { + let mut ab = tokio::time::timeout(Duration::from_secs(60), async move { + loop { + if let Ok(ba_list) = address_block_api_cloned.list(&ListParams::default()).await { + if !ba_list.items.is_empty() { + break ba_list.items.first().unwrap().clone(); + } + } + } + }) + .await + .unwrap(); + + ab.finalizers_mut() + .insert(0, ADDRESS_BLOCK_FINALIZER.to_string()); + ab.metadata.managed_fields = None; + let ab_patch = Patch::Apply(ab.clone()); + address_block_api + .patch(&ab.name_any(), &ssapply_cloned, &ab_patch) + .await + .unwrap(); + + let applied_ab = address_block_api.get(&ab.name_any()).await.unwrap(); + + dbg!("Reconciling an AddressBlock"); + address_block::reconciler(Arc::new(applied_ab), ab_ctx.clone()) + .await + .unwrap(); + }); + dbg!("Calling Add command by client"); + let res = cni_client.add(container1.args.clone()).await.unwrap(); + + let resp = res.get_ref(); + + dbg!("Checking the response"); + assert_eq!(resp.interfaces.len(), 1); + assert_eq!(resp.ips.len(), 1); + assert_eq!(resp.routes.len(), 1); + + dbg!("Checking the allocation"); + let pod_addr = IpNet::from_str(&resp.ips[0].address).unwrap(); + { + let tmp_allocator_set = allocator_set.clone(); + let tmp_allocator = tmp_allocator_set.inner.lock().unwrap(); + let block = tmp_allocator + .blocks + .get("test-pool-sart-integration-control-plane-10.0.0.0") + .unwrap(); + assert!(block.allocator.is_allocated(&pod_addr.addr())); + } +} diff --git a/sartd/src/kubernetes/tests/common/mod.rs b/sartd/src/kubernetes/tests/common/mod.rs index 9b98d28..ab23d32 100644 --- a/sartd/src/kubernetes/tests/common/mod.rs +++ b/sartd/src/kubernetes/tests/common/mod.rs @@ -1,6 +1,9 @@ use std::collections::BTreeMap; +use std::path::PathBuf; +use std::str::FromStr; use kube::core::ObjectMeta; +use sartd_kubernetes::agent::cni::netns::NetNS; use sartd_kubernetes::crd::{ address_pool::AddressType, bgp_advertisement::{ @@ -11,12 +14,14 @@ use sartd_kubernetes::crd::{ cluster_bgp::{SpeakerConfig, ASN_LABEL}, node_bgp::{NodeBGP, NodeBGPSpec, NodeBGPStatus, NODE_BGP_FINALIZER}, }; +use sartd_proto::sart::Args; // Make sure kind binary is in here const KIND_BIN: &str = "../../../bin/kind"; const KUBECTL_BIN: &str = "../../../bin/kubectl"; -const CRD_MANIFEST: &str = "../../../manifests/crd/sart.yaml"; +const CRD_MANIFEST: &str = "../../../manifests/base/crd/sart.yaml"; const KIND_CLUSTER_NAME: &str = "sart-integration"; +const KIND_CONFIG_DISABLE_CNI: &str = "tests/config/config.yaml"; pub(super) const KIND_NODE_CP: &str = "sart-integration-control-plane"; const KIND_CLUSTER_IMAGE: &str = "kindest/node"; const KIND_CLUSTER_IMAGE_VERSION_ENV: &str = "KIND_NODE_VERSION"; @@ -110,6 +115,7 @@ pub fn test_node_bgp() -> NodeBGP { speaker: SpeakerConfig { path: "localhost:5000".to_string(), timeout: None, + multipath: Some(false), }, peers: Some(vec![BGPPeerSlim { name: "test-peer1".to_string(), @@ -137,6 +143,7 @@ pub fn test_bgp_peer() -> BGPPeer { speaker: SpeakerConfig { path: "localhost:5000".to_string(), timeout: None, + multipath: Some(false), }, ..Default::default() }, @@ -166,3 +173,78 @@ pub fn test_bgp_advertisement() -> BGPAdvertisement { }), } } + +pub struct TestContainer { + pub args: Args, + pub netns: NetNS, +} + +impl TestContainer { + pub fn new( + 
+impl TestContainer {
+    pub fn new(
+        container_id: &str,
+        netns: &str,
+        ifname: &str,
+        path: &str,
+        uid: &str,
+        pod_name: &str,
+        namespace: &str,
+    ) -> Self {
+        let ns_path = PathBuf::from_str(netns).unwrap();
+        let mut cmd = std::process::Command::new("ip");
+        cmd.args([
+            "netns",
+            "add",
+            ns_path.file_name().unwrap().to_str().unwrap(),
+        ]);
+        cmd.output().unwrap();
+
+        TestContainer {
+            args: Args {
+                container_id: container_id.to_string(),
+                netns: netns.to_string(),
+                ifname: ifname.to_string(),
+                path: vec![path.to_string()],
+                // e.g. K8S_POD_INFRA_CONTAINER_ID=0a6a4b09df59d64e3be5cf662808076fee664447a1c90dd05a5d5588e2cd6b5a;K8S_POD_UID=b0e1fc4a-f842-4ec2-8e23-8c0c8da7b5e5;IgnoreUnknown=1;K8S_POD_NAMESPACE=kube-system;K8S_POD_NAME=coredns-787d4945fb-7xrrd
+                args: format!("K8S_POD_INFRA_CONTAINER_ID={container_id};K8S_POD_UID={uid};IgnoreUnknown=1;K8S_POD_NAMESPACE={namespace};K8S_POD_NAME={pod_name}"),
+                prev_result: None,
+                conf: None,
+                data: String::new(),
+            },
+            netns: NetNS::try_from(netns).unwrap(),
+        }
+    }
+}
+
+impl Drop for TestContainer {
+    fn drop(&mut self) {
+        let mut cmd = std::process::Command::new("ip");
+        cmd.args([
+            "netns",
+            "del",
+            self.netns.path().file_name().unwrap().to_str().unwrap(),
+        ]);
+        cmd.output().unwrap();
+    }
+}
+
+pub struct TestRoutingRule {
+    v6: bool,
+    table: u32,
+}
+
+impl TestRoutingRule {
+    pub fn new(table: u32, v6: bool) -> Self {
+        Self { v6, table }
+    }
+}
+
+impl Drop for TestRoutingRule {
+    fn drop(&mut self) {
+        let mut cmd = std::process::Command::new("ip");
+        if self.v6 {
+            cmd.arg("-6");
+        }
+        cmd.args(["rule", "del", "table", &format!("{}", self.table)]);
+        cmd.output().unwrap();
+    }
+}
diff --git a/sartd/src/kubernetes/tests/config/.cargo b/sartd/src/kubernetes/tests/config/.cargo
new file mode 100644
index 0000000..ca6c72f
--- /dev/null
+++ b/sartd/src/kubernetes/tests/config/.cargo
@@ -0,0 +1,2 @@
+[target.x86_64-unknown-linux-gnu]
+runner = 'sudo -E'
diff --git a/sartd/src/kubernetes/tests/config/config.yaml b/sartd/src/kubernetes/tests/config/config.yaml
new file mode 100644
index 0000000..1616db8
--- /dev/null
+++ b/sartd/src/kubernetes/tests/config/config.yaml
@@ -0,0 +1,6 @@
+apiVersion: kind.x-k8s.io/v1alpha4
+kind: Cluster
+networking:
+  disableDefaultCNI: true
+nodes:
+  - role: control-plane
diff --git a/sartd/src/kubernetes/tests/config/dummy_kubeconfig b/sartd/src/kubernetes/tests/config/dummy_kubeconfig
new file mode 100644
index 0000000..c031b85
--- /dev/null
+++ b/sartd/src/kubernetes/tests/config/dummy_kubeconfig
@@ -0,0 +1,19 @@
+apiVersion: v1
+clusters:
+- cluster:
+    certificate-authority-data: LS0tLS1CRUdJTiBDRVJUSUZJQ0FURS0tLS0tCk1JSUMvakNDQWVhZ0F3SUJBZ0lCQURBTkJna3Foa2lHOXcwQkFRc0ZBREFWTVJNd0VRWURWUVFERXdwcmRXSmwKY201bGRHVnpNQjRYRFRJME1ESXlPVEUxTkRVMU1sb1hEVE0wTURJeU5qRTFORFUxTWxvd0ZURVRNQkVHQTFVRQpBeE1LYTNWaVpYSnVaWFJsY3pDQ0FTSXdEUVlKS29aSWh2Y05BUUVCQlFBRGdnRVBBRENDQVFvQ2dnRUJBTWhvCmU2TEE0YmZXZ2tmMUZ2UERndHZFb3N3WVY0NE45RkRRcmMvVGZWYTJIRjNNTmtjQndrR0VSeHZhL2xhbkNtdWcKQ05HeExucitBakVweVJZNUJoOENKREx5VWhCa1VhR0lYV0R2ZEluaHVrYXdaMk1LMkpRemFndTkvZVdpLzdWVgp0MFA5Rk54WmNxcVJCcDNRYUhodUl0eDZrd2tYMmNOSW1zMWgxalovRUR6RDlOQWlTSWhEdVlTbVBwNW55WkF2CjlvSTdhajhxS2IybFdlWXlFd2VkNDBWeXNNOTh5dTAxMHoxbTB2d2R6UkdRWFdFalYxWGZXeXlFL20wNEdCNzEKczVEemhYdkNCUVNnc3FsTUNVZDQ3bk5tczZlYnFPQ1M3dDQ0a3A3Zlp3cGlTd0FJYjFQZlZseVhCN1o4WVVldgpTZTZWQlF6SnNlZlVZcEN6cnlzQ0F3RUFBYU5aTUZjd0RnWURWUjBQQVFIL0JBUURBZ0trTUE4R0ExVWRFd0VCCi93UUZNQU1CQWY4d0hRWURWUjBPQkJZRUZHZUdZTzRnUEFodDdOVXp3VUQ0ellVYzk0V3lNQlVHQTFVZEVRUU8KTUF5Q0NtdDFZbVZ5Ym1WMFpYTXdEUVlKS29aSWh2Y05BUUVMQlFBRGdnRUJBTENuZWFTY1p2Ny8ydkdWSlZDSQp5aHBzRVJzcW1XQTZIWkhFMVE0Q3FselI0ZmNMS1JrSjMwd1JlTVlYSzdQYjJianpMUnkxRDRxSktRc0JwZVY1CjNOaThiRTNCRmVHdG54OHZrdWM4THJOU3V2S01FZHVZeWJqSzIycWJyMmhTYUkxay9aRHNhN1JtZDFOeU9Cc0YKOExsamVMdzVBcE41NEpKRWR3emlCUCt5eTVTSldUbEM4a0RpWFVscFE5T1ZmWG9IMHFteTh6c2liNXVMWmNEaAppQWQyYjZxTHJQaU9NYjMzY1I2U0t1YkdpYVcxTVhVTEVrNG5NV2dZYzFtR3BoOEtMMC84TERxTnIvWGo2SVB5Cm56Q2VPL000WHY2Y3pkSDVrcE01UWVrV0xiaWJsMVpERXcwK2c3NVJJQVZWUnNHK0tFZGVlSXJUSmVnVkp6RzQKOG40PQotLS0tLUVORCBDRVJUSUZJQ0FURS0tLS0tCg==
+    server: https://127.0.0.1:39153
+  name: kind-sart-integration
+contexts:
+- context:
+    cluster: kind-sart-integration
+    user: kind-sart-integration
+  name: kind-sart-integration
+current-context: kind-sart-integration
+kind: Config
+preferences: {}
+users:
+- name: kind-sart-integration
+  user:
+    client-certificate-data: LS0tLS1CRUdJTiBDRVJUSUZJQ0FURS0tLS0tCk1JSURJVENDQWdtZ0F3SUJBZ0lJZm8zUWZrYkgxZFF3RFFZSktvWklodmNOQVFFTEJRQXdGVEVUTUJFR0ExVUUKQXhNS2EzVmlaWEp1WlhSbGN6QWVGdzB5TkRBeU1qa3hOVFExTlRKYUZ3MHlOVEF5TWpneE5UUTFOVEphTURReApGekFWQmdOVkJBb1REbk41YzNSbGJUcHRZWE4wWlhKek1Sa3dGd1lEVlFRREV4QnJkV0psY201bGRHVnpMV0ZrCmJXbHVNSUlCSWpBTkJna3Foa2lHOXcwQkFRRUZBQU9DQVE4QU1JSUJDZ0tDQVFFQXE2b1dqRzZVSDM0ZFB2MHAKU1dySDFJQUNDQm01ajNLalhicDV6eXhmeFRIVXRJRzZmWlluYml0MXkvNUQ0bHZvVGhOVHVFTXlwaWJ1TmlscApSMWFQT0oxMTlGaUpMNGYrM3lMZDRXaE1HcWVPR1c3N1drQThNejVMNDdHV09iREFtaUFUZzNkTGh5bVhLT1doCkE2SXhXVkliZDNpRG9zU2JsYWFLVmVudHk2R28yd1ErUDY2eG1SN1ovOS96b0FscUJhYVAzZm8vK1hQTWdpUlYKSkd1eFVaYXhtR3p6YmFYaE00MHhYY1kxZU1Ld0lYcHNhVytsM3hyMUtlZFkzT0FXbmF0RWIvVzFnVHcxaThMdgpFYytTSUVPUkt0cTdGRENqTzZ6aHZvVDB5ZGdxNmQwV244T05IdS9BOFQxeFFvNFpyUllqTW5OWFNWbks2MlBPCitEQk5CUUlEQVFBQm8xWXdWREFPQmdOVkhROEJBZjhFQkFNQ0JhQXdFd1lEVlIwbEJBd3dDZ1lJS3dZQkJRVUgKQXdJd0RBWURWUjBUQVFIL0JBSXdBREFmQmdOVkhTTUVHREFXZ0JSbmhtRHVJRHdJYmV6Vk04RkErTTJGSFBlRgpzakFOQmdrcWhraUc5dzBCQVFzRkFBT0NBUUVBYkR1Z05aV0xvTWQycnB5MDk5eU1Kc2dGMXlKR0J0d3RSelFxCkxpbkRLNDdReVlyQXlOUTNjVkUxNmlmc3h2Q1M4Z0drRnBMQzAxS3UwOVVjTlMySm5ReTFkaHRtN0FFMEE2VDMKcVpuVGFZVWdFQ1Q4TCs3VDVpNFBhZHBzeHZkR1JtSWsvM0dNTk1JanhqY21RVG8rc1AzOTJ5YkFIRENpcThJUApqOEVtbEdBbWVHTW14V0RLb3FqUGwwaWwyRTMvdFVmRFVyUmFhOXFxVXBBZDE3bjZJSUpDNSt6d0N4MVFqcFc3ClV1em42RnhXcEh5WDN0Sm9NUEhQNzF1aGp0cjNBRVZhMzU5L1dQckJGbWluZE1HczM5RW5BUGlTNGY0VE1vU24Kdkw4eGlJaUg1V3g0RDRPQUtxSjVVN0hzNVVuK3JWZ0EyVXJFRUtMRGFZYnJTa2ErSEE9PQotLS0tLUVORCBDRVJUSUZJQ0FURS0tLS0tCg==
+    client-key-data: LS0tLS1CRUdJTiBSU0EgUFJJVkFURSBLRVktLS0tLQpNSUlFb3dJQkFBS0NBUUVBcTZvV2pHNlVIMzRkUHYwcFNXckgxSUFDQ0JtNWozS2pYYnA1enl4ZnhUSFV0SUc2CmZaWW5iaXQxeS81RDRsdm9UaE5UdUVNeXBpYnVOaWxwUjFhUE9KMTE5RmlKTDRmKzN5TGQ0V2hNR3FlT0dXNzcKV2tBOE16NUw0N0dXT2JEQW1pQVRnM2RMaHltWEtPV2hBNkl4V1ZJYmQzaURvc1NibGFhS1ZlbnR5NkdvMndRKwpQNjZ4bVI3Wi85L3pvQWxxQmFhUDNmby8rWFBNZ2lSVkpHdXhVWmF4bUd6emJhWGhNNDB4WGNZMWVNS3dJWHBzCmFXK2wzeHIxS2VkWTNPQVduYXRFYi9XMWdUdzFpOEx2RWMrU0lFT1JLdHE3RkRDak82emh2b1QweWRncTZkMFcKbjhPTkh1L0E4VDF4UW80WnJSWWpNbk5YU1ZuSzYyUE8rREJOQlFJREFRQUJBb0lCQUhpYSsxU1dyQUtrUEMvdwo5WHpiUktJTGZXSHJrTzh6ZjR0dVkvNG1Fdk1jVzNDbVpEdXZydXc4SkhxNW5ULy9pbXF3TVhXWDRKSFRjREVsClR3NEx4bnZrUk9ieS9ROGo0UEVzTWU4WHM4QldVQU9XY1ltcTc2Q2c5L2gwNERLUCtBRlEzK1RSZjRMcXVZVEcKTUNXNVlpcDdBVWtpcG5sSTRIWjNsMkZkQlBDMjhMRG1ZdlhCSWxza0E2S2o1cVhYWk13Nll3ekxmKzRIajJXTwptSnhkWFUwendWVUFMcjZVRnZoMjlibUV5UDZxK0dHZms1Q051bjNjM0dvU0FUNWlscmpONkcxbWxHTGx5ZmN6ClNBb3ZFbDRWaVFiV2t2U1A3U1MvbVY0U2NGcDU2dGZzRUJHb1VuYUtoZkR4OHZHQVFkZlpidk1zWnR1bWhyVEYKTFRmTEFORUNnWUVBenlNZTYrb0NOcW12NnQyeWd4azYvQlFvUTFWTGdNQ2NuNHFoRTZRSSs2SndSaFpMakJVdwp2dlFkRFk4TlVieHVMRTg4ZVd5ODZmMEZaSktmU2g1L0VtRlRVMEp2Qm9lNUtJYlRwL3Nyeld3Smp6ckZWWU1RCmpkSEgvbTNNakZFM2xkb0hmQ1lHV3VNZ3ZhN3dRaUVwTmM2bXMxMDNXdUJDcUVvTFIyNFBnTk1DZ1lFQTFDaksKQXgzT3J5Rk50d2ZOelZoUWN5Q1VRT0tKNUZUWDBBc3BWRUZZKzFNSEhXQmN3emxsTTUwbEhSRDF4SU9wOFVnQQpPa2xaMDBnT2IzdHNqbnhadlBCZW0yMEE0VkVmczZlekNPdXRnWm5JeEx0OU9UUWwxWTlkR251Q3FlclFvbnY5CjIyMkcvMloxYmMvVnZHQ1BHc0Z3V3ZnRlNacVdpR2NPZGVYdWs4Y0NnWUVBZ3hGSkZxUTdCRlc0LzllS2hsUG4Kcyt3WlVnbmR2Ym1qSVh5NTBkREE4bURseENPRmFEMWNXK2Zobmx1L1pOSGpzTnZ1ZXpMb0V2Tk1mMGdiek8yaApJSU0vRTNiOUE5Zkx2SzRicmJaTUVDN2xtYURwanVOaERTd0o3WitTaDlNNFpmVFJPYWhoNnc2Ky8rZDdGWTBnCkkzRUFvSXhDWTVsMTlFRHVCQ1BMWVlzQ2dZQjRhMFp1Q21jYnhCMW1POG0yT2VBLzJFZ05wQWQ4VUNQR2MrOWIKWVZQdUlxd3hJbGJFZ0RsTzJHME9XR2dHeUd5WEJMVU1ZRUhaeUZaWk5JYmpRUFYyWmtCc202d2FOMzkwNjBTVgpWWisvcGRmc3dqWkxWNnY1MFV1WkVtMlJvRFFYWmFVcS9MblpFUUNwK1lOcFRxMHJGSXgvTmJOYW1hSERmaXNaCkJBOHNWUUtCZ0NSdnV4Q3NHSkJhdnpDdmhZT3F3MVdZYXV5bXpPcHpGMTY1ajhDNlZmMldWaXVqOThaY3I5c2MKTjB3WWNkUURUZ2cxMFhQMUhZN250bXpoN3AzWVgwQ2pnY0xuV1JBTVJSTnBiUTlOSTRQbWdQQjRyeUY3ZWlTcApWNXMrbFNJekU1UHhyeUJ3WkJoOTduYlRqbTJWcmJnMW9kamNlZVRBVzB1WXVJWGN2TWtQCi0tLS0tRU5EIFJTQSBQUklWQVRFIEtFWS0tLS0tCg==