diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 000000000..017b030d5 --- /dev/null +++ b/.dockerignore @@ -0,0 +1,6 @@ +* +!hcloud/ +!internal/ +!go.mod +!go.sum +!main.go diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index b21c4fc4f..cf932430e 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -13,7 +13,7 @@ jobs: - name: Run tests run: | go vet ./... - go test $(go list ./... | grep -v e2etests) + go test $(go list ./... | grep -v e2e) lint: name: Lint diff --git a/.github/workflows/test_e2e.yml b/.github/workflows/test_e2e.yml index a6b6db125..08672919e 100644 --- a/.github/workflows/test_e2e.yml +++ b/.github/workflows/test_e2e.yml @@ -1,46 +1,11 @@ name: Run e2e tests on: [ pull_request ] jobs: - k8s: - runs-on: ubuntu-latest - strategy: - matrix: - k8s: [ 1.24.10, 1.25.6, 1.26.1 ] - fail-fast: false - name: k8s ${{ matrix.k8s }} - steps: - - uses: actions/setup-go@v4 - with: - go-version: '1.19' - - uses: actions/checkout@master - - name: HCLOUD_TOKEN - env: - HCLOUD_TOKEN: ${{ secrets.HCLOUD_TOKEN }} - TTS_TOKEN: ${{ secrets.TTS_TOKEN }} - run: | - set -ueo pipefail - if [[ "${HCLOUD_TOKEN:-}" != "" ]]; then - echo "HCLOUD_TOKEN=$HCLOUD_TOKEN" >> "$GITHUB_ENV" - elif [[ "${TTS_TOKEN:-}" != "" ]]; then - token="$(./scripts/get-token.sh)" - echo "::add-mask::$token" - echo "HCLOUD_TOKEN=$token" >> "$GITHUB_ENV" - else - echo "::error ::Couldn't determine HCLOUD_TOKEN. Check that repository secrets are setup correctly." - exit 1 - fi - - name: Run tests - env: - K8S_VERSION: k8s-${{ matrix.k8s }} - USE_NETWORKS: yes - run: | - go test $(go list ./... | grep e2etests) -v -timeout 60m - ./scripts/delete-token.sh $HCLOUD_TOKEN k3s: runs-on: ubuntu-latest strategy: matrix: - k3s: [ v1.24.10+k3s1, v1.25.6+k3s1, v1.26.1+k3s1 ] + k3s: [ v1.24, v1.25, v1.26 ] fail-fast: false name: k3s ${{ matrix.k3s }} steps: @@ -64,10 +29,29 @@ jobs: echo "::error ::Couldn't determine HCLOUD_TOKEN. Check that repository secrets are setup correctly." exit 1 fi + + - uses: 3bit/setup-hcloud@v2 + - uses: yokawasa/action-setup-kube-tools@v0.9.2 + with: + setup-tools: | + helm + kubectl + skaffold + helm: v3.11.2 + kubectl: v1.26.3 + skaffold: v2.3.0 + - name: Run tests env: - K8S_VERSION: k3s-${{ matrix.k3s }} - USE_NETWORKS: yes + K3S_CHANNEL: ${{ matrix.k3s }} + SCOPE: gha-${{ github.run_id }}-${{ github.run_attempt }}-${{ matrix.k3s }} run: | - go test $(go list ./... | grep e2etests) -v -timeout 60m - ./scripts/delete-token.sh $HCLOUD_TOKEN + curl -sLS https://get.k3sup.dev | sh + + trap "hack/dev-down.sh; ./scripts/delete-token.sh $HCLOUD_TOKEN" EXIT + source <(hack/dev-up.sh) + + skaffold build --tag="e2e-${GITHUB_RUN_ID}-${GITHUB_RUN_NUMBER}" + tag=$(skaffold build --tag="e2e-${GITHUB_RUN_ID}-${GITHUB_RUN_NUMBER}" --quiet --output="{{ (index .Builds 0).Tag }}") + skaffold deploy --images=hetznercloud/hcloud-cloud-controller-manager=$tag + go test ./tests/e2e -v -timeout 60m diff --git a/.gitignore b/.gitignore index fbbfeea9e..898979d12 100644 --- a/.gitignore +++ b/.gitignore @@ -5,3 +5,5 @@ deploy/gen/ .coverage.out .envrc hcloud-cloud-controller-manager +*.tgz +hack/.* diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 36eac7a8a..32a6dc4f2 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -35,7 +35,7 @@ test:unit: variables: NODE_NAME: "test" script: - - go test $(go list ./... | grep -v e2etests) -v + - go test $(go list ./... 
| grep -v e2e) -v tags: - hc-bladerunner @@ -92,7 +92,7 @@ e2e: - docker login $CI_REGISTRY --username=$CI_REGISTRY_USER --password=$CI_REGISTRY_PASSWORD - docker pull $CCM_IMAGE_NAME script: - - go test $(go list ./... | grep e2etests) -v -timeout 60m + - go test $(go list ./... | grep e2e) -v -timeout 60m tags: - hc-bladerunner-build diff --git a/README.md b/README.md index cad3e4d8b..7039b0269 100644 --- a/README.md +++ b/README.md @@ -191,7 +191,7 @@ release. To run unit tests locally, execute ```sh -go test $(go list ./... | grep -v e2etests) -v +go test $(go list ./... | grep -v e2e) -v ``` Check that your go version is up to date, tests might fail if it is not. @@ -232,7 +232,7 @@ export KEEP_SERVER_ON_FAILURE=yes # Keep the test server after a test failure. 2. Run the tests ```bash -go test $(go list ./... | grep e2etests) -v -timeout 60m +go test $(go list ./... | grep e2e) -v -timeout 60m ``` The tests will now run and cleanup themselves afterwards. Sometimes it might happen that you need to clean up the diff --git a/e2etests/.gitignore b/e2etests/.gitignore deleted file mode 100644 index 108479e84..000000000 --- a/e2etests/.gitignore +++ /dev/null @@ -1,4 +0,0 @@ -ci-hcloud-ccm.tar -ssh_key -kubeconfig -join.txt diff --git a/e2etests/setup.go b/e2etests/setup.go deleted file mode 100644 index 8f4a295ad..000000000 --- a/e2etests/setup.go +++ /dev/null @@ -1,648 +0,0 @@ -package e2etests - -import ( - "bytes" - "context" - "crypto/rand" - "crypto/rsa" - "crypto/x509" - "encoding/pem" - "fmt" - "html/template" - "io" - "net" - "os" - "os/exec" - "regexp" - "strings" - "sync" - "time" - - "golang.org/x/crypto/ssh" - - "github.com/hetznercloud/hcloud-go/hcloud" -) - -type K8sDistribution string - -const ( - K8sDistributionK8s K8sDistribution = "k8s" - K8sDistributionK3s K8sDistribution = "k3s" - - imageName = "hetznercloud/hcloud-cloud-controller-manager" -) - -var ( - imageRegexp = regexp.MustCompilePOSIX(fmt.Sprintf("%s\\:.*$", imageName)) - instanceType = "cpx21" -) - -type hcloudK8sSetup struct { - Hcloud *hcloud.Client - HcloudToken string - K8sVersion string - K8sDistribution K8sDistribution - TestIdentifier string - ImageName string - KeepOnFailure bool - ClusterNode *hcloud.Server - ExtServer *hcloud.Server - UseNetworks bool - privKey string - sshKey *hcloud.SSHKey - network *hcloud.Network - clusterJoinCMD string - WorkerNodes []*hcloud.Server - testLabels map[string]string -} - -type cloudInitTmpl struct { - K8sVersion string - HcloudToken string - HcloudNetwork string - IsClusterServer bool - JoinCMD string - UseFlannel bool -} - -// PrepareTestEnv setups a test environment for the Cloud Controller Manager -// This includes the creation of a Network, SSH Key and Server. -// The server will be created with a Cloud Init UserData -// The template can be found under e2etests/templates/cloudinit_.ixt.tpl. 
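For orientation before the removed `PrepareTestEnv` implementation: the old harness provisioned everything directly through the hcloud-go v1 client, an SSH key, a network with one cloud subnet, and cloud-init-bootstrapped servers. The condensed sketch below uses only API calls that appear in the deleted code; the `testID` value, the placeholder public key, and the error handling are illustrative and not part of the original harness.

```go
package main

import (
	"context"
	"fmt"
	"net"
	"os"

	"github.com/hetznercloud/hcloud-go/hcloud"
)

func main() {
	ctx := context.Background()
	client := hcloud.NewClient(hcloud.WithToken(os.Getenv("HCLOUD_TOKEN")))

	testID := "local-example" // stands in for the harness' TestIdentifier

	// SSH key; the harness generates the key pair itself (see makeSSHKeyPair below).
	key, _, err := client.SSHKey.Create(ctx, hcloud.SSHKeyCreateOpts{
		Name:      fmt.Sprintf("s-%s", testID),
		PublicKey: "ssh-ed25519 AAAA... example", // placeholder
	})
	if err != nil {
		panic(err)
	}

	// Network 10.0.0.0/8 with a single cloud subnet, as in getNetwork.
	_, ipRange, _ := net.ParseCIDR("10.0.0.0/8")
	_, subnet, _ := net.ParseCIDR("10.0.0.0/16")
	network, _, err := client.Network.Create(ctx, hcloud.NetworkCreateOpts{
		Name:    fmt.Sprintf("nw-%s", testID),
		IPRange: ipRange,
	})
	if err != nil {
		panic(err)
	}
	if _, _, err := client.Network.AddSubnet(ctx, network, hcloud.NetworkAddSubnetOpts{
		Subnet: hcloud.NetworkSubnet{
			Type:        hcloud.NetworkSubnetTypeCloud,
			IPRange:     subnet,
			NetworkZone: hcloud.NetworkZoneEUCentral,
		},
	}); err != nil {
		panic(err)
	}

	// One cluster node, bootstrapped via cloud-init user data, as in createServer.
	res, _, err := client.Server.Create(ctx, hcloud.ServerCreateOpts{
		Name:       fmt.Sprintf("srv-cluster-node-%s", testID),
		ServerType: &hcloud.ServerType{Name: "cpx21"},
		Image:      &hcloud.Image{Name: "ubuntu-20.04"},
		SSHKeys:    []*hcloud.SSHKey{key},
		UserData:   "#cloud-config\n", // rendered from the cloudinit_*.txt.tpl templates
		Networks:   []*hcloud.Network{network},
	})
	if err != nil {
		panic(err)
	}
	fmt.Println("created server", res.Server.Name)
}
```

On top of these calls, the removed code also waits on the returned actions, waits for SSH and cloud-init, imports the locally built image, and joins worker nodes; all of that is visible in full in the deleted file below.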
-func (s *hcloudK8sSetup) PrepareTestEnv(ctx context.Context, additionalSSHKeys []*hcloud.SSHKey) (string, error) { - const op = "hcloudK8sSetup/PrepareTestEnv" - - s.testLabels = map[string]string{"K8sDistribution": string(s.K8sDistribution), "K8sVersion": strings.ReplaceAll(s.K8sVersion, "+", ""), "test": s.TestIdentifier} - err := s.getSSHKey(ctx) - if err != nil { - return "", fmt.Errorf("%s getSSHKey: %s", op, err) - } - - err = s.getNetwork(ctx) - if err != nil { - return "", fmt.Errorf("%s getNetwork: %s", op, err) - } - userData, err := s.getCloudInitConfig(true) - if err != nil { - fmt.Printf("[cluster-node] %s getCloudInitConfig: %s", op, err) - return "", err - } - srv, err := s.createServer(ctx, "cluster-node", instanceType, additionalSSHKeys, userData) - if err != nil { - return "", fmt.Errorf("%s: create cluster node: %v", op, err) - } - s.ClusterNode = srv - s.waitUntilSSHable(srv) - err = s.waitForCloudInit(srv) - if err != nil { - return "", err - } - - joinCmd, err := s.getJoinCmd() - if err != nil { - return "", err - } - s.clusterJoinCMD = joinCmd - - err = s.transferDockerImage(s.ClusterNode) - if err != nil { - return "", fmt.Errorf("%s: %s", op, err) - } - - fmt.Printf("[%s] %s: Load Image:\n", s.ClusterNode.Name, op) - err = RunCommandOnServer(s.privKey, s.ClusterNode, "ctr -n=k8s.io image import ci-hcloud-ccm.tar") - if err != nil { - return "", fmt.Errorf("%s: Load image %s", op, err) - } - kubeconfigPath, err := s.PrepareK8s() - if err != nil { - return "", fmt.Errorf("%s: %s", op, err) - } - - var workers = 1 // Change this value if you want to have more workers for the test - var wg sync.WaitGroup - for worker := 1; worker <= workers; worker++ { - wg.Add(1) - go s.createClusterWorker(ctx, additionalSSHKeys, &wg, worker) - } - wg.Wait() - - srv, err = s.createServer(ctx, "ext-server", instanceType, additionalSSHKeys, "") - if err != nil { - return "", fmt.Errorf("%s: create ext server: %v", op, err) - } - s.ExtServer = srv - s.waitUntilSSHable(srv) - - return kubeconfigPath, nil -} - -func (s *hcloudK8sSetup) createClusterWorker(ctx context.Context, additionalSSHKeys []*hcloud.SSHKey, wg *sync.WaitGroup, worker int) { - const op = "hcloudK8sSetup/createClusterWorker" - defer wg.Done() - - workerName := fmt.Sprintf("cluster-worker-%d", worker) - fmt.Printf("[%s] %s Create worker node:\n", workerName, op) - - userData, err := s.getCloudInitConfig(false) - if err != nil { - fmt.Printf("[%s] %s getCloudInitConfig: %s", workerName, op, err) - return - } - srv, err := s.createServer(ctx, workerName, instanceType, additionalSSHKeys, userData) - if err != nil { - fmt.Printf("[%s] %s createServer: %s", workerName, op, err) - return - } - s.WorkerNodes = append(s.WorkerNodes, srv) - - s.waitUntilSSHable(srv) - - err = s.waitForCloudInit(srv) - if err != nil { - fmt.Printf("[%s] %s: wait for cloud init on worker: %v", srv.Name, op, err) - return - } - - err = s.transferDockerImage(srv) - if err != nil { - fmt.Printf("[%s] %s: transfer image on worker: %v", srv.Name, op, err) - return - } - - fmt.Printf("[%s] %s Load Image\n", srv.Name, op) - err = RunCommandOnServer(s.privKey, srv, "ctr -n=k8s.io image import ci-hcloud-ccm.tar") - if err != nil { - fmt.Printf("[%s] %s: load image on worker: %v", srv.Name, op, err) - return - } -} - -// waitForCloudInit waits on cloud init on the server. -// when cloud init is ready we can assume that the server -// and the plain k8s installation is ready. 
-func (s *hcloudK8sSetup) getJoinCmd() (string, error) { - const op = "hcloudK8sSetup/getJoinCmd" - fmt.Printf("[%s] %s: Download join cmd\n", s.ClusterNode.Name, op) - if s.K8sDistribution == K8sDistributionK8s { - err := scp("ssh_key", fmt.Sprintf("root@%s:/root/join.txt", s.ClusterNode.PublicNet.IPv4.IP.String()), "join.txt") - if err != nil { - return "", fmt.Errorf("[%s] %s download join cmd: %s", s.ClusterNode.Name, op, err) - } - cmd, err := os.ReadFile("join.txt") - if err != nil { - return "", fmt.Errorf("[%s] %s reading join cmd file: %s", s.ClusterNode.Name, op, err) - } - - return string(cmd), nil - } - err := scp("ssh_key", fmt.Sprintf("root@%s:/var/lib/rancher/k3s/server/node-token", s.ClusterNode.PublicNet.IPv4.IP.String()), "join.txt") - if err != nil { - return "", fmt.Errorf("[%s] %s download join cmd: %s", s.ClusterNode.Name, op, err) - } - token, err := os.ReadFile("join.txt") - if err != nil { - return "", fmt.Errorf("[%s] %s read join cmd: %s", s.ClusterNode.Name, op, err) - } - return fmt.Sprintf("K3S_URL=https://%s:6443 K3S_TOKEN=%s", s.ClusterNode.PublicNet.IPv4.IP.String(), token), nil -} - -func (s *hcloudK8sSetup) waitUntilSSHable(server *hcloud.Server) { - const op = "hcloudK8sSetup/PrepareTestEnv" - fmt.Printf("[%s] %s: Waiting for server to be sshable:\n", server.Name, op) - for { - conn, err := net.Dial("tcp", fmt.Sprintf("%s:22", server.PublicNet.IPv4.IP.String())) - if err != nil { - time.Sleep(1 * time.Second) - continue - } - _ = conn.Close() - fmt.Printf("[%s] %s: SSH Connection successful\n", server.Name, op) - break - } -} - -func (s *hcloudK8sSetup) createServer(ctx context.Context, name, typ string, additionalSSHKeys []*hcloud.SSHKey, userData string) (*hcloud.Server, error) { - const op = "e2etest/createServer" - - sshKeys := []*hcloud.SSHKey{s.sshKey} - sshKeys = append(sshKeys, additionalSSHKeys...) - - res, _, err := s.Hcloud.Server.Create(ctx, hcloud.ServerCreateOpts{ - Name: fmt.Sprintf("srv-%s-%s", name, s.TestIdentifier), - ServerType: &hcloud.ServerType{Name: typ}, - Image: &hcloud.Image{Name: "ubuntu-20.04"}, - SSHKeys: sshKeys, - UserData: userData, - Labels: s.testLabels, - Networks: []*hcloud.Network{s.network}, - }) - if err != nil { - return nil, fmt.Errorf("%s Hcloud.Server.Create: %s", op, err) - } - - _, errCh := s.Hcloud.Action.WatchProgress(ctx, res.Action) - if err := <-errCh; err != nil { - return nil, fmt.Errorf("%s WatchProgress Action %s: %s", op, res.Action.Command, err) - } - - for _, nextAction := range res.NextActions { - _, errCh := s.Hcloud.Action.WatchProgress(ctx, nextAction) - if err := <-errCh; err != nil { - return nil, fmt.Errorf("%s WatchProgress NextAction %s: %s", op, nextAction.Command, err) - } - } - srv, _, err := s.Hcloud.Server.GetByID(ctx, res.Server.ID) - if err != nil { - return nil, fmt.Errorf("%s Hcloud.Server.GetByID: %s", op, err) - } - return srv, nil -} - -// PrepareK8s patches an existing kubernetes cluster with a CNI and the correct -// Cloud Controller Manager version from this test run. 
-func (s *hcloudK8sSetup) PrepareK8s() (string, error) { - const op = "hcloudK8sSetup/PrepareK8s" - - if s.UseNetworks { - err := s.deployCilium() - if err != nil { - return "", fmt.Errorf("%s: %s", op, err) - } - } - if s.K8sDistribution != K8sDistributionK3s && !s.UseNetworks { - err := s.deployFlannel() - if err != nil { - return "", fmt.Errorf("%s: %s", op, err) - } - } - - err := s.prepareCCMDeploymentFile(s.UseNetworks) - if err != nil { - return "", fmt.Errorf("%s: %s", op, err) - } - - fmt.Printf("[%s] %s: Apply ccm deployment\n", s.ClusterNode.Name, op) - err = RunCommandOnServer(s.privKey, s.ClusterNode, "KUBECONFIG=/root/.kube/config kubectl apply -f ccm.yml") - if err != nil { - return "", fmt.Errorf("%s Deploy ccm: %s", op, err) - } - - fmt.Printf("[%s] %s: Download kubeconfig\n", s.ClusterNode.Name, op) - - err = scp("ssh_key", fmt.Sprintf("root@%s:/root/.kube/config", s.ClusterNode.PublicNet.IPv4.IP.String()), "kubeconfig") - if err != nil { - return "", fmt.Errorf("%s download kubeconfig: %s", op, err) - } - - fmt.Printf("[%s] %s: Ensure correct server is set\n", s.ClusterNode.Name, op) - kubeconfigBefore, err := os.ReadFile("kubeconfig") - if err != nil { - return "", fmt.Errorf("%s reading kubeconfig: %s", op, err) - } - kubeconfigAfterwards := strings.ReplaceAll(string(kubeconfigBefore), "127.0.0.1", s.ClusterNode.PublicNet.IPv4.IP.String()) - err = os.WriteFile("kubeconfig", []byte(kubeconfigAfterwards), 0) - if err != nil { - return "", fmt.Errorf("%s writing kubeconfig: %s", op, err) - } - return "kubeconfig", nil -} - -func scp(identityFile, src, dest string) error { - const op = "e2etests/scp" - - err := runCmd( - "/usr/bin/scp", - []string{ - "-F", "/dev/null", // ignore $HOME/.ssh/config - "-i", identityFile, - "-o", "IdentitiesOnly=yes", // only use the identities passed on the command line - "-o", "UserKnownHostsFile=/dev/null", - "-o", "StrictHostKeyChecking=no", - src, - dest, - }, - nil, - ) - if err != nil { - return fmt.Errorf("%s: %v", op, err) - } - return nil -} - -func runCmd(name string, argv []string, env []string) error { - cmd := exec.Command(name, argv...) - if os.Getenv("TEST_DEBUG_MODE") != "" { - cmd.Stdout = os.Stdout - cmd.Stderr = os.Stderr - } - if env != nil { - cmd.Env = append(os.Environ(), env...) - } - if err := cmd.Run(); err != nil { - return fmt.Errorf("run cmd: %s %s: %v", name, strings.Join(argv, " "), err) - } - return nil -} - -// prepareCCMDeploymentFile patches the Cloud Controller Deployment file -// It replaces the used image and the pull policy to always use the local image -// from this test run. 
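The image rewrite performed by `prepareCCMDeploymentFile` below boils down to one POSIX regular expression over the deployment manifest (with `regexp.CompilePOSIX`, `$` matches at end of line, which is what makes this work on a multi-line YAML file). A minimal standalone illustration; the manifest snippet and the tag value are made up for the example:

```go
package main

import (
	"fmt"
	"regexp"
	"strings"
)

func main() {
	const imageName = "hetznercloud/hcloud-cloud-controller-manager"
	// Matches "hetznercloud/hcloud-cloud-controller-manager:<anything>" up to end of line,
	// the same pattern as imageRegexp in the removed setup.go.
	imageRegexp := regexp.MustCompilePOSIX(fmt.Sprintf("%s\\:.*$", imageName))

	manifest := []byte("        image: hetznercloud/hcloud-cloud-controller-manager:latest\n" +
		"        imagePullPolicy: Always\n")

	// Swap in the image built for this test run and pin the pull policy to the local image.
	manifest = imageRegexp.ReplaceAll(manifest, []byte(imageName+":ci_local-12345"))
	out := strings.ReplaceAll(string(manifest), "imagePullPolicy: Always", "imagePullPolicy: IfNotPresent")
	fmt.Print(out)
}
```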
-func (s *hcloudK8sSetup) prepareCCMDeploymentFile(networks bool) error { - const op = "hcloudK8sSetup/prepareCCMDeploymentFile" - fmt.Printf("%s: Read master deployment file\n", op) - var deploymentFilePath = "../deploy/ccm.yaml" - if networks { - deploymentFilePath = "../deploy/ccm-networks.yaml" - } - deploymentFile, err := os.ReadFile(deploymentFilePath) - if err != nil { - return fmt.Errorf("%s: read ccm deployment file %s: %v", op, deploymentFilePath, err) - } - - fmt.Printf("%s: Prepare deployment file and transfer it\n", op) - deploymentFile = imageRegexp.ReplaceAll(deploymentFile, []byte(s.ImageName)) - deploymentFile = []byte(strings.ReplaceAll(string(deploymentFile), " imagePullPolicy: Always", " imagePullPolicy: IfNotPresent")) - - err = RunCommandOnServer(s.privKey, s.ClusterNode, fmt.Sprintf("echo '%s' >> ccm.yml", deploymentFile)) - if err != nil { - return fmt.Errorf("%s: Prepare deployment file and transfer it: %s", op, err) - } - return nil -} - -// deployFlannel deploys Flannel as CNI. Flannel is used for all tests where -// we don't use Hetzner Cloud Networks. -func (s *hcloudK8sSetup) deployFlannel() error { - const op = "hcloudK8sSetup/deployFlannel" - fmt.Printf("%s: apply flannel deployment\n", op) - err := RunCommandOnServer(s.privKey, s.ClusterNode, "KUBECONFIG=/root/.kube/config kubectl apply -f https://raw.githubusercontent.com/coreos/flannel/master/Documentation/kube-flannel.yml") - if err != nil { - return fmt.Errorf("%s: apply flannel deployment: %s", op, err) - } - fmt.Printf("%s: patch flannel deployment\n", op) - err = RunCommandOnServer(s.privKey, s.ClusterNode, "KUBECONFIG=/root/.kube/config kubectl -n kube-flannel patch ds kube-flannel-ds --type json -p '[{\"op\":\"add\",\"path\":\"/spec/template/spec/tolerations/-\",\"value\":{\"key\":\"node.cloudprovider.kubernetes.io/uninitialized\",\"value\":\"true\",\"effect\":\"NoSchedule\"}}]'") - if err != nil { - return fmt.Errorf("%s: patch flannel deployment: %s", op, err) - } - return nil -} - -// deployCilium deploys Cilium as CNI. Cilium is used for all tests where -// we use Hetzner Cloud Networks as Cilium is one of the only CNIs -// that support Cloud Controllers as source for advertising routes. -func (s *hcloudK8sSetup) deployCilium() error { - const op = "hcloudK8sSetup/deployCilium" - - deploymentFile, err := os.ReadFile("templates/cilium.yml") - if err != nil { - return fmt.Errorf("%s: read cilium deployment file %s: %v", op, "templates/cilium.yml", err) - } - err = RunCommandOnServer(s.privKey, s.ClusterNode, fmt.Sprintf("cat < cilium.yml\n%s\nEOF", deploymentFile)) - if err != nil { - return fmt.Errorf("%s: Transfer cilium deployment: %s", op, err) - } - - fmt.Printf("%s: apply cilium deployment\n", op) - err = RunCommandOnServer(s.privKey, s.ClusterNode, "KUBECONFIG=/root/.kube/config kubectl apply -f cilium.yml") - if err != nil { - return fmt.Errorf("%s: apply cilium deployment: %s", op, err) - } - - return nil -} - -// transferDockerImage transfers the local build docker image tar via SCP. 
-func (s *hcloudK8sSetup) transferDockerImage(server *hcloud.Server) error { - const op = "hcloudK8sSetup/transferDockerImage" - fmt.Printf("[%s] %s: Transfer docker image\n", server.Name, op) - err := WithSSHSession(s.privKey, server.PublicNet.IPv4.IP.String(), func(session *ssh.Session) error { - file, err := os.Open("ci-hcloud-ccm.tar") - if err != nil { - return fmt.Errorf("%s read ci-hcloud-ccm.tar: %s", op, err) - } - defer file.Close() - stat, err := file.Stat() - if err != nil { - return fmt.Errorf("%s file.Stat: %s", op, err) - } - wg := sync.WaitGroup{} - wg.Add(1) - - go func() { - hostIn, _ := session.StdinPipe() - defer hostIn.Close() - fmt.Fprintf(hostIn, "C0664 %d %s\n", stat.Size(), "ci-hcloud-ccm.tar") - io.Copy(hostIn, file) - fmt.Fprint(hostIn, "\x00") - wg.Done() - }() - - err = session.Run("/usr/bin/scp -t /root") - if err != nil { - return fmt.Errorf("%s copy via scp: %s", op, err) - } - wg.Wait() - return err - }) - return err -} - -// waitForCloudInit waits on cloud init on the server. -// when cloud init is ready we can assume that the server -// and the plain k8s installation is ready. -func (s *hcloudK8sSetup) waitForCloudInit(server *hcloud.Server) error { - const op = "hcloudK8sSetup/PrepareTestEnv" - fmt.Printf("[%s] %s: Wait for cloud-init\n", server.Name, op) - err := RunCommandOnServer(s.privKey, server, "cloud-init status --wait > /dev/null") - if err != nil { - return fmt.Errorf("%s: Wait for cloud-init: %s", op, err) - } - return nil -} - -// TearDown deletes all created resources within the Hetzner Cloud -// there is no need to "shutdown" the k8s cluster before -// so we just delete all created resources. -func (s *hcloudK8sSetup) TearDown(testFailed bool) error { - const op = "hcloudK8sSetup/TearDown" - - if s.KeepOnFailure && testFailed { - fmt.Println("Skipping tear-down for further analysis.") - fmt.Println("Please clean-up afterwards ;-)") - return nil - } - - ctx := context.Background() - - _, _, err := s.Hcloud.Server.DeleteWithResult(ctx, s.ClusterNode) - if err != nil { - return fmt.Errorf("%s Hcloud.Server.Delete: %s", op, err) - } - s.ClusterNode = nil - - for _, wn := range s.WorkerNodes { - _, _, err := s.Hcloud.Server.DeleteWithResult(ctx, wn) - if err != nil { - return fmt.Errorf("[%s] %s Hcloud.Server.Delete: %s", wn.Name, op, err) - } - } - - _, _, err = s.Hcloud.Server.DeleteWithResult(ctx, s.ExtServer) - if err != nil { - return fmt.Errorf("%s Hcloud.Server.Delete: %s", op, err) - } - s.ExtServer = nil - - _, err = s.Hcloud.SSHKey.Delete(ctx, s.sshKey) - if err != nil { - return fmt.Errorf("%s Hcloud.SSHKey.Delete: %s", err, err) - } - s.sshKey = nil - _, err = s.Hcloud.Network.Delete(ctx, s.network) - if err != nil { - return fmt.Errorf("%s Hcloud.Network.Delete: %s", err, err) - } - s.network = nil - return nil -} - -// getCloudInitConfig returns the generated cloud init configuration. 
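The generator removed below renders `templates/cloudinit_<distribution>.txt.tpl` with the `cloudInitTmpl` data shown earlier in this file. A stripped-down sketch of the same rendering step; note that the original imports `html/template`, while the sketch uses `text/template`, which is the package normally used for non-HTML output such as cloud-init YAML (a deliberate substitution here, not a claim about the original). The inline template is a toy excerpt, not the real template file.

```go
package main

import (
	"bytes"
	"fmt"
	"text/template"
)

// cloudInitData mirrors the fields of the removed cloudInitTmpl struct.
type cloudInitData struct {
	K8sVersion      string
	HcloudToken     string
	HcloudNetwork   string
	IsClusterServer bool
	JoinCMD         string
	UseFlannel      bool
}

const tpl = `#cloud-config
runcmd:
- export INSTALL_K3S_VERSION={{.K8sVersion}}
{{if .IsClusterServer}}- kubectl -n kube-system create secret generic hcloud --from-literal=token={{.HcloudToken}} --from-literal=network={{.HcloudNetwork}}
{{else}}- curl -sfL https://get.k3s.io | {{.JoinCMD}} sh -s -
{{end}}`

func main() {
	t := template.Must(template.New("cloud_init").Parse(tpl))
	var buf bytes.Buffer
	err := t.Execute(&buf, cloudInitData{
		K8sVersion:      "v1.26.1+k3s1",
		HcloudToken:     "<redacted>",
		HcloudNetwork:   "nw-local-example",
		IsClusterServer: true,
	})
	if err != nil {
		panic(err)
	}
	fmt.Print(buf.String())
}
```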
-func (s *hcloudK8sSetup) getCloudInitConfig(isClusterServer bool) (string, error) { - const op = "hcloudK8sSetup/getCloudInitConfig" - - data := cloudInitTmpl{ - K8sVersion: s.K8sVersion, - HcloudToken: s.HcloudToken, - HcloudNetwork: s.network.Name, - IsClusterServer: isClusterServer, - JoinCMD: s.clusterJoinCMD, - UseFlannel: s.K8sDistribution == K8sDistributionK3s && !s.UseNetworks, - } - str, err := os.ReadFile(fmt.Sprintf("templates/cloudinit_%s.txt.tpl", s.K8sDistribution)) - if err != nil { - return "", fmt.Errorf("%s: read template file %s: %v", "templates/cloudinit.txt.tpl", op, err) - } - tmpl, err := template.New("cloud_init").Parse(string(str)) - if err != nil { - return "", fmt.Errorf("%s: parsing template file %s: %v", "templates/cloudinit.txt.tpl", op, err) - } - var buf bytes.Buffer - if err := tmpl.Execute(&buf, data); err != nil { - return "", fmt.Errorf("%s: execute template: %v", op, err) - } - return buf.String(), nil -} - -// getSSHKey create and get the Hetzner Cloud SSH Key for the test. -func (s *hcloudK8sSetup) getSSHKey(ctx context.Context) error { - const op = "hcloudK8sSetup/getSSHKey" - pubKey, privKey, err := makeSSHKeyPair() - if err != nil { - return err - } - sshKey, _, err := s.Hcloud.SSHKey.Create(ctx, hcloud.SSHKeyCreateOpts{ - Name: fmt.Sprintf("s-%s", s.TestIdentifier), - PublicKey: pubKey, - Labels: s.testLabels, - }) - if err != nil { - return fmt.Errorf("%s: creating ssh key: %v", op, err) - } - s.privKey = privKey - s.sshKey = sshKey - err = os.WriteFile("ssh_key", []byte(s.privKey), 0600) - if err != nil { - return fmt.Errorf("%s: writing ssh key private key: %v", op, err) - } - return nil -} - -// getNetwork create a Hetzner Cloud Network for this test. -func (s *hcloudK8sSetup) getNetwork(ctx context.Context) error { - const op = "hcloudK8sSetup/getNetwork" - _, ipRange, _ := net.ParseCIDR("10.0.0.0/8") - _, subnetRange, _ := net.ParseCIDR("10.0.0.0/16") - network, _, err := s.Hcloud.Network.Create(ctx, hcloud.NetworkCreateOpts{ - Name: fmt.Sprintf("nw-%s", s.TestIdentifier), - IPRange: ipRange, - Labels: s.testLabels, - }) - if err != nil { - return fmt.Errorf("%s: creating network: %v", op, err) - } - _, _, err = s.Hcloud.Network.AddSubnet(ctx, network, hcloud.NetworkAddSubnetOpts{ - Subnet: hcloud.NetworkSubnet{ - Type: hcloud.NetworkSubnetTypeCloud, - IPRange: subnetRange, - NetworkZone: hcloud.NetworkZoneEUCentral, - }, - }) - if err != nil { - return fmt.Errorf("%s: creating subnet: %v", op, err) - } - s.network = network - return nil -} - -// makeSSHKeyPair generate a SSH key pair. 
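Since the harness feeds the generated private key straight into `ssh.ParsePrivateKey` (see `WithSSHSession` further down), a quick round trip of the key material is a useful sanity check. A hedged sketch of that round trip, using a 2048-bit key rather than the 1024-bit key the removed helper generates (1024-bit RSA is below current recommendations):

```go
package main

import (
	"crypto/rand"
	"crypto/rsa"
	"crypto/x509"
	"encoding/pem"
	"fmt"
	"strings"

	"golang.org/x/crypto/ssh"
)

func main() {
	priv, err := rsa.GenerateKey(rand.Reader, 2048)
	if err != nil {
		panic(err)
	}

	// Private key as PEM, the same encoding the harness writes to the ssh_key file.
	var privPEM strings.Builder
	if err := pem.Encode(&privPEM, &pem.Block{
		Type:  "RSA PRIVATE KEY",
		Bytes: x509.MarshalPKCS1PrivateKey(priv),
	}); err != nil {
		panic(err)
	}

	// Public key in authorized_keys format, as uploaded via SSHKey.Create.
	pub, err := ssh.NewPublicKey(&priv.PublicKey)
	if err != nil {
		panic(err)
	}
	authorized := strings.TrimSpace(string(ssh.MarshalAuthorizedKey(pub)))

	// Round trip: the PEM block must parse back into a usable signer.
	signer, err := ssh.ParsePrivateKey([]byte(privPEM.String()))
	if err != nil {
		panic(err)
	}
	fmt.Println("public key:", authorized)
	fmt.Println("fingerprint:", ssh.FingerprintSHA256(signer.PublicKey()))
}
```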
-func makeSSHKeyPair() (string, string, error) { - privateKey, err := rsa.GenerateKey(rand.Reader, 1024) - if err != nil { - return "", "", err - } - - // generate and write private key as PEM - var privKeyBuf strings.Builder - - privateKeyPEM := &pem.Block{Type: "RSA PRIVATE KEY", Bytes: x509.MarshalPKCS1PrivateKey(privateKey)} - if err := pem.Encode(&privKeyBuf, privateKeyPEM); err != nil { - return "", "", err - } - - // generate and write public key - pub, err := ssh.NewPublicKey(&privateKey.PublicKey) - if err != nil { - return "", "", err - } - - var pubKeyBuf strings.Builder - pubKeyBuf.Write(ssh.MarshalAuthorizedKey(pub)) - - return pubKeyBuf.String(), privKeyBuf.String(), nil -} - -func RunCommandOnServer(privKey string, server *hcloud.Server, command string) error { - return WithSSHSession(privKey, server.PublicNet.IPv4.IP.String(), func(session *ssh.Session) error { - if ok := os.Getenv("TEST_DEBUG_MODE"); ok != "" { - session.Stdout = os.Stdout - session.Stderr = os.Stderr - } - return session.Run(command) - }) -} - -func WithSSHSession(privKey string, host string, fn func(*ssh.Session) error) error { - signer, err := ssh.ParsePrivateKey([]byte(privKey)) - if err != nil { - return err - } - - client, err := ssh.Dial("tcp", net.JoinHostPort(host, "22"), &ssh.ClientConfig{ - User: "root", - Auth: []ssh.AuthMethod{ssh.PublicKeys(signer)}, - HostKeyCallback: ssh.InsecureIgnoreHostKey(), - Timeout: 1 * time.Second, - }) - if err != nil { - return err - } - - session, err := client.NewSession() - if err != nil { - return err - } - defer session.Close() - - return fn(session) -} diff --git a/e2etests/templates/cilium.yml b/e2etests/templates/cilium.yml deleted file mode 100644 index 4c63204e6..000000000 --- a/e2etests/templates/cilium.yml +++ /dev/null @@ -1,776 +0,0 @@ ---- -# Source: cilium/templates/cilium-agent-serviceaccount.yaml -apiVersion: v1 -kind: ServiceAccount -metadata: - name: "cilium" - namespace: kube-system ---- -# Source: cilium/templates/cilium-operator-serviceaccount.yaml -apiVersion: v1 -kind: ServiceAccount -metadata: - name: "cilium-operator" - namespace: kube-system ---- -# Source: cilium/templates/cilium-configmap.yaml -apiVersion: v1 -kind: ConfigMap -metadata: - name: cilium-config - namespace: kube-system -data: - - # Identity allocation mode selects how identities are shared between cilium - # nodes by setting how they are stored. The options are "crd" or "kvstore". - # - "crd" stores identities in kubernetes as CRDs (custom resource definition). - # These can be queried with: - # kubectl get ciliumid - # - "kvstore" stores identities in a kvstore, etcd or consul, that is - # configured below. Cilium versions before 1.6 supported only the kvstore - # backend. Upgrades from these older cilium versions should continue using - # the kvstore by commenting out the identity-allocation-mode below, or - # setting it to "kvstore". - identity-allocation-mode: crd - cilium-endpoint-gc-interval: "5m0s" - - # If you want to run cilium in debug mode change this value to true - debug: "false" - # The agent can be put into the following three policy enforcement modes - # default, always and never. - # https://docs.cilium.io/en/latest/policy/intro/#policy-enforcement-modes - enable-policy: "default" - - # Enable IPv4 addressing. If enabled, all endpoints are allocated an IPv4 - # address. - enable-ipv4: "true" - - # Enable IPv6 addressing. If enabled, all endpoints are allocated an IPv6 - # address. 
- enable-ipv6: "false" - # Users who wish to specify their own custom CNI configuration file must set - # custom-cni-conf to "true", otherwise Cilium may overwrite the configuration. - custom-cni-conf: "false" - enable-bpf-clock-probe: "true" - # If you want cilium monitor to aggregate tracing for packets, set this level - # to "low", "medium", or "maximum". The higher the level, the less packets - # that will be seen in monitor output. - monitor-aggregation: medium - - # The monitor aggregation interval governs the typical time between monitor - # notification events for each allowed connection. - # - # Only effective when monitor aggregation is set to "medium" or higher. - monitor-aggregation-interval: 5s - - # The monitor aggregation flags determine which TCP flags which, upon the - # first observation, cause monitor notifications to be generated. - # - # Only effective when monitor aggregation is set to "medium" or higher. - monitor-aggregation-flags: all - # Specifies the ratio (0.0-1.0) of total system memory to use for dynamic - # sizing of the TCP CT, non-TCP CT, NAT and policy BPF maps. - bpf-map-dynamic-size-ratio: "0.0025" - # bpf-policy-map-max specifies the maximum number of entries in endpoint - # policy map (per endpoint) - bpf-policy-map-max: "16384" - # bpf-lb-map-max specifies the maximum number of entries in bpf lb service, - # backend and affinity maps. - bpf-lb-map-max: "65536" - # bpf-lb-bypass-fib-lookup instructs Cilium to enable the FIB lookup bypass - # optimization for nodeport reverse NAT handling. - bpf-lb-external-clusterip: "false" - - # Pre-allocation of map entries allows per-packet latency to be reduced, at - # the expense of up-front memory allocation for the entries in the maps. The - # default value below will minimize memory usage in the default installation; - # users who are sensitive to latency may consider setting this to "true". - # - # This option was introduced in Cilium 1.4. Cilium 1.3 and earlier ignore - # this option and behave as though it is set to "true". - # - # If this value is modified, then during the next Cilium startup the restore - # of existing endpoints and tracking of ongoing connections may be disrupted. - # As a result, reply packets may be dropped and the load-balancing decisions - # for established connections may change. - # - # If this option is set to "false" during an upgrade from 1.3 or earlier to - # 1.4 or later, then it may cause one-time disruptions during the upgrade. - preallocate-bpf-maps: "false" - - # Regular expression matching compatible Istio sidecar istio-proxy - # container image names - sidecar-istio-proxy-image: "cilium/istio_proxy" - - # Name of the cluster. Only relevant when building a mesh of clusters. - cluster-name: default - # Unique ID of the cluster. Must be unique across all conneted clusters and - # in the range of 1 and 255. Only relevant when building a mesh of clusters. 
- cluster-id: "" - - # Encapsulation mode for communication between nodes - # Possible values: - # - disabled - # - vxlan (default) - # - geneve - tunnel: disabled - # Enables L7 proxy for L7 policy enforcement and visibility - enable-l7-proxy: "true" - - enable-ipv4-masquerade: "true" - enable-ipv6-masquerade: "true" - enable-bpf-masquerade: "true" - - enable-xt-socket-fallback: "true" - install-iptables-rules: "true" - install-no-conntrack-iptables-rules: "false" - - auto-direct-node-routes: "false" - enable-bandwidth-manager: "false" - enable-local-redirect-policy: "false" - - native-routing-cidr: "10.0.0.0/8" - kube-proxy-replacement: "probe" - enable-health-check-nodeport: "true" - node-port-bind-protection: "true" - enable-auto-protect-node-port-range: "true" - enable-session-affinity: "true" - enable-l2-neigh-discovery: "true" - enable-endpoint-health-checking: "true" - enable-health-checking: "true" - enable-well-known-identities: "false" - enable-remote-node-identity: "true" - operator-api-serve-addr: "127.0.0.1:9234" - # Enable Hubble gRPC service. - enable-hubble: "true" - # UNIX domain socket for Hubble server to listen to. - hubble-socket-path: "/var/run/cilium/hubble.sock" - # An additional address for Hubble server to listen to (e.g. ":4244"). - hubble-listen-address: ":4244" - hubble-disable-tls: "false" - hubble-tls-cert-file: /var/lib/cilium/tls/hubble/server.crt - hubble-tls-key-file: /var/lib/cilium/tls/hubble/server.key - hubble-tls-client-ca-files: /var/lib/cilium/tls/hubble/client-ca.crt - ipam: "kubernetes" - k8s-require-ipv4-pod-cidr: "true" - k8s-require-ipv6-pod-cidr: "false" - # cluster-pool-ipv4-cidr: "10.0.0.0/8" - # cluster-pool-ipv4-mask-size: "8" - disable-cnp-status-updates: "true" - cgroup-root: "/run/cilium/cgroupv2" ---- -# Source: cilium/templates/cilium-agent-clusterrole.yaml -apiVersion: rbac.authorization.k8s.io/v1 -kind: ClusterRole -metadata: - name: cilium -rules: -- apiGroups: - - networking.k8s.io - resources: - - networkpolicies - verbs: - - get - - list - - watch -- apiGroups: - - discovery.k8s.io - resources: - - endpointslices - verbs: - - get - - list - - watch -- apiGroups: - - "" - resources: - - namespaces - - services - - nodes - - endpoints - verbs: - - get - - list - - watch -- apiGroups: - - "" - resources: - - pods - - pods/finalizers - verbs: - - get - - list - - watch - - update - - delete -- apiGroups: - - "" - resources: - - nodes - verbs: - - get - - list - - watch - - update -- apiGroups: - - "" - resources: - - nodes - - nodes/status - verbs: - - patch -- apiGroups: - - apiextensions.k8s.io - resources: - - customresourcedefinitions - verbs: - # Deprecated for removal in v1.10 - - create - - list - - watch - - update - - # This is used when validating policies in preflight. This will need to stay - # until we figure out how to avoid "get" inside the preflight, and then - # should be removed ideally. 
- - get -- apiGroups: - - cilium.io - resources: - - ciliumnetworkpolicies - - ciliumnetworkpolicies/status - - ciliumnetworkpolicies/finalizers - - ciliumclusterwidenetworkpolicies - - ciliumclusterwidenetworkpolicies/status - - ciliumclusterwidenetworkpolicies/finalizers - - ciliumendpoints - - ciliumendpoints/status - - ciliumendpoints/finalizers - - ciliumnodes - - ciliumnodes/status - - ciliumnodes/finalizers - - ciliumidentities - - ciliumidentities/finalizers - - ciliumlocalredirectpolicies - - ciliumlocalredirectpolicies/status - - ciliumlocalredirectpolicies/finalizers - - ciliumegressnatpolicies - verbs: - - '*' ---- -# Source: cilium/templates/cilium-operator-clusterrole.yaml -apiVersion: rbac.authorization.k8s.io/v1 -kind: ClusterRole -metadata: - name: cilium-operator -rules: -- apiGroups: - - "" - resources: - # to automatically delete [core|kube]dns pods so that are starting to being - # managed by Cilium - - pods - verbs: - - get - - list - - watch - - delete -- apiGroups: - - discovery.k8s.io - resources: - - endpointslices - verbs: - - get - - list - - watch -- apiGroups: - - "" - resources: - - services - verbs: - - get - - list - - watch -- apiGroups: - - "" - resources: - # to perform LB IP allocation for BGP - - services/status - verbs: - - update -- apiGroups: - - "" - resources: - # to perform the translation of a CNP that contains `ToGroup` to its endpoints - - services - - endpoints - # to check apiserver connectivity - - namespaces - verbs: - - get - - list - - watch -- apiGroups: - - cilium.io - resources: - - ciliumnetworkpolicies - - ciliumnetworkpolicies/status - - ciliumnetworkpolicies/finalizers - - ciliumclusterwidenetworkpolicies - - ciliumclusterwidenetworkpolicies/status - - ciliumclusterwidenetworkpolicies/finalizers - - ciliumendpoints - - ciliumendpoints/status - - ciliumendpoints/finalizers - - ciliumnodes - - ciliumnodes/status - - ciliumnodes/finalizers - - ciliumidentities - - ciliumidentities/status - - ciliumidentities/finalizers - - ciliumlocalredirectpolicies - - ciliumlocalredirectpolicies/status - - ciliumlocalredirectpolicies/finalizers - verbs: - - '*' -- apiGroups: - - apiextensions.k8s.io - resources: - - customresourcedefinitions - verbs: - - create - - get - - list - - update - - watch -# For cilium-operator running in HA mode. -# -# Cilium operator running in HA mode requires the use of ResourceLock for Leader Election -# between multiple running instances. -# The preferred way of doing this is to use LeasesResourceLock as edits to Leases are less -# common and fewer objects in the cluster watch "all Leases". 
-- apiGroups: - - coordination.k8s.io - resources: - - leases - verbs: - - create - - get - - update ---- -# Source: cilium/templates/cilium-agent-clusterrolebinding.yaml -apiVersion: rbac.authorization.k8s.io/v1 -kind: ClusterRoleBinding -metadata: - name: cilium -roleRef: - apiGroup: rbac.authorization.k8s.io - kind: ClusterRole - name: cilium -subjects: -- kind: ServiceAccount - name: "cilium" - namespace: kube-system ---- -# Source: cilium/templates/cilium-operator-clusterrolebinding.yaml -apiVersion: rbac.authorization.k8s.io/v1 -kind: ClusterRoleBinding -metadata: - name: cilium-operator -roleRef: - apiGroup: rbac.authorization.k8s.io - kind: ClusterRole - name: cilium-operator -subjects: -- kind: ServiceAccount - name: "cilium-operator" - namespace: kube-system ---- -# Source: cilium/templates/cilium-agent-daemonset.yaml -apiVersion: apps/v1 -kind: DaemonSet -metadata: - labels: - k8s-app: cilium - name: cilium - namespace: kube-system -spec: - selector: - matchLabels: - k8s-app: cilium - updateStrategy: - rollingUpdate: - maxUnavailable: 2 - type: RollingUpdate - template: - metadata: - annotations: - # This annotation plus the CriticalAddonsOnly toleration makes - # cilium to be a critical pod in the cluster, which ensures cilium - # gets priority scheduling. - # https://kubernetes.io/docs/tasks/administer-cluster/guaranteed-scheduling-critical-addon-pods/ - scheduler.alpha.kubernetes.io/critical-pod: "" - labels: - k8s-app: cilium - spec: - affinity: - nodeAffinity: - requiredDuringSchedulingIgnoredDuringExecution: - nodeSelectorTerms: - - matchExpressions: - - key: kubernetes.io/os - operator: In - values: - - linux - - matchExpressions: - - key: beta.kubernetes.io/os - operator: In - values: - - linux - podAntiAffinity: - requiredDuringSchedulingIgnoredDuringExecution: - - labelSelector: - matchExpressions: - - key: k8s-app - operator: In - values: - - cilium - topologyKey: kubernetes.io/hostname - containers: - - args: - - --config-dir=/tmp/cilium/config-map - command: - - cilium-agent - startupProbe: - httpGet: - host: '127.0.0.1' - path: /healthz - port: 9876 - scheme: HTTP - httpHeaders: - - name: "brief" - value: "true" - failureThreshold: 105 - periodSeconds: 2 - successThreshold: 1 - livenessProbe: - httpGet: - host: '127.0.0.1' - path: /healthz - port: 9876 - scheme: HTTP - httpHeaders: - - name: "brief" - value: "true" - failureThreshold: 10 - periodSeconds: 30 - successThreshold: 1 - timeoutSeconds: 5 - readinessProbe: - httpGet: - host: '127.0.0.1' - path: /healthz - port: 9876 - scheme: HTTP - httpHeaders: - - name: "brief" - value: "true" - failureThreshold: 3 - periodSeconds: 30 - successThreshold: 1 - timeoutSeconds: 5 - env: - - name: K8S_NODE_NAME - valueFrom: - fieldRef: - apiVersion: v1 - fieldPath: spec.nodeName - - name: CILIUM_K8S_NAMESPACE - valueFrom: - fieldRef: - apiVersion: v1 - fieldPath: metadata.namespace - - name: CILIUM_CLUSTERMESH_CONFIG - value: /var/lib/cilium/clustermesh/ - - name: CILIUM_CNI_CHAINING_MODE - valueFrom: - configMapKeyRef: - key: cni-chaining-mode - name: cilium-config - optional: true - - name: CILIUM_CUSTOM_CNI_CONF - valueFrom: - configMapKeyRef: - key: custom-cni-conf - name: cilium-config - optional: true - image: "quay.io/cilium/cilium:v1.10.5@sha256:0612218e28288db360c63677c09fafa2d17edda4f13867bcabf87056046b33bb" - imagePullPolicy: IfNotPresent - lifecycle: - postStart: - exec: - command: - - "/cni-install.sh" - - "--enable-debug=false" - - "--cni-exclusive=true" - preStop: - exec: - command: - - /cni-uninstall.sh - 
name: cilium-agent - securityContext: - capabilities: - add: - - NET_ADMIN - - SYS_MODULE - privileged: true - volumeMounts: - - mountPath: /sys/fs/bpf - name: bpf-maps - - mountPath: /var/run/cilium - name: cilium-run - - mountPath: /host/opt/cni/bin - name: cni-path - - mountPath: /host/etc/cni/net.d - name: etc-cni-netd - - mountPath: /var/lib/cilium/clustermesh - name: clustermesh-secrets - readOnly: true - - mountPath: /tmp/cilium/config-map - name: cilium-config-path - readOnly: true - # Needed to be able to load kernel modules - - mountPath: /lib/modules - name: lib-modules - readOnly: true - - mountPath: /run/xtables.lock - name: xtables-lock - - mountPath: /var/lib/cilium/tls/hubble - name: hubble-tls - readOnly: true - hostNetwork: true - initContainers: - # Required to mount cgroup2 filesystem on the underlying Kubernetes node. - # We use nsenter command with host's cgroup and mount namespaces enabled. - - name: mount-cgroup - env: - - name: CGROUP_ROOT - value: /run/cilium/cgroupv2 - - name: BIN_PATH - value: /opt/cni/bin - command: - - sh - - -c - # The statically linked Go program binary is invoked to avoid any - # dependency on utilities like sh and mount that can be missing on certain - # distros installed on the underlying host. Copy the binary to the - # same directory where we install cilium cni plugin so that exec permissions - # are available. - - 'cp /usr/bin/cilium-mount /hostbin/cilium-mount && nsenter --cgroup=/hostproc/1/ns/cgroup --mount=/hostproc/1/ns/mnt "${BIN_PATH}/cilium-mount" $CGROUP_ROOT; rm /hostbin/cilium-mount' - image: "quay.io/cilium/cilium:v1.10.5@sha256:0612218e28288db360c63677c09fafa2d17edda4f13867bcabf87056046b33bb" - imagePullPolicy: IfNotPresent - volumeMounts: - - mountPath: /hostproc - name: hostproc - - mountPath: /hostbin - name: cni-path - securityContext: - privileged: true - - command: - - /init-container.sh - env: - - name: CILIUM_ALL_STATE - valueFrom: - configMapKeyRef: - key: clean-cilium-state - name: cilium-config - optional: true - - name: CILIUM_BPF_STATE - valueFrom: - configMapKeyRef: - key: clean-cilium-bpf-state - name: cilium-config - optional: true - image: "quay.io/cilium/cilium:v1.10.5@sha256:0612218e28288db360c63677c09fafa2d17edda4f13867bcabf87056046b33bb" - imagePullPolicy: IfNotPresent - name: clean-cilium-state - securityContext: - capabilities: - add: - - NET_ADMIN - privileged: true - volumeMounts: - - mountPath: /sys/fs/bpf - name: bpf-maps - # Required to mount cgroup filesystem from the host to cilium agent pod - - mountPath: /run/cilium/cgroupv2 - name: cilium-cgroup - mountPropagation: HostToContainer - - mountPath: /var/run/cilium - name: cilium-run - resources: - requests: - cpu: 100m - memory: 100Mi - restartPolicy: Always - priorityClassName: system-node-critical - serviceAccount: "cilium" - serviceAccountName: "cilium" - terminationGracePeriodSeconds: 1 - tolerations: - - operator: Exists - volumes: - # To keep state between restarts / upgrades - - hostPath: - path: /var/run/cilium - type: DirectoryOrCreate - name: cilium-run - # To keep state between restarts / upgrades for bpf maps - - hostPath: - path: /sys/fs/bpf - type: DirectoryOrCreate - name: bpf-maps - # To mount cgroup2 filesystem on the host - - hostPath: - path: /proc - type: Directory - name: hostproc - # To keep state between restarts / upgrades for cgroup2 filesystem - - hostPath: - path: /run/cilium/cgroupv2 - type: DirectoryOrCreate - name: cilium-cgroup - # To install cilium cni plugin in the host - - hostPath: - path: /opt/cni/bin - 
type: DirectoryOrCreate - name: cni-path - # To install cilium cni configuration in the host - - hostPath: - path: /etc/cni/net.d - type: DirectoryOrCreate - name: etc-cni-netd - # To be able to load kernel modules - - hostPath: - path: /lib/modules - name: lib-modules - # To access iptables concurrently with other processes (e.g. kube-proxy) - - hostPath: - path: /run/xtables.lock - type: FileOrCreate - name: xtables-lock - # To read the clustermesh configuration - - name: clustermesh-secrets - secret: - defaultMode: 420 - optional: true - secretName: cilium-clustermesh - # To read the configuration from the config map - - configMap: - name: cilium-config - name: cilium-config-path - - name: hubble-tls - projected: - sources: - - secret: - name: hubble-server-certs - items: - - key: ca.crt - path: client-ca.crt - - key: tls.crt - path: server.crt - - key: tls.key - path: server.key - optional: true ---- -# Source: cilium/templates/cilium-operator-deployment.yaml -apiVersion: apps/v1 -kind: Deployment -metadata: - labels: - io.cilium/app: operator - name: cilium-operator - name: cilium-operator - namespace: kube-system -spec: - # See docs on ServerCapabilities.LeasesResourceLock in file pkg/k8s/version/version.go - # for more details. - replicas: 2 - selector: - matchLabels: - io.cilium/app: operator - name: cilium-operator - strategy: - rollingUpdate: - maxSurge: 1 - maxUnavailable: 1 - type: RollingUpdate - template: - metadata: - annotations: - labels: - io.cilium/app: operator - name: cilium-operator - spec: - # In HA mode, cilium-operator pods must not be scheduled on the same - # node as they will clash with each other. - affinity: - podAntiAffinity: - requiredDuringSchedulingIgnoredDuringExecution: - - labelSelector: - matchExpressions: - - key: io.cilium/app - operator: In - values: - - operator - topologyKey: kubernetes.io/hostname - containers: - - args: - - --config-dir=/tmp/cilium/config-map - command: - - cilium-operator-generic - env: - - name: K8S_NODE_NAME - valueFrom: - fieldRef: - apiVersion: v1 - fieldPath: spec.nodeName - - name: CILIUM_K8S_NAMESPACE - valueFrom: - fieldRef: - apiVersion: v1 - fieldPath: metadata.namespace - - name: CILIUM_DEBUG - valueFrom: - configMapKeyRef: - key: debug - name: cilium-config - optional: true - image: "quay.io/cilium/operator-generic:v1.10.5@sha256:2d2f730f219d489ff0702923bf24c0002cd93eb4b47ba344375566202f56d972" - imagePullPolicy: IfNotPresent - name: cilium-operator - livenessProbe: - httpGet: - host: '127.0.0.1' - path: /healthz - port: 9234 - scheme: HTTP - initialDelaySeconds: 60 - periodSeconds: 10 - timeoutSeconds: 3 - volumeMounts: - - mountPath: /tmp/cilium/config-map - name: cilium-config-path - readOnly: true - hostNetwork: true - restartPolicy: Always - priorityClassName: system-cluster-critical - serviceAccount: "cilium-operator" - serviceAccountName: "cilium-operator" - tolerations: - - operator: Exists - volumes: - # To read the configuration from the config map - - configMap: - name: cilium-config - name: cilium-config-path diff --git a/e2etests/templates/cloudinit_k3s.txt.tpl b/e2etests/templates/cloudinit_k3s.txt.tpl deleted file mode 100644 index 73f839f57..000000000 --- a/e2etests/templates/cloudinit_k3s.txt.tpl +++ /dev/null @@ -1,35 +0,0 @@ -#cloud-config -write_files: -- content: | - net.bridge.bridge-nf-call-ip6tables = 1 - net.bridge.bridge-nf-call-iptables = 1 - path: /etc/sysctl.d/k8s.conf -- content: | - alias k="kubectl" - alias ksy="kubectl -n kube-system" - alias kgp="kubectl get pods" - alias 
kgs="kubectl get services" - export HCLOUD_TOKEN={{.HcloudToken}} - path: /root/.bashrc -runcmd: -- sysctl --system -- apt install -y apt-transport-https curl -- export INSTALL_K3S_VERSION={{.K8sVersion}} -# Download and install latest hcloud cli release for easier debugging on host -- curl -s https://api.github.com/repos/hetznercloud/cli/releases/latest | grep browser_download_url | grep linux-amd64 | cut -d '"' -f 4 | wget -qi - -- tar xvzf hcloud-linux-amd64.tar.gz && cp hcloud /usr/bin/hcloud && chmod +x /usr/bin/hcloud -{{if .IsClusterServer}} -- curl -sfL https://get.k3s.io | sh -s - --disable servicelb --disable traefik --disable-cloud-controller --kubelet-arg="cloud-provider=external" --disable metrics-server {{if not .UseFlannel }}--flannel-backend=none{{ end }} -- mkdir -p /opt/cni/bin -- ln -s /var/lib/rancher/k3s/data/current/bin/loopback /opt/cni/bin/loopback # Workaround for https://github.com/k3s-io/k3s/issues/219 -- ln -s /var/lib/rancher/k3s/data/current/bin/bridge /opt/cni/bin/bridge # Workaround for https://github.com/k3s-io/k3s/issues/219 -- ln -s /var/lib/rancher/k3s/data/current/bin/host-local /opt/cni/bin/host-local # Workaround for https://github.com/k3s-io/k3s/issues/219 -- ln -s /var/lib/rancher/k3s/data/current/bin/portmap /opt/cni/bin/portmap # Workaround for https://github.com/k3s-io/k3s/issues/219 -- mkdir -p /root/.kube -- cp -i /etc/rancher/k3s/k3s.yaml /root/.kube/config -- until KUBECONFIG=/root/.kube/config kubectl get node; do sleep 2;done -- KUBECONFIG=/root/.kube/config kubectl -n kube-system create secret generic hcloud --from-literal=token={{.HcloudToken}} --from-literal=network={{.HcloudNetwork}} -{{else}} -- curl -sfL https://get.k3s.io | {{.JoinCMD}} sh -s - --kubelet-arg="cloud-provider=external" -- sleep 10 # to get the joining work -{{end}} diff --git a/e2etests/templates/cloudinit_k8s.txt.tpl b/e2etests/templates/cloudinit_k8s.txt.tpl deleted file mode 100644 index 7382c63e6..000000000 --- a/e2etests/templates/cloudinit_k8s.txt.tpl +++ /dev/null @@ -1,61 +0,0 @@ -#cloud-config -write_files: -- content: | - overlay - br_netfilter - path: /etc/modules-load.d/containerd.conf -- content: | - net.bridge.bridge-nf-call-ip6tables = 1 - net.bridge.bridge-nf-call-iptables = 1 - net.ipv4.ip_forward = 1 - path: /etc/sysctl.d/k8s.conf -- content: | - apiVersion: kubeadm.k8s.io/v1beta2 - kind: ClusterConfiguration - kubernetesVersion: v{{.K8sVersion}} - networking: - podSubnet: "10.244.0.0/16" - path: /tmp/kubeadm-config.yaml -- content: | - [Service] - Environment="KUBELET_EXTRA_ARGS=--cloud-provider=external" - path: /etc/systemd/system/kubelet.service.d/20-hcloud.conf -- content: | - alias k="kubectl" - alias ksy="kubectl -n kube-system" - alias kgp="kubectl get pods" - alias kgs="kubectl get services" - alias cilog="cat /var/log/cloud-init-output.log" - export HCLOUD_TOKEN={{.HcloudToken}} - path: /root/.bashrc -runcmd: -- export HOME=/root -- modprobe overlay -- modprobe br_netfilter -- sysctl --system -- apt install -y apt-transport-https curl -- curl -s https://packages.cloud.google.com/apt/doc/apt-key.gpg | apt-key add - -- echo "deb https://apt.kubernetes.io/ kubernetes-xenial main" > /etc/apt/sources.list.d/kubernetes.list -- curl -fsSL https://download.docker.com/linux/ubuntu/gpg | gpg --dearmor -o /usr/share/keyrings/docker-archive-keyring.gpg -- echo "deb [arch=amd64 signed-by=/usr/share/keyrings/docker-archive-keyring.gpg] https://download.docker.com/linux/ubuntu $(lsb_release -cs) stable" | tee /etc/apt/sources.list.d/docker.list > /dev/null 
-- apt update -- apt install -y kubectl={{.K8sVersion}}-00 kubeadm={{.K8sVersion}}-00 kubelet={{.K8sVersion}}-00 containerd.io -- systemctl daemon-reload -- mkdir -p /etc/containerd -- containerd config default | tee /etc/containerd/config.toml -- systemctl restart containerd -- systemctl restart kubelet -# Download and install latest hcloud cli release for easier debugging on host -- curl -s https://api.github.com/repos/hetznercloud/cli/releases/latest | grep browser_download_url | grep linux-amd64 | cut -d '"' -f 4 | wget -qi - -- tar xvzf hcloud-linux-amd64.tar.gz && cp hcloud /usr/bin/hcloud && chmod +x /usr/bin/hcloud -{{if .IsClusterServer}} -- kubeadm init --config /tmp/kubeadm-config.yaml -- mkdir -p /root/.kube -- cp -i /etc/kubernetes/admin.conf /root/.kube/config -- until KUBECONFIG=/root/.kube/config kubectl get node; do sleep 2;done -- KUBECONFIG=/root/.kube/config kubectl -n kube-system create secret generic hcloud --from-literal=token={{.HcloudToken}} --from-literal=network={{.HcloudNetwork}} -- kubeadm token create --print-join-command >> /root/join.txt -{{else}} -- {{.JoinCMD}} -- sleep 10 # to get the joining work -{{end}} diff --git a/e2etests/testing.go b/e2etests/testing.go deleted file mode 100644 index 47483a4fc..000000000 --- a/e2etests/testing.go +++ /dev/null @@ -1,616 +0,0 @@ -package e2etests - -import ( - "context" - "crypto/tls" - "fmt" - "math/rand" - "net" - "net/http" - "os" - "strings" - "sync" - "testing" - "time" - - corev1 "k8s.io/api/core/v1" - k8serrors "k8s.io/apimachinery/pkg/api/errors" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/apimachinery/pkg/util/intstr" - "k8s.io/apimachinery/pkg/util/wait" - "k8s.io/client-go/kubernetes" - "k8s.io/client-go/tools/clientcmd" - - "github.com/hetznercloud/hcloud-cloud-controller-manager/internal/testsupport" - "github.com/hetznercloud/hcloud-go/hcloud" -) - -var rng *rand.Rand - -func init() { - rng = rand.New(rand.NewSource(time.Now().UnixNano())) -} - -type TestCluster struct { - KeepOnFailure bool - useNetworks bool - setup *hcloudK8sSetup - k8sClient *kubernetes.Clientset - started bool - certificates []*hcloud.Certificate - - mu sync.Mutex -} - -func (tc *TestCluster) initialize() error { - const op = "e2tests/TestCluster.initialize" - - if tc.started { - return nil - } - - fmt.Printf("%s: Starting CCM Testsuite\n", op) - - networksSupport := os.Getenv("USE_NETWORKS") - if networksSupport == "yes" { - tc.useNetworks = true - } - isUsingGithubActions := os.Getenv("GITHUB_ACTIONS") - isUsingGitlabCI := os.Getenv("CI_JOB_ID") - testIdentifier := "" - if isUsingGithubActions == "true" { - testIdentifier = fmt.Sprintf("gh-%s-%d", os.Getenv("GITHUB_RUN_ID"), rng.Int()) - fmt.Printf("%s: Running in Github Action\n", op) - } - if isUsingGitlabCI != "" { - testIdentifier = fmt.Sprintf("gl-%s", isUsingGitlabCI) - fmt.Printf("%s: Running in Gitlab CI\n", op) - } - if testIdentifier == "" { - testIdentifier = fmt.Sprintf("local-%d", rng.Int()) - fmt.Printf("%s: Running local\n", op) - } - - k8sVersion := os.Getenv("K8S_VERSION") - if k8sVersion == "" { - k8sVersion = "k8s-1.18.9" - } - - k8sVersionsDetails := strings.Split(k8sVersion, "-") - if len(k8sVersionsDetails) != 2 { - return fmt.Errorf("%s: invalid k8s version: %v should be format -", op, k8sVersion) - } - - token := os.Getenv("HCLOUD_TOKEN") - if len(token) != 64 { - return fmt.Errorf("%s: No valid HCLOUD_TOKEN found", op) - } - tc.KeepOnFailure = os.Getenv("KEEP_SERVER_ON_FAILURE") == "yes" - - var additionalSSHKeys []*hcloud.SSHKey - - opts 
:= []hcloud.ClientOption{ - hcloud.WithToken(token), - hcloud.WithApplication("hcloud-ccm-testsuite", "1.0"), - } - hcloudClient := hcloud.NewClient(opts...) - additionalSSHKeysIDOrName := os.Getenv("USE_SSH_KEYS") - if additionalSSHKeysIDOrName != "" { - idsOrNames := strings.Split(additionalSSHKeysIDOrName, ",") - for _, idOrName := range idsOrNames { - additionalSSHKey, _, err := hcloudClient.SSHKey.Get(context.Background(), idOrName) - if err != nil { - return fmt.Errorf("%s: %s", op, err) - } - additionalSSHKeys = append(additionalSSHKeys, additionalSSHKey) - } - } - - fmt.Printf("%s: Test against %s\n", op, k8sVersion) - - imageName := os.Getenv("CCM_IMAGE_NAME") - buildImage := false - if imageName == "" { - imageName = fmt.Sprintf("hcloud-ccm:ci_%s", testIdentifier) - buildImage = true - } - if buildImage { - fmt.Printf("%s: Building ccm image\n", op) - - err := runCmd( - "go", - []string{"build", "-o", "../hcloud-cloud-controller-manager", "../."}, - []string{"CGO_ENABLED=0", "GOOS=linux", "GOARCH=amd64"}, - ) - if err != nil { - return fmt.Errorf("%s: %v", op, err) - } - - if err := runCmd("docker", []string{"build", "-t", imageName, "../"}, nil); err != nil { - return fmt.Errorf("%s: %v", op, err) - } - } - - fmt.Printf("%s: Saving ccm image to disk\n", op) - if err := runCmd("docker", []string{"save", "--output", "ci-hcloud-ccm.tar", imageName}, nil); err != nil { - return fmt.Errorf("%s: %v", op, err) - } - - tc.setup = &hcloudK8sSetup{ - Hcloud: hcloudClient, - K8sDistribution: K8sDistribution(k8sVersionsDetails[0]), - K8sVersion: k8sVersionsDetails[1], - TestIdentifier: testIdentifier, - ImageName: imageName, - HcloudToken: token, - KeepOnFailure: tc.KeepOnFailure, - UseNetworks: tc.useNetworks, - } - fmt.Printf("%s: Setting up test env\n", op) - - kubeconfigPath, err := tc.setup.PrepareTestEnv(context.Background(), additionalSSHKeys) - if err != nil { - return fmt.Errorf("%s: %s", op, err) - } - - config, err := clientcmd.BuildConfigFromFlags("", kubeconfigPath) - if err != nil { - return fmt.Errorf("%s: clientcmd.BuildConfigFromFlags: %s", op, err) - } - - tc.k8sClient, err = kubernetes.NewForConfig(config) - if err != nil { - return fmt.Errorf("%s: kubernetes.NewForConfig: %s", op, err) - } - - tc.started = true - return nil -} - -func (tc *TestCluster) Start() error { - const op = "e2etests/TestCluster.Start" - - tc.mu.Lock() - defer tc.mu.Unlock() - - if err := tc.initialize(); err != nil { - return fmt.Errorf("%s: %v", op, err) - } - if err := tc.ensureNodesReady(); err != nil { - return fmt.Errorf("%s: %v", op, err) - } - if err := tc.ensurePodsReady(); err != nil { - return fmt.Errorf("%s: %v", op, err) - } - return nil -} - -func (tc *TestCluster) Stop(testFailed bool) error { - const op = "e2etests/TestCluster.Stop" - - tc.mu.Lock() - defer tc.mu.Unlock() - - if !tc.started { - return nil - } - - for _, c := range tc.certificates { - if _, err := tc.setup.Hcloud.Certificate.Delete(context.Background(), c); err != nil { - fmt.Printf("%s: delete certificate %d: %v", op, c.ID, err) - } - } - - if err := tc.setup.TearDown(testFailed); err != nil { - fmt.Printf("%s: Tear Down: %s", op, err) - } - return nil -} - -func (tc *TestCluster) ensureNodesReady() error { - const op = "e2etests/ensureNodesReady" - - err := wait.Poll(1*time.Second, 5*time.Minute, func() (bool, error) { - var totalNodes = len(tc.setup.WorkerNodes) + 1 // Number Worker Nodes + 1 Cluster Node - var readyNodes int - nodes, err := tc.k8sClient.CoreV1().Nodes().List(context.Background(), 
metav1.ListOptions{}) - if err != nil { - return false, err - } - for _, node := range nodes.Items { - for _, cond := range node.Status.Conditions { - if cond.Type == corev1.NodeReady && cond.Status == corev1.ConditionTrue { - readyNodes++ - } - } - } - pendingNodes := totalNodes - readyNodes - fmt.Printf("Waiting for %d/%d nodes\n", pendingNodes, totalNodes) - return pendingNodes == 0, err - }) - - if err != nil { - return fmt.Errorf("%s: %s", op, err) - } - return nil -} - -func (tc *TestCluster) ensurePodsReady() error { - const op = "e2etests/ensurePodsReady" - - err := wait.Poll(1*time.Second, 10*time.Minute, func() (bool, error) { - pods, err := tc.k8sClient.CoreV1().Pods("kube-system").List(context.Background(), metav1.ListOptions{}) - if err != nil { - return false, err - } - totalPods := len(pods.Items) - - var readyPods int - for _, pod := range pods.Items { - for _, cond := range pod.Status.Conditions { - if cond.Type == corev1.PodReady && cond.Status == corev1.ConditionTrue { - readyPods++ - } - } - } - - pendingPods := totalPods - readyPods - fmt.Printf("Waiting for %d/%d pods\n", pendingPods, totalPods) - return pendingPods == 0, err - }) - - if err != nil { - return fmt.Errorf("%s: %s", op, err) - } - return nil -} - -// CreateTLSCertificate creates a TLS certificate used for testing and posts it -// to the Hetzner Cloud backend. -// -// The baseName of the certificate gets a random number suffix attached. -// baseName and suffix are separated by a single "-" character. -func (tc *TestCluster) CreateTLSCertificate(t *testing.T, baseName string) *hcloud.Certificate { - const op = "e2etests/TestCluster.CreateTLSCertificate" - - rndInt := rng.Int() - name := fmt.Sprintf("%s-%d", baseName, rndInt) - - p := testsupport.NewTLSPair(t, fmt.Sprintf("www.example%d.com", rndInt)) - opts := hcloud.CertificateCreateOpts{ - Name: name, - Certificate: p.Cert, - PrivateKey: p.Key, - } - cert, _, err := tc.setup.Hcloud.Certificate.Create(context.Background(), opts) - if err != nil { - t.Fatalf("%s: %s: %v", op, name, err) - } - if cert == nil { - t.Fatalf("%s: no certificate created", op) - } - - tc.mu.Lock() - defer tc.mu.Unlock() - tc.certificates = append(tc.certificates, cert) - - return cert -} - -type lbTestHelper struct { - podName string - port int - K8sClient *kubernetes.Clientset - KeepOnFailure bool - t *testing.T -} - -// DeployTestPod deploys a basic nginx pod within the k8s cluster -// and waits until it is "ready". 
-func (l *lbTestHelper) DeployTestPod() *corev1.Pod { - const op = "lbTestHelper/DeployTestPod" - - podName := fmt.Sprintf("pod-%s", l.podName) - testPod := corev1.Pod{ - ObjectMeta: metav1.ObjectMeta{ - Name: podName, - Labels: map[string]string{ - "app": podName, - }, - }, - Spec: corev1.PodSpec{ - Containers: []corev1.Container{ - { - Name: "nginx-hello-world", - Image: "nginxdemos/hello:plain-text", - Ports: []corev1.ContainerPort{ - { - ContainerPort: 80, - Name: "http", - }, - }, - }, - }, - }, - } - - pod, err := l.K8sClient.CoreV1().Pods(corev1.NamespaceDefault).Create(context.Background(), &testPod, metav1.CreateOptions{}) - if err != nil { - l.t.Fatalf("%s: could not create test pod: %s", op, err) - } - err = wait.Poll(1*time.Second, 1*time.Minute, func() (done bool, err error) { - p, err := l.K8sClient.CoreV1().Pods(corev1.NamespaceDefault).Get(context.Background(), podName, metav1.GetOptions{}) - if err != nil { - return false, err - } - for _, condition := range p.Status.Conditions { - if condition.Type == corev1.PodReady && condition.Status == corev1.ConditionTrue { - return true, nil - } - } - pod = p - return false, nil - }) - if err != nil { - l.t.Fatalf("%s: pod %s did not come up after 1 minute: %s", op, podName, err) - } - return pod -} - -// ServiceDefinition returns a service definition for a Hetzner Cloud Load Balancer (k8s service). -func (l *lbTestHelper) ServiceDefinition(pod *corev1.Pod, annotations map[string]string) *corev1.Service { - port := l.port - if port == 0 { - port = 80 - } - - return &corev1.Service{ - ObjectMeta: metav1.ObjectMeta{ - Name: fmt.Sprintf("svc-%s", l.podName), - Annotations: annotations, - }, - Spec: corev1.ServiceSpec{ - Selector: map[string]string{ - "app": pod.Name, - }, - Type: corev1.ServiceTypeLoadBalancer, - Ports: []corev1.ServicePort{ - { - Port: int32(port), - TargetPort: intstr.FromInt(80), - Name: "http", - }, - }, - ExternalTrafficPolicy: corev1.ServiceExternalTrafficPolicyTypeLocal, - }, - } -} - -// CreateService creates a k8s service based on the given service definition -// and waits until it is "ready". -func (l *lbTestHelper) CreateService(lbSvc *corev1.Service) (*corev1.Service, error) { - const op = "lbTestHelper/CreateService" - _, err := l.K8sClient.CoreV1().Services(corev1.NamespaceDefault).Create(context.Background(), lbSvc, metav1.CreateOptions{}) - if err != nil { - return nil, fmt.Errorf("%s: could not create service: %s", op, err) - } - - err = wait.Poll(1*time.Second, 5*time.Minute, func() (done bool, err error) { - svc, err := l.K8sClient.CoreV1().Services(corev1.NamespaceDefault).Get(context.Background(), lbSvc.Name, metav1.GetOptions{}) - if err != nil { - return false, err - } - ingressIPs := svc.Status.LoadBalancer.Ingress - if len(ingressIPs) > 0 { - lbSvc = svc - return true, nil - } - return false, nil - }) - if err != nil { - return nil, fmt.Errorf("%s: test service (load balancer) did not come up after 5 minute: %s", op, err) - } - return lbSvc, nil -} - -// TearDown deletes the created pod and service. 
-func (l *lbTestHelper) TearDown() { - const op = "lbTestHelper/TearDown" - - if l.KeepOnFailure && l.t.Failed() { - return - } - - svcName := fmt.Sprintf("svc-%s", l.podName) - err := l.K8sClient.CoreV1().Services(corev1.NamespaceDefault).Delete(context.Background(), svcName, metav1.DeleteOptions{}) - if err != nil && !k8serrors.IsNotFound(err) { - l.t.Errorf("%s: deleting test svc failed: %s", op, err) - } - - err = wait.Poll(1*time.Second, 3*time.Minute, func() (done bool, err error) { - _, err = l.K8sClient.CoreV1().Services(corev1.NamespaceDefault).Get(context.Background(), svcName, metav1.GetOptions{}) - if err != nil { - if k8serrors.IsNotFound(err) { - return true, nil - } - return false, err - } - return false, nil - }) - if err != nil { - l.t.Errorf("%s: test service was not removed after 3 minutes: %s", op, err) - } - - podName := fmt.Sprintf("pod-%s", l.podName) - err = l.K8sClient.CoreV1().Pods(corev1.NamespaceDefault).Delete(context.Background(), podName, metav1.DeleteOptions{}) - if err != nil && !k8serrors.IsNotFound(err) { - l.t.Errorf("%s: deleting test pod failed: %s", op, err) - } - err = wait.Poll(1*time.Second, 3*time.Minute, func() (done bool, err error) { - _, err = l.K8sClient.CoreV1().Pods(corev1.NamespaceDefault).Get(context.Background(), podName, metav1.GetOptions{}) - if err != nil { - if k8serrors.IsNotFound(err) { - return true, nil - } - return false, err - } - return false, nil - }) - if err != nil { - l.t.Errorf("%s: test pod not removed after 3 minutes: %s", op, err) - } -} - -type nwTestHelper struct { - podName string - K8sClient *kubernetes.Clientset - privateKey string - t *testing.T -} - -// DeployTestPod deploys a basic nginx pod within the k8s cluster -// and waits until it is "ready". -func (n *nwTestHelper) DeployTestPod() *corev1.Pod { - const op = "nwTestHelper/DeployTestPod" - podName := fmt.Sprintf("pod-%s", n.podName) - testPod := corev1.Pod{ - ObjectMeta: metav1.ObjectMeta{ - Name: podName, - Labels: map[string]string{ - "app": podName, - }, - }, - Spec: corev1.PodSpec{ - Containers: []corev1.Container{ - { - Name: "nginx-hello-world", - Image: "nginxdemos/hello:plain-text", - Ports: []corev1.ContainerPort{ - { - ContainerPort: 80, - Name: "http", - }, - }, - }, - }, - }, - } - - pod, err := n.K8sClient.CoreV1().Pods(corev1.NamespaceDefault).Create(context.Background(), &testPod, metav1.CreateOptions{}) - if err != nil { - n.t.Fatalf("%s: could not create test pod: %s", op, err) - } - err = wait.Poll(1*time.Second, 1*time.Minute, func() (done bool, err error) { - p, err := n.K8sClient.CoreV1().Pods(corev1.NamespaceDefault).Get(context.Background(), podName, metav1.GetOptions{}) - if err != nil { - return false, err - } - for _, condition := range p.Status.Conditions { - if condition.Type == corev1.PodReady && condition.Status == corev1.ConditionTrue { - return true, nil - } - } - pod = p - return false, nil - }) - if err != nil { - n.t.Fatalf("%s: pod %s did not come up after 1 minute: %s", op, podName, err) - } - pod, err = n.K8sClient.CoreV1().Pods(corev1.NamespaceDefault).Get(context.Background(), podName, metav1.GetOptions{}) - if err != nil { - n.t.Fatalf("%s: could not create test pod: %s", op, err) - } - return pod -} - -// TearDown deletes the created pod. 
-func (n *nwTestHelper) TearDown() { - const op = "nwTestHelper/TearDown" - podName := fmt.Sprintf("pod-%s", n.podName) - err := n.K8sClient.CoreV1().Pods(corev1.NamespaceDefault).Delete(context.Background(), podName, metav1.DeleteOptions{}) - if err != nil && !k8serrors.IsNotFound(err) { - n.t.Errorf("%s: deleting test pod failed: %s", op, err) - } - err = wait.Poll(1*time.Second, 1*time.Minute, func() (done bool, err error) { - _, err = n.K8sClient.CoreV1().Pods(corev1.NamespaceDefault).Get(context.Background(), podName, metav1.GetOptions{}) - if err != nil { - if k8serrors.IsNotFound(err) { - return true, nil - } - return false, err - } - return false, nil - }) - if err != nil { - n.t.Errorf("%s: test pod not removed after 1 minute: %s", op, err) - } -} - -// WaitForHTTPAvailable tries to connect to the given IP via http -// It tries it for 2 minutes, if after two minutes the connection -// wasn't successful and it wasn't a HTTP 200 response it will fail. -func WaitForHTTPAvailable(t *testing.T, ingressIP string, useHTTPS bool) { - const op = "e2etests/WaitForHTTPAvailable" - - client := &http.Client{ - Timeout: 1 * time.Second, - Transport: &http.Transport{ - TLSClientConfig: &tls.Config{ - InsecureSkipVerify: true, // nolint - }, - }, - } - proto := "http" - if useHTTPS { - proto = "https" - } - - err := wait.Poll(1*time.Second, 2*time.Minute, func() (bool, error) { - resp, err := client.Get(fmt.Sprintf("%s://%s", proto, ingressIP)) - if err != nil { - return false, nil - } - defer resp.Body.Close() - switch resp.StatusCode { - case http.StatusOK: - // Success - return true, nil - case http.StatusServiceUnavailable: - // Health checks are still evaluating - return false, nil - default: - return false, fmt.Errorf("%s: got HTTP Code %d instead of 200", op, resp.StatusCode) - } - }) - if err != nil { - t.Errorf("%s: not available via client.Get: %s", op, err) - } -} - -// WaitForHTTPOnServer tries to connect to the given IP using curl. -// -// It tries it for 2 minutes, if after two minutes the connection wasn't -// successful or it was not a HTTP 200 response it will fail. -func WaitForHTTPOnServer(t *testing.T, srv *hcloud.Server, privateKey, tgtIP string, useHTTPS bool) { - const op = "e2etests/WaitForHTTPOnServer" - - proto := "http" - if useHTTPS { - proto = "https" - } - cmd := fmt.Sprintf("curl -k %s://%s", proto, tgtIP) - if net.ParseIP(tgtIP).To4() == nil { - // Assume its a IPv6 address - cmd = fmt.Sprintf("curl -6 -kg %s://[%s]", proto, tgtIP) - } - - err := wait.Poll(1*time.Second, 2*time.Minute, func() (bool, error) { - if err := RunCommandOnServer(privateKey, srv, cmd); err != nil { - return false, nil - } - return true, nil - }) - if err != nil { - t.Errorf("%s: not available via %q: %s", op, cmd, err) - } -} diff --git a/hack/Dockerfile b/hack/Dockerfile index d47434e00..c7107b6a1 100644 --- a/hack/Dockerfile +++ b/hack/Dockerfile @@ -1,15 +1,14 @@ FROM golang:1.20 as builder WORKDIR /hccm -ADD ../go.mod go.sum /hccm/ +ADD go.mod go.sum /hccm/ RUN go mod download -ADD .. /hccm/ +ADD . 
/hccm/ RUN ls -al # `skaffold debug` sets SKAFFOLD_GO_GCFLAGS to disable compiler optimizations ARG SKAFFOLD_GO_GCFLAGS RUN CGO_ENABLED=0 go build -gcflags="${SKAFFOLD_GO_GCFLAGS}" -o hcloud-cloud-controller-manager.bin github.com/hetznercloud/hcloud-cloud-controller-manager - FROM alpine:3.17 RUN apk add --no-cache ca-certificates bash COPY --from=builder /hccm/hcloud-cloud-controller-manager.bin /bin/hcloud-cloud-controller-manager -ENTRYPOINT ["/bin/hcloud-cloud-controller-manager"] \ No newline at end of file +ENTRYPOINT ["/bin/hcloud-cloud-controller-manager"] diff --git a/hack/dev-down.sh b/hack/dev-down.sh new file mode 100755 index 000000000..9a7ade0a1 --- /dev/null +++ b/hack/dev-down.sh @@ -0,0 +1,37 @@ +#!/usr/bin/env bash +set -ue -o pipefail +SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )" + +scope="${SCOPE:-dev}" +scope=${scope//[^a-zA-Z0-9_]/-} +scope_name=hccm-${scope} +label="managedby=hack" + +if [[ "${ALL:-}" == "" ]]; then + label="$label,scope=$scope_name" + rm -f $SCRIPT_DIR/.ssh-$scope $SCRIPT_DIR/.kubeconfig-$scope +else + rm -f $SCRIPT_DIR/.ssh* $SCRIPT_DIR/.kubeconfig* +fi + +for instance in $(hcloud server list -o noheader -o columns=id -l $label); do + ( + hcloud server delete $instance + ) & +done + + +for key in $(hcloud ssh-key list -o noheader -o columns=name -l $label); do + ( + hcloud ssh-key delete $key + ) & +done + + +for key in $(hcloud network list -o noheader -o columns=name -l $label); do + ( + hcloud network delete $key + ) & +done + +wait diff --git a/hack/dev-up.sh b/hack/dev-up.sh new file mode 100755 index 000000000..8b99873ba --- /dev/null +++ b/hack/dev-up.sh @@ -0,0 +1,166 @@ +#!/usr/bin/env bash +set -ueo pipefail +SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )" + +if [[ -n "${DEBUG:-}" ]]; then set -x; fi + +# Redirect all stdout to stderr. +{ + if ! hcloud version >/dev/null; then echo "ERROR: 'hcloud' CLI not found, please install it and make it available on your \$PATH"; exit 1; fi + if ! k3sup version >/dev/null; then echo "ERROR: 'k3sup' not found, please install it and make it available on your \$PATH"; exit 1; fi + if ! helm version >/dev/null; then echo "ERROR: 'helm' not found, please install it and make it available on your \$PATH"; exit 1; fi + if [[ "${HCLOUD_TOKEN:-}" == "" ]]; then echo "ERROR: please set \$HCLOUD_TOKEN"; exit 1; fi + + # We run a lot of subshells below for speed. If any encounter an error, we shut down the whole process group, pronto. + function error() { + echo "Onoes, something went wrong! :( The output above might have some clues." 
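+ # kill 0 signals the whole process group, so every parallel subshell started below exits as soon as any of them fails.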
+ kill 0 + } + + trap error ERR + + image_name=${IMAGE_NAME:-ubuntu-20.04} + instance_count=${INSTANCES:-1} + instance_type=${INSTANCE_TYPE:-cpx11} + location=${LOCATION:-fsn1} + network_zone=${NETWORK_ZONE:-eu-central} + ssh_keys=${SSH_KEYS:-} + channel=${K3S_CHANNEL:-stable} + network_cidr=${NETWORK_CIDR:-10.0.0.0/8} + subnet_cidr=${SUBNET_CIDR:-10.0.0.0/24} + cluster_cidr=${CLUSTER_CIDR:-10.244.0.0/16} + scope="${SCOPE:-dev}" + scope=${scope//[^a-zA-Z0-9_]/-} + scope_name=hccm-${scope} + label="managedby=hack,scope=$scope_name" + ssh_private_key="$SCRIPT_DIR/.ssh-$scope" + k3s_opts=${K3S_OPTS:-"--kubelet-arg cloud-provider=external --disable=traefik --disable=servicelb --flannel-backend=none --disable=local-storage"} + k3s_server_opts=${K3S_SERVER_OPTS:-"--disable-cloud-controller --cluster-cidr ${cluster_cidr}"} + + echo -n "$HCLOUD_TOKEN" > "$SCRIPT_DIR/.token-$scope" + + export KUBECONFIG="$SCRIPT_DIR/.kubeconfig-$scope" + + ssh_command="ssh -i $ssh_private_key -o StrictHostKeyChecking=off -o BatchMode=yes -o ConnectTimeout=5" + + # Generate SSH keys and upload the public key to Hetzner Cloud. + ( trap error ERR + [[ ! -f $ssh_private_key ]] && ssh-keygen -t ed25519 -f $ssh_private_key -C '' -N '' + [[ ! -f $ssh_private_key.pub ]] && ssh-keygen -y -f $ssh_private_key > $ssh_private_key.pub + if ! hcloud ssh-key describe $scope_name >/dev/null 2>&1; then + hcloud ssh-key create --label $label --name $scope_name --public-key-from-file $ssh_private_key.pub + fi + ) & + + # Create Network + ( trap error ERR + if ! hcloud network describe $scope_name >/dev/null 2>&1; then + hcloud network create --label $label --ip-range $network_cidr --name $scope_name + hcloud network add-subnet --network-zone $network_zone --type cloud --ip-range $subnet_cidr $scope_name + fi + ) & + + + for num in $(seq $instance_count); do + # Create server and initialize Kubernetes on it with k3sup. + ( trap error ERR + + server_name="$scope_name-$num" + + # Maybe cluster is already up and node is already there. + if kubectl get node $server_name >/dev/null 2>&1; then + exit 0 + fi + + ip=$(hcloud server ip $server_name 2>/dev/null || true) + + if [[ -z "${ip:-}" ]]; then + # Wait for SSH key + until hcloud ssh-key describe $scope_name >/dev/null 2>&1; do sleep 1; done + until hcloud network describe $scope_name >/dev/null 2>&1; do sleep 1; done + + createcmd="hcloud server create --image $image_name --label $label --location $location --name $server_name --ssh-key=$scope_name --type $instance_type --network $scope_name" + for key in $ssh_keys; do + createcmd+=" --ssh-key $key" + done + $createcmd + ip=$(hcloud server ip $server_name) + fi + + # Wait for SSH. + until [ "$($ssh_command root@$ip echo ok 2>/dev/null)" = "ok" ]; do + sleep 1 + done + + $ssh_command root@$ip 'mkdir -p /etc/rancher/k3s && cat > /etc/rancher/k3s/registries.yaml' < $SCRIPT_DIR/k3s-registries.yaml + + private_ip=$(hcloud server describe $server_name -o format="{{ (index .PrivateNet 0).IP }}") + k3s_node_ip_opts="--node-external-ip ${ip} --node-ip ${private_ip}" + + if [[ "$num" == "1" ]]; then + # First node is control plane. + k3sup install --print-config=false --ip $ip --k3s-channel $channel --k3s-extra-args "${k3s_server_opts} ${k3s_opts} ${k3s_node_ip_opts}" --local-path $KUBECONFIG --ssh-key $ssh_private_key + else + # All subsequent nodes are initialized as workers. + + # Can't go any further until control plane has bootstrapped a bit though.
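+ # The node password file only appears once k3s has finished starting on the first node, so poll for it over SSH before joining this worker.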
+ until $ssh_command root@$(hcloud server ip $scope_name-1 || true) stat /etc/rancher/node/password >/dev/null 2>&1; do + sleep 1 + done + + k3sup join --server-ip $(hcloud server ip $scope_name-1) --ip $ip --k3s-channel $channel --k3s-extra-args "${k3s_opts} ${k3s_node_ip_opts}" --ssh-key $ssh_private_key + fi + ) & + + # Wait for this node to show up in the cluster. + ( trap error ERR; set +x + until kubectl wait --for=condition=Ready node/$scope_name-$num >/dev/null 2>&1; do sleep 1; done + echo $scope_name-$num is up and in cluster + ) & + done + + ( trap error ERR + # Control plane init tasks. + # This is running in parallel with the server init, above. + + # Wait for control plane to look alive. + until kubectl get nodes >/dev/null 2>&1; do sleep 1; done; + + # Deploy private registry. + ( trap error ERR + if ! helm status -n kube-system registry >/dev/null 2>&1; then + helm install registry docker-registry \ + --repo=https://helm.twun.io \ + -n kube-system \ + --version 2.2.2 \ + --set service.clusterIP=10.43.0.2 \ + --set 'tolerations[0].key=node.cloudprovider.kubernetes.io/uninitialized' \ + --set 'tolerations[0].operator=Exists' + fi + ) & + + # Install Cilium. + ( trap error ERR + if ! helm status -n kube-system cilium >/dev/null 2>&1; then + helm install cilium cilium --repo https://helm.cilium.io/ -n kube-system --version 1.13.1 \ + --set tunnel=disabled \ + --set ipv4NativeRoutingCIDR=$cluster_cidr \ + --set ipam.mode=kubernetes + fi) & + + # Create HCLOUD_TOKEN Secret for hcloud-cloud-controller-manager. + ( trap error ERR + if ! kubectl -n kube-system get secret hcloud >/dev/null 2>&1; then + kubectl -n kube-system create secret generic hcloud --from-literal="token=$HCLOUD_TOKEN" --from-literal="network=$scope_name" + fi) & + wait + ) & + wait + echo "Success - cluster fully initialized and ready, why not see for yourself?" 
+ echo '$ kubectl get nodes' + kubectl get nodes +} >&2 + +echo "export KUBECONFIG=$KUBECONFIG" +$SCRIPT_DIR/registry-port-forward.sh +echo "export SKAFFOLD_DEFAULT_REPO=localhost:30666" diff --git a/hack/k3s-registries.yaml b/hack/k3s-registries.yaml new file mode 100644 index 000000000..8c808b121 --- /dev/null +++ b/hack/k3s-registries.yaml @@ -0,0 +1,3 @@ +mirrors: + localhost:30666: + endpoint: ["http://10.43.0.2:5000"] diff --git a/hack/kustomization.yaml b/hack/kustomization.yaml deleted file mode 100644 index c3d6e7e51..000000000 --- a/hack/kustomization.yaml +++ /dev/null @@ -1,17 +0,0 @@ -apiVersion: kustomize.config.k8s.io/v1beta1 -kind: Kustomization -resources: - - ../deploy -patches: - - target: - kind: Deployment - name: hcloud-cloud-controller-manager - patch: |- - - op: add - path: /spec/template/spec/containers/0/env/- - value: - name: LOG_LEVEL - value: info - - op: replace - path: /spec/template/spec/containers/0/env/1/valueFrom/secretKeyRef/name - value: hcloud \ No newline at end of file diff --git a/hack/registry-port-forward.sh b/hack/registry-port-forward.sh new file mode 100755 index 000000000..082079d24 --- /dev/null +++ b/hack/registry-port-forward.sh @@ -0,0 +1,16 @@ +#!/usr/bin/env bash +set -ue -o pipefail +SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )" + +{ +until kubectl -n kube-system --timeout=30s rollout status deployment/registry-docker-registry >/dev/null 2>&1; do sleep 1; done +old_pid=$(cat $SCRIPT_DIR/.reg-pf 2>/dev/null || true) +if [[ -n "$old_pid" ]]; then + echo "killing old port-forward with PID $old_pid" + kill $old_pid || true +fi + +nohup kubectl port-forward -n kube-system svc/registry-docker-registry 30666:5000 >$SCRIPT_DIR/.reg-pf.out 2>$SCRIPT_DIR/.reg-pf.err & +} >&2 + +echo $! > $SCRIPT_DIR/.reg-pf diff --git a/scripts/e2etest-local.sh b/scripts/e2etest-local.sh deleted file mode 100755 index 7a05cdca7..000000000 --- a/scripts/e2etest-local.sh +++ /dev/null @@ -1,43 +0,0 @@ -#!/usr/bin/env bash - -set -e - -function test_k8s_version() { - if [[ -z "$1" ]]; then - echo "Usage: $0 " - return 1 - fi - - export K8S_VERSION="$1" - - echo "Testing $K8S_VERSION without network support" - export USE_NETWORKS="no" - if ! go test -count=1 -v -timeout 60m ./e2etests; then - return 2 - fi - - echo - echo - echo "Testing $K8S_VERSION with network support" - export USE_NETWORKS="yes" - if ! go test -count=1 -v -timeout 60m ./e2etests; then - return 2 - fi -} - -if [[ -z "$HCLOUD_TOKEN" ]]; then - echo "HCLOUD_TOKEN not set! Aborting tests." 
- exit 1 -fi - -K8S_VERSIONS=( - "k8s-1.20.12" - "k3s-v1.20.12+k3s1" - "k8s-1.21.6" - "k3s-v1.21.6+k3s1" - "k8s-1.22.3" - "k3s-v1.22.3+k3s1" -) -for v in "${K8S_VERSIONS[@]}"; do - test_k8s_version "$v" -done diff --git a/skaffold.yaml b/skaffold.yaml index 803082933..a44783951 100644 --- a/skaffold.yaml +++ b/skaffold.yaml @@ -1,4 +1,4 @@ -apiVersion: skaffold/v2beta19 +apiVersion: skaffold/v4beta3 kind: Config metadata: name: cloud-controller-manager @@ -11,6 +11,12 @@ build: - hetznercloud/hcloud-cloud-controller-manager:buildcache local: useBuildkit: true -deploy: - kustomize: - paths: [hack/] \ No newline at end of file + insecureRegistries: + - localhost:30666 +manifests: + helm: + releases: + - name: hccm + chartPath: chart + setValues: + networking.enabled: true diff --git a/e2etests/e2e_test.go b/tests/e2e/e2e_test.go similarity index 63% rename from e2etests/e2e_test.go rename to tests/e2e/e2e_test.go index e3d493fbe..aece4b404 100644 --- a/e2etests/e2e_test.go +++ b/tests/e2e/e2e_test.go @@ -1,4 +1,4 @@ -package e2etests +package e2e import ( "context" @@ -7,10 +7,12 @@ import ( "os" "strings" "testing" + "time" "github.com/stretchr/testify/assert" corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/util/wait" "github.com/hetznercloud/hcloud-cloud-controller-manager/internal/annotation" "github.com/hetznercloud/hcloud-cloud-controller-manager/internal/hcops" @@ -27,7 +29,7 @@ func TestMain(m *testing.M) { rc := m.Run() - if err := testCluster.Stop(rc > 0); err != nil { + if err := testCluster.Stop(); err != nil { fmt.Printf("%v\n", err) os.Exit(1) } @@ -35,6 +37,8 @@ func TestMain(m *testing.M) { } func TestCloudControllerManagerPodIsPresent(t *testing.T) { + t.Parallel() + t.Run("hcloud-cloud-controller-manager pod is present in kube-system", func(t *testing.T) { pods, err := testCluster.k8sClient.CoreV1().Pods("kube-system").List(context.Background(), metav1.ListOptions{}) assert.NoError(t, err) @@ -52,7 +56,10 @@ func TestCloudControllerManagerPodIsPresent(t *testing.T) { }) t.Run("pod with app=hcloud-cloud-controller-manager is present in kube-system", func(t *testing.T) { - pods, err := testCluster.k8sClient.CoreV1().Pods("kube-system").List(context.Background(), metav1.ListOptions{LabelSelector: "app=hcloud-cloud-controller-manager"}) + pods, err := testCluster.k8sClient.CoreV1().Pods("kube-system"). 
+ List(context.Background(), metav1.ListOptions{ + LabelSelector: "app.kubernetes.io/name=hcloud-cloud-controller-manager", + }) assert.NoError(t, err) if len(pods.Items) == 0 { @@ -62,15 +69,22 @@ func TestCloudControllerManagerPodIsPresent(t *testing.T) { } func TestCloudControllerManagerSetCorrectNodeLabelsAndIPAddresses(t *testing.T) { - node, err := testCluster.k8sClient.CoreV1().Nodes().Get(context.Background(), testCluster.setup.ClusterNode.Name, metav1.GetOptions{}) + t.Parallel() + + node, err := testCluster.k8sClient.CoreV1().Nodes().Get(context.Background(), "hccm-"+testCluster.scope+"-1", metav1.GetOptions{}) assert.NoError(t, err) + server, _, err := testCluster.hcloud.Server.Get(context.TODO(), "hccm-"+testCluster.scope+"-1") + if err != nil { + return + } + labels := node.Labels expectedLabels := map[string]string{ - "node.kubernetes.io/instance-type": testCluster.setup.ClusterNode.ServerType.Name, - "topology.kubernetes.io/region": testCluster.setup.ClusterNode.Datacenter.Location.Name, - "topology.kubernetes.io/zone": testCluster.setup.ClusterNode.Datacenter.Name, - "kubernetes.io/hostname": testCluster.setup.ClusterNode.Name, + "node.kubernetes.io/instance-type": server.ServerType.Name, + "topology.kubernetes.io/region": server.Datacenter.Location.Name, + "topology.kubernetes.io/zone": server.Datacenter.Name, + "kubernetes.io/hostname": server.Name, "kubernetes.io/os": "linux", "kubernetes.io/arch": "amd64", } @@ -82,26 +96,31 @@ func TestCloudControllerManagerSetCorrectNodeLabelsAndIPAddresses(t *testing.T) for _, address := range node.Status.Addresses { if address.Type == corev1.NodeExternalIP { - expectedIP := testCluster.setup.ClusterNode.PublicNet.IPv4.IP.String() + expectedIP := server.PublicNet.IPv4.IP.String() if expectedIP != address.Address { t.Errorf("Got %s as NodeExternalIP but expected %s", address.Address, expectedIP) } } } - if testCluster.useNetworks { - for _, address := range node.Status.Addresses { - if address.Type == corev1.NodeInternalIP { - expectedIP := testCluster.setup.ClusterNode.PrivateNet[0].IP.String() - if expectedIP != address.Address { - t.Errorf("Got %s as NodeInternalIP but expected %s", address.Address, expectedIP) - } + + for _, address := range node.Status.Addresses { + if address.Type == corev1.NodeInternalIP { + expectedIP := server.PrivateNet[0].IP.String() + if expectedIP != address.Address { + t.Errorf("Got %s as NodeInternalIP but expected %s", address.Address, expectedIP) } } } } func TestCloudControllerManagerLoadBalancersMinimalSetup(t *testing.T) { - lbTest := lbTestHelper{t: t, K8sClient: testCluster.k8sClient, podName: "loadbalancer-minimal"} + t.Parallel() + + lbTest := lbTestHelper{ + t: t, + K8sClient: testCluster.k8sClient, + podName: "loadbalancer-minimal", + } pod := lbTest.DeployTestPod() @@ -114,24 +133,20 @@ func TestCloudControllerManagerLoadBalancersMinimalSetup(t *testing.T) { t.Fatalf("deploying test svc: %s", err) } - ingressIP := lbSvc.Status.LoadBalancer.Ingress[0].IP // Index 0 is always the public IP of the LB - WaitForHTTPAvailable(t, ingressIP, false) - - for _, ing := range lbSvc.Status.LoadBalancer.Ingress { - WaitForHTTPOnServer(t, testCluster.setup.ExtServer, testCluster.setup.privKey, ing.IP, false) - } + WaitForHTTPAvailable(t, lbSvc.Status.LoadBalancer.Ingress[0].IP, false) lbTest.TearDown() } func TestCloudControllerManagerLoadBalancersHTTPS(t *testing.T) { + t.Parallel() + cert := testCluster.CreateTLSCertificate(t, "loadbalancer-https") lbTest := lbTestHelper{ - t: t, - K8sClient: 
testCluster.k8sClient, - KeepOnFailure: testCluster.KeepOnFailure, - podName: "loadbalancer-https", - port: 443, + t: t, + K8sClient: testCluster.k8sClient, + podName: "loadbalancer-https", + port: 443, } pod := lbTest.DeployTestPod() @@ -147,24 +162,20 @@ func TestCloudControllerManagerLoadBalancersHTTPS(t *testing.T) { t.Fatalf("deploying test svc: %s", err) } - ingressIP := lbSvc.Status.LoadBalancer.Ingress[0].IP // Index 0 is always the public IP of the LB - WaitForHTTPAvailable(t, ingressIP, true) - - for _, ing := range lbSvc.Status.LoadBalancer.Ingress { - WaitForHTTPOnServer(t, testCluster.setup.ExtServer, testCluster.setup.privKey, ing.IP, true) - } + WaitForHTTPAvailable(t, lbSvc.Status.LoadBalancer.Ingress[0].IP, true) lbTest.TearDown() } func TestCloudControllerManagerLoadBalancersHTTPSWithManagedCertificate(t *testing.T) { + t.Parallel() + domainName := fmt.Sprintf("%d-ccm-test.hc-certs.de", rand.Int()) lbTest := lbTestHelper{ - t: t, - K8sClient: testCluster.k8sClient, - KeepOnFailure: testCluster.KeepOnFailure, - podName: "loadbalancer-https", - port: 443, + t: t, + K8sClient: testCluster.k8sClient, + podName: "loadbalancer-https", + port: 443, } pod := lbTest.DeployTestPod() @@ -181,7 +192,7 @@ func TestCloudControllerManagerLoadBalancersHTTPSWithManagedCertificate(t *testi if err != nil { t.Fatalf("deploying test svc: %s", err) } - certs, err := testCluster.setup.Hcloud.Certificate.AllWithOpts(context.Background(), hcloud.CertificateListOpts{ + certs, err := testCluster.hcloud.Certificate.AllWithOpts(context.Background(), hcloud.CertificateListOpts{ ListOpts: hcloud.ListOpts{ LabelSelector: fmt.Sprintf("%s=%s", hcops.LabelServiceUID, lbSvc.ObjectMeta.UID), }, @@ -190,14 +201,12 @@ func TestCloudControllerManagerLoadBalancersHTTPSWithManagedCertificate(t *testi assert.Len(t, certs, 1) lbTest.TearDown() - _, err = testCluster.setup.Hcloud.Certificate.Delete(context.Background(), certs[0]) + _, err = testCluster.hcloud.Certificate.Delete(context.Background(), certs[0]) assert.NoError(t, err) } func TestCloudControllerManagerLoadBalancersWithPrivateNetwork(t *testing.T) { - if testCluster.useNetworks == false { - t.Skipf("Private Networks test is disabled") - } + t.Parallel() lbTest := lbTestHelper{t: t, K8sClient: testCluster.k8sClient, podName: "loadbalancer-private-network"} @@ -213,22 +222,37 @@ func TestCloudControllerManagerLoadBalancersWithPrivateNetwork(t *testing.T) { t.Fatalf("deploying test svc: %s", err) } - ingressIP := lbSvc.Status.LoadBalancer.Ingress[0].IP // Index 0 is always the public IP of the LB - WaitForHTTPAvailable(t, ingressIP, false) + WaitForHTTPAvailable(t, lbSvc.Status.LoadBalancer.Ingress[0].IP, false) lbTest.TearDown() } func TestCloudControllerManagerNetworksPodIPsAreAccessible(t *testing.T) { - if testCluster.useNetworks == false { - t.Skipf("Private Networks test is disabled") - } - - nwTest := nwTestHelper{t: t, K8sClient: testCluster.k8sClient, privateKey: testCluster.setup.privKey, podName: "network-routes-accessible"} + t.Parallel() - pod := nwTest.DeployTestPod() - - WaitForHTTPOnServer(t, testCluster.setup.ExtServer, testCluster.setup.privKey, pod.Status.PodIP, false) + err := wait.Poll(1*time.Second, 2*time.Minute, func() (bool, error) { + node, err := testCluster.k8sClient.CoreV1().Nodes().Get(context.Background(), "hccm-"+testCluster.scope+"-1", metav1.GetOptions{}) + if err != nil { + return false, err + } - nwTest.TearDown() + network, _, err := testCluster.hcloud.Network.Get(context.TODO(), "hccm-"+testCluster.scope) + if err != 
nil { + return false, err + } + for _, route := range network.Routes { + if route.Destination.String() == node.Spec.PodCIDR { + for _, a := range node.Status.Addresses { + if a.Type == corev1.NodeInternalIP { + assert.Equal(t, a.Address, route.Gateway.String()) + } + } + return true, nil + } + } + return false, nil + }) + if err != nil { + t.Fatal(err) + } } diff --git a/tests/e2e/testing.go b/tests/e2e/testing.go new file mode 100644 index 000000000..9787fd162 --- /dev/null +++ b/tests/e2e/testing.go @@ -0,0 +1,311 @@ +package e2e + +import ( + "context" + "crypto/tls" + "fmt" + "math/rand" + "net/http" + "os" + "regexp" + "strconv" + "testing" + "time" + + corev1 "k8s.io/api/core/v1" + k8serrors "k8s.io/apimachinery/pkg/api/errors" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/util/intstr" + "k8s.io/apimachinery/pkg/util/wait" + "k8s.io/client-go/kubernetes" + "k8s.io/client-go/tools/clientcmd" + + "github.com/hetznercloud/hcloud-cloud-controller-manager/internal/testsupport" + "github.com/hetznercloud/hcloud-go/hcloud" +) + +var rng *rand.Rand +var scopeButcher = regexp.MustCompile(`[^a-zA-Z0-9_]`) + +func init() { + rng = rand.New(rand.NewSource(time.Now().UnixNano())) +} + +type TestCluster struct { + hcloud *hcloud.Client + k8sClient *kubernetes.Clientset + certificates []*hcloud.Certificate + scope string +} + +func (tc *TestCluster) Start() error { + tc.scope = os.Getenv("SCOPE") + if tc.scope == "" { + tc.scope = "dev" + } + tc.scope = scopeButcher.ReplaceAllString(tc.scope, "-") + + token := os.Getenv("HCLOUD_TOKEN") + if token == "" { + buf, err := os.ReadFile(fmt.Sprintf("../../hack/.token-%s", tc.scope)) + if err != nil { + return err + } + token = string(buf) + } + + if token == "" { + return fmt.Errorf("no valid HCLOUD_TOKEN found") + } + + opts := []hcloud.ClientOption{ + hcloud.WithToken(token), + hcloud.WithApplication("hcloud-ccm-testsuite", "1.0"), + } + hcloudClient := hcloud.NewClient(opts...) + tc.hcloud = hcloudClient + + err := os.Setenv("KUBECONFIG", "../../hack/.kubeconfig-"+tc.scope) + if err != nil { + return err + } + + loadingRules := clientcmd.NewDefaultClientConfigLoadingRules() + configOverrides := &clientcmd.ConfigOverrides{} + + kubeConfig := clientcmd.NewNonInteractiveDeferredLoadingClientConfig(loadingRules, configOverrides) + clientConfig, err := kubeConfig.ClientConfig() + if err != nil { + return fmt.Errorf("kubeConfig.ClientConfig: %s", err) + } + + tc.k8sClient, err = kubernetes.NewForConfig(clientConfig) + if err != nil { + return fmt.Errorf("kubernetes.NewForConfig: %s", err) + } + + return nil +} + +func (tc *TestCluster) Stop() error { + for _, c := range tc.certificates { + if _, err := tc.hcloud.Certificate.Delete(context.Background(), c); err != nil { + fmt.Printf("delete certificate %d failed: %v", c.ID, err) + } + } + + return nil +} + +// CreateTLSCertificate creates a TLS certificate used for testing and posts it +// to the Hetzner Cloud backend. +// +// The baseName of the certificate gets a random number suffix attached. +// baseName and suffix are separated by a single "-" character. 
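+// The created certificate is tracked on the TestCluster so that Stop() can delete it from the Hetzner Cloud project once the suite finishes.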
+func (tc *TestCluster) CreateTLSCertificate(t *testing.T, baseName string) *hcloud.Certificate { + rndInt := rng.Int() + name := fmt.Sprintf("%s-%d", baseName, rndInt) + + p := testsupport.NewTLSPair(t, fmt.Sprintf("www.example%d.com", rndInt)) + opts := hcloud.CertificateCreateOpts{ + Name: name, + Certificate: p.Cert, + PrivateKey: p.Key, + } + cert, _, err := tc.hcloud.Certificate.Create(context.Background(), opts) + if err != nil { + t.Fatalf("%s: %v", name, err) + } + if cert == nil { + t.Fatalf("no certificate created") + } + + tc.certificates = append(tc.certificates, cert) + + return cert +} + +type lbTestHelper struct { + podName string + port int + K8sClient *kubernetes.Clientset + t *testing.T + namespace string +} + +// DeployTestPod deploys a basic nginx pod within the k8s cluster +// and waits until it is "ready". +func (l *lbTestHelper) DeployTestPod() *corev1.Pod { + if l.namespace == "" { + l.namespace = "hccm-test-" + strconv.Itoa(rand.Int()) + } + _, err := l.K8sClient.CoreV1().Namespaces().Create(context.Background(), &corev1.Namespace{ + ObjectMeta: metav1.ObjectMeta{ + Name: l.namespace, + }, + }, metav1.CreateOptions{}) + if err != nil && !k8serrors.IsAlreadyExists(err) { + panic(err) + } + + podName := fmt.Sprintf("pod-%s", l.podName) + testPod := corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: podName, + Labels: map[string]string{ + "app": podName, + }, + }, + Spec: corev1.PodSpec{ + Containers: []corev1.Container{ + { + Name: "nginx-hello-world", + Image: "nginxdemos/hello:plain-text", + Ports: []corev1.ContainerPort{ + { + ContainerPort: 80, + Name: "http", + }, + }, + }, + }, + }, + } + + pod, err := l.K8sClient.CoreV1().Pods(l.namespace).Create(context.Background(), &testPod, metav1.CreateOptions{}) + if err != nil { + l.t.Fatalf("could not create test pod: %s", err) + } + err = wait.Poll(1*time.Second, 1*time.Minute, func() (done bool, err error) { + p, err := l.K8sClient.CoreV1().Pods(l.namespace).Get(context.Background(), podName, metav1.GetOptions{}) + if err != nil { + return false, err + } + for _, condition := range p.Status.Conditions { + if condition.Type == corev1.PodReady && condition.Status == corev1.ConditionTrue { + return true, nil + } + } + pod = p + return false, nil + }) + if err != nil { + l.t.Fatalf("pod %s did not come up after 1 minute: %s", podName, err) + } + return pod +} + +// ServiceDefinition returns a service definition for a Hetzner Cloud Load Balancer (k8s service). +func (l *lbTestHelper) ServiceDefinition(pod *corev1.Pod, annotations map[string]string) *corev1.Service { + port := l.port + if port == 0 { + port = 80 + } + + return &corev1.Service{ + ObjectMeta: metav1.ObjectMeta{ + Name: fmt.Sprintf("svc-%s", l.podName), + Annotations: annotations, + }, + Spec: corev1.ServiceSpec{ + Selector: map[string]string{ + "app": pod.Name, + }, + Type: corev1.ServiceTypeLoadBalancer, + Ports: []corev1.ServicePort{ + { + Port: int32(port), + TargetPort: intstr.FromInt(80), + Name: "http", + }, + }, + ExternalTrafficPolicy: corev1.ServiceExternalTrafficPolicyTypeLocal, + }, + } +} + +// CreateService creates a k8s service based on the given service definition +// and waits until it is "ready". 
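+// "Ready" here means the service has been assigned at least one load balancer ingress IP.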
+func (l *lbTestHelper) CreateService(lbSvc *corev1.Service) (*corev1.Service, error) { + // Default is 15s interval, 10s timeout, 3 retries => 45 seconds until up + // With these changes it should be 1 seconds until up + // lbSvc.Annotations[string(annotation.LBSvcHealthCheckInterval)] = "1s" + // lbSvc.Annotations[string(annotation.LBSvcHealthCheckTimeout)] = "2s" + // lbSvc.Annotations[string(annotation.LBSvcHealthCheckRetries)] = "1" + // lbSvc.Annotations[string(annotation.LBSvcHealthCheckProtocol)] = "tcp" + + _, err := l.K8sClient.CoreV1().Services(l.namespace).Create(context.Background(), lbSvc, metav1.CreateOptions{}) + if err != nil { + return nil, fmt.Errorf("could not create service: %s", err) + } + + err = wait.Poll(1*time.Second, 5*time.Minute, func() (done bool, err error) { + svc, err := l.K8sClient.CoreV1().Services(l.namespace).Get(context.Background(), lbSvc.Name, metav1.GetOptions{}) + if err != nil { + return false, err + } + ingressIPs := svc.Status.LoadBalancer.Ingress + if len(ingressIPs) > 0 { + lbSvc = svc + return true, nil + } + return false, nil + }) + if err != nil { + return nil, fmt.Errorf("test service (load balancer) did not come up after 5 minute: %s", err) + } + return lbSvc, nil +} + +// TearDown deletes the created pod and service. +func (l *lbTestHelper) TearDown() { + err := wait.Poll(1*time.Second, 3*time.Minute, func() (bool, error) { + err := l.K8sClient.CoreV1().Namespaces().Delete(context.Background(), l.namespace, metav1.DeleteOptions{}) + if err != nil && !k8serrors.IsNotFound(err) { + return false, err + } + return k8serrors.IsNotFound(err), nil + }) + if err != nil { + panic(err) + } +} + +// WaitForHTTPAvailable tries to connect to the given IP via http +// It tries it for 2 minutes, if after two minutes the connection +// wasn't successful and it wasn't a HTTP 200 response it will fail. +func WaitForHTTPAvailable(t *testing.T, ingressIP string, useHTTPS bool) { + client := &http.Client{ + Timeout: 1 * time.Second, + Transport: &http.Transport{ + TLSClientConfig: &tls.Config{ + InsecureSkipVerify: true, // nolint + }, + }, + } + proto := "http" + if useHTTPS { + proto = "https" + } + + err := wait.Poll(1*time.Second, 2*time.Minute, func() (bool, error) { + resp, err := client.Get(fmt.Sprintf("%s://%s", proto, ingressIP)) + if err != nil { + return false, nil + } + defer resp.Body.Close() + switch resp.StatusCode { + case http.StatusOK: + // Success + return true, nil + case http.StatusServiceUnavailable: + // Health checks are still evaluating + return false, nil + default: + return false, fmt.Errorf("got HTTP Code %d instead of 200", resp.StatusCode) + } + }) + if err != nil { + t.Errorf("%s not available: %s", ingressIP, err) + } +}