diff --git a/docs/usage/registry-cache/configuration.md b/docs/usage/registry-cache/configuration.md index c816382a..b056a966 100644 --- a/docs/usage/registry-cache/configuration.md +++ b/docs/usage/registry-cache/configuration.md @@ -167,3 +167,5 @@ The registry cache runs with a single replica. This fact may lead to concerns fo Example: If the Service of a registry cache is deleted, then a new Service will be created. containerd's registry config will still contain the old Service's cluster IP. containerd requests against the old Service's cluster IP will time out and containerd will fall back to upstream. - Image pull of `docker.io/library/alpine:3.13.2` from the upstream takes ~2s while image pull of the same image with invalid registry cache cluster IP takes ~2m.2s. - Image pull of `eu.gcr.io/gardener-project/gardener/ops-toolbelt:0.18.0` from the upstream takes ~10s while image pull of the same image with invalid registry cache cluster IP takes ~3m.10s. + +3. Amazon Elastic Container Registry is currently not supported. For details see [distribution/distribution#4383](https://github.com/distribution/distribution/issues/4383). diff --git a/test/common/common.go b/test/common/common.go index ec8ecefd..548902e9 100644 --- a/test/common/common.go +++ b/test/common/common.go @@ -29,19 +29,24 @@ import ( ) const ( - // PublicEcrAwsNginx1230Image is the public.ecr.aws/nginx/nginx:1.23.0 image. - PublicEcrAwsNginx1230Image = "public.ecr.aws/nginx/nginx:1.23.0" - // PublicEcrAwsNginx1240Image is the public.ecr.aws/nginx/nginx:1.24.0 image. - PublicEcrAwsNginx1240Image = "public.ecr.aws/nginx/nginx:1.24.0" - // PublicEcrAwsNginx1250Image is the public.ecr.aws/nginx/nginx:1.25.0 image. - PublicEcrAwsNginx1250Image = "public.ecr.aws/nginx/nginx:1.25.0" + // For the e2e tests don't use images from the following upstreams: + // - docker.io: DockerHub has rate limiting for anonymous users. 
+ // - gcr.io, registry.k8s.io, quay.io, europe-docker.pkg.dev: These are all registries used in Gardener's local setup. Avoid using them to prevent conflicts with provider-local in some corner cases. + // - Amazon ECR: The Distribution project does not support image pulls from Amazon ECR. Ref https://github.com/distribution/distribution/issues/4383. + + // GithubRegistryJitesoftAlpine3189Image is the ghcr.io/jitesoft/alpine:3.18.9 image. + GithubRegistryJitesoftAlpine3189Image = "ghcr.io/jitesoft/alpine:3.18.9" + // GithubRegistryJitesoftAlpine3194Image is the ghcr.io/jitesoft/alpine:3.19.4 image. + GithubRegistryJitesoftAlpine3194Image = "ghcr.io/jitesoft/alpine:3.19.4" + // GithubRegistryJitesoftAlpine3203Image is the ghcr.io/jitesoft/alpine:3.20.3 image. + GithubRegistryJitesoftAlpine3203Image = "ghcr.io/jitesoft/alpine:3.20.3" + // GitlabRegistryJitesoftAlpine31710Image is the registry.gitlab.com/jitesoft/dockerfiles/alpine:3.17.10 image. + GitlabRegistryJitesoftAlpine31710Image = "registry.gitlab.com/jitesoft/dockerfiles/alpine:3.17.10" // ArtifactRegistryNginx1176Image is the europe-docker.pkg.dev/gardener-project/releases/3rd/nginx:1.17.6 image (copy of docker.io/library/nginx:1.17.6). ArtifactRegistryNginx1176Image = "europe-docker.pkg.dev/gardener-project/releases/3rd/nginx:1.17.6" // RegistryK8sNginx1154Image is the registry.k8s.io/e2e-test-images/nginx:1.15-4 image. RegistryK8sNginx1154Image = "registry.k8s.io/e2e-test-images/nginx:1.15-4" - // GithubRegistryNginx1261Image is the ghcr.io/linuxserver/nginx:1.26.1 image. - GithubRegistryNginx1261Image = "ghcr.io/linuxserver/nginx:1.26.1" // jqExtractRegistryLocation is a jq command that extracts the source location of the '/var/lib/registry' mount from the container's config.json file. 
jqExtractRegistryLocation = `jq -j '.mounts[] | select(.destination=="/var/lib/registry") | .source' /run/containerd/io.containerd.runtime.v2.task/k8s.io/%s/config.json` @@ -167,41 +172,55 @@ func VerifyHostsTOMLFilesDeletedForAllNodes(ctx context.Context, log logr.Logger } } +// MutatePodFn is an optional function to change the Pod specification depending on the image used. +type MutatePodFn func(pod *corev1.Pod) *corev1.Pod + +// SleepInfinity is a MutatePodFn that keeps the first container running indefinitely by setting its command to "sleep" with the argument "infinity". +func SleepInfinity(pod *corev1.Pod) *corev1.Pod { + pod.Spec.Containers[0].Command = []string{"sleep"} + pod.Spec.Containers[0].Args = []string{"infinity"} + return pod +} + // VerifyRegistryCache verifies that a registry cache works as expected. // // The verification consists of the following steps: -// 1. It deploys an nginx Pod with the given image. +// 1. It deploys a Pod with the given image. // 2. It waits until the Pod is running. // 3. It verifies that the image is present in the registry's volume. // This is a verification that the image pull happened via the registry cache (and the containerd didn't fall back to the upstream). 
-func VerifyRegistryCache(parentCtx context.Context, log logr.Logger, shootClient kubernetes.Interface, nginxImage string) { - By("Create nginx Pod") +func VerifyRegistryCache(parentCtx context.Context, log logr.Logger, shootClient kubernetes.Interface, image string, mutateFns ...MutatePodFn) { + upstream, path, tag := splitImage(image) + name := strings.ReplaceAll(path, "/", "-") + By(fmt.Sprintf("Create %s Pod", name)) ctx, cancel := context.WithTimeout(parentCtx, 5*time.Minute) defer cancel() pod := &corev1.Pod{ ObjectMeta: metav1.ObjectMeta{ - GenerateName: "nginx-", + GenerateName: name + "-", Namespace: corev1.NamespaceDefault, }, Spec: corev1.PodSpec{ Containers: []corev1.Container{ { - Name: "nginx", - Image: nginxImage, + Name: name, + Image: image, }, }, }, } + for _, mutateFn := range mutateFns { + pod = mutateFn(pod) + } ExpectWithOffset(1, shootClient.Client().Create(ctx, pod)).To(Succeed()) - By("Wait until nginx Pod is running") + By(fmt.Sprintf("Wait until %s Pod is running", name)) ExpectWithOffset(1, framework.WaitUntilPodIsRunning(ctx, log, pod.Name, pod.Namespace, shootClient)).To(Succeed()) - By("Verify the registry cache pulled the nginx image") + By(fmt.Sprintf("Verify the registry cache pulled the %s image", image)) ctx, cancel = context.WithTimeout(parentCtx, 2*time.Minute) defer cancel() - upstream, path, tag := splitImage(nginxImage) selector := labels.SelectorFromSet(labels.Set(map[string]string{"upstream-host": strings.Replace(upstream, ":", "-", 1)})) EventuallyWithOffset(1, ctx, func() error { registryPod, err := framework.GetFirstRunningPodWithLabels(ctx, selector, metav1.NamespaceSystem, shootClient) @@ -222,20 +241,20 @@ func VerifyRegistryCache(parentCtx context.Context, log logr.Logger, shootClient imageDigest, err := rootPodExecutor.Execute(ctx, fmt.Sprintf("cat %s/docker/registry/v2/repositories/%s/_manifests/tags/%s/current/link", string(registryRootPath), path, tag)) if err != nil { - return fmt.Errorf("failed to get the %s 
image digest: %w", nginxImage, err) + return fmt.Errorf("failed to get the %s image digest: %w", image, err) } imageSha256Value := strings.TrimPrefix(string(imageDigest), "sha256:") imageIndexPath := fmt.Sprintf("sha256/%s/%s", imageSha256Value[:2], imageSha256Value) _, err = rootPodExecutor.Execute(ctx, fmt.Sprintf(jqCountManifests, string(registryRootPath), imageIndexPath)) if err != nil { - return fmt.Errorf("failed to get the %s image index manifests count: %w", nginxImage, err) + return fmt.Errorf("failed to get the %s image index manifests count: %w", image, err) } return nil - }).WithPolling(10*time.Second).Should(Succeed(), "Expected to successfully find the nginx image in the registry's volume") + }).WithPolling(10*time.Second).Should(Succeed(), fmt.Sprintf("Expected to successfully find the %s image in the registry's volume", image)) - By("Delete nginx Pod") + By(fmt.Sprintf("Delete %s Pod", name)) timeout := 5 * time.Minute ctx, cancel = context.WithTimeout(parentCtx, timeout) defer cancel() diff --git a/test/e2e/cache/create_enable_add_remove_disable_delete.go b/test/e2e/cache/create_enable_add_remove_disable_delete.go index ffb32f46..f774bdb8 100644 --- a/test/e2e/cache/create_enable_add_remove_disable_delete.go +++ b/test/e2e/cache/create_enable_add_remove_disable_delete.go @@ -37,47 +37,47 @@ var _ = Describe("Registry Cache Extension Tests", Label("cache"), func() { Expect(f.UpdateShoot(ctx, f.Shoot, func(shoot *gardencorev1beta1.Shoot) error { size := resource.MustParse("2Gi") common.AddOrUpdateRegistryCacheExtension(shoot, []v1alpha3.RegistryCache{ - {Upstream: "public.ecr.aws", Volume: &v1alpha3.Volume{Size: &size}}, + {Upstream: "ghcr.io", Volume: &v1alpha3.Volume{Size: &size}}, }) return nil })).To(Succeed()) - By("[public.ecr.aws] Verify registry-cache works") - common.VerifyRegistryCache(parentCtx, f.Logger, f.ShootFramework.ShootClient, common.PublicEcrAwsNginx1230Image) + By("[ghcr.io] Verify registry-cache works") + 
common.VerifyRegistryCache(parentCtx, f.Logger, f.ShootFramework.ShootClient, common.GithubRegistryJitesoftAlpine3189Image, common.SleepInfinity) - By("Add the ghcr.io upstream to the registry-cache extension") + By("Add the registry.gitlab.com upstream to the registry-cache extension") ctx, cancel = context.WithTimeout(parentCtx, 10*time.Minute) defer cancel() Expect(f.UpdateShoot(ctx, f.Shoot, func(shoot *gardencorev1beta1.Shoot) error { size := resource.MustParse("2Gi") common.AddOrUpdateRegistryCacheExtension(shoot, []v1alpha3.RegistryCache{ - {Upstream: "public.ecr.aws", Volume: &v1alpha3.Volume{Size: &size}}, {Upstream: "ghcr.io", Volume: &v1alpha3.Volume{Size: &size}}, + {Upstream: "registry.gitlab.com", Volume: &v1alpha3.Volume{Size: &size}}, }) return nil })).To(Succeed()) - By("[ghcr.io] Verify registry-cache works") - common.VerifyRegistryCache(parentCtx, f.Logger, f.ShootFramework.ShootClient, common.GithubRegistryNginx1261Image) + By("[registry.gitlab.com] Verify registry-cache works") + common.VerifyRegistryCache(parentCtx, f.Logger, f.ShootFramework.ShootClient, common.GitlabRegistryJitesoftAlpine31710Image, common.SleepInfinity) - By("Remove the ghcr.io upstream from the registry-cache extension") + By("Remove the registry.gitlab.com upstream from the registry-cache extension") ctx, cancel = context.WithTimeout(parentCtx, 10*time.Minute) defer cancel() Expect(f.UpdateShoot(ctx, f.Shoot, func(shoot *gardencorev1beta1.Shoot) error { size := resource.MustParse("2Gi") common.AddOrUpdateRegistryCacheExtension(shoot, []v1alpha3.RegistryCache{ - {Upstream: "public.ecr.aws", Volume: &v1alpha3.Volume{Size: &size}}, + {Upstream: "ghcr.io", Volume: &v1alpha3.Volume{Size: &size}}, }) return nil })).To(Succeed()) - By("[ghcr.io] Verify registry configuration is removed") + By("[registry.gitlab.com] Verify registry configuration is removed") ctx, cancel = context.WithTimeout(parentCtx, 2*time.Minute) defer cancel() - 
common.VerifyHostsTOMLFilesDeletedForAllNodes(ctx, f.Logger, f.ShootFramework.ShootClient, []string{"ghcr.io"}) + common.VerifyHostsTOMLFilesDeletedForAllNodes(ctx, f.Logger, f.ShootFramework.ShootClient, []string{"registry.gitlab.com"}) By("Disable the registry-cache extension") ctx, cancel = context.WithTimeout(parentCtx, 10*time.Minute) @@ -88,10 +88,10 @@ var _ = Describe("Registry Cache Extension Tests", Label("cache"), func() { return nil })).To(Succeed()) - By("[public.ecr.aws] Verify registry configuration is removed") + By("[ghcr.io] Verify registry configuration is removed") ctx, cancel = context.WithTimeout(parentCtx, 2*time.Minute) defer cancel() - common.VerifyHostsTOMLFilesDeletedForAllNodes(ctx, f.Logger, f.ShootFramework.ShootClient, []string{"public.ecr.aws"}) + common.VerifyHostsTOMLFilesDeletedForAllNodes(ctx, f.Logger, f.ShootFramework.ShootClient, []string{"ghcr.io"}) By("Delete Shoot") ctx, cancel = context.WithTimeout(parentCtx, 15*time.Minute) diff --git a/test/e2e/cache/create_enabled_force_delete.go b/test/e2e/cache/create_enabled_force_delete.go index 0813cbe8..bdca2b2e 100644 --- a/test/e2e/cache/create_enabled_force_delete.go +++ b/test/e2e/cache/create_enabled_force_delete.go @@ -24,7 +24,7 @@ var _ = Describe("Registry Cache Extension Tests", Label("cache"), func() { shoot := e2e.DefaultShoot("e2e-cache-fd") size := resource.MustParse("2Gi") common.AddOrUpdateRegistryCacheExtension(shoot, []v1alpha3.RegistryCache{ - {Upstream: "public.ecr.aws", Volume: &v1alpha3.Volume{Size: &size}}, + {Upstream: "ghcr.io", Volume: &v1alpha3.Volume{Size: &size}}, }) f.Shoot = shoot @@ -36,7 +36,7 @@ var _ = Describe("Registry Cache Extension Tests", Label("cache"), func() { f.Verify() By("Verify registry-cache works") - common.VerifyRegistryCache(parentCtx, f.Logger, f.ShootFramework.ShootClient, common.PublicEcrAwsNginx1230Image) + common.VerifyRegistryCache(parentCtx, f.Logger, f.ShootFramework.ShootClient, 
common.GithubRegistryJitesoftAlpine3189Image, common.SleepInfinity) By("Force Delete Shoot") ctx, cancel = context.WithTimeout(parentCtx, 10*time.Minute) diff --git a/test/e2e/cache/create_enabled_hibernate_reconcile_delete.go b/test/e2e/cache/create_enabled_hibernate_reconcile_delete.go index e6d6c5d3..abd5bd2e 100644 --- a/test/e2e/cache/create_enabled_hibernate_reconcile_delete.go +++ b/test/e2e/cache/create_enabled_hibernate_reconcile_delete.go @@ -26,7 +26,7 @@ var _ = Describe("Registry Cache Extension Tests", Label("cache"), func() { shoot := e2e.DefaultShoot("e2e-cache-hib") size := resource.MustParse("2Gi") common.AddOrUpdateRegistryCacheExtension(shoot, []v1alpha3.RegistryCache{ - {Upstream: "public.ecr.aws", Volume: &v1alpha3.Volume{Size: &size}}, + {Upstream: "ghcr.io", Volume: &v1alpha3.Volume{Size: &size}}, }) f.Shoot = shoot @@ -38,7 +38,7 @@ var _ = Describe("Registry Cache Extension Tests", Label("cache"), func() { f.Verify() By("Verify registry-cache works") - common.VerifyRegistryCache(parentCtx, f.Logger, f.ShootFramework.ShootClient, common.PublicEcrAwsNginx1230Image) + common.VerifyRegistryCache(parentCtx, f.Logger, f.ShootFramework.ShootClient, common.GithubRegistryJitesoftAlpine3189Image, common.SleepInfinity) By("Hibernate Shoot") ctx, cancel = context.WithTimeout(parentCtx, 10*time.Minute) diff --git a/test/testmachinery/shoot/enable_disable_test.go b/test/testmachinery/shoot/enable_disable_test.go index d71b3672..f82eec58 100644 --- a/test/testmachinery/shoot/enable_disable_test.go +++ b/test/testmachinery/shoot/enable_disable_test.go @@ -38,14 +38,14 @@ var _ = Describe("Shoot registry cache testing", func() { } common.AddOrUpdateRegistryCacheExtension(shoot, []v1alpha3.RegistryCache{ - {Upstream: "public.ecr.aws", Volume: &v1alpha3.Volume{Size: &size}}, + {Upstream: "ghcr.io", Volume: &v1alpha3.Volume{Size: &size}}, }) return nil })).To(Succeed()) By("Verify registry-cache works") - common.VerifyRegistryCache(parentCtx, f.Logger, 
f.ShootClient, common.PublicEcrAwsNginx1230Image) + common.VerifyRegistryCache(parentCtx, f.Logger, f.ShootClient, common.GithubRegistryJitesoftAlpine3189Image, common.SleepInfinity) By("Disable the registry-cache extension") ctx, cancel = context.WithTimeout(parentCtx, 10*time.Minute) @@ -59,7 +59,7 @@ var _ = Describe("Shoot registry cache testing", func() { By("Verify registry configuration is removed") ctx, cancel = context.WithTimeout(parentCtx, 2*time.Minute) defer cancel() - common.VerifyHostsTOMLFilesDeletedForAllNodes(ctx, f.Logger, f.ShootClient, []string{"public.ecr.aws"}) + common.VerifyHostsTOMLFilesDeletedForAllNodes(ctx, f.Logger, f.ShootClient, []string{"ghcr.io"}) }, defaultTestTimeout, framework.WithCAfterTest(func(ctx context.Context) { if common.HasRegistryCacheExtension(f.Shoot) { By("Disable the registry-cache extension") diff --git a/test/testmachinery/shoot/enable_hibernate_reconcile_wakeup_disable_test.go b/test/testmachinery/shoot/enable_hibernate_reconcile_wakeup_disable_test.go index 9da8a3df..06d9b47c 100644 --- a/test/testmachinery/shoot/enable_hibernate_reconcile_wakeup_disable_test.go +++ b/test/testmachinery/shoot/enable_hibernate_reconcile_wakeup_disable_test.go @@ -40,16 +40,16 @@ var _ = Describe("Shoot registry cache testing", func() { } common.AddOrUpdateRegistryCacheExtension(shoot, []v1alpha3.RegistryCache{ - {Upstream: "public.ecr.aws", Volume: &v1alpha3.Volume{Size: &size}}, + {Upstream: "ghcr.io", Volume: &v1alpha3.Volume{Size: &size}}, }) return nil })).To(Succeed()) By("Verify registry-cache works") - // We are using nginx:1.24.0 as nginx:1.23.0 is already used by the "should enable and disable the registry-cache extension" test. - // Hence, nginx:1.23.0 will be present in the Node. 
- common.VerifyRegistryCache(parentCtx, f.Logger, f.ShootClient, common.PublicEcrAwsNginx1240Image) + // We are using ghcr.io/jitesoft/alpine:3.19.4 as ghcr.io/jitesoft/alpine:3.18.9 is already used by the "should enable and disable the registry-cache extension" test. + // Hence, ghcr.io/jitesoft/alpine:3.18.9 will be present in the Node. + common.VerifyRegistryCache(parentCtx, f.Logger, f.ShootClient, common.GithubRegistryJitesoftAlpine3194Image, common.SleepInfinity) By("Hibernate Shoot") ctx, cancel = context.WithTimeout(parentCtx, 15*time.Minute) @@ -72,8 +72,8 @@ var _ = Describe("Shoot registry cache testing", func() { Expect(f.WakeUpShoot(ctx)).To(Succeed()) By("Verify registry-cache works after wake up") - // We are using nginx:1.25.0 as nginx:1.24.0 is already used above and already present in the Node and in the registry cache. - common.VerifyRegistryCache(parentCtx, f.Logger, f.ShootClient, common.PublicEcrAwsNginx1250Image) + // We are using ghcr.io/jitesoft/alpine:3.20.3 as ghcr.io/jitesoft/alpine:3.19.4 is already used above and already present in the Node and in the registry cache. + common.VerifyRegistryCache(parentCtx, f.Logger, f.ShootClient, common.GithubRegistryJitesoftAlpine3203Image, common.SleepInfinity) }, hibernationTestTimeout, framework.WithCAfterTest(func(ctx context.Context) { if v1beta1helper.HibernationIsEnabled(f.Shoot) { By("Wake up Shoot")