From 258aa61f55f1add07816905d21c94382fca330f5 Mon Sep 17 00:00:00 2001 From: thesayyn Date: Thu, 23 Mar 2023 14:41:40 +0300 Subject: [PATCH 1/5] feat: implement auth for oci_pull --- docs/pull.md | 6 +- e2e/custom_registry/BUILD.bazel | 2 +- e2e/custom_registry/WORKSPACE | 10 +- oci/BUILD.bazel | 6 +- oci/pull.bzl | 204 +++++++++++++++++++++++++++++--- 5 files changed, 201 insertions(+), 27 deletions(-) diff --git a/docs/pull.md b/docs/pull.md index 18385364..59111a77 100644 --- a/docs/pull.md +++ b/docs/pull.md @@ -63,7 +63,7 @@ oci_alias(name, pla ## oci_pull_rule
-oci_pull_rule(name, identifier, image, platform, repo_mapping)
+oci_pull_rule(name, identifier, image, platform, repo_mapping, toolchain_name)
 
@@ -78,6 +78,7 @@ oci_pull_rule(name, image | The name of the image we are fetching, e.g. gcr.io/distroless/static | String | required | | | platform | platform in os/arch format, for multi-arch images | String | optional | "" | | repo_mapping | A dictionary from local repository name to global repository name. This allows controls over workspace dependency resolution for dependencies of this repository.<p>For example, an entry "@foo": "@bar" declares that, for any time this repository depends on @foo (such as a dependency on @foo//some:target, it should actually resolve that dependency within globally-declared @bar (@bar//some:target). | Dictionary: String -> String | required | | +| toolchain_name | Value of name attribute to the oci_register_toolchains call in the workspace. | String | optional | "oci" | @@ -106,7 +107,7 @@ pin_tag(name, image, -oci_pull(name, image, platforms, digest, tag, reproducible) +oci_pull(name, image, platforms, digest, tag, reproducible, toolchain_name) Repository macro to fetch image manifest data from a remote docker registry. @@ -122,5 +123,6 @@ Repository macro to fetch image manifest data from a remote docker registry. | digest | the digest string, starting with "sha256:", "sha512:", etc. If omitted, instructions for pinning are provided. | None | | tag | a tag to choose an image from the registry. Exactly one of tag and digest must be set. Since tags are mutable, this is not reproducible, so a warning is printed. | None | | reproducible | Set to False to silence the warning about reproducibility when using tag. | True | +| toolchain_name | Value of name attribute to the oci_register_toolchains call in the workspace. 
| "oci" | diff --git a/e2e/custom_registry/BUILD.bazel b/e2e/custom_registry/BUILD.bazel index 6e6234ee..4cf81d5d 100644 --- a/e2e/custom_registry/BUILD.bazel +++ b/e2e/custom_registry/BUILD.bazel @@ -6,7 +6,7 @@ oci_image( "@platforms//cpu:arm64": "arm64", "@platforms//cpu:x86_64": "amd64", }), - base = "@distroless_static", + base = "@debian", cmd = [ "--arg1", "--arg2", diff --git a/e2e/custom_registry/WORKSPACE b/e2e/custom_registry/WORKSPACE index a9d4d84b..b1fe8997 100644 --- a/e2e/custom_registry/WORKSPACE +++ b/e2e/custom_registry/WORKSPACE @@ -26,13 +26,15 @@ oci_register_toolchains( load("@rules_oci//oci:pull.bzl", "oci_pull") oci_pull( - name = "distroless_static", - digest = "sha256:c3c3d0230d487c0ad3a0d87ad03ee02ea2ff0b3dcce91ca06a1019e07de05f12", - image = "gcr.io/distroless/static", + name = "debian", + image = "index.docker.io/library/debian", platforms = [ - "linux/amd64", "linux/arm64", + "linux/amd64", ], + # Don't make a debian_unpinned repo and print a warning about the tag + reproducible = False, + tag = "latest", ) ############################################ diff --git a/oci/BUILD.bazel b/oci/BUILD.bazel index aa818644..27d39306 100644 --- a/oci/BUILD.bazel +++ b/oci/BUILD.bazel @@ -39,7 +39,11 @@ bzl_library( name = "pull", srcs = ["pull.bzl"], visibility = ["//visibility:public"], - deps = ["@aspect_bazel_lib//lib:paths"], + deps = [ + "@aspect_bazel_lib//lib:base64", + "@aspect_bazel_lib//lib:paths", + "@aspect_bazel_lib//lib:repo_utils", + ], ) bzl_library( diff --git a/oci/pull.bzl b/oci/pull.bzl index af11281e..eac4e1de 100644 --- a/oci/pull.bzl +++ b/oci/pull.bzl @@ -37,24 +37,164 @@ oci_image( """ load("@aspect_bazel_lib//lib:paths.bzl", "BASH_RLOCATION_FUNCTION") +load("@aspect_bazel_lib//lib:base64.bzl", "base64") +load("@aspect_bazel_lib//lib:repo_utils.bzl", "repo_utils") + +def _strip_host(url): + # TODO: a principled way of doing this + return url.replace("http://", "").replace("https://", "").replace("/v1/", "") + +def 
_file_exists(rctx, path): + result = rctx.execute(["stat", path]) + return result.return_code == 0 + +def _get_auth_file_path(rctx): + # this is the standard path where registry credentials are stored + config_path = "{}/.docker/config.json".format(rctx.os.environ["HOME"]) + + # set config path to DOCKER_CONFIG env if present + if "DOCKER_CONFIG" in rctx.os.environ: + config_path = rctx.os.environ["DOCKER_CONFIG"] + + if _file_exists(rctx, config_path): + return config_path + + # https://docs.podman.io/en/latest/markdown/podman-login.1.html#authfile-path + XDG_RUNTIME_DIR = "{}/.config".format(rctx.os.environ["HOME"]) + if "XDG_RUNTIME_DIR" in rctx.os.environ: + XDG_RUNTIME_DIR = rctx.os.environ["XDG_RUNTIME_DIR"] + + config_path = "{}/containers/auth.json".format(XDG_RUNTIME_DIR) + + if _file_exists(rctx, config_path): + return config_path + + return None + +def _auth_anonymous(rctx, registry, repository, identifier): + """A function that performs anonymous auth for docker registry. + + Args: + rctx: repository context + registry: registry url + repository: image repository + identifier: tag or digest + + Returns: + A dict for rctx.download#auth + """ + pattern = {} + if registry == "index.docker.io": + scope = "repository:{}:pull".format(repository) + rctx.download( + url = ["https://auth.docker.io/token?scope={}&service=registry.docker.io".format(scope)], + output = "auth_anonymous.json", + ) + auth_raw = rctx.read("auth_anonymous.json") + auth = json.decode(auth_raw) + pattern = { + "type": "pattern", + "pattern": "Bearer ", + "password": auth["token"], + } + + return pattern + +def _auth_basic(rctx, registry, repository, identifier): + """A function that performs basic auth using docker/config.json + + Args: + rctx: repository context + registry: registry url + repository: image repository + identifier: tag or digest + + Returns: + A dict for rctx.download#auth + """ + + config_path = _get_auth_file_path(rctx) + + if not config_path: + # buildifier: 
disable=print + print(""" +WARNING: Could not find the `$HOME/.docker/config.json` and `$XDG_RUNTIME_DIR/containers/auth.json` file. + +Running one of `podman login`, `docker login`, `crane login` may help. + """) + return _auth_anonymous(rctx, registry, repository, identifier) + + config_raw = rctx.read(config_path) + config = json.decode(config_raw) + + pattern = {} + + for host_raw in config["auths"]: + host = _strip_host(host_raw) + if host == registry: + raw_auth = config["auths"][host_raw]["auth"] + (login, password) = base64.decode(raw_auth).split(":") + pattern = { + "type": "basic", + "login": login, + "password": password, + } + + # Probably other registries send a WWW-Authenticate header too. Unfortunately bazel downloader + # only tells us about the http body. + if registry == "index.docker.io": + scope = "repository:{}:pull".format(repository) + auth_url = "https://auth.docker.io/token?scope={}&service=registry.docker.io".format(scope) + rctx.download( + url = [auth_url], + output = "auth.json", + auth = {auth_url: pattern}, + ) + auth_raw = rctx.read("auth.json") + auth = json.decode(auth_raw) + pattern = { + "type": "pattern", + "pattern": "Bearer ", + "password": auth["token"], + } + return pattern # OCI Image Media Types # Spec: https://github.com/distribution/distribution/blob/main/docs/spec/manifest-v2-2.md#media-types _MANIFEST_TYPE = "application/vnd.docker.distribution.manifest.v2+json" _MANIFEST_LIST_TYPE = "application/vnd.docker.distribution.manifest.list.v2+json" +def _parse_reference(reference): + firstslash = reference.find("/") + registry = reference[:firstslash] + repository = reference[firstslash + 1:] + return registry, repository + +def _is_tag(str): + return str.find(":") == -1 + +def _trim_hash_algorithm(identifier): + "Optionally remove the sha256: prefix from identifier, if present" + parts = identifier.split(":", 1) + if len(parts) != 2: + return identifier + return parts[1] + def _download(rctx, identifier, output, resource = 
"blobs"): "Use the Bazel Downloader to fetch from the remote registry" if resource != "blobs" and resource != "manifests": fail("resource must be blobs or manifests") + registry, repository = _parse_reference(rctx.attr.image) + + auth = _auth_basic(rctx, registry, repository, identifier) + # Construct the URL to fetch from remote, see # https://github.com/google/go-containerregistry/blob/62f183e54939eabb8e80ad3dbc787d7e68e68a43/pkg/v1/remote/descriptor.go#L234 - firstslash = rctx.attr.image.find("/") registry_url = "https://{registry}/v2/{repository}/{resource}/{identifier}".format( - registry = rctx.attr.image[:firstslash], - repository = rctx.attr.image[firstslash + 1:], + registry = registry, + repository = repository, resource = resource, identifier = identifier, ) @@ -65,6 +205,9 @@ def _download(rctx, identifier, output, resource = "blobs"): output = output, sha256 = identifier[len("sha256:"):], url = registry_url, + auth = { + registry_url: auth, + }, ) else: # buildifier: disable=print @@ -73,13 +216,35 @@ WARNING: fetching from %s without an integrity hash. The result will not be cach rctx.download( output = output, url = registry_url, + auth = { + registry_url: auth, + }, ) - if resource == "manifests": - bytes = rctx.read(output) - return json.decode(bytes), len(bytes) - else: - return None +def _crane_label(rctx): + return Label("@{}_crane_{}//:crane".format(rctx.attr.toolchain_name, repo_utils.platform(rctx))) + +def _download_manifest(rctx, identifier, output): + _download(rctx, identifier, output, "manifests") + bytes = rctx.read(output) + manifest = json.decode(bytes) + + if manifest["schemaVersion"] == 1: + # buildifier: disable=print + print(""" +WARNING: fetching from a registry that requires `Docker-Distribution-API-Version` header to be set. Falling back to using `crane manifest`. The result will not be cached. +See https://github.com/bazelbuild/bazel/issues/17829 for the context. 
+""") + + crane = _crane_label(rctx) + + tag_or_digest = ":" if _is_tag(identifier) else "@" + + result = rctx.execute([crane, "manifest", "{}{}{}".format(rctx.attr.image, tag_or_digest, identifier), "--platform=all"]) + bytes = result.stdout + manifest = json.decode(bytes) + rctx.file(output, bytes) + return manifest, len(bytes) _build_file = """\ "Generated by oci_pull" @@ -130,13 +295,6 @@ copy_to_directory( ) """ -def _trim_hash_algorithm(identifier): - "Optionally remove the sha256: prefix from identifier, if present" - parts = identifier.split(":", 1) - if len(parts) != 2: - return identifier - return parts[1] - def _find_platform_manifest(rctx, image_mf): for mf in image_mf["manifests"]: plat = "{}/{}".format(mf["platform"]["os"], mf["platform"]["architecture"]) @@ -146,7 +304,7 @@ def _find_platform_manifest(rctx, image_mf): def _oci_pull_impl(rctx): mf_file = _trim_hash_algorithm(rctx.attr.identifier) - mf, mf_len = _download(rctx, rctx.attr.identifier, mf_file, resource = "manifests") + mf, mf_len = _download_manifest(rctx, rctx.attr.identifier, mf_file) if mf["mediaType"] == _MANIFEST_TYPE: if rctx.attr.platform: @@ -162,9 +320,9 @@ def _oci_pull_impl(rctx): matching_mf = _find_platform_manifest(rctx, mf) image_digest = matching_mf["digest"] image_mf_file = _trim_hash_algorithm(image_digest) - image_mf, image_mf_len = _download(rctx, image_digest, image_mf_file, resource = "manifests") + image_mf, image_mf_len = _download_manifest(rctx, image_digest, image_mf_file) else: - fail("Unrecognized mediaType {} in manifest file".format(image_mf["mediaType"])) + fail("Unrecognized mediaType {} in manifest file".format(mf["mediaType"])) image_config_file = _trim_hash_algorithm(image_mf["config"]["digest"]) _download(rctx, image_mf["config"]["digest"], image_config_file) @@ -228,7 +386,12 @@ oci_pull_rule = repository_rule( "image": attr.string(doc = "The name of the image we are fetching, e.g. 
gcr.io/distroless/static", mandatory = True), "identifier": attr.string(doc = "The digest or tag of the manifest file", mandatory = True), "platform": attr.string(doc = "platform in `os/arch` format, for multi-arch images"), + "toolchain_name": attr.string(default = "oci", doc = "Value of name attribute to the oci_register_toolchains call in the workspace."), }, + environ = [ + "DOCKER_CONFIG", + "CONTAINER_CONFIG", + ], ) _alias_target = """\ @@ -344,7 +507,7 @@ _DOCKER_ARCH_TO_BAZEL_CPU = { "s390x": "@platforms//cpu:s390x", } -def oci_pull(name, image, platforms = None, digest = None, tag = None, reproducible = True): +def oci_pull(name, image, platforms = None, digest = None, tag = None, reproducible = True, toolchain_name = "oci"): """Repository macro to fetch image manifest data from a remote docker registry. Args: @@ -358,6 +521,7 @@ def oci_pull(name, image, platforms = None, digest = None, tag = None, reproduci Exactly one of `tag` and `digest` must be set. Since tags are mutable, this is not reproducible, so a warning is printed. reproducible: Set to False to silence the warning about reproducibility when using `tag`. + toolchain_name: Value of name attribute to the oci_register_toolchains call in the workspace. 
""" if digest and tag: @@ -393,6 +557,7 @@ bazel run @{}_unpinned//:pin image = image, identifier = digest or tag, platform = plat, + toolchain_name = toolchain_name, ) select_map[_DOCKER_ARCH_TO_BAZEL_CPU[arch]] = "@" + plat_name oci_alias( @@ -404,4 +569,5 @@ bazel run @{}_unpinned//:pin name = name, image = image, identifier = digest or tag, + toolchain_name = toolchain_name, ) From aa8643ad3e3d513874284a02945c2ebfd13e0534 Mon Sep 17 00:00:00 2001 From: thesayyn Date: Fri, 24 Mar 2023 15:30:16 +0300 Subject: [PATCH 2/5] address changes --- oci/pull.bzl | 28 +++++++++++++++++++++------- 1 file changed, 21 insertions(+), 7 deletions(-) diff --git a/oci/pull.bzl b/oci/pull.bzl index eac4e1de..062a9541 100644 --- a/oci/pull.bzl +++ b/oci/pull.bzl @@ -48,6 +48,8 @@ def _file_exists(rctx, path): result = rctx.execute(["stat", path]) return result.return_code == 0 +# Path of the auth file is determined by the order described here; +# https://github.com/google/go-containerregistry/tree/main/pkg/authn#tldr-for-consumers-of-this-package def _get_auth_file_path(rctx): # this is the standard path where registry credentials are stored config_path = "{}/.docker/config.json".format(rctx.os.environ["HOME"]) @@ -66,6 +68,11 @@ def _get_auth_file_path(rctx): config_path = "{}/containers/auth.json".format(XDG_RUNTIME_DIR) + # podman supports overriding the standard path for the auth file via this special environment variable. + # https://docs.podman.io/en/latest/markdown/podman-login.1.html#authfile-path + if "REGISTRY_AUTH_FILE" in rctx.os.environ: + config_path = rctx.os.environ["REGISTRY_AUTH_FILE"] + if _file_exists(rctx, config_path): return config_path @@ -235,15 +242,17 @@ def _download_manifest(rctx, identifier, output): WARNING: fetching from a registry that requires `Docker-Distribution-API-Version` header to be set. Falling back to using `crane manifest`. The result will not be cached. See https://github.com/bazelbuild/bazel/issues/17829 for the context. 
""") + crane = _crane_label(rctx) + tag_or_digest = ":" if _is_tag(identifier) else "@" - crane = _crane_label(rctx) + result = rctx.execute([crane, "manifest", "{}{}{}".format(rctx.attr.image, tag_or_digest, identifier), "--platform=all"]) - tag_or_digest = ":" if _is_tag(identifier) else "@" + bytes = result.stdout + manifest = json.decode(bytes) + + # overwrite the file with new manifest downloaded through crane + rctx.file(output, bytes) - result = rctx.execute([crane, "manifest", "{}{}{}".format(rctx.attr.image, tag_or_digest, identifier), "--platform=all"]) - bytes = result.stdout - manifest = json.decode(bytes) - rctx.file(output, bytes) return manifest, len(bytes) _build_file = """\ @@ -389,8 +398,13 @@ oci_pull_rule = repository_rule( "toolchain_name": attr.string(default = "oci", doc = "Value of name attribute to the oci_register_toolchains call in the workspace."), }, environ = [ + # These environment variables allow the standard authorization file path to be overridden with something else and therefore + # need to be tracked as part of the repository cache key so that bazel refetches these when any of the variables change. + # while docker uses DOCKER_CONFIG for the override, podman uses REGISTRY_AUTH_FILE environment variable, and + # since rules_oci has no preference over the runtime, it has to support both. + # See: https://github.com/google/go-containerregistry/tree/main/pkg/authn#tldr-for-consumers-of-this-package for go implementation. 
"DOCKER_CONFIG", - "CONTAINER_CONFIG", + "REGISTRY_AUTH_FILE", ], ) From 2ba2eabbb0a56049835f638348f28a4f2f358868 Mon Sep 17 00:00:00 2001 From: thesayyn Date: Fri, 24 Mar 2023 17:05:14 +0300 Subject: [PATCH 3/5] fix pinning --- oci/pull.bzl | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/oci/pull.bzl b/oci/pull.bzl index 062a9541..0c67a46a 100644 --- a/oci/pull.bzl +++ b/oci/pull.bzl @@ -235,7 +235,6 @@ def _download_manifest(rctx, identifier, output): _download(rctx, identifier, output, "manifests") bytes = rctx.read(output) manifest = json.decode(bytes) - if manifest["schemaVersion"] == 1: # buildifier: disable=print print(""" @@ -247,12 +246,12 @@ See https://github.com/bazelbuild/bazel/issues/17829 for the context. result = rctx.execute([crane, "manifest", "{}{}{}".format(rctx.attr.image, tag_or_digest, identifier), "--platform=all"]) + # overwrite the file with new manifest downloaded through crane + rctx.file(output, result.stdout) + bytes = result.stdout manifest = json.decode(bytes) - # overwrite the file with new manifest downloaded through crane - rctx.file(output, bytes) - return manifest, len(bytes) _build_file = """\ @@ -487,7 +486,7 @@ echo ")" def _pin_tag_impl(rctx): """Download the tag and create a repository that can produce pinning instructions""" - _download(rctx, rctx.attr.tag, "manifest_list.json", "manifests") + _download_manifest(rctx, rctx.attr.tag, "manifest_list.json") result = rctx.execute(["shasum", "-a", "256", "manifest_list.json"]) if result.return_code: msg = "shasum failed: \nSTDOUT:\n%s\nSTDERR:\n%s" % (result.stdout, result.stderr) @@ -507,6 +506,7 @@ pin_tag = repository_rule( attrs = { "image": attr.string(doc = "The name of the image we are fetching, e.g. `gcr.io/distroless/static`", mandatory = True), "tag": attr.string(doc = "The tag being used, e.g. 
`latest`", mandatory = True), + "toolchain_name": attr.string(default = "oci", doc = "Value of name attribute to the oci_register_toolchains call in the workspace."), }, ) @@ -548,7 +548,7 @@ def oci_pull(name, image, platforms = None, digest = None, tag = None, reproduci fail("One of 'digest' or 'tag' must be set") if tag and reproducible: - pin_tag(name = name + "_unpinned", image = image, tag = tag) + pin_tag(name = name + "_unpinned", image = image, tag = tag, toolchain_name = toolchain_name) # Print a command - in the future we should print a buildozer command or # buildifier: disable=print From 8fc1408e1be528e6c2b616b82687a57a897b8c40 Mon Sep 17 00:00:00 2001 From: thesayyn Date: Fri, 24 Mar 2023 17:08:43 +0300 Subject: [PATCH 4/5] docs --- docs/pull.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/docs/pull.md b/docs/pull.md index 59111a77..77c67ca3 100644 --- a/docs/pull.md +++ b/docs/pull.md @@ -86,7 +86,7 @@ oci_pull_rule(name, name, image, repo_mapping, tag) +pin_tag(name, image, repo_mapping, tag, toolchain_name) @@ -100,6 +100,7 @@ pin_tag(name, image, image | The name of the image we are fetching, e.g. gcr.io/distroless/static | String | required | | | repo_mapping | A dictionary from local repository name to global repository name. This allows controls over workspace dependency resolution for dependencies of this repository.<p>For example, an entry "@foo": "@bar" declares that, for any time this repository depends on @foo (such as a dependency on @foo//some:target, it should actually resolve that dependency within globally-declared @bar (@bar//some:target). | Dictionary: String -> String | required | | | tag | The tag being used, e.g. latest | String | required | | +| toolchain_name | Value of name attribute to the oci_register_toolchains call in the workspace. 
| String | optional | "oci" | From 89aee918c82c545705a24baad5dcdad6ac99a95d Mon Sep 17 00:00:00 2001 From: thesayyn Date: Fri, 24 Mar 2023 17:15:49 +0300 Subject: [PATCH 5/5] update schema1 warning --- oci/pull.bzl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/oci/pull.bzl b/oci/pull.bzl index 0c67a46a..657230d8 100644 --- a/oci/pull.bzl +++ b/oci/pull.bzl @@ -238,8 +238,8 @@ def _download_manifest(rctx, identifier, output): if manifest["schemaVersion"] == 1: # buildifier: disable=print print(""" -WARNING: fetching from a registry that requires `Docker-Distribution-API-Version` header to be set. Falling back to using `crane manifest`. The result will not be cached. -See https://github.com/bazelbuild/bazel/issues/17829 for the context. +WARNING: registry responded with a manifest that has schemaVersion=1. Usually happens when fetching from a registry that requires `Docker-Distribution-API-Version` header to be set. +Falling back to using `crane manifest`. The result will not be cached. See https://github.com/bazelbuild/bazel/issues/17829 for the context. """) crane = _crane_label(rctx) tag_or_digest = ":" if _is_tag(identifier) else "@"