class AWSElasticContainerRegistry(DockerRegistry):
    """
    Docker registry client for Amazon Elastic Container Registry (ECR).

    Before every manifest lookup this class:

    1. creates the ECR repository for the image if it does not exist yet,
    2. requests a fresh ECR authorization token, and
    3. patches the Kubernetes push secret with that token, because ECR
       tokens expire and therefore cannot be configured statically.

    The blocking boto3 / Kubernetes calls run on a small thread pool so the
    web server's event loop is not blocked.
    """

    aws_region = Unicode(
        config=True,
        help="""
        AWS region for ECR service
        """,
    )

    # Lazily created boto3 ECR client (see _get_ecr_client).
    ecr_client = Any()

    @default("ecr_client")
    def _get_ecr_client(self):
        """Build the boto3 ECR client for ``aws_region``."""
        # The import lives here, not in the class body: names bound in a
        # class body are not visible from inside the class's methods, so a
        # class-level ``import boto3`` would leave ``boto3`` undefined here.
        import boto3

        return boto3.client("ecr", region_name=self.aws_region)

    # The decoded ECR token has the form "<user>:<password>"; only the
    # password part is stored below, the user name is fixed.
    username = "AWS"

    executor_threads = Integer(
        5,
        config=True,
        help="""The number of threads to use for blocking calls

        Should generaly be a small number because we don't
        care about high concurrency here, just not blocking the webserver.
        This executor is not used for long-running tasks (e.g. builds).
        """,
    )

    # Thread pool used for the blocking AWS and Kubernetes API calls.
    executor = Any()

    @default("executor")
    def _get_executor(self):
        """Create the thread pool sized by ``executor_threads``."""
        return ThreadPoolExecutor(self.executor_threads)

    # Kubernetes CoreV1 API client used to patch the push secret.
    kube_client = Any()

    @default("kube_client")
    def _get_kube_client(self):
        """Create a CoreV1 API client from the in-cluster configuration."""
        kubernetes.config.load_incluster_config()
        return kubernetes.client.CoreV1Api()

    async def get_image_manifest(self, image, tag):
        """
        Return the manifest for ``image:tag``, preparing ECR first.

        ``image`` must contain at least one ``/``; everything up to and
        including the first ``/`` is dropped (presumably the registry host
        that is part of ``image_prefix``) and the remainder is used as the
        ECR repository name.
        """
        image = image.split("/", 1)[1]
        # Run the blocking boto3 / Kubernetes calls off the event loop.
        await asyncio.wrap_future(
            self.executor.submit(self._pre_get_image_manifest, image, tag)
        )
        return await super().get_image_manifest(image, tag)

    def _pre_get_image_manifest(self, image, tag):
        """Blocking preparation step: ensure the repo exists, refresh auth."""
        self._create_repository(image, tag)
        self._refresh_password()

    def _create_repository(self, image, tag):
        """
        Create the ECR repository ``image`` unless it already exists.

        ``tag`` is accepted for signature symmetry and is not used.
        """
        try:
            self.ecr_client.create_repository(repositoryName=image)
            self.log.info(f"ECR repo {image} created")
        except self.ecr_client.exceptions.RepositoryAlreadyExistsException:
            self.log.info(f"ECR repo {image} already exists")

    # An IAM principal is used to generate an auth token that is valid for 12 hours
    # ref: https://docs.aws.amazon.com/AmazonECR/latest/userguide/Registries.html
    # TODO: cache auth if not expired - authorizationData[i]["expiresAt"]
    def _refresh_password(self):
        """
        Fetch a new ECR token, push it to the secret and store the password.

        Raises:
            ValueError: if ECR returns no authorization data whose
                ``proxyEndpoint`` equals ``self.url``.
        """
        auths = self.ecr_client.get_authorization_token()["authorizationData"]
        auth = next((x for x in auths if x["proxyEndpoint"] == self.url), None)
        if auth is None:
            # Fail with an explicit error instead of letting next() leak a
            # bare StopIteration out of the worker thread.
            raise ValueError(
                f"No ECR authorization data found for registry url {self.url!r}"
            )
        self._patch_docker_config_secret(auth)
        # Token decodes to "<user>:<password>"; split once so that a colon
        # inside the password cannot truncate it.
        self.password = (
            base64.b64decode(auth["authorizationToken"])
            .decode("utf-8")
            .split(":", 1)[1]
        )

    def _patch_docker_config_secret(self, auth):
        """Patch push_secret. Necessary because AWS rotates auth tokens.

        ref: https://docs.aws.amazon.com/AmazonECR/latest/userguide/Registries.html
        """
        secret_data = {"auths": {self.url: {"auth": auth["authorizationToken"]}}}
        # Secret ``data`` values are sent base64-encoded.
        secret_data = base64.b64encode(json.dumps(secret_data).encode("utf8")).decode(
            "utf8"
        )
        with open("/var/run/secrets/kubernetes.io/serviceaccount/namespace") as f:
            # Strip so stray trailing whitespace never ends up in the API path.
            namespace = f.read().strip()
        self.kube_client.patch_namespaced_secret(
            self.parent.push_secret, namespace, {"data": {"config.json": secret_data}}
        )
+ Create ``config.yaml`` ---------------------- @@ -205,6 +221,40 @@ As an example, the config should look like the following:: token_url: https://abcde.gra7.container-registry.ovh.net/service/token?service=harbor-registry +If you are using Amazon Elastic Container Registry +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +If you want your BinderHub to push and pull images from an Amazon Elastic +Container Registry (ECR), then your ``config.yaml`` file will look as follows:: + + rbac: + patchSecrets: true + config: + BinderHub: + use_registry: true + registry_class: binderhub.registry.AWSElasticContainerRegistry + image_prefix: "<ACCOUNT_ID>.dkr.ecr.<REGION>.amazonaws.com/<prefix>-" + AWSElasticContainerRegistry: + aws_region: <REGION> + +where: + +* ``<ACCOUNT_ID>`` is the identifier of your AWS account +* ``<REGION>`` is the AWS region of the ECR registry, e.g. ``us-east-1``. +* ``<prefix>`` can be any string, and will be prepended to image names. We + recommend something descriptive such as ``binder-dev-`` or ``binder-prod-`` + (ending with a ``-`` is useful). + +If you opted to use an IAM User with programmatic access instead of assuming +the role in the previous step, you will additionally need to add the following +to your ``config.yaml``:: + + extraEnv: + - name: AWS_ACCESS_KEY_ID + value: "xxx" + - name: AWS_SECRET_ACCESS_KEY + value: "yyy" + If you are using a custom registry ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/doc/zero-to-binderhub/setup-registry.rst b/doc/zero-to-binderhub/setup-registry.rst index afbcbcfa3..5f561e742 100644 --- a/doc/zero-to-binderhub/setup-registry.rst +++ b/doc/zero-to-binderhub/setup-registry.rst @@ -167,6 +167,99 @@ To use the OVH Container Registry, log in to the `OVH Control Panel `_ +.. _use-ecr: + +Set up Amazon Elastic Container Registry +---------------------------------------- + +To use Amazon Elastic Container Registry (ECR), you'll need to use AWS IAM to +authorize the machine or pod running BinderHub so it can push images. 
There +are a number of options on how to do this with IAM and Kubernetes, but we +will highlight two: define and assign an IAM role, or assume an IAM user with programmatic access. + +For the former, start by creating an IAM policy that grants access to create repositories and +read/write images from them. You can create policies using the AWS console, CLI +or API as detailed in the documentation `Creating IAM policies `_. +An example IAM permissions policy is provided below. For more information and examples see `Identity and Access Management for Amazon Elastic Container Registry `_. + +.. code-block:: json + + { + "Statement": [ + { + "Action": [ + "ecr:ListImages" + ], + "Effect": "Allow", + "Resource": "arn:aws:ecr:::-*", + "Sid": "ListImagesInRepository" + }, + { + "Action": [ + "ecr:GetAuthorizationToken" + ], + "Effect": "Allow", + "Resource": "*", + "Sid": "GetAuthorizationToken" + }, + { + "Action": [ + "ecr:BatchCheckLayerAvailability", + "ecr:GetDownloadUrlForLayer", + "ecr:GetRepositoryPolicy", + "ecr:DescribeRepositories", + "ecr:ListImages", + "ecr:DescribeImages", + "ecr:BatchGetImage", + "ecr:InitiateLayerUpload", + "ecr:UploadLayerPart", + "ecr:CompleteLayerUpload", + "ecr:PutImage" + ], + "Effect": "Allow", + "Resource": "arn:aws:ecr:::-*", + "Sid": "ManageRepositoryContents" + }, + { + "Action": [ + "ecr:CreateRepository" + ], + "Effect": "Allow", + "Resource": "arn:aws:ecr:::-*", + "Sid": "CreateRepository" + } + ], + "Version": "2012-10-17" + } + +If you used AWS services like EC2 or EKS to set up your Kubernetes cluster you +can add this policy to the IAM Role assumed by the nodes of the cluster, e.g. +``nodes..k8s.local`` if you followed `Zero to JupyterHub with Kubernetes `_ +and used kops. 
One way to do this from the AWS Console is to navigate to the IAM service and click "Roles" in the side bar, +then find and select the IAM Role assumed by the nodes of your cluster, click "Permissions" and then "Attach policies" to attach +the IAM Policy we just created. The IAM permissions policy will need to be accompanied by an IAM +trust policy to allow it to be assumed. A suitable trust policy may already be defined for your node's IAM Role; +you can view and edit the trust policy by clicking the "Trust relationships" tab next to the "Permissions" tab in the AWS console. +An example trust policy for EC2 is provided below. For more information see `Granting a User Permissions to Pass a Role to an AWS Service `_. +This is the recommended method if your Kubernetes cluster is provisioned on AWS. + +.. code-block:: json + + { + "Version": "2012-10-17", + "Statement": { + "Sid": "TrustPolicyStatementThatAllowsEC2ServiceToAssumeTheAttachedRole", + "Effect": "Allow", + "Principal": { "Service": "ec2.amazonaws.com" }, + "Action": "sts:AssumeRole" + } + } + +As an alternative to the above steps, you can create an IAM user with programmatic access (see +`Creating an IAM User in Your AWS Account `_) +and specify the ``AWS_ACCESS_KEY_ID`` and ``AWS_SECRET_ACCESS_KEY`` environment +variables in the following step. + Next step --------- diff --git a/helm-chart/binderhub/schema.yaml b/helm-chart/binderhub/schema.yaml index f55ffd1df..05449632d 100644 --- a/helm-chart/binderhub/schema.yaml +++ b/helm-chart/binderhub/schema.yaml @@ -88,6 +88,13 @@ properties: description: | Decides if RBAC resources are to be created and referenced by the the Helm chart's workloads. + patchSecrets: + type: boolean + description: | + Allows getting and patching Secrets if set to true. + This should only be needed if using the AWSElasticContainerRegistry + registry class, because AWS rotates auth tokens. 
See [the + documentation](https://docs.aws.amazon.com/AmazonECR/latest/userguide/registry_auth.html) nodeSelector: &nodeSelector-spec type: object diff --git a/helm-chart/binderhub/templates/rbac.yaml b/helm-chart/binderhub/templates/rbac.yaml index 5716b5abf..54bc0fced 100644 --- a/helm-chart/binderhub/templates/rbac.yaml +++ b/helm-chart/binderhub/templates/rbac.yaml @@ -14,6 +14,11 @@ rules: - apiGroups: [""] resources: ["pods/log"] verbs: ["get"] +{{- if .Values.rbac.patchSecrets }} +- apiGroups: [""] + resources: ["secrets"] + verbs: ["get", "patch"] +{{- end }} --- kind: RoleBinding apiVersion: rbac.authorization.k8s.io/v1 diff --git a/requirements.txt b/requirements.txt index 9fadb2d19..d0be691e0 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,3 +1,4 @@ +boto3 # About pycurl: # - pycurl is an optional dependency which improves performance # - pycurl requires both `curl-config` and `gcc` to be available when installing