Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Gitlab registry support #1283

Merged
merged 2 commits into from
Jul 15, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 26 additions & 1 deletion binderhub/builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
from http.client import responses
import json
import string
import re
import time
import escapism

Expand Down Expand Up @@ -55,6 +56,30 @@
LAUNCHES_INPROGRESS = Gauge('binderhub_inprogress_launches', 'Launches currently in progress')


def _get_image_basename_and_tag(full_name):
"""Get a supposed image name and tag without the registry part
:param full_name: full image specification, e.g. "gitlab.com/user/project:tag"
:return: tuple of image name and tag, e.g. ("user/project", "tag")
"""
# the tag is either after the last (and only) colon, or not given at all,
# in which case "latest" is implied
tag_splits = full_name.rsplit(':', 1)
if len(tag_splits) == 2:
image_name = tag_splits[0]
tag = tag_splits[1]
else:
image_name = full_name
tag = 'latest'
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In the current implementation, I think the tag is left unspecified if it isn't present. Let's preserve that?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I wouldn't mind. If I see correctly, the tag is passed to registry.get_image_manifest(), where the following happens:
"{}/v2/{}/manifests/{}".format(self.url, image, tag)
So if the tag is an empty string, could this be a problem for the manifests API? I don't know personally, as I am pretty much clueless about this.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@rappertomate I think tag is just always required to be specified when pushing, and is never left empty. I wouldn't mind raising an exception if tag is empty or None, but we can tackle that in another PR. I mostly just wanted to make sure the behavior doesn't change.

I think once that's done I'll happily merge this.


if re.fullmatch('[a-z0-9]{4,40}/[a-z0-9._-]{2,255}', image_name):
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could we not instead just check for the number of / present? The full validator for image names is at https://github.com/distribution/distribution/blob/main/reference/regexp.go, and I'm not sure if this is exactly right. Can we get away with not using regexes?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not so sure. What if we have self-hosted registries without any path components to specify a user or project? The unit test shows an example:
("weirdregistry.com/image:tag", "image", "tag")
vs.
("jupyterhub/k8s-binderhub:0.2.0-a2079a5", "jupyterhub/k8s-binderhub", "0.2.0-a2079a5")

This distinction cannot be covered by just counting /. And the purpose of this PR should be to cover these edge cases as well imo.

# if it looks like a Docker Hub image name, we're done
return image_name, tag
# if the image isn't implied to originate from Docker Hub,
# the first part has to be a registry
image_basename = '/'.join(image_name.split('/')[1:])
return image_basename, tag


def _generate_build_name(build_slug, ref, prefix='', limit=63, ref_length=6):
"""Generate a unique build name with a limited character length.

Expand Down Expand Up @@ -310,7 +335,7 @@ async def get(self, provider_prefix, _unescaped_spec):
if self.settings['use_registry']:
for _ in range(3):
try:
image_manifest = await self.registry.get_image_manifest(*'/'.join(image_name.split('/')[-2:]).split(':', 1))
image_manifest = await self.registry.get_image_manifest(*_get_image_basename_and_tag(image_name))
image_found = bool(image_manifest)
break
except HTTPClientError:
Expand Down
5 changes: 4 additions & 1 deletion binderhub/registry.py
Original file line number Diff line number Diff line change
Expand Up @@ -191,7 +191,10 @@ async def get_image_manifest(self, image, tag):
# first, get a token to perform the manifest request
if self.token_url:
auth_req = httpclient.HTTPRequest(
url_concat(self.token_url, {"scope": "repository:{}:pull".format(image)}),
url_concat(self.token_url, {
"scope": "repository:{}:pull".format(image),
"service": "container_registry"
}),
auth_username=self.username,
auth_password=self.password,
)
Expand Down
16 changes: 15 additions & 1 deletion binderhub/tests/test_builder.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,22 @@
import pytest

from binderhub.builder import _generate_build_name
from binderhub.builder import _generate_build_name, _get_image_basename_and_tag


@pytest.mark.parametrize(
    "fullname,basename,tag",
    [
        (
            "jupyterhub/k8s-binderhub:0.2.0-a2079a5",
            "jupyterhub/k8s-binderhub",
            "0.2.0-a2079a5",
        ),
        ("jupyterhub/jupyterhub", "jupyterhub/jupyterhub", "latest"),
        ("gcr.io/project/image:tag", "project/image", "tag"),
        ("weirdregistry.com/image:tag", "image", "tag"),
        (
            "gitlab-registry.example.com/group/project:some-tag",
            "group/project",
            "some-tag",
        ),
        (
            "gitlab-registry.example.com/group/project/image:latest",
            "group/project/image",
            "latest",
        ),
        (
            "gitlab-registry.example.com/group/project/my/image:rc1",
            "group/project/my/image",
            "rc1",
        ),
    ],
)
def test_image_basename_resolution(fullname, basename, tag):
    """Check that a full image spec resolves to the expected (basename, tag).

    Each case pairs a full image specification with the image basename and
    tag that ``_get_image_basename_and_tag`` is expected to return for it.
    """
    actual_basename, actual_tag = _get_image_basename_and_tag(fullname)
    assert (actual_basename, actual_tag) == (basename, tag)

@pytest.mark.parametrize('ref,build_slug', [
# a long ref, no special characters at critical positions
('3035124.v3.0', 'dataverse-dvn-2ftjclkp'),
Expand Down