Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Docker adaptor can retrieve an image from the local docker service #246

Draft
wants to merge 1 commit into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
90 changes: 90 additions & 0 deletions datalad_container/adapters/docker.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
import json
import os
import os.path as op
from pathlib import Path
import subprocess as sp
import sys
import tarfile
Expand Down Expand Up @@ -88,6 +89,14 @@
return out.decode().splitlines()


def _get_repotag_from_image_sha256(sha):
    """Return one repository tag the local Docker service reports for an image.

    Parameters
    ----------
    sha : str
      Image identifier handed to ``docker image inspect``.

    Returns
    -------
    str
      The first tag printed by Docker, or an empty string when Docker
      reports no tag for the image.

    Raises
    ------
    subprocess.CalledProcessError
      When the ``docker`` call exits with a non-zero status.
    """
    out = sp.check_output(
        ['docker', 'image', 'inspect', '--format',
         '{{range $v := .RepoTags}}{{$v}} {{end}}',
         sha])
    # the format template prints *every* tag, each followed by a space.
    # An image with several tags therefore yields 'a:1 b:2 '. Only a single
    # tag is a usable image reference, so pick the first one rather than
    # returning the whole space-separated line.
    tags = out.decode().split()
    return tags[0] if tags else ''

Check warning on line 97 in datalad_container/adapters/docker.py

View check run for this annotation

Codecov / codecov/patch

datalad_container/adapters/docker.py#L97

Added line #L97 was not covered by tests


def get_image(path, repo_tag=None, config=None):
"""Return the image ID of the image extracted at `path`.
"""
Expand Down Expand Up @@ -153,6 +162,87 @@
return image_id


def repopulate_from_daemon(contds, imgpath: Path) -> None:
    """Try to fill in missing image files from a local Docker service.

    If git-annex reports content under `imgpath` that is not present
    locally, the image is exported from Docker into a temporary directory
    inside `imgpath`, and every exported file is offered to
    ``git annex reinject --known``.

    Parameters
    ----------
    contds
      Dataset containing the image. Its ``repo`` attribute is used to call
      git-annex, and ``pathobj`` to express `imgpath` relative to the
      dataset.
    imgpath : Path
      Directory holding the extracted Docker image.

    Raises
    ------
    ValueError
      When `imgpath` is not a directory, when it does not contain exactly
      one image config file, or when the annexed files do not share a
      single backend.

    Any exception from the image export or from the git-annex calls is
    passed on to the caller.
    """
    # crude check whether anything at the image location is not
    # locally present
    contrepo = contds.repo
    if not contrepo.call_annex(
            ['find', '--not', '--in', 'here'],
            files=str(imgpath),
    ):
        # nothing is missing, we have nothing to do here
        return

    # a docker image is a collection of files in a directory.
    # explicit checks instead of `assert`, which is removed under `-O`
    if not imgpath.is_dir():
        raise ValueError(f'image location is not a directory: {imgpath}')

    # we could look into `manifest.json`, but it might also be
    # annexed and not around. instead look for the config filename
    imgcfg = [
        p.name for p in imgpath.iterdir()
        # a sha256 is 64 chars plus '.json'
        if len(p.name) == 69 and p.name.endswith('.json')
    ]
    # there is only one
    if len(imgcfg) != 1:
        raise ValueError(
            f'expected exactly one image config file in {imgpath}, '
            f'found {len(imgcfg)}')

    # look for the employed annex backend, we need it for key reinject below
    backends = set(contrepo.call_annex_oneline([
        'find',
        f'--branch=HEAD:{imgpath.relative_to(contds.pathobj)}',
        # this needs git-annex 10.20230126 or later
        '--anything',
        # the trailing space is not a mistake!
        '--format=${backend} ',
    ]).split())
    # we can only deal with a single homogeneous backend here
    if len(backends) != 1:
        raise ValueError(
            f'expected a single annex backend for {imgpath}, '
            f'found {sorted(backends)}')
    (backend,) = backends

    # ID is filename, minus .json extension
    img_id = imgcfg[0][:-5]

    # make an effort to get the repotags matching the image sha256
    # from docker. This is needed, because the query tag will end up
    # in manifest.json, and the original addition was likely via a tag
    # and not a sha256
    repo_tag = None
    try:
        repo_tag = _get_repotag_from_image_sha256(img_id)
    except Exception:
        # however, we will go on without a tag. In the worst case, it
        # would trigger a download of manifest.json (tiny file), but
        # the large `layer.tar` will still be successfully extracted
        # and reinjected via a query by ID/sha256
        pass

    # let docker dump into a TMPDIR inside the dataset
    # this place is likely to have sufficient space
    with tempfile.TemporaryDirectory(dir=imgpath) as tmpdir:
        # try to export the image from a local docker instance
        save(
            # prefer the tag, but continue with ID (see above)
            repo_tag or f'sha256:{img_id}',
            tmpdir,
        )
        # the line above will raise an exception when
        # - this docker does not have the image.
        # - or there is no docker running at all.
        # this is fine, we will just not proceed.

        # now let git-annex reinject any file that matches a known
        # key (given the backend determined above). This will populate
        # as much as we can. This approach has built-in content verification.
        # this means that even if this docker instance has different metadata
        # we will be able to harvest any image piece that fits, and ignore
        # anything else
        contrepo.call_annex(
            ['reinject', '--known', '--backend', backend],
            files=[
                str(p) for p in Path(tmpdir).glob('**/*')
                if p.is_file()
            ],
        )


# Command-line


Expand Down
29 changes: 29 additions & 0 deletions datalad_container/containers_run.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,12 @@

import logging
import os.path as op
from pathlib import Path
import sys

from datalad.interface.base import Interface
from datalad.interface.base import build_doc
from datalad.support.exceptions import CapturedException
from datalad.support.param import Parameter
from datalad.distribution.dataset import datasetmethod
from datalad.distribution.dataset import require_dataset
Expand Down Expand Up @@ -163,6 +165,33 @@

lgr.debug("extra_inputs = %r", extra_inputs)

if '-m datalad_container.adapters.docker run' in cmd:
# this will use the docker adapter to execute the container.
# below we let the adaptor have a first look at the image
# it will run. The adaptor might query a local docker service,
# and try to populate missing image parts -- possibly avoiding
# a download (via the `get()` that `run()` would perform), whenever
# the local service already has the respective images.
# this is a scenario that would occur frequently in short-lived
# clones that are repeatedly generated on the same machine.
from datalad_container.adapters.docker import repopulate_from_daemon
contds = require_dataset(
container['parentds'], check_installed=True,
purpose='check for docker images')
try:
repopulate_from_daemon(
contds,
# we use the container report here too, and not any of the
# processed variants from above to stay internally
# consistent
imgpath=Path(container['path']),
)
except Exception as e:

Check warning on line 189 in datalad_container/containers_run.py

View check run for this annotation

Codecov / codecov/patch

datalad_container/containers_run.py#L189

Added line #L189 was not covered by tests
# get basic logging of a failure, but overall consider this
# a "best effort". if anything fails, we will silently fall
# back on a standard "get" via the `extra_inputs` below
CapturedException(e)

Check warning on line 193 in datalad_container/containers_run.py

View check run for this annotation

Codecov / codecov/patch

datalad_container/containers_run.py#L193

Added line #L193 was not covered by tests

with patch.dict('os.environ',
{CONTAINER_NAME_ENVVAR: container['name']}):
# fire!
Expand Down