Skip to content

Commit

Permalink
Docker adaptor can retrieve an image from the local docker service
Browse files Browse the repository at this point in the history
Closes: #199

Demo

```
❯ datalad create busydemo
❯ cd busydemo
❯ datalad containers-add -u dhub://busybox:latest busy
❯ datalad drop .datalad/environments --reckless availability
❯ git annex info | grep 'local annex'
local annex keys: 0
local annex size: 0 bytes
❯ cat .datalad/config
[datalad "dataset"]
        id = b7adee52-a65a-43fc-a85b-c0d5e2d5b67c
[datalad "containers.busy"]
        image = .datalad/environments/busy/image
        cmdexec = {python} -m datalad_container.adapters.docker run {img} {cmd}
❯ datalad containers-run -n busy uname
[INFO   ] Saved busybox:latest to /tmp/busydemo/.datalad/environments/busy/image/tmpzrdd7mj2
[INFO   ] Making sure inputs are available (this may take some time)
[INFO   ] == Command start (output follows) =====
Linux
[INFO   ] == Command exit (modification check follows) =====
run(ok): /tmp/busydemo (dataset) [/home/mih/env/datalad-dev/bin/python -m ...]
```
  • Loading branch information
mih committed Oct 10, 2023
1 parent c505e52 commit 24a11ce
Show file tree
Hide file tree
Showing 2 changed files with 119 additions and 0 deletions.
90 changes: 90 additions & 0 deletions datalad_container/adapters/docker.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
import json
import os
import os.path as op
from pathlib import Path
import subprocess as sp
import sys
import tarfile
Expand Down Expand Up @@ -88,6 +89,14 @@ def _list_images():
return out.decode().splitlines()


def _get_repotag_from_image_sha256(sha):
    """Return one repo tag known to the docker service for an image.

    Parameters
    ----------
    sha : str
      Image identifier, passed verbatim to ``docker image inspect``.

    Returns
    -------
    str
      The first repo tag reported for the image, or an empty string
      when no tag is reported.

    Raises
    ------
    subprocess.CalledProcessError
      If the ``docker image inspect`` call exits with a non-zero status.
    """
    out = sp.check_output(
        ['docker', 'image', 'inspect', '--format',
         '{{range $v := .RepoTags}}{{$v}} {{end}}',
         sha])
    # the template prints ALL repo tags, separated by spaces. An image
    # with more than one tag must not be reported as a single
    # space-joined string, because that is not a valid image reference.
    # Report the first tag only; fall back on an empty string when there
    # is no output at all, rather than failing with an IndexError.
    tags = out.decode().split()
    return tags[0] if tags else ''

Check warning on line 97 in datalad_container/adapters/docker.py

View check run for this annotation

Codecov / codecov/patch

datalad_container/adapters/docker.py#L97

Added line #L97 was not covered by tests


def get_image(path, repo_tag=None, config=None):
"""Return the image ID of the image extracted at `path`.
"""
Expand Down Expand Up @@ -153,6 +162,87 @@ def load(path, repo_tag, config):
return image_id


def repopulate_from_daemon(contds, imgpath: Path) -> None:
    """Try to repopulate missing image files from a local docker service.

    If annexed content underneath `imgpath` is not locally present, the
    image is exported from docker into a temporary directory inside
    `imgpath`, and every exported file that matches a known annex key is
    reinjected into the annex.

    Parameters
    ----------
    contds
      Dataset containing the image. The code uses its ``repo`` attribute
      (for ``call_annex()``/``call_annex_oneline()``) and its ``pathobj``
      attribute (to express `imgpath` relative to the dataset).
    imgpath : Path
      Directory holding the files of the docker image.

    Raises
    ------
    ValueError
      If content is missing, but `imgpath` is not a directory.
    RuntimeError
      If the image config file or the annex backend cannot be determined
      unambiguously.

    Exceptions raised by ``save()`` or by the annex calls are not handled
    here and reach the caller.
    """
    # crude check whether anything at the image location is not
    # locally present
    contrepo = contds.repo
    if not contrepo.call_annex(
            ['find', '--not', '--in', 'here'],
            files=str(imgpath),
    ):
        # nothing is missing, we have nothing to do here
        return

    # a docker image is a collection of files in a directory.
    # explicit exceptions are used instead of `assert`, because
    # assertions are stripped when Python runs with `-O`
    if not imgpath.is_dir():
        raise ValueError(
            f'docker image location is not a directory: {imgpath}')

    # we could look into `manifest.json`, but it might also be
    # annexed and not around. instead look for the config filename
    imgcfg = [
        p.name for p in imgpath.iterdir()
        # a sha256 is 64 chars plus '.json'
        if len(p.name) == 69 and p.name.endswith('.json')
    ]
    # there is only one
    if len(imgcfg) != 1:
        raise RuntimeError(
            f'expected exactly one image config file in {imgpath}, '
            f'found {len(imgcfg)}')

    # look for the employed annex backend, we need it for key reinject below
    backends = set(contrepo.call_annex_oneline([
        'find',
        f'--branch=HEAD:{imgpath.relative_to(contds.pathobj)}',
        # this needs git-annex 10.20230126 or later
        '--anything',
        # the trailing space is not a mistake!
        '--format=${backend} ',
    ]).split())
    # we can only deal with a single homogeneous backend here
    if len(backends) != 1:
        raise RuntimeError(
            f'expected a single annex backend for {imgpath}, '
            f'found {sorted(backends)}')
    backend = backends.pop()

    # ID is filename, minus .json extension
    img_id = imgcfg[0][:-5]

    # make an effort to get the repotags matching the image sha256
    # from docker. This is needed, because the query tag will end up
    # in manifest.json, and the original addition was likely via a tag
    # and not a sha256
    repo_tag = None
    try:
        repo_tag = _get_repotag_from_image_sha256(img_id)
    except Exception:
        # however, we will go on without a tag. In the worst case, it
        # would trigger a download of manifest.json (tiny file), but
        # the large `layer.tar` will still be successfully extracted
        # and reinjected via a query by ID/sha256
        pass

    # let docker dump into a TMPDIR inside the dataset
    # this place is likely to have sufficient space
    with tempfile.TemporaryDirectory(dir=imgpath) as tmpdir:
        # try to export the image from a local docker instance
        save(
            # prefer the tag, but continue with ID (see above)
            repo_tag or f'sha256:{img_id}',
            tmpdir,
        )
        # the line above will raise an exception when
        # - this docker does not have the image.
        # - or there is no docker running at all.
        # this is fine, we will just not proceed.

        # now let git-annex reinject any file that matches a known
        # key (given the backend determined above). This will populate
        # as much as we can. This approach has built-in content verification.
        # this means that even if this docker instance has different metadata
        # we will be able to harvest any image piece that fits, and ignore
        # anything else
        contrepo.call_annex(
            ['reinject', '--known', '--backend', backend],
            files=[
                str(p) for p in Path(tmpdir).glob('**/*')
                if p.is_file()
            ],
        )


# Command-line


Expand Down
29 changes: 29 additions & 0 deletions datalad_container/containers_run.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,12 @@

import logging
import os.path as op
from pathlib import Path
import sys

from datalad.interface.base import Interface
from datalad.interface.base import build_doc
from datalad.support.exceptions import CapturedException
from datalad.support.param import Parameter
from datalad.distribution.dataset import datasetmethod
from datalad.distribution.dataset import require_dataset
Expand Down Expand Up @@ -163,6 +165,33 @@ def __call__(cmd, container_name=None, dataset=None,

lgr.debug("extra_inputs = %r", extra_inputs)

if '-m datalad_container.adapters.docker run' in cmd:
# this will use the docker adapter to execute the container.
# below we let the adapter have a first look at the image
# it will run. The adapter might query a local docker service,
# and try to populate missing image parts -- possibly avoiding
# a download (via the `get()` that `run()` would perform), whenever
# the local service already has the respective images.
# this is a scenario that would occur frequently in short-lived
# clones that are repeatedly generated on the same machine.
from datalad_container.adapters.docker import repopulate_from_daemon
contds = require_dataset(
container['parentds'], check_installed=True,
purpose='check for docker images')
try:
repopulate_from_daemon(
contds,
# we use the container report here too, and not any of the
# processed variants from above to stay internally
# consistent
imgpath=Path(container['path']),
)
except Exception as e:

Check warning on line 189 in datalad_container/containers_run.py

View check run for this annotation

Codecov / codecov/patch

datalad_container/containers_run.py#L189

Added line #L189 was not covered by tests
# get basic logging of a failure, but overall consider this
# a "best effort". if anything fails, we will silently fall
# back on a standard "get" via the `extra_inputs` below
CapturedException(e)

Check warning on line 193 in datalad_container/containers_run.py

View check run for this annotation

Codecov / codecov/patch

datalad_container/containers_run.py#L193

Added line #L193 was not covered by tests

with patch.dict('os.environ',
{CONTAINER_NAME_ENVVAR: container['name']}):
# fire!
Expand Down

0 comments on commit 24a11ce

Please sign in to comment.