Skip to content

Commit

Permalink
Merge pull request #243 from mih/path-normalize
Browse files Browse the repository at this point in the history
Unconditionally store image path in POSIX notation in configuration
  • Loading branch information
mih authored Oct 12, 2023
2 parents 450b9a8 + cd49d66 commit cce7edb
Show file tree
Hide file tree
Showing 4 changed files with 133 additions and 2 deletions.
8 changes: 6 additions & 2 deletions datalad_container/containers_add.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,11 @@
import logging
import os
import os.path as op
from pathlib import (
Path,
PurePosixPath,
)
import re
import sys
from shutil import copyfile

from datalad.cmd import WitlessRunner
Expand Down Expand Up @@ -339,7 +342,8 @@ def __call__(name, url=None, dataset=None, call_fmt=None, image=None,
# force store the image, and prevent multiple entries
ds.config.set(
"{}.image".format(cfgbasevar),
op.relpath(image, start=ds.path),
# always store a POSIX path, relative to dataset root
str(PurePosixPath(Path(image).relative_to(ds.pathobj))),
force=True)
if call_fmt:
ds.config.set(
Expand Down
7 changes: 7 additions & 0 deletions datalad_container/tests/test_containers.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,10 @@ def test_add_local_path(path=None, local_file=None):
foo_target = op.join(path, ".datalad", "environments", "foobert", "image")
assert_result_count(res, 1, status="ok", type="file", path=foo_target,
action="containers_add")
# the image path configuration is always in POSIX format
assert ds.config.get('datalad.containers.foobert.image') \
== '.datalad/environments/foobert/image'

# We've just copied and added the file.
assert_not_in(ds.repo.WEB_UUID, ds.repo.whereis(foo_target))

Expand Down Expand Up @@ -219,6 +223,9 @@ def test_container_update(ds_path=None, local_file=None, url=None):
assert_in_results(res, action="remove", status="ok")
assert_in_results(res, action="save", status="ok")
ok_file_has_content(op.join(ds.path, img), "bar")
# the image path configuration is (still) always in POSIX format
assert ds.config.get('datalad.containers.foo.image') \
== '.datalad/environments/foo/image'

# Test commit message
# In the above case it was updating existing image so should have "Update "
Expand Down
45 changes: 45 additions & 0 deletions datalad_container/tests/test_run.py
Original file line number Diff line number Diff line change
Expand Up @@ -380,3 +380,48 @@ def test_run_subdataset_install(path=None):
# There's no install record if subdataset is already present.
res = ds_dest.containers_run(["arg"], container_name="a/a2/in-a2", **common_kwargs)
assert_not_in_results(res, action="install")


@skip_if_no_network
def test_nonPOSIX_imagepath(tmp_path):
ds = Dataset(tmp_path).create(**common_kwargs)

# plug in a proper singularity image
ds.containers_add(
'mycontainer',
url=testimg_url,
**common_kwargs
)
assert_result_count(
ds.containers_list(**common_kwargs), 1,
# we get a report in platform conventions
path=str(ds.pathobj / '.datalad' / 'environments' /
'mycontainer' / 'image'),
name='mycontainer')
assert_repo_status(tmp_path)

# now reconfigure the image path to look as if a version <= 1.2.4
# configured it on windows
# this must still run across all platforms
ds.config.set(
'datalad.containers.mycontainer.image',
'.datalad\\environments\\mycontainer\\image',
scope='branch',
reload=True,
)
ds.save(**common_kwargs)
assert_repo_status(tmp_path)

assert_result_count(
ds.containers_list(**common_kwargs), 1,
# we still get a report in platform conventions
path=str(ds.pathobj / '.datalad' / 'environments' /
'mycontainer' / 'image'),
name='mycontainer')

res = ds.containers_run(['ls'], **common_kwargs)
assert_result_count(
res, 1,
action='run',
status='ok',
)
75 changes: 75 additions & 0 deletions datalad_container/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,15 @@

from __future__ import annotations

# the pathlib equivalent is only available in PY3.12
from os.path import lexists

from pathlib import (
PurePath,
PurePosixPath,
PureWindowsPath,
)

from datalad.distribution.dataset import Dataset
from datalad.support.external_versions import external_versions

Expand Down Expand Up @@ -68,9 +77,75 @@ def get_container_configuration(
if not ccfgname:
continue

if ccfgname == 'image':
# run image path normalization to get a relative path
# in platform conventions, regardless of the input.
# for now we report a str, because the rest of the code
# is not using pathlib
value = str(_normalize_image_path(value, ds))

cinfo = containers.get(cname, {})
cinfo[ccfgname] = value

containers[cname] = cinfo

return containers if name is None else containers.get(name, {})


def _normalize_image_path(path: str, ds: Dataset) -> PurePath:
"""Helper to standardize container image path handling
Previously, container configuration would contain platform-paths
for container image location (e.g., windows paths when added on
windows, POSIX paths elsewhere). This made cross-platform reuse
impossible out-of-the box, but it also means that such dataset
are out there in unknown numbers.
This helper inspects an image path READ FROM CONFIG(!) and ensures
that it matches platform conventions (because all other arguments)
also come in platform conventions. This enables standardizing
the storage conventions to be POSIX-only (for the future).
Parameters
----------
path: str
A str-path, as read from the configuration, matching its conventions
(relative path, pointing to a container image relative to the
dataset's root).
ds: Dataset
This dataset's base path is used as a reference for resolving
the relative image path to an absolute location on the file system.
Returns
-------
PurePath
Relative path in platform conventions
"""
# we only need to act differently, when an incoming path is
# windows. This is not possible to say with 100% confidence,
# because a POSIX path can also contain a backslash. We support
# a few standard cases where we CAN tell
pathobj = None
if '\\' not in path:
# no windows pathsep, no problem
pathobj = PurePosixPath(path)
elif path.startswith(r'.datalad\\environments\\'):
# this is the default location setup in windows conventions
pathobj = PureWindowsPath(path)
else:
# let's assume it is windows for a moment
if lexists(str(ds.pathobj / PureWindowsPath(path))):
# if there is something on the filesystem for this path,
# we can be reasonably sure that this is indeed a windows
# path. This won't catch images in uninstalled subdataset,
# but better than nothing
pathobj = PureWindowsPath(path)
else:
# if we get here, we have no idea, and no means to verify
# further hypotheses -- go with the POSIX assumption
# and hope for the best
pathobj = PurePosixPath(path)

assert pathobj is not None
# we report in platform-conventions
return PurePath(pathobj)

0 comments on commit cce7edb

Please sign in to comment.