Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ls-url: add support for --tree/--level #10664

Merged
merged 1 commit into from
Jan 9, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 1 addition & 4 deletions dvc/commands/ls/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,10 +77,7 @@ def _build_tree_structure(

num_entries = len(entries)
for i, (name, entry) in enumerate(entries.items()):
# show full path for root, otherwise only show the name
if _depth > 0:
entry["path"] = name

entry["path"] = name
is_last = i >= num_entries - 1
tree_part = ""
if _depth > 0:
Expand Down
40 changes: 36 additions & 4 deletions dvc/commands/ls_url.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,26 +3,45 @@
from dvc.cli.utils import DictAction, append_doc_link
from dvc.log import logger

from .ls import show_entries
from .ls import show_entries, show_tree

logger = logger.getChild(__name__)


class CmdListUrl(CmdBaseNoRepo):
def run(self):
from dvc.config import Config
def _show_tree(self, config):
    """Print the contents of ``self.args.url`` as a tree.

    The URL is resolved to a filesystem and path, walked with ``_ls_tree``
    (bounded by ``self.args.level``) and rendered by ``show_tree``.

    Args:
        config: configuration object forwarded to ``parse_external_url``.

    Returns:
        Always ``0``.
    """
    # Resolved at call time, inside the method.
    from dvc.fs import parse_external_url
    from dvc.repo.ls import _ls_tree

    args = self.args
    fs, fs_path = parse_external_url(
        args.url,
        fs_config=args.fs_config,
        config=config,
    )
    tree = _ls_tree(fs, fs_path, maxdepth=args.level)
    show_tree(tree, with_color=True, with_size=args.size)
    return 0

def _show_list(self, config):
from dvc.repo import Repo

entries = Repo.ls_url(
self.args.url,
recursive=self.args.recursive,
maxdepth=self.args.level,
fs_config=self.args.fs_config,
config=Config.from_cwd(),
config=config,
)
if entries:
show_entries(entries, with_color=True, with_size=self.args.size)
return 0

def run(self):
    """Load the config once and dispatch to the tree or flat listing.

    Returns:
        Whatever the selected handler returns (``_show_tree`` when
        ``self.args.tree`` is truthy, ``_show_list`` otherwise).
    """
    from dvc.config import Config

    config = Config.from_cwd()
    handler = self._show_tree if self.args.tree else self._show_list
    return handler(config=config)


def add_parser(subparsers, parent_parser):
LS_HELP = "List directory contents from URL."
Expand All @@ -40,6 +59,19 @@ def add_parser(subparsers, parent_parser):
lsurl_parser.add_argument(
"-R", "--recursive", action="store_true", help="Recursively list files."
)
lsurl_parser.add_argument(
"-T",
"--tree",
action="store_true",
help="Recurse into directories as a tree.",
)
lsurl_parser.add_argument(
"-L",
"--level",
metavar="depth",
type=int,
help="Limit the depth of recursion.",
)
lsurl_parser.add_argument("--size", action="store_true", help="Show sizes.")
lsurl_parser.add_argument(
"--fs-config",
Expand Down
31 changes: 20 additions & 11 deletions dvc/repo/ls.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,9 @@ def ls_tree(
path = path or ""
fs: DVCFileSystem = repo.dvcfs
fs_path = fs.from_os_path(path)
return _ls_tree(fs, fs_path, dvc_only, maxdepth)
return _ls_tree(
fs, fs_path, maxdepth=maxdepth, dvc_only=dvc_only, dvcfiles=True
)


def _ls(
Expand Down Expand Up @@ -145,27 +147,34 @@ def _ls(
return ret_list


def _ls_tree(
fs, path, dvc_only: bool = False, maxdepth: Optional[int] = None, _info=None
):
ret = {}
def _ls_tree(fs, path, maxdepth=None, _info=None, **fs_kwargs):
info = _info or fs.info(path)
if _info is None:
# preserve the original path name
name = path
if not name:
name = os.curdir if fs.protocol == "local" else fs.root_marker
path = info["name"]
else:
name = path.rsplit(fs.sep, 1)[-1]

path = info["name"].rstrip(fs.sep) or os.curdir
name = path.rsplit("/", 1)[-1]
ret = {}
ls_info = _adapt_info(info)
ls_info["path"] = path

recurse = maxdepth is None or maxdepth > 0
if recurse and info["type"] == "directory":
infos = fs.ls(path, dvcfiles=True, dvc_only=dvc_only, detail=True)
try:
infos = fs.ls(path, detail=True, **fs_kwargs)
except FileNotFoundError:
# broken symlink?
infos = []

infos.sort(key=lambda f: f["name"])
maxdepth = maxdepth - 1 if maxdepth is not None else None
contents = {}
for info in infos:
d = _ls_tree(
fs, info["name"], dvc_only=dvc_only, maxdepth=maxdepth, _info=info
)
d = _ls_tree(fs, info["name"], maxdepth=maxdepth, _info=info, **fs_kwargs)
contents.update(d)
ls_info["contents"] = contents

Expand Down
21 changes: 16 additions & 5 deletions dvc/repo/ls_url.py
Original file line number Diff line number Diff line change
@@ -1,19 +1,30 @@
from fsspec.implementations.local import LocalFileSystem as _LocalFileSystem

from dvc.exceptions import URLMissingError
from dvc.fs import parse_external_url
from dvc.fs import LocalFileSystem, parse_external_url


def ls_url(url, *, fs_config=None, recursive=False, config=None):
def ls_url(url, *, fs_config=None, recursive=False, maxdepth=None, config=None):
fs, fs_path = parse_external_url(url, fs_config=fs_config, config=config)
try:
info = fs.info(fs_path)
except FileNotFoundError as exc:
raise URLMissingError(url) from exc
if info["type"] != "directory":
if maxdepth == 0 or info["type"] != "directory":
return [{"path": info["name"], "isdir": False}]

if isinstance(fs, LocalFileSystem):
# dvc's LocalFileSystem does not support maxdepth yet
walk = _LocalFileSystem().walk
else:
walk = fs.walk

ret = []
for _, dirs, files in fs.walk(fs_path, detail=True):
if not recursive:
for root, dirs, files in walk(fs_path, detail=True, maxdepth=maxdepth):
parts = fs.relparts(root, fs_path)
if parts == (".",):
parts = ()
if not recursive or (maxdepth and len(parts) >= maxdepth - 1):
files.update(dirs)

for info in files.values():
Expand Down
33 changes: 33 additions & 0 deletions dvc/testing/workspace_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -225,6 +225,39 @@ def test_recursive(self, cloud):
],
)

result = ls_url(
str(cloud / "dir"), fs_config=cloud.config, recursive=True, maxdepth=0
)
match_files(
fs,
result,
[{"path": (cloud / "dir").fs_path, "isdir": False}],
)

result = ls_url(
str(cloud / "dir"), fs_config=cloud.config, recursive=True, maxdepth=1
)
match_files(
fs,
result,
[
{"path": "foo", "isdir": False},
{"path": "subdir", "isdir": True},
],
)

result = ls_url(
str(cloud / "dir"), fs_config=cloud.config, recursive=True, maxdepth=2
)
match_files(
fs,
result,
[
{"path": "foo", "isdir": False},
{"path": "subdir/bar", "isdir": False},
],
)

def test_nonexistent(self, cloud):
with pytest.raises(URLMissingError):
ls_url(str(cloud / "dir"), fs_config=cloud.config)
Expand Down
64 changes: 63 additions & 1 deletion tests/func/test_ls.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,9 @@

import pytest

from dvc.fs import MemoryFileSystem
from dvc.repo import Repo
from dvc.repo.ls import ls_tree
from dvc.repo.ls import _ls_tree, ls_tree
from dvc.scm import CloneError

FS_STRUCTURE = {
Expand Down Expand Up @@ -998,3 +999,64 @@ def test_ls_tree_maxdepth(M, tmp_dir, scm, dvc):
"structure.xml.dvc": None,
}
}


def test_fs_ls_tree():
    """``_ls_tree`` on an in-memory filesystem: a file, the root, a subdir."""
    memfs = MemoryFileSystem(global_store=False)
    memfs.pipe({name: text.encode() for name, text in FS_STRUCTURE.items()})
    root = memfs.root_marker

    # A single file yields a one-entry tree with no children.
    readme_tree = _ls_tree(memfs, "README.md")
    assert _simplify_tree(readme_tree) == {"README.md": None}

    # The root is keyed by the filesystem's root marker.
    root_tree = _ls_tree(memfs, root)
    assert _simplify_tree(root_tree) == {
        root: {
            ".gitignore": None,
            "README.md": None,
            "model": {"script.py": None, "train.py": None},
        }
    }

    # A subdirectory is keyed by the name it was requested with.
    model_tree = _ls_tree(memfs, "model")
    assert _simplify_tree(model_tree) == {
        "model": {"script.py": None, "train.py": None}
    }


def test_fs_ls_tree_maxdepth():
    """``_ls_tree`` stops descending once ``maxdepth`` levels are listed."""
    memfs = MemoryFileSystem(global_store=False)
    memfs.pipe({name: text.encode() for name, text in FS_STRUCTURE.items()})

    # (path, maxdepth, expected simplified tree), checked in this order.
    cases = [
        ("/", 0, {"/": None}),
        (
            "/",
            1,
            {"/": {".gitignore": None, "README.md": None, "model": None}},
        ),
        (
            "/",
            2,
            {
                "/": {
                    ".gitignore": None,
                    "README.md": None,
                    "model": {"script.py": None, "train.py": None},
                }
            },
        ),
        # For a plain file the depth limit makes no difference.
        ("README.md", 3, {"README.md": None}),
    ]
    for path, depth, expected in cases:
        tree = _ls_tree(memfs, path, maxdepth=depth)
        assert _simplify_tree(tree) == expected
22 changes: 19 additions & 3 deletions tests/unit/command/test_ls_url.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from dvc.cli import parse_args
from dvc.commands.ls_url import CmdListUrl
from dvc.config import Config
from dvc.fs import LocalFileSystem


def test_ls_url(mocker, M):
Expand All @@ -12,18 +13,33 @@ def test_ls_url(mocker, M):
assert cmd.run() == 0

m.assert_called_once_with(
"src", recursive=False, fs_config=None, config=M.instance_of(Config)
"src",
recursive=False,
maxdepth=None,
fs_config=None,
config=M.instance_of(Config),
)


def test_recursive(mocker, M):
cli_args = parse_args(["ls-url", "-R", "src"])
cli_args = parse_args(["ls-url", "-R", "-L", "2", "src"])
assert cli_args.func == CmdListUrl
cmd = cli_args.func(cli_args)
m = mocker.patch("dvc.repo.Repo.ls_url", autospec=True)

assert cmd.run() == 0

m.assert_called_once_with(
"src", recursive=True, fs_config=None, config=M.instance_of(Config)
"src", recursive=True, maxdepth=2, fs_config=None, config=M.instance_of(Config)
)


def test_tree(mocker, M):
    """``ls-url --tree --level N`` calls ``_ls_tree`` with ``maxdepth=N``."""
    argv = ["ls-url", "--tree", "--level", "2", "src"]
    cli_args = parse_args(argv)
    assert cli_args.func == CmdListUrl

    command = cli_args.func(cli_args)
    # Patch the function in its defining module; the command imports it
    # from there when it runs.
    ls_tree_mock = mocker.patch("dvc.repo.ls._ls_tree", autospec=True)

    exit_code = command.run()
    assert exit_code == 0

    ls_tree_mock.assert_called_once_with(
        M.instance_of(LocalFileSystem), "src", maxdepth=2
    )
Loading