Skip to content

Commit

Permalink
Implement maxdepth for recursive cp/get/put (#1259)
Browse files Browse the repository at this point in the history
  • Loading branch information
ianthomas23 authored May 12, 2023
1 parent ee83549 commit 813a241
Show file tree
Hide file tree
Showing 3 changed files with 133 additions and 40 deletions.
64 changes: 50 additions & 14 deletions docs/source/copying.rst
Original file line number Diff line number Diff line change
Expand Up @@ -119,11 +119,6 @@ Forward slashes are used for directory separators throughout.

.. dropdown:: 1e. Directory to existing directory

.. warning::

``maxdepth`` is not yet implemented for copying functions
(`issue 1231 <https://github.com/fsspec/filesystem_spec/issues/1231>`_).

.. code-block:: python
cp("source/subdir/", "target/", recursive=True)
Expand Down Expand Up @@ -168,12 +163,21 @@ Forward slashes are used for directory separators throughout.
└── 📁 nesteddir
└── 📄 nestedfile

.. dropdown:: 1f. Directory to new directory
Again, the depth of recursion can be controlled using the ``maxdepth`` keyword argument, for
example:

.. code-block:: python
cp("source/subdir", "target/", recursive=True, maxdepth=1)
.. warning::
results in::

📁 target
└── 📁 subdir
├── 📄 subfile1
└── 📄 subfile2

``maxdepth`` is not yet implemented for copying functions
(`issue 1231 <https://github.com/fsspec/filesystem_spec/issues/1231>`_).
.. dropdown:: 1f. Directory to new directory

.. code-block:: python
Expand All @@ -192,7 +196,18 @@ Forward slashes are used for directory separators throughout.
They are recommended to explicitly indicate both are directories.

The ``recursive=True`` keyword argument is required; otherwise the call does nothing. The depth
of recursion can be controlled using the ``maxdepth`` keyword argument.
of recursion can be controlled using the ``maxdepth`` keyword argument, for example:

.. code-block:: python
cp("source/subdir/", "target/newdir/", recursive=True, maxdepth=1)
results in::

📁 target
└── 📁 newdir
├── 📄 subfile1
└── 📄 subfile2

.. dropdown:: 1g. Glob to existing directory

Expand Down Expand Up @@ -222,11 +237,21 @@ Forward slashes are used for directory separators throughout.
└── 📁 nesteddir
└── 📄 nestedfile

The depth of recursion can be controlled by the ``maxdepth`` keyword argument.

The trailing slash on ``"target/"`` is optional but recommended as it explicitly indicates that
the target is a directory.

The depth of recursion can be controlled by the ``maxdepth`` keyword argument, for example:

.. code-block:: python
cp("source/subdir/*", "target/", recursive=True, maxdepth=1)
results in::

📁 target
├── 📄 subfile1
└── 📄 subfile2

.. dropdown:: 1h. Glob to new directory

Nonrecursive
Expand Down Expand Up @@ -257,11 +282,22 @@ Forward slashes are used for directory separators throughout.
└── 📁 nesteddir
└── 📄 nestedfile

The depth of recursion can be controlled by the ``maxdepth`` keyword argument.

The trailing slash on the ``target`` is optional but recommended as it explicitly indicates that
it is a directory.

The depth of recursion can be controlled by the ``maxdepth`` keyword argument, for example:

.. code-block:: python
cp("source/subdir/*", "target/newdir/", recursive=True, maxdepth=1)
results in::

📁 target
└── 📁 newdir
├── 📄 subfile1
└── 📄 subfile2

These calls fail if the ``target`` file system is not capable of creating the directory, for
example if it is write-only or if ``auto_mkdir=False``. There is no command line equivalent of
this scenario without an explicit ``mkdir`` to create the new directory.
Expand Down
43 changes: 33 additions & 10 deletions fsspec/spec.py
Original file line number Diff line number Diff line change
Expand Up @@ -869,7 +869,15 @@ def get_file(
if not isfilelike(lpath):
outfile.close()

def get(self, rpath, lpath, recursive=False, callback=_DEFAULT_CALLBACK, **kwargs):
def get(
self,
rpath,
lpath,
recursive=False,
callback=_DEFAULT_CALLBACK,
maxdepth=None,
**kwargs,
):
"""Copy file(s) to local.
Copies a specific file or tree of files (if recursive=True). If lpath
Expand All @@ -887,8 +895,8 @@ def get(self, rpath, lpath, recursive=False, callback=_DEFAULT_CALLBACK, **kwarg
)

source_is_str = isinstance(rpath, str)
rpaths = self.expand_path(rpath, recursive=recursive)
if source_is_str and not recursive:
rpaths = self.expand_path(rpath, recursive=recursive, maxdepth=maxdepth)
if source_is_str and (not recursive or maxdepth is not None):
# Non-recursive or maxdepth-limited calls do not copy directories themselves
rpaths = [p for p in rpaths if not (trailing_sep(p) or self.isdir(p))]
if not rpaths:
Expand Down Expand Up @@ -932,7 +940,15 @@ def put_file(self, lpath, rpath, callback=_DEFAULT_CALLBACK, **kwargs):
segment_len = len(data)
callback.relative_update(segment_len)

def put(self, lpath, rpath, recursive=False, callback=_DEFAULT_CALLBACK, **kwargs):
def put(
self,
lpath,
rpath,
recursive=False,
callback=_DEFAULT_CALLBACK,
maxdepth=None,
**kwargs,
):
"""Copy file(s) from local.
Copies a specific file or tree of files (if recursive=True). If rpath
Expand All @@ -952,8 +968,8 @@ def put(self, lpath, rpath, recursive=False, callback=_DEFAULT_CALLBACK, **kwarg
lpath = make_path_posix(lpath)
fs = LocalFileSystem()
source_is_str = isinstance(lpath, str)
lpaths = fs.expand_path(lpath, recursive=recursive)
if source_is_str and not recursive:
lpaths = fs.expand_path(lpath, recursive=recursive, maxdepth=maxdepth)
if source_is_str and (not recursive or maxdepth is not None):
# Non-recursive or maxdepth-limited calls do not copy directories themselves
lpaths = [p for p in lpaths if not (trailing_sep(p) or self.isdir(p))]
if not lpaths:
Expand Down Expand Up @@ -992,7 +1008,9 @@ def tail(self, path, size=1024):
def cp_file(self, path1, path2, **kwargs):
raise NotImplementedError

def copy(self, path1, path2, recursive=False, on_error=None, **kwargs):
def copy(
self, path1, path2, recursive=False, maxdepth=None, on_error=None, **kwargs
):
"""Copy within two locations in the filesystem
on_error : "raise", "ignore"
Expand All @@ -1008,8 +1026,8 @@ def copy(self, path1, path2, recursive=False, on_error=None, **kwargs):
on_error = "raise"

source_is_str = isinstance(path1, str)
paths = self.expand_path(path1, recursive=recursive)
if source_is_str and not recursive:
paths = self.expand_path(path1, recursive=recursive, maxdepth=maxdepth)
if source_is_str and (not recursive or maxdepth is not None):
# Non-recursive or maxdepth-limited calls do not copy directories themselves
paths = [p for p in paths if not (trailing_sep(p) or self.isdir(p))]
if not paths:
Expand Down Expand Up @@ -1051,11 +1069,16 @@ def expand_path(self, path, recursive=False, maxdepth=None, **kwargs):
bit = set(self.glob(p, **kwargs))
out |= bit
if recursive:
# glob call above expanded one depth so if maxdepth is defined
# then decrement it in expand_path call below. If it is zero
# after decrementing then avoid expand_path call.
if maxdepth is not None and maxdepth <= 1:
continue
out |= set(
self.expand_path(
list(bit),
recursive=recursive,
maxdepth=maxdepth,
maxdepth=maxdepth - 1 if maxdepth is not None else None,
**kwargs,
)
)
Expand Down
66 changes: 50 additions & 16 deletions fsspec/tests/abstract/copy.py
Original file line number Diff line number Diff line change
Expand Up @@ -102,15 +102,9 @@ def test_copy_directory_to_existing_directory(
assert fs.isfile(fs_join(target, "subfile2"))
assert fs.isdir(fs_join(target, "nesteddir"))
assert fs.isfile(fs_join(target, "nesteddir", "nestedfile"))
assert not fs.exists(fs_join(target, "subdir"))

fs.rm(
[
fs_join(target, "subfile1"),
fs_join(target, "subfile2"),
fs_join(target, "nesteddir"),
],
recursive=True,
)
fs.rm(fs.ls(target, detail=False), recursive=True)
else:
assert fs.isdir(fs_join(target, "subdir"))
assert fs.isfile(fs_join(target, "subdir", "subfile1"))
Expand All @@ -121,8 +115,23 @@ def test_copy_directory_to_existing_directory(
fs.rm(fs_join(target, "subdir"), recursive=True)
assert fs.ls(target) == []

# Limit by maxdepth
# ERROR: maxdepth ignored here
# Limit recursive by maxdepth
fs.cp(s, t, recursive=True, maxdepth=1)
if source_slash:
assert fs.isfile(fs_join(target, "subfile1"))
assert fs.isfile(fs_join(target, "subfile2"))
assert not fs.exists(fs_join(target, "nesteddir"))
assert not fs.exists(fs_join(target, "subdir"))

fs.rm(fs.ls(target, detail=False), recursive=True)
else:
assert fs.isdir(fs_join(target, "subdir"))
assert fs.isfile(fs_join(target, "subdir", "subfile1"))
assert fs.isfile(fs_join(target, "subdir", "subfile2"))
assert not fs.exists(fs_join(target, "subdir", "nesteddir"))

fs.rm(fs_join(target, "subdir"), recursive=True)
assert fs.ls(target) == []

def test_copy_directory_to_new_directory(
self, fs, fs_join, fs_path, fs_scenario_cp
Expand Down Expand Up @@ -152,12 +161,21 @@ def test_copy_directory_to_new_directory(
assert fs.isfile(fs_join(target, "newdir", "subfile2"))
assert fs.isdir(fs_join(target, "newdir", "nesteddir"))
assert fs.isfile(fs_join(target, "newdir", "nesteddir", "nestedfile"))
assert not fs.exists(fs_join(target, "subdir"))

fs.rm(fs_join(target, "newdir"), recursive=True)
assert fs.ls(target) == []

# Limit by maxdepth
# ERROR: maxdepth ignored here
# Limit recursive by maxdepth
fs.cp(s, t, recursive=True, maxdepth=1)
assert fs.isdir(fs_join(target, "newdir"))
assert fs.isfile(fs_join(target, "newdir", "subfile1"))
assert fs.isfile(fs_join(target, "newdir", "subfile2"))
assert not fs.exists(fs_join(target, "newdir", "nesteddir"))
assert not fs.exists(fs_join(target, "subdir"))

fs.rm(fs_join(target, "newdir"), recursive=True)
assert fs.ls(target) == []

def test_copy_glob_to_existing_directory(
self, fs, fs_join, fs_path, fs_scenario_cp
Expand Down Expand Up @@ -193,8 +211,15 @@ def test_copy_glob_to_existing_directory(
fs.rm(fs.ls(target, detail=False), recursive=True)
assert fs.ls(target) == []

# Limit by maxdepth
# ERROR: maxdepth ignored here
# Limit recursive by maxdepth
fs.cp(fs_join(source, "subdir", "*"), t, recursive=True, maxdepth=1)
assert fs.isfile(fs_join(target, "subfile1"))
assert fs.isfile(fs_join(target, "subfile2"))
assert not fs.exists(fs_join(target, "nesteddir"))
assert not fs.exists(fs_join(target, "subdir"))

fs.rm(fs.ls(target, detail=False), recursive=True)
assert fs.ls(target) == []

def test_copy_glob_to_new_directory(self, fs, fs_join, fs_path, fs_scenario_cp):
# Copy scenario 1h
Expand Down Expand Up @@ -234,8 +259,17 @@ def test_copy_glob_to_new_directory(self, fs, fs_join, fs_path, fs_scenario_cp):
fs.rm(fs_join(target, "newdir"), recursive=True)
assert fs.ls(target) == []

# Limit by maxdepth
# ERROR: this is not correct
# Limit recursive by maxdepth
fs.cp(fs_join(source, "subdir", "*"), t, recursive=True, maxdepth=1)
assert fs.isdir(fs_join(target, "newdir"))
assert fs.isfile(fs_join(target, "newdir", "subfile1"))
assert fs.isfile(fs_join(target, "newdir", "subfile2"))
assert not fs.exists(fs_join(target, "newdir", "nesteddir"))
assert not fs.exists(fs_join(target, "subdir"))
assert not fs.exists(fs_join(target, "newdir", "subdir"))

fs.rm(fs.ls(target, detail=False), recursive=True)
assert fs.ls(target) == []

def test_copy_list_of_files_to_existing_directory(
self, fs, fs_join, fs_path, fs_scenario_cp
Expand Down

0 comments on commit 813a241

Please sign in to comment.