Skip to content

Commit

Permalink
Validate maxdepth kwarg (#1151)
Browse files Browse the repository at this point in the history
* Validate maxdepth in AbstractFileSystem.walk

* maxdepth in expand_path

* maxdepth in async _expand_path and _walk

* Fix windows tests
  • Loading branch information
ianthomas23 authored Jan 13, 2023
1 parent c64d7f0 commit 30925b3
Show file tree
Hide file tree
Showing 7 changed files with 121 additions and 7 deletions.
2 changes: 1 addition & 1 deletion docs/source/changelog.rst
Original file line number Diff line number Diff line change
Expand Up @@ -464,7 +464,7 @@ Fixes:
- random appending of a directory within the filesystems ``find()`` method (#507, 537)
- fix git tests (#501)
- fix recursive memfs operations (#502)
- fix recorsive/maxdepth for cp (#508)
- fix recursive/maxdepth for cp (#508)
- fix listings cache timeout (#513)
- big endian bytes tests (#519)
- docs syntax (#535, 524, 520, 542)
Expand Down
10 changes: 8 additions & 2 deletions fsspec/asyn.py
Original file line number Diff line number Diff line change
Expand Up @@ -577,6 +577,9 @@ async def _ls(self, path, detail=True, **kwargs):
raise NotImplementedError

async def _walk(self, path, maxdepth=None, **kwargs):
if maxdepth is not None and maxdepth < 1:
raise ValueError("maxdepth must be at least 1")

path = self._strip_protocol(path)
full_dirs = {}
dirs = {}
Expand Down Expand Up @@ -727,11 +730,12 @@ async def _find(self, path, maxdepth=None, withdirs=False, **kwargs):
return {name: out[name] for name in names}

async def _expand_path(self, path, recursive=False, maxdepth=None):
if maxdepth is not None and maxdepth < 1:
raise ValueError("maxdepth must be at least 1")

if isinstance(path, str):
out = await self._expand_path([path], recursive, maxdepth)
else:
# reduce depth on each recursion level unless None or 0
maxdepth = maxdepth if not maxdepth else maxdepth - 1
out = set()
path = [self._strip_protocol(p) for p in path]
for p in path: # can gather here
Expand All @@ -751,6 +755,8 @@ async def _expand_path(self, path, recursive=False, maxdepth=None):
if p not in out and (recursive is False or (await self._exists(p))):
# should only check once, for the root
out.add(p)
# reduce depth on each recursion level unless None or 0
maxdepth = maxdepth if not maxdepth else maxdepth - 1
if not out:
raise FileNotFoundError(path)
return list(sorted(out))
Expand Down
37 changes: 37 additions & 0 deletions fsspec/implementations/tests/test_http.py
Original file line number Diff line number Diff line change
Expand Up @@ -486,3 +486,40 @@ def test_with_cache(server):
with fs1.open(fn, "rb") as f:
out = f.read()
assert out == fs1.cat(fn)


@pytest.mark.asyncio
async def test_async_expand_path(server):
fs = fsspec.filesystem("http", asynchronous=True, skip_instance_cache=True)

# maxdepth=1
assert await fs._expand_path(server + "/index", recursive=True, maxdepth=1) == [
server + "/index",
server + "/index/realfile",
]

# maxdepth=0
with pytest.raises(ValueError):
await fs._expand_path(server + "/index", maxdepth=0)
with pytest.raises(ValueError):
await fs._expand_path(server + "/index", recursive=True, maxdepth=0)

await fs._session.close()


@pytest.mark.asyncio
async def test_async_walk(server):
fs = fsspec.filesystem("http", asynchronous=True, skip_instance_cache=True)

# No maxdepth
res = []
async for a in fs._walk(server + "/index"):
res.append(a)
assert res == [(server + "/index", [], ["realfile"])]

# maxdepth=0
with pytest.raises(ValueError):
async for a in fs._walk(server + "/index", maxdepth=0):
pass

await fs._session.close()
3 changes: 2 additions & 1 deletion fsspec/implementations/tests/test_local.py
Original file line number Diff line number Diff line change
Expand Up @@ -858,7 +858,8 @@ def test_du(tmpdir):

assert fs.du(tmpdir, maxdepth=2) == 11
assert fs.du(tmpdir, maxdepth=1) == 4
assert fs.du(tmpdir, maxdepth=0) == 4
with pytest.raises(ValueError):
fs.du(tmpdir, maxdepth=0)

# Size of file only.
assert fs.du(file) == 4
Expand Down
12 changes: 10 additions & 2 deletions fsspec/spec.py
Original file line number Diff line number Diff line change
Expand Up @@ -390,6 +390,9 @@ def walk(self, path, maxdepth=None, topdown=True, **kwargs):
the bottom upwards.
kwargs: passed to ``ls``
"""
if maxdepth is not None and maxdepth < 1:
raise ValueError("maxdepth must be at least 1")

path = self._strip_protocol(path)
full_dirs = {}
dirs = {}
Expand Down Expand Up @@ -429,6 +432,8 @@ def walk(self, path, maxdepth=None, topdown=True, **kwargs):
if maxdepth is not None:
maxdepth -= 1
if maxdepth < 1:
if not topdown:
yield path, dirs, files
return

for d in full_dirs:
Expand Down Expand Up @@ -977,11 +982,12 @@ def copy(self, path1, path2, recursive=False, on_error=None, **kwargs):
def expand_path(self, path, recursive=False, maxdepth=None):
"""Turn one or more globs or directories into a list of all matching paths
to files or directories."""
if maxdepth is not None and maxdepth < 1:
raise ValueError("maxdepth must be at least 1")

if isinstance(path, str):
out = self.expand_path([path], recursive, maxdepth)
else:
# reduce depth on each recursion level unless None or 0
maxdepth = maxdepth if not maxdepth else maxdepth - 1
out = set()
path = [self._strip_protocol(p) for p in path]
for p in path:
Expand All @@ -1003,6 +1009,8 @@ def expand_path(self, path, recursive=False, maxdepth=None):
if p not in out and (recursive is False or self.exists(p)):
# should only check once, for the root
out.add(p)
# reduce depth on each recursion level unless None or 0
maxdepth = maxdepth if not maxdepth else maxdepth - 1
if not out:
raise FileNotFoundError(path)
return list(sorted(out))
Expand Down
53 changes: 52 additions & 1 deletion fsspec/tests/test_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -116,7 +116,8 @@ def test_du(m):
"/dir/dirb/afile": 2,
"/dir/dirb/bfile": 3,
}
assert fs.du("/dir", maxdepth=0) == 1
with pytest.raises(ValueError):
assert fs.du("/dir", maxdepth=0) == 1
assert fs.du("/dir", total=False, withdirs=True, maxdepth=1) == {
"/dir": 0,
"/dir/afile": 1,
Expand Down Expand Up @@ -363,3 +364,53 @@ def test_url_to_fs():

assert isinstance(fs, MemoryFileSystem)
assert url2 == "/a.txt"


def test_walk(m):
dir0 = "/dir0"
dir1 = dir0 + "/dir1"
dir2 = dir1 + "/dir2"
file1 = dir0 + "/file1"
file2 = dir1 + "/file2"
file3 = dir2 + "/file3"

m.mkdir(dir2) # Creates parents too
m.touch(file1)
m.touch(file2)
m.touch(file3)

# No maxdepth
assert list(m.walk(dir0, topdown=True)) == [
(dir0, ["dir1"], ["file1"]),
(dir1, ["dir2"], ["file2"]),
(dir2, [], ["file3"]),
]
assert list(m.walk(dir0, topdown=False)) == [
(dir2, [], ["file3"]),
(dir1, ["dir2"], ["file2"]),
(dir0, ["dir1"], ["file1"]),
]

# maxdepth=2
assert list(m.walk(dir0, maxdepth=2, topdown=True)) == [
(dir0, ["dir1"], ["file1"]),
(dir1, ["dir2"], ["file2"]),
]
assert list(m.walk(dir0, maxdepth=2, topdown=False)) == [
(dir1, ["dir2"], ["file2"]),
(dir0, ["dir1"], ["file1"]),
]

# maxdepth=1
assert list(m.walk(dir0, maxdepth=1, topdown=True)) == [
(dir0, ["dir1"], ["file1"]),
]
assert list(m.walk(dir0, maxdepth=1, topdown=False)) == [
(dir0, ["dir1"], ["file1"]),
]

# maxdepth=0
with pytest.raises(ValueError):
list(m.walk(dir0, maxdepth=0, topdown=True))
with pytest.raises(ValueError):
list(m.walk(dir0, maxdepth=0, topdown=False))
11 changes: 11 additions & 0 deletions fsspec/tests/test_spec.py
Original file line number Diff line number Diff line change
Expand Up @@ -202,6 +202,17 @@ def test_expand_path_recursive(test_paths, expected):
paths = test_fs.expand_path(list(test_paths), recursive=True)
assert sorted(paths) == sorted(expected)

# test with maxdepth
assert test_fs.expand_path("top_level", recursive=True, maxdepth=1) == [
"top_level",
"top_level/second_level",
]

with pytest.raises(ValueError):
test_fs.expand_path("top_level", recursive=True, maxdepth=0)
with pytest.raises(ValueError):
test_fs.expand_path("top_level", maxdepth=0)


@pytest.mark.xfail
def test_find():
Expand Down

0 comments on commit 30925b3

Please sign in to comment.