Skip to content

Commit

Permalink
Fix walk and find test issues
Browse files (browse the repository at this point in the history)
  • Loading branch information
yanghua committed Sep 9, 2024
1 parent 96a7e04 commit 9c42e1a
Show file tree
Hide file tree
Showing 2 changed files with 39 additions and 31 deletions.
29 changes: 16 additions & 13 deletions tosfs/core.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -1132,22 +1132,25 @@ def _find_file_dir(
out = [self.info(path)]
except FileNotFoundError:
out = []
dirs = []
for o in out:
par = self._parent(o["name"])
if len(path) <= len(par):
d = {
"Key": self._split_path(par)[1].rstrip("/"),
"Size": 0,
"name": par.rstrip("/"),
"type": "directory",
}
dirs.append(d)
dirs = {
self._parent(o["name"]): {
"Key": self._parent(o["name"]).rstrip("/"),
"Size": 0,
"name": self._parent(o["name"]).rstrip("/"),
"type": "directory",
}
for o in out
if len(path) <= len(self._parent(o["name"]))
}

if withdirs:
out = sorted(out + dirs, key=lambda x: x["name"])
for dir_info in dirs.values():
if dir_info not in out:
out.append(dir_info)
else:
out = [o for o in out if o["type"] == "file"]
return out

return sorted(out, key=lambda x: x["name"])

def _open_remote_file(
self,
Expand Down
41 changes: 23 additions & 18 deletions tosfs/tests/test_fsspec.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -331,8 +331,6 @@ def test_write_read_without_protocol(


def test_walk(fsspecfs: Any, bucket: str, temporary_workspace: str):
sub_dir_name = "".join(random.choices(string.ascii_letters + string.digits, k=10))
temp_folder = f"{bucket}/{temporary_workspace}/{sub_dir_name}"
nested_dir_1 = f"{bucket}/{temporary_workspace}/nested_dir_1"
nested_dir_2 = f"{nested_dir_1}/nested_dir_2"
file_1 = f"{bucket}/{temporary_workspace}/file_1.txt"
Expand All @@ -349,40 +347,40 @@ def test_walk(fsspecfs: Any, bucket: str, temporary_workspace: str):
f.write(b"File 3 content")

# Test walk with maxdepth=None and topdown=True
result = list(fsspecfs.walk(temp_folder, maxdepth=None, topdown=True))
result = list(fsspecfs.walk(f"{bucket}/{temporary_workspace}", maxdepth=None, topdown=True))
expected = [
(fsspecfs._strip_protocol(temp_folder), ["nested_dir_1"], ["file_1.txt"]),
(fsspecfs._strip_protocol(f"{bucket}/{temporary_workspace}"), ["nested_dir_1"], ["file_1.txt"]),
(fsspecfs._strip_protocol(nested_dir_1), ["nested_dir_2"], ["file_2.txt"]),
(fsspecfs._strip_protocol(nested_dir_2), [], ["file_3.txt"]),
]
assert result == expected, f"Expected {expected}, got {result}"

# Test walk with maxdepth=1 and topdown=True
result = list(fsspecfs.walk(temp_folder, maxdepth=1, topdown=True))
result = list(fsspecfs.walk(f"{bucket}/{temporary_workspace}", maxdepth=1, topdown=True))
expected = [
(fsspecfs._strip_protocol(temp_folder), ["nested_dir_1"], ["file_1.txt"]),
(fsspecfs._strip_protocol(f"{bucket}/{temporary_workspace}"), ["nested_dir_1"], ["file_1.txt"]),
]
assert result == expected, f"Expected {expected}, got {result}"

# Test walk with maxdepth=2 and topdown=True
result = list(fsspecfs.walk(temp_folder, maxdepth=2, topdown=True))
result = list(fsspecfs.walk(f"{bucket}/{temporary_workspace}", maxdepth=2, topdown=True))
expected = [
(fsspecfs._strip_protocol(temp_folder), ["nested_dir_1"], ["file_1.txt"]),
(fsspecfs._strip_protocol(f"{bucket}/{temporary_workspace}"), ["nested_dir_1"], ["file_1.txt"]),
(fsspecfs._strip_protocol(nested_dir_1), ["nested_dir_2"], ["file_2.txt"]),
]
assert result == expected, f"Expected {expected}, got {result}"

# Test walk with maxdepth=None and topdown=False
result = list(fsspecfs.walk(temp_folder, maxdepth=None, topdown=False))
result = list(fsspecfs.walk(f"{bucket}/{temporary_workspace}", maxdepth=None, topdown=False))
expected = [
(fsspecfs._strip_protocol(nested_dir_2), [], ["file_3.txt"]),
(fsspecfs._strip_protocol(nested_dir_1), ["nested_dir_2"], ["file_2.txt"]),
(fsspecfs._strip_protocol(temp_folder), ["nested_dir_1"], ["file_1.txt"]),
(fsspecfs._strip_protocol(f"{bucket}/{temporary_workspace}"), ["nested_dir_1"], ["file_1.txt"]),
]
assert result == expected, f"Expected {expected}, got {result}"

# Test walk with detail=True
result = list(fsspecfs.walk(temp_folder, maxdepth=None, topdown=True, detail=True))
result = list(fsspecfs.walk(f"{bucket}/{temporary_workspace}", maxdepth=None, topdown=True, detail=True))
expected_dir_num = 3
assert (
len(result) == expected_dir_num
Expand All @@ -400,8 +398,6 @@ def remove_last_modification_time_ms(data):
del data[key]["last_modification_time_ms"]
return data

sub_dir_name = "".join(random.choices(string.ascii_letters + string.digits, k=10))
temp_folder = f"{bucket}/{temporary_workspace}/{sub_dir_name}"
file1_path = f"{bucket}/{temporary_workspace}/file1"
file2_path = f"{bucket}/{temporary_workspace}/file2"
dir1_path = f"{bucket}/{temporary_workspace}/dir1"
Expand All @@ -418,7 +414,7 @@ def remove_last_modification_time_ms(data):
fsspecfs.touch(file4_path)

# Test finding all files
result = fsspecfs.find(temp_folder)
result = fsspecfs.find(f"{bucket}/{temporary_workspace}")
expected = [
fsspecfs._strip_protocol(f"{bucket}/{temporary_workspace}/dir1/file3"),
fsspecfs._strip_protocol(f"{bucket}/{temporary_workspace}/dir2/file4"),
Expand All @@ -428,49 +424,58 @@ def remove_last_modification_time_ms(data):
assert result == expected, f"Expected {expected}, got {result}"

# Test finding files with maxdepth=1
result = fsspecfs.find(temp_folder, maxdepth=1)
result = fsspecfs.find(f"{bucket}/{temporary_workspace}", maxdepth=1)
expected = [
fsspecfs._strip_protocol(f"{bucket}/{temporary_workspace}/file1"),
fsspecfs._strip_protocol(f"{bucket}/{temporary_workspace}/file2"),
]
assert result == expected, f"Expected {expected}, got {result}"

# Test finding files and directories
result = fsspecfs.find(temp_folder, withdirs=True)
result = fsspecfs.find(f"{bucket}/{temporary_workspace}", withdirs=True)
expected = [
fsspecfs._strip_protocol(f"{bucket}/{temporary_workspace}"),
fsspecfs._strip_protocol(f"{bucket}/{temporary_workspace}/dir1"),
fsspecfs._strip_protocol(f"{bucket}/{temporary_workspace}/dir1/file3"),
fsspecfs._strip_protocol(f"{bucket}/{temporary_workspace}/dir2"),
fsspecfs._strip_protocol(f"{bucket}/{temporary_workspace}/dir2/file4"),
fsspecfs._strip_protocol(f"{bucket}/{temporary_workspace}/file1"),
fsspecfs._strip_protocol(f"{bucket}/{temporary_workspace}/file2"),
]
assert result == expected, f"Expected {expected}, got {result}"
assert sorted(result) == sorted(expected), f"Expected {expected}, got {result}"

# Test finding files with detail=True
result = fsspecfs.find(temp_folder, detail=True)
result = fsspecfs.find(f"{bucket}/{temporary_workspace}", detail=True)
result = remove_last_modification_time_ms(result)
expected = {
fsspecfs._strip_protocol(f"{bucket}/{temporary_workspace}/dir1/file3"): {
"Key": fsspecfs._strip_protocol(
f"{bucket}/{temporary_workspace}/dir1/file3"
),
"name": fsspecfs._strip_protocol(
f"{bucket}/{temporary_workspace}/dir1/file3"
),
"type": "file",
"size": 0,
},
fsspecfs._strip_protocol(f"{bucket}/{temporary_workspace}/dir2/file4"): {
"Key": fsspecfs._strip_protocol(
f"{bucket}/{temporary_workspace}/dir2/file4"
),
"name": fsspecfs._strip_protocol(
f"{bucket}/{temporary_workspace}/dir2/file4"
),
"type": "file",
"size": 0,
},
fsspecfs._strip_protocol(f"{bucket}/{temporary_workspace}/file1"): {
"Key": fsspecfs._strip_protocol(f"{bucket}/{temporary_workspace}/file1"),
"name": fsspecfs._strip_protocol(f"{bucket}/{temporary_workspace}/file1"),
"type": "file",
"size": 0,
},
fsspecfs._strip_protocol(f"{bucket}/{temporary_workspace}/file2"): {
"Key": fsspecfs._strip_protocol(f"{bucket}/{temporary_workspace}/file2"),
"name": fsspecfs._strip_protocol(f"{bucket}/{temporary_workspace}/file2"),
"type": "file",
"size": 0,
Expand Down

0 comments on commit 9c42e1a

Please sign in to comment.