Skip to content

Commit

Permalink
Add list_file() functional API to FSSpecFileLister and IoPathFileList…
Browse files Browse the repository at this point in the history
…er (#463)

Summary:
Fixes #387

### Changes
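- Adds the `list_files_by_iopath()` functional API for `IoPathFileListerIterDataPipe` (registered via `@functional_datapipe`)
- Adds the `list_files_by_fsspec()` functional API for `FSSpecFileListerIterDataPipe` (registered via `@functional_datapipe`)
- Adds tests that exercise both functional APIs with a single root and with a list of roots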

#### Additional comments
I feel as if the implementation is quite naive. Would appreciate any feedback on it.

Pull Request resolved: #463

Reviewed By: NivekT

Differential Revision: D36777142

Pulled By: ejguan

fbshipit-source-id: 1c4474776f3fcd377ae545bd8bd7bf26d0b2fa88
  • Loading branch information
bushshrub authored and ejguan committed Jun 6, 2022
1 parent d6e24ba commit c49c752
Show file tree
Hide file tree
Showing 4 changed files with 36 additions and 0 deletions.
23 changes: 23 additions & 0 deletions test/test_fsspec.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,15 @@ def test_fsspec_file_lister_iterdatapipe(self):
{fsspec.implementations.local.make_path_posix(file) for file in self.temp_sub_files},
)

# checks for functional API
datapipe = IterableWrapper(["file://" + self.temp_sub_dir.name])
datapipe = datapipe.list_files_by_fsspec()
for path in datapipe:
self.assertIn(
path.split("://")[1],
{fsspec.implementations.local.make_path_posix(file) for file in self.temp_sub_files},
)

@skipIfNoFSSpec
def test_fsspec_file_lister_iterdatapipe_with_list(self):
datapipe = FSSpecFileLister(root=["file://" + self.temp_sub_dir.name, "file://" + self.temp_sub_dir_2.name])
Expand All @@ -82,6 +91,20 @@ def test_fsspec_file_lister_iterdatapipe_with_list(self):
# check all file paths within sub_folder are listed
self.assertEqual(file_lister, temp_files)

# checks for functional API
datapipe = IterableWrapper(["file://" + self.temp_sub_dir.name, "file://" + self.temp_sub_dir_2.name])
datapipe = datapipe.list_files_by_fsspec()
res = list(map(lambda path: path.split("://")[1], datapipe))
res.sort()
temp_files = list(
map(
lambda file: fsspec.implementations.local.make_path_posix(file),
self.temp_sub_files + self.temp_sub_files_2,
)
)
temp_files.sort()
self.assertEqual(res, temp_files)

@skipIfNoFSSpec
def test_fsspec_file_loader_iterdatapipe(self):
datapipe1 = FSSpecFileLister(root="file://" + self.temp_sub_dir.name)
Expand Down
11 changes: 11 additions & 0 deletions test/test_local_io.py
Original file line number Diff line number Diff line change
Expand Up @@ -660,6 +660,11 @@ def test_io_path_file_lister_iterdatapipe(self):
for path in datapipe:
self.assertTrue(path in self.temp_sub_files)

datapipe = IterableWrapper([self.temp_sub_dir.name])
datapipe = datapipe.list_files_by_iopath()
for path in datapipe:
self.assertTrue(path in self.temp_sub_files)

@skipIfNoIoPath
def test_io_path_file_lister_iterdatapipe_with_list(self):
datapipe = IoPathFileLister(root=[self.temp_sub_dir.name, self.temp_sub_dir_2.name])
Expand All @@ -672,6 +677,12 @@ def test_io_path_file_lister_iterdatapipe_with_list(self):
# check all file paths within sub_folder are listed
self.assertEqual(file_lister, all_temp_files)

datapipe = IterableWrapper([self.temp_sub_dir.name, self.temp_sub_dir_2.name])
datapipe = datapipe.list_files_by_iopath()
results = list(datapipe)
results.sort()
self.assertEqual(results, all_temp_files)

@skipIfNoIoPath
def test_io_path_file_loader_iterdatapipe(self):
datapipe1 = IoPathFileLister(root=self.temp_sub_dir.name)
Expand Down
1 change: 1 addition & 0 deletions torchdata/datapipes/iter/load/fsspec.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ def _assert_fsspec() -> None:
)


@functional_datapipe("list_files_by_fsspec")
class FSSpecFileListerIterDataPipe(IterDataPipe[str]):
r"""
Lists the contents of the directory at the provided ``root`` pathname or URL,
Expand Down
1 change: 1 addition & 0 deletions torchdata/datapipes/iter/load/iopath.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ def _create_default_pathmanager():
return pathmgr


@functional_datapipe("list_files_by_iopath")
class IoPathFileListerIterDataPipe(IterDataPipe[str]):
r"""
Lists the contents of the directory at the provided ``root`` pathname or URL,
Expand Down

0 comments on commit c49c752

Please sign in to comment.