Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add list_file() functional API to FSSpecFileLister and IoPathFileLister #463

Closed
wants to merge 22 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
22 commits
Select commit Hold shift + click to select a range
bcee0e4
feat: naive impl of list_file for fsspec
bushshrub May 25, 2022
2a09e77
test: test for list_file() for FSSpecFileLister
bushshrub May 25, 2022
225189a
test: consistency with test_fsspec_file_lister_iterdatapipe_with_list
bushshrub May 25, 2022
8da9374
test: ensure protocol is present
bushshrub May 25, 2022
ab91524
feat: naive impl of list_file for iopath
bushshrub May 25, 2022
aafaa71
test: test for list_file() of iopath
bushshrub May 25, 2022
774a6ac
test: consistency for iopath
bushshrub May 25, 2022
2c52f9d
feat: functional_datapipe list_file_by
bushshrub May 25, 2022
44909dd
test: update tests for fsspec and iopath for functional api
bushshrub May 25, 2022
e777d26
test: merge functional API tests with regular tests
bushshrub May 25, 2022
04b17d0
test: delete unnecessary test
bushshrub May 25, 2022
4d88bed
style: change variable names for functional lister tests
bushshrub May 26, 2022
0971a6e
chore: directly iterate over datapipe
bushshrub May 27, 2022
0155dc1
refactor: fix grammar
bushshrub May 31, 2022
7c89201
refactor: fix grammar
bushshrub May 31, 2022
492c9b3
test: update tests with new functional API
bushshrub May 31, 2022
626385a
Update test/test_fsspec.py
bushshrub May 31, 2022
6e3c661
test: fix failing tests due to missing file:// prefix
bushshrub Jun 1, 2022
3d1702e
test: fix breaking test due to unsortable datapipe
bushshrub Jun 1, 2022
cf1085c
test: fix failing test_fsspec_file_lister_iterdp_with_lsit
bushshrub Jun 2, 2022
2a4efb4
test: fix failing tests due to .sort() being in-place
bushshrub Jun 2, 2022
ff13537
test: fix failing tests due to missing file:// prefix again
bushshrub Jun 2, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 23 additions & 0 deletions test/test_fsspec.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,15 @@ def test_fsspec_file_lister_iterdatapipe(self):
{fsspec.implementations.local.make_path_posix(file) for file in self.temp_sub_files},
)

# checks for functional API
datapipe = IterableWrapper(["file://" + self.temp_sub_dir.name])
datapipe = datapipe.list_files_by_fsspec()
for path in datapipe:
self.assertIn(
path.split("://")[1],
{fsspec.implementations.local.make_path_posix(file) for file in self.temp_sub_files},
)

@skipIfNoFSSpec
def test_fsspec_file_lister_iterdatapipe_with_list(self):
datapipe = FSSpecFileLister(root=["file://" + self.temp_sub_dir.name, "file://" + self.temp_sub_dir_2.name])
Expand All @@ -82,6 +91,20 @@ def test_fsspec_file_lister_iterdatapipe_with_list(self):
# check all file paths within sub_folder are listed
self.assertEqual(file_lister, temp_files)

# checks for functional API
datapipe = IterableWrapper(["file://" + self.temp_sub_dir.name, "file://" + self.temp_sub_dir_2.name])
datapipe = datapipe.list_files_by_fsspec()
res = list(map(lambda path: path.split("://")[1], datapipe))
res.sort()
temp_files = list(
map(
lambda file: fsspec.implementations.local.make_path_posix(file),
self.temp_sub_files + self.temp_sub_files_2,
)
)
temp_files.sort()
self.assertEqual(res, temp_files)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It seems the test is still failing here. Could you please fix it by mimicking the test case from line 80 to line 89?


@skipIfNoFSSpec
def test_fsspec_file_loader_iterdatapipe(self):
datapipe1 = FSSpecFileLister(root="file://" + self.temp_sub_dir.name)
Expand Down
11 changes: 11 additions & 0 deletions test/test_local_io.py
Original file line number Diff line number Diff line change
Expand Up @@ -660,6 +660,11 @@ def test_io_path_file_lister_iterdatapipe(self):
for path in datapipe:
self.assertTrue(path in self.temp_sub_files)

datapipe = IterableWrapper([self.temp_sub_dir.name])
datapipe = datapipe.list_files_by_iopath()
for path in datapipe:
self.assertTrue(path in self.temp_sub_files)

@skipIfNoIoPath
def test_io_path_file_lister_iterdatapipe_with_list(self):
datapipe = IoPathFileLister(root=[self.temp_sub_dir.name, self.temp_sub_dir_2.name])
Expand All @@ -672,6 +677,12 @@ def test_io_path_file_lister_iterdatapipe_with_list(self):
# check all file paths within sub_folder are listed
self.assertEqual(file_lister, all_temp_files)

datapipe = IterableWrapper([self.temp_sub_dir.name, self.temp_sub_dir_2.name])
datapipe = datapipe.list_files_by_iopath()
results = list(datapipe)
results.sort()
self.assertEqual(results, all_temp_files)

@skipIfNoIoPath
def test_io_path_file_loader_iterdatapipe(self):
datapipe1 = IoPathFileLister(root=self.temp_sub_dir.name)
Expand Down
1 change: 1 addition & 0 deletions torchdata/datapipes/iter/load/fsspec.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ def _assert_fsspec() -> None:
)


@functional_datapipe("list_files_by_fsspec")
class FSSpecFileListerIterDataPipe(IterDataPipe[str]):
r"""
Lists the contents of the directory at the provided ``root`` pathname or URL,
Expand Down
1 change: 1 addition & 0 deletions torchdata/datapipes/iter/load/iopath.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ def _create_default_pathmanager():
return pathmgr


@functional_datapipe("list_files_by_iopath")
class IoPathFileListerIterDataPipe(IterDataPipe[str]):
r"""
Lists the contents of the directory at the provided ``root`` pathname or URL,
Expand Down