Skip to content

Commit

Permalink
[DataPipe] Adding a 's' to the functional names of open/list DataPipes
Browse files Browse the repository at this point in the history
ghstack-source-id: 3bb1bce0576bc0ef5c298bf7bbc85b88bc8d4945
Pull Request resolved: #479
  • Loading branch information
NivekT committed May 31, 2022
1 parent 3d967c6 commit 2d04683
Show file tree
Hide file tree
Showing 4 changed files with 13 additions and 13 deletions.
6 changes: 3 additions & 3 deletions torchdata/datapipes/iter/load/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -35,8 +35,8 @@ Note: refer to the official documentation for detailed installation instructions

### S3FileLister

`S3FileLister` accepts a list of S3 prefixes and iterates all matching s3 urls. The functional API is `list_file_by_s3`.
Acceptable prefixes include `s3://bucket-name`, `s3://bucket-name/`, `s3://bucket-name/folder`,
`S3FileLister` accepts a list of S3 prefixes and iterates all matching s3 urls. The functional API is
`list_files_by_s3`. Acceptable prefixes include `s3://bucket-name`, `s3://bucket-name/`, `s3://bucket-name/folder`,
`s3://bucket-name/folder/`, and `s3://bucket-name/prefix`. You may also set `length`, `request_timeout_ms` (default 3000
ms in aws-sdk-cpp), and `region`. Note that:

Expand All @@ -48,7 +48,7 @@ ms in aws-sdk-cpp), and `region`. Note that:
### S3FileLoader

`S3FileLoader` accepts a list of S3 URLs and iterates all files in `BytesIO` format with `(url, BytesIO)` tuples. The
functional API is `load_file_by_s3`. You may also set `request_timeout_ms` (default 3000 ms in aws-sdk-cpp), `region`,
functional API is `load_files_by_s3`. You may also set `request_timeout_ms` (default 3000 ms in aws-sdk-cpp), `region`,
`buffer_size` (default 120Mb), and `multi_part_download` (uses multi-part downloading by default). Note that:

1. Input **must** be a list and S3 URLs must be valid.
Expand Down
6 changes: 3 additions & 3 deletions torchdata/datapipes/iter/load/fsspec.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,11 +101,11 @@ def __iter__(self) -> Iterator[str]:
yield abs_path


@functional_datapipe("open_file_by_fsspec")
@functional_datapipe("open_files_by_fsspec")
class FSSpecFileOpenerIterDataPipe(IterDataPipe[Tuple[str, StreamWrapper]]):
r"""
Opens files from input datapipe which contains `fsspec` paths and yields a tuple of
pathname and opened file stream (functional name: ``open_file_by_fsspec``).
pathname and opened file stream (functional name: ``open_files_by_fsspec``).
Args:
source_datapipe: Iterable DataPipe that provides the pathnames or URLs
Expand All @@ -114,7 +114,7 @@ class FSSpecFileOpenerIterDataPipe(IterDataPipe[Tuple[str, StreamWrapper]]):
Example:
>>> from torchdata.datapipes.iter import FSSpecFileLister
>>> datapipe = FSSpecFileLister(root=dir_path)
>>> file_dp = datapipe.open_file_by_fsspec()
>>> file_dp = datapipe.open_files_by_fsspec()
"""

def __init__(self, source_datapipe: IterDataPipe[str], mode: str = "r") -> None:
Expand Down
6 changes: 3 additions & 3 deletions torchdata/datapipes/iter/load/iopath.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,11 +96,11 @@ def __iter__(self) -> Iterator[str]:
yield os.path.join(path, file_name)


@functional_datapipe("open_file_by_iopath")
@functional_datapipe("open_files_by_iopath")
class IoPathFileOpenerIterDataPipe(IterDataPipe[Tuple[str, StreamWrapper]]):
r"""
Opens files from input datapipe which contains pathnames or URLs,
and yields a tuple of pathname and opened file stream (functional name: ``open_file_by_iopath``).
and yields a tuple of pathname and opened file stream (functional name: ``open_files_by_iopath``).
Args:
source_datapipe: Iterable DataPipe that provides the pathnames or URLs
Expand All @@ -114,7 +114,7 @@ class IoPathFileOpenerIterDataPipe(IterDataPipe[Tuple[str, StreamWrapper]]):
Example:
>>> from torchdata.datapipes.iter import IoPathFileLister
>>> datapipe = IoPathFileLister(root=S3URL)
>>> file_dp = datapipe.open_file_by_iopath()
>>> file_dp = datapipe.open_files_by_iopath()
"""

def __init__(self, source_datapipe: IterDataPipe[str], mode: str = "r", pathmgr=None) -> None:
Expand Down
8 changes: 4 additions & 4 deletions torchdata/datapipes/iter/load/s3io.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,10 +13,10 @@
from torchdata.datapipes.utils import StreamWrapper


@functional_datapipe("list_file_by_s3")
@functional_datapipe("list_files_by_s3")
class S3FileListerIterDataPipe(IterDataPipe[str]):
r"""
Iterable DataPipe that lists Amazon S3 file URLs with the given prefixes (functional name: ``list_file_by_s3``).
Iterable DataPipe that lists Amazon S3 file URLs with the given prefixes (functional name: ``list_files_by_s3``).
Acceptable prefixes include ``s3://bucket-name``, ``s3://bucket-name/``, ``s3://bucket-name/folder``,
``s3://bucket-name/folder/``, and ``s3://bucket-name/prefix``. You may also set ``length``, ``request_timeout_ms``
(default 3000 ms in aws-sdk-cpp), and ``region``.
Expand Down Expand Up @@ -72,10 +72,10 @@ def __len__(self) -> int:
return self.length


@functional_datapipe("load_file_by_s3")
@functional_datapipe("load_files_by_s3")
class S3FileLoaderIterDataPipe(IterDataPipe[Tuple[str, StreamWrapper]]):
r"""
Iterable DataPipe that loads Amazon S3 files from the given S3 URLs (functional name: ``load_file_by_s3``).
Iterable DataPipe that loads Amazon S3 files from the given S3 URLs (functional name: ``load_files_by_s3``).
``S3FileLoader`` iterates all given S3 URLs in ``BytesIO`` format with ``(url, BytesIO)`` tuples.
You may also set ``request_timeout_ms`` (default 3000 ms in aws-sdk-cpp), ``region``,
``buffer_size`` (default 120Mb), and ``multi_part_download`` (default to use multi-part downloading).
Expand Down

0 comments on commit 2d04683

Please sign in to comment.