Skip to content

Commit 9389657

Browse files
Update plan_download_op.py
Signed-off-by: Robert Nishihara <robertnishihara@gmail.com>
1 parent bd09c14 commit 9389657

File tree

1 file changed

+4
-1
lines changed

1 file changed

+4
-1
lines changed

python/ray/data/_internal/planner/plan_download_op.py

Lines changed: 4 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -190,13 +190,16 @@ def load_uri_bytes(uri_path_iterator):
190 190   """Function that takes an iterator of URI paths and yields downloaded bytes for each."""
191 191   for uri_path in uri_path_iterator:
192 192   last_error = None
193     - # Handle both file and stream for uri download
    193 + # Handle both file and stream uris. The file-based approach handles the vast majority of cases.
    194 + # However, some data sources are not seekable and require the stream approach.
194 195   for uri_open_func in [fs.open_input_file, fs.open_input_stream]:
195 196   try:
196 197   with uri_open_func(uri_path) as f:
197 198   yield f.read()
198 199   break
199 200   except ValueError as e:
    201 + # This handles the case where the file is not seekable and so we need to fall back to
    202 + # using fs.open_input_stream.
200 203   last_error = e
201 204   continue
202 205   except OSError as e:

0 commit comments

Comments
 (0)