Skip to content

Commit bd09c14

Browse files
committed
[data] Fix HTTP streaming file download by trying both open_input_file and open_input_stream
Signed-off-by: xyuzh <xinyzng@gmail.com>
1 parent f2a7a94 commit bd09c14

File tree

1 file changed

+20
-5
lines changed

1 file changed

+20
-5
lines changed

python/ray/data/_internal/planner/plan_download_op.py

Lines changed: 20 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -189,12 +189,27 @@ def download_bytes_threaded(
189 189
def load_uri_bytes(uri_path_iterator):
190 190
"""Function that takes an iterator of URI paths and yields downloaded bytes for each."""
191 191
for uri_path in uri_path_iterator:
192-
try:
193-
with fs.open_input_file(uri_path) as f:
194-
yield f.read()
195-
except OSError as e:
192+
last_error = None
193+
# Handle both file and stream for uri download
194+
for uri_open_func in [fs.open_input_file, fs.open_input_stream]:
195+
try:
196+
with uri_open_func(uri_path) as f:
197+
yield f.read()
198+
break
199+
except ValueError as e:
200+
last_error = e
201+
continue
202+
except OSError as e:
203+
logger.debug(f"OSError: '{uri_path}' from column '{uri_column_name}' with error: {e}")
204+
yield None
205+
break
206+
except Exception as e:
207+
logger.error(f"Unexpected error in load_uri_bytes: {e}")
208+
yield None
209+
break
210+
else:
196 211
logger.debug(
197-
f"Failed to download URI '{uri_path}' from column '{uri_column_name}' with error: {e}"
212+
f"Failed to download URI '{uri_path}' from column '{uri_column_name}' with error: {last_error}"
198 213
)
199 214
yield None
200 215

0 commit comments

Comments
 (0)