diff --git a/python/ray/data/_internal/datasource/hudi_datasource.py b/python/ray/data/_internal/datasource/hudi_datasource.py index 5ebff3e4558a..828d9baada7f 100644 --- a/python/ray/data/_internal/datasource/hudi_datasource.py +++ b/python/ray/data/_internal/datasource/hudi_datasource.py @@ -49,6 +49,7 @@ def _perform_read( read_tasks = [] for file_slices_split in hudi_table.split_file_slices(parallelism): if len(file_slices_split) == 0: + # when the table is empty, this will be an empty split continue num_rows = 0 diff --git a/python/ray/data/tests/test_hudi.py b/python/ray/data/tests/test_hudi.py index 87e431530985..af8035cc315f 100644 --- a/python/ray/data/tests/test_hudi.py +++ b/python/ray/data/tests/test_hudi.py @@ -17,14 +17,12 @@ MIN_PYARROW_VERSION_FOR_HUDI = parse_version("11.0.0") _VER = _get_pyarrow_version() -PYARROW_VERSION = None if _VER is None else parse_version(_VER) +PYARROW_VERSION = parse_version(_VER) if _VER else None PYARROW_VERSION_MEETS_REQUIREMENT = ( - PYARROW_VERSION is not None and PYARROW_VERSION >= MIN_PYARROW_VERSION_FOR_HUDI + PYARROW_VERSION and PYARROW_VERSION >= MIN_PYARROW_VERSION_FOR_HUDI ) - -pytestmark = pytest.mark.skipif( - not PYARROW_VERSION_MEETS_REQUIREMENT, - reason=f"Hudi only supported if pyarrow >= {MIN_PYARROW_VERSION_FOR_HUDI}", +PYARROW_HUDI_TEST_SKIP_REASON = ( + f"Hudi only supported if pyarrow >= {MIN_PYARROW_VERSION_FOR_HUDI}" ) @@ -34,6 +32,10 @@ def _extract_testing_table(fixture_path: str, table_dir: str, target_dir: str) - return os.path.join(target_dir, table_dir) +@pytest.mark.skipif( + not PYARROW_VERSION_MEETS_REQUIREMENT, + reason=PYARROW_HUDI_TEST_SKIP_REASON, +) @pytest.mark.parametrize( "fs,data_path", [