From 0abbd5088d4d073f4850cae09e06f0cfb63c01e4 Mon Sep 17 00:00:00 2001 From: Balaji Veeramani Date: Mon, 17 Nov 2025 13:25:55 -0800 Subject: [PATCH 1/5] Initial commit Signed-off-by: Balaji Veeramani --- release/release_data_tests.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/release/release_data_tests.yaml b/release/release_data_tests.yaml index e73ace2ec856..cb03cc9d3a5c 100644 --- a/release/release_data_tests.yaml +++ b/release/release_data_tests.yaml @@ -181,7 +181,7 @@ # This test consistently fails on fixed-size clusters due to head OOM from # too many objects references on the head node. So, we only run it on # autoscaling clusters. - scaling: [autoscaling] + scaling: [fixed_size, autoscaling] shuffle_strategy: [sort_shuffle_pull_based, hash_shuffle] columns: - "column08 column13 column14" # 84 groups @@ -193,7 +193,7 @@ run: timeout: 3600 script: > - python groupby_benchmark.py --sf 100 --map-groups --group-by {{columns}} + python groupby_benchmark.py --sf 10 --map-groups --group-by {{columns}} --shuffle-strategy {{shuffle_strategy}} ############### From 0340d588870dd73c988605f25b67b3097d3fbf36 Mon Sep 17 00:00:00 2001 From: Balaji Veeramani Date: Mon, 17 Nov 2025 13:27:18 -0800 Subject: [PATCH 2/5] Update scale Signed-off-by: Balaji Veeramani --- release/release_data_tests.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/release/release_data_tests.yaml b/release/release_data_tests.yaml index cb03cc9d3a5c..d784e1451245 100644 --- a/release/release_data_tests.yaml +++ b/release/release_data_tests.yaml @@ -193,7 +193,7 @@ run: timeout: 3600 script: > - python groupby_benchmark.py --sf 10 --map-groups --group-by {{columns}} + python groupby_benchmark.py --sf 100 --map-groups --group-by {{columns}} --shuffle-strategy {{shuffle_strategy}} ############### From 28ebd4b5818c303e46557dbe5e1fac21525c7572 Mon Sep 17 00:00:00 2001 From: Balaji Veeramani Date: Mon, 17 Nov 2025 13:27:29 -0800 Subject: [PATCH 3/5] Remove 
comment Signed-off-by: Balaji Veeramani --- release/release_data_tests.yaml | 3 --- 1 file changed, 3 deletions(-) diff --git a/release/release_data_tests.yaml b/release/release_data_tests.yaml index d784e1451245..04f8f3623d05 100644 --- a/release/release_data_tests.yaml +++ b/release/release_data_tests.yaml @@ -178,9 +178,6 @@ matrix: setup: - # This test consistently fails on fixed-size clusters due to head OOM from - # too many objects references on the head node. So, we only run it on - # autoscaling clusters. scaling: [fixed_size, autoscaling] shuffle_strategy: [sort_shuffle_pull_based, hash_shuffle] columns: From 3d79ecc06cf2db8fc8e11b6b0415e8b572f9551f Mon Sep 17 00:00:00 2001 From: Balaji Veeramani Date: Mon, 17 Nov 2025 15:55:33 -0800 Subject: [PATCH 4/5] Increase scale Signed-off-by: Balaji Veeramani --- release/release_data_tests.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/release/release_data_tests.yaml b/release/release_data_tests.yaml index 15455fc38bb3..741aa8a13779 100644 --- a/release/release_data_tests.yaml +++ b/release/release_data_tests.yaml @@ -190,7 +190,7 @@ run: timeout: 3600 script: > - python groupby_benchmark.py --sf 10 --map-groups --group-by {{columns}} + python groupby_benchmark.py --sf 100 --map-groups --group-by {{columns}} --shuffle-strategy {{shuffle_strategy}} ############### From f430e5e3714443cea6520d5ea28b493511be9763 Mon Sep 17 00:00:00 2001 From: Balaji Veeramani Date: Mon, 17 Nov 2025 23:11:25 -0800 Subject: [PATCH 5/5] Update file Signed-off-by: Balaji Veeramani --- release/release_data_tests.yaml | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/release/release_data_tests.yaml b/release/release_data_tests.yaml index 741aa8a13779..ee00fe6e8dba 100644 --- a/release/release_data_tests.yaml +++ b/release/release_data_tests.yaml @@ -178,12 +178,25 @@ matrix: setup: - scaling: [fixed_size, autoscaling] + scaling: [fixed_size] shuffle_strategy: [sort_shuffle_pull_based, 
hash_shuffle] columns: - "column08 column13 column14" # 84 groups - "column02 column14" # 7M groups + adjustments: + # Ray Data OOMs with hash shuffle on autoscaling clusters. So, only run + # autoscaling variants with sort shuffle. For more information, see + # https://github.com/ray-project/ray/issues/58734. + - with: + scaling: autoscaling + shuffle_strategy: sort_shuffle_pull_based + columns: "column08 column13 column14" + - with: + scaling: autoscaling + shuffle_strategy: sort_shuffle_pull_based + columns: "column02 column14" + cluster: cluster_compute: "{{scaling}}_all_to_all_compute.yaml"