Upgrade Tensorflow, Torch and Spark test dependencies (horovod#3774)
* Upgrade TensorFlow to 2.10.1 and 2.9.3, add 2.11.0, remove 2.8.3
* Upgrade Torch: add 1.13.0, remove 1.10.2
* Upgrade PySpark to 3.2.3

Signed-off-by: Enrico Minack <github@enrico.minack.dev>
EnricoMi authored Nov 30, 2022
1 parent 0e9d1e8 commit 0b19c5c
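
These version bumps propagate into the Buildkite test environment names generated by .buildkite/gen-pipeline.sh below; the naming convention appears to embed each framework version with its dots replaced by underscores (TensorFlow 2.11.0 becomes tf2_11_0, Torch 1.13.0 becomes torch1_13_0, PySpark 3.2.3 becomes pyspark3_2_3). A minimal sketch of that convention, using a hypothetical version_token helper that is not part of the repository:

# version_token is a hypothetical illustration of the naming scheme, not code from gen-pipeline.sh
version_token() {
  local prefix="$1" version="$2"
  # replace the dots in the version number with underscores and prepend the framework prefix
  echo "${prefix}$(echo "${version}" | tr . _)"
}

version_token tf 2.11.0       # -> tf2_11_0
version_token torch 1.13.0    # -> torch1_13_0
version_token pyspark 3.2.3   # -> pyspark3_2_3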
Showing 6 changed files with 216 additions and 455 deletions.
.buildkite/gen-pipeline.sh: 30 changes (15 additions, 15 deletions)
@@ -12,7 +12,7 @@ gpux2_queue="2x-gpu-v5111"
gpux4_queue="4x-gpu-v5111"

# our baseline test is
baseline="test-cpu-gloo-py3_8-tf2_10_0-keras2_10_0-torch1_12_1-mxnet1_9_1-pyspark3_3_1"
baseline="test-cpu-gloo-py3_8-tf2_11_0-keras2_11_0-torch1_13_0-mxnet1_9_1-pyspark3_3_1"
# in run_gloo_integration we run 'Elastic Spark * Tests' for this baseline
# so it has to have Gloo mpi kind

@@ -22,19 +22,19 @@ code_files=$(python "$dir/get_changed_code_files.py" || echo failure)
tests=$(if [[ -n "${PIPELINE_MODE:-}" ]] && ( [[ "${BUILDKITE_BRANCH:-}" == "${BUILDKITE_PIPELINE_DEFAULT_BRANCH:-}" ]] || [[ -n "$code_files" ]] ); then
# we vary the baseline along the Python dimension and PySpark together
# run_gloo_integration expects these to have Gloo mpi kind to run 'Elastic Spark * Tests'
printf "test-cpu-gloo-py3_7-tf2_10_0-keras2_10_0-torch1_12_1-mxnet1_9_1-pyspark2_4_8 "
printf "test-cpu-gloo-py3_8-tf2_10_0-keras2_10_0-torch1_12_1-mxnet1_9_1-pyspark3_2_2 "
printf "test-cpu-gloo-py3_7-tf2_11_0-keras2_11_0-torch1_13_0-mxnet1_9_1-pyspark2_4_8 "
printf "test-cpu-gloo-py3_8-tf2_11_0-keras2_11_0-torch1_13_0-mxnet1_9_1-pyspark3_2_3 "
# our baseline
printf "$baseline "
# then we vary the baseline along mpi kinds dimension
# our baseline again
# printf "test-cpu-gloo-py3_8-tf2_10_0-keras2_10_0-torch1_12_1-mxnet1_9_1-pyspark3_3_1 "
printf "test-cpu-mpich-py3_8-tf2_10_0-keras2_10_0-torch1_12_1-mxnet1_9_1-pyspark3_3_1 "
printf "test-cpu-oneccl-py3_8-tf2_10_0-keras2_10_0-torch1_12_1-mxnet1_9_1-pyspark3_3_1 "
printf "test-cpu-openmpi-py3_8-tf2_10_0-keras2_10_0-torch1_12_1-mxnet1_9_1-pyspark3_3_1 "
# printf "test-cpu-gloo-py3_8-tf2_11_0-keras2_11_0-torch1_13_0-mxnet1_9_1-pyspark3_3_1 "
printf "test-cpu-mpich-py3_8-tf2_11_0-keras2_11_0-torch1_13_0-mxnet1_9_1-pyspark3_3_1 "
printf "test-cpu-oneccl-py3_8-tf2_11_0-keras2_11_0-torch1_13_0-mxnet1_9_1-pyspark3_3_1 "
printf "test-cpu-openmpi-py3_8-tf2_11_0-keras2_11_0-torch1_13_0-mxnet1_9_1-pyspark3_3_1 "
# note: we test openmpi-gloo mpi kind in this variation in each of [cpu, gpu, mixed]
printf "test-cpu-openmpi-gloo-py3_8-tf2_10_0-keras2_10_0-torch1_12_1-mxnet1_9_1-pyspark3_3_1 "
printf "test-cpu-openmpi-gloo-py3_8-tf2_11_0-keras2_11_0-torch1_13_0-mxnet1_9_1-pyspark3_3_1 "
# then we vary the baseline along the framework dimensions all together
# run_gloo_integration expects tf1 to have Gloo mpi kind to run 'Elastic Spark * Tests'
@@ -43,10 +43,10 @@ tests=$(if [[ -n "${PIPELINE_MODE:-}" ]] && ( [[ "${BUILDKITE_BRANCH:-}" == "${B
# torch==1.8.1 is the latest we can test in this setup
# see test-gpu-gloo-py3_7-tf1_15_5-... below why we have to test with mxnet 1.5.1 here
printf "test-cpu-gloo-py3_7-tf1_15_5-keras2_2_4-torch1_8_1-mxnet1_5_1_p0-pyspark3_3_1 "
printf "test-cpu-gloo-py3_8-tf2_8_3-keras2_8_0-torch1_10_2-mxnet1_7_0_p2-pyspark3_3_1 "
printf "test-cpu-gloo-py3_8-tf2_9_2-keras2_9_0-torch1_11_0-mxnet1_8_0_p0-pyspark3_3_1 "
printf "test-cpu-gloo-py3_8-tf2_9_3-keras2_9_0-torch1_11_0-mxnet1_7_0_p2-pyspark3_3_1 "
printf "test-cpu-gloo-py3_8-tf2_10_1-keras2_10_0-torch1_12_1-mxnet1_8_0_p0-pyspark3_3_1 "
# our baseline again
# printf "test-cpu-gloo-py3_8-tf2_10_0-keras2_10_0-torch1_12_1-mxnet1_9_1-pyspark3_3_1 "
# printf "test-cpu-gloo-py3_8-tf2_11_0-keras2_11_0-torch1_13_0-mxnet1_9_1-pyspark3_3_1 "
printf "test-cpu-openmpi-gloo-py3_8-tfhead-keras_none-torchhead-mxnethead-pyspark3_3_1 "
# these are the lowest framework versions that Horovod compiles with, but they are not tested
printf "test-cpu-openmpi-gloo-py3_7-tfmin-kerasmin-torchmin-mxnetmin-pysparkmin "
@@ -60,15 +60,15 @@ tests=$(if [[ -n "${PIPELINE_MODE:-}" ]] && ( [[ "${BUILDKITE_BRANCH:-}" == "${B
# so we test with mxnet 1.5.1
printf "test-gpu-gloo-py3_7-tf1_15_5-keras2_2_4-torch1_8_1-mxnet1_5_1_p0-pyspark3_3_1 "
# here we deviate from mxnet==1.7.0.post2 as there is none for cu101, only post1
printf "test-gpu-gloo-py3_8-tf2_8_3-keras2_8_0-torch1_10_2-mxnet1_7_0_p1-pyspark3_3_1 "
printf "test-gpu-gloo-py3_8-tf2_9_2-keras2_9_0-torch1_11_0-mxnet1_8_0_p0-pyspark3_3_1 "
printf "test-gpu-openmpi-gloo-py3_8-tf2_10_0-keras2_10_0-torch1_12_1-mxnet1_9_1-pyspark3_3_1 "
printf "test-gpu-gloo-py3_8-tf2_9_3-keras2_9_0-torch1_11_0-mxnet1_7_0_p1-pyspark3_3_1 "
printf "test-gpu-gloo-py3_8-tf2_10_1-keras2_10_0-torch1_12_1-mxnet1_8_0_p0-pyspark3_3_1 "
printf "test-gpu-openmpi-gloo-py3_8-tf2_11_0-keras2_11_0-torch1_13_0-mxnet1_9_1-pyspark3_3_1 "
printf "test-gpu-openmpi-gloo-py3_8-tfhead-keras_none-torchhead-mxnethead-pyspark3_3_1 "
# these are the lowest framework versions that Horovod compiles with, but they are not tested
printf "test-gpu-openmpi-gloo-py3_7-tfmin-kerasmin-torchmin-mxnetmin-pysparkmin "
# and one final test with mixed cpu+gpu
printf "test-mixed-openmpi-gloo-py3_8-tf2_10_0-keras2_10_0-torch1_12_1-mxnet1_9_1-pyspark3_3_1 "
printf "test-mixed-openmpi-gloo-py3_8-tf2_11_0-keras2_11_0-torch1_13_0-mxnet1_9_1-pyspark3_3_1 "
fi | if [[ "${PIPELINE_MODE:-}" == "GPU"* ]]; then sed -E "s/[^ ]*-cpu-[^ ]*//g"; else cat; fi \
| if [[ "${PIPELINE_MODE:-}" == "GPU HEADS" ]]; then sed -E "s/ /\n/g" | grep -e "-tfhead-keras_none-torchhead-mxnethead-" | paste -s -d " " -; else cat; fi \
| if [[ "${PIPELINE_MODE:-}" == "GPU NON HEADS" ]]; then sed -E "s/[^ ]*-tfhead-keras_none-torchhead-mxnethead-[^ ]*//g"; else cat; fi)
