From 916c81ab3d26649b6bc0e071bdea6202b8ddc3f0 Mon Sep 17 00:00:00 2001 From: Hyunsu Cho Date: Wed, 20 Nov 2024 10:47:02 -0800 Subject: [PATCH] Troubleshoot hanging matrix jobs --- .github/workflows/jvm_tests.yml | 12 +++++++++--- .github/workflows/main.yml | 28 ++++++++++++++++++---------- 2 files changed, 27 insertions(+), 13 deletions(-) diff --git a/.github/workflows/jvm_tests.yml b/.github/workflows/jvm_tests.yml index a0ecaa189e36..80eb641367e5 100644 --- a/.github/workflows/jvm_tests.yml +++ b/.github/workflows/jvm_tests.yml @@ -18,8 +18,10 @@ jobs: build-containers: name: Build CI containers (${{ matrix.container_id }}) runs-on: - - runs-on=${{ github.run_id }} + - runs-on - runner=${{ matrix.runner }} + - run-id=${{ github.run_id }} + - tag=${{ matrix.container_id }} strategy: max-parallel: 2 matrix: @@ -48,8 +50,10 @@ jobs: (arch ${{ matrix.arch }}, runner ${{ matrix.runner }}) needs: build-containers runs-on: - - runs-on=${{ github.run_id }} + - runs-on - runner=${{ matrix.runner }} + - run-id=${{ github.run_id }} + - tag=${{ matrix.arch }} strategy: fail-fast: false matrix: @@ -266,8 +270,10 @@ jobs: name: Deploy JVM packages to S3 (${{ matrix.variant }}) needs: [build-jvm-gpu, build-test-jvm-packages, test-jvm-packages-gpu] runs-on: - - runs-on=${{ github.run_id }} + - runs-on - runner=linux-amd64-cpu + - run-id=${{ github.run_id }} + - tag=${{ matrix.variant }} strategy: fail-fast: false matrix: diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 3073c73ae642..7ff53f26fd9f 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -18,8 +18,10 @@ jobs: build-containers: name: Build CI containers (${{ matrix.container_id }}) runs-on: - - runs-on=${{ github.run_id }} + - runs-on - runner=${{ matrix.runner }} + - run-id=${{ github.run_id }} + - tag=${{ matrix.container_id }} strategy: max-parallel: 2 matrix: @@ -169,8 +171,10 @@ jobs: name: Build manylinux2014_${{ matrix.arch }} wheel needs: build-containers runs-on: - - runs-on=${{ github.run_id }} + - runs-on - runner=${{ matrix.runner }} + - run-id=${{ github.run_id }} + - tag=${{ matrix.arch }} strategy: fail-fast: false matrix: @@ -227,8 +231,10 @@ jobs: (Suite ${{ matrix.suite }}, Runner ${{ matrix.runner }}) needs: [build-cuda, build-cuda-with-rmm] runs-on: - - runs-on=${{ github.run_id }} + - runs-on - runner=${{ matrix.runner }} + - run-id=${{ github.run_id }} + - tag=${{ matrix.suite }} strategy: fail-fast: false max-parallel: 2 @@ -266,39 +272,41 @@ jobs: name: Run Python tests (${{ matrix.description }}) needs: [build-cuda, build-cpu-arm64] runs-on: - - runs-on=${{ github.run_id }} + - runs-on - runner=${{ matrix.runner }} + - run-id=${{ github.run_id }} + - tag=${{ matrix.description }} strategy: fail-fast: false max-parallel: 2 matrix: include: - - description: "single GPU" + - description: single-gpu container: xgb-ci.gpu suite: gpu runner: linux-amd64-gpu artifact_from: build-cuda - - description: "single GPU, nightly deps" + - description: single-gpu-nightly-deps container: xgb-ci.gpu_dev_ver suite: gpu runner: linux-amd64-gpu artifact_from: build-cuda - - description: "multiple GPUs" + - description: multiple-gpu container: xgb-ci.gpu suite: mgpu runner: linux-amd64-mgpu artifact_from: build-cuda - - description: "multiple GPUs, nightly deps" + - description: multiple-gpu-nightly-deps container: xgb-ci.gpu_dev_ver suite: mgpu runner: linux-amd64-mgpu artifact_from: build-cuda - - description: "CPU" + - description: cpu-amd64 container: xgb-ci.cpu suite: cpu runner: linux-amd64-cpu artifact_from: build-cuda - - description: "CPU ARM64" + - description: cpu-arm64 container: xgb-ci.aarch64 suite: cpu-arm64 runner: linux-arm64-cpu