From 4d216f15d3665e49ba27ba799318e606514e17f4 Mon Sep 17 00:00:00 2001 From: IlyasMoutawwakil Date: Fri, 22 Mar 2024 09:14:34 +0000 Subject: [PATCH 1/2] enable amd ci --- .github/workflows/build-docker-images.yml | 166 +++++++++++++--------- 1 file changed, 96 insertions(+), 70 deletions(-) diff --git a/.github/workflows/build-docker-images.yml b/.github/workflows/build-docker-images.yml index 3c7df0f2da4bb6..a355e7d184d5ab 100644 --- a/.github/workflows/build-docker-images.yml +++ b/.github/workflows/build-docker-images.yml @@ -198,41 +198,54 @@ jobs: push: true tags: huggingface/transformers-pytorch-gpu -# Need to be fixed with the help from Guillaume. -# latest-pytorch-amd: -# name: "Latest PyTorch (AMD) [dev]" -# runs-on: [self-hosted, docker-gpu, amd-gpu, single-gpu, mi210] -# steps: -# - name: Set up Docker Buildx -# uses: docker/setup-buildx-action@v3 -# - name: Check out code -# uses: actions/checkout@v3 -# - name: Login to DockerHub -# uses: docker/login-action@v3 -# with: -# username: ${{ secrets.DOCKERHUB_USERNAME }} -# password: ${{ secrets.DOCKERHUB_PASSWORD }} -# - name: Build and push -# uses: docker/build-push-action@v5 -# with: -# context: ./docker/transformers-pytorch-amd-gpu -# build-args: | -# REF=main -# push: true -# tags: huggingface/transformers-pytorch-amd-gpu${{ inputs.image_postfix }} -# # Push CI images still need to be re-built daily -# - -# name: Build and push (for Push CI) in a daily basis -# # This condition allows `schedule` events, or `push` events that trigger this workflow NOT via `workflow_call`. -# # The later case is useful for manual image building for debugging purpose. Use another tag in this case! -# if: inputs.image_postfix != '-push-ci' -# uses: docker/build-push-action@v5 -# with: -# context: ./docker/transformers-pytorch-amd-gpu -# build-args: | -# REF=main -# push: true -# tags: huggingface/transformers-pytorch-amd-gpu-push-ci + latest-pytorch-amd: + name: "Latest PyTorch (AMD) [dev]" + runs-on: [intel-cpu, 8-cpu, ci] + steps: + - name: Cleanup disk + run: | + sudo ls -l /usr/local/lib/ + sudo ls -l /usr/share/ + sudo du -sh /usr/local/lib/ + sudo du -sh /usr/share/ + sudo rm -rf /usr/local/lib/android + sudo rm -rf /usr/share/dotnet + sudo du -sh /usr/local/lib/ + sudo du -sh /usr/share/ + - + name: Set up Docker Buildx + uses: docker/setup-buildx-action@v3 + - + name: Check out code + uses: actions/checkout@v3 + - + name: Login to DockerHub + uses: docker/login-action@v3 + with: + username: ${{ secrets.DOCKERHUB_USERNAME }} + password: ${{ secrets.DOCKERHUB_PASSWORD }} + - + name: Build and push + uses: docker/build-push-action@v5 + with: + context: ./docker/transformers-pytorch-amd-gpu + build-args: | + REF=main + push: true + tags: huggingface/transformers-pytorch-amd-gpu${{ inputs.image_postfix }} + # Push CI images still need to be re-built daily + - + name: Build and push (for Push CI) in a daily basis + # This condition allows `schedule` events, or `push` events that trigger this workflow NOT via `workflow_call`. + # The later case is useful for manual image building for debugging purpose. Use another tag in this case! + if: inputs.image_postfix != '-push-ci' + uses: docker/build-push-action@v5 + with: + context: ./docker/transformers-pytorch-amd-gpu + build-args: | + REF=main + push: true + tags: huggingface/transformers-pytorch-amd-gpu-push-ci latest-tensorflow: name: "Latest TensorFlow [dev]" @@ -262,41 +275,54 @@ jobs: push: true tags: huggingface/transformers-tensorflow-gpu - # latest-pytorch-deepspeed-amd: - # name: "PyTorch + DeepSpeed (AMD) [dev]" - - # runs-on: [self-hosted, docker-gpu, amd-gpu, single-gpu, mi210] - # steps: - # - name: Set up Docker Buildx - # uses: docker/setup-buildx-action@v3 - # - name: Check out code - # uses: actions/checkout@v3 - # - name: Login to DockerHub - # uses: docker/login-action@v3 - # with: - # username: ${{ secrets.DOCKERHUB_USERNAME }} - # password: ${{ secrets.DOCKERHUB_PASSWORD }} - # - name: Build and push - # uses: docker/build-push-action@v5 - # with: - # context: ./docker/transformers-pytorch-deepspeed-amd-gpu - # build-args: | - # REF=main - # push: true - # tags: huggingface/transformers-pytorch-deepspeed-amd-gpu${{ inputs.image_postfix }} - # # Push CI images still need to be re-built daily - # - - # name: Build and push (for Push CI) in a daily basis - # # This condition allows `schedule` events, or `push` events that trigger this workflow NOT via `workflow_call`. - # # The later case is useful for manual image building for debugging purpose. Use another tag in this case! - # if: inputs.image_postfix != '-push-ci' - # uses: docker/build-push-action@v5 - # with: - # context: ./docker/transformers-pytorch-deepspeed-amd-gpu - # build-args: | - # REF=main - # push: true - # tags: huggingface/transformers-pytorch-deepspeed-amd-gpu-push-ci + latest-pytorch-deepspeed-amd: + name: "PyTorch + DeepSpeed (AMD) [dev]" + runs-on: [intel-cpu, 8-cpu, ci] + steps: + - name: Cleanup disk + run: | + sudo ls -l /usr/local/lib/ + sudo ls -l /usr/share/ + sudo du -sh /usr/local/lib/ + sudo du -sh /usr/share/ + sudo rm -rf /usr/local/lib/android + sudo rm -rf /usr/share/dotnet + sudo du -sh /usr/local/lib/ + sudo du -sh /usr/share/ + - + name: Set up Docker Buildx + uses: docker/setup-buildx-action@v3 + - + name: Check out code + uses: actions/checkout@v3 + - + name: Login to DockerHub + uses: docker/login-action@v3 + with: + username: ${{ secrets.DOCKERHUB_USERNAME }} + password: ${{ secrets.DOCKERHUB_PASSWORD }} + - + name: Build and push + uses: docker/build-push-action@v5 + with: + context: ./docker/transformers-pytorch-deepspeed-amd-gpu + build-args: | + REF=main + push: true + tags: huggingface/transformers-pytorch-deepspeed-amd-gpu${{ inputs.image_postfix }} + # Push CI images still need to be re-built daily + - + name: Build and push (for Push CI) in a daily basis + # This condition allows `schedule` events, or `push` events that trigger this workflow NOT via `workflow_call`. + # The later case is useful for manual image building for debugging purpose. Use another tag in this case! + if: inputs.image_postfix != '-push-ci' + uses: docker/build-push-action@v5 + with: + context: ./docker/transformers-pytorch-deepspeed-amd-gpu + build-args: | + REF=main + push: true + tags: huggingface/transformers-pytorch-deepspeed-amd-gpu-push-ci latest-quantization-torch-docker: name: "Latest Pytorch + Quantization [dev]" From 4513aad60545c279bf79d67790cab0753164810f Mon Sep 17 00:00:00 2001 From: IlyasMoutawwakil Date: Fri, 22 Mar 2024 09:49:45 +0000 Subject: [PATCH 2/2] remove unnecessary clean up --- .github/workflows/build-docker-images.yml | 20 -------------------- 1 file changed, 20 deletions(-) diff --git a/.github/workflows/build-docker-images.yml b/.github/workflows/build-docker-images.yml index a355e7d184d5ab..23424ffb83ac63 100644 --- a/.github/workflows/build-docker-images.yml +++ b/.github/workflows/build-docker-images.yml @@ -202,16 +202,6 @@ jobs: name: "Latest PyTorch (AMD) [dev]" runs-on: [intel-cpu, 8-cpu, ci] steps: - - name: Cleanup disk - run: | - sudo ls -l /usr/local/lib/ - sudo ls -l /usr/share/ - sudo du -sh /usr/local/lib/ - sudo du -sh /usr/share/ - sudo rm -rf /usr/local/lib/android - sudo rm -rf /usr/share/dotnet - sudo du -sh /usr/local/lib/ - sudo du -sh /usr/share/ - name: Set up Docker Buildx uses: docker/setup-buildx-action@v3 @@ -279,16 +269,6 @@ jobs: name: "PyTorch + DeepSpeed (AMD) [dev]" runs-on: [intel-cpu, 8-cpu, ci] steps: - - name: Cleanup disk - run: | - sudo ls -l /usr/local/lib/ - sudo ls -l /usr/share/ - sudo du -sh /usr/local/lib/ - sudo du -sh /usr/share/ - sudo rm -rf /usr/local/lib/android - sudo rm -rf /usr/share/dotnet - sudo du -sh /usr/local/lib/ - sudo du -sh /usr/share/ - name: Set up Docker Buildx uses: docker/setup-buildx-action@v3