Skip to content

Commit

Permalink
Merge remote-tracking branch 'upstream/main' into lazy-export
Browse files Browse the repository at this point in the history
Signed-off-by: Boris Fomitchev <bfomitchev@nvidia.com>
  • Loading branch information
borisfom committed Nov 12, 2024
2 parents 7932d5f + 085e957 commit 22a4704
Show file tree
Hide file tree
Showing 155 changed files with 226,215 additions and 2,818 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/_test_template.yml
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ jobs:
- name: Docker pull image
run: |
docker pull nemoci.azurecr.io/nemo_container_${{ github.run_id }}
docker pull nemoci.azurecr.io/nemo_container:${{ github.run_id }}
- name: Start container
run: |
Expand All @@ -60,7 +60,7 @@ jobs:
ARG=("--runtime=nvidia --gpus all")
fi
docker run --rm -d --name nemo_container_${{ github.run_id }} ${ARG[@]} --shm-size=64g --env TRANSFORMERS_OFFLINE=0 --env HYDRA_FULL_ERROR=1 --volume /mnt/datadrive/TestData:/home/TestData nemoci.azurecr.io/nemo_container_${{ github.run_id }} bash -c "sleep $(( ${{ inputs.TIMEOUT }} * 60 + 60 ))"
docker run --rm -d --name nemo_container_${{ github.run_id }} ${ARG[@]} --shm-size=64g --env TRANSFORMERS_OFFLINE=0 --env HYDRA_FULL_ERROR=1 --volume /mnt/datadrive/TestData:/home/TestData nemoci.azurecr.io/nemo_container:${{ github.run_id }} bash -c "sleep $(( ${{ inputs.TIMEOUT }} * 60 + 60 ))"
- id: main
name: Run main script
Expand Down
45 changes: 45 additions & 0 deletions .github/workflows/build-test-publish-wheel.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
# Copyright (c) 2020-2021, NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

name: Build, test, and publish a PyPi wheel (to testpypi)

on:
push:
branches:
- main
- 'r**'

defaults:
run:
shell: bash -x -e -u -o pipefail {0}

jobs:
build-test-publish-wheel:
uses: NVIDIA/NeMo-FW-CI-templates/.github/workflows/_build_test_publish_wheel.yml@v0.7.0
with:
image-name: nemo_container
dockerfile: Dockerfile.ci
image-label: nemo-core
build-args: |
IMAGE_LABEL=nemo-core
prune-filter-timerange: 24h
dry-run: true
python-package: nemo
container-workdir: /workspace
environment: public
secrets:
TWINE_USERNAME: ${{ secrets.TWINE_USERNAME }}
TWINE_PASSWORD: ${{ secrets.TWINE_PASSWORD }}
SLACK_WEBHOOK_ADMIN: ${{ secrets.SLACK_WEBHOOK_ADMIN }}
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
136 changes: 6 additions & 130 deletions .github/workflows/cherry-pick-release-commit.yml
Original file line number Diff line number Diff line change
Expand Up @@ -6,133 +6,9 @@ on:
- main

jobs:
main:
runs-on: ubuntu-latest
environment:
name: main
steps:
- name: Checkout
uses: actions/checkout@v3
with:
fetch-depth: 0
token: ${{ secrets.PAT }}


- name: Cherry pick
env:
GH_TOKEN: ${{ secrets.PAT }}
run: |
set -x
set +e
git config --global user.email "nemo-bot@nvidia.com"
git config --global user.name "NeMo Bot"
SHA=$(git rev-list --no-merges -n 1 HEAD)
MESSAGE=$(git log -n 1 --pretty=format:%s $SHA)
PR_ID=$(echo $MESSAGE | awk -F'#' '{print $2}' | awk -F')' '{print $1}' )
USERNAME=$(git log -n 1 --pretty=format:%ae $SHA | awk -F'@' '{print $1}')
PR=$(curl -L \
-H "Accept: application/vnd.github+json" \
-H "Authorization: Bearer $GH_TOKEN" \
-H "X-GitHub-Api-Version: 2022-11-28" \
https://api.github.com/repos/NVIDIA/NeMo/pulls/$PR_ID)
PR_TITLE=$(echo -E $PR | jq '.title' | tr -d '"')
LABELS=$(echo -E $PR | jq '.labels | [.[].name] | join(",")' | tr -d '"')
AUTHOR=$(echo -E $PR | jq '.user.login' | tr -d '"')
TARGET_BRANCHES=$(echo "$LABELS" | grep -o 'r[^,]*')
if [[ $TARGET_BRANCHES == '' ]]; then
echo Nothing to cherry-pick
exit 0
fi

echo $TARGET_BRANCHES | while read -r RELEASE_BRANCH ; do
TARGET_BRANCH_EXISTS_OK=$([[ "$(git ls-remote --heads origin refs/heads/$RELEASE_BRANCH)" != "" ]] && echo true || echo false)

if [[ "$TARGET_BRANCH_EXISTS_OK" == "false" ]]; then
echo Release branch does not yet exist, will not cherry-pick
continue
fi

(
git fetch origin $RELEASE_BRANCH:$RELEASE_BRANCH
git switch --force-create cherry-pick-$PR_ID-$RELEASE_BRANCH $RELEASE_BRANCH
git cherry-pick $SHA
git push -u origin --force cherry-pick-$PR_ID-$RELEASE_BRANCH
git checkout ${CI_DEFAULT_BRANCH:-main}
)

CHERRYPICK_SUCCESSFUL=$?

if [[ $CHERRYPICK_SUCCESSFUL -eq 0 ]]; then
PR_URL="https://github.com/NVIDIA/NeMo/pull/$PR_ID"

PAYLOAD=$(jq \
-n \
-c \
--arg TITLE "Cherry pick \`$PR_TITLE ($PR_ID)\` into \`$RELEASE_BRANCH\`" \
--arg HEAD "cherry-pick-$PR_ID-$RELEASE_BRANCH" \
--arg RELEASE_BRANCH "$RELEASE_BRANCH" \
--arg BODY "[🤖]: Hi @$AUTHOR 👋,<br><br>we've cherry picked #$PR_ID into \`$RELEASE_BRANCH\` for you! 🚀<br><br>Please review and approve this cherry pick by your convenience\!" \
'{
"title": $TITLE,
"head": $HEAD,
"base": $RELEASE_BRANCH,
"body": $BODY
}'
)

NEW_PR=$(curl -L \
-X POST \
-H "Accept: application/vnd.github+json" \
-H "Authorization: Bearer $GH_TOKEN" \
-H "X-GitHub-Api-Version: 2022-11-28" \
https://api.github.com/repos/NVIDIA/NeMo/pulls \
-d "$PAYLOAD")

NEW_PR_ID=$(echo -E $NEW_PR | jq '.number')
curl -L \
-X POST \
-H "Accept: application/vnd.github+json" \
-H "Authorization: Bearer $GH_TOKEN" \
-H "X-GitHub-Api-Version: 2022-11-28" \
https://api.github.com/repos/NVIDIA/NeMo/pulls/$NEW_PR_ID/requested_reviewers \
-d '{"reviewers":["'$AUTHOR'"]}'

curl -L \
-X POST \
-H "Accept: application/vnd.github+json" \
-H "Authorization: Bearer $GH_TOKEN" \
-H "X-GitHub-Api-Version: 2022-11-28" \
https://api.github.com/repos/NVIDIA/NeMo/issues/$NEW_PR_ID/labels \
-d '{"labels":["Run CICD", "cherry-pick"]}'

else
URL="https://github.com/NVIDIA/NeMo/pull/$PR_ID"

MESSAGE='{
"blocks": [
{
"type": "section",
"text": {
"type": "mrkdwn",
"text": ":alert: Cherrypick bot 🤖: Hey <@'$USERNAME'>: Cherry-pick of <'$URL'|#'$PR_ID'> failed (3-way merge impossible). Please resolve manually and create a PR.\n\ncc: <!subteam^${{ secrets.SLACK_WEBHOOK_ADMIN }}>"
}
}
]
}'

curl -X POST -H "Content-type: application/json" --data "$MESSAGE" ${{ secrets.SLACK_WEBHOOK }}

fi

done



env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
cherry-pick:
uses: NVIDIA/NeMo-FW-CI-templates/.github/workflows/_cherry_pick.yml@v0.6.0
secrets:
PAT: ${{ secrets.PAT }}
SLACK_WEBHOOK_ADMIN: ${{ secrets.SLACK_WEBHOOK_ADMIN }}
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
73 changes: 16 additions & 57 deletions .github/workflows/cicd-main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -48,69 +48,29 @@ jobs:
id: all
run: |
echo "main=${{ contains(fromJSON(steps.test_to_run.outputs.main), 'all') }}" | tee -a "$GITHUB_OUTPUT"
gpu-test:
needs: [pre-flight]
runs-on: self-hosted-azure
if: ${{ github.event.label.name == 'Run CICD' || github.event_name == 'workflow_dispatch' }}
steps:
- name: Run nvidia-smi test
run: |
whoami
nvidia-smi

cicd-cluster-clean:
runs-on: self-hosted-azure-builder
needs: [pre-flight]
cicd-test-container-build:
if: ${{ github.event.label.name == 'Run CICD' || github.event_name == 'workflow_dispatch' }}
steps:
- name: Clean server from old files
run: |
docker system prune --filter "until=24h" --filter "label=nemo.library=nemo-core" --force
uses: NVIDIA/NeMo-FW-CI-templates/.github/workflows/_build_container.yml@v0.1.0
with:
image-name: nemo_container
dockerfile: Dockerfile.ci
image-label: nemo-core
build-args: |
IMAGE_LABEL=nemo-core
prune-filter-timerange: 24h

cicd-test-container-setup:
needs: [cicd-cluster-clean, pre-flight]
needs: [cicd-test-container-build, pre-flight]
runs-on: self-hosted-azure-builder
if: ${{ github.event.label.name == 'Run CICD' || github.event_name == 'workflow_dispatch' }}
outputs:
test_to_run: ${{ needs.pre-flight.outputs.test_to_run }}
all: ${{ needs.pre-flight.outputs.all }}
steps:
- name: Checkout repository
uses: actions/checkout@v4
with:
path: ${{ github.run_id }}

- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v3
with:
# We use `docker` driver as this speeds things up for
# trivial (non-multi-stage) builds.
driver: docker

- name: Restore cache
run: |
docker pull nemoci.azurecr.io/nemo_container:latest
docker pull nemoci.azurecr.io/nemo_container_${{ github.event.number || 'noop' }} || true
- name: Build and push
uses: docker/build-push-action@v5
with:
file: Dockerfile.ci
push: true
cache-from: |
nemoci.azurecr.io/nemo_container:latest
nemoci.azurecr.io/nemo_container_${{ github.event.number || 'noop' }}
cache-to: type=inline
tags: |
nemoci.azurecr.io/nemo_container_${{ github.run_id }}
nemoci.azurecr.io/nemo_container_${{ github.event.number || 'noop' }}
nemoci.azurecr.io/nemo_container:latest
- name: Run some checks
run: |
docker run --rm --device=/dev/nvidia0 --gpus all --shm-size=8g --env TRANSFORMERS_OFFLINE=0 --env HYDRA_FULL_ERROR=1 --env PYTHONUNBUFFERED=1 nemoci.azurecr.io/nemo_container_${{ github.run_id }} bash -c '\
docker run --rm --device=/dev/nvidia0 --gpus all --shm-size=8g --env TRANSFORMERS_OFFLINE=0 --env HYDRA_FULL_ERROR=1 --env PYTHONUNBUFFERED=1 nemoci.azurecr.io/nemo_container:${{ github.run_id }} bash -c '\
# PyTorch Lightning version
python -c "import pytorch_lightning; print(pytorch_lightning.__version__)"
Expand Down Expand Up @@ -475,7 +435,7 @@ jobs:
# needs: [cicd-test-container-setup]
# runs-on: self-hosted-azure
# container:
# image: nemoci.azurecr.io/nemo_container_${{ github.run_id }}
# image: nemoci.azurecr.io/nemo_container:${{ github.run_id }}
# options:
# # --user 0:128
# --device=/dev/nvidia0
Expand Down Expand Up @@ -527,7 +487,7 @@ jobs:
# runs-on: self-hosted-azure
# timeout-minutes: 10
# container:
# image: nemoci.azurecr.io/nemo_container_${{ github.run_id }}
# image: nemoci.azurecr.io/nemo_container:${{ github.run_id }}
# options:
# # --user 0:128
# --device=/dev/nvidia0
Expand Down Expand Up @@ -3611,7 +3571,7 @@ jobs:
# runs-on: self-hosted-azure
# timeout-minutes: 10
# container:
# image: nemoci.azurecr.io/nemo_container_${{ github.run_id }}
# image: nemoci.azurecr.io/nemo_container:${{ github.run_id }}
# options:
# # --user 0:128
# --device=/dev/nvidia0
Expand Down Expand Up @@ -3676,7 +3636,7 @@ jobs:
# needs: [cicd-test-container-setup]
# runs-on: self-hosted-azure
# container:
# image: nemoci.azurecr.io/nemo_container_${{ github.run_id }}
# image: nemoci.azurecr.io/nemo_container:${{ github.run_id }}
# options:
# # --user 0:128
# --device=/dev/nvidia0
Expand Down Expand Up @@ -4341,7 +4301,6 @@ jobs:
Nemo_CICD_Test:
needs:
- pre-flight
- gpu-test
- cicd-test-container-setup

- L0_Unit_Tests_GPU_ASR
Expand Down
Loading

0 comments on commit 22a4704

Please sign in to comment.