Skip to content

docs: Apply other feedback from 24.09 VDR #358

docs: Apply other feedback from 24.09 VDR

docs: Apply other feedback from 24.09 VDR #358

Workflow file for this run

# Copyright (c) 2020-2021, NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
name: "CICD NeMo Aligner"
on:
pull_request:
branches:
- 'main'
- 'r**'
- 'dev'
types: [labeled]
workflow_dispatch:
inputs:
test_to_run:
required: false
default: all
type: string
description: Comma-separated list of tests to run. Use "all" to run the full test suite.
push:
branches:
- 'main'
concurrency:
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
cancel-in-progress: true
jobs:
pre-flight:
runs-on: ubuntu-latest
outputs:
test_to_run: ${{ steps.test_to_run.outputs.main }}
all: ${{ steps.all.outputs.main }}
steps:
- name: Parse test_to_run
id: test_to_run
run: |
parsed_string=$(echo ${{ inputs.test_to_run || 'all' }} | jq -c --raw-input 'split(",")')
echo "main=${parsed_string}" | tee -a "$GITHUB_OUTPUT"
- name: Parse all
id: all
run: |
echo "main=${{ contains(fromJSON(steps.test_to_run.outputs.main), 'all') }}" | tee -a "$GITHUB_OUTPUT"
build-container:
if: ${{ github.event.label.name == 'Run CICD' || github.ref == 'refs/heads/main' }}
uses: NVIDIA/NeMo-FW-CI-templates/.github/workflows/_build_container.yml@v0.1.0
with:
image-name: nemo_aligner_container
dockerfile: Dockerfile
image-label: nemo-aligner
build-args: |
MAX_JOBS=32
ALIGNER_COMMIT=${{ github.event.pull_request.head.sha || github.sha }}
Unit_Tests:
name: ${{ matrix.test_case }}
needs: [build-container, pre-flight]
uses: ./.github/workflows/_run_test.yml
if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'unit') || needs.pre-flight.outputs.all == 'true'
strategy:
matrix:
test_case:
- run_unit.sh
- run_mpi_unit.sh
with:
RUNNER: self-hosted-azure
TIMEOUT: 10
SCRIPT: |
nvidia-smi
cd ${ALIGNER_REPO_DIR}
bash tests/${{ matrix.test_case }}
Functional_Tests:
name: ${{ matrix.test_case }}
needs: [build-container, pre-flight]
uses: ./.github/workflows/_run_test.yml
if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'functional') || needs.pre-flight.outputs.all == 'true'
strategy:
matrix:
test_case:
- ppo-llama3-pp2-reshard
- reinforce-llama3-pp2-reshard
- dpo-llama3
- kd-llama3
- sft-llama3
- rm-llama3
with:
RUNNER: self-hosted-azure
# Fairly aggresive timeout that all functional tests should try to adhere to
TIMEOUT: 8
SCRIPT: |
bash /opt/NeMo-Aligner/tests/functional/test_cases/${{ matrix.test_case }}