1234 #4
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
name: Unit Tests ROCM | |
run-name: "${{ github.event.inputs.note }}" | |
defaults: | |
run: | |
shell: bash -le {0} | |
on: | |
repository_dispatch: | |
workflow_dispatch: | |
inputs: | |
note: | |
description: 'Note' | |
required: false | |
default: '' | |
repo: | |
description: 'GitHub repo {owner}/{repo}' | |
required: false | |
default: '' | |
ref: | |
description: 'GitHub ref: Branch, Tag or Commit SHA' | |
required: false | |
default: '' | |
pr_number: | |
description: 'PR Number' | |
required: false | |
type: number | |
test_names: | |
description: 'Input Test(s) to Run (default all)' | |
required: false | |
default: '' | |
test_regex: | |
description: 'Regex to filter test files' | |
required: false | |
default: '' | |
artifact_id: | |
description: 'Run id for artifact to be downloaded' | |
required: false | |
default: '' | |
max-parallel: | |
description: 'max parallel jobs' | |
required: false | |
default: '10' | |
m4-only: | |
description: 'only run m4(test only)' | |
type: boolean | |
required: false | |
default: false | |
env: | |
CUDA_DEVICE_ORDER: PCI_BUS_ID | |
ROCM_VERSION: 6.3 | |
PYTORCH_CUDA_ALLOC_CONF: 'expandable_segments:True' | |
MAX_JOBS: 8 | |
RUNNER: 10.0.13.31 | |
TRANSFORMERS_DIFF_TESTS: "models/test_internlm.py,models/test_internlm2_5.py,models/test_xverse.py" | |
TORCH_2_5_TESTS: "test_evalplus.py,test_perplexity.py,test_q4_ipex.py,test_save_loaded_quantized_model.py,test_quant_formats.py,models/test_hymba.py" | |
IGNORED_TEST_FILES: "test_tgi.py,test_gptneox.py,models/test_mixtral.py,test_q4_torch_apple.py,test_ipex_xpu.py" | |
GPTQMODEL_FORCE_BUILD: 1 | |
repo: ${{ github.event.inputs.repo || github.repository }} | |
ref: ${{ github.event.inputs.ref || github.ref }} | |
concurrency: | |
group: ${{ github.event.inputs.ref || github.ref }}-workflow-unit-tests-${{ github.event.inputs.test_names }} | |
cancel-in-progress: true | |
jobs: | |
check-vm: | |
runs-on: [ self-hosted, rocm ] | |
container: | |
image: modelcloud/gptqmodel:alpine-ci-v1 | |
outputs: | |
ip: ${{ steps.get_ip.outputs.ip }} | |
run_id: ${{ steps.get_ip.outputs.run_id }} | |
max-parallel: ${{ steps.get_ip.outputs.max-parallel }} | |
steps: | |
- name: Print env | |
run: | | |
echo "repo: ${{ env.repo }}" | |
echo "ref: ${{ env.ref }}" | |
echo "artifact_id: ${{ github.event.inputs.artifact_id }}" | |
echo "test_names: ${{ github.event.inputs.test_names }}" | |
- name: Select server | |
id: get_ip | |
run: | | |
echo "ip=$RUNNER" >> "$GITHUB_OUTPUT" | |
echo "ip: $ip" | |
if [ -n "${{ github.event.inputs.artifact_id }}" ]; then | |
run_id="${{ github.event.inputs.artifact_id }}" | |
else | |
run_id="${{ github.run_id }}" | |
fi | |
echo "run_id=$run_id" >> "$GITHUB_OUTPUT" | |
echo "artifact_id=$run_id" | |
max_p=${{ github.event.inputs.max-parallel }} | |
max_p="{\"size\": ${max_p:-10}}" | |
echo "max-parallel=$max_p" >> "$GITHUB_OUTPUT" | |
echo "max-parallel=$max_p" | |
list-test-files: | |
runs-on: ubuntu-latest | |
if: github.event.inputs.m4-only != 'true' | |
outputs: | |
torch-2-5-files: ${{ steps.files.outputs.torch-2-5-files }} | |
transformers-files: ${{ steps.files.outputs.transformers-files }} | |
steps: | |
- name: Checkout Codes | |
uses: actions/checkout@v4 | |
with: | |
repository: ${{ env.repo }} | |
ref: ${{ env.ref }} | |
- name: Fetch PR by number | |
if: ${{ github.event.inputs.pr_number != 0 }} | |
run: | | |
PR_NUMBER=${{ github.event.inputs.pr_number }} | |
echo "pr number $PR_NUMBER" | |
git config --global --add safe.directory $(pwd) | |
git fetch origin pull/${PR_NUMBER}/head:pr-${PR_NUMBER} | |
git checkout pr-${PR_NUMBER} | |
- name: List files | |
id: files | |
run: | | |
script=" | |
import json | |
import os | |
import re | |
TRANSFORMERS_DIFF_TESTS = '${TRANSFORMERS_DIFF_TESTS}' | |
IGNORED_TEST_FILES = '${IGNORED_TEST_FILES}' | |
TEST_NAMES='${{ github.event.inputs.test_names }}' | |
TEST_REGEX='${{ github.event.inputs.test_regex }}' | |
input_test_files_list = [f.strip().removesuffix('.py') for f in TEST_NAMES.split(',') if f.strip()] | |
transformers_test_files = [f.strip().removesuffix('.py') for f in f'{TRANSFORMERS_DIFF_TESTS}'.split(',') if f.strip()] | |
transformers_test_files = [f for f in transformers_test_files if not input_test_files_list or f in input_test_files_list] | |
all_tests = [f.removesuffix('.py') for f in os.listdir('tests/') if f.startswith('test_') and f.endswith('.py') and f.strip().removesuffix('py') not in f'{IGNORED_TEST_FILES}'] | |
all_tests_models = ['models/'+f.removesuffix('.py') for f in os.listdir('tests/models') if f.startswith('test_') and f.endswith('.py') and f.strip().removesuffix('py') not in f'{IGNORED_TEST_FILES}'] | |
torch_2_5_test_files = [f for f in all_tests+all_tests_models if (not input_test_files_list or f in input_test_files_list) and f not in transformers_test_files] | |
torch_2_5_test_files = [test for test in torch_2_5_test_files if re.match(rf'{TEST_REGEX}', test)] | |
transformers_test_files = [test for test in transformers_test_files if re.match(rf'{TEST_REGEX}', test)] | |
print(f'{json.dumps(torch_2_5_test_files)}|{json.dumps(transformers_test_files)}') | |
" | |
test_files=$(python3 -c "$script") | |
IFS='|' read -r torch_2_5_test_files transformers_test_files <<< "$test_files" | |
echo "torch-2-5-files=$torch_2_5_test_files" >> "$GITHUB_OUTPUT" | |
echo "transformers-files=$transformers_test_files" >> "$GITHUB_OUTPUT" | |
echo "Test files: $test_files" | |
echo "Torch 2.5 Test files: $torch_2_5_test_files" | |
echo "Transformers Test files: $transformers_test_files" | |
echo "Ignored Test files: $IGNORED_TEST_FILES" | |
build: | |
runs-on: [ self-hosted, rocm ] | |
needs: check-vm | |
if: github.event.inputs.m4-only != 'true' && github.event.inputs.artifact_id == '' && !cancelled() | |
container: | |
image: ${{ needs.check-vm.outputs.ip }}:5000/modelcloud/gptqmodel:github-ci-v6-rocm | |
steps: | |
- name: Checkout Codes | |
uses: actions/checkout@v4 | |
with: | |
repository: ${{ env.repo }} | |
ref: ${{ env.ref }} | |
- name: Fetch PR by number | |
if: ${{ github.event.inputs.pr_number != 0 }} | |
run: | | |
PR_NUMBER=${{ github.event.inputs.pr_number }} | |
echo "pr number $PR_NUMBER" | |
git config --global --add safe.directory $(pwd) | |
git fetch origin pull/${PR_NUMBER}/head:pr-${PR_NUMBER} | |
git checkout pr-${PR_NUMBER} | |
- name: Print Env | |
run: | | |
echo "== pyenv ==" | |
pyenv versions | |
echo "== python ==" | |
python --version | |
echo "== nvcc ==" | |
nvcc --version | |
echo "== torch ==" | |
pip show torch | |
echo "##### pip list #####" | |
pip list | |
- name: Compile | |
timeout-minutes: 35 | |
run: python setup.py bdist_wheel | |
- name: Test install | |
run: | | |
ls -ahl dist | |
whl=$(ls -t dist/*.whl | head -n 1 | xargs basename) | |
sha256=$(sha256sum dist/$whl) | |
echo "hash=$sha256" | |
echo "WHL_HASH=$sha256" >> $GITHUB_ENV | |
echo "WHL_NAME=$whl" >> $GITHUB_ENV | |
twine check dist/$whl | |
uv pip install dist/$whl | |
- name: Upload wheel | |
continue-on-error: true | |
run: | | |
curl -s -F "runid=${{ github.run_id }}" -F "repo=${{ env.repo }}" -F "ref=${{ env.ref }}" -F "sha256=${{ env.WHL_HASH }}" -F "file=@dist/${{ env.WHL_NAME }}" http://${{ needs.check-vm.outputs.ip }}/gpu/whl/upload | |
- name: Upload to artifact | |
uses: actions/upload-artifact@v4 | |
with: | |
name: dist | |
path: dist | |
- name: Clean cache | |
if: always() | |
run: pip cache purge && uv cache clean | |
transformers_diff: | |
needs: | |
- build | |
- list-test-files | |
- check-vm | |
runs-on: [ self-hosted, rocm ] | |
if: always() && !cancelled() && (needs.build.result == 'success' || github.event.inputs.artifact_id != '') && github.event.inputs.m4-only != 'true' && needs.list-test-files.outputs.transformers-files != '[]' | |
container: | |
image: ${{ needs.check-vm.outputs.ip }}:5000/modelcloud/gptqmodel:github-ci-v6-rocm | |
options: --device /dev/dri --device /dev/kfd --ipc=host | |
volumes: | |
- /home/ci/models:/monster/data/model | |
strategy: | |
fail-fast: false | |
max-parallel: ${{ fromJson(needs.check-vm.outputs.max-parallel).size || 10 }} | |
matrix: | |
test_script: ${{ fromJSON(needs.list-test-files.outputs.transformers-files) }} | |
steps: | |
- name: Checkout Codes | |
uses: actions/checkout@v4 | |
with: | |
repository: ${{ github.event.inputs.repo }} | |
ref: ${{ github.event.inputs.ref }} | |
- name: Fetch PR by number | |
if: ${{ github.event.inputs.pr_number != 0 }} | |
run: | | |
PR_NUMBER=${{ github.event.inputs.pr_number }} | |
echo "pr number $PR_NUMBER" | |
git config --global --add safe.directory $(pwd) | |
git fetch origin pull/${PR_NUMBER}/head:pr-${PR_NUMBER} | |
git checkout pr-${PR_NUMBER} | |
- name: Print Env | |
run: | | |
echo "== pyenv ==" | |
pyenv versions | |
echo "== python ==" | |
python --version | |
echo "== nvcc ==" | |
nvcc --version | |
echo "== torch ==" | |
pip show torch | |
echo "== pip list ==" | |
pip list | |
- name: Download wheel | |
continue-on-error: true | |
run: | | |
file_name=$(curl -s -F "runid=${{ needs.check-vm.outputs.run_id }}" -F "repo=${{ env.repo }}" -F "ref=${{ env.ref }}" -F "fuzz=1" "http://${{ needs.check-vm.outputs.ip }}/gpu/whl/download") | |
if echo "$file_name" | grep -q "gptqmodel"; then | |
mkdir dist || true | |
cd dist | |
curl -s -O http://${{ needs.check-vm.outputs.ip }}/whl/${{ env.repo }}/${{ needs.check-vm.outputs.run_id }}/$file_name | |
ls -ahl . | |
sha256=$(sha256sum $file_name) | |
echo "sha256=$sha256" | |
echo "DOWNLOADED=1" >> $GITHUB_ENV | |
fi | |
- name: Download artifact | |
if: env.DOWNLOADED == '' && !cancelled() | |
uses: actions/download-artifact@v4 | |
with: | |
name: dist | |
path: dist | |
run-id: ${{ needs.check-vm.outputs.run_id }} | |
- name: Install wheel | |
run: | | |
echo "===== install optimum bitblas parameterized uvicorn =====" | |
uv pip install optimum bitblas==0.0.1.dev13 parameterized uvicorn -i http://${{ needs.check-vm.outputs.ip }}/simple/ --trusted-host ${{ needs.check-vm.outputs.ip }} | |
echo "===== install dist/whl =====" | |
uv pip install dist/*.whl | |
echo "===== init test env =====" | |
echo "===== install transformers==4.38.2 typing-extensions numpy==1.26.4 peft==0.13.2 =====" | |
uv pip install transformers==4.38.2 typing-extensions numpy==1.26.4 peft==0.13.2 -U -i http://${{ needs.check-vm.outputs.ip }}/simple/ --trusted-host ${{ needs.check-vm.outputs.ip }} | |
if [ "${{ matrix.test_script }}" == "models/test_xverse" ]; then | |
echo "===== install tokenizers==0.15.2 =====" | |
uv pip install tokenizers==0.15.2 -i http://${{ needs.check-vm.outputs.ip }}/simple/ --trusted-host ${{ needs.check-vm.outputs.ip }} | |
fi | |
if [ "${{ matrix.test_script }}" == "test_quant_formats" ] || [ "${{ matrix.test_script }}" == "test_perplexity" ]; then | |
echo "===== install auto_round =====" | |
uv pip install auto_round | |
fi | |
- name: Clean cache | |
if: always() | |
run: pip cache purge && uv cache clean | |
- name: Run tests | |
if: ${{ (!github.event.inputs.test_names || contains(github.event.inputs.test_names, matrix.test_script)) && !cancelled() }} | |
run: | | |
start_time=$(date +%s) | |
pytest --durations=0 tests/${{ matrix.test_script }}.py || { echo "ERROR=1" >> $GITHUB_ENV; exit 1; } | |
execution_time=$(( $(date +%s) - start_time )) | |
echo "$((execution_time / 60))m $((execution_time % 60))s" | |
curl "http://${{ needs.check-vm.outputs.ip }}/gpu/log_test_vram?id=${{ github.run_id }}&gpu=${{ env.CUDA_VISIBLE_DEVICES }}&range=$execution_time&unit=second&name=${{ matrix.test_script }}" | |
torch2_5: | |
needs: | |
- build | |
- list-test-files | |
- check-vm | |
runs-on: [ self-hosted, rocm ] | |
if: always() && !cancelled() && (needs.build.result == 'success' || github.event.inputs.artifact_id != '') && github.event.inputs.m4-only != 'true' && needs.list-test-files.outputs.torch-2-5-files != '[]' | |
container: | |
image: ${{ needs.check-vm.outputs.ip }}:5000/modelcloud/gptqmodel:github-ci-v6-rocm | |
options: --device /dev/dri --device /dev/kfd --ipc=host | |
volumes: | |
- /home/ci/models:/monster/data/model | |
strategy: | |
fail-fast: false | |
max-parallel: ${{ fromJson(needs.check-vm.outputs.max-parallel).size || 10 }} | |
matrix: | |
test_script: ${{ fromJSON(needs.list-test-files.outputs.torch-2-5-files) }} | |
steps: | |
- name: Checkout Codes | |
uses: actions/checkout@v4 | |
with: | |
repository: ${{ env.repo }} | |
ref: ${{ env.ref }} | |
- name: Fetch PR by number | |
if: ${{ github.event.inputs.pr_number != 0 }} | |
run: | | |
PR_NUMBER=${{ github.event.inputs.pr_number }} | |
echo "pr number $PR_NUMBER" | |
git config --global --add safe.directory $(pwd) | |
git fetch origin pull/${PR_NUMBER}/head:pr-${PR_NUMBER} | |
git checkout pr-${PR_NUMBER} | |
- name: Print Env | |
run: | | |
echo "== pyenv ==" | |
pyenv versions | |
echo "== python ==" | |
python --version | |
echo "== nvcc ==" | |
nvcc --version | |
echo "== torch ==" | |
pip show torch | |
echo "== pip list ==" | |
pip list | |
- name: Download wheel | |
continue-on-error: true | |
run: | | |
file_name=$(curl -s -F "runid=${{ needs.check-vm.outputs.run_id }}" -F "repo=${{ env.repo }}" -F "ref=${{ env.ref }}" -F "fuzz=1" "http://${{ needs.check-vm.outputs.ip }}/gpu/whl/download") | |
if echo "$file_name" | grep -q "gptqmodel"; then | |
mkdir dist || true | |
cd dist | |
curl -s -O http://${{ needs.check-vm.outputs.ip }}/whl/${{ env.repo }}/${{ needs.check-vm.outputs.run_id }}/$file_name | |
ls -ahl . | |
sha256=$(sha256sum $file_name) | |
echo "sha256=$sha256" | |
echo "DOWNLOADED=1" >> $GITHUB_ENV | |
fi | |
- name: Download artifact | |
if: env.DOWNLOADED == '' && !cancelled() | |
uses: actions/download-artifact@v4 | |
with: | |
name: dist | |
path: dist | |
run-id: ${{ needs.check-vm.outputs.run_id }} | |
- name: Install wheel | |
run: | | |
if [ "${{ matrix.test_script }}" == "test_quant_formats" ] || [ "${{ matrix.test_script }}" == "test_perplexity" ]; then | |
echo "===== install auto_round =====" | |
uv pip install auto_round | |
fi | |
if [ "${{ matrix.test_script }}" == "models/test_cohere2" ]; then | |
echo "===== install transformers from git =====" | |
uv pip install -U git+https://github.com/huggingface/transformers.git@5615a393691c81e00251e420c73e4d04c6fe22e5 | |
fi | |
if [ "${{ matrix.test_script }}" == "test_ipex_xpu" ]; then | |
source /etc/profile.d/pyenv.sh && pyenv activate xpu | |
fi | |
echo "===== install dist/whl =====" | |
uv pip install dist/*.whl | |
- name: Clean cache | |
if: always() | |
run: pip cache purge && uv cache clean | |
- name: Run tests | |
if: ${{ (!github.event.inputs.test_names || contains(github.event.inputs.test_names, matrix.test_script)) && !cancelled() }} | |
run: | | |
if [ "${{ matrix.test_script }}" == "test_ipex_xpu" ]; then | |
export CUDA_VISIBLE_DEVICES="" | |
source /etc/profile.d/pyenv.sh && pyenv activate xpu | |
pip uninstall vllm -y | |
pip list | |
fi | |
start_time=$(date +%s) | |
pytest --durations=0 tests/${{ matrix.test_script }}.py || { echo "ERROR=1" >> $GITHUB_ENV; exit 1; } | |
execution_time=$(( $(date +%s) - start_time )) | |
echo "$((execution_time / 60))m $((execution_time % 60))s" | |
curl "http://${{ needs.check-vm.outputs.ip }}/gpu/log_test_vram?id=${{ github.run_id }}&gpu=${{ env.CUDA_VISIBLE_DEVICES }}&range=$execution_time&unit=second&name=${{ matrix.test_script }}" | |
show-statistics: | |
runs-on: [ self-hosted, rocm ] | |
if: always() | |
container: | |
image: modelcloud/gptqmodel:alpine-ci-v1 | |
needs: | |
- transformers_diff | |
- torch2_5 | |
steps: | |
- name: Print statistics | |
run: curl "http://10.0.14.248/gpu/get_vram_logs?id=${{ github.run_id }}" | |
m4: | |
runs-on: [ self-hosted, m4 ] | |
needs: check-vm | |
if: (github.event.inputs.test_names == '' || contains(github.event.inputs.test_names, 'apple') )&& !cancelled() | |
steps: | |
- name: Checkout Codes | |
uses: actions/checkout@v4 | |
with: | |
repository: ${{ env.repo }} | |
ref: ${{ env.ref }} | |
- name: Run test | |
run: | | |
export PATH="/opt/homebrew/bin:$PATH" && eval "$(pyenv init -)" | |
pyenv global 3.11.11 && python -m venv venv | |
curl -O http://${{ needs.check-vm.outputs.ip }}/scripts/m4/profile.sb | |
curl -O http://${{ needs.check-vm.outputs.ip }}/scripts/m4/run.sh | |
sandbox-exec -f profile.sb /bin/bash ./run.sh |