# .github/workflows/unit_tests.yml

# Workflow name as displayed in the GitHub Actions UI.
name: 'Unit Tests'

# Default shell for every `run` step in this workflow.
#   -l  start bash as a login shell
#   -e  abort the step as soon as a command exits non-zero
# `{0}` is the placeholder GitHub Actions replaces with the path of the
# temporary script file generated for the step.
defaults:
  run:
    shell: 'bash -le {0}'

# Triggers: an external `repository_dispatch` event (no `types` filter, so any
# event type starts the workflow) or a manual `workflow_dispatch` run with the
# optional inputs below. Inputs without an explicit `type` are plain strings.
on:
  repository_dispatch:
  workflow_dispatch:
    inputs:
      # Repository to test; the workflow-level `env.repo` falls back to the
      # current repository when this is empty.
      repo:
        description: "GitHub repo {owner}/{repo}"
        required: false
        default: ""
      # Branch, tag or commit to test; the workflow-level `env.ref` falls back
      # to the triggering ref when this is empty.
      ref:
        description: "GitHub ref: Branch, Tag or Commit SHA"
        required: false
        default: ""
      # Pull request number; declared as a number and has no default.
      pr_number:
        description: "PR Number"
        required: false
        type: number
      # Test selection by name; empty means every test.
      test_names:
        description: "Input Test(s) to Run (default all)"
        required: false
        default: ""
      # Test selection by regular expression applied to the test file names.
      test_regex:
        description: "Regex to filter test files"
        required: false
        default: ""
      # Run id of an earlier workflow run whose build artifact should be reused.
      artifact_id:
        description: "Run id for artifact to be downloaded"
        required: false
        default: ""
      # Upper bound on the number of jobs running in parallel.
      max-parallel:
        description: "max parallel jobs"
        required: false
        default: "10"

# Workflow-level environment variables, exported to every job and step.
# Every value is quoted so that none of them depends on YAML implicit typing;
# GitHub Actions hands environment values to the steps as strings either way.
env:
  # --- CUDA device selection and build settings ---
  CUDA_DEVICE_ORDER: 'PCI_BUS_ID'
  CUDA_VISIBLE_DEVICES: '0'
  TORCH_CUDA_ARCH_LIST: '8.9'
  MAX_JOBS: '8'
  # Address of the internal CI host; the commented-out jobs in this file use it
  # to fetch setup scripts and as the container registry / package index host.
  RUNNER: '10.0.14.248'
  # --- Comma-separated test groups read by the (currently commented-out)
  # `list-test-files` job to route or skip test files ---
  TRANSFORMERS_DIFF_TESTS: 'models/test_internlm,models/test_internlm2_5,models/test_xverse'
  TORCH_2_5_TESTS: 'test_q4_ipex.py,test_save_loaded_quantized_model,test_quant_formats,models/test_hymba'
  IGNORED_TEST_FILES: 'test_tgi.py,test_gptneox.py,models/test_mixtral'
  GPTQMODEL_FORCE_BUILD: '1'
  # --- Checkout target: the manual input when given, otherwise the repository
  # and ref that triggered the run ---
  repo: '${{ github.event.inputs.repo || github.repository }}'
  ref: '${{ github.event.inputs.ref || github.ref }}'

# Runs are grouped by the requested ref (or the triggering ref when no input
# was given) together with the requested test names. Starting a new run in a
# group cancels the run of that group that is still in progress.
concurrency:
  group: "${{ github.event.inputs.ref || github.ref }}-workflow-unit-tests-${{ github.event.inputs.test_names }}"
  cancel-in-progress: true

jobs:
  # check-vm:
  #   runs-on: self-hosted
  #   container:
  #     image:  modelcloud/gptqmodel:alpine-ci-v1
  #   outputs:
  #     ip: ${{ steps.get_ip.outputs.ip }}
  #     run_id: ${{ steps.get_ip.outputs.run_id }}
  #     max-parallel: ${{ steps.get_ip.outputs.max-parallel }}
  #   steps:
  #     - name: Print env
  #       run: |
  #         echo "repo: ${{ env.repo }}"
  #         echo "ref: ${{ env.ref }}"
  #         echo "artifact_id: ${{ github.event.inputs.artifact_id }}"
  #         echo "test_names: ${{ github.event.inputs.test_names }}"

  #     - name: Select server
  #       id: get_ip
  #       run: |
  #         echo "ip=$RUNNER" >> "$GITHUB_OUTPUT"

  #         echo "ip: $ip"

  #         if [ -n "${{ github.event.inputs.artifact_id }}" ]; then
  #           run_id="${{ github.event.inputs.artifact_id }}"
  #         else
  #           run_id="${{ github.run_id }}"
  #         fi
  #         echo "run_id=$run_id" >> "$GITHUB_OUTPUT"
  #         echo "artifact_id=$run_id"

  #         max_p=${{ github.event.inputs.max-parallel }}
  #         max_p="{\"size\": ${max_p:-10}}"
  #         echo "max-parallel=$max_p" >> "$GITHUB_OUTPUT"
  #         echo "max-parallel=$max_p"

  # list-test-files:
  #   runs-on: ubuntu-latest
  #   outputs:
  #     torch-2-5-files: ${{ steps.files.outputs.torch-2-5-files }}
  #     gpu-files: ${{ steps.files.outputs.gpu-files }}
  #     transformers-files: ${{ steps.files.outputs.transformers-files }}

  #   steps:
  #     - name: Checkout Codes
  #       uses: actions/checkout@v4
  #       with:
  #         repository: ${{ env.repo }}
  #         ref: ${{ env.ref }}

  #     - name: Fetch PR by number
  #       if: ${{ github.event.inputs.pr_number != 0 }}
  #       run: |
  #         PR_NUMBER=${{ github.event.inputs.pr_number }}
  #         echo "pr number $PR_NUMBER"
  #         git config --global --add safe.directory $(pwd)
  #         git fetch origin pull/${PR_NUMBER}/head:pr-${PR_NUMBER}
  #         git checkout pr-${PR_NUMBER}

  #     - name: List files
  #       id: files
  #       run: |
  #         script="
  #         import json
  #         import os
  #         import re

  #         cpu_file_list = [f.strip().removesuffix('.py') for f in '${TORCH_2_5_TESTS}'.split(',') if f.strip()]
  #         test_files_list = [f.strip().removesuffix('.py') for f in '${{ github.event.inputs.test_names }}'.split(',') if f.strip()]
  #         cpu_test_files = [f for f in cpu_file_list if not test_files_list or f in test_files_list]
  #         transformers_diff_list = [f.strip().removesuffix('.py') for f in '${TRANSFORMERS_DIFF_TESTS}'.split(',') if f.strip()]
  #         transformers_test_files = [f for f in transformers_diff_list if not test_files_list or f in test_files_list]

  #         all_tests = [f.removesuffix('.py') for f in os.listdir('tests/') if f.startswith('test_') and f.endswith('.py') and f.strip().removesuffix('.py') not in '${IGNORED_TEST_FILES}']

  #         all_tests_models = ['models/'+f.removesuffix('.py') for f in os.listdir('tests/models') if f.startswith('test_') and f.endswith('.py') and f.strip().removesuffix('.py') not in '${IGNORED_TEST_FILES}']
  #         all_tests = [item for item in all_tests+all_tests_models if item.strip().removesuffix('.py') not in '${TORCH_2_5_TESTS}']

  #         gpu_test_files = [f for f in all_tests if f not in cpu_file_list and (not test_files_list or f in test_files_list) and f not in transformers_diff_list]

  #         cpu_test_files = [test for test in cpu_test_files if re.match(r'${{ github.event.inputs.test_regex }}', test)]
  #         gpu_test_files = [test for test in gpu_test_files if re.match(r'${{ github.event.inputs.test_regex }}', test)]
  #         transformers_test_files = [test for test in transformers_test_files if re.match(r'${{ github.event.inputs.test_regex }}', test)]

  #         print(f'{json.dumps(cpu_test_files)}|{json.dumps(gpu_test_files)}|{json.dumps(transformers_test_files)}')
  #         "

  #         test_files=$(python3 -c "$script")
  #         IFS='|' read -r cpu_test_files gpu_test_files transformers_test_files <<< "$test_files"
  #         echo "torch-2-5-files=$cpu_test_files" >> "$GITHUB_OUTPUT"
  #         echo "gpu-files=$gpu_test_files" >> "$GITHUB_OUTPUT"
  #         echo "transformers-files=$transformers_test_files" >> "$GITHUB_OUTPUT"

  #         echo "Test files: $test_files"
  #         echo "CPU Test files: $cpu_test_files"
  #         echo "GPU Test files: $gpu_test_files"
  #         echo "Transformers Test files: $transformers_test_files"
  #         echo "Ignored Test files: $IGNORED_TEST_FILES"

  # build:
  #   runs-on: self-hosted
  #   needs: check-vm
  #   if: github.event.inputs.artifact_id == '' && !cancelled()
  #   container:
  #     image:  ${{ needs.check-vm.outputs.ip }}:5000/modelcloud/gptqmodel:github-ci-v2-torch2.4.1
  #   steps:

  #     - name: Checkout Codes
  #       uses: actions/checkout@v4
  #       with:
  #         repository: ${{ env.repo }}
  #         ref: ${{ env.ref }}

  #     - name: Fetch PR by number
  #       if: ${{ github.event.inputs.pr_number != 0 }}
  #       run: |
  #         PR_NUMBER=${{ github.event.inputs.pr_number }}
  #         echo "pr number $PR_NUMBER"
  #         git config --global --add safe.directory $(pwd)
  #         git fetch origin pull/${PR_NUMBER}/head:pr-${PR_NUMBER}
  #         git checkout pr-${PR_NUMBER}

  #     - name: Print Env
  #       run: |
  #         echo "== pyenv =="
  #         pyenv versions
  #         echo "== python =="
  #         python --version
  #         echo "== nvcc =="
  #         nvcc --version
  #         echo "== torch =="
  #         pip show torch

  #     - name: Install requirements
  #       run: |
  #         bash -c "$(curl -L http://$RUNNER/files/scripts/init_unit_tests.sh)" @ 12.4 2.4.1 3.11
  #         uv pip install transformers typing-extensions -U -i http://${{ needs.check-vm.outputs.ip }}/simple/ --trusted-host ${{ needs.check-vm.outputs.ip }}

  #     - name: Compile
  #       timeout-minutes: 35
  #       run: python setup.py bdist_wheel

  #     - name: Test install
  #       run: |
  #         ls -ahl dist
  #         whl=$(ls -t dist/*.whl | head -n 1 | xargs basename)
  #         sha256=$(sha256sum dist/$whl)
  #         echo "hash=$sha256"

  #         echo "WHL_HASH=$sha256" >> $GITHUB_ENV
  #         echo "WHL_NAME=$whl" >> $GITHUB_ENV

  #         twine check dist/$whl
  #         uv pip install dist/$whl

  #     - name: Upload wheel
  #       continue-on-error: true
  #       run: |
  #         curl -s -F "runid=${{ github.run_id }}" -F "repo=${{ env.repo }}" -F "ref=${{ env.ref }}" -F "sha256=${{ env.WHL_HASH }}" -F "file=@dist/${{ env.WHL_NAME }}" http://${{ needs.check-vm.outputs.ip }}/gpu/whl/upload

  #     - name: Upload to artifact
  #       uses: actions/upload-artifact@v4
  #       with:
  #         name: dist
  #         path: dist

  # torch2_4:
  #   needs:
  #     - build
  #     - list-test-files
  #     - check-vm
  #   runs-on: self-hosted
  #   if: always() && !cancelled() && (needs.build.result == 'success' || github.event.inputs.artifact_id != '') && needs.list-test-files.outputs.gpu-files != '[]'
  #   container:
  #     image: ${{ needs.check-vm.outputs.ip }}:5000/modelcloud/gptqmodel:github-ci-v2-torch2.4.1
  #     volumes:
  #       - /dev/dri/by-path:/dev/dri/by-path
  #       - /home/ci/models:/monster/data/model
  #   strategy:
  #     fail-fast: false
  #     max-parallel: ${{ fromJson(needs.check-vm.outputs.max-parallel).size || 10 }}
  #     matrix:
  #       test_script: ${{ fromJSON(needs.list-test-files.outputs.gpu-files) }}

  #   steps:
  #     - name: Checkout Codes
  #       uses: actions/checkout@v4
  #       with:
  #         repository: ${{ github.event.inputs.repo }}
  #         ref: ${{ github.event.inputs.ref }}

  #     - name: Fetch PR by number
  #       if: ${{ github.event.inputs.pr_number != 0 }}
  #       run: |
  #         PR_NUMBER=${{ github.event.inputs.pr_number }}
  #         echo "pr number $PR_NUMBER"
  #         git config --global --add safe.directory $(pwd)
  #         git fetch origin pull/${PR_NUMBER}/head:pr-${PR_NUMBER}
  #         git checkout pr-${PR_NUMBER}

  #     - name: Print Env
  #       run: |
  #         echo "== pyenv =="
  #         pyenv versions
  #         echo "== python =="
  #         python --version
  #         echo "== nvcc =="
  #         nvcc --version
  #         echo "== torch =="
  #         pip show torch

  #         if [ -n "${{ github.event.inputs.artifact_id }}" ]; then
  #           run_id="${{ github.event.inputs.artifact_id }}"
  #         else
  #           run_id="${{ github.run_id }}"
  #         fi
  #         echo "RUN_ID=$run_id" >> $GITHUB_ENV
  #         echo "RUN_ID=${run_id}"

  #         if grep -q "bitblas" tests/${{ matrix.test_script }}.py; then
  #             echo "BITBLAS=1" >> $GITHUB_ENV
  #         fi

  #     - name: Download wheel
  #       continue-on-error: true
  #       run: |
  #         file_name=$(curl -s  -F "runid=${{ needs.check-vm.outputs.run_id }}" -F "repo=${{ env.repo }}" -F "ref=${{ env.ref }}" -F "fuzz=1" "http://${{ needs.check-vm.outputs.ip }}/gpu/whl/download")

  #         if echo "$file_name" | grep -q "gptqmodel"; then
  #             mkdir dist || true
  #             cd dist
  #             curl -s -O  http://${{ needs.check-vm.outputs.ip }}/whl/${{ env.repo }}/${{ needs.check-vm.outputs.run_id }}/$file_name
  #             ls -ahl .
  #             sha256=$(sha256sum $file_name)
  #             echo "sha256=$sha256"
  #             echo "DOWNLOADED=1" >> $GITHUB_ENV
  #         fi

  #     - name: Download artifact
  #       if: env.DOWNLOADED == '' && !cancelled()
  #       uses: actions/download-artifact@v4
  #       with:
  #         name: dist
  #         path: dist
  #         run-id: ${{ needs.check-vm.outputs.run_id }}

  #     - name: Install wheel
  #       run: |
  #         uv pip install optimum bitblas==0.0.1.dev13 parameterized uvicorn -i http://${{ needs.check-vm.outputs.ip }}/simple/ --trusted-host ${{ needs.check-vm.outputs.ip }}
  #         uv pip install dist/*.whl
  #         if [ "${{ matrix.test_script }}" == "test_quant_formats" ] || [ "${{ matrix.test_script }}" == "test_perplexity" ]; then
  #           uv pip install auto_round
  #         fi
  #         bash -c "$(curl -L http://$RUNNER/files/scripts/init_unit_tests.sh)" @ 12.4 2.4.1 3.11
  #         uv pip install typing-extensions numpy==1.26.4 -U -i http://${{ needs.check-vm.outputs.ip }}/simple/ --trusted-host ${{ needs.check-vm.outputs.ip }}
  #         if [ "${{ matrix.test_script }}" == "test_cohere2" ]; then
  #           uv pip install -U git+https://github.com/huggingface/transformers.git@5615a393691c81e00251e420c73e4d04c6fe22e5
  #         else
  #           uv pip install transformers -U
  #         fi


  #     - name: Check platform
  #       run: |
  #         ip=${RUNNER}
  #         echo "-----------"
  #         pip show torch
  #         echo "-----------"
  #         nvcc --version

  #     - name: Find suitable GPU
  #       run: |
  #         timestamp=$(date +%s%3N)
  #         gpu_id=-1

  #         while [ "$gpu_id" -lt 0 ]; do
  #           gpu_id=$(curl -s "http://${{ needs.check-vm.outputs.ip }}/gpu/get?id=${{ github.run_id }}&timestamp=$timestamp")

  #           if [ "$gpu_id" -lt 0 ]; then
  #             echo "http://${{ needs.check-vm.outputs.ip }}/gpu/get?id=${{ github.run_id }}&timestamp=$timestamp returned $gpu_id"
  #             echo "No available GPU, waiting 5 seconds..."
  #             sleep 5
  #           else
  #             echo "Allocated GPU ID: $gpu_id"
  #           fi
  #         done
  #         echo "CUDA_VISIBLE_DEVICES=$gpu_id" >> $GITHUB_ENV
  #         echo "STEP_TIMESTAMP=$timestamp" >> $GITHUB_ENV
  #         echo "CUDA_VISIBLE_DEVICES set to $gpu_id, timestamp=$timestamp"

  #     - name: Run tests
  #       if: ${{ (!github.event.inputs.test_names || contains(github.event.inputs.test_names, matrix.test_script)) && !cancelled() }}
  #       run: pytest --durations=0 tests/${{ matrix.test_script }}.py || { echo "ERROR=1" >> $GITHUB_ENV; exit 1; }

  #     - name: Clear cache
  #       if: failure() && env.BITBLAS == '1' && env.ERROR == '1'
  #       run: |
  #         rm -rf ~/.cache/bitblas/nvidia/geforce-rtx-4090
  #         echo "clear bitblas cache"

  #     - name: Release GPU
  #       if: always()
  #       run: curl -X GET "http://${{ needs.check-vm.outputs.ip }}/gpu/release?id=${{ github.run_id }}&gpu=${{ env.CUDA_VISIBLE_DEVICES }}&timestamp=${{ env.STEP_TIMESTAMP }}"

  # transformers_diff:
  #   needs:
  #     - build
  #     - list-test-files
  #     - check-vm
  #   runs-on: self-hosted
  #   if: always() && !cancelled() && (needs.build.result == 'success' || github.event.inputs.artifact_id != '') && needs.list-test-files.outputs.transformers-files != '[]'
  #   container:
  #     image: ${{ needs.check-vm.outputs.ip }}:5000/modelcloud/gptqmodel:github-ci-v2-torch2.4.1
  #     volumes:
  #       - /dev/dri/by-path:/dev/dri/by-path
  #       - /home/ci/models:/monster/data/model
  #   strategy:
  #     fail-fast: false
  #     max-parallel: ${{ fromJson(needs.check-vm.outputs.max-parallel).size || 10 }}
  #     matrix:
  #       test_script: ${{ fromJSON(needs.list-test-files.outputs.transformers-files) }}
  #   steps:
  #     - name: Checkout Codes
  #       uses: actions/checkout@v4
  #       with:
  #         repository: ${{ github.event.inputs.repo }}
  #         ref: ${{ github.event.inputs.ref }}

  #     - name: Fetch PR by number
  #       if: ${{ github.event.inputs.pr_number != 0 }}
  #       run: |
  #         PR_NUMBER=${{ github.event.inputs.pr_number }}
  #         echo "pr number $PR_NUMBER"
  #         git config --global --add safe.directory $(pwd)
  #         git fetch origin pull/${PR_NUMBER}/head:pr-${PR_NUMBER}
  #         git checkout pr-${PR_NUMBER}

  #     - name: Print Env
  #       run: |
  #         echo "== pyenv =="
  #         pyenv versions
  #         echo "== python =="
  #         python --version
  #         echo "== nvcc =="
  #         nvcc --version
  #         echo "== torch =="
  #         pip show torch

  #     - name: Download wheel
  #       continue-on-error: true
  #       run: |
  #         file_name=$(curl -s  -F "runid=${{ needs.check-vm.outputs.run_id }}" -F "repo=${{ env.repo }}" -F "ref=${{ env.ref }}" -F "fuzz=1" "http://${{ needs.check-vm.outputs.ip }}/gpu/whl/download")

  #         if echo "$file_name" | grep -q "gptqmodel"; then
  #             mkdir dist || true
  #             cd dist
  #             curl -s -O  http://${{ needs.check-vm.outputs.ip }}/whl/${{ env.repo }}/${{ needs.check-vm.outputs.run_id }}/$file_name
  #             ls -ahl .
  #             sha256=$(sha256sum $file_name)
  #             echo "sha256=$sha256"
  #             echo "DOWNLOADED=1" >> $GITHUB_ENV
  #         fi

  #     - name: Download artifact
  #       if: env.DOWNLOADED == '' && !cancelled()
  #       uses: actions/download-artifact@v4
  #       with:
  #         name: dist
  #         path: dist
  #         run-id: ${{ needs.check-vm.outputs.run_id }}

  #     - name: Install wheel
  #       run: |
  #         uv pip install optimum bitblas==0.0.1.dev13 parameterized uvicorn -i http://${{ needs.check-vm.outputs.ip }}/simple/ --trusted-host ${{ needs.check-vm.outputs.ip }}
  #         uv pip install dist/*.whl
  #         bash -c "$(curl -L http://$RUNNER/files/scripts/init_unit_tests.sh)" @ 12.4 2.4.1 3.11
  #         uv pip install transformers==4.38.2 typing-extensions numpy==1.26.4 peft==0.13.2 -U -i http://${{ needs.check-vm.outputs.ip }}/simple/ --trusted-host ${{ needs.check-vm.outputs.ip }}
  #         if [ "${{ matrix.test_script }}" = "test_xverse" ]; then
  #           uv pip install tokenizers==0.15.2 -i http://${{ needs.check-vm.outputs.ip }}/simple/ --trusted-host ${{ needs.check-vm.outputs.ip }}
  #         fi
  #         if [ "${{ matrix.test_script }}" == "test_quant_formats" ] || [ "${{ matrix.test_script }}" == "test_perplexity" ]; then
  #           uv pip install auto_round
  #         fi

  #     - name: Find suitable GPU
  #       run: |
  #         timestamp=$(date +%s%3N)
  #         gpu_id=-1

  #         while [ "$gpu_id" -lt 0 ]; do
  #           gpu_id=$(curl -s "http://${{ needs.check-vm.outputs.ip }}/gpu/get?id=${{ github.run_id }}&timestamp=$timestamp")

  #           if [ "$gpu_id" -lt 0 ]; then
  #             echo "http://${{ needs.check-vm.outputs.ip }}/gpu/get?id=${{ github.run_id }}&timestamp=$timestamp returned $gpu_id"
  #             echo "No available GPU, waiting 5 seconds..."
  #             sleep 5
  #           else
  #             echo "Allocated GPU ID: $gpu_id"
  #           fi
  #         done
  #         echo "CUDA_VISIBLE_DEVICES=$gpu_id" >> $GITHUB_ENV
  #         echo "STEP_TIMESTAMP=$timestamp" >> $GITHUB_ENV
  #         echo "CUDA_VISIBLE_DEVICES set to $gpu_id, timestamp=$timestamp"

  #     - name: Run tests
  #       if: ${{ (!github.event.inputs.test_names || contains(github.event.inputs.test_names, matrix.test_script)) && !cancelled() }}
  #       run: pytest --durations=0 tests/${{ matrix.test_script }}.py || { echo "ERROR=1" >> $GITHUB_ENV; exit 1; }

  #     - name: Release GPU
  #       if: always()
  #       run: curl -X GET "http://${{ needs.check-vm.outputs.ip }}/gpu/release?id=${{ github.run_id }}&gpu=${{ env.CUDA_VISIBLE_DEVICES }}&timestamp=${{ env.STEP_TIMESTAMP }}"

  # torch2_5:
  #   needs:
  #     - build
  #     - list-test-files
  #     - check-vm
  #   runs-on: self-hosted
  #   if: always() && !cancelled() && (needs.build.result == 'success' || github.event.inputs.artifact_id != '') && needs.list-test-files.outputs.torch-2-5-files != '[]'
  #   container:
  #     image: ${{ needs.check-vm.outputs.ip }}:5000/modelcloud/gptqmodel:github-ci-v2-torch2.5.1
  #     volumes:
  #       - /dev/dri/by-path:/dev/dri/by-path
  #       - /home/ci/models:/monster/data/model
  #   strategy:
  #     fail-fast: false
  #     max-parallel: ${{ fromJson(needs.check-vm.outputs.max-parallel).size || 10 }}
  #     matrix:
  #       test_script:  ${{ fromJSON(needs.list-test-files.outputs.torch-2-5-files) }}
  #   steps:
  #     - name: Checkout Codes
  #       uses: actions/checkout@v4
  #       with:
  #         repository: ${{ env.repo }}
  #         ref: ${{ env.ref }}

  #     - name: Fetch PR by number
  #       if: ${{ github.event.inputs.pr_number != 0 }}
  #       run: |
  #         PR_NUMBER=${{ github.event.inputs.pr_number }}
  #         echo "pr number $PR_NUMBER"
  #         git config --global --add safe.directory $(pwd)
  #         git fetch origin pull/${PR_NUMBER}/head:pr-${PR_NUMBER}
  #         git checkout pr-${PR_NUMBER}

  #     - name: Print Env
  #       run: |
  #         echo "== pyenv =="
  #         pyenv versions
  #         echo "== python =="
  #         python --version
  #         echo "== nvcc =="
  #         nvcc --version
  #         echo "== torch =="
  #         pip show torch

  #     - name: Download wheel
  #       continue-on-error: true
  #       run: |
  #         file_name=$(curl -s  -F "runid=${{ needs.check-vm.outputs.run_id }}" -F "repo=${{ env.repo }}" -F "ref=${{ env.ref }}" -F "fuzz=1" "http://${{ needs.check-vm.outputs.ip }}/gpu/whl/download")
  #         if echo "$file_name" | grep -q "gptqmodel"; then
  #             mkdir dist || true
  #             cd dist
  #             curl -s -O  http://${{ needs.check-vm.outputs.ip }}/whl/${{ env.repo }}/${{ needs.check-vm.outputs.run_id }}/$file_name
  #             ls -ahl .
  #             sha256=$(sha256sum $file_name)
  #             echo "sha256=$sha256"
  #             echo "DOWNLOADED=1" >> $GITHUB_ENV
  #         fi

  #     - name: Download artifact
  #       if: env.DOWNLOADED == '' && !cancelled()
  #       uses: actions/download-artifact@v4
  #       with:
  #         name: dist
  #         path: dist
  #         run-id: ${{ needs.check-vm.outputs.run_id }}

  #     - name: Install wheel
  #       run: |
  #         bash -c "$(curl -L http://$RUNNER/files/scripts/init_unit_tests.sh)" @ 12.4 2.5.1 3.11
  #         uv pip install -U intel_extension_for_pytorch typing-extensions bitblas==0.0.1.dev13 -i http://${{ needs.check-vm.outputs.ip }}/simple/ --trusted-host ${{ needs.check-vm.outputs.ip }}
  #         if [ "${{ matrix.test_script }}" == "test_quant_formats" ] || [ "${{ matrix.test_script }}" == "test_perplexity" ]; then
  #           uv pip install auto_round
  #         fi
  #         uv pip install dist/*.whl

  #     - name: Find suitable GPU
  #       run: |
  #         timestamp=$(date +%s%3N)
  #         gpu_id=-1

  #         while [ "$gpu_id" -lt 0 ]; do
  #           gpu_id=$(curl -s "http://${{ needs.check-vm.outputs.ip }}/gpu/get?id=${{ github.run_id }}&timestamp=$timestamp")

  #           if [ "$gpu_id" -lt 0 ]; then
  #             echo "http://${{ needs.check-vm.outputs.ip }}/gpu/get?id=${{ github.run_id }}&timestamp=$timestamp returned $gpu_id"
  #             echo "No available GPU, waiting 5 seconds..."
  #             sleep 5
  #           else
  #             echo "Allocated GPU ID: $gpu_id"
  #           fi
  #         done
  #         echo "CUDA_VISIBLE_DEVICES=$gpu_id" >> $GITHUB_ENV
  #         echo "STEP_TIMESTAMP=$timestamp" >> $GITHUB_ENV
  #         echo "CUDA_VISIBLE_DEVICES set to $gpu_id, timestamp=$timestamp"

  #     - name: Run tests
  #       if: ${{ (!github.event.inputs.test_names || contains(github.event.inputs.test_names, matrix.test_script)) && !cancelled() }}
  #       run: pytest --durations=0 tests/${{ matrix.test_script }}.py

  #     - name: Release GPU
  #       if: always()
  #       run: curl -X GET "http://${{ needs.check-vm.outputs.ip }}/gpu/release?id=${{ github.run_id }}&gpu=${{ env.CUDA_VISIBLE_DEVICES }}&timestamp=${{ env.STEP_TIMESTAMP }}"

  # XPU smoke-test job: starts the CI image on a self-hosted runner with the
  # host's /dev/dri device nodes exposed to the container and runs `clinfo`
  # to print what the container can see.
  xpu:
    runs-on: self-hosted
    container:
      # The registry address is written out literally because the `env`
      # context (and therefore env.RUNNER) is not available in
      # `container.image`.
      image: 10.0.14.248:5000/modelcloud/gptqmodel:github-ci-v2-torch2.5.1
      # Extra options handed to Docker when the container is created. The flag
      # is `--device` (two dashes); the previous `---device` is not a valid
      # Docker flag and made container creation fail.
      options: --device /dev/dri --ipc=host
      volumes:
        - /dev/dri/by-path:/dev/dri/by-path
        - /home/ci/models:/monster/data/model
    steps:
      - name: Print env
        run: clinfo