diff --git a/.ci/scripts/test_llama.sh b/.ci/scripts/test_llama.sh
index dad3e1101fd..23a579e67c4 100644
--- a/.ci/scripts/test_llama.sh
+++ b/.ci/scripts/test_llama.sh
@@ -27,6 +27,10 @@ while [[ $# -gt 0 ]]; do
       MODE="$2" # portable or xnnpack+custom or xnnpack+custom+qe
       shift 2
       ;;
+    -pt2e_quantize)
+      PT2E_QUANTIZE="$2"
+      shift 2
+      ;;
     -upload)
       UPLOAD_DIR="$2"
       shift 2
@@ -234,6 +238,10 @@ if [[ "${COREML}" == "ON" ]]; then
 fi
 if [[ "${QNN}" == "ON" ]]; then
   EXPORT_ARGS="${EXPORT_ARGS} -kv -v --qnn --disable_dynamic_shape"
+  echo "PT2E_QUANTIZE is ${PT2E_QUANTIZE}"
+  if [[ "${PT2E_QUANTIZE}" == "qnn_16a16w" ]]; then
+    EXPORT_ARGS+=" --tokenizer_path tokenizer.model --pt2e_quantize qnn_16a16w --calibration_tasks wikitext --calibration_limit 1 --calibration_seq_length 128 --calibration_data Once "
+  fi
 fi
 # Add dynamically linked library location
 $PYTHON_EXECUTABLE -m examples.models.llama.export_llama ${EXPORT_ARGS}
diff --git a/.github/workflows/trunk.yml b/.github/workflows/trunk.yml
index 7afc385a192..ae1b88fb182 100644
--- a/.github/workflows/trunk.yml
+++ b/.github/workflows/trunk.yml
@@ -441,3 +441,39 @@ jobs:
         cmake-out/examples/models/llama/llama_main --model_path=${ET_MODEL_NAME}.pte --tokenizer_path=${TOKENIZER_BIN_FILE} --prompt="My name is"
         echo "::endgroup::"
 
+
+
+  test-llama-runner-qnn-linux:
+    name: test-llama-runner-qnn-linux
+    uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
+    strategy:
+      matrix:
+        dtype: [fp32]
+        pt2e_quantize: [qnn_16a16w, qnn_8a8w]
+        mode: [qnn]
+      fail-fast: false
+    with:
+      runner: linux.2xlarge
+      docker-image: executorch-ubuntu-22.04-qnn-sdk
+      submodules: 'true'
+      ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
+      timeout: 900
+      script: |
+        # The generic Linux job chooses to use base env, not the one setup by the image
+        CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
+        conda activate "${CONDA_ENV}"
+
+        BUILD_TOOL="cmake"
+        DTYPE=${{ matrix.dtype }}
+        MODE=${{ matrix.mode }}
+        PT2E_QUANTIZE=${{ matrix.pt2e_quantize }}
+
+        PYTHON_EXECUTABLE=python bash .ci/scripts/setup-qnn-deps.sh
+        PYTHON_EXECUTABLE=python bash .ci/scripts/build-qnn-sdk.sh
+
+        # Setup executorch
+        PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh "${BUILD_TOOL}"
+        # Install requirements for export_llama
+        PYTHON_EXECUTABLE=python bash examples/models/llama/install_requirements.sh
+        # Test llama2
+        PYTHON_EXECUTABLE=python bash .ci/scripts/test_llama.sh -model stories110M -build_tool "${BUILD_TOOL}" -mode "${MODE}" -dtype "${DTYPE}" -pt2e_quantize "${PT2E_QUANTIZE}"
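
Note: to exercise one cell of the new CI matrix locally, the job's script block reduces to the sketch below. It assumes a Linux checkout at the repo root with an activated conda environment; every command is copied from the workflow above, with the matrix values pinned to dtype=fp32, mode=qnn, pt2e_quantize=qnn_16a16w.

    # Minimal local repro of the new job (one matrix combination); assumes
    # repo root as working directory and an active conda env, per the job above.
    PYTHON_EXECUTABLE=python bash .ci/scripts/setup-qnn-deps.sh
    PYTHON_EXECUTABLE=python bash .ci/scripts/build-qnn-sdk.sh
    PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh cmake
    PYTHON_EXECUTABLE=python bash examples/models/llama/install_requirements.sh
    PYTHON_EXECUTABLE=python bash .ci/scripts/test_llama.sh \
        -model stories110M -build_tool cmake -mode qnn \
        -dtype fp32 -pt2e_quantize qnn_16a16w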