Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions .ci/scripts/test_llama.sh
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,10 @@ while [[ $# -gt 0 ]]; do
MODE="$2" # portable or xnnpack+custom or xnnpack+custom+qe
shift 2
;;
-pt2e_quantize)
PT2E_QUANTIZE="$2"
shift 2
;;
-upload)
UPLOAD_DIR="$2"
shift 2
Expand Down Expand Up @@ -234,6 +238,10 @@ if [[ "${COREML}" == "ON" ]]; then
fi
if [[ "${QNN}" == "ON" ]]; then
EXPORT_ARGS="${EXPORT_ARGS} -kv -v --qnn --disable_dynamic_shape"
echo "PT2E_QUANTIZE is ${PT2E_QUANTIZE}"
if [[ "${PT2E_QUANTIZE}" == "qnn_16a16w" ]]; then
EXPORT_ARGS+=" --tokenizer_path tokenizer.model --pt2e_quantize qnn_16a16w --calibration_tasks wikitext --calibration_limit 1 --calibration_seq_length 128 --calibration_data Once "
fi
fi
# Add dynamically linked library location
$PYTHON_EXECUTABLE -m examples.models.llama.export_llama ${EXPORT_ARGS}
Expand Down
36 changes: 36 additions & 0 deletions .github/workflows/trunk.yml
Original file line number Diff line number Diff line change
Expand Up @@ -441,3 +441,39 @@ jobs:

cmake-out/examples/models/llama/llama_main --model_path=${ET_MODEL_NAME}.pte --tokenizer_path=${TOKENIZER_BIN_FILE} --prompt="My name is"
echo "::endgroup::"


test-llama-runner-qnn-linux:
name: test-llama-runner-qnn-linux
uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
strategy:
matrix:
dtype: [fp32]
pt2e_quantize: [qnn_16a16w, qnn_8a8w]
mode: [qnn]
fail-fast: false
with:
runner: linux.2xlarge
docker-image: executorch-ubuntu-22.04-qnn-sdk
submodules: 'true'
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
timeout: 900
script: |
# The generic Linux job chooses to use base env, not the one setup by the image
CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
conda activate "${CONDA_ENV}"

BUILD_TOOL="cmake"
DTYPE=${{ matrix.dtype }}
MODE=${{ matrix.mode }}
PT2E_QUANTIZE=${{ matrix.pt2e_quantize }}

PYTHON_EXECUTABLE=python bash .ci/scripts/setup-qnn-deps.sh
PYTHON_EXECUTABLE=python bash .ci/scripts/build-qnn-sdk.sh

# Setup executorch
PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh "${BUILD_TOOL}"
# Install requirements for export_llama
PYTHON_EXECUTABLE=python bash examples/models/llama/install_requirements.sh
# Test llama2
PYTHON_EXECUTABLE=python bash .ci/scripts/test_llama.sh -model stories110M -build_tool "${BUILD_TOOL}" -mode "${MODE}" -dtype "${DTYPE}" -pt2e_quantize "${PT2E_QUANTIZE}"
Loading