From 87a3c54fa1eed5d2c14593faa9ff53f5be7f4b5f Mon Sep 17 00:00:00 2001 From: Scott Roy <161522778+metascroy@users.noreply.github.com> Date: Mon, 3 Mar 2025 15:35:01 -0800 Subject: [PATCH 01/13] up --- .ci/scripts/test_ane_static_llama.sh | 51 +++++++++++++++++++ .github/workflows/pull.yml | 28 ++++++++++ examples/apple/coreml/llama/export.py | 1 + .../apple/coreml/llama/llama_transformer.py | 6 ++- 4 files changed, 84 insertions(+), 2 deletions(-) create mode 100644 .ci/scripts/test_ane_static_llama.sh diff --git a/.ci/scripts/test_ane_static_llama.sh b/.ci/scripts/test_ane_static_llama.sh new file mode 100644 index 00000000000..b671a6cf5e7 --- /dev/null +++ b/.ci/scripts/test_ane_static_llama.sh @@ -0,0 +1,51 @@ +#!/bin/bash +# Copyright (c) Qualcomm Innovation Center, Inc. +# All rights reserved +# +# This source code is licensed under the BSD-style license found in the +# LICENSE file in the root directory of this source tree. + +set -exu + +source "$(dirname "${BASH_SOURCE[0]}")/utils.sh" + +export EXECUTORCH_ROOT="$(dirname "${BASH_SOURCE[0]}")/../.." + +if [[ -z "${PYTHON_EXECUTABLE:-}" ]]; then + PYTHON_EXECUTABLE=python3 +fi + +which "${PYTHON_EXECUTABLE}" + +pushd $EXECUTORCH_ROOT/examples/apple/coreml/llama + +# Download stories llama110m artifacts +download_stories_model_artifacts + +python export.py -n model.pte -p params.json -c stories110M.pt --seq_length 32 --max_seq_length 64 --dtype fp16 --coreml-quantize c4w + + +python run.py -m model.pte -t tokenizer.model --prompt "Once upon a time," --temperature 0.0 &> tmp.txt +tail -n +6 tmp.txt &> output.txt + +cat output.txt + +printf 'Once upon a time,there was a little girl named L ily . She loved to play outside in the sun sh ine . One day , she saw ' &> expected.txt + + +if diff output.txt expected.txt > /dev/null; then + echo "Output matches." +else + echo "Output does not match." + echo "\n\nExpected:" + cat expected.txt + + echo "\n\nGot:" + cat output.txt + + echo "\n\nDiff:" + diff output.txt expected.txt + exit 1 +fi + +popd diff --git a/.github/workflows/pull.yml b/.github/workflows/pull.yml index 5cc0d3c597b..6cbf8ae7db8 100644 --- a/.github/workflows/pull.yml +++ b/.github/workflows/pull.yml @@ -495,6 +495,34 @@ jobs: # Test static llama weight sharing and accuracy PYTHON_EXECUTABLE=python bash .ci/scripts/test_qnn_static_llama.sh + test-static-llama-ane: + name: test-static-llama-ane + uses: pytorch/test-infra/.github/workflows/macos_job.yml@main + with: + runner: macos-m1-stable + python-version: '3.11' + submodules: 'true' + ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} + script: | + set -eux + bash .ci/scripts/setup-conda.sh + eval "$(conda shell.bash hook)" + + # Setup MacOS dependencies as there is no Docker support on MacOS atm + PYTHON_EXECUTABLE=python \ + EXECUTORCH_BUILD_PYBIND=ON \ + CMAKE_ARGS="-DEXECUTORCH_BUILD_COREML=ON -DEXECUTORCH_BUILD_MPS=ON -DEXECUTORCH_BUILD_XNNPACK=ON -DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON" \ + ${CONDA_RUN} --no-capture-output \ + .ci/scripts/setup-macos.sh "$@" + + # Install llama3_2_vision dependencies. + PYTHON_EXECUTABLE=python \ + ${CONDA_RUN} --no-capture-output \ + ./examples/models/llama3_2_vision/install_requirements.sh + + # Test ANE llama + sh .ci/scripts/test_ane_static_llama.sh + test-qnn-models-linux: name: test-qnn-models-linux uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main diff --git a/examples/apple/coreml/llama/export.py b/examples/apple/coreml/llama/export.py index c0f60529895..f440dc878d4 100644 --- a/examples/apple/coreml/llama/export.py +++ b/examples/apple/coreml/llama/export.py @@ -203,6 +203,7 @@ def main() -> None: torch.ops.aten.scaled_dot_product_attention.default, # preserve norm op for numerical stability torch.ops.aten.linalg_vector_norm.default, + torch.ops.aten.reciprocal.default, ], compile_config=EdgeCompileConfig( _check_ir_validity=False, diff --git a/examples/apple/coreml/llama/llama_transformer.py b/examples/apple/coreml/llama/llama_transformer.py index 2ce4c1d2b5b..3c371da4c00 100644 --- a/examples/apple/coreml/llama/llama_transformer.py +++ b/examples/apple/coreml/llama/llama_transformer.py @@ -134,8 +134,10 @@ def _norm(self, x): # We have yet to do large scale evaluations on the numeric stability of this solution, but note that # it appears better than what exists currently (removing FP32 casts and using FP16) rms_norm_eps0 = ( - x * torch.sqrt(torch.tensor(self.dim, dtype=x.dtype)) - ) / torch.linalg.vector_norm(x, dim=-1, keepdim=True) + x + * torch.sqrt(torch.tensor(self.dim, dtype=x.dtype)) + * torch.reciprocal(torch.linalg.vector_norm(x, dim=-1, keepdim=True)) + ) return rms_norm_eps0 def forward(self, x): From ee6a29419df996c366efb3b64bf21b559115066c Mon Sep 17 00:00:00 2001 From: Scott Roy <161522778+metascroy@users.noreply.github.com> Date: Mon, 3 Mar 2025 15:52:44 -0800 Subject: [PATCH 02/13] up --- .github/workflows/pull.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/pull.yml b/.github/workflows/pull.yml index 6cbf8ae7db8..5666d5d6f41 100644 --- a/.github/workflows/pull.yml +++ b/.github/workflows/pull.yml @@ -505,6 +505,7 @@ jobs: ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} script: | set -eux + export BUILD_TOOL="cmake" bash .ci/scripts/setup-conda.sh eval "$(conda shell.bash hook)" From 9ec6231beb7a4fd69e0e3056d5b29068074a801e Mon Sep 17 00:00:00 2001 From: Scott Roy <161522778+metascroy@users.noreply.github.com> Date: Mon, 3 Mar 2025 16:26:48 -0800 Subject: [PATCH 03/13] up --- .github/workflows/pull.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/pull.yml b/.github/workflows/pull.yml index 5666d5d6f41..74dba00540c 100644 --- a/.github/workflows/pull.yml +++ b/.github/workflows/pull.yml @@ -505,14 +505,14 @@ jobs: ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} script: | set -eux - export BUILD_TOOL="cmake" bash .ci/scripts/setup-conda.sh eval "$(conda shell.bash hook)" # Setup MacOS dependencies as there is no Docker support on MacOS atm + BUILD_TOOL=cmake \ PYTHON_EXECUTABLE=python \ EXECUTORCH_BUILD_PYBIND=ON \ - CMAKE_ARGS="-DEXECUTORCH_BUILD_COREML=ON -DEXECUTORCH_BUILD_MPS=ON -DEXECUTORCH_BUILD_XNNPACK=ON -DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON" \ + CMAKE_ARGS="-DEXECUTORCH_BUILD_COREML=ON -DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON" \ ${CONDA_RUN} --no-capture-output \ .ci/scripts/setup-macos.sh "$@" From 0c627f11b8cd1ff5a96cc46d5f4e196272c60c79 Mon Sep 17 00:00:00 2001 From: Scott Roy <161522778+metascroy@users.noreply.github.com> Date: Mon, 3 Mar 2025 16:43:22 -0800 Subject: [PATCH 04/13] up --- .github/workflows/pull.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/pull.yml b/.github/workflows/pull.yml index 74dba00540c..7acb9a949c1 100644 --- a/.github/workflows/pull.yml +++ b/.github/workflows/pull.yml @@ -509,12 +509,12 @@ jobs: eval "$(conda shell.bash hook)" # Setup MacOS dependencies as there is no Docker support on MacOS atm - BUILD_TOOL=cmake \ PYTHON_EXECUTABLE=python \ EXECUTORCH_BUILD_PYBIND=ON \ CMAKE_ARGS="-DEXECUTORCH_BUILD_COREML=ON -DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON" \ ${CONDA_RUN} --no-capture-output \ - .ci/scripts/setup-macos.sh "$@" + .ci/scripts/setup-macos.sh cmake debug false + # Install llama3_2_vision dependencies. PYTHON_EXECUTABLE=python \ From 4d4b585c0565b734bfd339043c1ca9a9cc49f3b6 Mon Sep 17 00:00:00 2001 From: Scott Roy <161522778+metascroy@users.noreply.github.com> Date: Mon, 3 Mar 2025 17:00:26 -0800 Subject: [PATCH 05/13] up --- .github/workflows/pull.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/pull.yml b/.github/workflows/pull.yml index 7acb9a949c1..a596d620321 100644 --- a/.github/workflows/pull.yml +++ b/.github/workflows/pull.yml @@ -513,7 +513,7 @@ jobs: EXECUTORCH_BUILD_PYBIND=ON \ CMAKE_ARGS="-DEXECUTORCH_BUILD_COREML=ON -DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON" \ ${CONDA_RUN} --no-capture-output \ - .ci/scripts/setup-macos.sh cmake debug false + .ci/scripts/setup-macos.sh --build-tool cmake --build-mode Debug --editable false # Install llama3_2_vision dependencies. From 7b57ae3dfb51fdcf2b7b889de9e4574f6a82bd13 Mon Sep 17 00:00:00 2001 From: Scott Roy <161522778+metascroy@users.noreply.github.com> Date: Mon, 3 Mar 2025 17:13:29 -0800 Subject: [PATCH 06/13] up --- .github/workflows/pull.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.github/workflows/pull.yml b/.github/workflows/pull.yml index a596d620321..7b7324fb7f7 100644 --- a/.github/workflows/pull.yml +++ b/.github/workflows/pull.yml @@ -521,6 +521,9 @@ jobs: ${CONDA_RUN} --no-capture-output \ ./examples/models/llama3_2_vision/install_requirements.sh + # Install coreml + sh ./backends/apple/coreml/scripts/install_requirements.sh + # Test ANE llama sh .ci/scripts/test_ane_static_llama.sh From 1ba0f20628e8af11157e87ab5e74fc16f01faf02 Mon Sep 17 00:00:00 2001 From: Scott Roy <161522778+metascroy@users.noreply.github.com> Date: Mon, 3 Mar 2025 17:57:57 -0800 Subject: [PATCH 07/13] up --- .github/workflows/pull.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/pull.yml b/.github/workflows/pull.yml index 7b7324fb7f7..16c81a3c0b6 100644 --- a/.github/workflows/pull.yml +++ b/.github/workflows/pull.yml @@ -522,7 +522,7 @@ jobs: ./examples/models/llama3_2_vision/install_requirements.sh # Install coreml - sh ./backends/apple/coreml/scripts/install_requirements.sh + pip install coremltools # Test ANE llama sh .ci/scripts/test_ane_static_llama.sh From 74ecaaf4a4f376278590962fd99c1c90c6b48f03 Mon Sep 17 00:00:00 2001 From: Scott Roy <161522778+metascroy@users.noreply.github.com> Date: Mon, 3 Mar 2025 20:12:54 -0800 Subject: [PATCH 08/13] up --- .github/workflows/pull.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/pull.yml b/.github/workflows/pull.yml index 16c81a3c0b6..df1b4f9ece4 100644 --- a/.github/workflows/pull.yml +++ b/.github/workflows/pull.yml @@ -523,6 +523,7 @@ jobs: # Install coreml pip install coremltools + sh install_requirements.sh # Test ANE llama sh .ci/scripts/test_ane_static_llama.sh From 37339518cca9dde3899e3836ff47007afaf68742 Mon Sep 17 00:00:00 2001 From: Scott Roy <161522778+metascroy@users.noreply.github.com> Date: Mon, 3 Mar 2025 20:33:39 -0800 Subject: [PATCH 09/13] up --- .github/workflows/pull.yml | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/.github/workflows/pull.yml b/.github/workflows/pull.yml index df1b4f9ece4..e4b7a49be5e 100644 --- a/.github/workflows/pull.yml +++ b/.github/workflows/pull.yml @@ -508,22 +508,25 @@ jobs: bash .ci/scripts/setup-conda.sh eval "$(conda shell.bash hook)" - # Setup MacOS dependencies as there is no Docker support on MacOS atm - PYTHON_EXECUTABLE=python \ - EXECUTORCH_BUILD_PYBIND=ON \ - CMAKE_ARGS="-DEXECUTORCH_BUILD_COREML=ON -DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON" \ - ${CONDA_RUN} --no-capture-output \ - .ci/scripts/setup-macos.sh --build-tool cmake --build-mode Debug --editable false + # # Setup MacOS dependencies as there is no Docker support on MacOS atm + # PYTHON_EXECUTABLE=python \ + # EXECUTORCH_BUILD_PYBIND=ON \ + # CMAKE_ARGS="-DEXECUTORCH_BUILD_COREML=ON -DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON" \ + # ${CONDA_RUN} --no-capture-output \ + # .ci/scripts/setup-macos.sh --build-tool cmake --build-mode Debug --editable false - # Install llama3_2_vision dependencies. - PYTHON_EXECUTABLE=python \ - ${CONDA_RUN} --no-capture-output \ - ./examples/models/llama3_2_vision/install_requirements.sh + # # Install llama3_2_vision dependencies. + # PYTHON_EXECUTABLE=python \ + # ${CONDA_RUN} --no-capture-output \ + # ./examples/models/llama3_2_vision/install_requirements.sh # Install coreml pip install coremltools sh install_requirements.sh + sh examples/models/llama/install_requirements.sh + python install_executorch.py --pybind coreml + # Test ANE llama sh .ci/scripts/test_ane_static_llama.sh From 1bb62e3f9a01ffcdefbc1c64174f37b913e3a081 Mon Sep 17 00:00:00 2001 From: Scott Roy <161522778+metascroy@users.noreply.github.com> Date: Mon, 3 Mar 2025 20:42:05 -0800 Subject: [PATCH 10/13] up --- .github/workflows/pull.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/pull.yml b/.github/workflows/pull.yml index e4b7a49be5e..aa8a890636f 100644 --- a/.github/workflows/pull.yml +++ b/.github/workflows/pull.yml @@ -524,8 +524,9 @@ jobs: # Install coreml pip install coremltools sh install_requirements.sh - sh examples/models/llama/install_requirements.sh + sh backends/apple/coreml/scripts/install_requirements.sh python install_executorch.py --pybind coreml + sh examples/models/llama/install_requirements.sh # Test ANE llama From 030a6b3fa72bc79bff225bfccf8f3b951b7d9407 Mon Sep 17 00:00:00 2001 From: Scott Roy <161522778+metascroy@users.noreply.github.com> Date: Mon, 3 Mar 2025 20:49:33 -0800 Subject: [PATCH 11/13] up --- .github/workflows/pull.yml | 17 +---------------- 1 file changed, 1 insertion(+), 16 deletions(-) diff --git a/.github/workflows/pull.yml b/.github/workflows/pull.yml index aa8a890636f..82a80179d11 100644 --- a/.github/workflows/pull.yml +++ b/.github/workflows/pull.yml @@ -508,27 +508,12 @@ jobs: bash .ci/scripts/setup-conda.sh eval "$(conda shell.bash hook)" - # # Setup MacOS dependencies as there is no Docker support on MacOS atm - # PYTHON_EXECUTABLE=python \ - # EXECUTORCH_BUILD_PYBIND=ON \ - # CMAKE_ARGS="-DEXECUTORCH_BUILD_COREML=ON -DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON" \ - # ${CONDA_RUN} --no-capture-output \ - # .ci/scripts/setup-macos.sh --build-tool cmake --build-mode Debug --editable false - - - # # Install llama3_2_vision dependencies. - # PYTHON_EXECUTABLE=python \ - # ${CONDA_RUN} --no-capture-output \ - # ./examples/models/llama3_2_vision/install_requirements.sh - - # Install coreml - pip install coremltools + # Install requirements sh install_requirements.sh sh backends/apple/coreml/scripts/install_requirements.sh python install_executorch.py --pybind coreml sh examples/models/llama/install_requirements.sh - # Test ANE llama sh .ci/scripts/test_ane_static_llama.sh From c6532995cba41904904bd324713e3641f92bd38a Mon Sep 17 00:00:00 2001 From: Scott Roy <161522778+metascroy@users.noreply.github.com> Date: Mon, 3 Mar 2025 21:06:48 -0800 Subject: [PATCH 12/13] up --- .ci/scripts/test_ane_static_llama.sh | 24 ------------------------ 1 file changed, 24 deletions(-) diff --git a/.ci/scripts/test_ane_static_llama.sh b/.ci/scripts/test_ane_static_llama.sh index b671a6cf5e7..c83c522d629 100644 --- a/.ci/scripts/test_ane_static_llama.sh +++ b/.ci/scripts/test_ane_static_llama.sh @@ -24,28 +24,4 @@ download_stories_model_artifacts python export.py -n model.pte -p params.json -c stories110M.pt --seq_length 32 --max_seq_length 64 --dtype fp16 --coreml-quantize c4w - -python run.py -m model.pte -t tokenizer.model --prompt "Once upon a time," --temperature 0.0 &> tmp.txt -tail -n +6 tmp.txt &> output.txt - -cat output.txt - -printf 'Once upon a time,there was a little girl named L ily . She loved to play outside in the sun sh ine . One day , she saw ' &> expected.txt - - -if diff output.txt expected.txt > /dev/null; then - echo "Output matches." -else - echo "Output does not match." - echo "\n\nExpected:" - cat expected.txt - - echo "\n\nGot:" - cat output.txt - - echo "\n\nDiff:" - diff output.txt expected.txt - exit 1 -fi - popd From 83ae7aadf6b7d74b2d1146083a8012f1cd82a134 Mon Sep 17 00:00:00 2001 From: Scott Roy <161522778+metascroy@users.noreply.github.com> Date: Tue, 4 Mar 2025 09:07:11 -0800 Subject: [PATCH 13/13] up --- .github/workflows/pull.yml | 22 ---------------------- .github/workflows/trunk.yml | 22 ++++++++++++++++++++++ 2 files changed, 22 insertions(+), 22 deletions(-) diff --git a/.github/workflows/pull.yml b/.github/workflows/pull.yml index 82a80179d11..5cc0d3c597b 100644 --- a/.github/workflows/pull.yml +++ b/.github/workflows/pull.yml @@ -495,28 +495,6 @@ jobs: # Test static llama weight sharing and accuracy PYTHON_EXECUTABLE=python bash .ci/scripts/test_qnn_static_llama.sh - test-static-llama-ane: - name: test-static-llama-ane - uses: pytorch/test-infra/.github/workflows/macos_job.yml@main - with: - runner: macos-m1-stable - python-version: '3.11' - submodules: 'true' - ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} - script: | - set -eux - bash .ci/scripts/setup-conda.sh - eval "$(conda shell.bash hook)" - - # Install requirements - sh install_requirements.sh - sh backends/apple/coreml/scripts/install_requirements.sh - python install_executorch.py --pybind coreml - sh examples/models/llama/install_requirements.sh - - # Test ANE llama - sh .ci/scripts/test_ane_static_llama.sh - test-qnn-models-linux: name: test-qnn-models-linux uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main diff --git a/.github/workflows/trunk.yml b/.github/workflows/trunk.yml index 410e95d9a84..c003f050ba0 100644 --- a/.github/workflows/trunk.yml +++ b/.github/workflows/trunk.yml @@ -229,6 +229,28 @@ jobs: # see if we can import the module successfully ${CONDA_RUN} python -c "from executorch.extension.pybindings import portable_lib; print('success!')" + test-static-llama-ane: + name: test-static-llama-ane + uses: pytorch/test-infra/.github/workflows/macos_job.yml@main + with: + runner: macos-m1-stable + python-version: '3.11' + submodules: 'true' + ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} + script: | + set -eux + bash .ci/scripts/setup-conda.sh + eval "$(conda shell.bash hook)" + + # Install requirements + sh install_requirements.sh + sh backends/apple/coreml/scripts/install_requirements.sh + python install_executorch.py --pybind coreml + sh examples/models/llama/install_requirements.sh + + # Test ANE llama + sh .ci/scripts/test_ane_static_llama.sh + test-llama-runner-macos: name: test-llama-runner-mac uses: pytorch/test-infra/.github/workflows/macos_job.yml@main