diff --git a/.github/workflows/more-tests.yml b/.github/workflows/more-tests.yml index f47740fe3..f772382d1 100644 --- a/.github/workflows/more-tests.yml +++ b/.github/workflows/more-tests.yml @@ -9,6 +9,9 @@ on: jobs: test-cuda: + permissions: + id-token: write + contents: read uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main with: runner: linux.g5.4xlarge.nvidia.gpu diff --git a/.github/workflows/periodic.yml b/.github/workflows/periodic.yml index 5a0d9920b..2e264e6cf 100644 --- a/.github/workflows/periodic.yml +++ b/.github/workflows/periodic.yml @@ -108,6 +108,9 @@ jobs: set -eux PYTHONPATH="${PWD}" python .ci/scripts/gather_test_models.py --event "periodic" --backend "gpu" test-gpu: + permissions: + id-token: write + contents: read uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main name: test-gpu (${{ matrix.platform }}, ${{ matrix.model_name }}) needs: gather-models-gpu diff --git a/.github/workflows/pull.yml b/.github/workflows/pull.yml index 670c0205a..5dbafee9f 100644 --- a/.github/workflows/pull.yml +++ b/.github/workflows/pull.yml @@ -215,6 +215,9 @@ jobs: set -eux PYTHONPATH="${PWD}" python .ci/scripts/gather_test_models.py --event "pull_request" --backend "gpu" test-gpu-compile: + permissions: + id-token: write + contents: read uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main name: test-gpu-compile (${{ matrix.platform }}, ${{ matrix.model_name }}) needs: gather-models-gpu @@ -250,6 +253,9 @@ jobs: echo "::endgroup::" test-gpu-aoti-bfloat16: + permissions: + id-token: write + contents: read uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main name: test-gpu-aoti-bfloat16 (${{ matrix.platform }}, ${{ matrix.model_name }}) needs: gather-models-gpu @@ -286,6 +292,9 @@ jobs: echo "::endgroup::" test-gpu-aoti-float32: + permissions: + id-token: write + contents: read uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main name: test-gpu-aoti-float32 (${{ matrix.platform }}, ${{ matrix.model_name }}) needs: gather-models-gpu @@ -327,6 +336,9 @@ jobs: echo "::endgroup::" test-gpu-aoti-float16: + permissions: + id-token: write + contents: read uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main name: test-gpu-aoti-float16 (${{ matrix.platform }}, ${{ matrix.model_name }}) needs: gather-models-gpu @@ -369,6 +381,9 @@ jobs: echo "::endgroup::" test-gpu-eval-sanity-check: + permissions: + id-token: write + contents: read uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main name: test-gpu-eval-sanity-check (${{ matrix.platform }}, ${{ matrix.model_name }}) needs: gather-models-gpu @@ -1011,6 +1026,9 @@ jobs: echo "Tests complete." test-build-runner-et-android: + permissions: + id-token: write + contents: read uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main with: runner: linux.4xlarge diff --git a/.github/workflows/run-readme-periodic.yml b/.github/workflows/run-readme-periodic.yml index 61501e0c4..2c49a975f 100644 --- a/.github/workflows/run-readme-periodic.yml +++ b/.github/workflows/run-readme-periodic.yml @@ -10,6 +10,9 @@ on: jobs: test-readme: + permissions: + id-token: write + contents: read uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main secrets: inherit with: @@ -39,6 +42,9 @@ jobs: test-quantization-any: + permissions: + id-token: write + contents: read uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main with: runner: linux.g5.4xlarge.nvidia.gpu @@ -66,6 +72,9 @@ jobs: echo "::endgroup::" test-gguf-any: + permissions: + id-token: write + contents: read uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main secrets: inherit with: diff --git a/.github/workflows/run-readme-pr-mps.yml b/.github/workflows/run-readme-pr-mps.yml index 3e90265f5..bf1587896 100644 --- a/.github/workflows/run-readme-pr-mps.yml +++ b/.github/workflows/run-readme-pr-mps.yml @@ -10,7 +10,7 @@ jobs: uses: pytorch/test-infra/.github/workflows/macos_job.yml@main with: runner: macos-m1-14 - timeout-minutes: 50 + timeout: 50 script: | conda create -y -n test-readme-mps-macos python=3.10.11 llvm-openmp conda activate test-readme-mps-macos @@ -36,7 +36,7 @@ jobs: test-quantization-mps-macos: uses: pytorch/test-infra/.github/workflows/macos_job.yml@main with: - runner: macos-m1-14 + runner: macos-m1-14 script: | set -x conda create -y -n test-quantization-mps-macos python=3.10.11 diff --git a/.github/workflows/run-readme-pr.yml b/.github/workflows/run-readme-pr.yml index 8694757e7..f32473435 100644 --- a/.github/workflows/run-readme-pr.yml +++ b/.github/workflows/run-readme-pr.yml @@ -9,6 +9,9 @@ on: jobs: test-readme-any: + permissions: + id-token: write + contents: read uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main with: runner: linux.g5.4xlarge.nvidia.gpu @@ -28,6 +31,9 @@ jobs: echo "::endgroup::" test-readme-cpu: + permissions: + id-token: write + contents: read uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main with: runner: linux.g5.4xlarge.nvidia.gpu @@ -47,6 +53,9 @@ jobs: echo "::endgroup::" test-quantization-any: + permissions: + id-token: write + contents: read uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main with: runner: linux.g5.4xlarge.nvidia.gpu @@ -66,6 +75,9 @@ jobs: echo "::endgroup::" test-quantization-cpu: + permissions: + id-token: write + contents: read uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main with: runner: linux.g5.4xlarge.nvidia.gpu @@ -80,6 +92,9 @@ jobs: TORCHCHAT_DEVICE=cpu .ci/scripts/run-docs quantization test-gguf-any: + permissions: + id-token: write + contents: read uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main with: runner: linux.g5.4xlarge.nvidia.gpu @@ -99,6 +114,9 @@ jobs: echo "::endgroup::" test-gguf-cpu: + permissions: + id-token: write + contents: read uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main with: runner: linux.g5.4xlarge.nvidia.gpu @@ -119,6 +137,9 @@ jobs: test-advanced-any: + permissions: + id-token: write + contents: read uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main with: runner: linux.g5.4xlarge.nvidia.gpu @@ -139,6 +160,9 @@ jobs: test-advanced-cpu: + permissions: + id-token: write + contents: read uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main with: runner: linux.g5.4xlarge.nvidia.gpu @@ -158,6 +182,9 @@ jobs: echo "::endgroup::" test-evaluation-any: + permissions: + id-token: write + contents: read uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main with: runner: linux.g5.4xlarge.nvidia.gpu @@ -177,6 +204,9 @@ jobs: echo "::endgroup::" test-evaluation-cpu: + permissions: + id-token: write + contents: read uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main with: runner: linux.g5.4xlarge.nvidia.gpu @@ -196,6 +226,9 @@ jobs: echo "::endgroup::" test-multimodal-any: + permissions: + id-token: write + contents: read uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main with: runner: linux.g5.4xlarge.nvidia.gpu @@ -215,6 +248,9 @@ jobs: echo "::endgroup::" test-multimodal-cpu: + permissions: + id-token: write + contents: read uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main with: runner: linux.g5.4xlarge.nvidia.gpu @@ -269,4 +305,4 @@ jobs: export PATH=/opt/rh/devtoolset-10/root/usr/bin/:$PATH echo "::endgroup::" - TORCHCHAT_DEVICE=cpu .ci/scripts/run-docs native + TORCHCHAT_DEVICE=cpu .ci/scripts/run-docs native diff --git a/.github/workflows/runner-cuda-dtype.yml b/.github/workflows/runner-cuda-dtype.yml index 4cfb9ff09..0b4597942 100644 --- a/.github/workflows/runner-cuda-dtype.yml +++ b/.github/workflows/runner-cuda-dtype.yml @@ -9,6 +9,9 @@ on: jobs: test-runner-aot-cuda: + permissions: + id-token: write + contents: read uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main with: runner: linux.g5.4xlarge.nvidia.gpu diff --git a/install/.pins/et-pin.txt b/install/.pins/et-pin.txt index bb70ed39d..e79e9c341 100644 --- a/install/.pins/et-pin.txt +++ b/install/.pins/et-pin.txt @@ -1 +1 @@ -98e4dd524f2cb08414ee015b27616229cabc06ba +9c043290ad3944268290e015c3063bc411e6ef6b diff --git a/torchchat/export.py b/torchchat/export.py index e84a344bd..37f0b056e 100644 --- a/torchchat/export.py +++ b/torchchat/export.py @@ -125,7 +125,6 @@ def export_for_server( ) from executorch.exir.tracer import Value - from torch._export import capture_pre_autograd_graph from torch.export import export, export_for_training, ExportedProgram from torchchat.model import apply_rotary_emb, Attention @@ -223,7 +222,7 @@ def forward(self, x, freqs_cis, mask, input_pos=None, cache_lane: int = 0): return self.wo(output) def replace_attention_with_custom_sdpa_attention(module: nn.Module): - from executorch.extension.llm.custom_ops import sdpa_with_kv_cache # noqa + from executorch.extension.llm.custom_ops import custom_ops # noqa for name, child in module.named_children(): if isinstance(child, Attention): @@ -316,7 +315,7 @@ def export_for_et(model, device, output_path) -> str: with torch.nn.attention.sdpa_kernel( [torch.nn.attention.SDPBackend.MATH] ), torch.no_grad(): - m = capture_pre_autograd_graph(model, input, dynamic_shapes=dynamic_shapes) + m = export_for_training(model, input, dynamic_shapes=dynamic_shapes).module() edge_manager = export_to_edge( m,