Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Bumping ET Pin to Jan 16, 2025 #1459

Merged
merged 7 commits into from
Jan 17, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .github/workflows/more-tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,9 @@ on:

jobs:
test-cuda:
permissions:
id-token: write
contents: read
uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
with:
runner: linux.g5.4xlarge.nvidia.gpu
Expand Down
3 changes: 3 additions & 0 deletions .github/workflows/periodic.yml
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,9 @@ jobs:
set -eux
PYTHONPATH="${PWD}" python .ci/scripts/gather_test_models.py --event "periodic" --backend "gpu"
test-gpu:
permissions:
id-token: write
contents: read
uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
name: test-gpu (${{ matrix.platform }}, ${{ matrix.model_name }})
needs: gather-models-gpu
Expand Down
18 changes: 18 additions & 0 deletions .github/workflows/pull.yml
Original file line number Diff line number Diff line change
Expand Up @@ -215,6 +215,9 @@ jobs:
set -eux
PYTHONPATH="${PWD}" python .ci/scripts/gather_test_models.py --event "pull_request" --backend "gpu"
test-gpu-compile:
permissions:
id-token: write
contents: read
uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
name: test-gpu-compile (${{ matrix.platform }}, ${{ matrix.model_name }})
needs: gather-models-gpu
Expand Down Expand Up @@ -250,6 +253,9 @@ jobs:
echo "::endgroup::"

test-gpu-aoti-bfloat16:
permissions:
id-token: write
contents: read
uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
name: test-gpu-aoti-bfloat16 (${{ matrix.platform }}, ${{ matrix.model_name }})
needs: gather-models-gpu
Expand Down Expand Up @@ -286,6 +292,9 @@ jobs:
echo "::endgroup::"

test-gpu-aoti-float32:
permissions:
id-token: write
contents: read
uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
name: test-gpu-aoti-float32 (${{ matrix.platform }}, ${{ matrix.model_name }})
needs: gather-models-gpu
Expand Down Expand Up @@ -327,6 +336,9 @@ jobs:
echo "::endgroup::"

test-gpu-aoti-float16:
permissions:
id-token: write
contents: read
uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
name: test-gpu-aoti-float16 (${{ matrix.platform }}, ${{ matrix.model_name }})
needs: gather-models-gpu
Expand Down Expand Up @@ -369,6 +381,9 @@ jobs:
echo "::endgroup::"

test-gpu-eval-sanity-check:
permissions:
id-token: write
contents: read
uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
name: test-gpu-eval-sanity-check (${{ matrix.platform }}, ${{ matrix.model_name }})
needs: gather-models-gpu
Expand Down Expand Up @@ -1011,6 +1026,9 @@ jobs:
echo "Tests complete."

test-build-runner-et-android:
permissions:
id-token: write
contents: read
uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
with:
runner: linux.4xlarge
Expand Down
9 changes: 9 additions & 0 deletions .github/workflows/run-readme-periodic.yml
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,9 @@ on:

jobs:
test-readme:
permissions:
id-token: write
contents: read
uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
secrets: inherit
with:
Expand Down Expand Up @@ -39,6 +42,9 @@ jobs:


test-quantization-any:
permissions:
id-token: write
contents: read
uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
with:
runner: linux.g5.4xlarge.nvidia.gpu
Expand Down Expand Up @@ -66,6 +72,9 @@ jobs:
echo "::endgroup::"

test-gguf-any:
permissions:
id-token: write
contents: read
uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
secrets: inherit
with:
Expand Down
4 changes: 2 additions & 2 deletions .github/workflows/run-readme-pr-mps.yml
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ jobs:
uses: pytorch/test-infra/.github/workflows/macos_job.yml@main
with:
runner: macos-m1-14
timeout-minutes: 50
timeout: 50
script: |
conda create -y -n test-readme-mps-macos python=3.10.11 llvm-openmp
conda activate test-readme-mps-macos
Expand All @@ -36,7 +36,7 @@ jobs:
test-quantization-mps-macos:
uses: pytorch/test-infra/.github/workflows/macos_job.yml@main
with:
runner: macos-m1-14
runner: macos-m1-14
script: |
set -x
conda create -y -n test-quantization-mps-macos python=3.10.11
Expand Down
38 changes: 37 additions & 1 deletion .github/workflows/run-readme-pr.yml
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,9 @@ on:

jobs:
test-readme-any:
permissions:
id-token: write
contents: read
uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
with:
runner: linux.g5.4xlarge.nvidia.gpu
Expand All @@ -28,6 +31,9 @@ jobs:
echo "::endgroup::"

test-readme-cpu:
permissions:
id-token: write
contents: read
uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
with:
runner: linux.g5.4xlarge.nvidia.gpu
Expand All @@ -47,6 +53,9 @@ jobs:
echo "::endgroup::"

test-quantization-any:
permissions:
id-token: write
contents: read
uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
with:
runner: linux.g5.4xlarge.nvidia.gpu
Expand All @@ -66,6 +75,9 @@ jobs:
echo "::endgroup::"

test-quantization-cpu:
permissions:
id-token: write
contents: read
uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
with:
runner: linux.g5.4xlarge.nvidia.gpu
Expand All @@ -80,6 +92,9 @@ jobs:
TORCHCHAT_DEVICE=cpu .ci/scripts/run-docs quantization

test-gguf-any:
permissions:
id-token: write
contents: read
uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
with:
runner: linux.g5.4xlarge.nvidia.gpu
Expand All @@ -99,6 +114,9 @@ jobs:
echo "::endgroup::"

test-gguf-cpu:
permissions:
id-token: write
contents: read
uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
with:
runner: linux.g5.4xlarge.nvidia.gpu
Expand All @@ -119,6 +137,9 @@ jobs:


test-advanced-any:
permissions:
id-token: write
contents: read
uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
with:
runner: linux.g5.4xlarge.nvidia.gpu
Expand All @@ -139,6 +160,9 @@ jobs:


test-advanced-cpu:
permissions:
id-token: write
contents: read
uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
with:
runner: linux.g5.4xlarge.nvidia.gpu
Expand All @@ -158,6 +182,9 @@ jobs:
echo "::endgroup::"

test-evaluation-any:
permissions:
id-token: write
contents: read
uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
with:
runner: linux.g5.4xlarge.nvidia.gpu
Expand All @@ -177,6 +204,9 @@ jobs:
echo "::endgroup::"

test-evaluation-cpu:
permissions:
id-token: write
contents: read
uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
with:
runner: linux.g5.4xlarge.nvidia.gpu
Expand All @@ -196,6 +226,9 @@ jobs:
echo "::endgroup::"

test-multimodal-any:
permissions:
id-token: write
contents: read
uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
with:
runner: linux.g5.4xlarge.nvidia.gpu
Expand All @@ -215,6 +248,9 @@ jobs:
echo "::endgroup::"

test-multimodal-cpu:
permissions:
id-token: write
contents: read
uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
with:
runner: linux.g5.4xlarge.nvidia.gpu
Expand Down Expand Up @@ -269,4 +305,4 @@ jobs:
export PATH=/opt/rh/devtoolset-10/root/usr/bin/:$PATH
echo "::endgroup::"

TORCHCHAT_DEVICE=cpu .ci/scripts/run-docs native
TORCHCHAT_DEVICE=cpu .ci/scripts/run-docs native
3 changes: 3 additions & 0 deletions .github/workflows/runner-cuda-dtype.yml
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,9 @@ on:

jobs:
test-runner-aot-cuda:
permissions:
id-token: write
contents: read
uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
with:
runner: linux.g5.4xlarge.nvidia.gpu
Expand Down
2 changes: 1 addition & 1 deletion install/.pins/et-pin.txt
Original file line number Diff line number Diff line change
@@ -1 +1 @@
98e4dd524f2cb08414ee015b27616229cabc06ba
9c043290ad3944268290e015c3063bc411e6ef6b
5 changes: 2 additions & 3 deletions torchchat/export.py
Original file line number Diff line number Diff line change
Expand Up @@ -125,7 +125,6 @@ def export_for_server(
)
from executorch.exir.tracer import Value

from torch._export import capture_pre_autograd_graph
from torch.export import export, export_for_training, ExportedProgram

from torchchat.model import apply_rotary_emb, Attention
Expand Down Expand Up @@ -223,7 +222,7 @@ def forward(self, x, freqs_cis, mask, input_pos=None, cache_lane: int = 0):
return self.wo(output)

def replace_attention_with_custom_sdpa_attention(module: nn.Module):
from executorch.extension.llm.custom_ops import sdpa_with_kv_cache # noqa
from executorch.extension.llm.custom_ops import custom_ops # noqa

for name, child in module.named_children():
if isinstance(child, Attention):
Expand Down Expand Up @@ -316,7 +315,7 @@ def export_for_et(model, device, output_path) -> str:
with torch.nn.attention.sdpa_kernel(
[torch.nn.attention.SDPBackend.MATH]
), torch.no_grad():
m = capture_pre_autograd_graph(model, input, dynamic_shapes=dynamic_shapes)
m = export_for_training(model, input, dynamic_shapes=dynamic_shapes).module()

edge_manager = export_to_edge(
m,
Expand Down
Loading