From 034e4aab3f7a0754d38a7654df97111f12fd9756 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?S=C3=A9bastien=20Han?= <seb@redhat.com>
Date: Tue, 5 Nov 2024 17:45:27 +0100
Subject: [PATCH] [CI] remove unused workflows
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The torch inductor tests were completely offloaded to Meta a few
months ago. These workflows are currently disabled on GitHub.

Signed-off-by: Sébastien Han <seb@redhat.com>
---
 .github/workflows/integration-tests.yml       | 572 ------------------
 .github/workflows/integration-tests.yml.in    | 482 ---------------
 .github/workflows/torch-inductor-tests.yml    |  45 --
 .../torch-inductor/scripts/check_acc.py       |  11 -
 .../torch-inductor/scripts/check_perf.py      |  70 ---
 .../torch-inductor/scripts/common.sh          |   9 -
 .../scripts/install_torchinductor.sh          |  74 ---
 .../torch-inductor/scripts/install_triton.sh  |  25 -
 .../scripts/run_torchinductor_acc.sh          |  55 --
 .../scripts/run_torchinductor_perf.sh         |  71 ---
 10 files changed, 1414 deletions(-)
 delete mode 100644 .github/workflows/integration-tests.yml
 delete mode 100644 .github/workflows/integration-tests.yml.in
 delete mode 100644 .github/workflows/torch-inductor-tests.yml
 delete mode 100644 .github/workflows/torch-inductor/scripts/check_acc.py
 delete mode 100644 .github/workflows/torch-inductor/scripts/check_perf.py
 delete mode 100755 .github/workflows/torch-inductor/scripts/common.sh
 delete mode 100755 .github/workflows/torch-inductor/scripts/install_torchinductor.sh
 delete mode 100755 .github/workflows/torch-inductor/scripts/install_triton.sh
 delete mode 100755 .github/workflows/torch-inductor/scripts/run_torchinductor_acc.sh
 delete mode 100755 .github/workflows/torch-inductor/scripts/run_torchinductor_perf.sh

diff --git a/.github/workflows/integration-tests.yml b/.github/workflows/integration-tests.yml
deleted file mode 100644
index 7ef502ad25dd4..0000000000000
--- a/.github/workflows/integration-tests.yml
+++ /dev/null
@@ -1,572 +0,0 @@
-# AUTOGENERATED by pre-commit, modify the .in file instead.
-
-# integration-tests.yml.in is used to generate integration-tests.yml by
-# expanding yaml anchors, because github actions don't support them
-# (https://github.com/actions/runner/issues/1182).  pre-commit will do this for
-# you automatically.
-
-
-name: Integration Tests
-on:
-  workflow_dispatch:
-  pull_request:
-    branches-ignore: ['llvm-**']
-  merge_group:
-    branches: [main, 'dev-**']
-    types: [checks_requested]
-  push:
-    branches: [main]
-concurrency:
-  group: ${{ github.ref }}
-  cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
-permissions: read-all
-env:
-  TRITON_BUILD_WITH_CLANG_LLD: "TRUE"
-  TRITON_USE_ASSERT_ENABLED_LLVM: "TRUE"
-  TRITON_DISABLE_LINE_INFO: 1
-  PROTON_SKIP_PC_SAMPLING_TEST: 1
-jobs:
-  Runner-Preparation:
-    runs-on: ubuntu-latest
-    timeout-minutes: 30
-    outputs:
-      matrix-CUDA: ${{ steps.set-matrix.outputs.matrix-CUDA }}
-      matrix-HIP: ${{ steps.set-matrix.outputs.matrix-HIP }}
-      matrix-MACOS: ${{ steps.set-matrix.outputs.matrix-MACOS }}
-    steps:
-      - name: Decide pre-submit integration test enablement
-        # Always enable integration tests for pre-submit pull requests.
-        if: github.event_name == 'pull_request'
-        run: |
-          echo "enable_integration=true" >> $GITHUB_ENV
-      - name: Checkout post-submit commits
-        if: github.event_name == 'push'
-        uses: actions/checkout@v4
-        with:
-          # Only fetch two commits to check the latest changed files.
-          fetch-depth: 2
-      - name: Detect if build deps (e.g. LLVM hash) changed
-        id: detect-change
-        if: github.event_name == 'push'
-        uses: tj-actions/changed-files@v45
-        with:
-          files: |
-            cmake/*.txt
-            cmake/*.json
-      - name: Detect if enough time has passed since last post-submit run
-        id: detect-time
-        if: github.event_name == 'push'
-        run: |
-          GITHUB_TOKEN=${{ secrets.GITHUB_TOKEN }}
-          REPO_NAME="${{ github.repository }}"
-          # ID of integration-tests workflow
-          WORKFLOW_ID="11678186"
-
-          # Fetch the last run time of this workflow
-          LAST_RUN=$(curl -s \
-            -H "Authorization: token $GITHUB_TOKEN" \
-            -H "Accept: application/vnd.github.v3+json" \
-            "https://api.github.com/repos/$REPO_NAME/actions/workflows/$WORKFLOW_ID/runs?branch=main&status=success&per_page=1" \
-            | jq -r '.workflow_runs[0].updated_at')
-
-          # Convert to timestamp
-          LAST_RUN_TS=$(date -d "$LAST_RUN" +%s)
-          NOW_TS=$(date +%s)
-          DIFF=$(( (NOW_TS - LAST_RUN_TS) / 3600 )) # Difference in hours
-
-          echo "Last run was $DIFF hours ago."
-
-          if [ "$DIFF" -ge 4 ]; then
-            echo "Will run CI; last build was long enough ago."
-            echo "n_hours_since_last_run=true" >> $GITHUB_ENV
-          else
-            echo "Will not run CI; last build was too recent."
-            echo "n_hours_since_last_run=false" >> $GITHUB_ENV
-          fi
-      # We want to run integration tests on the main branch (i.e. post-submit)
-      # occasionally, because pre-submit CI caches will only read from caches
-      # generated from the main branch (or the PR's branch), and we want these
-      # caches to be recent.
-      #
-      # But we also don't want to run the tests on *every* commit, because this
-      # would compete for resources with pre-commit CI (and the whole point of
-      # caching is to speed up CI).
-      #
-      # As a compromise, run every N hours, or if a build dependency changes
-      # (e.g.  we update the LLVM hash).
-      - name: Decide whether to run integration tests post-submit
-        if: |
-          github.event_name == 'push' &&
-          (steps.detect-change.outputs.any_changed == 'true' ||
-           env.n_hours_since_last_run == 'true')
-        run: |
-          echo "enable_integration=true" >> $GITHUB_ENV
-      - name: Prepare runner matrix
-        id: set-matrix
-        if: env.enable_integration == 'true'
-        run: |
-          if [ x"${{ github.repository }}" == x"triton-lang/triton" ]; then
-            echo '::set-output name=matrix-CUDA::[["a100-runner-set"], ["h100-runner-set"]]'
-            echo '::set-output name=matrix-HIP::[["self-hosted", "gfx90a"], ["self-hosted", "gfx942"]]'
-            echo '::set-output name=matrix-MACOS::[["macos-latest"]]'
-          else
-            echo '::set-output name=matrix-CUDA::["ubuntu-latest"]'
-            echo '::set-output name=matrix-HIP::["ubuntu-latest"]'
-            echo '::set-output name=matrix-MACOS::[["macos-latest"]]'
-          fi
-  pre-commit:
-    name: pre-commit (code formatting)
-    needs: Runner-Preparation
-    runs-on: ubuntu-latest
-    steps:
-      - name: Checkout
-        uses: actions/checkout@v4
-      - uses: actions/setup-python@v5
-        with:
-          python-version: '3.12'
-          cache: 'pip'
-      - name: Compute hash of pre-commit config
-        id: cache-key
-        run: |
-          echo "pre_commit_hash=$(sha256sum .pre-commit-config.yaml | cut -d ' ' -f 1)" >> $GITHUB_OUTPUT
-        shell: bash
-      - name: Cache pre-commit's cache dir
-        uses: actions/cache@v4
-        with:
-          # Note that we cannot use environment variables here given there is
-          # no shell to interpret them in the paths.
-          path: |
-            ~/.cache/pre-commit
-          key: ${{ runner.os }}-${{ steps.cache-key.outputs.pre_commit_hash }}
-      - name: Check pre-commit
-        run: |
-          python3 -m pip install --upgrade pre-commit
-          # TODO: ignore the first yapf failure until https://github.com/google/yapf/issues/1164 is fixed
-          python3 -m pre_commit run --all-files --verbose yapf &> /dev/null || true
-          # If first run of yapf worked and made changes reset the tree to the original state
-          git reset --hard
-          python3 -m pre_commit run --all-files --verbose
-      - name: Print diff of changes if pre-commit failed
-        if: failure()
-        run: |
-          git diff
-  Integration-Tests:
-    needs: Runner-Preparation
-    if: needs.Runner-Preparation.outputs.matrix-CUDA != ''
-    runs-on: ${{ matrix.runner }}
-    timeout-minutes: 30
-    strategy:
-      matrix:
-        runner: ${{fromJson(needs.Runner-Preparation.outputs.matrix-CUDA)}}
-    steps:
-      - name: Checkout
-        uses: actions/checkout@v4
-        with:
-          submodules: "true"
-      - name: Compute cache keys
-        id: cache-key
-        run: |
-          llvm_file="cmake/llvm-hash.txt"
-          nvidia_file="cmake/nvidia-toolchain-version.json"
-          json_file="cmake/json-version.txt"
-
-          # Check if files exist before proceeding
-          if [[ ! -f "$llvm_file" || ! -f "$nvidia_file" || ! -f "$json_file" ]]; then
-            echo "Error: Required dependency files are missing."
-            exit 1
-          fi
-
-          # Process the files if they exist
-          echo "llvm=$(cat $llvm_file | cut -c 1-8)" >> $GITHUB_OUTPUT
-          echo "nvidia=$(sha256sum $nvidia_file | cut -d ' ' -f 1)" >> $GITHUB_OUTPUT
-          echo "json=$(cat $json_file)" >> $GITHUB_OUTPUT
-          echo "datetime=$(date -u -Iseconds)" >> $GITHUB_OUTPUT
-        shell: bash
-      - name: Cache build dependencies
-        uses: actions/cache@v4
-        with:
-          # Note that we cannot use environment variables here given there is
-          # no shell to interpret them in the paths.
-          path: |
-            ~/.triton/llvm
-            ~/.triton/nvidia
-            ~/.triton/json
-          key: ${{ runner.os }}-${{ runner.arch }}-llvm-${{ steps.cache-key.outputs.llvm }}-nvidia-${{ steps.cache-key.outputs.nvidia }}-json-${{ steps.cache-key.outputs.json }}
-      - # Cache ~/.triton/cache because the vast majority of unit test time is
-        # spent compiling.  Triton won't (well, should not) use these cached files
-        # if something internal to Triton changes, because Triton's internal
-        # source code is part of the cache key.
-        #
-        # Similarly, cache ~/.cache/ccache to speed up compilation.
-        #
-        # On branch `main` we always start from an empty cache, i.e. we skip the
-        # "restore" step.  This is to prevent the caches from accumulating stale
-        # files over time.
-        name: Restore cache of ccache and Triton compilation artifacts
-        if: github.event_name != 'push'
-        uses: actions/cache/restore@v4
-        with:
-          path: |
-            ~/.triton/cache
-            ~/.cache/ccache
-          # Restore the most recent cache entry.
-          restore-keys: triton-artifacts-${{ runner.os }}-${{ runner.arch }}-${{ runner.name }}-llvm-${{ steps.cache-key.outputs.llvm }}-
-          # We expect this cache key never to hit and for us to fall back
-          # unconditionally to the restore-key, so it doesn't actually matter
-          # what we put here (so long as it doesn't hit an existing key).
-          key: triton-artifacts-${{ runner.os }}-${{ runner.arch }}-${{ runner.name }}-llvm-${{ steps.cache-key.outputs.llvm }}-${{ steps.cache-key.outputs.datetime }}
-      - name: Inspect cache directory
-        run: |
-          mkdir -p ~/.triton
-          ls -alh ~/.triton
-      - name: Update PATH
-        run: |
-          echo "$HOME/.local/bin" >> $GITHUB_PATH
-      - name: Install pip dependencies
-        run: |
-          python3 -m pip install --upgrade pip
-          python3 -m pip install cython setuptools wheel cmake==3.24 ninja pytest-forked pytest-xdist lit
-      - name: Install Triton
-        env:
-          TRITON_BUILD_WITH_CCACHE: "true"
-          CUDA_HOME: "/usr/local/cuda"
-        run: |
-          echo "PATH is '$PATH'"
-          cd python
-          python3 -m pip install '.[tests]'
-      - name: Run lit tests
-        run: |
-          cd python
-          LIT_TEST_DIR="build/$(ls build | grep -i cmake)/test"
-          if [ ! -d "${LIT_TEST_DIR}" ]; then
-            echo "Could not find '${LIT_TEST_DIR}'" ; exit -1
-          fi
-          lit -v "${LIT_TEST_DIR}"
-      - name: Run python tests on CUDA
-        run: |
-          INSTRUMENTATION_LIB_DIR="${GITHUB_WORKSPACE}/python/build/$(ls python/build | grep -i lib)/triton/instrumentation"
-          if [ ! -d "${INSTRUMENTATION_LIB_DIR}" ]; then
-            echo "Could not find '${INSTRUMENTATION_LIB_DIR}'" ; exit -1
-          fi
-          cd python/test/unit
-          python3 -m pytest -s -n 8 --ignore=hopper/test_flashattention.py --ignore=language/test_line_info.py --ignore=language/test_subprocess.py --ignore=test_debug.py
-          python3 -m pytest -s -n 8 language/test_subprocess.py
-          python3 -m pytest -s -n 8 test_debug.py --forked
-          # Run test_line_info.py separately with TRITON_DISABLE_LINE_INFO=0
-          TRITON_DISABLE_LINE_INFO=0 python3 -m pytest -s language/test_line_info.py
-          # Run hopper/test_flashattention.py separately to avoid out of gpu memory
-          python3 -m pytest -s hopper/test_flashattention.py
-          TRITON_ALWAYS_COMPILE=1 TRITON_DISABLE_LINE_INFO=0 LLVM_PASS_PLUGIN_PATH=${INSTRUMENTATION_LIB_DIR}/libGPUInstrumentationTestLib.so \
-          python3 -m pytest --capture=tee-sys -rfs -vvv instrumentation/test_gpuhello.py
-      - name: Run interpreter tests
-        if: ${{ matrix.runner[0] == 'h100-runner-set' }}
-        env:
-          TRITON_INTERPRET: "1"
-        run: |
-          cd python/test/unit
-          python3 -m pytest -s -n 16 -m interpreter language/test_core.py language/test_standard.py \
-           language/test_random.py language/test_block_pointer.py language/test_subprocess.py language/test_line_info.py \
-           runtime/test_autotuner.py::test_kwargs[False]\
-           ../../tutorials/06-fused-attention.py::test_op --device cpu
-      - name: Run regression tests
-        run: |
-          cd python/test/regression
-          python3 -m pytest -s -n 8 .
-      - name: Run C++ unittests
-        run: |
-          cd python
-          cd "build/$(ls build | grep -i cmake)"
-          ctest -j32
-      - name: Run Proton tests
-        run: |
-          cd third_party/proton
-          python3 -m pytest -s test
-      - # If we're on branch `main`, save the ccache Triton compilation artifacts
-        # to the cache so they can be used by other (non-main) CI runs.
-        #
-        # (It wouldn't be a problem to save the cache on every run, because github
-        # evicts cache entries LRU, but maybe this saves a bit of time in CI.)
-        name: Save ccache and Triton compilation artifacts to cache
-        if: github.ref == 'refs/heads/main'
-        uses: actions/cache/save@v4
-        with:
-          path: ~/.triton/cache ~/.cache/ccache
-          key: triton-artifacts-${{ runner.os }}-${{ runner.arch }}-${{ runner.name }}-llvm-${{ steps.cache-key.outputs.llvm }}-${{ steps.cache-key.outputs.datetime }}
-      - name: Inspect cache directories
-        run: |
-          mkdir -p ~/.triton
-          ls -alh ~/.triton
-          du -sh ~/.triton/**
-
-          mkdir -p ~/.cache/ccache
-          ls -alh ~/.cache/ccache
-          du -sh ~/.cache/ccache
-  Integration-Tests-AMD:
-    needs: Runner-Preparation
-    if: needs.Runner-Preparation.outputs.matrix-HIP != ''
-    runs-on: ${{ matrix.runner }}
-    timeout-minutes: 30
-    strategy:
-      matrix:
-        runner: ${{fromJson(needs.Runner-Preparation.outputs.matrix-HIP)}}
-    name: Integration-Tests (${{matrix.runner[1] == 'gfx90a' && 'mi210' || 'mi300x'}})
-    container:
-      image: rocm/pytorch:rocm6.1_ubuntu22.04_py3.10_pytorch_2.4
-      options: --device=/dev/kfd --device=/dev/dri --security-opt seccomp=unconfined --group-add video --user root
-    steps:
-      - name: Checkout
-        uses: actions/checkout@v4
-        with:
-          submodules: 'true'
-      - name: Compute cache keys
-        id: cache-key
-        run: |
-          llvm_file="cmake/llvm-hash.txt"
-          nvidia_file="cmake/nvidia-toolchain-version.json"
-          json_file="cmake/json-version.txt"
-
-          # Check if files exist before proceeding
-          if [[ ! -f "$llvm_file" || ! -f "$nvidia_file" || ! -f "$json_file" ]]; then
-            echo "Error: Required dependency files are missing."
-            exit 1
-          fi
-
-          # Process the files if they exist
-          echo "llvm=$(cat $llvm_file | cut -c 1-8)" >> $GITHUB_OUTPUT
-          echo "nvidia=$(sha256sum $nvidia_file | cut -d ' ' -f 1)" >> $GITHUB_OUTPUT
-          echo "json=$(cat $json_file)" >> $GITHUB_OUTPUT
-          echo "datetime=$(date -u -Iseconds)" >> $GITHUB_OUTPUT
-        shell: bash
-      - name: Cache build dependencies
-        uses: actions/cache@v4
-        with:
-          # Note that we cannot use environment variables here given there is
-          # no shell to interpret them in the paths.
-          path: |
-            ~/.triton/llvm
-            ~/.triton/nvidia
-            ~/.triton/json
-          key: ${{ runner.os }}-${{ runner.arch }}-llvm-${{ steps.cache-key.outputs.llvm }}-nvidia-${{ steps.cache-key.outputs.nvidia }}-json-${{ steps.cache-key.outputs.json }}
-      - # Cache ~/.triton/cache because the vast majority of unit test time is
-        # spent compiling.  Triton won't (well, should not) use these cached files
-        # if something internal to Triton changes, because Triton's internal
-        # source code is part of the cache key.
-        #
-        # Similarly, cache ~/.cache/ccache to speed up compilation.
-        #
-        # On branch `main` we always start from an empty cache, i.e. we skip the
-        # "restore" step.  This is to prevent the caches from accumulating stale
-        # files over time.
-        name: Restore cache of ccache and Triton compilation artifacts
-        if: github.event_name != 'push'
-        uses: actions/cache/restore@v4
-        with:
-          path: |
-            ~/.triton/cache
-            ~/.cache/ccache
-          # Restore the most recent cache entry.
-          restore-keys: triton-artifacts-${{ runner.os }}-${{ runner.arch }}-${{ runner.name }}-llvm-${{ steps.cache-key.outputs.llvm }}-
-          # We expect this cache key never to hit and for us to fall back
-          # unconditionally to the restore-key, so it doesn't actually matter
-          # what we put here (so long as it doesn't hit an existing key).
-          key: triton-artifacts-${{ runner.os }}-${{ runner.arch }}-${{ runner.name }}-llvm-${{ steps.cache-key.outputs.llvm }}-${{ steps.cache-key.outputs.datetime }}
-      - name: Inspect cache directory
-        run: |
-          mkdir -p ~/.triton
-          ls -alh ~/.triton
-      - name: Update PATH
-        run: |
-          echo "/opt/rocm/llvm/bin" >> $GITHUB_PATH
-      - name: Install pip dependencies
-        run: |
-          python3 -m pip install --upgrade pip
-          python3 -m pip install lit
-      - name: Install Triton
-        id: amd-install-triton
-        run: |
-          echo "PATH is '$PATH'"
-          pip uninstall -y triton
-          cd python
-          pip install -v -e '.[tests]'
-      - name: Clean up after an unsuccessful build
-        if: ${{ !success() && steps.amd-install-triton.outcome != 'success' }}
-        run: |
-          rm -rf ~/.triton
-      - name: Run lit tests
-        run: |
-          cd python
-          LIT_TEST_DIR="build/$(ls build | grep -i cmake)/test"
-          if [ ! -d "${LIT_TEST_DIR}" ]; then
-            echo "Could not find '${LIT_TEST_DIR}'" ; exit -1
-          fi
-          lit -v "${LIT_TEST_DIR}"
-      - name: Run python tests on HIP
-        run: |
-          INSTRUMENTATION_LIB_DIR="${GITHUB_WORKSPACE}/python/triton/instrumentation"
-          if [ ! -d "${INSTRUMENTATION_LIB_DIR}" ]; then
-            echo "Could not find '${INSTRUMENTATION_LIB_DIR}'" ; exit -1
-          fi
-          pytest --capture=tee-sys -rfs python/tutorials/06-fused-attention.py
-          cd python/test/unit
-          pytest --capture=tee-sys -rfs -n 16 language runtime \
-                 --ignore=language/test_line_info.py \
-                 --ignore=test_debug.py
-          # TODO: uncomment
-          # pytest --capture=tee-sys -rfs test_debug.py
-          TRITON_ALWAYS_COMPILE=1 TRITON_DISABLE_LINE_INFO=0 LLVM_PASS_PLUGIN_PATH=${INSTRUMENTATION_LIB_DIR}/libGPUInstrumentationTestLib.so \
-          pytest --capture=tee-sys -rfs -vvv instrumentation/test_gpuhello.py
-
-          # Run test_line_info.py separately with TRITON_DISABLE_LINE_INFO=0
-          TRITON_DISABLE_LINE_INFO=0 python3 -m pytest -s -n 8 language/test_line_info.py
-      - name: Run regression tests
-        run: |
-          # Reenable test_functional_regression.py once it's fixed
-          cd python/test/regression
-          python3 -m pytest -s -n 8 ./test_cast_matmul.py
-      - name: Run Proton tests
-        run: |
-          cd third_party/proton
-          python3 -m pytest -s test
-      - name: Run C++ unittests
-        run: |
-          cd python
-          cd "build/$(ls build | grep -i cmake)"
-          ctest -j32
-      - # If we're on branch `main`, save the ccache Triton compilation artifacts
-        # to the cache so they can be used by other (non-main) CI runs.
-        #
-        # (It wouldn't be a problem to save the cache on every run, because github
-        # evicts cache entries LRU, but maybe this saves a bit of time in CI.)
-        name: Save ccache and Triton compilation artifacts to cache
-        if: github.ref == 'refs/heads/main'
-        uses: actions/cache/save@v4
-        with:
-          path: ~/.triton/cache ~/.cache/ccache
-          key: triton-artifacts-${{ runner.os }}-${{ runner.arch }}-${{ runner.name }}-llvm-${{ steps.cache-key.outputs.llvm }}-${{ steps.cache-key.outputs.datetime }}
-      - name: Inspect cache directories
-        run: |
-          mkdir -p ~/.triton
-          ls -alh ~/.triton
-          du -sh ~/.triton/**
-
-          mkdir -p ~/.cache/ccache
-          ls -alh ~/.cache/ccache
-          du -sh ~/.cache/ccache
-      - name: Clean up caches
-        run: |
-          rm -rf ~/.triton/cache
-  Build-Tests:
-    needs: Runner-Preparation
-    if: needs.Runner-Preparation.outputs.matrix-MACOS != ''
-    runs-on: ${{ matrix.runner }}
-    timeout-minutes: 30
-    strategy:
-      matrix:
-        runner: ${{fromJson(needs.Runner-Preparation.outputs.matrix-MACOS)}}
-    steps:
-      - name: Checkout
-        uses: actions/checkout@v4
-        with:
-          submodules: "true"
-      - name: Install brew dependencies
-        run: |
-          brew update
-          brew install ccache llvm@19 lld
-      - name: Compute cache keys
-        id: cache-key
-        run: |
-          llvm_file="cmake/llvm-hash.txt"
-          nvidia_file="cmake/nvidia-toolchain-version.json"
-          json_file="cmake/json-version.txt"
-
-          # Check if files exist before proceeding
-          if [[ ! -f "$llvm_file" || ! -f "$nvidia_file" || ! -f "$json_file" ]]; then
-            echo "Error: Required dependency files are missing."
-            exit 1
-          fi
-
-          # Process the files if they exist
-          echo "llvm=$(cat $llvm_file | cut -c 1-8)" >> $GITHUB_OUTPUT
-          echo "nvidia=$(sha256sum $nvidia_file | cut -d ' ' -f 1)" >> $GITHUB_OUTPUT
-          echo "json=$(cat $json_file)" >> $GITHUB_OUTPUT
-          echo "datetime=$(date -u -Iseconds)" >> $GITHUB_OUTPUT
-        shell: bash
-      - name: Cache build dependencies
-        uses: actions/cache@v4
-        with:
-          # Note that we cannot use environment variables here given there is
-          # no shell to interpret them in the paths.
-          path: |
-            ~/.triton/llvm
-            ~/.triton/nvidia
-            ~/.triton/json
-          key: ${{ runner.os }}-${{ runner.arch }}-llvm-${{ steps.cache-key.outputs.llvm }}-nvidia-${{ steps.cache-key.outputs.nvidia }}-json-${{ steps.cache-key.outputs.json }}
-      - # Cache ~/.triton/cache because the vast majority of unit test time is
-        # spent compiling.  Triton won't (well, should not) use these cached files
-        # if something internal to Triton changes, because Triton's internal
-        # source code is part of the cache key.
-        #
-        # Similarly, cache ~/.cache/ccache to speed up compilation.
-        #
-        # On branch `main` we always start from an empty cache, i.e. we skip the
-        # "restore" step.  This is to prevent the caches from accumulating stale
-        # files over time.
-        name: Restore cache of ccache and Triton compilation artifacts
-        if: github.event_name != 'push'
-        uses: actions/cache/restore@v4
-        with:
-          path: |
-            ~/.triton/cache
-            ~/.cache/ccache
-          # Restore the most recent cache entry.
-          restore-keys: triton-artifacts-${{ runner.os }}-${{ runner.arch }}-${{ runner.name }}-llvm-${{ steps.cache-key.outputs.llvm }}-
-          # We expect this cache key never to hit and for us to fall back
-          # unconditionally to the restore-key, so it doesn't actually matter
-          # what we put here (so long as it doesn't hit an existing key).
-          key: triton-artifacts-${{ runner.os }}-${{ runner.arch }}-${{ runner.name }}-llvm-${{ steps.cache-key.outputs.llvm }}-${{ steps.cache-key.outputs.datetime }}
-      - name: Inspect cache directory
-        run: |
-          mkdir -p ~/.triton
-          ls -alh ~/.triton
-      - name: Update PATH
-        run: |
-          echo "$HOME/.local/bin" >> $GITHUB_PATH
-          echo "/opt/homebrew/opt/llvm/bin" >> $GITHUB_PATH
-      - name: Install pip dependencies
-        run: |
-          python3 -m venv ~/.venv
-          source ~/.venv/bin/activate
-          python3 -m pip install --upgrade pip
-          python3 -m pip install cython setuptools wheel cmake==3.24 ninja pytest-xdist lit pybind11
-      - name: Install Triton
-        env:
-          TRITON_BUILD_WITH_CCACHE: "true"
-          TRITON_BUILD_WITH_O1: "true"
-          # macos-latest has 3 vcpus and 7GB DRAM, to save memory we limit the number of jobs to 3
-          # https://docs.github.com/en/actions/using-github-hosted-runners/about-github-hosted-runners/about-github-hosted-runners#standard-github-hosted-runners-for-public-repositories
-          MAX_JOBS: 3
-        run: |
-          source ~/.venv/bin/activate
-          echo "PATH is '$PATH'"
-          cd python
-          python3 -m pip install --no-build-isolation .
-      - # If we're on branch `main`, save the ccache Triton compilation artifacts
-        # to the cache so they can be used by other (non-main) CI runs.
-        #
-        # (It wouldn't be a problem to save the cache on every run, because github
-        # evicts cache entries LRU, but maybe this saves a bit of time in CI.)
-        name: Save ccache and Triton compilation artifacts to cache
-        if: github.ref == 'refs/heads/main'
-        uses: actions/cache/save@v4
-        with:
-          path: ~/.triton/cache ~/.cache/ccache
-          key: triton-artifacts-${{ runner.os }}-${{ runner.arch }}-${{ runner.name }}-llvm-${{ steps.cache-key.outputs.llvm }}-${{ steps.cache-key.outputs.datetime }}
-      - name: Inspect cache directories
-        run: |
-          mkdir -p ~/.triton
-          ls -alh ~/.triton
-          du -sh ~/.triton/**
-
-          mkdir -p ~/.cache/ccache
-          ls -alh ~/.cache/ccache
-          du -sh ~/.cache/ccache
diff --git a/.github/workflows/integration-tests.yml.in b/.github/workflows/integration-tests.yml.in
deleted file mode 100644
index d84ac6f33466d..0000000000000
--- a/.github/workflows/integration-tests.yml.in
+++ /dev/null
@@ -1,482 +0,0 @@
-# integration-tests.yml.in is used to generate integration-tests.yml by
-# expanding yaml anchors, because github actions don't support them
-# (https://github.com/actions/runner/issues/1182).  pre-commit will do this for
-# you automatically.
-
-
-name: Integration Tests
-
-on:
-  workflow_dispatch:
-  pull_request:
-    branches-ignore: ['llvm-**']
-  merge_group:
-    branches: [main, 'dev-**']
-    types: [checks_requested]
-  push:
-    branches: [main]
-
-concurrency:
-  group: ${{ github.ref }}
-  cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
-
-permissions: read-all
-
-env:
-  TRITON_BUILD_WITH_CLANG_LLD: "TRUE"
-  TRITON_USE_ASSERT_ENABLED_LLVM: "TRUE"
-  TRITON_DISABLE_LINE_INFO: 1
-  PROTON_SKIP_PC_SAMPLING_TEST: 1
-
-jobs:
-  Runner-Preparation:
-    runs-on: ubuntu-latest
-    timeout-minutes: 30
-    outputs:
-      matrix-CUDA: ${{ steps.set-matrix.outputs.matrix-CUDA }}
-      matrix-HIP: ${{ steps.set-matrix.outputs.matrix-HIP }}
-      matrix-MACOS: ${{ steps.set-matrix.outputs.matrix-MACOS }}
-    steps:
-      - name: Decide pre-submit integration test enablement
-        # Always enable integration tests for pre-submit pull requests.
-        if: github.event_name == 'pull_request'
-        run: |
-          echo "enable_integration=true" >> $GITHUB_ENV
-
-      - name: Checkout post-submit commits
-        if: github.event_name == 'push'
-        uses: actions/checkout@v4
-        with:
-          # Only fetch two commits to check the latest changed files.
-          fetch-depth: 2
-
-      - name: Detect if build deps (e.g. LLVM hash) changed
-        id: detect-change
-        if: github.event_name == 'push'
-        uses: tj-actions/changed-files@v45
-        with:
-          files: |
-            cmake/*.txt
-            cmake/*.json
-
-      - name: Detect if enough time has passed since last post-submit run
-        id: detect-time
-        if: github.event_name == 'push'
-        run: |
-          GITHUB_TOKEN=${{ secrets.GITHUB_TOKEN }}
-          REPO_NAME="${{ github.repository }}"
-          # ID of integration-tests workflow
-          WORKFLOW_ID="11678186"
-
-          # Fetch the last run time of this workflow
-          LAST_RUN=$(curl -s \
-            -H "Authorization: token $GITHUB_TOKEN" \
-            -H "Accept: application/vnd.github.v3+json" \
-            "https://api.github.com/repos/$REPO_NAME/actions/workflows/$WORKFLOW_ID/runs?branch=main&status=success&per_page=1" \
-            | jq -r '.workflow_runs[0].updated_at')
-
-          # Convert to timestamp
-          LAST_RUN_TS=$(date -d "$LAST_RUN" +%s)
-          NOW_TS=$(date +%s)
-          DIFF=$(( (NOW_TS - LAST_RUN_TS) / 3600 )) # Difference in hours
-
-          echo "Last run was $DIFF hours ago."
-
-          if [ "$DIFF" -ge 4 ]; then
-            echo "Will run CI; last build was long enough ago."
-            echo "n_hours_since_last_run=true" >> $GITHUB_ENV
-          else
-            echo "Will not run CI; last build was too recent."
-            echo "n_hours_since_last_run=false" >> $GITHUB_ENV
-          fi
-
-      # We want to run integration tests on the main branch (i.e. post-submit)
-      # occasionally, because pre-submit CI caches will only read from caches
-      # generated from the main branch (or the PR's branch), and we want these
-      # caches to be recent.
-      #
-      # But we also don't want to run the tests on *every* commit, because this
-      # would compete for resources with pre-commit CI (and the whole point of
-      # caching is to speed up CI).
-      #
-      # As a compromise, run every N hours, or if a build dependency changes
-      # (e.g.  we update the LLVM hash).
-      - name: Decide whether to run integration tests post-submit
-        if: |
-          github.event_name == 'push' &&
-          (steps.detect-change.outputs.any_changed == 'true' ||
-           env.n_hours_since_last_run == 'true')
-        run: |
-          echo "enable_integration=true" >> $GITHUB_ENV
-
-      - name: Prepare runner matrix
-        id: set-matrix
-        if: env.enable_integration == 'true'
-        run: |
-          if [ x"${{ github.repository }}" == x"triton-lang/triton" ]; then
-            echo '::set-output name=matrix-CUDA::[["a100-runner-set"], ["h100-runner-set"]]'
-            echo '::set-output name=matrix-HIP::[["self-hosted", "gfx90a"], ["self-hosted", "gfx942"]]'
-            echo '::set-output name=matrix-MACOS::[["macos-latest"]]'
-          else
-            echo '::set-output name=matrix-CUDA::["ubuntu-latest"]'
-            echo '::set-output name=matrix-HIP::["ubuntu-latest"]'
-            echo '::set-output name=matrix-MACOS::[["macos-latest"]]'
-          fi
-
-  pre-commit:
-    name: pre-commit (code formatting)
-    needs: Runner-Preparation
-    runs-on: ubuntu-latest
-
-    steps:
-      - name: Checkout
-        uses: actions/checkout@v4
-
-      - uses: actions/setup-python@v5
-        with:
-          python-version: '3.12'
-          cache: 'pip'
-
-      - name: Compute hash of pre-commit config
-        id: cache-key
-        run: |
-          echo "pre_commit_hash=$(sha256sum .pre-commit-config.yaml | cut -d ' ' -f 1)" >> $GITHUB_OUTPUT
-        shell: bash
-
-      - name: Cache pre-commit's cache dir
-        uses: actions/cache@v4
-        with:
-          # Note that we cannot use environment variables here given there is
-          # no shell to interpret them in the paths.
-          path: |
-            ~/.cache/pre-commit
-          key: ${{ runner.os }}-${{ steps.cache-key.outputs.pre_commit_hash }}
-
-      - name: Check pre-commit
-        run: |
-          python3 -m pip install --upgrade pre-commit
-          # TODO: ignore the first yapf failure until https://github.com/google/yapf/issues/1164 is fixed
-          python3 -m pre_commit run --all-files --verbose yapf &> /dev/null || true
-          # If first run of yapf worked and made changes reset the tree to the original state
-          git reset --hard
-          python3 -m pre_commit run --all-files --verbose
-
-      - name: Print diff of changes if pre-commit failed
-        if: failure()
-        run: |
-          git diff
-
-
-  Integration-Tests:
-    needs: Runner-Preparation
-    if: needs.Runner-Preparation.outputs.matrix-CUDA != ''
-
-    runs-on: ${{ matrix.runner }}
-    timeout-minutes: 30
-
-    strategy:
-      matrix:
-        runner: ${{fromJson(needs.Runner-Preparation.outputs.matrix-CUDA)}}
-
-    steps:
-      - name: Checkout
-        uses: actions/checkout@v4
-        with:
-          submodules: "true"
-
-      - &compute-cache-keys-step
-        name: Compute cache keys
-        id: cache-key
-        run: |
-          llvm_file="cmake/llvm-hash.txt"
-          nvidia_file="cmake/nvidia-toolchain-version.json"
-          json_file="cmake/json-version.txt"
-
-          # Check if files exist before proceeding
-          if [[ ! -f "$llvm_file" || ! -f "$nvidia_file" || ! -f "$json_file" ]]; then
-            echo "Error: Required dependency files are missing."
-            exit 1
-          fi
-
-          # Process the files if they exist
-          echo "llvm=$(cat $llvm_file | cut -c 1-8)" >> $GITHUB_OUTPUT
-          echo "nvidia=$(sha256sum $nvidia_file | cut -d ' ' -f 1)" >> $GITHUB_OUTPUT
-          echo "json=$(cat $json_file)" >> $GITHUB_OUTPUT
-          echo "datetime=$(date -u -Iseconds)" >> $GITHUB_OUTPUT
-        shell: bash
-
-      - &cache-build-dependencies-step
-        name: Cache build dependencies
-        uses: actions/cache@v4
-        with:
-          # Note that we cannot use environment variables here given there is
-          # no shell to interpret them in the paths.
-          path: |
-            ~/.triton/llvm
-            ~/.triton/nvidia
-            ~/.triton/json
-          key: ${{ runner.os }}-${{ runner.arch }}-llvm-${{ steps.cache-key.outputs.llvm }}-nvidia-${{ steps.cache-key.outputs.nvidia }}-json-${{ steps.cache-key.outputs.json }}
-
-      # Cache ~/.triton/cache because the vast majority of unit test time is
-      # spent compiling.  Triton won't (well, should not) use these cached files
-      # if something internal to Triton changes, because Triton's internal
-      # source code is part of the cache key.
-      #
-      # Similarly, cache ~/.cache/ccache to speed up compilation.
-      #
-      # On branch `main` we always start from an empty cache, i.e. we skip the
-      # "restore" step.  This is to prevent the caches from accumulating stale
-      # files over time.
-      - &restore-build-artifacts-step
-        name: Restore cache of ccache and Triton compilation artifacts
-        if: github.event_name != 'push'
-        uses: actions/cache/restore@v4
-        with:
-          path: |
-            ~/.triton/cache
-            ~/.cache/ccache
-          # Restore the most recent cache entry.
-          restore-keys: triton-artifacts-${{ runner.os }}-${{ runner.arch }}-${{ runner.name }}-llvm-${{ steps.cache-key.outputs.llvm }}-
-          # We expect this cache key never to hit and for us to fall back
-          # unconditionally to the restore-key, so it doesn't actually matter
-          # what we put here (so long as it doesn't hit an existing key).
-          key: triton-artifacts-${{ runner.os }}-${{ runner.arch }}-${{ runner.name }}-llvm-${{ steps.cache-key.outputs.llvm }}-${{ steps.cache-key.outputs.datetime }}
-
-      - &inspect-cache-directory-step
-        name: Inspect cache directory
-        run: |
-          mkdir -p ~/.triton
-          ls -alh ~/.triton
-
-      - name: Update PATH
-        run: |
-          echo "$HOME/.local/bin" >> $GITHUB_PATH
-
-      - name: Install pip dependencies
-        run: |
-          python3 -m pip install --upgrade pip
-          python3 -m pip install cython setuptools wheel cmake==3.24 ninja pytest-forked pytest-xdist lit
-
-      - name: Install Triton
-        env:
-          TRITON_BUILD_WITH_CCACHE: "true"
-          CUDA_HOME: "/usr/local/cuda"
-        run: |
-          echo "PATH is '$PATH'"
-          cd python
-          python3 -m pip install '.[tests]'
-
-      - &run-lit-tests-step
-        name: Run lit tests
-        run: |
-          cd python
-          LIT_TEST_DIR="build/$(ls build | grep -i cmake)/test"
-          if [ ! -d "${LIT_TEST_DIR}" ]; then
-            echo "Could not find '${LIT_TEST_DIR}'" ; exit -1
-          fi
-          lit -v "${LIT_TEST_DIR}"
-
-      - name: Run python tests on CUDA
-        run: |
-          INSTRUMENTATION_LIB_DIR="${GITHUB_WORKSPACE}/python/build/$(ls python/build | grep -i lib)/triton/instrumentation"
-          if [ ! -d "${INSTRUMENTATION_LIB_DIR}" ]; then
-            echo "Could not find '${INSTRUMENTATION_LIB_DIR}'" ; exit -1
-          fi
-          cd python/test/unit
-          python3 -m pytest -s -n 8 --ignore=hopper/test_flashattention.py --ignore=language/test_line_info.py --ignore=language/test_subprocess.py --ignore=test_debug.py
-          python3 -m pytest -s -n 8 language/test_subprocess.py
-          python3 -m pytest -s -n 8 test_debug.py --forked
-          # Run test_line_info.py separately with TRITON_DISABLE_LINE_INFO=0
-          TRITON_DISABLE_LINE_INFO=0 python3 -m pytest -s language/test_line_info.py
-          # Run hopper/test_flashattention.py separately to avoid out of gpu memory
-          python3 -m pytest -s hopper/test_flashattention.py
-          TRITON_ALWAYS_COMPILE=1 TRITON_DISABLE_LINE_INFO=0 LLVM_PASS_PLUGIN_PATH=${INSTRUMENTATION_LIB_DIR}/libGPUInstrumentationTestLib.so \
-          python3 -m pytest --capture=tee-sys -rfs -vvv instrumentation/test_gpuhello.py
-
-      - name: Run interpreter tests
-        if: ${{ matrix.runner[0] == 'h100-runner-set' }}
-        env:
-          TRITON_INTERPRET: "1"
-        run: |
-          cd python/test/unit
-          python3 -m pytest -s -n 16 -m interpreter language/test_core.py language/test_standard.py \
-           language/test_random.py language/test_block_pointer.py language/test_subprocess.py language/test_line_info.py \
-           runtime/test_autotuner.py::test_kwargs[False]\
-           ../../tutorials/06-fused-attention.py::test_op --device cpu
-
-      - name: Run regression tests
-        run: |
-          cd python/test/regression
-          python3 -m pytest -s -n 8 .
-
-      - &run-cpp-unittests-step
-        name: Run C++ unittests
-        run: |
-          cd python
-          cd "build/$(ls build | grep -i cmake)"
-          ctest -j32
-
-      - &run-proton-tests-step
-        name: Run Proton tests
-        run: |
-          cd third_party/proton
-          python3 -m pytest -s test
-
-      # If we're on branch `main`, save the ccache Triton compilation artifacts
-      # to the cache so they can be used by other (non-main) CI runs.
-      #
-      # (It wouldn't be a problem to save the cache on every run, because github
-      # evicts cache entries LRU, but maybe this saves a bit of time in CI.)
-      - &save-build-artifacts-step
-        name: Save ccache and Triton compilation artifacts to cache
-        if: github.ref == 'refs/heads/main'
-        uses: actions/cache/save@v4
-        with:
-          path: ~/.triton/cache ~/.cache/ccache
-          key: triton-artifacts-${{ runner.os }}-${{ runner.arch }}-${{ runner.name }}-llvm-${{ steps.cache-key.outputs.llvm }}-${{ steps.cache-key.outputs.datetime }}
-
-      - &inspect-cache-directories-step
-        name: Inspect cache directories
-        run: |
-          mkdir -p ~/.triton
-          ls -alh ~/.triton
-          du -sh ~/.triton/**
-
-          mkdir -p ~/.cache/ccache
-          ls -alh ~/.cache/ccache
-          du -sh ~/.cache/ccache
-
-  Integration-Tests-AMD:
-    needs: Runner-Preparation
-    if: needs.Runner-Preparation.outputs.matrix-HIP != ''
-
-    runs-on: ${{ matrix.runner }}
-    timeout-minutes: 30
-
-    strategy:
-      matrix:
-        runner: ${{fromJson(needs.Runner-Preparation.outputs.matrix-HIP)}}
-
-    name: Integration-Tests (${{matrix.runner[1] == 'gfx90a' && 'mi210' || 'mi300x'}})
-
-    container:
-      image: rocm/pytorch:rocm6.1_ubuntu22.04_py3.10_pytorch_2.4
-      options: --device=/dev/kfd --device=/dev/dri --security-opt seccomp=unconfined --group-add video --user root
-
-    steps:
-      - name: Checkout
-        uses: actions/checkout@v4
-        with:
-          submodules: 'true'
-
-      - *compute-cache-keys-step
-      - *cache-build-dependencies-step
-      - *restore-build-artifacts-step
-      - *inspect-cache-directory-step
-
-      - name: Update PATH
-        run: |
-          echo "/opt/rocm/llvm/bin" >> $GITHUB_PATH
-
-      - name: Install pip dependencies
-        run: |
-          python3 -m pip install --upgrade pip
-          python3 -m pip install lit
-
-      - name: Install Triton
-        id: amd-install-triton
-        run: |
-          echo "PATH is '$PATH'"
-          pip uninstall -y triton
-          cd python
-          pip install -v -e '.[tests]'
-
-      - name: Clean up after an unsuccessful build
-        if: ${{ !success() && steps.amd-install-triton.outcome != 'success' }}
-        run: |
-          rm -rf ~/.triton
-
-      - *run-lit-tests-step
-
-      - name: Run python tests on HIP
-        run: |
-          INSTRUMENTATION_LIB_DIR="${GITHUB_WORKSPACE}/python/triton/instrumentation"
-          if [ ! -d "${INSTRUMENTATION_LIB_DIR}" ]; then
-            echo "Could not find '${INSTRUMENTATION_LIB_DIR}'" ; exit -1
-          fi
-          pytest --capture=tee-sys -rfs python/tutorials/06-fused-attention.py
-          cd python/test/unit
-          pytest --capture=tee-sys -rfs -n 16 language runtime \
-                 --ignore=language/test_line_info.py \
-                 --ignore=test_debug.py
-          # TODO: uncomment
-          # pytest --capture=tee-sys -rfs test_debug.py
-          TRITON_ALWAYS_COMPILE=1 TRITON_DISABLE_LINE_INFO=0 LLVM_PASS_PLUGIN_PATH=${INSTRUMENTATION_LIB_DIR}/libGPUInstrumentationTestLib.so \
-          pytest --capture=tee-sys -rfs -vvv instrumentation/test_gpuhello.py
-
-          # Run test_line_info.py separately with TRITON_DISABLE_LINE_INFO=0
-          TRITON_DISABLE_LINE_INFO=0 python3 -m pytest -s -n 8 language/test_line_info.py
-
-      - name: Run regression tests
-        run: |
-          # Reenable test_functional_regression.py once it's fixed
-          cd python/test/regression
-          python3 -m pytest -s -n 8 ./test_cast_matmul.py
-
-      - *run-proton-tests-step
-      - *run-cpp-unittests-step
-      - *save-build-artifacts-step
-      - *inspect-cache-directories-step
-
-      - name: Clean up caches
-        run: |
-          rm -rf ~/.triton/cache
-
-  Build-Tests:
-    needs: Runner-Preparation
-    if: needs.Runner-Preparation.outputs.matrix-MACOS != ''
-    runs-on: ${{ matrix.runner }}
-    timeout-minutes: 30
-    strategy:
-      matrix:
-        runner: ${{fromJson(needs.Runner-Preparation.outputs.matrix-MACOS)}}
-    steps:
-      - name: Checkout
-        uses: actions/checkout@v4
-        with:
-          submodules: "true"
-      - name: Install brew dependencies
-        run: |
-          brew update
-          brew install ccache llvm@19 lld
-
-      - *compute-cache-keys-step
-      - *cache-build-dependencies-step
-      - *restore-build-artifacts-step
-      - *inspect-cache-directory-step
-
-      - name: Update PATH
-        run: |
-          echo "$HOME/.local/bin" >> $GITHUB_PATH
-          echo "/opt/homebrew/opt/llvm/bin" >> $GITHUB_PATH
-      - name: Install pip dependencies
-        run: |
-          python3 -m venv ~/.venv
-          source ~/.venv/bin/activate
-          python3 -m pip install --upgrade pip
-          python3 -m pip install cython setuptools wheel cmake==3.24 ninja pytest-xdist lit pybind11
-      - name: Install Triton
-        env:
-          TRITON_BUILD_WITH_CCACHE: "true"
-          TRITON_BUILD_WITH_O1: "true"
-          # macos-latest has 3 vcpus and 7GB DRAM, to save memory we limit the number of jobs to 3
-          # https://docs.github.com/en/actions/using-github-hosted-runners/about-github-hosted-runners/about-github-hosted-runners#standard-github-hosted-runners-for-public-repositories
-          MAX_JOBS: 3
-        run: |
-          source ~/.venv/bin/activate
-          echo "PATH is '$PATH'"
-          cd python
-          python3 -m pip install --no-build-isolation .
-
-      - *save-build-artifacts-step
-      - *inspect-cache-directories-step
diff --git a/.github/workflows/torch-inductor-tests.yml b/.github/workflows/torch-inductor-tests.yml
deleted file mode 100644
index 3d8f980952917..0000000000000
--- a/.github/workflows/torch-inductor-tests.yml
+++ /dev/null
@@ -1,45 +0,0 @@
-name: Torchinductor
-
-on:
-  workflow_run:
-    workflows: ["Wheels"]
-    types: [completed]
-  workflow_dispatch:
-
-permissions: read-all
-
-jobs:
-  Runner-Preparation:
-    runs-on: ubuntu-latest
-    outputs:
-      matrix: ${{ steps.set-matrix.outputs.matrix }}
-    steps:
-      - name: Prepare runner matrix
-        id: set-matrix
-        run: |
-          echo '::set-output name=matrix::[["self-hosted", "A100"]]'
-
-  Torch-Inductor-Tests:
-    needs: Runner-Preparation
-    timeout-minutes: 240  # 4 hours
-    runs-on: ${{ matrix.runner }}
-    strategy:
-      matrix:
-        runner: ${{fromJson(needs.Runner-Preparation.outputs.matrix)}}
-    steps:
-      - name: Checkout
-        uses: actions/checkout@v4
-      - name: Packages
-        run: |
-          ./.github/workflows/torch-inductor/scripts/install_torchinductor.sh torchbench
-      - name: Environment
-        run: |
-          source /tmp/torchinductor_venv/bin/activate
-          ./.github/workflows/torch-inductor/scripts/install_triton.sh
-      - name: Performance
-        run: |
-          ./.github/workflows/torch-inductor/scripts/run_torchinductor_perf.sh torchbench
-      # Runs too long time
-      #- name: Accuracy
-      #  run: |
-      #    ./.github/workflows/torch-inductor/scripts/run_torchinductor_acc.sh torchbench
diff --git a/.github/workflows/torch-inductor/scripts/check_acc.py b/.github/workflows/torch-inductor/scripts/check_acc.py
deleted file mode 100644
index c89976acab112..0000000000000
--- a/.github/workflows/torch-inductor/scripts/check_acc.py
+++ /dev/null
@@ -1,11 +0,0 @@
-import csv
-import sys
-
-file_path = sys.argv[1]
-with open(file_path) as f:
-    reader = csv.reader(f)
-    for i, row in enumerate(reader):
-        if i == 0:
-            continue
-        if row[3] != "pass":
-            print(f"{row[1]} failed on device {row[0]} with batch size {row[2]}")
diff --git a/.github/workflows/torch-inductor/scripts/check_perf.py b/.github/workflows/torch-inductor/scripts/check_perf.py
deleted file mode 100644
index 212eadad55ae5..0000000000000
--- a/.github/workflows/torch-inductor/scripts/check_perf.py
+++ /dev/null
@@ -1,70 +0,0 @@
-import argparse
-import csv
-from collections import namedtuple
-
-# Create a named tuple for the output of the benchmark
-BenchmarkOutput = namedtuple('BenchmarkOutput', ['dev', 'name', 'batch_size', 'speedup', 'latency'])
-
-
-def parse_output(file_path: str) -> dict:
-    entries = {}
-    with open(file_path) as f:
-        reader = csv.reader(f)
-        for i, row in enumerate(reader):
-            if i == 0 or len(row) < 5:
-                continue
-            dev = row[0]
-            name = row[1]
-            batch_size = row[2]
-            speedup = float(row[3])
-            latency = float(row[4])
-            entries[name] = BenchmarkOutput(dev, name, batch_size, speedup, latency)
-    return entries
-
-
-def compare(baseline: dict, new: dict, threshold: float, geomean_threshold: float) -> bool:
-    baseline_geomean = 1.0
-    new_geomean = 1.0
-    for key in new:
-        if key not in baseline:
-            print(f"New benchmark {key} not found in baseline")
-        baseline_latency = baseline[key].latency
-        new_latency = new[key].latency
-        if baseline_latency == 0:
-            print(f"Baseline latency for {key} is 0")
-            continue
-        elif new_latency == 0:
-            print(f"New latency for {key} is 0")
-            continue
-
-        if new_latency < baseline_latency * (1 - threshold):
-            print(f"New benchmark {key} is faster than baseline: {new_latency} vs {baseline_latency}")
-        elif new_latency > baseline_latency * (1 + threshold):
-            print(f"New benchmark {key} is slower than baseline: {new_latency} vs {baseline_latency}")
-        else:
-            print(f"New benchmark {key} is within threshold: {new_latency} vs {baseline_latency}")
-        baseline_geomean *= baseline[key].speedup
-        new_geomean *= new[key].speedup
-
-    baseline_geomean = baseline_geomean**(1 / len(baseline))
-    new_geomean = new_geomean**(1 / len(new))
-    print(f"Baseline geomean: {baseline_geomean}")
-    print(f"New geomean: {new_geomean}")
-    assert new_geomean >= baseline_geomean * (1 - geomean_threshold), \
-        f"New geomean is slower than baseline: {new_geomean} vs {baseline_geomean}"
-
-
-def main():
-    parser = argparse.ArgumentParser()
-    parser.add_argument('--baseline', required=True)
-    parser.add_argument('--new', required=True)
-    parser.add_argument('--threshold', type=float, default=0.1)
-    parser.add_argument('--geomean-threshold', type=float, default=0.02)
-    args = parser.parse_args()
-    baseline = parse_output(args.baseline)
-    new = parse_output(args.new)
-    compare(baseline, new, args.threshold, args.geomean_threshold)
-
-
-if __name__ == "__main__":
-    main()
diff --git a/.github/workflows/torch-inductor/scripts/common.sh b/.github/workflows/torch-inductor/scripts/common.sh
deleted file mode 100755
index 7e212a06a1ba9..0000000000000
--- a/.github/workflows/torch-inductor/scripts/common.sh
+++ /dev/null
@@ -1,9 +0,0 @@
-#!/bin/bash
-
-TEST_REPORTS_DIR=/tmp/torchinductor_reports
-PYTORCH_DIR=/tmp/pytorch
-MODELS=(timm_models huggingface torchbench)
-
-echo "$TEST_REPORTS_DIR"
-echo "$PYTORCH_DIR"
-echo "${MODELS[@]}"
diff --git a/.github/workflows/torch-inductor/scripts/install_torchinductor.sh b/.github/workflows/torch-inductor/scripts/install_torchinductor.sh
deleted file mode 100755
index 18bea1f1716f6..0000000000000
--- a/.github/workflows/torch-inductor/scripts/install_torchinductor.sh
+++ /dev/null
@@ -1,74 +0,0 @@
-#!/bin/bash
-
-# remember where we started
-ROOT="$(pwd)"
-MODEL_SPEC=$1
-
-# torchinductor venv
-whoami
-
-sudo apt-get update && sudo apt-get install -y python3-venv libgl1
-
-# clean up old venv
-rm -rf /tmp/torchinductor_venv
-python3 -m venv /tmp/torchinductor_venv
-# shellcheck source=/dev/null
-source /tmp/torchinductor_venv/bin/activate
-# shellcheck source=/dev/null
-source ./.github/workflows/torch-inductor/scripts/common.sh
-
-pip3 install --upgrade pip wheel setuptools
-
-# Install torchtext stable first. Bundling it in the same install as torch
-# nightly forces torch stable release to be installed instead.
-# From https://github.com/pytorch/text?tab=readme-ov-file#torchtext,
-# "WARNING: TorchText development is stopped and the 0.18 release (April 2024)
-# will be the last stable release of the library."
-pip3 install --force-reinstall torchtext
-
-# pytorch nightly
-pip3 install --force-reinstall --pre torch torchvision torchaudio torchrec --extra-index-url https://download.pytorch.org/whl/nightly/cu121
-# pytorch source to get torchbench for dynamo
-cd /tmp || exit
-# cleanup old pytorch
-rm -rf pytorch
-git clone --recursive https://github.com/pytorch/pytorch
-cd pytorch || exit
-# if you are updating an existing checkout
-git submodule sync
-git submodule update --init --recursive
-cd ..
-
-# required packages
-# https://github.com/pytorch/benchmark/blob/main/docker/gcp-a100-runner-dind.dockerfile#L17
-sudo apt-get install --yes libpango-1.0-0 libpangoft2-1.0-0
-pip3 install expecttest psutil lightning-utilities pyre_extensions
-
-# torchbench
-if [ "$MODEL_SPEC" == "torchbench" ] || [ "$MODEL_SPEC" != "all" ]; then
-	# clean up old torchbench
-	rm -rf benchmark
-	pip3 install pyyaml
-	git clone https://github.com/pytorch/benchmark.git
-	cd benchmark || exit
-	python3 install.py
-	cd ..
-fi
-
-# timm
-if [ "$MODEL_SPEC" == "timm_models" ] || [ "$MODEL_SPEC" != "all" ]; then
-	# clean up old timm
-	rm -rf pytorch-image-models
-	git clone https://github.com/huggingface/pytorch-image-models.git
-	cd pytorch-image-models || exit
-	pip3 install -e .
-	cd ..
-fi
-
-# clean up cache
-rm -rf /tmp/torchinductor_"$(whoami)"/
-rm -rf ~/.triton/cache
-rm -rf "$TEST_REPORTS_DIR"
-
-# go back to where we started
-cd "$ROOT" || exit
diff --git a/.github/workflows/torch-inductor/scripts/install_triton.sh b/.github/workflows/torch-inductor/scripts/install_triton.sh
deleted file mode 100755
index 43367a02f5273..0000000000000
--- a/.github/workflows/torch-inductor/scripts/install_triton.sh
+++ /dev/null
@@ -1,25 +0,0 @@
-#!/bin/bash
-
-# remember where we started
-ROOT="$(pwd)"
-
-# shellcheck source=/dev/null
-source /tmp/torchinductor_venv/bin/activate
-# shellcheck source=/dev/null
-source ./.github/workflows/torch-inductor/scripts/common.sh
-
-# Triton build-time dependencies
-pip3 install --upgrade cmake ninja lit
-
-# build our own triton and preserve the wheel build for later re-use in this test run.
-cd python || exit
-pip3 uninstall pytorch-triton -y
-rm -rf build dist
-python3 setup.py bdist_wheel
-pip3 install dist/triton*.whl
-
-# clean up cache
-rm -rf ~/.triton/cache
-
-# go back to where we started
-cd "$ROOT" || exit
diff --git a/.github/workflows/torch-inductor/scripts/run_torchinductor_acc.sh b/.github/workflows/torch-inductor/scripts/run_torchinductor_acc.sh
deleted file mode 100755
index aefd798f39ff8..0000000000000
--- a/.github/workflows/torch-inductor/scripts/run_torchinductor_acc.sh
+++ /dev/null
@@ -1,55 +0,0 @@
-#!/bin/bash
-
-# remember where we started
-ROOT="$(pwd)"
-INDUCTOR="$ROOT"/.github/workflows/torch-inductor
-MODEL_SPEC=$1
-
-# shellcheck source=/dev/null
-source /tmp/torchinductor_venv/bin/activate
-# shellcheck source=/dev/null
-source "$INDUCTOR"/scripts/common.sh
-
-# Dependency of 'torch/fx/experimental/validator.py'.
-pip3 install --upgrade z3-solver
-
-# Install our own triton.
-pip3 uninstall pytorch-triton -y
-cd $ROOT/python || exit
-if [ -d "./dist" ]; then
-  pip3 install dist/triton*.whl
-else
-  rm -rf build
-  pip3 install -e .
-fi
-
-cd "$PYTORCH_DIR" || exit
-TEST_REPORTS_DIR=$TEST_REPORTS_DIR/acc
-mkdir -p "$TEST_REPORTS_DIR"
-
-for model in "${MODELS[@]}"; do
-  if [ "$model" != "$MODEL_SPEC" ] && [ "$MODEL_SPEC" != "all" ]; then
-    continue
-  fi
-  echo "Running accuracy test for $model"
-  python3 benchmarks/dynamo/"$model".py --ci --accuracy --timing --explain --inductor --inference --device cuda \
-    --output "$TEST_REPORTS_DIR"/inference_"$model".csv
-  python3 benchmarks/dynamo/"$model".py --ci --accuracy --timing --explain --inductor --training --amp --device cuda \
-    --output "$TEST_REPORTS_DIR"/training_"$model".csv
-  python3 benchmarks/dynamo/"$model".py --ci --accuracy --timing --explain --inductor --training --dynamic-shapes --device cuda \
-    --output "$TEST_REPORTS_DIR"/dynamic_shapes_"$model".csv
-done
-
-cd "$ROOT" || exit
-for model in "${MODELS[@]}"; do
-  if [ "$model" != "$MODEL_SPEC" ] && [ "$MODEL_SPEC" != "all" ]; then
-    continue
-  fi
-  echo "Checking accuracy test for $model"
-  python3 "$INDUCTOR"/scripts/check_acc.py "$TEST_REPORTS_DIR"/inference_"$model".csv
-  python3 "$INDUCTOR"/scripts/check_acc.py "$TEST_REPORTS_DIR"/training_"$model".csv
-  python3 "$INDUCTOR"/scripts/check_acc.py "$TEST_REPORTS_DIR"/dynamic_shapes_"$model".csv
-done
-
-# go back to where we started
-cd "$ROOT" || exit
diff --git a/.github/workflows/torch-inductor/scripts/run_torchinductor_perf.sh b/.github/workflows/torch-inductor/scripts/run_torchinductor_perf.sh
deleted file mode 100755
index 35853d97c8fe0..0000000000000
--- a/.github/workflows/torch-inductor/scripts/run_torchinductor_perf.sh
+++ /dev/null
@@ -1,71 +0,0 @@
-#!/bin/bash
-
-# remember where we started
-ROOT="$(pwd)"
-INDUCTOR="$ROOT"/.github/workflows/torch-inductor
-MODEL_SPEC=$1
-
-# shellcheck source=/dev/null
-source /tmp/torchinductor_venv/bin/activate
-# shellcheck source=/dev/null
-source "$INDUCTOR"/scripts/common.sh
-
-# lock GPU clocks to 1350 MHz
-sudo nvidia-smi -i 0 -pm 1
-sudo nvidia-smi -i 0 --lock-gpu-clocks=1350,1350
-
-cd "$PYTORCH_DIR" || exit
-TRITON_TEST_REPORTS_DIR=$TEST_REPORTS_DIR/perf
-BASE_TEST_REPORTS_DIR=$TEST_REPORTS_DIR/acc
-mkdir -p "$TRITON_TEST_REPORTS_DIR"
-mkdir -p "$BASE_TEST_REPORTS_DIR"
-
-# Dependency of 'pytorch/benchmarks/dynamo/common.py'.
-pip3 install pandas scipy
-
-echo "Running with Triton Nightly"
-for model in "${MODELS[@]}"; do
-  if [ "$model" != "$MODEL_SPEC" ] && [ "$MODEL_SPEC" != "all" ]; then
-    continue
-  fi
-  echo "Running performance test for $model"
-  python3 benchmarks/dynamo/"$model".py --ci --float32 --training --inductor --performance --device cuda \
-    --output "$TRITON_TEST_REPORTS_DIR"/"$model".csv
-done
-
-# install pytorch-triton
-pip3 uninstall triton -y
-pip3 install --pre pytorch-triton --extra-index-url https://download.pytorch.org/whl/nightly/cu121
-
-echo "Running with pytorch-triton"
-for model in "${MODELS[@]}"; do
-  if [ "$model" != "$MODEL_SPEC" ] && [ "$MODEL_SPEC" != "all" ]; then
-    continue
-  fi
-  echo "Running performance test for $model"
-  python3 benchmarks/dynamo/"$model".py --ci --float32 --training --inductor --performance --device cuda \
-    --output "$BASE_TEST_REPORTS_DIR"/"$model".csv
-done
-
-# uninstall pytorch-triton
-pip3 uninstall pytorch-triton -y
-
-cd "$ROOT" || exit
-for model in "${MODELS[@]}"; do
-  if [ "$model" != "$MODEL_SPEC" ] && [ "$MODEL_SPEC" != "all" ]; then
-    continue
-  fi
-  echo "Checking performance test for $model"
-  python3 "$INDUCTOR"/scripts/check_perf.py --new "$TRITON_TEST_REPORTS_DIR"/"$model".csv --baseline "$BASE_TEST_REPORTS_DIR"/"$model".csv
-  EXIT_STATUS=$?
-  if [ "$EXIT_STATUS" -ne 0 ]; then
-    echo "Performance test for $model failed"
-    exit "$EXIT_STATUS"
-  fi
-done
-
-# unlock GPU clocks
-sudo nvidia-smi -i 0 -rgc
-
-# go back to where we started
-cd "$ROOT" || exit